diff --git a/.github/workflows/muelu_clang_format.yml b/.github/workflows/muelu_clang_format.yml new file mode 100644 index 000000000000..95d2bea2f1ec --- /dev/null +++ b/.github/workflows/muelu_clang_format.yml @@ -0,0 +1,18 @@ +name: Check MueLu clang-format + +on: [pull_request] + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - uses: DoozyX/clang-format-lint-action@v0.16.2 + with: + source: './packages/muelu/src' + exclude: './lib' + extensions: 'cpp,hpp' + clangFormatVersion: 8 + style: file + inplace: False diff --git a/packages/muelu/src/.clang-format b/packages/muelu/src/.clang-format new file mode 100644 index 000000000000..2303eb8283a5 --- /dev/null +++ b/packages/muelu/src/.clang-format @@ -0,0 +1,13 @@ +#Official Tool: clang-format version 8.0.1 +#Kokkos options +BasedOnStyle: google +SortIncludes: false +AlignConsecutiveAssignments: true +AllowShortCaseLabelsOnASingleLine: true +AllowShortIfStatementsOnASingleLine: true +#MueLu-specific options +ColumnLimit: 0 +BreakConstructorInitializersBeforeComma: true +BreakConstructorInitializers: BeforeComma +ConstructorInitializerAllOnOneLineOrOnePerLine: false +ConstructorInitializerIndentWidth: 2 diff --git a/packages/muelu/src/Graph/BrickAggregation/MueLu_BrickAggregationFactory_decl.hpp b/packages/muelu/src/Graph/BrickAggregation/MueLu_BrickAggregationFactory_decl.hpp index 6b38a9f96750..07f908279e99 100644 --- a/packages/muelu/src/Graph/BrickAggregation/MueLu_BrickAggregationFactory_decl.hpp +++ b/packages/muelu/src/Graph/BrickAggregation/MueLu_BrickAggregationFactory_decl.hpp @@ -77,98 +77,96 @@ namespace MueLu { - template - class BrickAggregationFactory : public SingleLevelFactoryBase { +template +class BrickAggregationFactory : public SingleLevelFactoryBase { #undef MUELU_BRICKAGGREGATIONFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - private: - typedef Teuchos::ScalarTraits STS; + private: + typedef Teuchos::ScalarTraits STS; - // Comparator for doubles - // 
Generally, the coordinates for coarser levels would come out of averaging of fine level coordinates - // It is possible that the result of the averaging differs slightly between clusters, as we might have - // 3x2 and 2x2 cluster which would result in averaging 6 and 4 y-coordinates respectively, leading to - // slightly different results. - // Therefore, we hardcode a constant so that close points are considered the same. - class compare { - public: - bool operator()(const Scalar& x, const Scalar& y) const { - if (STS::magnitude(x - y) < 1e-14) - return false; - return STS::real(x) < STS::real(y); - } - }; - typedef std::map container; + // Comparator for doubles + // Generally, the coordinates for coarser levels would come out of averaging of fine level coordinates + // It is possible that the result of the averaging differs slightly between clusters, as we might have + // 3x2 and 2x2 cluster which would result in averaging 6 and 4 y-coordinates respectively, leading to + // slightly different results. + // Therefore, we hardcode a constant so that close points are considered the same. + class compare { + public: + bool operator()(const Scalar& x, const Scalar& y) const { + if (STS::magnitude(x - y) < 1e-14) + return false; + return STS::real(x) < STS::real(y); + } + }; + typedef std::map container; - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - BrickAggregationFactory() : nDim_(-1), nx_(-1), ny_(-1), nz_(-1), bx_(-1), by_(-1), bz_(-1) { }; + //! Constructor. + BrickAggregationFactory() + : nDim_(-1) + , nx_(-1) + , ny_(-1) + , nz_(-1) + , bx_(-1) + , by_(-1) + , bz_(-1){}; - //! Destructor. - virtual ~BrickAggregationFactory() { } + //! Destructor. + virtual ~BrickAggregationFactory() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - // Options shared by all aggregation algorithms + // Options shared by all aggregation algorithms - //! 
Input - //@{ + //! Input + //@{ - void DeclareInput(Level &currentLevel) const; + void DeclareInput(Level& currentLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - /*! @brief Build aggregates. */ - void Build(Level &currentLevel) const; + /*! @brief Build aggregates. */ + void Build(Level& currentLevel) const; - //@} + //@} - private: - void Setup(const RCP >& comm, const RCP::magnitudeType,LO,GO,NO> >& coords, const RCP& map) const; - RCP Construct1DMap(const RCP >& comm, const ArrayRCP::magnitudeType>& x) const; + private: + void Setup(const RCP >& comm, const RCP::magnitudeType, LO, GO, NO> >& coords, const RCP& map) const; + RCP Construct1DMap(const RCP >& comm, const ArrayRCP::magnitudeType>& x) const; - void BuildGraph(Level& currentLevel, const RCP& A) const; + void BuildGraph(Level& currentLevel, const RCP& A) const; + bool isDirichlet(LocalOrdinal LID) const; + bool isRoot(LocalOrdinal LID) const; + GlobalOrdinal getRoot(LocalOrdinal LID) const; + GlobalOrdinal getAggGID(LocalOrdinal LID) const; - bool isDirichlet(LocalOrdinal LID) const; - bool isRoot (LocalOrdinal LID) const; - GlobalOrdinal getRoot (LocalOrdinal LID) const; - GlobalOrdinal getAggGID(LocalOrdinal LID) const; + void getIJK(LocalOrdinal LID, int& i, int& j, int& k) const; + void getAggIJK(LocalOrdinal LID, int& i, int& j, int& k) const; - void getIJK(LocalOrdinal LID, int &i, int &j, int &k) const; - void getAggIJK(LocalOrdinal LID, int &i, int &j, int &k) const; + mutable int nDim_; + mutable RCP xMap_, yMap_, zMap_; + mutable ArrayRCP::magnitudeType> x_, y_, z_; + mutable int nx_, ny_, nz_; + mutable int bx_, by_, bz_; + mutable bool dirichletX_, dirichletY_, dirichletZ_; + mutable int naggx_, naggy_, naggz_; - mutable - int nDim_; - mutable - RCP xMap_, yMap_, zMap_; - mutable - ArrayRCP::magnitudeType> x_, y_, z_; - mutable - int nx_, ny_, nz_; - mutable - int bx_, by_, bz_; - mutable - bool dirichletX_,dirichletY_,dirichletZ_; - mutable - int 
naggx_, naggy_, naggz_; + mutable std::map revMap_; +}; // class BrickAggregationFactory - mutable - std::map revMap_; - }; // class BrickAggregationFactory - -} +} // namespace MueLu #define MUELU_BRICKAGGREGATIONFACTORY_SHORT #endif /* MUELU_BRICKAGGREGATIONFACTORY_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/BrickAggregation/MueLu_BrickAggregationFactory_def.hpp b/packages/muelu/src/Graph/BrickAggregation/MueLu_BrickAggregationFactory_def.hpp index aa6863becd89..f1c61b72e5e6 100644 --- a/packages/muelu/src/Graph/BrickAggregation/MueLu_BrickAggregationFactory_def.hpp +++ b/packages/muelu/src/Graph/BrickAggregation/MueLu_BrickAggregationFactory_def.hpp @@ -70,518 +70,506 @@ #include "MueLu_Graph.hpp" #include "MueLu_LWGraph.hpp" - namespace MueLu { - template - RCP BrickAggregationFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP BrickAggregationFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("aggregation: brick x size"); - SET_VALID_ENTRY("aggregation: brick y size"); - SET_VALID_ENTRY("aggregation: brick z size"); - SET_VALID_ENTRY("aggregation: brick x Dirichlet"); - SET_VALID_ENTRY("aggregation: brick y Dirichlet"); - SET_VALID_ENTRY("aggregation: brick z Dirichlet"); -#undef SET_VALID_ENTRY - - validParamList->set< RCP >("A", Teuchos::null, "Generating factory for matrix"); - validParamList->set< RCP >("Coordinates", Teuchos::null, "Generating factory for coordinates"); - return validParamList; + SET_VALID_ENTRY("aggregation: brick x size"); + SET_VALID_ENTRY("aggregation: brick y size"); + SET_VALID_ENTRY("aggregation: brick z size"); + SET_VALID_ENTRY("aggregation: brick x Dirichlet"); + SET_VALID_ENTRY("aggregation: brick y Dirichlet"); + SET_VALID_ENTRY("aggregation: brick z Dirichlet"); +#undef SET_VALID_ENTRY + + validParamList->set >("A", 
Teuchos::null, "Generating factory for matrix"); + validParamList->set >("Coordinates", Teuchos::null, "Generating factory for coordinates"); + return validParamList; +} + +template +void BrickAggregationFactory::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "A"); + Input(currentLevel, "Coordinates"); +} + +// The current implementation cannot deal with bricks larger than 3x3(x3) in +// parallel. The reason is that aggregation infrastructure in place has +// major drawbacks. +// +// Aggregates class is constructed with a help of a provided map, either +// taken from a graph, or provided directly. This map is usually taken to be +// a column map of a matrix. The reason for that is that if we have an +// overlapped aggregation, we want the processor owning aggregates to store +// agg id for all nodes in this aggregate. If we used row map, there would +// be no way for the processor to know whether there are some other nodes on +// a different processor which belong to its aggregate. On the other hand, +// using column map allows both vertex2AggId and procWinner arrays in +// Aggregates class to store some extra data, such as whether nodes belonging +// to a different processor belong to this processor aggregate. +// +// The drawback of this is that it stores only overlap=1 data. For aggressive +// coarsening, such a brick aggregation with a large single dimension of +// brick, it could happen that we need to know depth two or more extra nodes +// in the other processor subdomain. +// +// Another issue is that we may have some implicit connection between +// aggregate map and maps of A used in the construction of a tentative +// prolongator. +// +// Another issue is that it seems that some info is unused or not required. 
+// Specifically, it seems that if a node belongs to an aggregate on a +// different processor, we don't actually need to set vertex2AggId and +// procWinner, despite the following comment in +// Aggregates decl: +// vertex2AggId[k] gives a local id +// corresponding to the aggregate to which +// local id k has been assigned. While k +// is the local id on my processor (MyPID) +// vertex2AggId[k] is the local id on the +// processor which actually owns the +// aggregate. This owning processor has id +// given by procWinner[k]. +// It is possible that that info is only used during arbitration in +// CoupledAggregationFactory. +// +// The steps that we need to do to resolve this issue: +// - Break the link between maps in TentativePFactory, allowing any maps in Aggregates +// - Allow Aggregates to construct their own maps, if necessary, OR +// - construct aggregates based on row map +template +void BrickAggregationFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + + typedef Xpetra::MultiVector::magnitudeType, LO, GO, NO> MultiVector_d; + + const ParameterList& pL = GetParameterList(); + RCP coords = Get >(currentLevel, "Coordinates"); + RCP A = Get >(currentLevel, "A"); + RCP rowMap = A->getRowMap(); + RCP colMap = A->getColMap(); + GO GO_INVALID = Teuchos::OrdinalTraits::invalid(); + + RCP > comm = rowMap->getComm(); + int numProcs = comm->getSize(); + int myRank = comm->getRank(); + + int numPoints = colMap->getLocalNumElements(); + + bx_ = pL.get("aggregation: brick x size"); + by_ = pL.get("aggregation: brick y size"); + bz_ = pL.get("aggregation: brick z size"); + + dirichletX_ = pL.get("aggregation: brick x Dirichlet"); + dirichletY_ = pL.get("aggregation: brick y Dirichlet"); + dirichletZ_ = pL.get("aggregation: brick z Dirichlet"); + if (dirichletX_) GetOStream(Runtime0) << "Dirichlet boundaries in the x direction" << std::endl; + if (dirichletY_) GetOStream(Runtime0) << "Dirichlet boundaries in the y direction" << 
std::endl; + if (dirichletZ_) GetOStream(Runtime0) << "Dirichlet boundaries in the z direction" << std::endl; + + if (numProcs > 1) { + // TODO: deal with block size > 1 (see comments above) + //TEUCHOS_TEST_FOR_EXCEPTION(bx_ > 3 || by_ > 3 || bz_ > 3, Exceptions::RuntimeError, "Currently cannot deal with brick size > 3"); } - template - void BrickAggregationFactory::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "A"); - Input(currentLevel, "Coordinates"); + RCP overlappedCoords = coords; + RCP importer = ImportFactory::Build(coords->getMap(), colMap); + if (!importer.is_null()) { + overlappedCoords = Xpetra::MultiVectorFactory::magnitudeType, LO, GO, NO>::Build(colMap, coords->getNumVectors()); + overlappedCoords->doImport(*coords, *importer, Xpetra::INSERT); } - // The current implementation cannot deal with bricks larger than 3x3(x3) in - // parallel. The reason is that aggregation infrastructure in place has - // major drawbacks. - // - // Aggregates class is constructed with a help of a provided map, either - // taken from a graph, or provided directly. This map is usually taken to be - // a column map of a matrix. The reason for that is that if we have an - // overlapped aggregation, we want the processor owning aggregates to store - // agg id for all nodes in this aggregate. If we used row map, there would - // be no way for the processor to know whether there are some other nodes on - // a different processor which belong to its aggregate. On the other hand, - // using column map allows both vertex2AggId and procWinner arrays in - // Aggregates class to store some extra data, such as whether nodes belonging - // to a different processor belong to this processor aggregate. - // - // The drawback of this is that it stores only overlap=1 data. 
For aggressive - // coarsening, such a brick aggregation with a large single dimension of - // brick, it could happen that we need to know depth two or more extra nodes - // in the other processor subdomain. - // - // Another issue is that we may have some implicit connection between - // aggregate map and maps of A used in the construction of a tentative - // prolongator. - // - // Another issue is that it seems that some info is unused or not required. - // Specifically, it seems that if a node belongs to an aggregate on a - // different processor, we don't actually need to set vertex2AggId and - // procWinner, despite the following comment in - // Aggregates decl: - // vertex2AggId[k] gives a local id - // corresponding to the aggregate to which - // local id k has been assigned. While k - // is the local id on my processor (MyPID) - // vertex2AggId[k] is the local id on the - // processor which actually owns the - // aggregate. This owning processor has id - // given by procWinner[k]. - // It is possible that that info is only used during arbitration in - // CoupledAggregationFactory. 
- // - // The steps that we need to do to resolve this issue: - // - Break the link between maps in TentativePFactory, allowing any maps in Aggregates - // - Allow Aggregates to construct their own maps, if necessary, OR - // - construct aggregates based on row map - template - void BrickAggregationFactory::Build(Level& currentLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); - - typedef Xpetra::MultiVector::magnitudeType,LO,GO,NO> MultiVector_d; - - const ParameterList& pL = GetParameterList(); - RCP coords = Get >(currentLevel, "Coordinates"); - RCP A = Get< RCP > (currentLevel, "A"); - RCP rowMap = A->getRowMap(); - RCP colMap = A->getColMap(); - GO GO_INVALID = Teuchos::OrdinalTraits::invalid(); - - RCP > comm = rowMap->getComm(); - int numProcs = comm->getSize(); - int myRank = comm->getRank(); - - int numPoints = colMap->getLocalNumElements(); - - bx_ = pL.get("aggregation: brick x size"); - by_ = pL.get("aggregation: brick y size"); - bz_ = pL.get("aggregation: brick z size"); - - dirichletX_ = pL.get("aggregation: brick x Dirichlet"); - dirichletY_ = pL.get("aggregation: brick y Dirichlet"); - dirichletZ_ = pL.get("aggregation: brick z Dirichlet"); - if(dirichletX_) GetOStream(Runtime0) << "Dirichlet boundaries in the x direction"< 1) { - // TODO: deal with block size > 1 (see comments above) - //TEUCHOS_TEST_FOR_EXCEPTION(bx_ > 3 || by_ > 3 || bz_ > 3, Exceptions::RuntimeError, "Currently cannot deal with brick size > 3"); - } - - RCP overlappedCoords = coords; - RCP importer = ImportFactory::Build(coords->getMap(), colMap); - if (!importer.is_null()) { - overlappedCoords = Xpetra::MultiVectorFactory::magnitudeType,LO,GO,NO>::Build(colMap, coords->getNumVectors()); - overlappedCoords->doImport(*coords, *importer, Xpetra::INSERT); - } - - // Setup misc structures - // Logically, we construct enough data to query topological information of a rectangular grid - Setup(comm, overlappedCoords, colMap); - - GetOStream(Runtime0) << "Using brick 
size: " << bx_ - << (nDim_ > 1 ? "x " + toString(by_) : "") - << (nDim_ > 2 ? "x " + toString(bz_) : "") << std::endl; - - // Build the graph - BuildGraph(currentLevel,A); - - // Construct aggregates - RCP aggregates = rcp(new Aggregates(colMap)); - aggregates->setObjectLabel("Brick"); - - ArrayRCP vertex2AggId = aggregates->GetVertex2AggId()->getDataNonConst(0); - ArrayRCP procWinner = aggregates->GetProcWinner() ->getDataNonConst(0); - - // In the first pass, we set a mapping from a vertex to aggregate global id. We deal with a structured - // rectangular mesh, therefore we know the structure of aggregates. For each vertex we can tell exactly - // which aggregate it belongs to. - // If we determine that the aggregate does not belong to us (i.e. the root vertex does not belong to this - // processor, or is outside and we lost "" arbitration), we record the global aggregate id in order to - // fetch the local info from the processor owning the aggregate. This is required for aggregates, as it - // uses the local aggregate ids of the owning processor. - std::set myAggGIDs, remoteAggGIDs; - for (LO LID = 0; LID < numPoints; LID++) { - GO aggGID = getAggGID(LID); - // printf("[%d] (%d,%d,%d) => agg %d\n",LID,(int)(*xMap_)[x_[LID]],nDim_ > 1 ? (int)(*yMap_)[y_[LID]] : -1,nDim_ > 2 ? 
(int)(*zMap_)[z_[LID]] : -1,(int)aggGID); - if(aggGID == GO_INVALID) continue; - // printf("[%d] getRoot = %d\n",(int)LID,(int)getRoot(LID)); - - if ((revMap_.find(getRoot(LID)) != revMap_.end()) && rowMap->isNodeGlobalElement(colMap->getGlobalElement(revMap_[getRoot(LID)]))) { - // Root of the brick aggregate containing GID (<- LID) belongs to us - vertex2AggId[LID] = aggGID; - myAggGIDs.insert(aggGID); - - if (isRoot(LID)) - aggregates->SetIsRoot(LID); - // printf("[%d] initial vertex2AggId = %d\n",(int)LID,(int)vertex2AggId[LID]); - } else { - remoteAggGIDs.insert(aggGID); - } - } - size_t numAggregates = myAggGIDs .size(); - size_t numRemote = remoteAggGIDs.size(); - aggregates->SetNumAggregates(numAggregates); - - std::map AggG2L; // Map: Agg GID -> Agg LID (possibly on a different processor) - std::map AggG2R; // Map: Agg GID -> processor rank owning aggregate - - Array myAggGIDsArray(numAggregates), remoteAggGIDsArray(numRemote); - - // Fill in the maps for aggregates that we own - size_t ind = 0; - for (typename std::set::const_iterator it = myAggGIDs.begin(); it != myAggGIDs.end(); it++) { - AggG2L[*it] = ind; - AggG2R[*it] = myRank; - - myAggGIDsArray[ind++] = *it; + // Setup misc structures + // Logically, we construct enough data to query topological information of a rectangular grid + Setup(comm, overlappedCoords, colMap); + + GetOStream(Runtime0) << "Using brick size: " << bx_ + << (nDim_ > 1 ? "x " + toString(by_) : "") + << (nDim_ > 2 ? "x " + toString(bz_) : "") << std::endl; + + // Build the graph + BuildGraph(currentLevel, A); + + // Construct aggregates + RCP aggregates = rcp(new Aggregates(colMap)); + aggregates->setObjectLabel("Brick"); + + ArrayRCP vertex2AggId = aggregates->GetVertex2AggId()->getDataNonConst(0); + ArrayRCP procWinner = aggregates->GetProcWinner()->getDataNonConst(0); + + // In the first pass, we set a mapping from a vertex to aggregate global id. 
We deal with a structured + // rectangular mesh, therefore we know the structure of aggregates. For each vertex we can tell exactly + // which aggregate it belongs to. + // If we determine that the aggregate does not belong to us (i.e. the root vertex does not belong to this + // processor, or is outside and we lost "" arbitration), we record the global aggregate id in order to + // fetch the local info from the processor owning the aggregate. This is required for aggregates, as it + // uses the local aggregate ids of the owning processor. + std::set myAggGIDs, remoteAggGIDs; + for (LO LID = 0; LID < numPoints; LID++) { + GO aggGID = getAggGID(LID); + // printf("[%d] (%d,%d,%d) => agg %d\n",LID,(int)(*xMap_)[x_[LID]],nDim_ > 1 ? (int)(*yMap_)[y_[LID]] : -1,nDim_ > 2 ? (int)(*zMap_)[z_[LID]] : -1,(int)aggGID); + if (aggGID == GO_INVALID) continue; + // printf("[%d] getRoot = %d\n",(int)LID,(int)getRoot(LID)); + + if ((revMap_.find(getRoot(LID)) != revMap_.end()) && rowMap->isNodeGlobalElement(colMap->getGlobalElement(revMap_[getRoot(LID)]))) { + // Root of the brick aggregate containing GID (<- LID) belongs to us + vertex2AggId[LID] = aggGID; + myAggGIDs.insert(aggGID); + + if (isRoot(LID)) + aggregates->SetIsRoot(LID); + // printf("[%d] initial vertex2AggId = %d\n",(int)LID,(int)vertex2AggId[LID]); + } else { + remoteAggGIDs.insert(aggGID); } - - // The map is a convenient way to fetch remote local indices from global indices. 
- RCP aggMap = MapFactory::Build(rowMap->lib(), Teuchos::OrdinalTraits::invalid(), - myAggGIDsArray, 0, comm); - - ind = 0; - for (typename std::set::const_iterator it = remoteAggGIDs.begin(); it != remoteAggGIDs.end(); it++) - remoteAggGIDsArray[ind++] = *it; - - // Fetch the required aggregate local ids and ranks - Array remoteProcIDs(numRemote); - Array remoteLIDs (numRemote); - aggMap->getRemoteIndexList(remoteAggGIDsArray, remoteProcIDs, remoteLIDs); - - // Fill in the maps for aggregates that we don't own but which have some of our vertices - for (size_t i = 0; i < numRemote; i++) { - AggG2L[remoteAggGIDsArray[i]] = remoteLIDs [i]; - AggG2R[remoteAggGIDsArray[i]] = remoteProcIDs[i]; - } - - // Remap aggregate GIDs to LIDs and set up owning processors - for (LO LID = 0; LID < numPoints; LID++) { - if (revMap_.find(getRoot(LID)) != revMap_.end() && rowMap->isNodeGlobalElement(colMap->getGlobalElement(revMap_[getRoot(LID)]))) { - GO aggGID = vertex2AggId[LID]; - if(aggGID != MUELU_UNAGGREGATED) { - vertex2AggId[LID] = AggG2L[aggGID]; - procWinner [LID] = AggG2R[aggGID]; - } - } - } - - - GO numGlobalRemote; - MueLu_sumAll(comm, as(numRemote), numGlobalRemote); - aggregates->AggregatesCrossProcessors(numGlobalRemote); - - Set(currentLevel, "Aggregates", aggregates); - - GetOStream(Statistics1) << aggregates->description() << std::endl; } + size_t numAggregates = myAggGIDs.size(); + size_t numRemote = remoteAggGIDs.size(); + aggregates->SetNumAggregates(numAggregates); - template - void BrickAggregationFactory:: - Setup(const RCP >& comm, const RCP::magnitudeType,LO,GO,NO> >& coords, const RCP& /* map */) const { - nDim_ = coords->getNumVectors(); - - x_ = coords->getData(0); - xMap_ = Construct1DMap(comm, x_); - nx_ = xMap_->size(); - - ny_ = 1; - if (nDim_ > 1) { - y_ = coords->getData(1); - yMap_ = Construct1DMap(comm, y_); - ny_ = yMap_->size(); - } + std::map AggG2L; // Map: Agg GID -> Agg LID (possibly on a different processor) + std::map AggG2R; // Map: Agg 
GID -> processor rank owning aggregate - nz_ = 1; - if (nDim_ > 2) { - z_ = coords->getData(2); - zMap_ = Construct1DMap(comm, z_); - nz_ = zMap_->size(); - } + Array myAggGIDsArray(numAggregates), remoteAggGIDsArray(numRemote); - for (size_t ind = 0; ind < coords->getLocalLength(); ind++) { - GO i = (*xMap_)[(coords->getData(0))[ind]], j = 0, k = 0; - if (nDim_ > 1) - j = (*yMap_)[(coords->getData(1))[ind]]; - if (nDim_ > 2) - k = (*zMap_)[(coords->getData(2))[ind]]; + // Fill in the maps for aggregates that we own + size_t ind = 0; + for (typename std::set::const_iterator it = myAggGIDs.begin(); it != myAggGIDs.end(); it++) { + AggG2L[*it] = ind; + AggG2R[*it] = myRank; - revMap_[k*ny_*nx_ + j*nx_ + i] = ind; - } + myAggGIDsArray[ind++] = *it; + } - - // Get the number of aggregates in each direction, correcting for Dirichlet - int xboost = dirichletX_ ? 1 : 0; - int yboost = dirichletY_ ? 1 : 0; - int zboost = dirichletZ_ ? 1 : 0; - naggx_ = (nx_-2*xboost)/bx_ + ((nx_-2*xboost) % bx_ ? 1 : 0); + // The map is a convenient way to fetch remote local indices from global indices. + RCP aggMap = MapFactory::Build(rowMap->lib(), Teuchos::OrdinalTraits::invalid(), + myAggGIDsArray, 0, comm); - if(nDim_ > 1) - naggy_ = (ny_-2*yboost)/by_ + ( (ny_-2*yboost) % by_ ? 1 : 0); - else - naggy_ = 1; + ind = 0; + for (typename std::set::const_iterator it = remoteAggGIDs.begin(); it != remoteAggGIDs.end(); it++) + remoteAggGIDsArray[ind++] = *it; - if(nDim_ > 2) - naggz_ = (nz_-2*zboost)/bz_ + ( (nz_-2*zboost) % bz_ ? 
1 : 0); - else - naggz_ = 1; + // Fetch the required aggregate local ids and ranks + Array remoteProcIDs(numRemote); + Array remoteLIDs(numRemote); + aggMap->getRemoteIndexList(remoteAggGIDsArray, remoteProcIDs, remoteLIDs); + // Fill in the maps for aggregates that we don't own but which have some of our vertices + for (size_t i = 0; i < numRemote; i++) { + AggG2L[remoteAggGIDsArray[i]] = remoteLIDs[i]; + AggG2R[remoteAggGIDsArray[i]] = remoteProcIDs[i]; } - template - RCP::container> - BrickAggregationFactory:: - Construct1DMap (const RCP >& comm, - const ArrayRCP::magnitudeType>& x) const - { - int n = x.size(); - - // Step 1: Create a local vector with unique coordinate points - RCP gMap = rcp(new container); - for (int i = 0; i < n; i++) - (*gMap)[x[i]] = 0; - -#ifdef HAVE_MPI - // Step 2: exchange coordinates - // NOTE: we assume the coordinates are double, or double compatible - // That means that for complex case, we assume that all imaginary parts are zeros - int numProcs = comm->getSize(); - if (numProcs > 1) { - RCP > dupMpiComm = rcp_dynamic_cast >(comm->duplicate()); - - MPI_Comm rawComm = (*dupMpiComm->getRawMpiComm())(); - - int sendCnt = gMap->size(), cnt = 0, recvSize; - Array recvCnt(numProcs), Displs(numProcs); - Array sendBuf, recvBuf; - - sendBuf.resize(sendCnt); - for (typename container::const_iterator cit = gMap->begin(); cit != gMap->end(); cit++) - sendBuf[cnt++] = Teuchos::as(STS::real(cit->first)); - - MPI_Allgather(&sendCnt, 1, MPI_INT, recvCnt.getRawPtr(), 1, MPI_INT, rawComm); - Displs[0] = 0; - for (int i = 0; i < numProcs-1; i++) - Displs[i+1] = Displs[i] + recvCnt[i]; - recvSize = Displs[numProcs-1] + recvCnt[numProcs-1]; - recvBuf.resize(recvSize); - MPI_Allgatherv(sendBuf.getRawPtr(), sendCnt, MPI_DOUBLE, recvBuf.getRawPtr(), recvCnt.getRawPtr(), Displs.getRawPtr(), MPI_DOUBLE, rawComm); - - for (int i = 0; i < recvSize; i++) - (*gMap)[as(recvBuf[i])] = 0; + // Remap aggregate GIDs to LIDs and set up owning processors + for (LO 
LID = 0; LID < numPoints; LID++) { + if (revMap_.find(getRoot(LID)) != revMap_.end() && rowMap->isNodeGlobalElement(colMap->getGlobalElement(revMap_[getRoot(LID)]))) { + GO aggGID = vertex2AggId[LID]; + if (aggGID != MUELU_UNAGGREGATED) { + vertex2AggId[LID] = AggG2L[aggGID]; + procWinner[LID] = AggG2R[aggGID]; + } } -#endif - - GO cnt = 0; - for (typename container::iterator it = gMap->begin(); it != gMap->end(); it++) - it->second = cnt++; - - return gMap; } - template - bool BrickAggregationFactory::isRoot(LocalOrdinal LID) const { - int i,j,k; - getIJK(LID,i,j,k); - - return (k*ny_*nx_ + j*nx_ + i) == getRoot(LID); - } + GO numGlobalRemote; + MueLu_sumAll(comm, as(numRemote), numGlobalRemote); + aggregates->AggregatesCrossProcessors(numGlobalRemote); - template - bool BrickAggregationFactory::isDirichlet(LocalOrdinal LID) const { - bool boundary = false; - int i,j,k; - getIJK(LID,i,j,k); - if( dirichletX_ && (i == 0 || i == nx_-1) ) - boundary = true; - if(nDim_ > 1 && dirichletY_ && (j == 0 || j == ny_-1) ) - boundary = true; - if(nDim_ > 2 && dirichletZ_ && (k == 0 || k == nz_-1) ) - boundary = true; - - return boundary; - } + Set(currentLevel, "Aggregates", aggregates); + GetOStream(Statistics1) << aggregates->description() << std::endl; +} - template - GlobalOrdinal BrickAggregationFactory::getRoot(LocalOrdinal LID) const { - if(isDirichlet(LID)) - return Teuchos::OrdinalTraits::invalid(); +template +void BrickAggregationFactory:: + Setup(const RCP >& comm, const RCP::magnitudeType, LO, GO, NO> >& coords, const RCP& /* map */) const { + nDim_ = coords->getNumVectors(); - int aggI,aggJ,aggK; - getAggIJK(LID,aggI,aggJ,aggK); - int xboost = dirichletX_ ? 1 : 0; - int yboost = dirichletY_ ? 1 : 0; - int zboost = dirichletZ_ ? 1 : 0; - - int i = xboost + aggI*bx_ + (bx_-1)/2; - int j = (nDim_>1) ? yboost + aggJ*by_ + (by_-1)/2 : 0; - int k = (nDim_>2) ? 
zboost + aggK*bz_ + (bz_-1)/2 : 0; + x_ = coords->getData(0); + xMap_ = Construct1DMap(comm, x_); + nx_ = xMap_->size(); - return k*ny_*nx_ + j*nx_ + i; + ny_ = 1; + if (nDim_ > 1) { + y_ = coords->getData(1); + yMap_ = Construct1DMap(comm, y_); + ny_ = yMap_->size(); } - template - void BrickAggregationFactory::getIJK(LocalOrdinal LID, int &i, int &j, int &k) const { - i = (*xMap_)[x_[LID]]; - j = (nDim_>1) ? (*yMap_)[y_[LID]] : 0; - k = (nDim_>2) ? (*zMap_)[z_[LID]] : 0; + nz_ = 1; + if (nDim_ > 2) { + z_ = coords->getData(2); + zMap_ = Construct1DMap(comm, z_); + nz_ = zMap_->size(); } + for (size_t ind = 0; ind < coords->getLocalLength(); ind++) { + GO i = (*xMap_)[(coords->getData(0))[ind]], j = 0, k = 0; + if (nDim_ > 1) + j = (*yMap_)[(coords->getData(1))[ind]]; + if (nDim_ > 2) + k = (*zMap_)[(coords->getData(2))[ind]]; - template - void BrickAggregationFactory::getAggIJK(LocalOrdinal LID, int &i, int &j, int &k) const { - int xboost = dirichletX_ ? 1 : 0; - int yboost = dirichletY_ ? 1 : 0; - int zboost = dirichletZ_ ? 1 : 0; - int pointI, pointJ, pointK; - getIJK(LID,pointI,pointJ,pointK); - i = (pointI-xboost)/bx_; - - if (nDim_ > 1) j = (pointJ-yboost)/by_; - else j = 0; - - if (nDim_ > 2) k = (pointK-zboost)/bz_; - else k = 0; + revMap_[k * ny_ * nx_ + j * nx_ + i] = ind; } - template - GlobalOrdinal BrickAggregationFactory::getAggGID(LocalOrdinal LID) const { - bool boundary = false; + // Get the number of aggregates in each direction, correcting for Dirichlet + int xboost = dirichletX_ ? 1 : 0; + int yboost = dirichletY_ ? 1 : 0; + int zboost = dirichletZ_ ? 1 : 0; + naggx_ = (nx_ - 2 * xboost) / bx_ + ((nx_ - 2 * xboost) % bx_ ? 1 : 0); + + if (nDim_ > 1) + naggy_ = (ny_ - 2 * yboost) / by_ + ((ny_ - 2 * yboost) % by_ ? 1 : 0); + else + naggy_ = 1; + + if (nDim_ > 2) + naggz_ = (nz_ - 2 * zboost) / bz_ + ((nz_ - 2 * zboost) % bz_ ? 
1 : 0); + else + naggz_ = 1; +} + +template +RCP::container> +BrickAggregationFactory:: + Construct1DMap(const RCP >& comm, + const ArrayRCP::magnitudeType>& x) const { + int n = x.size(); + + // Step 1: Create a local vector with unique coordinate points + RCP gMap = rcp(new container); + for (int i = 0; i < n; i++) + (*gMap)[x[i]] = 0; - int i, j, k; - getIJK(LID,i,j,k); - int ii , jj, kk; - getAggIJK(LID,ii,jj,kk); - - if( dirichletX_ && (i == 0 || i == nx_ - 1)) boundary = true; - if (nDim_ > 1 && dirichletY_ && (j == 0 || j == ny_ - 1)) boundary = true; - if (nDim_ > 2 && dirichletZ_ && (k == 0 || k == nz_ - 1)) boundary = true; +#ifdef HAVE_MPI + // Step 2: exchange coordinates + // NOTE: we assume the coordinates are double, or double compatible + // That means that for complex case, we assume that all imaginary parts are zeros + int numProcs = comm->getSize(); + if (numProcs > 1) { + RCP > dupMpiComm = rcp_dynamic_cast >(comm->duplicate()); + + MPI_Comm rawComm = (*dupMpiComm->getRawMpiComm())(); + + int sendCnt = gMap->size(), cnt = 0, recvSize; + Array recvCnt(numProcs), Displs(numProcs); + Array sendBuf, recvBuf; + + sendBuf.resize(sendCnt); + for (typename container::const_iterator cit = gMap->begin(); cit != gMap->end(); cit++) + sendBuf[cnt++] = Teuchos::as(STS::real(cit->first)); + + MPI_Allgather(&sendCnt, 1, MPI_INT, recvCnt.getRawPtr(), 1, MPI_INT, rawComm); + Displs[0] = 0; + for (int i = 0; i < numProcs - 1; i++) + Displs[i + 1] = Displs[i] + recvCnt[i]; + recvSize = Displs[numProcs - 1] + recvCnt[numProcs - 1]; + recvBuf.resize(recvSize); + MPI_Allgatherv(sendBuf.getRawPtr(), sendCnt, MPI_DOUBLE, recvBuf.getRawPtr(), recvCnt.getRawPtr(), Displs.getRawPtr(), MPI_DOUBLE, rawComm); + + for (int i = 0; i < recvSize; i++) + (*gMap)[as(recvBuf[i])] = 0; + } +#endif - /* + GO cnt = 0; + for (typename container::iterator it = gMap->begin(); it != gMap->end(); it++) + it->second = cnt++; + + return gMap; +} + +template +bool 
BrickAggregationFactory::isRoot(LocalOrdinal LID) const { + int i, j, k; + getIJK(LID, i, j, k); + + return (k * ny_ * nx_ + j * nx_ + i) == getRoot(LID); +} + +template +bool BrickAggregationFactory::isDirichlet(LocalOrdinal LID) const { + bool boundary = false; + int i, j, k; + getIJK(LID, i, j, k); + if (dirichletX_ && (i == 0 || i == nx_ - 1)) + boundary = true; + if (nDim_ > 1 && dirichletY_ && (j == 0 || j == ny_ - 1)) + boundary = true; + if (nDim_ > 2 && dirichletZ_ && (k == 0 || k == nz_ - 1)) + boundary = true; + + return boundary; +} + +template +GlobalOrdinal BrickAggregationFactory::getRoot(LocalOrdinal LID) const { + if (isDirichlet(LID)) + return Teuchos::OrdinalTraits::invalid(); + + int aggI, aggJ, aggK; + getAggIJK(LID, aggI, aggJ, aggK); + int xboost = dirichletX_ ? 1 : 0; + int yboost = dirichletY_ ? 1 : 0; + int zboost = dirichletZ_ ? 1 : 0; + + int i = xboost + aggI * bx_ + (bx_ - 1) / 2; + int j = (nDim_ > 1) ? yboost + aggJ * by_ + (by_ - 1) / 2 : 0; + int k = (nDim_ > 2) ? zboost + aggK * bz_ + (bz_ - 1) / 2 : 0; + + return k * ny_ * nx_ + j * nx_ + i; +} + +template +void BrickAggregationFactory::getIJK(LocalOrdinal LID, int& i, int& j, int& k) const { + i = (*xMap_)[x_[LID]]; + j = (nDim_ > 1) ? (*yMap_)[y_[LID]] : 0; + k = (nDim_ > 2) ? (*zMap_)[z_[LID]] : 0; +} + +template +void BrickAggregationFactory::getAggIJK(LocalOrdinal LID, int& i, int& j, int& k) const { + int xboost = dirichletX_ ? 1 : 0; + int yboost = dirichletY_ ? 1 : 0; + int zboost = dirichletZ_ ? 
1 : 0; + int pointI, pointJ, pointK; + getIJK(LID, pointI, pointJ, pointK); + i = (pointI - xboost) / bx_; + + if (nDim_ > 1) + j = (pointJ - yboost) / by_; + else + j = 0; + + if (nDim_ > 2) + k = (pointK - zboost) / bz_; + else + k = 0; +} + +template +GlobalOrdinal BrickAggregationFactory::getAggGID(LocalOrdinal LID) const { + bool boundary = false; + + int i, j, k; + getIJK(LID, i, j, k); + int ii, jj, kk; + getAggIJK(LID, ii, jj, kk); + + if (dirichletX_ && (i == 0 || i == nx_ - 1)) boundary = true; + if (nDim_ > 1 && dirichletY_ && (j == 0 || j == ny_ - 1)) boundary = true; + if (nDim_ > 2 && dirichletZ_ && (k == 0 || k == nz_ - 1)) boundary = true; + + /* if(boundary) printf("[%d] coord = (%d,%d,%d) {%d,%d,%d} agg = (%d,%d,%d) {%d,%d,%d} => agg %s\n",LID,i,j,k,nx_,ny_,nz_,ii,jj,kk,naggx_,naggy_,naggz_,"BOUNDARY"); else printf("[%d] coord = (%d,%d,%d) {%d,%d,%d} agg = (%d,%d,%d) {%d,%d,%d} => agg %d\n",LID,i,j,k,nx_,ny_,nz_,ii,jj,kk,naggx_,naggy_,naggz_,kk*naggy_*naggx_ + jj*naggx_ + ii); */ - if (boundary) - return Teuchos::OrdinalTraits::invalid(); - else - return Teuchos::as(kk*naggy_*naggx_) + Teuchos::as(jj*naggx_) + ii; - - } - - - template - void BrickAggregationFactory::BuildGraph(Level& currentLevel, const RCP& A) const { - // TODO: Currently only works w/ 1 DOF per node - double dirichletThreshold = 0.0; - - if(bx_ > 1 && (nDim_ <= 1 || by_ > 1) && (nDim_ <=2 || bz_>1) ) { - FactoryMonitor m(*this, "Generating Graph (trivial)", currentLevel); - /*** Case 1: Use the matrix is the graph ***/ - // Bricks are of non-trivial size in all active dimensions - RCP graph = rcp(new Graph(A->getCrsGraph(), "graph of A")); - ArrayRCP boundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); - graph->SetBoundaryNodeMap(boundaryNodes); - - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; - for (LO i = 0; i < boundaryNodes.size(); ++i) - if (boundaryNodes[i]) - 
numLocalBoundaryNodes++; - RCP > comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; - } - Set(currentLevel, "DofsPerNode", 1); - Set(currentLevel, "Graph", graph); - Set(currentLevel, "Filtering",false); + if (boundary) + return Teuchos::OrdinalTraits::invalid(); + else + return Teuchos::as(kk * naggy_ * naggx_) + Teuchos::as(jj * naggx_) + ii; +} + +template +void BrickAggregationFactory::BuildGraph(Level& currentLevel, const RCP& A) const { + // TODO: Currently only works w/ 1 DOF per node + double dirichletThreshold = 0.0; + + if (bx_ > 1 && (nDim_ <= 1 || by_ > 1) && (nDim_ <= 2 || bz_ > 1)) { + FactoryMonitor m(*this, "Generating Graph (trivial)", currentLevel); + /*** Case 1: Use the matrix is the graph ***/ + // Bricks are of non-trivial size in all active dimensions + RCP graph = rcp(new Graph(A->getCrsGraph(), "graph of A")); + ArrayRCP boundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); + graph->SetBoundaryNodeMap(boundaryNodes); + + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; + for (LO i = 0; i < boundaryNodes.size(); ++i) + if (boundaryNodes[i]) + numLocalBoundaryNodes++; + RCP > comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; } - else { - FactoryMonitor m(*this, "Generating Graph", currentLevel); - /*** Case 2: Dropping required ***/ - // There is at least one active dimension in which we are not coarsening. 
- // Those connections need to be dropped - bool drop_x = (bx_ == 1); - bool drop_y = (nDim_> 1 && by_ == 1); - bool drop_z = (nDim_> 2 && bz_ == 1); - - ArrayRCP rows (A->getLocalNumRows()+1); - ArrayRCP columns(A->getLocalNumEntries()); - - size_t N = A->getRowMap()->getLocalNumElements(); - - // FIXME: Do this on the host because indexing functions are host functions - auto G = A->getLocalMatrixHost().graph; - auto rowptr = G.row_map; - auto colind = G.entries; - - int ct=0; - rows[0] = 0; - for(size_t row=0; rowgetColMap()->getLocalElement(A->getRowMap()->getGlobalElement(row)); - getIJK(row2,ir,jr,kr); - - for(size_t cidx=rowptr[row]; cidx 1 && by_ == 1); + bool drop_z = (nDim_ > 2 && bz_ == 1); + + ArrayRCP rows(A->getLocalNumRows() + 1); + ArrayRCP columns(A->getLocalNumEntries()); + + size_t N = A->getRowMap()->getLocalNumElements(); + + // FIXME: Do this on the host because indexing functions are host functions + auto G = A->getLocalMatrixHost().graph; + auto rowptr = G.row_map; + auto colind = G.entries; + + int ct = 0; + rows[0] = 0; + for (size_t row = 0; row < N; row++) { + // NOTE: Assumes that the first part of the colmap is the rowmap + int ir, jr, kr; + LO row2 = A->getColMap()->getLocalElement(A->getRowMap()->getGlobalElement(row)); + getIJK(row2, ir, jr, kr); + + for (size_t cidx = rowptr[row]; cidx < rowptr[row + 1]; cidx++) { + int ic, jc, kc; + LO col = colind[cidx]; + getIJK(col, ic, jc, kc); + + if ((row2 != col) && ((drop_x && ir != ic) || (drop_y && jr != jc) || (drop_z && kr != kc))) { + // Drop it + // printf("[%4d] DROP row = (%d,%d,%d) col = (%d,%d,%d)\n",(int)row,ir,jr,kr,ic,jc,kc); + } else { + // Keep it + // printf("[%4d] KEEP row = (%d,%d,%d) col = (%d,%d,%d)\n",(int)row,ir,jr,kr,ic,jc,kc); + columns[ct] = col; + ct++; } - rows[row+1] = ct; - }//end for - - RCP graph = rcp(new LWGraph(rows, columns, A->getRowMap(), A->getColMap(), "thresholded graph of A")); - - - ArrayRCP boundaryNodes = 
Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); - graph->SetBoundaryNodeMap(boundaryNodes); - - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; - for (LO i = 0; i < boundaryNodes.size(); ++i) - if (boundaryNodes[i]) - numLocalBoundaryNodes++; - RCP > comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; } - Set(currentLevel, "DofsPerNode", 1); - Set(currentLevel, "Graph", graph); - Set(currentLevel, "Filtering",true); - }//end else - - - }//end BuildGraph - - + rows[row + 1] = ct; + } //end for + + RCP graph = rcp(new LWGraph(rows, columns, A->getRowMap(), A->getColMap(), "thresholded graph of A")); + + ArrayRCP boundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); + graph->SetBoundaryNodeMap(boundaryNodes); + + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; + for (LO i = 0; i < boundaryNodes.size(); ++i) + if (boundaryNodes[i]) + numLocalBoundaryNodes++; + RCP > comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; + } + Set(currentLevel, "DofsPerNode", 1); + Set(currentLevel, "Graph", graph); + Set(currentLevel, "Filtering", true); + } //end else +} //end BuildGraph -} //namespace MueLu +} //namespace MueLu #endif /* MUELU_BRICKAGGREGATIONFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/Containers/MueLu_Aggregates_decl.hpp b/packages/muelu/src/Graph/Containers/MueLu_Aggregates_decl.hpp index 056673b7b9db..ac37114160af 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_Aggregates_decl.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_Aggregates_decl.hpp @@ 
-66,20 +66,20 @@ #include "MueLu_IndexManager.hpp" #include "MueLu_IndexManager_kokkos.hpp" -#define MUELU_UNAGGREGATED -1 /* indicates that a node is unassigned to */ - /* any aggregate. */ - -#define MUELU_UNASSIGNED -1 /* indicates a vertex is not yet claimed */ - /* by a processor during aggregation. */ - /* Note, it is possible at */ - /* this stage that some processors may have*/ - /* claimed their copy of a vertex for one */ - /* of their aggregates. However, some */ - /* arbitration still needs to occur. */ - /* The corresponding procWinner[]'s remain */ - /* as MUELU_UNASSIGNED until */ - /* ArbitrateAndCommunicate() is */ - /* invoked to arbitrate. */ +#define MUELU_UNAGGREGATED -1 /* indicates that a node is unassigned to */ + /* any aggregate. */ + +#define MUELU_UNASSIGNED -1 /* indicates a vertex is not yet claimed */ + /* by a processor during aggregation. */ + /* Note, it is possible at */ + /* this stage that some processors may have*/ + /* claimed their copy of a vertex for one */ + /* of their aggregates. However, some */ + /* arbitration still needs to occur. */ + /* The corresponding procWinner[]'s remain */ + /* as MUELU_UNASSIGNED until */ + /* ArbitrateAndCommunicate() is */ + /* invoked to arbitrate. */ /***************************************************************************** @@ -102,173 +102,172 @@ namespace MueLu { where rows (or vertices) correspond to aggregates and colunmns (or edges) correspond to nodes. While not strictly necessary, it might be convenient. 
*/ - template - class Aggregates; - - template - class Aggregates > : public BaseClass { - public: - using local_ordinal_type = LocalOrdinal; - using global_ordinal_type = GlobalOrdinal; - using execution_space = typename DeviceType::execution_space; - using node_type = Tpetra::KokkosCompat::KokkosDeviceWrapperNode; - using device_type = DeviceType; - using range_type = Kokkos::RangePolicy; - using LO_view = Kokkos::View; - - using aggregates_sizes_type = Kokkos::View; - - private: - // For compatibility - typedef node_type Node; +template +class Aggregates; + +template +class Aggregates > : public BaseClass { + public: + using local_ordinal_type = LocalOrdinal; + using global_ordinal_type = GlobalOrdinal; + using execution_space = typename DeviceType::execution_space; + using node_type = Tpetra::KokkosCompat::KokkosDeviceWrapperNode; + using device_type = DeviceType; + using range_type = Kokkos::RangePolicy; + using LO_view = Kokkos::View; + + using aggregates_sizes_type = Kokkos::View; + + private: + // For compatibility + typedef node_type Node; #undef MUELU_AGGREGATES_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: + public: + // Defining types that require the short names included above + using local_graph_type = typename LWGraph_kokkos::local_graph_type; + using colors_view_type = Kokkos::View; - // Defining types that require the short names included above - using local_graph_type = typename LWGraph_kokkos::local_graph_type; - using colors_view_type = Kokkos::View; - - /*! @brief Standard constructor for Aggregates structure + /*! @brief Standard constructor for Aggregates structure * * Standard constructor of aggregates takes a Graph object as parameter. * Uses the graph.GetImportMap() to initialize the internal vector for mapping nodes to (local) aggregate ids as well as * the mapping of node to the owning processor id. * */ - Aggregates(const GraphBase & graph); + Aggregates(const GraphBase& graph); - /*! 
@brief Standard constructor for Aggregates structure + /*! @brief Standard constructor for Aggregates structure * * Standard constructor of aggregates takes a LWGraph object as parameter. * Uses the graph.GetImportMap() to initialize the internal vector for mapping nodes to (local) aggregate ids as well as * the mapping of node to the owning processor id. * */ - Aggregates(LWGraph_kokkos graph); + Aggregates(LWGraph_kokkos graph); - /*! @brief Constructor for Aggregates structure + /*! @brief Constructor for Aggregates structure * * This constructor takes a RCP pointer to a map which is used for the internal mappings of nodes to the (local) aggregate ids and the owning processor. * */ - Aggregates(const RCP& map); + Aggregates(const RCP& map); - /*! @brief Destructor + /*! @brief Destructor * */ - virtual ~Aggregates() { } + virtual ~Aggregates() {} - //! @name Set/Get Methods for specific aggregation data - //@{ + //! @name Set/Get Methods for specific aggregation data + //@{ - /*! @brief Get the index manager used by structured aggregation algorithms. + /*! @brief Get the index manager used by structured aggregation algorithms. This has to be done by the aggregation factory. */ - RCP& GetIndexManagerKokkos() { return geoDataKokkos_; } + RCP& GetIndexManagerKokkos() { return geoDataKokkos_; } - /*! @brief Set the index manager used by structured aggregation algorithms. + /*! @brief Set the index manager used by structured aggregation algorithms. This has to be done by the aggregation factory. */ - void SetIndexManagerKokkos(RCP & geoDataKokkos) { geoDataKokkos_ = geoDataKokkos; } + void SetIndexManagerKokkos(RCP& geoDataKokkos) { geoDataKokkos_ = geoDataKokkos; } - /*! @brief Get the index manager used by various aggregation algorithms. + /*! @brief Get the index manager used by various aggregation algorithms. This has to be done by the aggregation factory. */ - RCP& GetIndexManager() { return geoData_; } + RCP& GetIndexManager() { return geoData_; } - /*! 
@brief Set the index manager used by various aggregation algorithms. + /*! @brief Set the index manager used by various aggregation algorithms. This has to be done by the aggregation factory. */ - void SetIndexManager(RCP & geoData) { geoData_ = geoData; } + void SetIndexManager(RCP& geoData) { geoData_ = geoData; } - /*! @brief Get a distance 2 coloring of the underlying graph. + /*! @brief Get a distance 2 coloring of the underlying graph. The coloring is computed and set during Phase1 of aggregation. */ - colors_view_type& GetGraphColors() { return graphColors_; } + colors_view_type& GetGraphColors() { return graphColors_; } - /*! @brief Set a distance 2 coloring of the underlying graph. + /*! @brief Set a distance 2 coloring of the underlying graph. The coloring is computed and set during Phase1 of aggregation. */ - void SetGraphColors(colors_view_type graphColors) { graphColors_ = graphColors; } + void SetGraphColors(colors_view_type graphColors) { graphColors_ = graphColors; } - /*! @brief Get the number of colors needed by the distance 2 coloring. + /*! @brief Get the number of colors needed by the distance 2 coloring. */ - LO GetGraphNumColors() { return graphNumColors_; } + LO GetGraphNumColors() { return graphNumColors_; } - /*! @brief Set the number of colors needed by the distance 2 coloring. + /*! @brief Set the number of colors needed by the distance 2 coloring. */ - void SetGraphNumColors(const LO graphNumColors) { graphNumColors_ = graphNumColors; } + void SetGraphNumColors(const LO graphNumColors) { graphNumColors_ = graphNumColors; } - //@} + //@} - /*! @brief Set number of local aggregates on current processor. + /*! @brief Set number of local aggregates on current processor. This has to be done by the aggregation routines. */ - void SetNumAggregates(LO nAggregates) { numAggregates_ = nAggregates; } + void SetNumAggregates(LO nAggregates) { numAggregates_ = nAggregates; } - /*! @brief Set number of global aggregates on current processor. + /*! 
@brief Set number of global aggregates on current processor. This has to be done by the aggregation routines. */ - void SetNumGlobalAggregates(GO nGlobalAggregates) { numGlobalAggregates_ = nGlobalAggregates; } + void SetNumGlobalAggregates(GO nGlobalAggregates) { numGlobalAggregates_ = nGlobalAggregates; } - ///< returns the number of aggregates of the current processor. Note: could/should be renamed to GetNumLocalAggregates? - KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const { - return numAggregates_; - } + ///< returns the number of aggregates of the current processor. Note: could/should be renamed to GetNumLocalAggregates? + KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const { + return numAggregates_; + } - //! @brief Record whether aggregates include DOFs from other processes. - KOKKOS_INLINE_FUNCTION void AggregatesCrossProcessors(const bool& flag) { - aggregatesIncludeGhosts_ = flag; - } + //! @brief Record whether aggregates include DOFs from other processes. + KOKKOS_INLINE_FUNCTION void AggregatesCrossProcessors(const bool& flag) { + aggregatesIncludeGhosts_ = flag; + } - /*! @brief Return false if and only if no aggregates include DOFs from other processes. + /*! @brief Return false if and only if no aggregates include DOFs from other processes. Used in construction of tentative prolongator to skip a communication phase. */ - KOKKOS_INLINE_FUNCTION bool AggregatesCrossProcessors() const { - return aggregatesIncludeGhosts_; - } + KOKKOS_INLINE_FUNCTION bool AggregatesCrossProcessors() const { + return aggregatesIncludeGhosts_; + } - /*! @brief Returns a nonconstant vector that maps local node IDs to local aggregates IDs. + /*! @brief Returns a nonconstant vector that maps local node IDs to local aggregates IDs. For local node ID i, the corresponding vector entry v[i] is the local aggregate id to which i belongs on the current processor. 
*/ - RCP& GetVertex2AggIdNonConst() { return vertex2AggId_; } + RCP& GetVertex2AggIdNonConst() { return vertex2AggId_; } - /*! @brief Returns nonconstant vector that maps local node IDs to owning processor IDs. + /*! @brief Returns nonconstant vector that maps local node IDs to owning processor IDs. For local node ID i, the corresponding vector entry v[i] is the owning processor ID. */ - RCP& GetProcWinnerNonConst() { return procWinner_; } - /*! @brief Returns constant vector that maps local node IDs to local aggregates IDs. + RCP& GetProcWinnerNonConst() { return procWinner_; } + /*! @brief Returns constant vector that maps local node IDs to local aggregates IDs. For local node ID i, the corresponding vector entry v[i] is the local aggregate id to which i belongs on the current processor. */ - const RCP& GetVertex2AggId() const { return vertex2AggId_; } + const RCP& GetVertex2AggId() const { return vertex2AggId_; } - /*! @brief Returns constant vector that maps local node IDs to owning processor IDs. + /*! @brief Returns constant vector that maps local node IDs to owning processor IDs. For local node ID i, the corresponding vector entry v[i] is the owning processor ID. */ - const RCP& GetProcWinner() const { return procWinner_; } + const RCP& GetProcWinner() const { return procWinner_; } - //! Returns true if node with given local node id is marked to be a root node - inline bool IsRoot(LO i) const { return isRoot_[i]; } + //! Returns true if node with given local node id is marked to be a root node + inline bool IsRoot(LO i) const { return isRoot_[i]; } - /*! @brief Set root node information. + /*! @brief Set root node information. Used by aggregation methods only. 
*/ - inline void SetIsRoot(LO i, bool value = true) { isRoot_[i] = value; } + inline void SetIsRoot(LO i, bool value = true) { isRoot_[i] = value; } - const RCP GetMap() const; ///< returns (overlapping) map of aggregate/node distribution + const RCP GetMap() const; ///< returns (overlapping) map of aggregate/node distribution - /*! @brief Compute sizes of aggregates + /*! @brief Compute sizes of aggregates Returns the number of nodes in each aggregate in an array. If the aggregate sizes are not stored internally (which is the default), they are computed and returned. @@ -277,9 +276,9 @@ namespace MueLu { @param[in] forceRecompute if true, force recomputation of the aggregate sizes. */ - typename aggregates_sizes_type::const_type ComputeAggregateSizes(bool forceRecompute = false) const; + typename aggregates_sizes_type::const_type ComputeAggregateSizes(bool forceRecompute = false) const; - /*! @brief Compute sizes of aggregates + /*! @brief Compute sizes of aggregates Returns the number of nodes in each aggregate in an array. If the aggregate sizes are not stored internally (which is the default), they are computed and returned. @@ -288,89 +287,87 @@ namespace MueLu { @param[in] forceRecompute if true, force recomputation of the aggregate sizes. */ - Teuchos::ArrayRCP ComputeAggregateSizesArrayRCP(bool forceRecompute = false) const; + Teuchos::ArrayRCP ComputeAggregateSizesArrayRCP(bool forceRecompute = false) const; - local_graph_type GetGraph() const; + local_graph_type GetGraph() const; - /*! @brief Generates a compressed list of nodes in each aggregate, where + /*! @brief Generates a compressed list of nodes in each aggregate, where the entries in aggNodes[aggPtr[i]] up to aggNodes[aggPtr[i+1]-1] contain the nodes in aggregate i. unaggregated contains the list of nodes which are, for whatever reason, not aggregated (e.g. 
Dirichlet) */ - void ComputeNodesInAggregate(LO_view & aggPtr, LO_view & aggNodes, LO_view & unaggregated) const; + void ComputeNodesInAggregate(LO_view& aggPtr, LO_view& aggNodes, LO_view& unaggregated) const; - //! Get global number of aggregates - // If # of global aggregates is unknown, this method does coummunication and internally record the value - GO GetNumGlobalAggregatesComputeIfNeeded(); + //! Get global number of aggregates + // If # of global aggregates is unknown, this method does coummunication and internally record the value + GO GetNumGlobalAggregatesComputeIfNeeded(); - //! @name Overridden from Teuchos::Describable - //@{ + //! @name Overridden from Teuchos::Describable + //@{ - //! Return a simple one-line description of this object. - std::string description() const; + //! Return a simple one-line description of this object. + std::string description() const; - //! Print the object with some verbosity level to an FancyOStream object. - //using MueLu::Describable::describe; // overloading, not hiding - void print(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel = verbLevel_default) const; + //! Print the object with some verbosity level to an FancyOStream object. + //using MueLu::Describable::describe; // overloading, not hiding + void print(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel verbLevel = verbLevel_default) const; - private: - LO numAggregates_; ///< Number of aggregates on this processor - GO numGlobalAggregates_; ///< Number of global aggregates + private: + LO numAggregates_; ///< Number of aggregates on this processor + GO numGlobalAggregates_; ///< Number of global aggregates - /*! vertex2AggId[k] gives a local id corresponding to the aggregate to which + /*! vertex2AggId[k] gives a local id corresponding to the aggregate to which * local id k has been assigned. While k is the local id on my processor (MyPID), * vertex2AggId[k] is the local id on the processor which actually owns the aggregate. 
*/ - RCP vertex2AggId_; + RCP vertex2AggId_; - /*! + /*! * If k is the local id on my processor (MyPID), the owning processor has the * id given by procWinner[k] */ - RCP procWinner_; + RCP procWinner_; - /*! geoData stores an index manager object that is used to perform structured aggreation + /*! geoData stores an index manager object that is used to perform structured aggreation * on a problem. */ - RCP geoDataKokkos_; + RCP geoDataKokkos_; - /*! geoData stores an index manager object that is used to perform structured aggreation + /*! geoData stores an index manager object that is used to perform structured aggreation * on a problem. */ - RCP geoData_; + RCP geoData_; - /*! graphColors_ stores a view that assigns a color to each node in the graph + /*! graphColors_ stores a view that assigns a color to each node in the graph * These colors are used to parallelize the aggregation process in UncoupledAggregation */ - colors_view_type graphColors_; + colors_view_type graphColors_; - /*! graphNumColors_ stores the number of colors that are needed to perform a distance 2 + /*! graphNumColors_ stores the number of colors that are needed to perform a distance 2 * coloring of the underlying graph. */ - LO graphNumColors_; + LO graphNumColors_; - //! An ArrayRCP of booleans specifying if a local entry is an aggregate root. - Teuchos::ArrayRCP isRoot_; + //! An ArrayRCP of booleans specifying if a local entry is an aggregate root. + Teuchos::ArrayRCP isRoot_; - //! Set to false iff aggregates do not include any DOFs belong to other processes. - bool aggregatesIncludeGhosts_; + //! Set to false iff aggregates do not include any DOFs belong to other processes. + bool aggregatesIncludeGhosts_; - //! Array of sizes of each local aggregate. - mutable - aggregates_sizes_type aggregateSizes_; + //! Array of sizes of each local aggregate. + mutable aggregates_sizes_type aggregateSizes_; - /*! aggragateSizesHost_ is a host copy of aggregate sizes, which + /*! 
aggragateSizesHost_ is a host copy of aggregate sizes, which * helps slightly reduce the cost of calling ComputeAggregateSizes * from different parts of MueLu that require such data on the host device. */ - mutable - typename aggregates_sizes_type::HostMirror aggregateSizesHost_; + mutable + typename aggregates_sizes_type::HostMirror aggregateSizesHost_; - //! Aggregates represented as Kokkos graph type - mutable - local_graph_type graph_; - }; + //! Aggregates represented as Kokkos graph type + mutable local_graph_type graph_; +}; -} //namespace MueLu +} //namespace MueLu #define MUELU_AGGREGATES_SHORT -#endif // MUELU_AGGREGATES_DECL_HPP +#endif // MUELU_AGGREGATES_DECL_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_Aggregates_def.hpp b/packages/muelu/src/Graph/Containers/MueLu_Aggregates_def.hpp index c9940f5524ac..9f2f07f345b6 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_Aggregates_def.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_Aggregates_def.hpp @@ -59,254 +59,262 @@ namespace MueLu { - template - Aggregates >::Aggregates(const GraphBase & graph) { - numAggregates_ = 0; - numGlobalAggregates_ = 0; +template +Aggregates>::Aggregates(const GraphBase& graph) { + numAggregates_ = 0; + numGlobalAggregates_ = 0; - vertex2AggId_ = LOMultiVectorFactory::Build(graph.GetImportMap(), 1); - vertex2AggId_->putScalar(MUELU_UNAGGREGATED); + vertex2AggId_ = LOMultiVectorFactory::Build(graph.GetImportMap(), 1); + vertex2AggId_->putScalar(MUELU_UNAGGREGATED); - procWinner_ = LOVectorFactory::Build(graph.GetImportMap()); - procWinner_->putScalar(MUELU_UNASSIGNED); + procWinner_ = LOVectorFactory::Build(graph.GetImportMap()); + procWinner_->putScalar(MUELU_UNASSIGNED); - isRoot_ = Teuchos::ArrayRCP(graph.GetImportMap()->getLocalNumElements(), false); + isRoot_ = Teuchos::ArrayRCP(graph.GetImportMap()->getLocalNumElements(), false); - // slow but safe, force TentativePFactory to build column map for P itself - aggregatesIncludeGhosts_ = true; - } + 
// slow but safe, force TentativePFactory to build column map for P itself + aggregatesIncludeGhosts_ = true; +} - template - Aggregates >:: - Aggregates(LWGraph_kokkos graph) { - numAggregates_ = 0; - numGlobalAggregates_ = 0; +template +Aggregates>:: + Aggregates(LWGraph_kokkos graph) { + numAggregates_ = 0; + numGlobalAggregates_ = 0; - vertex2AggId_ = LOMultiVectorFactory::Build(graph.GetImportMap(), 1); - vertex2AggId_->putScalar(MUELU_UNAGGREGATED); + vertex2AggId_ = LOMultiVectorFactory::Build(graph.GetImportMap(), 1); + vertex2AggId_->putScalar(MUELU_UNAGGREGATED); - procWinner_ = LOVectorFactory::Build(graph.GetImportMap()); - procWinner_->putScalar(MUELU_UNASSIGNED); + procWinner_ = LOVectorFactory::Build(graph.GetImportMap()); + procWinner_->putScalar(MUELU_UNASSIGNED); - isRoot_ = Teuchos::ArrayRCP(graph.GetImportMap()->getLocalNumElements(), false); + isRoot_ = Teuchos::ArrayRCP(graph.GetImportMap()->getLocalNumElements(), false); - // slow but safe, force TentativePFactory to build column map for P itself - aggregatesIncludeGhosts_ = true; - } + // slow but safe, force TentativePFactory to build column map for P itself + aggregatesIncludeGhosts_ = true; +} - template - Aggregates >:: - Aggregates(const RCP& map) { - numAggregates_ = 0; - numGlobalAggregates_ = 0; +template +Aggregates>:: + Aggregates(const RCP& map) { + numAggregates_ = 0; + numGlobalAggregates_ = 0; - vertex2AggId_ = LOMultiVectorFactory::Build(map, 1); - vertex2AggId_->putScalar(MUELU_UNAGGREGATED); + vertex2AggId_ = LOMultiVectorFactory::Build(map, 1); + vertex2AggId_->putScalar(MUELU_UNAGGREGATED); - procWinner_ = LOVectorFactory::Build(map); - procWinner_->putScalar(MUELU_UNASSIGNED); + procWinner_ = LOVectorFactory::Build(map); + procWinner_->putScalar(MUELU_UNASSIGNED); - isRoot_ = Teuchos::ArrayRCP(map->getLocalNumElements(), false); + isRoot_ = Teuchos::ArrayRCP(map->getLocalNumElements(), false); - // slow but safe, force TentativePFactory to build column map for P itself - 
aggregatesIncludeGhosts_ = true; - } + // slow but safe, force TentativePFactory to build column map for P itself + aggregatesIncludeGhosts_ = true; +} - template - typename Aggregates >::aggregates_sizes_type::const_type - Aggregates >::ComputeAggregateSizes(bool forceRecompute) const { - if (aggregateSizes_.size() && !forceRecompute) { - return aggregateSizes_; +template +typename Aggregates>::aggregates_sizes_type::const_type +Aggregates>::ComputeAggregateSizes(bool forceRecompute) const { + if (aggregateSizes_.size() && !forceRecompute) { + return aggregateSizes_; - } else { - // It is necessary to initialize this to 0 - aggregates_sizes_type aggregateSizes("aggregates", numAggregates_); + } else { + // It is necessary to initialize this to 0 + aggregates_sizes_type aggregateSizes("aggregates", numAggregates_); - int myPID = GetMap()->getComm()->getRank(); + int myPID = GetMap()->getComm()->getRank(); - auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly); - auto procWinner = procWinner_ ->getDeviceLocalView(Xpetra::Access::ReadOnly); + auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly); + auto procWinner = procWinner_->getDeviceLocalView(Xpetra::Access::ReadOnly); - typename AppendTrait::type aggregateSizesAtomic = aggregateSizes; - Kokkos::parallel_for("MueLu:Aggregates:ComputeAggregateSizes:for", range_type(0,procWinner.size()), + typename AppendTrait::type aggregateSizesAtomic = aggregateSizes; + Kokkos::parallel_for( + "MueLu:Aggregates:ComputeAggregateSizes:for", range_type(0, procWinner.size()), KOKKOS_LAMBDA(const LO i) { if (procWinner(i, 0) == myPID) aggregateSizesAtomic(vertex2AggId(i, 0))++; }); - aggregateSizes_ = aggregateSizes; - - return aggregateSizes; - } + aggregateSizes_ = aggregateSizes; + return aggregateSizes; } - - template - typename Teuchos::ArrayRCP - Aggregates >:: - ComputeAggregateSizesArrayRCP(bool forceRecompute) const { - auto aggregateSizes = 
this->ComputeAggregateSizes(forceRecompute); - - // if this is the first time this is called, setup the host mirror and fill it - if(!aggregateSizesHost_.is_allocated()) { - aggregateSizesHost_ = Kokkos::create_mirror_view(aggregateSizes); +} + +template +typename Teuchos::ArrayRCP +Aggregates>:: + ComputeAggregateSizesArrayRCP(bool forceRecompute) const { + auto aggregateSizes = this->ComputeAggregateSizes(forceRecompute); + + // if this is the first time this is called, setup the host mirror and fill it + if (!aggregateSizesHost_.is_allocated()) { + aggregateSizesHost_ = Kokkos::create_mirror_view(aggregateSizes); + Kokkos::deep_copy(aggregateSizesHost_, aggregateSizes); + } else { + // otherwise, only update if we forced a recompute + if (forceRecompute) Kokkos::deep_copy(aggregateSizesHost_, aggregateSizes); - } else { - // otherwise, only update if we forced a recompute - if(forceRecompute) - Kokkos::deep_copy(aggregateSizesHost_, aggregateSizes); - } + } - // put the data in an ArrayRCP, but do not give it ownership of the data - Teuchos::ArrayRCP aggregateSizesArrayRCP(aggregateSizesHost_.data(),0,aggregateSizesHost_.extent(0),false); + // put the data in an ArrayRCP, but do not give it ownership of the data + Teuchos::ArrayRCP aggregateSizesArrayRCP(aggregateSizesHost_.data(), 0, aggregateSizesHost_.extent(0), false); - return aggregateSizesArrayRCP; - } + return aggregateSizesArrayRCP; +} - template - typename Aggregates >::local_graph_type - Aggregates >::GetGraph() const { - using row_map_type = typename local_graph_type::row_map_type; - using entries_type = typename local_graph_type::entries_type; - using size_type = typename local_graph_type::size_type; +template +typename Aggregates>::local_graph_type +Aggregates>::GetGraph() const { + using row_map_type = typename local_graph_type::row_map_type; + using entries_type = typename local_graph_type::entries_type; + using size_type = typename local_graph_type::size_type; - auto numAggregates = 
numAggregates_; + auto numAggregates = numAggregates_; - if (static_cast(graph_.numRows()) == numAggregates) - return graph_; + if (static_cast(graph_.numRows()) == numAggregates) + return graph_; - auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly); - auto procWinner = procWinner_ ->getDeviceLocalView(Xpetra::Access::ReadOnly); - auto sizes = ComputeAggregateSizes(); + auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly); + auto procWinner = procWinner_->getDeviceLocalView(Xpetra::Access::ReadOnly); + auto sizes = ComputeAggregateSizes(); - // FIXME_KOKKOS: replace by ViewAllocateWithoutInitializing + rows(0) = 0. - typename row_map_type::non_const_type rows("Agg_rows", numAggregates+1); // rows(0) = 0 automatically + // FIXME_KOKKOS: replace by ViewAllocateWithoutInitializing + rows(0) = 0. + typename row_map_type::non_const_type rows("Agg_rows", numAggregates + 1); // rows(0) = 0 automatically - // parallel_scan (exclusive) - Kokkos::parallel_scan("MueLu:Aggregates:GetGraph:compute_rows", range_type(0, numAggregates), + // parallel_scan (exclusive) + Kokkos::parallel_scan( + "MueLu:Aggregates:GetGraph:compute_rows", range_type(0, numAggregates), KOKKOS_LAMBDA(const LO i, LO& update, const bool& final_pass) { update += sizes(i); if (final_pass) - rows(i+1) = update; + rows(i + 1) = update; }); - decltype(rows) offsets(Kokkos::ViewAllocateWithoutInitializing("Agg_offsets"), numAggregates+1); // +1 is just for ease - Kokkos::deep_copy(offsets, rows); + decltype(rows) offsets(Kokkos::ViewAllocateWithoutInitializing("Agg_offsets"), numAggregates + 1); // +1 is just for ease + Kokkos::deep_copy(offsets, rows); - int myPID = GetMap()->getComm()->getRank(); + int myPID = GetMap()->getComm()->getRank(); - size_type numNNZ; - { - Kokkos::View numNNZ_device = Kokkos::subview(rows, numAggregates); - typename Kokkos::View::HostMirror numNNZ_host = Kokkos::create_mirror_view(numNNZ_device); - 
Kokkos::deep_copy(numNNZ_host, numNNZ_device); - numNNZ = numNNZ_host(); - } - typename entries_type::non_const_type cols(Kokkos::ViewAllocateWithoutInitializing("Agg_cols"), numNNZ); - size_t realnnz = 0; - Kokkos::parallel_reduce("MueLu:Aggregates:GetGraph:compute_cols", range_type(0, procWinner.size()), + size_type numNNZ; + { + Kokkos::View numNNZ_device = Kokkos::subview(rows, numAggregates); + typename Kokkos::View::HostMirror numNNZ_host = Kokkos::create_mirror_view(numNNZ_device); + Kokkos::deep_copy(numNNZ_host, numNNZ_device); + numNNZ = numNNZ_host(); + } + typename entries_type::non_const_type cols(Kokkos::ViewAllocateWithoutInitializing("Agg_cols"), numNNZ); + size_t realnnz = 0; + Kokkos::parallel_reduce( + "MueLu:Aggregates:GetGraph:compute_cols", range_type(0, procWinner.size()), KOKKOS_LAMBDA(const LO i, size_t& nnz) { if (procWinner(i, 0) == myPID) { - typedef typename std::remove_reference< decltype( offsets(0) ) >::type atomic_incr_type; - auto idx = Kokkos::atomic_fetch_add( &offsets(vertex2AggId(i,0)), atomic_incr_type(1)); + typedef typename std::remove_reference::type atomic_incr_type; + auto idx = Kokkos::atomic_fetch_add(&offsets(vertex2AggId(i, 0)), atomic_incr_type(1)); cols(idx) = i; nnz++; } - }, realnnz); - TEUCHOS_TEST_FOR_EXCEPTION(realnnz != numNNZ, Exceptions::RuntimeError, - "MueLu: Internal error: Something is wrong with aggregates graph construction: numNNZ = " << numNNZ << " != " << realnnz << " = realnnz"); - - graph_ = local_graph_type(cols, rows); - - return graph_; - } - - template - void - Aggregates >::ComputeNodesInAggregate(LO_view & aggPtr, LO_view & aggNodes, LO_view & unaggregated) const { - LO numAggs = GetNumAggregates(); - LO numNodes = vertex2AggId_->getLocalLength(); - auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly); - typename aggregates_sizes_type::const_type aggSizes = ComputeAggregateSizes(true); - LO INVALID = Teuchos::OrdinalTraits::invalid(); - - aggPtr = 
LO_view("aggPtr",numAggs+1); - aggNodes = LO_view("aggNodes",numNodes); - LO_view aggCurr("agg curr",numAggs+1); - - // Construct the "rowptr" and the counter - Kokkos::parallel_scan("MueLu:Aggregates:ComputeNodesInAggregate:scan", range_type(0,numAggs+1), + }, + realnnz); + TEUCHOS_TEST_FOR_EXCEPTION(realnnz != numNNZ, Exceptions::RuntimeError, + "MueLu: Internal error: Something is wrong with aggregates graph construction: numNNZ = " << numNNZ << " != " << realnnz << " = realnnz"); + + graph_ = local_graph_type(cols, rows); + + return graph_; +} + +template +void Aggregates>::ComputeNodesInAggregate(LO_view& aggPtr, LO_view& aggNodes, LO_view& unaggregated) const { + LO numAggs = GetNumAggregates(); + LO numNodes = vertex2AggId_->getLocalLength(); + auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly); + typename aggregates_sizes_type::const_type aggSizes = ComputeAggregateSizes(true); + LO INVALID = Teuchos::OrdinalTraits::invalid(); + + aggPtr = LO_view("aggPtr", numAggs + 1); + aggNodes = LO_view("aggNodes", numNodes); + LO_view aggCurr("agg curr", numAggs + 1); + + // Construct the "rowptr" and the counter + Kokkos::parallel_scan( + "MueLu:Aggregates:ComputeNodesInAggregate:scan", range_type(0, numAggs + 1), KOKKOS_LAMBDA(const LO aggIdx, LO& aggOffset, bool final_pass) { LO count = 0; - if(aggIdx < numAggs) + if (aggIdx < numAggs) count = aggSizes(aggIdx); - if(final_pass) { - aggPtr(aggIdx) = aggOffset; + if (final_pass) { + aggPtr(aggIdx) = aggOffset; aggCurr(aggIdx) = aggOffset; - if(aggIdx==numAggs) - aggCurr(numAggs) = 0; // use this for counting unaggregated nodes + if (aggIdx == numAggs) + aggCurr(numAggs) = 0; // use this for counting unaggregated nodes } aggOffset += count; }); - // Preallocate unaggregated to the correct size - LO numUnaggregated = 0; - Kokkos::parallel_reduce("MueLu:Aggregates:ComputeNodesInAggregate:unaggregatedSize", range_type(0,numNodes), - KOKKOS_LAMBDA(const LO nodeIdx, LO & count) { - 
if(vertex2AggId(nodeIdx,0)==INVALID) + // Preallocate unaggregated to the correct size + LO numUnaggregated = 0; + Kokkos::parallel_reduce( + "MueLu:Aggregates:ComputeNodesInAggregate:unaggregatedSize", range_type(0, numNodes), + KOKKOS_LAMBDA(const LO nodeIdx, LO& count) { + if (vertex2AggId(nodeIdx, 0) == INVALID) count++; - }, numUnaggregated); - unaggregated = LO_view("unaggregated",numUnaggregated); + }, + numUnaggregated); + unaggregated = LO_view("unaggregated", numUnaggregated); - // Stick the nodes in each aggregate's spot - Kokkos::parallel_for("MueLu:Aggregates:ComputeNodesInAggregate:for", range_type(0,numNodes), + // Stick the nodes in each aggregate's spot + Kokkos::parallel_for( + "MueLu:Aggregates:ComputeNodesInAggregate:for", range_type(0, numNodes), KOKKOS_LAMBDA(const LO nodeIdx) { - LO aggIdx = vertex2AggId(nodeIdx,0); - if(aggIdx != INVALID) { + LO aggIdx = vertex2AggId(nodeIdx, 0); + if (aggIdx != INVALID) { // atomic postincrement aggCurr(aggIdx) each time - aggNodes(Kokkos::atomic_fetch_add(&aggCurr(aggIdx),1)) = nodeIdx; + aggNodes(Kokkos::atomic_fetch_add(&aggCurr(aggIdx), 1)) = nodeIdx; } else { // same, but using last entry of aggCurr for unaggregated nodes - unaggregated(Kokkos::atomic_fetch_add(&aggCurr(numAggs),1)) = nodeIdx; + unaggregated(Kokkos::atomic_fetch_add(&aggCurr(numAggs), 1)) = nodeIdx; } }); - - } - - template - std::string Aggregates >::description() const { - if (numGlobalAggregates_ == -1) return BaseClass::description() + "{nGlobalAggregates = not computed}"; - else return BaseClass::description() + "{nGlobalAggregates = " + toString(numGlobalAggregates_) + "}"; +} + +template +std::string Aggregates>::description() const { + if (numGlobalAggregates_ == -1) + return BaseClass::description() + "{nGlobalAggregates = not computed}"; + else + return BaseClass::description() + "{nGlobalAggregates = " + toString(numGlobalAggregates_) + "}"; +} + +template +void Aggregates>::print(Teuchos::FancyOStream& out, const 
Teuchos::EVerbosityLevel verbLevel) const { + MUELU_DESCRIBE; + + if (verbLevel & Statistics1) { + if (numGlobalAggregates_ == -1) + out0 << "Global number of aggregates: not computed " << std::endl; + else + out0 << "Global number of aggregates: " << numGlobalAggregates_ << std::endl; } - - template - void Aggregates >::print(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel verbLevel) const { - MUELU_DESCRIBE; - - if (verbLevel & Statistics1) { - if (numGlobalAggregates_ == -1) out0 << "Global number of aggregates: not computed " << std::endl; - else out0 << "Global number of aggregates: " << numGlobalAggregates_ << std::endl; - } +} + +template +GlobalOrdinal Aggregates>::GetNumGlobalAggregatesComputeIfNeeded() { + if (numGlobalAggregates_ != -1) { + LO nAggregates = GetNumAggregates(); + GO nGlobalAggregates; + MueLu_sumAll(vertex2AggId_->getMap()->getComm(), (GO)nAggregates, nGlobalAggregates); + SetNumGlobalAggregates(nGlobalAggregates); } + return numGlobalAggregates_; +} - template - GlobalOrdinal Aggregates >::GetNumGlobalAggregatesComputeIfNeeded() { - - if (numGlobalAggregates_ != -1) { - LO nAggregates = GetNumAggregates(); - GO nGlobalAggregates; - MueLu_sumAll(vertex2AggId_->getMap()->getComm(), (GO)nAggregates, nGlobalAggregates); - SetNumGlobalAggregates(nGlobalAggregates); - } - return numGlobalAggregates_; - } - - template - const RCP> > - Aggregates>::GetMap() const { - return vertex2AggId_->getMap(); - } +template +const RCP>> +Aggregates>::GetMap() const { + return vertex2AggId_->getMap(); +} -} //namespace MueLu +} //namespace MueLu -#endif // MUELU_AGGREGATES_DEF_HPP +#endif // MUELU_AGGREGATES_DEF_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_GraphBase.hpp b/packages/muelu/src/Graph/Containers/MueLu_GraphBase.hpp index 462f8aef3153..59ae158adf04 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_GraphBase.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_GraphBase.hpp @@ -46,7 +46,7 @@ #ifndef MUELU_GRAPHBASE_HPP 
#define MUELU_GRAPHBASE_HPP -#include // global_size_t +#include // global_size_t #include #include "MueLu_ConfigDefs.hpp" @@ -61,69 +61,68 @@ namespace MueLu { Pure virtual base class for MueLu representations of graphs. */ - template - class GraphBase - : public BaseClass { +template +class GraphBase + : public BaseClass { #undef MUELU_GRAPHBASE_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - // For Zoltan2 compatibility - using lno_t = LocalOrdinal; - using gno_t = GlobalOrdinal; - using node_t = Node; + public: + // For Zoltan2 compatibility + using lno_t = LocalOrdinal; + using gno_t = GlobalOrdinal; + using node_t = Node; - //! @name Constructors/Destructors. - //@{ - virtual ~GraphBase() {}; - //@} + //! @name Constructors/Destructors. + //@{ + virtual ~GraphBase(){}; + //@} - virtual const RCP > GetComm() const = 0; - virtual const RCP GetDomainMap() const = 0; - virtual const RCP GetImportMap() const = 0; + virtual const RCP > GetComm() const = 0; + virtual const RCP GetDomainMap() const = 0; + virtual const RCP GetImportMap() const = 0; - //! @name Query graph attributes. - //@{ + //! @name Query graph attributes. + //@{ - //! Return number of vertices owned by the calling node. - virtual size_t GetNodeNumVertices() const = 0; + //! Return number of vertices owned by the calling node. + virtual size_t GetNodeNumVertices() const = 0; - //! Return number of edges owned by the calling node. - virtual size_t GetNodeNumEdges() const = 0; + //! Return number of edges owned by the calling node. + virtual size_t GetNodeNumEdges() const = 0; - virtual void SetBoundaryNodeMap(const ArrayRCP & boundaryArray) = 0; + virtual void SetBoundaryNodeMap(const ArrayRCP &boundaryArray) = 0; - virtual size_t getLocalMaxNumRowEntries() const = 0; + virtual size_t getLocalMaxNumRowEntries() const = 0; - virtual const ArrayRCP GetBoundaryNodeMap() const = 0; + virtual const ArrayRCP GetBoundaryNodeMap() const = 0; - //FIXME is this necessary? - //! 
Return number of global edges in the graph. - virtual Xpetra::global_size_t GetGlobalNumEdges() const = 0; + //FIXME is this necessary? + //! Return number of global edges in the graph. + virtual Xpetra::global_size_t GetGlobalNumEdges() const = 0; - //! Return the list of vertices adjacent to the vertex 'v'. - virtual Teuchos::ArrayView getNeighborVertices(LocalOrdinal v) const = 0; + //! Return the list of vertices adjacent to the vertex 'v'. + virtual Teuchos::ArrayView getNeighborVertices(LocalOrdinal v) const = 0; - //! Return true if vertex with local id 'v' is on current process. - virtual bool isLocalNeighborVertex(LocalOrdinal v) const = 0; - //@} + //! Return true if vertex with local id 'v' is on current process. + virtual bool isLocalNeighborVertex(LocalOrdinal v) const = 0; + //@} - //! @name Print graph. - //@{ - /// Return a simple one-line description of the Graph. - virtual std::string description() const = 0; + //! @name Print graph. + //@{ + /// Return a simple one-line description of the Graph. + virtual std::string description() const = 0; - //! Print the Graph with some verbosity level to an FancyOStream object. - //using MueLu::Describable::describe; // overloading, not hiding - //void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const;; - virtual void print(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const = 0; - //@} + //! Print the Graph with some verbosity level to an FancyOStream object. 
+ //using MueLu::Describable::describe; // overloading, not hiding + //void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const;; + virtual void print(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const = 0; + //@} +}; - }; - -} // namespace MueLu +} // namespace MueLu #define MUELU_GRAPHBASE_SHORT -#endif // MUELU_GRAPHBASE_HPP +#endif // MUELU_GRAPHBASE_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_Graph_decl.hpp b/packages/muelu/src/Graph/Containers/MueLu_Graph_decl.hpp index 5ecc7c87e000..1287300d8949 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_Graph_decl.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_Graph_decl.hpp @@ -46,8 +46,8 @@ #ifndef MUELU_GRAPH_DECL_HPP #define MUELU_GRAPH_DECL_HPP -#include // global_size_t -#include // inline functions requires class declaration +#include // global_size_t +#include // inline functions requires class declaration #include #include "MueLu_ConfigDefs.hpp" @@ -64,78 +64,76 @@ namespace MueLu { This class holds an underlying Xpetra_CrsGraph. This class can be considered a facade, as MueLu needs only limited functionality for aggregation. */ - template - class Graph - : public MueLu::GraphBase { //FIXME shortnames isn't working +template +class Graph + : public MueLu::GraphBase { //FIXME shortnames isn't working #undef MUELU_GRAPH_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: + public: + //! @name Constructors/Destructors. + //@{ + Graph(const RCP& graph, const std::string& /* objectLabel */ = ""); - //! @name Constructors/Destructors. 
- //@{ - Graph(const RCP & graph, const std::string & /* objectLabel */=""); + virtual ~Graph() {} + //@} - virtual ~Graph() {} - //@} + size_t GetNodeNumVertices() const { return graph_->getLocalNumRows(); } + size_t GetNodeNumEdges() const { return graph_->getLocalNumEntries(); } - size_t GetNodeNumVertices() const { return graph_->getLocalNumRows(); } - size_t GetNodeNumEdges() const { return graph_->getLocalNumEntries(); } + Xpetra::global_size_t GetGlobalNumEdges() const { return graph_->getGlobalNumEntries(); } - Xpetra::global_size_t GetGlobalNumEdges() const { return graph_->getGlobalNumEntries(); } + const RCP > GetComm() const { return graph_->getComm(); } + const RCP GetDomainMap() const { return graph_->getDomainMap(); } + //! Returns overlapping import map (nodes). + const RCP GetImportMap() const { return graph_->getColMap(); } - const RCP > GetComm() const { return graph_->getComm(); } - const RCP GetDomainMap() const { return graph_->getDomainMap(); } - //! Returns overlapping import map (nodes). - const RCP GetImportMap() const { return graph_->getColMap(); } + const RCP GetGraph() const { return graph_; } - const RCP GetGraph() const {return graph_;} + //! Set map with local ids of boundary nodes. + void SetBoundaryNodeMap(const ArrayRCP& localDirichletNodes) { localDirichletNodes_ = localDirichletNodes; } - //! Set map with local ids of boundary nodes. - void SetBoundaryNodeMap(const ArrayRCP& localDirichletNodes) { localDirichletNodes_ = localDirichletNodes; } + //! Returns map with local ids of boundary nodes. + const ArrayRCP GetBoundaryNodeMap() const { return localDirichletNodes_; } - //! Returns map with local ids of boundary nodes. - const ArrayRCP GetBoundaryNodeMap() const { return localDirichletNodes_; } + //! Returns the maximum number of entries across all rows/columns on this node + size_t getLocalMaxNumRowEntries() const { return graph_->getLocalMaxNumRowEntries(); } - //! 
Returns the maximum number of entries across all rows/columns on this node - size_t getLocalMaxNumRowEntries () const { return graph_->getLocalMaxNumRowEntries(); } + //! Return the list of vertices adjacent to the vertex 'v'. + ArrayView getNeighborVertices(LO i) const { + ArrayView rowView; + graph_->getLocalRowView(i, rowView); + return rowView; + } - //! Return the list of vertices adjacent to the vertex 'v'. - ArrayView getNeighborVertices(LO i) const { - ArrayView rowView; - graph_->getLocalRowView(i, rowView); - return rowView; - } - - //! Return true if vertex with local id 'v' is on current process. - bool isLocalNeighborVertex(LO i) const { return i >= minLocalIndex_ && i <= maxLocalIndex_; } + //! Return true if vertex with local id 'v' is on current process. + bool isLocalNeighborVertex(LO i) const { return i >= minLocalIndex_ && i <= maxLocalIndex_; } #ifdef MUELU_UNUSED - size_t GetNodeNumGhost() const; + size_t GetNodeNumGhost() const; #endif - /// Return a simple one-line description of the Graph. - std::string description() const { return "MueLu.description()"; } - - //! Print the Graph with some verbosity level to an FancyOStream object. - //using MueLu::Describable::describe; // overloading, not hiding - //void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const;; - void print(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const; + /// Return a simple one-line description of the Graph. + std::string description() const { return "MueLu.description()"; } - private: + //! Print the Graph with some verbosity level to an FancyOStream object. + //using MueLu::Describable::describe; // overloading, not hiding + //void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const;; + void print(Teuchos::FancyOStream& out, const VerbLevel verbLevel = Default) const; - RCP graph_; + private: + RCP graph_; - //! Vector of Dirichlet boundary node IDs on current process. 
- ArrayRCP localDirichletNodes_; + //! Vector of Dirichlet boundary node IDs on current process. + ArrayRCP localDirichletNodes_; - // local index boundaries (cached from domain map) - LO minLocalIndex_, maxLocalIndex_; - }; + // local index boundaries (cached from domain map) + LO minLocalIndex_, maxLocalIndex_; +}; -} // namespace MueLu +} // namespace MueLu #define MUELU_GRAPH_SHORT -#endif // MUELU_GRAPH_DECL_HPP +#endif // MUELU_GRAPH_DECL_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_Graph_def.hpp b/packages/muelu/src/Graph/Containers/MueLu_Graph_def.hpp index 7d9bf76e1cf3..db3075048e8f 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_Graph_def.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_Graph_def.hpp @@ -54,51 +54,52 @@ namespace MueLu { - template - Graph::Graph(const RCP & graph, const std::string & /* objectLabel */) : graph_(graph) { - minLocalIndex_ = graph_->getDomainMap()->getMinLocalIndex(); - maxLocalIndex_ = graph_->getDomainMap()->getMaxLocalIndex(); - } +template +Graph::Graph(const RCP &graph, const std::string & /* objectLabel */) + : graph_(graph) { + minLocalIndex_ = graph_->getDomainMap()->getMinLocalIndex(); + maxLocalIndex_ = graph_->getDomainMap()->getMaxLocalIndex(); +} #ifdef MUELU_UNUSED - template - size_t Graph::GetNodeNumGhost() const { - /* +template +size_t Graph::GetNodeNumGhost() const { + /* Ray's comments about nGhost: Graph->NGhost == graph_->RowMatrixColMap()->NumMyElements() - graph_->MatrixDomainMap()->NumMyElements() is basically right. But we've had some issues about how epetra handles empty columns. Probably worth discussing this with Jonathan and Chris to see if this is ALWAYS right. */ - size_t nGhost = graph_->getColMap()->getLocalNumElements() - graph_->getDomainMap()->getLocalNumElements(); - if (nGhost < 0) nGhost = 0; // FIXME: size_t is unsigned. 
+ size_t nGhost = graph_->getColMap()->getLocalNumElements() - graph_->getDomainMap()->getLocalNumElements(); + if (nGhost < 0) nGhost = 0; // FIXME: size_t is unsigned. - return nGhost; - } + return nGhost; +} #endif - //! Print the object with some verbosity level to an FancyOStream object. - //using MueLu::Describable::describe; // overloading, not hiding - //void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const { - template - void Graph::print(Teuchos::FancyOStream &out, const VerbLevel verbLevel) const { - MUELU_DESCRIBE; - - if (verbLevel & Parameters0) { - //out0 << "Prec. type: " << type_ << std::endl; - } +//! Print the object with some verbosity level to an FancyOStream object. +//using MueLu::Describable::describe; // overloading, not hiding +//void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const { +template +void Graph::print(Teuchos::FancyOStream &out, const VerbLevel verbLevel) const { + MUELU_DESCRIBE; - if (verbLevel & Parameters1) { - //out0 << "Linear Algebra: " << toString(lib_) << std::endl; - //out0 << "PrecType: " << type_ << std::endl; - //out0 << "Parameter list: " << std::endl; { Teuchos::OSTab tab2(out); out << paramList_; } - //out0 << "Overlap: " << overlap_ << std::endl; - } + if (verbLevel & Parameters0) { + //out0 << "Prec. 
type: " << type_ << std::endl; + } - if (verbLevel & Debug) { - graph_->describe(out0, Teuchos::VERB_EXTREME); - } + if (verbLevel & Parameters1) { + //out0 << "Linear Algebra: " << toString(lib_) << std::endl; + //out0 << "PrecType: " << type_ << std::endl; + //out0 << "Parameter list: " << std::endl; { Teuchos::OSTab tab2(out); out << paramList_; } + //out0 << "Overlap: " << overlap_ << std::endl; } + if (verbLevel & Debug) { + graph_->describe(out0, Teuchos::VERB_EXTREME); + } } -#endif // MUELU_GRAPH_DEF_HPP +} // namespace MueLu + +#endif // MUELU_GRAPH_DEF_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_LWGraph_decl.hpp b/packages/muelu/src/Graph/Containers/MueLu_LWGraph_decl.hpp index f6b3c8338cbe..06b6a5e2b7fc 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_LWGraph_decl.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_LWGraph_decl.hpp @@ -46,8 +46,8 @@ #ifndef MUELU_LWGRAPH_DECL_HPP #define MUELU_LWGRAPH_DECL_HPP -#include // global_size_t -#include // inline functions requires class declaration +#include // global_size_t +#include // inline functions requires class declaration #include #include "MueLu_ConfigDefs.hpp" @@ -66,117 +66,117 @@ namespace MueLu { fillComplete. TODO handle systems */ - template - class LWGraph : public MueLu::GraphBase { +template +class LWGraph : public MueLu::GraphBase { #undef MUELU_LWGRAPH_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - - //! @name Constructors/Destructors. - //@{ - - //! LWGraph constructor - // - // @param[in] rowPtrs: Array containing row offsets (CSR format) - // @param[in] colPtrs: Array containing local column indices (CSR format) - // @param[in] domainMap: non-overlapping (domain) map for graph. Usually provided by AmalgamationFactory stored in UnAmalgamationInfo container - // @param[in] importMap: overlapping map for graph. 
Usually provided by AmalgamationFactory stored in UnAmalgamationInfo container - // @param[in] objectLabel: label string - LWGraph(const ArrayRCP& rowPtrs, const ArrayRCP& colPtrs, - const RCP& domainMap, const RCP& importMap, const std::string& objectLabel = "") - : rows_(rowPtrs), columns_(colPtrs), domainMap_(domainMap), importMap_(importMap), domainMapRef_(*domainMap), objectLabel_(objectLabel) - { - minLocalIndex_ = domainMapRef_.getMinLocalIndex(); - maxLocalIndex_ = domainMapRef_.getMaxLocalIndex(); - - maxNumRowEntries_ = 0; - - LO nRows = as(rowPtrs.size()-1); - for (LO i = 0; i < nRows; i++) - maxNumRowEntries_ = std::max(maxNumRowEntries_, as(rowPtrs[i+1] - rowPtrs[i])); - } - - virtual ~LWGraph() {} - //@} - - size_t GetNodeNumVertices() const { return rows_.size()-1; } - size_t GetNodeNumEdges() const { return rows_[rows_.size()-1]; } - - // TODO: do we really need this function - // It is being called from CoupledAggregation, but do we need it there? - Xpetra::global_size_t GetGlobalNumEdges() const { - Xpetra::global_size_t in = GetNodeNumEdges(), out; - Teuchos::reduceAll(*domainMap_->getComm(), Teuchos::REDUCE_SUM, in, Teuchos::outArg(out)); - return out; - } - - const RCP > GetComm() const { return domainMap_->getComm(); } - const RCP GetDomainMap() const { return domainMap_; } - //! Returns overlapping import map (nodes). - const RCP GetImportMap() const { return importMap_; } - - void SetBoundaryNodeMap(RCP const &/* map */) { throw Exceptions::NotImplemented("LWGraph: Boundary node map not implemented."); } - - //! Return the list of vertices adjacent to the vertex 'v'. - Teuchos::ArrayView getNeighborVertices(LO i) const { return columns_.view(rows_[i], rows_[i+1]-rows_[i]); } - - //! Return true if vertex with local id 'v' is on current process. - bool isLocalNeighborVertex(LO i) const { return i >= minLocalIndex_ && i <= maxLocalIndex_; } - - //! Set boolean array indicating which rows correspond to Dirichlet boundaries. 
- void SetBoundaryNodeMap(const ArrayRCP& bndry) { dirichletBoundaries_ = bndry; } - - //! Returns the maximum number of entries across all rows/columns on this node - size_t getLocalMaxNumRowEntries () const { return maxNumRowEntries_; } - - //! Returns map with global ids of boundary nodes. - const ArrayRCP GetBoundaryNodeMap() const { return dirichletBoundaries_; } - - - /// Return a simple one-line description of the Graph. - std::string description() const { return "MueLu.description()"; } //FIXME use object's label - - //! Return the row pointers of the local graph - const ArrayRCP getRowPtrs() const { - return rows_; - } - - //! Return the list entries in the local graph - const ArrayRCP getEntries() const { - return columns_; - } - - //! Print the Graph with some verbosity level to an FancyOStream object. - //using MueLu::Describable::describe; // overloading, not hiding - //void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const;; - void print(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const; - - - RCP GetCrsGraph() const; - - private: - - //! Indices into columns_ array. Part of local graph information. - const ArrayRCP rows_; - //! Columns corresponding to connections. Part of local graph information. - const ArrayRCP columns_; - //! Graph maps - const RCP domainMap_, importMap_; - const Map& domainMapRef_; - //! Name of this graph. - const std::string objectLabel_; - //! Boolean array marking Dirichlet rows. - ArrayRCP dirichletBoundaries_; - - // local index boundaries (cached from domain map) - LO minLocalIndex_, maxLocalIndex_; - size_t maxNumRowEntries_; - }; - -} // namespace MueLu + public: + //! @name Constructors/Destructors. + //@{ + + //! LWGraph constructor + // + // @param[in] rowPtrs: Array containing row offsets (CSR format) + // @param[in] colPtrs: Array containing local column indices (CSR format) + // @param[in] domainMap: non-overlapping (domain) map for graph. 
Usually provided by AmalgamationFactory stored in UnAmalgamationInfo container + // @param[in] importMap: overlapping map for graph. Usually provided by AmalgamationFactory stored in UnAmalgamationInfo container + // @param[in] objectLabel: label string + LWGraph(const ArrayRCP& rowPtrs, const ArrayRCP& colPtrs, + const RCP& domainMap, const RCP& importMap, const std::string& objectLabel = "") + : rows_(rowPtrs) + , columns_(colPtrs) + , domainMap_(domainMap) + , importMap_(importMap) + , domainMapRef_(*domainMap) + , objectLabel_(objectLabel) { + minLocalIndex_ = domainMapRef_.getMinLocalIndex(); + maxLocalIndex_ = domainMapRef_.getMaxLocalIndex(); + + maxNumRowEntries_ = 0; + + LO nRows = as(rowPtrs.size() - 1); + for (LO i = 0; i < nRows; i++) + maxNumRowEntries_ = std::max(maxNumRowEntries_, as(rowPtrs[i + 1] - rowPtrs[i])); + } + + virtual ~LWGraph() {} + //@} + + size_t GetNodeNumVertices() const { return rows_.size() - 1; } + size_t GetNodeNumEdges() const { return rows_[rows_.size() - 1]; } + + // TODO: do we really need this function + // It is being called from CoupledAggregation, but do we need it there? + Xpetra::global_size_t GetGlobalNumEdges() const { + Xpetra::global_size_t in = GetNodeNumEdges(), out; + Teuchos::reduceAll(*domainMap_->getComm(), Teuchos::REDUCE_SUM, in, Teuchos::outArg(out)); + return out; + } + + const RCP > GetComm() const { return domainMap_->getComm(); } + const RCP GetDomainMap() const { return domainMap_; } + //! Returns overlapping import map (nodes). + const RCP GetImportMap() const { return importMap_; } + + void SetBoundaryNodeMap(RCP const& /* map */) { throw Exceptions::NotImplemented("LWGraph: Boundary node map not implemented."); } + + //! Return the list of vertices adjacent to the vertex 'v'. + Teuchos::ArrayView getNeighborVertices(LO i) const { return columns_.view(rows_[i], rows_[i + 1] - rows_[i]); } + + //! Return true if vertex with local id 'v' is on current process. 
+ bool isLocalNeighborVertex(LO i) const { return i >= minLocalIndex_ && i <= maxLocalIndex_; } + + //! Set boolean array indicating which rows correspond to Dirichlet boundaries. + void SetBoundaryNodeMap(const ArrayRCP& bndry) { dirichletBoundaries_ = bndry; } + + //! Returns the maximum number of entries across all rows/columns on this node + size_t getLocalMaxNumRowEntries() const { return maxNumRowEntries_; } + + //! Returns map with global ids of boundary nodes. + const ArrayRCP GetBoundaryNodeMap() const { return dirichletBoundaries_; } + + /// Return a simple one-line description of the Graph. + std::string description() const { return "MueLu.description()"; } //FIXME use object's label + + //! Return the row pointers of the local graph + const ArrayRCP getRowPtrs() const { + return rows_; + } + + //! Return the list entries in the local graph + const ArrayRCP getEntries() const { + return columns_; + } + + //! Print the Graph with some verbosity level to an FancyOStream object. + //using MueLu::Describable::describe; // overloading, not hiding + //void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const;; + void print(Teuchos::FancyOStream& out, const VerbLevel verbLevel = Default) const; + + RCP GetCrsGraph() const; + + private: + //! Indices into columns_ array. Part of local graph information. + const ArrayRCP rows_; + //! Columns corresponding to connections. Part of local graph information. + const ArrayRCP columns_; + //! Graph maps + const RCP domainMap_, importMap_; + const Map& domainMapRef_; + //! Name of this graph. + const std::string objectLabel_; + //! Boolean array marking Dirichlet rows. 
+ ArrayRCP dirichletBoundaries_; + + // local index boundaries (cached from domain map) + LO minLocalIndex_, maxLocalIndex_; + size_t maxNumRowEntries_; +}; + +} // namespace MueLu #define MUELU_LWGRAPH_SHORT -#endif // MUELU_LWGRAPH_DECL_HPP +#endif // MUELU_LWGRAPH_DECL_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_LWGraph_def.hpp b/packages/muelu/src/Graph/Containers/MueLu_LWGraph_def.hpp index 230a6c908587..e6ce20b3d7d8 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_LWGraph_def.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_LWGraph_def.hpp @@ -52,46 +52,45 @@ namespace MueLu { - //! Print the object with some verbosity level to an FancyOStream object. - //using MueLu::Describable::describe; // overloading, not hiding - //void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const { - template - void LWGraph::print(Teuchos::FancyOStream &out, const VerbLevel verbLevel) const { - // MUELU_DESCRIBE; +//! Print the object with some verbosity level to an FancyOStream object. +//using MueLu::Describable::describe; // overloading, not hiding +//void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const { +template +void LWGraph::print(Teuchos::FancyOStream &out, const VerbLevel verbLevel) const { + // MUELU_DESCRIBE; - if (verbLevel & Parameters0) { - //out0 << "Prec. type: " << type_ << std::endl; - } - - if (verbLevel & Parameters1) { - //out0 << "Linear Algebra: " << toString(lib_) << std::endl; - //out0 << "PrecType: " << type_ << std::endl; - //out0 << "Parameter list: " << std::endl; { Teuchos::OSTab tab2(out); out << paramList_; } - //out0 << "Overlap: " << overlap_ << std::endl; - } - - if (verbLevel & Debug) { - RCP col_map = importMap_.is_null() ? domainMap_ : importMap_; + if (verbLevel & Parameters0) { + //out0 << "Prec. 
type: " << type_ << std::endl; + } - for (LO i = 0; i < rows_.size()-1; i++) { - for (LO j = rows_[i]; j < rows_[i+1]; j++) - out<< domainMap_->getGlobalElement(i) << " " << col_map->getGlobalElement(columns_[j])< col_map = importMap_.is_null() ? domainMap_ : importMap_; - template - RCP > LWGraph::GetCrsGraph() const { - ArrayRCP rowPtrs; - rowPtrs.resize(rows_.size()); - for (size_t i=0; i(rows_.size()); i++) - rowPtrs[i] = rows_[i]; - auto graph = Xpetra::CrsGraphFactory::Build(GetDomainMap(), GetImportMap(), rowPtrs, Teuchos::arcp_const_cast(getEntries())); - graph->fillComplete(); - return graph; + for (LO i = 0; i < rows_.size() - 1; i++) { + for (LO j = rows_[i]; j < rows_[i + 1]; j++) + out << domainMap_->getGlobalElement(i) << " " << col_map->getGlobalElement(columns_[j]) << std::endl; } + } +} +template +RCP > LWGraph::GetCrsGraph() const { + ArrayRCP rowPtrs; + rowPtrs.resize(rows_.size()); + for (size_t i = 0; i < Teuchos::as(rows_.size()); i++) + rowPtrs[i] = rows_[i]; + auto graph = Xpetra::CrsGraphFactory::Build(GetDomainMap(), GetImportMap(), rowPtrs, Teuchos::arcp_const_cast(getEntries())); + graph->fillComplete(); + return graph; } -#endif // MUELU_LWGRAPH_DEF_HPP +} // namespace MueLu + +#endif // MUELU_LWGRAPH_DEF_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_LWGraph_kokkos_decl.hpp b/packages/muelu/src/Graph/Containers/MueLu_LWGraph_kokkos_decl.hpp index dd8a0231b15a..2a8b73b96c88 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_LWGraph_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_LWGraph_kokkos_decl.hpp @@ -51,7 +51,7 @@ #include #include -#include // global_size_t +#include // global_size_t #include #include "MueLu_VerbosityLevel.hpp" @@ -62,110 +62,111 @@ namespace MueLu { - /*! +/*! @class LWGraph_kokkos @brief Lightweight MueLu representation of a compressed row storage graph This class is lightweight in the sense that it holds to local graph information. These were built without using fillComplete. 
*/ - template - class LWGraph_kokkos; - - // Partial specialization for DeviceType - template - class LWGraph_kokkos> { - public: - using local_ordinal_type = LocalOrdinal; - using global_ordinal_type = GlobalOrdinal; - using execution_space = typename DeviceType::execution_space; - using memory_space = typename DeviceType::memory_space; - using device_type = Kokkos::Device; - using node_type = Tpetra::KokkosCompat::KokkosDeviceWrapperNode; - using local_lw_graph_type = MueLu::LocalLWGraph_kokkos; - using size_type = size_t; - - using map_type = Xpetra::Map; - using local_graph_type = typename local_lw_graph_type::local_graph_type; - using boundary_nodes_type = typename local_lw_graph_type::boundary_nodes_type; - - private: - // For compatibility - typedef node_type Node; +template +class LWGraph_kokkos; + +// Partial specialization for DeviceType +template +class LWGraph_kokkos> { + public: + using local_ordinal_type = LocalOrdinal; + using global_ordinal_type = GlobalOrdinal; + using execution_space = typename DeviceType::execution_space; + using memory_space = typename DeviceType::memory_space; + using device_type = Kokkos::Device; + using node_type = Tpetra::KokkosCompat::KokkosDeviceWrapperNode; + using local_lw_graph_type = MueLu::LocalLWGraph_kokkos; + using size_type = size_t; + + using map_type = Xpetra::Map; + using local_graph_type = typename local_lw_graph_type::local_graph_type; + using boundary_nodes_type = typename local_lw_graph_type::boundary_nodes_type; + + private: + // For compatibility + typedef node_type Node; #undef MUELU_LWGRAPH_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - - //! @name Constructors/Destructors. - //@{ - - //! LWGraph constructor - // - // @param[in] graph: local graph of type Kokkos::StaticCrsGraph containing CRS data - // @param[in] domainMap: non-overlapping (domain) map for graph. 
Usually provided by AmalgamationFactory stored in UnAmalgamationInfo container - // @param[in] importMap: overlapping map for graph. Usually provided by AmalgamationFactory stored in UnAmalgamationInfo container - // @param[in] objectLabel: label string - LWGraph_kokkos(const local_graph_type& graph, - const RCP& domainMap, - const RCP& importMap, - const std::string& objectLabel = "") - : lclLWGraph_(graph, domainMap), domainMap_(domainMap), importMap_(importMap), objectLabel_(objectLabel) { } - - ~LWGraph_kokkos() = default; - //@} - - const RCP > GetComm() const { - return domainMap_->getComm(); - } - const RCP GetDomainMap() const { - return domainMap_; - } - //! Return overlapping import map (nodes). - const RCP GetImportMap() const { - return importMap_; - } - - //! Return number of graph vertices - KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const { - return lclLWGraph_.GetNodeNumVertices(); - } - //! Return number of graph edges - KOKKOS_INLINE_FUNCTION size_type GetNodeNumEdges() const { - return lclLWGraph_.GetNodeNumEdges(); - } - - //! Returns the maximum number of entries across all rows/columns on this node - KOKKOS_INLINE_FUNCTION size_type getLocalMaxNumRowEntries () const { - return lclLWGraph_.getLocalMaxNumRowEntries(); - } - - /// Return a simple one-line description of the Graph. - std::string description() const { - return "LWGraph (" + objectLabel_ + ")"; - } - - //! Print the Graph with some verbosity level to an FancyOStream object. - void print(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const; - - local_lw_graph_type& getLocalLWGraph() const { - return lclLWGraph_; - } - - private: - - //! Underlying graph (with label) - mutable local_lw_graph_type lclLWGraph_; - - //! Graph maps - const RCP domainMap_; - const RCP importMap_; - - //! Name of this graph. - const std::string objectLabel_; - }; - -} + public: + //! @name Constructors/Destructors. + //@{ + + //! 
LWGraph constructor + // + // @param[in] graph: local graph of type Kokkos::StaticCrsGraph containing CRS data + // @param[in] domainMap: non-overlapping (domain) map for graph. Usually provided by AmalgamationFactory stored in UnAmalgamationInfo container + // @param[in] importMap: overlapping map for graph. Usually provided by AmalgamationFactory stored in UnAmalgamationInfo container + // @param[in] objectLabel: label string + LWGraph_kokkos(const local_graph_type& graph, + const RCP& domainMap, + const RCP& importMap, + const std::string& objectLabel = "") + : lclLWGraph_(graph, domainMap) + , domainMap_(domainMap) + , importMap_(importMap) + , objectLabel_(objectLabel) {} + + ~LWGraph_kokkos() = default; + //@} + + const RCP> GetComm() const { + return domainMap_->getComm(); + } + const RCP GetDomainMap() const { + return domainMap_; + } + //! Return overlapping import map (nodes). + const RCP GetImportMap() const { + return importMap_; + } + + //! Return number of graph vertices + KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const { + return lclLWGraph_.GetNodeNumVertices(); + } + //! Return number of graph edges + KOKKOS_INLINE_FUNCTION size_type GetNodeNumEdges() const { + return lclLWGraph_.GetNodeNumEdges(); + } + + //! Returns the maximum number of entries across all rows/columns on this node + KOKKOS_INLINE_FUNCTION size_type getLocalMaxNumRowEntries() const { + return lclLWGraph_.getLocalMaxNumRowEntries(); + } + + /// Return a simple one-line description of the Graph. + std::string description() const { + return "LWGraph (" + objectLabel_ + ")"; + } + + //! Print the Graph with some verbosity level to an FancyOStream object. + void print(Teuchos::FancyOStream& out, const VerbLevel verbLevel = Default) const; + + local_lw_graph_type& getLocalLWGraph() const { + return lclLWGraph_; + } + + private: + //! Underlying graph (with label) + mutable local_lw_graph_type lclLWGraph_; + + //! 
Graph maps + const RCP domainMap_; + const RCP importMap_; + + //! Name of this graph. + const std::string objectLabel_; +}; + +} // namespace MueLu #define MUELU_LWGRAPH_KOKKOS_SHORT -#endif // MUELU_LWGRAPH_KOKKOS_DECL_HPP +#endif // MUELU_LWGRAPH_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_LWGraph_kokkos_def.hpp b/packages/muelu/src/Graph/Containers/MueLu_LWGraph_kokkos_def.hpp index 4d164f1b8f50..42da467f4cb1 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_LWGraph_kokkos_def.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_LWGraph_kokkos_def.hpp @@ -55,38 +55,37 @@ namespace MueLu { - template - void LWGraph_kokkos>:: - print(Teuchos::FancyOStream &out, const VerbLevel verbLevel) const { +template +void LWGraph_kokkos>:: + print(Teuchos::FancyOStream &out, const VerbLevel verbLevel) const { + if (verbLevel & Debug) { + auto graph = lclLWGraph_.getGraph(); + RCP col_map = importMap_.is_null() ? domainMap_ : importMap_; + int mypid = col_map->getComm()->getRank(); - if (verbLevel & Debug) { - auto graph = lclLWGraph_.getGraph(); - RCP col_map = importMap_.is_null() ? 
domainMap_ : importMap_; - int mypid = col_map->getComm()->getRank(); - - { + { std::ostringstream ss; ss << "[pid " << mypid << "] num entries=" << graph.entries.size(); out << ss.str() << std::endl; - } + } - const size_t numRows = graph.numRows(); - auto rowPtrs = graph.row_map; - auto columns = graph.entries; - for (size_t i=0; i < numRows; ++i) { - std::ostringstream ss; - ss << "[pid " << mypid << "] row " << domainMap_->getGlobalElement(i) << ":"; - ss << " (numEntries=" << rowPtrs(i+1)-rowPtrs(i) << ")"; + const size_t numRows = graph.numRows(); + auto rowPtrs = graph.row_map; + auto columns = graph.entries; + for (size_t i = 0; i < numRows; ++i) { + std::ostringstream ss; + ss << "[pid " << mypid << "] row " << domainMap_->getGlobalElement(i) << ":"; + ss << " (numEntries=" << rowPtrs(i + 1) - rowPtrs(i) << ")"; - auto rowView = graph.rowConst(i); - for (LO j = 0; j < rowView.length; j++) { - ss << " " << col_map->getGlobalElement(rowView.colidx(j)); - } - out << ss.str() << std::endl; + auto rowView = graph.rowConst(i); + for (LO j = 0; j < rowView.length; j++) { + ss << " " << col_map->getGlobalElement(rowView.colidx(j)); } + out << ss.str() << std::endl; } } +} -} //namespace MueLu +} //namespace MueLu -#endif // MUELU_LWGRAPH_KOKKOS_DEF_HPP +#endif // MUELU_LWGRAPH_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_LinkedList.cpp b/packages/muelu/src/Graph/Containers/MueLu_LinkedList.cpp index 04192eee8d72..d79bf9532daa 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_LinkedList.cpp +++ b/packages/muelu/src/Graph/Containers/MueLu_LinkedList.cpp @@ -43,51 +43,53 @@ // *********************************************************************** // // @HEADER -#include // for NULL +#include // for NULL #include "MueLu_LinkedList.hpp" namespace MueLu { - LinkedList::LinkedList() : nodeHead(NULL), nodeTail(NULL) { } +LinkedList::LinkedList() + : nodeHead(NULL) + , nodeTail(NULL) {} - LinkedList::~LinkedList() { - while (nodeHead != NULL) 
- DeleteHead(); - } +LinkedList::~LinkedList() { + while (nodeHead != NULL) + DeleteHead(); +} - bool LinkedList::IsEmpty() { - return nodeHead == NULL; - } +bool LinkedList::IsEmpty() { + return nodeHead == NULL; +} - void LinkedList::Add(int iNode) { - MueLu_Node *newNode = new MueLu_Node; - newNode->nodeId = iNode; - newNode->next = NULL; - if (nodeHead == NULL) { - nodeHead = newNode; - nodeTail = newNode; - } else { - nodeTail->next = newNode; - nodeTail = newNode; - } +void LinkedList::Add(int iNode) { + MueLu_Node *newNode = new MueLu_Node; + newNode->nodeId = iNode; + newNode->next = NULL; + if (nodeHead == NULL) { + nodeHead = newNode; + nodeTail = newNode; + } else { + nodeTail->next = newNode; + nodeTail = newNode; } +} - int LinkedList::Pop() { // get head and remove first node - if (IsEmpty()) return -1; - - int iNode = nodeHead->nodeId; - DeleteHead(); - return iNode; - } +int LinkedList::Pop() { // get head and remove first node + if (IsEmpty()) return -1; - void LinkedList::DeleteHead() { - if (IsEmpty()) return; + int iNode = nodeHead->nodeId; + DeleteHead(); + return iNode; +} - MueLu_Node *newNode = nodeHead; - nodeHead = newNode->next; - delete newNode; - } +void LinkedList::DeleteHead() { + if (IsEmpty()) return; + MueLu_Node *newNode = nodeHead; + nodeHead = newNode->next; + delete newNode; } +} // namespace MueLu + //TODO: nodeTail unused -> remove? 
diff --git a/packages/muelu/src/Graph/Containers/MueLu_LinkedList.hpp b/packages/muelu/src/Graph/Containers/MueLu_LinkedList.hpp index ba8d95e30751..8546ec3ccfec 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_LinkedList.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_LinkedList.hpp @@ -52,33 +52,30 @@ namespace MueLu { - typedef struct MueLu_Node_Struct - { - int nodeId; - struct MueLu_Node_Struct *next; - } MueLu_Node; +typedef struct MueLu_Node_Struct { + int nodeId; + struct MueLu_Node_Struct *next; +} MueLu_Node; - class LinkedList { +class LinkedList { + public: + LinkedList(); - public: - LinkedList(); + ~LinkedList(); - ~LinkedList(); + bool IsEmpty(); - bool IsEmpty(); + void Add(int iNode); - void Add(int iNode); + int Pop(); - int Pop(); + private: + MueLu_Node *nodeHead; + MueLu_Node *nodeTail; - private: - MueLu_Node *nodeHead; - MueLu_Node *nodeTail; + void DeleteHead(); +}; - void DeleteHead(); +} // namespace MueLu - }; - -} - -#endif // MUELU_LINKEDLIST_HPP +#endif // MUELU_LINKEDLIST_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_LocalLWGraph_kokkos_decl.hpp b/packages/muelu/src/Graph/Containers/MueLu_LocalLWGraph_kokkos_decl.hpp index 294fe160e530..57d30885471b 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_LocalLWGraph_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_LocalLWGraph_kokkos_decl.hpp @@ -51,7 +51,7 @@ #include #include -#include // global_size_t +#include // global_size_t #include #include "MueLu_VerbosityLevel.hpp" @@ -61,125 +61,122 @@ namespace MueLu { - /*! +/*! @class LocalLWGraph_kokkos @brief Lightweight MueLu representation of a compressed row storage graph This class is lightweight in the sense that it holds to local graph information. These were built without using fillComplete. 
*/ - template - class LocalLWGraph_kokkos; - - // Partial specialization for DeviceType - template - class LocalLWGraph_kokkos> { - public: - using local_ordinal_type = LocalOrdinal; - using global_ordinal_type = GlobalOrdinal; - using execution_space = typename DeviceType::execution_space; - using memory_space = typename DeviceType::memory_space; - using device_type = Kokkos::Device; - using range_type = Kokkos::RangePolicy; - using node_type = Tpetra::KokkosCompat::KokkosDeviceWrapperNode; - using size_type = size_t; - - using local_graph_type = Kokkos::StaticCrsGraph; - using boundary_nodes_type = Kokkos::View; - using row_type = Kokkos::View; - using map_type = Xpetra::Map; - - private: - // For compatibility - typedef node_type Node; +template +class LocalLWGraph_kokkos; + +// Partial specialization for DeviceType +template +class LocalLWGraph_kokkos> { + public: + using local_ordinal_type = LocalOrdinal; + using global_ordinal_type = GlobalOrdinal; + using execution_space = typename DeviceType::execution_space; + using memory_space = typename DeviceType::memory_space; + using device_type = Kokkos::Device; + using range_type = Kokkos::RangePolicy; + using node_type = Tpetra::KokkosCompat::KokkosDeviceWrapperNode; + using size_type = size_t; + + using local_graph_type = Kokkos::StaticCrsGraph; + using boundary_nodes_type = Kokkos::View; + using row_type = Kokkos::View; + using map_type = Xpetra::Map; + + private: + // For compatibility + typedef node_type Node; #undef MUELU_LOCALLWGRAPH_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - - //! @name Constructors/Destructors. - //@{ - - //! LocalLWGraph constructor - // - // @param[in] graph: local graph of type Kokkos::StaticCrsGraph containing CRS data - LocalLWGraph_kokkos(const local_graph_type& graph, - const RCP& domainMap); - - ~LocalLWGraph_kokkos() = default; - //@} - - //! 
Return number of graph vertices - KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const { - return graph_.numRows(); - } - //! Return number of graph edges - KOKKOS_INLINE_FUNCTION size_type GetNodeNumEdges() const { - return graph_.row_map(GetNodeNumVertices()); - } - - //! Returns the maximum number of entries across all rows/columns on this node - KOKKOS_INLINE_FUNCTION size_type getLocalMaxNumRowEntries () const { - return maxNumRowEntries_; - } - - //! Return the row pointers of the local graph - KOKKOS_INLINE_FUNCTION typename local_graph_type::row_map_type getRowPtrs() const { - return graph_.row_map; - } - - //! Return the list entries in the local graph - KOKKOS_INLINE_FUNCTION typename local_graph_type::entries_type getEntries() const { - return graph_.entries; - } - - //! Return the list of vertices adjacent to the vertex 'v'. - // Unfortunately, C++11 does not support the following: - // auto getNeighborVertices(LO i) const -> decltype(rowView) - // auto return with decltype was only introduced in C++14 - KOKKOS_INLINE_FUNCTION - Kokkos::GraphRowViewConst getNeighborVertices(LO i) const { - auto rowView = graph_.rowConst(i); - - return rowView; - } - - //! Return true if vertex with local id 'v' is on current process. - KOKKOS_INLINE_FUNCTION bool isLocalNeighborVertex(LO i) const { - return i >= minLocalIndex_ && i <= maxLocalIndex_; - } - - //! Set boolean array indicating which rows correspond to Dirichlet boundaries. - KOKKOS_INLINE_FUNCTION void SetBoundaryNodeMap(const boundary_nodes_type bndry) { - dirichletBoundaries_ = bndry; - } - - //! Returns map with global ids of boundary nodes. - KOKKOS_INLINE_FUNCTION const boundary_nodes_type GetBoundaryNodeMap() const { - return dirichletBoundaries_; - } - - const local_graph_type& getGraph() const { - return graph_; - } - - private: - - //! Underlying graph (with label) - const local_graph_type graph_; - - //! Boolean array marking Dirichlet rows. 
- boundary_nodes_type dirichletBoundaries_; - - //! Local index boundaries (cached from domain map) - LO minLocalIndex_, maxLocalIndex_; - size_type maxNumRowEntries_; - - }; - -} + public: + //! @name Constructors/Destructors. + //@{ + + //! LocalLWGraph constructor + // + // @param[in] graph: local graph of type Kokkos::StaticCrsGraph containing CRS data + LocalLWGraph_kokkos(const local_graph_type& graph, + const RCP& domainMap); + + ~LocalLWGraph_kokkos() = default; + //@} + + //! Return number of graph vertices + KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const { + return graph_.numRows(); + } + //! Return number of graph edges + KOKKOS_INLINE_FUNCTION size_type GetNodeNumEdges() const { + return graph_.row_map(GetNodeNumVertices()); + } + + //! Returns the maximum number of entries across all rows/columns on this node + KOKKOS_INLINE_FUNCTION size_type getLocalMaxNumRowEntries() const { + return maxNumRowEntries_; + } + + //! Return the row pointers of the local graph + KOKKOS_INLINE_FUNCTION typename local_graph_type::row_map_type getRowPtrs() const { + return graph_.row_map; + } + + //! Return the list entries in the local graph + KOKKOS_INLINE_FUNCTION typename local_graph_type::entries_type getEntries() const { + return graph_.entries; + } + + //! Return the list of vertices adjacent to the vertex 'v'. + // Unfortunately, C++11 does not support the following: + // auto getNeighborVertices(LO i) const -> decltype(rowView) + // auto return with decltype was only introduced in C++14 + KOKKOS_INLINE_FUNCTION + Kokkos::GraphRowViewConst getNeighborVertices(LO i) const { + auto rowView = graph_.rowConst(i); + + return rowView; + } + + //! Return true if vertex with local id 'v' is on current process. + KOKKOS_INLINE_FUNCTION bool isLocalNeighborVertex(LO i) const { + return i >= minLocalIndex_ && i <= maxLocalIndex_; + } + + //! Set boolean array indicating which rows correspond to Dirichlet boundaries. 
+ KOKKOS_INLINE_FUNCTION void SetBoundaryNodeMap(const boundary_nodes_type bndry) { + dirichletBoundaries_ = bndry; + } + + //! Returns map with global ids of boundary nodes. + KOKKOS_INLINE_FUNCTION const boundary_nodes_type GetBoundaryNodeMap() const { + return dirichletBoundaries_; + } + + const local_graph_type& getGraph() const { + return graph_; + } + + private: + //! Underlying graph (with label) + const local_graph_type graph_; + + //! Boolean array marking Dirichlet rows. + boundary_nodes_type dirichletBoundaries_; + + //! Local index boundaries (cached from domain map) + LO minLocalIndex_, maxLocalIndex_; + size_type maxNumRowEntries_; +}; + +} // namespace MueLu #define MUELU_LOCALLWGRAPH_KOKKOS_SHORT -#endif // MUELU_LOCALLWGRAPH_KOKKOS_DECL_HPP +#endif // MUELU_LOCALLWGRAPH_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_LocalLWGraph_kokkos_def.hpp b/packages/muelu/src/Graph/Containers/MueLu_LocalLWGraph_kokkos_def.hpp index 785706b1a002..ba30720851ef 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_LocalLWGraph_kokkos_def.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_LocalLWGraph_kokkos_def.hpp @@ -55,49 +55,49 @@ namespace MueLu { - namespace { // anonymous +namespace { // anonymous - template - class MaxNumRowEntriesFunctor { - public: - MaxNumRowEntriesFunctor(RowType rowPointers) : rowPointers_(rowPointers) { } +template +class MaxNumRowEntriesFunctor { + public: + MaxNumRowEntriesFunctor(RowType rowPointers) + : rowPointers_(rowPointers) {} - KOKKOS_INLINE_FUNCTION - void operator()(const LocalOrdinal i, size_t& maxLength) const { - size_t d = rowPointers_(i+1) - rowPointers_(i); + KOKKOS_INLINE_FUNCTION + void operator()(const LocalOrdinal i, size_t& maxLength) const { + size_t d = rowPointers_(i + 1) - rowPointers_(i); - maxLength = (d > maxLength ? d : maxLength); - } + maxLength = (d > maxLength ? 
d : maxLength); + } + + KOKKOS_INLINE_FUNCTION + void join(volatile size_t& dest, const volatile size_t& src) { + dest = (dest > src ? dest : src); + } - KOKKOS_INLINE_FUNCTION - void join(volatile size_t& dest, const volatile size_t& src) { - dest = (dest > src ? dest : src); - } + KOKKOS_INLINE_FUNCTION + void init(size_t& initValue) { + initValue = 0; + } - KOKKOS_INLINE_FUNCTION - void init(size_t& initValue) { - initValue = 0; - } + private: + RowType rowPointers_; +}; - private: - RowType rowPointers_; - }; +} // namespace - } +template +LocalLWGraph_kokkos>:: + LocalLWGraph_kokkos(const local_graph_type& graph, + const RCP& domainMap) + : graph_(graph) { + minLocalIndex_ = domainMap->getMinLocalIndex(); + maxLocalIndex_ = domainMap->getMaxLocalIndex(); - template - LocalLWGraph_kokkos>:: - LocalLWGraph_kokkos(const local_graph_type& graph, - const RCP& domainMap) - : graph_(graph) - { - minLocalIndex_ = domainMap->getMinLocalIndex(); - maxLocalIndex_ = domainMap->getMaxLocalIndex(); - - MaxNumRowEntriesFunctor maxNumRowEntriesFunctor(graph_.row_map); - Kokkos::parallel_reduce("MueLu:LocalLWGraph:LWGraph:maxnonzeros", range_type(0,graph_.numRows()), maxNumRowEntriesFunctor, maxNumRowEntries_); - } + MaxNumRowEntriesFunctor maxNumRowEntriesFunctor(graph_.row_map); + Kokkos::parallel_reduce("MueLu:LocalLWGraph:LWGraph:maxnonzeros", range_type(0, graph_.numRows()), maxNumRowEntriesFunctor, maxNumRowEntries_); +} -} //namespace MueLu +} //namespace MueLu -#endif // MUELU_LWGRAPH_KOKKOS_DEF_HPP +#endif // MUELU_LWGRAPH_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_Zoltan2GraphAdapter.hpp b/packages/muelu/src/Graph/Containers/MueLu_Zoltan2GraphAdapter.hpp index e227583d92f2..c653e9f0dcf7 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_Zoltan2GraphAdapter.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_Zoltan2GraphAdapter.hpp @@ -61,70 +61,63 @@ #include #include "MueLu_GraphBase.hpp" - - // Zoltab2 InputTraits for MueLu Graph 
objects namespace Zoltan2 { template -struct InputTraits > -{ +struct InputTraits > { typedef Zoltan2::default_scalar_t scalar_t; - typedef LocalOrdinal lno_t; + typedef LocalOrdinal lno_t; typedef GlobalOrdinal gno_t; typedef size_t offset_t; - typedef Zoltan2::default_part_t part_t; - typedef Node node_t; - static inline std::string name() {return "MueLu::Graph";} + typedef Zoltan2::default_part_t part_t; + typedef Node node_t; + static inline std::string name() { return "MueLu::Graph"; } - Z2_STATIC_ASSERT_TYPES // validate the types + Z2_STATIC_ASSERT_TYPES // validate the types }; -}//end namespace Zoltan2 - +} //end namespace Zoltan2 namespace MueLu { -template -class MueLuGraphBaseAdapter : public Zoltan2::GraphAdapter { -public: - +template +class MueLuGraphBaseAdapter : public Zoltan2::GraphAdapter { + public: #ifndef DOXYGEN_SHOULD_SKIP_THIS - typedef typename Zoltan2::InputTraits::scalar_t scalar_t; - typedef typename Zoltan2::InputTraits::offset_t offset_t; - typedef typename Zoltan2::InputTraits::lno_t lno_t; - typedef typename Zoltan2::InputTraits::gno_t gno_t; - typedef typename Zoltan2::InputTraits::part_t part_t; - typedef typename Zoltan2::InputTraits::node_t node_t; + typedef typename Zoltan2::InputTraits::scalar_t scalar_t; + typedef typename Zoltan2::InputTraits::offset_t offset_t; + typedef typename Zoltan2::InputTraits::lno_t lno_t; + typedef typename Zoltan2::InputTraits::gno_t gno_t; + typedef typename Zoltan2::InputTraits::part_t part_t; + typedef typename Zoltan2::InputTraits::node_t node_t; typedef User xgraph_t; typedef User user_t; typedef UserCoord userCoord_t; #endif //! 
MueLu::GraphBase Compatibility Layer - const Teuchos::RCP< const Teuchos::Comm< int > > getComm() const { return graph_->GetComm();} - const Teuchos::RCP< const Xpetra::Map > getRowMap() const { return graph_->GetDomainMap();} - const RCP< const Xpetra::Map > getColMap() const { + const Teuchos::RCP > getComm() const { return graph_->GetComm(); } + const Teuchos::RCP > getRowMap() const { return graph_->GetDomainMap(); } + const RCP > getColMap() const { // For some GraphBases' this is a ColMap, in others it is a seperate map that is // only non-null in parallel. - Teuchos::RCP > map = graph_->GetImportMap(); - if(map.is_null()) map = graph_->GetDomainMap(); + Teuchos::RCP > map = graph_->GetImportMap(); + if (map.is_null()) map = graph_->GetDomainMap(); return map; } - size_t getLocalNumEntries() const { return graph_->GetNodeNumEdges();} - size_t getLocalNumRows() const { return getRowMap()->getLocalNumElements();} - size_t getLocalNumCols() const { return getColMap()->getLocalNumElements();} + size_t getLocalNumEntries() const { return graph_->GetNodeNumEdges(); } + size_t getLocalNumRows() const { return getRowMap()->getLocalNumElements(); } + size_t getLocalNumCols() const { return getColMap()->getLocalNumElements(); } - void getLocalRowView(lno_t LocalRow, Teuchos::ArrayView< const lno_t > &indices) const { - indices = graph_->getNeighborVertices(LocalRow); + void getLocalRowView(lno_t LocalRow, Teuchos::ArrayView &indices) const { + indices = graph_->getNeighborVertices(LocalRow); } - - /*! \brief Destructor */ - ~MueLuGraphBaseAdapter() { } + ~MueLuGraphBaseAdapter() {} /*! \brief Constructor for graph with no weights or coordinates. * \param ingraph the Epetra_CrsGraph, Tpetra::CrsGraph or Xpetra::CrsGraph @@ -135,8 +128,8 @@ class MueLuGraphBaseAdapter : public Zoltan2::GraphAdapter { * one does because the user is obviously a Trilinos user. 
*/ - MueLuGraphBaseAdapter(const RCP &ingraph, - int nVtxWeights=0, int nEdgeWeights=0); + MueLuGraphBaseAdapter(const RCP &ingraph, + int nVtxWeights = 0, int nEdgeWeights = 0); /*! \brief Provide a pointer to weights for the primary entity type. * \param val A pointer to the weights for index \c idx. @@ -227,8 +220,7 @@ class MueLuGraphBaseAdapter : public Zoltan2::GraphAdapter { // TODO: Need to add option for columns or nonzeros? size_t getLocalNumVertices() const { return getLocalNumRows(); } - void getVertexIDsView(const gno_t *&ids) const - { + void getVertexIDsView(const gno_t *&ids) const { ids = NULL; if (getLocalNumVertices()) ids = getRowMap()->getLocalElementList().getRawPtr(); @@ -236,67 +228,57 @@ class MueLuGraphBaseAdapter : public Zoltan2::GraphAdapter { size_t getLocalNumEdges() const { return getLocalNumEntries(); } - void getEdgesView(const offset_t *&offsets, const gno_t *&adjIds) const - { + void getEdgesView(const offset_t *&offsets, const gno_t *&adjIds) const { offsets = offs_.getRawPtr(); - adjIds = (getLocalNumEdges() ? adjids_.getRawPtr() : NULL); + adjIds = (getLocalNumEdges() ? 
adjids_.getRawPtr() : NULL); } - int getNumWeightsPerVertex() const { return nWeightsPerVertex_;} + int getNumWeightsPerVertex() const { return nWeightsPerVertex_; } void getVertexWeightsView(const scalar_t *&weights, int &stride, - int idx) const - { - if(idx<0 || idx >= nWeightsPerVertex_) - { + int idx) const { + if (idx < 0 || idx >= nWeightsPerVertex_) { std::ostringstream emsg; emsg << __FILE__ << ":" << __LINE__ << " Invalid vertex weight index " << idx << std::endl; throw std::runtime_error(emsg.str()); } - size_t length; vertexWeights_[idx].getStridedList(length, weights, stride); } - bool useDegreeAsVertexWeight(int idx) const {return vertexDegreeWeight_[idx];} + bool useDegreeAsVertexWeight(int idx) const { return vertexDegreeWeight_[idx]; } - int getNumWeightsPerEdge() const { return nWeightsPerEdge_;} + int getNumWeightsPerEdge() const { return nWeightsPerEdge_; } - void getEdgeWeightsView(const scalar_t *&weights, int &stride, int idx) const - { - if(idx<0 || idx >= nWeightsPerEdge_) - { + void getEdgeWeightsView(const scalar_t *&weights, int &stride, int idx) const { + if (idx < 0 || idx >= nWeightsPerEdge_) { std::ostringstream emsg; emsg << __FILE__ << ":" << __LINE__ << " Invalid edge weight index " << idx << std::endl; throw std::runtime_error(emsg.str()); } - size_t length; edgeWeights_[idx].getStridedList(length, weights, stride); } - template void applyPartitioningSolution(const User &in, User *&out, const Zoltan2::PartitioningSolution &solution) const { - TEUCHOS_TEST_FOR_EXCEPTION(1, std::invalid_argument,"applyPartitionlingSolution not implemeneted"); -} + TEUCHOS_TEST_FOR_EXCEPTION(1, std::invalid_argument, "applyPartitionlingSolution not implemeneted"); + } template void applyPartitioningSolution(const User &in, RCP &out, const Zoltan2::PartitioningSolution &solution) const { - TEUCHOS_TEST_FOR_EXCEPTION(1, std::invalid_argument,"applyPartitionlingSolution not implemeneted"); + TEUCHOS_TEST_FOR_EXCEPTION(1, std::invalid_argument, 
"applyPartitionlingSolution not implemeneted"); } - -private: - - RCP ingraph_; - RCP graph_; + private: + RCP ingraph_; + RCP graph_; RCP > comm_; ArrayRCP offs_; @@ -311,67 +293,69 @@ class MueLuGraphBaseAdapter : public Zoltan2::GraphAdapter { int coordinateDim_; ArrayRCP > coords_; - }; - ///////////////////////////////////////////////////////////////// // Definitions ///////////////////////////////////////////////////////////////// template - MueLuGraphBaseAdapter::MueLuGraphBaseAdapter( - const RCP &ingraph, int nVtxWgts, int nEdgeWgts): - ingraph_(ingraph), graph_(), comm_() , offs_(), adjids_(), - nWeightsPerVertex_(nVtxWgts), vertexWeights_(), vertexDegreeWeight_(), - nWeightsPerEdge_(nEdgeWgts), edgeWeights_(), - coordinateDim_(0), coords_() -{ - typedef Zoltan2::StridedData input_t; +MueLuGraphBaseAdapter::MueLuGraphBaseAdapter( + const RCP &ingraph, int nVtxWgts, int nEdgeWgts) + : ingraph_(ingraph) + , graph_() + , comm_() + , offs_() + , adjids_() + , nWeightsPerVertex_(nVtxWgts) + , vertexWeights_() + , vertexDegreeWeight_() + , nWeightsPerEdge_(nEdgeWgts) + , edgeWeights_() + , coordinateDim_(0) + , coords_() { + typedef Zoltan2::StridedData input_t; graph_ = ingraph; - comm_ = getRowMap()->getComm(); - size_t nvtx = getLocalNumRows(); + comm_ = getRowMap()->getComm(); + size_t nvtx = getLocalNumRows(); size_t nedges = getLocalNumEntries(); // Unfortunately we have to copy the offsets and edge Ids // because edge Ids are not usually stored in vertex id order. 
size_t n = nvtx + 1; offs_.resize(n); - offset_t* offs = const_cast(offs_.getRawPtr()); - gno_t* adjids=0; - if(nedges > 0) { + offset_t *offs = const_cast(offs_.getRawPtr()); + gno_t *adjids = 0; + if (nedges > 0) { adjids_.resize(nedges); - adjids = const_cast(adjids_.getRawPtr()); + adjids = const_cast(adjids_.getRawPtr()); } offs[0] = 0; - for (size_t v=0; v < nvtx; v++){ + for (size_t v = 0; v < nvtx; v++) { ArrayView nbors; getLocalRowView(v, nbors); - offs[v+1] = offs[v] + nbors.size(); - for (offset_t e=offs[v], i=0; e < offs[v+1]; e++) { + offs[v + 1] = offs[v] + nbors.size(); + for (offset_t e = offs[v], i = 0; e < offs[v + 1]; e++) { adjids[e] = getColMap()->getGlobalElement(nbors[i++]); } } if (nWeightsPerVertex_ > 0) { vertexWeights_ = - arcp(new input_t[nWeightsPerVertex_], 0, nWeightsPerVertex_, true); + arcp(new input_t[nWeightsPerVertex_], 0, nWeightsPerVertex_, true); vertexDegreeWeight_ = - arcp(new bool[nWeightsPerVertex_], 0, nWeightsPerVertex_, true); - for (int i=0; i < nWeightsPerVertex_; i++) + arcp(new bool[nWeightsPerVertex_], 0, nWeightsPerVertex_, true); + for (int i = 0; i < nWeightsPerVertex_; i++) vertexDegreeWeight_[i] = false; } - - } //////////////////////////////////////////////////////////////////////////// template - void MueLuGraphBaseAdapter::setWeights( - const scalar_t *weightVal, int stride, int idx) -{ +void MueLuGraphBaseAdapter::setWeights( + const scalar_t *weightVal, int stride, int idx) { if (this->getPrimaryEntityType() == Zoltan2::GRAPH_VERTEX) setVertexWeights(weightVal, stride, idx); else @@ -380,29 +364,26 @@ template //////////////////////////////////////////////////////////////////////////// template - void MueLuGraphBaseAdapter::setVertexWeights( - const scalar_t *weightVal, int stride, int idx) -{ - typedef Zoltan2::StridedData input_t; +void MueLuGraphBaseAdapter::setVertexWeights( + const scalar_t *weightVal, int stride, int idx) { + typedef Zoltan2::StridedData input_t; - if(idx<0 || idx >= 
nWeightsPerVertex_) - { - std::ostringstream emsg; - emsg << __FILE__ << ":" << __LINE__ - << " Invalid vertex weight index " << idx << std::endl; - throw std::runtime_error(emsg.str()); + if (idx < 0 || idx >= nWeightsPerVertex_) { + std::ostringstream emsg; + emsg << __FILE__ << ":" << __LINE__ + << " Invalid vertex weight index " << idx << std::endl; + throw std::runtime_error(emsg.str()); } size_t nvtx = getLocalNumVertices(); - ArrayRCP weightV(weightVal, 0, nvtx*stride, false); + ArrayRCP weightV(weightVal, 0, nvtx * stride, false); vertexWeights_[idx] = input_t(weightV, stride); } //////////////////////////////////////////////////////////////////////////// template - void MueLuGraphBaseAdapter::setWeightIsDegree( - int idx) -{ +void MueLuGraphBaseAdapter::setWeightIsDegree( + int idx) { if (this->getPrimaryEntityType() == Zoltan2::GRAPH_VERTEX) setVertexWeightIsDegree(idx); else { @@ -416,15 +397,13 @@ template //////////////////////////////////////////////////////////////////////////// template - void MueLuGraphBaseAdapter::setVertexWeightIsDegree( - int idx) -{ - if(idx<0 || idx >= nWeightsPerVertex_) - { - std::ostringstream emsg; - emsg << __FILE__ << ":" << __LINE__ - << " Invalid vertex weight index " << idx << std::endl; - throw std::runtime_error(emsg.str()); +void MueLuGraphBaseAdapter::setVertexWeightIsDegree( + int idx) { + if (idx < 0 || idx >= nWeightsPerVertex_) { + std::ostringstream emsg; + emsg << __FILE__ << ":" << __LINE__ + << " Invalid vertex weight index " << idx << std::endl; + throw std::runtime_error(emsg.str()); } vertexDegreeWeight_[idx] = true; @@ -432,28 +411,24 @@ template //////////////////////////////////////////////////////////////////////////// template - void MueLuGraphBaseAdapter::setEdgeWeights( - const scalar_t *weightVal, int stride, int idx) -{ - typedef Zoltan2::StridedData input_t; +void MueLuGraphBaseAdapter::setEdgeWeights( + const scalar_t *weightVal, int stride, int idx) { + typedef Zoltan2::StridedData input_t; 
- if(idx<0 || idx >= nWeightsPerEdge_) - { - std::ostringstream emsg; - emsg << __FILE__ << ":" << __LINE__ - << " Invalid edge weight index " << idx << std::endl; - throw std::runtime_error(emsg.str()); + if (idx < 0 || idx >= nWeightsPerEdge_) { + std::ostringstream emsg; + emsg << __FILE__ << ":" << __LINE__ + << " Invalid edge weight index " << idx << std::endl; + throw std::runtime_error(emsg.str()); } size_t nedges = getLocalNumEdges(); - ArrayRCP weightV(weightVal, 0, nedges*stride, false); + ArrayRCP weightV(weightVal, 0, nedges * stride, false); edgeWeights_[idx] = input_t(weightV, stride); } - } //namespace MueLu +#endif // MUELU_HAVE_ZOLTAN2 -#endif// MUELU_HAVE_ZOLTAN2 - #endif diff --git a/packages/muelu/src/Graph/HybridAggregation/MueLu_HybridAggregationFactory_decl.hpp b/packages/muelu/src/Graph/HybridAggregation/MueLu_HybridAggregationFactory_decl.hpp index 879332ca0d44..4e2c5302bbd1 100644 --- a/packages/muelu/src/Graph/HybridAggregation/MueLu_HybridAggregationFactory_decl.hpp +++ b/packages/muelu/src/Graph/HybridAggregation/MueLu_HybridAggregationFactory_decl.hpp @@ -46,7 +46,6 @@ #ifndef MUELU_HYBRIDAGGREGATIONFACTORY_DECL_HPP_ #define MUELU_HYBRIDAGGREGATIONFACTORY_DECL_HPP_ - #include #include "MueLu_ConfigDefs.hpp" @@ -129,59 +128,58 @@ namespace MueLu { | Aggregates | HybridAggregationFactory | Container class with aggregation information. See also Aggregates. */ - template - class HybridAggregationFactory : public SingleLevelFactoryBase { +template +class HybridAggregationFactory : public SingleLevelFactoryBase { #undef MUELU_HYBRIDAGGREGATIONFACTORY_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ - - //! Constructor. - HybridAggregationFactory(); + public: + //! @name Constructors/Destructors. + //@{ - //! Destructor. - virtual ~HybridAggregationFactory() { } + //! Constructor. + HybridAggregationFactory(); - RCP GetValidParameterList() const; + //! Destructor. 
+ virtual ~HybridAggregationFactory() {} - //@} + RCP GetValidParameterList() const; - //! Input - //@{ + //@} - void DeclareInput(Level ¤tLevel) const; + //! Input + //@{ - //@} + void DeclareInput(Level& currentLevel) const; - //! @name Build methods. - //@{ + //@} - /*! @brief Build aggregates. */ - void Build(Level ¤tLevel) const; + //! @name Build methods. + //@{ - /*! @brief Specifically build aggregates along interfaces */ - void BuildInterfaceAggregates(Level& currentLevel, RCP aggregates, - std::vector& aggStat, LO& numNonAggregatedNodes, - Array coarseRate) const; + /*! @brief Build aggregates. */ + void Build(Level& currentLevel) const; - //@} + /*! @brief Specifically build aggregates along interfaces */ + void BuildInterfaceAggregates(Level& currentLevel, RCP aggregates, + std::vector& aggStat, LO& numNonAggregatedNodes, + Array coarseRate) const; - private: + //@} - //! aggregation algorithms - // will be filled in Build routine - mutable std::vector > > algos_; + private: + //! aggregation algorithms + // will be filled in Build routine + mutable std::vector > > algos_; - //! boolean flag: definition phase - //! if true, the aggregation algorithms still can be set and changed. - //! if false, no change in aggregation algorithms is possible any more - mutable bool bDefinitionPhase_; + //! boolean flag: definition phase + //! if true, the aggregation algorithms still can be set and changed. + //! 
if false, no change in aggregation algorithms is possible any more + mutable bool bDefinitionPhase_; - }; // class HybridAggregationFactory +}; // class HybridAggregationFactory -} +} // namespace MueLu #define MUELU_HYBRIDAGGREGATIONFACTORY_SHORT #endif /* MUELU_HYBRIDAGGREGATIONFACTORY_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/HybridAggregation/MueLu_HybridAggregationFactory_def.hpp b/packages/muelu/src/Graph/HybridAggregation/MueLu_HybridAggregationFactory_def.hpp index d0ac7bcc7496..84a37851d183 100644 --- a/packages/muelu/src/Graph/HybridAggregation/MueLu_HybridAggregationFactory_def.hpp +++ b/packages/muelu/src/Graph/HybridAggregation/MueLu_HybridAggregationFactory_def.hpp @@ -77,265 +77,258 @@ #include "MueLu_MasterList.hpp" #include "MueLu_Monitor.hpp" - namespace MueLu { - template - HybridAggregationFactory:: - HybridAggregationFactory() : bDefinitionPhase_(true) - { } +template +HybridAggregationFactory:: + HybridAggregationFactory() + : bDefinitionPhase_(true) {} - template - RCP HybridAggregationFactory:: - GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP HybridAggregationFactory:: + GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); - typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; + typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - // From UncoupledAggregationFactory - SET_VALID_ENTRY("aggregation: max agg size"); - SET_VALID_ENTRY("aggregation: min agg size"); - SET_VALID_ENTRY("aggregation: max selected neighbors"); - SET_VALID_ENTRY("aggregation: ordering"); - validParamList->getEntry("aggregation: ordering").setValidator( - rcp(new validatorType(Teuchos::tuple("natural", "graph", "random"), "aggregation: ordering"))); - SET_VALID_ENTRY("aggregation: enable phase 1"); - SET_VALID_ENTRY("aggregation: enable phase 2a"); - 
SET_VALID_ENTRY("aggregation: enable phase 2b"); - SET_VALID_ENTRY("aggregation: enable phase 3"); - SET_VALID_ENTRY("aggregation: preserve Dirichlet points"); - SET_VALID_ENTRY("aggregation: allow user-specified singletons"); - SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors"); - SET_VALID_ENTRY("aggregation: match ML phase2a"); - SET_VALID_ENTRY("aggregation: phase2a agg factor"); - SET_VALID_ENTRY("aggregation: phase3 avoid singletons"); - - // From StructuredAggregationFactory - SET_VALID_ENTRY("aggregation: coarsening rate"); - SET_VALID_ENTRY("aggregation: coarsening order"); - SET_VALID_ENTRY("aggregation: number of spatial dimensions"); - - // From HybridAggregationFactory - SET_VALID_ENTRY("aggregation: use interface aggregation"); -#undef SET_VALID_ENTRY - - /* From UncoupledAggregation */ - // general variables needed in AggregationFactory - validParamList->set< RCP >("Graph", null, "Generating factory of the graph"); - validParamList->set< RCP >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'"); - // special variables necessary for OnePtAggregationAlgorithm - validParamList->set ("OnePt aggregate map name", "", - "Name of input map for single node aggregates. (default='')"); - validParamList->set ("OnePt aggregate map factory", "", - "Generating factory of (DOF) map for single node aggregates."); - - // InterfaceAggregation parameters - validParamList->set ("Interface aggregate map name", "", - "Name of input map for interface aggregates. 
(default='')"); - validParamList->set ("Interface aggregate map factory", "", - "Generating factory of (DOF) map for interface aggregates."); - validParamList->set > ("interfacesDimensions", Teuchos::null, - "Describes the dimensions of all the interfaces on this rank."); - validParamList->set > ("nodeOnInterface", Teuchos::null, - "List the LIDs of the nodes on any interface."); - - /* From StructuredAggregation */ - // general variables needed in AggregationFactory - validParamList->set >("numDimensions", Teuchos::null, - "Number of spatial dimension provided by CoordinatesTransferFactory."); - validParamList->set >("lNodesPerDim", Teuchos::null, - "Number of nodes per spatial dimmension provided by CoordinatesTransferFactory."); - - - // Hybrid Aggregation Params - validParamList->set > ("aggregationRegionType", Teuchos::null, - "Type of aggregation to use on the region (\"structured\" or \"uncoupled\")"); - - return validParamList; + // From UncoupledAggregationFactory + SET_VALID_ENTRY("aggregation: max agg size"); + SET_VALID_ENTRY("aggregation: min agg size"); + SET_VALID_ENTRY("aggregation: max selected neighbors"); + SET_VALID_ENTRY("aggregation: ordering"); + validParamList->getEntry("aggregation: ordering").setValidator(rcp(new validatorType(Teuchos::tuple("natural", "graph", "random"), "aggregation: ordering"))); + SET_VALID_ENTRY("aggregation: enable phase 1"); + SET_VALID_ENTRY("aggregation: enable phase 2a"); + SET_VALID_ENTRY("aggregation: enable phase 2b"); + SET_VALID_ENTRY("aggregation: enable phase 3"); + SET_VALID_ENTRY("aggregation: preserve Dirichlet points"); + SET_VALID_ENTRY("aggregation: allow user-specified singletons"); + SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors"); + SET_VALID_ENTRY("aggregation: match ML phase2a"); + SET_VALID_ENTRY("aggregation: phase2a agg factor"); + SET_VALID_ENTRY("aggregation: phase3 avoid singletons"); + + // From StructuredAggregationFactory + SET_VALID_ENTRY("aggregation: 
coarsening rate"); + SET_VALID_ENTRY("aggregation: coarsening order"); + SET_VALID_ENTRY("aggregation: number of spatial dimensions"); + + // From HybridAggregationFactory + SET_VALID_ENTRY("aggregation: use interface aggregation"); +#undef SET_VALID_ENTRY + + /* From UncoupledAggregation */ + // general variables needed in AggregationFactory + validParamList->set >("Graph", null, "Generating factory of the graph"); + validParamList->set >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'"); + // special variables necessary for OnePtAggregationAlgorithm + validParamList->set("OnePt aggregate map name", "", + "Name of input map for single node aggregates. (default='')"); + validParamList->set("OnePt aggregate map factory", "", + "Generating factory of (DOF) map for single node aggregates."); + + // InterfaceAggregation parameters + validParamList->set("Interface aggregate map name", "", + "Name of input map for interface aggregates. (default='')"); + validParamList->set("Interface aggregate map factory", "", + "Generating factory of (DOF) map for interface aggregates."); + validParamList->set >("interfacesDimensions", Teuchos::null, + "Describes the dimensions of all the interfaces on this rank."); + validParamList->set >("nodeOnInterface", Teuchos::null, + "List the LIDs of the nodes on any interface."); + + /* From StructuredAggregation */ + // general variables needed in AggregationFactory + validParamList->set >("numDimensions", Teuchos::null, + "Number of spatial dimension provided by CoordinatesTransferFactory."); + validParamList->set >("lNodesPerDim", Teuchos::null, + "Number of nodes per spatial dimmension provided by CoordinatesTransferFactory."); + + // Hybrid Aggregation Params + validParamList->set >("aggregationRegionType", Teuchos::null, + "Type of aggregation to use on the region (\"structured\" or \"uncoupled\")"); + + return validParamList; +} + +template +void HybridAggregationFactory:: + 
DeclareInput(Level& currentLevel) const { + Input(currentLevel, "Graph"); + + ParameterList pL = GetParameterList(); + + /* StructuredAggregation */ + + // Request the local number of nodes per dimensions + if (currentLevel.GetLevelID() == 0) { + if (currentLevel.IsAvailable("aggregationRegionType", NoFactory::get())) { + currentLevel.DeclareInput("aggregationRegionType", NoFactory::get(), this); + } else { + TEUCHOS_TEST_FOR_EXCEPTION(!currentLevel.IsAvailable("aggregationRegionType", NoFactory::get()), + Exceptions::RuntimeError, + "Aggregation region type was not provided by the user!"); + } + if (currentLevel.IsAvailable("numDimensions", NoFactory::get())) { + currentLevel.DeclareInput("numDimensions", NoFactory::get(), this); + } else { + TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("numDimensions", NoFactory::get()), + Exceptions::RuntimeError, + "numDimensions was not provided by the user on level0!"); + } + if (currentLevel.IsAvailable("lNodesPerDim", NoFactory::get())) { + currentLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this); + } else { + TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("lNodesPerDim", NoFactory::get()), + Exceptions::RuntimeError, + "lNodesPerDim was not provided by the user on level0!"); + } + } else { + Input(currentLevel, "aggregationRegionType"); + Input(currentLevel, "numDimensions"); + Input(currentLevel, "lNodesPerDim"); } - template - void HybridAggregationFactory:: - DeclareInput(Level& currentLevel) const { - Input(currentLevel, "Graph"); - - ParameterList pL = GetParameterList(); - - + /* UncoupledAggregation */ + Input(currentLevel, "DofsPerNode"); - /* StructuredAggregation */ - - // Request the local number of nodes per dimensions - if(currentLevel.GetLevelID() == 0) { - if(currentLevel.IsAvailable("aggregationRegionType", NoFactory::get())) { - currentLevel.DeclareInput("aggregationRegionType", NoFactory::get(), this); - } else { - 
TEUCHOS_TEST_FOR_EXCEPTION(!currentLevel.IsAvailable("aggregationRegionType",NoFactory::get()), - Exceptions::RuntimeError, - "Aggregation region type was not provided by the user!"); - } - if(currentLevel.IsAvailable("numDimensions", NoFactory::get())) { - currentLevel.DeclareInput("numDimensions", NoFactory::get(), this); + // request special data necessary for InterfaceAggregation + if (pL.get("aggregation: use interface aggregation") == true) { + if (currentLevel.GetLevelID() == 0) { + if (currentLevel.IsAvailable("interfacesDimensions", NoFactory::get())) { + currentLevel.DeclareInput("interfacesDimensions", NoFactory::get(), this); } else { - TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("numDimensions", NoFactory::get()), + TEUCHOS_TEST_FOR_EXCEPTION(!currentLevel.IsAvailable("interfacesDimensions", NoFactory::get()), Exceptions::RuntimeError, - "numDimensions was not provided by the user on level0!"); + "interfacesDimensions was not provided by the user on level0!"); } - if(currentLevel.IsAvailable("lNodesPerDim", NoFactory::get())) { - currentLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this); + if (currentLevel.IsAvailable("nodeOnInterface", NoFactory::get())) { + currentLevel.DeclareInput("nodeOnInterface", NoFactory::get(), this); } else { - TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("lNodesPerDim", NoFactory::get()), + TEUCHOS_TEST_FOR_EXCEPTION(!currentLevel.IsAvailable("nodeOnInterface", NoFactory::get()), Exceptions::RuntimeError, - "lNodesPerDim was not provided by the user on level0!"); + "nodeOnInterface was not provided by the user on level0!"); } } else { - Input(currentLevel, "aggregationRegionType"); - Input(currentLevel, "numDimensions"); - Input(currentLevel, "lNodesPerDim"); - } - - - - /* UncoupledAggregation */ - Input(currentLevel, "DofsPerNode"); - - // request special data necessary for InterfaceAggregation - if (pL.get("aggregation: use interface aggregation") == true){ - if(currentLevel.GetLevelID() == 0) { - 
if(currentLevel.IsAvailable("interfacesDimensions", NoFactory::get())) { - currentLevel.DeclareInput("interfacesDimensions", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(!currentLevel.IsAvailable("interfacesDimensions", NoFactory::get()), - Exceptions::RuntimeError, - "interfacesDimensions was not provided by the user on level0!"); - } - if(currentLevel.IsAvailable("nodeOnInterface", NoFactory::get())) { - currentLevel.DeclareInput("nodeOnInterface", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(!currentLevel.IsAvailable("nodeOnInterface", NoFactory::get()), - Exceptions::RuntimeError, - "nodeOnInterface was not provided by the user on level0!"); - } - } else { - Input(currentLevel, "interfacesDimensions"); - Input(currentLevel, "nodeOnInterface"); - } + Input(currentLevel, "interfacesDimensions"); + Input(currentLevel, "nodeOnInterface"); } + } - // request special data necessary for OnePtAggregationAlgorithm - std::string mapOnePtName = pL.get("OnePt aggregate map name"); - if (mapOnePtName.length() > 0) { - std::string mapOnePtFactName = pL.get("OnePt aggregate map factory"); - if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { - currentLevel.DeclareInput(mapOnePtName, NoFactory::get()); - } else { - RCP mapOnePtFact = GetFactory(mapOnePtFactName); - currentLevel.DeclareInput(mapOnePtName, mapOnePtFact.get()); - } + // request special data necessary for OnePtAggregationAlgorithm + std::string mapOnePtName = pL.get("OnePt aggregate map name"); + if (mapOnePtName.length() > 0) { + std::string mapOnePtFactName = pL.get("OnePt aggregate map factory"); + if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { + currentLevel.DeclareInput(mapOnePtName, NoFactory::get()); + } else { + RCP mapOnePtFact = GetFactory(mapOnePtFactName); + currentLevel.DeclareInput(mapOnePtName, mapOnePtFact.get()); } - } // DeclareInput() + } +} // DeclareInput() + +template +void HybridAggregationFactory:: + Build(Level& 
currentLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + + RCP out; + if (const char* dbg = std::getenv("MUELU_HYBRIDAGGREGATION_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } - template - void HybridAggregationFactory:: - Build(Level ¤tLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); + *out << "Entering hybrid aggregation" << std::endl; - RCP out; - if(const char* dbg = std::getenv("MUELU_HYBRIDAGGREGATION_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); - } + ParameterList pL = GetParameterList(); + bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed - *out << "Entering hybrid aggregation" << std::endl; + if (pL.get("aggregation: max agg size") == -1) + pL.set("aggregation: max agg size", INT_MAX); - ParameterList pL = GetParameterList(); - bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed + // define aggregation algorithms + RCP graphFact = GetFactory("Graph"); - if (pL.get("aggregation: max agg size") == -1) - pL.set("aggregation: max agg size", INT_MAX); + // General problem informations are gathered from data stored in the problem matix. 
+ RCP graph = Get >(currentLevel, "Graph"); + RCP fineMap = graph->GetDomainMap(); + const int myRank = fineMap->getComm()->getRank(); + const int numRanks = fineMap->getComm()->getSize(); - // define aggregation algorithms - RCP graphFact = GetFactory("Graph"); + out->setProcRankAndSize(graph->GetImportMap()->getComm()->getRank(), + graph->GetImportMap()->getComm()->getSize()); - // General problem informations are gathered from data stored in the problem matix. - RCP graph = Get< RCP >(currentLevel, "Graph"); - RCP fineMap = graph->GetDomainMap(); - const int myRank = fineMap->getComm()->getRank(); - const int numRanks = fineMap->getComm()->getSize(); + // Build aggregates + RCP aggregates = rcp(new Aggregates(*graph)); + aggregates->setObjectLabel("HB"); - out->setProcRankAndSize(graph->GetImportMap()->getComm()->getRank(), - graph->GetImportMap()->getComm()->getSize()); + // construct aggStat information + const LO numRows = graph->GetNodeNumVertices(); + std::vector aggStat(numRows, READY); - // Build aggregates - RCP aggregates = rcp(new Aggregates(*graph)); - aggregates->setObjectLabel("HB"); + // Get aggregation type for region + std::string regionType; + if (currentLevel.GetLevelID() == 0) { + // On level 0, data is provided by applications and has no associated factory. + regionType = currentLevel.Get("aggregationRegionType", NoFactory::get()); + } else { + // On level > 0, data is provided directly by generating factories. + regionType = Get(currentLevel, "aggregationRegionType"); + } - // construct aggStat information - const LO numRows = graph->GetNodeNumVertices(); - std::vector aggStat(numRows, READY); + int numDimensions = 0; + if (currentLevel.GetLevelID() == 0) { + // On level 0, data is provided by applications and has no associated factory. + numDimensions = currentLevel.Get("numDimensions", NoFactory::get()); + } else { + // On level > 0, data is provided directly by generating factories. 
+ numDimensions = Get(currentLevel, "numDimensions"); + } - // Get aggregation type for region - std::string regionType; - if(currentLevel.GetLevelID() == 0) { + // Get the coarsening rate (potentially used for both structured and uncoupled aggregation if interface) + std::string coarseningRate = pL.get("aggregation: coarsening rate"); + Teuchos::Array coarseRate; + try { + coarseRate = Teuchos::fromStringToArray(coarseningRate); + } catch (const Teuchos::InvalidArrayStringRepresentation& e) { + GetOStream(Errors, -1) << " *** \"aggregation: coarsening rate\" must be a string convertible into an array! *** " + << std::endl; + throw e; + } + TEUCHOS_TEST_FOR_EXCEPTION((coarseRate.size() > 1) && (coarseRate.size() < numDimensions), + Exceptions::RuntimeError, + "\"aggregation: coarsening rate\" must have at least as many" + " components as the number of spatial dimensions in the problem."); + + algos_.clear(); + LO numNonAggregatedNodes = numRows; + if (regionType == "structured") { + // Add AggregationStructuredAlgorithm + algos_.push_back(rcp(new AggregationStructuredAlgorithm(graphFact))); + + // Since we want to operate on nodes and not dof, we need to modify the rowMap in order to + // obtain a nodeMap. + const int interpolationOrder = pL.get("aggregation: coarsening order"); + Array lFineNodesPerDir(3); + if (currentLevel.GetLevelID() == 0) { // On level 0, data is provided by applications and has no associated factory. - regionType = currentLevel.Get("aggregationRegionType", NoFactory::get()); + lFineNodesPerDir = currentLevel.Get >("lNodesPerDim", NoFactory::get()); } else { // On level > 0, data is provided directly by generating factories. - regionType = Get< std::string >(currentLevel, "aggregationRegionType"); + lFineNodesPerDir = Get >(currentLevel, "lNodesPerDim"); } - int numDimensions = 0; - if(currentLevel.GetLevelID() == 0) { - // On level 0, data is provided by applications and has no associated factory. 
- numDimensions = currentLevel.Get("numDimensions", NoFactory::get()); - } else { - // On level > 0, data is provided directly by generating factories. - numDimensions = Get(currentLevel, "numDimensions"); + // Set lFineNodesPerDir to 1 for directions beyond numDimensions + for (int dim = numDimensions; dim < 3; ++dim) { + lFineNodesPerDir[dim] = 1; } - // Get the coarsening rate (potentially used for both structured and uncoupled aggregation if interface) - std::string coarseningRate = pL.get("aggregation: coarsening rate"); - Teuchos::Array coarseRate; - try { - coarseRate = Teuchos::fromStringToArray(coarseningRate); - } catch(const Teuchos::InvalidArrayStringRepresentation& e) { - GetOStream(Errors,-1) << " *** \"aggregation: coarsening rate\" must be a string convertible into an array! *** " - << std::endl; - throw e; - } - TEUCHOS_TEST_FOR_EXCEPTION((coarseRate.size() > 1) && (coarseRate.size() < numDimensions), - Exceptions::RuntimeError, - "\"aggregation: coarsening rate\" must have at least as many" - " components as the number of spatial dimensions in the problem."); - - algos_.clear(); - LO numNonAggregatedNodes = numRows; - if (regionType == "structured") { - // Add AggregationStructuredAlgorithm - algos_.push_back(rcp(new AggregationStructuredAlgorithm(graphFact))); - - // Since we want to operate on nodes and not dof, we need to modify the rowMap in order to - // obtain a nodeMap. - const int interpolationOrder = pL.get("aggregation: coarsening order"); - Array lFineNodesPerDir(3); - if(currentLevel.GetLevelID() == 0) { - // On level 0, data is provided by applications and has no associated factory. - lFineNodesPerDir = currentLevel.Get >("lNodesPerDim", NoFactory::get()); - } else { - // On level > 0, data is provided directly by generating factories. 
- lFineNodesPerDir = Get >(currentLevel, "lNodesPerDim"); - } - - // Set lFineNodesPerDir to 1 for directions beyond numDimensions - for(int dim = numDimensions; dim < 3; ++dim) { - lFineNodesPerDir[dim] = 1; - } - - // Now that we have extracted info from the level, create the IndexManager - RCP > geoData; - geoData = rcp(new MueLu::UncoupledIndexManager(fineMap->getComm(), + // Now that we have extracted info from the level, create the IndexManager + RCP > geoData; + geoData = rcp(new MueLu::UncoupledIndexManager(fineMap->getComm(), false, numDimensions, interpolationOrder, @@ -345,241 +338,245 @@ namespace MueLu { lFineNodesPerDir, coarseRate, false)); - TEUCHOS_TEST_FOR_EXCEPTION(fineMap->getLocalNumElements() - != static_cast(geoData->getNumLocalFineNodes()), - Exceptions::RuntimeError, - "The local number of elements in the graph's map is not equal to " - "the number of nodes given by: lNodesPerDim!"); - - aggregates->SetIndexManager(geoData); - aggregates->SetNumAggregates(geoData->getNumLocalCoarseNodes()); - - Set(currentLevel, "lCoarseNodesPerDim", geoData->getLocalCoarseNodesPerDir()); - - } // end structured aggregation setup - - if (regionType == "uncoupled"){ - // Add unstructred aggregation phases - algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm(graphFact))); - if (pL.get("aggregation: use interface aggregation") == true) algos_.push_back(rcp(new InterfaceAggregationAlgorithm (graphFact))); - if (pL.get("aggregation: allow user-specified singletons") == true) algos_.push_back(rcp(new OnePtAggregationAlgorithm (graphFact))); - if (pL.get("aggregation: enable phase 1" ) == true) algos_.push_back(rcp(new AggregationPhase1Algorithm (graphFact))); - if (pL.get("aggregation: enable phase 2a") == true) algos_.push_back(rcp(new AggregationPhase2aAlgorithm (graphFact))); - if (pL.get("aggregation: enable phase 2b") == true) algos_.push_back(rcp(new AggregationPhase2bAlgorithm (graphFact))); - if (pL.get("aggregation: enable phase 3" ) == true) 
algos_.push_back(rcp(new AggregationPhase3Algorithm (graphFact))); - - *out << " Build interface aggregates" << std::endl; - // interface - if (pL.get("aggregation: use interface aggregation") == true) { - BuildInterfaceAggregates(currentLevel, aggregates, aggStat, numNonAggregatedNodes, - coarseRate); - } + TEUCHOS_TEST_FOR_EXCEPTION(fineMap->getLocalNumElements() != static_cast(geoData->getNumLocalFineNodes()), + Exceptions::RuntimeError, + "The local number of elements in the graph's map is not equal to " + "the number of nodes given by: lNodesPerDim!"); + + aggregates->SetIndexManager(geoData); + aggregates->SetNumAggregates(geoData->getNumLocalCoarseNodes()); + + Set(currentLevel, "lCoarseNodesPerDim", geoData->getLocalCoarseNodesPerDir()); + + } // end structured aggregation setup + + if (regionType == "uncoupled") { + // Add unstructred aggregation phases + algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm(graphFact))); + if (pL.get("aggregation: use interface aggregation") == true) algos_.push_back(rcp(new InterfaceAggregationAlgorithm(graphFact))); + if (pL.get("aggregation: allow user-specified singletons") == true) algos_.push_back(rcp(new OnePtAggregationAlgorithm(graphFact))); + if (pL.get("aggregation: enable phase 1") == true) algos_.push_back(rcp(new AggregationPhase1Algorithm(graphFact))); + if (pL.get("aggregation: enable phase 2a") == true) algos_.push_back(rcp(new AggregationPhase2aAlgorithm(graphFact))); + if (pL.get("aggregation: enable phase 2b") == true) algos_.push_back(rcp(new AggregationPhase2bAlgorithm(graphFact))); + if (pL.get("aggregation: enable phase 3") == true) algos_.push_back(rcp(new AggregationPhase3Algorithm(graphFact))); + + *out << " Build interface aggregates" << std::endl; + // interface + if (pL.get("aggregation: use interface aggregation") == true) { + BuildInterfaceAggregates(currentLevel, aggregates, aggStat, numNonAggregatedNodes, + coarseRate); + } - *out << "Treat Dirichlet BC" << std::endl; - // 
Dirichlet boundary - ArrayRCP dirichletBoundaryMap = graph->GetBoundaryNodeMap(); - if (dirichletBoundaryMap != Teuchos::null) - for (LO i = 0; i < numRows; i++) - if (dirichletBoundaryMap[i] == true) - aggStat[i] = BOUNDARY; - - // OnePt aggregation - std::string mapOnePtName = pL.get("OnePt aggregate map name"); - RCP OnePtMap = Teuchos::null; - if (mapOnePtName.length()) { - std::string mapOnePtFactName = pL.get("OnePt aggregate map factory"); - if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { - OnePtMap = currentLevel.Get >(mapOnePtName, NoFactory::get()); - } else { - RCP mapOnePtFact = GetFactory(mapOnePtFactName); - OnePtMap = currentLevel.Get >(mapOnePtName, mapOnePtFact.get()); - } - } + *out << "Treat Dirichlet BC" << std::endl; + // Dirichlet boundary + ArrayRCP dirichletBoundaryMap = graph->GetBoundaryNodeMap(); + if (dirichletBoundaryMap != Teuchos::null) + for (LO i = 0; i < numRows; i++) + if (dirichletBoundaryMap[i] == true) + aggStat[i] = BOUNDARY; - LO nDofsPerNode = Get(currentLevel, "DofsPerNode"); - GO indexBase = graph->GetDomainMap()->getIndexBase(); - if (OnePtMap != Teuchos::null) { - for (LO i = 0; i < numRows; i++) { - // reconstruct global row id (FIXME only works for contiguous maps) - GO grid = (graph->GetDomainMap()->getGlobalElement(i)-indexBase) * nDofsPerNode + indexBase; - for (LO kr = 0; kr < nDofsPerNode; kr++) - if (OnePtMap->isNodeGlobalElement(grid + kr)) - aggStat[i] = ONEPT; - } + // OnePt aggregation + std::string mapOnePtName = pL.get("OnePt aggregate map name"); + RCP OnePtMap = Teuchos::null; + if (mapOnePtName.length()) { + std::string mapOnePtFactName = pL.get("OnePt aggregate map factory"); + if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { + OnePtMap = currentLevel.Get >(mapOnePtName, NoFactory::get()); + } else { + RCP mapOnePtFact = GetFactory(mapOnePtFactName); + OnePtMap = currentLevel.Get >(mapOnePtName, mapOnePtFact.get()); } + } - // Create a fake lCoarseNodesPerDir for 
CoordinatesTranferFactory - Array lCoarseNodesPerDir(3,-1); - Set(currentLevel, "lCoarseNodesPerDim", lCoarseNodesPerDir); - } // end uncoupled aggregation setup - - aggregates->AggregatesCrossProcessors(false); // No coupled aggregation + LO nDofsPerNode = Get(currentLevel, "DofsPerNode"); + GO indexBase = graph->GetDomainMap()->getIndexBase(); + if (OnePtMap != Teuchos::null) { + for (LO i = 0; i < numRows; i++) { + // reconstruct global row id (FIXME only works for contiguous maps) + GO grid = (graph->GetDomainMap()->getGlobalElement(i) - indexBase) * nDofsPerNode + indexBase; + for (LO kr = 0; kr < nDofsPerNode; kr++) + if (OnePtMap->isNodeGlobalElement(grid + kr)) + aggStat[i] = ONEPT; + } + } - *out << "Run all the algorithms on the local rank" << std::endl; - for (size_t a = 0; a < algos_.size(); a++) { - std::string phase = algos_[a]->description(); - SubFactoryMonitor sfm(*this, "Algo \"" + phase + "\"", currentLevel); - *out << regionType <<" | Executing phase " << a << std::endl; + // Create a fake lCoarseNodesPerDir for CoordinatesTranferFactory + Array lCoarseNodesPerDir(3, -1); + Set(currentLevel, "lCoarseNodesPerDim", lCoarseNodesPerDir); + } // end uncoupled aggregation setup - int oldRank = algos_[a]->SetProcRankVerbose(this->GetProcRankVerbose()); - algos_[a]->BuildAggregates(pL, *graph, *aggregates, aggStat, numNonAggregatedNodes); - algos_[a]->SetProcRankVerbose(oldRank); - *out << regionType <<" | Done Executing phase " << a << std::endl; - } + aggregates->AggregatesCrossProcessors(false); // No coupled aggregation - *out << "Compute statistics on aggregates" << std::endl; - aggregates->ComputeAggregateSizes(true/*forceRecompute*/); + *out << "Run all the algorithms on the local rank" << std::endl; + for (size_t a = 0; a < algos_.size(); a++) { + std::string phase = algos_[a]->description(); + SubFactoryMonitor sfm(*this, "Algo \"" + phase + "\"", currentLevel); + *out << regionType << " | Executing phase " << a << std::endl; - 
Set(currentLevel, "Aggregates", aggregates); - Set(currentLevel, "numDimensions", numDimensions); - Set(currentLevel, "aggregationRegionTypeCoarse", regionType); + int oldRank = algos_[a]->SetProcRankVerbose(this->GetProcRankVerbose()); + algos_[a]->BuildAggregates(pL, *graph, *aggregates, aggStat, numNonAggregatedNodes); + algos_[a]->SetProcRankVerbose(oldRank); + *out << regionType << " | Done Executing phase " << a << std::endl; + } - GetOStream(Statistics1) << aggregates->description() << std::endl; - *out << "HybridAggregation done!" << std::endl; + *out << "Compute statistics on aggregates" << std::endl; + aggregates->ComputeAggregateSizes(true /*forceRecompute*/); + + Set(currentLevel, "Aggregates", aggregates); + Set(currentLevel, "numDimensions", numDimensions); + Set(currentLevel, "aggregationRegionTypeCoarse", regionType); + + GetOStream(Statistics1) << aggregates->description() << std::endl; + *out << "HybridAggregation done!" << std::endl; +} + +template +void HybridAggregationFactory:: + BuildInterfaceAggregates(Level& currentLevel, RCP aggregates, + std::vector& aggStat, LO& numNonAggregatedNodes, + Array coarseRate) const { + FactoryMonitor m(*this, "BuildInterfaceAggregates", currentLevel); + + RCP out; + if (const char* dbg = std::getenv("MUELU_HYBRIDAGGREGATION_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); } - template - void HybridAggregationFactory:: - BuildInterfaceAggregates(Level& currentLevel, RCP aggregates, - std::vector& aggStat, LO& numNonAggregatedNodes, - Array coarseRate) const { - FactoryMonitor m(*this, "BuildInterfaceAggregates", currentLevel); - - RCP out; - if(const char* dbg = std::getenv("MUELU_HYBRIDAGGREGATION_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out 
= Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + // Extract and format input data for algo + if (coarseRate.size() == 1) { + coarseRate.resize(3, coarseRate[0]); + } + ArrayRCP vertex2AggId = aggregates->GetVertex2AggId()->getDataNonConst(0); + ArrayRCP procWinner = aggregates->GetProcWinner()->getDataNonConst(0); + Array interfacesDimensions = Get >(currentLevel, "interfacesDimensions"); + Array nodesOnInterfaces = Get >(currentLevel, "nodeOnInterface"); + const int numInterfaces = interfacesDimensions.size() / 3; + const int myRank = aggregates->GetMap()->getComm()->getRank(); + + // Create coarse level container to gather data on the fly + Array coarseInterfacesDimensions(interfacesDimensions.size()); + Array nodesOnCoarseInterfaces; + { // Scoping the temporary variables... + LO endRate, totalNumCoarseNodes = 0, numCoarseNodes; + for (int interfaceIdx = 0; interfaceIdx < numInterfaces; ++interfaceIdx) { + numCoarseNodes = 1; + for (int dim = 0; dim < 3; ++dim) { + endRate = (interfacesDimensions[3 * interfaceIdx + dim] - 1) % coarseRate[dim]; + if (interfacesDimensions[3 * interfaceIdx + dim] == 1) { + coarseInterfacesDimensions[3 * interfaceIdx + dim] = 1; + } else { + coarseInterfacesDimensions[3 * interfaceIdx + dim] = (interfacesDimensions[3 * interfaceIdx + dim] - 1) / coarseRate[dim] + 2; + if (endRate == 0) { + coarseInterfacesDimensions[3 * interfaceIdx + dim]--; + } + } + numCoarseNodes *= coarseInterfacesDimensions[3 * interfaceIdx + dim]; + } + totalNumCoarseNodes += numCoarseNodes; } + nodesOnCoarseInterfaces.resize(totalNumCoarseNodes, -1); + } - // Extract and format input data for algo - if(coarseRate.size() == 1) {coarseRate.resize(3, coarseRate[0]);} - ArrayRCP vertex2AggId = aggregates->GetVertex2AggId()->getDataNonConst(0); - ArrayRCP procWinner = aggregates->GetProcWinner() ->getDataNonConst(0); - Array interfacesDimensions = Get >(currentLevel, "interfacesDimensions"); - Array nodesOnInterfaces = Get >(currentLevel, 
"nodeOnInterface"); - const int numInterfaces = interfacesDimensions.size() / 3; - const int myRank = aggregates->GetMap()->getComm()->getRank(); - - // Create coarse level container to gather data on the fly - Array coarseInterfacesDimensions(interfacesDimensions.size()); - Array nodesOnCoarseInterfaces; - { // Scoping the temporary variables... - LO endRate, totalNumCoarseNodes = 0, numCoarseNodes; - for(int interfaceIdx = 0; interfaceIdx < numInterfaces; ++interfaceIdx) { - numCoarseNodes = 1; - for(int dim = 0; dim < 3; ++dim) { - endRate = (interfacesDimensions[3*interfaceIdx + dim] - 1) % coarseRate[dim]; - if(interfacesDimensions[3*interfaceIdx + dim] == 1) { - coarseInterfacesDimensions[3*interfaceIdx + dim] = 1; - } else { - coarseInterfacesDimensions[3*interfaceIdx + dim] - = (interfacesDimensions[3*interfaceIdx+dim]-1) / coarseRate[dim] + 2; - if(endRate==0){ coarseInterfacesDimensions[3*interfaceIdx + dim]--;} - } - numCoarseNodes *= coarseInterfacesDimensions[3*interfaceIdx + dim]; + Array endRate(3); + LO interfaceOffset = 0, aggregateCount = 0, coarseNodeCount = 0; + for (int interfaceIdx = 0; interfaceIdx < numInterfaces; ++interfaceIdx) { + ArrayView fineNodesPerDim = interfacesDimensions(3 * interfaceIdx, 3); + ArrayView coarseNodesPerDim = coarseInterfacesDimensions(3 * interfaceIdx, 3); + LO numInterfaceNodes = 1, numCoarseNodes = 1; + for (int dim = 0; dim < 3; ++dim) { + numInterfaceNodes *= fineNodesPerDim[dim]; + numCoarseNodes *= coarseNodesPerDim[dim]; + endRate[dim] = (fineNodesPerDim[dim] - 1) % coarseRate[dim]; + } + ArrayView interfaceNodes = nodesOnInterfaces(interfaceOffset, numInterfaceNodes); + + interfaceOffset += numInterfaceNodes; + + LO rem, rate, fineNodeIdx; + Array nodeIJK(3), coarseIJK(3), rootIJK(3); + // First find treat coarse nodes as they generate the aggregate IDs + // and they might be repeated on multiple interfaces (think corners and edges). 
+ for (LO coarseNodeIdx = 0; coarseNodeIdx < numCoarseNodes; ++coarseNodeIdx) { + coarseIJK[2] = coarseNodeIdx / (coarseNodesPerDim[0] * coarseNodesPerDim[1]); + rem = coarseNodeIdx % (coarseNodesPerDim[0] * coarseNodesPerDim[1]); + coarseIJK[1] = rem / coarseNodesPerDim[0]; + coarseIJK[0] = rem % coarseNodesPerDim[0]; + + for (LO dim = 0; dim < 3; ++dim) { + if (coarseIJK[dim] == coarseNodesPerDim[dim] - 1) { + nodeIJK[dim] = fineNodesPerDim[dim] - 1; + } else { + nodeIJK[dim] = coarseIJK[dim] * coarseRate[dim]; } - totalNumCoarseNodes += numCoarseNodes; } - nodesOnCoarseInterfaces.resize(totalNumCoarseNodes, -1); + fineNodeIdx = (nodeIJK[2] * fineNodesPerDim[1] + nodeIJK[1]) * fineNodesPerDim[0] + nodeIJK[0]; + + if (aggStat[interfaceNodes[fineNodeIdx]] == READY) { + vertex2AggId[interfaceNodes[fineNodeIdx]] = aggregateCount; + procWinner[interfaceNodes[fineNodeIdx]] = myRank; + aggStat[interfaceNodes[fineNodeIdx]] = AGGREGATED; + ++aggregateCount; + --numNonAggregatedNodes; + } + nodesOnCoarseInterfaces[coarseNodeCount] = vertex2AggId[interfaceNodes[fineNodeIdx]]; + ++coarseNodeCount; } - Array endRate(3); - LO interfaceOffset = 0, aggregateCount = 0, coarseNodeCount = 0; - for(int interfaceIdx = 0; interfaceIdx < numInterfaces; ++interfaceIdx) { - ArrayView fineNodesPerDim = interfacesDimensions(3*interfaceIdx, 3); - ArrayView coarseNodesPerDim = coarseInterfacesDimensions(3*interfaceIdx, 3); - LO numInterfaceNodes = 1, numCoarseNodes = 1; - for(int dim = 0; dim < 3; ++dim) { - numInterfaceNodes *= fineNodesPerDim[dim]; - numCoarseNodes *= coarseNodesPerDim[dim]; - endRate[dim] = (fineNodesPerDim[dim]-1) % coarseRate[dim]; + // Now loop over all the node on the interface + // skip the coarse nodes as they are already aggregated + // and find the appropriate aggregate ID for the fine nodes. + for (LO nodeIdx = 0; nodeIdx < numInterfaceNodes; ++nodeIdx) { + // If the node is already aggregated skip it! 
+ if (aggStat[interfaceNodes[nodeIdx]] == AGGREGATED) { + continue; } - ArrayView interfaceNodes = nodesOnInterfaces(interfaceOffset, numInterfaceNodes); - - interfaceOffset += numInterfaceNodes; - - LO rem, rate, fineNodeIdx; - Array nodeIJK(3), coarseIJK(3), rootIJK(3); - // First find treat coarse nodes as they generate the aggregate IDs - // and they might be repeated on multiple interfaces (think corners and edges). - for(LO coarseNodeIdx = 0; coarseNodeIdx < numCoarseNodes; ++coarseNodeIdx) { - coarseIJK[2] = coarseNodeIdx / (coarseNodesPerDim[0]*coarseNodesPerDim[1]); - rem = coarseNodeIdx % (coarseNodesPerDim[0]*coarseNodesPerDim[1]); - coarseIJK[1] = rem / coarseNodesPerDim[0]; - coarseIJK[0] = rem % coarseNodesPerDim[0]; - - for(LO dim = 0; dim < 3; ++dim) { - if(coarseIJK[dim] == coarseNodesPerDim[dim] - 1) { - nodeIJK[dim] = fineNodesPerDim[dim] - 1; - } else { - nodeIJK[dim] = coarseIJK[dim]*coarseRate[dim]; - } + + nodeIJK[2] = nodeIdx / (fineNodesPerDim[0] * fineNodesPerDim[1]); + rem = nodeIdx % (fineNodesPerDim[0] * fineNodesPerDim[1]); + nodeIJK[1] = rem / fineNodesPerDim[0]; + nodeIJK[0] = rem % fineNodesPerDim[0]; + + for (int dim = 0; dim < 3; ++dim) { + coarseIJK[dim] = nodeIJK[dim] / coarseRate[dim]; + rem = nodeIJK[dim] % coarseRate[dim]; + if (nodeIJK[dim] < fineNodesPerDim[dim] - endRate[dim]) { + rate = coarseRate[dim]; + } else { + rate = endRate[dim]; } - fineNodeIdx = (nodeIJK[2]*fineNodesPerDim[1] + nodeIJK[1])*fineNodesPerDim[0] + nodeIJK[0]; - - if(aggStat[interfaceNodes[fineNodeIdx]] == READY) { - vertex2AggId[interfaceNodes[fineNodeIdx]] = aggregateCount; - procWinner[interfaceNodes[fineNodeIdx]] = myRank; - aggStat[interfaceNodes[fineNodeIdx]] = AGGREGATED; - ++aggregateCount; - --numNonAggregatedNodes; + if (rem > (rate / 2)) { + ++coarseIJK[dim]; } - nodesOnCoarseInterfaces[coarseNodeCount] = vertex2AggId[interfaceNodes[fineNodeIdx]]; - ++coarseNodeCount; } - // Now loop over all the node on the interface - // skip the coarse 
nodes as they are already aggregated - // and find the appropriate aggregate ID for the fine nodes. - for(LO nodeIdx = 0; nodeIdx < numInterfaceNodes; ++nodeIdx) { - - // If the node is already aggregated skip it! - if(aggStat[interfaceNodes[nodeIdx]] == AGGREGATED) {continue;} - - nodeIJK[2] = nodeIdx / (fineNodesPerDim[0]*fineNodesPerDim[1]); - rem = nodeIdx % (fineNodesPerDim[0]*fineNodesPerDim[1]); - nodeIJK[1] = rem / fineNodesPerDim[0]; - nodeIJK[0] = rem % fineNodesPerDim[0]; - - for(int dim = 0; dim < 3; ++dim) { - coarseIJK[dim] = nodeIJK[dim] / coarseRate[dim]; - rem = nodeIJK[dim] % coarseRate[dim]; - if(nodeIJK[dim] < fineNodesPerDim[dim] - endRate[dim]) { - rate = coarseRate[dim]; - } else { - rate = endRate[dim]; - } - if(rem > (rate / 2)) {++coarseIJK[dim];} - } - - for(LO dim = 0; dim < 3; ++dim) { - if(coarseIJK[dim] == coarseNodesPerDim[dim] - 1) { - nodeIJK[dim] = fineNodesPerDim[dim] - 1; - } else { - nodeIJK[dim] = coarseIJK[dim]*coarseRate[dim]; - } + for (LO dim = 0; dim < 3; ++dim) { + if (coarseIJK[dim] == coarseNodesPerDim[dim] - 1) { + nodeIJK[dim] = fineNodesPerDim[dim] - 1; + } else { + nodeIJK[dim] = coarseIJK[dim] * coarseRate[dim]; } - fineNodeIdx = (nodeIJK[2]*fineNodesPerDim[1] + nodeIJK[1])*fineNodesPerDim[0] + nodeIJK[0]; - - vertex2AggId[interfaceNodes[nodeIdx]] = vertex2AggId[interfaceNodes[fineNodeIdx]]; - procWinner[interfaceNodes[nodeIdx]] = myRank; - aggStat[interfaceNodes[nodeIdx]] = AGGREGATED; - --numNonAggregatedNodes; - } // Loop over interface nodes - } // Loop over the interfaces + } + fineNodeIdx = (nodeIJK[2] * fineNodesPerDim[1] + nodeIJK[1]) * fineNodesPerDim[0] + nodeIJK[0]; - // Update aggregates information before subsequent aggregation algorithms - aggregates->SetNumAggregates(aggregateCount); + vertex2AggId[interfaceNodes[nodeIdx]] = vertex2AggId[interfaceNodes[fineNodeIdx]]; + procWinner[interfaceNodes[nodeIdx]] = myRank; + aggStat[interfaceNodes[nodeIdx]] = AGGREGATED; + --numNonAggregatedNodes; + } // 
Loop over interface nodes + } // Loop over the interfaces - // Set coarse data for next level - Set(currentLevel, "coarseInterfacesDimensions", coarseInterfacesDimensions); - Set(currentLevel, "nodeOnCoarseInterface", nodesOnCoarseInterfaces); + // Update aggregates information before subsequent aggregation algorithms + aggregates->SetNumAggregates(aggregateCount); - } // BuildInterfaceAggregates() + // Set coarse data for next level + Set(currentLevel, "coarseInterfacesDimensions", coarseInterfacesDimensions); + Set(currentLevel, "nodeOnCoarseInterface", nodesOnCoarseInterfaces); -} //namespace MueLu +} // BuildInterfaceAggregates() +} //namespace MueLu #endif /* MUELU_HYBRIDAGGREGATIONFACTORY_DEF_HPP */ diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationFactory_decl.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationFactory_decl.hpp index 7cca1e45f86e..d296ac70d5e3 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationFactory_decl.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationFactory_decl.hpp @@ -58,7 +58,7 @@ namespace MueLu { - /*! +/*! @class AmalgamationFactory @brief AmalgamationFactory for subblocks of strided map based amalgamation data @@ -71,39 +71,38 @@ namespace MueLu { */ - template - class AmalgamationFactory : public SingleLevelFactoryBase { +template +class AmalgamationFactory : public SingleLevelFactoryBase { #undef MUELU_AMALGAMATIONFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: + public: + //! @name Constructors/Destructors. + //@{ - //! @name Constructors/Destructors. - //@{ + //! Constructor + AmalgamationFactory() = default; - //! Constructor - AmalgamationFactory() = default; + //! Destructor + virtual ~AmalgamationFactory() = default; - //! Destructor - virtual ~AmalgamationFactory() = default; + RCP GetValidParameterList() const override; - RCP GetValidParameterList() const override; + //@} - //@} + //! Input + //@{ - //! 
Input - //@{ + void DeclareInput(Level& currentLevel) const override; - void DeclareInput(Level ¤tLevel) const override; + //@} - //@} + void Build(Level& currentLevel) const override; - void Build(Level ¤tLevel) const override; - - /*! @brief Translate global (row/column) id to global amalgamation block id + /*! @brief Translate global (row/column) id to global amalgamation block id * * @note Assume that the node map has the same \c indexBase as the dof map * @@ -112,22 +111,21 @@ namespace MueLu { * @param offset (GlobalOrdinal): global offset for dofs (stored in strided map, default = 0) * @param indexBase (GlobalOrdinal): indexBase for DOF map (and node map, default = 0) */ - static const GlobalOrdinal DOFGid2NodeId(GlobalOrdinal gid, LocalOrdinal blockSize, const GlobalOrdinal offset /*= 0*/, - const GlobalOrdinal indexBase/* = 0*/); + static const GlobalOrdinal DOFGid2NodeId(GlobalOrdinal gid, LocalOrdinal blockSize, const GlobalOrdinal offset /*= 0*/, + const GlobalOrdinal indexBase /* = 0*/); - /*! @brief Method to create merged map for systems of PDEs. + /*! @brief Method to create merged map for systems of PDEs. 
* * @param sourceMap (const Map&): source map with dofs which shall be amalgamated to a node map * @param A (const Matrix&): operator A (matrix) with striding information (if available) * @param amalgamatedMap (const Map&): amalgamated node based map * @param translation (Array&): array storing local node ids given local dof ids (needed in CoalesceDropFactory) */ - static void AmalgamateMap(const Map& sourceMap, const Matrix& A, RCP& amalgamatedMap, Array& translation); - + static void AmalgamateMap(const Map& sourceMap, const Matrix& A, RCP& amalgamatedMap, Array& translation); - }; //class AmalgamationFactory +}; //class AmalgamationFactory -} //namespace MueLu +} //namespace MueLu #define MUELU_AMALGAMATIONFACTORY_SHORT -#endif // MUELU_AMALGAMATIONFACTORY_DECL_HPP +#endif // MUELU_AMALGAMATIONFACTORY_DECL_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationFactory_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationFactory_def.hpp index 60baf9efe0fe..da7336ad9f68 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationFactory_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationFactory_def.hpp @@ -56,26 +56,25 @@ namespace MueLu { - template - RCP AmalgamationFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A"); - return validParamList; - } +template +RCP AmalgamationFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + validParamList->set >("A", Teuchos::null, "Generating factory of the matrix A"); + return validParamList; +} - template - void AmalgamationFactory::DeclareInput(Level ¤tLevel) const { - Input(currentLevel, "A"); // sub-block from blocked A - } +template +void AmalgamationFactory::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "A"); // sub-block from blocked A +} - template - 
void AmalgamationFactory::Build(Level ¤tLevel) const - { - FactoryMonitor m(*this, "Build", currentLevel); +template +void AmalgamationFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); - RCP A = Get< RCP >(currentLevel, "A"); + RCP A = Get >(currentLevel, "A"); - /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme. + /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme. fullblocksize is the number of storage blocks that must kept together during the amalgamation process. Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold: @@ -92,150 +91,147 @@ namespace MueLu { Thus far, only storageblocksize=numPDEs and fullblocksize=1 has been tested. */ + LO fullblocksize = 1; // block dim for fixed size blocks + GO offset = 0; // global offset of dof gids + LO blockid = -1; // block id in strided map + LO nStridedOffset = 0; // DOF offset for strided block id "blockid" (default = 0) + LO stridedblocksize = fullblocksize; // size of strided block id "blockid" (default = fullblocksize, only if blockid!=-1 stridedblocksize <= fullblocksize) + LO storageblocksize = A->GetStorageBlockSize(); + // GO indexBase = A->getRowMap()->getIndexBase(); // index base for maps (unused) + + // 1) check for blocking/striding information + + if (A->IsView("stridedMaps") && Teuchos::rcp_dynamic_cast(A->getRowMap("stridedMaps")) != Teuchos::null) { + Xpetra::viewLabel_t oldView = A->SwitchToView("stridedMaps"); // NOTE: "stridedMaps are always non-overlapping (correspond to range and domain maps!) 
+ RCP stridedRowMap = Teuchos::rcp_dynamic_cast(A->getRowMap()); + TEUCHOS_TEST_FOR_EXCEPTION(stridedRowMap == Teuchos::null, Exceptions::BadCast, "MueLu::CoalesceFactory::Build: cast to strided row map failed."); + fullblocksize = stridedRowMap->getFixedBlockSize(); + offset = stridedRowMap->getOffset(); + blockid = stridedRowMap->getStridedBlockId(); + + if (blockid > -1) { + std::vector stridingInfo = stridedRowMap->getStridingData(); + for (size_t j = 0; j < Teuchos::as(blockid); j++) + nStridedOffset += stridingInfo[j]; + stridedblocksize = Teuchos::as(stridingInfo[blockid]); - LO fullblocksize = 1; // block dim for fixed size blocks - GO offset = 0; // global offset of dof gids - LO blockid = -1; // block id in strided map - LO nStridedOffset = 0; // DOF offset for strided block id "blockid" (default = 0) - LO stridedblocksize = fullblocksize; // size of strided block id "blockid" (default = fullblocksize, only if blockid!=-1 stridedblocksize <= fullblocksize) - LO storageblocksize = A->GetStorageBlockSize(); - // GO indexBase = A->getRowMap()->getIndexBase(); // index base for maps (unused) - - // 1) check for blocking/striding information - - if (A->IsView("stridedMaps") && Teuchos::rcp_dynamic_cast(A->getRowMap("stridedMaps")) != Teuchos::null) { - Xpetra::viewLabel_t oldView = A->SwitchToView("stridedMaps"); // NOTE: "stridedMaps are always non-overlapping (correspond to range and domain maps!) 
- RCP stridedRowMap = Teuchos::rcp_dynamic_cast(A->getRowMap()); - TEUCHOS_TEST_FOR_EXCEPTION(stridedRowMap == Teuchos::null,Exceptions::BadCast,"MueLu::CoalesceFactory::Build: cast to strided row map failed."); - fullblocksize = stridedRowMap->getFixedBlockSize(); - offset = stridedRowMap->getOffset(); - blockid = stridedRowMap->getStridedBlockId(); - - if (blockid > -1) { - std::vector stridingInfo = stridedRowMap->getStridingData(); - for (size_t j = 0; j < Teuchos::as(blockid); j++) - nStridedOffset += stridingInfo[j]; - stridedblocksize = Teuchos::as(stridingInfo[blockid]); - - } else { - stridedblocksize = fullblocksize; - } - // Correct for the storageblocksize - // NOTE: Before this point fullblocksize is actually numPDEs - TEUCHOS_TEST_FOR_EXCEPTION(fullblocksize % storageblocksize != 0,Exceptions::RuntimeError,"AmalgamationFactory: fullblocksize needs to be a multiple of A->GetStorageBlockSize()"); - fullblocksize /= storageblocksize; - stridedblocksize /= storageblocksize; - - oldView = A->SwitchToView(oldView); - GetOStream(Runtime1) << "AmalagamationFactory::Build():" << " found fullblocksize=" << fullblocksize << " and stridedblocksize=" << stridedblocksize << " from strided maps. offset=" << offset << std::endl; - - } else { - GetOStream(Warnings0) << "AmalagamationFactory::Build(): no striding information available. Use blockdim=1 with offset=0" << std::endl; - } - - - // build node row map (uniqueMap) and node column map (nonUniqueMap) - // the arrays rowTranslation and colTranslation contain the local node id - // given a local dof id. They are only necessary for the CoalesceDropFactory if - // fullblocksize > 1 - RCP uniqueMap, nonUniqueMap; - RCP amalgamationData; - RCP > rowTranslation = Teuchos::null; - RCP > colTranslation = Teuchos::null; - - if (fullblocksize > 1) { - // mfh 14 Apr 2015: These need to have different names than - // rowTranslation and colTranslation, in order to avoid - // shadowing warnings (-Wshadow with GCC). 
Alternately, it - // looks like you could just assign to the existing variables in - // this scope, rather than creating new ones. - RCP > theRowTranslation = rcp(new Array); - RCP > theColTranslation = rcp(new Array); - AmalgamateMap(*(A->getRowMap()), *A, uniqueMap, *theRowTranslation); - AmalgamateMap(*(A->getColMap()), *A, nonUniqueMap, *theColTranslation); - - amalgamationData = rcp(new AmalgamationInfo(theRowTranslation, - theColTranslation, - uniqueMap, - nonUniqueMap, - A->getColMap(), - fullblocksize, - offset, - blockid, - nStridedOffset, - stridedblocksize) ); } else { - amalgamationData = rcp(new AmalgamationInfo(rowTranslation, // Teuchos::null - colTranslation, // Teuchos::null - A->getRowMap(), // unique map of graph - A->getColMap(), // non-unique map of graph - A->getColMap(), // column map of A - fullblocksize, - offset, - blockid, - nStridedOffset, - stridedblocksize) ); + stridedblocksize = fullblocksize; } + // Correct for the storageblocksize + // NOTE: Before this point fullblocksize is actually numPDEs + TEUCHOS_TEST_FOR_EXCEPTION(fullblocksize % storageblocksize != 0, Exceptions::RuntimeError, "AmalgamationFactory: fullblocksize needs to be a multiple of A->GetStorageBlockSize()"); + fullblocksize /= storageblocksize; + stridedblocksize /= storageblocksize; + + oldView = A->SwitchToView(oldView); + GetOStream(Runtime1) << "AmalagamationFactory::Build():" + << " found fullblocksize=" << fullblocksize << " and stridedblocksize=" << stridedblocksize << " from strided maps. offset=" << offset << std::endl; + + } else { + GetOStream(Warnings0) << "AmalagamationFactory::Build(): no striding information available. Use blockdim=1 with offset=0" << std::endl; + } - // store (un)amalgamation information on current level - Set(currentLevel, "UnAmalgamationInfo", amalgamationData); + // build node row map (uniqueMap) and node column map (nonUniqueMap) + // the arrays rowTranslation and colTranslation contain the local node id + // given a local dof id. 
They are only necessary for the CoalesceDropFactory if + // fullblocksize > 1 + RCP uniqueMap, nonUniqueMap; + RCP amalgamationData; + RCP > rowTranslation = Teuchos::null; + RCP > colTranslation = Teuchos::null; + + if (fullblocksize > 1) { + // mfh 14 Apr 2015: These need to have different names than + // rowTranslation and colTranslation, in order to avoid + // shadowing warnings (-Wshadow with GCC). Alternately, it + // looks like you could just assign to the existing variables in + // this scope, rather than creating new ones. + RCP > theRowTranslation = rcp(new Array); + RCP > theColTranslation = rcp(new Array); + AmalgamateMap(*(A->getRowMap()), *A, uniqueMap, *theRowTranslation); + AmalgamateMap(*(A->getColMap()), *A, nonUniqueMap, *theColTranslation); + + amalgamationData = rcp(new AmalgamationInfo(theRowTranslation, + theColTranslation, + uniqueMap, + nonUniqueMap, + A->getColMap(), + fullblocksize, + offset, + blockid, + nStridedOffset, + stridedblocksize)); + } else { + amalgamationData = rcp(new AmalgamationInfo(rowTranslation, // Teuchos::null + colTranslation, // Teuchos::null + A->getRowMap(), // unique map of graph + A->getColMap(), // non-unique map of graph + A->getColMap(), // column map of A + fullblocksize, + offset, + blockid, + nStridedOffset, + stridedblocksize)); } - template - void AmalgamationFactory::AmalgamateMap(const Map& sourceMap, const Matrix& A, RCP& amalgamatedMap, Array& translation) { - typedef typename ArrayView::size_type size_type; - typedef std::unordered_map container; - - GO indexBase = sourceMap.getIndexBase(); - ArrayView elementAList = sourceMap.getLocalElementList(); - size_type numElements = elementAList.size(); - container filter; - - GO offset = 0; - LO blkSize = A.GetFixedBlockSize() / A.GetStorageBlockSize(); - if (A.IsView("stridedMaps") == true) { - Teuchos::RCP myMap = A.getRowMap("stridedMaps"); - Teuchos::RCP strMap = Teuchos::rcp_dynamic_cast(myMap); - TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, 
Exceptions::RuntimeError, "Map is not of type StridedMap"); - offset = strMap->getOffset(); - blkSize = Teuchos::as(strMap->getFixedBlockSize()); - } + // store (un)amalgamation information on current level + Set(currentLevel, "UnAmalgamationInfo", amalgamationData); +} + +template +void AmalgamationFactory::AmalgamateMap(const Map& sourceMap, const Matrix& A, RCP& amalgamatedMap, Array& translation) { + typedef typename ArrayView::size_type size_type; + typedef std::unordered_map container; + + GO indexBase = sourceMap.getIndexBase(); + ArrayView elementAList = sourceMap.getLocalElementList(); + size_type numElements = elementAList.size(); + container filter; + + GO offset = 0; + LO blkSize = A.GetFixedBlockSize() / A.GetStorageBlockSize(); + if (A.IsView("stridedMaps") == true) { + Teuchos::RCP myMap = A.getRowMap("stridedMaps"); + Teuchos::RCP strMap = Teuchos::rcp_dynamic_cast(myMap); + TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap"); + offset = strMap->getOffset(); + blkSize = Teuchos::as(strMap->getFixedBlockSize()); + } - Array elementList(numElements); - translation.resize(numElements); + Array elementList(numElements); + translation.resize(numElements); - size_type numRows = 0; - for (size_type id = 0; id < numElements; id++) { - GO dofID = elementAList[id]; - GO nodeID = AmalgamationFactory::DOFGid2NodeId(dofID, blkSize, offset, indexBase); + size_type numRows = 0; + for (size_type id = 0; id < numElements; id++) { + GO dofID = elementAList[id]; + GO nodeID = AmalgamationFactory::DOFGid2NodeId(dofID, blkSize, offset, indexBase); - typename container::iterator it = filter.find(nodeID); - if (it == filter.end()) { - filter[nodeID] = numRows; + typename container::iterator it = filter.find(nodeID); + if (it == filter.end()) { + filter[nodeID] = numRows; - translation[id] = numRows; - elementList[numRows] = nodeID; + translation[id] = numRows; + elementList[numRows] = nodeID; - numRows++; + numRows++; - } 
else { - translation[id] = it->second; - } + } else { + translation[id] = it->second; } - elementList.resize(numRows); - - amalgamatedMap = MapFactory::Build(sourceMap.lib(), Teuchos::OrdinalTraits::invalid(), elementList, indexBase, sourceMap.getComm()); - } + elementList.resize(numRows); - template - const GlobalOrdinal AmalgamationFactory::DOFGid2NodeId(GlobalOrdinal gid, LocalOrdinal blockSize, - const GlobalOrdinal offset, const GlobalOrdinal indexBase) { - GlobalOrdinal globalblockid = ((GlobalOrdinal) gid - offset - indexBase) / blockSize + indexBase; - return globalblockid; - } + amalgamatedMap = MapFactory::Build(sourceMap.lib(), Teuchos::OrdinalTraits::invalid(), elementList, indexBase, sourceMap.getComm()); +} -} //namespace MueLu +template +const GlobalOrdinal AmalgamationFactory::DOFGid2NodeId(GlobalOrdinal gid, LocalOrdinal blockSize, + const GlobalOrdinal offset, const GlobalOrdinal indexBase) { + GlobalOrdinal globalblockid = ((GlobalOrdinal)gid - offset - indexBase) / blockSize + indexBase; + return globalblockid; +} -#endif /* MUELU_SUBBLOCKUNAMALGAMATIONFACTORY_DEF_HPP */ +} //namespace MueLu +#endif /* MUELU_SUBBLOCKUNAMALGAMATIONFACTORY_DEF_HPP */ diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationInfo_decl.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationInfo_decl.hpp index b14a14a1cc10..5d751938d592 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationInfo_decl.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationInfo_decl.hpp @@ -53,7 +53,7 @@ #ifndef MUELU_AMALGAMATIONINFO_DECL_HPP_ #define MUELU_AMALGAMATIONINFO_DECL_HPP_ -#include // global_size_t +#include // global_size_t #include #include #include @@ -75,94 +75,90 @@ namespace MueLu { current processor. That mapping is used for unamalgamation. 
*/ - template - class AmalgamationInfo - : public BaseClass { +template +class AmalgamationInfo + : public BaseClass { #undef MUELU_AMALGAMATIONINFO_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - - /// Constructor - AmalgamationInfo(RCP > rowTranslation, - RCP > colTranslation, - RCP nodeRowMap, - RCP nodeColMap, - RCP const &columnMap, - LO fullblocksize, GO offset, LO blockid, LO nStridedOffset, LO stridedblocksize) : - rowTranslation_(rowTranslation), - colTranslation_(colTranslation), - nodeRowMap_(nodeRowMap), - nodeColMap_(nodeColMap), - columnMap_(columnMap), - fullblocksize_(fullblocksize), - offset_(offset), - blockid_(blockid), - nStridedOffset_(nStridedOffset), - stridedblocksize_(stridedblocksize), - indexBase_(columnMap->getIndexBase()) - {} - - /// Destructor - virtual ~AmalgamationInfo() {} - - /// Return a simple one-line description of this object. - std::string description() const { return "AmalgamationInfo"; } - - //! Print the object with some verbosity level to an FancyOStream object. - //using MueLu::Describable::describe; // overloading, not hiding - //void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const;; - void print(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const; - - RCP getNodeRowMap() const { return nodeRowMap_; } //! < returns the node row map for the graph - RCP getNodeColMap() const { return nodeColMap_; } //! 
< returns the node column map for the graph - - /* @brief Translation arrays + public: + /// Constructor + AmalgamationInfo(RCP > rowTranslation, + RCP > colTranslation, + RCP nodeRowMap, + RCP nodeColMap, + RCP const& columnMap, + LO fullblocksize, GO offset, LO blockid, LO nStridedOffset, LO stridedblocksize) + : rowTranslation_(rowTranslation) + , colTranslation_(colTranslation) + , nodeRowMap_(nodeRowMap) + , nodeColMap_(nodeColMap) + , columnMap_(columnMap) + , fullblocksize_(fullblocksize) + , offset_(offset) + , blockid_(blockid) + , nStridedOffset_(nStridedOffset) + , stridedblocksize_(stridedblocksize) + , indexBase_(columnMap->getIndexBase()) {} + + /// Destructor + virtual ~AmalgamationInfo() {} + + /// Return a simple one-line description of this object. + std::string description() const { return "AmalgamationInfo"; } + + //! Print the object with some verbosity level to an FancyOStream object. + //using MueLu::Describable::describe; // overloading, not hiding + //void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const;; + void print(Teuchos::FancyOStream& out, const VerbLevel verbLevel = Default) const; + + RCP getNodeRowMap() const { return nodeRowMap_; } //! < returns the node row map for the graph + RCP getNodeColMap() const { return nodeColMap_; } //! < returns the node column map for the graph + + /* @brief Translation arrays * * Returns translation arrays providing local node ids given local dof ids built from either * the non-overlapping (unique) row map or the overlapping (non-unique) column map. * The getColTranslation routine, e.g., is used for the MergeRows routine in CoalesceDropFactory. */ - //@{ - RCP > getRowTranslation() const { return rowTranslation_; } - RCP > getColTranslation() const { return colTranslation_; } - //@} + //@{ + RCP > getRowTranslation() const { return rowTranslation_; } + RCP > getColTranslation() const { return colTranslation_; } + //@} - /*! @brief UnamalgamateAggregates + /*! 
@brief UnamalgamateAggregates Puts all dofs for aggregate \c i in aggToRowMap[\c i]. Also calculate aggregate sizes. */ - void UnamalgamateAggregates(const Aggregates& aggregates, Teuchos::ArrayRCP& aggStart, Teuchos::ArrayRCP& aggToRowMap) const; - void UnamalgamateAggregatesLO(const Aggregates& aggregates, Teuchos::ArrayRCP& aggStart, Teuchos::ArrayRCP& aggToRowMap) const; + void UnamalgamateAggregates(const Aggregates& aggregates, Teuchos::ArrayRCP& aggStart, Teuchos::ArrayRCP& aggToRowMap) const; + void UnamalgamateAggregatesLO(const Aggregates& aggregates, Teuchos::ArrayRCP& aggStart, Teuchos::ArrayRCP& aggToRowMap) const; - /*! @brief ComputeUnamalgamatedImportDofMap + /*! @brief ComputeUnamalgamatedImportDofMap * build overlapping dof row map from aggregates needed for overlapping null space */ - Teuchos::RCP< Xpetra::Map > ComputeUnamalgamatedImportDofMap(const Aggregates& aggregates) const; - - private: - - void UnamalgamateAggregates(const Teuchos::RCP &nodeMap, - const RCP &procWinnerVec, - const RCP &vertex2AggIdVec, + Teuchos::RCP > ComputeUnamalgamatedImportDofMap(const Aggregates& aggregates) const; + + private: + void UnamalgamateAggregates(const Teuchos::RCP& nodeMap, + const RCP& procWinnerVec, + const RCP& vertex2AggIdVec, + const GO numAggregates, + Teuchos::ArrayRCP& aggStart, + Teuchos::ArrayRCP& aggToRowMap) const; + + void UnamalgamateAggregatesLO(const Teuchos::RCP& nodeMap, + const RCP& procWinnerVec, + const RCP& vertex2AggIdVec, const GO numAggregates, Teuchos::ArrayRCP& aggStart, - Teuchos::ArrayRCP& aggToRowMap) const; - - void UnamalgamateAggregatesLO(const Teuchos::RCP &nodeMap, - const RCP &procWinnerVec, - const RCP &vertex2AggIdVec, - const GO numAggregates, - Teuchos::ArrayRCP& aggStart, - Teuchos::ArrayRCP& aggToRowMap) const; + Teuchos::ArrayRCP& aggToRowMap) const; - Teuchos::RCP< Xpetra::Map > ComputeUnamalgamatedImportDofMap(const Teuchos::RCP &nodeMap) const; + Teuchos::RCP > ComputeUnamalgamatedImportDofMap(const 
Teuchos::RCP& nodeMap) const; - public: - - /*! @brief ComputeGlobalDOF + public: + /*! @brief ComputeGlobalDOF * * Return global dof id associated with global node id gNodeID and dof index k * @@ -172,67 +168,65 @@ namespace MueLu { * @param (LO): local dof index within node * @return (GO): global dof id */ - GO ComputeGlobalDOF(GO const &gNodeID, LO const &k=0) const; + GO ComputeGlobalDOF(GO const& gNodeID, LO const& k = 0) const; - /*! @brief ComputeLocalDOF + /*! @brief ComputeLocalDOF * return locbal dof id associated with local node id lNodeID and dof index k * * @param (LO): local node id * @param (LO): local dof index within node * @return (LO): local dof id */ - LO ComputeLocalDOF(LocalOrdinal const &lNodeID, LocalOrdinal const &k) const; - - LO ComputeLocalNode(LocalOrdinal const &ldofID) const; + LO ComputeLocalDOF(LocalOrdinal const& lNodeID, LocalOrdinal const& k) const; - /*! Access routines */ + LO ComputeLocalNode(LocalOrdinal const& ldofID) const; - /// returns offset of global dof ids - GO GlobalOffset() { return offset_; } + /*! Access routines */ - /// returns striding information - void GetStridingInformation(LO& fullBlockSize, LO& blockID, LO& stridingOffset, LO& stridedBlockSize, GO& indexBase) { - fullBlockSize = fullblocksize_; - blockID = blockid_; - stridingOffset = nStridedOffset_; - stridedBlockSize = stridedblocksize_; - indexBase = indexBase_; - } + /// returns offset of global dof ids + GO GlobalOffset() { return offset_; } - private: + /// returns striding information + void GetStridingInformation(LO& fullBlockSize, LO& blockID, LO& stridingOffset, LO& stridedBlockSize, GO& indexBase) { + fullBlockSize = fullblocksize_; + blockID = blockid_; + stridingOffset = nStridedOffset_; + stridedBlockSize = stridedblocksize_; + indexBase = indexBase_; + } - //! @name amalgamation information variables - //@{ + private: + //! @name amalgamation information variables + //@{ - //! 
Arrays containing local node ids given local dof ids - RCP > rowTranslation_; - RCP > colTranslation_; + //! Arrays containing local node ids given local dof ids + RCP > rowTranslation_; + RCP > colTranslation_; - //! node row and column map of graph (built from row and column map of A) - RCP nodeRowMap_; - RCP nodeColMap_; + //! node row and column map of graph (built from row and column map of A) + RCP nodeRowMap_; + RCP nodeColMap_; - /*! @brief DOF map (really column map of A) + /*! @brief DOF map (really column map of A) We keep a RCP on the column map to make sure that the map is still valid when it is used. */ - RCP columnMap_; - - //@} - - //! @name Strided map information. - //@{ - LO fullblocksize_; - GO offset_; - LO blockid_; - LO nStridedOffset_; - LO stridedblocksize_; - GO indexBase_; - //@} - - }; - -} // namespace MueLu + RCP columnMap_; + + //@} + + //! @name Strided map information. + //@{ + LO fullblocksize_; + GO offset_; + LO blockid_; + LO nStridedOffset_; + LO stridedblocksize_; + GO indexBase_; + //@} +}; + +} // namespace MueLu #define MUELU_AMALGAMATIONINFO_SHORT #endif /* MUELU_AMALGAMATIONINFO_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationInfo_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationInfo_def.hpp index f46746657142..f473d906bcfd 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationInfo_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationInfo_def.hpp @@ -63,265 +63,257 @@ namespace MueLu { - template - void AmalgamationInfo:: - UnamalgamateAggregates(const Aggregates& aggregates, - Teuchos::ArrayRCP& aggStart, - Teuchos::ArrayRCP& aggToRowMap) const { - - UnamalgamateAggregates(aggregates.GetMap(), - aggregates.GetProcWinner(), - aggregates.GetVertex2AggId(), - aggregates.GetNumAggregates(), - aggStart, - aggToRowMap); - - } //UnamalgamateAggregates - - template - void AmalgamationInfo:: - 
UnamalgamateAggregates(const Teuchos::RCP &nodeMap, - const RCP &procWinnerVec, - const RCP &vertex2AggIdVec, - const GO numAggregates, - Teuchos::ArrayRCP& aggStart, - Teuchos::ArrayRCP& aggToRowMap) const { - - int myPid = nodeMap->getComm()->getRank(); - Teuchos::ArrayView nodeGlobalElts = nodeMap->getLocalElementList(); - Teuchos::ArrayRCP procWinner = procWinnerVec->getDataNonConst(0); - Teuchos::ArrayRCP vertex2AggId = vertex2AggIdVec->getDataNonConst(0); - const LO size = procWinner.size(); - - std::vector sizes(numAggregates); - if (stridedblocksize_ == 1) { - for (LO lnode = 0; lnode < size; ++lnode) { - LO myAgg = vertex2AggId[lnode]; - if (procWinner[lnode] == myPid) - sizes[myAgg] += 1; - } - } else { - for (LO lnode = 0; lnode < size; ++lnode) { - LO myAgg = vertex2AggId[lnode]; - if (procWinner[lnode] == myPid) { - GO gnodeid = nodeGlobalElts[lnode]; - for (LocalOrdinal k = 0; k < stridedblocksize_; k++) { - GlobalOrdinal gDofIndex = ComputeGlobalDOF(gnodeid,k); - if (columnMap_->isNodeGlobalElement(gDofIndex)) - sizes[myAgg] += 1; - } - } - } - } - aggStart = ArrayRCP(numAggregates+1,0); - aggStart[0] = Teuchos::ScalarTraits::zero(); - for (GO i=0; i +void AmalgamationInfo:: + UnamalgamateAggregates(const Aggregates &aggregates, + Teuchos::ArrayRCP &aggStart, + Teuchos::ArrayRCP &aggToRowMap) const { + UnamalgamateAggregates(aggregates.GetMap(), + aggregates.GetProcWinner(), + aggregates.GetVertex2AggId(), + aggregates.GetNumAggregates(), + aggStart, + aggToRowMap); + +} //UnamalgamateAggregates + +template +void AmalgamationInfo:: + UnamalgamateAggregates(const Teuchos::RCP &nodeMap, + const RCP &procWinnerVec, + const RCP &vertex2AggIdVec, + const GO numAggregates, + Teuchos::ArrayRCP &aggStart, + Teuchos::ArrayRCP &aggToRowMap) const { + int myPid = nodeMap->getComm()->getRank(); + Teuchos::ArrayView nodeGlobalElts = nodeMap->getLocalElementList(); + Teuchos::ArrayRCP procWinner = procWinnerVec->getDataNonConst(0); + Teuchos::ArrayRCP vertex2AggId 
= vertex2AggIdVec->getDataNonConst(0); + const LO size = procWinner.size(); + + std::vector sizes(numAggregates); + if (stridedblocksize_ == 1) { + for (LO lnode = 0; lnode < size; ++lnode) { + LO myAgg = vertex2AggId[lnode]; + if (procWinner[lnode] == myPid) + sizes[myAgg] += 1; } - aggToRowMap = ArrayRCP(aggStart[numAggregates],0); - - // count, how many dofs have been recorded for each aggregate so far - Array numDofs(numAggregates, 0); // empty array with number of Dofs for each aggregate - - if (stridedblocksize_ == 1) { - for (LO lnode = 0; lnode < size; ++lnode) { - LO myAgg = vertex2AggId[lnode]; - if (procWinner[lnode] == myPid) { - aggToRowMap[ aggStart[myAgg] + numDofs[myAgg] ] = ComputeGlobalDOF(nodeGlobalElts[lnode]); - ++(numDofs[myAgg]); - } - } - } else { - for (LO lnode = 0; lnode < size; ++lnode) { - LO myAgg = vertex2AggId[lnode]; - - if (procWinner[lnode] == myPid) { - GO gnodeid = nodeGlobalElts[lnode]; - for (LocalOrdinal k = 0; k < stridedblocksize_; k++) { - GlobalOrdinal gDofIndex = ComputeGlobalDOF(gnodeid,k); - if (columnMap_->isNodeGlobalElement(gDofIndex)) { - aggToRowMap[ aggStart[myAgg] + numDofs[myAgg] ] = gDofIndex; - ++(numDofs[myAgg]); - } - } + } else { + for (LO lnode = 0; lnode < size; ++lnode) { + LO myAgg = vertex2AggId[lnode]; + if (procWinner[lnode] == myPid) { + GO gnodeid = nodeGlobalElts[lnode]; + for (LocalOrdinal k = 0; k < stridedblocksize_; k++) { + GlobalOrdinal gDofIndex = ComputeGlobalDOF(gnodeid, k); + if (columnMap_->isNodeGlobalElement(gDofIndex)) + sizes[myAgg] += 1; } } } - // todo plausibility check: entry numDofs[k] == aggToRowMap[k].size() - - } //UnamalgamateAggregates - - template - void AmalgamationInfo:: - UnamalgamateAggregatesLO(const Aggregates& aggregates, - Teuchos::ArrayRCP& aggStart, - Teuchos::ArrayRCP& aggToRowMap) const { - UnamalgamateAggregatesLO(aggregates.GetMap(), - aggregates.GetProcWinner(), - aggregates.GetVertex2AggId(), - aggregates.GetNumAggregates(), - aggStart, - aggToRowMap); } 
+ aggStart = ArrayRCP(numAggregates + 1, 0); + aggStart[0] = Teuchos::ScalarTraits::zero(); + for (GO i = 0; i < numAggregates; ++i) { + aggStart[i + 1] = aggStart[i] + sizes[i]; + } + aggToRowMap = ArrayRCP(aggStart[numAggregates], 0); - template - void AmalgamationInfo:: - UnamalgamateAggregatesLO(const Teuchos::RCP &nodeMap, - const RCP &procWinnerVec, - const RCP &vertex2AggIdVec, - const GO numAggregates, - Teuchos::ArrayRCP& aggStart, - Teuchos::ArrayRCP& aggToRowMap) const { - - int myPid = nodeMap->getComm()->getRank(); - Teuchos::ArrayView nodeGlobalElts = nodeMap->getLocalElementList(); - - Teuchos::ArrayRCP procWinner = procWinnerVec ->getDataNonConst(0); - Teuchos::ArrayRCP vertex2AggId = vertex2AggIdVec->getDataNonConst(0); - - - // FIXME: Do we need to compute size here? Or can we use existing? - const LO size = procWinner.size(); - - std::vector sizes(numAggregates); - if (stridedblocksize_ == 1) { - for (LO lnode = 0; lnode < size; lnode++) - if (procWinner[lnode] == myPid) - sizes[vertex2AggId[lnode]]++; - } else { - for (LO lnode = 0; lnode < size; lnode++) - if (procWinner[lnode] == myPid) { - GO nodeGID = nodeGlobalElts[lnode]; + // count, how many dofs have been recorded for each aggregate so far + Array numDofs(numAggregates, 0); // empty array with number of Dofs for each aggregate - for (LO k = 0; k < stridedblocksize_; k++) { - GO GID = ComputeGlobalDOF(nodeGID, k); - if (columnMap_->isNodeGlobalElement(GID)) - sizes[vertex2AggId[lnode]]++; - } - } + if (stridedblocksize_ == 1) { + for (LO lnode = 0; lnode < size; ++lnode) { + LO myAgg = vertex2AggId[lnode]; + if (procWinner[lnode] == myPid) { + aggToRowMap[aggStart[myAgg] + numDofs[myAgg]] = ComputeGlobalDOF(nodeGlobalElts[lnode]); + ++(numDofs[myAgg]); + } } + } else { + for (LO lnode = 0; lnode < size; ++lnode) { + LO myAgg = vertex2AggId[lnode]; - aggStart = ArrayRCP(numAggregates+1); // FIXME: useless initialization with zeros - aggStart[0] = 0; - for (GO i = 0; i < numAggregates; i++) 
- aggStart[i+1] = aggStart[i] + sizes[i]; - - aggToRowMap = ArrayRCP(aggStart[numAggregates], 0); - - // count, how many dofs have been recorded for each aggregate so far - Array numDofs(numAggregates, 0); // empty array with number of DOFs for each aggregate - if (stridedblocksize_ == 1) { - for (LO lnode = 0; lnode < size; ++lnode) - if (procWinner[lnode] == myPid) { - LO myAgg = vertex2AggId[lnode]; - aggToRowMap[aggStart[myAgg] + numDofs[myAgg]] = lnode; - numDofs[myAgg]++; - } - } else { - for (LO lnode = 0; lnode < size; ++lnode) - if (procWinner[lnode] == myPid) { - LO myAgg = vertex2AggId[lnode]; - GO nodeGID = nodeGlobalElts[lnode]; - - for (LO k = 0; k < stridedblocksize_; k++) { - GO GID = ComputeGlobalDOF(nodeGID, k); - if (columnMap_->isNodeGlobalElement(GID)) { - aggToRowMap[aggStart[myAgg] + numDofs[myAgg]] = lnode*stridedblocksize_ + k; - numDofs[myAgg]++; - } + if (procWinner[lnode] == myPid) { + GO gnodeid = nodeGlobalElts[lnode]; + for (LocalOrdinal k = 0; k < stridedblocksize_; k++) { + GlobalOrdinal gDofIndex = ComputeGlobalDOF(gnodeid, k); + if (columnMap_->isNodeGlobalElement(gDofIndex)) { + aggToRowMap[aggStart[myAgg] + numDofs[myAgg]] = gDofIndex; + ++(numDofs[myAgg]); } } + } } - // todo plausibility check: entry numDofs[k] == aggToRowMap[k].size() - - } //UnamalgamateAggregatesLO - - template - void AmalgamationInfo::print(Teuchos::FancyOStream &out, - const VerbLevel verbLevel) const - { - if (!(verbLevel & Debug)) - return; - - out << "AmalgamationInfo -- Striding information:" - << "\n fullBlockSize = " << fullblocksize_ - << "\n blockID = " << blockid_ - << "\n stridingOffset = " << nStridedOffset_ - << "\n stridedBlockSize = " << stridedblocksize_ - << "\n indexBase = " << indexBase_ - << std::endl; - - out << "AmalgamationInfo -- DOFs to nodes mapping:\n" - << " Mapping of row DOFs to row nodes:" << *rowTranslation_() - << "\n\n Mapping of column DOFs to column nodes:" << *colTranslation_() - << std::endl; - - out << 
"AmalgamationInfo -- row node map:" << std::endl; - nodeRowMap_->describe(out, Teuchos::VERB_EXTREME); - - out << "AmalgamationInfo -- column node map:" << std::endl; - nodeColMap_->describe(out, Teuchos::VERB_EXTREME); } + // todo plausibility check: entry numDofs[k] == aggToRowMap[k].size() - ///////////////////////////////////////////////////////////////////////////// +} //UnamalgamateAggregates - template - RCP > AmalgamationInfo:: - ComputeUnamalgamatedImportDofMap(const Aggregates& aggregates) const { - return ComputeUnamalgamatedImportDofMap(aggregates.GetMap()); +template +void AmalgamationInfo:: + UnamalgamateAggregatesLO(const Aggregates &aggregates, + Teuchos::ArrayRCP &aggStart, + Teuchos::ArrayRCP &aggToRowMap) const { + UnamalgamateAggregatesLO(aggregates.GetMap(), + aggregates.GetProcWinner(), + aggregates.GetVertex2AggId(), + aggregates.GetNumAggregates(), + aggStart, + aggToRowMap); +} + +template +void AmalgamationInfo:: + UnamalgamateAggregatesLO(const Teuchos::RCP &nodeMap, + const RCP &procWinnerVec, + const RCP &vertex2AggIdVec, + const GO numAggregates, + Teuchos::ArrayRCP &aggStart, + Teuchos::ArrayRCP &aggToRowMap) const { + int myPid = nodeMap->getComm()->getRank(); + Teuchos::ArrayView nodeGlobalElts = nodeMap->getLocalElementList(); + + Teuchos::ArrayRCP procWinner = procWinnerVec->getDataNonConst(0); + Teuchos::ArrayRCP vertex2AggId = vertex2AggIdVec->getDataNonConst(0); + + // FIXME: Do we need to compute size here? Or can we use existing? 
+ const LO size = procWinner.size(); + + std::vector sizes(numAggregates); + if (stridedblocksize_ == 1) { + for (LO lnode = 0; lnode < size; lnode++) + if (procWinner[lnode] == myPid) + sizes[vertex2AggId[lnode]]++; + } else { + for (LO lnode = 0; lnode < size; lnode++) + if (procWinner[lnode] == myPid) { + GO nodeGID = nodeGlobalElts[lnode]; + + for (LO k = 0; k < stridedblocksize_; k++) { + GO GID = ComputeGlobalDOF(nodeGID, k); + if (columnMap_->isNodeGlobalElement(GID)) + sizes[vertex2AggId[lnode]]++; + } + } } - template - RCP > AmalgamationInfo:: - ComputeUnamalgamatedImportDofMap(const Teuchos::RCP &nodeMap) const { - - Teuchos::RCP > myDofGids = Teuchos::rcp(new std::vector); - Teuchos::ArrayView gEltList = nodeMap->getLocalElementList(); - LO nodeElements = Teuchos::as(nodeMap->getLocalNumElements()); - if (stridedblocksize_ == 1) { - for (LO n = 0; npush_back(gDofIndex); + aggStart = ArrayRCP(numAggregates + 1); // FIXME: useless initialization with zeros + aggStart[0] = 0; + for (GO i = 0; i < numAggregates; i++) + aggStart[i + 1] = aggStart[i] + sizes[i]; + + aggToRowMap = ArrayRCP(aggStart[numAggregates], 0); + + // count, how many dofs have been recorded for each aggregate so far + Array numDofs(numAggregates, 0); // empty array with number of DOFs for each aggregate + if (stridedblocksize_ == 1) { + for (LO lnode = 0; lnode < size; ++lnode) + if (procWinner[lnode] == myPid) { + LO myAgg = vertex2AggId[lnode]; + aggToRowMap[aggStart[myAgg] + numDofs[myAgg]] = lnode; + numDofs[myAgg]++; } - } else { - for (LO n = 0; nisNodeGlobalElement(gDofIndex)) - myDofGids->push_back(gDofIndex); + } else { + for (LO lnode = 0; lnode < size; ++lnode) + if (procWinner[lnode] == myPid) { + LO myAgg = vertex2AggId[lnode]; + GO nodeGID = nodeGlobalElts[lnode]; + + for (LO k = 0; k < stridedblocksize_; k++) { + GO GID = ComputeGlobalDOF(nodeGID, k); + if (columnMap_->isNodeGlobalElement(GID)) { + aggToRowMap[aggStart[myAgg] + numDofs[myAgg]] = lnode * stridedblocksize_ 
+ k; + numDofs[myAgg]++; + } } } - } - - Teuchos::ArrayRCP arr_myDofGids = Teuchos::arcp( myDofGids ); - Teuchos::RCP importDofMap = MapFactory::Build(nodeMap->lib(), Teuchos::OrdinalTraits::invalid(), arr_myDofGids(), nodeMap->getIndexBase(), nodeMap->getComm()); - return importDofMap; - } - - ///////////////////////////////////////////////////////////////////////////// - - template - GlobalOrdinal AmalgamationInfo:: - ComputeGlobalDOF(GlobalOrdinal const &gNodeID, LocalOrdinal const &k) const { - // here, the assumption is, that the node map has the same indexBase as the dof map - // this is the node map index base this is the dof map index base - GlobalOrdinal gDofIndex = offset_ + (gNodeID-indexBase_)*fullblocksize_ + nStridedOffset_ + k + indexBase_; - return gDofIndex; } - - template - LocalOrdinal AmalgamationInfo::ComputeLocalDOF(LocalOrdinal const &lNodeID, LocalOrdinal const &k) const { - LocalOrdinal lDofIndex = lNodeID*fullblocksize_ + k; - return lDofIndex; - } - - - template - LocalOrdinal AmalgamationInfo::ComputeLocalNode(LocalOrdinal const &ldofID) const { - return (ldofID - ldofID%fullblocksize_) / fullblocksize_; + // todo plausibility check: entry numDofs[k] == aggToRowMap[k].size() + +} //UnamalgamateAggregatesLO + +template +void AmalgamationInfo::print(Teuchos::FancyOStream &out, + const VerbLevel verbLevel) const { + if (!(verbLevel & Debug)) + return; + + out << "AmalgamationInfo -- Striding information:" + << "\n fullBlockSize = " << fullblocksize_ + << "\n blockID = " << blockid_ + << "\n stridingOffset = " << nStridedOffset_ + << "\n stridedBlockSize = " << stridedblocksize_ + << "\n indexBase = " << indexBase_ + << std::endl; + + out << "AmalgamationInfo -- DOFs to nodes mapping:\n" + << " Mapping of row DOFs to row nodes:" << *rowTranslation_() + << "\n\n Mapping of column DOFs to column nodes:" << *colTranslation_() + << std::endl; + + out << "AmalgamationInfo -- row node map:" << std::endl; + nodeRowMap_->describe(out, 
Teuchos::VERB_EXTREME); + + out << "AmalgamationInfo -- column node map:" << std::endl; + nodeColMap_->describe(out, Teuchos::VERB_EXTREME); +} + +///////////////////////////////////////////////////////////////////////////// + +template +RCP > AmalgamationInfo:: + ComputeUnamalgamatedImportDofMap(const Aggregates &aggregates) const { + return ComputeUnamalgamatedImportDofMap(aggregates.GetMap()); +} + +template +RCP > AmalgamationInfo:: + ComputeUnamalgamatedImportDofMap(const Teuchos::RCP &nodeMap) const { + Teuchos::RCP > myDofGids = Teuchos::rcp(new std::vector); + Teuchos::ArrayView gEltList = nodeMap->getLocalElementList(); + LO nodeElements = Teuchos::as(nodeMap->getLocalNumElements()); + if (stridedblocksize_ == 1) { + for (LO n = 0; n < nodeElements; n++) { + GlobalOrdinal gDofIndex = ComputeGlobalDOF(gEltList[n]); + myDofGids->push_back(gDofIndex); + } + } else { + for (LO n = 0; n < nodeElements; n++) { + for (LocalOrdinal k = 0; k < stridedblocksize_; k++) { + GlobalOrdinal gDofIndex = ComputeGlobalDOF(gEltList[n], k); + if (columnMap_->isNodeGlobalElement(gDofIndex)) + myDofGids->push_back(gDofIndex); + } + } } -} //namespace - + Teuchos::ArrayRCP arr_myDofGids = Teuchos::arcp(myDofGids); + Teuchos::RCP importDofMap = MapFactory::Build(nodeMap->lib(), Teuchos::OrdinalTraits::invalid(), arr_myDofGids(), nodeMap->getIndexBase(), nodeMap->getComm()); + return importDofMap; +} + +///////////////////////////////////////////////////////////////////////////// + +template +GlobalOrdinal AmalgamationInfo:: + ComputeGlobalDOF(GlobalOrdinal const &gNodeID, LocalOrdinal const &k) const { + // here, the assumption is, that the node map has the same indexBase as the dof map + // this is the node map index base this is the dof map index base + GlobalOrdinal gDofIndex = offset_ + (gNodeID - indexBase_) * fullblocksize_ + nStridedOffset_ + k + indexBase_; + return gDofIndex; +} + +template +LocalOrdinal AmalgamationInfo::ComputeLocalDOF(LocalOrdinal const &lNodeID, 
LocalOrdinal const &k) const { + LocalOrdinal lDofIndex = lNodeID * fullblocksize_ + k; + return lDofIndex; +} + +template +LocalOrdinal AmalgamationInfo::ComputeLocalNode(LocalOrdinal const &ldofID) const { + return (ldofID - ldofID % fullblocksize_) / fullblocksize_; +} + +} // namespace MueLu #endif /* MUELU_AMALGAMATIONINFO_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_decl.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_decl.hpp index 5e03e2fae3ef..33f92d801ae6 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_decl.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_decl.hpp @@ -52,7 +52,7 @@ #include #include #include -#include //TODO +#include //TODO #include #include @@ -72,7 +72,7 @@ namespace MueLu { - /*! +/*! @class CoalesceDropFactory @brief Factory for creating a graph based on a given matrix. @@ -127,61 +127,57 @@ namespace MueLu { However, there are also some situations (e.g. when doing rebalancing based on HyperGraph partitioning without coordinate information) where one has not access to a "Graph" or "Coordinates" variable. */ - template - class CoalesceDropFactory : public SingleLevelFactoryBase { +template +class CoalesceDropFactory : public SingleLevelFactoryBase { #undef MUELU_COALESCEDROPFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: + public: + //! @name Constructors/Destructors. + //@{ - //! @name Constructors/Destructors. - //@{ + //! Constructor + CoalesceDropFactory(); - //! Constructor - CoalesceDropFactory(); + //! Destructor + virtual ~CoalesceDropFactory() {} - //! Destructor - virtual ~CoalesceDropFactory() { } + RCP GetValidParameterList() const; - RCP GetValidParameterList() const; + //@} - //@} + //! Input + //@{ - //! 
Input - //@{ + void DeclareInput(Level& currentLevel) const; - void DeclareInput(Level ¤tLevel) const; + /// set predrop function + void SetPreDropFunction(const RCP >& predrop) { predrop_ = predrop; } - /// set predrop function - void SetPreDropFunction(const RCP > &predrop) { predrop_ = predrop; } + //@} - //@} + void Build(Level& currentLevel) const; // Build - void Build(Level ¤tLevel) const; // Build + private: + // pre-drop function + mutable RCP predrop_; - private: + //! Method to merge rows of matrix for systems of PDEs. + void MergeRows(const Matrix& A, const LO row, Array& cols, const Array& translation) const; + void MergeRowsWithDropping(const Matrix& A, const LO row, const ArrayRCP& ghostedDiagVals, SC threshold, Array& cols, const Array& translation) const; - // pre-drop function - mutable - RCP predrop_; + // When we want to decouple a block diagonal system (returns Teuchos::null if generate_matrix is false) + Teuchos::RCP > BlockDiagonalize(Level& currentLevel, const RCP& A, bool generate_matrix) const; - //! Method to merge rows of matrix for systems of PDEs. 
- void MergeRows(const Matrix& A, const LO row, Array& cols, const Array& translation) const; - void MergeRowsWithDropping(const Matrix& A, const LO row, const ArrayRCP& ghostedDiagVals, SC threshold, Array& cols, const Array& translation) const; + // When we want to decouple a block diagonal system via a *graph* + void BlockDiagonalizeGraph(const RCP& inputGraph, const RCP& ghostedBlockNumber, RCP& outputGraph, RCP& importer) const; +}; //class CoalesceDropFactory - // When we want to decouple a block diagonal system (returns Teuchos::null if generate_matrix is false) - Teuchos::RCP > BlockDiagonalize(Level & currentLevel,const RCP & A, bool generate_matrix) const; - - // When we want to decouple a block diagonal system via a *graph* - void BlockDiagonalizeGraph(const RCP & inputGraph, const RCP & ghostedBlockNumber, RCP & outputGraph, RCP & importer) const; - - }; //class CoalesceDropFactory - -} //namespace MueLu +} //namespace MueLu #define MUELU_COALESCEDROPFACTORY_SHORT -#endif // MUELU_COALESCEDROPFACTORY_DECL_HPP +#endif // MUELU_COALESCEDROPFACTORY_DECL_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp index c4bf4178cbaf..4d4bbe81457e 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp @@ -87,236 +87,226 @@ // Should be removed once we are confident that this works. 
//#define DJS_READ_ENV_VARIABLES - namespace MueLu { - namespace Details { - template - struct DropTol { - - DropTol() = default; - DropTol(DropTol const&) = default; - DropTol(DropTol &&) = default; - - DropTol& operator=(DropTol const&) = default; - DropTol& operator=(DropTol &&) = default; - - DropTol(real_type val_, real_type diag_, LO col_, bool drop_) - : val{val_}, diag{diag_}, col{col_}, drop{drop_} - {} - - real_type val {Teuchos::ScalarTraits::zero()}; - real_type diag {Teuchos::ScalarTraits::zero()}; - LO col {Teuchos::OrdinalTraits::invalid()}; - bool drop {true}; - - // CMS: Auxillary information for debugging info - // real_type aux_val {Teuchos::ScalarTraits::nan()}; - }; - } - - - template - RCP CoalesceDropFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +namespace Details { +template +struct DropTol { + DropTol() = default; + DropTol(DropTol const&) = default; + DropTol(DropTol&&) = default; -#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("aggregation: drop tol"); - SET_VALID_ENTRY("aggregation: use ml scaling of drop tol"); - SET_VALID_ENTRY("aggregation: Dirichlet threshold"); - SET_VALID_ENTRY("aggregation: greedy Dirichlet"); - SET_VALID_ENTRY("aggregation: row sum drop tol"); - SET_VALID_ENTRY("aggregation: drop scheme"); - SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize"); - SET_VALID_ENTRY("aggregation: distance laplacian directional weights"); - SET_VALID_ENTRY("aggregation: dropping may create Dirichlet"); - - { - typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; - // "signed classical" is the Ruge-Stuben style (relative to max off-diagonal), "sign classical sa" is the signed version of the sa criterion (relative to the diagonal values) - validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new validatorType(Teuchos::tuple("signed classical sa","classical", "distance 
laplacian","signed classical","block diagonal","block diagonal classical","block diagonal distance laplacian","block diagonal signed classical","block diagonal colored signed classical"), "aggregation: drop scheme"))); - - } - SET_VALID_ENTRY("aggregation: distance laplacian algo"); - SET_VALID_ENTRY("aggregation: classical algo"); - SET_VALID_ENTRY("aggregation: coloring: localize color graph"); -#undef SET_VALID_ENTRY - validParamList->set< bool > ("lightweight wrap", true, "Experimental option for lightweight graph access"); - - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A"); - validParamList->set< RCP >("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo"); - validParamList->set< RCP >("Coordinates", Teuchos::null, "Generating factory for Coordinates"); - validParamList->set< RCP >("BlockNumber", Teuchos::null, "Generating factory for BlockNUmber"); - - return validParamList; - } - - template - CoalesceDropFactory::CoalesceDropFactory() : predrop_(Teuchos::null) { } - - template - void CoalesceDropFactory::DeclareInput(Level ¤tLevel) const { - Input(currentLevel, "A"); - Input(currentLevel, "UnAmalgamationInfo"); - - const ParameterList& pL = GetParameterList(); - if (pL.get("lightweight wrap") == true) { - std::string algo = pL.get("aggregation: drop scheme"); - if (algo == "distance laplacian" || algo == "block diagonal distance laplacian") { - Input(currentLevel, "Coordinates"); - } - if(algo == "signed classical sa") - ; - else if (algo.find("block diagonal") != std::string::npos || algo.find("signed classical") != std::string::npos) { - Input(currentLevel, "BlockNumber"); - } - } - - } - - template - void CoalesceDropFactory::Build(Level ¤tLevel) const { - - FactoryMonitor m(*this, "Build", currentLevel); - - typedef Teuchos::ScalarTraits STS; - typedef typename STS::magnitudeType real_type; - typedef Xpetra::MultiVector RealValuedMultiVector; - typedef Xpetra::MultiVectorFactory 
RealValuedMultiVectorFactory; - - if (predrop_ != Teuchos::null) - GetOStream(Parameters0) << predrop_->description(); - - RCP realA = Get< RCP >(currentLevel, "A"); - RCP amalInfo = Get< RCP >(currentLevel, "UnAmalgamationInfo"); - const ParameterList & pL = GetParameterList(); - bool doExperimentalWrap = pL.get("lightweight wrap"); - - GetOStream(Parameters0) << "lightweight wrap = " << doExperimentalWrap << std::endl; - std::string algo = pL.get("aggregation: drop scheme"); - const bool aggregationMayCreateDirichlet = pL.get("aggregation: dropping may create Dirichlet"); - - RCP Coords; - RCP A; + DropTol& operator=(DropTol const&) = default; + DropTol& operator=(DropTol&&) = default; - bool use_block_algorithm=false; - LO interleaved_blocksize = as(pL.get("aggregation: block diagonal: interleaved blocksize")); - bool useSignedClassicalRS = false; - bool useSignedClassicalSA = false; - bool generateColoringGraph = false; + DropTol(real_type val_, real_type diag_, LO col_, bool drop_) + : val{val_} + , diag{diag_} + , col{col_} + , drop{drop_} {} - // NOTE: If we're doing blockDiagonal, we'll not want to do rowSum twice (we'll do it - // in the block diagonalization). 
So we'll clobber the rowSumTol with -1.0 in this case - typename STS::magnitudeType rowSumTol = as(pL.get("aggregation: row sum drop tol")); + real_type val{Teuchos::ScalarTraits::zero()}; + real_type diag{Teuchos::ScalarTraits::zero()}; + LO col{Teuchos::OrdinalTraits::invalid()}; + bool drop{true}; + // CMS: Auxillary information for debugging info + // real_type aux_val {Teuchos::ScalarTraits::nan()}; +}; +} // namespace Details - RCP ghostedBlockNumber; - ArrayRCP g_block_id; +template +RCP CoalesceDropFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); - if(algo == "distance laplacian" ) { - // Grab the coordinates for distance laplacian - Coords = Get< RCP >(currentLevel, "Coordinates"); - A = realA; - } - else if(algo == "signed classical sa") { - useSignedClassicalSA = true; - algo = "classical"; - A = realA; +#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) + SET_VALID_ENTRY("aggregation: drop tol"); + SET_VALID_ENTRY("aggregation: use ml scaling of drop tol"); + SET_VALID_ENTRY("aggregation: Dirichlet threshold"); + SET_VALID_ENTRY("aggregation: greedy Dirichlet"); + SET_VALID_ENTRY("aggregation: row sum drop tol"); + SET_VALID_ENTRY("aggregation: drop scheme"); + SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize"); + SET_VALID_ENTRY("aggregation: distance laplacian directional weights"); + SET_VALID_ENTRY("aggregation: dropping may create Dirichlet"); + + { + typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; + // "signed classical" is the Ruge-Stuben style (relative to max off-diagonal), "sign classical sa" is the signed version of the sa criterion (relative to the diagonal values) + validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new validatorType(Teuchos::tuple("signed classical sa", "classical", "distance laplacian", "signed classical", "block diagonal", "block diagonal classical", "block diagonal distance 
laplacian", "block diagonal signed classical", "block diagonal colored signed classical"), "aggregation: drop scheme"))); + } + SET_VALID_ENTRY("aggregation: distance laplacian algo"); + SET_VALID_ENTRY("aggregation: classical algo"); + SET_VALID_ENTRY("aggregation: coloring: localize color graph"); +#undef SET_VALID_ENTRY + validParamList->set("lightweight wrap", true, "Experimental option for lightweight graph access"); + + validParamList->set>("A", Teuchos::null, "Generating factory of the matrix A"); + validParamList->set>("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo"); + validParamList->set>("Coordinates", Teuchos::null, "Generating factory for Coordinates"); + validParamList->set>("BlockNumber", Teuchos::null, "Generating factory for BlockNUmber"); + + return validParamList; +} + +template +CoalesceDropFactory::CoalesceDropFactory() + : predrop_(Teuchos::null) {} + +template +void CoalesceDropFactory::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "A"); + Input(currentLevel, "UnAmalgamationInfo"); + + const ParameterList& pL = GetParameterList(); + if (pL.get("lightweight wrap") == true) { + std::string algo = pL.get("aggregation: drop scheme"); + if (algo == "distance laplacian" || algo == "block diagonal distance laplacian") { + Input(currentLevel, "Coordinates"); } - else if(algo == "signed classical" || algo == "block diagonal colored signed classical" || algo == "block diagonal signed classical") { - useSignedClassicalRS = true; - // if(realA->GetFixedBlockSize() > 1) { - RCP BlockNumber = Get >(currentLevel, "BlockNumber"); - // Ghost the column block numbers if we need to - RCP importer = realA->getCrsGraph()->getImporter(); - if(!importer.is_null()) { - SubFactoryMonitor m1(*this, "Block Number import", currentLevel); - ghostedBlockNumber= Xpetra::VectorFactory::Build(importer->getTargetMap()); - ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT); - } - else { - ghostedBlockNumber 
= BlockNumber; - } - g_block_id = ghostedBlockNumber->getData(0); - // } - if(algo == "block diagonal colored signed classical") - generateColoringGraph=true; - algo = "classical"; - A = realA; - + if (algo == "signed classical sa") + ; + else if (algo.find("block diagonal") != std::string::npos || algo.find("signed classical") != std::string::npos) { + Input(currentLevel, "BlockNumber"); } - else if(algo == "block diagonal") { - // Handle the "block diagonal" filtering and then leave - BlockDiagonalize(currentLevel,realA,false); - return; + } +} + +template +void CoalesceDropFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + typedef Xpetra::MultiVectorFactory RealValuedMultiVectorFactory; + + if (predrop_ != Teuchos::null) + GetOStream(Parameters0) << predrop_->description(); + + RCP realA = Get>(currentLevel, "A"); + RCP amalInfo = Get>(currentLevel, "UnAmalgamationInfo"); + const ParameterList& pL = GetParameterList(); + bool doExperimentalWrap = pL.get("lightweight wrap"); + + GetOStream(Parameters0) << "lightweight wrap = " << doExperimentalWrap << std::endl; + std::string algo = pL.get("aggregation: drop scheme"); + const bool aggregationMayCreateDirichlet = pL.get("aggregation: dropping may create Dirichlet"); + + RCP Coords; + RCP A; + + bool use_block_algorithm = false; + LO interleaved_blocksize = as(pL.get("aggregation: block diagonal: interleaved blocksize")); + bool useSignedClassicalRS = false; + bool useSignedClassicalSA = false; + bool generateColoringGraph = false; + + // NOTE: If we're doing blockDiagonal, we'll not want to do rowSum twice (we'll do it + // in the block diagonalization). 
So we'll clobber the rowSumTol with -1.0 in this case + typename STS::magnitudeType rowSumTol = as(pL.get("aggregation: row sum drop tol")); + + RCP ghostedBlockNumber; + ArrayRCP g_block_id; + + if (algo == "distance laplacian") { + // Grab the coordinates for distance laplacian + Coords = Get>(currentLevel, "Coordinates"); + A = realA; + } else if (algo == "signed classical sa") { + useSignedClassicalSA = true; + algo = "classical"; + A = realA; + } else if (algo == "signed classical" || algo == "block diagonal colored signed classical" || algo == "block diagonal signed classical") { + useSignedClassicalRS = true; + // if(realA->GetFixedBlockSize() > 1) { + RCP BlockNumber = Get>(currentLevel, "BlockNumber"); + // Ghost the column block numbers if we need to + RCP importer = realA->getCrsGraph()->getImporter(); + if (!importer.is_null()) { + SubFactoryMonitor m1(*this, "Block Number import", currentLevel); + ghostedBlockNumber = Xpetra::VectorFactory::Build(importer->getTargetMap()); + ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT); + } else { + ghostedBlockNumber = BlockNumber; } - else if (algo == "block diagonal classical" || algo == "block diagonal distance laplacian") { - // Handle the "block diagonal" filtering, and then continue onward - use_block_algorithm = true; - RCP filteredMatrix = BlockDiagonalize(currentLevel,realA,true); - if(algo == "block diagonal distance laplacian") { - // We now need to expand the coordinates by the interleaved blocksize - RCP OldCoords = Get< RCP >(currentLevel, "Coordinates"); - if (OldCoords->getLocalLength() != realA->getLocalNumRows()) { - LO dim = (LO) OldCoords->getNumVectors(); - Coords = RealValuedMultiVectorFactory::Build(realA->getRowMap(),dim); - for(LO k=0; k old_vec = OldCoords->getData(k); - ArrayRCP new_vec = Coords->getDataNonConst(k); - for(LO i=0; i <(LO)OldCoords->getLocalLength(); i++) { - LO new_base = i*dim; - for(LO j=0; jgetData(0); + // } + if (algo == "block diagonal colored 
signed classical") + generateColoringGraph = true; + algo = "classical"; + A = realA; + + } else if (algo == "block diagonal") { + // Handle the "block diagonal" filtering and then leave + BlockDiagonalize(currentLevel, realA, false); + return; + } else if (algo == "block diagonal classical" || algo == "block diagonal distance laplacian") { + // Handle the "block diagonal" filtering, and then continue onward + use_block_algorithm = true; + RCP filteredMatrix = BlockDiagonalize(currentLevel, realA, true); + if (algo == "block diagonal distance laplacian") { + // We now need to expand the coordinates by the interleaved blocksize + RCP OldCoords = Get>(currentLevel, "Coordinates"); + if (OldCoords->getLocalLength() != realA->getLocalNumRows()) { + LO dim = (LO)OldCoords->getNumVectors(); + Coords = RealValuedMultiVectorFactory::Build(realA->getRowMap(), dim); + for (LO k = 0; k < dim; k++) { + ArrayRCP old_vec = OldCoords->getData(k); + ArrayRCP new_vec = Coords->getDataNonConst(k); + for (LO i = 0; i < (LO)OldCoords->getLocalLength(); i++) { + LO new_base = i * dim; + for (LO j = 0; j < interleaved_blocksize; j++) + new_vec[new_base + j] = old_vec[i]; } } - else { - Coords = OldCoords; - } - algo = "distance laplacian"; + } else { + Coords = OldCoords; } - else if(algo == "block diagonal classical") { - algo = "classical"; - } - // All cases - A = filteredMatrix; - rowSumTol = -1.0; - } - else { - A = realA; + algo = "distance laplacian"; + } else if (algo == "block diagonal classical") { + algo = "classical"; } + // All cases + A = filteredMatrix; + rowSumTol = -1.0; + } else { + A = realA; + } - // Distance Laplacian weights - Array dlap_weights = pL.get >("aggregation: distance laplacian directional weights"); - enum {NO_WEIGHTS=0, SINGLE_WEIGHTS, BLOCK_WEIGHTS}; - int use_dlap_weights = NO_WEIGHTS; - if(algo == "distance laplacian") { - LO dim = (LO) Coords->getNumVectors(); - // If anything isn't 1.0 we need to turn on the weighting - bool non_unity = false; - 
for (LO i=0; !non_unity && i<(LO)dlap_weights.size(); i++) { - if(dlap_weights[i] != 1.0) { - non_unity = true; - } + // Distance Laplacian weights + Array dlap_weights = pL.get>("aggregation: distance laplacian directional weights"); + enum { NO_WEIGHTS = 0, + SINGLE_WEIGHTS, + BLOCK_WEIGHTS }; + int use_dlap_weights = NO_WEIGHTS; + if (algo == "distance laplacian") { + LO dim = (LO)Coords->getNumVectors(); + // If anything isn't 1.0 we need to turn on the weighting + bool non_unity = false; + for (LO i = 0; !non_unity && i < (LO)dlap_weights.size(); i++) { + if (dlap_weights[i] != 1.0) { + non_unity = true; } - if(non_unity) { - LO blocksize = use_block_algorithm ? as(pL.get("aggregation: block diagonal: interleaved blocksize")) : 1; - if((LO)dlap_weights.size() == dim) - use_dlap_weights = SINGLE_WEIGHTS; - else if((LO)dlap_weights.size() == blocksize * dim) - use_dlap_weights = BLOCK_WEIGHTS; - else { - TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError, - "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize"); - } - if (GetVerbLevel() & Statistics1) - GetOStream(Statistics1) << "Using distance laplacian weights: "<(pL.get("aggregation: block diagonal: interleaved blocksize")) : 1; + if ((LO)dlap_weights.size() == dim) + use_dlap_weights = SINGLE_WEIGHTS; + else if ((LO)dlap_weights.size() == blocksize * dim) + use_dlap_weights = BLOCK_WEIGHTS; + else { + TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError, + "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize"); } + if (GetVerbLevel() & Statistics1) + GetOStream(Statistics1) << "Using distance laplacian weights: " << dlap_weights << std::endl; } + } - // decide wether to use the fast-track code path for standard maps or the somewhat slower - // code path for non-standard maps - /*bool bNonStandardMaps = false; + 
// decide wether to use the fast-track code path for standard maps or the somewhat slower + // code path for non-standard maps + /*bool bNonStandardMaps = false; if (A->IsView("stridedMaps") == true) { Teuchos::RCP myMap = A->getRowMap("stridedMaps"); Teuchos::RCP strMap = Teuchos::rcp_dynamic_cast(myMap); @@ -325,88 +315,88 @@ namespace MueLu { bNonStandardMaps = true; }*/ - if (doExperimentalWrap) { - TEUCHOS_TEST_FOR_EXCEPTION(predrop_ != null && algo != "classical", Exceptions::RuntimeError, "Dropping function must not be provided for \"" << algo << "\" algorithm"); - TEUCHOS_TEST_FOR_EXCEPTION(algo != "classical" && algo != "distance laplacian" && algo != "signed classical", Exceptions::RuntimeError, "\"algorithm\" must be one of (classical|distance laplacian|signed classical)"); - - SC threshold; - // If we're doing the ML-style halving of the drop tol at each level, we do that here. - if (pL.get("aggregation: use ml scaling of drop tol")) - threshold = pL.get("aggregation: drop tol") / pow(2.0,currentLevel.GetLevelID()); - else - threshold = as(pL.get("aggregation: drop tol")); + if (doExperimentalWrap) { + TEUCHOS_TEST_FOR_EXCEPTION(predrop_ != null && algo != "classical", Exceptions::RuntimeError, "Dropping function must not be provided for \"" << algo << "\" algorithm"); + TEUCHOS_TEST_FOR_EXCEPTION(algo != "classical" && algo != "distance laplacian" && algo != "signed classical", Exceptions::RuntimeError, "\"algorithm\" must be one of (classical|distance laplacian|signed classical)"); + SC threshold; + // If we're doing the ML-style halving of the drop tol at each level, we do that here. 
+ if (pL.get("aggregation: use ml scaling of drop tol")) + threshold = pL.get("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID()); + else + threshold = as(pL.get("aggregation: drop tol")); - std::string distanceLaplacianAlgoStr = pL.get("aggregation: distance laplacian algo"); - std::string classicalAlgoStr = pL.get("aggregation: classical algo"); - real_type realThreshold = STS::magnitude(threshold);// CMS: Rename this to "magnitude threshold" sometime + std::string distanceLaplacianAlgoStr = pL.get("aggregation: distance laplacian algo"); + std::string classicalAlgoStr = pL.get("aggregation: classical algo"); + real_type realThreshold = STS::magnitude(threshold); // CMS: Rename this to "magnitude threshold" sometime - //////////////////////////////////////////////////// - // Remove this bit once we are confident that cut-based dropping works. + //////////////////////////////////////////////////// + // Remove this bit once we are confident that cut-based dropping works. #ifdef HAVE_MUELU_DEBUG - int distanceLaplacianCutVerbose = 0; + int distanceLaplacianCutVerbose = 0; #endif #ifdef DJS_READ_ENV_VARIABLES - if (getenv("MUELU_DROP_TOLERANCE_MODE")) { - distanceLaplacianAlgoStr = std::string(getenv("MUELU_DROP_TOLERANCE_MODE")); - } + if (getenv("MUELU_DROP_TOLERANCE_MODE")) { + distanceLaplacianAlgoStr = std::string(getenv("MUELU_DROP_TOLERANCE_MODE")); + } - if (getenv("MUELU_DROP_TOLERANCE_THRESHOLD")) { - auto tmp = atoi(getenv("MUELU_DROP_TOLERANCE_THRESHOLD")); - realThreshold = 1e-4*tmp; - } + if (getenv("MUELU_DROP_TOLERANCE_THRESHOLD")) { + auto tmp = atoi(getenv("MUELU_DROP_TOLERANCE_THRESHOLD")); + realThreshold = 1e-4 * tmp; + } -# ifdef HAVE_MUELU_DEBUG - if (getenv("MUELU_DROP_TOLERANCE_VERBOSE")) { - distanceLaplacianCutVerbose = atoi(getenv("MUELU_DROP_TOLERANCE_VERBOSE")); - } -# endif +#ifdef HAVE_MUELU_DEBUG + if (getenv("MUELU_DROP_TOLERANCE_VERBOSE")) { + distanceLaplacianCutVerbose = atoi(getenv("MUELU_DROP_TOLERANCE_VERBOSE")); + } 
#endif - //////////////////////////////////////////////////// - - enum decisionAlgoType {defaultAlgo, unscaled_cut, scaled_cut, scaled_cut_symmetric}; - - decisionAlgoType distanceLaplacianAlgo = defaultAlgo; - decisionAlgoType classicalAlgo = defaultAlgo; - if (algo == "distance laplacian") { - if (distanceLaplacianAlgoStr == "default") - distanceLaplacianAlgo = defaultAlgo; - else if (distanceLaplacianAlgoStr == "unscaled cut") - distanceLaplacianAlgo = unscaled_cut; - else if (distanceLaplacianAlgoStr == "scaled cut") - distanceLaplacianAlgo = scaled_cut; - else if (distanceLaplacianAlgoStr == "scaled cut symmetric") - distanceLaplacianAlgo = scaled_cut_symmetric; - else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut), not \"" << distanceLaplacianAlgoStr << "\""); - GetOStream(Runtime0) << "algorithm = \"" << algo << "\" distance laplacian algorithm = \"" << distanceLaplacianAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize()<< std::endl; - } else if (algo == "classical") { - if (classicalAlgoStr == "default") - classicalAlgo = defaultAlgo; - else if (classicalAlgoStr == "unscaled cut") - classicalAlgo = unscaled_cut; - else if (classicalAlgoStr == "scaled cut") - classicalAlgo = scaled_cut; - else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut), not \"" << classicalAlgoStr << "\""); - GetOStream(Runtime0) << "algorithm = \"" << algo << "\" classical algorithm = \"" << classicalAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; - - } else - GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; - Set(currentLevel, "Filtering", (threshold != STS::zero())); +#endif + 
//////////////////////////////////////////////////// + + enum decisionAlgoType { defaultAlgo, + unscaled_cut, + scaled_cut, + scaled_cut_symmetric }; + + decisionAlgoType distanceLaplacianAlgo = defaultAlgo; + decisionAlgoType classicalAlgo = defaultAlgo; + if (algo == "distance laplacian") { + if (distanceLaplacianAlgoStr == "default") + distanceLaplacianAlgo = defaultAlgo; + else if (distanceLaplacianAlgoStr == "unscaled cut") + distanceLaplacianAlgo = unscaled_cut; + else if (distanceLaplacianAlgoStr == "scaled cut") + distanceLaplacianAlgo = scaled_cut; + else if (distanceLaplacianAlgoStr == "scaled cut symmetric") + distanceLaplacianAlgo = scaled_cut_symmetric; + else + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut), not \"" << distanceLaplacianAlgoStr << "\""); + GetOStream(Runtime0) << "algorithm = \"" << algo << "\" distance laplacian algorithm = \"" << distanceLaplacianAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; + } else if (algo == "classical") { + if (classicalAlgoStr == "default") + classicalAlgo = defaultAlgo; + else if (classicalAlgoStr == "unscaled cut") + classicalAlgo = unscaled_cut; + else if (classicalAlgoStr == "scaled cut") + classicalAlgo = scaled_cut; + else + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut), not \"" << classicalAlgoStr << "\""); + GetOStream(Runtime0) << "algorithm = \"" << algo << "\" classical algorithm = \"" << classicalAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; - const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as(pL.get("aggregation: Dirichlet threshold"))); + } else + GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << 
A->GetFixedBlockSize() << std::endl; + Set(currentLevel, "Filtering", (threshold != STS::zero())); + const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as(pL.get("aggregation: Dirichlet threshold"))); - // NOTE: We don't support signed classical RS or SA with cut drop at present - TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassicalRS && classicalAlgo != defaultAlgo, Exceptions::RuntimeError, "\"aggregation: classical algo\" != default is not supported for scalled classical aggregation"); - TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassicalSA && classicalAlgo != defaultAlgo, Exceptions::RuntimeError, "\"aggregation: classical algo\" != default is not supported for scalled classical sa aggregation"); + // NOTE: We don't support signed classical RS or SA with cut drop at present + TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassicalRS && classicalAlgo != defaultAlgo, Exceptions::RuntimeError, "\"aggregation: classical algo\" != default is not supported for scalled classical aggregation"); + TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassicalSA && classicalAlgo != defaultAlgo, Exceptions::RuntimeError, "\"aggregation: classical algo\" != default is not supported for scalled classical sa aggregation"); - GO numDropped = 0, numTotal = 0; - std::string graphType = "unamalgamated"; //for description purposes only + GO numDropped = 0, numTotal = 0; + std::string graphType = "unamalgamated"; //for description purposes only - - /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme. + /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme. BlockSize is the number of storage blocks that must kept together during the amalgamation process. 
Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold: @@ -422,712 +412,697 @@ namespace MueLu { If matrix uses block storage, with block size of n, then storageblocksize=n, and BlockSize=numPDEs/n. Thus far, only storageblocksize=numPDEs and BlockSize=1 has been tested. */ - TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0,Exceptions::RuntimeError,"A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()"); - const LO BlockSize = A->GetFixedBlockSize() / A->GetStorageBlockSize(); - + TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()"); + const LO BlockSize = A->GetFixedBlockSize() / A->GetStorageBlockSize(); + + /************************** RS or SA-style Classical Dropping (and variants) **************************/ + if (algo == "classical") { + if (predrop_ == null) { + // ap: this is a hack: had to declare predrop_ as mutable + predrop_ = rcp(new PreDropFunctionConstVal(threshold)); + } - /************************** RS or SA-style Classical Dropping (and variants) **************************/ - if (algo == "classical") { - if (predrop_ == null) { - // ap: this is a hack: had to declare predrop_ as mutable - predrop_ = rcp(new PreDropFunctionConstVal(threshold)); + if (predrop_ != null) { + RCP predropConstVal = rcp_dynamic_cast(predrop_); + TEUCHOS_TEST_FOR_EXCEPTION(predropConstVal == Teuchos::null, Exceptions::BadCast, + "MueLu::CoalesceFactory::Build: cast to PreDropFunctionConstVal failed."); + // If a user provided a predrop function, it overwrites the XML threshold parameter + SC newt = predropConstVal->GetThreshold(); + if (newt != threshold) { + GetOStream(Warnings0) << "switching threshold parameter from " << threshold << " (list) to " << newt << " (user function" << std::endl; + threshold = newt; + } + } + // 
At this points we either have + // (predrop_ != null) + // Therefore, it is sufficient to check only threshold + if (BlockSize == 1 && threshold == STS::zero() && !useSignedClassicalRS && !useSignedClassicalSA && A->hasCrsGraph()) { + // Case 1: scalar problem, no dropping => just use matrix graph + RCP graph = rcp(new Graph(A->getCrsGraph(), "graph of A")); + // Detect and record rows that correspond to Dirichlet boundary conditions + ArrayRCP boundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); + if (rowSumTol > 0.) + Utilities::ApplyRowSumCriterion(*A, rowSumTol, boundaryNodes); + + graph->SetBoundaryNodeMap(boundaryNodes); + numTotal = A->getLocalNumEntries(); + + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; + for (LO i = 0; i < boundaryNodes.size(); ++i) + if (boundaryNodes[i]) + numLocalBoundaryNodes++; + RCP> comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; } - if (predrop_ != null) { - RCP predropConstVal = rcp_dynamic_cast(predrop_); - TEUCHOS_TEST_FOR_EXCEPTION(predropConstVal == Teuchos::null, Exceptions::BadCast, - "MueLu::CoalesceFactory::Build: cast to PreDropFunctionConstVal failed."); - // If a user provided a predrop function, it overwrites the XML threshold parameter - SC newt = predropConstVal->GetThreshold(); - if (newt != threshold) { - GetOStream(Warnings0) << "switching threshold parameter from " << threshold << " (list) to " << newt << " (user function" << std::endl; - threshold = newt; + Set(currentLevel, "DofsPerNode", 1); + Set(currentLevel, "Graph", graph); + + } else if ((BlockSize == 1 && threshold != STS::zero()) || + (BlockSize == 1 && threshold == STS::zero() && !A->hasCrsGraph()) || + (BlockSize == 1 && useSignedClassicalRS) || + (BlockSize == 1 && useSignedClassicalSA)) 
{ + // Case 2: scalar problem with dropping => record the column indices of undropped entries, but still use original + // graph's map information, e.g., whether index is local + // OR a matrix without a CrsGraph + + // allocate space for the local graph + ArrayRCP rows(A->getLocalNumRows() + 1); + ArrayRCP columns(A->getLocalNumEntries()); + + using MT = typename STS::magnitudeType; + RCP ghostedDiag; + ArrayRCP ghostedDiagVals; + ArrayRCP negMaxOffDiagonal; + // RS style needs the max negative off-diagonal, SA style needs the diagonal + if (useSignedClassicalRS) { + if (ghostedBlockNumber.is_null()) { + negMaxOffDiagonal = MueLu::Utilities::GetMatrixMaxMinusOffDiagonal(*A); + if (GetVerbLevel() & Statistics1) + GetOStream(Statistics1) << "Calculated max point off-diagonal" << std::endl; + } else { + negMaxOffDiagonal = MueLu::Utilities::GetMatrixMaxMinusOffDiagonal(*A, *ghostedBlockNumber); + if (GetVerbLevel() & Statistics1) + GetOStream(Statistics1) << "Calculating max block off-diagonal" << std::endl; } + } else { + ghostedDiag = MueLu::Utilities::GetMatrixOverlappedDiagonal(*A); + ghostedDiagVals = ghostedDiag->getData(0); } - // At this points we either have - // (predrop_ != null) - // Therefore, it is sufficient to check only threshold - if ( BlockSize==1 && threshold == STS::zero() && !useSignedClassicalRS && !useSignedClassicalSA && A->hasCrsGraph()) { - // Case 1: scalar problem, no dropping => just use matrix graph - RCP graph = rcp(new Graph(A->getCrsGraph(), "graph of A")); - // Detect and record rows that correspond to Dirichlet boundary conditions - ArrayRCP boundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); - if (rowSumTol > 0.) + ArrayRCP boundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); + if (rowSumTol > 0.) 
{ + if (ghostedBlockNumber.is_null()) { + if (GetVerbLevel() & Statistics1) + GetOStream(Statistics1) << "Applying point row sum criterion." << std::endl; Utilities::ApplyRowSumCriterion(*A, rowSumTol, boundaryNodes); - - graph->SetBoundaryNodeMap(boundaryNodes); - numTotal = A->getLocalNumEntries(); - - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; - for (LO i = 0; i < boundaryNodes.size(); ++i) - if (boundaryNodes[i]) - numLocalBoundaryNodes++; - RCP > comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; - } - - Set(currentLevel, "DofsPerNode", 1); - Set(currentLevel, "Graph", graph); - - } else if ( (BlockSize == 1 && threshold != STS::zero()) || - (BlockSize == 1 && threshold == STS::zero() && !A->hasCrsGraph()) || - (BlockSize == 1 && useSignedClassicalRS) || - (BlockSize == 1 && useSignedClassicalSA) ) { - // Case 2: scalar problem with dropping => record the column indices of undropped entries, but still use original - // graph's map information, e.g., whether index is local - // OR a matrix without a CrsGraph - - // allocate space for the local graph - ArrayRCP rows (A->getLocalNumRows()+1); - ArrayRCP columns(A->getLocalNumEntries()); - - using MT = typename STS::magnitudeType; - RCP ghostedDiag; - ArrayRCP ghostedDiagVals; - ArrayRCP negMaxOffDiagonal; - // RS style needs the max negative off-diagonal, SA style needs the diagonal - if(useSignedClassicalRS) { - if(ghostedBlockNumber.is_null()) { - negMaxOffDiagonal = MueLu::Utilities::GetMatrixMaxMinusOffDiagonal(*A); - if (GetVerbLevel() & Statistics1) - GetOStream(Statistics1) << "Calculated max point off-diagonal" << std::endl; - } - else { - negMaxOffDiagonal = MueLu::Utilities::GetMatrixMaxMinusOffDiagonal(*A,*ghostedBlockNumber); - if (GetVerbLevel() & Statistics1) - GetOStream(Statistics1) << 
"Calculating max block off-diagonal" << std::endl; - } - } - else { - ghostedDiag = MueLu::Utilities::GetMatrixOverlappedDiagonal(*A); - ghostedDiagVals = ghostedDiag->getData(0); - } - ArrayRCP boundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); - if (rowSumTol > 0.) { - if(ghostedBlockNumber.is_null()) { - if (GetVerbLevel() & Statistics1) - GetOStream(Statistics1) << "Applying point row sum criterion." << std::endl; - Utilities::ApplyRowSumCriterion(*A, rowSumTol, boundaryNodes); - } else { - if (GetVerbLevel() & Statistics1) - GetOStream(Statistics1) << "Applying block row sum criterion." << std::endl; - Utilities::ApplyRowSumCriterion(*A, *ghostedBlockNumber, rowSumTol, boundaryNodes); - } + } else { + if (GetVerbLevel() & Statistics1) + GetOStream(Statistics1) << "Applying block row sum criterion." << std::endl; + Utilities::ApplyRowSumCriterion(*A, *ghostedBlockNumber, rowSumTol, boundaryNodes); } + } - LO realnnz = 0; - rows[0] = 0; - for (LO row = 0; row < Teuchos::as(A->getRowMap()->getLocalNumElements()); ++row) { - size_t nnz = A->getNumEntriesInLocalRow(row); - bool rowIsDirichlet = boundaryNodes[row]; - ArrayView indices; - ArrayView vals; - A->getLocalRowView(row, indices, vals); - - if(classicalAlgo == defaultAlgo) { - //FIXME the current predrop function uses the following - //FIXME if(std::abs(vals[k]) > std::abs(threshold_) || grow == gcid ) - //FIXME but the threshold doesn't take into account the rows' diagonal entries - //FIXME For now, hardwiring the dropping in here - - LO rownnz = 0; - if(useSignedClassicalRS) { - // Signed classical RS style - for (LO colID = 0; colID < Teuchos::as(nnz); colID++) { - LO col = indices[colID]; - MT max_neg_aik = realThreshold * STS::real(negMaxOffDiagonal[row]); - MT neg_aij = - STS::real(vals[colID]); - /* if(row==1326) printf("A(%d,%d) = %6.4e, block = (%d,%d) neg_aij = %6.4e max_neg_aik = %6.4e\n",row,col,vals[colID], + LO realnnz = 0; + rows[0] = 0; + 
for (LO row = 0; row < Teuchos::as(A->getRowMap()->getLocalNumElements()); ++row) { + size_t nnz = A->getNumEntriesInLocalRow(row); + bool rowIsDirichlet = boundaryNodes[row]; + ArrayView indices; + ArrayView vals; + A->getLocalRowView(row, indices, vals); + + if (classicalAlgo == defaultAlgo) { + //FIXME the current predrop function uses the following + //FIXME if(std::abs(vals[k]) > std::abs(threshold_) || grow == gcid ) + //FIXME but the threshold doesn't take into account the rows' diagonal entries + //FIXME For now, hardwiring the dropping in here + + LO rownnz = 0; + if (useSignedClassicalRS) { + // Signed classical RS style + for (LO colID = 0; colID < Teuchos::as(nnz); colID++) { + LO col = indices[colID]; + MT max_neg_aik = realThreshold * STS::real(negMaxOffDiagonal[row]); + MT neg_aij = -STS::real(vals[colID]); + /* if(row==1326) printf("A(%d,%d) = %6.4e, block = (%d,%d) neg_aij = %6.4e max_neg_aik = %6.4e\n",row,col,vals[colID], g_block_id.is_null() ? -1 : g_block_id[row], g_block_id.is_null() ? 
-1 : g_block_id[col], neg_aij, max_neg_aik);*/ - if ((!rowIsDirichlet && (g_block_id.is_null() || g_block_id[row] == g_block_id[col]) && neg_aij > max_neg_aik) || row == col) { - columns[realnnz++] = col; - rownnz++; - } else - numDropped++; - } - rows[row+1] = realnnz; + if ((!rowIsDirichlet && (g_block_id.is_null() || g_block_id[row] == g_block_id[col]) && neg_aij > max_neg_aik) || row == col) { + columns[realnnz++] = col; + rownnz++; + } else + numDropped++; } - else if(useSignedClassicalSA) { - // Signed classical SA style - for (LO colID = 0; colID < Teuchos::as(nnz); colID++) { - LO col = indices[colID]; + rows[row + 1] = realnnz; + } else if (useSignedClassicalSA) { + // Signed classical SA style + for (LO colID = 0; colID < Teuchos::as(nnz); colID++) { + LO col = indices[colID]; - bool is_nonpositive = STS::real(vals[colID]) <= 0; - MT aiiajj = STS::magnitude(threshold*threshold * ghostedDiagVals[col]*ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj| - MT aij = is_nonpositive ? STS::magnitude(vals[colID]*vals[colID]) : (-STS::magnitude(vals[colID]*vals[colID])); // + |a_ij|^2, if a_ij < 0, - |a_ij|^2 if a_ij >=0 - /* + bool is_nonpositive = STS::real(vals[colID]) <= 0; + MT aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[col] * ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj| + MT aij = is_nonpositive ? 
STS::magnitude(vals[colID] * vals[colID]) : (-STS::magnitude(vals[colID] * vals[colID])); // + |a_ij|^2, if a_ij < 0, - |a_ij|^2 if a_ij >=0 + /* if(row==1326) printf("A(%d,%d) = %6.4e, raw_aij = %6.4e aij = %6.4e aiiajj = %6.4e\n",row,col,vals[colID], vals[colID],aij, aiiajj); */ - if ((!rowIsDirichlet && aij > aiiajj) || row == col) { - columns[realnnz++] = col; - rownnz++; - } else - numDropped++; - } - rows[row+1] = realnnz; + if ((!rowIsDirichlet && aij > aiiajj) || row == col) { + columns[realnnz++] = col; + rownnz++; + } else + numDropped++; } - else { - // Standard abs classical - for (LO colID = 0; colID < Teuchos::as(nnz); colID++) { - LO col = indices[colID]; - MT aiiajj = STS::magnitude(threshold*threshold * ghostedDiagVals[col]*ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj| - MT aij = STS::magnitude(vals[colID]*vals[colID]); // |a_ij|^2 - - if ((!rowIsDirichlet && aij > aiiajj) || row == col) { + rows[row + 1] = realnnz; + } else { + // Standard abs classical + for (LO colID = 0; colID < Teuchos::as(nnz); colID++) { + LO col = indices[colID]; + MT aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[col] * ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj| + MT aij = STS::magnitude(vals[colID] * vals[colID]); // |a_ij|^2 + + if ((!rowIsDirichlet && aij > aiiajj) || row == col) { columns[realnnz++] = col; rownnz++; - } else - numDropped++; - } - rows[row+1] = realnnz; + } else + numDropped++; } + rows[row + 1] = realnnz; } - else { - /* Cut Algorithm */ - //CMS - using DropTol = Details::DropTol; - std::vector drop_vec; - drop_vec.reserve(nnz); - const real_type zero = Teuchos::ScalarTraits::zero(); - const real_type one = Teuchos::ScalarTraits::one(); - LO rownnz = 0; - // NOTE: This probably needs to be fixed for rowsum - - // find magnitudes - for (LO colID = 0; colID < (LO)nnz; colID++) { - LO col = indices[colID]; - if (row == col) { - drop_vec.emplace_back( zero, one, colID, false); - continue; - } - - // Don't aggregate boundaries - 
if(boundaryNodes[colID]) continue; - typename STS::magnitudeType aiiajj = STS::magnitude(threshold*threshold * ghostedDiagVals[col]*ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj| - typename STS::magnitudeType aij = STS::magnitude(vals[colID]*vals[colID]); // |a_ij|^2 - drop_vec.emplace_back(aij, aiiajj, colID, false); + } else { + /* Cut Algorithm */ + //CMS + using DropTol = Details::DropTol; + std::vector drop_vec; + drop_vec.reserve(nnz); + const real_type zero = Teuchos::ScalarTraits::zero(); + const real_type one = Teuchos::ScalarTraits::one(); + LO rownnz = 0; + // NOTE: This probably needs to be fixed for rowsum + + // find magnitudes + for (LO colID = 0; colID < (LO)nnz; colID++) { + LO col = indices[colID]; + if (row == col) { + drop_vec.emplace_back(zero, one, colID, false); + continue; } - const size_t n = drop_vec.size(); - - if (classicalAlgo == unscaled_cut) { - std::sort( drop_vec.begin(), drop_vec.end() - , [](DropTol const& a, DropTol const& b) { - return a.val > b.val; - }); - - bool drop = false; - for (size_t i=1; i realThreshold*b) { - drop = true; + // Don't aggregate boundaries + if (boundaryNodes[colID]) continue; + typename STS::magnitudeType aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[col] * ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj| + typename STS::magnitudeType aij = STS::magnitude(vals[colID] * vals[colID]); // |a_ij|^2 + drop_vec.emplace_back(aij, aiiajj, colID, false); + } + + const size_t n = drop_vec.size(); + + if (classicalAlgo == unscaled_cut) { + std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) { + return a.val > b.val; + }); + + bool drop = false; + for (size_t i = 1; i < n; ++i) { + if (!drop) { + auto const& x = drop_vec[i - 1]; + auto const& y = drop_vec[i]; + auto a = x.val; + auto b = y.val; + if (a > realThreshold * b) { + drop = true; #ifdef HAVE_MUELU_DEBUG - if (distanceLaplacianCutVerbose) { - std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row 
<< std::endl; - } -#endif + if (distanceLaplacianCutVerbose) { + std::cout << "DJS: KEEP, N, ROW: " << i + 1 << ", " << n << ", " << row << std::endl; } +#endif } - drop_vec[i].drop = drop; } - } else if (classicalAlgo == scaled_cut) { - std::sort( drop_vec.begin(), drop_vec.end() - , [](DropTol const& a, DropTol const& b) { - return a.val/a.diag > b.val/b.diag; - }); - bool drop = false; - // printf("[%d] Scaled Cut: ",(int)row); - // printf("%3d(%4s) ",indices[drop_vec[0].col],"keep"); - for (size_t i=1; i realThreshold*b) { - drop = true; + drop_vec[i].drop = drop; + } + } else if (classicalAlgo == scaled_cut) { + std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) { + return a.val / a.diag > b.val / b.diag; + }); + bool drop = false; + // printf("[%d] Scaled Cut: ",(int)row); + // printf("%3d(%4s) ",indices[drop_vec[0].col],"keep"); + for (size_t i = 1; i < n; ++i) { + if (!drop) { + auto const& x = drop_vec[i - 1]; + auto const& y = drop_vec[i]; + auto a = x.val / x.diag; + auto b = y.val / y.diag; + if (a > realThreshold * b) { + drop = true; #ifdef HAVE_MUELU_DEBUG - if (distanceLaplacianCutVerbose) { - std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl; - } -#endif - } - // printf("%3d(%4s) ",indices[drop_vec[i].col],drop?"drop":"keep"); - + if (distanceLaplacianCutVerbose) { + std::cout << "DJS: KEEP, N, ROW: " << i + 1 << ", " << n << ", " << row << std::endl; } - drop_vec[i].drop = drop; +#endif } - // printf("\n"); - } - std::sort( drop_vec.begin(), drop_vec.end() - , [](DropTol const& a, DropTol const& b) { - return a.col < b.col; - } - ); - - for (LO idxID =0; idxID<(LO)drop_vec.size(); idxID++) { - LO col = indices[drop_vec[idxID].col]; - // don't drop diagonal - if (row == col) { - columns[realnnz++] = col; - rownnz++; - continue; - } - - if (!drop_vec[idxID].drop) { - columns[realnnz++] = col; - rownnz++; - } else { - numDropped++; + // printf("%3d(%4s) 
",indices[drop_vec[i].col],drop?"drop":"keep"); } + drop_vec[i].drop = drop; } - // CMS - rows[row+1] = realnnz; - + // printf("\n"); } - }//end for row - - columns.resize(realnnz); - numTotal = A->getLocalNumEntries(); + std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) { + return a.col < b.col; + }); + + for (LO idxID = 0; idxID < (LO)drop_vec.size(); idxID++) { + LO col = indices[drop_vec[idxID].col]; + // don't drop diagonal + if (row == col) { + columns[realnnz++] = col; + rownnz++; + continue; + } - if (aggregationMayCreateDirichlet) { - // If the only element remaining after filtering is diagonal, mark node as boundary - for (LO row = 0; row < Teuchos::as(A->getRowMap()->getLocalNumElements()); ++row) { - if (rows[row+1]- rows[row] <= 1) - boundaryNodes[row] = true; + if (!drop_vec[idxID].drop) { + columns[realnnz++] = col; + rownnz++; + } else { + numDropped++; + } } + // CMS + rows[row + 1] = realnnz; } + } //end for row + + columns.resize(realnnz); + numTotal = A->getLocalNumEntries(); - RCP graph = rcp(new LWGraph(rows, columns, A->getRowMap(), A->getColMap(), "thresholded graph of A")); - graph->SetBoundaryNodeMap(boundaryNodes); - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; - for (LO i = 0; i < boundaryNodes.size(); ++i) - if (boundaryNodes[i]) - numLocalBoundaryNodes++; - RCP > comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; + if (aggregationMayCreateDirichlet) { + // If the only element remaining after filtering is diagonal, mark node as boundary + for (LO row = 0; row < Teuchos::as(A->getRowMap()->getLocalNumElements()); ++row) { + if (rows[row + 1] - rows[row] <= 1) + boundaryNodes[row] = true; } - Set(currentLevel, "Graph", graph); - Set(currentLevel, "DofsPerNode", 1); - - // If we're doing signed 
classical, we might want to block-diagonalize *after* the dropping - if(generateColoringGraph) { - RCP colorGraph; - RCP importer = A->getCrsGraph()->getImporter(); - BlockDiagonalizeGraph(graph,ghostedBlockNumber,colorGraph,importer); - Set(currentLevel, "Coloring Graph",colorGraph); - // #define CMS_DUMP + } + + RCP graph = rcp(new LWGraph(rows, columns, A->getRowMap(), A->getColMap(), "thresholded graph of A")); + graph->SetBoundaryNodeMap(boundaryNodes); + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; + for (LO i = 0; i < boundaryNodes.size(); ++i) + if (boundaryNodes[i]) + numLocalBoundaryNodes++; + RCP> comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; + } + Set(currentLevel, "Graph", graph); + Set(currentLevel, "DofsPerNode", 1); + + // If we're doing signed classical, we might want to block-diagonalize *after* the dropping + if (generateColoringGraph) { + RCP colorGraph; + RCP importer = A->getCrsGraph()->getImporter(); + BlockDiagonalizeGraph(graph, ghostedBlockNumber, colorGraph, importer); + Set(currentLevel, "Coloring Graph", colorGraph); + // #define CMS_DUMP #ifdef CMS_DUMP - { - Xpetra::IO::Write("m_regular_graph."+std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast(graph)->GetCrsGraph()); - Xpetra::IO::Write("m_color_graph."+std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast(colorGraph)->GetCrsGraph()); - // int rank = graph->GetDomainMap()->getComm()->getRank(); - // { - // std::ofstream ofs(std::string("m_color_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out); - // RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs)); - // colorGraph->print(*fancy,Debug); - // } - // { - // std::ofstream ofs(std::string("m_regular_graph_") + 
std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out); - // RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs)); - // graph->print(*fancy,Debug); - // } - - } + { + Xpetra::IO::Write("m_regular_graph." + std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast(graph)->GetCrsGraph()); + Xpetra::IO::Write("m_color_graph." + std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast(colorGraph)->GetCrsGraph()); + // int rank = graph->GetDomainMap()->getComm()->getRank(); + // { + // std::ofstream ofs(std::string("m_color_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out); + // RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs)); + // colorGraph->print(*fancy,Debug); + // } + // { + // std::ofstream ofs(std::string("m_regular_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out); + // RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs)); + // graph->print(*fancy,Debug); + // } + } #endif - }//end generateColoringGraph - } else if (BlockSize > 1 && threshold == STS::zero()) { - // Case 3: Multiple DOF/node problem without dropping - const RCP rowMap = A->getRowMap(); - const RCP colMap = A->getColMap(); + } //end generateColoringGraph + } else if (BlockSize > 1 && threshold == STS::zero()) { + // Case 3: Multiple DOF/node problem without dropping + const RCP rowMap = A->getRowMap(); + const RCP colMap = A->getColMap(); - graphType = "amalgamated"; + graphType = "amalgamated"; - // build node row map (uniqueMap) and node column map (nonUniqueMap) - // the arrays rowTranslation and colTranslation contain the local node id - // given a local dof id. 
The data is calculated by the AmalgamationFactory and - // stored in the variable container "UnAmalgamationInfo" - RCP uniqueMap = amalInfo->getNodeRowMap(); - RCP nonUniqueMap = amalInfo->getNodeColMap(); - Array rowTranslation = *(amalInfo->getRowTranslation()); - Array colTranslation = *(amalInfo->getColTranslation()); - - // get number of local nodes - LO numRows = Teuchos::as(uniqueMap->getLocalNumElements()); - - // Allocate space for the local graph - ArrayRCP rows = ArrayRCP(numRows+1); - ArrayRCP columns = ArrayRCP(A->getLocalNumEntries()); - - const ArrayRCP amalgBoundaryNodes(numRows, false); - - // Detect and record rows that correspond to Dirichlet boundary conditions - // TODO If we use ArrayRCP, then we can record boundary nodes as usual. Size - // TODO the array one bigger than the number of local rows, and the last entry can - // TODO hold the actual number of boundary nodes. Clever, huh? - ArrayRCP pointBoundaryNodes; - pointBoundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); - if (rowSumTol > 0.) 
- Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes); - - - // extract striding information - LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map) - LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map) - LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map - if (A->IsView("stridedMaps") == true) { - Teuchos::RCP myMap = A->getRowMap("stridedMaps"); - Teuchos::RCP strMap = Teuchos::rcp_dynamic_cast(myMap); - TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap"); - blkSize = Teuchos::as(strMap->getFixedBlockSize()); - blkId = strMap->getStridedBlockId(); - if (blkId > -1) - blkPartSize = Teuchos::as(strMap->getStridingData()[blkId]); - } + // build node row map (uniqueMap) and node column map (nonUniqueMap) + // the arrays rowTranslation and colTranslation contain the local node id + // given a local dof id. The data is calculated by the AmalgamationFactory and + // stored in the variable container "UnAmalgamationInfo" + RCP uniqueMap = amalInfo->getNodeRowMap(); + RCP nonUniqueMap = amalInfo->getNodeColMap(); + Array rowTranslation = *(amalInfo->getRowTranslation()); + Array colTranslation = *(amalInfo->getColTranslation()); - // loop over all local nodes - LO realnnz = 0; - rows[0] = 0; - Array indicesExtra; - for (LO row = 0; row < numRows; row++) { - ArrayView indices; - indicesExtra.resize(0); + // get number of local nodes + LO numRows = Teuchos::as(uniqueMap->getLocalNumElements()); - // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet - // Note, that pointBoundaryNodes lives on the dofmap (and not the node map). - // Therefore, looping over all dofs is fine here. We use blkPartSize as we work - // with local ids. - // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet) - // node. 
- bool isBoundary = false; - if (pL.get("aggregation: greedy Dirichlet") == true) { - for (LO j = 0; j < blkPartSize; j++) { - if (pointBoundaryNodes[row*blkPartSize+j]) { - isBoundary = true; - break; - } + // Allocate space for the local graph + ArrayRCP rows = ArrayRCP(numRows + 1); + ArrayRCP columns = ArrayRCP(A->getLocalNumEntries()); + + const ArrayRCP amalgBoundaryNodes(numRows, false); + + // Detect and record rows that correspond to Dirichlet boundary conditions + // TODO If we use ArrayRCP, then we can record boundary nodes as usual. Size + // TODO the array one bigger than the number of local rows, and the last entry can + // TODO hold the actual number of boundary nodes. Clever, huh? + ArrayRCP pointBoundaryNodes; + pointBoundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); + if (rowSumTol > 0.) + Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes); + + // extract striding information + LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map) + LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map) + LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map + if (A->IsView("stridedMaps") == true) { + Teuchos::RCP myMap = A->getRowMap("stridedMaps"); + Teuchos::RCP strMap = Teuchos::rcp_dynamic_cast(myMap); + TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap"); + blkSize = Teuchos::as(strMap->getFixedBlockSize()); + blkId = strMap->getStridedBlockId(); + if (blkId > -1) + blkPartSize = Teuchos::as(strMap->getStridingData()[blkId]); + } + + // loop over all local nodes + LO realnnz = 0; + rows[0] = 0; + Array indicesExtra; + for (LO row = 0; row < numRows; row++) { + ArrayView indices; + indicesExtra.resize(0); + + // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet + // Note, that 
pointBoundaryNodes lives on the dofmap (and not the node map). + // Therefore, looping over all dofs is fine here. We use blkPartSize as we work + // with local ids. + // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet) + // node. + bool isBoundary = false; + if (pL.get("aggregation: greedy Dirichlet") == true) { + for (LO j = 0; j < blkPartSize; j++) { + if (pointBoundaryNodes[row * blkPartSize + j]) { + isBoundary = true; + break; } - } else { - isBoundary = true; - for (LO j = 0; j < blkPartSize; j++) { - if (!pointBoundaryNodes[row*blkPartSize+j]) { - isBoundary = false; - break; - } + } + } else { + isBoundary = true; + for (LO j = 0; j < blkPartSize; j++) { + if (!pointBoundaryNodes[row * blkPartSize + j]) { + isBoundary = false; + break; } } + } - // Merge rows of A - // The array indicesExtra contains local column node ids for the current local node "row" - if (!isBoundary) - MergeRows(*A, row, indicesExtra, colTranslation); - else - indicesExtra.push_back(row); - indices = indicesExtra; - numTotal += indices.size(); + // Merge rows of A + // The array indicesExtra contains local column node ids for the current local node "row" + if (!isBoundary) + MergeRows(*A, row, indicesExtra, colTranslation); + else + indicesExtra.push_back(row); + indices = indicesExtra; + numTotal += indices.size(); + + // add the local column node ids to the full columns array which + // contains the local column node ids for all local node rows + LO nnz = indices.size(), rownnz = 0; + for (LO colID = 0; colID < nnz; colID++) { + LO col = indices[colID]; + columns[realnnz++] = col; + rownnz++; + } - // add the local column node ids to the full columns array which - // contains the local column node ids for all local node rows - LO nnz = indices.size(), rownnz = 0; - for (LO colID = 0; colID < nnz; colID++) { - LO col = indices[colID]; - columns[realnnz++] = col; - rownnz++; - } + if (rownnz == 1) { + // If the only element remaining after 
filtering is diagonal, mark node as boundary + // FIXME: this should really be replaced by the following + // if (indices.size() == 1 && indices[0] == row) + // boundaryNodes[row] = true; + // We do not do it this way now because there is no framework for distinguishing isolated + // and boundary nodes in the aggregation algorithms + amalgBoundaryNodes[row] = true; + } + rows[row + 1] = realnnz; + } //for (LO row = 0; row < numRows; row++) + columns.resize(realnnz); - if (rownnz == 1) { - // If the only element remaining after filtering is diagonal, mark node as boundary - // FIXME: this should really be replaced by the following - // if (indices.size() == 1 && indices[0] == row) - // boundaryNodes[row] = true; - // We do not do it this way now because there is no framework for distinguishing isolated - // and boundary nodes in the aggregation algorithms - amalgBoundaryNodes[row] = true; - } - rows[row+1] = realnnz; - } //for (LO row = 0; row < numRows; row++) - columns.resize(realnnz); + RCP graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A")); + graph->SetBoundaryNodeMap(amalgBoundaryNodes); - RCP graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A")); - graph->SetBoundaryNodeMap(amalgBoundaryNodes); + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; + for (LO i = 0; i < amalgBoundaryNodes.size(); ++i) + if (amalgBoundaryNodes[i]) + numLocalBoundaryNodes++; - for (LO i = 0; i < amalgBoundaryNodes.size(); ++i) - if (amalgBoundaryNodes[i]) - numLocalBoundaryNodes++; + RCP> comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes + << " agglomerated Dirichlet nodes" << std::endl; + } - RCP > comm = A->getRowMap()->getComm(); - 
MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes - << " agglomerated Dirichlet nodes" << std::endl; - } + Set(currentLevel, "Graph", graph); + Set(currentLevel, "DofsPerNode", blkSize); // full block size - Set(currentLevel, "Graph", graph); - Set(currentLevel, "DofsPerNode", blkSize); // full block size + } else if (BlockSize > 1 && threshold != STS::zero()) { + // Case 4: Multiple DOF/node problem with dropping + const RCP rowMap = A->getRowMap(); + const RCP colMap = A->getColMap(); + graphType = "amalgamated"; - } else if (BlockSize > 1 && threshold != STS::zero()) { - // Case 4: Multiple DOF/node problem with dropping - const RCP rowMap = A->getRowMap(); - const RCP colMap = A->getColMap(); - graphType = "amalgamated"; + // build node row map (uniqueMap) and node column map (nonUniqueMap) + // the arrays rowTranslation and colTranslation contain the local node id + // given a local dof id. The data is calculated by the AmalgamationFactory and + // stored in the variable container "UnAmalgamationInfo" + RCP uniqueMap = amalInfo->getNodeRowMap(); + RCP nonUniqueMap = amalInfo->getNodeColMap(); + Array rowTranslation = *(amalInfo->getRowTranslation()); + Array colTranslation = *(amalInfo->getColTranslation()); - // build node row map (uniqueMap) and node column map (nonUniqueMap) - // the arrays rowTranslation and colTranslation contain the local node id - // given a local dof id. 
The data is calculated by the AmalgamationFactory and - // stored in the variable container "UnAmalgamationInfo" - RCP uniqueMap = amalInfo->getNodeRowMap(); - RCP nonUniqueMap = amalInfo->getNodeColMap(); - Array rowTranslation = *(amalInfo->getRowTranslation()); - Array colTranslation = *(amalInfo->getColTranslation()); - - // get number of local nodes - LO numRows = Teuchos::as(uniqueMap->getLocalNumElements()); - - // Allocate space for the local graph - ArrayRCP rows = ArrayRCP(numRows+1); - ArrayRCP columns = ArrayRCP(A->getLocalNumEntries()); - - const ArrayRCP amalgBoundaryNodes(numRows, false); - - // Detect and record rows that correspond to Dirichlet boundary conditions - // TODO If we use ArrayRCP, then we can record boundary nodes as usual. Size - // TODO the array one bigger than the number of local rows, and the last entry can - // TODO hold the actual number of boundary nodes. Clever, huh? - ArrayRCP pointBoundaryNodes; - pointBoundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); - if (rowSumTol > 0.) 
- Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes); - - - // extract striding information - LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map) - LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map) - LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map - if (A->IsView("stridedMaps") == true) { - Teuchos::RCP myMap = A->getRowMap("stridedMaps"); - Teuchos::RCP strMap = Teuchos::rcp_dynamic_cast(myMap); - TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap"); - blkSize = Teuchos::as(strMap->getFixedBlockSize()); - blkId = strMap->getStridedBlockId(); - if (blkId > -1) - blkPartSize = Teuchos::as(strMap->getStridingData()[blkId]); - } + // get number of local nodes + LO numRows = Teuchos::as(uniqueMap->getLocalNumElements()); - // extract diagonal data for dropping strategy - RCP ghostedDiag = MueLu::Utilities::GetMatrixOverlappedDiagonal(*A); - const ArrayRCP ghostedDiagVals = ghostedDiag->getData(0); + // Allocate space for the local graph + ArrayRCP rows = ArrayRCP(numRows + 1); + ArrayRCP columns = ArrayRCP(A->getLocalNumEntries()); - // loop over all local nodes - LO realnnz = 0; - rows[0] = 0; - Array indicesExtra; - for (LO row = 0; row < numRows; row++) { - ArrayView indices; - indicesExtra.resize(0); + const ArrayRCP amalgBoundaryNodes(numRows, false); - // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet - // Note, that pointBoundaryNodes lives on the dofmap (and not the node map). - // Therefore, looping over all dofs is fine here. We use blkPartSize as we work - // with local ids. - // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet) - // node. 
- bool isBoundary = false; - if (pL.get("aggregation: greedy Dirichlet") == true) { - for (LO j = 0; j < blkPartSize; j++) { - if (pointBoundaryNodes[row*blkPartSize+j]) { - isBoundary = true; - break; - } - } - } else { - isBoundary = true; - for (LO j = 0; j < blkPartSize; j++) { - if (!pointBoundaryNodes[row*blkPartSize+j]) { - isBoundary = false; - break; - } - } - } + // Detect and record rows that correspond to Dirichlet boundary conditions + // TODO If we use ArrayRCP, then we can record boundary nodes as usual. Size + // TODO the array one bigger than the number of local rows, and the last entry can + // TODO hold the actual number of boundary nodes. Clever, huh? + ArrayRCP pointBoundaryNodes; + pointBoundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); + if (rowSumTol > 0.) + Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes); - // Merge rows of A - // The array indicesExtra contains local column node ids for the current local node "row" - if (!isBoundary) - MergeRowsWithDropping(*A, row, ghostedDiagVals, threshold, indicesExtra, colTranslation); - else - indicesExtra.push_back(row); - indices = indicesExtra; - numTotal += indices.size(); + // extract striding information + LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map) + LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map) + LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map + if (A->IsView("stridedMaps") == true) { + Teuchos::RCP myMap = A->getRowMap("stridedMaps"); + Teuchos::RCP strMap = Teuchos::rcp_dynamic_cast(myMap); + TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap"); + blkSize = Teuchos::as(strMap->getFixedBlockSize()); + blkId = strMap->getStridedBlockId(); + if (blkId > -1) + blkPartSize = Teuchos::as(strMap->getStridingData()[blkId]); + } - 
// add the local column node ids to the full columns array which - // contains the local column node ids for all local node rows - LO nnz = indices.size(), rownnz = 0; - for (LO colID = 0; colID < nnz; colID++) { - LO col = indices[colID]; - columns[realnnz++] = col; - rownnz++; - } + // extract diagonal data for dropping strategy + RCP ghostedDiag = MueLu::Utilities::GetMatrixOverlappedDiagonal(*A); + const ArrayRCP ghostedDiagVals = ghostedDiag->getData(0); - if (rownnz == 1) { - // If the only element remaining after filtering is diagonal, mark node as boundary - // FIXME: this should really be replaced by the following - // if (indices.size() == 1 && indices[0] == row) - // boundaryNodes[row] = true; - // We do not do it this way now because there is no framework for distinguishing isolated - // and boundary nodes in the aggregation algorithms - amalgBoundaryNodes[row] = true; + // loop over all local nodes + LO realnnz = 0; + rows[0] = 0; + Array indicesExtra; + for (LO row = 0; row < numRows; row++) { + ArrayView indices; + indicesExtra.resize(0); + + // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet + // Note, that pointBoundaryNodes lives on the dofmap (and not the node map). + // Therefore, looping over all dofs is fine here. We use blkPartSize as we work + // with local ids. + // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet) + // node. 
+ bool isBoundary = false; + if (pL.get("aggregation: greedy Dirichlet") == true) { + for (LO j = 0; j < blkPartSize; j++) { + if (pointBoundaryNodes[row * blkPartSize + j]) { + isBoundary = true; + break; + } } - rows[row+1] = realnnz; - } //for (LO row = 0; row < numRows; row++) - columns.resize(realnnz); + } else { + isBoundary = true; + for (LO j = 0; j < blkPartSize; j++) { + if (!pointBoundaryNodes[row * blkPartSize + j]) { + isBoundary = false; + break; + } + } + } - RCP graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A")); - graph->SetBoundaryNodeMap(amalgBoundaryNodes); + // Merge rows of A + // The array indicesExtra contains local column node ids for the current local node "row" + if (!isBoundary) + MergeRowsWithDropping(*A, row, ghostedDiagVals, threshold, indicesExtra, colTranslation); + else + indicesExtra.push_back(row); + indices = indicesExtra; + numTotal += indices.size(); + + // add the local column node ids to the full columns array which + // contains the local column node ids for all local node rows + LO nnz = indices.size(), rownnz = 0; + for (LO colID = 0; colID < nnz; colID++) { + LO col = indices[colID]; + columns[realnnz++] = col; + rownnz++; + } - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; + if (rownnz == 1) { + // If the only element remaining after filtering is diagonal, mark node as boundary + // FIXME: this should really be replaced by the following + // if (indices.size() == 1 && indices[0] == row) + // boundaryNodes[row] = true; + // We do not do it this way now because there is no framework for distinguishing isolated + // and boundary nodes in the aggregation algorithms + amalgBoundaryNodes[row] = true; + } + rows[row + 1] = realnnz; + } //for (LO row = 0; row < numRows; row++) + columns.resize(realnnz); - for (LO i = 0; i < amalgBoundaryNodes.size(); ++i) - if (amalgBoundaryNodes[i]) - numLocalBoundaryNodes++; + RCP graph = rcp(new 
LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A")); + graph->SetBoundaryNodeMap(amalgBoundaryNodes); - RCP > comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes - << " agglomerated Dirichlet nodes" << std::endl; - } + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; + + for (LO i = 0; i < amalgBoundaryNodes.size(); ++i) + if (amalgBoundaryNodes[i]) + numLocalBoundaryNodes++; - Set(currentLevel, "Graph", graph); - Set(currentLevel, "DofsPerNode", blkSize); // full block size + RCP> comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes + << " agglomerated Dirichlet nodes" << std::endl; } - } else if (algo == "distance laplacian") { - LO blkSize = A->GetFixedBlockSize(); - GO indexBase = A->getRowMap()->getIndexBase(); - // [*0*] : FIXME - // ap: somehow, if I move this line to [*1*], Belos throws an error - // I'm not sure what's going on. Do we always have to Get data, if we did - // DeclareInput for it? - // RCP Coords = Get< RCP >(currentLevel, "Coordinates"); + Set(currentLevel, "Graph", graph); + Set(currentLevel, "DofsPerNode", blkSize); // full block size + } - // Detect and record rows that correspond to Dirichlet boundary conditions - // TODO If we use ArrayRCP, then we can record boundary nodes as usual. Size - // TODO the array one bigger than the number of local rows, and the last entry can - // TODO hold the actual number of boundary nodes. Clever, huh? - ArrayRCP pointBoundaryNodes; - pointBoundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); - if (rowSumTol > 0.) 
- Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes); + } else if (algo == "distance laplacian") { + LO blkSize = A->GetFixedBlockSize(); + GO indexBase = A->getRowMap()->getIndexBase(); + // [*0*] : FIXME + // ap: somehow, if I move this line to [*1*], Belos throws an error + // I'm not sure what's going on. Do we always have to Get data, if we did + // DeclareInput for it? + // RCP Coords = Get< RCP >(currentLevel, "Coordinates"); - if ( (blkSize == 1) && (threshold == STS::zero()) ) { - // Trivial case: scalar problem, no dropping. Can return original graph - RCP graph = rcp(new Graph(A->getCrsGraph(), "graph of A")); - graph->SetBoundaryNodeMap(pointBoundaryNodes); - graphType="unamalgamated"; - numTotal = A->getLocalNumEntries(); - - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; - for (LO i = 0; i < pointBoundaryNodes.size(); ++i) - if (pointBoundaryNodes[i]) - numLocalBoundaryNodes++; - RCP > comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; - } + // Detect and record rows that correspond to Dirichlet boundary conditions + // TODO If we use ArrayRCP, then we can record boundary nodes as usual. Size + // TODO the array one bigger than the number of local rows, and the last entry can + // TODO hold the actual number of boundary nodes. Clever, huh? + ArrayRCP pointBoundaryNodes; + pointBoundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); + if (rowSumTol > 0.) + Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes); + + if ((blkSize == 1) && (threshold == STS::zero())) { + // Trivial case: scalar problem, no dropping. 
Can return original graph + RCP graph = rcp(new Graph(A->getCrsGraph(), "graph of A")); + graph->SetBoundaryNodeMap(pointBoundaryNodes); + graphType = "unamalgamated"; + numTotal = A->getLocalNumEntries(); + + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; + for (LO i = 0; i < pointBoundaryNodes.size(); ++i) + if (pointBoundaryNodes[i]) + numLocalBoundaryNodes++; + RCP> comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; + } - Set(currentLevel, "DofsPerNode", blkSize); - Set(currentLevel, "Graph", graph); + Set(currentLevel, "DofsPerNode", blkSize); + Set(currentLevel, "Graph", graph); + + } else { + // ap: We make quite a few assumptions here; general case may be a lot different, + // but much much harder to implement. We assume that: + // 1) all maps are standard maps, not strided maps + // 2) global indices of dofs in A are related to dofs in coordinates in a simple arithmetic + // way: rows i*blkSize, i*blkSize+1, ..., i*blkSize + (blkSize-1) correspond to node i + // + // NOTE: Potentially, some of the code below could be simplified with UnAmalgamationInfo, + // but as I totally don't understand that code, here is my solution + + // [*1*]: see [*0*] + + // Check that the number of local coordinates is consistent with the #rows in A + TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements() / blkSize != Coords->getLocalLength(), Exceptions::Incompatible, + "Coordinate vector length (" << Coords->getLocalLength() << ") is incompatible with number of rows in A (" << A->getRowMap()->getLocalNumElements() << ") by modulo block size (" << blkSize << ")."); + + const RCP colMap = A->getColMap(); + RCP uniqueMap, nonUniqueMap; + Array colTranslation; + if (blkSize == 1) { + uniqueMap = A->getRowMap(); + nonUniqueMap = A->getColMap(); + graphType = 
"unamalgamated"; } else { - // ap: We make quite a few assumptions here; general case may be a lot different, - // but much much harder to implement. We assume that: - // 1) all maps are standard maps, not strided maps - // 2) global indices of dofs in A are related to dofs in coordinates in a simple arithmetic - // way: rows i*blkSize, i*blkSize+1, ..., i*blkSize + (blkSize-1) correspond to node i - // - // NOTE: Potentially, some of the code below could be simplified with UnAmalgamationInfo, - // but as I totally don't understand that code, here is my solution - - // [*1*]: see [*0*] - - // Check that the number of local coordinates is consistent with the #rows in A - TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements()/blkSize != Coords->getLocalLength(), Exceptions::Incompatible, - "Coordinate vector length (" << Coords->getLocalLength() << ") is incompatible with number of rows in A (" << A->getRowMap()->getLocalNumElements() << ") by modulo block size ("<< blkSize <<")."); - - const RCP colMap = A->getColMap(); - RCP uniqueMap, nonUniqueMap; - Array colTranslation; - if (blkSize == 1) { - uniqueMap = A->getRowMap(); - nonUniqueMap = A->getColMap(); - graphType="unamalgamated"; + uniqueMap = Coords->getMap(); + TEUCHOS_TEST_FOR_EXCEPTION(uniqueMap->getIndexBase() != indexBase, Exceptions::Incompatible, + "Different index bases for matrix and coordinates"); - } else { - uniqueMap = Coords->getMap(); - TEUCHOS_TEST_FOR_EXCEPTION(uniqueMap->getIndexBase() != indexBase, Exceptions::Incompatible, - "Different index bases for matrix and coordinates"); + AmalgamationFactory::AmalgamateMap(*(A->getColMap()), *A, nonUniqueMap, colTranslation); - AmalgamationFactory::AmalgamateMap(*(A->getColMap()), *A, nonUniqueMap, colTranslation); - - graphType = "amalgamated"; - } - LO numRows = Teuchos::as(uniqueMap->getLocalNumElements()); - - RCP ghostedCoords; - RCP ghostedLaplDiag; - Teuchos::ArrayRCP ghostedLaplDiagData; - if (threshold != STS::zero()) { - // Get 
ghost coordinates - RCP importer; - { - SubFactoryMonitor m1(*this, "Import construction", currentLevel); - if (blkSize == 1 && realA->getCrsGraph()->getImporter() != Teuchos::null) { - GetOStream(Warnings1) << "Using existing importer from matrix graph" << std::endl; - importer = realA->getCrsGraph()->getImporter(); - } else { - GetOStream(Warnings0) << "Constructing new importer instance" << std::endl; - importer = ImportFactory::Build(uniqueMap, nonUniqueMap); - } - } //subtimer - ghostedCoords = Xpetra::MultiVectorFactory::Build(nonUniqueMap, Coords->getNumVectors()); - { + graphType = "amalgamated"; + } + LO numRows = Teuchos::as(uniqueMap->getLocalNumElements()); + + RCP ghostedCoords; + RCP ghostedLaplDiag; + Teuchos::ArrayRCP ghostedLaplDiagData; + if (threshold != STS::zero()) { + // Get ghost coordinates + RCP importer; + { + SubFactoryMonitor m1(*this, "Import construction", currentLevel); + if (blkSize == 1 && realA->getCrsGraph()->getImporter() != Teuchos::null) { + GetOStream(Warnings1) << "Using existing importer from matrix graph" << std::endl; + importer = realA->getCrsGraph()->getImporter(); + } else { + GetOStream(Warnings0) << "Constructing new importer instance" << std::endl; + importer = ImportFactory::Build(uniqueMap, nonUniqueMap); + } + } //subtimer + ghostedCoords = Xpetra::MultiVectorFactory::Build(nonUniqueMap, Coords->getNumVectors()); + { SubFactoryMonitor m1(*this, "Coordinate import", currentLevel); ghostedCoords->doImport(*Coords, *importer, Xpetra::INSERT); - } //subtimer + } //subtimer - // Construct Distance Laplacian diagonal - RCP localLaplDiag = VectorFactory::Build(uniqueMap); - Array indicesExtra; - Teuchos::Array> coordData; - if (threshold != STS::zero()) { - const size_t numVectors = ghostedCoords->getNumVectors(); - coordData.reserve(numVectors); - for (size_t j = 0; j < numVectors; j++) { - Teuchos::ArrayRCP tmpData=ghostedCoords->getData(j); - coordData.push_back(tmpData); - } + // Construct Distance Laplacian diagonal 
+ RCP localLaplDiag = VectorFactory::Build(uniqueMap); + Array indicesExtra; + Teuchos::Array> coordData; + if (threshold != STS::zero()) { + const size_t numVectors = ghostedCoords->getNumVectors(); + coordData.reserve(numVectors); + for (size_t j = 0; j < numVectors; j++) { + Teuchos::ArrayRCP tmpData = ghostedCoords->getData(j); + coordData.push_back(tmpData); } - { + } + { SubFactoryMonitor m1(*this, "Laplacian local diagonal", currentLevel); ArrayRCP localLaplDiagData = localLaplDiag->getDataNonConst(0); for (LO row = 0; row < numRows; row++) { @@ -1144,26 +1119,24 @@ namespace MueLu { indices = indicesExtra; } - LO nnz = indices.size(); + LO nnz = indices.size(); bool haveAddedToDiag = false; for (LO colID = 0; colID < nnz; colID++) { const LO col = indices[colID]; if (row != col) { - if(use_dlap_weights == SINGLE_WEIGHTS) { + if (use_dlap_weights == SINGLE_WEIGHTS) { /*printf("[%d,%d] Unweighted Distance = %6.4e Weighted Distance = %6.4e\n",row,col, MueLu::Utilities::Distance2(coordData, row, col), MueLu::Utilities::Distance2(dlap_weights(),coordData, row, col));*/ - localLaplDiagData[row] += STS::one() / MueLu::Utilities::Distance2(dlap_weights(),coordData, row, col); - } - else if(use_dlap_weights == BLOCK_WEIGHTS) { - int block_id = row % interleaved_blocksize; + localLaplDiagData[row] += STS::one() / MueLu::Utilities::Distance2(dlap_weights(), coordData, row, col); + } else if (use_dlap_weights == BLOCK_WEIGHTS) { + int block_id = row % interleaved_blocksize; int block_start = block_id * interleaved_blocksize; - localLaplDiagData[row] += STS::one() / MueLu::Utilities::Distance2(dlap_weights(block_start,interleaved_blocksize),coordData, row, col); - } - else { + localLaplDiagData[row] += STS::one() / MueLu::Utilities::Distance2(dlap_weights(block_start, interleaved_blocksize), coordData, row, col); + } else { // printf("[%d,%d] Unweighted Distance = %6.4e\n",row,col,MueLu::Utilities::Distance2(coordData, row, col)); - localLaplDiagData[row] += STS::one() 
/ MueLu::Utilities::Distance2(coordData, row, col); + localLaplDiagData[row] += STS::one() / MueLu::Utilities::Distance2(coordData, row, col); } haveAddedToDiag = true; } @@ -1173,67 +1146,67 @@ namespace MueLu { if (!haveAddedToDiag) localLaplDiagData[row] = STS::rmax(); } - } //subtimer - { + } //subtimer + { SubFactoryMonitor m1(*this, "Laplacian distributed diagonal", currentLevel); ghostedLaplDiag = VectorFactory::Build(nonUniqueMap); ghostedLaplDiag->doImport(*localLaplDiag, *importer, Xpetra::INSERT); ghostedLaplDiagData = ghostedLaplDiag->getDataNonConst(0); - } //subtimer + } //subtimer - } else { - GetOStream(Runtime0) << "Skipping distance laplacian construction due to 0 threshold" << std::endl; - } + } else { + GetOStream(Runtime0) << "Skipping distance laplacian construction due to 0 threshold" << std::endl; + } - // NOTE: ghostedLaplDiagData might be zero if we don't actually calculate the laplacian + // NOTE: ghostedLaplDiagData might be zero if we don't actually calculate the laplacian - // allocate space for the local graph - ArrayRCP rows = ArrayRCP(numRows+1); - ArrayRCP columns = ArrayRCP(A->getLocalNumEntries()); + // allocate space for the local graph + ArrayRCP rows = ArrayRCP(numRows + 1); + ArrayRCP columns = ArrayRCP(A->getLocalNumEntries()); #ifdef HAVE_MUELU_DEBUG - // DEBUGGING - for(LO i=0; i<(LO)columns.size(); i++) columns[i]=-666; + // DEBUGGING + for (LO i = 0; i < (LO)columns.size(); i++) columns[i] = -666; #endif - // Extra array for if we're allowing symmetrization with cutting - ArrayRCP rows_stop; - bool use_stop_array = threshold != STS::zero() && distanceLaplacianAlgo == scaled_cut_symmetric; - if(use_stop_array) - rows_stop.resize(numRows); - - const ArrayRCP amalgBoundaryNodes(numRows, false); + // Extra array for if we're allowing symmetrization with cutting + ArrayRCP rows_stop; + bool use_stop_array = threshold != STS::zero() && distanceLaplacianAlgo == scaled_cut_symmetric; + if (use_stop_array) + 
rows_stop.resize(numRows); - LO realnnz = 0; - rows[0] = 0; + const ArrayRCP amalgBoundaryNodes(numRows, false); - Array indicesExtra; - { + LO realnnz = 0; + rows[0] = 0; + + Array indicesExtra; + { SubFactoryMonitor m1(*this, "Laplacian dropping", currentLevel); Teuchos::Array> coordData; if (threshold != STS::zero()) { const size_t numVectors = ghostedCoords->getNumVectors(); coordData.reserve(numVectors); for (size_t j = 0; j < numVectors; j++) { - Teuchos::ArrayRCP tmpData=ghostedCoords->getData(j); + Teuchos::ArrayRCP tmpData = ghostedCoords->getData(j); coordData.push_back(tmpData); } } - ArrayView vals;//CMS hackery + ArrayView vals; //CMS hackery for (LO row = 0; row < numRows; row++) { ArrayView indices; indicesExtra.resize(0); - bool isBoundary = false; + bool isBoundary = false; if (blkSize == 1) { - // ArrayView vals;//CMS uncomment + // ArrayView vals;//CMS uncomment A->getLocalRowView(row, indices, vals); - isBoundary = pointBoundaryNodes[row]; + isBoundary = pointBoundaryNodes[row]; } else { // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet for (LO j = 0; j < blkSize; j++) { - if (!pointBoundaryNodes[row*blkSize+j]) { + if (!pointBoundaryNodes[row * blkSize + j]) { isBoundary = false; break; } @@ -1250,17 +1223,16 @@ namespace MueLu { LO nnz = indices.size(), rownnz = 0; - if(use_stop_array) { - rows[row+1] = rows[row]+nnz; - realnnz = rows[row]; - } + if (use_stop_array) { + rows[row + 1] = rows[row] + nnz; + realnnz = rows[row]; + } if (threshold != STS::zero()) { // default if (distanceLaplacianAlgo == defaultAlgo) { - /* Standard Distance Laplacian */ + /* Standard Distance Laplacian */ for (LO colID = 0; colID < nnz; colID++) { - LO col = indices[colID]; if (row == col) { @@ -1269,23 +1241,21 @@ namespace MueLu { continue; } - // We do not want the distance Laplacian aggregating boundary nodes - if(isBoundary) continue; + // We do not want the distance Laplacian aggregating boundary nodes + if (isBoundary) continue; 
SC laplVal; - if(use_dlap_weights == SINGLE_WEIGHTS) { - laplVal = STS::one() / MueLu::Utilities::Distance2(dlap_weights(),coordData, row, col); - } - else if(use_dlap_weights == BLOCK_WEIGHTS) { - int block_id = row % interleaved_blocksize; + if (use_dlap_weights == SINGLE_WEIGHTS) { + laplVal = STS::one() / MueLu::Utilities::Distance2(dlap_weights(), coordData, row, col); + } else if (use_dlap_weights == BLOCK_WEIGHTS) { + int block_id = row % interleaved_blocksize; int block_start = block_id * interleaved_blocksize; - laplVal = STS::one() / MueLu::Utilities::Distance2(dlap_weights(block_start,interleaved_blocksize),coordData, row, col); - } - else { - laplVal = STS::one() / MueLu::Utilities::Distance2(coordData, row, col); + laplVal = STS::one() / MueLu::Utilities::Distance2(dlap_weights(block_start, interleaved_blocksize), coordData, row, col); + } else { + laplVal = STS::one() / MueLu::Utilities::Distance2(coordData, row, col); } - real_type aiiajj = STS::magnitude(realThreshold*realThreshold * ghostedLaplDiagData[row]*ghostedLaplDiagData[col]); - real_type aij = STS::magnitude(laplVal*laplVal); + real_type aiiajj = STS::magnitude(realThreshold * realThreshold * ghostedLaplDiagData[row] * ghostedLaplDiagData[col]); + real_type aij = STS::magnitude(laplVal * laplVal); if (aij > aiiajj) { columns[realnnz++] = col; @@ -1295,8 +1265,8 @@ namespace MueLu { } } } else { - /* Cut Algorithm */ - using DropTol = Details::DropTol; + /* Cut Algorithm */ + using DropTol = Details::DropTol; std::vector drop_vec; drop_vec.reserve(nnz); const real_type zero = Teuchos::ScalarTraits::zero(); @@ -1304,31 +1274,28 @@ namespace MueLu { // find magnitudes for (LO colID = 0; colID < nnz; colID++) { - LO col = indices[colID]; if (row == col) { - drop_vec.emplace_back( zero, one, colID, false); + drop_vec.emplace_back(zero, one, colID, false); continue; } - // We do not want the distance Laplacian aggregating boundary nodes - if(isBoundary) continue; + // We do not want the distance 
Laplacian aggregating boundary nodes + if (isBoundary) continue; SC laplVal; - if(use_dlap_weights == SINGLE_WEIGHTS) { - laplVal = STS::one() / MueLu::Utilities::Distance2(dlap_weights(),coordData, row, col); - } - else if(use_dlap_weights == BLOCK_WEIGHTS) { - int block_id = row % interleaved_blocksize; + if (use_dlap_weights == SINGLE_WEIGHTS) { + laplVal = STS::one() / MueLu::Utilities::Distance2(dlap_weights(), coordData, row, col); + } else if (use_dlap_weights == BLOCK_WEIGHTS) { + int block_id = row % interleaved_blocksize; int block_start = block_id * interleaved_blocksize; - laplVal = STS::one() / MueLu::Utilities::Distance2(dlap_weights(block_start,interleaved_blocksize),coordData, row, col); - } - else { - laplVal = STS::one() / MueLu::Utilities::Distance2(coordData, row, col); + laplVal = STS::one() / MueLu::Utilities::Distance2(dlap_weights(block_start, interleaved_blocksize), coordData, row, col); + } else { + laplVal = STS::one() / MueLu::Utilities::Distance2(coordData, row, col); } - real_type aiiajj = STS::magnitude(ghostedLaplDiagData[row]*ghostedLaplDiagData[col]); - real_type aij = STS::magnitude(laplVal*laplVal); + real_type aiiajj = STS::magnitude(ghostedLaplDiagData[row] * ghostedLaplDiagData[col]); + real_type aij = STS::magnitude(laplVal * laplVal); drop_vec.emplace_back(aij, aiiajj, colID, false); } @@ -1336,53 +1303,46 @@ namespace MueLu { const size_t n = drop_vec.size(); if (distanceLaplacianAlgo == unscaled_cut) { - - std::sort( drop_vec.begin(), drop_vec.end() - , [](DropTol const& a, DropTol const& b) { - return a.val > b.val; - } - ); + std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) { + return a.val > b.val; + }); bool drop = false; - for (size_t i=1; i realThreshold*b) { + auto a = x.val; + auto b = y.val; + if (a > realThreshold * b) { drop = true; #ifdef HAVE_MUELU_DEBUG if (distanceLaplacianCutVerbose) { - std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl; + 
std::cout << "DJS: KEEP, N, ROW: " << i + 1 << ", " << n << ", " << row << std::endl; } #endif } } drop_vec[i].drop = drop; } - } - else if (distanceLaplacianAlgo == scaled_cut || distanceLaplacianAlgo == scaled_cut_symmetric) { - - std::sort( drop_vec.begin(), drop_vec.end() - , [](DropTol const& a, DropTol const& b) { - return a.val/a.diag > b.val/b.diag; - } - ); + } else if (distanceLaplacianAlgo == scaled_cut || distanceLaplacianAlgo == scaled_cut_symmetric) { + std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) { + return a.val / a.diag > b.val / b.diag; + }); bool drop = false; - for (size_t i=1; i realThreshold*b) { + auto a = x.val / x.diag; + auto b = y.val / y.diag; + if (a > realThreshold * b) { drop = true; #ifdef HAVE_MUELU_DEBUG if (distanceLaplacianCutVerbose) { - std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl; - } + std::cout << "DJS: KEEP, N, ROW: " << i + 1 << ", " << n << ", " << row << std::endl; + } #endif } } @@ -1390,30 +1350,27 @@ namespace MueLu { } } - std::sort( drop_vec.begin(), drop_vec.end() - , [](DropTol const& a, DropTol const& b) { - return a.col < b.col; - } - ); + std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) { + return a.col < b.col; + }); - for (LO idxID =0; idxID<(LO)drop_vec.size(); idxID++) { + for (LO idxID = 0; idxID < (LO)drop_vec.size(); idxID++) { LO col = indices[drop_vec[idxID].col]; - // don't drop diagonal if (row == col) { columns[realnnz++] = col; rownnz++; - // printf("(%d,%d) KEEP %13s matrix = %6.4e\n",row,row,"DIAGONAL",drop_vec[idxID].aux_val); + // printf("(%d,%d) KEEP %13s matrix = %6.4e\n",row,row,"DIAGONAL",drop_vec[idxID].aux_val); continue; } if (!drop_vec[idxID].drop) { columns[realnnz++] = col; - // printf("(%d,%d) KEEP dlap = %6.4e matrix = %6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val); + // printf("(%d,%d) KEEP dlap = %6.4e matrix = 
%6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val); rownnz++; } else { - // printf("(%d,%d) DROP dlap = %6.4e matrix = %6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val); + // printf("(%d,%d) DROP dlap = %6.4e matrix = %6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val); numDropped++; } } @@ -1421,13 +1378,13 @@ namespace MueLu { } else { // Skip laplace calculation and threshold comparison for zero threshold for (LO colID = 0; colID < nnz; colID++) { - LO col = indices[colID]; + LO col = indices[colID]; columns[realnnz++] = col; rownnz++; } } - if ( rownnz == 1) { + if (rownnz == 1) { // If the only element remaining after filtering is diagonal, mark node as boundary // FIXME: this should really be replaced by the following // if (indices.size() == 1 && indices[0] == row) @@ -1437,584 +1394,575 @@ namespace MueLu { amalgBoundaryNodes[row] = true; } - if(use_stop_array) - rows_stop[row] = rownnz + rows[row]; - else - rows[row+1] = realnnz; - } //for (LO row = 0; row < numRows; row++) - - } //subtimer - - if (use_stop_array) { - // Do symmetrization of the cut matrix - // NOTE: We assume nested row/column maps here - for (LO row = 0; row < numRows; row++) { - for (LO colidx = rows[row]; colidx < rows_stop[row]; colidx++) { - LO col = columns[colidx]; - if(col >= numRows) continue; - - bool found = false; - for(LO t_col = rows[col] ; !found && t_col < rows_stop[col]; t_col++) { - if (columns[t_col] == row) - found = true; - } - // We didn't find the transpose buddy, so let's symmetrize, unless we'd be symmetrizing - // into a Dirichlet unknown. In that case don't. 
- if(!found && !pointBoundaryNodes[col] && rows_stop[col] < rows[col+1]) { - LO new_idx = rows_stop[col]; - // printf("(%d,%d) SYMADD entry\n",col,row); - columns[new_idx] = row; - rows_stop[col]++; - numDropped--; - } - } - } - - // Condense everything down - LO current_start=0; - for (LO row = 0; row < numRows; row++) { - LO old_start = current_start; - for (LO col = rows[row]; col < rows_stop[row]; col++) { - if(current_start != col) { - columns[current_start] = columns[col]; - } - current_start++; - } - rows[row] = old_start; - } - rows[numRows] = realnnz = current_start; - - } - - columns.resize(realnnz); - - RCP graph; - { + if (use_stop_array) + rows_stop[row] = rownnz + rows[row]; + else + rows[row + 1] = realnnz; + } //for (LO row = 0; row < numRows; row++) + + } //subtimer + + if (use_stop_array) { + // Do symmetrization of the cut matrix + // NOTE: We assume nested row/column maps here + for (LO row = 0; row < numRows; row++) { + for (LO colidx = rows[row]; colidx < rows_stop[row]; colidx++) { + LO col = columns[colidx]; + if (col >= numRows) continue; + + bool found = false; + for (LO t_col = rows[col]; !found && t_col < rows_stop[col]; t_col++) { + if (columns[t_col] == row) + found = true; + } + // We didn't find the transpose buddy, so let's symmetrize, unless we'd be symmetrizing + // into a Dirichlet unknown. In that case don't. 
+ if (!found && !pointBoundaryNodes[col] && rows_stop[col] < rows[col + 1]) { + LO new_idx = rows_stop[col]; + // printf("(%d,%d) SYMADD entry\n",col,row); + columns[new_idx] = row; + rows_stop[col]++; + numDropped--; + } + } + } + + // Condense everything down + LO current_start = 0; + for (LO row = 0; row < numRows; row++) { + LO old_start = current_start; + for (LO col = rows[row]; col < rows_stop[row]; col++) { + if (current_start != col) { + columns[current_start] = columns[col]; + } + current_start++; + } + rows[row] = old_start; + } + rows[numRows] = realnnz = current_start; + } + + columns.resize(realnnz); + + RCP graph; + { SubFactoryMonitor m1(*this, "Build amalgamated graph", currentLevel); graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A")); graph->SetBoundaryNodeMap(amalgBoundaryNodes); - } //subtimer + } //subtimer - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; - for (LO i = 0; i < amalgBoundaryNodes.size(); ++i) - if (amalgBoundaryNodes[i]) - numLocalBoundaryNodes++; + for (LO i = 0; i < amalgBoundaryNodes.size(); ++i) + if (amalgBoundaryNodes[i]) + numLocalBoundaryNodes++; - RCP > comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " agglomerated Dirichlet nodes" - << " using threshold " << dirichletThreshold << std::endl; - } + RCP> comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " agglomerated Dirichlet nodes" + << " using threshold " << dirichletThreshold << std::endl; + } + + Set(currentLevel, "Graph", graph); + Set(currentLevel, "DofsPerNode", blkSize); + } + } + + if ((GetVerbLevel() & Statistics1) && 
!(A->GetFixedBlockSize() > 1 && threshold != STS::zero())) { + RCP> comm = A->getRowMap()->getComm(); + GO numGlobalTotal, numGlobalDropped; + MueLu_sumAll(comm, numTotal, numGlobalTotal); + MueLu_sumAll(comm, numDropped, numGlobalDropped); + GetOStream(Statistics1) << "Number of dropped entries in " << graphType << " matrix graph: " << numGlobalDropped << "/" << numGlobalTotal; + if (numGlobalTotal != 0) + GetOStream(Statistics1) << " (" << 100 * Teuchos::as(numGlobalDropped) / Teuchos::as(numGlobalTotal) << "%)"; + GetOStream(Statistics1) << std::endl; + } + + } else { + //what Tobias has implemented + + SC threshold = as(pL.get("aggregation: drop tol")); + //GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; + GetOStream(Runtime0) << "algorithm = \"" + << "failsafe" + << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; + Set(currentLevel, "Filtering", (threshold != STS::zero())); + + RCP rowMap = A->getRowMap(); + RCP colMap = A->getColMap(); + + LO blockdim = 1; // block dim for fixed size blocks + GO indexBase = rowMap->getIndexBase(); // index base of maps + GO offset = 0; + + // 1) check for blocking/striding information + if (A->IsView("stridedMaps") && + Teuchos::rcp_dynamic_cast(A->getRowMap("stridedMaps")) != Teuchos::null) { + Xpetra::viewLabel_t oldView = A->SwitchToView("stridedMaps"); // note: "stridedMaps are always non-overlapping (correspond to range and domain maps!) + RCP strMap = Teuchos::rcp_dynamic_cast(A->getRowMap()); + TEUCHOS_TEST_FOR_EXCEPTION(strMap == Teuchos::null, Exceptions::BadCast, "MueLu::CoalesceFactory::Build: cast to strided row map failed."); + blockdim = strMap->getFixedBlockSize(); + offset = strMap->getOffset(); + oldView = A->SwitchToView(oldView); + GetOStream(Statistics1) << "CoalesceDropFactory::Build():" + << " found blockdim=" << blockdim << " from strided maps. 
offset=" << offset << std::endl; + } else + GetOStream(Statistics1) << "CoalesceDropFactory::Build(): no striding information available. Use blockdim=1 with offset=0" << std::endl; + + // 2) get row map for amalgamated matrix (graph of A) + // with same distribution over all procs as row map of A + RCP nodeMap = amalInfo->getNodeRowMap(); + GetOStream(Statistics1) << "CoalesceDropFactory: nodeMap " << nodeMap->getLocalNumElements() << "/" << nodeMap->getGlobalNumElements() << " elements" << std::endl; + + // 3) create graph of amalgamated matrix + RCP crsGraph = CrsGraphFactory::Build(nodeMap, A->getLocalMaxNumRowEntries() * blockdim); + + LO numRows = A->getRowMap()->getLocalNumElements(); + LO numNodes = nodeMap->getLocalNumElements(); + const ArrayRCP amalgBoundaryNodes(numNodes, false); + const ArrayRCP numberDirichletRowsPerNode(numNodes, 0); // helper array counting the number of Dirichlet nodes associated with node + bool bIsDiagonalEntry = false; // boolean flag stating that grid==gcid + + // 4) do amalgamation. 
generate graph of amalgamated matrix + // Note, this code is much more inefficient than the leightwight implementation + // Most of the work has already been done in the AmalgamationFactory + for (LO row = 0; row < numRows; row++) { + // get global DOF id + GO grid = rowMap->getGlobalElement(row); + + // reinitialize boolean helper variable + bIsDiagonalEntry = false; + + // translate grid to nodeid + GO nodeId = AmalgamationFactory::DOFGid2NodeId(grid, blockdim, offset, indexBase); - Set(currentLevel, "Graph", graph); - Set(currentLevel, "DofsPerNode", blkSize); + size_t nnz = A->getNumEntriesInLocalRow(row); + Teuchos::ArrayView indices; + Teuchos::ArrayView vals; + A->getLocalRowView(row, indices, vals); + + RCP> cnodeIds = Teuchos::rcp(new std::vector); // global column block ids + LO realnnz = 0; + for (LO col = 0; col < Teuchos::as(nnz); col++) { + GO gcid = colMap->getGlobalElement(indices[col]); // global column id + + if (vals[col] != STS::zero()) { + GO cnodeId = AmalgamationFactory::DOFGid2NodeId(gcid, blockdim, offset, indexBase); + cnodeIds->push_back(cnodeId); + realnnz++; // increment number of nnz in matrix row + if (grid == gcid) bIsDiagonalEntry = true; } } - if ((GetVerbLevel() & Statistics1) && !(A->GetFixedBlockSize() > 1 && threshold != STS::zero())) { - RCP > comm = A->getRowMap()->getComm(); - GO numGlobalTotal, numGlobalDropped; - MueLu_sumAll(comm, numTotal, numGlobalTotal); - MueLu_sumAll(comm, numDropped, numGlobalDropped); - GetOStream(Statistics1) << "Number of dropped entries in " << graphType << " matrix graph: " << numGlobalDropped << "/" << numGlobalTotal; - if (numGlobalTotal != 0) - GetOStream(Statistics1) << " (" << 100*Teuchos::as(numGlobalDropped)/Teuchos::as(numGlobalTotal) << "%)"; - GetOStream(Statistics1) << std::endl; + if (realnnz == 1 && bIsDiagonalEntry == true) { + LO lNodeId = nodeMap->getLocalElement(nodeId); + numberDirichletRowsPerNode[lNodeId] += 1; // increment Dirichlet row counter associated with lNodeId + if 
(numberDirichletRowsPerNode[lNodeId] == blockdim) // mark full Dirichlet nodes + amalgBoundaryNodes[lNodeId] = true; } - } else { - //what Tobias has implemented - - SC threshold = as(pL.get("aggregation: drop tol")); - //GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; - GetOStream(Runtime0) << "algorithm = \"" << "failsafe" << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; - Set(currentLevel, "Filtering", (threshold != STS::zero())); - - RCP rowMap = A->getRowMap(); - RCP colMap = A->getColMap(); - - LO blockdim = 1; // block dim for fixed size blocks - GO indexBase = rowMap->getIndexBase(); // index base of maps - GO offset = 0; - - // 1) check for blocking/striding information - if(A->IsView("stridedMaps") && - Teuchos::rcp_dynamic_cast(A->getRowMap("stridedMaps")) != Teuchos::null) { - Xpetra::viewLabel_t oldView = A->SwitchToView("stridedMaps"); // note: "stridedMaps are always non-overlapping (correspond to range and domain maps!) - RCP strMap = Teuchos::rcp_dynamic_cast(A->getRowMap()); - TEUCHOS_TEST_FOR_EXCEPTION(strMap == Teuchos::null,Exceptions::BadCast,"MueLu::CoalesceFactory::Build: cast to strided row map failed."); - blockdim = strMap->getFixedBlockSize(); - offset = strMap->getOffset(); - oldView = A->SwitchToView(oldView); - GetOStream(Statistics1) << "CoalesceDropFactory::Build():" << " found blockdim=" << blockdim << " from strided maps. offset=" << offset << std::endl; - } else GetOStream(Statistics1) << "CoalesceDropFactory::Build(): no striding information available. 
Use blockdim=1 with offset=0" << std::endl; - - // 2) get row map for amalgamated matrix (graph of A) - // with same distribution over all procs as row map of A - RCP nodeMap = amalInfo->getNodeRowMap(); - GetOStream(Statistics1) << "CoalesceDropFactory: nodeMap " << nodeMap->getLocalNumElements() << "/" << nodeMap->getGlobalNumElements() << " elements" << std::endl; - - // 3) create graph of amalgamated matrix - RCP crsGraph = CrsGraphFactory::Build(nodeMap, A->getLocalMaxNumRowEntries()*blockdim); - - LO numRows = A->getRowMap()->getLocalNumElements(); - LO numNodes = nodeMap->getLocalNumElements(); - const ArrayRCP amalgBoundaryNodes(numNodes, false); - const ArrayRCP numberDirichletRowsPerNode(numNodes, 0); // helper array counting the number of Dirichlet nodes associated with node - bool bIsDiagonalEntry = false; // boolean flag stating that grid==gcid - - // 4) do amalgamation. generate graph of amalgamated matrix - // Note, this code is much more inefficient than the leightwight implementation - // Most of the work has already been done in the AmalgamationFactory - for(LO row=0; rowgetGlobalElement(row); - - // reinitialize boolean helper variable - bIsDiagonalEntry = false; - - // translate grid to nodeid - GO nodeId = AmalgamationFactory::DOFGid2NodeId(grid, blockdim, offset, indexBase); - - size_t nnz = A->getNumEntriesInLocalRow(row); - Teuchos::ArrayView indices; - Teuchos::ArrayView vals; - A->getLocalRowView(row, indices, vals); - - RCP > cnodeIds = Teuchos::rcp(new std::vector); // global column block ids - LO realnnz = 0; - for(LO col=0; col(nnz); col++) { - GO gcid = colMap->getGlobalElement(indices[col]); // global column id - - if(vals[col]!=STS::zero()) { - GO cnodeId = AmalgamationFactory::DOFGid2NodeId(gcid, blockdim, offset, indexBase); - cnodeIds->push_back(cnodeId); - realnnz++; // increment number of nnz in matrix row - if (grid == gcid) bIsDiagonalEntry = true; - } - } + Teuchos::ArrayRCP arr_cnodeIds = Teuchos::arcp(cnodeIds); - 
if(realnnz == 1 && bIsDiagonalEntry == true) { - LO lNodeId = nodeMap->getLocalElement(nodeId); - numberDirichletRowsPerNode[lNodeId] += 1; // increment Dirichlet row counter associated with lNodeId - if (numberDirichletRowsPerNode[lNodeId] == blockdim) // mark full Dirichlet nodes - amalgBoundaryNodes[lNodeId] = true; - } + if (arr_cnodeIds.size() > 0) + crsGraph->insertGlobalIndices(nodeId, arr_cnodeIds()); + } + // fill matrix graph + crsGraph->fillComplete(nodeMap, nodeMap); - Teuchos::ArrayRCP arr_cnodeIds = Teuchos::arcp( cnodeIds ); + // 5) create MueLu Graph object + RCP graph = rcp(new Graph(crsGraph, "amalgamated graph of A")); - if(arr_cnodeIds.size() > 0 ) - crsGraph->insertGlobalIndices(nodeId, arr_cnodeIds()); - } - // fill matrix graph - crsGraph->fillComplete(nodeMap,nodeMap); + // Detect and record rows that correspond to Dirichlet boundary conditions + graph->SetBoundaryNodeMap(amalgBoundaryNodes); - // 5) create MueLu Graph object - RCP graph = rcp(new Graph(crsGraph, "amalgamated graph of A")); + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; + for (LO i = 0; i < amalgBoundaryNodes.size(); ++i) + if (amalgBoundaryNodes[i]) + numLocalBoundaryNodes++; + RCP> comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; + } - // Detect and record rows that correspond to Dirichlet boundary conditions - graph->SetBoundaryNodeMap(amalgBoundaryNodes); - - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; - for (LO i = 0; i < amalgBoundaryNodes.size(); ++i) - if (amalgBoundaryNodes[i]) - numLocalBoundaryNodes++; - RCP > comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" 
<< std::endl; - } + // 6) store results in Level + //graph->SetBoundaryNodeMap(gBoundaryNodeMap); + Set(currentLevel, "DofsPerNode", blockdim); + Set(currentLevel, "Graph", graph); - // 6) store results in Level - //graph->SetBoundaryNodeMap(gBoundaryNodeMap); - Set(currentLevel, "DofsPerNode", blockdim); - Set(currentLevel, "Graph", graph); + } //if (doExperimentalWrap) ... else ... - } //if (doExperimentalWrap) ... else ... +} //Build +template +void CoalesceDropFactory::MergeRows(const Matrix& A, const LO row, Array& cols, const Array& translation) const { + typedef typename ArrayView::size_type size_type; - } //Build + // extract striding information + LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map + if (A.IsView("stridedMaps") == true) { + Teuchos::RCP myMap = A.getRowMap("stridedMaps"); + Teuchos::RCP strMap = Teuchos::rcp_dynamic_cast(myMap); + TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap"); + if (strMap->getStridedBlockId() > -1) + blkSize = Teuchos::as(strMap->getStridingData()[strMap->getStridedBlockId()]); + } - template - void CoalesceDropFactory::MergeRows(const Matrix& A, const LO row, Array& cols, const Array& translation) const { - typedef typename ArrayView::size_type size_type; + // count nonzero entries in all dof rows associated with node row + size_t nnz = 0, pos = 0; + for (LO j = 0; j < blkSize; j++) + nnz += A.getNumEntriesInLocalRow(row * blkSize + j); - // extract striding information - LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map - if (A.IsView("stridedMaps") == true) { - Teuchos::RCP myMap = A.getRowMap("stridedMaps"); - Teuchos::RCP strMap = Teuchos::rcp_dynamic_cast(myMap); - TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap"); - if (strMap->getStridedBlockId() > -1) - blkSize = 
Teuchos::as(strMap->getStridingData()[strMap->getStridedBlockId()]); + if (nnz == 0) { + cols.resize(0); + return; + } + + cols.resize(nnz); + + // loop over all local dof rows associated with local node "row" + ArrayView inds; + ArrayView vals; + for (LO j = 0; j < blkSize; j++) { + A.getLocalRowView(row * blkSize + j, inds, vals); + size_type numIndices = inds.size(); + + if (numIndices == 0) // skip empty dof rows + continue; + + // cols: stores all local node ids for current local node id "row" + cols[pos++] = translation[inds[0]]; + for (size_type k = 1; k < numIndices; k++) { + LO nodeID = translation[inds[k]]; + // Here we try to speed up the process by reducing the size of an array + // to sort. This works if the column nonzeros belonging to the same + // node are stored consequently. + if (nodeID != cols[pos - 1]) + cols[pos++] = nodeID; } + } + cols.resize(pos); + nnz = pos; + + // Sort and remove duplicates + std::sort(cols.begin(), cols.end()); + pos = 0; + for (size_t j = 1; j < nnz; j++) + if (cols[j] != cols[pos]) + cols[++pos] = cols[j]; + cols.resize(pos + 1); +} + +template +void CoalesceDropFactory::MergeRowsWithDropping(const Matrix& A, const LO row, const ArrayRCP& ghostedDiagVals, SC threshold, Array& cols, const Array& translation) const { + typedef typename ArrayView::size_type size_type; + typedef Teuchos::ScalarTraits STS; + + // extract striding information + LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map + if (A.IsView("stridedMaps") == true) { + Teuchos::RCP myMap = A.getRowMap("stridedMaps"); + Teuchos::RCP strMap = Teuchos::rcp_dynamic_cast(myMap); + TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap"); + if (strMap->getStridedBlockId() > -1) + blkSize = Teuchos::as(strMap->getStridingData()[strMap->getStridedBlockId()]); + } - // count nonzero entries in all dof rows associated with node row - size_t nnz = 0, pos = 0; - for (LO j = 0; j < 
blkSize; j++) - nnz += A.getNumEntriesInLocalRow(row*blkSize+j); + // count nonzero entries in all dof rows associated with node row + size_t nnz = 0, pos = 0; + for (LO j = 0; j < blkSize; j++) + nnz += A.getNumEntriesInLocalRow(row * blkSize + j); - if (nnz == 0) { - cols.resize(0); - return; - } + if (nnz == 0) { + cols.resize(0); + return; + } - cols.resize(nnz); + cols.resize(nnz); - // loop over all local dof rows associated with local node "row" - ArrayView inds; - ArrayView vals; - for (LO j = 0; j < blkSize; j++) { - A.getLocalRowView(row*blkSize+j, inds, vals); - size_type numIndices = inds.size(); + // loop over all local dof rows associated with local node "row" + ArrayView inds; + ArrayView vals; + for (LO j = 0; j < blkSize; j++) { + A.getLocalRowView(row * blkSize + j, inds, vals); + size_type numIndices = inds.size(); + + if (numIndices == 0) // skip empty dof rows + continue; + + // cols: stores all local node ids for current local node id "row" + LO prevNodeID = -1; + for (size_type k = 0; k < numIndices; k++) { + LO dofID = inds[k]; + LO nodeID = translation[inds[k]]; + + // we avoid a square root by using squared values + typename STS::magnitudeType aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[dofID] * ghostedDiagVals[row * blkSize + j]); // eps^2 * |a_ii| * |a_jj| + typename STS::magnitudeType aij = STS::magnitude(vals[k] * vals[k]); - if (numIndices == 0) // skip empty dof rows - continue; + // check dropping criterion + if (aij > aiiajj || (row * blkSize + j == dofID)) { + // accept entry in graph - // cols: stores all local node ids for current local node id "row" - cols[pos++] = translation[inds[0]]; - for (size_type k = 1; k < numIndices; k++) { - LO nodeID = translation[inds[k]]; // Here we try to speed up the process by reducing the size of an array // to sort. This works if the column nonzeros belonging to the same // node are stored consequently. 
- if (nodeID != cols[pos-1]) + if (nodeID != prevNodeID) { cols[pos++] = nodeID; + prevNodeID = nodeID; + } } } - cols.resize(pos); - nnz = pos; - - // Sort and remove duplicates - std::sort(cols.begin(), cols.end()); - pos = 0; - for (size_t j = 1; j < nnz; j++) - if (cols[j] != cols[pos]) - cols[++pos] = cols[j]; - cols.resize(pos+1); + } + cols.resize(pos); + nnz = pos; + + // Sort and remove duplicates + std::sort(cols.begin(), cols.end()); + pos = 0; + for (size_t j = 1; j < nnz; j++) + if (cols[j] != cols[pos]) + cols[++pos] = cols[j]; + cols.resize(pos + 1); + + return; +} + +template +Teuchos::RCP> CoalesceDropFactory::BlockDiagonalize(Level& currentLevel, const RCP& A, bool generate_matrix) const { + typedef Teuchos::ScalarTraits STS; + + const ParameterList& pL = GetParameterList(); + const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as(pL.get("aggregation: Dirichlet threshold"))); + const typename STS::magnitudeType rowSumTol = as(pL.get("aggregation: row sum drop tol")); + + RCP BlockNumber = Get>(currentLevel, "BlockNumber"); + RCP ghostedBlockNumber; + GetOStream(Statistics1) << "Using BlockDiagonal Graph before dropping (with provided blocking)" << std::endl; + + // Ghost the column block numbers if we need to + RCP importer = A->getCrsGraph()->getImporter(); + if (!importer.is_null()) { + SubFactoryMonitor m1(*this, "Block Number import", currentLevel); + ghostedBlockNumber = Xpetra::VectorFactory::Build(importer->getTargetMap()); + ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT); + } else { + ghostedBlockNumber = BlockNumber; } - template - void CoalesceDropFactory::MergeRowsWithDropping(const Matrix& A, const LO row, const ArrayRCP& ghostedDiagVals, SC threshold, Array& cols, const Array& translation) const { - typedef typename ArrayView::size_type size_type; - typedef Teuchos::ScalarTraits STS; + // Accessors for block numbers + Teuchos::ArrayRCP row_block_number = BlockNumber->getData(0); + 
Teuchos::ArrayRCP col_block_number = ghostedBlockNumber->getData(0); + + // allocate space for the local graph + ArrayRCP rows_mat; + ArrayRCP rows_graph, columns; + ArrayRCP values; + RCP crs_matrix_wrap; + + if (generate_matrix) { + crs_matrix_wrap = rcp(new CrsMatrixWrap(A->getRowMap(), A->getColMap(), 0)); + crs_matrix_wrap->getCrsMatrix()->allocateAllValues(A->getLocalNumEntries(), rows_mat, columns, values); + } else { + rows_graph.resize(A->getLocalNumRows() + 1); + columns.resize(A->getLocalNumEntries()); + values.resize(A->getLocalNumEntries()); + } - // extract striding information - LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map - if (A.IsView("stridedMaps") == true) { - Teuchos::RCP myMap = A.getRowMap("stridedMaps"); - Teuchos::RCP strMap = Teuchos::rcp_dynamic_cast(myMap); - TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap"); - if (strMap->getStridedBlockId() > -1) - blkSize = Teuchos::as(strMap->getStridingData()[strMap->getStridedBlockId()]); - } + LO realnnz = 0; + GO numDropped = 0, numTotal = 0; + for (LO row = 0; row < Teuchos::as(A->getRowMap()->getLocalNumElements()); ++row) { + LO row_block = row_block_number[row]; + size_t nnz = A->getNumEntriesInLocalRow(row); + ArrayView indices; + ArrayView vals; + A->getLocalRowView(row, indices, vals); - // count nonzero entries in all dof rows associated with node row - size_t nnz = 0, pos = 0; - for (LO j = 0; j < blkSize; j++) - nnz += A.getNumEntriesInLocalRow(row*blkSize+j); + LO rownnz = 0; + for (LO colID = 0; colID < Teuchos::as(nnz); colID++) { + LO col = indices[colID]; + LO col_block = col_block_number[col]; - if (nnz == 0) { - cols.resize(0); - return; + if (row_block == col_block) { + if (generate_matrix) values[realnnz] = vals[colID]; + columns[realnnz++] = col; + rownnz++; + } else + numDropped++; } + if (generate_matrix) + rows_mat[row + 1] = realnnz; + else + rows_graph[row + 1] = realnnz; + 
} - cols.resize(nnz); - - // loop over all local dof rows associated with local node "row" - ArrayView inds; - ArrayView vals; - for (LO j = 0; j < blkSize; j++) { - A.getLocalRowView(row*blkSize+j, inds, vals); - size_type numIndices = inds.size(); - - if (numIndices == 0) // skip empty dof rows - continue; - - // cols: stores all local node ids for current local node id "row" - LO prevNodeID = -1; - for (size_type k = 0; k < numIndices; k++) { - LO dofID = inds[k]; - LO nodeID = translation[inds[k]]; - - // we avoid a square root by using squared values - typename STS::magnitudeType aiiajj = STS::magnitude(threshold*threshold*ghostedDiagVals[dofID]*ghostedDiagVals[row*blkSize+j]); // eps^2 * |a_ii| * |a_jj| - typename STS::magnitudeType aij = STS::magnitude(vals[k]*vals[k]); - - // check dropping criterion - if (aij > aiiajj || (row*blkSize+j == dofID)) { - // accept entry in graph - - // Here we try to speed up the process by reducing the size of an array - // to sort. This works if the column nonzeros belonging to the same - // node are stored consequently. - if (nodeID != prevNodeID) { - cols[pos++] = nodeID; - prevNodeID = nodeID; - } - } - } - } - cols.resize(pos); - nnz = pos; + ArrayRCP boundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); + if (rowSumTol > 0.) 
+ Utilities::ApplyRowSumCriterion(*A, rowSumTol, boundaryNodes); - // Sort and remove duplicates - std::sort(cols.begin(), cols.end()); - pos = 0; - for (size_t j = 1; j < nnz; j++) - if (cols[j] != cols[pos]) - cols[++pos] = cols[j]; - cols.resize(pos+1); + if (!generate_matrix) { + // We can't resize an Arrayrcp and pass the checks for setAllValues + values.resize(realnnz); + columns.resize(realnnz); + } + numTotal = A->getLocalNumEntries(); + + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; + for (LO i = 0; i < boundaryNodes.size(); ++i) + if (boundaryNodes[i]) + numLocalBoundaryNodes++; + RCP> comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; + + GO numGlobalTotal, numGlobalDropped; + MueLu_sumAll(comm, numTotal, numGlobalTotal); + MueLu_sumAll(comm, numDropped, numGlobalDropped); + GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal; + if (numGlobalTotal != 0) + GetOStream(Statistics1) << " (" << 100 * Teuchos::as(numGlobalDropped) / Teuchos::as(numGlobalTotal) << "%)"; + GetOStream(Statistics1) << std::endl; + } - return; + Set(currentLevel, "Filtering", true); + + if (generate_matrix) { + // NOTE: Trying to use A's Import/Export objects will cause the code to segfault back in Build() with errors on the Import + // if you're using Epetra. I'm not really sure why. By using the Col==Domain and Row==Range maps, we get null Import/Export objects + // here, which is legit, because we never use them anyway. 
+ crs_matrix_wrap->getCrsMatrix()->setAllValues(rows_mat, columns, values); + crs_matrix_wrap->getCrsMatrix()->expertStaticFillComplete(A->getColMap(), A->getRowMap()); + } else { + RCP graph = rcp(new LWGraph(rows_graph, columns, A->getRowMap(), A->getColMap(), "block-diagonalized graph of A")); + graph->SetBoundaryNodeMap(boundaryNodes); + Set(currentLevel, "Graph", graph); } + Set(currentLevel, "DofsPerNode", 1); + return crs_matrix_wrap; +} +template +void CoalesceDropFactory::BlockDiagonalizeGraph(const RCP& inputGraph, const RCP& ghostedBlockNumber, RCP& outputGraph, RCP& importer) const { + TEUCHOS_TEST_FOR_EXCEPTION(ghostedBlockNumber.is_null(), Exceptions::RuntimeError, "BlockDiagonalizeGraph(): ghostedBlockNumber is null."); + const ParameterList& pL = GetParameterList(); - template - Teuchos::RCP > CoalesceDropFactory::BlockDiagonalize(Level & currentLevel,const RCP& A,bool generate_matrix) const { - typedef Teuchos::ScalarTraits STS; - - const ParameterList & pL = GetParameterList(); - const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as(pL.get("aggregation: Dirichlet threshold"))); - const typename STS::magnitudeType rowSumTol = as(pL.get("aggregation: row sum drop tol")); + const bool localizeColoringGraph = pL.get("aggregation: coloring: localize color graph"); - RCP BlockNumber = Get >(currentLevel, "BlockNumber"); - RCP ghostedBlockNumber; - GetOStream(Statistics1) << "Using BlockDiagonal Graph before dropping (with provided blocking)"< importer = A->getCrsGraph()->getImporter(); - if(!importer.is_null()) { - SubFactoryMonitor m1(*this, "Block Number import", currentLevel); - ghostedBlockNumber= Xpetra::VectorFactory::Build(importer->getTargetMap()); - ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT); - } - else { - ghostedBlockNumber = BlockNumber; - } + // Accessors for block numbers + Teuchos::ArrayRCP row_block_number = ghostedBlockNumber->getData(0); + Teuchos::ArrayRCP col_block_number = 
ghostedBlockNumber->getData(0); - // Accessors for block numbers - Teuchos::ArrayRCP row_block_number = BlockNumber->getData(0); - Teuchos::ArrayRCP col_block_number = ghostedBlockNumber->getData(0); - - // allocate space for the local graph - ArrayRCP rows_mat; - ArrayRCP rows_graph,columns; - ArrayRCP values; - RCP crs_matrix_wrap; - - if(generate_matrix) { - crs_matrix_wrap = rcp(new CrsMatrixWrap(A->getRowMap(), A->getColMap(), 0)); - crs_matrix_wrap->getCrsMatrix()->allocateAllValues(A->getLocalNumEntries(), rows_mat, columns, values); - } - else { - rows_graph.resize(A->getLocalNumRows()+1); - columns.resize(A->getLocalNumEntries()); - values.resize(A->getLocalNumEntries()); - } - - LO realnnz = 0; - GO numDropped = 0, numTotal = 0; - for (LO row = 0; row < Teuchos::as(A->getRowMap()->getLocalNumElements()); ++row) { - LO row_block = row_block_number[row]; - size_t nnz = A->getNumEntriesInLocalRow(row); - ArrayView indices; - ArrayView vals; - A->getLocalRowView(row, indices, vals); + // allocate space for the local graph + ArrayRCP rows_mat; + ArrayRCP rows_graph, columns; + + rows_graph.resize(inputGraph->GetNodeNumVertices() + 1); + columns.resize(inputGraph->GetNodeNumEdges()); + + LO realnnz = 0; + GO numDropped = 0, numTotal = 0; + const LO numRows = Teuchos::as(inputGraph->GetDomainMap()->getLocalNumElements()); + if (localizeColoringGraph) { + for (LO row = 0; row < numRows; ++row) { + LO row_block = row_block_number[row]; + ArrayView indices = inputGraph->getNeighborVertices(row); LO rownnz = 0; - for (LO colID = 0; colID < Teuchos::as(nnz); colID++) { - LO col = indices[colID]; + for (LO colID = 0; colID < Teuchos::as(indices.size()); colID++) { + LO col = indices[colID]; LO col_block = col_block_number[col]; - - if(row_block == col_block) { - if(generate_matrix) values[realnnz] = vals[colID]; + + if ((row_block == col_block) && (col < numRows)) { columns[realnnz++] = col; rownnz++; } else numDropped++; } - if(generate_matrix) rows_mat[row+1] = 
realnnz; - else rows_graph[row+1] = realnnz; + rows_graph[row + 1] = realnnz; } - - ArrayRCP boundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); - if (rowSumTol > 0.) - Utilities::ApplyRowSumCriterion(*A, rowSumTol, boundaryNodes); - - - if(!generate_matrix) { - // We can't resize an Arrayrcp and pass the checks for setAllValues - values.resize(realnnz); - columns.resize(realnnz); - } - numTotal = A->getLocalNumEntries(); + } else { + // ghosting of boundary node map + Teuchos::ArrayRCP boundaryNodes = inputGraph->GetBoundaryNodeMap(); + auto boundaryNodesVector = Xpetra::VectorFactory::Build(inputGraph->GetDomainMap()); + for (size_t i = 0; i < inputGraph->GetNodeNumVertices(); i++) + boundaryNodesVector->getDataNonConst(0)[i] = boundaryNodes[i]; + // Xpetra::IO::Write("boundary",*boundaryNodesVector); + auto boundaryColumnVector = Xpetra::VectorFactory::Build(inputGraph->GetImportMap()); + boundaryColumnVector->doImport(*boundaryNodesVector, *importer, Xpetra::INSERT); + auto boundaryColumn = boundaryColumnVector->getData(0); + + for (LO row = 0; row < numRows; ++row) { + LO row_block = row_block_number[row]; + ArrayView indices = inputGraph->getNeighborVertices(row); - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; - for (LO i = 0; i < boundaryNodes.size(); ++i) - if (boundaryNodes[i]) - numLocalBoundaryNodes++; - RCP > comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; - - GO numGlobalTotal, numGlobalDropped; - MueLu_sumAll(comm, numTotal, numGlobalTotal); - MueLu_sumAll(comm, numDropped, numGlobalDropped); - GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal; - if (numGlobalTotal != 0) - GetOStream(Statistics1) << " (" 
<< 100*Teuchos::as(numGlobalDropped)/Teuchos::as(numGlobalTotal) << "%)"; - GetOStream(Statistics1) << std::endl; - } - - Set(currentLevel, "Filtering", true); + LO rownnz = 0; + for (LO colID = 0; colID < Teuchos::as(indices.size()); colID++) { + LO col = indices[colID]; + LO col_block = col_block_number[col]; - if(generate_matrix) { - // NOTE: Trying to use A's Import/Export objects will cause the code to segfault back in Build() with errors on the Import - // if you're using Epetra. I'm not really sure why. By using the Col==Domain and Row==Range maps, we get null Import/Export objects - // here, which is legit, because we never use them anyway. - crs_matrix_wrap->getCrsMatrix()->setAllValues(rows_mat,columns,values); - crs_matrix_wrap->getCrsMatrix()->expertStaticFillComplete(A->getColMap(), A->getRowMap()); - } - else { - RCP graph = rcp(new LWGraph(rows_graph, columns, A->getRowMap(), A->getColMap(), "block-diagonalized graph of A")); - graph->SetBoundaryNodeMap(boundaryNodes); - Set(currentLevel, "Graph", graph); + if ((row_block == col_block) && ((row == col) || (boundaryColumn[col] == 0))) { + columns[realnnz++] = col; + rownnz++; + } else + numDropped++; + } + rows_graph[row + 1] = realnnz; } - - - Set(currentLevel, "DofsPerNode", 1); - return crs_matrix_wrap; } + columns.resize(realnnz); + numTotal = inputGraph->GetNodeNumEdges(); + + if (GetVerbLevel() & Statistics1) { + RCP> comm = inputGraph->GetDomainMap()->getComm(); + GO numGlobalTotal, numGlobalDropped; + MueLu_sumAll(comm, numTotal, numGlobalTotal); + MueLu_sumAll(comm, numDropped, numGlobalDropped); + GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal; + if (numGlobalTotal != 0) + GetOStream(Statistics1) << " (" << 100 * Teuchos::as(numGlobalDropped) / Teuchos::as(numGlobalTotal) << "%)"; + GetOStream(Statistics1) << std::endl; + } - template - void CoalesceDropFactory::BlockDiagonalizeGraph(const RCP & 
inputGraph, const RCP & ghostedBlockNumber, RCP & outputGraph, RCP & importer) const { - - TEUCHOS_TEST_FOR_EXCEPTION(ghostedBlockNumber.is_null(), Exceptions::RuntimeError, "BlockDiagonalizeGraph(): ghostedBlockNumber is null."); - const ParameterList & pL = GetParameterList(); - - const bool localizeColoringGraph = pL.get("aggregation: coloring: localize color graph"); - - GetOStream(Statistics1) << "Using BlockDiagonal Graph after Dropping (with provided blocking)"; - if (localizeColoringGraph) - GetOStream(Statistics1) << ", with localization" < row_block_number = ghostedBlockNumber->getData(0); - Teuchos::ArrayRCP col_block_number = ghostedBlockNumber->getData(0); - - // allocate space for the local graph - ArrayRCP rows_mat; - ArrayRCP rows_graph,columns; - - rows_graph.resize(inputGraph->GetNodeNumVertices()+1); - columns.resize(inputGraph->GetNodeNumEdges()); - - LO realnnz = 0; - GO numDropped = 0, numTotal = 0; - const LO numRows = Teuchos::as(inputGraph->GetDomainMap()->getLocalNumElements()); - if (localizeColoringGraph) { - - for (LO row = 0; row < numRows; ++row) { - LO row_block = row_block_number[row]; - ArrayView indices = inputGraph->getNeighborVertices(row); - - LO rownnz = 0; - for (LO colID = 0; colID < Teuchos::as(indices.size()); colID++) { - LO col = indices[colID]; - LO col_block = col_block_number[col]; - - if((row_block == col_block) && (col < numRows)) { - columns[realnnz++] = col; - rownnz++; - } else - numDropped++; - } - rows_graph[row+1] = realnnz; - } - } else { - // ghosting of boundary node map - Teuchos::ArrayRCP boundaryNodes = inputGraph->GetBoundaryNodeMap(); - auto boundaryNodesVector = Xpetra::VectorFactory::Build(inputGraph->GetDomainMap()); - for (size_t i=0; iGetNodeNumVertices(); i++) - boundaryNodesVector->getDataNonConst(0)[i] = boundaryNodes[i]; - // Xpetra::IO::Write("boundary",*boundaryNodesVector); - auto boundaryColumnVector = Xpetra::VectorFactory::Build(inputGraph->GetImportMap()); - 
boundaryColumnVector->doImport(*boundaryNodesVector,*importer, Xpetra::INSERT); - auto boundaryColumn = boundaryColumnVector->getData(0); - - for (LO row = 0; row < numRows; ++row) { - LO row_block = row_block_number[row]; - ArrayView indices = inputGraph->getNeighborVertices(row); - - LO rownnz = 0; - for (LO colID = 0; colID < Teuchos::as(indices.size()); colID++) { - LO col = indices[colID]; - LO col_block = col_block_number[col]; - - if((row_block == col_block) && ((row == col) || (boundaryColumn[col] == 0))) { - columns[realnnz++] = col; - rownnz++; - } else - numDropped++; - } - rows_graph[row+1] = realnnz; - } - } - - columns.resize(realnnz); - numTotal = inputGraph->GetNodeNumEdges(); - - if (GetVerbLevel() & Statistics1) { - RCP > comm = inputGraph->GetDomainMap()->getComm(); - GO numGlobalTotal, numGlobalDropped; - MueLu_sumAll(comm, numTotal, numGlobalTotal); - MueLu_sumAll(comm, numDropped, numGlobalDropped); - GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal; - if (numGlobalTotal != 0) - GetOStream(Statistics1) << " (" << 100*Teuchos::as(numGlobalDropped)/Teuchos::as(numGlobalTotal) << "%)"; - GetOStream(Statistics1) << std::endl; - } - - if (localizeColoringGraph) { - outputGraph = rcp(new LWGraph(rows_graph, columns, inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A")); - outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap()); - } else { - TEUCHOS_ASSERT(inputGraph->GetDomainMap()->lib() == Xpetra::UseTpetra); + if (localizeColoringGraph) { + outputGraph = rcp(new LWGraph(rows_graph, columns, inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A")); + outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap()); + } else { + TEUCHOS_ASSERT(inputGraph->GetDomainMap()->lib() == Xpetra::UseTpetra); #ifdef HAVE_XPETRA_TPETRA - auto outputGraph2 = rcp(new LWGraph(rows_graph, columns, 
inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A")); - - auto tpGraph = Xpetra::toTpetra(rcp_const_cast(outputGraph2->GetCrsGraph())); - auto sym = rcp(new Tpetra::CrsGraphTransposer(tpGraph)); - auto tpGraphSym = sym->symmetrize(); - - auto colIndsSym = // FIXME persistingView is temporary; better fix would be change to LWGraph constructor - Kokkos::Compat::persistingView(tpGraphSym->getLocalIndicesHost()); - - auto rowsSym = tpGraphSym->getLocalRowPtrsHost(); - ArrayRCP rows_graphSym; - rows_graphSym.resize(rowsSym.size()); - for (size_t row = 0; row < rowsSym.size(); row++) - rows_graphSym[row] = rowsSym[row]; - outputGraph = rcp(new LWGraph(rows_graphSym, colIndsSym, inputGraph->GetDomainMap(), Xpetra::toXpetra(tpGraphSym->getColMap()), "block-diagonalized graph of A")); - outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap()); + auto outputGraph2 = rcp(new LWGraph(rows_graph, columns, inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A")); + + auto tpGraph = Xpetra::toTpetra(rcp_const_cast(outputGraph2->GetCrsGraph())); + auto sym = rcp(new Tpetra::CrsGraphTransposer(tpGraph)); + auto tpGraphSym = sym->symmetrize(); + + auto colIndsSym = // FIXME persistingView is temporary; better fix would be change to LWGraph constructor + Kokkos::Compat::persistingView(tpGraphSym->getLocalIndicesHost()); + + auto rowsSym = tpGraphSym->getLocalRowPtrsHost(); + ArrayRCP rows_graphSym; + rows_graphSym.resize(rowsSym.size()); + for (size_t row = 0; row < rowsSym.size(); row++) + rows_graphSym[row] = rowsSym[row]; + outputGraph = rcp(new LWGraph(rows_graphSym, colIndsSym, inputGraph->GetDomainMap(), Xpetra::toXpetra(tpGraphSym->getColMap()), "block-diagonalized graph of A")); + outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap()); #endif - } - - } - - + } +} -} //namespace MueLu +} //namespace MueLu -#endif // MUELU_COALESCEDROPFACTORY_DEF_HPP +#endif // 
MUELU_COALESCEDROPFACTORY_DEF_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_decl.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_decl.hpp index 45d2601b230d..75f6f3ef42b0 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_decl.hpp @@ -62,7 +62,7 @@ namespace MueLu { - /*! +/*! @class CoalesceDropFactory_kokkos @brief Factory for creating a graph based on a given matrix. @@ -127,51 +127,49 @@ namespace MueLu { on HyperGraph partitioning without coordinate information) where one has not access to a "Graph" or "Coordinates" variable. */ - template - class CoalesceDropFactory_kokkos; - - template - class CoalesceDropFactory_kokkos > : public SingleLevelFactoryBase { - public: - using local_ordinal_type = LocalOrdinal; - using global_ordinal_type = GlobalOrdinal; - using execution_space = typename DeviceType::execution_space; - using range_type = Kokkos::RangePolicy; - using node_type = Tpetra::KokkosCompat::KokkosDeviceWrapperNode; - - private: - // For compatibility - using Node = node_type; +template +class CoalesceDropFactory_kokkos; + +template +class CoalesceDropFactory_kokkos > : public SingleLevelFactoryBase { + public: + using local_ordinal_type = LocalOrdinal; + using global_ordinal_type = GlobalOrdinal; + using execution_space = typename DeviceType::execution_space; + using range_type = Kokkos::RangePolicy; + using node_type = Tpetra::KokkosCompat::KokkosDeviceWrapperNode; + + private: + // For compatibility + using Node = node_type; #undef MUELU_COALESCEDROPFACTORY_KOKKOS_SHORT #include "MueLu_UseShortNames.hpp" - public: + public: + //! @name Constructors/Destructors. + //@{ - //! @name Constructors/Destructors. - //@{ + //! Constructor + CoalesceDropFactory_kokkos() {} - //! Constructor - CoalesceDropFactory_kokkos() { } + //! 
Destructor + virtual ~CoalesceDropFactory_kokkos() {} - //! Destructor - virtual ~CoalesceDropFactory_kokkos() { } + RCP GetValidParameterList() const; - RCP GetValidParameterList() const; + //@} - //@} + //! Input + //@{ - //! Input - //@{ + void DeclareInput(Level& currentLevel) const; - void DeclareInput(Level& currentLevel) const; + //@} - //@} + void Build(Level& currentLevel) const; +}; - void Build(Level& currentLevel) const; - - }; - -} //namespace MueLu +} //namespace MueLu #define MUELU_COALESCEDROPFACTORY_KOKKOS_SHORT -#endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DECL_HPP +#endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp index af490b6a5c24..a2f3a7933f21 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp @@ -63,447 +63,438 @@ namespace MueLu { +namespace CoalesceDrop_Kokkos_Details { // anonymous + +template +class ScanFunctor { + public: + ScanFunctor(RowType rows_) + : rows(rows_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const LO i, LO& upd, const bool& final) const { + upd += rows(i); + if (final) + rows(i) = upd; + } - namespace CoalesceDrop_Kokkos_Details { // anonymous + private: + RowType rows; +}; + +template +class ClassicalDropFunctor { + private: + typedef typename GhostedViewType::value_type SC; + typedef Kokkos::ArithTraits ATS; + typedef typename ATS::magnitudeType magnitudeType; + + GhostedViewType diag; // corresponds to overlapped diagonal multivector (2D View) + magnitudeType eps; + + public: + ClassicalDropFunctor(GhostedViewType ghostedDiag, magnitudeType threshold) + : diag(ghostedDiag) + , eps(threshold) {} + + // Return true if we drop, false if not + KOKKOS_FORCEINLINE_FUNCTION + bool operator()(LO 
row, LO col, SC val) const { + // We avoid square root by using squared values + auto aiiajj = ATS::magnitude(diag(row, 0)) * ATS::magnitude(diag(col, 0)); // |a_ii|*|a_jj| + auto aij2 = ATS::magnitude(val) * ATS::magnitude(val); // |a_ij|^2 + + return (aij2 <= eps * eps * aiiajj); + } +}; + +template +class DistanceFunctor { + private: + typedef typename CoordsType::value_type SC; + typedef Kokkos::ArithTraits ATS; + typedef typename ATS::magnitudeType magnitudeType; + + public: + typedef SC value_type; + + public: + DistanceFunctor(CoordsType coords_) + : coords(coords_) {} + + KOKKOS_INLINE_FUNCTION + magnitudeType distance2(LO row, LO col) const { + SC d = ATS::zero(), s; + for (size_t j = 0; j < coords.extent(1); j++) { + s = coords(row, j) - coords(col, j); + d += s * s; + } + return ATS::magnitude(d); + } - template - class ScanFunctor { - public: - ScanFunctor(RowType rows_) : rows(rows_) { } + private: + CoordsType coords; +}; + +template +class DistanceLaplacianDropFunctor { + private: + typedef typename GhostedViewType::value_type SC; + typedef Kokkos::ArithTraits ATS; + typedef typename ATS::magnitudeType magnitudeType; + + public: + DistanceLaplacianDropFunctor(GhostedViewType ghostedLaplDiag, DistanceFunctor distFunctor_, magnitudeType threshold) + : diag(ghostedLaplDiag) + , distFunctor(distFunctor_) + , eps(threshold) {} + + // Return true if we drop, false if not + KOKKOS_INLINE_FUNCTION + bool operator()(LO row, LO col, SC /* val */) const { + // We avoid square root by using squared values + + // We ignore incoming value of val as we operate on an auxiliary + // distance Laplacian matrix + typedef typename DistanceFunctor::value_type dSC; + typedef Kokkos::ArithTraits dATS; + auto fval = dATS::one() / distFunctor.distance2(row, col); + + auto aiiajj = ATS::magnitude(diag(row, 0)) * ATS::magnitude(diag(col, 0)); // |a_ii|*|a_jj| + auto aij2 = ATS::magnitude(fval) * ATS::magnitude(fval); // |a_ij|^2 + + return (aij2 <= eps * eps * aiiajj); + } - 
KOKKOS_INLINE_FUNCTION - void operator()(const LO i, LO& upd, const bool& final) const { - upd += rows(i); - if (final) - rows(i) = upd; - } + private: + GhostedViewType diag; // corresponds to overlapped diagonal multivector (2D View) + DistanceFunctor distFunctor; + magnitudeType eps; +}; + +template +class ScalarFunctor { + private: + typedef typename MatrixType::StaticCrsGraphType graph_type; + typedef typename graph_type::row_map_type rows_type; + typedef typename graph_type::entries_type cols_type; + typedef typename MatrixType::values_type vals_type; + typedef Kokkos::ArithTraits ATS; + typedef typename ATS::val_type impl_Scalar; + typedef Kokkos::ArithTraits impl_ATS; + typedef typename ATS::magnitudeType magnitudeType; + + public: + ScalarFunctor(MatrixType A_, BndViewType bndNodes_, DropFunctorType dropFunctor_, + typename rows_type::non_const_type rows_, + typename cols_type::non_const_type colsAux_, + typename vals_type::non_const_type valsAux_, + bool reuseGraph_, bool lumping_, SC /* threshold_ */, + bool aggregationMayCreateDirichlet_) + : A(A_) + , bndNodes(bndNodes_) + , dropFunctor(dropFunctor_) + , rows(rows_) + , colsAux(colsAux_) + , valsAux(valsAux_) + , reuseGraph(reuseGraph_) + , lumping(lumping_) + , aggregationMayCreateDirichlet(aggregationMayCreateDirichlet_) { + rowsA = A.graph.row_map; + zero = impl_ATS::zero(); + } - private: - RowType rows; - }; - - template - class ClassicalDropFunctor { - private: - typedef typename GhostedViewType::value_type SC; - typedef Kokkos::ArithTraits ATS; - typedef typename ATS::magnitudeType magnitudeType; - - GhostedViewType diag; // corresponds to overlapped diagonal multivector (2D View) - magnitudeType eps; - - public: - ClassicalDropFunctor(GhostedViewType ghostedDiag, magnitudeType threshold) : - diag(ghostedDiag), - eps(threshold) - { } - - // Return true if we drop, false if not - KOKKOS_FORCEINLINE_FUNCTION - bool operator()(LO row, LO col, SC val) const { - // We avoid square root by using 
squared values - auto aiiajj = ATS::magnitude(diag(row, 0)) * ATS::magnitude(diag(col, 0)); // |a_ii|*|a_jj| - auto aij2 = ATS::magnitude(val) * ATS::magnitude(val); // |a_ij|^2 - - return (aij2 <= eps*eps * aiiajj); - } - }; - - template - class DistanceFunctor { - private: - typedef typename CoordsType::value_type SC; - typedef Kokkos::ArithTraits ATS; - typedef typename ATS::magnitudeType magnitudeType; - - public: - typedef SC value_type; - - public: - DistanceFunctor(CoordsType coords_) : coords(coords_) { } - - KOKKOS_INLINE_FUNCTION - magnitudeType distance2(LO row, LO col) const { - SC d = ATS::zero(), s; - for (size_t j = 0; j < coords.extent(1); j++) { - s = coords(row,j) - coords(col,j); - d += s*s; - } - return ATS::magnitude(d); - } - private: - CoordsType coords; - }; - - template - class DistanceLaplacianDropFunctor { - private: - typedef typename GhostedViewType::value_type SC; - typedef Kokkos::ArithTraits ATS; - typedef typename ATS::magnitudeType magnitudeType; - - public: - DistanceLaplacianDropFunctor(GhostedViewType ghostedLaplDiag, DistanceFunctor distFunctor_, magnitudeType threshold) : - diag(ghostedLaplDiag), - distFunctor(distFunctor_), - eps(threshold) - { } - - // Return true if we drop, false if not - KOKKOS_INLINE_FUNCTION - bool operator()(LO row, LO col, SC /* val */) const { - // We avoid square root by using squared values - - // We ignore incoming value of val as we operate on an auxiliary - // distance Laplacian matrix - typedef typename DistanceFunctor::value_type dSC; - typedef Kokkos::ArithTraits dATS; - auto fval = dATS::one() / distFunctor.distance2(row, col); - - auto aiiajj = ATS::magnitude(diag(row, 0)) * ATS::magnitude(diag(col, 0)); // |a_ii|*|a_jj| - auto aij2 = ATS::magnitude(fval) * ATS::magnitude(fval); // |a_ij|^2 - - return (aij2 <= eps*eps * aiiajj); - } + KOKKOS_INLINE_FUNCTION + void operator()(const LO row, LO& nnz) const { + auto rowView = A.rowConst(row); + auto length = rowView.length; + auto offset = 
rowsA(row); - private: - GhostedViewType diag; // corresponds to overlapped diagonal multivector (2D View) - DistanceFunctor distFunctor; - magnitudeType eps; - }; - - template - class ScalarFunctor { - private: - typedef typename MatrixType::StaticCrsGraphType graph_type; - typedef typename graph_type::row_map_type rows_type; - typedef typename graph_type::entries_type cols_type; - typedef typename MatrixType::values_type vals_type; - typedef Kokkos::ArithTraits ATS; - typedef typename ATS::val_type impl_Scalar; - typedef Kokkos::ArithTraits impl_ATS; - typedef typename ATS::magnitudeType magnitudeType; - - public: - ScalarFunctor(MatrixType A_, BndViewType bndNodes_, DropFunctorType dropFunctor_, - typename rows_type::non_const_type rows_, - typename cols_type::non_const_type colsAux_, - typename vals_type::non_const_type valsAux_, - bool reuseGraph_, bool lumping_, SC /* threshold_ */, - bool aggregationMayCreateDirichlet_ ) : - A(A_), - bndNodes(bndNodes_), - dropFunctor(dropFunctor_), - rows(rows_), - colsAux(colsAux_), - valsAux(valsAux_), - reuseGraph(reuseGraph_), - lumping(lumping_), - aggregationMayCreateDirichlet(aggregationMayCreateDirichlet_) - { - rowsA = A.graph.row_map; - zero = impl_ATS::zero(); - } + impl_Scalar diag = zero; + LO rownnz = 0; + LO diagID = -1; + for (decltype(length) colID = 0; colID < length; colID++) { + LO col = rowView.colidx(colID); + impl_Scalar val = rowView.value(colID); - KOKKOS_INLINE_FUNCTION - void operator()(const LO row, LO& nnz) const { - auto rowView = A.rowConst(row); - auto length = rowView.length; - auto offset = rowsA(row); - - impl_Scalar diag = zero; - LO rownnz = 0; - LO diagID = -1; - for (decltype(length) colID = 0; colID < length; colID++) { - LO col = rowView.colidx(colID); - impl_Scalar val = rowView.value (colID); - - if (!dropFunctor(row, col, rowView.value(colID)) || row == col) { - colsAux(offset+rownnz) = col; - - LO valID = (reuseGraph ? 
colID : rownnz); - valsAux(offset+valID) = val; - if (row == col) - diagID = valID; - - rownnz++; - - } else { - // Rewrite with zeros (needed for reuseGraph) - valsAux(offset+colID) = zero; - diag += val; - } - } - // How to assert on the device? - // assert(diagIndex != -1); - rows(row+1) = rownnz; - // if (lumping && diagID != -1) { - if (lumping) { - // Add diag to the diagonal - - // NOTE_KOKKOS: valsAux was allocated with - // ViewAllocateWithoutInitializing. This is not a problem here - // because we explicitly set this value above. - valsAux(offset+diagID) += diag; - } + if (!dropFunctor(row, col, rowView.value(colID)) || row == col) { + colsAux(offset + rownnz) = col; - // If the only element remaining after filtering is diagonal, mark node as boundary - // FIXME: this should really be replaced by the following - // if (indices.size() == 1 && indices[0] == row) - // boundaryNodes[row] = true; - // We do not do it this way now because there is no framework for distinguishing isolated - // and boundary nodes in the aggregation algorithms - bndNodes(row) = (rownnz == 1 && aggregationMayCreateDirichlet); + LO valID = (reuseGraph ? 
colID : rownnz); + valsAux(offset + valID) = val; + if (row == col) + diagID = valID; - nnz += rownnz; - } + rownnz++; - private: - MatrixType A; - BndViewType bndNodes; - DropFunctorType dropFunctor; - - rows_type rowsA; - - typename rows_type::non_const_type rows; - typename cols_type::non_const_type colsAux; - typename vals_type::non_const_type valsAux; - - bool reuseGraph; - bool lumping; - bool aggregationMayCreateDirichlet; - impl_Scalar zero; - }; - - // collect number nonzeros of blkSize rows in nnz_(row+1) - template - class Stage1aVectorFunctor { - private: - typedef typename MatrixType::ordinal_type LO; - - public: - Stage1aVectorFunctor(MatrixType kokkosMatrix_, NnzType nnz_, blkSizeType blkSize_) : - kokkosMatrix(kokkosMatrix_), - nnz(nnz_), - blkSize(blkSize_) { } - - KOKKOS_INLINE_FUNCTION - void operator()(const LO row, LO& totalnnz) const { - - // the following code is more or less what MergeRows is doing - // count nonzero entries in all dof rows associated with node row - LO nodeRowMaxNonZeros = 0; - for (LO j = 0; j < blkSize; j++) { - auto rowView = kokkosMatrix.row(row * blkSize + j); - nodeRowMaxNonZeros += rowView.length; - } - nnz(row + 1) = nodeRowMaxNonZeros; - totalnnz += nodeRowMaxNonZeros; + } else { + // Rewrite with zeros (needed for reuseGraph) + valsAux(offset + colID) = zero; + diag += val; } + } + // How to assert on the device? + // assert(diagIndex != -1); + rows(row + 1) = rownnz; + // if (lumping && diagID != -1) { + if (lumping) { + // Add diag to the diagonal + + // NOTE_KOKKOS: valsAux was allocated with + // ViewAllocateWithoutInitializing. This is not a problem here + // because we explicitly set this value above. 
+ valsAux(offset + diagID) += diag; + } + // If the only element remaining after filtering is diagonal, mark node as boundary + // FIXME: this should really be replaced by the following + // if (indices.size() == 1 && indices[0] == row) + // boundaryNodes[row] = true; + // We do not do it this way now because there is no framework for distinguishing isolated + // and boundary nodes in the aggregation algorithms + bndNodes(row) = (rownnz == 1 && aggregationMayCreateDirichlet); - private: - MatrixType kokkosMatrix; //< local matrix part - NnzType nnz; //< View containing number of nonzeros for current row - blkSizeType blkSize; //< block size (or partial block size in strided maps) - }; - - - // build the dof-based column map containing the local dof ids belonging to blkSize rows in matrix - // sort column ids - // translate them into (unique) node ids - // count the node column ids per node row - template - class Stage1bcVectorFunctor { - private: - typedef typename MatrixType::ordinal_type LO; - - private: - MatrixType kokkosMatrix; //< local matrix part - NnzType coldofnnz; //< view containing start and stop indices for subviews - blkSizeType blkSize; //< block size (or partial block size in strided maps) - ColDofType coldofs; //< view containing the local dof ids associated with columns for the blkSize rows (not sorted) - Dof2NodeTranslationType dof2node; //< view containing the local node id associated with the local dof id - NnzType colnodennz; //< view containing number of column nodes for each node row - BdryNodeTypeConst dirichletdof; //< view containing with num dofs booleans. True if dof (not necessarily entire node) is dirichlet boundardy dof. - BdryNodeType bdrynode; //< view containing with numNodes booleans. True if node is (full) dirichlet boundardy node. 
- boolType usegreedydirichlet; //< boolean for use of greedy Dirichlet (if any dof is Dirichlet, entire node is dirichlet) default false (need all dofs in node to be Dirichlet for node to be Dirichlet) - - public: - Stage1bcVectorFunctor(MatrixType kokkosMatrix_, - NnzType coldofnnz_, - blkSizeType blkSize_, - ColDofType coldofs_, - Dof2NodeTranslationType dof2node_, - NnzType colnodennz_, - BdryNodeTypeConst dirichletdof_, - BdryNodeType bdrynode_, - boolType usegreedydirichlet_) : - kokkosMatrix(kokkosMatrix_), - coldofnnz(coldofnnz_), - blkSize(blkSize_), - coldofs(coldofs_), - dof2node(dof2node_), - colnodennz(colnodennz_), - dirichletdof(dirichletdof_), - bdrynode(bdrynode_), - usegreedydirichlet(usegreedydirichlet_) { - } + nnz += rownnz; + } - KOKKOS_INLINE_FUNCTION - void operator()(const LO rowNode, LO& nnz) const { + private: + MatrixType A; + BndViewType bndNodes; + DropFunctorType dropFunctor; + + rows_type rowsA; + + typename rows_type::non_const_type rows; + typename cols_type::non_const_type colsAux; + typename vals_type::non_const_type valsAux; + + bool reuseGraph; + bool lumping; + bool aggregationMayCreateDirichlet; + impl_Scalar zero; +}; + +// collect number nonzeros of blkSize rows in nnz_(row+1) +template +class Stage1aVectorFunctor { + private: + typedef typename MatrixType::ordinal_type LO; + + public: + Stage1aVectorFunctor(MatrixType kokkosMatrix_, NnzType nnz_, blkSizeType blkSize_) + : kokkosMatrix(kokkosMatrix_) + , nnz(nnz_) + , blkSize(blkSize_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const LO row, LO& totalnnz) const { + // the following code is more or less what MergeRows is doing + // count nonzero entries in all dof rows associated with node row + LO nodeRowMaxNonZeros = 0; + for (LO j = 0; j < blkSize; j++) { + auto rowView = kokkosMatrix.row(row * blkSize + j); + nodeRowMaxNonZeros += rowView.length; + } + nnz(row + 1) = nodeRowMaxNonZeros; + totalnnz += nodeRowMaxNonZeros; + } - LO pos = coldofnnz(rowNode); - if( 
usegreedydirichlet ){ - bdrynode(rowNode) = false; - for (LO j = 0; j < blkSize; j++) { - auto rowView = kokkosMatrix.row(rowNode * blkSize + j); - auto numIndices = rowView.length; - - // if any dof in the node is Dirichlet - if( dirichletdof(rowNode * blkSize + j) ) - bdrynode(rowNode) = true; - - for (decltype(numIndices) k = 0; k < numIndices; k++) { - auto dofID = rowView.colidx(k); - coldofs(pos) = dofID; - pos ++; - } - } - }else{ + private: + MatrixType kokkosMatrix; //< local matrix part + NnzType nnz; //< View containing number of nonzeros for current row + blkSizeType blkSize; //< block size (or partial block size in strided maps) +}; + +// build the dof-based column map containing the local dof ids belonging to blkSize rows in matrix +// sort column ids +// translate them into (unique) node ids +// count the node column ids per node row +template +class Stage1bcVectorFunctor { + private: + typedef typename MatrixType::ordinal_type LO; + + private: + MatrixType kokkosMatrix; //< local matrix part + NnzType coldofnnz; //< view containing start and stop indices for subviews + blkSizeType blkSize; //< block size (or partial block size in strided maps) + ColDofType coldofs; //< view containing the local dof ids associated with columns for the blkSize rows (not sorted) + Dof2NodeTranslationType dof2node; //< view containing the local node id associated with the local dof id + NnzType colnodennz; //< view containing number of column nodes for each node row + BdryNodeTypeConst dirichletdof; //< view containing with num dofs booleans. True if dof (not necessarily entire node) is dirichlet boundardy dof. + BdryNodeType bdrynode; //< view containing with numNodes booleans. True if node is (full) dirichlet boundardy node. 
+ boolType usegreedydirichlet; //< boolean for use of greedy Dirichlet (if any dof is Dirichlet, entire node is dirichlet) default false (need all dofs in node to be Dirichlet for node to be Dirichlet) + + public: + Stage1bcVectorFunctor(MatrixType kokkosMatrix_, + NnzType coldofnnz_, + blkSizeType blkSize_, + ColDofType coldofs_, + Dof2NodeTranslationType dof2node_, + NnzType colnodennz_, + BdryNodeTypeConst dirichletdof_, + BdryNodeType bdrynode_, + boolType usegreedydirichlet_) + : kokkosMatrix(kokkosMatrix_) + , coldofnnz(coldofnnz_) + , blkSize(blkSize_) + , coldofs(coldofs_) + , dof2node(dof2node_) + , colnodennz(colnodennz_) + , dirichletdof(dirichletdof_) + , bdrynode(bdrynode_) + , usegreedydirichlet(usegreedydirichlet_) { + } + + KOKKOS_INLINE_FUNCTION + void operator()(const LO rowNode, LO& nnz) const { + LO pos = coldofnnz(rowNode); + if (usegreedydirichlet) { + bdrynode(rowNode) = false; + for (LO j = 0; j < blkSize; j++) { + auto rowView = kokkosMatrix.row(rowNode * blkSize + j); + auto numIndices = rowView.length; + + // if any dof in the node is Dirichlet + if (dirichletdof(rowNode * blkSize + j)) bdrynode(rowNode) = true; - for (LO j = 0; j < blkSize; j++) { - auto rowView = kokkosMatrix.row(rowNode * blkSize + j); - auto numIndices = rowView.length; - - // if any dof in the node is not Dirichlet - if( dirichletdof(rowNode * blkSize + j) == false ) - bdrynode(rowNode) = false; - - for (decltype(numIndices) k = 0; k < numIndices; k++) { - auto dofID = rowView.colidx(k); - coldofs(pos) = dofID; - pos ++; - } - } - } - // sort coldofs - LO begin = coldofnnz(rowNode); - LO end = coldofnnz(rowNode+1); - LO n = end - begin; - for (LO i = 0; i < (n-1); i++) { - for (LO j = 0; j < (n-i-1); j++) { - if (coldofs(j+begin) > coldofs(j+begin+1)) { - LO temp = coldofs(j+begin); - coldofs(j+begin) = coldofs(j+begin+1); - coldofs(j+begin+1) = temp; - } - } - } - size_t cnt = 0; - LO lastNodeID = -1; - for (LO i = 0; i < n; i++) { - LO dofID = coldofs(begin + i); - 
LO nodeID = dof2node(dofID); - if(nodeID != lastNodeID) { - lastNodeID = nodeID; - coldofs(begin+cnt) = nodeID; - cnt++; - } + for (decltype(numIndices) k = 0; k < numIndices; k++) { + auto dofID = rowView.colidx(k); + coldofs(pos) = dofID; + pos++; } - colnodennz(rowNode+1) = cnt; - nnz += cnt; } + } else { + bdrynode(rowNode) = true; + for (LO j = 0; j < blkSize; j++) { + auto rowView = kokkosMatrix.row(rowNode * blkSize + j); + auto numIndices = rowView.length; - }; - - // fill column node id view - template - class Stage1dVectorFunctor { - private: - typedef typename MatrixType::ordinal_type LO; - typedef typename MatrixType::value_type SC; - - private: - ColDofType coldofs; //< view containing mixed node and dof indices (only input) - ColDofNnzType coldofnnz; //< view containing the start and stop indices for subviews (dofs) - ColNodeType colnodes; //< view containing the local node ids associated with columns - ColNodeNnzType colnodennz; //< view containing start and stop indices for subviews - - public: - Stage1dVectorFunctor(ColDofType coldofs_, ColDofNnzType coldofnnz_, ColNodeType colnodes_, ColNodeNnzType colnodennz_) : - coldofs(coldofs_), - coldofnnz(coldofnnz_), - colnodes(colnodes_), - colnodennz(colnodennz_) { - } + // if any dof in the node is not Dirichlet + if (dirichletdof(rowNode * blkSize + j) == false) + bdrynode(rowNode) = false; - KOKKOS_INLINE_FUNCTION - void operator()(const LO rowNode) const { - auto dofbegin = coldofnnz(rowNode); - auto nodebegin = colnodennz(rowNode); - auto nodeend = colnodennz(rowNode+1); - auto n = nodeend - nodebegin; + for (decltype(numIndices) k = 0; k < numIndices; k++) { + auto dofID = rowView.colidx(k); + coldofs(pos) = dofID; + pos++; + } + } + } - for (decltype(nodebegin) i = 0; i < n; i++) { - colnodes(nodebegin + i) = coldofs(dofbegin + i); + // sort coldofs + LO begin = coldofnnz(rowNode); + LO end = coldofnnz(rowNode + 1); + LO n = end - begin; + for (LO i = 0; i < (n - 1); i++) { + for (LO j = 0; j < (n 
- i - 1); j++) { + if (coldofs(j + begin) > coldofs(j + begin + 1)) { + LO temp = coldofs(j + begin); + coldofs(j + begin) = coldofs(j + begin + 1); + coldofs(j + begin + 1) = temp; } } - }; + } + size_t cnt = 0; + LO lastNodeID = -1; + for (LO i = 0; i < n; i++) { + LO dofID = coldofs(begin + i); + LO nodeID = dof2node(dofID); + if (nodeID != lastNodeID) { + lastNodeID = nodeID; + coldofs(begin + cnt) = nodeID; + cnt++; + } + } + colnodennz(rowNode + 1) = cnt; + nnz += cnt; + } +}; + +// fill column node id view +template +class Stage1dVectorFunctor { + private: + typedef typename MatrixType::ordinal_type LO; + typedef typename MatrixType::value_type SC; + + private: + ColDofType coldofs; //< view containing mixed node and dof indices (only input) + ColDofNnzType coldofnnz; //< view containing the start and stop indices for subviews (dofs) + ColNodeType colnodes; //< view containing the local node ids associated with columns + ColNodeNnzType colnodennz; //< view containing start and stop indices for subviews + + public: + Stage1dVectorFunctor(ColDofType coldofs_, ColDofNnzType coldofnnz_, ColNodeType colnodes_, ColNodeNnzType colnodennz_) + : coldofs(coldofs_) + , coldofnnz(coldofnnz_) + , colnodes(colnodes_) + , colnodennz(colnodennz_) { + } + + KOKKOS_INLINE_FUNCTION + void operator()(const LO rowNode) const { + auto dofbegin = coldofnnz(rowNode); + auto nodebegin = colnodennz(rowNode); + auto nodeend = colnodennz(rowNode + 1); + auto n = nodeend - nodebegin; + for (decltype(nodebegin) i = 0; i < n; i++) { + colnodes(nodebegin + i) = coldofs(dofbegin + i); + } + } +}; - } // namespace +} // namespace CoalesceDrop_Kokkos_Details - template - RCP CoalesceDropFactory_kokkos>::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP CoalesceDropFactory_kokkos>::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) 
- SET_VALID_ENTRY("aggregation: drop tol"); - SET_VALID_ENTRY("aggregation: Dirichlet threshold"); - SET_VALID_ENTRY("aggregation: drop scheme"); - SET_VALID_ENTRY("aggregation: dropping may create Dirichlet"); - SET_VALID_ENTRY("aggregation: greedy Dirichlet"); - SET_VALID_ENTRY("filtered matrix: use lumping"); - SET_VALID_ENTRY("filtered matrix: reuse graph"); - SET_VALID_ENTRY("filtered matrix: reuse eigenvalue"); - { - typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; - validParamList->getEntry("aggregation: drop scheme").setValidator( - rcp(new validatorType(Teuchos::tuple("classical", "distance laplacian"), "aggregation: drop scheme"))); - } -#undef SET_VALID_ENTRY - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A"); - validParamList->set< RCP >("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo"); - validParamList->set< RCP >("Coordinates", Teuchos::null, "Generating factory for Coordinates"); - - return validParamList; + SET_VALID_ENTRY("aggregation: drop tol"); + SET_VALID_ENTRY("aggregation: Dirichlet threshold"); + SET_VALID_ENTRY("aggregation: drop scheme"); + SET_VALID_ENTRY("aggregation: dropping may create Dirichlet"); + SET_VALID_ENTRY("aggregation: greedy Dirichlet"); + SET_VALID_ENTRY("filtered matrix: use lumping"); + SET_VALID_ENTRY("filtered matrix: reuse graph"); + SET_VALID_ENTRY("filtered matrix: reuse eigenvalue"); + { + typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; + validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new validatorType(Teuchos::tuple("classical", "distance laplacian"), "aggregation: drop scheme"))); } +#undef SET_VALID_ENTRY + validParamList->set>("A", Teuchos::null, "Generating factory of the matrix A"); + validParamList->set>("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo"); + validParamList->set>("Coordinates", Teuchos::null, "Generating factory for 
Coordinates"); - template - void CoalesceDropFactory_kokkos>::DeclareInput(Level ¤tLevel) const { - Input(currentLevel, "A"); - Input(currentLevel, "UnAmalgamationInfo"); + return validParamList; +} - const ParameterList& pL = GetParameterList(); - if (pL.get("aggregation: drop scheme") == "distance laplacian") - Input(currentLevel, "Coordinates"); - } +template +void CoalesceDropFactory_kokkos>::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "A"); + Input(currentLevel, "UnAmalgamationInfo"); - template - void CoalesceDropFactory_kokkos>:: - Build(Level& currentLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); + const ParameterList& pL = GetParameterList(); + if (pL.get("aggregation: drop scheme") == "distance laplacian") + Input(currentLevel, "Coordinates"); +} - typedef Teuchos::ScalarTraits STS; - typedef typename STS::magnitudeType MT; - const MT zero = Teuchos::ScalarTraits::zero(); +template +void CoalesceDropFactory_kokkos>:: + Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); - auto A = Get< RCP >(currentLevel, "A"); + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType MT; + const MT zero = Teuchos::ScalarTraits::zero(); + auto A = Get>(currentLevel, "A"); - /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme. + /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme. blkSize is the number of storage blocks that must kept together during the amalgamation process. Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold: @@ -519,158 +510,158 @@ namespace MueLu { If matrix uses block storage, with block size of n, then storageblocksize=n, and blkSize=numPDEs/n. Thus far, only storageblocksize=numPDEs and blkSize=1 has been tested. 
*/ - - TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0,Exceptions::RuntimeError,"A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()"); - LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize(); - auto amalInfo = Get< RCP >(currentLevel, "UnAmalgamationInfo"); + TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()"); + LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize(); - const ParameterList& pL = GetParameterList(); + auto amalInfo = Get>(currentLevel, "UnAmalgamationInfo"); - std::string algo = pL.get("aggregation: drop scheme"); + const ParameterList& pL = GetParameterList(); - double threshold = pL.get("aggregation: drop tol"); - GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold - << ", blocksize = " << A->GetFixedBlockSize() << std::endl; + std::string algo = pL.get("aggregation: drop scheme"); - const typename STS::magnitudeType dirichletThreshold = - STS::magnitude(as(pL.get("aggregation: Dirichlet threshold"))); + double threshold = pL.get("aggregation: drop tol"); + GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold + << ", blocksize = " << A->GetFixedBlockSize() << std::endl; - GO numDropped = 0, numTotal = 0; + const typename STS::magnitudeType dirichletThreshold = + STS::magnitude(as(pL.get("aggregation: Dirichlet threshold"))); - RCP graph; - LO dofsPerNode = -1; + GO numDropped = 0, numTotal = 0; - typedef typename LWGraph_kokkos::boundary_nodes_type boundary_nodes_type; - boundary_nodes_type boundaryNodes; + RCP graph; + LO dofsPerNode = -1; - RCP filteredA; - if (blkSize == 1 && threshold == zero) { - // Scalar problem without dropping + typedef typename LWGraph_kokkos::boundary_nodes_type boundary_nodes_type; + boundary_nodes_type boundaryNodes; - // Detect and record rows that 
correspond to Dirichlet boundary conditions - boundaryNodes = Utilities::DetectDirichletRows_kokkos(*A, dirichletThreshold); + RCP filteredA; + if (blkSize == 1 && threshold == zero) { + // Scalar problem without dropping - // Trivial LWGraph construction - graph = rcp(new LWGraph_kokkos(A->getCrsGraph()->getLocalGraphDevice(), A->getRowMap(), A->getColMap(), "graph of A")); - graph->getLocalLWGraph().SetBoundaryNodeMap(boundaryNodes); + // Detect and record rows that correspond to Dirichlet boundary conditions + boundaryNodes = Utilities::DetectDirichletRows_kokkos(*A, dirichletThreshold); - numTotal = A->getLocalNumEntries(); - dofsPerNode = 1; + // Trivial LWGraph construction + graph = rcp(new LWGraph_kokkos(A->getCrsGraph()->getLocalGraphDevice(), A->getRowMap(), A->getColMap(), "graph of A")); + graph->getLocalLWGraph().SetBoundaryNodeMap(boundaryNodes); - filteredA = A; + numTotal = A->getLocalNumEntries(); + dofsPerNode = 1; - } else if (blkSize == 1 && threshold != zero) { - // Scalar problem with dropping + filteredA = A; - typedef typename Matrix::local_matrix_type local_matrix_type; - typedef typename LWGraph_kokkos::local_graph_type kokkos_graph_type; - typedef typename kokkos_graph_type::row_map_type::non_const_type rows_type; - typedef typename kokkos_graph_type::entries_type::non_const_type cols_type; - typedef typename local_matrix_type::values_type::non_const_type vals_type; + } else if (blkSize == 1 && threshold != zero) { + // Scalar problem with dropping - LO numRows = A->getLocalNumRows(); - local_matrix_type kokkosMatrix = A->getLocalMatrixDevice(); - auto nnzA = kokkosMatrix.nnz(); - auto rowsA = kokkosMatrix.graph.row_map; + typedef typename Matrix::local_matrix_type local_matrix_type; + typedef typename LWGraph_kokkos::local_graph_type kokkos_graph_type; + typedef typename kokkos_graph_type::row_map_type::non_const_type rows_type; + typedef typename kokkos_graph_type::entries_type::non_const_type cols_type; + typedef typename 
local_matrix_type::values_type::non_const_type vals_type; + LO numRows = A->getLocalNumRows(); + local_matrix_type kokkosMatrix = A->getLocalMatrixDevice(); + auto nnzA = kokkosMatrix.nnz(); + auto rowsA = kokkosMatrix.graph.row_map; - typedef Kokkos::ArithTraits ATS; - typedef typename ATS::val_type impl_Scalar; - typedef Kokkos::ArithTraits impl_ATS; + typedef Kokkos::ArithTraits ATS; + typedef typename ATS::val_type impl_Scalar; + typedef Kokkos::ArithTraits impl_ATS; - bool reuseGraph = pL.get("filtered matrix: reuse graph"); - bool lumping = pL.get("filtered matrix: use lumping"); - if (lumping) - GetOStream(Runtime0) << "Lumping dropped entries" << std::endl; + bool reuseGraph = pL.get("filtered matrix: reuse graph"); + bool lumping = pL.get("filtered matrix: use lumping"); + if (lumping) + GetOStream(Runtime0) << "Lumping dropped entries" << std::endl; - const bool aggregationMayCreateDirichlet = pL.get("aggregation: dropping may create Dirichlet"); + const bool aggregationMayCreateDirichlet = pL.get("aggregation: dropping may create Dirichlet"); - // FIXME_KOKKOS: replace by ViewAllocateWithoutInitializing + setting a single value - rows_type rows ("FA_rows", numRows+1); - cols_type colsAux(Kokkos::ViewAllocateWithoutInitializing("FA_aux_cols"), nnzA); - vals_type valsAux; - if (reuseGraph) { - SubFactoryMonitor m2(*this, "CopyMatrix", currentLevel); + // FIXME_KOKKOS: replace by ViewAllocateWithoutInitializing + setting a single value + rows_type rows("FA_rows", numRows + 1); + cols_type colsAux(Kokkos::ViewAllocateWithoutInitializing("FA_aux_cols"), nnzA); + vals_type valsAux; + if (reuseGraph) { + SubFactoryMonitor m2(*this, "CopyMatrix", currentLevel); - // Share graph with the original matrix - filteredA = MatrixFactory::Build(A->getCrsGraph()); + // Share graph with the original matrix + filteredA = MatrixFactory::Build(A->getCrsGraph()); - // Do a no-op fill-complete - RCP fillCompleteParams(new ParameterList); - fillCompleteParams->set("No Nonlocal 
Changes", true); - filteredA->fillComplete(fillCompleteParams); + // Do a no-op fill-complete + RCP fillCompleteParams(new ParameterList); + fillCompleteParams->set("No Nonlocal Changes", true); + filteredA->fillComplete(fillCompleteParams); - // No need to reuseFill, just modify in place - valsAux = filteredA->getLocalMatrixDevice().values; + // No need to reuseFill, just modify in place + valsAux = filteredA->getLocalMatrixDevice().values; - } else { - // Need an extra array to compress - valsAux = vals_type(Kokkos::ViewAllocateWithoutInitializing("FA_aux_vals"), nnzA); - } + } else { + // Need an extra array to compress + valsAux = vals_type(Kokkos::ViewAllocateWithoutInitializing("FA_aux_vals"), nnzA); + } - typename boundary_nodes_type::non_const_type bndNodes(Kokkos::ViewAllocateWithoutInitializing("boundaryNodes"), numRows); + typename boundary_nodes_type::non_const_type bndNodes(Kokkos::ViewAllocateWithoutInitializing("boundaryNodes"), numRows); - LO nnzFA = 0; - { - if (algo == "classical") { - // Construct overlapped matrix diagonal - RCP ghostedDiag; - { - kokkosMatrix = local_matrix_type(); - SubFactoryMonitor m2(*this, "Ghosted diag construction", currentLevel); - ghostedDiag = Utilities::GetMatrixOverlappedDiagonal(*A); - kokkosMatrix=A->getLocalMatrixDevice(); - } + LO nnzFA = 0; + { + if (algo == "classical") { + // Construct overlapped matrix diagonal + RCP ghostedDiag; + { + kokkosMatrix = local_matrix_type(); + SubFactoryMonitor m2(*this, "Ghosted diag construction", currentLevel); + ghostedDiag = Utilities::GetMatrixOverlappedDiagonal(*A); + kokkosMatrix = A->getLocalMatrixDevice(); + } - // Filter out entries - { - SubFactoryMonitor m2(*this, "MainLoop", currentLevel); + // Filter out entries + { + SubFactoryMonitor m2(*this, "MainLoop", currentLevel); - auto ghostedDiagView = ghostedDiag->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto ghostedDiagView = ghostedDiag->getDeviceLocalView(Xpetra::Access::ReadWrite); - 
CoalesceDrop_Kokkos_Details::ClassicalDropFunctor dropFunctor(ghostedDiagView, threshold); - CoalesceDrop_Kokkos_Details::ScalarFunctor + CoalesceDrop_Kokkos_Details::ClassicalDropFunctor dropFunctor(ghostedDiagView, threshold); + CoalesceDrop_Kokkos_Details::ScalarFunctor scalarFunctor(kokkosMatrix, bndNodes, dropFunctor, rows, colsAux, valsAux, reuseGraph, lumping, threshold, aggregationMayCreateDirichlet); - Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:main_loop", range_type(0,numRows), - scalarFunctor, nnzFA); - } + Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:main_loop", range_type(0, numRows), + scalarFunctor, nnzFA); + } - } else if (algo == "distance laplacian") { - typedef Xpetra::MultiVector::magnitudeType,LO,GO,NO> doubleMultiVector; - auto coords = Get >(currentLevel, "Coordinates"); + } else if (algo == "distance laplacian") { + typedef Xpetra::MultiVector::magnitudeType, LO, GO, NO> doubleMultiVector; + auto coords = Get>(currentLevel, "Coordinates"); - auto uniqueMap = A->getRowMap(); - auto nonUniqueMap = A->getColMap(); + auto uniqueMap = A->getRowMap(); + auto nonUniqueMap = A->getColMap(); - // Construct ghosted coordinates - RCP importer; - { - SubFactoryMonitor m2(*this, "Coords Import construction", currentLevel); - importer = ImportFactory::Build(uniqueMap, nonUniqueMap); - } - RCP ghostedCoords; - { - SubFactoryMonitor m2(*this, "Ghosted coords construction", currentLevel); - ghostedCoords = Xpetra::MultiVectorFactory::magnitudeType,LO,GO,NO>::Build(nonUniqueMap, coords->getNumVectors()); - ghostedCoords->doImport(*coords, *importer, Xpetra::INSERT); - } + // Construct ghosted coordinates + RCP importer; + { + SubFactoryMonitor m2(*this, "Coords Import construction", currentLevel); + importer = ImportFactory::Build(uniqueMap, nonUniqueMap); + } + RCP ghostedCoords; + { + SubFactoryMonitor m2(*this, "Ghosted coords construction", currentLevel); + ghostedCoords = 
Xpetra::MultiVectorFactory::magnitudeType, LO, GO, NO>::Build(nonUniqueMap, coords->getNumVectors()); + ghostedCoords->doImport(*coords, *importer, Xpetra::INSERT); + } - auto ghostedCoordsView = ghostedCoords->getDeviceLocalView(Xpetra::Access::ReadWrite); - CoalesceDrop_Kokkos_Details::DistanceFunctor distFunctor(ghostedCoordsView); + auto ghostedCoordsView = ghostedCoords->getDeviceLocalView(Xpetra::Access::ReadWrite); + CoalesceDrop_Kokkos_Details::DistanceFunctor distFunctor(ghostedCoordsView); - // Construct Laplacian diagonal - RCP localLaplDiag; - { - SubFactoryMonitor m2(*this, "Local Laplacian diag construction", currentLevel); + // Construct Laplacian diagonal + RCP localLaplDiag; + { + SubFactoryMonitor m2(*this, "Local Laplacian diag construction", currentLevel); - localLaplDiag = VectorFactory::Build(uniqueMap); + localLaplDiag = VectorFactory::Build(uniqueMap); - auto localLaplDiagView = localLaplDiag->getDeviceLocalView(Xpetra::Access::OverwriteAll); - auto kokkosGraph = kokkosMatrix.graph; + auto localLaplDiagView = localLaplDiag->getDeviceLocalView(Xpetra::Access::OverwriteAll); + auto kokkosGraph = kokkosMatrix.graph; - Kokkos::parallel_for("MueLu:CoalesceDropF:Build:scalar_filter:laplacian_diag", range_type(0,numRows), + Kokkos::parallel_for( + "MueLu:CoalesceDropF:Build:scalar_filter:laplacian_diag", range_type(0, numRows), KOKKOS_LAMBDA(const LO row) { auto rowView = kokkosGraph.rowConst(row); auto length = rowView.length; @@ -679,264 +670,267 @@ namespace MueLu { for (decltype(length) colID = 0; colID < length; colID++) { auto col = rowView(colID); if (row != col) - d += impl_ATS::one()/distFunctor.distance2(row, col); + d += impl_ATS::one() / distFunctor.distance2(row, col); } - localLaplDiagView(row,0) = d; + localLaplDiagView(row, 0) = d; }); - } - - // Construct ghosted Laplacian diagonal - RCP ghostedLaplDiag; - { - SubFactoryMonitor m2(*this, "Ghosted Laplacian diag construction", currentLevel); - ghostedLaplDiag = 
VectorFactory::Build(nonUniqueMap); - ghostedLaplDiag->doImport(*localLaplDiag, *importer, Xpetra::INSERT); - } - - // Filter out entries - { - SubFactoryMonitor m2(*this, "MainLoop", currentLevel); - - auto ghostedLaplDiagView = ghostedLaplDiag->getDeviceLocalView(Xpetra::Access::ReadWrite); - - CoalesceDrop_Kokkos_Details::DistanceLaplacianDropFunctor - dropFunctor(ghostedLaplDiagView, distFunctor, threshold); - CoalesceDrop_Kokkos_Details::ScalarFunctor - scalarFunctor(kokkosMatrix, bndNodes, dropFunctor, rows, colsAux, valsAux, reuseGraph, lumping, threshold, true); - - Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:main_loop", range_type(0,numRows), - scalarFunctor, nnzFA); - } } + // Construct ghosted Laplacian diagonal + RCP ghostedLaplDiag; + { + SubFactoryMonitor m2(*this, "Ghosted Laplacian diag construction", currentLevel); + ghostedLaplDiag = VectorFactory::Build(nonUniqueMap); + ghostedLaplDiag->doImport(*localLaplDiag, *importer, Xpetra::INSERT); + } + + // Filter out entries + { + SubFactoryMonitor m2(*this, "MainLoop", currentLevel); + + auto ghostedLaplDiagView = ghostedLaplDiag->getDeviceLocalView(Xpetra::Access::ReadWrite); + + CoalesceDrop_Kokkos_Details::DistanceLaplacianDropFunctor + dropFunctor(ghostedLaplDiagView, distFunctor, threshold); + CoalesceDrop_Kokkos_Details::ScalarFunctor + scalarFunctor(kokkosMatrix, bndNodes, dropFunctor, rows, colsAux, valsAux, reuseGraph, lumping, threshold, true); + + Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:main_loop", range_type(0, numRows), + scalarFunctor, nnzFA); + } } - numDropped = nnzA - nnzFA; + } + numDropped = nnzA - nnzFA; - boundaryNodes = bndNodes; + boundaryNodes = bndNodes; - { - SubFactoryMonitor m2(*this, "CompressRows", currentLevel); + { + SubFactoryMonitor m2(*this, "CompressRows", currentLevel); - // parallel_scan (exclusive) - Kokkos::parallel_scan("MueLu:CoalesceDropF:Build:scalar_filter:compress_rows", range_type(0,numRows+1), + // 
parallel_scan (exclusive) + Kokkos::parallel_scan( + "MueLu:CoalesceDropF:Build:scalar_filter:compress_rows", range_type(0, numRows + 1), KOKKOS_LAMBDA(const LO i, LO& update, const bool& final_pass) { update += rows(i); if (final_pass) rows(i) = update; }); - } + } - // Compress cols (and optionally vals) - // We use a trick here: we moved all remaining elements to the beginning - // of the original row in the main loop, so we don't need to check for - // INVALID here, and just stop when achieving the new number of elements - // per row. - cols_type cols(Kokkos::ViewAllocateWithoutInitializing("FA_cols"), nnzFA); - vals_type vals; - if (reuseGraph) { - GetOStream(Runtime1) << "reuse matrix graph for filtering (compress matrix columns only)" << std::endl; - // Only compress cols - SubFactoryMonitor m2(*this, "CompressColsAndVals", currentLevel); - - Kokkos::parallel_for("MueLu:TentativePF:Build:compress_cols", range_type(0,numRows), + // Compress cols (and optionally vals) + // We use a trick here: we moved all remaining elements to the beginning + // of the original row in the main loop, so we don't need to check for + // INVALID here, and just stop when achieving the new number of elements + // per row. + cols_type cols(Kokkos::ViewAllocateWithoutInitializing("FA_cols"), nnzFA); + vals_type vals; + if (reuseGraph) { + GetOStream(Runtime1) << "reuse matrix graph for filtering (compress matrix columns only)" << std::endl; + // Only compress cols + SubFactoryMonitor m2(*this, "CompressColsAndVals", currentLevel); + + Kokkos::parallel_for( + "MueLu:TentativePF:Build:compress_cols", range_type(0, numRows), KOKKOS_LAMBDA(const LO i) { // Is there Kokkos memcpy? 
- LO rowStart = rows(i); - LO rowAStart = rowsA(i); - size_t rownnz = rows(i+1) - rows(i); + LO rowStart = rows(i); + LO rowAStart = rowsA(i); + size_t rownnz = rows(i + 1) - rows(i); for (size_t j = 0; j < rownnz; j++) - cols(rowStart+j) = colsAux(rowAStart+j); + cols(rowStart + j) = colsAux(rowAStart + j); }); - } else { - // Compress cols and vals - GetOStream(Runtime1) << "new matrix graph for filtering (compress matrix columns and values)" << std::endl; - SubFactoryMonitor m2(*this, "CompressColsAndVals", currentLevel); + } else { + // Compress cols and vals + GetOStream(Runtime1) << "new matrix graph for filtering (compress matrix columns and values)" << std::endl; + SubFactoryMonitor m2(*this, "CompressColsAndVals", currentLevel); - vals = vals_type(Kokkos::ViewAllocateWithoutInitializing("FA_vals"), nnzFA); + vals = vals_type(Kokkos::ViewAllocateWithoutInitializing("FA_vals"), nnzFA); - Kokkos::parallel_for("MueLu:TentativePF:Build:compress_cols", range_type(0,numRows), + Kokkos::parallel_for( + "MueLu:TentativePF:Build:compress_cols", range_type(0, numRows), KOKKOS_LAMBDA(const LO i) { - LO rowStart = rows(i); - LO rowAStart = rowsA(i); - size_t rownnz = rows(i+1) - rows(i); + LO rowStart = rows(i); + LO rowAStart = rowsA(i); + size_t rownnz = rows(i + 1) - rows(i); for (size_t j = 0; j < rownnz; j++) { - cols(rowStart+j) = colsAux(rowAStart+j); - vals(rowStart+j) = valsAux(rowAStart+j); + cols(rowStart + j) = colsAux(rowAStart + j); + vals(rowStart + j) = valsAux(rowAStart + j); } }); - } + } - kokkos_graph_type kokkosGraph(cols, rows); + kokkos_graph_type kokkosGraph(cols, rows); - { - SubFactoryMonitor m2(*this, "LWGraph construction", currentLevel); + { + SubFactoryMonitor m2(*this, "LWGraph construction", currentLevel); - graph = rcp(new LWGraph_kokkos(kokkosGraph, A->getRowMap(), A->getColMap(), "filtered graph of A")); - graph->getLocalLWGraph().SetBoundaryNodeMap(boundaryNodes); - } + graph = rcp(new LWGraph_kokkos(kokkosGraph, A->getRowMap(), 
A->getColMap(), "filtered graph of A")); + graph->getLocalLWGraph().SetBoundaryNodeMap(boundaryNodes); + } - numTotal = A->getLocalNumEntries(); + numTotal = A->getLocalNumEntries(); - dofsPerNode = 1; + dofsPerNode = 1; - if (!reuseGraph) { - SubFactoryMonitor m2(*this, "LocalMatrix+FillComplete", currentLevel); + if (!reuseGraph) { + SubFactoryMonitor m2(*this, "LocalMatrix+FillComplete", currentLevel); - local_matrix_type localFA = local_matrix_type("A", numRows, A->getLocalMatrixDevice().numCols(), nnzFA, vals, rows, cols); - auto filteredACrs = CrsMatrixFactory::Build(localFA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap(), - A->getCrsGraph()->getImporter(), A->getCrsGraph()->getExporter()); - filteredA = rcp(new CrsMatrixWrap(filteredACrs)); - } + local_matrix_type localFA = local_matrix_type("A", numRows, A->getLocalMatrixDevice().numCols(), nnzFA, vals, rows, cols); + auto filteredACrs = CrsMatrixFactory::Build(localFA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap(), + A->getCrsGraph()->getImporter(), A->getCrsGraph()->getExporter()); + filteredA = rcp(new CrsMatrixWrap(filteredACrs)); + } - filteredA->SetFixedBlockSize(A->GetFixedBlockSize()); + filteredA->SetFixedBlockSize(A->GetFixedBlockSize()); - if (pL.get("filtered matrix: reuse eigenvalue")) { - // Reuse max eigenvalue from A - // It is unclear what eigenvalue is the best for the smoothing, but we already may have - // the D^{-1}A estimate in A, may as well use it. - // NOTE: ML does that too - filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate()); - } else { - filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits::one()); - } + if (pL.get("filtered matrix: reuse eigenvalue")) { + // Reuse max eigenvalue from A + // It is unclear what eigenvalue is the best for the smoothing, but we already may have + // the D^{-1}A estimate in A, may as well use it. 
+ // NOTE: ML does that too + filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate()); + } else { + filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits::one()); + } - } else if (blkSize > 1 && threshold == zero) { - // Case 3: block problem without filtering - // - // FIXME_KOKKOS: this code is completely unoptimized. It really should do - // a very simple thing: merge rows and produce nodal graph. But the code - // seems very complicated. Can we do better? - - TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements() % blkSize != 0, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: Number of local elements is " << A->getRowMap()->getLocalNumElements() << " but should be a multiply of " << blkSize); - - const RCP rowMap = A->getRowMap(); - const RCP colMap = A->getColMap(); - - // build a node row map (uniqueMap = non-overlapping) and a node column map - // (nonUniqueMap = overlapping). The arrays rowTranslation and colTranslation - // stored in the AmalgamationInfo class container contain the local node id - // given a local dof id. The data is calculated in the AmalgamationFactory and - // stored in the variable "UnAmalgamationInfo" (which is of type AmalagamationInfo) - const RCP uniqueMap = amalInfo->getNodeRowMap(); - const RCP nonUniqueMap = amalInfo->getNodeColMap(); - Array rowTranslationArray = *(amalInfo->getRowTranslation()); // TAW should be transform that into a View? 
- Array colTranslationArray = *(amalInfo->getColTranslation()); - - Kokkos::View - rowTranslationView(rowTranslationArray.getRawPtr(),rowTranslationArray.size() ); - Kokkos::View - colTranslationView(colTranslationArray.getRawPtr(),colTranslationArray.size() ); - - // get number of local nodes - LO numNodes = Teuchos::as(uniqueMap->getLocalNumElements()); - typedef typename Kokkos::View id_translation_type; - id_translation_type rowTranslation("dofId2nodeId",rowTranslationArray.size()); - id_translation_type colTranslation("ov_dofId2nodeId",colTranslationArray.size()); - Kokkos::deep_copy(rowTranslation, rowTranslationView); - Kokkos::deep_copy(colTranslation, colTranslationView); - - // extract striding information - blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map) - LocalOrdinal blkId = -1; //< the block id within a strided map or -1 if it is a full block map - LocalOrdinal blkPartSize = A->GetFixedBlockSize(); //< stores block size of part blkId (or the full block size) - if(A->IsView("stridedMaps") == true) { - const RCP myMap = A->getRowMap("stridedMaps"); - const RCP strMap = Teuchos::rcp_dynamic_cast(myMap); - TEUCHOS_TEST_FOR_EXCEPTION(strMap.is_null() == true, Exceptions::RuntimeError, "Map is not of type stridedMap"); - blkSize = Teuchos::as(strMap->getFixedBlockSize()); - blkId = strMap->getStridedBlockId(); - if (blkId > -1) - blkPartSize = Teuchos::as(strMap->getStridingData()[blkId]); - } - auto kokkosMatrix = A->getLocalMatrixDevice(); // access underlying kokkos data - - // - typedef typename LWGraph_kokkos::local_graph_type kokkos_graph_type; - typedef typename kokkos_graph_type::row_map_type row_map_type; - //typedef typename row_map_type::HostMirror row_map_type_h; - typedef typename kokkos_graph_type::entries_type entries_type; - - // Stage 1c: get number of dof-nonzeros per blkSize node rows - typename row_map_type::non_const_type dofNnz("nnz_map", numNodes + 1); - LO numDofCols = 0; - 
CoalesceDrop_Kokkos_Details::Stage1aVectorFunctor stage1aFunctor(kokkosMatrix, dofNnz, blkPartSize); - Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:stage1a", range_type(0,numNodes), stage1aFunctor, numDofCols); - // parallel_scan (exclusive) - CoalesceDrop_Kokkos_Details::ScanFunctor scanFunctor(dofNnz); - Kokkos::parallel_scan("MueLu:CoalesceDropF:Build:scalar_filter:stage1_scan", range_type(0,numNodes+1), scanFunctor); + } else if (blkSize > 1 && threshold == zero) { + // Case 3: block problem without filtering + // + // FIXME_KOKKOS: this code is completely unoptimized. It really should do + // a very simple thing: merge rows and produce nodal graph. But the code + // seems very complicated. Can we do better? + + TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements() % blkSize != 0, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: Number of local elements is " << A->getRowMap()->getLocalNumElements() << " but should be a multiply of " << blkSize); + + const RCP rowMap = A->getRowMap(); + const RCP colMap = A->getColMap(); + + // build a node row map (uniqueMap = non-overlapping) and a node column map + // (nonUniqueMap = overlapping). The arrays rowTranslation and colTranslation + // stored in the AmalgamationInfo class container contain the local node id + // given a local dof id. The data is calculated in the AmalgamationFactory and + // stored in the variable "UnAmalgamationInfo" (which is of type AmalagamationInfo) + const RCP uniqueMap = amalInfo->getNodeRowMap(); + const RCP nonUniqueMap = amalInfo->getNodeColMap(); + Array rowTranslationArray = *(amalInfo->getRowTranslation()); // TAW should be transform that into a View? 
+ Array colTranslationArray = *(amalInfo->getColTranslation()); + + Kokkos::View + rowTranslationView(rowTranslationArray.getRawPtr(), rowTranslationArray.size()); + Kokkos::View + colTranslationView(colTranslationArray.getRawPtr(), colTranslationArray.size()); + + // get number of local nodes + LO numNodes = Teuchos::as(uniqueMap->getLocalNumElements()); + typedef typename Kokkos::View id_translation_type; + id_translation_type rowTranslation("dofId2nodeId", rowTranslationArray.size()); + id_translation_type colTranslation("ov_dofId2nodeId", colTranslationArray.size()); + Kokkos::deep_copy(rowTranslation, rowTranslationView); + Kokkos::deep_copy(colTranslation, colTranslationView); + + // extract striding information + blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map) + LocalOrdinal blkId = -1; //< the block id within a strided map or -1 if it is a full block map + LocalOrdinal blkPartSize = A->GetFixedBlockSize(); //< stores block size of part blkId (or the full block size) + if (A->IsView("stridedMaps") == true) { + const RCP myMap = A->getRowMap("stridedMaps"); + const RCP strMap = Teuchos::rcp_dynamic_cast(myMap); + TEUCHOS_TEST_FOR_EXCEPTION(strMap.is_null() == true, Exceptions::RuntimeError, "Map is not of type stridedMap"); + blkSize = Teuchos::as(strMap->getFixedBlockSize()); + blkId = strMap->getStridedBlockId(); + if (blkId > -1) + blkPartSize = Teuchos::as(strMap->getStridingData()[blkId]); + } + auto kokkosMatrix = A->getLocalMatrixDevice(); // access underlying kokkos data - // Detect and record dof rows that correspond to Dirichlet boundary conditions - boundary_nodes_type singleEntryRows = Utilities::DetectDirichletRows_kokkos(*A, dirichletThreshold); + // + typedef typename LWGraph_kokkos::local_graph_type kokkos_graph_type; + typedef typename kokkos_graph_type::row_map_type row_map_type; + //typedef typename row_map_type::HostMirror row_map_type_h; + typedef typename kokkos_graph_type::entries_type 
entries_type; - typename entries_type::non_const_type dofcols("dofcols", numDofCols/*dofNnz(numNodes)*/); // why does dofNnz(numNodes) work? should be a parallel reduce, i guess + // Stage 1c: get number of dof-nonzeros per blkSize node rows + typename row_map_type::non_const_type dofNnz("nnz_map", numNodes + 1); + LO numDofCols = 0; + CoalesceDrop_Kokkos_Details::Stage1aVectorFunctor stage1aFunctor(kokkosMatrix, dofNnz, blkPartSize); + Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:stage1a", range_type(0, numNodes), stage1aFunctor, numDofCols); + // parallel_scan (exclusive) + CoalesceDrop_Kokkos_Details::ScanFunctor scanFunctor(dofNnz); + Kokkos::parallel_scan("MueLu:CoalesceDropF:Build:scalar_filter:stage1_scan", range_type(0, numNodes + 1), scanFunctor); - // we have dofcols and dofids from Stage1dVectorFunctor - LO numNodeCols = 0; - typename row_map_type::non_const_type rows("nnz_nodemap", numNodes + 1); - typename boundary_nodes_type::non_const_type bndNodes("boundaryNodes", numNodes); + // Detect and record dof rows that correspond to Dirichlet boundary conditions + boundary_nodes_type singleEntryRows = Utilities::DetectDirichletRows_kokkos(*A, dirichletThreshold); - CoalesceDrop_Kokkos_Details::Stage1bcVectorFunctor stage1bcFunctor(kokkosMatrix, dofNnz, blkPartSize, dofcols, colTranslation, rows, singleEntryRows, bndNodes, pL.get("aggregation: greedy Dirichlet")); - Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:stage1c", range_type(0,numNodes), stage1bcFunctor,numNodeCols); + typename entries_type::non_const_type dofcols("dofcols", numDofCols /*dofNnz(numNodes)*/); // why does dofNnz(numNodes) work? 
should be a parallel reduce, i guess - // parallel_scan (exclusive) - CoalesceDrop_Kokkos_Details::ScanFunctor scanNodeFunctor(rows); - Kokkos::parallel_scan("MueLu:CoalesceDropF:Build:scalar_filter:stage1_scan", range_type(0,numNodes+1), scanNodeFunctor); + // we have dofcols and dofids from Stage1dVectorFunctor + LO numNodeCols = 0; + typename row_map_type::non_const_type rows("nnz_nodemap", numNodes + 1); + typename boundary_nodes_type::non_const_type bndNodes("boundaryNodes", numNodes); - // create column node view - typename entries_type::non_const_type cols("nodecols", numNodeCols); + CoalesceDrop_Kokkos_Details::Stage1bcVectorFunctor stage1bcFunctor(kokkosMatrix, dofNnz, blkPartSize, dofcols, colTranslation, rows, singleEntryRows, bndNodes, pL.get("aggregation: greedy Dirichlet")); + Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:stage1c", range_type(0, numNodes), stage1bcFunctor, numNodeCols); + // parallel_scan (exclusive) + CoalesceDrop_Kokkos_Details::ScanFunctor scanNodeFunctor(rows); + Kokkos::parallel_scan("MueLu:CoalesceDropF:Build:scalar_filter:stage1_scan", range_type(0, numNodes + 1), scanNodeFunctor); - CoalesceDrop_Kokkos_Details::Stage1dVectorFunctor stage1dFunctor(dofcols, dofNnz, cols, rows); - Kokkos::parallel_for("MueLu:CoalesceDropF:Build:scalar_filter:stage1c", range_type(0,numNodes), stage1dFunctor); - kokkos_graph_type kokkosGraph(cols, rows); + // create column node view + typename entries_type::non_const_type cols("nodecols", numNodeCols); - // create LW graph - graph = rcp(new LWGraph_kokkos(kokkosGraph, uniqueMap, nonUniqueMap, "amalgamated graph of A")); + CoalesceDrop_Kokkos_Details::Stage1dVectorFunctor stage1dFunctor(dofcols, dofNnz, cols, rows); + Kokkos::parallel_for("MueLu:CoalesceDropF:Build:scalar_filter:stage1c", range_type(0, numNodes), stage1dFunctor); + kokkos_graph_type kokkosGraph(cols, rows); - boundaryNodes = bndNodes; - graph->getLocalLWGraph().SetBoundaryNodeMap(boundaryNodes); - numTotal = 
A->getLocalNumEntries(); + // create LW graph + graph = rcp(new LWGraph_kokkos(kokkosGraph, uniqueMap, nonUniqueMap, "amalgamated graph of A")); - dofsPerNode = blkSize; + boundaryNodes = bndNodes; + graph->getLocalLWGraph().SetBoundaryNodeMap(boundaryNodes); + numTotal = A->getLocalNumEntries(); - filteredA = A; + dofsPerNode = blkSize; - } else { - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu: CoalesceDropFactory_kokkos: Block filtering is not implemented"); - } + filteredA = A; + + } else { + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu: CoalesceDropFactory_kokkos: Block filtering is not implemented"); + } - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; - Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:bnd", range_type(0, boundaryNodes.extent(0)), + Kokkos::parallel_reduce( + "MueLu:CoalesceDropF:Build:bnd", range_type(0, boundaryNodes.extent(0)), KOKKOS_LAMBDA(const LO i, GO& n) { if (boundaryNodes(i)) n++; - }, numLocalBoundaryNodes); + }, + numLocalBoundaryNodes); - auto comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; - } + auto comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; + } - if ((GetVerbLevel() & Statistics1) && threshold != zero) { - auto comm = A->getRowMap()->getComm(); + if ((GetVerbLevel() & Statistics1) && threshold != zero) { + auto comm = A->getRowMap()->getComm(); - GO numGlobalTotal, numGlobalDropped; - MueLu_sumAll(comm, numTotal, numGlobalTotal); - MueLu_sumAll(comm, numDropped, numGlobalDropped); + GO numGlobalTotal, 
numGlobalDropped; + MueLu_sumAll(comm, numTotal, numGlobalTotal); + MueLu_sumAll(comm, numDropped, numGlobalDropped); - if (numGlobalTotal != 0) { - GetOStream(Statistics1) << "Number of dropped entries: " - << numGlobalDropped << "/" << numGlobalTotal - << " (" << 100*Teuchos::as(numGlobalDropped)/Teuchos::as(numGlobalTotal) << "%)" << std::endl; - } + if (numGlobalTotal != 0) { + GetOStream(Statistics1) << "Number of dropped entries: " + << numGlobalDropped << "/" << numGlobalTotal + << " (" << 100 * Teuchos::as(numGlobalDropped) / Teuchos::as(numGlobalTotal) << "%)" << std::endl; } - - Set(currentLevel, "DofsPerNode", dofsPerNode); - Set(currentLevel, "Graph", graph); - Set(currentLevel, "A", filteredA); } + + Set(currentLevel, "DofsPerNode", dofsPerNode); + Set(currentLevel, "Graph", graph); + Set(currentLevel, "A", filteredA); } -#endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP +} // namespace MueLu +#endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionBaseClass_decl.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionBaseClass_decl.hpp index 7f94599c3ba1..8d2ebc59c3f6 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionBaseClass_decl.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionBaseClass_decl.hpp @@ -46,35 +46,32 @@ #ifndef MUELU_PREDROPFUNCTIONBASECLASS_DECL_HPP #define MUELU_PREDROPFUNCTIONBASECLASS_DECL_HPP - #include "MueLu_ConfigDefs.hpp" #include "MueLu_BaseClass.hpp" #include "MueLu_PreDropFunctionBaseClass_fwd.hpp" namespace MueLu { - /*! +/*! * Base class you can derive from to allow user defined dropping * */ - template - class PreDropFunctionBaseClass : public BaseClass { +template +class PreDropFunctionBaseClass : public BaseClass { #undef MUELU_PREDROPFUNCTIONBASECLASS_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! Destructor - virtual ~PreDropFunctionBaseClass() { } - - //! 
Drop - virtual bool Drop(size_t lrow, GlobalOrdinal grow, size_t k, LocalOrdinal lcid, GlobalOrdinal gcid, const Teuchos::ArrayView & indices, const Teuchos::ArrayView & vals) = 0; + public: + //! Destructor + virtual ~PreDropFunctionBaseClass() {} - }; -} + //! Drop + virtual bool Drop(size_t lrow, GlobalOrdinal grow, size_t k, LocalOrdinal lcid, GlobalOrdinal gcid, const Teuchos::ArrayView& indices, const Teuchos::ArrayView& vals) = 0; +}; +} // namespace MueLu #define MUELU_PREDROPFUNCTIONBASECLASS_SHORT -#endif // MUELU_PREDROPFUNCTIONBASECLASS_DECL_HPP +#endif // MUELU_PREDROPFUNCTIONBASECLASS_DECL_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionConstVal_decl.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionConstVal_decl.hpp index a42b58f56b18..4a56f6008378 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionConstVal_decl.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionConstVal_decl.hpp @@ -50,31 +50,28 @@ #include "MueLu_PreDropFunctionBaseClass.hpp" #include "MueLu_PreDropFunctionConstVal_fwd.hpp" - namespace MueLu { - /*! +/*! * Example implementation for dropping values smaller then a constant threshold * */ - template - class PreDropFunctionConstVal : - public MueLu::PreDropFunctionBaseClass { +template +class PreDropFunctionConstVal : public MueLu::PreDropFunctionBaseClass { #undef MUELU_PREDROPFUNCTIONCONSTVAL_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! Constructor - explicit PreDropFunctionConstVal(const Scalar threshold = 0.0); + public: + //! Constructor + explicit PreDropFunctionConstVal(const Scalar threshold = 0.0); - //! Destructor - virtual ~PreDropFunctionConstVal() { } + //! Destructor + virtual ~PreDropFunctionConstVal() {} - /*! Drop + /*! 
Drop * @param lrow (size_t): local row index (=lrowid) * @param grow (GlobalOrdinal: global row id * @param k (size_t): local column iterator @@ -84,30 +81,28 @@ namespace MueLu { * @param vals (ArrayView): array of corresponding values in current row (lrow) * @return bool: false, if value in (lrow, lcid) shall be kept, true if it should be dropped */ - bool Drop(size_t lrow, GlobalOrdinal grow, size_t k, LocalOrdinal lcid, GlobalOrdinal gcid, const Teuchos::ArrayView & indices, const Teuchos::ArrayView & vals); - - //! Return threshold value. - Scalar GetThreshold() const; - - //! @name Overridden from Teuchos::Describable - //@{ + bool Drop(size_t lrow, GlobalOrdinal grow, size_t k, LocalOrdinal lcid, GlobalOrdinal gcid, const Teuchos::ArrayView& indices, const Teuchos::ArrayView& vals); - //! Return a simple one-line description of this object. - std::string description() const; + //! Return threshold value. + Scalar GetThreshold() const; - //! Print the object with some verbosity level to an FancyOStream object. - //using MueLu::Describable::describe; // overloading, not hiding - //void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const; + //! @name Overridden from Teuchos::Describable + //@{ - //@} + //! Return a simple one-line description of this object. + std::string description() const; - private: + //! Print the object with some verbosity level to an FancyOStream object. 
+ //using MueLu::Describable::describe; // overloading, not hiding + //void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const; - Scalar threshold_; + //@} - }; + private: + Scalar threshold_; +}; -} +} // namespace MueLu #define MUELU_PREDROPFUNCTIONCONSTVAL_SHORT -#endif // MUELU_PREDROPFUNCTIONCONSTVAL_DECL_HPP +#endif // MUELU_PREDROPFUNCTIONCONSTVAL_DECL_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionConstVal_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionConstVal_def.hpp index 4c1577d90146..5c8245caf779 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionConstVal_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionConstVal_def.hpp @@ -53,32 +53,31 @@ namespace MueLu { - template - PreDropFunctionConstVal::PreDropFunctionConstVal(const Scalar threshold) - : threshold_(threshold) { } +template +PreDropFunctionConstVal::PreDropFunctionConstVal(const Scalar threshold) + : threshold_(threshold) {} - template - bool PreDropFunctionConstVal::Drop(size_t /* lrow */, GlobalOrdinal grow, size_t k, LocalOrdinal /* lcid */, GlobalOrdinal gcid, const Teuchos::ArrayView & /* indices */, const Teuchos::ArrayView & vals) { - if(Teuchos::ScalarTraits::magnitude(vals[k]) > Teuchos::ScalarTraits::magnitude(threshold_) || grow == gcid ) { - return false; // keep values - } - return true; // values too small -> drop them - } - - template - Scalar PreDropFunctionConstVal::GetThreshold() const { - return threshold_; +template +bool PreDropFunctionConstVal::Drop(size_t /* lrow */, GlobalOrdinal grow, size_t k, LocalOrdinal /* lcid */, GlobalOrdinal gcid, const Teuchos::ArrayView& /* indices */, const Teuchos::ArrayView& vals) { + if (Teuchos::ScalarTraits::magnitude(vals[k]) > Teuchos::ScalarTraits::magnitude(threshold_) || grow == gcid) { + return false; // keep values } + return true; // values too small -> drop them +} +template 
+Scalar PreDropFunctionConstVal::GetThreshold() const { + return threshold_; +} - template - std::string PreDropFunctionConstVal::description() const { - std::ostringstream out; - out << "PreDropFunctionConstVal: threshold = " << threshold_ << std::endl; - return out.str(); - } +template +std::string PreDropFunctionConstVal::description() const { + std::ostringstream out; + out << "PreDropFunctionConstVal: threshold = " << threshold_ << std::endl; + return out.str(); +} - /*template +/*template void PreDropFunctionConstVal::describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel) const { MUELU_DESCRIBE; if (verbLevel & Parameters0) { @@ -86,7 +85,7 @@ namespace MueLu { } }*/ -} +} // namespace MueLu #define MUELU_PREDROPFUNCTIONCONSTVAL_SHORT -#endif // MUELU_PREDROPFUNCTIONCONSTVAL_DEF_HPP +#endif // MUELU_PREDROPFUNCTIONCONSTVAL_DEF_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_SmooVecCoalesceDropFactory_decl.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_SmooVecCoalesceDropFactory_decl.hpp index b763c93cd955..7c8e3b65936f 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_SmooVecCoalesceDropFactory_decl.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_SmooVecCoalesceDropFactory_decl.hpp @@ -65,7 +65,7 @@ namespace MueLu { - /*! +/*! @class SmooVecCoalesceDropFactory @brief Factory for creating a graph base on a given matrix. @@ -114,54 +114,51 @@ namespace MueLu { */ - template - class SmooVecCoalesceDropFactory : public SingleLevelFactoryBase { +template +class SmooVecCoalesceDropFactory : public SingleLevelFactoryBase { #undef MUELU_SMOOVECCOALESCEDROPFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: + public: + //! @name Constructors/Destructors. + //@{ - //! @name Constructors/Destructors. - //@{ + //! Constructor + SmooVecCoalesceDropFactory(); - //! Constructor - SmooVecCoalesceDropFactory(); + //! Destructor + virtual ~SmooVecCoalesceDropFactory() {} - //! 
Destructor - virtual ~SmooVecCoalesceDropFactory() { } + RCP GetValidParameterList() const; - RCP GetValidParameterList() const; + //@} - //@} + //! Input + //@{ - //! Input - //@{ + void DeclareInput(Level& currentLevel) const; - void DeclareInput(Level &currentLevel) const; + /// set predrop function + void SetPreDropFunction(const RCP >& predrop) { predrop_ = predrop; } - /// set predrop function - void SetPreDropFunction(const RCP > &predrop) { predrop_ = predrop; } + //@} - //@} + void Build(Level& currentLevel) const; // Build - void Build(Level &currentLevel) const; // Build + private: + // pre-drop function + mutable RCP predrop_; - private: + //! Methods to support compatible-relaxation style dropping + void badGuysCoalesceDrop(const Matrix& Amat, Teuchos::ArrayRCP& dropParams, LO nPDEs, const MultiVector& smoothedTVecs, const MultiVector& smoothedNull, RCP& filteredGraph) const; + void badGuysDropfunc(LO row, const Teuchos::ArrayView& indices, const Teuchos::ArrayView& vals, const MultiVector& smoothedTVecs, LO nPDEs, Teuchos::ArrayRCP& penalties, const MultiVector& smoothedNull, Teuchos::ArrayRCP& Bcols, Teuchos::ArrayRCP& keepOrNot, LO& Nbcols, LO nLoc) const; - // pre-drop function - mutable - RCP predrop_; +}; //class SmooVecCoalesceDropFactory - //! 
Methods to support compatible-relaxation style dropping - void badGuysCoalesceDrop(const Matrix& Amat, Teuchos::ArrayRCP & dropParams, LO nPDEs, const MultiVector& smoothedTVecs, const MultiVector& smoothedNull, RCP& filteredGraph) const; - void badGuysDropfunc(LO row, const Teuchos::ArrayView& indices, const Teuchos::ArrayView& vals, const MultiVector& smoothedTVecs, LO nPDEs, Teuchos::ArrayRCP & penalties, const MultiVector& smoothedNull, Teuchos::ArrayRCP& Bcols, Teuchos::ArrayRCP& keepOrNot, LO &Nbcols, LO nLoc) const; - - }; //class SmooVecCoalesceDropFactory - -} //namespace MueLu +} //namespace MueLu #define MUELU_SMOOVECCOALESCEDROPFACTORY_SHORT -#endif // MUELU_SMOOVECCOALESCEDROPFACTORY_DECL_HPP +#endif // MUELU_SMOOVECCOALESCEDROPFACTORY_DECL_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_SmooVecCoalesceDropFactory_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_SmooVecCoalesceDropFactory_def.hpp index 24a9a8334275..5c1f6202f7b4 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_SmooVecCoalesceDropFactory_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_SmooVecCoalesceDropFactory_def.hpp @@ -69,7 +69,6 @@ #include "MueLu_Monitor.hpp" #include "MueLu_PreDropFunctionBaseClass.hpp" - #include #include @@ -84,160 +83,154 @@ #include #include - -#define poly0thOrderCoef 0 -#define poly1stOrderCoef 1 -#define poly2ndOrderCoef 2 -#define poly3rdOrderCoef 3 -#define poly4thOrderCoef 4 +#define poly0thOrderCoef 0 +#define poly1stOrderCoef 1 +#define poly2ndOrderCoef 2 +#define poly3rdOrderCoef 3 +#define poly4thOrderCoef 4 namespace MueLu { - template - RCP SmooVecCoalesceDropFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP SmooVecCoalesceDropFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - 
SET_VALID_ENTRY("aggregation: drop scheme"); - { - typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; - validParamList->getEntry("aggregation: drop scheme").setValidator( - rcp(new validatorType(Teuchos::tuple("unsupported vector smoothing"), "aggregation: drop scheme"))); - } - SET_VALID_ENTRY("aggregation: number of random vectors"); - SET_VALID_ENTRY("aggregation: number of times to pre or post smooth"); - SET_VALID_ENTRY("aggregation: penalty parameters"); -#undef SET_VALID_ENTRY - - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A"); - validParamList->set< RCP >("PreSmoother", Teuchos::null, "Generating factory of the PreSmoother"); - validParamList->set< RCP >("PostSmoother", Teuchos::null, "Generating factory of the PostSmoother"); - - return validParamList; + SET_VALID_ENTRY("aggregation: drop scheme"); + { + typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; + validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new validatorType(Teuchos::tuple("unsupported vector smoothing"), "aggregation: drop scheme"))); } - - template - SmooVecCoalesceDropFactory::SmooVecCoalesceDropFactory() : predrop_(Teuchos::null) { } - - template - void SmooVecCoalesceDropFactory::DeclareInput(Level &currentLevel) const { - Input(currentLevel, "A"); - if (currentLevel.IsAvailable("PreSmoother")) { // rst: totally unsure that this is legal - Input(currentLevel, "PreSmoother"); // my guess is that this is not yet available - } // so this always comes out false. - else if (currentLevel.IsAvailable("PostSmoother")) { // perhaps we can look on the param list? 
- Input(currentLevel, "PostSmoother"); - } + SET_VALID_ENTRY("aggregation: number of random vectors"); + SET_VALID_ENTRY("aggregation: number of times to pre or post smooth"); + SET_VALID_ENTRY("aggregation: penalty parameters"); +#undef SET_VALID_ENTRY + + validParamList->set >("A", Teuchos::null, "Generating factory of the matrix A"); + validParamList->set >("PreSmoother", Teuchos::null, "Generating factory of the PreSmoother"); + validParamList->set >("PostSmoother", Teuchos::null, "Generating factory of the PostSmoother"); + + return validParamList; +} + +template +SmooVecCoalesceDropFactory::SmooVecCoalesceDropFactory() + : predrop_(Teuchos::null) {} + +template +void SmooVecCoalesceDropFactory::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "A"); + if (currentLevel.IsAvailable("PreSmoother")) { // rst: totally unsure that this is legal + Input(currentLevel, "PreSmoother"); // my guess is that this is not yet available + } // so this always comes out false. + else if (currentLevel.IsAvailable("PostSmoother")) { // perhaps we can look on the param list? 
+ Input(currentLevel, "PostSmoother"); } +} - template - void SmooVecCoalesceDropFactory::Build(Level &currentLevel) const { - - FactoryMonitor m(*this, "Build", currentLevel); +template +void SmooVecCoalesceDropFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); - typedef Teuchos::ScalarTraits STS; + typedef Teuchos::ScalarTraits STS; - if (predrop_ != Teuchos::null) - GetOStream(Parameters0) << predrop_->description(); + if (predrop_ != Teuchos::null) + GetOStream(Parameters0) << predrop_->description(); - RCP A = Get< RCP >(currentLevel, "A"); + RCP A = Get >(currentLevel, "A"); - const ParameterList & pL = GetParameterList(); + const ParameterList& pL = GetParameterList(); - LO nPDEs = A->GetFixedBlockSize(); + LO nPDEs = A->GetFixedBlockSize(); - RCP< MultiVector > testVecs; - RCP< MultiVector > nearNull; + RCP testVecs; + RCP nearNull; #ifdef takeOut - testVecs = Xpetra::IO::ReadMultiVector("TpetraTVecs.mm", A->getRowMap()); + testVecs = Xpetra::IO::ReadMultiVector("TpetraTVecs.mm", A->getRowMap()); #endif - size_t numRandom= as(pL.get("aggregation: number of random vectors")); - testVecs = MultiVectorFactory::Build(A->getRowMap(), numRandom, true); - // use random test vectors but should be positive in order to not get - // crummy results ... so take abs() of randomize(). 
- testVecs->randomize(); - for (size_t kk = 0; kk < testVecs->getNumVectors(); kk++ ) { - Teuchos::ArrayRCP< Scalar > curVec = testVecs->getDataNonConst(kk); - for (size_t ii = kk; ii < as(A->getRowMap()->getLocalNumElements()); ii++ ) curVec[ii] = Teuchos::ScalarTraits::magnitude(curVec[ii]); - } - nearNull = MultiVectorFactory::Build(A->getRowMap(), nPDEs, true); - - // initialize null space to constants - for (size_t kk = 0; kk < nearNull->getNumVectors(); kk++ ) { - Teuchos::ArrayRCP< Scalar > curVec = nearNull->getDataNonConst(kk); - for (size_t ii = kk; ii < as(A->getRowMap()->getLocalNumElements()); ii += nearNull->getNumVectors() ) curVec[ii] = Teuchos::ScalarTraits::one(); - } - - RCP< MultiVector > zeroVec_TVecs; - RCP< MultiVector > zeroVec_Null; - - zeroVec_TVecs = MultiVectorFactory::Build(A->getRowMap(), testVecs->getNumVectors(), true); - zeroVec_Null = MultiVectorFactory::Build(A->getRowMap(), nPDEs, true); - zeroVec_TVecs->putScalar(Teuchos::ScalarTraits::zero()); - zeroVec_Null->putScalar( Teuchos::ScalarTraits::zero()); - - size_t nInvokeSmoother=as(pL.get("aggregation: number of times to pre or post smooth")); - if (currentLevel.IsAvailable("PreSmoother")) { - RCP preSmoo = currentLevel.Get< RCP >("PreSmoother"); - for (size_t ii = 0; ii < nInvokeSmoother; ii++) preSmoo->Apply(*testVecs,*zeroVec_TVecs,false); - for (size_t ii = 0; ii < nInvokeSmoother; ii++) preSmoo->Apply(*nearNull,*zeroVec_Null,false); - } - else if (currentLevel.IsAvailable("PostSmoother")) { - RCP postSmoo = currentLevel.Get< RCP >("PostSmoother"); - for (size_t ii = 0; ii < nInvokeSmoother; ii++) postSmoo->Apply(*testVecs,*zeroVec_TVecs,false); - for (size_t ii = 0; ii < nInvokeSmoother; ii++) postSmoo->Apply(*nearNull, *zeroVec_Null,false); - } - else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "Must set a smoother"); - - Teuchos::ArrayRCP penaltyPolyCoef(5); - Teuchos::ArrayView inputPolyCoef; - - penaltyPolyCoef[poly0thOrderCoef] = 12.; - 
penaltyPolyCoef[poly1stOrderCoef] = -.2; - penaltyPolyCoef[poly2ndOrderCoef] = 0.0; - penaltyPolyCoef[poly3rdOrderCoef] = 0.0; - penaltyPolyCoef[poly4thOrderCoef] = 0.0; - - if(pL.isParameter("aggregation: penalty parameters") && pL.get >("aggregation: penalty parameters").size() > 0) { - if (pL.get >("aggregation: penalty parameters").size() > penaltyPolyCoef.size()) - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "Number of penalty parameters must be " << penaltyPolyCoef.size() << " or less"); - inputPolyCoef = pL.get >("aggregation: penalty parameters")(); - - for (size_t i = 0; i < as(inputPolyCoef.size()) ; i++) penaltyPolyCoef[i] = as(inputPolyCoef[i]); - for (size_t i = as(inputPolyCoef.size()); i < as(penaltyPolyCoef.size()); i++) penaltyPolyCoef[i] = Teuchos::ScalarTraits::zero(); - } - - - RCP filteredGraph; - badGuysCoalesceDrop(*A, penaltyPolyCoef, nPDEs, *testVecs, *nearNull, filteredGraph); + size_t numRandom = as(pL.get("aggregation: number of random vectors")); + testVecs = MultiVectorFactory::Build(A->getRowMap(), numRandom, true); + // use random test vectors but should be positive in order to not get + // crummy results ... so take abs() of randomize(). 
+ testVecs->randomize(); + for (size_t kk = 0; kk < testVecs->getNumVectors(); kk++) { + Teuchos::ArrayRCP curVec = testVecs->getDataNonConst(kk); + for (size_t ii = kk; ii < as(A->getRowMap()->getLocalNumElements()); ii++) curVec[ii] = Teuchos::ScalarTraits::magnitude(curVec[ii]); + } + nearNull = MultiVectorFactory::Build(A->getRowMap(), nPDEs, true); + + // initialize null space to constants + for (size_t kk = 0; kk < nearNull->getNumVectors(); kk++) { + Teuchos::ArrayRCP curVec = nearNull->getDataNonConst(kk); + for (size_t ii = kk; ii < as(A->getRowMap()->getLocalNumElements()); ii += nearNull->getNumVectors()) curVec[ii] = Teuchos::ScalarTraits::one(); + } + + RCP zeroVec_TVecs; + RCP zeroVec_Null; + + zeroVec_TVecs = MultiVectorFactory::Build(A->getRowMap(), testVecs->getNumVectors(), true); + zeroVec_Null = MultiVectorFactory::Build(A->getRowMap(), nPDEs, true); + zeroVec_TVecs->putScalar(Teuchos::ScalarTraits::zero()); + zeroVec_Null->putScalar(Teuchos::ScalarTraits::zero()); + + size_t nInvokeSmoother = as(pL.get("aggregation: number of times to pre or post smooth")); + if (currentLevel.IsAvailable("PreSmoother")) { + RCP preSmoo = currentLevel.Get >("PreSmoother"); + for (size_t ii = 0; ii < nInvokeSmoother; ii++) preSmoo->Apply(*testVecs, *zeroVec_TVecs, false); + for (size_t ii = 0; ii < nInvokeSmoother; ii++) preSmoo->Apply(*nearNull, *zeroVec_Null, false); + } else if (currentLevel.IsAvailable("PostSmoother")) { + RCP postSmoo = currentLevel.Get >("PostSmoother"); + for (size_t ii = 0; ii < nInvokeSmoother; ii++) postSmoo->Apply(*testVecs, *zeroVec_TVecs, false); + for (size_t ii = 0; ii < nInvokeSmoother; ii++) postSmoo->Apply(*nearNull, *zeroVec_Null, false); + } else + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "Must set a smoother"); + + Teuchos::ArrayRCP penaltyPolyCoef(5); + Teuchos::ArrayView inputPolyCoef; + + penaltyPolyCoef[poly0thOrderCoef] = 12.; + penaltyPolyCoef[poly1stOrderCoef] = -.2; + 
penaltyPolyCoef[poly2ndOrderCoef] = 0.0; + penaltyPolyCoef[poly3rdOrderCoef] = 0.0; + penaltyPolyCoef[poly4thOrderCoef] = 0.0; + + if (pL.isParameter("aggregation: penalty parameters") && pL.get >("aggregation: penalty parameters").size() > 0) { + if (pL.get >("aggregation: penalty parameters").size() > penaltyPolyCoef.size()) + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "Number of penalty parameters must be " << penaltyPolyCoef.size() << " or less"); + inputPolyCoef = pL.get >("aggregation: penalty parameters")(); + + for (size_t i = 0; i < as(inputPolyCoef.size()); i++) penaltyPolyCoef[i] = as(inputPolyCoef[i]); + for (size_t i = as(inputPolyCoef.size()); i < as(penaltyPolyCoef.size()); i++) penaltyPolyCoef[i] = Teuchos::ScalarTraits::zero(); + } + RCP filteredGraph; + badGuysCoalesceDrop(*A, penaltyPolyCoef, nPDEs, *testVecs, *nearNull, filteredGraph); #ifdef takeOut - /* write out graph for serial debugging purposes only. */ - - FILE* fp = fopen("codeOutput","w"); - fprintf(fp,"%d %d %d\n",(int) filteredGraph->GetNodeNumVertices(),(int) filteredGraph->GetNodeNumVertices(), - (int) filteredGraph->GetNodeNumEdges()); - for (size_t i = 0; i < filteredGraph->GetNodeNumVertices(); i++) { - ArrayView inds = filteredGraph->getNeighborVertices(as(i)); - for (size_t j = 0; j < as(inds.size()); j++) { - fprintf(fp,"%d %d 1.00e+00\n",(int) i+1,(int) inds[j]+1); - } - } - fclose(fp); + /* write out graph for serial debugging purposes only. 
*/ + + FILE* fp = fopen("codeOutput", "w"); + fprintf(fp, "%d %d %d\n", (int)filteredGraph->GetNodeNumVertices(), (int)filteredGraph->GetNodeNumVertices(), + (int)filteredGraph->GetNodeNumEdges()); + for (size_t i = 0; i < filteredGraph->GetNodeNumVertices(); i++) { + ArrayView inds = filteredGraph->getNeighborVertices(as(i)); + for (size_t j = 0; j < as(inds.size()); j++) { + fprintf(fp, "%d %d 1.00e+00\n", (int)i + 1, (int)inds[j] + 1); + } + } + fclose(fp); #endif - SC threshold = .01; - Set(currentLevel, "Filtering", (threshold != STS::zero())); - Set(currentLevel, "Graph", filteredGraph); - Set(currentLevel, "DofsPerNode", 1); + SC threshold = .01; + Set(currentLevel, "Filtering", (threshold != STS::zero())); + Set(currentLevel, "Graph", filteredGraph); + Set(currentLevel, "DofsPerNode", 1); - } //Build +} //Build - template - void SmooVecCoalesceDropFactory::badGuysCoalesceDrop(const Matrix& Amat, Teuchos::ArrayRCP & penaltyPolyCoef , LO nPDEs, const MultiVector& testVecs, const MultiVector& nearNull, RCP& filteredGraph) const { +template +void SmooVecCoalesceDropFactory::badGuysCoalesceDrop(const Matrix& Amat, Teuchos::ArrayRCP& penaltyPolyCoef, LO nPDEs, const MultiVector& testVecs, const MultiVector& nearNull, RCP& filteredGraph) const { /* * Compute coalesce/drop graph (in filteredGraph) for A. The basic idea is to * balance trade-offs associated with @@ -274,28 +267,28 @@ namespace MueLu { * Note: testVecs is supplied by the user, but normally is the result of * applying a relaxation scheme to Au = 0 where u is initial random. 
*/ - - GO numMyNnz = Teuchos::as(Amat.getLocalNumEntries()); - size_t nLoc = Amat.getRowMap()->getLocalNumElements(); - size_t nBlks = nLoc/nPDEs; - if (nBlks*nPDEs != nLoc ) - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "Number of local dofs not divisible by BlkSize"); + GO numMyNnz = Teuchos::as(Amat.getLocalNumEntries()); + size_t nLoc = Amat.getRowMap()->getLocalNumElements(); - Teuchos::ArrayRCP newRowPtr(nBlks+1); /* coalesce & drop matrix */ - Teuchos::ArrayRCP newCols(numMyNnz); /* arrays */ + size_t nBlks = nLoc / nPDEs; + if (nBlks * nPDEs != nLoc) + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "Number of local dofs not divisible by BlkSize"); - Teuchos::ArrayRCP bcols(nBlks); /* returned by dropfun(j,...) */ - Teuchos::ArrayRCP keepOrNot(nBlks); /* gives cols for jth row and */ - /* whether or not entry is */ - /* kept or dropped. */ + Teuchos::ArrayRCP newRowPtr(nBlks + 1); /* coalesce & drop matrix */ + Teuchos::ArrayRCP newCols(numMyNnz); /* arrays */ - LO maxNzPerRow = 200; + Teuchos::ArrayRCP bcols(nBlks); /* returned by dropfun(j,...) */ + Teuchos::ArrayRCP keepOrNot(nBlks); /* gives cols for jth row and */ + /* whether or not entry is */ + /* kept or dropped. */ + + LO maxNzPerRow = 200; Teuchos::ArrayRCP penalties(maxNzPerRow); /* Penalty function */ /* described above. */ - - Teuchos::ArrayRCP keepStatus(nBlks,true); /* accumulated keepOrNot info */ - Teuchos::ArrayRCP bColList(nBlks); /* accumulated bcols info */ + + Teuchos::ArrayRCP keepStatus(nBlks, true); /* accumulated keepOrNot info */ + Teuchos::ArrayRCP bColList(nBlks); /* accumulated bcols info */ /* for an entire block as */ /* opposed to a single row */ /* Additionally, keepOrNot[j] */ @@ -305,125 +298,123 @@ namespace MueLu { /* whether the jth block is */ /* kept within the block row. 
*/ - Teuchos::ArrayRCP alreadyOnBColList(nBlks,false); /* used to avoid recording the*/ - /* same block column when */ - /* processing different pt */ - /* rows within a block. */ - - Teuchos::ArrayRCP boundaryNodes(nBlks,false); + Teuchos::ArrayRCP alreadyOnBColList(nBlks, false); /* used to avoid recording the*/ + /* same block column when */ + /* processing different pt */ + /* rows within a block. */ + Teuchos::ArrayRCP boundaryNodes(nBlks, false); - for (LO i = 0; i < maxNzPerRow; i++) + for (LO i = 0; i < maxNzPerRow; i++) penalties[i] = penaltyPolyCoef[poly0thOrderCoef] + - penaltyPolyCoef[poly1stOrderCoef]*(as(i)) + - penaltyPolyCoef[poly2ndOrderCoef]*(as(i*i)) + - (penaltyPolyCoef[poly3rdOrderCoef]*(as(i*i))*(as(i))) + //perhaps avoids overflow? - (penaltyPolyCoef[poly4thOrderCoef]*(as(i*i))*(as(i*i))); - - LO nzTotal = 0, numBCols = 0, row = -1, Nbcols, bcol; - newRowPtr[0] = 0; - - /* proceed block by block */ - for (LO i = 0; i < as(nBlks); i++) { - newRowPtr[i+1] = newRowPtr[i]; - for (LO j = 0; j < nPDEs; j++) { - row = row + 1; - - Teuchos::ArrayView indices; - Teuchos::ArrayView vals; - - Amat.getLocalRowView(row, indices, vals); - - if (indices.size() > maxNzPerRow) { - LO oldSize = maxNzPerRow; - maxNzPerRow = indices.size() + 100; - penalties.resize(as(maxNzPerRow),0.0); - for (LO k = oldSize; k < maxNzPerRow; k++) - penalties[k] = penaltyPolyCoef[poly0thOrderCoef] + - penaltyPolyCoef[poly1stOrderCoef]*(as(i)) + - penaltyPolyCoef[poly2ndOrderCoef]*(as(i*i)) + - (penaltyPolyCoef[poly3rdOrderCoef]*(as(i*i))*(as(i))) + - (penaltyPolyCoef[poly4thOrderCoef]*(as(i*i))*(as(i*i))); - } - badGuysDropfunc(row, indices, vals, testVecs, nPDEs, penalties, nearNull, bcols,keepOrNot,Nbcols,nLoc); - for (LO k=0; k < Nbcols; k++) { - bcol = bcols[k]; - - /* add to bColList if not already on it */ - - if (alreadyOnBColList[bcol] == false) {/* for PDE systems only record */ - bColList[numBCols++] = bcol; /* neighboring block one time */ - alreadyOnBColList[bcol] = 
true; - } - /* drop if any pt row within block indicates entry should be dropped */ - - if (keepOrNot[k] == false) keepStatus[bcol] = false; - - } /* for (k=0; k < Nbcols; k++) */ - } /* for (j = 0; i < nPDEs; j++) */ - - /* finished with block row. Now record block entries that we keep */ - /* and reset keepStatus, bColList, and alreadyOnBColList. */ - - if ( numBCols < 2) boundaryNodes[i] = true; - for (LO j=0; j < numBCols; j++) { - bcol = bColList[j]; - if (keepStatus[bcol] == true) { - newCols[nzTotal] = bColList[j]; - newRowPtr[i+1]++; - nzTotal = nzTotal + 1; - } - keepStatus[bcol] = true; - alreadyOnBColList[bcol] = false; - bColList[j] = 0; - } - numBCols = 0; - } /* for (i = 0; i < nBlks; i++) */ - - /* create array of the correct size and copy over newCols to it */ - - Teuchos::ArrayRCP finalCols(nzTotal); - for (LO i = 0; i < nzTotal; i++) finalCols[i] = newCols[i]; - - // Not using column map because we do not allow for any off-proc stuff. - // Not sure if this is okay. FIXME - - RCP rowMap = Amat.getRowMap(); // , colMap = Amat.getColMap(); - - LO nAmalgNodesOnProc = rowMap->getLocalNumElements()/nPDEs; - Teuchos::Array nodalGIDs(nAmalgNodesOnProc); - typename Teuchos::ScalarTraits::coordinateType temp; - for (size_t i = 0; i < as(nAmalgNodesOnProc); i++ ) { - GO gid = rowMap->getGlobalElement(i*nPDEs); - temp = ((typename Teuchos::ScalarTraits::coordinateType) (gid))/((typename Teuchos::ScalarTraits::coordinateType) (nPDEs)); - nodalGIDs[i] = as(floor(temp)); - } - GO nAmalgNodesGlobal = rowMap->getGlobalNumElements(); - GO nBlkGlobal = nAmalgNodesGlobal/nPDEs; - if (nBlkGlobal*nPDEs != nAmalgNodesGlobal) - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "Number of global dofs not divisible by BlkSize"); - - Teuchos::RCP AmalgRowMap = MapFactory::Build(rowMap->lib(), nBlkGlobal, - nodalGIDs(),0,rowMap->getComm()); - - filteredGraph = rcp(new LWGraph(newRowPtr, finalCols, AmalgRowMap, AmalgRowMap, "thresholded graph of A")); - 
filteredGraph->SetBoundaryNodeMap(boundaryNodes); + penaltyPolyCoef[poly1stOrderCoef] * (as(i)) + + penaltyPolyCoef[poly2ndOrderCoef] * (as(i * i)) + + (penaltyPolyCoef[poly3rdOrderCoef] * (as(i * i)) * (as(i))) + //perhaps avoids overflow? + (penaltyPolyCoef[poly4thOrderCoef] * (as(i * i)) * (as(i * i))); + + LO nzTotal = 0, numBCols = 0, row = -1, Nbcols, bcol; + newRowPtr[0] = 0; + + /* proceed block by block */ + for (LO i = 0; i < as(nBlks); i++) { + newRowPtr[i + 1] = newRowPtr[i]; + for (LO j = 0; j < nPDEs; j++) { + row = row + 1; + + Teuchos::ArrayView indices; + Teuchos::ArrayView vals; + + Amat.getLocalRowView(row, indices, vals); + + if (indices.size() > maxNzPerRow) { + LO oldSize = maxNzPerRow; + maxNzPerRow = indices.size() + 100; + penalties.resize(as(maxNzPerRow), 0.0); + for (LO k = oldSize; k < maxNzPerRow; k++) + penalties[k] = penaltyPolyCoef[poly0thOrderCoef] + + penaltyPolyCoef[poly1stOrderCoef] * (as(i)) + + penaltyPolyCoef[poly2ndOrderCoef] * (as(i * i)) + + (penaltyPolyCoef[poly3rdOrderCoef] * (as(i * i)) * (as(i))) + + (penaltyPolyCoef[poly4thOrderCoef] * (as(i * i)) * (as(i * i))); + } + badGuysDropfunc(row, indices, vals, testVecs, nPDEs, penalties, nearNull, bcols, keepOrNot, Nbcols, nLoc); + for (LO k = 0; k < Nbcols; k++) { + bcol = bcols[k]; + + /* add to bColList if not already on it */ + if (alreadyOnBColList[bcol] == false) { /* for PDE systems only record */ + bColList[numBCols++] = bcol; /* neighboring block one time */ + alreadyOnBColList[bcol] = true; + } + /* drop if any pt row within block indicates entry should be dropped */ + + if (keepOrNot[k] == false) keepStatus[bcol] = false; + + } /* for (k=0; k < Nbcols; k++) */ + } /* for (j = 0; i < nPDEs; j++) */ + + /* finished with block row. Now record block entries that we keep */ + /* and reset keepStatus, bColList, and alreadyOnBColList. 
*/ + + if (numBCols < 2) boundaryNodes[i] = true; + for (LO j = 0; j < numBCols; j++) { + bcol = bColList[j]; + if (keepStatus[bcol] == true) { + newCols[nzTotal] = bColList[j]; + newRowPtr[i + 1]++; + nzTotal = nzTotal + 1; + } + keepStatus[bcol] = true; + alreadyOnBColList[bcol] = false; + bColList[j] = 0; + } + numBCols = 0; + } /* for (i = 0; i < nBlks; i++) */ + + /* create array of the correct size and copy over newCols to it */ + + Teuchos::ArrayRCP finalCols(nzTotal); + for (LO i = 0; i < nzTotal; i++) finalCols[i] = newCols[i]; + + // Not using column map because we do not allow for any off-proc stuff. + // Not sure if this is okay. FIXME + + RCP rowMap = Amat.getRowMap(); // , colMap = Amat.getColMap(); + + LO nAmalgNodesOnProc = rowMap->getLocalNumElements() / nPDEs; + Teuchos::Array nodalGIDs(nAmalgNodesOnProc); + typename Teuchos::ScalarTraits::coordinateType temp; + for (size_t i = 0; i < as(nAmalgNodesOnProc); i++) { + GO gid = rowMap->getGlobalElement(i * nPDEs); + temp = ((typename Teuchos::ScalarTraits::coordinateType)(gid)) / ((typename Teuchos::ScalarTraits::coordinateType)(nPDEs)); + nodalGIDs[i] = as(floor(temp)); } + GO nAmalgNodesGlobal = rowMap->getGlobalNumElements(); + GO nBlkGlobal = nAmalgNodesGlobal / nPDEs; + if (nBlkGlobal * nPDEs != nAmalgNodesGlobal) + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "Number of global dofs not divisible by BlkSize"); + + Teuchos::RCP AmalgRowMap = MapFactory::Build(rowMap->lib(), nBlkGlobal, + nodalGIDs(), 0, rowMap->getComm()); - template - void SmooVecCoalesceDropFactory::badGuysDropfunc(LO row, const Teuchos::ArrayView& cols, const Teuchos::ArrayView& vals, const MultiVector& testVecs, LO nPDEs, Teuchos::ArrayRCP & penalties, const MultiVector& nearNull, Teuchos::ArrayRCP& Bcols, Teuchos::ArrayRCP& keepOrNot, LO &Nbcols, LO nLoc) const { - using TST=Teuchos::ScalarTraits; + filteredGraph = rcp(new LWGraph(newRowPtr, finalCols, AmalgRowMap, AmalgRowMap, "thresholded graph of A")); + 
filteredGraph->SetBoundaryNodeMap(boundaryNodes); +} - LO nLeng = cols.size(); +template +void SmooVecCoalesceDropFactory::badGuysDropfunc(LO row, const Teuchos::ArrayView& cols, const Teuchos::ArrayView& vals, const MultiVector& testVecs, LO nPDEs, Teuchos::ArrayRCP& penalties, const MultiVector& nearNull, Teuchos::ArrayRCP& Bcols, Teuchos::ArrayRCP& keepOrNot, LO& Nbcols, LO nLoc) const { + using TST = Teuchos::ScalarTraits; + + LO nLeng = cols.size(); typename TST::coordinateType temp; - temp = ((typename TST::coordinateType) (row))/((typename TST::coordinateType) (nPDEs)); + temp = ((typename TST::coordinateType)(row)) / ((typename TST::coordinateType)(nPDEs)); LO blkRow = as(floor(temp)); - Teuchos::ArrayRCP badGuy( nLeng, 0.0); - Teuchos::ArrayRCP subNull(nLeng, 0.0); /* subset of nearNull */ - /* associated with current */ - /* dof within node. */ - + Teuchos::ArrayRCP badGuy(nLeng, 0.0); + Teuchos::ArrayRCP subNull(nLeng, 0.0); /* subset of nearNull */ + /* associated with current */ + /* dof within node. */ + /* Only consider testVecs associated with same dof & on processor. Further */ /* collapse testVecs to a single badGuy vector by basically taking the worst */ /* (least smooth) values for each of the off diags. In particular, we look at*/ @@ -433,39 +424,37 @@ namespace MueLu { /* two guys are aggregated. So, the biggest ratio mismatch is used to choose */ /* the testVec entry associated with each off-diagonal entry. 
*/ - for (LO i = 0; i < nLeng; i++) keepOrNot[i] = false; - LO diagInd = -1; - Nbcols = 0; - LO rowDof = row - blkRow*nPDEs; - Teuchos::ArrayRCP< const Scalar > oneNull = nearNull.getData( as(rowDof)); + LO diagInd = -1; + Nbcols = 0; + LO rowDof = row - blkRow * nPDEs; + Teuchos::ArrayRCP oneNull = nearNull.getData(as(rowDof)); for (LO i = 0; i < nLeng; i++) { - if ((cols[i] < nLoc ) && (TST::magnitude(vals[i]) != 0.0)) { /* on processor */ - temp = ((typename TST::coordinateType) (cols[i]))/((typename TST::coordinateType) (nPDEs)); - LO colDof = cols[i] - (as(floor( temp )))*nPDEs; + if ((cols[i] < nLoc) && (TST::magnitude(vals[i]) != 0.0)) { /* on processor */ + temp = ((typename TST::coordinateType)(cols[i])) / ((typename TST::coordinateType)(nPDEs)); + LO colDof = cols[i] - (as(floor(temp))) * nPDEs; if (colDof == rowDof) { /* same dof within node as row */ - Bcols[ Nbcols] = (cols[i] - colDof)/nPDEs; + Bcols[Nbcols] = (cols[i] - colDof) / nPDEs; subNull[Nbcols] = oneNull[cols[i]]; if (cols[i] != row) { /* not diagonal */ - Scalar worstRatio = -TST::one(); - Scalar targetRatio = subNull[Nbcols]/oneNull[row]; + Scalar worstRatio = -TST::one(); + Scalar targetRatio = subNull[Nbcols] / oneNull[row]; Scalar actualRatio; - for (size_t kk = 0; kk < testVecs.getNumVectors(); kk++ ) { - Teuchos::ArrayRCP< const Scalar > curVec = testVecs.getData(kk); - actualRatio = curVec[cols[i]]/curVec[row]; + for (size_t kk = 0; kk < testVecs.getNumVectors(); kk++) { + Teuchos::ArrayRCP curVec = testVecs.getData(kk); + actualRatio = curVec[cols[i]] / curVec[row]; if (TST::magnitude(actualRatio - targetRatio) > TST::magnitude(worstRatio)) { - badGuy[Nbcols] = actualRatio; - worstRatio = Teuchos::ScalarTraits::magnitude(actualRatio - targetRatio); + badGuy[Nbcols] = actualRatio; + worstRatio = Teuchos::ScalarTraits::magnitude(actualRatio - targetRatio); } } - } - else { - badGuy[ Nbcols] = 1.; + } else { + badGuy[Nbcols] = 1.; keepOrNot[Nbcols] = true; - diagInd = Nbcols; + diagInd 
= Nbcols; } (Nbcols)++; } @@ -475,18 +464,18 @@ namespace MueLu { /* Make sure that diagonal entry is in block col list */ if (diagInd == -1) { - Bcols[ Nbcols] = (row - rowDof)/nPDEs; - subNull[ Nbcols] = 1.; - badGuy[ Nbcols] = 1.; + Bcols[Nbcols] = (row - rowDof) / nPDEs; + subNull[Nbcols] = 1.; + badGuy[Nbcols] = 1.; keepOrNot[Nbcols] = true; - diagInd = Nbcols; + diagInd = Nbcols; (Nbcols)++; } - Scalar currentRP = oneNull[row]*oneNull[row]; - Scalar currentRTimesBadGuy= oneNull[row]*badGuy[diagInd]; - Scalar currentScore = penalties[0]; /* (I - P inv(R*P)*R )=0 for size */ - /* size 1 agg, so fit is perfect */ + Scalar currentRP = oneNull[row] * oneNull[row]; + Scalar currentRTimesBadGuy = oneNull[row] * badGuy[diagInd]; + Scalar currentScore = penalties[0]; /* (I - P inv(R*P)*R )=0 for size */ + /* size 1 agg, so fit is perfect */ /* starting from a set that only includes the diagonal entry consider adding */ /* one off-diagonal at a time until the fitValue exceeds the penalty term. 
*/ @@ -496,7 +485,7 @@ namespace MueLu { /* includes the diagonal, all already determined neighbors, and the potential*/ /* new neighbor */ - LO nKeep = 1, flag = 1, minId; + LO nKeep = 1, flag = 1, minId; Scalar minFit, minFitRP = 0., minFitRTimesBadGuy = 0.; Scalar newRP, newRTimesBadGuy; @@ -507,45 +496,46 @@ namespace MueLu { minFit = 1000000.; minId = -1; - for (LO i=0; i < Nbcols; i++) { + for (LO i = 0; i < Nbcols; i++) { if (keepOrNot[i] == false) { - keepOrNot[i] = true; /* temporarily view i as non-dropped neighbor */ - newRP = currentRP + subNull[i]*subNull[i]; - newRTimesBadGuy= currentRTimesBadGuy + subNull[i]*badGuy[i]; - Scalar ratio = newRTimesBadGuy/newRP; + keepOrNot[i] = true; /* temporarily view i as non-dropped neighbor */ + newRP = currentRP + subNull[i] * subNull[i]; + newRTimesBadGuy = currentRTimesBadGuy + subNull[i] * badGuy[i]; + Scalar ratio = newRTimesBadGuy / newRP; Scalar newFit = 0.0; - for (LO k=0; k < Nbcols; k++) { + for (LO k = 0; k < Nbcols; k++) { if (keepOrNot[k] == true) { - Scalar diff = badGuy[k] - ratio*subNull[k]; - newFit = newFit + diff*diff; + Scalar diff = badGuy[k] - ratio * subNull[k]; + newFit = newFit + diff * diff; } } if (Teuchos::ScalarTraits::magnitude(newFit) < Teuchos::ScalarTraits::magnitude(minFit)) { - minId = i; - minFit = newFit; - minFitRP = newRP; - minFitRTimesBadGuy= newRTimesBadGuy; + minId = i; + minFit = newFit; + minFitRP = newRP; + minFitRTimesBadGuy = newRTimesBadGuy; } keepOrNot[i] = false; } } - if (minId == -1) flag = 0; + if (minId == -1) + flag = 0; else { - minFit = sqrt(minFit); + minFit = sqrt(minFit); Scalar newScore = penalties[nKeep] + minFit; if (Teuchos::ScalarTraits::magnitude(newScore) < Teuchos::ScalarTraits::magnitude(currentScore)) { - nKeep = nKeep + 1; - keepOrNot[minId]= true; - currentScore = newScore; - currentRP = minFitRP; - currentRTimesBadGuy= minFitRTimesBadGuy; - } - else flag = 0; + nKeep = nKeep + 1; + keepOrNot[minId] = true; + currentScore = newScore; + 
currentRP = minFitRP; + currentRTimesBadGuy = minFitRTimesBadGuy; + } else + flag = 0; } } - } +} -} //namespace MueLu +} //namespace MueLu -#endif // MUELU_SMOOVECCOALESCEDROPFACTORY_DEF_HPP +#endif // MUELU_SMOOVECCOALESCEDROPFACTORY_DEF_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_UnsmooshFactory_decl.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_UnsmooshFactory_decl.hpp index 80b779e26a7d..5a95b30ddc97 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_UnsmooshFactory_decl.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_UnsmooshFactory_decl.hpp @@ -54,7 +54,7 @@ namespace MueLu { - /*! +/*! @class UnsmooshFactory class. @brief Factory for building "unsmooshed" transfer operators from transfer operators associated with a scalar helper problem (built by the VariableDofLaplacianFactory) @@ -90,49 +90,43 @@ namespace MueLu { ----------|--------------|------------ | P | UnsmooshFactory | Unsmooshed prolongation operator */ - template - class UnsmooshFactory : public PFactory { +template +class UnsmooshFactory : public PFactory { #undef MUELU_UNSMOOSHFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: + public: + //! @name Constructors/Destructors. + //@{ - //! @name Constructors/Destructors. - //@{ + //! Constructor + UnsmooshFactory(); - //! Constructor - UnsmooshFactory(); + //! Destructor + virtual ~UnsmooshFactory() {} - //! Destructor - virtual ~UnsmooshFactory() { } + RCP GetValidParameterList() const; - RCP GetValidParameterList() const; + //@} - //@} + //! Input + //@{ - //! Input - //@{ + void DeclareInput(Level &fineLevel, Level &coarseLevel) const; - void DeclareInput(Level &fineLevel, Level &coarseLevel) const; + //@} - //@} + void Build(Level &fineLevel, Level &coarseLevel) const; // Build + void BuildP(Level & /* fineLevel */, Level & /* coarseLevel */) const {}; // TAW no real need for an extra BuildP routine. 
Just use Build - void Build (Level &fineLevel, Level &coarseLevel) const; // Build - void BuildP(Level &/* fineLevel */, Level &/* coarseLevel */) const {}; // TAW no real need for an extra BuildP routine. Just use Build + private: +}; //class UnsmooshFactory - private: - - - - - }; //class UnsmooshFactory - -} //namespace MueLu +} //namespace MueLu #define MUELU_UNSMOOSHFACTORY_SHORT - #endif /* PACKAGES_MUELU_SRC_GRAPH_MUELU_UNSMOOSHFACTORY_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_UnsmooshFactory_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_UnsmooshFactory_def.hpp index f49f488463ea..9370276d7d07 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_UnsmooshFactory_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_UnsmooshFactory_def.hpp @@ -53,198 +53,193 @@ namespace MueLu { - template - UnsmooshFactory::UnsmooshFactory() { } - - template - RCP UnsmooshFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - validParamList->set< RCP >("A", Teuchos::null, "Generating factory for unamalgamated matrix. 
Row map of (unamalgamted) output prolongation operator should match row map of this A."); - validParamList->set< RCP >("P", Teuchos::null, "Generating factory of the (amalgamated) prolongator P"); - validParamList->set< RCP >("DofStatus", Teuchos::null, "Generating factory for dofStatus array (usually the VariableDofLaplacdianFactory)"); - - validParamList->set< int > ("maxDofPerNode", 1, "Maximum number of DOFs per node"); - validParamList->set< bool > ("fineIsPadded" , false, "true if finest level input matrix is padded"); - - return validParamList; - } - - template - void UnsmooshFactory::DeclareInput(Level &fineLevel, Level &coarseLevel) const { - //const ParameterList& pL = GetParameterList(); - Input(fineLevel, "A"); - Input(coarseLevel, "P"); - - // DofStatus only provided on the finest level (by user) - // On the coarser levels it is auto-generated using the DBC information from the unamalgamated matrix A - if(fineLevel.GetLevelID() == 0) - Input(fineLevel, "DofStatus"); - } - - template - void UnsmooshFactory::Build(Level &fineLevel, Level &coarseLevel) const { - FactoryMonitor m(*this, "Build", coarseLevel); - typedef Teuchos::ScalarTraits STS; - - const ParameterList & pL = GetParameterList(); - - // extract matrices (unamalgamated A and amalgamated P) - RCP unamalgA = Get< RCP >(fineLevel, "A"); - RCP amalgP = Get< RCP >(coarseLevel, "P"); - - // extract user parameters - int maxDofPerNode = pL.get ("maxDofPerNode"); - bool fineIsPadded = pL.get("fineIsPadded"); - - // get dofStatus information - // On the finest level it is provided by the user. 
On the coarser levels it is constructed - // using the DBC information of the matrix A - Teuchos::Array dofStatus; - if(fineLevel.GetLevelID() == 0) { - dofStatus = Get >(fineLevel, "DofStatus"); - } else { - // dof status is the dirichlet information of unsmooshed/unamalgamated A (fine level) - dofStatus = Teuchos::Array(unamalgA->getRowMap()->getLocalNumElements() /*amalgP->getRowMap()->getLocalNumElements() * maxDofPerNode*/,'s'); - - bool bHasZeroDiagonal = false; - Teuchos::ArrayRCP dirOrNot = MueLu::Utilities::DetectDirichletRowsExt(*unamalgA,bHasZeroDiagonal,STS::magnitude(0.5)); - - TEUCHOS_TEST_FOR_EXCEPTION(dirOrNot.size() != dofStatus.size(), MueLu::Exceptions::RuntimeError,"MueLu::UnsmooshFactory::Build: inconsistent number of coarse DBC array and dofStatus array. dirOrNot.size() = " << dirOrNot.size() << " dofStatus.size() = " << dofStatus.size()); - for(decltype(dirOrNot.size()) i = 0; i < dirOrNot.size(); ++i) { - if(dirOrNot[i] == true) dofStatus[i] = 'p'; - } +template +UnsmooshFactory::UnsmooshFactory() {} + +template +RCP UnsmooshFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + validParamList->set >("A", Teuchos::null, "Generating factory for unamalgamated matrix. 
Row map of (unamalgamted) output prolongation operator should match row map of this A."); + validParamList->set >("P", Teuchos::null, "Generating factory of the (amalgamated) prolongator P"); + validParamList->set >("DofStatus", Teuchos::null, "Generating factory for dofStatus array (usually the VariableDofLaplacdianFactory)"); + + validParamList->set("maxDofPerNode", 1, "Maximum number of DOFs per node"); + validParamList->set("fineIsPadded", false, "true if finest level input matrix is padded"); + + return validParamList; +} + +template +void UnsmooshFactory::DeclareInput(Level &fineLevel, Level &coarseLevel) const { + //const ParameterList& pL = GetParameterList(); + Input(fineLevel, "A"); + Input(coarseLevel, "P"); + + // DofStatus only provided on the finest level (by user) + // On the coarser levels it is auto-generated using the DBC information from the unamalgamated matrix A + if (fineLevel.GetLevelID() == 0) + Input(fineLevel, "DofStatus"); +} + +template +void UnsmooshFactory::Build(Level &fineLevel, Level &coarseLevel) const { + FactoryMonitor m(*this, "Build", coarseLevel); + typedef Teuchos::ScalarTraits STS; + + const ParameterList &pL = GetParameterList(); + + // extract matrices (unamalgamated A and amalgamated P) + RCP unamalgA = Get >(fineLevel, "A"); + RCP amalgP = Get >(coarseLevel, "P"); + + // extract user parameters + int maxDofPerNode = pL.get("maxDofPerNode"); + bool fineIsPadded = pL.get("fineIsPadded"); + + // get dofStatus information + // On the finest level it is provided by the user. 
On the coarser levels it is constructed + // using the DBC information of the matrix A + Teuchos::Array dofStatus; + if (fineLevel.GetLevelID() == 0) { + dofStatus = Get >(fineLevel, "DofStatus"); + } else { + // dof status is the dirichlet information of unsmooshed/unamalgamated A (fine level) + dofStatus = Teuchos::Array(unamalgA->getRowMap()->getLocalNumElements() /*amalgP->getRowMap()->getLocalNumElements() * maxDofPerNode*/, 's'); + + bool bHasZeroDiagonal = false; + Teuchos::ArrayRCP dirOrNot = MueLu::Utilities::DetectDirichletRowsExt(*unamalgA, bHasZeroDiagonal, STS::magnitude(0.5)); + + TEUCHOS_TEST_FOR_EXCEPTION(dirOrNot.size() != dofStatus.size(), MueLu::Exceptions::RuntimeError, "MueLu::UnsmooshFactory::Build: inconsistent number of coarse DBC array and dofStatus array. dirOrNot.size() = " << dirOrNot.size() << " dofStatus.size() = " << dofStatus.size()); + for (decltype(dirOrNot.size()) i = 0; i < dirOrNot.size(); ++i) { + if (dirOrNot[i] == true) dofStatus[i] = 'p'; } + } - // TODO: TAW the following check is invalid for SA-AMG based input prolongators - //TEUCHOS_TEST_FOR_EXCEPTION(amalgP->getDomainMap()->isSameAs(*amalgP->getColMap()) == false, MueLu::Exceptions::RuntimeError,"MueLu::UnsmooshFactory::Build: only support for non-overlapping aggregates. 
(column map of Ptent must be the same as domain map of Ptent)"); - - // extract CRS information from amalgamated prolongation operator - Teuchos::ArrayRCP amalgRowPtr(amalgP->getLocalNumRows()); - Teuchos::ArrayRCP amalgCols(amalgP->getLocalNumEntries()); - Teuchos::ArrayRCP amalgVals(amalgP->getLocalNumEntries()); - Teuchos::RCP amalgPwrap = Teuchos::rcp_dynamic_cast(amalgP); - Teuchos::RCP amalgPcrs = amalgPwrap->getCrsMatrix(); - amalgPcrs->getAllValues(amalgRowPtr, amalgCols, amalgVals); - - // calculate number of dof rows for new prolongator - size_t paddedNrows = amalgP->getRowMap()->getLocalNumElements() * Teuchos::as(maxDofPerNode); - - // reserve CSR arrays for new prolongation operator - Teuchos::ArrayRCP newPRowPtr(paddedNrows+1); - Teuchos::ArrayRCP newPCols(amalgP->getLocalNumEntries() * maxDofPerNode); - Teuchos::ArrayRCP newPVals(amalgP->getLocalNumEntries() * maxDofPerNode); - - size_t rowCount = 0; // actual number of (local) in unamalgamated prolongator - if(fineIsPadded == true || fineLevel.GetLevelID() > 0) { - - // build prolongation operator for padded fine level matrices. - // Note: padded fine level dofs are transferred by injection. - // That is, these interpolation stencils do not take averages of - // coarse level variables. Further, fine level Dirichlet points - // also use injection. 
- - size_t cnt = 0; // local id counter - for (decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size() - 1; i++) { - // determine number of entries in amalgamated dof row i - size_t rowLength = amalgRowPtr[i+1] - amalgRowPtr[i]; - - // loop over dofs per node (unamalgamation) - for(int j = 0; j < maxDofPerNode; j++) { - newPRowPtr[i*maxDofPerNode+j] = cnt; - if (dofStatus[i*maxDofPerNode+j] == 's') { // add only "standard" dofs to unamalgamated prolongator - // loop over column entries in amalgamated P - for (size_t k = 0; k < rowLength; k++) { - newPCols[cnt ] = amalgCols[k+amalgRowPtr[i]] * maxDofPerNode + j; - newPVals[cnt++] = amalgVals[k+amalgRowPtr[i]]; - } - + // TODO: TAW the following check is invalid for SA-AMG based input prolongators + //TEUCHOS_TEST_FOR_EXCEPTION(amalgP->getDomainMap()->isSameAs(*amalgP->getColMap()) == false, MueLu::Exceptions::RuntimeError,"MueLu::UnsmooshFactory::Build: only support for non-overlapping aggregates. (column map of Ptent must be the same as domain map of Ptent)"); + + // extract CRS information from amalgamated prolongation operator + Teuchos::ArrayRCP amalgRowPtr(amalgP->getLocalNumRows()); + Teuchos::ArrayRCP amalgCols(amalgP->getLocalNumEntries()); + Teuchos::ArrayRCP amalgVals(amalgP->getLocalNumEntries()); + Teuchos::RCP amalgPwrap = Teuchos::rcp_dynamic_cast(amalgP); + Teuchos::RCP amalgPcrs = amalgPwrap->getCrsMatrix(); + amalgPcrs->getAllValues(amalgRowPtr, amalgCols, amalgVals); + + // calculate number of dof rows for new prolongator + size_t paddedNrows = amalgP->getRowMap()->getLocalNumElements() * Teuchos::as(maxDofPerNode); + + // reserve CSR arrays for new prolongation operator + Teuchos::ArrayRCP newPRowPtr(paddedNrows + 1); + Teuchos::ArrayRCP newPCols(amalgP->getLocalNumEntries() * maxDofPerNode); + Teuchos::ArrayRCP newPVals(amalgP->getLocalNumEntries() * maxDofPerNode); + + size_t rowCount = 0; // actual number of (local) in unamalgamated prolongator + if (fineIsPadded == true || 
fineLevel.GetLevelID() > 0) { + // build prolongation operator for padded fine level matrices. + // Note: padded fine level dofs are transferred by injection. + // That is, these interpolation stencils do not take averages of + // coarse level variables. Further, fine level Dirichlet points + // also use injection. + + size_t cnt = 0; // local id counter + for (decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size() - 1; i++) { + // determine number of entries in amalgamated dof row i + size_t rowLength = amalgRowPtr[i + 1] - amalgRowPtr[i]; + + // loop over dofs per node (unamalgamation) + for (int j = 0; j < maxDofPerNode; j++) { + newPRowPtr[i * maxDofPerNode + j] = cnt; + if (dofStatus[i * maxDofPerNode + j] == 's') { // add only "standard" dofs to unamalgamated prolongator + // loop over column entries in amalgamated P + for (size_t k = 0; k < rowLength; k++) { + newPCols[cnt] = amalgCols[k + amalgRowPtr[i]] * maxDofPerNode + j; + newPVals[cnt++] = amalgVals[k + amalgRowPtr[i]]; } } } + } - newPRowPtr[paddedNrows] = cnt; // close row CSR array - rowCount = paddedNrows; - } else { - // Build prolongation operator for non-padded fine level matrices. - // Need to map from non-padded dofs to padded dofs. For this, look - // at the status array and skip padded dofs. 
- - size_t cnt = 0; // local id counter - - for (decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size() - 1; i++) { - // determine number of entries in amalgamated dof row i - size_t rowLength = amalgRowPtr[i+1] - amalgRowPtr[i]; - - // loop over dofs per node (unamalgamation) - for(int j = 0; j < maxDofPerNode; j++) { - // no interpolation for padded fine dofs as they do not exist - - if (dofStatus[i*maxDofPerNode+j] == 's') { // add only "standard" dofs to unamalgamated prolongator - newPRowPtr[rowCount++] = cnt; - // loop over column entries in amalgamated P - for (size_t k = 0; k < rowLength; k++) { - newPCols[cnt ] = amalgCols[k+amalgRowPtr[i]] * maxDofPerNode + j; - newPVals[cnt++] = amalgVals[k+amalgRowPtr[i]]; - } - - } - if (dofStatus[i*maxDofPerNode+j] == 'd') { // Dirichlet handling - newPRowPtr[rowCount++] = cnt; + newPRowPtr[paddedNrows] = cnt; // close row CSR array + rowCount = paddedNrows; + } else { + // Build prolongation operator for non-padded fine level matrices. + // Need to map from non-padded dofs to padded dofs. For this, look + // at the status array and skip padded dofs. 
+ + size_t cnt = 0; // local id counter + + for (decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size() - 1; i++) { + // determine number of entries in amalgamated dof row i + size_t rowLength = amalgRowPtr[i + 1] - amalgRowPtr[i]; + + // loop over dofs per node (unamalgamation) + for (int j = 0; j < maxDofPerNode; j++) { + // no interpolation for padded fine dofs as they do not exist + + if (dofStatus[i * maxDofPerNode + j] == 's') { // add only "standard" dofs to unamalgamated prolongator + newPRowPtr[rowCount++] = cnt; + // loop over column entries in amalgamated P + for (size_t k = 0; k < rowLength; k++) { + newPCols[cnt] = amalgCols[k + amalgRowPtr[i]] * maxDofPerNode + j; + newPVals[cnt++] = amalgVals[k + amalgRowPtr[i]]; } } - } - newPRowPtr[rowCount] = cnt; // close row CSR array - } // fineIsPadded == false - - // generate coarse domain map - // So far no support for gid offset or strided maps. This information - // could be gathered easily from the unamalgamated fine level operator A. 
- std::vector stridingInfo(1, maxDofPerNode); - - GlobalOrdinal nCoarseDofs = amalgP->getDomainMap()->getLocalNumElements() * maxDofPerNode; - GlobalOrdinal indexBase = amalgP->getDomainMap()->getIndexBase(); - RCP coarseDomainMap = StridedMapFactory::Build(amalgP->getDomainMap()->lib(), - Teuchos::OrdinalTraits::invalid(), - nCoarseDofs, - indexBase, - stridingInfo, - amalgP->getDomainMap()->getComm(), - -1 /* stridedBlockId */, - 0 /*domainGidOffset */); - - size_t nColCoarseDofs = Teuchos::as(amalgP->getColMap()->getLocalNumElements() * maxDofPerNode); - Teuchos::Array unsmooshColMapGIDs(nColCoarseDofs); - for(size_t c = 0; c < amalgP->getColMap()->getLocalNumElements(); ++c) { - GlobalOrdinal gid = (amalgP->getColMap()->getGlobalElement(c)-indexBase) * maxDofPerNode + indexBase; - - for(int i = 0; i < maxDofPerNode; ++i) { - unsmooshColMapGIDs[c * maxDofPerNode + i] = gid + i; + if (dofStatus[i * maxDofPerNode + j] == 'd') { // Dirichlet handling + newPRowPtr[rowCount++] = cnt; + } } } - Teuchos::RCP coarseColMap = MapFactory::Build(amalgP->getDomainMap()->lib(), - Teuchos::OrdinalTraits::invalid(), - unsmooshColMapGIDs(), //View, - indexBase, - amalgP->getDomainMap()->getComm()); - - // Assemble unamalgamated P - Teuchos::RCP unamalgPCrs = CrsMatrixFactory::Build(unamalgA->getRowMap(), - coarseColMap, - maxDofPerNode*amalgP->getLocalMaxNumRowEntries()); - for (size_t i = 0; i < rowCount; i++) { - unamalgPCrs->insertLocalValues(i, - newPCols.view(newPRowPtr[i], newPRowPtr[i+1] - newPRowPtr[i]), - newPVals.view(newPRowPtr[i], newPRowPtr[i+1] - newPRowPtr[i])); + newPRowPtr[rowCount] = cnt; // close row CSR array + } // fineIsPadded == false + + // generate coarse domain map + // So far no support for gid offset or strided maps. This information + // could be gathered easily from the unamalgamated fine level operator A. 
+ std::vector stridingInfo(1, maxDofPerNode); + + GlobalOrdinal nCoarseDofs = amalgP->getDomainMap()->getLocalNumElements() * maxDofPerNode; + GlobalOrdinal indexBase = amalgP->getDomainMap()->getIndexBase(); + RCP coarseDomainMap = StridedMapFactory::Build(amalgP->getDomainMap()->lib(), + Teuchos::OrdinalTraits::invalid(), + nCoarseDofs, + indexBase, + stridingInfo, + amalgP->getDomainMap()->getComm(), + -1 /* stridedBlockId */, + 0 /*domainGidOffset */); + + size_t nColCoarseDofs = Teuchos::as(amalgP->getColMap()->getLocalNumElements() * maxDofPerNode); + Teuchos::Array unsmooshColMapGIDs(nColCoarseDofs); + for (size_t c = 0; c < amalgP->getColMap()->getLocalNumElements(); ++c) { + GlobalOrdinal gid = (amalgP->getColMap()->getGlobalElement(c) - indexBase) * maxDofPerNode + indexBase; + + for (int i = 0; i < maxDofPerNode; ++i) { + unsmooshColMapGIDs[c * maxDofPerNode + i] = gid + i; } - unamalgPCrs->fillComplete(coarseDomainMap, unamalgA->getRowMap()); - - Teuchos::RCP unamalgP = Teuchos::rcp(new CrsMatrixWrap(unamalgPCrs)); - - Set(coarseLevel,"P",unamalgP); } + Teuchos::RCP coarseColMap = MapFactory::Build(amalgP->getDomainMap()->lib(), + Teuchos::OrdinalTraits::invalid(), + unsmooshColMapGIDs(), //View, + indexBase, + amalgP->getDomainMap()->getComm()); + + // Assemble unamalgamated P + Teuchos::RCP unamalgPCrs = CrsMatrixFactory::Build(unamalgA->getRowMap(), + coarseColMap, + maxDofPerNode * amalgP->getLocalMaxNumRowEntries()); + for (size_t i = 0; i < rowCount; i++) { + unamalgPCrs->insertLocalValues(i, + newPCols.view(newPRowPtr[i], newPRowPtr[i + 1] - newPRowPtr[i]), + newPVals.view(newPRowPtr[i], newPRowPtr[i + 1] - newPRowPtr[i])); + } + unamalgPCrs->fillComplete(coarseDomainMap, unamalgA->getRowMap()); + Teuchos::RCP unamalgP = Teuchos::rcp(new CrsMatrixWrap(unamalgPCrs)); -} /* MueLu */ + Set(coarseLevel, "P", unamalgP); +} +} // namespace MueLu #endif /* PACKAGES_MUELU_SRC_GRAPH_MUELU_UNSMOOSHFACTORY_DEF_HPP_ */ diff --git 
a/packages/muelu/src/Graph/MatrixTransformation/MueLu_VariableDofLaplacianFactory_decl.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_VariableDofLaplacianFactory_decl.hpp index 51a333e52d81..93e67105266d 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_VariableDofLaplacianFactory_decl.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_VariableDofLaplacianFactory_decl.hpp @@ -47,7 +47,6 @@ #ifndef PACKAGES_MUELU_SRC_GRAPH_MUELU_VARIABLEDOFLAPLACIANFACTORY_DECL_HPP_ #define PACKAGES_MUELU_SRC_GRAPH_MUELU_VARIABLEDOFLAPLACIANFACTORY_DECL_HPP_ - #include "MueLu_ConfigDefs.hpp" #include "MueLu_SingleLevelFactoryBase.hpp" #include "MueLu_VariableDofLaplacianFactory_fwd.hpp" @@ -57,7 +56,7 @@ namespace MueLu { - /*! +/*! @class VariableDofLaplacianFactory class. @brief Factory for building scalar Laplace operator (that is used as fake operator for variable dof size problems) @@ -97,269 +96,255 @@ namespace MueLu { | A | VariableDofLaplacianFactory | Laplacian operator | DofStatus | VariableDofLaplacianFactory | Status array for next coarse level */ - template - class VariableDofLaplacianFactory : public SingleLevelFactoryBase { +template +class VariableDofLaplacianFactory : public SingleLevelFactoryBase { #undef MUELU_VARIABLEDOFLAPLACIANFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! @name Constructors/Destructors. - //@{ - - //! Constructor - VariableDofLaplacianFactory(); + public: + //! @name Constructors/Destructors. + //@{ - //! Destructor - virtual ~VariableDofLaplacianFactory() { } + //! Constructor + VariableDofLaplacianFactory(); - RCP GetValidParameterList() const; + //! Destructor + virtual ~VariableDofLaplacianFactory() {} - //@} + RCP GetValidParameterList() const; - //! Input - //@{ + //@} - void DeclareInput(Level ¤tLevel) const; + //! 
Input + //@{ - //@} + void DeclareInput(Level& currentLevel) const; - void Build(Level ¤tLevel) const; // Build + //@} - private: + void Build(Level& currentLevel) const; // Build - void buildPaddedMap(const Teuchos::ArrayRCP & dofPresent, std::vector & map, size_t nDofs) const; - void assignGhostLocalNodeIds(const Teuchos::RCP & rowDofMap, const Teuchos::RCP & colDofMap, std::vector & myLocalNodeIds, const std::vector & dofMap, size_t maxDofPerNode, size_t& nLocalNodes, size_t& nLocalPlusGhostNodes, Teuchos::RCP< const Teuchos::Comm< int > > comm) const; - void squeezeOutNnzs(Teuchos::ArrayRCP & rowPtr, Teuchos::ArrayRCP & cols, Teuchos::ArrayRCP & vals, const std::vector& keep) const; - void buildLaplacian(const Teuchos::ArrayRCP& rowPtr, const Teuchos::ArrayRCP& cols, Teuchos::ArrayRCP& vals, const size_t& numdim, const RCP::magnitudeType,LocalOrdinal,GlobalOrdinal,Node> > & ghostedCoords) const; + private: + void buildPaddedMap(const Teuchos::ArrayRCP& dofPresent, std::vector& map, size_t nDofs) const; + void assignGhostLocalNodeIds(const Teuchos::RCP& rowDofMap, const Teuchos::RCP& colDofMap, std::vector& myLocalNodeIds, const std::vector& dofMap, size_t maxDofPerNode, size_t& nLocalNodes, size_t& nLocalPlusGhostNodes, Teuchos::RCP > comm) const; + void squeezeOutNnzs(Teuchos::ArrayRCP& rowPtr, Teuchos::ArrayRCP& cols, Teuchos::ArrayRCP& vals, const std::vector& keep) const; + void buildLaplacian(const Teuchos::ArrayRCP& rowPtr, const Teuchos::ArrayRCP& cols, Teuchos::ArrayRCP& vals, const size_t& numdim, const RCP::magnitudeType, LocalOrdinal, GlobalOrdinal, Node> >& ghostedCoords) const; - template - void MueLu_az_sort(listType list[], size_t N, size_t list2[], Scalar list3[]) const { - /* local variables */ + template + void MueLu_az_sort(listType list[], size_t N, size_t list2[], Scalar list3[]) const { + /* local variables */ - listType RR, K; - size_t l, r, j, i; - int flag; - size_t RR2; - Scalar RR3; + listType RR, K; + size_t l, r, j, i; + int flag; + 
size_t RR2; + Scalar RR3; - /*********************** execution begins ******************************/ + /*********************** execution begins ******************************/ - if (N <= 1) return; + if (N <= 1) return; - l = N / 2 + 1; - r = N - 1; - l = l - 1; - RR = list[l - 1]; - K = list[l - 1]; + l = N / 2 + 1; + r = N - 1; + l = l - 1; + RR = list[l - 1]; + K = list[l - 1]; - if ((list2 != NULL) && (list3 != NULL)) { - RR2 = list2[l - 1]; - RR3 = list3[l - 1]; - while (r != 0) { - j = l; - flag = 1; + if ((list2 != NULL) && (list3 != NULL)) { + RR2 = list2[l - 1]; + RR3 = list3[l - 1]; + while (r != 0) { + j = l; + flag = 1; - while (flag == 1) { - i = j; - j = j + j; + while (flag == 1) { + i = j; + j = j + j; - if (j > r + 1) + if (j > r + 1) + flag = 0; + else { + if (j < r + 1) + if (list[j] > list[j - 1]) j = j + 1; + + if (list[j - 1] > K) { + list[i - 1] = list[j - 1]; + list2[i - 1] = list2[j - 1]; + list3[i - 1] = list3[j - 1]; + } else { flag = 0; - else { - if (j < r + 1) - if (list[j] > list[j - 1]) j = j + 1; - - if (list[j - 1] > K) { - list[ i - 1] = list[ j - 1]; - list2[i - 1] = list2[j - 1]; - list3[i - 1] = list3[j - 1]; - } - else { - flag = 0; - } } } - - list[ i - 1] = RR; - list2[i - 1] = RR2; - list3[i - 1] = RR3; - - if (l == 1) { - RR = list [r]; - RR2 = list2[r]; - RR3 = list3[r]; - - K = list[r]; - list[r ] = list[0]; - list2[r] = list2[0]; - list3[r] = list3[0]; - r = r - 1; - } - else { - l = l - 1; - RR = list[ l - 1]; - RR2 = list2[l - 1]; - RR3 = list3[l - 1]; - K = list[l - 1]; - } } - list[ 0] = RR; - list2[0] = RR2; - list3[0] = RR3; + list[i - 1] = RR; + list2[i - 1] = RR2; + list3[i - 1] = RR3; + + if (l == 1) { + RR = list[r]; + RR2 = list2[r]; + RR3 = list3[r]; + + K = list[r]; + list[r] = list[0]; + list2[r] = list2[0]; + list3[r] = list3[0]; + r = r - 1; + } else { + l = l - 1; + RR = list[l - 1]; + RR2 = list2[l - 1]; + RR3 = list3[l - 1]; + K = list[l - 1]; + } } - else if (list2 != NULL) { - RR2 = list2[l - 1]; 
- while (r != 0) { - j = l; - flag = 1; - while (flag == 1) { - i = j; - j = j + j; + list[0] = RR; + list2[0] = RR2; + list3[0] = RR3; + } else if (list2 != NULL) { + RR2 = list2[l - 1]; + while (r != 0) { + j = l; + flag = 1; + + while (flag == 1) { + i = j; + j = j + j; + + if (j > r + 1) + flag = 0; + else { + if (j < r + 1) + if (list[j] > list[j - 1]) j = j + 1; - if (j > r + 1) + if (list[j - 1] > K) { + list[i - 1] = list[j - 1]; + list2[i - 1] = list2[j - 1]; + } else { flag = 0; - else { - if (j < r + 1) - if (list[j] > list[j - 1]) j = j + 1; - - if (list[j - 1] > K) { - list[ i - 1] = list[ j - 1]; - list2[i - 1] = list2[j - 1]; - } - else { - flag = 0; - } } } - - list[ i - 1] = RR; - list2[i - 1] = RR2; - - if (l == 1) { - RR = list [r]; - RR2 = list2[r]; - - K = list[r]; - list[r ] = list[0]; - list2[r] = list2[0]; - r = r - 1; - } - else { - l = l - 1; - RR = list[ l - 1]; - RR2 = list2[l - 1]; - K = list[l - 1]; - } } - list[ 0] = RR; - list2[0] = RR2; + list[i - 1] = RR; + list2[i - 1] = RR2; + + if (l == 1) { + RR = list[r]; + RR2 = list2[r]; + + K = list[r]; + list[r] = list[0]; + list2[r] = list2[0]; + r = r - 1; + } else { + l = l - 1; + RR = list[l - 1]; + RR2 = list2[l - 1]; + K = list[l - 1]; + } } - else if (list3 != NULL) { - RR3 = list3[l - 1]; - while (r != 0) { - j = l; - flag = 1; - while (flag == 1) { - i = j; - j = j + j; + list[0] = RR; + list2[0] = RR2; + } else if (list3 != NULL) { + RR3 = list3[l - 1]; + while (r != 0) { + j = l; + flag = 1; + + while (flag == 1) { + i = j; + j = j + j; - if (j > r + 1) + if (j > r + 1) + flag = 0; + else { + if (j < r + 1) + if (list[j] > list[j - 1]) j = j + 1; + + if (list[j - 1] > K) { + list[i - 1] = list[j - 1]; + list3[i - 1] = list3[j - 1]; + } else { flag = 0; - else { - if (j < r + 1) - if (list[j] > list[j - 1]) j = j + 1; - - if (list[j - 1] > K) { - list[ i - 1] = list[ j - 1]; - list3[i - 1] = list3[j - 1]; - } - else { - flag = 0; - } } } + } - list[ i - 1] = RR; - list3[i - 1] = 
RR3; - - if (l == 1) { - RR = list [r]; - RR3 = list3[r]; - - K = list[r]; - list[r ] = list[0]; - list3[r] = list3[0]; - r = r - 1; - } - else { - l = l - 1; - RR = list[ l - 1]; - RR3 = list3[l - 1]; - K = list[l - 1]; - } + list[i - 1] = RR; + list3[i - 1] = RR3; + + if (l == 1) { + RR = list[r]; + RR3 = list3[r]; + + K = list[r]; + list[r] = list[0]; + list3[r] = list3[0]; + r = r - 1; + } else { + l = l - 1; + RR = list[l - 1]; + RR3 = list3[l - 1]; + K = list[l - 1]; } + } - list[ 0] = RR; - list3[0] = RR3; + list[0] = RR; + list3[0] = RR3; - } - else { - while (r != 0) { - j = l; - flag = 1; + } else { + while (r != 0) { + j = l; + flag = 1; - while (flag == 1) { - i = j; - j = j + j; + while (flag == 1) { + i = j; + j = j + j; - if (j > r + 1) + if (j > r + 1) + flag = 0; + else { + if (j < r + 1) + if (list[j] > list[j - 1]) j = j + 1; + + if (list[j - 1] > K) { + list[i - 1] = list[j - 1]; + } else { flag = 0; - else { - if (j < r + 1) - if (list[j] > list[j - 1]) j = j + 1; - - if (list[j - 1] > K) { - list[ i - 1] = list[ j - 1]; - } - else { - flag = 0; - } } } + } - list[ i - 1] = RR; + list[i - 1] = RR; - if (l == 1) { - RR = list [r]; + if (l == 1) { + RR = list[r]; - K = list[r]; - list[r ] = list[0]; - r = r - 1; - } - else { - l = l - 1; - RR = list[ l - 1]; - K = list[l - 1]; - } + K = list[r]; + list[r] = list[0]; + r = r - 1; + } else { + l = l - 1; + RR = list[l - 1]; + K = list[l - 1]; } - - list[ 0] = RR; } + + list[0] = RR; } + } - }; //class CoalesceDropFactory +}; //class CoalesceDropFactory -} //namespace MueLu +} //namespace MueLu #define MUELU_VARIABLEDOFLAPLACIANFACTORY_SHORT - #endif /* PACKAGES_MUELU_SRC_GRAPH_MUELU_VARIABLEDOFLAPLACIANFACTORY_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_VariableDofLaplacianFactory_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_VariableDofLaplacianFactory_def.hpp index 21aa1c77369d..b0f9431984f0 100644 --- 
a/packages/muelu/src/Graph/MatrixTransformation/MueLu_VariableDofLaplacianFactory_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_VariableDofLaplacianFactory_def.hpp @@ -47,559 +47,551 @@ #ifndef PACKAGES_MUELU_SRC_GRAPH_MUELU_VARIABLEDOFLAPLACIANFACTORY_DEF_HPP_ #define PACKAGES_MUELU_SRC_GRAPH_MUELU_VARIABLEDOFLAPLACIANFACTORY_DEF_HPP_ - #include "MueLu_Monitor.hpp" #include "MueLu_VariableDofLaplacianFactory_decl.hpp" namespace MueLu { - template - RCP VariableDofLaplacianFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP VariableDofLaplacianFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); - validParamList->set< double > ("Advanced Dirichlet: threshold", 1e-5, "Drop tolerance for Dirichlet detection"); - validParamList->set< double > ("Variable DOF amalgamation: threshold", 1.8e-9, "Drop tolerance for amalgamation process"); - validParamList->set< int > ("maxDofPerNode", 1, "Maximum number of DOFs per node"); + validParamList->set("Advanced Dirichlet: threshold", 1e-5, "Drop tolerance for Dirichlet detection"); + validParamList->set("Variable DOF amalgamation: threshold", 1.8e-9, "Drop tolerance for amalgamation process"); + validParamList->set("maxDofPerNode", 1, "Maximum number of DOFs per node"); - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A"); - validParamList->set< RCP >("Coordinates", Teuchos::null, "Generating factory for Coordinates"); + validParamList->set >("A", Teuchos::null, "Generating factory of the matrix A"); + validParamList->set >("Coordinates", Teuchos::null, "Generating factory for Coordinates"); - return validParamList; - } + return validParamList; +} - template - VariableDofLaplacianFactory::VariableDofLaplacianFactory() { } +template +VariableDofLaplacianFactory::VariableDofLaplacianFactory() {} - template - void VariableDofLaplacianFactory::DeclareInput(Level ¤tLevel) const { - 
Input(currentLevel, "A"); - Input(currentLevel, "Coordinates"); +template +void VariableDofLaplacianFactory::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "A"); + Input(currentLevel, "Coordinates"); - //if (currentLevel.GetLevelID() == 0) // TODO check for finest level (special treatment) - if (currentLevel.IsAvailable("DofPresent", NoFactory::get())) { - currentLevel.DeclareInput("DofPresent", NoFactory::get(), this); - } + //if (currentLevel.GetLevelID() == 0) // TODO check for finest level (special treatment) + if (currentLevel.IsAvailable("DofPresent", NoFactory::get())) { + currentLevel.DeclareInput("DofPresent", NoFactory::get(), this); } +} - template - void VariableDofLaplacianFactory::Build(Level ¤tLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); - typedef Teuchos::ScalarTraits STS; +template +void VariableDofLaplacianFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + typedef Teuchos::ScalarTraits STS; - const ParameterList & pL = GetParameterList(); + const ParameterList& pL = GetParameterList(); - RCP A = Get< RCP >(currentLevel, "A"); + RCP A = Get >(currentLevel, "A"); - Teuchos::RCP< const Teuchos::Comm< int > > comm = A->getRowMap()->getComm(); - Xpetra::UnderlyingLib lib = A->getRowMap()->lib(); + Teuchos::RCP > comm = A->getRowMap()->getComm(); + Xpetra::UnderlyingLib lib = A->getRowMap()->lib(); - typedef Xpetra::MultiVector::magnitudeType,LO,GO,NO> dxMV; - RCP Coords = Get< RCP::magnitudeType,LO,GO,NO> > >(currentLevel, "Coordinates"); + typedef Xpetra::MultiVector::magnitudeType, LO, GO, NO> dxMV; + RCP Coords = Get::magnitudeType, LO, GO, NO> > >(currentLevel, "Coordinates"); - int maxDofPerNode = pL.get("maxDofPerNode"); - Scalar dirDropTol = Teuchos::as(pL.get("Advanced Dirichlet: threshold")); // "ML advnaced Dirichlet: threshold" - Scalar amalgDropTol = Teuchos::as(pL.get("Variable DOF amalgamation: threshold")); //"variable DOF amalgamation: threshold") + int 
maxDofPerNode = pL.get("maxDofPerNode"); + Scalar dirDropTol = Teuchos::as(pL.get("Advanced Dirichlet: threshold")); // "ML advnaced Dirichlet: threshold" + Scalar amalgDropTol = Teuchos::as(pL.get("Variable DOF amalgamation: threshold")); //"variable DOF amalgamation: threshold") - bool bHasZeroDiagonal = false; - Teuchos::ArrayRCP dirOrNot = MueLu::Utilities::DetectDirichletRowsExt(*A,bHasZeroDiagonal,STS::magnitude(dirDropTol)); + bool bHasZeroDiagonal = false; + Teuchos::ArrayRCP dirOrNot = MueLu::Utilities::DetectDirichletRowsExt(*A, bHasZeroDiagonal, STS::magnitude(dirDropTol)); - // check availability of DofPresent array - Teuchos::ArrayRCP dofPresent; - if (currentLevel.IsAvailable("DofPresent", NoFactory::get())) { - dofPresent = currentLevel.Get< Teuchos::ArrayRCP >("DofPresent", NoFactory::get()); - } else { - // TAW: not sure about size of array. We cannot determine the expected size in the non-padded case correctly... - dofPresent = Teuchos::ArrayRCP(A->getRowMap()->getLocalNumElements(),1); - } + // check availability of DofPresent array + Teuchos::ArrayRCP dofPresent; + if (currentLevel.IsAvailable("DofPresent", NoFactory::get())) { + dofPresent = currentLevel.Get >("DofPresent", NoFactory::get()); + } else { + // TAW: not sure about size of array. We cannot determine the expected size in the non-padded case correctly... + dofPresent = Teuchos::ArrayRCP(A->getRowMap()->getLocalNumElements(), 1); + } - // map[k] indicates that the kth dof in the variable dof matrix A would - // correspond to the map[k]th dof in the padded system. If, i.e., it is - // map[35] = 39 then dof no 35 in the variable dof matrix A corresponds to - // row map id 39 in an imaginary padded matrix Apadded. - // The padded system is never built but would be the associated matrix if - // every node had maxDofPerNode dofs. 
- std::vector map(A->getLocalNumRows()); - this->buildPaddedMap(dofPresent, map, A->getLocalNumRows()); + // map[k] indicates that the kth dof in the variable dof matrix A would + // correspond to the map[k]th dof in the padded system. If, i.e., it is + // map[35] = 39 then dof no 35 in the variable dof matrix A corresponds to + // row map id 39 in an imaginary padded matrix Apadded. + // The padded system is never built but would be the associated matrix if + // every node had maxDofPerNode dofs. + std::vector map(A->getLocalNumRows()); + this->buildPaddedMap(dofPresent, map, A->getLocalNumRows()); - // map of size of number of DOFs containing local node id (dof id -> node id, inclusive ghosted dofs/nodes) - std::vector myLocalNodeIds(A->getColMap()->getLocalNumElements()); // possible maximum (we need the ghost nodes, too) + // map of size of number of DOFs containing local node id (dof id -> node id, inclusive ghosted dofs/nodes) + std::vector myLocalNodeIds(A->getColMap()->getLocalNumElements()); // possible maximum (we need the ghost nodes, too) - // assign the local node ids for the ghosted nodes - size_t nLocalNodes, nLocalPlusGhostNodes; - this->assignGhostLocalNodeIds(A->getRowMap(), A->getColMap(), myLocalNodeIds, map, maxDofPerNode, nLocalNodes, nLocalPlusGhostNodes, comm); + // assign the local node ids for the ghosted nodes + size_t nLocalNodes, nLocalPlusGhostNodes; + this->assignGhostLocalNodeIds(A->getRowMap(), A->getColMap(), myLocalNodeIds, map, maxDofPerNode, nLocalNodes, nLocalPlusGhostNodes, comm); - //RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)," ",0,false,10,false, true); + //RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)," ",0,false,10,false, true); - TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as(dofPresent.size()) != Teuchos::as(nLocalNodes * maxDofPerNode),MueLu::Exceptions::RuntimeError,"VariableDofLaplacianFactory: size of provided DofPresent array is " << dofPresent.size() << " but should be " << 
nLocalNodes * maxDofPerNode << " on the current processor."); + TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as(dofPresent.size()) != Teuchos::as(nLocalNodes * maxDofPerNode), MueLu::Exceptions::RuntimeError, "VariableDofLaplacianFactory: size of provided DofPresent array is " << dofPresent.size() << " but should be " << nLocalNodes * maxDofPerNode << " on the current processor."); - // put content of assignGhostLocalNodeIds here... + // put content of assignGhostLocalNodeIds here... - // fill nodal maps + // fill nodal maps - Teuchos::ArrayView< const GlobalOrdinal > myGids = A->getColMap()->getLocalElementList(); + Teuchos::ArrayView myGids = A->getColMap()->getLocalElementList(); - // vector containing row/col gids of amalgamated matrix (with holes) + // vector containing row/col gids of amalgamated matrix (with holes) - size_t nLocalDofs = A->getRowMap()->getLocalNumElements(); - size_t nLocalPlusGhostDofs = A->getColMap()->getLocalNumElements(); + size_t nLocalDofs = A->getRowMap()->getLocalNumElements(); + size_t nLocalPlusGhostDofs = A->getColMap()->getLocalNumElements(); - // myLocalNodeIds (dof -> node) + // myLocalNodeIds (dof -> node) - Teuchos::Array amalgRowMapGIDs(nLocalNodes); - Teuchos::Array amalgColMapGIDs(nLocalPlusGhostNodes); + Teuchos::Array amalgRowMapGIDs(nLocalNodes); + Teuchos::Array amalgColMapGIDs(nLocalPlusGhostNodes); - // initialize - size_t count = 0; - if (nLocalDofs > 0) { - amalgRowMapGIDs[count] = myGids[0]; - amalgColMapGIDs[count] = myGids[0]; + // initialize + size_t count = 0; + if (nLocalDofs > 0) { + amalgRowMapGIDs[count] = myGids[0]; + amalgColMapGIDs[count] = myGids[0]; + count++; + } + + for (size_t i = 1; i < nLocalDofs; i++) { + if (myLocalNodeIds[i] != myLocalNodeIds[i - 1]) { + amalgRowMapGIDs[count] = myGids[i]; + amalgColMapGIDs[count] = myGids[i]; count++; } + } - for(size_t i = 1; i < nLocalDofs; i++) { - if (myLocalNodeIds[i] != myLocalNodeIds[i-1]) { - amalgRowMapGIDs[count] = myGids[i]; - amalgColMapGIDs[count] = 
myGids[i]; - count++; - } - } + RCP tempAmalgColVec = GOVectorFactory::Build(A->getDomainMap()); + { + Teuchos::ArrayRCP tempAmalgColVecData = tempAmalgColVec->getDataNonConst(0); + for (size_t i = 0; i < A->getDomainMap()->getLocalNumElements(); i++) + tempAmalgColVecData[i] = amalgColMapGIDs[myLocalNodeIds[i]]; + } - RCP tempAmalgColVec = GOVectorFactory::Build(A->getDomainMap()); - { - Teuchos::ArrayRCP tempAmalgColVecData = tempAmalgColVec->getDataNonConst(0); - for (size_t i = 0; i < A->getDomainMap()->getLocalNumElements(); i++) - tempAmalgColVecData[i] = amalgColMapGIDs[ myLocalNodeIds[i]]; - } + RCP tempAmalgColVecTarget = GOVectorFactory::Build(A->getColMap()); + Teuchos::RCP dofImporter = ImportFactory::Build(A->getDomainMap(), A->getColMap()); + tempAmalgColVecTarget->doImport(*tempAmalgColVec, *dofImporter, Xpetra::INSERT); - RCP tempAmalgColVecTarget = GOVectorFactory::Build(A->getColMap()); - Teuchos::RCP dofImporter = ImportFactory::Build(A->getDomainMap(), A->getColMap()); - tempAmalgColVecTarget->doImport(*tempAmalgColVec, *dofImporter, Xpetra::INSERT); + { + Teuchos::ArrayRCP tempAmalgColVecBData = tempAmalgColVecTarget->getData(0); + // copy from dof vector to nodal vector + for (size_t i = 0; i < myLocalNodeIds.size(); i++) + amalgColMapGIDs[myLocalNodeIds[i]] = tempAmalgColVecBData[i]; + } - { - Teuchos::ArrayRCP tempAmalgColVecBData = tempAmalgColVecTarget->getData(0); - // copy from dof vector to nodal vector - for (size_t i = 0; i < myLocalNodeIds.size(); i++) - amalgColMapGIDs[ myLocalNodeIds[i]] = tempAmalgColVecBData[i]; + Teuchos::RCP amalgRowMap = MapFactory::Build(lib, + Teuchos::OrdinalTraits::invalid(), + amalgRowMapGIDs(), //View, + A->getRowMap()->getIndexBase(), + comm); + + Teuchos::RCP amalgColMap = MapFactory::Build(lib, + Teuchos::OrdinalTraits::invalid(), + amalgColMapGIDs(), //View, + A->getRangeMap()->getIndexBase(), + comm); + + // end fill nodal maps + + // start variable dof amalgamation + + Teuchos::RCP Awrap = 
Teuchos::rcp_dynamic_cast(A); + Teuchos::RCP Acrs = Awrap->getCrsMatrix(); + //Acrs->describe(*fancy, Teuchos::VERB_EXTREME); + + size_t nNonZeros = 0; + std::vector isNonZero(nLocalPlusGhostDofs, false); + std::vector nonZeroList(nLocalPlusGhostDofs); // ??? + + // also used in DetectDirichletExt + Teuchos::RCP diagVecUnique = VectorFactory::Build(A->getRowMap()); + Teuchos::RCP diagVec = VectorFactory::Build(A->getColMap()); + A->getLocalDiagCopy(*diagVecUnique); + diagVec->doImport(*diagVecUnique, *dofImporter, Xpetra::INSERT); + Teuchos::ArrayRCP diagVecData = diagVec->getData(0); + + Teuchos::ArrayRCP rowptr(Acrs->getLocalNumRows()); + Teuchos::ArrayRCP colind(Acrs->getLocalNumEntries()); + Teuchos::ArrayRCP values(Acrs->getLocalNumEntries()); + Acrs->getAllValues(rowptr, colind, values); + + // create arrays for amalgamated matrix + Teuchos::ArrayRCP amalgRowPtr(nLocalNodes + 1); + Teuchos::ArrayRCP amalgCols(rowptr[rowptr.size() - 1]); + + LocalOrdinal oldBlockRow = 0; + LocalOrdinal blockRow = 0; + LocalOrdinal blockColumn = 0; + + size_t newNzs = 0; + amalgRowPtr[0] = newNzs; + + bool doNotDrop = false; + if (amalgDropTol == Teuchos::ScalarTraits::zero()) doNotDrop = true; + if (values.size() == 0) doNotDrop = true; + + for (decltype(rowptr.size()) i = 0; i < rowptr.size() - 1; i++) { + blockRow = std::floor(map[i] / maxDofPerNode); + if (blockRow != oldBlockRow) { + // zero out info recording nonzeros in oldBlockRow + for (size_t j = 0; j < nNonZeros; j++) isNonZero[nonZeroList[j]] = false; + nNonZeros = 0; + amalgRowPtr[blockRow] = newNzs; // record start of next row } - - Teuchos::RCP amalgRowMap = MapFactory::Build(lib, - Teuchos::OrdinalTraits::invalid(), - amalgRowMapGIDs(), //View, - A->getRowMap()->getIndexBase(), - comm); - - Teuchos::RCP amalgColMap = MapFactory::Build(lib, - Teuchos::OrdinalTraits::invalid(), - amalgColMapGIDs(), //View, - A->getRangeMap()->getIndexBase(), - comm); - - // end fill nodal maps - - - // start variable dof 
amalgamation - - Teuchos::RCP Awrap = Teuchos::rcp_dynamic_cast(A); - Teuchos::RCP Acrs = Awrap->getCrsMatrix(); - //Acrs->describe(*fancy, Teuchos::VERB_EXTREME); - - size_t nNonZeros = 0; - std::vector isNonZero(nLocalPlusGhostDofs,false); - std::vector nonZeroList(nLocalPlusGhostDofs); // ??? - - // also used in DetectDirichletExt - Teuchos::RCP diagVecUnique = VectorFactory::Build(A->getRowMap()); - Teuchos::RCP diagVec = VectorFactory::Build(A->getColMap()); - A->getLocalDiagCopy(*diagVecUnique); - diagVec->doImport(*diagVecUnique, *dofImporter, Xpetra::INSERT); - Teuchos::ArrayRCP< const Scalar > diagVecData = diagVec->getData(0); - - Teuchos::ArrayRCP rowptr(Acrs->getLocalNumRows()); - Teuchos::ArrayRCP colind(Acrs->getLocalNumEntries()); - Teuchos::ArrayRCP values(Acrs->getLocalNumEntries()); - Acrs->getAllValues(rowptr, colind, values); - - - // create arrays for amalgamated matrix - Teuchos::ArrayRCP amalgRowPtr(nLocalNodes+1); - Teuchos::ArrayRCP amalgCols(rowptr[rowptr.size()-1]); - - LocalOrdinal oldBlockRow = 0; - LocalOrdinal blockRow = 0; - LocalOrdinal blockColumn = 0; - - size_t newNzs = 0; - amalgRowPtr[0] = newNzs; - - bool doNotDrop = false; - if (amalgDropTol == Teuchos::ScalarTraits::zero()) doNotDrop = true; - if (values.size() == 0) doNotDrop = true; - - for(decltype(rowptr.size()) i = 0; i < rowptr.size()-1; i++) { - blockRow = std::floor( map[i] / maxDofPerNode); - if (blockRow != oldBlockRow) { - // zero out info recording nonzeros in oldBlockRow - for(size_t j = 0; j < nNonZeros; j++) isNonZero[nonZeroList[j]] = false; - nNonZeros = 0; - amalgRowPtr[blockRow] = newNzs; // record start of next row - } - for (size_t j = rowptr[i]; j < rowptr[i+1]; j++) { - if(doNotDrop == true || - ( STS::magnitude(values[j] / STS::magnitude(sqrt(STS::magnitude(diagVecData[i]) * STS::magnitude(diagVecData[colind[j]]))) ) >= STS::magnitude(amalgDropTol) )) { - blockColumn = myLocalNodeIds[colind[j]]; - if(isNonZero[blockColumn] == false) { - 
isNonZero[blockColumn] = true; - nonZeroList[nNonZeros++] = blockColumn; - amalgCols[newNzs++] = blockColumn; - } + for (size_t j = rowptr[i]; j < rowptr[i + 1]; j++) { + if (doNotDrop == true || + (STS::magnitude(values[j] / STS::magnitude(sqrt(STS::magnitude(diagVecData[i]) * STS::magnitude(diagVecData[colind[j]])))) >= STS::magnitude(amalgDropTol))) { + blockColumn = myLocalNodeIds[colind[j]]; + if (isNonZero[blockColumn] == false) { + isNonZero[blockColumn] = true; + nonZeroList[nNonZeros++] = blockColumn; + amalgCols[newNzs++] = blockColumn; } } - oldBlockRow = blockRow; } - amalgRowPtr[blockRow+1] = newNzs; - - TEUCHOS_TEST_FOR_EXCEPTION((blockRow+1 != Teuchos::as(nLocalNodes)) && (nLocalNodes !=0), MueLu::Exceptions::RuntimeError, "VariableDofsPerNodeAmalgamation: error, computed # block rows (" << blockRow+1 <<") != nLocalNodes (" << nLocalNodes <<")"); - - amalgCols.resize(amalgRowPtr[nLocalNodes]); - - // end variableDofAmalg - - // begin rm differentDofsCrossings - - // Remove matrix entries (i,j) where the ith node and the jth node have - // different dofs that are 'present' - // Specifically, on input: - // dofPresent[i*maxDofPerNode+k] indicates whether or not the kth - // dof at the ith node is present in the - // variable dof matrix (e.g., the ith node - // has an air pressure dof). true means - // the dof is present while false means it - // is not. - // We create a unique id for the ith node (i.e. 
uniqueId[i]) via - // sum_{k=0 to maxDofPerNode-1} dofPresent[i*maxDofPerNode+k]*2^k - // and use this unique idea to remove entries (i,j) when uniqueId[i]!=uniqueId[j] - - Teuchos::ArrayRCP uniqueId(nLocalPlusGhostNodes); // unique id associated with DOF - std::vector keep(amalgRowPtr[amalgRowPtr.size()-1],true); // keep connection associated with node - - size_t ii = 0; // iteration index for present dofs - for(decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size()-1; i++) { - LocalOrdinal temp = 1; // basis for dof-id - uniqueId[i] = 0; - for (decltype(maxDofPerNode) j = 0; j < maxDofPerNode; j++) { - if (dofPresent[ii++]) uniqueId[i] += temp; // encode dof to be present - temp = temp * 2; // check next dof - } + oldBlockRow = blockRow; + } + amalgRowPtr[blockRow + 1] = newNzs; + + TEUCHOS_TEST_FOR_EXCEPTION((blockRow + 1 != Teuchos::as(nLocalNodes)) && (nLocalNodes != 0), MueLu::Exceptions::RuntimeError, "VariableDofsPerNodeAmalgamation: error, computed # block rows (" << blockRow + 1 << ") != nLocalNodes (" << nLocalNodes << ")"); + + amalgCols.resize(amalgRowPtr[nLocalNodes]); + + // end variableDofAmalg + + // begin rm differentDofsCrossings + + // Remove matrix entries (i,j) where the ith node and the jth node have + // different dofs that are 'present' + // Specifically, on input: + // dofPresent[i*maxDofPerNode+k] indicates whether or not the kth + // dof at the ith node is present in the + // variable dof matrix (e.g., the ith node + // has an air pressure dof). true means + // the dof is present while false means it + // is not. + // We create a unique id for the ith node (i.e. 
uniqueId[i]) via + // sum_{k=0 to maxDofPerNode-1} dofPresent[i*maxDofPerNode+k]*2^k + // and use this unique idea to remove entries (i,j) when uniqueId[i]!=uniqueId[j] + + Teuchos::ArrayRCP uniqueId(nLocalPlusGhostNodes); // unique id associated with DOF + std::vector keep(amalgRowPtr[amalgRowPtr.size() - 1], true); // keep connection associated with node + + size_t ii = 0; // iteration index for present dofs + for (decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size() - 1; i++) { + LocalOrdinal temp = 1; // basis for dof-id + uniqueId[i] = 0; + for (decltype(maxDofPerNode) j = 0; j < maxDofPerNode; j++) { + if (dofPresent[ii++]) uniqueId[i] += temp; // encode dof to be present + temp = temp * 2; // check next dof } + } - Teuchos::RCP nodeImporter = ImportFactory::Build(amalgRowMap, amalgColMap); + Teuchos::RCP nodeImporter = ImportFactory::Build(amalgRowMap, amalgColMap); - RCP nodeIdSrc = Xpetra::VectorFactory::Build(amalgRowMap,true); - RCP nodeIdTarget = Xpetra::VectorFactory::Build(amalgColMap,true); + RCP nodeIdSrc = Xpetra::VectorFactory::Build(amalgRowMap, true); + RCP nodeIdTarget = Xpetra::VectorFactory::Build(amalgColMap, true); - Teuchos::ArrayRCP< LocalOrdinal > nodeIdSrcData = nodeIdSrc->getDataNonConst(0); - for(decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size()-1; i++) { - nodeIdSrcData[i] = uniqueId[i]; - } + Teuchos::ArrayRCP nodeIdSrcData = nodeIdSrc->getDataNonConst(0); + for (decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size() - 1; i++) { + nodeIdSrcData[i] = uniqueId[i]; + } - nodeIdTarget->doImport(*nodeIdSrc, *nodeImporter, Xpetra::INSERT); + nodeIdTarget->doImport(*nodeIdSrc, *nodeImporter, Xpetra::INSERT); - Teuchos::ArrayRCP< const LocalOrdinal > nodeIdTargetData = nodeIdTarget->getData(0); - for(decltype(uniqueId.size()) i = 0; i < uniqueId.size(); i++) { - uniqueId[i] = nodeIdTargetData[i]; - } + Teuchos::ArrayRCP nodeIdTargetData = nodeIdTarget->getData(0); + for (decltype(uniqueId.size()) i = 0; i < 
uniqueId.size(); i++) { + uniqueId[i] = nodeIdTargetData[i]; + } - // nodal comm uniqueId, myLocalNodeIds + // nodal comm uniqueId, myLocalNodeIds - // uniqueId now should contain ghosted data + // uniqueId now should contain ghosted data - for(decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size()-1; i++) { - for(size_t j = amalgRowPtr[i]; j < amalgRowPtr[i+1]; j++) { - if (uniqueId[i] != uniqueId[amalgCols[j]]) keep [j] = false; - } + for (decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size() - 1; i++) { + for (size_t j = amalgRowPtr[i]; j < amalgRowPtr[i + 1]; j++) { + if (uniqueId[i] != uniqueId[amalgCols[j]]) keep[j] = false; } + } - // squeeze out hard-coded zeros from CSR arrays - Teuchos::ArrayRCP amalgVals; - this->squeezeOutNnzs(amalgRowPtr,amalgCols,amalgVals,keep); + // squeeze out hard-coded zeros from CSR arrays + Teuchos::ArrayRCP amalgVals; + this->squeezeOutNnzs(amalgRowPtr, amalgCols, amalgVals, keep); - typedef Xpetra::MultiVectorFactory::magnitudeType,LO,GO,NO> dxMVf; - RCP ghostedCoords = dxMVf::Build(amalgColMap,Coords->getNumVectors()); + typedef Xpetra::MultiVectorFactory::magnitudeType, LO, GO, NO> dxMVf; + RCP ghostedCoords = dxMVf::Build(amalgColMap, Coords->getNumVectors()); - TEUCHOS_TEST_FOR_EXCEPTION(amalgRowMap->getLocalNumElements() != Coords->getMap()->getLocalNumElements(), MueLu::Exceptions::RuntimeError, "MueLu::VariableDofLaplacianFactory: the number of Coordinates and amalgamated nodes is inconsistent."); + TEUCHOS_TEST_FOR_EXCEPTION(amalgRowMap->getLocalNumElements() != Coords->getMap()->getLocalNumElements(), MueLu::Exceptions::RuntimeError, "MueLu::VariableDofLaplacianFactory: the number of Coordinates and amalgamated nodes is inconsistent."); - // Coords might live on a special nodeMap with consecutive ids (the natural numbering) - // The amalgRowMap might have the same number of entries, but with holes in the ids. - // e.g. 0,3,6,9,... as GIDs. - // We need the ghosted Coordinates in the buildLaplacian routine. 
But we access the data - // through getData only, i.e., the global ids are not interesting as long as we do not change - // the ordering of the entries - Coords->replaceMap(amalgRowMap); - ghostedCoords->doImport(*Coords, *nodeImporter, Xpetra::INSERT); + // Coords might live on a special nodeMap with consecutive ids (the natural numbering) + // The amalgRowMap might have the same number of entries, but with holes in the ids. + // e.g. 0,3,6,9,... as GIDs. + // We need the ghosted Coordinates in the buildLaplacian routine. But we access the data + // through getData only, i.e., the global ids are not interesting as long as we do not change + // the ordering of the entries + Coords->replaceMap(amalgRowMap); + ghostedCoords->doImport(*Coords, *nodeImporter, Xpetra::INSERT); - Teuchos::ArrayRCP lapVals(amalgRowPtr[nLocalNodes]); - this->buildLaplacian(amalgRowPtr, amalgCols, lapVals, Coords->getNumVectors(), ghostedCoords); + Teuchos::ArrayRCP lapVals(amalgRowPtr[nLocalNodes]); + this->buildLaplacian(amalgRowPtr, amalgCols, lapVals, Coords->getNumVectors(), ghostedCoords); - // sort column GIDs - for(decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size()-1; i++) { - size_t j = amalgRowPtr[i]; - this->MueLu_az_sort(&(amalgCols[j]), amalgRowPtr[i+1] - j, NULL, &(lapVals[j])); - } + // sort column GIDs + for (decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size() - 1; i++) { + size_t j = amalgRowPtr[i]; + this->MueLu_az_sort(&(amalgCols[j]), amalgRowPtr[i + 1] - j, NULL, &(lapVals[j])); + } - // Caluclate status array for next level - Teuchos::Array status(nLocalNodes * maxDofPerNode); + // Caluclate status array for next level + Teuchos::Array status(nLocalNodes * maxDofPerNode); - // dir or not Teuchos::ArrayRCP dirOrNot - for(decltype(status.size()) i = 0; i < status.size(); i++) status[i] = 's'; - for(decltype(status.size()) i = 0; i < status.size(); i++) { - if(dofPresent[i] == false) status[i] = 'p'; - } - if(dirOrNot.size() > 0) { - 
for(decltype(map.size()) i = 0; i < map.size(); i++) { - if(dirOrNot[i] == true){ - status[map[i]] = 'd'; - } + // dir or not Teuchos::ArrayRCP dirOrNot + for (decltype(status.size()) i = 0; i < status.size(); i++) status[i] = 's'; + for (decltype(status.size()) i = 0; i < status.size(); i++) { + if (dofPresent[i] == false) status[i] = 'p'; + } + if (dirOrNot.size() > 0) { + for (decltype(map.size()) i = 0; i < map.size(); i++) { + if (dirOrNot[i] == true) { + status[map[i]] = 'd'; } } - Set(currentLevel,"DofStatus",status); - - // end status array - - Teuchos::RCP lapCrsMat = CrsMatrixFactory::Build(amalgRowMap, amalgColMap, 10); // TODO better approx for max nnz per row + } + Set(currentLevel, "DofStatus", status); - for (size_t i = 0; i < nLocalNodes; i++) { - lapCrsMat->insertLocalValues(i, amalgCols.view(amalgRowPtr[i],amalgRowPtr[i+1]-amalgRowPtr[i]), - lapVals.view(amalgRowPtr[i],amalgRowPtr[i+1]-amalgRowPtr[i])); - } - lapCrsMat->fillComplete(amalgRowMap,amalgRowMap); + // end status array - //lapCrsMat->describe(*fancy, Teuchos::VERB_EXTREME); + Teuchos::RCP lapCrsMat = CrsMatrixFactory::Build(amalgRowMap, amalgColMap, 10); // TODO better approx for max nnz per row - Teuchos::RCP lapMat = Teuchos::rcp(new CrsMatrixWrap(lapCrsMat)); - Set(currentLevel,"A",lapMat); + for (size_t i = 0; i < nLocalNodes; i++) { + lapCrsMat->insertLocalValues(i, amalgCols.view(amalgRowPtr[i], amalgRowPtr[i + 1] - amalgRowPtr[i]), + lapVals.view(amalgRowPtr[i], amalgRowPtr[i + 1] - amalgRowPtr[i])); } - - template - void VariableDofLaplacianFactory::buildLaplacian(const Teuchos::ArrayRCP& rowPtr, const Teuchos::ArrayRCP& cols, Teuchos::ArrayRCP& vals,const size_t& numdim, const RCP::magnitudeType,LocalOrdinal,GlobalOrdinal,Node> > & ghostedCoords) const { - TEUCHOS_TEST_FOR_EXCEPTION(numdim != 2 && numdim !=3, MueLu::Exceptions::RuntimeError,"buildLaplacian only works for 2d or 3d examples. 
numdim = " << numdim); - - if(numdim == 2) { // 2d - Teuchos::ArrayRCP< const typename Teuchos::ScalarTraits::magnitudeType > x = ghostedCoords->getData(0); - Teuchos::ArrayRCP< const typename Teuchos::ScalarTraits::magnitudeType > y = ghostedCoords->getData(1); - - for(decltype(rowPtr.size()) i = 0; i < rowPtr.size() - 1; i++) { - Scalar sum = Teuchos::ScalarTraits::zero(); - LocalOrdinal diag = -1; - for(size_t j = rowPtr[i]; j < rowPtr[i+1]; j++) { - if(cols[j] != Teuchos::as(i)){ - vals[j] = std::sqrt( (x[i]-x[cols[j]]) * (x[i]-x[cols[j]]) + - (y[i]-y[cols[j]]) * (y[i]-y[cols[j]]) ); - TEUCHOS_TEST_FOR_EXCEPTION(vals[j] == Teuchos::ScalarTraits::zero(), MueLu::Exceptions::RuntimeError, "buildLaplacian: error, " << i << " and " << cols[j] << " have same coordinates: " << x[i] << " and " << y[i]); - vals[j] = -Teuchos::ScalarTraits::one()/vals[j]; - sum = sum - vals[j]; - } - else diag = j; - } - if(sum == Teuchos::ScalarTraits::zero()) sum = Teuchos::ScalarTraits::one(); - TEUCHOS_TEST_FOR_EXCEPTION(diag == -1, MueLu::Exceptions::RuntimeError, "buildLaplacian: error, row " << i << " has zero diagonal!"); - - vals[diag] = sum; + lapCrsMat->fillComplete(amalgRowMap, amalgRowMap); + + //lapCrsMat->describe(*fancy, Teuchos::VERB_EXTREME); + + Teuchos::RCP lapMat = Teuchos::rcp(new CrsMatrixWrap(lapCrsMat)); + Set(currentLevel, "A", lapMat); +} + +template +void VariableDofLaplacianFactory::buildLaplacian(const Teuchos::ArrayRCP& rowPtr, const Teuchos::ArrayRCP& cols, Teuchos::ArrayRCP& vals, const size_t& numdim, const RCP::magnitudeType, LocalOrdinal, GlobalOrdinal, Node> >& ghostedCoords) const { + TEUCHOS_TEST_FOR_EXCEPTION(numdim != 2 && numdim != 3, MueLu::Exceptions::RuntimeError, "buildLaplacian only works for 2d or 3d examples. 
numdim = " << numdim); + + if (numdim == 2) { // 2d + Teuchos::ArrayRCP::magnitudeType> x = ghostedCoords->getData(0); + Teuchos::ArrayRCP::magnitudeType> y = ghostedCoords->getData(1); + + for (decltype(rowPtr.size()) i = 0; i < rowPtr.size() - 1; i++) { + Scalar sum = Teuchos::ScalarTraits::zero(); + LocalOrdinal diag = -1; + for (size_t j = rowPtr[i]; j < rowPtr[i + 1]; j++) { + if (cols[j] != Teuchos::as(i)) { + vals[j] = std::sqrt((x[i] - x[cols[j]]) * (x[i] - x[cols[j]]) + + (y[i] - y[cols[j]]) * (y[i] - y[cols[j]])); + TEUCHOS_TEST_FOR_EXCEPTION(vals[j] == Teuchos::ScalarTraits::zero(), MueLu::Exceptions::RuntimeError, "buildLaplacian: error, " << i << " and " << cols[j] << " have same coordinates: " << x[i] << " and " << y[i]); + vals[j] = -Teuchos::ScalarTraits::one() / vals[j]; + sum = sum - vals[j]; + } else + diag = j; } - } else { // 3d - Teuchos::ArrayRCP< const typename Teuchos::ScalarTraits::magnitudeType > x = ghostedCoords->getData(0); - Teuchos::ArrayRCP< const typename Teuchos::ScalarTraits::magnitudeType > y = ghostedCoords->getData(1); - Teuchos::ArrayRCP< const typename Teuchos::ScalarTraits::magnitudeType > z = ghostedCoords->getData(2); - - for(decltype(rowPtr.size()) i = 0; i < rowPtr.size() - 1; i++) { - Scalar sum = Teuchos::ScalarTraits::zero(); - LocalOrdinal diag = -1; - for(size_t j = rowPtr[i]; j < rowPtr[i+1]; j++) { - if(cols[j] != Teuchos::as(i)){ - vals[j] = std::sqrt( (x[i]-x[cols[j]]) * (x[i]-x[cols[j]]) + - (y[i]-y[cols[j]]) * (y[i]-y[cols[j]]) + - (z[i]-z[cols[j]]) * (z[i]-z[cols[j]]) ); - - TEUCHOS_TEST_FOR_EXCEPTION(vals[j] == Teuchos::ScalarTraits::zero(), MueLu::Exceptions::RuntimeError, "buildLaplacian: error, " << i << " and " << cols[j] << " have same coordinates: " << x[i] << " and " << y[i] << " and " << z[i]); - - vals[j] = -Teuchos::ScalarTraits::one()/vals[j]; - sum = sum - vals[j]; - } - else diag = j; - } - if(sum == Teuchos::ScalarTraits::zero()) sum = Teuchos::ScalarTraits::one(); - 
TEUCHOS_TEST_FOR_EXCEPTION(diag == -1, MueLu::Exceptions::RuntimeError, "buildLaplacian: error, row " << i << " has zero diagonal!"); + if (sum == Teuchos::ScalarTraits::zero()) sum = Teuchos::ScalarTraits::one(); + TEUCHOS_TEST_FOR_EXCEPTION(diag == -1, MueLu::Exceptions::RuntimeError, "buildLaplacian: error, row " << i << " has zero diagonal!"); - vals[diag] = sum; + vals[diag] = sum; + } + } else { // 3d + Teuchos::ArrayRCP::magnitudeType> x = ghostedCoords->getData(0); + Teuchos::ArrayRCP::magnitudeType> y = ghostedCoords->getData(1); + Teuchos::ArrayRCP::magnitudeType> z = ghostedCoords->getData(2); + + for (decltype(rowPtr.size()) i = 0; i < rowPtr.size() - 1; i++) { + Scalar sum = Teuchos::ScalarTraits::zero(); + LocalOrdinal diag = -1; + for (size_t j = rowPtr[i]; j < rowPtr[i + 1]; j++) { + if (cols[j] != Teuchos::as(i)) { + vals[j] = std::sqrt((x[i] - x[cols[j]]) * (x[i] - x[cols[j]]) + + (y[i] - y[cols[j]]) * (y[i] - y[cols[j]]) + + (z[i] - z[cols[j]]) * (z[i] - z[cols[j]])); + + TEUCHOS_TEST_FOR_EXCEPTION(vals[j] == Teuchos::ScalarTraits::zero(), MueLu::Exceptions::RuntimeError, "buildLaplacian: error, " << i << " and " << cols[j] << " have same coordinates: " << x[i] << " and " << y[i] << " and " << z[i]); + + vals[j] = -Teuchos::ScalarTraits::one() / vals[j]; + sum = sum - vals[j]; + } else + diag = j; } + if (sum == Teuchos::ScalarTraits::zero()) sum = Teuchos::ScalarTraits::one(); + TEUCHOS_TEST_FOR_EXCEPTION(diag == -1, MueLu::Exceptions::RuntimeError, "buildLaplacian: error, row " << i << " has zero diagonal!"); + + vals[diag] = sum; } } - - template - void VariableDofLaplacianFactory::squeezeOutNnzs(Teuchos::ArrayRCP & rowPtr, Teuchos::ArrayRCP & cols, Teuchos::ArrayRCP & vals, const std::vector& keep) const { - // get rid of nonzero entries that have 0's in them and properly change - // the row ptr array to reflect this removal (either vals == NULL or vals != NULL) - // Note, the arrays are squeezed. No memory is freed. 
- - size_t count = 0; - - size_t nRows = rowPtr.size()-1; - if(vals.size() > 0) { - for(size_t i = 0; i < nRows; i++) { - size_t newStart = count; - for(size_t j = rowPtr[i]; j < rowPtr[i+1]; j++) { - if(vals[j] != Teuchos::ScalarTraits::zero()) { - cols[count ] = cols[j]; - vals[count++] = vals[j]; - } +} + +template +void VariableDofLaplacianFactory::squeezeOutNnzs(Teuchos::ArrayRCP& rowPtr, Teuchos::ArrayRCP& cols, Teuchos::ArrayRCP& vals, const std::vector& keep) const { + // get rid of nonzero entries that have 0's in them and properly change + // the row ptr array to reflect this removal (either vals == NULL or vals != NULL) + // Note, the arrays are squeezed. No memory is freed. + + size_t count = 0; + + size_t nRows = rowPtr.size() - 1; + if (vals.size() > 0) { + for (size_t i = 0; i < nRows; i++) { + size_t newStart = count; + for (size_t j = rowPtr[i]; j < rowPtr[i + 1]; j++) { + if (vals[j] != Teuchos::ScalarTraits::zero()) { + cols[count] = cols[j]; + vals[count++] = vals[j]; } - rowPtr[i] = newStart; } - } else { - for (size_t i = 0; i < nRows; i++) { - size_t newStart = count; - for(size_t j = rowPtr[i]; j < rowPtr[i+1]; j++) { - if (keep[j] == true) { - cols[count++] = cols[j]; - } + rowPtr[i] = newStart; + } + } else { + for (size_t i = 0; i < nRows; i++) { + size_t newStart = count; + for (size_t j = rowPtr[i]; j < rowPtr[i + 1]; j++) { + if (keep[j] == true) { + cols[count++] = cols[j]; } - rowPtr[i] = newStart; } + rowPtr[i] = newStart; } - rowPtr[nRows] = count; } - - template - void VariableDofLaplacianFactory::buildPaddedMap(const Teuchos::ArrayRCP & dofPresent, std::vector & map, size_t nDofs) const { - size_t count = 0; - for (decltype(dofPresent.size()) i = 0; i < dofPresent.size(); i++) - if(dofPresent[i] == 1) map[count++] = Teuchos::as(i); - TEUCHOS_TEST_FOR_EXCEPTION(nDofs != count, MueLu::Exceptions::RuntimeError, "VariableDofLaplacianFactory::buildPaddedMap: #dofs in dofPresent does not match the expected value (number of rows of A): 
" << nDofs << " vs. " << count); + rowPtr[nRows] = count; +} + +template +void VariableDofLaplacianFactory::buildPaddedMap(const Teuchos::ArrayRCP& dofPresent, std::vector& map, size_t nDofs) const { + size_t count = 0; + for (decltype(dofPresent.size()) i = 0; i < dofPresent.size(); i++) + if (dofPresent[i] == 1) map[count++] = Teuchos::as(i); + TEUCHOS_TEST_FOR_EXCEPTION(nDofs != count, MueLu::Exceptions::RuntimeError, "VariableDofLaplacianFactory::buildPaddedMap: #dofs in dofPresent does not match the expected value (number of rows of A): " << nDofs << " vs. " << count); +} + +template +void VariableDofLaplacianFactory::assignGhostLocalNodeIds(const Teuchos::RCP& rowDofMap, const Teuchos::RCP& colDofMap, std::vector& myLocalNodeIds, const std::vector& dofMap, size_t maxDofPerNode, size_t& nLocalNodes, size_t& nLocalPlusGhostNodes, Teuchos::RCP > comm) const { + size_t nLocalDofs = rowDofMap->getLocalNumElements(); + size_t nLocalPlusGhostDofs = colDofMap->getLocalNumElements(); // TODO remove parameters + + // create importer for dof-based information + Teuchos::RCP importer = ImportFactory::Build(rowDofMap, colDofMap); + + // create a vector living on column map of A (dof based) + Teuchos::RCP localNodeIdsTemp = LOVectorFactory::Build(rowDofMap, true); + Teuchos::RCP localNodeIds = LOVectorFactory::Build(colDofMap, true); + + // fill local dofs (padded local ids) + { + Teuchos::ArrayRCP localNodeIdsTempData = localNodeIdsTemp->getDataNonConst(0); + for (size_t i = 0; i < localNodeIdsTemp->getLocalLength(); i++) + localNodeIdsTempData[i] = std::floor(dofMap[i] / maxDofPerNode); } - template - void VariableDofLaplacianFactory::assignGhostLocalNodeIds(const Teuchos::RCP & rowDofMap, const Teuchos::RCP & colDofMap, std::vector & myLocalNodeIds, const std::vector & dofMap, size_t maxDofPerNode, size_t& nLocalNodes, size_t& nLocalPlusGhostNodes, Teuchos::RCP< const Teuchos::Comm< int > > comm) const { + localNodeIds->doImport(*localNodeIdsTemp, *importer, 
Xpetra::INSERT); + Teuchos::ArrayRCP localNodeIdsData = localNodeIds->getData(0); - size_t nLocalDofs = rowDofMap->getLocalNumElements(); - size_t nLocalPlusGhostDofs = colDofMap->getLocalNumElements(); // TODO remove parameters + // Note: localNodeIds contains local ids for the padded version as vector values - // create importer for dof-based information - Teuchos::RCP importer = ImportFactory::Build(rowDofMap, colDofMap); + // we use Scalar instead of int as type + Teuchos::RCP myProcTemp = LOVectorFactory::Build(rowDofMap, true); + Teuchos::RCP myProc = LOVectorFactory::Build(colDofMap, true); - // create a vector living on column map of A (dof based) - Teuchos::RCP localNodeIdsTemp = LOVectorFactory::Build(rowDofMap,true); - Teuchos::RCP localNodeIds = LOVectorFactory::Build(colDofMap,true); - - // fill local dofs (padded local ids) - { - Teuchos::ArrayRCP< LocalOrdinal > localNodeIdsTempData = localNodeIdsTemp->getDataNonConst(0); - for(size_t i = 0; i < localNodeIdsTemp->getLocalLength(); i++) - localNodeIdsTempData[i] = std::floor( dofMap[i] / maxDofPerNode ); - } - - localNodeIds->doImport(*localNodeIdsTemp, *importer, Xpetra::INSERT); - Teuchos::ArrayRCP< const LocalOrdinal > localNodeIdsData = localNodeIds->getData(0); - - // Note: localNodeIds contains local ids for the padded version as vector values - - - // we use Scalar instead of int as type - Teuchos::RCP myProcTemp = LOVectorFactory::Build(rowDofMap,true); - Teuchos::RCP myProc = LOVectorFactory::Build(colDofMap,true); - - // fill local dofs (padded local ids) - { - Teuchos::ArrayRCP< LocalOrdinal > myProcTempData = myProcTemp->getDataNonConst(0); - for(size_t i = 0; i < myProcTemp->getLocalLength(); i++) - myProcTempData[i] = Teuchos::as(comm->getRank()); - } - myProc->doImport(*myProcTemp, *importer, Xpetra::INSERT); - Teuchos::ArrayRCP myProcData = myProc->getDataNonConst(0); // we have to modify the data (therefore the non-const version) - - // At this point, the ghost part of localNodeIds 
corresponds to the local ids - // associated with the current owning processor. We want to convert these to - // local ids associated with the processor on which these are ghosts. - // Thus we have to re-number them. In doing this re-numbering we must make sure - // that we find all ghosts with the same id & proc and assign a unique local - // id to this group (id&proc). To do this find, we sort all ghost entries in - // localNodeIds that are owned by the same processor. Then we can look for - // duplicates (i.e., several ghost entries corresponding to dofs with the same - // node id) easily and make sure these are all assigned to the same local id. - // To do the sorting we'll make a temporary copy of the ghosts via tempId and - // tempProc and sort this multiple times for each group owned by the same proc. - - - std::vector location(nLocalPlusGhostDofs - nLocalDofs + 1); - std::vector tempId (nLocalPlusGhostDofs - nLocalDofs + 1); - std::vector tempProc(nLocalPlusGhostDofs - nLocalDofs + 1); - - size_t notProcessed = nLocalDofs; // iteration index over all ghosted dofs - size_t tempIndex = 0; - size_t first = tempIndex; - LocalOrdinal neighbor; - - while (notProcessed < nLocalPlusGhostDofs) { - neighbor = myProcData[notProcessed]; // get processor id of not-processed element - first = tempIndex; - location[tempIndex] = notProcessed; - tempId[tempIndex++] = localNodeIdsData[notProcessed]; - myProcData[notProcessed] = -1 - neighbor; - - for(size_t i = notProcessed + 1; i < nLocalPlusGhostDofs; i++) { - if(myProcData[i] == neighbor) { - location[tempIndex] = i; - tempId[tempIndex++] = localNodeIdsData[i]; - myProcData[i] = -1; // mark as visited - } + // fill local dofs (padded local ids) + { + Teuchos::ArrayRCP myProcTempData = myProcTemp->getDataNonConst(0); + for (size_t i = 0; i < myProcTemp->getLocalLength(); i++) + myProcTempData[i] = Teuchos::as(comm->getRank()); + } + myProc->doImport(*myProcTemp, *importer, Xpetra::INSERT); + Teuchos::ArrayRCP myProcData = 
myProc->getDataNonConst(0); // we have to modify the data (therefore the non-const version) + + // At this point, the ghost part of localNodeIds corresponds to the local ids + // associated with the current owning processor. We want to convert these to + // local ids associated with the processor on which these are ghosts. + // Thus we have to re-number them. In doing this re-numbering we must make sure + // that we find all ghosts with the same id & proc and assign a unique local + // id to this group (id&proc). To do this find, we sort all ghost entries in + // localNodeIds that are owned by the same processor. Then we can look for + // duplicates (i.e., several ghost entries corresponding to dofs with the same + // node id) easily and make sure these are all assigned to the same local id. + // To do the sorting we'll make a temporary copy of the ghosts via tempId and + // tempProc and sort this multiple times for each group owned by the same proc. + + std::vector location(nLocalPlusGhostDofs - nLocalDofs + 1); + std::vector tempId(nLocalPlusGhostDofs - nLocalDofs + 1); + std::vector tempProc(nLocalPlusGhostDofs - nLocalDofs + 1); + + size_t notProcessed = nLocalDofs; // iteration index over all ghosted dofs + size_t tempIndex = 0; + size_t first = tempIndex; + LocalOrdinal neighbor; + + while (notProcessed < nLocalPlusGhostDofs) { + neighbor = myProcData[notProcessed]; // get processor id of not-processed element + first = tempIndex; + location[tempIndex] = notProcessed; + tempId[tempIndex++] = localNodeIdsData[notProcessed]; + myProcData[notProcessed] = -1 - neighbor; + + for (size_t i = notProcessed + 1; i < nLocalPlusGhostDofs; i++) { + if (myProcData[i] == neighbor) { + location[tempIndex] = i; + tempId[tempIndex++] = localNodeIdsData[i]; + myProcData[i] = -1; // mark as visited } - this->MueLu_az_sort(&(tempId[first]), tempIndex - first, &(location[first]), NULL); - for(size_t i = first; i < tempIndex; i++) tempProc[i] = neighbor; - - // increment index. 
Find next notProcessed dof index corresponding to first non-visited element - notProcessed++; - while ( (notProcessed < nLocalPlusGhostDofs) && (myProcData[notProcessed] < 0)) - notProcessed++; } - TEUCHOS_TEST_FOR_EXCEPTION(tempIndex != nLocalPlusGhostDofs-nLocalDofs, MueLu::Exceptions::RuntimeError,"Number of nonzero ghosts is inconsistent."); - - // Now assign ids to all ghost nodes (giving the same id to those with the - // same myProc[] and the same local id on the proc that actually owns the - // variable associated with the ghost - - nLocalNodes = 0; // initialize return value - if(nLocalDofs > 0) nLocalNodes = localNodeIdsData[nLocalDofs-1] + 1; + this->MueLu_az_sort(&(tempId[first]), tempIndex - first, &(location[first]), NULL); + for (size_t i = first; i < tempIndex; i++) tempProc[i] = neighbor; - nLocalPlusGhostNodes = nLocalNodes; // initialize return value - if(nLocalDofs < nLocalPlusGhostDofs) nLocalPlusGhostNodes++; // 1st ghost node is unique (not accounted for). number will be increased later, if there are more ghost nodes + // increment index. Find next notProcessed dof index corresponding to first non-visited element + notProcessed++; + while ((notProcessed < nLocalPlusGhostDofs) && (myProcData[notProcessed] < 0)) + notProcessed++; + } + TEUCHOS_TEST_FOR_EXCEPTION(tempIndex != nLocalPlusGhostDofs - nLocalDofs, MueLu::Exceptions::RuntimeError, "Number of nonzero ghosts is inconsistent."); - // check if two adjacent ghost dofs correspond to different nodes. 
To do this, - // check if they are from different processors or whether they have different - // local node ids + // Now assign ids to all ghost nodes (giving the same id to those with the + // same myProc[] and the same local id on the proc that actually owns the + // variable associated with the ghost - // loop over all (remaining) ghost dofs - for (size_t i = nLocalDofs+1; i < nLocalPlusGhostDofs; i++) { - size_t lagged = nLocalPlusGhostNodes-1; + nLocalNodes = 0; // initialize return value + if (nLocalDofs > 0) nLocalNodes = localNodeIdsData[nLocalDofs - 1] + 1; - // i is a new unique ghost node (not already accounted for) - if ((tempId[i-nLocalDofs] != tempId[i-1-nLocalDofs]) || - (tempProc[i-nLocalDofs] != tempProc[i-1-nLocalDofs])) - nLocalPlusGhostNodes++; // update number of ghost nodes - tempId[i-1-nLocalDofs] = lagged; - } - if (nLocalPlusGhostDofs > nLocalDofs) - tempId[nLocalPlusGhostDofs-1-nLocalDofs] = nLocalPlusGhostNodes - 1; + nLocalPlusGhostNodes = nLocalNodes; // initialize return value + if (nLocalDofs < nLocalPlusGhostDofs) nLocalPlusGhostNodes++; // 1st ghost node is unique (not accounted for). number will be increased later, if there are more ghost nodes - // fill myLocalNodeIds array. Start with local part (not ghosted) - for(size_t i = 0; i < nLocalDofs; i++) - myLocalNodeIds[i] = std::floor( dofMap[i] / maxDofPerNode ); + // check if two adjacent ghost dofs correspond to different nodes. 
To do this, + // check if they are from different processors or whether they have different + // local node ids - // copy ghosted nodal ids into myLocalNodeIds - for(size_t i = nLocalDofs; i < nLocalPlusGhostDofs; i++) - myLocalNodeIds[location[i-nLocalDofs]] = tempId[i-nLocalDofs]; + // loop over all (remaining) ghost dofs + for (size_t i = nLocalDofs + 1; i < nLocalPlusGhostDofs; i++) { + size_t lagged = nLocalPlusGhostNodes - 1; + // i is a new unique ghost node (not already accounted for) + if ((tempId[i - nLocalDofs] != tempId[i - 1 - nLocalDofs]) || + (tempProc[i - nLocalDofs] != tempProc[i - 1 - nLocalDofs])) + nLocalPlusGhostNodes++; // update number of ghost nodes + tempId[i - 1 - nLocalDofs] = lagged; } + if (nLocalPlusGhostDofs > nLocalDofs) + tempId[nLocalPlusGhostDofs - 1 - nLocalDofs] = nLocalPlusGhostNodes - 1; + + // fill myLocalNodeIds array. Start with local part (not ghosted) + for (size_t i = 0; i < nLocalDofs; i++) + myLocalNodeIds[i] = std::floor(dofMap[i] / maxDofPerNode); -} /* MueLu */ + // copy ghosted nodal ids into myLocalNodeIds + for (size_t i = nLocalDofs; i < nLocalPlusGhostDofs; i++) + myLocalNodeIds[location[i - nLocalDofs]] = tempId[i - nLocalDofs]; +} +} // namespace MueLu #endif /* PACKAGES_MUELU_SRC_GRAPH_MUELU_VARIABLEDOFLAPLACIANFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/MueLu_AggregationAlgorithmBase.hpp b/packages/muelu/src/Graph/MueLu_AggregationAlgorithmBase.hpp index 8a78e56cd9ca..b181a7176ac0 100644 --- a/packages/muelu/src/Graph/MueLu_AggregationAlgorithmBase.hpp +++ b/packages/muelu/src/Graph/MueLu_AggregationAlgorithmBase.hpp @@ -57,42 +57,40 @@ namespace MueLu { - /*! +/*! 
@class AggregationAlgorithmBase @brief Pure virtual base class for all MueLu aggregation algorithms @ingroup MueLuBaseClasses */ - template - class AggregationAlgorithmBase : public BaseClass { +template +class AggregationAlgorithmBase : public BaseClass { #undef MUELU_AGGREGATIONALGORITHMBASE_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: + public: + //! @name Constructors/Destructors + //@{ - //! @name Constructors/Destructors - //@{ + //! Destructor. + virtual ~AggregationAlgorithmBase() {} - //! Destructor. - virtual ~AggregationAlgorithmBase() {} + //@} - //@} + //! @name Build routines + //@{ - //! @name Build routines - //@{ + //! BuildAggregates routine. + virtual void BuildAggregates(const Teuchos::ParameterList& params, + const GraphBase& graph, + Aggregates& aggregates, + std::vector& aggStat, + LO& numNonAggregatedNodes) const = 0; + //@} +}; - //! BuildAggregates routine. - virtual void BuildAggregates(const Teuchos::ParameterList& params, - const GraphBase& graph, - Aggregates& aggregates, - std::vector& aggStat, - LO& numNonAggregatedNodes) const = 0; - //@} - - }; - -} // namespace MueLu +} // namespace MueLu #define MUELU_AGGREGATIONALGORITHMBASE_SHORT #endif /* MUELU_AGGREGATIONALGORITHMBASE_HPP_ */ diff --git a/packages/muelu/src/Graph/MueLu_AggregationAlgorithmBase_kokkos.hpp b/packages/muelu/src/Graph/MueLu_AggregationAlgorithmBase_kokkos.hpp index e758bdf1c84c..d76eb9f857f7 100644 --- a/packages/muelu/src/Graph/MueLu_AggregationAlgorithmBase_kokkos.hpp +++ b/packages/muelu/src/Graph/MueLu_AggregationAlgorithmBase_kokkos.hpp @@ -59,42 +59,42 @@ namespace MueLu { - /*! +/*! 
@class AggregationAlgorithmBase @brief Pure virtual base class for all MueLu aggregation algorithms @ingroup MueLuBaseClasses */ - template - class AggregationAlgorithmBase_kokkos : public BaseClass { +template +class AggregationAlgorithmBase_kokkos : public BaseClass { #undef MUELU_AGGREGATIONALGORITHMBASE_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - using device_type = typename LWGraph_kokkos::device_type; + public: + using device_type = typename LWGraph_kokkos::device_type; - //! @name Constructors/Destructors - //@{ + //! @name Constructors/Destructors + //@{ - //! Destructor. - virtual ~AggregationAlgorithmBase_kokkos() {} + //! Destructor. + virtual ~AggregationAlgorithmBase_kokkos() {} - //@} + //@} - //! @name Build routines - //@{ + //! @name Build routines + //@{ - //! BuildAggregates routine. - virtual void BuildAggregates(const Teuchos::ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const = 0; - //@} - }; + //! BuildAggregates routine. 
+ virtual void BuildAggregates(const Teuchos::ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const = 0; + //@} +}; -} // namespace MueLu +} // namespace MueLu #define MUELU_AGGREGATIONALGORITHMBASE_KOKKOS_SHORT -#endif // MUELU_AGGREGATIONALGORITHMBASE_KOKKOS_HPP +#endif // MUELU_AGGREGATIONALGORITHMBASE_KOKKOS_HPP diff --git a/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_decl.hpp b/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_decl.hpp index ccdf3e3a7782..7252edd8ea03 100644 --- a/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_decl.hpp +++ b/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_decl.hpp @@ -48,11 +48,9 @@ #include "MueLu_ConfigDefs.hpp" - #include #include - #include #include "MueLu_GraphBase_fwd.hpp" @@ -67,15 +65,15 @@ namespace MueLu { -template +template class NotayAggregationFactory : public SingleLevelFactoryBase { #undef MUELU_NOTAYAGGREGATIONFACTORY_SHORT #include "MueLu_UseShortNames.hpp" -public: + public: //! @name typedefs //@{ using local_matrix_type = typename Matrix::local_matrix_type; @@ -86,15 +84,14 @@ class NotayAggregationFactory : public SingleLevelFactoryBase { using row_sum_type = typename Kokkos::View; //@} - //! @name Constructors/Destructors. //@{ //! Constructor. - NotayAggregationFactory() { }; + NotayAggregationFactory(){}; //! Destructor. - virtual ~NotayAggregationFactory() { } + virtual ~NotayAggregationFactory() {} RCP GetValidParameterList() const; @@ -108,7 +105,7 @@ class NotayAggregationFactory : public SingleLevelFactoryBase { //! Input //@{ - void DeclareInput(Level ¤tLevel) const; + void DeclareInput(Level& currentLevel) const; //@} @@ -116,7 +113,7 @@ class NotayAggregationFactory : public SingleLevelFactoryBase { //@{ /*! @brief Build aggregates. 
*/ - void Build(Level ¤tLevel) const; + void Build(Level& currentLevel) const; /*! @brief Initial aggregation phase. */ void BuildInitialAggregates(const Teuchos::ParameterList& params, @@ -131,7 +128,7 @@ class NotayAggregationFactory : public SingleLevelFactoryBase { /*! @brief Further aggregation phase increases coarsening rate by a factor of ~2 per iteration. */ void BuildFurtherAggregates(const Teuchos::ParameterList& params, const RCP& A, - const Teuchos::ArrayView & orderingVector, + const Teuchos::ArrayView& orderingVector, const local_matrix_type& coarseA, const magnitude_type kappa, const row_sum_type& rowSum, @@ -160,13 +157,12 @@ class NotayAggregationFactory : public SingleLevelFactoryBase { const std::string matrixLabel, local_matrix_type& C) const; - //@} -private: -}; // class NotayAggregationFactory + private: +}; // class NotayAggregationFactory -} +} // namespace MueLu #define MUELU_NOTAYAGGREGATIONFACTORY_SHORT #endif /* MUELU_NOTAYAGGREGATIONFACTORY_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_def.hpp b/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_def.hpp index b84022432e41..46d6ab0d49af 100644 --- a/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_def.hpp +++ b/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_def.hpp @@ -66,880 +66,865 @@ #include "MueLu_Types.hpp" #include "MueLu_Utilities.hpp" - namespace MueLu { - namespace NotayUtils { - template - LocalOrdinal RandomOrdinal(LocalOrdinal min, LocalOrdinal max) { - return min + as((max-min+1) * (static_cast(std::rand()) / (RAND_MAX + 1.0))); - } - - template - void RandomReorder(Teuchos::Array & list) { - typedef LocalOrdinal LO; - LO n = Teuchos::as(list.size()); - for(LO i = 0; i < n-1; i++) - std::swap(list[i], list[RandomOrdinal(i,n-1)]); - } - } - - template - RCP NotayAggregationFactory::GetValidParameterList() const { - RCP validParamList = rcp(new 
ParameterList()); - +namespace NotayUtils { +template +LocalOrdinal RandomOrdinal(LocalOrdinal min, LocalOrdinal max) { + return min + as((max - min + 1) * (static_cast(std::rand()) / (RAND_MAX + 1.0))); +} + +template +void RandomReorder(Teuchos::Array& list) { + typedef LocalOrdinal LO; + LO n = Teuchos::as(list.size()); + for (LO i = 0; i < n - 1; i++) + std::swap(list[i], list[RandomOrdinal(i, n - 1)]); +} +} // namespace NotayUtils + +template +RCP NotayAggregationFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("aggregation: pairwise: size"); - SET_VALID_ENTRY("aggregation: pairwise: tie threshold"); - SET_VALID_ENTRY("aggregation: compute aggregate qualities"); - SET_VALID_ENTRY("aggregation: Dirichlet threshold"); - SET_VALID_ENTRY("aggregation: ordering"); + SET_VALID_ENTRY("aggregation: pairwise: size"); + SET_VALID_ENTRY("aggregation: pairwise: tie threshold"); + SET_VALID_ENTRY("aggregation: compute aggregate qualities"); + SET_VALID_ENTRY("aggregation: Dirichlet threshold"); + SET_VALID_ENTRY("aggregation: ordering"); #undef SET_VALID_ENTRY - // general variables needed in AggregationFactory - validParamList->set< RCP >("A", null, "Generating factory of the matrix"); - validParamList->set< RCP >("Graph", null, "Generating factory of the graph"); - validParamList->set< RCP >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'"); - validParamList->set< RCP >("AggregateQualities", null, "Generating factory for variable \'AggregateQualities\'"); + // general variables needed in AggregationFactory + validParamList->set >("A", null, "Generating factory of the matrix"); + validParamList->set >("Graph", null, "Generating factory of the graph"); + validParamList->set >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for 
\'Graph\'"); + validParamList->set >("AggregateQualities", null, "Generating factory for variable \'AggregateQualities\'"); + return validParamList; +} - return validParamList; - } - - template - void NotayAggregationFactory::DeclareInput(Level& currentLevel) const { - const ParameterList& pL = GetParameterList(); - - Input(currentLevel, "A"); - Input(currentLevel, "Graph"); - Input(currentLevel, "DofsPerNode"); - if (pL.get("aggregation: compute aggregate qualities")) { - Input(currentLevel, "AggregateQualities"); - } - +template +void NotayAggregationFactory::DeclareInput(Level& currentLevel) const { + const ParameterList& pL = GetParameterList(); + Input(currentLevel, "A"); + Input(currentLevel, "Graph"); + Input(currentLevel, "DofsPerNode"); + if (pL.get("aggregation: compute aggregate qualities")) { + Input(currentLevel, "AggregateQualities"); + } +} + +template +void NotayAggregationFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + using STS = Teuchos::ScalarTraits; + using MT = typename STS::magnitudeType; + + const MT MT_TWO = Teuchos::ScalarTraits::one() + Teuchos::ScalarTraits::one(); + + RCP out; + if (const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); } + const ParameterList& pL = GetParameterList(); + + const MT kappa = static_cast(pL.get("aggregation: Dirichlet threshold")); + TEUCHOS_TEST_FOR_EXCEPTION(kappa <= MT_TWO, + Exceptions::RuntimeError, + "Pairwise requires kappa > 2" + " otherwise all rows are considered as Dirichlet rows."); + + // Parameters + int maxNumIter = 3; + if (pL.isParameter("aggregation: pairwise: size")) + maxNumIter = pL.get("aggregation: pairwise: size"); + TEUCHOS_TEST_FOR_EXCEPTION(maxNumIter < 1, + Exceptions::RuntimeError, + 
"NotayAggregationFactory::Build(): \"aggregation: pairwise: size\"" + " must be a strictly positive integer"); + + RCP graph = Get >(currentLevel, "Graph"); + RCP A = Get >(currentLevel, "A"); + + // Setup aggregates & aggStat objects + RCP aggregates = rcp(new Aggregates(*graph)); + aggregates->setObjectLabel("PW"); + + const LO numRows = graph->GetNodeNumVertices(); + + // construct aggStat information + std::vector aggStat(numRows, READY); + + const int DofsPerNode = Get(currentLevel, "DofsPerNode"); + TEUCHOS_TEST_FOR_EXCEPTION(DofsPerNode != 1, Exceptions::RuntimeError, + "Pairwise only supports one dof per node"); + + // This follows the paper: + // Notay, "Aggregation-based algebraic multigrid for convection-diffusion equations", + // SISC 34(3), pp. A2288-2316. + + // Handle Ordering + std::string orderingStr = pL.get("aggregation: ordering"); + enum { + O_NATURAL, + O_RANDOM, + O_CUTHILL_MCKEE, + } ordering; + + ordering = O_NATURAL; + if (orderingStr == "random") + ordering = O_RANDOM; + else if (orderingStr == "natural") { + } else if (orderingStr == "cuthill-mckee" || orderingStr == "cm") + ordering = O_CUTHILL_MCKEE; + else { + TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError, "Invalid ordering type"); + } - template - void NotayAggregationFactory::Build(Level& currentLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); - using STS = Teuchos::ScalarTraits; - using MT = typename STS::magnitudeType; - - const MT MT_TWO = Teuchos::ScalarTraits::one() + Teuchos::ScalarTraits::one(); - - RCP out; - if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); - } - - const ParameterList& pL = GetParameterList(); - - const MT kappa = static_cast(pL.get("aggregation: Dirichlet threshold")); - TEUCHOS_TEST_FOR_EXCEPTION(kappa <= 
MT_TWO, - Exceptions::RuntimeError, - "Pairwise requires kappa > 2" - " otherwise all rows are considered as Dirichlet rows."); - - // Parameters - int maxNumIter = 3; - if (pL.isParameter("aggregation: pairwise: size")) - maxNumIter = pL.get("aggregation: pairwise: size"); - TEUCHOS_TEST_FOR_EXCEPTION(maxNumIter < 1, - Exceptions::RuntimeError, - "NotayAggregationFactory::Build(): \"aggregation: pairwise: size\"" - " must be a strictly positive integer"); - - - RCP graph = Get< RCP >(currentLevel, "Graph"); - RCP A = Get< RCP >(currentLevel, "A"); - - // Setup aggregates & aggStat objects - RCP aggregates = rcp(new Aggregates(*graph)); - aggregates->setObjectLabel("PW"); - - const LO numRows = graph->GetNodeNumVertices(); - - // construct aggStat information - std::vector aggStat(numRows, READY); - - - const int DofsPerNode = Get(currentLevel,"DofsPerNode"); - TEUCHOS_TEST_FOR_EXCEPTION(DofsPerNode != 1, Exceptions::RuntimeError, - "Pairwise only supports one dof per node"); + // Get an ordering vector + // NOTE: The orderingVector only orders *rows* of the matrix. Off-proc columns + // will get ignored in the aggregation phases, so we don't need to worry about + // running off the end. + Array orderingVector(numRows); + for (LO i = 0; i < numRows; i++) + orderingVector[i] = i; + if (ordering == O_RANDOM) + MueLu::NotayUtils::RandomReorder(orderingVector); + else if (ordering == O_CUTHILL_MCKEE) { + RCP > rcmVector = MueLu::Utilities::CuthillMcKee(*A); + auto localVector = rcmVector->getData(0); + for (LO i = 0; i < numRows; i++) + orderingVector[i] = localVector[i]; + } - // This follows the paper: - // Notay, "Aggregation-based algebraic multigrid for convection-diffusion equations", - // SISC 34(3), pp. A2288-2316. 
+ // Get the party stated + LO numNonAggregatedNodes = numRows, numDirichletNodes = 0; + BuildInitialAggregates(pL, A, orderingVector(), kappa, + *aggregates, aggStat, numNonAggregatedNodes, numDirichletNodes); + TEUCHOS_TEST_FOR_EXCEPTION(0 < numNonAggregatedNodes, Exceptions::RuntimeError, + "Initial pairwise aggregation failed to aggregate all nodes"); + LO numLocalAggregates = aggregates->GetNumAggregates(); + GetOStream(Statistics0) << "Init : " << numLocalAggregates << " - " + << A->getLocalNumRows() / numLocalAggregates << std::endl; + + // Temporary data storage for further aggregation steps + local_matrix_type intermediateP; + local_matrix_type coarseLocalA; + + // Compute the on rank part of the local matrix + // that the square submatrix that only contains + // columns corresponding to local rows. + LO numLocalDirichletNodes = numDirichletNodes; + Array localVertex2AggId(aggregates->GetVertex2AggId()->getData(0).view(0, numRows)); + BuildOnRankLocalMatrix(A->getLocalMatrixDevice(), coarseLocalA); + for (LO aggregationIter = 1; aggregationIter < maxNumIter; ++aggregationIter) { + // Compute the intermediate prolongator + BuildIntermediateProlongator(coarseLocalA.numRows(), numLocalDirichletNodes, numLocalAggregates, + localVertex2AggId(), intermediateP); + + // Compute the coarse local matrix and coarse row sum + BuildCoarseLocalMatrix(intermediateP, coarseLocalA); + + // Directly compute rowsum from A, rather than coarseA + row_sum_type rowSum("rowSum", numLocalAggregates); + { + std::vector > agg2vertex(numLocalAggregates); + auto vertex2AggId = aggregates->GetVertex2AggId()->getData(0); + for (LO i = 0; i < (LO)numRows; i++) { + if (aggStat[i] != AGGREGATED) + continue; + LO agg = vertex2AggId[i]; + agg2vertex[agg].push_back(i); + } - // Handle Ordering - std::string orderingStr = pL.get("aggregation: ordering"); - enum { - O_NATURAL, - O_RANDOM, - O_CUTHILL_MCKEE, - } ordering; + typename row_sum_type::HostMirror rowSum_h = 
Kokkos::create_mirror_view(rowSum); + for (LO i = 0; i < numRows; i++) { + // If not aggregated already, skip this guy + if (aggStat[i] != AGGREGATED) + continue; + int agg = vertex2AggId[i]; + std::vector& myagg = agg2vertex[agg]; + + size_t nnz = A->getNumEntriesInLocalRow(i); + ArrayView indices; + ArrayView vals; + A->getLocalRowView(i, indices, vals); + + SC mysum = Teuchos::ScalarTraits::zero(); + for (LO colidx = 0; colidx < static_cast(nnz); colidx++) { + bool found = false; + if (indices[colidx] < numRows) { + for (LO j = 0; j < (LO)myagg.size(); j++) + if (vertex2AggId[indices[colidx]] == agg) + found = true; + } + if (!found) { + *out << "- ADDING col " << indices[colidx] << " = " << vals[colidx] << std::endl; + mysum += vals[colidx]; + } else { + *out << "- NOT ADDING col " << indices[colidx] << " = " << vals[colidx] << std::endl; + } + } - ordering = O_NATURAL; - if (orderingStr == "random" ) ordering = O_RANDOM; - else if(orderingStr == "natural") {} - else if(orderingStr == "cuthill-mckee" || orderingStr == "cm") ordering = O_CUTHILL_MCKEE; - else { - TEUCHOS_TEST_FOR_EXCEPTION(1,Exceptions::RuntimeError,"Invalid ordering type"); + rowSum_h[agg] = mysum; + } + Kokkos::deep_copy(rowSum, rowSum_h); } - // Get an ordering vector - // NOTE: The orderingVector only orders *rows* of the matrix. Off-proc columns - // will get ignored in the aggregation phases, so we don't need to worry about - // running off the end. 
- Array orderingVector(numRows); + // Get local orderingVector + Array localOrderingVector(numRows); for (LO i = 0; i < numRows; i++) - orderingVector[i] = i; + localOrderingVector[i] = i; if (ordering == O_RANDOM) - MueLu::NotayUtils::RandomReorder(orderingVector); + MueLu::NotayUtils::RandomReorder(localOrderingVector); else if (ordering == O_CUTHILL_MCKEE) { - RCP > rcmVector = MueLu::Utilities::CuthillMcKee(*A); - auto localVector = rcmVector->getData(0); + RCP > rcmVector = MueLu::Utilities::CuthillMcKee(*A); + auto localVector = rcmVector->getData(0); for (LO i = 0; i < numRows; i++) - orderingVector[i] = localVector[i]; + localOrderingVector[i] = localVector[i]; } - // Get the party stated - LO numNonAggregatedNodes = numRows, numDirichletNodes = 0; - BuildInitialAggregates(pL, A, orderingVector(), kappa, - *aggregates, aggStat, numNonAggregatedNodes, numDirichletNodes); - TEUCHOS_TEST_FOR_EXCEPTION(0 < numNonAggregatedNodes, Exceptions::RuntimeError, - "Initial pairwise aggregation failed to aggregate all nodes"); - LO numLocalAggregates = aggregates->GetNumAggregates(); - GetOStream(Statistics0) << "Init : " << numLocalAggregates << " - " - << A->getLocalNumRows() / numLocalAggregates << std::endl; - - // Temporary data storage for further aggregation steps - local_matrix_type intermediateP; - local_matrix_type coarseLocalA; - - // Compute the on rank part of the local matrix - // that the square submatrix that only contains - // columns corresponding to local rows. 
- LO numLocalDirichletNodes = numDirichletNodes; - Array localVertex2AggId(aggregates->GetVertex2AggId()->getData(0).view(0, numRows)); - BuildOnRankLocalMatrix(A->getLocalMatrixDevice(), coarseLocalA); - for(LO aggregationIter = 1; aggregationIter < maxNumIter; ++aggregationIter) { - // Compute the intermediate prolongator - BuildIntermediateProlongator(coarseLocalA.numRows(), numLocalDirichletNodes, numLocalAggregates, - localVertex2AggId(), intermediateP); - - // Compute the coarse local matrix and coarse row sum - BuildCoarseLocalMatrix(intermediateP, coarseLocalA); - - // Directly compute rowsum from A, rather than coarseA - row_sum_type rowSum("rowSum", numLocalAggregates); - { - std::vector > agg2vertex(numLocalAggregates); - auto vertex2AggId = aggregates->GetVertex2AggId()->getData(0); - for(LO i=0; i<(LO)numRows; i++) { - if(aggStat[i] != AGGREGATED) - continue; - LO agg=vertex2AggId[i]; - agg2vertex[agg].push_back(i); - } - - typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum); - for(LO i = 0; i < numRows; i++) { - // If not aggregated already, skip this guy - if(aggStat[i] != AGGREGATED) - continue; - int agg = vertex2AggId[i]; - std::vector & myagg = agg2vertex[agg]; - - size_t nnz = A->getNumEntriesInLocalRow(i); - ArrayView indices; - ArrayView vals; - A->getLocalRowView(i, indices, vals); - - SC mysum = Teuchos::ScalarTraits::zero(); - for (LO colidx = 0; colidx < static_cast(nnz); colidx++) { - bool found = false; - if(indices[colidx] < numRows) { - for(LO j=0; j<(LO)myagg.size(); j++) - if (vertex2AggId[indices[colidx]] == agg) - found=true; - } - if(!found) { - *out << "- ADDING col "<getLocalNumRows() / numLocalAggregates << std::endl; } - aggregates->SetNumAggregates(numLocalAggregates); - aggregates->AggregatesCrossProcessors(false); - aggregates->ComputeAggregateSizes(true/*forceRecompute*/); - // DO stuff - Set(currentLevel, "Aggregates", aggregates); - GetOStream(Statistics0) << aggregates->description() << 
std::endl; + // We could probably print some better statistics at some point + GetOStream(Statistics0) << "Iter " << aggregationIter << ": " << numLocalAggregates << " - " + << A->getLocalNumRows() / numLocalAggregates << std::endl; + } + aggregates->SetNumAggregates(numLocalAggregates); + aggregates->AggregatesCrossProcessors(false); + aggregates->ComputeAggregateSizes(true /*forceRecompute*/); + + // DO stuff + Set(currentLevel, "Aggregates", aggregates); + GetOStream(Statistics0) << aggregates->description() << std::endl; +} + +template +void NotayAggregationFactory:: + BuildInitialAggregates(const Teuchos::ParameterList& params, + const RCP& A, + const Teuchos::ArrayView& orderingVector, + const typename Teuchos::ScalarTraits::magnitudeType kappa, + Aggregates& aggregates, + std::vector& aggStat, + LO& numNonAggregatedNodes, + LO& numDirichletNodes) const { + Monitor m(*this, "BuildInitialAggregates"); + using STS = Teuchos::ScalarTraits; + using MT = typename STS::magnitudeType; + using RealValuedVector = Xpetra::Vector; + + RCP out; + if (const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); } - - template - void NotayAggregationFactory:: - BuildInitialAggregates(const Teuchos::ParameterList& params, - const RCP& A, - const Teuchos::ArrayView & orderingVector, - const typename Teuchos::ScalarTraits::magnitudeType kappa, - Aggregates& aggregates, - std::vector& aggStat, - LO& numNonAggregatedNodes, - LO& numDirichletNodes) const { - - Monitor m(*this, "BuildInitialAggregates"); - using STS = Teuchos::ScalarTraits; - using MT = typename STS::magnitudeType; - using RealValuedVector = Xpetra::Vector; - - RCP out; - if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - 
out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); - } - - - const SC SC_ZERO = Teuchos::ScalarTraits::zero(); - const MT MT_ZERO = Teuchos::ScalarTraits::zero(); - const MT MT_ONE = Teuchos::ScalarTraits::one(); - const MT MT_TWO = MT_ONE + MT_ONE; - const LO LO_INVALID = Teuchos::OrdinalTraits::invalid(); - const LO LO_ZERO = Teuchos::OrdinalTraits::zero(); - - const MT kappa_init = kappa / (kappa - MT_TWO); - const LO numRows = aggStat.size(); - const int myRank = A->getMap()->getComm()->getRank(); - - // For finding "ties" where we fall back to the ordering. Making this larger than - // hard zero substantially increases code robustness. - double tie_criterion = params.get("aggregation: pairwise: tie threshold"); - double tie_less = 1.0 - tie_criterion; - double tie_more = 1.0 + tie_criterion; - - // NOTE: Assumes 1 dof per node. This constraint is enforced in Build(), - // and so we're not doing again here. - // This should probably be fixed at some point. 
- - // Extract diagonal, rowsums, etc - // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S - RCP ghostedDiag = MueLu::Utilities::GetMatrixOverlappedDiagonal(*A); - RCP ghostedRowSum = MueLu::Utilities::GetMatrixOverlappedDeletedRowsum(*A); - RCP ghostedAbsRowSum = MueLu::Utilities::GetMatrixOverlappedAbsDeletedRowsum(*A); - const ArrayRCP D = ghostedDiag->getData(0); - const ArrayRCP S = ghostedRowSum->getData(0); - const ArrayRCP AbsRs = ghostedAbsRowSum->getData(0); - - // Aggregates stuff - ArrayRCP vertex2AggId_rcp = aggregates.GetVertex2AggId()->getDataNonConst(0); - ArrayRCP procWinner_rcp = aggregates.GetProcWinner() ->getDataNonConst(0); - ArrayView vertex2AggId = vertex2AggId_rcp(); - ArrayView procWinner = procWinner_rcp(); - - // Algorithm 4.2 - - // 0,1 : Initialize: Flag boundary conditions - // Modification: We assume symmetry here aij = aji - for (LO row = 0; row < Teuchos::as(A->getRowMap()->getLocalNumElements()); ++row) { - MT aii = STS::magnitude(D[row]); - MT rowsum = AbsRs[row]; - - if(aii >= kappa_init * rowsum) { - *out << "Flagging index " << row << " as dirichlet " - "aii >= kappa*rowsum = " << aii << " >= " << kappa_init << " " << rowsum << std::endl; - aggStat[row] = IGNORED; - --numNonAggregatedNodes; - ++numDirichletNodes; - } + const SC SC_ZERO = Teuchos::ScalarTraits::zero(); + const MT MT_ZERO = Teuchos::ScalarTraits::zero(); + const MT MT_ONE = Teuchos::ScalarTraits::one(); + const MT MT_TWO = MT_ONE + MT_ONE; + const LO LO_INVALID = Teuchos::OrdinalTraits::invalid(); + const LO LO_ZERO = Teuchos::OrdinalTraits::zero(); + + const MT kappa_init = kappa / (kappa - MT_TWO); + const LO numRows = aggStat.size(); + const int myRank = A->getMap()->getComm()->getRank(); + + // For finding "ties" where we fall back to the ordering. Making this larger than + // hard zero substantially increases code robustness. 
+ double tie_criterion = params.get("aggregation: pairwise: tie threshold"); + double tie_less = 1.0 - tie_criterion; + double tie_more = 1.0 + tie_criterion; + + // NOTE: Assumes 1 dof per node. This constraint is enforced in Build(), + // and so we're not doing again here. + // This should probably be fixed at some point. + + // Extract diagonal, rowsums, etc + // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S + RCP ghostedDiag = MueLu::Utilities::GetMatrixOverlappedDiagonal(*A); + RCP ghostedRowSum = MueLu::Utilities::GetMatrixOverlappedDeletedRowsum(*A); + RCP ghostedAbsRowSum = MueLu::Utilities::GetMatrixOverlappedAbsDeletedRowsum(*A); + const ArrayRCP D = ghostedDiag->getData(0); + const ArrayRCP S = ghostedRowSum->getData(0); + const ArrayRCP AbsRs = ghostedAbsRowSum->getData(0); + + // Aggregates stuff + ArrayRCP vertex2AggId_rcp = aggregates.GetVertex2AggId()->getDataNonConst(0); + ArrayRCP procWinner_rcp = aggregates.GetProcWinner()->getDataNonConst(0); + ArrayView vertex2AggId = vertex2AggId_rcp(); + ArrayView procWinner = procWinner_rcp(); + + // Algorithm 4.2 + + // 0,1 : Initialize: Flag boundary conditions + // Modification: We assume symmetry here aij = aji + for (LO row = 0; row < Teuchos::as(A->getRowMap()->getLocalNumElements()); ++row) { + MT aii = STS::magnitude(D[row]); + MT rowsum = AbsRs[row]; + + if (aii >= kappa_init * rowsum) { + *out << "Flagging index " << row << " as dirichlet " + "aii >= kappa*rowsum = " + << aii << " >= " << kappa_init << " " << rowsum << std::endl; + aggStat[row] = IGNORED; + --numNonAggregatedNodes; + ++numDirichletNodes; } + } - - // 2 : Iteration - LO aggIndex = LO_ZERO; - for(LO i = 0; i < numRows; i++) { - LO current_idx = orderingVector[i]; - // If we're aggregated already, skip this guy - if(aggStat[current_idx] != READY) + // 2 : Iteration + LO aggIndex = LO_ZERO; + for (LO i = 0; i < numRows; i++) { + LO current_idx = orderingVector[i]; + // If we're aggregated already, skip 
this guy + if (aggStat[current_idx] != READY) + continue; + + MT best_mu = MT_ZERO; + LO best_idx = LO_INVALID; + + size_t nnz = A->getNumEntriesInLocalRow(current_idx); + ArrayView indices; + ArrayView vals; + A->getLocalRowView(current_idx, indices, vals); + + MT aii = STS::real(D[current_idx]); + MT si = STS::real(S[current_idx]); + for (LO colidx = 0; colidx < static_cast(nnz); colidx++) { + // Skip aggregated neighbors, off-rank neighbors, hard zeros and self + LO col = indices[colidx]; + SC val = vals[colidx]; + if (current_idx == col || col >= numRows || aggStat[col] != READY || val == SC_ZERO) continue; - MT best_mu = MT_ZERO; - LO best_idx = LO_INVALID; - - size_t nnz = A->getNumEntriesInLocalRow(current_idx); - ArrayView indices; - ArrayView vals; - A->getLocalRowView(current_idx, indices, vals); - - MT aii = STS::real(D[current_idx]); - MT si = STS::real(S[current_idx]); - for (LO colidx = 0; colidx < static_cast(nnz); colidx++) { - // Skip aggregated neighbors, off-rank neighbors, hard zeros and self - LO col = indices[colidx]; - SC val = vals[colidx]; - if(current_idx == col || col >= numRows || aggStat[col] != READY || val == SC_ZERO) - continue; - - MT aij = STS::real(val); - MT ajj = STS::real(D[col]); - MT sj = - STS::real(S[col]); // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S - if(aii - si + ajj - sj >= MT_ZERO) { - // Modification: We assume symmetry here aij = aji - MT mu_top = MT_TWO / ( MT_ONE / aii + MT_ONE / ajj); - MT mu_bottom = - aij + MT_ONE / ( MT_ONE / (aii - si) + MT_ONE / (ajj - sj) ); - MT mu = mu_top / mu_bottom; - - // Modification: Explicitly check the tie criterion here - if (mu > MT_ZERO && (best_idx == LO_INVALID || mu < best_mu * tie_less || - (mu < best_mu*tie_more && orderingVector[col] < orderingVector[best_idx]))) { - best_mu = mu; - best_idx = col; - *out << "[" << current_idx << "] Column UPDATED " << col << ": " - << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - 
" << sj - << " = " << aii - si + ajj - sj<< ", aij = "<= MT_ZERO) { + // Modification: We assume symmetry here aij = aji + MT mu_top = MT_TWO / (MT_ONE / aii + MT_ONE / ajj); + MT mu_bottom = -aij + MT_ONE / (MT_ONE / (aii - si) + MT_ONE / (ajj - sj)); + MT mu = mu_top / mu_bottom; + + // Modification: Explicitly check the tie criterion here + if (mu > MT_ZERO && (best_idx == LO_INVALID || mu < best_mu * tie_less || + (mu < best_mu * tie_more && orderingVector[col] < orderingVector[best_idx]))) { + best_mu = mu; + best_idx = col; + *out << "[" << current_idx << "] Column UPDATED " << col << ": " << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj - << " = " << aii - si + ajj - sj << ", aij = "<(vertex2AggId.size()); ++i) { - *out << i << "(" << vertex2AggId[i] << ")"; - } - *out << std::endl; - - // update aggregate object - aggregates.SetNumAggregates(aggIndex); - } // BuildInitialAggregates - - template - void NotayAggregationFactory:: - BuildFurtherAggregates(const Teuchos::ParameterList& params, - const RCP& A, - const Teuchos::ArrayView & orderingVector, - const typename Matrix::local_matrix_type& coarseA, - const typename Teuchos::ScalarTraits::magnitudeType kappa, - const Kokkos::View::val_type*, - Kokkos::LayoutLeft, - typename Matrix::local_matrix_type::device_type>& rowSum, - std::vector& localAggStat, - Teuchos::Array& localVertex2AggID, - LO& numLocalAggregates, - LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildFurtherAggregates"); - - // Set debug outputs based on environment variable - RCP out; - if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); - } + aggStat[current_idx] = ONEPT; + vertex2AggId[current_idx] = aggIndex; + procWinner[current_idx] = myRank; + numNonAggregatedNodes--; + 
aggIndex++; + } // best_mu + } // best_idx + } // end Algorithm 4.2 - using value_type = typename local_matrix_type::value_type; - const value_type KAT_zero = Kokkos::ArithTraits::zero(); - const magnitude_type MT_zero = Teuchos::ScalarTraits::zero(); - const magnitude_type MT_one = Teuchos::ScalarTraits::one(); - const magnitude_type MT_two = MT_one + MT_one; - const LO LO_INVALID = Teuchos::OrdinalTraits::invalid() ; - - // For finding "ties" where we fall back to the ordering. Making this larger than - // hard zero substantially increases code robustness. - double tie_criterion = params.get("aggregation: pairwise: tie threshold"); - double tie_less = 1.0 - tie_criterion; - double tie_more = 1.0 + tie_criterion; - - typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum); - Kokkos::deep_copy(rowSum_h, rowSum); - - // Extracting the diagonal of a KokkosSparse::CrsMatrix - // is not currently provided in kokkos-kernels so here - // is an ugly way to get that done... 
- const LO numRows = static_cast(coarseA.numRows()); - typename local_matrix_type::values_type::HostMirror diagA_h("diagA host", numRows); - typename local_matrix_type::row_map_type::HostMirror row_map_h - = Kokkos::create_mirror_view(coarseA.graph.row_map); - Kokkos::deep_copy(row_map_h, coarseA.graph.row_map); - typename local_matrix_type::index_type::HostMirror entries_h - = Kokkos::create_mirror_view(coarseA.graph.entries); - Kokkos::deep_copy(entries_h, coarseA.graph.entries); - typename local_matrix_type::values_type::HostMirror values_h - = Kokkos::create_mirror_view(coarseA.values); - Kokkos::deep_copy(values_h, coarseA.values); - for(LO rowIdx = 0; rowIdx < numRows; ++rowIdx) { - for(LO entryIdx = static_cast(row_map_h(rowIdx)); - entryIdx < static_cast(row_map_h(rowIdx + 1)); - ++entryIdx) { - if(rowIdx == static_cast(entries_h(entryIdx))) { - diagA_h(rowIdx) = values_h(entryIdx); - } + *out << "vertex2aggid :"; + for (int i = 0; i < static_cast(vertex2AggId.size()); ++i) { + *out << i << "(" << vertex2AggId[i] << ")"; + } + *out << std::endl; + + // update aggregate object + aggregates.SetNumAggregates(aggIndex); +} // BuildInitialAggregates + +template +void NotayAggregationFactory:: + BuildFurtherAggregates(const Teuchos::ParameterList& params, + const RCP& A, + const Teuchos::ArrayView& orderingVector, + const typename Matrix::local_matrix_type& coarseA, + const typename Teuchos::ScalarTraits::magnitudeType kappa, + const Kokkos::View::val_type*, + Kokkos::LayoutLeft, + typename Matrix::local_matrix_type::device_type>& rowSum, + std::vector& localAggStat, + Teuchos::Array& localVertex2AggID, + LO& numLocalAggregates, + LO& numNonAggregatedNodes) const { + Monitor m(*this, "BuildFurtherAggregates"); + + // Set debug outputs based on environment variable + RCP out; + if (const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + 
out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } + + using value_type = typename local_matrix_type::value_type; + const value_type KAT_zero = Kokkos::ArithTraits::zero(); + const magnitude_type MT_zero = Teuchos::ScalarTraits::zero(); + const magnitude_type MT_one = Teuchos::ScalarTraits::one(); + const magnitude_type MT_two = MT_one + MT_one; + const LO LO_INVALID = Teuchos::OrdinalTraits::invalid(); + + // For finding "ties" where we fall back to the ordering. Making this larger than + // hard zero substantially increases code robustness. + double tie_criterion = params.get("aggregation: pairwise: tie threshold"); + double tie_less = 1.0 - tie_criterion; + double tie_more = 1.0 + tie_criterion; + + typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum); + Kokkos::deep_copy(rowSum_h, rowSum); + + // Extracting the diagonal of a KokkosSparse::CrsMatrix + // is not currently provided in kokkos-kernels so here + // is an ugly way to get that done... 
+ const LO numRows = static_cast(coarseA.numRows()); + typename local_matrix_type::values_type::HostMirror diagA_h("diagA host", numRows); + typename local_matrix_type::row_map_type::HostMirror row_map_h = Kokkos::create_mirror_view(coarseA.graph.row_map); + Kokkos::deep_copy(row_map_h, coarseA.graph.row_map); + typename local_matrix_type::index_type::HostMirror entries_h = Kokkos::create_mirror_view(coarseA.graph.entries); + Kokkos::deep_copy(entries_h, coarseA.graph.entries); + typename local_matrix_type::values_type::HostMirror values_h = Kokkos::create_mirror_view(coarseA.values); + Kokkos::deep_copy(values_h, coarseA.values); + for (LO rowIdx = 0; rowIdx < numRows; ++rowIdx) { + for (LO entryIdx = static_cast(row_map_h(rowIdx)); + entryIdx < static_cast(row_map_h(rowIdx + 1)); + ++entryIdx) { + if (rowIdx == static_cast(entries_h(entryIdx))) { + diagA_h(rowIdx) = values_h(entryIdx); } } + } + + for (LO currentIdx = 0; currentIdx < numRows; ++currentIdx) { + if (localAggStat[currentIdx] != READY) { + continue; + } - for(LO currentIdx = 0; currentIdx < numRows; ++currentIdx) { - if(localAggStat[currentIdx] != READY) { + LO bestIdx = Teuchos::OrdinalTraits::invalid(); + magnitude_type best_mu = Teuchos::ScalarTraits::zero(); + const magnitude_type aii = Teuchos::ScalarTraits::real(diagA_h(currentIdx)); + const magnitude_type si = Teuchos::ScalarTraits::real(rowSum_h(currentIdx)); + for (auto entryIdx = row_map_h(currentIdx); entryIdx < row_map_h(currentIdx + 1); ++entryIdx) { + const LO colIdx = static_cast(entries_h(entryIdx)); + if (currentIdx == colIdx || colIdx >= numRows || localAggStat[colIdx] != READY || values_h(entryIdx) == KAT_zero) { continue; } - LO bestIdx = Teuchos::OrdinalTraits::invalid(); - magnitude_type best_mu = Teuchos::ScalarTraits::zero(); - const magnitude_type aii = Teuchos::ScalarTraits::real(diagA_h(currentIdx)); - const magnitude_type si = Teuchos::ScalarTraits::real(rowSum_h(currentIdx)); - for(auto entryIdx = row_map_h(currentIdx); 
entryIdx < row_map_h(currentIdx + 1); ++entryIdx) { - const LO colIdx = static_cast(entries_h(entryIdx)); - if(currentIdx == colIdx || colIdx >= numRows || localAggStat[colIdx] != READY || values_h(entryIdx) == KAT_zero) { - continue; - } - - const magnitude_type aij = Teuchos::ScalarTraits::real(values_h(entryIdx)); - const magnitude_type ajj = Teuchos::ScalarTraits::real(diagA_h(colIdx)); - const magnitude_type sj = - Teuchos::ScalarTraits::real(rowSum_h(colIdx)); // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S - if(aii - si + ajj - sj >= MT_zero) { - const magnitude_type mu_top = MT_two / ( MT_one/aii + MT_one/ajj ); - const magnitude_type mu_bottom = -aij + MT_one / (MT_one / (aii - si) + MT_one / (ajj - sj)); - const magnitude_type mu = mu_top / mu_bottom; - - // Modification: Explicitly check the tie criterion here - if (mu > MT_zero && (bestIdx == LO_INVALID || mu < best_mu * tie_less || - (mu < best_mu*tie_more && orderingVector[colIdx] < orderingVector[bestIdx]))) { - best_mu = mu; - bestIdx = colIdx; - *out << "[" << currentIdx << "] Column UPDATED " << colIdx << ": " - << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj - << " = " << aii - si + ajj - sj << ", aij = "< - void NotayAggregationFactory:: - BuildOnRankLocalMatrix(const typename Matrix::local_matrix_type& localA, - typename Matrix::local_matrix_type& onrankA) const { - Monitor m(*this, "BuildOnRankLocalMatrix"); - - // Set debug outputs based on environment variable - RCP out; - if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); } + } // end loop over matrix rows + +} // BuildFurtherAggregates + +template +void NotayAggregationFactory:: + BuildOnRankLocalMatrix(const typename Matrix::local_matrix_type& localA, + 
typename Matrix::local_matrix_type& onrankA) const { + Monitor m(*this, "BuildOnRankLocalMatrix"); + + // Set debug outputs based on environment variable + RCP out; + if (const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } - using local_graph_type = typename local_matrix_type::staticcrsgraph_type; - using values_type = typename local_matrix_type::values_type; - using size_type = typename local_graph_type::size_type; - using col_index_type = typename local_graph_type::data_type; - using array_layout = typename local_graph_type::array_layout; - using memory_traits = typename local_graph_type::memory_traits; - using row_pointer_type = Kokkos::View; - using col_indices_type = Kokkos::View; - // Extract on rank part of A - // Simply check that the column index is less than the number of local rows - // otherwise remove it. 
- - const int numRows = static_cast(localA.numRows()); - row_pointer_type rowPtr("onrankA row pointer", numRows + 1); - typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr); - typename local_graph_type::row_map_type::HostMirror origRowPtr_h - = Kokkos::create_mirror_view(localA.graph.row_map); - typename local_graph_type::entries_type::HostMirror origColind_h - = Kokkos::create_mirror_view(localA.graph.entries); - typename values_type::HostMirror origValues_h - = Kokkos::create_mirror_view(localA.values); - Kokkos::deep_copy(origRowPtr_h, localA.graph.row_map); - Kokkos::deep_copy(origColind_h, localA.graph.entries); - Kokkos::deep_copy(origValues_h, localA.values); - - // Compute the number of nnz entries per row - rowPtr_h(0) = 0; - for(int rowIdx = 0; rowIdx < numRows; ++rowIdx) { - for(size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) { - if(origColind_h(entryIdx) < numRows) {rowPtr_h(rowIdx + 1) += 1;} + using local_graph_type = typename local_matrix_type::staticcrsgraph_type; + using values_type = typename local_matrix_type::values_type; + using size_type = typename local_graph_type::size_type; + using col_index_type = typename local_graph_type::data_type; + using array_layout = typename local_graph_type::array_layout; + using memory_traits = typename local_graph_type::memory_traits; + using row_pointer_type = Kokkos::View; + using col_indices_type = Kokkos::View; + // Extract on rank part of A + // Simply check that the column index is less than the number of local rows + // otherwise remove it. 
+ + const int numRows = static_cast(localA.numRows()); + row_pointer_type rowPtr("onrankA row pointer", numRows + 1); + typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr); + typename local_graph_type::row_map_type::HostMirror origRowPtr_h = Kokkos::create_mirror_view(localA.graph.row_map); + typename local_graph_type::entries_type::HostMirror origColind_h = Kokkos::create_mirror_view(localA.graph.entries); + typename values_type::HostMirror origValues_h = Kokkos::create_mirror_view(localA.values); + Kokkos::deep_copy(origRowPtr_h, localA.graph.row_map); + Kokkos::deep_copy(origColind_h, localA.graph.entries); + Kokkos::deep_copy(origValues_h, localA.values); + + // Compute the number of nnz entries per row + rowPtr_h(0) = 0; + for (int rowIdx = 0; rowIdx < numRows; ++rowIdx) { + for (size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) { + if (origColind_h(entryIdx) < numRows) { + rowPtr_h(rowIdx + 1) += 1; } - rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx + 1) + rowPtr_h(rowIdx); } - Kokkos::deep_copy(rowPtr, rowPtr_h); - - const LO nnzOnrankA = rowPtr_h(numRows); - - // Now use nnz per row to allocate matrix views and store column indices and values - col_indices_type colInd("onrankA column indices", rowPtr_h(numRows)); - values_type values("onrankA values", rowPtr_h(numRows)); - typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd); - typename values_type::HostMirror values_h = Kokkos::create_mirror_view(values); - int entriesInRow; - for(int rowIdx = 0; rowIdx < numRows; ++rowIdx) { - entriesInRow = 0; - for(size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) { - if(origColind_h(entryIdx) < numRows) { - colInd_h(rowPtr_h(rowIdx) + entriesInRow) = origColind_h(entryIdx); - values_h(rowPtr_h(rowIdx) + entriesInRow) = origValues_h(entryIdx); - ++entriesInRow; - } + rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx + 1) + rowPtr_h(rowIdx); + } 
+ Kokkos::deep_copy(rowPtr, rowPtr_h); + + const LO nnzOnrankA = rowPtr_h(numRows); + + // Now use nnz per row to allocate matrix views and store column indices and values + col_indices_type colInd("onrankA column indices", rowPtr_h(numRows)); + values_type values("onrankA values", rowPtr_h(numRows)); + typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd); + typename values_type::HostMirror values_h = Kokkos::create_mirror_view(values); + int entriesInRow; + for (int rowIdx = 0; rowIdx < numRows; ++rowIdx) { + entriesInRow = 0; + for (size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) { + if (origColind_h(entryIdx) < numRows) { + colInd_h(rowPtr_h(rowIdx) + entriesInRow) = origColind_h(entryIdx); + values_h(rowPtr_h(rowIdx) + entriesInRow) = origValues_h(entryIdx); + ++entriesInRow; } } - Kokkos::deep_copy(colInd, colInd_h); - Kokkos::deep_copy(values, values_h); - - onrankA = local_matrix_type("onrankA", numRows, numRows, - nnzOnrankA, values, rowPtr, colInd); + } + Kokkos::deep_copy(colInd, colInd_h); + Kokkos::deep_copy(values, values_h); + + onrankA = local_matrix_type("onrankA", numRows, numRows, + nnzOnrankA, values, rowPtr, colInd); +} + +template +void NotayAggregationFactory:: + BuildIntermediateProlongator(const LocalOrdinal numRows, + const LocalOrdinal numDirichletNodes, + const LocalOrdinal numLocalAggregates, + const Teuchos::ArrayView& localVertex2AggID, + typename Matrix::local_matrix_type& intermediateP) const { + Monitor m(*this, "BuildIntermediateProlongator"); + + // Set debug outputs based on environment variable + RCP out; + if (const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); } - template - void NotayAggregationFactory:: - 
BuildIntermediateProlongator(const LocalOrdinal numRows, - const LocalOrdinal numDirichletNodes, - const LocalOrdinal numLocalAggregates, - const Teuchos::ArrayView& localVertex2AggID, - typename Matrix::local_matrix_type& intermediateP) const { - Monitor m(*this, "BuildIntermediateProlongator"); - - // Set debug outputs based on environment variable - RCP out; - if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); + using local_graph_type = typename local_matrix_type::staticcrsgraph_type; + using values_type = typename local_matrix_type::values_type; + using size_type = typename local_graph_type::size_type; + using col_index_type = typename local_graph_type::data_type; + using array_layout = typename local_graph_type::array_layout; + using memory_traits = typename local_graph_type::memory_traits; + using row_pointer_type = Kokkos::View; + using col_indices_type = Kokkos::View; + + const LO LO_INVALID = Teuchos::OrdinalTraits::invalid(); + + const int intermediatePnnz = numRows - numDirichletNodes; + row_pointer_type rowPtr("intermediateP row pointer", numRows + 1); + col_indices_type colInd("intermediateP column indices", intermediatePnnz); + values_type values("intermediateP values", intermediatePnnz); + typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr); + typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd); + + rowPtr_h(0) = 0; + for (int rowIdx = 0; rowIdx < numRows; ++rowIdx) { + // Skip Dirichlet nodes + if (localVertex2AggID[rowIdx] == LO_INVALID) { + rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx); } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); - } - - using local_graph_type = typename local_matrix_type::staticcrsgraph_type; - using values_type = typename local_matrix_type::values_type; - using size_type = typename 
local_graph_type::size_type; - using col_index_type = typename local_graph_type::data_type; - using array_layout = typename local_graph_type::array_layout; - using memory_traits = typename local_graph_type::memory_traits; - using row_pointer_type = Kokkos::View; - using col_indices_type = Kokkos::View; - - const LO LO_INVALID = Teuchos::OrdinalTraits::invalid(); - - const int intermediatePnnz = numRows - numDirichletNodes; - row_pointer_type rowPtr("intermediateP row pointer", numRows + 1); - col_indices_type colInd("intermediateP column indices", intermediatePnnz); - values_type values("intermediateP values", intermediatePnnz); - typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr); - typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd); - - rowPtr_h(0) = 0; - for(int rowIdx = 0; rowIdx < numRows; ++rowIdx) { - // Skip Dirichlet nodes - if(localVertex2AggID[rowIdx] == LO_INVALID) { - rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx); - } else { - rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx) + 1; - colInd_h(rowPtr_h(rowIdx)) = localVertex2AggID[rowIdx]; - } + rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx) + 1; + colInd_h(rowPtr_h(rowIdx)) = localVertex2AggID[rowIdx]; } + } - Kokkos::deep_copy(rowPtr, rowPtr_h); - Kokkos::deep_copy(colInd, colInd_h); - Kokkos::deep_copy(values, Kokkos::ArithTraits::one()); - - intermediateP = local_matrix_type("intermediateP", - numRows, numLocalAggregates, intermediatePnnz, - values, rowPtr, colInd); - } // BuildIntermediateProlongator - - template - void NotayAggregationFactory:: - BuildCoarseLocalMatrix(const typename Matrix::local_matrix_type& intermediateP, - typename Matrix::local_matrix_type& coarseA) const { - Monitor m(*this, "BuildCoarseLocalMatrix"); - - using local_graph_type = typename local_matrix_type::staticcrsgraph_type; - using values_type = typename local_matrix_type::values_type; - using size_type = typename local_graph_type::size_type; - using col_index_type = typename 
local_graph_type::data_type; - using array_layout = typename local_graph_type::array_layout; - using memory_traits = typename local_graph_type::memory_traits; - using row_pointer_type = Kokkos::View; - using col_indices_type = Kokkos::View; - - local_matrix_type AP; - localSpGEMM(coarseA, intermediateP, "AP", AP); - - // Note 03/11/20, lbv: does kh need to destroy and recreate the spgemm handle - // I am not sure but doing it for safety in case it stashes data from the previous - // spgemm computation... - - // Compute Ac = Pt * AP - // Two steps needed: - // 1. compute Pt - // 2. perform multiplication - - // Step 1 compute Pt - // Obviously this requires the same amount of storage as P except for the rowPtr - row_pointer_type rowPtrPt(Kokkos::ViewAllocateWithoutInitializing("Pt row pointer"), - intermediateP.numCols() + 1); - col_indices_type colIndPt(Kokkos::ViewAllocateWithoutInitializing("Pt column indices"), - intermediateP.nnz()); - values_type valuesPt(Kokkos::ViewAllocateWithoutInitializing("Pt values"), - intermediateP.nnz()); - - typename row_pointer_type::HostMirror rowPtrPt_h = Kokkos::create_mirror_view(rowPtrPt); - typename col_indices_type::HostMirror entries_h = Kokkos::create_mirror_view(intermediateP.graph.entries); - Kokkos::deep_copy(entries_h, intermediateP.graph.entries); - Kokkos::deep_copy(rowPtrPt_h, 0); - for(size_type entryIdx = 0; entryIdx < intermediateP.nnz(); ++entryIdx) { - rowPtrPt_h(entries_h(entryIdx) + 1) += 1; - } - for(LO rowIdx = 0; rowIdx < intermediateP.numCols(); ++rowIdx) { - rowPtrPt_h(rowIdx + 1) += rowPtrPt_h(rowIdx); - } - Kokkos::deep_copy(rowPtrPt, rowPtrPt_h); - - typename row_pointer_type::HostMirror rowPtrP_h = Kokkos::create_mirror_view(intermediateP.graph.row_map); - Kokkos::deep_copy(rowPtrP_h, intermediateP.graph.row_map); - typename col_indices_type::HostMirror colIndP_h = Kokkos::create_mirror_view(intermediateP.graph.entries); - Kokkos::deep_copy(colIndP_h, intermediateP.graph.entries); - typename 
values_type::HostMirror valuesP_h = Kokkos::create_mirror_view(intermediateP.values); - Kokkos::deep_copy(valuesP_h, intermediateP.values); - typename col_indices_type::HostMirror colIndPt_h = Kokkos::create_mirror_view(colIndPt); - typename values_type::HostMirror valuesPt_h = Kokkos::create_mirror_view(valuesPt); - const col_index_type invalidColumnIndex = KokkosSparse::OrdinalTraits::invalid(); - Kokkos::deep_copy(colIndPt_h, invalidColumnIndex); - - col_index_type colIdx = 0; - for(LO rowIdx = 0; rowIdx < intermediateP.numRows(); ++rowIdx) { - for(size_type entryIdxP = rowPtrP_h(rowIdx); entryIdxP < rowPtrP_h(rowIdx + 1); ++entryIdxP) { - colIdx = entries_h(entryIdxP); - for(size_type entryIdxPt = rowPtrPt_h(colIdx); entryIdxPt < rowPtrPt_h(colIdx + 1); ++entryIdxPt) { - if(colIndPt_h(entryIdxPt) == invalidColumnIndex) { - colIndPt_h(entryIdxPt) = rowIdx; - valuesPt_h(entryIdxPt) = valuesP_h(entryIdxP); - break; - } - } // Loop over entries in row of Pt - } // Loop over entries in row of P - } // Loop over rows of P - - Kokkos::deep_copy(colIndPt, colIndPt_h); - Kokkos::deep_copy(valuesPt, valuesPt_h); - - - local_matrix_type intermediatePt("intermediatePt", - intermediateP.numCols(), - intermediateP.numRows(), - intermediateP.nnz(), - valuesPt, rowPtrPt, colIndPt); - - // Create views for coarseA matrix - localSpGEMM(intermediatePt, AP, "coarseA", coarseA); - } // BuildCoarseLocalMatrix - - template - void NotayAggregationFactory:: - localSpGEMM(const typename Matrix::local_matrix_type& A, - const typename Matrix::local_matrix_type& B, - const std::string matrixLabel, - typename Matrix::local_matrix_type& C) const { - - using local_graph_type = typename local_matrix_type::staticcrsgraph_type; - using values_type = typename local_matrix_type::values_type; - using size_type = typename local_graph_type::size_type; - using col_index_type = typename local_graph_type::data_type; - using array_layout = typename local_graph_type::array_layout; - using memory_space = 
typename device_type::memory_space; - using memory_traits = typename local_graph_type::memory_traits; - using row_pointer_type = Kokkos::View; - using col_indices_type = Kokkos::View; - - // Options - int team_work_size = 16; - std::string myalg("SPGEMM_KK_MEMORY"); - KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg); - KokkosKernels::Experimental::KokkosKernelsHandle kh; - kh.create_spgemm_handle(alg_enum); - kh.set_team_work_size(team_work_size); - - // Create views for AP matrix - row_pointer_type rowPtrC(Kokkos::ViewAllocateWithoutInitializing("C row pointer"), - A.numRows() + 1); - col_indices_type colIndC; - values_type valuesC; - - // Symbolic multiplication - KokkosSparse::Experimental::spgemm_symbolic(&kh, A.numRows(), - B.numRows(), B.numCols(), - A.graph.row_map, A.graph.entries, false, - B.graph.row_map, B.graph.entries, false, - rowPtrC); - - // allocate column indices and values of AP - size_t nnzC = kh.get_spgemm_handle()->get_c_nnz(); - if (nnzC) { - colIndC = col_indices_type(Kokkos::ViewAllocateWithoutInitializing("C column inds"), nnzC); - valuesC = values_type(Kokkos::ViewAllocateWithoutInitializing("C values"), nnzC); - } + Kokkos::deep_copy(rowPtr, rowPtr_h); + Kokkos::deep_copy(colInd, colInd_h); + Kokkos::deep_copy(values, Kokkos::ArithTraits::one()); + + intermediateP = local_matrix_type("intermediateP", + numRows, numLocalAggregates, intermediatePnnz, + values, rowPtr, colInd); +} // BuildIntermediateProlongator + +template +void NotayAggregationFactory:: + BuildCoarseLocalMatrix(const typename Matrix::local_matrix_type& intermediateP, + typename Matrix::local_matrix_type& coarseA) const { + Monitor m(*this, "BuildCoarseLocalMatrix"); + + using local_graph_type = typename local_matrix_type::staticcrsgraph_type; + using values_type = typename local_matrix_type::values_type; + using size_type = typename local_graph_type::size_type; + using col_index_type = typename local_graph_type::data_type; + using 
array_layout = typename local_graph_type::array_layout; + using memory_traits = typename local_graph_type::memory_traits; + using row_pointer_type = Kokkos::View; + using col_indices_type = Kokkos::View; + + local_matrix_type AP; + localSpGEMM(coarseA, intermediateP, "AP", AP); + + // Note 03/11/20, lbv: does kh need to destroy and recreate the spgemm handle + // I am not sure but doing it for safety in case it stashes data from the previous + // spgemm computation... + + // Compute Ac = Pt * AP + // Two steps needed: + // 1. compute Pt + // 2. perform multiplication + + // Step 1 compute Pt + // Obviously this requires the same amount of storage as P except for the rowPtr + row_pointer_type rowPtrPt(Kokkos::ViewAllocateWithoutInitializing("Pt row pointer"), + intermediateP.numCols() + 1); + col_indices_type colIndPt(Kokkos::ViewAllocateWithoutInitializing("Pt column indices"), + intermediateP.nnz()); + values_type valuesPt(Kokkos::ViewAllocateWithoutInitializing("Pt values"), + intermediateP.nnz()); + + typename row_pointer_type::HostMirror rowPtrPt_h = Kokkos::create_mirror_view(rowPtrPt); + typename col_indices_type::HostMirror entries_h = Kokkos::create_mirror_view(intermediateP.graph.entries); + Kokkos::deep_copy(entries_h, intermediateP.graph.entries); + Kokkos::deep_copy(rowPtrPt_h, 0); + for (size_type entryIdx = 0; entryIdx < intermediateP.nnz(); ++entryIdx) { + rowPtrPt_h(entries_h(entryIdx) + 1) += 1; + } + for (LO rowIdx = 0; rowIdx < intermediateP.numCols(); ++rowIdx) { + rowPtrPt_h(rowIdx + 1) += rowPtrPt_h(rowIdx); + } + Kokkos::deep_copy(rowPtrPt, rowPtrPt_h); + + typename row_pointer_type::HostMirror rowPtrP_h = Kokkos::create_mirror_view(intermediateP.graph.row_map); + Kokkos::deep_copy(rowPtrP_h, intermediateP.graph.row_map); + typename col_indices_type::HostMirror colIndP_h = Kokkos::create_mirror_view(intermediateP.graph.entries); + Kokkos::deep_copy(colIndP_h, intermediateP.graph.entries); + typename values_type::HostMirror valuesP_h = 
Kokkos::create_mirror_view(intermediateP.values); + Kokkos::deep_copy(valuesP_h, intermediateP.values); + typename col_indices_type::HostMirror colIndPt_h = Kokkos::create_mirror_view(colIndPt); + typename values_type::HostMirror valuesPt_h = Kokkos::create_mirror_view(valuesPt); + const col_index_type invalidColumnIndex = KokkosSparse::OrdinalTraits::invalid(); + Kokkos::deep_copy(colIndPt_h, invalidColumnIndex); + + col_index_type colIdx = 0; + for (LO rowIdx = 0; rowIdx < intermediateP.numRows(); ++rowIdx) { + for (size_type entryIdxP = rowPtrP_h(rowIdx); entryIdxP < rowPtrP_h(rowIdx + 1); ++entryIdxP) { + colIdx = entries_h(entryIdxP); + for (size_type entryIdxPt = rowPtrPt_h(colIdx); entryIdxPt < rowPtrPt_h(colIdx + 1); ++entryIdxPt) { + if (colIndPt_h(entryIdxPt) == invalidColumnIndex) { + colIndPt_h(entryIdxPt) = rowIdx; + valuesPt_h(entryIdxPt) = valuesP_h(entryIdxP); + break; + } + } // Loop over entries in row of Pt + } // Loop over entries in row of P + } // Loop over rows of P + + Kokkos::deep_copy(colIndPt, colIndPt_h); + Kokkos::deep_copy(valuesPt, valuesPt_h); + + local_matrix_type intermediatePt("intermediatePt", + intermediateP.numCols(), + intermediateP.numRows(), + intermediateP.nnz(), + valuesPt, rowPtrPt, colIndPt); + + // Create views for coarseA matrix + localSpGEMM(intermediatePt, AP, "coarseA", coarseA); +} // BuildCoarseLocalMatrix + +template +void NotayAggregationFactory:: + localSpGEMM(const typename Matrix::local_matrix_type& A, + const typename Matrix::local_matrix_type& B, + const std::string matrixLabel, + typename Matrix::local_matrix_type& C) const { + using local_graph_type = typename local_matrix_type::staticcrsgraph_type; + using values_type = typename local_matrix_type::values_type; + using size_type = typename local_graph_type::size_type; + using col_index_type = typename local_graph_type::data_type; + using array_layout = typename local_graph_type::array_layout; + using memory_space = typename device_type::memory_space; + 
using memory_traits = typename local_graph_type::memory_traits; + using row_pointer_type = Kokkos::View; + using col_indices_type = Kokkos::View; + + // Options + int team_work_size = 16; + std::string myalg("SPGEMM_KK_MEMORY"); + KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg); + KokkosKernels::Experimental::KokkosKernelsHandle + kh; + kh.create_spgemm_handle(alg_enum); + kh.set_team_work_size(team_work_size); + + // Create views for AP matrix + row_pointer_type rowPtrC(Kokkos::ViewAllocateWithoutInitializing("C row pointer"), + A.numRows() + 1); + col_indices_type colIndC; + values_type valuesC; + + // Symbolic multiplication + KokkosSparse::Experimental::spgemm_symbolic(&kh, A.numRows(), + B.numRows(), B.numCols(), + A.graph.row_map, A.graph.entries, false, + B.graph.row_map, B.graph.entries, false, + rowPtrC); + + // allocate column indices and values of AP + size_t nnzC = kh.get_spgemm_handle()->get_c_nnz(); + if (nnzC) { + colIndC = col_indices_type(Kokkos::ViewAllocateWithoutInitializing("C column inds"), nnzC); + valuesC = values_type(Kokkos::ViewAllocateWithoutInitializing("C values"), nnzC); + } - // Numeric multiplication - KokkosSparse::Experimental::spgemm_numeric(&kh, A.numRows(), - B.numRows(), B.numCols(), - A.graph.row_map, A.graph.entries, A.values, false, - B.graph.row_map, B.graph.entries, B.values, false, - rowPtrC, colIndC, valuesC); - kh.destroy_spgemm_handle(); + // Numeric multiplication + KokkosSparse::Experimental::spgemm_numeric(&kh, A.numRows(), + B.numRows(), B.numCols(), + A.graph.row_map, A.graph.entries, A.values, false, + B.graph.row_map, B.graph.entries, B.values, false, + rowPtrC, colIndC, valuesC); + kh.destroy_spgemm_handle(); - C = local_matrix_type(matrixLabel, A.numRows(), B.numCols(), nnzC, valuesC, rowPtrC, colIndC); + C = local_matrix_type(matrixLabel, A.numRows(), B.numCols(), nnzC, valuesC, rowPtrC, colIndC); - } // localSpGEMM +} // localSpGEMM -} //namespace MueLu +} //namespace 
MueLu #endif /* MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_decl.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_decl.hpp index 2b89f58299a9..2840d0855a29 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_decl.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_decl.hpp @@ -56,7 +56,7 @@ #include "MueLu_GraphBase.hpp" namespace MueLu { - /*! +/*! @class AggregationStructuredAlgorithm class. @brief Algorithm for coarsening a graph with structured aggregation. @@ -74,60 +74,56 @@ namespace MueLu { aggregation: coarsen | describe the coarsening rate to be used in each direction */ - template - class AggregationStructuredAlgorithm : - public MueLu::AggregationAlgorithmBase { +template +class AggregationStructuredAlgorithm : public MueLu::AggregationAlgorithmBase { #undef MUELU_AGGREGATIONSTRUCTUREDALGORITHM_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - AggregationStructuredAlgorithm(const RCP& /* graphFact */ = Teuchos::null) { } + //! Constructor. + AggregationStructuredAlgorithm(const RCP& /* graphFact */ = Teuchos::null) {} - //! Destructor. - virtual ~AggregationStructuredAlgorithm() { } + //! Destructor. + virtual ~AggregationStructuredAlgorithm() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. 
*/ + void BuildAggregates(const Teuchos::ParameterList& params, const GraphBase& graph, + Aggregates& aggregates, std::vector& aggStat, + LO& numNonAggregatedNodes) const; - void BuildAggregates(const Teuchos::ParameterList& params, const GraphBase& graph, - Aggregates& aggregates, std::vector& aggStat, - LO& numNonAggregatedNodes) const; + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. */ + void BuildGraph(const GraphBase& graph, RCP& geoData, const LO dofsPerNode, + RCP& myGraph, RCP& coarseCoordinatesFineMap, + RCP& coarseCoordinatesMap) const; + //@} - void BuildGraph(const GraphBase& graph, RCP& geoData, const LO dofsPerNode, - RCP& myGraph, RCP& coarseCoordinatesFineMap, - RCP& coarseCoordinatesMap) const; - //@} + std::string description() const { return "Aggretation: structured algorithm"; } - std::string description() const { return "Aggretation: structured algorithm"; } - - private: - - void ComputeGraphDataConstant(const GraphBase& graph, RCP& geoData, - const LO dofsPerNode, const int numInterpolationPoints, - ArrayRCP& nnzOnRow, Array& rowPtr, - Array& colIndex) const; - - void ComputeGraphDataLinear(const GraphBase& graph, RCP& geoData, + private: + void ComputeGraphDataConstant(const GraphBase& graph, RCP& geoData, const LO dofsPerNode, const int numInterpolationPoints, ArrayRCP& nnzOnRow, Array& rowPtr, Array& colIndex) const; - }; + void ComputeGraphDataLinear(const GraphBase& graph, RCP& geoData, + const LO dofsPerNode, const int numInterpolationPoints, + ArrayRCP& nnzOnRow, Array& rowPtr, + Array& colIndex) const; +}; -} //namespace MueLu +} //namespace MueLu #define MUELU_AGGREGATIONSTRUCTUREDALGORITHM_SHORT #endif /* MUELU_AGGREGATIONSTRUCTUREDALGORITHM_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_def.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_def.hpp index 325b932fb453..1c62e2108021 100644 --- 
a/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_def.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_def.hpp @@ -46,7 +46,6 @@ #ifndef MUELU_AGGREGATIONSTRUCTUREDALGORITHM_DEF_HPP_ #define MUELU_AGGREGATIONSTRUCTUREDALGORITHM_DEF_HPP_ - #include #include @@ -65,370 +64,361 @@ namespace MueLu { - template - void AggregationStructuredAlgorithm:: - BuildAggregates(const Teuchos::ParameterList& /* params */, const GraphBase& graph, - Aggregates& aggregates, std::vector& aggStat, - LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildAggregates"); - - RCP out; - if(const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); +template +void AggregationStructuredAlgorithm:: + BuildAggregates(const Teuchos::ParameterList& /* params */, const GraphBase& graph, + Aggregates& aggregates, std::vector& aggStat, + LO& numNonAggregatedNodes) const { + Monitor m(*this, "BuildAggregates"); + + RCP out; + if (const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } + + RCP geoData = aggregates.GetIndexManager(); + const bool coupled = geoData->isAggregationCoupled(); + const bool singleCoarsePoint = geoData->isSingleCoarsePoint(); + ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); + ArrayRCP procWinner = aggregates.GetProcWinner()->getDataNonConst(0); + Array ghostedCoarseNodeCoarseLIDs; + Array ghostedCoarseNodeCoarsePIDs; + Array ghostedCoarseNodeCoarseGIDs; + + *out << "Extract data for ghosted nodes" << std::endl; + 
geoData->getGhostedNodesData(graph.GetDomainMap(), ghostedCoarseNodeCoarseLIDs, + ghostedCoarseNodeCoarsePIDs, ghostedCoarseNodeCoarseGIDs); + + LO rem, rate; + Array ghostedIdx(3), coarseIdx(3); + LO ghostedCoarseNodeCoarseLID, aggId; + *out << "Loop over fine nodes and assign them to an aggregate and a rank" << std::endl; + for (LO nodeIdx = 0; nodeIdx < geoData->getNumLocalFineNodes(); ++nodeIdx) { + // Compute coarse ID associated with fine LID + geoData->getFineNodeGhostedTuple(nodeIdx, ghostedIdx[0], ghostedIdx[1], ghostedIdx[2]); + + for (int dim = 0; dim < 3; ++dim) { + if (singleCoarsePoint && (geoData->getLocalFineNodesInDir(dim) - 1 < geoData->getCoarseningRate(dim))) { + coarseIdx[dim] = 0; + } else { + coarseIdx[dim] = ghostedIdx[dim] / geoData->getCoarseningRate(dim); + rem = ghostedIdx[dim] % geoData->getCoarseningRate(dim); + if (ghostedIdx[dim] - geoData->getOffset(dim) < geoData->getLocalFineNodesInDir(dim) - geoData->getCoarseningEndRate(dim)) { + rate = geoData->getCoarseningRate(dim); + } else { + rate = geoData->getCoarseningEndRate(dim); + } + if (rem > (rate / 2)) { + ++coarseIdx[dim]; + } + if (coupled && (geoData->getStartGhostedCoarseNode(dim) * geoData->getCoarseningRate(dim) > geoData->getStartIndex(dim))) { + --coarseIdx[dim]; + } + } } - RCP geoData = aggregates.GetIndexManager(); - const bool coupled = geoData->isAggregationCoupled(); - const bool singleCoarsePoint = geoData->isSingleCoarsePoint(); - ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); - ArrayRCP procWinner = aggregates.GetProcWinner() ->getDataNonConst(0); - Array ghostedCoarseNodeCoarseLIDs; - Array ghostedCoarseNodeCoarsePIDs; - Array ghostedCoarseNodeCoarseGIDs; - + geoData->getCoarseNodeGhostedLID(coarseIdx[0], coarseIdx[1], coarseIdx[2], + ghostedCoarseNodeCoarseLID); + + aggId = ghostedCoarseNodeCoarseLIDs[ghostedCoarseNodeCoarseLID]; + vertex2AggId[nodeIdx] = aggId; + procWinner[nodeIdx] = 
ghostedCoarseNodeCoarsePIDs[ghostedCoarseNodeCoarseLID]; + aggStat[nodeIdx] = AGGREGATED; + --numNonAggregatedNodes; + + } // Loop over fine points +} // BuildAggregates() + +template +void AggregationStructuredAlgorithm:: + BuildGraph(const GraphBase& graph, RCP& geoData, const LO dofsPerNode, + RCP& myGraph, RCP& coarseCoordinatesFineMap, + RCP& coarseCoordinatesMap) const { + Monitor m(*this, "BuildGraphP"); + + RCP out; + if (const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } + + const bool coupled = geoData->isAggregationCoupled(); + + // Compute the number of coarse points needed to interpolate quantities to a fine point + int numInterpolationPoints = 0; + if (geoData->getInterpolationOrder() == 0) { + numInterpolationPoints = 1; + } else if (geoData->getInterpolationOrder() == 1) { + // Compute 2^numDimensions using bit logic to avoid round-off errors + numInterpolationPoints = 1 << geoData->getNumDimensions(); + } + *out << "numInterpolationPoints=" << numInterpolationPoints << std::endl; + + Array colIndex((geoData->getNumLocalCoarseNodes() + numInterpolationPoints * + (geoData->getNumLocalFineNodes() - geoData->getNumLocalCoarseNodes())) * + dofsPerNode); + Array rowPtr(geoData->getNumLocalFineNodes() * dofsPerNode + 1); + rowPtr[0] = 0; + ArrayRCP nnzOnRow(geoData->getNumLocalFineNodes() * dofsPerNode); + + *out << "Compute prolongatorGraph data" << std::endl; + if (geoData->getInterpolationOrder() == 0) { + ComputeGraphDataConstant(graph, geoData, dofsPerNode, numInterpolationPoints, + nnzOnRow, rowPtr, colIndex); + } else if (geoData->getInterpolationOrder() == 1) { + ComputeGraphDataLinear(graph, geoData, dofsPerNode, numInterpolationPoints, + nnzOnRow, rowPtr, colIndex); + } + + // Compute graph's rowMap, colMap and domainMap + 
RCP rowMap = MapFactory::Build(graph.GetDomainMap(), dofsPerNode); + RCP colMap, domainMap; + *out << "Compute domain and column maps of the CrsGraph" << std::endl; + if (coupled) { *out << "Extract data for ghosted nodes" << std::endl; + Array ghostedCoarseNodeCoarseLIDs; + Array ghostedCoarseNodeCoarsePIDs; + Array ghostedCoarseNodeCoarseGIDs; geoData->getGhostedNodesData(graph.GetDomainMap(), ghostedCoarseNodeCoarseLIDs, ghostedCoarseNodeCoarsePIDs, ghostedCoarseNodeCoarseGIDs); - LO rem, rate; - Array ghostedIdx(3), coarseIdx(3); - LO ghostedCoarseNodeCoarseLID, aggId; - *out << "Loop over fine nodes and assign them to an aggregate and a rank" << std::endl; - for(LO nodeIdx = 0; nodeIdx < geoData->getNumLocalFineNodes(); ++nodeIdx) { - // Compute coarse ID associated with fine LID - geoData->getFineNodeGhostedTuple(nodeIdx, ghostedIdx[0], ghostedIdx[1], ghostedIdx[2]); - - for(int dim = 0; dim < 3; ++dim) { - if(singleCoarsePoint - && (geoData->getLocalFineNodesInDir(dim) - 1 < geoData->getCoarseningRate(dim))) { - coarseIdx[dim] = 0; - } else { - coarseIdx[dim] = ghostedIdx[dim] / geoData->getCoarseningRate(dim); - rem = ghostedIdx[dim] % geoData->getCoarseningRate(dim); - if(ghostedIdx[dim] - geoData->getOffset(dim) - < geoData->getLocalFineNodesInDir(dim) - geoData->getCoarseningEndRate(dim)) { - rate = geoData->getCoarseningRate(dim); - } else { - rate = geoData->getCoarseningEndRate(dim); - } - if(rem > (rate / 2)) {++coarseIdx[dim];} - if(coupled && (geoData->getStartGhostedCoarseNode(dim)*geoData->getCoarseningRate(dim) - > geoData->getStartIndex(dim))) {--coarseIdx[dim];} - } + // In this case we specify the global number of nodes on the coarse mesh + // as well as the GIDs needed on rank. 
+ colMap = MapFactory::Build(graph.GetDomainMap()->lib(), + geoData->getNumGlobalCoarseNodes(), + ghostedCoarseNodeCoarseGIDs(), + graph.GetDomainMap()->getIndexBase(), + graph.GetDomainMap()->getComm()); + + LO coarseNodeIdx = 0; + Array coarseNodeCoarseGIDs, coarseNodeFineGIDs; + geoData->getCoarseNodesData(graph.GetDomainMap(), coarseNodeCoarseGIDs, coarseNodeFineGIDs); + for (LO nodeIdx = 0; nodeIdx < ghostedCoarseNodeCoarseGIDs.size(); ++nodeIdx) { + if (ghostedCoarseNodeCoarsePIDs[nodeIdx] == colMap->getComm()->getRank()) { + coarseNodeCoarseGIDs[coarseNodeIdx] = ghostedCoarseNodeCoarseGIDs[nodeIdx]; + ++coarseNodeIdx; } - - geoData->getCoarseNodeGhostedLID(coarseIdx[0], coarseIdx[1], coarseIdx[2], - ghostedCoarseNodeCoarseLID); - - aggId = ghostedCoarseNodeCoarseLIDs[ghostedCoarseNodeCoarseLID]; - vertex2AggId[nodeIdx] = aggId; - procWinner[nodeIdx] = ghostedCoarseNodeCoarsePIDs[ghostedCoarseNodeCoarseLID]; - aggStat[nodeIdx] = AGGREGATED; - --numNonAggregatedNodes; - - } // Loop over fine points - } // BuildAggregates() - - - template - void AggregationStructuredAlgorithm:: - BuildGraph(const GraphBase& graph, RCP& geoData, const LO dofsPerNode, - RCP& myGraph, RCP& coarseCoordinatesFineMap, - RCP& coarseCoordinatesMap) const { - Monitor m(*this, "BuildGraphP"); - - RCP out; - if(const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); } - - const bool coupled = geoData->isAggregationCoupled(); - - // Compute the number of coarse points needed to interpolate quantities to a fine point - int numInterpolationPoints = 0; - if(geoData->getInterpolationOrder() == 0) { - numInterpolationPoints = 1; - } else if(geoData->getInterpolationOrder() == 1) { - // Compute 2^numDimensions using bit logic to avoid round-off errors - numInterpolationPoints = 1 << 
geoData->getNumDimensions(); - } - *out << "numInterpolationPoints=" << numInterpolationPoints << std::endl; - - Array colIndex((geoData->getNumLocalCoarseNodes() + numInterpolationPoints* - (geoData->getNumLocalFineNodes() - geoData->getNumLocalCoarseNodes()))*dofsPerNode); - Array rowPtr(geoData->getNumLocalFineNodes()*dofsPerNode + 1); - rowPtr[0] = 0; - ArrayRCP nnzOnRow(geoData->getNumLocalFineNodes()*dofsPerNode); - - *out << "Compute prolongatorGraph data" << std::endl; - if(geoData->getInterpolationOrder() == 0) { - ComputeGraphDataConstant(graph, geoData, dofsPerNode, numInterpolationPoints, - nnzOnRow, rowPtr, colIndex); - } else if(geoData->getInterpolationOrder() == 1) { - ComputeGraphDataLinear(graph, geoData, dofsPerNode, numInterpolationPoints, - nnzOnRow, rowPtr, colIndex); + domainMap = MapFactory::Build(graph.GetDomainMap()->lib(), + geoData->getNumGlobalCoarseNodes(), + coarseNodeCoarseGIDs(), + graph.GetDomainMap()->getIndexBase(), + graph.GetDomainMap()->getComm()); + coarseCoordinatesMap = MapFactory::Build(graph.GetDomainMap()->lib(), + geoData->getNumGlobalCoarseNodes(), + coarseNodeCoarseGIDs(), + graph.GetDomainMap()->getIndexBase(), + graph.GetDomainMap()->getComm()); + coarseCoordinatesFineMap = MapFactory::Build(graph.GetDomainMap()->lib(), + geoData->getNumGlobalCoarseNodes(), + coarseNodeFineGIDs(), + graph.GetDomainMap()->getIndexBase(), + graph.GetDomainMap()->getComm()); + } else { + // In this case the map will compute the global number of nodes on the coarse mesh + // and it will assign GIDs to the local coarse nodes. 
+ colMap = MapFactory::Build(graph.GetDomainMap()->lib(), + Teuchos::OrdinalTraits::invalid(), + geoData->getNumLocalCoarseNodes() * dofsPerNode, + graph.GetDomainMap()->getIndexBase(), + graph.GetDomainMap()->getComm()); + domainMap = colMap; + + Array coarseNodeCoarseGIDs(geoData->getNumLocalCoarseNodes()); + Array coarseNodeFineGIDs(geoData->getNumLocalCoarseNodes()); + geoData->getCoarseNodesData(graph.GetDomainMap(), coarseNodeCoarseGIDs, coarseNodeFineGIDs); + coarseCoordinatesMap = MapFactory::Build(graph.GetDomainMap()->lib(), + Teuchos::OrdinalTraits::invalid(), + geoData->getNumLocalCoarseNodes(), + graph.GetDomainMap()->getIndexBase(), + graph.GetDomainMap()->getComm()); + coarseCoordinatesFineMap = MapFactory::Build(graph.GetDomainMap()->lib(), + Teuchos::OrdinalTraits::invalid(), + coarseNodeFineGIDs(), + graph.GetDomainMap()->getIndexBase(), + graph.GetDomainMap()->getComm()); + } + + *out << "Call constructor of CrsGraph" << std::endl; + myGraph = CrsGraphFactory::Build(rowMap, + colMap, + nnzOnRow); + + *out << "Fill CrsGraph" << std::endl; + LO rowIdx = 0; + for (LO nodeIdx = 0; nodeIdx < geoData->getNumLocalFineNodes(); ++nodeIdx) { + for (LO dof = 0; dof < dofsPerNode; ++dof) { + rowIdx = nodeIdx * dofsPerNode + dof; + myGraph->insertLocalIndices(rowIdx, colIndex(rowPtr[rowIdx], nnzOnRow[rowIdx])); } - - // Compute graph's rowMap, colMap and domainMap - RCP rowMap = MapFactory::Build(graph.GetDomainMap(), dofsPerNode); - RCP colMap, domainMap; - *out << "Compute domain and column maps of the CrsGraph" << std::endl; - if(coupled){ - *out << "Extract data for ghosted nodes" << std::endl; - Array ghostedCoarseNodeCoarseLIDs; - Array ghostedCoarseNodeCoarsePIDs; - Array ghostedCoarseNodeCoarseGIDs; - geoData->getGhostedNodesData(graph.GetDomainMap(), ghostedCoarseNodeCoarseLIDs, - ghostedCoarseNodeCoarsePIDs, ghostedCoarseNodeCoarseGIDs); - - // In this case we specify the global number of nodes on the coarse mesh - // as well as the GIDs needed on 
rank. - colMap = MapFactory::Build(graph.GetDomainMap()->lib(), - geoData->getNumGlobalCoarseNodes(), - ghostedCoarseNodeCoarseGIDs(), - graph.GetDomainMap()->getIndexBase(), - graph.GetDomainMap()->getComm()); - - LO coarseNodeIdx = 0; - Array coarseNodeCoarseGIDs, coarseNodeFineGIDs; - geoData->getCoarseNodesData(graph.GetDomainMap(), coarseNodeCoarseGIDs, coarseNodeFineGIDs); - for(LO nodeIdx = 0; nodeIdx < ghostedCoarseNodeCoarseGIDs.size(); ++nodeIdx) { - if(ghostedCoarseNodeCoarsePIDs[nodeIdx] == colMap->getComm()->getRank()) { - coarseNodeCoarseGIDs[coarseNodeIdx] = ghostedCoarseNodeCoarseGIDs[nodeIdx]; - ++coarseNodeIdx; + } + + *out << "Call fillComplete on CrsGraph" << std::endl; + myGraph->fillComplete(domainMap, rowMap); + *out << "Prolongator CrsGraph computed" << std::endl; + +} // BuildGraph() + +template +void AggregationStructuredAlgorithm:: + ComputeGraphDataConstant(const GraphBase& graph, RCP& geoData, + const LO dofsPerNode, const int /* numInterpolationPoints */, + ArrayRCP& nnzOnRow, Array& rowPtr, + Array& colIndex) const { + RCP out; + if (const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } + + Array ghostedCoarseNodeCoarseLIDs; + Array ghostedCoarseNodeCoarsePIDs; + Array ghostedCoarseNodeCoarseGIDs; + geoData->getGhostedNodesData(graph.GetDomainMap(), ghostedCoarseNodeCoarseLIDs, + ghostedCoarseNodeCoarsePIDs, ghostedCoarseNodeCoarseGIDs); + + LO ghostedCoarseNodeCoarseLID, rem, rate; + Array ghostedIdx(3), coarseIdx(3); + for (LO nodeIdx = 0; nodeIdx < geoData->getNumLocalFineNodes(); ++nodeIdx) { + // Compute coarse ID associated with fine LID + geoData->getFineNodeGhostedTuple(nodeIdx, ghostedIdx[0], ghostedIdx[1], ghostedIdx[2]); + + for (int dim = 0; dim < 3; ++dim) { + if (geoData->isSingleCoarsePoint() && 
(geoData->getLocalFineNodesInDir(dim) - 1 < geoData->getCoarseningRate(dim))) { + coarseIdx[dim] = 0; + } else { + coarseIdx[dim] = ghostedIdx[dim] / geoData->getCoarseningRate(dim); + rem = ghostedIdx[dim] % geoData->getCoarseningRate(dim); + if (ghostedIdx[dim] - geoData->getOffset(dim) < geoData->getLocalFineNodesInDir(dim) - geoData->getCoarseningEndRate(dim)) { + rate = geoData->getCoarseningRate(dim); + } else { + rate = geoData->getCoarseningEndRate(dim); + } + if (rem > (rate / 2)) { + ++coarseIdx[dim]; + } + if ((geoData->getStartGhostedCoarseNode(dim) * geoData->getCoarseningRate(dim) > geoData->getStartIndex(dim)) && geoData->isAggregationCoupled()) { + --coarseIdx[dim]; } - } - domainMap = MapFactory::Build(graph.GetDomainMap()->lib(), - geoData->getNumGlobalCoarseNodes(), - coarseNodeCoarseGIDs(), - graph.GetDomainMap()->getIndexBase(), - graph.GetDomainMap()->getComm()); - coarseCoordinatesMap = MapFactory::Build(graph.GetDomainMap()->lib(), - geoData->getNumGlobalCoarseNodes(), - coarseNodeCoarseGIDs(), - graph.GetDomainMap()->getIndexBase(), - graph.GetDomainMap()->getComm()); - coarseCoordinatesFineMap = MapFactory::Build(graph.GetDomainMap()->lib(), - geoData->getNumGlobalCoarseNodes(), - coarseNodeFineGIDs(), - graph.GetDomainMap()->getIndexBase(), - graph.GetDomainMap()->getComm()); - } else { - // In this case the map will compute the global number of nodes on the coarse mesh - // and it will assign GIDs to the local coarse nodes. 
- colMap = MapFactory::Build(graph.GetDomainMap()->lib(), - Teuchos::OrdinalTraits::invalid(), - geoData->getNumLocalCoarseNodes()*dofsPerNode, - graph.GetDomainMap()->getIndexBase(), - graph.GetDomainMap()->getComm()); - domainMap = colMap; - - Array coarseNodeCoarseGIDs(geoData->getNumLocalCoarseNodes()); - Array coarseNodeFineGIDs(geoData->getNumLocalCoarseNodes()); - geoData->getCoarseNodesData(graph.GetDomainMap(), coarseNodeCoarseGIDs, coarseNodeFineGIDs); - coarseCoordinatesMap = MapFactory::Build(graph.GetDomainMap()->lib(), - Teuchos::OrdinalTraits::invalid(), - geoData->getNumLocalCoarseNodes(), - graph.GetDomainMap()->getIndexBase(), - graph.GetDomainMap()->getComm()); - coarseCoordinatesFineMap = MapFactory::Build(graph.GetDomainMap()->lib(), - Teuchos::OrdinalTraits::invalid(), - coarseNodeFineGIDs(), - graph.GetDomainMap()->getIndexBase(), - graph.GetDomainMap()->getComm()); - } - - *out << "Call constructor of CrsGraph" << std::endl; - myGraph = CrsGraphFactory::Build(rowMap, - colMap, - nnzOnRow); - - *out << "Fill CrsGraph" << std::endl; - LO rowIdx = 0; - for(LO nodeIdx = 0; nodeIdx < geoData->getNumLocalFineNodes(); ++nodeIdx) { - for(LO dof = 0; dof < dofsPerNode; ++dof) { - rowIdx = nodeIdx*dofsPerNode + dof; - myGraph->insertLocalIndices(rowIdx, colIndex(rowPtr[rowIdx], nnzOnRow[rowIdx]) ); } } - *out << "Call fillComplete on CrsGraph" << std::endl; - myGraph->fillComplete(domainMap, rowMap); - *out << "Prolongator CrsGraph computed" << std::endl; + geoData->getCoarseNodeGhostedLID(coarseIdx[0], coarseIdx[1], coarseIdx[2], + ghostedCoarseNodeCoarseLID); - } // BuildGraph() + for (LO dof = 0; dof < dofsPerNode; ++dof) { + nnzOnRow[nodeIdx * dofsPerNode + dof] = 1; + rowPtr[nodeIdx * dofsPerNode + dof + 1] = rowPtr[nodeIdx * dofsPerNode + dof] + 1; + colIndex[rowPtr[nodeIdx * dofsPerNode + dof]] = + ghostedCoarseNodeCoarseLIDs[ghostedCoarseNodeCoarseLID] * dofsPerNode + dof; + } + } // Loop over fine points +} // ComputeGraphDataConstant() - 
template - void AggregationStructuredAlgorithm:: - ComputeGraphDataConstant(const GraphBase& graph, RCP& geoData, - const LO dofsPerNode, const int /* numInterpolationPoints */, +template +void AggregationStructuredAlgorithm:: + ComputeGraphDataLinear(const GraphBase& /* graph */, RCP& geoData, + const LO dofsPerNode, const int numInterpolationPoints, ArrayRCP& nnzOnRow, Array& rowPtr, Array& colIndex) const { - - RCP out; - if(const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); - } - - Array ghostedCoarseNodeCoarseLIDs; - Array ghostedCoarseNodeCoarsePIDs; - Array ghostedCoarseNodeCoarseGIDs; - geoData->getGhostedNodesData(graph.GetDomainMap(), ghostedCoarseNodeCoarseLIDs, - ghostedCoarseNodeCoarsePIDs, ghostedCoarseNodeCoarseGIDs); - - LO ghostedCoarseNodeCoarseLID, rem, rate; - Array ghostedIdx(3), coarseIdx(3); - for(LO nodeIdx = 0; nodeIdx < geoData->getNumLocalFineNodes(); ++nodeIdx) { - - // Compute coarse ID associated with fine LID - geoData->getFineNodeGhostedTuple(nodeIdx, ghostedIdx[0], ghostedIdx[1], ghostedIdx[2]); - - for(int dim = 0; dim < 3; ++dim) { - if(geoData->isSingleCoarsePoint() - && (geoData->getLocalFineNodesInDir(dim) - 1 < geoData->getCoarseningRate(dim))) { - coarseIdx[dim] = 0; - } else { - coarseIdx[dim] = ghostedIdx[dim] / geoData->getCoarseningRate(dim); - rem = ghostedIdx[dim] % geoData->getCoarseningRate(dim); - if(ghostedIdx[dim] - geoData->getOffset(dim) - < geoData->getLocalFineNodesInDir(dim) - geoData->getCoarseningEndRate(dim)) { - rate = geoData->getCoarseningRate(dim); - } else { - rate = geoData->getCoarseningEndRate(dim); - } - if(rem > (rate / 2)) {++coarseIdx[dim];} - if( (geoData->getStartGhostedCoarseNode(dim)*geoData->getCoarseningRate(dim) - > geoData->getStartIndex(dim)) && 
geoData->isAggregationCoupled() ) { - --coarseIdx[dim]; - } + RCP out; + if (const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } + + const bool coupled = geoData->isAggregationCoupled(); + const int numDimensions = geoData->getNumDimensions(); + Array ghostedIdx(3, 0); + Array coarseIdx(3, 0); + Array ijkRem(3, 0); + const LO coarsePointOffset[8][3] = {{0, 0, 0}, {1, 0, 0}, {0, 1, 0}, {1, 1, 0}, {0, 0, 1}, {1, 0, 1}, {0, 1, 1}, {1, 1, 1}}; + + for (LO nodeIdx = 0; nodeIdx < geoData->getNumLocalFineNodes(); ++nodeIdx) { + // Compute coarse ID associated with fine LID + geoData->getFineNodeGhostedTuple(nodeIdx, ghostedIdx[0], ghostedIdx[1], ghostedIdx[2]); + for (int dim = 0; dim < numDimensions; dim++) { + coarseIdx[dim] = ghostedIdx[dim] / geoData->getCoarseningRate(dim); + ijkRem[dim] = ghostedIdx[dim] % geoData->getCoarseningRate(dim); + if (coupled) { + if (geoData->getStartGhostedCoarseNode(dim) * geoData->getCoarseningRate(dim) > geoData->getStartIndex(dim)) { + --coarseIdx[dim]; } - } - - geoData->getCoarseNodeGhostedLID(coarseIdx[0], coarseIdx[1], coarseIdx[2], - ghostedCoarseNodeCoarseLID); - - for(LO dof = 0; dof < dofsPerNode; ++dof) { - nnzOnRow[nodeIdx*dofsPerNode + dof] = 1; - rowPtr[nodeIdx*dofsPerNode + dof + 1] = rowPtr[nodeIdx*dofsPerNode + dof] + 1; - colIndex[rowPtr[nodeIdx*dofsPerNode + dof]] = - ghostedCoarseNodeCoarseLIDs[ghostedCoarseNodeCoarseLID]*dofsPerNode + dof; - } - } // Loop over fine points - - } // ComputeGraphDataConstant() - - - template - void AggregationStructuredAlgorithm:: - ComputeGraphDataLinear(const GraphBase& /* graph */, RCP& geoData, - const LO dofsPerNode, const int numInterpolationPoints, - ArrayRCP& nnzOnRow, Array& rowPtr, - Array& colIndex) const { - - RCP out; - if(const char* dbg = 
std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); - } - - const bool coupled = geoData->isAggregationCoupled(); - const int numDimensions = geoData->getNumDimensions(); - Array ghostedIdx(3,0); - Array coarseIdx(3,0); - Array ijkRem(3,0); - const LO coarsePointOffset[8][3] = {{0, 0, 0}, {1, 0, 0}, {0, 1, 0}, {1, 1, 0}, - {0, 0, 1}, {1, 0, 1}, {0, 1, 1}, {1, 1, 1}}; - - for(LO nodeIdx = 0; nodeIdx < geoData->getNumLocalFineNodes(); ++nodeIdx) { - - // Compute coarse ID associated with fine LID - geoData->getFineNodeGhostedTuple(nodeIdx, ghostedIdx[0], ghostedIdx[1], ghostedIdx[2]); - for(int dim=0; dim < numDimensions; dim++){ - coarseIdx[dim] = ghostedIdx[dim] / geoData->getCoarseningRate(dim); - ijkRem[dim] = ghostedIdx[dim] % geoData->getCoarseningRate(dim); - if(coupled) { - if (geoData->getStartGhostedCoarseNode(dim)*geoData->getCoarseningRate(dim) - > geoData->getStartIndex(dim)) { - --coarseIdx[dim]; - } - } else { - if(ghostedIdx[dim] == geoData->getLocalFineNodesInDir(dim) - 1) { - coarseIdx[dim] = geoData->getLocalCoarseNodesInDir(dim) - 1; - } + } else { + if (ghostedIdx[dim] == geoData->getLocalFineNodesInDir(dim) - 1) { + coarseIdx[dim] = geoData->getLocalCoarseNodesInDir(dim) - 1; } } + } - // Fill Graph - // Check if Fine node lies on Coarse Node - bool allCoarse = true; - Array isCoarse(numDimensions); - for(int dim = 0; dim < numDimensions; ++dim) { - isCoarse[dim] = false; - if(ijkRem[dim] == 0) + // Fill Graph + // Check if Fine node lies on Coarse Node + bool allCoarse = true; + Array isCoarse(numDimensions); + for (int dim = 0; dim < numDimensions; ++dim) { + isCoarse[dim] = false; + if (ijkRem[dim] == 0) + isCoarse[dim] = true; + + if (coupled) { + if (ghostedIdx[dim] - geoData->getOffset(dim) == geoData->getLocalFineNodesInDir(dim) - 1 && + 
geoData->getMeshEdge(dim * 2 + 1)) + isCoarse[dim] = true; + } else { + if (ghostedIdx[dim] - geoData->getOffset(dim) == geoData->getLocalFineNodesInDir(dim) - 1) isCoarse[dim] = true; - - if(coupled){ - if( ghostedIdx[dim]-geoData->getOffset(dim) == geoData->getLocalFineNodesInDir(dim)-1 && - geoData->getMeshEdge(dim*2+1) ) - isCoarse[dim] = true; - } else { - if( ghostedIdx[dim]-geoData->getOffset(dim) == geoData->getLocalFineNodesInDir(dim)-1) - isCoarse[dim] = true; - } - - if(!isCoarse[dim]) - allCoarse = false; } - LO rowIdx = 0, colIdx = 0; - if(allCoarse) { - for(LO dof = 0; dof < dofsPerNode; ++dof) { - rowIdx = nodeIdx*dofsPerNode + dof; - nnzOnRow[rowIdx] = 1; - rowPtr[rowIdx + 1] = rowPtr[rowIdx] + 1; + if (!isCoarse[dim]) + allCoarse = false; + } - // Fine node lies on Coarse node, easy case, we only need the LID of the coarse node. - geoData->getCoarseNodeGhostedLID(coarseIdx[0], coarseIdx[1], coarseIdx[2], colIdx); - colIndex[rowPtr[rowIdx]] = colIdx*dofsPerNode + dof; - } - } else { - // Harder case, we need the LIDs of all the coarse nodes contributing to the interpolation - for(int dim = 0; dim < numDimensions; ++dim) { - if(coarseIdx[dim] == geoData->getGhostedNodesInDir(dim) - 1) - --coarseIdx[dim]; - } + LO rowIdx = 0, colIdx = 0; + if (allCoarse) { + for (LO dof = 0; dof < dofsPerNode; ++dof) { + rowIdx = nodeIdx * dofsPerNode + dof; + nnzOnRow[rowIdx] = 1; + rowPtr[rowIdx + 1] = rowPtr[rowIdx] + 1; - for(LO dof = 0; dof < dofsPerNode; ++dof) { - // at the current node. 
- rowIdx = nodeIdx*dofsPerNode + dof; - nnzOnRow[rowIdx] = Teuchos::as( numInterpolationPoints ); - rowPtr[rowIdx + 1] = rowPtr[rowIdx] + Teuchos::as(numInterpolationPoints); - // Compute Coarse Node LID - for(LO interpIdx = 0; interpIdx < numInterpolationPoints; ++interpIdx) { - geoData->getCoarseNodeGhostedLID(coarseIdx[0] + coarsePointOffset[interpIdx][0], - coarseIdx[1] + coarsePointOffset[interpIdx][1], - coarseIdx[2] + coarsePointOffset[interpIdx][2], - colIdx); - colIndex[rowPtr[rowIdx] + interpIdx] = colIdx*dofsPerNode + dof; - } // Loop over numInterpolationPoints - } // Loop over dofsPerNode + // Fine node lies on Coarse node, easy case, we only need the LID of the coarse node. + geoData->getCoarseNodeGhostedLID(coarseIdx[0], coarseIdx[1], coarseIdx[2], colIdx); + colIndex[rowPtr[rowIdx]] = colIdx * dofsPerNode + dof; + } + } else { + // Harder case, we need the LIDs of all the coarse nodes contributing to the interpolation + for (int dim = 0; dim < numDimensions; ++dim) { + if (coarseIdx[dim] == geoData->getGhostedNodesInDir(dim) - 1) + --coarseIdx[dim]; } - } // Loop over fine points - } // ComputeGraphDataLinear() -} // end namespace + for (LO dof = 0; dof < dofsPerNode; ++dof) { + // at the current node. 
+ rowIdx = nodeIdx * dofsPerNode + dof; + nnzOnRow[rowIdx] = Teuchos::as(numInterpolationPoints); + rowPtr[rowIdx + 1] = rowPtr[rowIdx] + Teuchos::as(numInterpolationPoints); + // Compute Coarse Node LID + for (LO interpIdx = 0; interpIdx < numInterpolationPoints; ++interpIdx) { + geoData->getCoarseNodeGhostedLID(coarseIdx[0] + coarsePointOffset[interpIdx][0], + coarseIdx[1] + coarsePointOffset[interpIdx][1], + coarseIdx[2] + coarsePointOffset[interpIdx][2], + colIdx); + colIndex[rowPtr[rowIdx] + interpIdx] = colIdx * dofsPerNode + dof; + } // Loop over numInterpolationPoints + } // Loop over dofsPerNode + } + } // Loop over fine points +} // ComputeGraphDataLinear() +} // namespace MueLu #endif /* MUELU_AGGREGATIONSTRUCTUREDALGORITHM_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_kokkos_decl.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_kokkos_decl.hpp index 5d83bf9a5a42..690b225c0371 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_kokkos_decl.hpp @@ -55,7 +55,7 @@ #include "MueLu_LWGraph_kokkos.hpp" namespace MueLu { - /*! +/*! @class AggregationStructuredAlgorithm class. @brief Algorithm for coarsening a graph with structured aggregation. @@ -69,155 +69,146 @@ namespace MueLu { All the parameters needed are passed to this class by the StructuredAggregationFactory class. 
*/ - template - class AggregationStructuredAlgorithm_kokkos : - public MueLu::AggregationAlgorithmBase_kokkos { +template +class AggregationStructuredAlgorithm_kokkos : public MueLu::AggregationAlgorithmBase_kokkos { #undef MUELU_AGGREGATIONSTRUCTUREDALGORITHM_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: + public: + using local_graph_type = typename LWGraph_kokkos::local_graph_type; + using non_const_row_map_type = typename local_graph_type::row_map_type::non_const_type; + using size_type = typename local_graph_type::size_type; + using entries_type = typename local_graph_type::entries_type; + using device_type = typename local_graph_type::device_type; + using execution_space = typename local_graph_type::device_type::execution_space; + using memory_space = typename local_graph_type::device_type::memory_space; - using local_graph_type = typename LWGraph_kokkos::local_graph_type; - using non_const_row_map_type = typename local_graph_type::row_map_type::non_const_type; - using size_type = typename local_graph_type::size_type; - using entries_type = typename local_graph_type::entries_type; - using device_type = typename local_graph_type::device_type; - using execution_space = typename local_graph_type::device_type::execution_space; - using memory_space = typename local_graph_type::device_type::memory_space; + using LOVectorView = decltype(std::declval().getDeviceLocalView(Xpetra::Access::ReadWrite)); + using constIntTupleView = typename Kokkos::View; + using constLOTupleView = typename Kokkos::View; - using LOVectorView = decltype(std::declval().getDeviceLocalView(Xpetra::Access::ReadWrite)); - using constIntTupleView = typename Kokkos::View; - using constLOTupleView = typename Kokkos::View; + //! @name Constructors/Destructors. + //@{ - //! @name Constructors/Destructors. - //@{ + //! Constructor. + AggregationStructuredAlgorithm_kokkos() {} - //! Constructor. - AggregationStructuredAlgorithm_kokkos() { } + //! Destructor. 
+ virtual ~AggregationStructuredAlgorithm_kokkos() {} - //! Destructor. - virtual ~AggregationStructuredAlgorithm_kokkos() { } + //@} - //@} + //! @name Aggregation methods. + //@{ + /*! @brief Build aggregates object. */ - //! @name Aggregation methods. - //@{ + void BuildAggregates(const Teuchos::ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const; - /*! @brief Build aggregates object. */ + /*! @brief Build a CrsGraph instead of aggregates. */ - void BuildAggregates(const Teuchos::ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; + void BuildGraph(const LWGraph_kokkos& graph, + RCP& geoData, + const LO dofsPerNode, + RCP& myGraph) const; + //@} - /*! @brief Build a CrsGraph instead of aggregates. */ + std::string description() const { return "Aggretation: structured algorithm"; } - void BuildGraph(const LWGraph_kokkos& graph, - RCP& geoData, - const LO dofsPerNode, - RCP& myGraph) const; - //@} + struct fillAggregatesFunctor { + IndexManager_kokkos geoData_; + const int myRank_; + Kokkos::View aggStat_; + LOVectorView vertex2AggID_; + LOVectorView procWinner_; - std::string description() const { return "Aggretation: structured algorithm"; } + fillAggregatesFunctor(RCP geoData, + const int myRank, + Kokkos::View aggStat, + LOVectorView vertex2AggID, + LOVectorView procWinner); - struct fillAggregatesFunctor{ + KOKKOS_INLINE_FUNCTION + void operator()(const LO nodeIdx, LO& lNumAggregatedNodes) const; - IndexManager_kokkos geoData_; - const int myRank_; - Kokkos::View aggStat_; - LOVectorView vertex2AggID_; - LOVectorView procWinner_; + }; // struct fillAggregatesFunctor - fillAggregatesFunctor(RCP geoData, - const int myRank, - Kokkos::View aggStat, - LOVectorView vertex2AggID, - LOVectorView procWinner); + struct computeGraphDataConstantFunctor { + IndexManager_kokkos geoData_; + const 
int numGhostedNodes_; + const LO dofsPerNode_; + constIntTupleView coarseRate_; + constIntTupleView endRate_; + constLOTupleView lFineNodesPerDir_; + non_const_row_map_type rowPtr_; + entries_type colIndex_; - KOKKOS_INLINE_FUNCTION - void operator() (const LO nodeIdx, LO& lNumAggregatedNodes) const; - - }; // struct fillAggregatesFunctor - - struct computeGraphDataConstantFunctor { - - IndexManager_kokkos geoData_; - const int numGhostedNodes_; - const LO dofsPerNode_; - constIntTupleView coarseRate_; - constIntTupleView endRate_; - constLOTupleView lFineNodesPerDir_; - non_const_row_map_type rowPtr_; - entries_type colIndex_; - - - computeGraphDataConstantFunctor(RCP geoData, - const LO numGhostedNodes, const LO dofsPerNode, - constIntTupleView coarseRate, constIntTupleView endRate, - constLOTupleView lFineNodesPerDir, - non_const_row_map_type rowPtr, entries_type colIndex); - - KOKKOS_INLINE_FUNCTION - void operator() (const LO nodeIdx) const; - - }; // struct computeGraphDataConstantFunctor - - struct computeGraphRowPtrFunctor { - - IndexManager_kokkos geoData_; - const LO dofsPerNode_; - const int numInterpolationPoints_; - const LO numLocalRows_; - constIntTupleView coarseRate_; - constLOTupleView lFineNodesPerDir_; - non_const_row_map_type rowPtr_; - - computeGraphRowPtrFunctor(RCP geoData, - const LO dofsPerNode, - const int numInterpolationPoints, const LO numLocalRows, - constIntTupleView coarseRate, constLOTupleView lFineNodesPerDir, - non_const_row_map_type rowPtr); - - KOKKOS_INLINE_FUNCTION - void operator() (const LO rowIdx, GO& update, const bool final) const; - }; // struct computeGraphRowPtrFunctor - - struct computeGraphDataLinearFunctor { - - IndexManager_kokkos geoData_; - const int numDimensions_; - const int numGhostedNodes_; - const LO dofsPerNode_; - const int numInterpolationPoints_; - constIntTupleView coarseRate_; - constIntTupleView endRate_; - constLOTupleView lFineNodesPerDir_; - constLOTupleView ghostedNodesPerDir_; - 
non_const_row_map_type rowPtr_; - entries_type colIndex_; - - - computeGraphDataLinearFunctor(RCP geoData, - const int numDimensions, + computeGraphDataConstantFunctor(RCP geoData, const LO numGhostedNodes, const LO dofsPerNode, - const int numInterpolationPoints, constIntTupleView coarseRate, constIntTupleView endRate, constLOTupleView lFineNodesPerDir, - constLOTupleView ghostedNodesPerDir, non_const_row_map_type rowPtr, entries_type colIndex); - KOKKOS_INLINE_FUNCTION - void operator() (const LO nodeIdx) const; - - }; // struct computeGraphDataLinearFunctor - - }; // class AggregationStructuredAlgorithm_kokkos - -} //namespace MueLu + KOKKOS_INLINE_FUNCTION + void operator()(const LO nodeIdx) const; + + }; // struct computeGraphDataConstantFunctor + + struct computeGraphRowPtrFunctor { + IndexManager_kokkos geoData_; + const LO dofsPerNode_; + const int numInterpolationPoints_; + const LO numLocalRows_; + constIntTupleView coarseRate_; + constLOTupleView lFineNodesPerDir_; + non_const_row_map_type rowPtr_; + + computeGraphRowPtrFunctor(RCP geoData, + const LO dofsPerNode, + const int numInterpolationPoints, const LO numLocalRows, + constIntTupleView coarseRate, constLOTupleView lFineNodesPerDir, + non_const_row_map_type rowPtr); + + KOKKOS_INLINE_FUNCTION + void operator()(const LO rowIdx, GO& update, const bool final) const; + }; // struct computeGraphRowPtrFunctor + + struct computeGraphDataLinearFunctor { + IndexManager_kokkos geoData_; + const int numDimensions_; + const int numGhostedNodes_; + const LO dofsPerNode_; + const int numInterpolationPoints_; + constIntTupleView coarseRate_; + constIntTupleView endRate_; + constLOTupleView lFineNodesPerDir_; + constLOTupleView ghostedNodesPerDir_; + non_const_row_map_type rowPtr_; + entries_type colIndex_; + + computeGraphDataLinearFunctor(RCP geoData, + const int numDimensions, + const LO numGhostedNodes, const LO dofsPerNode, + const int numInterpolationPoints, + constIntTupleView coarseRate, constIntTupleView 
endRate, + constLOTupleView lFineNodesPerDir, + constLOTupleView ghostedNodesPerDir, + non_const_row_map_type rowPtr, entries_type colIndex); + + KOKKOS_INLINE_FUNCTION + void operator()(const LO nodeIdx) const; + + }; // struct computeGraphDataLinearFunctor + +}; // class AggregationStructuredAlgorithm_kokkos + +} //namespace MueLu #define MUELU_AGGREGATIONSTRUCTUREDALGORITHM_KOKKOS_SHORT #endif /* MUELU_AGGREGATIONSTRUCTUREDALGORITHM_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_kokkos_def.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_kokkos_def.hpp index 09ab332b27f4..527d1299368c 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_kokkos_def.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_kokkos_def.hpp @@ -46,7 +46,6 @@ #ifndef MUELU_AGGREGATIONSTRUCTUREDALGORITHM_KOKKOS_DEF_HPP #define MUELU_AGGREGATIONSTRUCTUREDALGORITHM_KOKKOS_DEF_HPP - #include #include @@ -65,348 +64,363 @@ namespace MueLu { - template - void AggregationStructuredAlgorithm_kokkos:: - BuildAggregates(const Teuchos::ParameterList& /* params */, const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildAggregates"); - - RCP out; - if(const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); - } - - RCP geoData = aggregates.GetIndexManagerKokkos(); - const LO numLocalFineNodes= geoData->getNumLocalFineNodes(); - const LO numCoarseNodes = geoData->getNumCoarseNodes(); - LOVectorView vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); - LOVectorView procWinner = 
aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite); - - *out << "Loop over fine nodes and assign them to an aggregate and a rank" << std::endl; - LO numAggregatedNodes; - fillAggregatesFunctor fillAggregates(geoData, - graph.GetComm()->getRank(), - aggStat, - vertex2AggId, - procWinner); - Kokkos::parallel_reduce("StructuredAggregation: fill aggregates data", - Kokkos::RangePolicy(0, numLocalFineNodes), - fillAggregates, - numAggregatedNodes); - - *out << "numCoarseNodes= " << numCoarseNodes - << ", numAggregatedNodes= " << numAggregatedNodes << std::endl; - numNonAggregatedNodes = numNonAggregatedNodes - numAggregatedNodes; - - } // BuildAggregates() - - - template - void AggregationStructuredAlgorithm_kokkos:: - BuildGraph(const LWGraph_kokkos& graph, RCP& geoData, const LO dofsPerNode, - RCP& myGraph) const { - Monitor m(*this, "BuildGraphP"); - - RCP out; - if(const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); - } - - // Compute the number of coarse points needed to interpolate quantities to a fine point - int numInterpolationPoints = 0; - if(geoData->getInterpolationOrder() == 0) { - numInterpolationPoints = 1; - } else if(geoData->getInterpolationOrder() == 1) { - // Compute 2^numDimensions using bit logic to avoid round-off errors from std::pow() - numInterpolationPoints = 1 << geoData->getNumDimensions(); - } - *out << "numInterpolationPoints=" << numInterpolationPoints << std::endl; - - const LO numLocalFineNodes = geoData->getNumLocalFineNodes(); - const LO numCoarseNodes = geoData->getNumCoarseNodes(); - const LO numNnzEntries = dofsPerNode*(numCoarseNodes + numInterpolationPoints - *(numLocalFineNodes - numCoarseNodes)); - - non_const_row_map_type rowPtr("Prolongator graph, rowPtr", 
dofsPerNode*(numLocalFineNodes + 1)); - entries_type colIndex("Prolongator graph, colIndices", numNnzEntries); - - *out << "Compute prolongatorGraph data" << std::endl; - if(geoData->getInterpolationOrder() == 0) { - computeGraphDataConstantFunctor computeGraphData(geoData, - numCoarseNodes, - dofsPerNode, - geoData->getCoarseningRates(), - geoData->getCoarseningEndRates(), - geoData->getLocalFineNodesPerDir(), - rowPtr, - colIndex); - Kokkos::parallel_for("Structured Aggregation: compute loca graph data", - Kokkos::RangePolicy(0, numLocalFineNodes), - computeGraphData); - } else if(geoData->getInterpolationOrder() == 1) { - // Note, lbv 2018-11-08: in the piece-wise linear case I am computing the rowPtr - // using a parallel scan, it might be possible to do something faster than that - // by including this calculation in computeGraphDataLinearFunctor but at the moment - // all the ideas I have include a bunch of if statements which I would like to avoid. - computeGraphRowPtrFunctor computeGraphRowPtr(geoData, - dofsPerNode, - numInterpolationPoints, - numLocalFineNodes, - geoData->getCoarseningRates(), - geoData->getLocalFineNodesPerDir(), - rowPtr); - Kokkos::parallel_scan("Structured Aggregation: compute rowPtr for prolongator graph", - Kokkos::RangePolicy(0, numLocalFineNodes + 1), - computeGraphRowPtr); - - computeGraphDataLinearFunctor computeGraphData(geoData, - geoData->getNumDimensions(), +template +void AggregationStructuredAlgorithm_kokkos:: + BuildAggregates(const Teuchos::ParameterList& /* params */, const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const { + Monitor m(*this, "BuildAggregates"); + + RCP out; + if (const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } + + RCP 
geoData = aggregates.GetIndexManagerKokkos(); + const LO numLocalFineNodes = geoData->getNumLocalFineNodes(); + const LO numCoarseNodes = geoData->getNumCoarseNodes(); + LOVectorView vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); + LOVectorView procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite); + + *out << "Loop over fine nodes and assign them to an aggregate and a rank" << std::endl; + LO numAggregatedNodes; + fillAggregatesFunctor fillAggregates(geoData, + graph.GetComm()->getRank(), + aggStat, + vertex2AggId, + procWinner); + Kokkos::parallel_reduce("StructuredAggregation: fill aggregates data", + Kokkos::RangePolicy(0, numLocalFineNodes), + fillAggregates, + numAggregatedNodes); + + *out << "numCoarseNodes= " << numCoarseNodes + << ", numAggregatedNodes= " << numAggregatedNodes << std::endl; + numNonAggregatedNodes = numNonAggregatedNodes - numAggregatedNodes; + +} // BuildAggregates() + +template +void AggregationStructuredAlgorithm_kokkos:: + BuildGraph(const LWGraph_kokkos& graph, RCP& geoData, const LO dofsPerNode, + RCP& myGraph) const { + Monitor m(*this, "BuildGraphP"); + + RCP out; + if (const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } + + // Compute the number of coarse points needed to interpolate quantities to a fine point + int numInterpolationPoints = 0; + if (geoData->getInterpolationOrder() == 0) { + numInterpolationPoints = 1; + } else if (geoData->getInterpolationOrder() == 1) { + // Compute 2^numDimensions using bit logic to avoid round-off errors from std::pow() + numInterpolationPoints = 1 << geoData->getNumDimensions(); + } + *out << "numInterpolationPoints=" << numInterpolationPoints << std::endl; + + const LO numLocalFineNodes = 
geoData->getNumLocalFineNodes(); + const LO numCoarseNodes = geoData->getNumCoarseNodes(); + const LO numNnzEntries = dofsPerNode * (numCoarseNodes + numInterpolationPoints * (numLocalFineNodes - numCoarseNodes)); + + non_const_row_map_type rowPtr("Prolongator graph, rowPtr", dofsPerNode * (numLocalFineNodes + 1)); + entries_type colIndex("Prolongator graph, colIndices", numNnzEntries); + + *out << "Compute prolongatorGraph data" << std::endl; + if (geoData->getInterpolationOrder() == 0) { + computeGraphDataConstantFunctor computeGraphData(geoData, numCoarseNodes, dofsPerNode, - numInterpolationPoints, geoData->getCoarseningRates(), geoData->getCoarseningEndRates(), geoData->getLocalFineNodesPerDir(), - geoData->getCoarseNodesPerDir(), rowPtr, colIndex); - Kokkos::parallel_for("Structured Aggregation: compute loca graph data", - Kokkos::RangePolicy(0, numLocalFineNodes), - computeGraphData); + Kokkos::parallel_for("Structured Aggregation: compute loca graph data", + Kokkos::RangePolicy(0, numLocalFineNodes), + computeGraphData); + } else if (geoData->getInterpolationOrder() == 1) { + // Note, lbv 2018-11-08: in the piece-wise linear case I am computing the rowPtr + // using a parallel scan, it might be possible to do something faster than that + // by including this calculation in computeGraphDataLinearFunctor but at the moment + // all the ideas I have include a bunch of if statements which I would like to avoid. 
+ computeGraphRowPtrFunctor computeGraphRowPtr(geoData, + dofsPerNode, + numInterpolationPoints, + numLocalFineNodes, + geoData->getCoarseningRates(), + geoData->getLocalFineNodesPerDir(), + rowPtr); + Kokkos::parallel_scan("Structured Aggregation: compute rowPtr for prolongator graph", + Kokkos::RangePolicy(0, numLocalFineNodes + 1), + computeGraphRowPtr); + + computeGraphDataLinearFunctor computeGraphData(geoData, + geoData->getNumDimensions(), + numCoarseNodes, + dofsPerNode, + numInterpolationPoints, + geoData->getCoarseningRates(), + geoData->getCoarseningEndRates(), + geoData->getLocalFineNodesPerDir(), + geoData->getCoarseNodesPerDir(), + rowPtr, + colIndex); + Kokkos::parallel_for("Structured Aggregation: compute loca graph data", + Kokkos::RangePolicy(0, numLocalFineNodes), + computeGraphData); + } + + local_graph_type myLocalGraph(colIndex, rowPtr); + + // Compute graph's colMap and domainMap + RCP colMap, domainMap; + *out << "Compute domain and column maps of the CrsGraph" << std::endl; + colMap = MapFactory::Build(graph.GetDomainMap()->lib(), + Teuchos::OrdinalTraits::invalid(), + numCoarseNodes, + graph.GetDomainMap()->getIndexBase(), + graph.GetDomainMap()->getComm()); + domainMap = colMap; + + myGraph = CrsGraphFactory::Build(myLocalGraph, graph.GetDomainMap(), colMap, + colMap, graph.GetDomainMap()); + +} // BuildGraph() + +template +AggregationStructuredAlgorithm_kokkos:: + fillAggregatesFunctor::fillAggregatesFunctor(RCP geoData, + const int myRank, + Kokkos::View aggStat, + LOVectorView vertex2AggID, + LOVectorView procWinner) + : geoData_(*geoData) + , myRank_(myRank) + , aggStat_(aggStat) + , vertex2AggID_(vertex2AggID) + , procWinner_(procWinner) {} + +template +KOKKOS_INLINE_FUNCTION void AggregationStructuredAlgorithm_kokkos:: + fillAggregatesFunctor::operator()(const LO nodeIdx, LO& lNumAggregatedNodes) const { + // Compute coarse ID associated with fine LID + LO rem, rate; + LO coarseNodeCoarseLID; + LO nodeFineTuple[3], coarseIdx[3]; + 
auto coarseRate = geoData_.getCoarseningRates(); + auto endRate = geoData_.getCoarseningEndRates(); + auto lFineNodesPerDir = geoData_.getLocalFineNodesPerDir(); + // Compute coarse ID associated with fine LID + geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple); + + for (int dim = 0; dim < 3; ++dim) { + coarseIdx[dim] = nodeFineTuple[dim] / coarseRate(dim); + rem = nodeFineTuple[dim] % coarseRate(dim); + rate = (nodeFineTuple[dim] < lFineNodesPerDir(dim) - endRate(dim)) ? coarseRate(dim) : endRate(dim); + if (rem > (rate / 2)) { + ++coarseIdx[dim]; } - - local_graph_type myLocalGraph(colIndex, rowPtr); - - // Compute graph's colMap and domainMap - RCP colMap, domainMap; - *out << "Compute domain and column maps of the CrsGraph" << std::endl; - colMap = MapFactory::Build(graph.GetDomainMap()->lib(), - Teuchos::OrdinalTraits::invalid(), - numCoarseNodes, - graph.GetDomainMap()->getIndexBase(), - graph.GetDomainMap()->getComm()); - domainMap = colMap; - - myGraph = CrsGraphFactory::Build(myLocalGraph, graph.GetDomainMap(), colMap, - colMap, graph.GetDomainMap()); - - } // BuildGraph() - - - template - AggregationStructuredAlgorithm_kokkos:: - fillAggregatesFunctor::fillAggregatesFunctor(RCP geoData, - const int myRank, - Kokkos::View aggStat, - LOVectorView vertex2AggID, - LOVectorView procWinner) : - geoData_(*geoData), myRank_(myRank), aggStat_(aggStat), - vertex2AggID_(vertex2AggID), procWinner_(procWinner) {} - - template - KOKKOS_INLINE_FUNCTION - void AggregationStructuredAlgorithm_kokkos:: - fillAggregatesFunctor::operator() (const LO nodeIdx, LO& lNumAggregatedNodes) const { - // Compute coarse ID associated with fine LID - LO rem, rate; - LO coarseNodeCoarseLID; - LO nodeFineTuple[3], coarseIdx[3]; - auto coarseRate = geoData_.getCoarseningRates(); - auto endRate = geoData_.getCoarseningEndRates(); - auto lFineNodesPerDir = geoData_.getLocalFineNodesPerDir(); - // Compute coarse ID associated with fine LID - geoData_.getFineLID2FineTuple(nodeIdx, 
nodeFineTuple); - - for(int dim = 0; dim < 3; ++dim) { - coarseIdx[dim] = nodeFineTuple[dim] / coarseRate(dim); - rem = nodeFineTuple[dim] % coarseRate(dim); - rate = (nodeFineTuple[dim] < lFineNodesPerDir(dim) - endRate(dim)) ? coarseRate(dim) : endRate(dim); - if(rem > (rate / 2)) {++coarseIdx[dim];} + } + + geoData_.getCoarseTuple2CoarseLID(coarseIdx[0], coarseIdx[1], coarseIdx[2], + coarseNodeCoarseLID); + + vertex2AggID_(nodeIdx, 0) = coarseNodeCoarseLID; + procWinner_(nodeIdx, 0) = myRank_; + aggStat_(nodeIdx) = AGGREGATED; + ++lNumAggregatedNodes; + +} // fillAggregatesFunctor::operator() + +template +AggregationStructuredAlgorithm_kokkos:: + computeGraphDataConstantFunctor:: + computeGraphDataConstantFunctor(RCP geoData, + const LO NumGhostedNodes, + const LO dofsPerNode, + constIntTupleView coarseRate, + constIntTupleView endRate, + constLOTupleView lFineNodesPerDir, + non_const_row_map_type rowPtr, + entries_type colIndex) + : geoData_(*geoData) + , numGhostedNodes_(NumGhostedNodes) + , dofsPerNode_(dofsPerNode) + , coarseRate_(coarseRate) + , endRate_(endRate) + , lFineNodesPerDir_(lFineNodesPerDir) + , rowPtr_(rowPtr) + , colIndex_(colIndex) { +} // computeGraphDataConstantFunctor() + +template +KOKKOS_INLINE_FUNCTION void AggregationStructuredAlgorithm_kokkos:: + computeGraphDataConstantFunctor::operator()(const LO nodeIdx) const { + LO nodeFineTuple[3] = {0, 0, 0}; + LO nodeCoarseTuple[3] = {0, 0, 0}; + + // Compute ghosted tuple associated with fine LID + geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple); + + // Compute coarse tuple associated with fine point + // then overwrite it with tuple associated with aggregate + LO rem, rate, coarseNodeCoarseLID; + for (int dim = 0; dim < 3; ++dim) { + nodeCoarseTuple[dim] = nodeFineTuple[dim] / coarseRate_(dim); + rem = nodeFineTuple[dim] % coarseRate_(dim); + if (nodeFineTuple[dim] < (lFineNodesPerDir_(dim) - endRate_(dim))) { + rate = coarseRate_(dim); + } else { + rate = endRate_(dim); } - - 
geoData_.getCoarseTuple2CoarseLID(coarseIdx[0], coarseIdx[1], coarseIdx[2], - coarseNodeCoarseLID); - - vertex2AggID_(nodeIdx, 0) = coarseNodeCoarseLID; - procWinner_(nodeIdx, 0) = myRank_; - aggStat_(nodeIdx) = AGGREGATED; - ++lNumAggregatedNodes; - - } // fillAggregatesFunctor::operator() - - template - AggregationStructuredAlgorithm_kokkos:: - computeGraphDataConstantFunctor:: - computeGraphDataConstantFunctor(RCP geoData, - const LO NumGhostedNodes, - const LO dofsPerNode, - constIntTupleView coarseRate, - constIntTupleView endRate, - constLOTupleView lFineNodesPerDir, - non_const_row_map_type rowPtr, - entries_type colIndex) : geoData_(*geoData), - numGhostedNodes_(NumGhostedNodes), dofsPerNode_(dofsPerNode), - coarseRate_(coarseRate), endRate_(endRate), lFineNodesPerDir_(lFineNodesPerDir), - rowPtr_(rowPtr), colIndex_(colIndex) { - - } // computeGraphDataConstantFunctor() - - template - KOKKOS_INLINE_FUNCTION - void AggregationStructuredAlgorithm_kokkos:: - computeGraphDataConstantFunctor::operator() (const LO nodeIdx) const { - LO nodeFineTuple[3] = {0, 0, 0}; - LO nodeCoarseTuple[3] = {0, 0, 0}; - - // Compute ghosted tuple associated with fine LID - geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple); - - // Compute coarse tuple associated with fine point - // then overwrite it with tuple associated with aggregate - LO rem, rate, coarseNodeCoarseLID; - for(int dim = 0; dim < 3; ++dim) { - nodeCoarseTuple[dim] = nodeFineTuple[dim] / coarseRate_(dim); - rem = nodeFineTuple[dim] % coarseRate_(dim); - if( nodeFineTuple[dim] < (lFineNodesPerDir_(dim) - endRate_(dim)) ) { - rate = coarseRate_(dim); - } else { - rate = endRate_(dim); - } - if(rem > (rate / 2)) {++nodeCoarseTuple[dim];} + if (rem > (rate / 2)) { + ++nodeCoarseTuple[dim]; } + } + + // get LID associted with aggregate + geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2], + coarseNodeCoarseLID); + + // store data into CrsGraph taking care of multiple dofs 
case + for (LO dof = 0; dof < dofsPerNode_; ++dof) { + rowPtr_(nodeIdx * dofsPerNode_ + dof + 1) = nodeIdx * dofsPerNode_ + dof + 1; + colIndex_(nodeIdx * dofsPerNode_ + dof) = coarseNodeCoarseLID * dofsPerNode_ + dof; + } + +} // computeGraphDataConstantFunctor::operator() + +template +AggregationStructuredAlgorithm_kokkos:: + computeGraphRowPtrFunctor::computeGraphRowPtrFunctor(RCP geoData, + const LO dofsPerNode, + const int numInterpolationPoints, + const LO numLocalRows, + constIntTupleView coarseRate, + constLOTupleView lFineNodesPerDir, + non_const_row_map_type rowPtr) + : geoData_(*geoData) + , dofsPerNode_(dofsPerNode) + , numInterpolationPoints_(numInterpolationPoints) + , numLocalRows_(numLocalRows) + , coarseRate_(coarseRate) + , lFineNodesPerDir_(lFineNodesPerDir) + , rowPtr_(rowPtr) {} + +template +KOKKOS_INLINE_FUNCTION void AggregationStructuredAlgorithm_kokkos:: + computeGraphRowPtrFunctor::operator()(const LO rowIdx, GO& update, const bool final) const { + if (final) { + // Kokkos uses a multipass algorithm to implement scan. + // Only update the array on the final pass. Updating the + // array before changing 'update' means that we do an + // exclusive scan. Update the array after for an inclusive + // scan. 
+ rowPtr_(rowIdx) = update; + } + if (rowIdx < numLocalRows_) { + LO nodeIdx = rowIdx / dofsPerNode_; + bool allCoarse = true; + LO nodeFineTuple[3] = {0, 0, 0}; + geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple); + for (int dim = 0; dim < 3; ++dim) { + const LO rem = nodeFineTuple[dim] % coarseRate_(dim); - // get LID associted with aggregate - geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2], - coarseNodeCoarseLID); - - // store data into CrsGraph taking care of multiple dofs case - for(LO dof = 0; dof < dofsPerNode_; ++dof) { - rowPtr_(nodeIdx*dofsPerNode_ + dof + 1) = nodeIdx*dofsPerNode_ + dof + 1; - colIndex_(nodeIdx*dofsPerNode_ + dof) = coarseNodeCoarseLID*dofsPerNode_ + dof; + // Check if Fine node lies on Coarse Node + allCoarse = (allCoarse && ((rem == 0) || (nodeFineTuple[dim] == lFineNodesPerDir_(dim) - 1))); } - - } // computeGraphDataConstantFunctor::operator() - - template - AggregationStructuredAlgorithm_kokkos:: - computeGraphRowPtrFunctor::computeGraphRowPtrFunctor(RCP geoData, - const LO dofsPerNode, - const int numInterpolationPoints, - const LO numLocalRows, - constIntTupleView coarseRate, - constLOTupleView lFineNodesPerDir, - non_const_row_map_type rowPtr) : - geoData_(*geoData), dofsPerNode_(dofsPerNode), - numInterpolationPoints_(numInterpolationPoints), numLocalRows_(numLocalRows), - coarseRate_(coarseRate), lFineNodesPerDir_(lFineNodesPerDir), rowPtr_(rowPtr) {} - - template - KOKKOS_INLINE_FUNCTION - void AggregationStructuredAlgorithm_kokkos:: - computeGraphRowPtrFunctor::operator() (const LO rowIdx, GO& update, const bool final) const { - if (final) { - // Kokkos uses a multipass algorithm to implement scan. - // Only update the array on the final pass. Updating the - // array before changing 'update' means that we do an - // exclusive scan. Update the array after for an inclusive - // scan. - rowPtr_(rowIdx) = update; + update += (allCoarse ? 
1 : numInterpolationPoints_); + } +} // computeGraphRowPtrFunctor::operator() + +template +AggregationStructuredAlgorithm_kokkos:: + computeGraphDataLinearFunctor::computeGraphDataLinearFunctor(RCP geoData, + const int numDimensions, + const LO numGhostedNodes, + const LO dofsPerNode, + const int numInterpolationPoints, + constIntTupleView coarseRate, + constIntTupleView endRate, + constLOTupleView lFineNodesPerDir, + constLOTupleView ghostedNodesPerDir, + non_const_row_map_type rowPtr, + entries_type colIndex) + : geoData_(*geoData) + , numDimensions_(numDimensions) + , numGhostedNodes_(numGhostedNodes) + , dofsPerNode_(dofsPerNode) + , numInterpolationPoints_(numInterpolationPoints) + , coarseRate_(coarseRate) + , endRate_(endRate) + , lFineNodesPerDir_(lFineNodesPerDir) + , ghostedNodesPerDir_(ghostedNodesPerDir) + , rowPtr_(rowPtr) + , colIndex_(colIndex) { +} // computeGraphDataLinearFunctor() + +template +KOKKOS_INLINE_FUNCTION void AggregationStructuredAlgorithm_kokkos:: + computeGraphDataLinearFunctor::operator()(const LO nodeIdx) const { + LO nodeFineTuple[3] = {0, 0, 0}; + LO nodeCoarseTuple[3] = {0, 0, 0}; + + // Compute coarse ID associated with fine LID + geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple); + + LO coarseNodeCoarseLID; + bool allCoarse = false; + for (int dim = 0; dim < 3; ++dim) { + nodeCoarseTuple[dim] = nodeFineTuple[dim] / coarseRate_(dim); + } + if (rowPtr_(nodeIdx + 1) == rowPtr_(nodeIdx) + 1) { + allCoarse = true; + } + + geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2], + coarseNodeCoarseLID); + + if (allCoarse) { + // Fine node lies on Coarse node, easy case, we only need the LID of the coarse node. 
+ for (LO dof = 0; dof < dofsPerNode_; ++dof) { + colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof)) = coarseNodeCoarseLID * dofsPerNode_ + dof; } - if (rowIdx < numLocalRows_) { - LO nodeIdx = rowIdx / dofsPerNode_; - bool allCoarse = true; - LO nodeFineTuple[3] = {0, 0, 0}; - geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple); - for(int dim = 0; dim < 3; ++dim) { - const LO rem = nodeFineTuple[dim] % coarseRate_(dim); - - // Check if Fine node lies on Coarse Node - allCoarse = (allCoarse && ((rem == 0) || (nodeFineTuple[dim] == lFineNodesPerDir_(dim) - 1))); + } else { + for (int dim = 0; dim < numDimensions_; ++dim) { + if (nodeCoarseTuple[dim] == ghostedNodesPerDir_(dim) - 1) { + --nodeCoarseTuple[dim]; } - update += (allCoarse ? 1 : numInterpolationPoints_); } - } // computeGraphRowPtrFunctor::operator() - - template - AggregationStructuredAlgorithm_kokkos:: - computeGraphDataLinearFunctor::computeGraphDataLinearFunctor(RCP geoData, - const int numDimensions, - const LO numGhostedNodes, - const LO dofsPerNode, - const int numInterpolationPoints, - constIntTupleView coarseRate, - constIntTupleView endRate, - constLOTupleView lFineNodesPerDir, - constLOTupleView ghostedNodesPerDir, - non_const_row_map_type rowPtr, - entries_type colIndex) : - geoData_(*geoData), numDimensions_(numDimensions), - numGhostedNodes_(numGhostedNodes), - dofsPerNode_(dofsPerNode), numInterpolationPoints_(numInterpolationPoints), - coarseRate_(coarseRate), endRate_(endRate), lFineNodesPerDir_(lFineNodesPerDir), - ghostedNodesPerDir_(ghostedNodesPerDir), rowPtr_(rowPtr), colIndex_(colIndex) { - - } // computeGraphDataLinearFunctor() - - template - KOKKOS_INLINE_FUNCTION - void AggregationStructuredAlgorithm_kokkos:: - computeGraphDataLinearFunctor::operator() (const LO nodeIdx) const { - LO nodeFineTuple[3] = {0, 0, 0}; - LO nodeCoarseTuple[3] = {0, 0, 0}; - - // Compute coarse ID associated with fine LID - geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple); - - LO 
coarseNodeCoarseLID; - bool allCoarse = false; - for(int dim = 0; dim < 3; ++dim) { - nodeCoarseTuple[dim] = nodeFineTuple[dim] / coarseRate_(dim); - } - if(rowPtr_(nodeIdx + 1) == rowPtr_(nodeIdx) + 1) {allCoarse = true;} - - geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2], - coarseNodeCoarseLID); - - if(allCoarse) { - // Fine node lies on Coarse node, easy case, we only need the LID of the coarse node. - for(LO dof = 0; dof < dofsPerNode_; ++dof) { - colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)) = coarseNodeCoarseLID*dofsPerNode_ + dof; - } - } else { - - for(int dim = 0; dim < numDimensions_; ++dim) { - if(nodeCoarseTuple[dim] == ghostedNodesPerDir_(dim) - 1) { --nodeCoarseTuple[dim]; } - } - // Compute Coarse Node LID - // Note lbv 10-06-2018: it is likely benefitial to remove the two if statments and somehow - // find out the number of dimensions before calling the opertor() of the functor. - for(LO dof = 0; dof < dofsPerNode_; ++dof) { - geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+0)); - geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0]+1, nodeCoarseTuple[1], nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+1)); - if(numDimensions_ > 1) { - geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0], nodeCoarseTuple[1]+1, nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+2)); - geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0]+1, nodeCoarseTuple[1]+1, nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+3)); - if(numDimensions_ > 2) { - geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+4)); - geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0]+1, nodeCoarseTuple[1], nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+5)); - 
geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1]+1, nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+6)); - geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0]+1, nodeCoarseTuple[1]+1, nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+7)); - } + // Compute Coarse Node LID + // Note lbv 10-06-2018: it is likely benefitial to remove the two if statments and somehow + // find out the number of dimensions before calling the opertor() of the functor. + for (LO dof = 0; dof < dofsPerNode_; ++dof) { + geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof) + 0)); + geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0] + 1, nodeCoarseTuple[1], nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof) + 1)); + if (numDimensions_ > 1) { + geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1] + 1, nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof) + 2)); + geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0] + 1, nodeCoarseTuple[1] + 1, nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof) + 3)); + if (numDimensions_ > 2) { + geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2] + 1, colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof) + 4)); + geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0] + 1, nodeCoarseTuple[1], nodeCoarseTuple[2] + 1, colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof) + 5)); + geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1] + 1, nodeCoarseTuple[2] + 1, colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof) + 6)); + geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0] + 1, nodeCoarseTuple[1] + 1, nodeCoarseTuple[2] + 1, colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof) + 7)); } } } - } // computeGraphDataLinearFunctor::operator() - -} // end namespace + } +} // 
computeGraphDataLinearFunctor::operator() +} // namespace MueLu #endif /* MUELU_AGGREGATIONSTRUCTUREDALGORITHM_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_decl.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_decl.hpp index 28d26813f5d6..b6f644ea0827 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_decl.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_decl.hpp @@ -75,178 +75,174 @@ namespace MueLu { and local lexicographic mesh orderings are supported. */ - template - class IndexManager : public BaseClass { +template +class IndexManager : public BaseClass { #undef MUELU_INDEXMANAGER_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - private: + private: + protected: + const RCP > comm_; ///< Communicator used by uncoupled aggregation + const bool coupled_; ///< Flag for coupled vs uncoupled aggregation mode, if true aggregation is coupled. + const bool singleCoarsePoint_; ///< Flag telling us if can reduce dimensions to a single layer. + const int numDimensions; ///< Number of spacial dimensions in the problem + const int interpolationOrder_; ///< Interpolation order used by grid transfer operators using these aggregates. - protected: + Array coarseRate; ///< coarsening rate in each direction + Array endRate; ///< adapted coarsening rate at the edge of the mesh in each direction. - const RCP > comm_; ///< Communicator used by uncoupled aggregation - const bool coupled_; ///< Flag for coupled vs uncoupled aggregation mode, if true aggregation is coupled. - const bool singleCoarsePoint_; ///< Flag telling us if can reduce dimensions to a single layer. - const int numDimensions; ///< Number of spacial dimensions in the problem - const int interpolationOrder_; ///< Interpolation order used by grid transfer operators using these aggregates. + GO gNumFineNodes; ///< global number of nodes. 
+ GO gNumFineNodes10; ///< global number of nodes per 0-1 slice. + const Array gFineNodesPerDir; ///< global number of nodes per direction. - Array coarseRate; ///< coarsening rate in each direction - Array endRate; ///< adapted coarsening rate at the edge of the mesh in each direction. + LO lNumFineNodes; ///< local number of nodes. + LO lNumFineNodes10; ///< local number of nodes per 0-1 slice. + const Array lFineNodesPerDir; ///< local number of nodes per direction. - GO gNumFineNodes; ///< global number of nodes. - GO gNumFineNodes10; ///< global number of nodes per 0-1 slice. - const Array gFineNodesPerDir; ///< global number of nodes per direction. + GO gNumCoarseNodes; ///< global number of nodes remaining after coarsening. + GO gNumCoarseNodes10; ///< global number of nodes per 0-1 slice remaining after coarsening. + Array gCoarseNodesPerDir; ///< global number of nodes per direction remaining after coarsening. - LO lNumFineNodes; ///< local number of nodes. - LO lNumFineNodes10; ///< local number of nodes per 0-1 slice. - const Array lFineNodesPerDir; ///< local number of nodes per direction. + LO lNumCoarseNodes; ///< local number of nodes remaining after coarsening. + LO lNumCoarseNodes10; ///< local number of nodes per 0-1 slice remaining after coarsening. + Array lCoarseNodesPerDir; ///< local number of nodes per direction remaing after coarsening. - GO gNumCoarseNodes; ///< global number of nodes remaining after coarsening. - GO gNumCoarseNodes10; ///< global number of nodes per 0-1 slice remaining after coarsening. - Array gCoarseNodesPerDir; ///< global number of nodes per direction remaining after coarsening. + LO numGhostNodes; ///< local number of ghost nodes + LO numGhostedNodes; ///< local number of ghosted nodes (i.e. ghost + coarse nodes). + LO numGhostedNodes10; ///< local number of ghosted nodes (i.e. ghost + coarse nodes) per 0-1 slice. + Array ghostedNodesPerDir; ///< local number of ghosted nodes (i.e. 
ghost + coarse nodes) per direction - LO lNumCoarseNodes; ///< local number of nodes remaining after coarsening. - LO lNumCoarseNodes10; ///< local number of nodes per 0-1 slice remaining after coarsening. - Array lCoarseNodesPerDir; ///< local number of nodes per direction remaing after coarsening. + GO minGlobalIndex; ///< lowest GID of any node in the local process + Array offsets; ///< distance between lowest (resp. highest) index to the lowest (resp. highest) ghostedNodeIndex in that direction. + Array coarseNodeOffsets; ///< distance between lowest (resp. highest) index to the lowest (resp. highest) coarseNodeIndex in that direction. + Array startIndices; ///< lowest global tuple (i,j,k) of a node on the local process + Array startGhostedCoarseNode; ///< lowest coarse global tuple (i,j,k) of a node remaing on the local process after coarsening. - LO numGhostNodes; ///< local number of ghost nodes - LO numGhostedNodes; ///< local number of ghosted nodes (i.e. ghost + coarse nodes). - LO numGhostedNodes10; ///< local number of ghosted nodes (i.e. ghost + coarse nodes) per 0-1 slice. - Array ghostedNodesPerDir; ///< local number of ghosted nodes (i.e. ghost + coarse nodes) per direction + bool meshEdge[6] = {false}; ///< flags indicating if we run into the edge of the mesh in ilo, ihi, jlo, jhi, klo or khi. + bool ghostInterface[6] = {false}; ///< flags indicating if ghost points are needed at ilo, ihi, jlo, jhi, klo and khi boundaries. + bool ghostedDir[6] = {false}; ///< flags indicating if ghost points are needed at ilo, ihi, jlo, jhi, klo and khi boundaries. - GO minGlobalIndex; ///< lowest GID of any node in the local process - Array offsets; ///< distance between lowest (resp. highest) index to the lowest (resp. highest) ghostedNodeIndex in that direction. - Array coarseNodeOffsets; ///< distance between lowest (resp. highest) index to the lowest (resp. highest) coarseNodeIndex in that direction. 
- Array startIndices; ///< lowest global tuple (i,j,k) of a node on the local process - Array startGhostedCoarseNode; ///< lowest coarse global tuple (i,j,k) of a node remaing on the local process after coarsening. + public: + IndexManager() = default; - bool meshEdge[6] = {false}; ///< flags indicating if we run into the edge of the mesh in ilo, ihi, jlo, jhi, klo or khi. - bool ghostInterface[6] = {false}; ///< flags indicating if ghost points are needed at ilo, ihi, jlo, jhi, klo and khi boundaries. - bool ghostedDir[6] = {false}; ///< flags indicating if ghost points are needed at ilo, ihi, jlo, jhi, klo and khi boundaries. + IndexManager(const RCP > comm, const bool coupled, + const bool singleCoarsePoint, const int NumDimensions, + const int interpolationOrder, const Array GFineNodesPerDir, + const Array LFineNodesPerDir); - public: + virtual ~IndexManager() {} - IndexManager() = default; + //! Sets basic parameters used to compute indices on the mesh. + //! This method requires you to have set this->coarseRate and this->startIndices. + void computeMeshParameters(); - IndexManager(const RCP > comm, const bool coupled, - const bool singleCoarsePoint, const int NumDimensions, - const int interpolationOrder, const Array GFineNodesPerDir, - const Array LFineNodesPerDir); + virtual void computeGlobalCoarseParameters() = 0; - virtual ~IndexManager() {} + virtual void getGhostedNodesData(const RCP fineMap, + Array& ghostedNodeCoarseLIDs, + Array& ghostedNodeCoarsePIDs, + Array& ghostedNodeCoarseGIDs) const = 0; - //! Sets basic parameters used to compute indices on the mesh. - //! This method requires you to have set this->coarseRate and this->startIndices. 
- void computeMeshParameters(); + virtual void getCoarseNodesData(const RCP fineCoordinatesMap, + Array& coarseNodeCoarseGIDs, + Array& coarseNodeFineGIDs) const = 0; - virtual void computeGlobalCoarseParameters() = 0; + bool isAggregationCoupled() const { return coupled_; } - virtual void getGhostedNodesData(const RCP fineMap, - Array& ghostedNodeCoarseLIDs, - Array& ghostedNodeCoarsePIDs, - Array& ghostedNodeCoarseGIDs) const = 0; + bool isSingleCoarsePoint() const { return singleCoarsePoint_; } - virtual void getCoarseNodesData(const RCP fineCoordinatesMap, - Array& coarseNodeCoarseGIDs, - Array& coarseNodeFineGIDs) const = 0; + int getNumDimensions() const { return numDimensions; } - bool isAggregationCoupled() const {return coupled_;} + int getInterpolationOrder() const { return interpolationOrder_; } - bool isSingleCoarsePoint() const {return singleCoarsePoint_;} + GO getNumGlobalFineNodes() const { return gNumFineNodes; } - int getNumDimensions() const {return numDimensions;} + GO getNumGlobalCoarseNodes() const { return gNumCoarseNodes; } - int getInterpolationOrder() const {return interpolationOrder_;} + LO getNumLocalFineNodes() const { return lNumFineNodes; } - GO getNumGlobalFineNodes() const {return gNumFineNodes;} + LO getNumLocalCoarseNodes() const { return lNumCoarseNodes; } - GO getNumGlobalCoarseNodes() const {return gNumCoarseNodes;} + LO getNumLocalGhostedNodes() const { return numGhostedNodes; } - LO getNumLocalFineNodes() const {return lNumFineNodes;} + Array getCoarseningRates() const { return coarseRate; } - LO getNumLocalCoarseNodes() const {return lNumCoarseNodes;} + int getCoarseningRate(const int dim) const { return coarseRate[dim]; } - LO getNumLocalGhostedNodes() const {return numGhostedNodes;} + Array getCoarseningEndRates() const { return endRate; } - Array getCoarseningRates() const {return coarseRate;} + int getCoarseningEndRate(const int dim) const { return endRate[dim]; } - int getCoarseningRate(const int dim) const {return 
coarseRate[dim];} + bool getMeshEdge(const int dir) const { return meshEdge[dir]; } - Array getCoarseningEndRates() const {return endRate;} + bool getGhostInterface(const int dir) const { return ghostInterface[dir]; } - int getCoarseningEndRate(const int dim) const {return endRate[dim];} + Array getOffsets() const { return offsets; } - bool getMeshEdge(const int dir) const {return meshEdge[dir];} + LO getOffset(int const dim) const { return offsets[dim]; } - bool getGhostInterface(const int dir) const {return ghostInterface[dir];} + Array getCoarseNodeOffsets() const { return coarseNodeOffsets; } - Array getOffsets() const {return offsets;} + LO getCoarseNodeOffset(int const dim) const { return coarseNodeOffsets[dim]; } - LO getOffset(int const dim) const {return offsets[dim];} + Array getStartIndices() const { return startIndices; } - Array getCoarseNodeOffsets() const {return coarseNodeOffsets;} + GO getStartIndex(int const dim) const { return startIndices[dim]; } - LO getCoarseNodeOffset(int const dim) const {return coarseNodeOffsets[dim];} + Array getStartGhostedCoarseNodes() const { return startGhostedCoarseNode; } - Array getStartIndices() const {return startIndices;} + GO getStartGhostedCoarseNode(int const dim) const { return startGhostedCoarseNode[dim]; } - GO getStartIndex(int const dim) const {return startIndices[dim];} + Array getLocalFineNodesPerDir() const { return lFineNodesPerDir; } - Array getStartGhostedCoarseNodes() const {return startGhostedCoarseNode;} + LO getLocalFineNodesInDir(const int dim) const { return lFineNodesPerDir[dim]; } - GO getStartGhostedCoarseNode(int const dim) const {return startGhostedCoarseNode[dim];} + Array getGlobalFineNodesPerDir() const { return gFineNodesPerDir; } - Array getLocalFineNodesPerDir() const {return lFineNodesPerDir;} + GO getGlobalFineNodesInDir(const int dim) const { return gFineNodesPerDir[dim]; } - LO getLocalFineNodesInDir(const int dim) const {return lFineNodesPerDir[dim];} + Array 
getLocalCoarseNodesPerDir() const { return lCoarseNodesPerDir; } - Array getGlobalFineNodesPerDir() const {return gFineNodesPerDir;} + LO getLocalCoarseNodesInDir(const int dim) const { return lCoarseNodesPerDir[dim]; } - GO getGlobalFineNodesInDir(const int dim) const {return gFineNodesPerDir[dim];} + Array getGlobalCoarseNodesPerDir() const { return gCoarseNodesPerDir; } - Array getLocalCoarseNodesPerDir() const {return lCoarseNodesPerDir;} + GO getGlobalCoarseNodesInDir(const int dim) const { return gCoarseNodesPerDir[dim]; } - LO getLocalCoarseNodesInDir(const int dim) const {return lCoarseNodesPerDir[dim];} + Array getGhostedNodesPerDir() const { return ghostedNodesPerDir; } - Array getGlobalCoarseNodesPerDir() const {return gCoarseNodesPerDir;} + LO getGhostedNodesInDir(const int dim) const { return ghostedNodesPerDir[dim]; } - GO getGlobalCoarseNodesInDir(const int dim) const {return gCoarseNodesPerDir[dim];} + virtual std::vector > getCoarseMeshData() const = 0; - Array getGhostedNodesPerDir() const {return ghostedNodesPerDir;} + virtual void getFineNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const = 0; - LO getGhostedNodesInDir(const int dim) const {return ghostedNodesPerDir[dim];} + virtual void getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const = 0; - virtual std::vector > getCoarseMeshData() const = 0; + virtual void getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const = 0; - virtual void getFineNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const = 0; + virtual void getFineNodeGID(const GO i, const GO j, const GO k, GO& myGID) const = 0; - virtual void getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const = 0; + virtual void getFineNodeLID(const LO i, const LO j, const LO k, LO& myLID) const = 0; - virtual void getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const = 0; + virtual void getCoarseNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const = 0; - virtual void 
getFineNodeGID(const GO i, const GO j, const GO k, GO& myGID) const = 0; + virtual void getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const = 0; - virtual void getFineNodeLID(const LO i, const LO j, const LO k, LO& myLID) const = 0; + virtual void getCoarseNodeGID(const GO i, const GO j, const GO k, GO& myGID) const = 0; - virtual void getCoarseNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const = 0; + virtual void getCoarseNodeLID(const LO i, const LO j, const LO k, LO& myLID) const = 0; - virtual void getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const = 0; + virtual void getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const = 0; - virtual void getCoarseNodeGID(const GO i, const GO j, const GO k, GO& myGID) const = 0; + virtual void getCoarseNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const = 0; - virtual void getCoarseNodeLID(const LO i, const LO j, const LO k, LO& myLID) const = 0; + virtual void getGhostedNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const = 0; - virtual void getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const = 0; + virtual void getGhostedNodeCoarseLID(const LO i, const LO j, const LO k, LO& myLID) const = 0; +}; - virtual void getCoarseNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const = 0; - - virtual void getGhostedNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const = 0; - - virtual void getGhostedNodeCoarseLID(const LO i, const LO j, const LO k, LO& myLID) const = 0; - - }; - -} //namespace MueLu +} //namespace MueLu #define MUELU_INDEXMANAGER_SHORT -#endif // MUELU_INDEXMANAGER_DECL_HPP +#endif // MUELU_INDEXMANAGER_DECL_HPP diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_def.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_def.hpp index 894e26393906..0d46513dfebf 100644 --- 
a/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_def.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_def.hpp @@ -57,220 +57,233 @@ namespace MueLu { - template - IndexManager:: - IndexManager(const RCP > comm, - const bool coupled, - const bool singleCoarsePoint, - const int NumDimensions, - const int interpolationOrder, - const Array GFineNodesPerDir, - const Array LFineNodesPerDir) : - comm_(comm), coupled_(coupled), singleCoarsePoint_(singleCoarsePoint), - numDimensions(NumDimensions), interpolationOrder_(interpolationOrder), - gFineNodesPerDir(GFineNodesPerDir), lFineNodesPerDir(LFineNodesPerDir) { +template +IndexManager:: + IndexManager(const RCP > comm, + const bool coupled, + const bool singleCoarsePoint, + const int NumDimensions, + const int interpolationOrder, + const Array GFineNodesPerDir, + const Array LFineNodesPerDir) + : comm_(comm) + , coupled_(coupled) + , singleCoarsePoint_(singleCoarsePoint) + , numDimensions(NumDimensions) + , interpolationOrder_(interpolationOrder) + , gFineNodesPerDir(GFineNodesPerDir) + , lFineNodesPerDir(LFineNodesPerDir) { + coarseRate.resize(3); + endRate.resize(3); + gCoarseNodesPerDir.resize(3); + lCoarseNodesPerDir.resize(3); + ghostedNodesPerDir.resize(3); - coarseRate.resize(3); - endRate.resize(3); - gCoarseNodesPerDir.resize(3); - lCoarseNodesPerDir.resize(3); - ghostedNodesPerDir.resize(3); + offsets.resize(3); + coarseNodeOffsets.resize(3); + startIndices.resize(6); + startGhostedCoarseNode.resize(3); - offsets.resize(3); - coarseNodeOffsets.resize(3); - startIndices.resize(6); - startGhostedCoarseNode.resize(3); +} // Constructor - } // Constructor +template +void IndexManager:: + computeMeshParameters() { + RCP out; + if (const char* dbg = std::getenv("MUELU_INDEXMANAGER_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new 
Teuchos::oblackholestream())); + } - template - void IndexManager:: - computeMeshParameters() { + if (coupled_) { + gNumFineNodes10 = gFineNodesPerDir[1] * gFineNodesPerDir[0]; + gNumFineNodes = gFineNodesPerDir[2] * gNumFineNodes10; + } else { + gNumFineNodes10 = Teuchos::OrdinalTraits::invalid(); + gNumFineNodes = Teuchos::OrdinalTraits::invalid(); + } + lNumFineNodes10 = lFineNodesPerDir[1] * lFineNodesPerDir[0]; + lNumFineNodes = lFineNodesPerDir[2] * lNumFineNodes10; + for (int dim = 0; dim < 3; ++dim) { + if (dim < numDimensions) { + if (coupled_) { + if (startIndices[dim] == 0) { + meshEdge[2 * dim] = true; + } + if (startIndices[dim + 3] + 1 == gFineNodesPerDir[dim]) { + meshEdge[2 * dim + 1] = true; + endRate[dim] = startIndices[dim + 3] % coarseRate[dim]; + } + } else { // With uncoupled problem each rank might require a different endRate + meshEdge[2 * dim] = true; + meshEdge[2 * dim + 1] = true; + endRate[dim] = (lFineNodesPerDir[dim] - 1) % coarseRate[dim]; + } + if (endRate[dim] == 0) { + endRate[dim] = coarseRate[dim]; + } - RCP out; - if(const char* dbg = std::getenv("MUELU_INDEXMANAGER_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); - } + // If uncoupled aggregation is used, offsets[dim] = 0, so nothing to do. 
+ if (coupled_) { + offsets[dim] = Teuchos::as(startIndices[dim]) % coarseRate[dim]; + if (offsets[dim] == 0) { + coarseNodeOffsets[dim] = 0; + } else if (startIndices[dim] + endRate[dim] == lFineNodesPerDir[dim]) { + coarseNodeOffsets[dim] = endRate[dim] - offsets[dim]; + } else { + coarseNodeOffsets[dim] = coarseRate[dim] - offsets[dim]; + } - if(coupled_) { - gNumFineNodes10 = gFineNodesPerDir[1]*gFineNodesPerDir[0]; - gNumFineNodes = gFineNodesPerDir[2]*gNumFineNodes10; - } else { - gNumFineNodes10 = Teuchos::OrdinalTraits::invalid(); - gNumFineNodes = Teuchos::OrdinalTraits::invalid(); - } - lNumFineNodes10 = lFineNodesPerDir[1]*lFineNodesPerDir[0]; - lNumFineNodes = lFineNodesPerDir[2]*lNumFineNodes10; - for(int dim = 0; dim < 3; ++dim) { - if(dim < numDimensions) { - if(coupled_) { - if(startIndices[dim] == 0) { - meshEdge[2*dim] = true; + if (interpolationOrder_ == 0) { + int rem = startIndices[dim] % coarseRate[dim]; + if ((rem != 0) && (rem <= Teuchos::as(coarseRate[dim]) / 2.0)) { + ghostInterface[2 * dim] = true; } - if(startIndices[dim + 3] + 1 == gFineNodesPerDir[dim]) { - meshEdge[2*dim + 1] = true; - endRate[dim] = startIndices[dim + 3] % coarseRate[dim]; + rem = startIndices[dim + 3] % coarseRate[dim]; + // uncoupled by nature does not require ghosts nodes + if (coupled_ && (startIndices[dim + 3] != gFineNodesPerDir[dim] - 1) && + (rem > Teuchos::as(coarseRate[dim]) / 2.0)) { + ghostInterface[2 * dim + 1] = true; } - } else { // With uncoupled problem each rank might require a different endRate - meshEdge[2*dim] = true; - meshEdge[2*dim + 1] = true; - endRate[dim] = (lFineNodesPerDir[dim] - 1) % coarseRate[dim]; - } - if(endRate[dim] == 0) {endRate[dim] = coarseRate[dim];} - // If uncoupled aggregation is used, offsets[dim] = 0, so nothing to do. 
- if(coupled_) { - offsets[dim] = Teuchos::as(startIndices[dim]) % coarseRate[dim]; - if(offsets[dim] == 0) { - coarseNodeOffsets[dim] = 0; - } else if(startIndices[dim] + endRate[dim] == lFineNodesPerDir[dim]) { - coarseNodeOffsets[dim] = endRate[dim] - offsets[dim]; - } else { - coarseNodeOffsets[dim] = coarseRate[dim] - offsets[dim]; + } else if (interpolationOrder_ == 1) { + if (coupled_ && (startIndices[dim] % coarseRate[dim] != 0 || + startIndices[dim] == gFineNodesPerDir[dim] - 1)) { + ghostInterface[2 * dim] = true; } - - if(interpolationOrder_ == 0) { - int rem = startIndices[dim] % coarseRate[dim]; - if( (rem != 0) && (rem <= Teuchos::as(coarseRate[dim]) / 2.0)) { - ghostInterface[2*dim] = true; - } - rem = startIndices[dim + 3] % coarseRate[dim]; - // uncoupled by nature does not require ghosts nodes - if(coupled_ && (startIndices[dim + 3] != gFineNodesPerDir[dim] - 1) && - (rem > Teuchos::as(coarseRate[dim]) / 2.0)) { - ghostInterface[2*dim + 1] = true; - } - - } else if(interpolationOrder_ == 1) { - if(coupled_ && (startIndices[dim] % coarseRate[dim] != 0 || - startIndices[dim] == gFineNodesPerDir[dim]-1)) { - ghostInterface[2*dim] = true; - } - if(coupled_ && (startIndices[dim + 3] != gFineNodesPerDir[dim] - 1) && - ((lFineNodesPerDir[dim] == 1) || (startIndices[dim + 3] % coarseRate[dim] != 0))) { - ghostInterface[2*dim+1] = true; - } + if (coupled_ && (startIndices[dim + 3] != gFineNodesPerDir[dim] - 1) && + ((lFineNodesPerDir[dim] == 1) || (startIndices[dim + 3] % coarseRate[dim] != 0))) { + ghostInterface[2 * dim + 1] = true; } } - } else { // Default value for dim >= numDimensions - endRate[dim] = 1; } + } else { // Default value for dim >= numDimensions + endRate[dim] = 1; } + } - *out << "singleCoarsePoint? 
" << singleCoarsePoint_ << std::endl; - *out << "gFineNodesPerDir: " << gFineNodesPerDir << std::endl; - *out << "lFineNodesPerDir: " << lFineNodesPerDir << std::endl; - *out << "endRate: " << endRate << std::endl; - *out << "ghostInterface: {" << ghostInterface[0] << ", " << ghostInterface[1] << ", " - << ghostInterface[2] << ", " << ghostInterface[3] << ", " << ghostInterface[4] << ", " - << ghostInterface[5] << "}" << std::endl; - *out << "meshEdge: {" << meshEdge[0] << ", " << meshEdge[1] << ", " - << meshEdge[2] << ", " << meshEdge[3] << ", " << meshEdge[4] << ", " - << meshEdge[5] << "}" << std::endl; - *out << "startIndices: " << startIndices << std::endl; - *out << "offsets: " << offsets << std::endl; - *out << "coarseNodeOffsets: " << coarseNodeOffsets << std::endl; + *out << "singleCoarsePoint? " << singleCoarsePoint_ << std::endl; + *out << "gFineNodesPerDir: " << gFineNodesPerDir << std::endl; + *out << "lFineNodesPerDir: " << lFineNodesPerDir << std::endl; + *out << "endRate: " << endRate << std::endl; + *out << "ghostInterface: {" << ghostInterface[0] << ", " << ghostInterface[1] << ", " + << ghostInterface[2] << ", " << ghostInterface[3] << ", " << ghostInterface[4] << ", " + << ghostInterface[5] << "}" << std::endl; + *out << "meshEdge: {" << meshEdge[0] << ", " << meshEdge[1] << ", " + << meshEdge[2] << ", " << meshEdge[3] << ", " << meshEdge[4] << ", " + << meshEdge[5] << "}" << std::endl; + *out << "startIndices: " << startIndices << std::endl; + *out << "offsets: " << offsets << std::endl; + *out << "coarseNodeOffsets: " << coarseNodeOffsets << std::endl; - // Here one element can represent either the degenerate case of one node or the more general - // case of two nodes, i.e. x---x is a 1D element with two nodes and x is a 1D element with - // one node. This helps generating a 3D space from tensorial products... 
- // A good way to handle this would be to generalize the algorithm to take into account the - // discretization order used in each direction, at least in the FEM sense, since a 0 degree - // discretization will have a unique node per element. This way 1D discretization can be - // viewed as a 3D problem with one 0 degree element in the y direction and one 0 degre - // element in the z direction. - // !!! Operations below are aftecting both local and global values that have two !!! - // different orientations. Orientations can be interchanged using mapDirG2L and mapDirL2G. - // coarseRate, endRate and offsets are in the global basis, as well as all the variables - // starting with a g. - // !!! while the variables starting with an l are in the local basis. !!! - for(int dim = 0; dim < 3; ++dim) { - if(dim < numDimensions) { - // Check whether the partition includes the "end" of the mesh which means that endRate - // will apply. Also make sure that endRate is not 0 which means that the mesh does not - // require a particular treatment at the boundaries. - if( meshEdge[2*dim + 1] ) { - lCoarseNodesPerDir[dim] = (lFineNodesPerDir[dim] - endRate[dim] + offsets[dim] - 1) - / coarseRate[dim] + 1; - if(offsets[dim] == 0) {++lCoarseNodesPerDir[dim];} - // We might want to coarsening the direction - // into a single layer if there are not enough - // points left to form two aggregates - if(singleCoarsePoint_ && lFineNodesPerDir[dim] - 1 < coarseRate[dim]) { - lCoarseNodesPerDir[dim] =1; - } - } else { - lCoarseNodesPerDir[dim] = (lFineNodesPerDir[dim] + offsets[dim] - 1) / coarseRate[dim]; - if(offsets[dim] == 0) {++lCoarseNodesPerDir[dim];} + // Here one element can represent either the degenerate case of one node or the more general + // case of two nodes, i.e. x---x is a 1D element with two nodes and x is a 1D element with + // one node. This helps generating a 3D space from tensorial products... 
+ // A good way to handle this would be to generalize the algorithm to take into account the + // discretization order used in each direction, at least in the FEM sense, since a 0 degree + // discretization will have a unique node per element. This way 1D discretization can be + // viewed as a 3D problem with one 0 degree element in the y direction and one 0 degre + // element in the z direction. + // !!! Operations below are aftecting both local and global values that have two !!! + // different orientations. Orientations can be interchanged using mapDirG2L and mapDirL2G. + // coarseRate, endRate and offsets are in the global basis, as well as all the variables + // starting with a g. + // !!! while the variables starting with an l are in the local basis. !!! + for (int dim = 0; dim < 3; ++dim) { + if (dim < numDimensions) { + // Check whether the partition includes the "end" of the mesh which means that endRate + // will apply. Also make sure that endRate is not 0 which means that the mesh does not + // require a particular treatment at the boundaries. + if (meshEdge[2 * dim + 1]) { + lCoarseNodesPerDir[dim] = (lFineNodesPerDir[dim] - endRate[dim] + offsets[dim] - 1) / coarseRate[dim] + 1; + if (offsets[dim] == 0) { + ++lCoarseNodesPerDir[dim]; } - - // The first branch of this if-statement will be used if the rank contains only one layer - // of nodes in direction i, that layer must also coincide with the boundary of the mesh - // and coarseRate[i] == endRate[i]... 
- if(interpolationOrder_ == 0) { - startGhostedCoarseNode[dim] = startIndices[dim] / coarseRate[dim]; - int rem = startIndices[dim] % coarseRate[dim]; - if(rem > (Teuchos::as(coarseRate[dim]) / 2.0) ) { - ++startGhostedCoarseNode[dim]; - } - } else { - if((startIndices[dim] == gFineNodesPerDir[dim] - 1) && - (startIndices[dim] % coarseRate[dim] == 0)) { - startGhostedCoarseNode[dim] = startIndices[dim] / coarseRate[dim] - 1; - } else { - startGhostedCoarseNode[dim] = startIndices[dim] / coarseRate[dim]; - } + // We might want to coarsening the direction + // into a single layer if there are not enough + // points left to form two aggregates + if (singleCoarsePoint_ && lFineNodesPerDir[dim] - 1 < coarseRate[dim]) { + lCoarseNodesPerDir[dim] = 1; } + } else { + lCoarseNodesPerDir[dim] = (lFineNodesPerDir[dim] + offsets[dim] - 1) / coarseRate[dim]; + if (offsets[dim] == 0) { + ++lCoarseNodesPerDir[dim]; + } + } - // This array is passed to the RAPFactory and eventually becomes gFineNodePerDir on the next - // level. - gCoarseNodesPerDir[dim] = (gFineNodesPerDir[dim] - 1) / coarseRate[dim]; - if((gFineNodesPerDir[dim] - 1) % coarseRate[dim] == 0) { - ++gCoarseNodesPerDir[dim]; + // The first branch of this if-statement will be used if the rank contains only one layer + // of nodes in direction i, that layer must also coincide with the boundary of the mesh + // and coarseRate[i] == endRate[i]... 
+ if (interpolationOrder_ == 0) { + startGhostedCoarseNode[dim] = startIndices[dim] / coarseRate[dim]; + int rem = startIndices[dim] % coarseRate[dim]; + if (rem > (Teuchos::as(coarseRate[dim]) / 2.0)) { + ++startGhostedCoarseNode[dim]; + } + } else { + if ((startIndices[dim] == gFineNodesPerDir[dim] - 1) && + (startIndices[dim] % coarseRate[dim] == 0)) { + startGhostedCoarseNode[dim] = startIndices[dim] / coarseRate[dim] - 1; } else { - gCoarseNodesPerDir[dim] += 2; + startGhostedCoarseNode[dim] = startIndices[dim] / coarseRate[dim]; } - } else { // Default value for dim >= numDimensions - // endRate[dim] = 1; - gCoarseNodesPerDir[dim] = 1; - lCoarseNodesPerDir[dim] = 1; - } // if (dim < numDimensions) - - // This would happen if the rank does not own any nodes but in that case a subcommunicator - // should be used so this should really not be a concern. - if(lFineNodesPerDir[dim] < 1) {lCoarseNodesPerDir[dim] = 0;} - ghostedNodesPerDir[dim] = lCoarseNodesPerDir[dim]; - // Check whether face *low needs ghost nodes - if(ghostInterface[2*dim]) {ghostedNodesPerDir[dim] += 1;} - // Check whether face *hi needs ghost nodes - if(ghostInterface[2*dim + 1]) {ghostedNodesPerDir[dim] += 1;} - } // Loop for dim=0:3 + } - // With uncoupled aggregation we need to communicate to compute the global number of coarse points - if(!coupled_) { - for(int dim = 0; dim < 3; ++dim) { - gCoarseNodesPerDir[dim] = -1; + // This array is passed to the RAPFactory and eventually becomes gFineNodePerDir on the next + // level. 
+ gCoarseNodesPerDir[dim] = (gFineNodesPerDir[dim] - 1) / coarseRate[dim]; + if ((gFineNodesPerDir[dim] - 1) % coarseRate[dim] == 0) { + ++gCoarseNodesPerDir[dim]; + } else { + gCoarseNodesPerDir[dim] += 2; } - } + } else { // Default value for dim >= numDimensions + // endRate[dim] = 1; + gCoarseNodesPerDir[dim] = 1; + lCoarseNodesPerDir[dim] = 1; + } // if (dim < numDimensions) - // Compute cummulative values - lNumCoarseNodes10 = lCoarseNodesPerDir[0]*lCoarseNodesPerDir[1]; - lNumCoarseNodes = lNumCoarseNodes10*lCoarseNodesPerDir[2]; - numGhostedNodes10 = ghostedNodesPerDir[1]*ghostedNodesPerDir[0]; - numGhostedNodes = numGhostedNodes10*ghostedNodesPerDir[2]; - numGhostNodes = numGhostedNodes - lNumCoarseNodes; + // This would happen if the rank does not own any nodes but in that case a subcommunicator + // should be used so this should really not be a concern. + if (lFineNodesPerDir[dim] < 1) { + lCoarseNodesPerDir[dim] = 0; + } + ghostedNodesPerDir[dim] = lCoarseNodesPerDir[dim]; + // Check whether face *low needs ghost nodes + if (ghostInterface[2 * dim]) { + ghostedNodesPerDir[dim] += 1; + } + // Check whether face *hi needs ghost nodes + if (ghostInterface[2 * dim + 1]) { + ghostedNodesPerDir[dim] += 1; + } + } // Loop for dim=0:3 - *out << "lCoarseNodesPerDir: " << lCoarseNodesPerDir << std::endl; - *out << "gCoarseNodesPerDir: " << gCoarseNodesPerDir << std::endl; - *out << "ghostedNodesPerDir: " << ghostedNodesPerDir << std::endl; - *out << "lNumCoarseNodes=" << lNumCoarseNodes << std::endl; - *out << "numGhostedNodes=" << numGhostedNodes << std::endl; + // With uncoupled aggregation we need to communicate to compute the global number of coarse points + if (!coupled_) { + for (int dim = 0; dim < 3; ++dim) { + gCoarseNodesPerDir[dim] = -1; + } } -} //namespace MueLu + // Compute cummulative values + lNumCoarseNodes10 = lCoarseNodesPerDir[0] * lCoarseNodesPerDir[1]; + lNumCoarseNodes = lNumCoarseNodes10 * lCoarseNodesPerDir[2]; + numGhostedNodes10 = 
ghostedNodesPerDir[1] * ghostedNodesPerDir[0]; + numGhostedNodes = numGhostedNodes10 * ghostedNodesPerDir[2]; + numGhostNodes = numGhostedNodes - lNumCoarseNodes; + + *out << "lCoarseNodesPerDir: " << lCoarseNodesPerDir << std::endl; + *out << "gCoarseNodesPerDir: " << gCoarseNodesPerDir << std::endl; + *out << "ghostedNodesPerDir: " << ghostedNodesPerDir << std::endl; + *out << "lNumCoarseNodes=" << lNumCoarseNodes << std::endl; + *out << "numGhostedNodes=" << numGhostedNodes << std::endl; +} + +} //namespace MueLu #define MUELU_INDEXMANAGER_SHORT -#endif // MUELU_INDEXMANAGER_DEF_HPP +#endif // MUELU_INDEXMANAGER_DEF_HPP diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_kokkos_decl.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_kokkos_decl.hpp index 113368ad6f50..cba9858d2e31 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_kokkos_decl.hpp @@ -53,7 +53,6 @@ #include "Teuchos_OrdinalTraits.hpp" - #include "MueLu_BaseClass.hpp" #include "MueLu_IndexManager_kokkos_fwd.hpp" @@ -74,112 +73,109 @@ namespace MueLu { spaces and it also provides utilites for coarsening. */ - template - class IndexManager_kokkos : public BaseClass { +template +class IndexManager_kokkos : public BaseClass { #undef MUELU_INDEXMANAGER_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - using execution_space = typename Node::execution_space; - using memory_space = typename Node::memory_space; - using device_type = Kokkos::Device; - using intTupleView = typename Kokkos::View; - using LOTupleView = typename Kokkos::View; - - private: - - const int meshLayout = UNCOUPLED; - int myRank = -1; - int numDimensions; ///< Number of spacial dimensions in the problem - int interpolationOrder_; ///< Interpolation order used by grid transfer operators using these aggregates. 
- intTupleView coarseRate; ///< coarsening rate in each direction - intTupleView endRate; ///< adapted coarsening rate at the edge of the mesh in each direction. - - LO lNumFineNodes; ///< local number of nodes. - LO lNumFineNodes10; ///< local number of nodes per 0-1 slice. - LOTupleView lFineNodesPerDir; ///< local number of nodes per direction. - - LO numCoarseNodes; ///< local number of nodes remaining after coarsening. - LO numCoarseNodes10; ///< local number of nodes per 0-1 slice remaining after coarsening. - LOTupleView coarseNodesPerDir; ///< local number of nodes per direction remaing after coarsening. - - public: - - //! Default constructor, return empty object - IndexManager_kokkos() = default; - - //! Constructs for uncoupled meshes - IndexManager_kokkos(const int NumDimensions, - const int interpolationOrder, - const int MyRank, - const ArrayView LFineNodesPerDir, - const ArrayView CoarseRate); - - virtual ~IndexManager_kokkos() {} - - //! Common setup pattern used for all the different types of undelying mesh - void setupIM(const int NumDimensions, - const int interpolationOrder, - const ArrayView coarseRate, - const ArrayView LFineNodesPerDir); - - //! Sets basic parameters used to compute indices on the mesh. - //! This method requires you to have set this->coarseRate. 
- void computeMeshParameters(); - - int getNumDimensions() const {return numDimensions;} - - int getInterpolationOrder() const {return interpolationOrder_;} - - LO getNumLocalFineNodes() const {return lNumFineNodes;} - - LO getNumCoarseNodes() const {return numCoarseNodes;} - - KOKKOS_INLINE_FUNCTION - intTupleView getCoarseningRates() const {return coarseRate;} - - KOKKOS_INLINE_FUNCTION - intTupleView getCoarseningEndRates() const {return endRate;} - - KOKKOS_INLINE_FUNCTION - LOTupleView getLocalFineNodesPerDir() const {return lFineNodesPerDir;} - - KOKKOS_INLINE_FUNCTION - LOTupleView getCoarseNodesPerDir() const {return coarseNodesPerDir;} - - Array getCoarseNodesPerDirArray() const; - - KOKKOS_INLINE_FUNCTION - void getFineLID2FineTuple(const LO myLID, LO (&tuple)[3]) const { - LO tmp; - tuple[2] = myLID / (lFineNodesPerDir(1)*lFineNodesPerDir(0)); - tmp = myLID % (lFineNodesPerDir(1)*lFineNodesPerDir(0)); - tuple[1] = tmp / lFineNodesPerDir(0); - tuple[0] = tmp % lFineNodesPerDir(0); - } // getFineNodeLocalTuple - - KOKKOS_INLINE_FUNCTION - void getFineTuple2FineLID(const LO tuple[3], LO& myLID) const { - myLID = tuple[2]*lNumFineNodes10 + tuple[1]*lFineNodesPerDir[0] + tuple[0]; - } // getFineNodeLID - - KOKKOS_INLINE_FUNCTION - void getCoarseLID2CoarseTuple(const LO myLID, LO (&tuple)[3]) const { - LO tmp; - tuple[2] = myLID / numCoarseNodes10; - tmp = myLID % numCoarseNodes10; - tuple[1] = tmp / coarseNodesPerDir[0]; - tuple[0] = tmp % coarseNodesPerDir[0]; - } // getCoarseNodeLocalTuple - - KOKKOS_INLINE_FUNCTION - void getCoarseTuple2CoarseLID(const LO i, const LO j, const LO k, LO& myLID) const { - myLID = k*numCoarseNodes10 + j*coarseNodesPerDir[0] + i; - } // getCoarseNodeLID - - }; - -} //namespace MueLu + public: + using execution_space = typename Node::execution_space; + using memory_space = typename Node::memory_space; + using device_type = Kokkos::Device; + using intTupleView = typename Kokkos::View; + using LOTupleView = typename Kokkos::View; 
+ + private: + const int meshLayout = UNCOUPLED; + int myRank = -1; + int numDimensions; ///< Number of spacial dimensions in the problem + int interpolationOrder_; ///< Interpolation order used by grid transfer operators using these aggregates. + intTupleView coarseRate; ///< coarsening rate in each direction + intTupleView endRate; ///< adapted coarsening rate at the edge of the mesh in each direction. + + LO lNumFineNodes; ///< local number of nodes. + LO lNumFineNodes10; ///< local number of nodes per 0-1 slice. + LOTupleView lFineNodesPerDir; ///< local number of nodes per direction. + + LO numCoarseNodes; ///< local number of nodes remaining after coarsening. + LO numCoarseNodes10; ///< local number of nodes per 0-1 slice remaining after coarsening. + LOTupleView coarseNodesPerDir; ///< local number of nodes per direction remaing after coarsening. + + public: + //! Default constructor, return empty object + IndexManager_kokkos() = default; + + //! Constructs for uncoupled meshes + IndexManager_kokkos(const int NumDimensions, + const int interpolationOrder, + const int MyRank, + const ArrayView LFineNodesPerDir, + const ArrayView CoarseRate); + + virtual ~IndexManager_kokkos() {} + + //! Common setup pattern used for all the different types of undelying mesh + void setupIM(const int NumDimensions, + const int interpolationOrder, + const ArrayView coarseRate, + const ArrayView LFineNodesPerDir); + + //! Sets basic parameters used to compute indices on the mesh. + //! This method requires you to have set this->coarseRate. 
+ void computeMeshParameters(); + + int getNumDimensions() const { return numDimensions; } + + int getInterpolationOrder() const { return interpolationOrder_; } + + LO getNumLocalFineNodes() const { return lNumFineNodes; } + + LO getNumCoarseNodes() const { return numCoarseNodes; } + + KOKKOS_INLINE_FUNCTION + intTupleView getCoarseningRates() const { return coarseRate; } + + KOKKOS_INLINE_FUNCTION + intTupleView getCoarseningEndRates() const { return endRate; } + + KOKKOS_INLINE_FUNCTION + LOTupleView getLocalFineNodesPerDir() const { return lFineNodesPerDir; } + + KOKKOS_INLINE_FUNCTION + LOTupleView getCoarseNodesPerDir() const { return coarseNodesPerDir; } + + Array getCoarseNodesPerDirArray() const; + + KOKKOS_INLINE_FUNCTION + void getFineLID2FineTuple(const LO myLID, LO (&tuple)[3]) const { + LO tmp; + tuple[2] = myLID / (lFineNodesPerDir(1) * lFineNodesPerDir(0)); + tmp = myLID % (lFineNodesPerDir(1) * lFineNodesPerDir(0)); + tuple[1] = tmp / lFineNodesPerDir(0); + tuple[0] = tmp % lFineNodesPerDir(0); + } // getFineNodeLocalTuple + + KOKKOS_INLINE_FUNCTION + void getFineTuple2FineLID(const LO tuple[3], LO& myLID) const { + myLID = tuple[2] * lNumFineNodes10 + tuple[1] * lFineNodesPerDir[0] + tuple[0]; + } // getFineNodeLID + + KOKKOS_INLINE_FUNCTION + void getCoarseLID2CoarseTuple(const LO myLID, LO (&tuple)[3]) const { + LO tmp; + tuple[2] = myLID / numCoarseNodes10; + tmp = myLID % numCoarseNodes10; + tuple[1] = tmp / coarseNodesPerDir[0]; + tuple[0] = tmp % coarseNodesPerDir[0]; + } // getCoarseNodeLocalTuple + + KOKKOS_INLINE_FUNCTION + void getCoarseTuple2CoarseLID(const LO i, const LO j, const LO k, LO& myLID) const { + myLID = k * numCoarseNodes10 + j * coarseNodesPerDir[0] + i; + } // getCoarseNodeLID +}; + +} //namespace MueLu #define MUELU_INDEXMANAGER_KOKKOS_SHORT -#endif // MUELU_INDEXMANAGER_KOKKOS_DECL_HPP +#endif // MUELU_INDEXMANAGER_KOKKOS_DECL_HPP diff --git 
a/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_kokkos_def.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_kokkos_def.hpp index f31f94421d86..45f58c4de3df 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_kokkos_def.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_kokkos_def.hpp @@ -63,171 +63,172 @@ namespace MueLu { - template - IndexManager_kokkos:: - IndexManager_kokkos(const int NumDimensions, - const int interpolationOrder, - const int MyRank, - const ArrayView LFineNodesPerDir, - const ArrayView CoarseRate) : - myRank(MyRank), coarseRate("coarsening rate"), endRate("endRate"), - lFineNodesPerDir("lFineNodesPerDir"), coarseNodesPerDir("lFineNodesPerDir") { - - RCP out; - if(const char* dbg = std::getenv("MUELU_INDEXMANAGER_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); - } - - setupIM(NumDimensions, interpolationOrder, CoarseRate, LFineNodesPerDir); +template +IndexManager_kokkos:: + IndexManager_kokkos(const int NumDimensions, + const int interpolationOrder, + const int MyRank, + const ArrayView LFineNodesPerDir, + const ArrayView CoarseRate) + : myRank(MyRank) + , coarseRate("coarsening rate") + , endRate("endRate") + , lFineNodesPerDir("lFineNodesPerDir") + , coarseNodesPerDir("lFineNodesPerDir") { + RCP out; + if (const char* dbg = std::getenv("MUELU_INDEXMANAGER_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } - *out << "Done setting up the IndexManager" << std::endl; + setupIM(NumDimensions, interpolationOrder, CoarseRate, LFineNodesPerDir); - computeMeshParameters(); + *out << "Done setting up the IndexManager" << 
std::endl; - *out << "Computed Mesh Parameters" << std::endl; + computeMeshParameters(); - } // IndexManager_kokkos Constructor + *out << "Computed Mesh Parameters" << std::endl; - template - void IndexManager_kokkos:: - setupIM(const int NumDimensions, const int interpolationOrder, - const ArrayView CoarseRate, const ArrayView LFineNodesPerDir) { +} // IndexManager_kokkos Constructor - numDimensions = NumDimensions; - interpolationOrder_ = interpolationOrder; +template +void IndexManager_kokkos:: + setupIM(const int NumDimensions, const int interpolationOrder, + const ArrayView CoarseRate, const ArrayView LFineNodesPerDir) { + numDimensions = NumDimensions; + interpolationOrder_ = interpolationOrder; - TEUCHOS_TEST_FOR_EXCEPTION((LFineNodesPerDir.size() != 3) - && (LFineNodesPerDir.size() != numDimensions), - Exceptions::RuntimeError, - "LFineNodesPerDir has to be of size 3 or of size numDimensions!"); + TEUCHOS_TEST_FOR_EXCEPTION((LFineNodesPerDir.size() != 3) && (LFineNodesPerDir.size() != numDimensions), + Exceptions::RuntimeError, + "LFineNodesPerDir has to be of size 3 or of size numDimensions!"); - typename Kokkos::View::HostMirror lFineNodesPerDir_h = Kokkos::create_mirror_view(lFineNodesPerDir); - Kokkos::deep_copy(lFineNodesPerDir_h, lFineNodesPerDir); - typename Kokkos::View::HostMirror coarseRate_h = Kokkos::create_mirror_view(coarseRate); - Kokkos::deep_copy(coarseRate_h, coarseRate); + typename Kokkos::View::HostMirror lFineNodesPerDir_h = Kokkos::create_mirror_view(lFineNodesPerDir); + Kokkos::deep_copy(lFineNodesPerDir_h, lFineNodesPerDir); + typename Kokkos::View::HostMirror coarseRate_h = Kokkos::create_mirror_view(coarseRate); + Kokkos::deep_copy(coarseRate_h, coarseRate); - // Load coarse rate, being careful about formating - // Also load lFineNodesPerDir - for(int dim = 0; dim < 3; ++dim) { - if(dim < getNumDimensions()) { - lFineNodesPerDir_h(dim) = LFineNodesPerDir[dim]; - if(CoarseRate.size() == 1) { - coarseRate_h(dim) = CoarseRate[0]; - } 
else if(CoarseRate.size() == getNumDimensions()) { - coarseRate_h(dim) = CoarseRate[dim]; - } - } else { - lFineNodesPerDir_h(dim) = 1; - coarseRate_h(dim) = 1; + // Load coarse rate, being careful about formating + // Also load lFineNodesPerDir + for (int dim = 0; dim < 3; ++dim) { + if (dim < getNumDimensions()) { + lFineNodesPerDir_h(dim) = LFineNodesPerDir[dim]; + if (CoarseRate.size() == 1) { + coarseRate_h(dim) = CoarseRate[0]; + } else if (CoarseRate.size() == getNumDimensions()) { + coarseRate_h(dim) = CoarseRate[dim]; } - } - - Kokkos::deep_copy(lFineNodesPerDir, lFineNodesPerDir_h); - Kokkos::deep_copy(coarseRate, coarseRate_h); - - } // setupIM - - template - void IndexManager_kokkos::computeMeshParameters() { - - RCP out; - if(const char* dbg = std::getenv("MUELU_INDEXMANAGER_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + lFineNodesPerDir_h(dim) = 1; + coarseRate_h(dim) = 1; } + } - typename Kokkos::View::HostMirror coarseRate_h = Kokkos::create_mirror_view(coarseRate); - typename Kokkos::View::HostMirror endRate_h = Kokkos::create_mirror_view(endRate); - + Kokkos::deep_copy(lFineNodesPerDir, lFineNodesPerDir_h); + Kokkos::deep_copy(coarseRate, coarseRate_h); - typename Kokkos::View::HostMirror lFineNodesPerDir_h = Kokkos::create_mirror_view(lFineNodesPerDir); - typename Kokkos::View::HostMirror coarseNodesPerDir_h = Kokkos::create_mirror_view(coarseNodesPerDir); - Kokkos::deep_copy(lFineNodesPerDir_h, lFineNodesPerDir); - Kokkos::deep_copy(coarseRate_h, coarseRate); +} // setupIM - lNumFineNodes10 = lFineNodesPerDir_h(1)*lFineNodesPerDir_h(0); - lNumFineNodes = lFineNodesPerDir_h(2)*lNumFineNodes10; - for(int dim = 0; dim < 3; ++dim) { - if(dim < numDimensions) { - endRate_h(dim) = (lFineNodesPerDir_h(dim) - 1) % coarseRate_h(dim); - if(endRate_h(dim) == 0) {endRate_h(dim) = 
coarseRate_h(dim);} +template +void IndexManager_kokkos::computeMeshParameters() { + RCP out; + if (const char* dbg = std::getenv("MUELU_INDEXMANAGER_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } - } else { // Default value for dim >= numDimensions - endRate_h(dim) = 1; + typename Kokkos::View::HostMirror coarseRate_h = Kokkos::create_mirror_view(coarseRate); + typename Kokkos::View::HostMirror endRate_h = Kokkos::create_mirror_view(endRate); + + typename Kokkos::View::HostMirror lFineNodesPerDir_h = Kokkos::create_mirror_view(lFineNodesPerDir); + typename Kokkos::View::HostMirror coarseNodesPerDir_h = Kokkos::create_mirror_view(coarseNodesPerDir); + Kokkos::deep_copy(lFineNodesPerDir_h, lFineNodesPerDir); + Kokkos::deep_copy(coarseRate_h, coarseRate); + + lNumFineNodes10 = lFineNodesPerDir_h(1) * lFineNodesPerDir_h(0); + lNumFineNodes = lFineNodesPerDir_h(2) * lNumFineNodes10; + for (int dim = 0; dim < 3; ++dim) { + if (dim < numDimensions) { + endRate_h(dim) = (lFineNodesPerDir_h(dim) - 1) % coarseRate_h(dim); + if (endRate_h(dim) == 0) { + endRate_h(dim) = coarseRate_h(dim); } - } - *out << "lFineNodesPerDir: {" << lFineNodesPerDir_h(0) << ", " << lFineNodesPerDir_h(1) << ", " - << lFineNodesPerDir_h(2) << "}" << std::endl; - *out << "endRate: {" << endRate_h(0) << ", " << endRate_h(1) << ", " - << endRate_h(2) << "}" << std::endl; - - // Here one element can represent either the degenerate case of one node or the more general - // case of two nodes, i.e. x---x is a 1D element with two nodes and x is a 1D element with - // one node. This helps generating a 3D space from tensorial products... 
- // A good way to handle this would be to generalize the algorithm to take into account the - // discretization order used in each direction, at least in the FEM sense, since a 0 degree - // discretization will have a unique node per element. This way 1D discretization can be - // viewed as a 3D problem with one 0 degree element in the y direction and one 0 degre - // element in the z direction. - // !!! Operations below are aftecting both local and global values that have two !!! - // different orientations. Orientations can be interchanged using mapDirG2L and mapDirL2G. - // coarseRate, endRate and offsets are in the global basis, as well as all the variables - // starting with a g. - // !!! while the variables starting with an l are in the local basis. !!! - for(int dim = 0; dim < 3; ++dim) { - if(dim < numDimensions) { - // Check whether the partition includes the "end" of the mesh which means that endRate - // will apply. Also make sure that endRate is not 0 which means that the mesh does not - // require a particular treatment at the boundaries. - coarseNodesPerDir_h(dim) = (lFineNodesPerDir_h(dim) - endRate_h(dim) - 1) - / coarseRate_h(dim) + 2; - - } else { // Default value for dim >= numDimensions - // endRate[dim] = 1; - coarseNodesPerDir_h(dim) = 1; - } // if (dim < numDimensions) - - // This would happen if the rank does not own any nodes but in that case a subcommunicator - // should be used so this should really not be a concern. - if(lFineNodesPerDir_h(dim) < 1) {coarseNodesPerDir_h(dim) = 0;} - } // Loop for dim=0:3 - - // Compute cummulative values - numCoarseNodes10 = coarseNodesPerDir_h(0)*coarseNodesPerDir_h(1); - numCoarseNodes = numCoarseNodes10*coarseNodesPerDir_h(2); - - *out << "coarseNodesPerDir: {" << coarseNodesPerDir_h(0) << ", " - << coarseNodesPerDir_h(1) << ", " << coarseNodesPerDir_h(2) << "}" << std::endl; - *out << "numCoarseNodes=" << numCoarseNodes << std::endl; - - // Copy Host data to Device. 
- Kokkos::deep_copy(coarseRate, coarseRate_h); - Kokkos::deep_copy(endRate, endRate_h); - Kokkos::deep_copy(lFineNodesPerDir, lFineNodesPerDir_h); - Kokkos::deep_copy(coarseNodesPerDir, coarseNodesPerDir_h); + } else { // Default value for dim >= numDimensions + endRate_h(dim) = 1; + } } - template - Array IndexManager_kokkos:: - getCoarseNodesPerDirArray() const { - typename LOTupleView::HostMirror coarseNodesPerDir_h = Kokkos::create_mirror_view(coarseNodesPerDir); - Kokkos::deep_copy(coarseNodesPerDir_h, coarseNodesPerDir); - Array coarseNodesPerDirArray(3); - - for(int dim = 0; dim < 3; ++dim) { - coarseNodesPerDirArray[dim] = coarseNodesPerDir_h(dim); + *out << "lFineNodesPerDir: {" << lFineNodesPerDir_h(0) << ", " << lFineNodesPerDir_h(1) << ", " + << lFineNodesPerDir_h(2) << "}" << std::endl; + *out << "endRate: {" << endRate_h(0) << ", " << endRate_h(1) << ", " + << endRate_h(2) << "}" << std::endl; + + // Here one element can represent either the degenerate case of one node or the more general + // case of two nodes, i.e. x---x is a 1D element with two nodes and x is a 1D element with + // one node. This helps generating a 3D space from tensorial products... + // A good way to handle this would be to generalize the algorithm to take into account the + // discretization order used in each direction, at least in the FEM sense, since a 0 degree + // discretization will have a unique node per element. This way 1D discretization can be + // viewed as a 3D problem with one 0 degree element in the y direction and one 0 degre + // element in the z direction. + // !!! Operations below are aftecting both local and global values that have two !!! + // different orientations. Orientations can be interchanged using mapDirG2L and mapDirL2G. + // coarseRate, endRate and offsets are in the global basis, as well as all the variables + // starting with a g. + // !!! while the variables starting with an l are in the local basis. !!! 
+ for (int dim = 0; dim < 3; ++dim) { + if (dim < numDimensions) { + // Check whether the partition includes the "end" of the mesh which means that endRate + // will apply. Also make sure that endRate is not 0 which means that the mesh does not + // require a particular treatment at the boundaries. + coarseNodesPerDir_h(dim) = (lFineNodesPerDir_h(dim) - endRate_h(dim) - 1) / coarseRate_h(dim) + 2; + + } else { // Default value for dim >= numDimensions + // endRate[dim] = 1; + coarseNodesPerDir_h(dim) = 1; + } // if (dim < numDimensions) + + // This would happen if the rank does not own any nodes but in that case a subcommunicator + // should be used so this should really not be a concern. + if (lFineNodesPerDir_h(dim) < 1) { + coarseNodesPerDir_h(dim) = 0; } + } // Loop for dim=0:3 + + // Compute cummulative values + numCoarseNodes10 = coarseNodesPerDir_h(0) * coarseNodesPerDir_h(1); + numCoarseNodes = numCoarseNodes10 * coarseNodesPerDir_h(2); + + *out << "coarseNodesPerDir: {" << coarseNodesPerDir_h(0) << ", " + << coarseNodesPerDir_h(1) << ", " << coarseNodesPerDir_h(2) << "}" << std::endl; + *out << "numCoarseNodes=" << numCoarseNodes << std::endl; + + // Copy Host data to Device. 
+ Kokkos::deep_copy(coarseRate, coarseRate_h); + Kokkos::deep_copy(endRate, endRate_h); + Kokkos::deep_copy(lFineNodesPerDir, lFineNodesPerDir_h); + Kokkos::deep_copy(coarseNodesPerDir, coarseNodesPerDir_h); +} + +template +Array IndexManager_kokkos:: + getCoarseNodesPerDirArray() const { + typename LOTupleView::HostMirror coarseNodesPerDir_h = Kokkos::create_mirror_view(coarseNodesPerDir); + Kokkos::deep_copy(coarseNodesPerDir_h, coarseNodesPerDir); + Array coarseNodesPerDirArray(3); + + for (int dim = 0; dim < 3; ++dim) { + coarseNodesPerDirArray[dim] = coarseNodesPerDir_h(dim); + } - return coarseNodesPerDirArray; - } // getCoarseNodesData + return coarseNodesPerDirArray; +} // getCoarseNodesData -} //namespace MueLu +} //namespace MueLu #define MUELU_INDEXMANAGER_KOKKOS_SHORT -#endif // MUELU_INDEXMANAGER_DEF_KOKKOS_HPP +#endif // MUELU_INDEXMANAGER_DEF_KOKKOS_HPP diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_decl.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_decl.hpp index c43758ee8352..dadd50c145c7 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_decl.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_decl.hpp @@ -46,7 +46,6 @@ #ifndef MUELU_STRUCTUREDAGGREGATIONFACTORY_DECL_HPP #define MUELU_STRUCTUREDAGGREGATIONFACTORY_DECL_HPP - // #include // #include // #include @@ -102,60 +101,59 @@ namespace MueLu { | Aggregates | StructuredAggregationFactory | Container class with aggregation information. See also Aggregates. */ - template - class StructuredAggregationFactory : public SingleLevelFactoryBase { +template +class StructuredAggregationFactory : public SingleLevelFactoryBase { #undef MUELU_STRUCTUREDAGGREGATIONFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ - - //! Constructor. - StructuredAggregationFactory(); + public: + //! 
@name Constructors/Destructors. + //@{ - //! Destructor. - virtual ~StructuredAggregationFactory() { } + //! Constructor. + StructuredAggregationFactory(); - RCP GetValidParameterList() const; + //! Destructor. + virtual ~StructuredAggregationFactory() {} - //@} + RCP GetValidParameterList() const; - //! @name Set/get methods. - //@{ - // set information about 1-node aggregates (map name and generating factory) - void SetOnePtMapName(const std::string name, Teuchos::RCP mapFact) { - SetParameter("OnePt aggregate map name", ParameterEntry(std::string(name))); // revalidate - SetFactory("OnePt aggregate map factory",mapFact); - } + //@} - //@} + //! @name Set/get methods. + //@{ + // set information about 1-node aggregates (map name and generating factory) + void SetOnePtMapName(const std::string name, Teuchos::RCP mapFact) { + SetParameter("OnePt aggregate map name", ParameterEntry(std::string(name))); // revalidate + SetFactory("OnePt aggregate map factory", mapFact); + } - //! Input - //@{ + //@} - void DeclareInput(Level& currentLevel) const; + //! Input + //@{ - //@} + void DeclareInput(Level& currentLevel) const; - //! @name Build methods. - //@{ + //@} - /*! @brief Build aggregates. */ - void Build(Level& currentLevel) const; + //! @name Build methods. + //@{ - //@} + /*! @brief Build aggregates. */ + void Build(Level& currentLevel) const; - private: + //@} - //! boolean flag: definition phase - //! if true, the aggregation algorithms still can be set and changed. - //! if false, no change in aggregation algorithms is possible any more - mutable bool bDefinitionPhase_; + private: + //! boolean flag: definition phase + //! if true, the aggregation algorithms still can be set and changed. + //! 
if false, no change in aggregation algorithms is possible any more + mutable bool bDefinitionPhase_; - }; // class StructuredAggregationFactory +}; // class StructuredAggregationFactory -} +} // namespace MueLu #define MUELU_STRUCTUREDAGGREGATIONFACTORY_SHORT #endif /* MUELU_STRUCTUREDAGGREGATIONFACTORY_DECL_HPP */ diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_def.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_def.hpp index 94685aefcef6..652811c93b60 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_def.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_def.hpp @@ -63,173 +63,172 @@ namespace MueLu { - template - StructuredAggregationFactory:: - StructuredAggregationFactory() : bDefinitionPhase_(true) - { } +template +StructuredAggregationFactory:: + StructuredAggregationFactory() + : bDefinitionPhase_(true) {} - template - RCP StructuredAggregationFactory:: - GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP StructuredAggregationFactory:: + GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("aggregation: preserve Dirichlet points"); - SET_VALID_ENTRY("aggregation: allow user-specified singletons"); - SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors"); - SET_VALID_ENTRY("aggregation: phase3 avoid singletons"); - - // general variables needed in StructuredAggregationFactory - SET_VALID_ENTRY("aggregation: mesh layout"); - SET_VALID_ENTRY("aggregation: mode"); - SET_VALID_ENTRY("aggregation: output type"); - SET_VALID_ENTRY("aggregation: coarsening rate"); - SET_VALID_ENTRY("aggregation: coarsening order"); -#undef SET_VALID_ENTRY - validParamList->set >("Graph", Teuchos::null, - "Graph of the 
matrix after amalgamation but without dropping."); - validParamList->set >("numDimensions", Teuchos::null, - "Number of spatial dimension provided by CoordinatesTransferFactory."); - validParamList->set >("gNodesPerDim", Teuchos::null, - "Global number of nodes per spatial dimension provided by CoordinatesTransferFactory."); - validParamList->set >("lNodesPerDim", Teuchos::null, - "Local number of nodes per spatial dimension provided by CoordinatesTransferFactory."); - validParamList->set >("DofsPerNode", Teuchos::null, - "Generating factory for variable \'DofsPerNode\', usually the same as the \'Graph\' factory"); - validParamList->set("aggregation: single coarse point", false, - "Allows the aggreagtion process to reduce spacial dimensions to a single layer"); - - return validParamList; - } // GetValidParameterList() - - template - void StructuredAggregationFactory:: - DeclareInput(Level& currentLevel) const { - Input(currentLevel, "Graph"); - Input(currentLevel, "DofsPerNode"); - - ParameterList pL = GetParameterList(); - std::string coupling = pL.get("aggregation: mode"); - const bool coupled = (coupling == "coupled" ? 
true : false); - if(coupled) { - // Request the global number of nodes per dimensions - if(currentLevel.GetLevelID() == 0) { - if(currentLevel.IsAvailable("gNodesPerDim", NoFactory::get())) { - currentLevel.DeclareInput("gNodesPerDim", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("gNodesPerDim", NoFactory::get()), - Exceptions::RuntimeError, - "gNodesPerDim was not provided by the user on level0!"); - } + SET_VALID_ENTRY("aggregation: preserve Dirichlet points"); + SET_VALID_ENTRY("aggregation: allow user-specified singletons"); + SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors"); + SET_VALID_ENTRY("aggregation: phase3 avoid singletons"); + + // general variables needed in StructuredAggregationFactory + SET_VALID_ENTRY("aggregation: mesh layout"); + SET_VALID_ENTRY("aggregation: mode"); + SET_VALID_ENTRY("aggregation: output type"); + SET_VALID_ENTRY("aggregation: coarsening rate"); + SET_VALID_ENTRY("aggregation: coarsening order"); +#undef SET_VALID_ENTRY + validParamList->set >("Graph", Teuchos::null, + "Graph of the matrix after amalgamation but without dropping."); + validParamList->set >("numDimensions", Teuchos::null, + "Number of spatial dimension provided by CoordinatesTransferFactory."); + validParamList->set >("gNodesPerDim", Teuchos::null, + "Global number of nodes per spatial dimension provided by CoordinatesTransferFactory."); + validParamList->set >("lNodesPerDim", Teuchos::null, + "Local number of nodes per spatial dimension provided by CoordinatesTransferFactory."); + validParamList->set >("DofsPerNode", Teuchos::null, + "Generating factory for variable \'DofsPerNode\', usually the same as the \'Graph\' factory"); + validParamList->set("aggregation: single coarse point", false, + "Allows the aggreagtion process to reduce spacial dimensions to a single layer"); + + return validParamList; +} // GetValidParameterList() + +template +void StructuredAggregationFactory:: + 
DeclareInput(Level& currentLevel) const { + Input(currentLevel, "Graph"); + Input(currentLevel, "DofsPerNode"); + + ParameterList pL = GetParameterList(); + std::string coupling = pL.get("aggregation: mode"); + const bool coupled = (coupling == "coupled" ? true : false); + if (coupled) { + // Request the global number of nodes per dimensions + if (currentLevel.GetLevelID() == 0) { + if (currentLevel.IsAvailable("gNodesPerDim", NoFactory::get())) { + currentLevel.DeclareInput("gNodesPerDim", NoFactory::get(), this); } else { - Input(currentLevel, "gNodesPerDim"); - } - } - - // Request the local number of nodes per dimensions - if(currentLevel.GetLevelID() == 0) { - if(currentLevel.IsAvailable("numDimensions", NoFactory::get())) { - currentLevel.DeclareInput("numDimensions", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("numDimensions", NoFactory::get()), - Exceptions::RuntimeError, - "numDimensions was not provided by the user on level0!"); - } - if(currentLevel.IsAvailable("lNodesPerDim", NoFactory::get())) { - currentLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("lNodesPerDim", NoFactory::get()), + TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("gNodesPerDim", NoFactory::get()), Exceptions::RuntimeError, - "lNodesPerDim was not provided by the user on level0!"); + "gNodesPerDim was not provided by the user on level0!"); } } else { - Input(currentLevel, "numDimensions"); - Input(currentLevel, "lNodesPerDim"); + Input(currentLevel, "gNodesPerDim"); } - } // DeclareInput() - - template - void StructuredAggregationFactory:: - Build(Level ¤tLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); + } - RCP out; - if(const char* dbg = std::getenv("MUELU_STRUCTUREDAGGREGATION_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); + // Request the local number 
of nodes per dimensions + if (currentLevel.GetLevelID() == 0) { + if (currentLevel.IsAvailable("numDimensions", NoFactory::get())) { + currentLevel.DeclareInput("numDimensions", NoFactory::get(), this); } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("numDimensions", NoFactory::get()), + Exceptions::RuntimeError, + "numDimensions was not provided by the user on level0!"); } - - *out << "Entering structured aggregation" << std::endl; - - ParameterList pL = GetParameterList(); - bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed - - // General problem informations are gathered from data stored in the problem matix. - RCP graph = Get< RCP >(currentLevel, "Graph"); - RCP fineMap = graph->GetDomainMap(); - const int myRank = fineMap->getComm()->getRank(); - const int numRanks = fineMap->getComm()->getSize(); - const GO minGlobalIndex = fineMap->getMinGlobalIndex(); - const LO dofsPerNode = Get(currentLevel, "DofsPerNode"); - - // Since we want to operate on nodes and not dof, we need to modify the rowMap in order to - // obtain a nodeMap. - const int interpolationOrder = pL.get("aggregation: coarsening order"); - std::string meshLayout = pL.get("aggregation: mesh layout"); - std::string coupling = pL.get("aggregation: mode"); - const bool coupled = (coupling == "coupled" ? true : false); - std::string outputType = pL.get("aggregation: output type"); - const bool outputAggregates = (outputType == "Aggregates" ? true : false); - const bool singleCoarsePoint = pL.get("aggregation: single coarse point"); - int numDimensions; - Array gFineNodesPerDir(3); - Array lFineNodesPerDir(3); - if(currentLevel.GetLevelID() == 0) { - // On level 0, data is provided by applications and has no associated factory. 
- numDimensions = currentLevel.Get("numDimensions", NoFactory::get()); - lFineNodesPerDir = currentLevel.Get >("lNodesPerDim", NoFactory::get()); - if(coupled) { - gFineNodesPerDir = currentLevel.Get >("gNodesPerDim", NoFactory::get()); - } + if (currentLevel.IsAvailable("lNodesPerDim", NoFactory::get())) { + currentLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this); } else { - // On level > 0, data is provided directly by generating factories. - numDimensions = Get(currentLevel, "numDimensions"); - lFineNodesPerDir = Get >(currentLevel, "lNodesPerDim"); - if(coupled) { - gFineNodesPerDir = Get >(currentLevel, "gNodesPerDim"); - } + TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("lNodesPerDim", NoFactory::get()), + Exceptions::RuntimeError, + "lNodesPerDim was not provided by the user on level0!"); } - - - // First make sure that input parameters are set logically based on dimension - for(int dim = 0; dim < 3; ++dim) { - if(dim >= numDimensions) { - gFineNodesPerDir[dim] = 1; - lFineNodesPerDir[dim] = 1; - } + } else { + Input(currentLevel, "numDimensions"); + Input(currentLevel, "lNodesPerDim"); + } +} // DeclareInput() + +template +void StructuredAggregationFactory:: + Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + + RCP out; + if (const char* dbg = std::getenv("MUELU_STRUCTUREDAGGREGATION_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } + + *out << "Entering structured aggregation" << std::endl; + + ParameterList pL = GetParameterList(); + bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed + + // General problem informations are gathered from data stored in the problem matix. 
+ RCP graph = Get >(currentLevel, "Graph"); + RCP fineMap = graph->GetDomainMap(); + const int myRank = fineMap->getComm()->getRank(); + const int numRanks = fineMap->getComm()->getSize(); + const GO minGlobalIndex = fineMap->getMinGlobalIndex(); + const LO dofsPerNode = Get(currentLevel, "DofsPerNode"); + + // Since we want to operate on nodes and not dof, we need to modify the rowMap in order to + // obtain a nodeMap. + const int interpolationOrder = pL.get("aggregation: coarsening order"); + std::string meshLayout = pL.get("aggregation: mesh layout"); + std::string coupling = pL.get("aggregation: mode"); + const bool coupled = (coupling == "coupled" ? true : false); + std::string outputType = pL.get("aggregation: output type"); + const bool outputAggregates = (outputType == "Aggregates" ? true : false); + const bool singleCoarsePoint = pL.get("aggregation: single coarse point"); + int numDimensions; + Array gFineNodesPerDir(3); + Array lFineNodesPerDir(3); + if (currentLevel.GetLevelID() == 0) { + // On level 0, data is provided by applications and has no associated factory. + numDimensions = currentLevel.Get("numDimensions", NoFactory::get()); + lFineNodesPerDir = currentLevel.Get >("lNodesPerDim", NoFactory::get()); + if (coupled) { + gFineNodesPerDir = currentLevel.Get >("gNodesPerDim", NoFactory::get()); } - - // Get the coarsening rate - std::string coarseningRate = pL.get("aggregation: coarsening rate"); - Teuchos::Array coarseRate; - try { - coarseRate = Teuchos::fromStringToArray(coarseningRate); - } catch(const Teuchos::InvalidArrayStringRepresentation& e) { - GetOStream(Errors,-1) << " *** \"aggregation: coarsening rate\" must be a string convertible into an array! *** " - << std::endl; - throw e; + } else { + // On level > 0, data is provided directly by generating factories. 
+ numDimensions = Get(currentLevel, "numDimensions"); + lFineNodesPerDir = Get >(currentLevel, "lNodesPerDim"); + if (coupled) { + gFineNodesPerDir = Get >(currentLevel, "gNodesPerDim"); } - TEUCHOS_TEST_FOR_EXCEPTION((coarseRate.size() > 1) && (coarseRate.size() < numDimensions), - Exceptions::RuntimeError, - "\"aggregation: coarsening rate\" must have at least as many" - " components as the number of spatial dimensions in the problem."); + } - // Now that we have extracted info from the level, create the IndexManager - RCP geoData; - if(!coupled) { - geoData = rcp(new MueLu::UncoupledIndexManager(fineMap->getComm(), + // First make sure that input parameters are set logically based on dimension + for (int dim = 0; dim < 3; ++dim) { + if (dim >= numDimensions) { + gFineNodesPerDir[dim] = 1; + lFineNodesPerDir[dim] = 1; + } + } + + // Get the coarsening rate + std::string coarseningRate = pL.get("aggregation: coarsening rate"); + Teuchos::Array coarseRate; + try { + coarseRate = Teuchos::fromStringToArray(coarseningRate); + } catch (const Teuchos::InvalidArrayStringRepresentation& e) { + GetOStream(Errors, -1) << " *** \"aggregation: coarsening rate\" must be a string convertible into an array! 
*** " + << std::endl; + throw e; + } + TEUCHOS_TEST_FOR_EXCEPTION((coarseRate.size() > 1) && (coarseRate.size() < numDimensions), + Exceptions::RuntimeError, + "\"aggregation: coarsening rate\" must have at least as many" + " components as the number of spatial dimensions in the problem."); + + // Now that we have extracted info from the level, create the IndexManager + RCP geoData; + if (!coupled) { + geoData = rcp(new MueLu::UncoupledIndexManager(fineMap->getComm(), coupled, numDimensions, interpolationOrder, @@ -239,23 +238,23 @@ namespace MueLu { lFineNodesPerDir, coarseRate, singleCoarsePoint)); - } else if(meshLayout == "Local Lexicographic") { - Array meshData; - if(currentLevel.GetLevelID() == 0) { - // On level 0, data is provided by applications and has no associated factory. - meshData = currentLevel.Get >("aggregation: mesh data", NoFactory::get()); - TEUCHOS_TEST_FOR_EXCEPTION(meshData.empty() == true, Exceptions::RuntimeError, - "The meshData array is empty, somehow the input for structured" - " aggregation are not captured correctly."); - } else { - // On level > 0, data is provided directly by generating factories. - meshData = Get >(currentLevel, "aggregation: mesh data"); - } - // Note, LBV Feb 5th 2018: - // I think that it might make sense to pass ghostInterface rather than interpolationOrder. - // For that I need to make sure that ghostInterface can be computed with minimal mesh - // knowledge outside of the IndexManager... - geoData = rcp(new MueLu::LocalLexicographicIndexManager(fineMap->getComm(), + } else if (meshLayout == "Local Lexicographic") { + Array meshData; + if (currentLevel.GetLevelID() == 0) { + // On level 0, data is provided by applications and has no associated factory. 
+ meshData = currentLevel.Get >("aggregation: mesh data", NoFactory::get()); + TEUCHOS_TEST_FOR_EXCEPTION(meshData.empty() == true, Exceptions::RuntimeError, + "The meshData array is empty, somehow the input for structured" + " aggregation are not captured correctly."); + } else { + // On level > 0, data is provided directly by generating factories. + meshData = Get >(currentLevel, "aggregation: mesh data"); + } + // Note, LBV Feb 5th 2018: + // I think that it might make sense to pass ghostInterface rather than interpolationOrder. + // For that I need to make sure that ghostInterface can be computed with minimal mesh + // knowledge outside of the IndexManager... + geoData = rcp(new MueLu::LocalLexicographicIndexManager(fineMap->getComm(), coupled, numDimensions, interpolationOrder, @@ -265,12 +264,12 @@ namespace MueLu { lFineNodesPerDir, coarseRate, meshData)); - } else if(meshLayout == "Global Lexicographic") { - // Note, LBV Feb 5th 2018: - // I think that it might make sense to pass ghostInterface rather than interpolationOrder. - // For that I need to make sure that ghostInterface can be computed with minimal mesh - // knowledge outside of the IndexManager... - geoData = rcp(new MueLu::GlobalLexicographicIndexManager(fineMap->getComm(), + } else if (meshLayout == "Global Lexicographic") { + // Note, LBV Feb 5th 2018: + // I think that it might make sense to pass ghostInterface rather than interpolationOrder. + // For that I need to make sure that ghostInterface can be computed with minimal mesh + // knowledge outside of the IndexManager... 
+ geoData = rcp(new MueLu::GlobalLexicographicIndexManager(fineMap->getComm(), coupled, numDimensions, interpolationOrder, @@ -278,75 +277,71 @@ namespace MueLu { lFineNodesPerDir, coarseRate, minGlobalIndex)); - } - - - *out << "The index manager has now been built" << std::endl; - *out << "graph num nodes: " << fineMap->getLocalNumElements() - << ", structured aggregation num nodes: " << geoData->getNumLocalFineNodes() << std::endl; - TEUCHOS_TEST_FOR_EXCEPTION(fineMap->getLocalNumElements() - != static_cast(geoData->getNumLocalFineNodes()), + } + + *out << "The index manager has now been built" << std::endl; + *out << "graph num nodes: " << fineMap->getLocalNumElements() + << ", structured aggregation num nodes: " << geoData->getNumLocalFineNodes() << std::endl; + TEUCHOS_TEST_FOR_EXCEPTION(fineMap->getLocalNumElements() != static_cast(geoData->getNumLocalFineNodes()), + Exceptions::RuntimeError, + "The local number of elements in the graph's map is not equal to " + "the number of nodes given by: lNodesPerDim!"); + if (coupled) { + TEUCHOS_TEST_FOR_EXCEPTION(fineMap->getGlobalNumElements() != static_cast(geoData->getNumGlobalFineNodes()), Exceptions::RuntimeError, - "The local number of elements in the graph's map is not equal to " - "the number of nodes given by: lNodesPerDim!"); - if(coupled) { - TEUCHOS_TEST_FOR_EXCEPTION(fineMap->getGlobalNumElements() - != static_cast(geoData->getNumGlobalFineNodes()), - Exceptions::RuntimeError, - "The global number of elements in the graph's map is not equal to " - "the number of nodes given by: gNodesPerDim!"); - } - - *out << "Compute coarse mesh data" << std::endl; - std::vector > coarseMeshData = geoData->getCoarseMeshData(); - - // Now we are ready for the big loop over the fine node that will assign each - // node on the fine grid to an aggregate and a processor. 
- RCP graphFact = GetFactory("Graph"); - RCP coarseCoordinatesFineMap, coarseCoordinatesMap; - RCP > + "The global number of elements in the graph's map is not equal to " + "the number of nodes given by: gNodesPerDim!"); + } + + *out << "Compute coarse mesh data" << std::endl; + std::vector > coarseMeshData = geoData->getCoarseMeshData(); + + // Now we are ready for the big loop over the fine node that will assign each + // node on the fine grid to an aggregate and a processor. + RCP graphFact = GetFactory("Graph"); + RCP coarseCoordinatesFineMap, coarseCoordinatesMap; + RCP > myStructuredAlgorithm = rcp(new AggregationStructuredAlgorithm(graphFact)); - if(interpolationOrder == 0 && outputAggregates){ - // Create aggregates for prolongation - *out << "Compute Aggregates" << std::endl; - RCP aggregates = rcp(new Aggregates(graph->GetDomainMap())); - aggregates->setObjectLabel("ST"); - aggregates->SetIndexManager(geoData); - aggregates->AggregatesCrossProcessors(coupled); - aggregates->SetNumAggregates(geoData->getNumLocalCoarseNodes()); - std::vector aggStat(geoData->getNumLocalFineNodes(), READY); - LO numNonAggregatedNodes = geoData->getNumLocalFineNodes(); - - myStructuredAlgorithm->BuildAggregates(pL, *graph, *aggregates, aggStat, - numNonAggregatedNodes); - - TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, - "MueLu::StructuredAggregationFactory::Build: Leftover nodes found! 
Error!"); - aggregates->ComputeAggregateSizes(true/*forceRecompute*/); - GetOStream(Statistics1) << aggregates->description() << std::endl; - Set(currentLevel, "Aggregates", aggregates); - - } else { - // Create the graph of the prolongator - *out << "Compute CrsGraph" << std::endl; - RCP myGraph; - myStructuredAlgorithm->BuildGraph(*graph, geoData, dofsPerNode, myGraph, - coarseCoordinatesFineMap, coarseCoordinatesMap); - Set(currentLevel, "prolongatorGraph", myGraph); - } - - if(coupled) { - Set(currentLevel, "gCoarseNodesPerDim", geoData->getGlobalCoarseNodesPerDir()); - } - Set(currentLevel, "lCoarseNodesPerDim", geoData->getLocalCoarseNodesPerDir()); - Set(currentLevel, "coarseCoordinatesFineMap", coarseCoordinatesFineMap); - Set(currentLevel, "coarseCoordinatesMap", coarseCoordinatesMap); - Set(currentLevel, "structuredInterpolationOrder", interpolationOrder); - Set(currentLevel, "numDimensions", numDimensions); - - } // Build() -} //namespace MueLu - + if (interpolationOrder == 0 && outputAggregates) { + // Create aggregates for prolongation + *out << "Compute Aggregates" << std::endl; + RCP aggregates = rcp(new Aggregates(graph->GetDomainMap())); + aggregates->setObjectLabel("ST"); + aggregates->SetIndexManager(geoData); + aggregates->AggregatesCrossProcessors(coupled); + aggregates->SetNumAggregates(geoData->getNumLocalCoarseNodes()); + std::vector aggStat(geoData->getNumLocalFineNodes(), READY); + LO numNonAggregatedNodes = geoData->getNumLocalFineNodes(); + + myStructuredAlgorithm->BuildAggregates(pL, *graph, *aggregates, aggStat, + numNonAggregatedNodes); + + TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, + "MueLu::StructuredAggregationFactory::Build: Leftover nodes found! 
Error!"); + aggregates->ComputeAggregateSizes(true /*forceRecompute*/); + GetOStream(Statistics1) << aggregates->description() << std::endl; + Set(currentLevel, "Aggregates", aggregates); + + } else { + // Create the graph of the prolongator + *out << "Compute CrsGraph" << std::endl; + RCP myGraph; + myStructuredAlgorithm->BuildGraph(*graph, geoData, dofsPerNode, myGraph, + coarseCoordinatesFineMap, coarseCoordinatesMap); + Set(currentLevel, "prolongatorGraph", myGraph); + } + + if (coupled) { + Set(currentLevel, "gCoarseNodesPerDim", geoData->getGlobalCoarseNodesPerDir()); + } + Set(currentLevel, "lCoarseNodesPerDim", geoData->getLocalCoarseNodesPerDir()); + Set(currentLevel, "coarseCoordinatesFineMap", coarseCoordinatesFineMap); + Set(currentLevel, "coarseCoordinatesMap", coarseCoordinatesMap); + Set(currentLevel, "structuredInterpolationOrder", interpolationOrder); + Set(currentLevel, "numDimensions", numDimensions); + +} // Build() +} //namespace MueLu #endif /* MUELU_STRUCTUREDAGGREGATIONFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_kokkos_decl.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_kokkos_decl.hpp index 88724397a340..b966b0043c9a 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_kokkos_decl.hpp @@ -103,62 +103,61 @@ namespace MueLu { | CrsGraph | StructuredAggregationFactory_kokkos | CrsGraph of the prolongator */ - template - class StructuredAggregationFactory_kokkos : public SingleLevelFactoryBase { +template +class StructuredAggregationFactory_kokkos : public SingleLevelFactoryBase { #undef MUELU_STRUCTUREDAGGREGATIONFACTORY_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - //! 
Constructor. - StructuredAggregationFactory_kokkos(); + //! Constructor. + StructuredAggregationFactory_kokkos(); - //! Destructor. - virtual ~StructuredAggregationFactory_kokkos() { } + //! Destructor. + virtual ~StructuredAggregationFactory_kokkos() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! @name Set/get methods. - //@{ - // set information about 1-node aggregates (map name and generating factory) - void SetOnePtMapName(const std::string name, Teuchos::RCP mapFact) { - SetParameter("OnePt aggregate map name", ParameterEntry(std::string(name))); // revalidate - SetFactory("OnePt aggregate map factory",mapFact); - } + //! @name Set/get methods. + //@{ + // set information about 1-node aggregates (map name and generating factory) + void SetOnePtMapName(const std::string name, Teuchos::RCP mapFact) { + SetParameter("OnePt aggregate map name", ParameterEntry(std::string(name))); // revalidate + SetFactory("OnePt aggregate map factory", mapFact); + } - //@} + //@} - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level& currentLevel) const; + void DeclareInput(Level& currentLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - /*! @brief Build aggregates. */ - void Build(Level& currentLevel) const; + /*! @brief Build aggregates. */ + void Build(Level& currentLevel) const; - //@} + //@} - private: + private: + //! boolean flag: definition phase + //! if true, the aggregation algorithms still can be set and changed. + //! if false, no change in aggregation algorithms is possible any more + mutable bool bDefinitionPhase_; - //! boolean flag: definition phase - //! if true, the aggregation algorithms still can be set and changed. - //! 
if false, no change in aggregation algorithms is possible any more - mutable bool bDefinitionPhase_; +}; // class StructuredAggregationFactory - }; // class StructuredAggregationFactory - -} +} // namespace MueLu #define MUELU_STRUCTUREDAGGREGATIONFACTORY_KOKKOS_SHORT -#endif // MUELU_UNCOUPLEDAGGREGATIONFACTORY_KOKKOS_DECL_HPP +#endif // MUELU_UNCOUPLEDAGGREGATIONFACTORY_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_kokkos_def.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_kokkos_def.hpp index 1067efc3e08d..987730a5e32d 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_kokkos_def.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_kokkos_def.hpp @@ -65,193 +65,192 @@ namespace MueLu { - template - StructuredAggregationFactory_kokkos:: - StructuredAggregationFactory_kokkos() : bDefinitionPhase_(true) { } +template +StructuredAggregationFactory_kokkos:: + StructuredAggregationFactory_kokkos() + : bDefinitionPhase_(true) {} - template - RCP StructuredAggregationFactory_kokkos:: - GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP StructuredAggregationFactory_kokkos:: + GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("aggregation: preserve Dirichlet points"); - SET_VALID_ENTRY("aggregation: allow user-specified singletons"); - SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors"); - SET_VALID_ENTRY("aggregation: phase3 avoid singletons"); -#undef SET_VALID_ENTRY - - // general variables needed in StructuredAggregationFactory - validParamList->set ("aggregation: output type", "Aggregates", - "Type of object holding the aggregation data: Aggregtes or CrsGraph"); - 
validParamList->set ("aggregation: coarsening rate", "{3}", - "Coarsening rate per spatial dimensions"); - validParamList->set ("aggregation: coarsening order", 0, - "The interpolation order used to construct grid transfer operators based off these aggregates."); - validParamList->set >("Graph", Teuchos::null, - "Graph of the matrix after amalgamation but without dropping."); - validParamList->set >("DofsPerNode", Teuchos::null, - "Number of degrees of freedom per mesh node, provided by the coalsce drop factory."); - validParamList->set >("numDimensions", Teuchos::null, - "Number of spatial dimension provided by CoordinatesTransferFactory."); - validParamList->set >("lNodesPerDim", Teuchos::null, - "Number of nodes per spatial dimmension provided by CoordinatesTransferFactory."); - - return validParamList; - } // GetValidParameterList() - - template - void StructuredAggregationFactory_kokkos:: - DeclareInput(Level& currentLevel) const { - Input(currentLevel, "Graph"); - Input(currentLevel, "DofsPerNode"); - - // Request the local number of nodes per dimensions - if(currentLevel.GetLevelID() == 0) { - if(currentLevel.IsAvailable("numDimensions", NoFactory::get())) { - currentLevel.DeclareInput("numDimensions", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("numDimensions", NoFactory::get()), - Exceptions::RuntimeError, - "numDimensions was not provided by the user on level0!"); - } - if(currentLevel.IsAvailable("lNodesPerDim", NoFactory::get())) { - currentLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("lNodesPerDim", NoFactory::get()), - Exceptions::RuntimeError, - "lNodesPerDim was not provided by the user on level0!"); - } + SET_VALID_ENTRY("aggregation: preserve Dirichlet points"); + SET_VALID_ENTRY("aggregation: allow user-specified singletons"); + SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors"); + 
SET_VALID_ENTRY("aggregation: phase3 avoid singletons"); +#undef SET_VALID_ENTRY + + // general variables needed in StructuredAggregationFactory + validParamList->set("aggregation: output type", "Aggregates", + "Type of object holding the aggregation data: Aggregtes or CrsGraph"); + validParamList->set("aggregation: coarsening rate", "{3}", + "Coarsening rate per spatial dimensions"); + validParamList->set("aggregation: coarsening order", 0, + "The interpolation order used to construct grid transfer operators based off these aggregates."); + validParamList->set >("Graph", Teuchos::null, + "Graph of the matrix after amalgamation but without dropping."); + validParamList->set >("DofsPerNode", Teuchos::null, + "Number of degrees of freedom per mesh node, provided by the coalsce drop factory."); + validParamList->set >("numDimensions", Teuchos::null, + "Number of spatial dimension provided by CoordinatesTransferFactory."); + validParamList->set >("lNodesPerDim", Teuchos::null, + "Number of nodes per spatial dimmension provided by CoordinatesTransferFactory."); + + return validParamList; +} // GetValidParameterList() + +template +void StructuredAggregationFactory_kokkos:: + DeclareInput(Level& currentLevel) const { + Input(currentLevel, "Graph"); + Input(currentLevel, "DofsPerNode"); + + // Request the local number of nodes per dimensions + if (currentLevel.GetLevelID() == 0) { + if (currentLevel.IsAvailable("numDimensions", NoFactory::get())) { + currentLevel.DeclareInput("numDimensions", NoFactory::get(), this); } else { - Input(currentLevel, "lNodesPerDim"); - Input(currentLevel, "numDimensions"); + TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("numDimensions", NoFactory::get()), + Exceptions::RuntimeError, + "numDimensions was not provided by the user on level0!"); } - } // DeclareInput() - - template - void StructuredAggregationFactory_kokkos:: - Build(Level ¤tLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); - - RCP out; - if(const char* dbg = 
std::getenv("MUELU_STRUCTUREDAGGREGATION_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); - } - - using device_type = typename LWGraph_kokkos::local_graph_type::device_type; - using execution_space = typename LWGraph_kokkos::local_graph_type::device_type::execution_space; - using memory_space = typename LWGraph_kokkos::local_graph_type::device_type::memory_space; - - *out << "Entering structured aggregation" << std::endl; - - ParameterList pL = GetParameterList(); - bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed - - // General problem informations are gathered from data stored in the problem matix. - RCP graph = Get >(currentLevel, "Graph"); - RCP fineMap = graph->GetDomainMap(); - const int myRank = fineMap->getComm()->getRank(); - const LO dofsPerNode = Get(currentLevel, "DofsPerNode"); - - // Since we want to operate on nodes and not dof, we need to modify the rowMap in order to - // obtain a nodeMap. - const int interpolationOrder = pL.get("aggregation: coarsening order"); - std::string outputType = pL.get("aggregation: output type"); - const bool outputAggregates = (outputType == "Aggregates" ? true : false); - Array lFineNodesPerDir(3); - int numDimensions; - if(currentLevel.GetLevelID() == 0) { - // On level 0, data is provided by applications and has no associated factory. - lFineNodesPerDir = currentLevel.Get >("lNodesPerDim", NoFactory::get()); - numDimensions = currentLevel.Get("numDimensions", NoFactory::get()); + if (currentLevel.IsAvailable("lNodesPerDim", NoFactory::get())) { + currentLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this); } else { - // On level > 0, data is provided directly by generating factories. 
- lFineNodesPerDir = Get >(currentLevel, "lNodesPerDim"); - numDimensions = Get(currentLevel, "numDimensions"); + TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("lNodesPerDim", NoFactory::get()), + Exceptions::RuntimeError, + "lNodesPerDim was not provided by the user on level0!"); } - - - // First make sure that input parameters are set logically based on dimension - for(int dim = 0; dim < 3; ++dim) { - if(dim >= numDimensions) { - lFineNodesPerDir[dim] = 1; - } + } else { + Input(currentLevel, "lNodesPerDim"); + Input(currentLevel, "numDimensions"); + } +} // DeclareInput() + +template +void StructuredAggregationFactory_kokkos:: + Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + + RCP out; + if (const char* dbg = std::getenv("MUELU_STRUCTUREDAGGREGATION_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } + + using device_type = typename LWGraph_kokkos::local_graph_type::device_type; + using execution_space = typename LWGraph_kokkos::local_graph_type::device_type::execution_space; + using memory_space = typename LWGraph_kokkos::local_graph_type::device_type::memory_space; + + *out << "Entering structured aggregation" << std::endl; + + ParameterList pL = GetParameterList(); + bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed + + // General problem informations are gathered from data stored in the problem matix. + RCP graph = Get >(currentLevel, "Graph"); + RCP fineMap = graph->GetDomainMap(); + const int myRank = fineMap->getComm()->getRank(); + const LO dofsPerNode = Get(currentLevel, "DofsPerNode"); + + // Since we want to operate on nodes and not dof, we need to modify the rowMap in order to + // obtain a nodeMap. 
+ const int interpolationOrder = pL.get("aggregation: coarsening order"); + std::string outputType = pL.get("aggregation: output type"); + const bool outputAggregates = (outputType == "Aggregates" ? true : false); + Array lFineNodesPerDir(3); + int numDimensions; + if (currentLevel.GetLevelID() == 0) { + // On level 0, data is provided by applications and has no associated factory. + lFineNodesPerDir = currentLevel.Get >("lNodesPerDim", NoFactory::get()); + numDimensions = currentLevel.Get("numDimensions", NoFactory::get()); + } else { + // On level > 0, data is provided directly by generating factories. + lFineNodesPerDir = Get >(currentLevel, "lNodesPerDim"); + numDimensions = Get(currentLevel, "numDimensions"); + } + + // First make sure that input parameters are set logically based on dimension + for (int dim = 0; dim < 3; ++dim) { + if (dim >= numDimensions) { + lFineNodesPerDir[dim] = 1; } - - // Get the coarsening rate - std::string coarseningRate = pL.get("aggregation: coarsening rate"); - Teuchos::Array coarseRate; - try { - coarseRate = Teuchos::fromStringToArray(coarseningRate); - } catch(const Teuchos::InvalidArrayStringRepresentation& e) { - GetOStream(Errors,-1) << " *** \"aggregation: coarsening rate\" must be a string convertible into an array! 
*** " - << std::endl; - throw e; - } - TEUCHOS_TEST_FOR_EXCEPTION((coarseRate.size() > 1) && (coarseRate.size() < numDimensions), - Exceptions::RuntimeError, - "\"aggregation: coarsening rate\" must have at least as many" - " components as the number of spatial dimensions in the problem."); - - // Now that we have extracted info from the level, create the IndexManager - RCP geoData = rcp(new IndexManager_kokkos(numDimensions, - interpolationOrder, myRank, - lFineNodesPerDir, - coarseRate)); - - *out << "The index manager has now been built" << std::endl; - TEUCHOS_TEST_FOR_EXCEPTION(fineMap->getLocalNumElements() - != static_cast(geoData->getNumLocalFineNodes()), - Exceptions::RuntimeError, - "The local number of elements in the graph's map is not equal to " - "the number of nodes given by: lNodesPerDim!"); - - // Now we are ready for the big loop over the fine node that will assign each - // node on the fine grid to an aggregate and a processor. - RCP myStructuredAlgorithm - = rcp(new AggregationStructuredAlgorithm_kokkos()); - - if(interpolationOrder == 0 && outputAggregates){ - RCP aggregates = rcp(new Aggregates(graph->GetDomainMap())); - aggregates->setObjectLabel("ST"); - aggregates->SetIndexManagerKokkos(geoData); - aggregates->AggregatesCrossProcessors(false); - aggregates->SetNumAggregates(geoData->getNumCoarseNodes()); - - LO numNonAggregatedNodes = geoData->getNumLocalFineNodes(); - Kokkos::View aggStat("aggStat", numNonAggregatedNodes); - Kokkos::parallel_for("StructuredAggregation: initialize aggStat", - Kokkos::RangePolicy(0, numNonAggregatedNodes), - KOKKOS_LAMBDA(const LO nodeIdx) {aggStat(nodeIdx) = READY;}); - - myStructuredAlgorithm->BuildAggregates(pL, *graph, *aggregates, aggStat, - numNonAggregatedNodes); - - *out << "numNonAggregatedNodes: " << numNonAggregatedNodes << std::endl; - - TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, - "MueLu::StructuredAggregationFactory::Build: Leftover nodes found! 
Error!"); - aggregates->ComputeAggregateSizes(true/*forceRecompute*/); - GetOStream(Statistics1) << aggregates->description() << std::endl; - Set(currentLevel, "Aggregates", aggregates); - - } else { - // Create Coarse Data - RCP myGraph; - myStructuredAlgorithm->BuildGraph(*graph, geoData, dofsPerNode, myGraph); - Set(currentLevel, "prolongatorGraph", myGraph); - } - - Set(currentLevel, "lCoarseNodesPerDim", geoData->getCoarseNodesPerDirArray()); - Set(currentLevel, "indexManager", geoData); - Set(currentLevel, "structuredInterpolationOrder", interpolationOrder); - Set(currentLevel, "numDimensions", numDimensions); - - } // Build() - -} //namespace MueLu + } + + // Get the coarsening rate + std::string coarseningRate = pL.get("aggregation: coarsening rate"); + Teuchos::Array coarseRate; + try { + coarseRate = Teuchos::fromStringToArray(coarseningRate); + } catch (const Teuchos::InvalidArrayStringRepresentation& e) { + GetOStream(Errors, -1) << " *** \"aggregation: coarsening rate\" must be a string convertible into an array! 
*** " + << std::endl; + throw e; + } + TEUCHOS_TEST_FOR_EXCEPTION((coarseRate.size() > 1) && (coarseRate.size() < numDimensions), + Exceptions::RuntimeError, + "\"aggregation: coarsening rate\" must have at least as many" + " components as the number of spatial dimensions in the problem."); + + // Now that we have extracted info from the level, create the IndexManager + RCP geoData = rcp(new IndexManager_kokkos(numDimensions, + interpolationOrder, myRank, + lFineNodesPerDir, + coarseRate)); + + *out << "The index manager has now been built" << std::endl; + TEUCHOS_TEST_FOR_EXCEPTION(fineMap->getLocalNumElements() != static_cast(geoData->getNumLocalFineNodes()), + Exceptions::RuntimeError, + "The local number of elements in the graph's map is not equal to " + "the number of nodes given by: lNodesPerDim!"); + + // Now we are ready for the big loop over the fine node that will assign each + // node on the fine grid to an aggregate and a processor. + RCP myStructuredAlgorithm = rcp(new AggregationStructuredAlgorithm_kokkos()); + + if (interpolationOrder == 0 && outputAggregates) { + RCP aggregates = rcp(new Aggregates(graph->GetDomainMap())); + aggregates->setObjectLabel("ST"); + aggregates->SetIndexManagerKokkos(geoData); + aggregates->AggregatesCrossProcessors(false); + aggregates->SetNumAggregates(geoData->getNumCoarseNodes()); + + LO numNonAggregatedNodes = geoData->getNumLocalFineNodes(); + Kokkos::View aggStat("aggStat", numNonAggregatedNodes); + Kokkos::parallel_for( + "StructuredAggregation: initialize aggStat", + Kokkos::RangePolicy(0, numNonAggregatedNodes), + KOKKOS_LAMBDA(const LO nodeIdx) { aggStat(nodeIdx) = READY; }); + + myStructuredAlgorithm->BuildAggregates(pL, *graph, *aggregates, aggStat, + numNonAggregatedNodes); + + *out << "numNonAggregatedNodes: " << numNonAggregatedNodes << std::endl; + + TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, + "MueLu::StructuredAggregationFactory::Build: Leftover nodes found! 
Error!"); + aggregates->ComputeAggregateSizes(true /*forceRecompute*/); + GetOStream(Statistics1) << aggregates->description() << std::endl; + Set(currentLevel, "Aggregates", aggregates); + + } else { + // Create Coarse Data + RCP myGraph; + myStructuredAlgorithm->BuildGraph(*graph, geoData, dofsPerNode, myGraph); + Set(currentLevel, "prolongatorGraph", myGraph); + } + + Set(currentLevel, "lCoarseNodesPerDim", geoData->getCoarseNodesPerDirArray()); + Set(currentLevel, "indexManager", geoData); + Set(currentLevel, "structuredInterpolationOrder", interpolationOrder); + Set(currentLevel, "numDimensions", numDimensions); + +} // Build() + +} //namespace MueLu #endif /* MUELU_STRUCTUREDAGGREGATIONFACTORY_KOKKOS_DEF_HPP */ diff --git a/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_GlobalLexicographicIndexManager_decl.hpp b/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_GlobalLexicographicIndexManager_decl.hpp index 9488bfe3dd72..65ff3887bc33 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_GlobalLexicographicIndexManager_decl.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_GlobalLexicographicIndexManager_decl.hpp @@ -77,69 +77,67 @@ namespace MueLu { correspond to nodes. While not strictly necessary, it might be convenient. 
*/ - template - class GlobalLexicographicIndexManager : public IndexManager { +template +class GlobalLexicographicIndexManager : public IndexManager { #undef MUELU_GLOBALLEXICOGRAPHICINDEXMANAGER_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: + public: + GlobalLexicographicIndexManager(); - GlobalLexicographicIndexManager(); + GlobalLexicographicIndexManager(const RCP > comm, const bool coupled, + const int NumDimensions, const int interpolationOrder, + const Array GFineNodesPerDir, + const Array LFineNodesPerDir, const Array CoarseRate, + const GO MinGlobalIndex); - GlobalLexicographicIndexManager(const RCP< const Teuchos::Comm > comm, const bool coupled, - const int NumDimensions, const int interpolationOrder, - const Array GFineNodesPerDir, - const Array LFineNodesPerDir, const Array CoarseRate, - const GO MinGlobalIndex); + virtual ~GlobalLexicographicIndexManager() {} - virtual ~GlobalLexicographicIndexManager() {} + void computeGlobalCoarseParameters(); - void computeGlobalCoarseParameters(); + void getGhostedNodesData(const RCP fineMap, + Array& ghostedNodeCoarseLIDs, + Array& ghostedNodeCoarsePIDs, + Array& ghostedNodeCoarseGIDs) const; - void getGhostedNodesData(const RCP fineMap, - Array& ghostedNodeCoarseLIDs, - Array& ghostedNodeCoarsePIDs, - Array& ghostedNodeCoarseGIDs) const; + void getCoarseNodesData(const RCP fineCoordinatesMap, + Array& coarseNodeCoarseGIDs, + Array& coarseNodeFineGIDs) const; - void getCoarseNodesData(const RCP fineCoordinatesMap, - Array& coarseNodeCoarseGIDs, - Array& coarseNodeFineGIDs) const; + std::vector > getCoarseMeshData() const; - std::vector > getCoarseMeshData() const; + void getFineNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const; - void getFineNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const; + void getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const; - void getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const; + void getFineNodeGhostedTuple(const LO myLID, LO& 
i, LO& j, LO& k) const; - void getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const; + void getFineNodeGID(const GO i, const GO j, const GO k, GO& myGID) const; - void getFineNodeGID(const GO i, const GO j, const GO k, GO& myGID) const; + void getFineNodeLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getFineNodeLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getCoarseNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const; - void getCoarseNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const; + void getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const; - void getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const; + void getCoarseNodeGID(const GO i, const GO j, const GO k, GO& myGID) const; - void getCoarseNodeGID(const GO i, const GO j, const GO k, GO& myGID) const; + void getCoarseNodeLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getCoarseNodeLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getCoarseNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getCoarseNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getGhostedNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getGhostedNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getGhostedNodeCoarseLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getGhostedNodeCoarseLID(const LO i, const LO j, const LO k, LO& myLID) const; + private: +}; - private: - - }; - -} //namespace MueLu +} //namespace MueLu #define MUELU_GLOBALLEXICOGRPHICINDEXMANAGER_SHORT -#endif // MUELU_GLOBALLEXICOGRPHICINDEXMANAGER_DECL_HPP +#endif // MUELU_GLOBALLEXICOGRPHICINDEXMANAGER_DECL_HPP diff --git 
a/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_GlobalLexicographicIndexManager_def.hpp b/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_GlobalLexicographicIndexManager_def.hpp index 8211e1a461bd..c00e0add4b98 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_GlobalLexicographicIndexManager_def.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_GlobalLexicographicIndexManager_def.hpp @@ -51,341 +51,330 @@ namespace MueLu { - template - GlobalLexicographicIndexManager:: - GlobalLexicographicIndexManager(const RCP > comm, const bool coupled, - const int NumDimensions, const int interpolationOrder, - const Array GFineNodesPerDir, - const Array LFineNodesPerDir, const Array CoarseRate, - const GO MinGlobalIndex) : - IndexManager(comm, coupled, false, NumDimensions, interpolationOrder, GFineNodesPerDir, LFineNodesPerDir) { - - // Load coarse rate, being careful about formating. - for(int dim = 0; dim < 3; ++dim) { - if(dim < this->numDimensions) { - if(CoarseRate.size() == 1) { - this->coarseRate[dim] = CoarseRate[0]; - } else if(CoarseRate.size() == this->numDimensions) { - this->coarseRate[dim] = CoarseRate[dim]; - } - } else { - this->coarseRate[dim] = 1; +template +GlobalLexicographicIndexManager:: + GlobalLexicographicIndexManager(const RCP > comm, const bool coupled, + const int NumDimensions, const int interpolationOrder, + const Array GFineNodesPerDir, + const Array LFineNodesPerDir, const Array CoarseRate, + const GO MinGlobalIndex) + : IndexManager(comm, coupled, false, NumDimensions, interpolationOrder, GFineNodesPerDir, LFineNodesPerDir) { + // Load coarse rate, being careful about formating. 
+ for (int dim = 0; dim < 3; ++dim) { + if (dim < this->numDimensions) { + if (CoarseRate.size() == 1) { + this->coarseRate[dim] = CoarseRate[0]; + } else if (CoarseRate.size() == this->numDimensions) { + this->coarseRate[dim] = CoarseRate[dim]; } + } else { + this->coarseRate[dim] = 1; } + } - { - GO tmp = 0; - this->startIndices[2]= MinGlobalIndex / (this->gFineNodesPerDir[1]*this->gFineNodesPerDir[0]); - tmp = MinGlobalIndex % (this->gFineNodesPerDir[1]*this->gFineNodesPerDir[0]); - this->startIndices[1]= tmp / this->gFineNodesPerDir[0]; - this->startIndices[0]= tmp % this->gFineNodesPerDir[0]; + { + GO tmp = 0; + this->startIndices[2] = MinGlobalIndex / (this->gFineNodesPerDir[1] * this->gFineNodesPerDir[0]); + tmp = MinGlobalIndex % (this->gFineNodesPerDir[1] * this->gFineNodesPerDir[0]); + this->startIndices[1] = tmp / this->gFineNodesPerDir[0]; + this->startIndices[0] = tmp % this->gFineNodesPerDir[0]; - for(int dim = 0; dim < 3; ++dim) { - this->startIndices[dim + 3] = this->startIndices[dim] + this->lFineNodesPerDir[dim] - 1; - } + for (int dim = 0; dim < 3; ++dim) { + this->startIndices[dim + 3] = this->startIndices[dim] + this->lFineNodesPerDir[dim] - 1; } - - this->computeMeshParameters(); - computeGlobalCoarseParameters(); - - } - - template - void GlobalLexicographicIndexManager:: - computeGlobalCoarseParameters() { - this->gNumCoarseNodes10 = this->gCoarseNodesPerDir[0]*this->gCoarseNodesPerDir[1]; - this->gNumCoarseNodes = this->gNumCoarseNodes10*this->gCoarseNodesPerDir[2]; } - template - void GlobalLexicographicIndexManager:: - getGhostedNodesData(const RCP fineMap, - Array& ghostedNodeCoarseLIDs, Array& ghostedNodeCoarsePIDs, Array&ghostedNodeCoarseGIDs) const { - - ghostedNodeCoarseLIDs.resize(this->getNumLocalGhostedNodes()); - ghostedNodeCoarsePIDs.resize(this->getNumLocalGhostedNodes()); - ghostedNodeCoarseGIDs.resize(this->numGhostedNodes); - - // Find the GIDs, LIDs and PIDs of the coarse points on the fine mesh and coarse - // mesh as this 
data will be used to fill vertex2AggId and procWinner vectors. - Array lCoarseNodeCoarseGIDs(this->lNumCoarseNodes), + this->computeMeshParameters(); + computeGlobalCoarseParameters(); +} + +template +void GlobalLexicographicIndexManager:: + computeGlobalCoarseParameters() { + this->gNumCoarseNodes10 = this->gCoarseNodesPerDir[0] * this->gCoarseNodesPerDir[1]; + this->gNumCoarseNodes = this->gNumCoarseNodes10 * this->gCoarseNodesPerDir[2]; +} + +template +void GlobalLexicographicIndexManager:: + getGhostedNodesData(const RCP fineMap, + Array& ghostedNodeCoarseLIDs, Array& ghostedNodeCoarsePIDs, Array& ghostedNodeCoarseGIDs) const { + ghostedNodeCoarseLIDs.resize(this->getNumLocalGhostedNodes()); + ghostedNodeCoarsePIDs.resize(this->getNumLocalGhostedNodes()); + ghostedNodeCoarseGIDs.resize(this->numGhostedNodes); + + // Find the GIDs, LIDs and PIDs of the coarse points on the fine mesh and coarse + // mesh as this data will be used to fill vertex2AggId and procWinner vectors. + Array lCoarseNodeCoarseGIDs(this->lNumCoarseNodes), lCoarseNodeFineGIDs(this->lNumCoarseNodes); - Array ghostedCoarseNodeFineGIDs(this->numGhostedNodes); - Array ghostedCoarseNodeCoarseIndices(3), ghostedCoarseNodeFineIndices(3), ijk(3); - LO currentIndex = -1, currentCoarseIndex = -1; - for(ijk[2] = 0; ijk[2] < this->ghostedNodesPerDir[2]; ++ijk[2]) { - for(ijk[1] = 0; ijk[1] < this->ghostedNodesPerDir[1]; ++ijk[1]) { - for(ijk[0] = 0; ijk[0] < this->ghostedNodesPerDir[0]; ++ijk[0]) { - currentIndex = ijk[2]*this->numGhostedNodes10 + ijk[1]*this->ghostedNodesPerDir[0] + ijk[0]; - ghostedCoarseNodeCoarseIndices[0] = this->startGhostedCoarseNode[0] + ijk[0]; - ghostedCoarseNodeCoarseIndices[1] = this->startGhostedCoarseNode[1] + ijk[1]; - ghostedCoarseNodeCoarseIndices[2] = this->startGhostedCoarseNode[2] + ijk[2]; - GO myCoarseGID = ghostedCoarseNodeCoarseIndices[0] - + ghostedCoarseNodeCoarseIndices[1]*this->gCoarseNodesPerDir[0] - + 
ghostedCoarseNodeCoarseIndices[2]*this->gNumCoarseNodes10; - ghostedNodeCoarseGIDs[currentIndex] = myCoarseGID; - GO myGID = 0, factor[3] = {}; - factor[2] = this->gNumFineNodes10; - factor[1] = this->gFineNodesPerDir[0]; - factor[0] = 1; - for(int dim = 0; dim < 3; ++dim) { - if(dim < this->numDimensions) { - if(this->startIndices[dim] - this->offsets[dim] + ijk[dim]*this->coarseRate[dim] - < this->gFineNodesPerDir[dim] - 1) { - myGID += (this->startIndices[dim] - this->offsets[dim] - + ijk[dim]*this->coarseRate[dim])*factor[dim]; - } else { - myGID += (this->startIndices[dim] - this->offsets[dim] + (ijk[dim] - 1) - *this->coarseRate[dim] + this->endRate[dim])*factor[dim]; - } + Array ghostedCoarseNodeFineGIDs(this->numGhostedNodes); + Array ghostedCoarseNodeCoarseIndices(3), ghostedCoarseNodeFineIndices(3), ijk(3); + LO currentIndex = -1, currentCoarseIndex = -1; + for (ijk[2] = 0; ijk[2] < this->ghostedNodesPerDir[2]; ++ijk[2]) { + for (ijk[1] = 0; ijk[1] < this->ghostedNodesPerDir[1]; ++ijk[1]) { + for (ijk[0] = 0; ijk[0] < this->ghostedNodesPerDir[0]; ++ijk[0]) { + currentIndex = ijk[2] * this->numGhostedNodes10 + ijk[1] * this->ghostedNodesPerDir[0] + ijk[0]; + ghostedCoarseNodeCoarseIndices[0] = this->startGhostedCoarseNode[0] + ijk[0]; + ghostedCoarseNodeCoarseIndices[1] = this->startGhostedCoarseNode[1] + ijk[1]; + ghostedCoarseNodeCoarseIndices[2] = this->startGhostedCoarseNode[2] + ijk[2]; + GO myCoarseGID = ghostedCoarseNodeCoarseIndices[0] + ghostedCoarseNodeCoarseIndices[1] * this->gCoarseNodesPerDir[0] + ghostedCoarseNodeCoarseIndices[2] * this->gNumCoarseNodes10; + ghostedNodeCoarseGIDs[currentIndex] = myCoarseGID; + GO myGID = 0, factor[3] = {}; + factor[2] = this->gNumFineNodes10; + factor[1] = this->gFineNodesPerDir[0]; + factor[0] = 1; + for (int dim = 0; dim < 3; ++dim) { + if (dim < this->numDimensions) { + if (this->startIndices[dim] - this->offsets[dim] + ijk[dim] * this->coarseRate[dim] < this->gFineNodesPerDir[dim] - 1) { + myGID += 
(this->startIndices[dim] - this->offsets[dim] + ijk[dim] * this->coarseRate[dim]) * factor[dim]; + } else { + myGID += (this->startIndices[dim] - this->offsets[dim] + (ijk[dim] - 1) * this->coarseRate[dim] + this->endRate[dim]) * factor[dim]; } } - // lbv 02-08-2018: - // This check is simplistic and should be replaced by a condition that checks - // if the local tuple of the current index is wihin the range of local nodes - // or not in the range of ghosted nodes. - if((!this->ghostInterface[0] || ijk[0] != 0) && - (!this->ghostInterface[2] || ijk[1] != 0) && - (!this->ghostInterface[4] || ijk[2] != 0) && - (!this->ghostInterface[1] || ijk[0] != this->ghostedNodesPerDir[0] - 1) && - (!this->ghostInterface[3] || ijk[1] != this->ghostedNodesPerDir[1] - 1) && - (!this->ghostInterface[5] || ijk[2] != this->ghostedNodesPerDir[2] - 1)) { - - // this->getGhostedNodeFineLID(ijk[0], ijk[1], ijk[2], coarseNodeFineLID); - if(this->interpolationOrder_ == 0) { - currentCoarseIndex = 0; - if(this->ghostInterface[4]) { - currentCoarseIndex += (ijk[2] - 1)*this->lNumCoarseNodes10; - } else { - currentCoarseIndex += ijk[2]*this->lNumCoarseNodes10; - } - if(this->ghostInterface[2]) { - currentCoarseIndex += (ijk[1] - 1)*this->getLocalCoarseNodesInDir(0); - } else { - currentCoarseIndex += ijk[1]*this->getLocalCoarseNodesInDir(0); - } - if(this->ghostInterface[0]) { - currentCoarseIndex += ijk[0] - 1; - } else { - currentCoarseIndex += ijk[0]; - } + } + // lbv 02-08-2018: + // This check is simplistic and should be replaced by a condition that checks + // if the local tuple of the current index is wihin the range of local nodes + // or not in the range of ghosted nodes. 
+ if ((!this->ghostInterface[0] || ijk[0] != 0) && + (!this->ghostInterface[2] || ijk[1] != 0) && + (!this->ghostInterface[4] || ijk[2] != 0) && + (!this->ghostInterface[1] || ijk[0] != this->ghostedNodesPerDir[0] - 1) && + (!this->ghostInterface[3] || ijk[1] != this->ghostedNodesPerDir[1] - 1) && + (!this->ghostInterface[5] || ijk[2] != this->ghostedNodesPerDir[2] - 1)) { + // this->getGhostedNodeFineLID(ijk[0], ijk[1], ijk[2], coarseNodeFineLID); + if (this->interpolationOrder_ == 0) { + currentCoarseIndex = 0; + if (this->ghostInterface[4]) { + currentCoarseIndex += (ijk[2] - 1) * this->lNumCoarseNodes10; } else { - this->getGhostedNodeCoarseLID(ijk[0], ijk[1], ijk[2], currentCoarseIndex); + currentCoarseIndex += ijk[2] * this->lNumCoarseNodes10; } - - lCoarseNodeCoarseGIDs[currentCoarseIndex] = myCoarseGID; - lCoarseNodeFineGIDs[currentCoarseIndex] = myGID; + if (this->ghostInterface[2]) { + currentCoarseIndex += (ijk[1] - 1) * this->getLocalCoarseNodesInDir(0); + } else { + currentCoarseIndex += ijk[1] * this->getLocalCoarseNodesInDir(0); + } + if (this->ghostInterface[0]) { + currentCoarseIndex += ijk[0] - 1; + } else { + currentCoarseIndex += ijk[0]; + } + } else { + this->getGhostedNodeCoarseLID(ijk[0], ijk[1], ijk[2], currentCoarseIndex); } - ghostedCoarseNodeFineGIDs[currentIndex] = myGID; - } - } - } - - RCP coarseMap = Xpetra::MapFactory::Build (fineMap->lib(), - this->gNumCoarseNodes, - lCoarseNodeCoarseGIDs(), - fineMap->getIndexBase(), - fineMap->getComm()); - - coarseMap->getRemoteIndexList(ghostedNodeCoarseGIDs(), - ghostedNodeCoarsePIDs(), - ghostedNodeCoarseLIDs()); - - } // End getGhostedMeshData - - template - void GlobalLexicographicIndexManager:: - getCoarseNodesData(const RCP fineCoordinatesMap, - Array& coarseNodeCoarseGIDs, - Array& coarseNodeFineGIDs) const { - - // Allocate sufficient storage space for outputs - coarseNodeCoarseGIDs.resize(this->getNumLocalCoarseNodes()); - coarseNodeFineGIDs.resize(this->getNumLocalCoarseNodes()); - - 
// Load all the GIDs on the fine mesh - ArrayView fineNodeGIDs = fineCoordinatesMap->getLocalElementList(); - - Array coarseStartIndices(3); - GO tmp; - for(int dim = 0; dim < 3; ++dim) { - coarseStartIndices[dim] = this->startIndices[dim] / this->coarseRate[dim]; - tmp = this->startIndices[dim] % this->coarseRate[dim]; - if(tmp > 0) {++coarseStartIndices[dim];} - } - // Extract the fine LIDs of the coarse nodes and store the corresponding GIDs - LO fineLID; - Array lCoarseIndices(3); - Array gCoarseIndices(3); - for(LO coarseLID = 0; coarseLID < this->getNumLocalCoarseNodes(); ++coarseLID) { - this->getCoarseNodeLocalTuple(coarseLID, - lCoarseIndices[0], - lCoarseIndices[1], - lCoarseIndices[2]); - getCoarseNodeFineLID(lCoarseIndices[0], lCoarseIndices[1], lCoarseIndices[2], fineLID); - coarseNodeFineGIDs[coarseLID] = fineNodeGIDs[fineLID]; - - // Get Coarse Global IJK - for(int dim=0; dim<3; dim++) { - gCoarseIndices[dim] = coarseStartIndices[dim] + lCoarseIndices[dim]; + lCoarseNodeCoarseGIDs[currentCoarseIndex] = myCoarseGID; + lCoarseNodeFineGIDs[currentCoarseIndex] = myGID; + } + ghostedCoarseNodeFineGIDs[currentIndex] = myGID; } - getCoarseNodeGID(gCoarseIndices[0], - gCoarseIndices[1], - gCoarseIndices[2], - coarseNodeCoarseGIDs[coarseLID] ); - } - - } - - template - std::vector > GlobalLexicographicIndexManager:: - getCoarseMeshData() const { - std::vector > coarseMeshData; - return coarseMeshData; - } - - template - void GlobalLexicographicIndexManager:: - getFineNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const { - GO tmp; - k = myGID / this->gNumFineNodes10; - tmp = myGID % this->gNumFineNodes10; - j = tmp / this->gFineNodesPerDir[0]; - i = tmp % this->gFineNodesPerDir[0]; - } - - template - void GlobalLexicographicIndexManager:: - getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const { - LO tmp; - k = myLID / this->lNumFineNodes10; - tmp = myLID % this->lNumFineNodes10; - j = tmp / this->lFineNodesPerDir[0]; - i = tmp % 
this->lFineNodesPerDir[0]; - } - - template - void GlobalLexicographicIndexManager:: - getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const { - LO tmp; - k = myLID / this->lNumFineNodes10; - tmp = myLID % this->lNumFineNodes10; - j = tmp / this->lFineNodesPerDir[0]; - i = tmp % this->lFineNodesPerDir[0]; - - k += this->offsets[2]; - j += this->offsets[1]; - i += this->offsets[0]; - } - - template - void GlobalLexicographicIndexManager:: - getFineNodeGID(const GO i, const GO j, const GO k, GO& myGID) const { - myGID = k*this->gNumFineNodes10 + j*this->gFineNodesPerDir[0] + i; - } - - template - void GlobalLexicographicIndexManager:: - getFineNodeLID(const LO i, const LO j, const LO k, LO& myLID) const { - myLID = k*this->lNumFineNodes10 + j*this->lFineNodesPerDir[0] + i; - } - - template - void GlobalLexicographicIndexManager:: - getCoarseNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const { - GO tmp; - k = myGID / this->gNumCoarseNodes10; - tmp = myGID % this->gNumCoarseNodes10; - j = tmp / this->gCoarseNodesPerDir[0]; - i = tmp % this->gCoarseNodesPerDir[0]; - } - - template - void GlobalLexicographicIndexManager:: - getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const { - LO tmp; - k = myLID / this->lNumCoarseNodes10; - tmp = myLID % this->lNumCoarseNodes10; - j = tmp / this->lCoarseNodesPerDir[0]; - i = tmp % this->lCoarseNodesPerDir[0]; - } - - template - void GlobalLexicographicIndexManager:: - getCoarseNodeGID(const GO i, const GO j, const GO k, GO& myGID) const { - myGID = k*this->gNumCoarseNodes10 + j*this->gCoarseNodesPerDir[0] + i; - } - - template - void GlobalLexicographicIndexManager:: - getCoarseNodeLID(const LO i, const LO j, const LO k, LO& myLID) const { - myLID = k*this->lNumCoarseNodes10 + j*this->lCoarseNodesPerDir[0] + i; } - template - void GlobalLexicographicIndexManager:: - getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const { - myLID = k*this->numGhostedNodes10 + 
j*this->ghostedNodesPerDir[0] + i; - } - - template - void GlobalLexicographicIndexManager:: - getCoarseNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const { - // Assumptions: (i,j,k) is a tuple on the coarse mesh - // myLID is the corresponding local ID on the fine mesh - const LO multiplier[3] = {1, this->lFineNodesPerDir[0], this->lNumFineNodes10}; - const LO indices[3] = {i, j, k}; - - myLID = 0; - for(int dim = 0; dim < 3; ++dim) { - if((indices[dim] == this->getLocalCoarseNodesInDir(dim) - 1) && this->meshEdge[2*dim + 1]) { - // We are dealing with the last node on the mesh in direction dim - // so we can simply use the number of nodes on the fine mesh in that direction - myLID += (this->getLocalFineNodesInDir(dim) - 1)*multiplier[dim]; - } else { - myLID += (indices[dim]*this->getCoarseningRate(dim) + this->getCoarseNodeOffset(dim)) - *multiplier[dim]; - } + RCP coarseMap = Xpetra::MapFactory::Build(fineMap->lib(), + this->gNumCoarseNodes, + lCoarseNodeCoarseGIDs(), + fineMap->getIndexBase(), + fineMap->getComm()); + + coarseMap->getRemoteIndexList(ghostedNodeCoarseGIDs(), + ghostedNodeCoarsePIDs(), + ghostedNodeCoarseLIDs()); + +} // End getGhostedMeshData + +template +void GlobalLexicographicIndexManager:: + getCoarseNodesData(const RCP fineCoordinatesMap, + Array& coarseNodeCoarseGIDs, + Array& coarseNodeFineGIDs) const { + // Allocate sufficient storage space for outputs + coarseNodeCoarseGIDs.resize(this->getNumLocalCoarseNodes()); + coarseNodeFineGIDs.resize(this->getNumLocalCoarseNodes()); + + // Load all the GIDs on the fine mesh + ArrayView fineNodeGIDs = fineCoordinatesMap->getLocalElementList(); + + Array coarseStartIndices(3); + GO tmp; + for (int dim = 0; dim < 3; ++dim) { + coarseStartIndices[dim] = this->startIndices[dim] / this->coarseRate[dim]; + tmp = this->startIndices[dim] % this->coarseRate[dim]; + if (tmp > 0) { + ++coarseStartIndices[dim]; } } - template - void GlobalLexicographicIndexManager:: - 
getGhostedNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const { - LO itmp = i - (this->offsets[0] > 0 ? 1 : 0); - LO jtmp = j - (this->offsets[1] > 0 ? 1 : 0); - LO ktmp = k - (this->offsets[2] > 0 ? 1 : 0); - myLID = 0; - if(ktmp*this->coarseRate[2] < this->lFineNodesPerDir[2]) { - myLID += ktmp*this->coarseRate[2]*this->lNumCoarseNodes10; - } else { - myLID += (this->lFineNodesPerDir[2] - 1)*this->lNumCoarseNodes10; - } - - if(jtmp*this->coarseRate[1] < this->lFineNodesPerDir[1]) { - myLID += jtmp*this->coarseRate[1]*this->lFineNodesPerDir[0]; - } else { - myLID += (this->lFineNodesPerDir[1] - 1)*this->lFineNodesPerDir[1]; + // Extract the fine LIDs of the coarse nodes and store the corresponding GIDs + LO fineLID; + Array lCoarseIndices(3); + Array gCoarseIndices(3); + for (LO coarseLID = 0; coarseLID < this->getNumLocalCoarseNodes(); ++coarseLID) { + this->getCoarseNodeLocalTuple(coarseLID, + lCoarseIndices[0], + lCoarseIndices[1], + lCoarseIndices[2]); + getCoarseNodeFineLID(lCoarseIndices[0], lCoarseIndices[1], lCoarseIndices[2], fineLID); + coarseNodeFineGIDs[coarseLID] = fineNodeGIDs[fineLID]; + + // Get Coarse Global IJK + for (int dim = 0; dim < 3; dim++) { + gCoarseIndices[dim] = coarseStartIndices[dim] + lCoarseIndices[dim]; } - - if(itmp*this->coarseRate[0] < this->lFineNodesPerDir[0]) { - myLID += itmp*this->coarseRate[0]; + getCoarseNodeGID(gCoarseIndices[0], + gCoarseIndices[1], + gCoarseIndices[2], + coarseNodeCoarseGIDs[coarseLID]); + } +} + +template +std::vector > GlobalLexicographicIndexManager:: + getCoarseMeshData() const { + std::vector > coarseMeshData; + return coarseMeshData; +} + +template +void GlobalLexicographicIndexManager:: + getFineNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const { + GO tmp; + k = myGID / this->gNumFineNodes10; + tmp = myGID % this->gNumFineNodes10; + j = tmp / this->gFineNodesPerDir[0]; + i = tmp % this->gFineNodesPerDir[0]; +} + +template +void GlobalLexicographicIndexManager:: + 
getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const { + LO tmp; + k = myLID / this->lNumFineNodes10; + tmp = myLID % this->lNumFineNodes10; + j = tmp / this->lFineNodesPerDir[0]; + i = tmp % this->lFineNodesPerDir[0]; +} + +template +void GlobalLexicographicIndexManager:: + getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const { + LO tmp; + k = myLID / this->lNumFineNodes10; + tmp = myLID % this->lNumFineNodes10; + j = tmp / this->lFineNodesPerDir[0]; + i = tmp % this->lFineNodesPerDir[0]; + + k += this->offsets[2]; + j += this->offsets[1]; + i += this->offsets[0]; +} + +template +void GlobalLexicographicIndexManager:: + getFineNodeGID(const GO i, const GO j, const GO k, GO& myGID) const { + myGID = k * this->gNumFineNodes10 + j * this->gFineNodesPerDir[0] + i; +} + +template +void GlobalLexicographicIndexManager:: + getFineNodeLID(const LO i, const LO j, const LO k, LO& myLID) const { + myLID = k * this->lNumFineNodes10 + j * this->lFineNodesPerDir[0] + i; +} + +template +void GlobalLexicographicIndexManager:: + getCoarseNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const { + GO tmp; + k = myGID / this->gNumCoarseNodes10; + tmp = myGID % this->gNumCoarseNodes10; + j = tmp / this->gCoarseNodesPerDir[0]; + i = tmp % this->gCoarseNodesPerDir[0]; +} + +template +void GlobalLexicographicIndexManager:: + getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const { + LO tmp; + k = myLID / this->lNumCoarseNodes10; + tmp = myLID % this->lNumCoarseNodes10; + j = tmp / this->lCoarseNodesPerDir[0]; + i = tmp % this->lCoarseNodesPerDir[0]; +} + +template +void GlobalLexicographicIndexManager:: + getCoarseNodeGID(const GO i, const GO j, const GO k, GO& myGID) const { + myGID = k * this->gNumCoarseNodes10 + j * this->gCoarseNodesPerDir[0] + i; +} + +template +void GlobalLexicographicIndexManager:: + getCoarseNodeLID(const LO i, const LO j, const LO k, LO& myLID) const { + myLID = k * this->lNumCoarseNodes10 + j * 
this->lCoarseNodesPerDir[0] + i; +} + +template +void GlobalLexicographicIndexManager:: + getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const { + myLID = k * this->numGhostedNodes10 + j * this->ghostedNodesPerDir[0] + i; +} + +template +void GlobalLexicographicIndexManager:: + getCoarseNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const { + // Assumptions: (i,j,k) is a tuple on the coarse mesh + // myLID is the corresponding local ID on the fine mesh + const LO multiplier[3] = {1, this->lFineNodesPerDir[0], this->lNumFineNodes10}; + const LO indices[3] = {i, j, k}; + + myLID = 0; + for (int dim = 0; dim < 3; ++dim) { + if ((indices[dim] == this->getLocalCoarseNodesInDir(dim) - 1) && this->meshEdge[2 * dim + 1]) { + // We are dealing with the last node on the mesh in direction dim + // so we can simply use the number of nodes on the fine mesh in that direction + myLID += (this->getLocalFineNodesInDir(dim) - 1) * multiplier[dim]; } else { - myLID += this->lFineNodesPerDir[0] - 1; + myLID += (indices[dim] * this->getCoarseningRate(dim) + this->getCoarseNodeOffset(dim)) * multiplier[dim]; } } +} + +template +void GlobalLexicographicIndexManager:: + getGhostedNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const { + LO itmp = i - (this->offsets[0] > 0 ? 1 : 0); + LO jtmp = j - (this->offsets[1] > 0 ? 1 : 0); + LO ktmp = k - (this->offsets[2] > 0 ? 1 : 0); + myLID = 0; + if (ktmp * this->coarseRate[2] < this->lFineNodesPerDir[2]) { + myLID += ktmp * this->coarseRate[2] * this->lNumCoarseNodes10; + } else { + myLID += (this->lFineNodesPerDir[2] - 1) * this->lNumCoarseNodes10; + } - template - void GlobalLexicographicIndexManager:: - getGhostedNodeCoarseLID(const LO i, const LO j, const LO k, LO& myLID) const { - LO itmp = i - (this->offsets[0] > 0 ? 1 : 0); - LO jtmp = j - (this->offsets[1] > 0 ? 1 : 0); - LO ktmp = k - (this->offsets[2] > 0 ? 
1 : 0); - myLID = ktmp*this->lNumCoarseNodes10 + jtmp*this->lCoarseNodesPerDir[0] + itmp; + if (jtmp * this->coarseRate[1] < this->lFineNodesPerDir[1]) { + myLID += jtmp * this->coarseRate[1] * this->lFineNodesPerDir[0]; + } else { + myLID += (this->lFineNodesPerDir[1] - 1) * this->lFineNodesPerDir[1]; } -} //namespace MueLu + if (itmp * this->coarseRate[0] < this->lFineNodesPerDir[0]) { + myLID += itmp * this->coarseRate[0]; + } else { + myLID += this->lFineNodesPerDir[0] - 1; + } +} + +template +void GlobalLexicographicIndexManager:: + getGhostedNodeCoarseLID(const LO i, const LO j, const LO k, LO& myLID) const { + LO itmp = i - (this->offsets[0] > 0 ? 1 : 0); + LO jtmp = j - (this->offsets[1] > 0 ? 1 : 0); + LO ktmp = k - (this->offsets[2] > 0 ? 1 : 0); + myLID = ktmp * this->lNumCoarseNodes10 + jtmp * this->lCoarseNodesPerDir[0] + itmp; +} + +} //namespace MueLu #endif /* MUELU_GLOBALLEXICOGRAPHICINDEXMANAGER_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_LocalLexicographicIndexManager_decl.hpp b/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_LocalLexicographicIndexManager_decl.hpp index a0e809aaba20..933e3305dbf4 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_LocalLexicographicIndexManager_decl.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_LocalLexicographicIndexManager_decl.hpp @@ -77,93 +77,90 @@ namespace MueLu { correspond to nodes. While not strictly necessary, it might be convenient. 
*/ - template - class LocalLexicographicIndexManager : public IndexManager { +template +class LocalLexicographicIndexManager : public IndexManager { #undef MUELU_LOCALLEXICOGRAPHICINDEXMANAGER_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: + public: + LocalLexicographicIndexManager() = default; - LocalLexicographicIndexManager() = default; + LocalLexicographicIndexManager(const RCP > comm, const bool coupled, + const int NumDimensions, const int interpolationOrder, + const int MyRank, const int NumRanks, + const Array GFineNodesPerDir, + const Array LFineNodesPerDir, + const Array CoarseRate, const Array MeshData); - LocalLexicographicIndexManager(const RCP > comm, const bool coupled, - const int NumDimensions, const int interpolationOrder, - const int MyRank, const int NumRanks, - const Array GFineNodesPerDir, - const Array LFineNodesPerDir, - const Array CoarseRate, const Array MeshData); + virtual ~LocalLexicographicIndexManager() {} - virtual ~LocalLexicographicIndexManager() {} + void computeGlobalCoarseParameters(); - void computeGlobalCoarseParameters(); + void getGhostedNodesData(const RCP fineMap, + Array& ghostedNodeCoarseLIDs, + Array& ghostedNodeCoarsePIDs, + Array& ghostedNodeCoarseGIDs) const; - void getGhostedNodesData(const RCP fineMap, - Array& ghostedNodeCoarseLIDs, - Array& ghostedNodeCoarsePIDs, - Array& ghostedNodeCoarseGIDs) const; + void getCoarseNodesData(const RCP fineCoordinatesMap, + Array& coarseNodeCoarseGIDs, + Array& coarseNodeFineGIDs) const; - void getCoarseNodesData(const RCP fineCoordinatesMap, - Array& coarseNodeCoarseGIDs, - Array& coarseNodeFineGIDs) const; + std::vector > getCoarseMeshData() const; - std::vector > getCoarseMeshData() const; + void getFineNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const; - void getFineNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const; + void getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const; - void getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, 
LO& k) const; + void getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const; - void getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const; + void getFineNodeGID(const GO i, const GO j, const GO k, GO& myGID) const; - void getFineNodeGID(const GO i, const GO j, const GO k, GO& myGID) const; + void getFineNodeLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getFineNodeLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getCoarseNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const; - void getCoarseNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const; + void getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const; - void getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const; + void getCoarseNodeGID(const GO i, const GO j, const GO k, GO& myGID) const; - void getCoarseNodeGID(const GO i, const GO j, const GO k, GO& myGID) const; + void getCoarseNodeLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getCoarseNodeLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getCoarseNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getCoarseNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getGhostedNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getGhostedNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getGhostedNodeCoarseLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getGhostedNodeCoarseLID(const LO i, const LO j, const LO k, LO& myLID) const; + private: + const int myRank; ///< Local rank ID. + const int numRanks; ///< Number of ranks used to decompose the problem. 
- private: + // Iterator delimiting the entries in meshData that correspond to the block that owns the local + // part of the mesh. + typename std::vector >::iterator myBlockStart, myBlockEnd; - const int myRank; ///< Local rank ID. - const int numRanks; ///< Number of ranks used to decompose the problem. + int pi, pj, pk; ///< Number of processors in each diretcion. - // Iterator delimiting the entries in meshData that correspond to the block that owns the local - // part of the mesh. - typename std::vector >::iterator myBlockStart, myBlockEnd; + int numBlocks; ///< Number of mesh block. + int myBlock; ///< local mesh block ID. - int pi, pj, pk; ///< Number of processors in each diretcion. + int myRankIndex; ///< local process index for record in meshData after sorting. + Array rankIndices; ///< mapping between rank ID and reordered rank ID. + std::vector > meshData; ///< layout of indices accross all processes. + std::vector > coarseMeshData; ///< layout of indices accross all processes after coarsening. - int numBlocks; ///< Number of mesh block. - int myBlock; ///< local mesh block ID. + void sortLocalLexicographicData(); - int myRankIndex; ///< local process index for record in meshData after sorting. - Array rankIndices; ///< mapping between rank ID and reordered rank ID. - std::vector > meshData; ///< layout of indices accross all processes. - std::vector > coarseMeshData; ///< layout of indices accross all processes after coarsening. 
+ void computeCoarseLocalLexicographicData(); - void sortLocalLexicographicData(); + void getGIDLocalLexicographic(const LO iGhosted, const LO jGhosted, const LO kGhosted, + const Array coarseNodeFineIndices, GO& myGID, LO& myPID, + LO& myLID) const; +}; - void computeCoarseLocalLexicographicData(); - - void getGIDLocalLexicographic(const LO iGhosted, const LO jGhosted, const LO kGhosted, - const Array coarseNodeFineIndices, GO& myGID, LO& myPID, - LO& myLID) const; - - }; - -} //namespace MueLu +} //namespace MueLu #define MUELU_LOCALLEXICOGRAPHICINDEXMANAGER_SHORT -#endif // MUELU_LOCALLEXICOGRAPHICINDEXMANAGER_DECL_HPP +#endif // MUELU_LOCALLEXICOGRAPHICINDEXMANAGER_DECL_HPP diff --git a/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_LocalLexicographicIndexManager_def.hpp b/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_LocalLexicographicIndexManager_def.hpp index d3c3d8448630..f3978d1cb877 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_LocalLexicographicIndexManager_def.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_LocalLexicographicIndexManager_def.hpp @@ -51,448 +51,418 @@ namespace MueLu { - template - LocalLexicographicIndexManager:: - LocalLexicographicIndexManager(const RCP > comm, const bool coupled, - const int NumDimensions, const int interpolationOrder, - const int MyRank, const int NumRanks, - const Array GFineNodesPerDir, const Array LFineNodesPerDir, - const Array CoarseRate, const Array MeshData) : - IndexManager(comm, coupled, false, NumDimensions, interpolationOrder, GFineNodesPerDir, LFineNodesPerDir), - myRank(MyRank), numRanks(NumRanks) { - - // Allocate data based on user input - meshData.resize(numRanks); - rankIndices.resize(numRanks); - coarseMeshData.resize(numRanks); - - // Load coarse rate, being careful about formating - for(int dim = 0; dim < 3; ++dim) { - if(dim < this->numDimensions) { - if(CoarseRate.size() == 1) { - this->coarseRate[dim] = 
CoarseRate[0]; - } else if(CoarseRate.size() == this->numDimensions) { - this->coarseRate[dim] = CoarseRate[dim]; - } - } else { - this->coarseRate[dim] = 1; +template +LocalLexicographicIndexManager:: + LocalLexicographicIndexManager(const RCP > comm, const bool coupled, + const int NumDimensions, const int interpolationOrder, + const int MyRank, const int NumRanks, + const Array GFineNodesPerDir, const Array LFineNodesPerDir, + const Array CoarseRate, const Array MeshData) + : IndexManager(comm, coupled, false, NumDimensions, interpolationOrder, GFineNodesPerDir, LFineNodesPerDir) + , myRank(MyRank) + , numRanks(NumRanks) { + // Allocate data based on user input + meshData.resize(numRanks); + rankIndices.resize(numRanks); + coarseMeshData.resize(numRanks); + + // Load coarse rate, being careful about formating + for (int dim = 0; dim < 3; ++dim) { + if (dim < this->numDimensions) { + if (CoarseRate.size() == 1) { + this->coarseRate[dim] = CoarseRate[0]; + } else if (CoarseRate.size() == this->numDimensions) { + this->coarseRate[dim] = CoarseRate[dim]; } + } else { + this->coarseRate[dim] = 1; } + } - // Load meshData for local lexicographic case - for(int rank = 0; rank < numRanks; ++rank) { - meshData[rank].resize(10); - for(int entry = 0; entry < 10; ++entry) { - meshData[rank][entry] = MeshData[10*rank + entry]; - } - } - - if(this->coupled_) { - myBlock = meshData[myRank][2]; - sortLocalLexicographicData(); - } - - // Start simple parameter calculation - myRankIndex = rankIndices[myRank]; - for(int dim = 0; dim < 3; ++dim) { - this->startIndices[dim] = meshData[myRankIndex][2*dim + 3]; - this->startIndices[dim + 3] = meshData[myRankIndex][2*dim + 4]; + // Load meshData for local lexicographic case + for (int rank = 0; rank < numRanks; ++rank) { + meshData[rank].resize(10); + for (int entry = 0; entry < 10; ++entry) { + meshData[rank][entry] = MeshData[10 * rank + entry]; } + } - this->computeMeshParameters(); - computeGlobalCoarseParameters(); - 
computeCoarseLocalLexicographicData(); - } // Constructor + if (this->coupled_) { + myBlock = meshData[myRank][2]; + sortLocalLexicographicData(); + } - template - void LocalLexicographicIndexManager:: - computeGlobalCoarseParameters() { - this->gNumCoarseNodes10 = this->gCoarseNodesPerDir[0]*this->gCoarseNodesPerDir[1]; - this->gNumCoarseNodes = this->gNumCoarseNodes10*this->gCoarseNodesPerDir[2]; + // Start simple parameter calculation + myRankIndex = rankIndices[myRank]; + for (int dim = 0; dim < 3; ++dim) { + this->startIndices[dim] = meshData[myRankIndex][2 * dim + 3]; + this->startIndices[dim + 3] = meshData[myRankIndex][2 * dim + 4]; } - template - void LocalLexicographicIndexManager:: - getGhostedNodesData(const RCP/* fineMap */, - Array& ghostedNodeCoarseLIDs, - Array& ghostedNodeCoarsePIDs, - Array& ghostedNodeCoarseGIDs) const { - - // First we allocated memory for the outputs - ghostedNodeCoarseLIDs.resize(this->getNumLocalGhostedNodes()); - ghostedNodeCoarsePIDs.resize(this->getNumLocalGhostedNodes()); - ghostedNodeCoarseGIDs.resize(this->numGhostedNodes); - - // Now the tricky part starts, the coarse nodes / ghosted coarse nodes need to be imported. - // This requires finding what their GID on the fine mesh is. They need to be ordered - // lexicographically to allow for fast sweeps through the mesh. 
- - // We loop over all ghosted coarse nodes by increasing global lexicographic order - Array ghostedCoarseNodeCoarseIndices(3), ghostedCoarseNodeFineIndices(3); - Array lCoarseNodeCoarseIndices(3); - Array lCoarseNodeCoarseGIDs(this->lNumCoarseNodes); - LO currentIndex = -1, countCoarseNodes = 0; - for(int k = 0; k < this->ghostedNodesPerDir[2]; ++k) { - for(int j = 0; j < this->ghostedNodesPerDir[1]; ++j) { - for(int i = 0; i < this->ghostedNodesPerDir[0]; ++i) { - currentIndex = k*this->numGhostedNodes10 + j*this->ghostedNodesPerDir[0] + i; - ghostedCoarseNodeCoarseIndices[0] = this->startGhostedCoarseNode[0] + i; - ghostedCoarseNodeFineIndices[0] = ghostedCoarseNodeCoarseIndices[0]*this->coarseRate[0]; - if(ghostedCoarseNodeFineIndices[0] > this->gFineNodesPerDir[0] - 1) { - ghostedCoarseNodeFineIndices[0] = this->gFineNodesPerDir[0] - 1; - } - ghostedCoarseNodeCoarseIndices[1] = this->startGhostedCoarseNode[1] + j; - ghostedCoarseNodeFineIndices[1] = ghostedCoarseNodeCoarseIndices[1]*this->coarseRate[1]; - if(ghostedCoarseNodeFineIndices[1] > this->gFineNodesPerDir[1] - 1) { - ghostedCoarseNodeFineIndices[1] = this->gFineNodesPerDir[1] - 1; - } - ghostedCoarseNodeCoarseIndices[2] = this->startGhostedCoarseNode[2] + k; - ghostedCoarseNodeFineIndices[2] = ghostedCoarseNodeCoarseIndices[2]*this->coarseRate[2]; - if(ghostedCoarseNodeFineIndices[2] > this->gFineNodesPerDir[2] - 1) { - ghostedCoarseNodeFineIndices[2] = this->gFineNodesPerDir[2] - 1; - } + this->computeMeshParameters(); + computeGlobalCoarseParameters(); + computeCoarseLocalLexicographicData(); +} // Constructor + +template +void LocalLexicographicIndexManager:: + computeGlobalCoarseParameters() { + this->gNumCoarseNodes10 = this->gCoarseNodesPerDir[0] * this->gCoarseNodesPerDir[1]; + this->gNumCoarseNodes = this->gNumCoarseNodes10 * this->gCoarseNodesPerDir[2]; +} + +template +void LocalLexicographicIndexManager:: + getGhostedNodesData(const RCP /* fineMap */, + Array& ghostedNodeCoarseLIDs, + 
Array& ghostedNodeCoarsePIDs, + Array& ghostedNodeCoarseGIDs) const { + // First we allocated memory for the outputs + ghostedNodeCoarseLIDs.resize(this->getNumLocalGhostedNodes()); + ghostedNodeCoarsePIDs.resize(this->getNumLocalGhostedNodes()); + ghostedNodeCoarseGIDs.resize(this->numGhostedNodes); + + // Now the tricky part starts, the coarse nodes / ghosted coarse nodes need to be imported. + // This requires finding what their GID on the fine mesh is. They need to be ordered + // lexicographically to allow for fast sweeps through the mesh. + + // We loop over all ghosted coarse nodes by increasing global lexicographic order + Array ghostedCoarseNodeCoarseIndices(3), ghostedCoarseNodeFineIndices(3); + Array lCoarseNodeCoarseIndices(3); + Array lCoarseNodeCoarseGIDs(this->lNumCoarseNodes); + LO currentIndex = -1, countCoarseNodes = 0; + for (int k = 0; k < this->ghostedNodesPerDir[2]; ++k) { + for (int j = 0; j < this->ghostedNodesPerDir[1]; ++j) { + for (int i = 0; i < this->ghostedNodesPerDir[0]; ++i) { + currentIndex = k * this->numGhostedNodes10 + j * this->ghostedNodesPerDir[0] + i; + ghostedCoarseNodeCoarseIndices[0] = this->startGhostedCoarseNode[0] + i; + ghostedCoarseNodeFineIndices[0] = ghostedCoarseNodeCoarseIndices[0] * this->coarseRate[0]; + if (ghostedCoarseNodeFineIndices[0] > this->gFineNodesPerDir[0] - 1) { + ghostedCoarseNodeFineIndices[0] = this->gFineNodesPerDir[0] - 1; + } + ghostedCoarseNodeCoarseIndices[1] = this->startGhostedCoarseNode[1] + j; + ghostedCoarseNodeFineIndices[1] = ghostedCoarseNodeCoarseIndices[1] * this->coarseRate[1]; + if (ghostedCoarseNodeFineIndices[1] > this->gFineNodesPerDir[1] - 1) { + ghostedCoarseNodeFineIndices[1] = this->gFineNodesPerDir[1] - 1; + } + ghostedCoarseNodeCoarseIndices[2] = this->startGhostedCoarseNode[2] + k; + ghostedCoarseNodeFineIndices[2] = ghostedCoarseNodeCoarseIndices[2] * this->coarseRate[2]; + if (ghostedCoarseNodeFineIndices[2] > this->gFineNodesPerDir[2] - 1) { + 
ghostedCoarseNodeFineIndices[2] = this->gFineNodesPerDir[2] - 1; + } - GO myGID = -1, myCoarseGID = -1; - LO myLID = -1, myPID = -1, myCoarseLID = -1; - getGIDLocalLexicographic(i, j, k, ghostedCoarseNodeFineIndices, myGID, myPID, myLID); + GO myGID = -1, myCoarseGID = -1; + LO myLID = -1, myPID = -1, myCoarseLID = -1; + getGIDLocalLexicographic(i, j, k, ghostedCoarseNodeFineIndices, myGID, myPID, myLID); - int rankIndex = rankIndices[myPID]; - for(int dim = 0; dim < 3; ++dim) { - if(dim < this->numDimensions) { - lCoarseNodeCoarseIndices[dim] = ghostedCoarseNodeCoarseIndices[dim] - - coarseMeshData[rankIndex][3 + 2*dim]; - } - } - LO myRankIndexCoarseNodesInDir0 = coarseMeshData[rankIndex][4] - - coarseMeshData[rankIndex][3] + 1; - LO myRankIndexCoarseNodes10 = (coarseMeshData[rankIndex][6] - - coarseMeshData[rankIndex][5] + 1) - *myRankIndexCoarseNodesInDir0; - myCoarseLID = lCoarseNodeCoarseIndices[2]*myRankIndexCoarseNodes10 - + lCoarseNodeCoarseIndices[1]*myRankIndexCoarseNodesInDir0 - + lCoarseNodeCoarseIndices[0]; - myCoarseGID = myCoarseLID + coarseMeshData[rankIndex][9]; - - ghostedNodeCoarseLIDs[currentIndex] = myCoarseLID; - ghostedNodeCoarsePIDs[currentIndex] = myPID; - ghostedNodeCoarseGIDs[currentIndex] = myCoarseGID; - - if(myPID == myRank) { - lCoarseNodeCoarseGIDs[countCoarseNodes] = myCoarseGID; - ++countCoarseNodes; + int rankIndex = rankIndices[myPID]; + for (int dim = 0; dim < 3; ++dim) { + if (dim < this->numDimensions) { + lCoarseNodeCoarseIndices[dim] = ghostedCoarseNodeCoarseIndices[dim] - coarseMeshData[rankIndex][3 + 2 * dim]; } } + LO myRankIndexCoarseNodesInDir0 = coarseMeshData[rankIndex][4] - coarseMeshData[rankIndex][3] + 1; + LO myRankIndexCoarseNodes10 = (coarseMeshData[rankIndex][6] - coarseMeshData[rankIndex][5] + 1) * myRankIndexCoarseNodesInDir0; + myCoarseLID = lCoarseNodeCoarseIndices[2] * myRankIndexCoarseNodes10 + lCoarseNodeCoarseIndices[1] * myRankIndexCoarseNodesInDir0 + lCoarseNodeCoarseIndices[0]; + myCoarseGID = 
myCoarseLID + coarseMeshData[rankIndex][9]; + + ghostedNodeCoarseLIDs[currentIndex] = myCoarseLID; + ghostedNodeCoarsePIDs[currentIndex] = myPID; + ghostedNodeCoarseGIDs[currentIndex] = myCoarseGID; + + if (myPID == myRank) { + lCoarseNodeCoarseGIDs[countCoarseNodes] = myCoarseGID; + ++countCoarseNodes; + } } } } - - template - void LocalLexicographicIndexManager:: - getCoarseNodesData(const RCP fineCoordinatesMap, - Array& coarseNodeCoarseGIDs, - Array& coarseNodeFineGIDs) const { - - // Allocate sufficient storage space for outputs - coarseNodeCoarseGIDs.resize(this->getNumLocalCoarseNodes()); - coarseNodeFineGIDs.resize(this->getNumLocalCoarseNodes()); - - // Load all the GIDs on the fine mesh - ArrayView fineNodeGIDs = fineCoordinatesMap->getLocalElementList(); - - Array coarseStartIndices(3); - for(int dim = 0; dim < 3; ++dim) { - coarseStartIndices[dim] = this->coarseMeshData[myRankIndex][2*dim + 3]; - } - - // Extract the fine LIDs of the coarse nodes and store the corresponding GIDs - LO fineLID; - for(LO coarseLID = 0; coarseLID < this->getNumLocalCoarseNodes(); ++coarseLID) { - Array coarseIndices(3), fineIndices(3), gCoarseIndices(3); - this->getCoarseNodeLocalTuple(coarseLID, - coarseIndices[0], - coarseIndices[1], - coarseIndices[2]); - getCoarseNodeFineLID(coarseIndices[0],coarseIndices[1],coarseIndices[2],fineLID); - coarseNodeFineGIDs[coarseLID] = fineNodeGIDs[fineLID]; - - LO myRankIndexCoarseNodesInDir0 = coarseMeshData[myRankIndex][4] - - coarseMeshData[myRankIndex][3] + 1; - LO myRankIndexCoarseNodes10 = (coarseMeshData[myRankIndex][6] - - coarseMeshData[myRankIndex][5] + 1) - *myRankIndexCoarseNodesInDir0; - LO myCoarseLID = coarseIndices[2]*myRankIndexCoarseNodes10 - + coarseIndices[1]*myRankIndexCoarseNodesInDir0 - + coarseIndices[0]; - GO myCoarseGID = myCoarseLID + coarseMeshData[myRankIndex][9]; - coarseNodeCoarseGIDs[coarseLID] = myCoarseGID; - } - +} + +template +void LocalLexicographicIndexManager:: + getCoarseNodesData(const RCP 
fineCoordinatesMap, + Array& coarseNodeCoarseGIDs, + Array& coarseNodeFineGIDs) const { + // Allocate sufficient storage space for outputs + coarseNodeCoarseGIDs.resize(this->getNumLocalCoarseNodes()); + coarseNodeFineGIDs.resize(this->getNumLocalCoarseNodes()); + + // Load all the GIDs on the fine mesh + ArrayView fineNodeGIDs = fineCoordinatesMap->getLocalElementList(); + + Array coarseStartIndices(3); + for (int dim = 0; dim < 3; ++dim) { + coarseStartIndices[dim] = this->coarseMeshData[myRankIndex][2 * dim + 3]; } - template - void LocalLexicographicIndexManager:: - getGIDLocalLexicographic(const LO iGhosted, const LO jGhosted, const LO kGhosted, - const Array coarseNodeFineIndices, - GO& myGID, LO& myPID, LO& myLID) const { - - LO ni = -1, nj = -1, li = -1, lj = -1, lk = -1; - LO myRankGuess = myRankIndex; - // We try to make a logical guess as to which PID owns the current coarse node - if(iGhosted == 0 && this->ghostInterface[0]) { - --myRankGuess; - } else if((iGhosted == this->ghostedNodesPerDir[0] - 1) && this->ghostInterface[1]) { - ++myRankGuess; - } - if(jGhosted == 0 && this->ghostInterface[2]) { - myRankGuess -= pi; - } else if((jGhosted == this->ghostedNodesPerDir[1] - 1) && this->ghostInterface[3]) { - myRankGuess += pi; - } - if(kGhosted == 0 && this->ghostInterface[4]) { - myRankGuess -= pj*pi; - } else if((kGhosted == this->ghostedNodesPerDir[2] - 1) && this->ghostInterface[5]) { - myRankGuess += pj*pi; - } - if(coarseNodeFineIndices[0] >= meshData[myRankGuess][3] - && coarseNodeFineIndices[0] <= meshData[myRankGuess][4] - && coarseNodeFineIndices[1] >= meshData[myRankGuess][5] - && coarseNodeFineIndices[1] <= meshData[myRankGuess][6] - && coarseNodeFineIndices[2] >= meshData[myRankGuess][7] - && coarseNodeFineIndices[2] <= meshData[myRankGuess][8] - && myRankGuess < numRanks - 1) { - myPID = meshData[myRankGuess][0]; - ni = meshData[myRankGuess][4] - meshData[myRankGuess][3] + 1; - nj = meshData[myRankGuess][6] - meshData[myRankGuess][5] + 1; - 
li = coarseNodeFineIndices[0] - meshData[myRankGuess][3]; - lj = coarseNodeFineIndices[1] - meshData[myRankGuess][5]; - lk = coarseNodeFineIndices[2] - meshData[myRankGuess][7]; - myLID = lk*nj*ni + lj*ni + li; - myGID = meshData[myRankGuess][9] + myLID; - } else { // The guess failed, let us use the heavy artilery: std::find_if() - // It could be interesting to monitor how many times this branch of the code gets - // used as it is far more expensive than the above one... - auto nodeRank = std::find_if(myBlockStart, myBlockEnd, - [coarseNodeFineIndices](const std::vector& vec){ - if(coarseNodeFineIndices[0] >= vec[3] - && coarseNodeFineIndices[0] <= vec[4] - && coarseNodeFineIndices[1] >= vec[5] - && coarseNodeFineIndices[1] <= vec[6] - && coarseNodeFineIndices[2] >= vec[7] - && coarseNodeFineIndices[2] <= vec[8]) { - return true; - } else { - return false; - } - }); - myPID = (*nodeRank)[0]; - ni = (*nodeRank)[4] - (*nodeRank)[3] + 1; - nj = (*nodeRank)[6] - (*nodeRank)[5] + 1; - li = coarseNodeFineIndices[0] - (*nodeRank)[3]; - lj = coarseNodeFineIndices[1] - (*nodeRank)[5]; - lk = coarseNodeFineIndices[2] - (*nodeRank)[7]; - myLID = lk*nj*ni + lj*ni + li; - myGID = (*nodeRank)[9] + myLID; - } + // Extract the fine LIDs of the coarse nodes and store the corresponding GIDs + LO fineLID; + for (LO coarseLID = 0; coarseLID < this->getNumLocalCoarseNodes(); ++coarseLID) { + Array coarseIndices(3), fineIndices(3), gCoarseIndices(3); + this->getCoarseNodeLocalTuple(coarseLID, + coarseIndices[0], + coarseIndices[1], + coarseIndices[2]); + getCoarseNodeFineLID(coarseIndices[0], coarseIndices[1], coarseIndices[2], fineLID); + coarseNodeFineGIDs[coarseLID] = fineNodeGIDs[fineLID]; + + LO myRankIndexCoarseNodesInDir0 = coarseMeshData[myRankIndex][4] - coarseMeshData[myRankIndex][3] + 1; + LO myRankIndexCoarseNodes10 = (coarseMeshData[myRankIndex][6] - coarseMeshData[myRankIndex][5] + 1) * myRankIndexCoarseNodesInDir0; + LO myCoarseLID = coarseIndices[2] * 
myRankIndexCoarseNodes10 + coarseIndices[1] * myRankIndexCoarseNodesInDir0 + coarseIndices[0]; + GO myCoarseGID = myCoarseLID + coarseMeshData[myRankIndex][9]; + coarseNodeCoarseGIDs[coarseLID] = myCoarseGID; } - - template - void LocalLexicographicIndexManager:: - sortLocalLexicographicData() { - - std::sort(meshData.begin(), meshData.end(), - [](const std::vector& a, const std::vector& b)->bool { - // The below function sorts ranks by blockID, kmin, jmin and imin - if(a[2] < b[2]) { +} + +template +void LocalLexicographicIndexManager:: + getGIDLocalLexicographic(const LO iGhosted, const LO jGhosted, const LO kGhosted, + const Array coarseNodeFineIndices, + GO& myGID, LO& myPID, LO& myLID) const { + LO ni = -1, nj = -1, li = -1, lj = -1, lk = -1; + LO myRankGuess = myRankIndex; + // We try to make a logical guess as to which PID owns the current coarse node + if (iGhosted == 0 && this->ghostInterface[0]) { + --myRankGuess; + } else if ((iGhosted == this->ghostedNodesPerDir[0] - 1) && this->ghostInterface[1]) { + ++myRankGuess; + } + if (jGhosted == 0 && this->ghostInterface[2]) { + myRankGuess -= pi; + } else if ((jGhosted == this->ghostedNodesPerDir[1] - 1) && this->ghostInterface[3]) { + myRankGuess += pi; + } + if (kGhosted == 0 && this->ghostInterface[4]) { + myRankGuess -= pj * pi; + } else if ((kGhosted == this->ghostedNodesPerDir[2] - 1) && this->ghostInterface[5]) { + myRankGuess += pj * pi; + } + if (coarseNodeFineIndices[0] >= meshData[myRankGuess][3] && coarseNodeFineIndices[0] <= meshData[myRankGuess][4] && coarseNodeFineIndices[1] >= meshData[myRankGuess][5] && coarseNodeFineIndices[1] <= meshData[myRankGuess][6] && coarseNodeFineIndices[2] >= meshData[myRankGuess][7] && coarseNodeFineIndices[2] <= meshData[myRankGuess][8] && myRankGuess < numRanks - 1) { + myPID = meshData[myRankGuess][0]; + ni = meshData[myRankGuess][4] - meshData[myRankGuess][3] + 1; + nj = meshData[myRankGuess][6] - meshData[myRankGuess][5] + 1; + li = coarseNodeFineIndices[0] - 
meshData[myRankGuess][3]; + lj = coarseNodeFineIndices[1] - meshData[myRankGuess][5]; + lk = coarseNodeFineIndices[2] - meshData[myRankGuess][7]; + myLID = lk * nj * ni + lj * ni + li; + myGID = meshData[myRankGuess][9] + myLID; + } else { // The guess failed, let us use the heavy artilery: std::find_if() + // It could be interesting to monitor how many times this branch of the code gets + // used as it is far more expensive than the above one... + auto nodeRank = std::find_if(myBlockStart, myBlockEnd, + [coarseNodeFineIndices](const std::vector& vec) { + if (coarseNodeFineIndices[0] >= vec[3] && coarseNodeFineIndices[0] <= vec[4] && coarseNodeFineIndices[1] >= vec[5] && coarseNodeFineIndices[1] <= vec[6] && coarseNodeFineIndices[2] >= vec[7] && coarseNodeFineIndices[2] <= vec[8]) { + return true; + } else { + return false; + } + }); + myPID = (*nodeRank)[0]; + ni = (*nodeRank)[4] - (*nodeRank)[3] + 1; + nj = (*nodeRank)[6] - (*nodeRank)[5] + 1; + li = coarseNodeFineIndices[0] - (*nodeRank)[3]; + lj = coarseNodeFineIndices[1] - (*nodeRank)[5]; + lk = coarseNodeFineIndices[2] - (*nodeRank)[7]; + myLID = lk * nj * ni + lj * ni + li; + myGID = (*nodeRank)[9] + myLID; + } +} + +template +void LocalLexicographicIndexManager:: + sortLocalLexicographicData() { + std::sort(meshData.begin(), meshData.end(), + [](const std::vector& a, const std::vector& b) -> bool { + // The below function sorts ranks by blockID, kmin, jmin and imin + if (a[2] < b[2]) { + return true; + } else if (a[2] == b[2]) { + if (a[7] < b[7]) { return true; - } else if(a[2] == b[2]) { - if(a[7] < b[7]) { + } else if (a[7] == b[7]) { + if (a[5] < b[5]) { return true; - } else if(a[7] == b[7]) { - if(a[5] < b[5]) { + } else if (a[5] == b[5]) { + if (a[3] < b[3]) { return true; - } else if(a[5] == b[5]) { - if(a[3] < b[3]) {return true;} } } } - return false; - }); - - numBlocks = meshData[numRanks - 1][2] + 1; - // Find the range of the current block - myBlockStart = std::lower_bound(meshData.begin(), 
meshData.end(), myBlock - 1, - [] (const std::vector& vec, const GO val)->bool { - return (vec[2] < val) ? true : false; - }); - myBlockEnd = std::upper_bound(meshData.begin(), meshData.end(), myBlock, - [] (const GO val, const std::vector& vec)->bool { - return (val < vec[2]) ? true : false; + } + return false; + }); + + numBlocks = meshData[numRanks - 1][2] + 1; + // Find the range of the current block + myBlockStart = std::lower_bound(meshData.begin(), meshData.end(), myBlock - 1, + [](const std::vector& vec, const GO val) -> bool { + return (vec[2] < val) ? true : false; }); - // Assuming that i,j,k and ranges are split in pi, pj and pk processors - // we search for these numbers as they will allow us to find quickly the PID of processors - // owning ghost nodes. - auto myKEnd = std::upper_bound(myBlockStart, myBlockEnd, (*myBlockStart)[3], - [] (const GO val, const std::vector& vec)->bool { - return (val < vec[7]) ? true : false; - }); - auto myJEnd = std::upper_bound(myBlockStart, myKEnd, (*myBlockStart)[3], - [] (const GO val, const std::vector& vec)->bool { - return (val < vec[5]) ? true : false; - }); - pi = std::distance(myBlockStart, myJEnd); - pj = std::distance(myBlockStart, myKEnd) / pi; - pk = std::distance(myBlockStart, myBlockEnd) / (pj*pi); - - // We also look for the index of the local rank in the current block. - const int MyRank = myRank; - myRankIndex = std::distance(meshData.begin(), - std::find_if(myBlockStart, myBlockEnd, - [MyRank] (const std::vector& vec)->bool { - return (vec[0] == MyRank) ? true : false; - }) - ); - // We also construct a mapping of rank to rankIndex in the meshData vector, - // this will allow us to access data quickly later on. - for(int rankIndex = 0; rankIndex < numRanks; ++rankIndex) { - rankIndices[meshData[rankIndex][0]] = rankIndex; - } + myBlockEnd = std::upper_bound(meshData.begin(), meshData.end(), myBlock, + [](const GO val, const std::vector& vec) -> bool { + return (val < vec[2]) ? 
true : false; + }); + // Assuming that i,j,k and ranges are split in pi, pj and pk processors + // we search for these numbers as they will allow us to find quickly the PID of processors + // owning ghost nodes. + auto myKEnd = std::upper_bound(myBlockStart, myBlockEnd, (*myBlockStart)[3], + [](const GO val, const std::vector& vec) -> bool { + return (val < vec[7]) ? true : false; + }); + auto myJEnd = std::upper_bound(myBlockStart, myKEnd, (*myBlockStart)[3], + [](const GO val, const std::vector& vec) -> bool { + return (val < vec[5]) ? true : false; + }); + pi = std::distance(myBlockStart, myJEnd); + pj = std::distance(myBlockStart, myKEnd) / pi; + pk = std::distance(myBlockStart, myBlockEnd) / (pj * pi); + + // We also look for the index of the local rank in the current block. + const int MyRank = myRank; + myRankIndex = std::distance(meshData.begin(), + std::find_if(myBlockStart, myBlockEnd, + [MyRank](const std::vector& vec) -> bool { + return (vec[0] == MyRank) ? true : false; + })); + // We also construct a mapping of rank to rankIndex in the meshData vector, + // this will allow us to access data quickly later on. 
+ for (int rankIndex = 0; rankIndex < numRanks; ++rankIndex) { + rankIndices[meshData[rankIndex][0]] = rankIndex; } - - template - void LocalLexicographicIndexManager:: - computeCoarseLocalLexicographicData() { - Array rankOffset(3); - for(int rank = 0; rank < numRanks; ++rank) { - coarseMeshData[rank].resize(10); - coarseMeshData[rank][0] = meshData[rank][0]; - coarseMeshData[rank][1] = meshData[rank][1]; - coarseMeshData[rank][2] = meshData[rank][2]; - for(int dim = 0; dim < 3; ++dim) { - coarseMeshData[rank][3 + 2*dim] = meshData[rank][3 + 2*dim] / this->coarseRate[dim]; - if(meshData[rank][3 + 2*dim] % this->coarseRate[dim] > 0) { - ++coarseMeshData[rank][3 + 2*dim]; - } - coarseMeshData[rank][3 + 2*dim + 1] = meshData[rank][3 + 2*dim + 1] / this->coarseRate[dim]; - if(meshData[rank][3 + 2*dim + 1] == this->gFineNodesPerDir[dim] - 1 && - meshData[rank][3 + 2*dim + 1] % this->coarseRate[dim] > 0) { - //this->endRate[dim] < this->coarseRate[dim]) { - ++coarseMeshData[rank][3 + 2*dim + 1]; - } +} + +template +void LocalLexicographicIndexManager:: + computeCoarseLocalLexicographicData() { + Array rankOffset(3); + for (int rank = 0; rank < numRanks; ++rank) { + coarseMeshData[rank].resize(10); + coarseMeshData[rank][0] = meshData[rank][0]; + coarseMeshData[rank][1] = meshData[rank][1]; + coarseMeshData[rank][2] = meshData[rank][2]; + for (int dim = 0; dim < 3; ++dim) { + coarseMeshData[rank][3 + 2 * dim] = meshData[rank][3 + 2 * dim] / this->coarseRate[dim]; + if (meshData[rank][3 + 2 * dim] % this->coarseRate[dim] > 0) { + ++coarseMeshData[rank][3 + 2 * dim]; } - if(rank > 0) { - coarseMeshData[rank][9] = coarseMeshData[rank - 1][9] - + (coarseMeshData[rank - 1][8] - coarseMeshData[rank - 1][7] + 1) - * (coarseMeshData[rank - 1][6] - coarseMeshData[rank - 1][5] + 1) - * (coarseMeshData[rank - 1][4] - coarseMeshData[rank - 1][3] + 1); + coarseMeshData[rank][3 + 2 * dim + 1] = meshData[rank][3 + 2 * dim + 1] / this->coarseRate[dim]; + if (meshData[rank][3 + 2 * dim + 
1] == this->gFineNodesPerDir[dim] - 1 && + meshData[rank][3 + 2 * dim + 1] % this->coarseRate[dim] > 0) { + //this->endRate[dim] < this->coarseRate[dim]) { + ++coarseMeshData[rank][3 + 2 * dim + 1]; } } + if (rank > 0) { + coarseMeshData[rank][9] = coarseMeshData[rank - 1][9] + (coarseMeshData[rank - 1][8] - coarseMeshData[rank - 1][7] + 1) * (coarseMeshData[rank - 1][6] - coarseMeshData[rank - 1][5] + 1) * (coarseMeshData[rank - 1][4] - coarseMeshData[rank - 1][3] + 1); + } } - - template - std::vector > LocalLexicographicIndexManager:: - getCoarseMeshData() const {return coarseMeshData;} - - template - void LocalLexicographicIndexManager:: - getFineNodeGlobalTuple(const GO /* myGID */, GO& /* i */, GO& /* j */, GO& /* k */) const { - } - - template - void LocalLexicographicIndexManager:: - getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const { - LO tmp; - k = myLID / this->lNumFineNodes10; - tmp = myLID % this->lNumFineNodes10; - j = tmp / this->lFineNodesPerDir[0]; - i = tmp % this->lFineNodesPerDir[0]; - } - - template - void LocalLexicographicIndexManager:: - getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const { - LO tmp; - k = myLID / this->lNumFineNodes10; - tmp = myLID % this->lNumFineNodes10; - j = tmp / this->lFineNodesPerDir[0]; - i = tmp % this->lFineNodesPerDir[0]; - - k += this->offsets[2]; - j += this->offsets[1]; - i += this->offsets[0]; - } - - template - void LocalLexicographicIndexManager:: - getFineNodeGID(const GO /* i */, const GO /* j */, const GO /* k */, GO& /* myGID */) const { - } - - template - void LocalLexicographicIndexManager:: - getFineNodeLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { - } - - template - void LocalLexicographicIndexManager:: - getCoarseNodeGlobalTuple(const GO /* myGID */, GO& /* i */, GO& /* j */, GO& /* k */) const { - } - - template - void LocalLexicographicIndexManager:: - getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const { - LO tmp; 
- k = myLID / this->lNumCoarseNodes10; - tmp = myLID % this->lNumCoarseNodes10; - j = tmp / this->lCoarseNodesPerDir[0]; - i = tmp % this->lCoarseNodesPerDir[0]; - } - - template - void LocalLexicographicIndexManager:: - getCoarseNodeGID(const GO /* i */, const GO /* j */, const GO /* k */, GO& /* myGID */) const { - } - - template - void LocalLexicographicIndexManager:: - getCoarseNodeLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { - } - - template - void LocalLexicographicIndexManager:: - getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const { - myLID = k*this->numGhostedNodes10 + j*this->ghostedNodesPerDir[0] + i; - } - - template - void LocalLexicographicIndexManager:: - getCoarseNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const { - // Assumptions: (i,j,k) is a tuple on the coarse mesh - // myLID is the corresponding local ID on the fine mesh - const LO multiplier[3] = {1, this->lFineNodesPerDir[0], this->lNumFineNodes10}; - const LO indices[3] = {i, j, k}; - - myLID = 0; - for(int dim = 0; dim < 3; ++dim) { - if((indices[dim] == this->getLocalCoarseNodesInDir(dim) - 1) && this->meshEdge[2*dim + 1]) { - // We are dealing with the last node on the mesh in direction dim - // so we can simply use the number of nodes on the fine mesh in that direction - myLID += (this->getLocalFineNodesInDir(dim) - 1)*multiplier[dim]; - } else { - myLID += (indices[dim]*this->getCoarseningRate(dim) + this->getCoarseNodeOffset(dim)) - *multiplier[dim]; - } +} + +template +std::vector > LocalLexicographicIndexManager:: + getCoarseMeshData() const { return coarseMeshData; } + +template +void LocalLexicographicIndexManager:: + getFineNodeGlobalTuple(const GO /* myGID */, GO& /* i */, GO& /* j */, GO& /* k */) const { +} + +template +void LocalLexicographicIndexManager:: + getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const { + LO tmp; + k = myLID / this->lNumFineNodes10; + tmp = myLID % 
this->lNumFineNodes10; + j = tmp / this->lFineNodesPerDir[0]; + i = tmp % this->lFineNodesPerDir[0]; +} + +template +void LocalLexicographicIndexManager:: + getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const { + LO tmp; + k = myLID / this->lNumFineNodes10; + tmp = myLID % this->lNumFineNodes10; + j = tmp / this->lFineNodesPerDir[0]; + i = tmp % this->lFineNodesPerDir[0]; + + k += this->offsets[2]; + j += this->offsets[1]; + i += this->offsets[0]; +} + +template +void LocalLexicographicIndexManager:: + getFineNodeGID(const GO /* i */, const GO /* j */, const GO /* k */, GO& /* myGID */) const { +} + +template +void LocalLexicographicIndexManager:: + getFineNodeLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { +} + +template +void LocalLexicographicIndexManager:: + getCoarseNodeGlobalTuple(const GO /* myGID */, GO& /* i */, GO& /* j */, GO& /* k */) const { +} + +template +void LocalLexicographicIndexManager:: + getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const { + LO tmp; + k = myLID / this->lNumCoarseNodes10; + tmp = myLID % this->lNumCoarseNodes10; + j = tmp / this->lCoarseNodesPerDir[0]; + i = tmp % this->lCoarseNodesPerDir[0]; +} + +template +void LocalLexicographicIndexManager:: + getCoarseNodeGID(const GO /* i */, const GO /* j */, const GO /* k */, GO& /* myGID */) const { +} + +template +void LocalLexicographicIndexManager:: + getCoarseNodeLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { +} + +template +void LocalLexicographicIndexManager:: + getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const { + myLID = k * this->numGhostedNodes10 + j * this->ghostedNodesPerDir[0] + i; +} + +template +void LocalLexicographicIndexManager:: + getCoarseNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const { + // Assumptions: (i,j,k) is a tuple on the coarse mesh + // myLID is the corresponding local ID on the fine mesh + const LO 
multiplier[3] = {1, this->lFineNodesPerDir[0], this->lNumFineNodes10}; + const LO indices[3] = {i, j, k}; + + myLID = 0; + for (int dim = 0; dim < 3; ++dim) { + if ((indices[dim] == this->getLocalCoarseNodesInDir(dim) - 1) && this->meshEdge[2 * dim + 1]) { + // We are dealing with the last node on the mesh in direction dim + // so we can simply use the number of nodes on the fine mesh in that direction + myLID += (this->getLocalFineNodesInDir(dim) - 1) * multiplier[dim]; + } else { + myLID += (indices[dim] * this->getCoarseningRate(dim) + this->getCoarseNodeOffset(dim)) * multiplier[dim]; } } +} - template - void LocalLexicographicIndexManager:: - getGhostedNodeFineLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { - } +template +void LocalLexicographicIndexManager:: + getGhostedNodeFineLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { +} - template - void LocalLexicographicIndexManager:: - getGhostedNodeCoarseLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { - } +template +void LocalLexicographicIndexManager:: + getGhostedNodeCoarseLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { +} -} //namespace MueLu +} //namespace MueLu #endif /* MUELU_LOCALLEXICOGRAPHICINDEXMANAGER_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/StructuredAggregation/uncoupled/MueLu_UncoupledIndexManager_decl.hpp b/packages/muelu/src/Graph/StructuredAggregation/uncoupled/MueLu_UncoupledIndexManager_decl.hpp index 4a8d987b423b..93229f814cce 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/uncoupled/MueLu_UncoupledIndexManager_decl.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/uncoupled/MueLu_UncoupledIndexManager_decl.hpp @@ -77,76 +77,73 @@ namespace MueLu { correspond to nodes. While not strictly necessary, it might be convenient. 
*/ - template - class UncoupledIndexManager : public IndexManager { +template +class UncoupledIndexManager : public IndexManager { #undef MUELU_UNCOUPLEDINDEXMANAGER_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: + public: + //LBV: I doubt that it makes sense to have + // this particular constructor since it is + // not used anywhere and parameters cannot + // all accessible after construction. + UncoupledIndexManager() = default; - //LBV: I doubt that it makes sense to have - // this particular constructor since it is - // not used anywhere and parameters cannot - // all accessible after construction. - UncoupledIndexManager() = default; + UncoupledIndexManager(const RCP > comm, const bool coupled, + const int NumDimensions, const int interpolationOrder, + const int MyRank, const int NumRanks, + const Array GFineNodesPerDir, + const Array LFineNodesPerDir, + const Array CoarseRate, + const bool singleCoarsePoint); - UncoupledIndexManager(const RCP > comm, const bool coupled, - const int NumDimensions, const int interpolationOrder, - const int MyRank, const int NumRanks, - const Array GFineNodesPerDir, - const Array LFineNodesPerDir, - const Array CoarseRate, - const bool singleCoarsePoint); + virtual ~UncoupledIndexManager() {} - virtual ~UncoupledIndexManager() {} + void computeGlobalCoarseParameters(); - void computeGlobalCoarseParameters(); + std::vector > getCoarseMeshData() const; - std::vector > getCoarseMeshData() const; + void getGhostedNodesData(const RCP fineMap, + Array& ghostedNodeCoarseLIDs, + Array& ghostedNodeCoarsePIDs, + Array& ghostedNodeCoarseGIDs) const; - void getGhostedNodesData(const RCP fineMap, - Array& ghostedNodeCoarseLIDs, - Array& ghostedNodeCoarsePIDs, - Array& ghostedNodeCoarseGIDs) const; + void getCoarseNodesData(const RCP fineCoordinatesMap, + Array& coarseNodeCoarseGIDs, + Array& coarseNodeFineGIDs) const; - void getCoarseNodesData(const RCP fineCoordinatesMap, - Array& coarseNodeCoarseGIDs, - Array& 
coarseNodeFineGIDs) const; + void getFineNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const; - void getFineNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const; + void getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const; - void getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const; + void getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const; - void getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const; + void getFineNodeGID(const GO i, const GO j, const GO k, GO& myGID) const; - void getFineNodeGID(const GO i, const GO j, const GO k, GO& myGID) const; + void getFineNodeLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getFineNodeLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getCoarseNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const; - void getCoarseNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const; + void getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const; - void getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const; + void getCoarseNodeGID(const GO i, const GO j, const GO k, GO& myGID) const; - void getCoarseNodeGID(const GO i, const GO j, const GO k, GO& myGID) const; + void getCoarseNodeLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getCoarseNodeLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getCoarseNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getCoarseNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getGhostedNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const; - void getGhostedNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getGhostedNodeCoarseLID(const LO i, const LO j, const LO k, LO& myLID) const; - void 
getGhostedNodeCoarseLID(const LO i, const LO j, const LO k, LO& myLID) const; + private: + const int myRank; ///< Local rank ID. + const int numRanks; ///< Number of ranks used to decompose the problem. +}; - private: - - const int myRank; ///< Local rank ID. - const int numRanks; ///< Number of ranks used to decompose the problem. - - }; - -} //namespace MueLu +} //namespace MueLu #define MUELU_UNCOUPLEDINDEXMANAGER_SHORT -#endif // MUELU_UNCOUPLEDINDEXMANAGER_DECL_HPP +#endif // MUELU_UNCOUPLEDINDEXMANAGER_DECL_HPP diff --git a/packages/muelu/src/Graph/StructuredAggregation/uncoupled/MueLu_UncoupledIndexManager_def.hpp b/packages/muelu/src/Graph/StructuredAggregation/uncoupled/MueLu_UncoupledIndexManager_def.hpp index efd12ebd68c9..a10f9a0c07d9 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/uncoupled/MueLu_UncoupledIndexManager_def.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/uncoupled/MueLu_UncoupledIndexManager_def.hpp @@ -52,194 +52,188 @@ namespace MueLu { - template - UncoupledIndexManager:: - UncoupledIndexManager(const RCP > comm, const bool coupled, - const int NumDimensions, const int interpolationOrder, - const int MyRank, const int NumRanks, - const Array GFineNodesPerDir, const Array LFineNodesPerDir, - const Array CoarseRate, const bool singleCoarsePoint) : - IndexManager(comm, coupled, singleCoarsePoint, NumDimensions, interpolationOrder, - Array(3, -1), LFineNodesPerDir), - myRank(MyRank), numRanks(NumRanks) - { - - // Load coarse rate, being careful about formating - for(int dim = 0; dim < 3; ++dim) { - if(dim < this->numDimensions) { - if(CoarseRate.size() == 1) { - this->coarseRate[dim] = CoarseRate[0]; - } else if(CoarseRate.size() == this->numDimensions) { - this->coarseRate[dim] = CoarseRate[dim]; - } - } else { - this->coarseRate[dim] = 1; +template +UncoupledIndexManager:: + UncoupledIndexManager(const RCP > comm, const bool coupled, + const int NumDimensions, const int interpolationOrder, + const int MyRank, const 
int NumRanks, + const Array GFineNodesPerDir, const Array LFineNodesPerDir, + const Array CoarseRate, const bool singleCoarsePoint) + : IndexManager(comm, coupled, singleCoarsePoint, NumDimensions, interpolationOrder, + Array(3, -1), LFineNodesPerDir) + , myRank(MyRank) + , numRanks(NumRanks) { + // Load coarse rate, being careful about formating + for (int dim = 0; dim < 3; ++dim) { + if (dim < this->numDimensions) { + if (CoarseRate.size() == 1) { + this->coarseRate[dim] = CoarseRate[0]; + } else if (CoarseRate.size() == this->numDimensions) { + this->coarseRate[dim] = CoarseRate[dim]; } + } else { + this->coarseRate[dim] = 1; } + } - this->computeMeshParameters(); - this->gNumCoarseNodes10 = Teuchos::OrdinalTraits::invalid(); - this->gNumCoarseNodes = Teuchos::OrdinalTraits::invalid(); - } // Constructor - - template - void UncoupledIndexManager:: - computeGlobalCoarseParameters() { - GO input[1] = {as(this->lNumCoarseNodes)}, output[1] = {0}; - Teuchos::reduceAll(*(this->comm_), Teuchos::REDUCE_SUM, 1, input, output); - this->gNumCoarseNodes = output[0]; - } // computeGlobalCoarseParameters - - template - void UncoupledIndexManager:: - getGhostedNodesData(const RCP/* fineMap */, - Array& ghostedNodeCoarseLIDs, - Array& ghostedNodeCoarsePIDs, - Array& /* ghostedNodeCoarseGIDs */) const { - - // First we allocate memory for the outputs - ghostedNodeCoarseLIDs.resize(this->getNumLocalGhostedNodes()); - ghostedNodeCoarsePIDs.resize(this->getNumLocalGhostedNodes()); - // In the uncoupled case the data required is trivial to provide! 
- for(LO idx = 0; idx < this->getNumLocalGhostedNodes(); ++idx) { - ghostedNodeCoarseLIDs[idx] = idx; - ghostedNodeCoarsePIDs[idx] = myRank; - } - } // getGhostedNodesData - - template - void UncoupledIndexManager:: - getCoarseNodesData(const RCP fineCoordinatesMap, - Array& coarseNodeCoarseGIDs, - Array& coarseNodeFineGIDs) const { - - // Allocate sufficient amount of storage in output arrays - coarseNodeCoarseGIDs.resize(this->getNumLocalCoarseNodes()); - coarseNodeFineGIDs.resize(this->getNumLocalCoarseNodes()); - - // Load all the GIDs on the fine mesh - ArrayView fineNodeGIDs = fineCoordinatesMap->getLocalElementList(); - - // Extract the fine LIDs of the coarse nodes and store the corresponding GIDs - LO fineLID; - for(LO coarseLID = 0; coarseLID < this->getNumLocalCoarseNodes(); ++coarseLID) { - Array coarseIndices(3), fineIndices(3); - this->getCoarseNodeLocalTuple(coarseLID, - coarseIndices[0], - coarseIndices[1], - coarseIndices[2]); - for(int dim = 0; dim < 3; ++dim) { - if(coarseIndices[dim] == this->lCoarseNodesPerDir[dim] - 1) { - if(this->lCoarseNodesPerDir[dim] == 1) { - fineIndices[dim] = 0; - } else { - fineIndices[dim] = this->lFineNodesPerDir[dim] - 1; - } + this->computeMeshParameters(); + this->gNumCoarseNodes10 = Teuchos::OrdinalTraits::invalid(); + this->gNumCoarseNodes = Teuchos::OrdinalTraits::invalid(); +} // Constructor + +template +void UncoupledIndexManager:: + computeGlobalCoarseParameters() { + GO input[1] = {as(this->lNumCoarseNodes)}, output[1] = {0}; + Teuchos::reduceAll(*(this->comm_), Teuchos::REDUCE_SUM, 1, input, output); + this->gNumCoarseNodes = output[0]; +} // computeGlobalCoarseParameters + +template +void UncoupledIndexManager:: + getGhostedNodesData(const RCP /* fineMap */, + Array& ghostedNodeCoarseLIDs, + Array& ghostedNodeCoarsePIDs, + Array& /* ghostedNodeCoarseGIDs */) const { + // First we allocate memory for the outputs + ghostedNodeCoarseLIDs.resize(this->getNumLocalGhostedNodes()); + 
ghostedNodeCoarsePIDs.resize(this->getNumLocalGhostedNodes()); + // In the uncoupled case the data required is trivial to provide! + for (LO idx = 0; idx < this->getNumLocalGhostedNodes(); ++idx) { + ghostedNodeCoarseLIDs[idx] = idx; + ghostedNodeCoarsePIDs[idx] = myRank; + } +} // getGhostedNodesData + +template +void UncoupledIndexManager:: + getCoarseNodesData(const RCP fineCoordinatesMap, + Array& coarseNodeCoarseGIDs, + Array& coarseNodeFineGIDs) const { + // Allocate sufficient amount of storage in output arrays + coarseNodeCoarseGIDs.resize(this->getNumLocalCoarseNodes()); + coarseNodeFineGIDs.resize(this->getNumLocalCoarseNodes()); + + // Load all the GIDs on the fine mesh + ArrayView fineNodeGIDs = fineCoordinatesMap->getLocalElementList(); + + // Extract the fine LIDs of the coarse nodes and store the corresponding GIDs + LO fineLID; + for (LO coarseLID = 0; coarseLID < this->getNumLocalCoarseNodes(); ++coarseLID) { + Array coarseIndices(3), fineIndices(3); + this->getCoarseNodeLocalTuple(coarseLID, + coarseIndices[0], + coarseIndices[1], + coarseIndices[2]); + for (int dim = 0; dim < 3; ++dim) { + if (coarseIndices[dim] == this->lCoarseNodesPerDir[dim] - 1) { + if (this->lCoarseNodesPerDir[dim] == 1) { + fineIndices[dim] = 0; } else { - fineIndices[dim] = coarseIndices[dim]*this->coarseRate[dim]; + fineIndices[dim] = this->lFineNodesPerDir[dim] - 1; } + } else { + fineIndices[dim] = coarseIndices[dim] * this->coarseRate[dim]; } - - fineLID = fineIndices[2]*this->lNumFineNodes10 - + fineIndices[1]*this->lFineNodesPerDir[0] - + fineIndices[0]; - coarseNodeFineGIDs[coarseLID] = fineNodeGIDs[fineLID]; - } - } // getCoarseNodesData - - template - std::vector > UncoupledIndexManager:: - getCoarseMeshData() const { - std::vector > coarseMeshData; - return coarseMeshData; - } - - template - void UncoupledIndexManager:: - getFineNodeGlobalTuple(const GO /* myGID */, GO& /* i */, GO& /* j */, GO& /* k */) const { - } - - template - void UncoupledIndexManager:: - 
getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const { - LO tmp; - k = myLID / this->lNumFineNodes10; - tmp = myLID % this->lNumFineNodes10; - j = tmp / this->lFineNodesPerDir[0]; - i = tmp % this->lFineNodesPerDir[0]; - } // getFineNodeLocalTuple - - template - void UncoupledIndexManager:: - getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const { - LO tmp; - k = myLID / this->lNumFineNodes10; - tmp = myLID % this->lNumFineNodes10; - j = tmp / this->lFineNodesPerDir[0]; - i = tmp % this->lFineNodesPerDir[0]; - - k += this->offsets[2]; - j += this->offsets[1]; - i += this->offsets[0]; - } // getFineNodeGhostedTuple - - template - void UncoupledIndexManager:: - getFineNodeGID(const GO /* i */, const GO /* j */, const GO /* k */, GO& /* myGID */) const { - } - template - void UncoupledIndexManager:: - getFineNodeLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { + fineLID = fineIndices[2] * this->lNumFineNodes10 + fineIndices[1] * this->lFineNodesPerDir[0] + fineIndices[0]; + coarseNodeFineGIDs[coarseLID] = fineNodeGIDs[fineLID]; } - - template - void UncoupledIndexManager:: - getCoarseNodeGlobalTuple(const GO /* myGID */, GO& /* i */, GO& /* j */, GO& /* k */) const { - } - - template - void UncoupledIndexManager:: - getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const { - LO tmp; - k = myLID / this->lNumCoarseNodes10; - tmp = myLID % this->lNumCoarseNodes10; - j = tmp / this->lCoarseNodesPerDir[0]; - i = tmp % this->lCoarseNodesPerDir[0]; - } // getCoarseNodeLocalTuple - - template - void UncoupledIndexManager:: - getCoarseNodeGID(const GO /* i */, const GO /* j */, const GO /* k */, GO& /* myGID */) const { - } - - template - void UncoupledIndexManager:: - getCoarseNodeLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { - } - - template - void UncoupledIndexManager:: - getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const { - myLID = 
k*this->numGhostedNodes10 + j*this->ghostedNodesPerDir[0] + i; - } // getCoarseNodeGhostedLID - - template - void UncoupledIndexManager:: - getCoarseNodeFineLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { - } - - template - void UncoupledIndexManager:: - getGhostedNodeFineLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { - } - - template - void UncoupledIndexManager:: - getGhostedNodeCoarseLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { - } - -} //namespace MueLu +} // getCoarseNodesData + +template +std::vector > UncoupledIndexManager:: + getCoarseMeshData() const { + std::vector > coarseMeshData; + return coarseMeshData; +} + +template +void UncoupledIndexManager:: + getFineNodeGlobalTuple(const GO /* myGID */, GO& /* i */, GO& /* j */, GO& /* k */) const { +} + +template +void UncoupledIndexManager:: + getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const { + LO tmp; + k = myLID / this->lNumFineNodes10; + tmp = myLID % this->lNumFineNodes10; + j = tmp / this->lFineNodesPerDir[0]; + i = tmp % this->lFineNodesPerDir[0]; +} // getFineNodeLocalTuple + +template +void UncoupledIndexManager:: + getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const { + LO tmp; + k = myLID / this->lNumFineNodes10; + tmp = myLID % this->lNumFineNodes10; + j = tmp / this->lFineNodesPerDir[0]; + i = tmp % this->lFineNodesPerDir[0]; + + k += this->offsets[2]; + j += this->offsets[1]; + i += this->offsets[0]; +} // getFineNodeGhostedTuple + +template +void UncoupledIndexManager:: + getFineNodeGID(const GO /* i */, const GO /* j */, const GO /* k */, GO& /* myGID */) const { +} + +template +void UncoupledIndexManager:: + getFineNodeLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { +} + +template +void UncoupledIndexManager:: + getCoarseNodeGlobalTuple(const GO /* myGID */, GO& /* i */, GO& /* j */, GO& /* k */) const { +} + +template 
+void UncoupledIndexManager:: + getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const { + LO tmp; + k = myLID / this->lNumCoarseNodes10; + tmp = myLID % this->lNumCoarseNodes10; + j = tmp / this->lCoarseNodesPerDir[0]; + i = tmp % this->lCoarseNodesPerDir[0]; +} // getCoarseNodeLocalTuple + +template +void UncoupledIndexManager:: + getCoarseNodeGID(const GO /* i */, const GO /* j */, const GO /* k */, GO& /* myGID */) const { +} + +template +void UncoupledIndexManager:: + getCoarseNodeLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { +} + +template +void UncoupledIndexManager:: + getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const { + myLID = k * this->numGhostedNodes10 + j * this->ghostedNodesPerDir[0] + i; +} // getCoarseNodeGhostedLID + +template +void UncoupledIndexManager:: + getCoarseNodeFineLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { +} + +template +void UncoupledIndexManager:: + getGhostedNodeFineLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { +} + +template +void UncoupledIndexManager:: + getGhostedNodeCoarseLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { +} + +} //namespace MueLu #endif /* MUELU_UNCOUPLEDINDEXMANAGER_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_decl.hpp index aacc182dc4b3..a6db95d34575 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_decl.hpp @@ -55,7 +55,7 @@ #include "MueLu_GraphBase.hpp" namespace MueLu { - /*! +/*! @class AggregationPhase1Algorithm class. @brief Algorithm for coarsening a graph with uncoupled aggregation. 
@@ -79,51 +79,47 @@ namespace MueLu { Only nodes with state READY are changed to AGGREGATED. Nodes with other states are not touched. */ - template - class AggregationPhase1Algorithm : - public MueLu::AggregationAlgorithmBase { +template +class AggregationPhase1Algorithm : public MueLu::AggregationAlgorithmBase { #undef MUELU_AGGREGATIONPHASE1ALGORITHM_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - AggregationPhase1Algorithm(const RCP& /* graphFact */ = Teuchos::null) { } + //! Constructor. + AggregationPhase1Algorithm(const RCP& /* graphFact */ = Teuchos::null) {} - //! Destructor. - virtual ~AggregationPhase1Algorithm() { } + //! Destructor. + virtual ~AggregationPhase1Algorithm() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. */ + void BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; + //@} - void BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; - //@} + std::string description() const { return "Phase 1 (main)"; } - std::string description() const { return "Phase 1 (main)"; } - - private: - - /*! @brief Utility to take a list of integers and reorder them randomly (by using a local permutation). + private: + /*! @brief Utility to take a list of integers and reorder them randomly (by using a local permutation). @param list On input, a bunch of integers. On output, the same integers in a different order that is determined randomly. */ - void RandomReorder(ArrayRCP list) const; - - /*! 
@brief Generate a random number in the range [min, max] */ - int RandomOrdinal(int min, int max) const; + void RandomReorder(ArrayRCP list) const; - }; + /*! @brief Generate a random number in the range [min, max] */ + int RandomOrdinal(int min, int max) const; +}; -} //namespace MueLu +} //namespace MueLu #define MUELU_AGGREGATIONPHASE1ALGORITHM_SHORT #endif /* MUELU_AGGREGATIONPHASE1ALGORITHM_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_def.hpp index 958e5b8d0930..642bf281977c 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_def.hpp @@ -62,189 +62,188 @@ namespace MueLu { - template - void AggregationPhase1Algorithm:: - BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, - LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildAggregates"); - - std::string orderingStr = params.get("aggregation: ordering"); - int maxNeighAlreadySelected = params.get ("aggregation: max selected neighbors"); - int minNodesPerAggregate = params.get ("aggregation: min agg size"); - int maxNodesPerAggregate = params.get ("aggregation: max agg size"); - bool matchMLBehavior = params.get("aggregation: match ML phase1"); - - TEUCHOS_TEST_FOR_EXCEPTION(maxNodesPerAggregate < minNodesPerAggregate, Exceptions::RuntimeError, - "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!"); - - enum { - O_NATURAL, - O_RANDOM, - O_GRAPH - } ordering; - ordering = O_NATURAL; // initialize variable (fix CID 143665) - if (orderingStr == "natural") ordering = O_NATURAL; - if (orderingStr == "random" ) ordering = O_RANDOM; - if (orderingStr == "graph" ) ordering = O_GRAPH; - - const LO 
numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); - - ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); - ArrayRCP procWinner = aggregates.GetProcWinner() ->getDataNonConst(0); - - LO numLocalAggregates = aggregates.GetNumAggregates(); - - ArrayRCP randomVector; - if (ordering == O_RANDOM) { - randomVector = arcp(numRows); - for (LO i = 0; i < numRows; i++) - randomVector[i] = i; - RandomReorder(randomVector); - } +template +void AggregationPhase1Algorithm:: + BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, + LO& numNonAggregatedNodes) const { + Monitor m(*this, "BuildAggregates"); + + std::string orderingStr = params.get("aggregation: ordering"); + int maxNeighAlreadySelected = params.get("aggregation: max selected neighbors"); + int minNodesPerAggregate = params.get("aggregation: min agg size"); + int maxNodesPerAggregate = params.get("aggregation: max agg size"); + bool matchMLBehavior = params.get("aggregation: match ML phase1"); + + TEUCHOS_TEST_FOR_EXCEPTION(maxNodesPerAggregate < minNodesPerAggregate, Exceptions::RuntimeError, + "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!"); + + enum { + O_NATURAL, + O_RANDOM, + O_GRAPH + } ordering; + ordering = O_NATURAL; // initialize variable (fix CID 143665) + if (orderingStr == "natural") ordering = O_NATURAL; + if (orderingStr == "random") ordering = O_RANDOM; + if (orderingStr == "graph") ordering = O_GRAPH; + + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); + + ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); + ArrayRCP procWinner = aggregates.GetProcWinner()->getDataNonConst(0); + + LO numLocalAggregates = aggregates.GetNumAggregates(); + + ArrayRCP randomVector; + if (ordering == O_RANDOM) { + randomVector = arcp(numRows); + for (LO i 
= 0; i < numRows; i++) + randomVector[i] = i; + RandomReorder(randomVector); + } - int aggIndex = -1; - size_t aggSize = 0; - std::vector aggList(graph.getLocalMaxNumRowEntries()); - - std::queue graphOrderQueue; - - // Main loop over all local rows of graph(A) - for (LO i = 0; i < numRows; i++) { - // Step 1: pick the next node to aggregate - LO rootCandidate = 0; - if (ordering == O_NATURAL) rootCandidate = i; - else if (ordering == O_RANDOM) rootCandidate = randomVector[i]; - else if (ordering == O_GRAPH) { - - if (graphOrderQueue.size() == 0) { - // Current queue is empty for "graph" ordering, populate with one READY node - for (LO jnode = 0; jnode < numRows; jnode++) - if (aggStat[jnode] == READY) { - graphOrderQueue.push(jnode); - break; - } - } - if (graphOrderQueue.size() == 0) { - // There are no more ready nodes, end the phase - break; - } - rootCandidate = graphOrderQueue.front(); // take next node from graph ordering queue - graphOrderQueue.pop(); // delete this node in list + int aggIndex = -1; + size_t aggSize = 0; + std::vector aggList(graph.getLocalMaxNumRowEntries()); + + std::queue graphOrderQueue; + + // Main loop over all local rows of graph(A) + for (LO i = 0; i < numRows; i++) { + // Step 1: pick the next node to aggregate + LO rootCandidate = 0; + if (ordering == O_NATURAL) + rootCandidate = i; + else if (ordering == O_RANDOM) + rootCandidate = randomVector[i]; + else if (ordering == O_GRAPH) { + if (graphOrderQueue.size() == 0) { + // Current queue is empty for "graph" ordering, populate with one READY node + for (LO jnode = 0; jnode < numRows; jnode++) + if (aggStat[jnode] == READY) { + graphOrderQueue.push(jnode); + break; + } + } + if (graphOrderQueue.size() == 0) { + // There are no more ready nodes, end the phase + break; } + rootCandidate = graphOrderQueue.front(); // take next node from graph ordering queue + graphOrderQueue.pop(); // delete this node in list + } - if (aggStat[rootCandidate] != READY) - continue; + if 
(aggStat[rootCandidate] != READY) + continue; - // Step 2: build tentative aggregate - aggSize = 0; - aggList[aggSize++] = rootCandidate; + // Step 2: build tentative aggregate + aggSize = 0; + aggList[aggSize++] = rootCandidate; - ArrayView neighOfINode = graph.getNeighborVertices(rootCandidate); + ArrayView neighOfINode = graph.getNeighborVertices(rootCandidate); - // If the number of neighbors is less than the minimum number of nodes - // per aggregate, we know this is not going to be a valid root, and we - // may skip it, but only for "natural" and "random" (for "graph" we still - // need to fetch the list of local neighbors to continue) - if ((ordering == O_NATURAL || ordering == O_RANDOM) && - neighOfINode.size() < minNodesPerAggregate) { - continue; - } + // If the number of neighbors is less than the minimum number of nodes + // per aggregate, we know this is not going to be a valid root, and we + // may skip it, but only for "natural" and "random" (for "graph" we still + // need to fetch the list of local neighbors to continue) + if ((ordering == O_NATURAL || ordering == O_RANDOM) && + neighOfINode.size() < minNodesPerAggregate) { + continue; + } - LO numAggregatedNeighbours = 0; - - for (int j = 0; j < neighOfINode.size(); j++) { - LO neigh = neighOfINode[j]; - - if (neigh != rootCandidate && graph.isLocalNeighborVertex(neigh)) { - - if (aggStat[neigh] == READY || aggStat[neigh] == NOTSEL) { - // If aggregate size does not exceed max size, add node to the - // tentative aggregate - // NOTE: We do not exit the loop over all neighbours since we have - // still to count all aggregated neighbour nodes for the - // aggregation criteria - // NOTE: We check here for the maximum aggregation size. If we - // would do it below with all the other check too big aggregates - // would not be accepted at all. 
- if (aggSize < as(maxNodesPerAggregate)) - aggList[aggSize++] = neigh; - - } else if(!matchMLBehavior || aggStat[neigh] != IGNORED) { - // NOTE: ML checks against BOUNDARY here, but boundary nodes are flagged as IGNORED by - // the time we get to Phase 1, so we check IGNORED instead - numAggregatedNeighbours++; - } + LO numAggregatedNeighbours = 0; + + for (int j = 0; j < neighOfINode.size(); j++) { + LO neigh = neighOfINode[j]; + + if (neigh != rootCandidate && graph.isLocalNeighborVertex(neigh)) { + if (aggStat[neigh] == READY || aggStat[neigh] == NOTSEL) { + // If aggregate size does not exceed max size, add node to the + // tentative aggregate + // NOTE: We do not exit the loop over all neighbours since we have + // still to count all aggregated neighbour nodes for the + // aggregation criteria + // NOTE: We check here for the maximum aggregation size. If we + // would do it below with all the other check too big aggregates + // would not be accepted at all. + if (aggSize < as(maxNodesPerAggregate)) + aggList[aggSize++] = neigh; + + } else if (!matchMLBehavior || aggStat[neigh] != IGNORED) { + // NOTE: ML checks against BOUNDARY here, but boundary nodes are flagged as IGNORED by + // the time we get to Phase 1, so we check IGNORED instead + numAggregatedNeighbours++; } } + } - // Step 3: check if tentative aggregate is acceptable - if ((numAggregatedNeighbours <= maxNeighAlreadySelected) && // too many connections to other aggregates - (aggSize >= as(minNodesPerAggregate))) { // too few nodes in the tentative aggregate - // Accept new aggregate - // rootCandidate becomes the root of the newly formed aggregate - aggregates.SetIsRoot(rootCandidate); - aggIndex = numLocalAggregates++; - - for (size_t k = 0; k < aggSize; k++) { - aggStat [aggList[k]] = AGGREGATED; - vertex2AggId[aggList[k]] = aggIndex; - procWinner [aggList[k]] = myRank; - } + // Step 3: check if tentative aggregate is acceptable + if ((numAggregatedNeighbours <= maxNeighAlreadySelected) && // too 
many connections to other aggregates + (aggSize >= as(minNodesPerAggregate))) { // too few nodes in the tentative aggregate + // Accept new aggregate + // rootCandidate becomes the root of the newly formed aggregate + aggregates.SetIsRoot(rootCandidate); + aggIndex = numLocalAggregates++; + + for (size_t k = 0; k < aggSize; k++) { + aggStat[aggList[k]] = AGGREGATED; + vertex2AggId[aggList[k]] = aggIndex; + procWinner[aggList[k]] = myRank; + } - numNonAggregatedNodes -= aggSize; + numNonAggregatedNodes -= aggSize; - } else { - // Aggregate is not accepted - aggStat[rootCandidate] = NOTSEL; + } else { + // Aggregate is not accepted + aggStat[rootCandidate] = NOTSEL; - // Need this for the "graph" ordering below - // The original candidate is always aggList[0] - aggSize = 1; - } + // Need this for the "graph" ordering below + // The original candidate is always aggList[0] + aggSize = 1; + } - if (ordering == O_GRAPH) { - // Add candidates to the list of nodes - // NOTE: the code have slightly different meanings depending on context: - // - if aggregate was accepted, we add neighbors of neighbors of the original candidate - // - if aggregate was not accepted, we add neighbors of the original candidate - for (size_t k = 0; k < aggSize; k++) { - ArrayView neighOfJNode = graph.getNeighborVertices(aggList[k]); + if (ordering == O_GRAPH) { + // Add candidates to the list of nodes + // NOTE: the code have slightly different meanings depending on context: + // - if aggregate was accepted, we add neighbors of neighbors of the original candidate + // - if aggregate was not accepted, we add neighbors of the original candidate + for (size_t k = 0; k < aggSize; k++) { + ArrayView neighOfJNode = graph.getNeighborVertices(aggList[k]); - for (int j = 0; j < neighOfJNode.size(); j++) { - LO neigh = neighOfJNode[j]; + for (int j = 0; j < neighOfJNode.size(); j++) { + LO neigh = neighOfJNode[j]; - if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == READY) - 
graphOrderQueue.push(neigh); - } + if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == READY) + graphOrderQueue.push(neigh); } } } - - // Reset all NOTSEL vertices to READY - // This simplifies other algorithms - for (LO i = 0; i < numRows; i++) - if (aggStat[i] == NOTSEL) - aggStat[i] = READY; - - // update aggregate object - aggregates.SetNumAggregates(numLocalAggregates); - } - - template - void AggregationPhase1Algorithm::RandomReorder(ArrayRCP list) const { - //TODO: replace int - int n = list.size(); - for(int i = 0; i < n-1; i++) - std::swap(list[i], list[RandomOrdinal(i,n-1)]); } - template - int AggregationPhase1Algorithm::RandomOrdinal(int min, int max) const { - return min + as((max-min+1) * (static_cast(std::rand()) / (RAND_MAX + 1.0))); - } - -} // end namespace - + // Reset all NOTSEL vertices to READY + // This simplifies other algorithms + for (LO i = 0; i < numRows; i++) + if (aggStat[i] == NOTSEL) + aggStat[i] = READY; + + // update aggregate object + aggregates.SetNumAggregates(numLocalAggregates); +} + +template +void AggregationPhase1Algorithm::RandomReorder(ArrayRCP list) const { + //TODO: replace int + int n = list.size(); + for (int i = 0; i < n - 1; i++) + std::swap(list[i], list[RandomOrdinal(i, n - 1)]); +} + +template +int AggregationPhase1Algorithm::RandomOrdinal(int min, int max) const { + return min + as((max - min + 1) * (static_cast(std::rand()) / (RAND_MAX + 1.0))); +} + +} // namespace MueLu #endif /* MUELU_AGGREGATIONPHASE1ALGORITHM_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_kokkos_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_kokkos_decl.hpp index 295f9d927e43..b140cce46be3 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_kokkos_decl.hpp @@ -58,7 +58,7 @@ #include 
"MueLu_LWGraph_kokkos.hpp" namespace MueLu { - /*! +/*! @class AggregationPhase1Algorithm class. @brief Algorithm for coarsening a graph with uncoupled aggregation. @@ -82,60 +82,57 @@ namespace MueLu { Only nodes with state READY are changed to AGGREGATED. Nodes with other states are not touched. */ - template - class AggregationPhase1Algorithm_kokkos : - public MueLu::AggregationAlgorithmBase_kokkos { +template +class AggregationPhase1Algorithm_kokkos : public MueLu::AggregationAlgorithmBase_kokkos { #undef MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - using device_type = typename LWGraph_kokkos::device_type; - using execution_space = typename LWGraph_kokkos::execution_space; - using memory_space = typename LWGraph_kokkos::memory_space; + public: + using device_type = typename LWGraph_kokkos::device_type; + using execution_space = typename LWGraph_kokkos::execution_space; + using memory_space = typename LWGraph_kokkos::memory_space; - //! @name Constructors/Destructors. - //@{ + //! @name Constructors/Destructors. + //@{ - //! Constructor. - AggregationPhase1Algorithm_kokkos(const RCP& /* graphFact */ = Teuchos::null) { } + //! Constructor. + AggregationPhase1Algorithm_kokkos(const RCP& /* graphFact */ = Teuchos::null) {} - //! Destructor. - virtual ~AggregationPhase1Algorithm_kokkos() { } + //! Destructor. + virtual ~AggregationPhase1Algorithm_kokkos() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. 
*/ + void BuildAggregates(const Teuchos::ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const; - void BuildAggregates(const Teuchos::ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; + void BuildAggregatesRandom(const LO maxAggSize, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const; - void BuildAggregatesRandom(const LO maxAggSize, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; + void BuildAggregatesDeterministic(const LO maxAggSize, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const; + //@} - void BuildAggregatesDeterministic(const LO maxAggSize, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; - //@} + std::string description() const { return "Phase 1 (main)"; } +}; - std::string description() const { return "Phase 1 (main)"; } - - }; - -} //namespace MueLu +} //namespace MueLu #define MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_SHORT -#endif // MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DECL_HPP +#endif // MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_kokkos_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_kokkos_def.hpp index 82e837c0ef41..eb2f9d5a067a 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_kokkos_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_kokkos_def.hpp @@ -66,221 +66,217 @@ namespace MueLu { - template - void AggregationPhase1Algorithm_kokkos:: - BuildAggregates(const 
Teuchos::ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const { +template +void AggregationPhase1Algorithm_kokkos:: + BuildAggregates(const Teuchos::ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const { + int minNodesPerAggregate = params.get("aggregation: min agg size"); + int maxNodesPerAggregate = params.get("aggregation: max agg size"); - int minNodesPerAggregate = params.get ("aggregation: min agg size"); - int maxNodesPerAggregate = params.get ("aggregation: max agg size"); + TEUCHOS_TEST_FOR_EXCEPTION(maxNodesPerAggregate < minNodesPerAggregate, + Exceptions::RuntimeError, + "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!"); - TEUCHOS_TEST_FOR_EXCEPTION(maxNodesPerAggregate < minNodesPerAggregate, - Exceptions::RuntimeError, - "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!"); - - // Distance-2 gives less control than serial uncoupled phase 1 - // no custom row reordering because would require making deep copy - // of local matrix entries and permuting it can only enforce - // max aggregate size - { - if(params.get("aggregation: deterministic")) - { - Monitor m(*this, "BuildAggregatesDeterministic"); - BuildAggregatesDeterministic(maxNodesPerAggregate, graph, - aggregates, aggStat, numNonAggregatedNodes); - } else { - Monitor m(*this, "BuildAggregatesRandom"); - BuildAggregatesRandom(maxNodesPerAggregate, graph, - aggregates, aggStat, numNonAggregatedNodes); - } + // Distance-2 gives less control than serial uncoupled phase 1 + // no custom row reordering because would require making deep copy + // of local matrix entries and permuting it can only enforce + // max aggregate size + { + if (params.get("aggregation: deterministic")) { + 
Monitor m(*this, "BuildAggregatesDeterministic"); + BuildAggregatesDeterministic(maxNodesPerAggregate, graph, + aggregates, aggStat, numNonAggregatedNodes); + } else { + Monitor m(*this, "BuildAggregatesRandom"); + BuildAggregatesRandom(maxNodesPerAggregate, graph, + aggregates, aggStat, numNonAggregatedNodes); } } +} - template - void AggregationPhase1Algorithm_kokkos:: - BuildAggregatesRandom(const LO maxAggSize, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const - { - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); - - // Extract data from aggregates - auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto colors = aggregates.GetGraphColors(); +template +void AggregationPhase1Algorithm_kokkos:: + BuildAggregatesRandom(const LO maxAggSize, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const { + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); - auto lclLWGraph = graph.getLocalLWGraph(); - - LO numAggregatedNodes = 0; - LO numLocalAggregates = aggregates.GetNumAggregates(); - Kokkos::View aggCount("aggCount"); - Kokkos::deep_copy(aggCount, numLocalAggregates); - Kokkos::parallel_for("Aggregation Phase 1: initial reduction over color == 1", - Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA (const LO nodeIdx) { - if(colors(nodeIdx) == 1 && aggStat(nodeIdx) == READY) { - const LO aggIdx = Kokkos::atomic_fetch_add (&aggCount(), 1); - vertex2AggId(nodeIdx, 0) = aggIdx; - aggStat(nodeIdx) = AGGREGATED; - procWinner(nodeIdx, 0) = myRank; - } - }); - // Truely we wish to compute: numAggregatedNodes = aggCount - numLocalAggregates - // before updating the value of numLocalAggregates. 
- // But since we also do not want to create a host mirror of aggCount we do some trickery... - numAggregatedNodes -= numLocalAggregates; - Kokkos::deep_copy(numLocalAggregates, aggCount); - numAggregatedNodes += numLocalAggregates; - - // Compute the initial size of the aggregates. - // Note lbv 12-21-17: I am pretty sure that the aggregates will always be of size 1 - // at this point so we could simplify the code below a lot if this - // assumption is correct... - Kokkos::View aggSizesView("aggSizes", numLocalAggregates); - { - // Here there is a possibility that two vertices assigned to two different threads contribute - // to the same aggregate if somethings happened before phase 1? - auto aggSizesScatterView = Kokkos::Experimental::create_scatter_view(aggSizesView); - Kokkos::parallel_for("Aggregation Phase 1: compute initial aggregates size", - Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA (const LO nodeIdx) { - auto aggSizesScatterViewAccess = aggSizesScatterView.access(); - if(vertex2AggId(nodeIdx, 0) >= 0) - aggSizesScatterViewAccess(vertex2AggId(nodeIdx, 0)) += 1; - }); - Kokkos::Experimental::contribute(aggSizesView, aggSizesScatterView); - } + // Extract data from aggregates + auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto colors = aggregates.GetGraphColors(); - LO tmpNumAggregatedNodes = 0; - Kokkos::parallel_reduce("Aggregation Phase 1: main parallel_reduce over aggSizes", - Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA (const size_t nodeIdx, LO & lNumAggregatedNodes) { - if(colors(nodeIdx) != 1 - && (aggStat(nodeIdx) == READY || aggStat(nodeIdx) == NOTSEL)) { - // Get neighbors of vertex i and look for local, aggregated, - // color 1 neighbor (valid root). 
- auto neighbors = lclLWGraph.getNeighborVertices(nodeIdx); - for(LO j = 0; j < neighbors.length; ++j) { - auto nei = neighbors.colidx(j); - if(lclLWGraph.isLocalNeighborVertex(nei) && colors(nei) == 1 - && aggStat(nei) == AGGREGATED) { + auto lclLWGraph = graph.getLocalLWGraph(); - // This atomic guarentees that any other node trying to - // join aggregate agg has the correct size. - LO agg = vertex2AggId(nei, 0); - const LO aggSize = Kokkos::atomic_fetch_add (&aggSizesView(agg), - 1); - if(aggSize < maxAggSize) { - //assign vertex i to aggregate with root j - vertex2AggId(nodeIdx, 0) = agg; - procWinner(nodeIdx, 0) = myRank; - aggStat(nodeIdx) = AGGREGATED; - ++lNumAggregatedNodes; - break; - } else { - // Decrement back the value of aggSizesView(agg) - Kokkos::atomic_decrement(&aggSizesView(agg)); - } - } - } - } - // if(aggStat(nodeIdx) != AGGREGATED) { - // lNumNonAggregatedNodes++; - if(aggStat(nodeIdx) == NOTSEL) { aggStat(nodeIdx) = READY; } - // } - }, tmpNumAggregatedNodes); - numAggregatedNodes += tmpNumAggregatedNodes; - numNonAggregatedNodes -= numAggregatedNodes; + LO numAggregatedNodes = 0; + LO numLocalAggregates = aggregates.GetNumAggregates(); + Kokkos::View aggCount("aggCount"); + Kokkos::deep_copy(aggCount, numLocalAggregates); + Kokkos::parallel_for( + "Aggregation Phase 1: initial reduction over color == 1", + Kokkos::RangePolicy(0, numRows), + KOKKOS_LAMBDA(const LO nodeIdx) { + if (colors(nodeIdx) == 1 && aggStat(nodeIdx) == READY) { + const LO aggIdx = Kokkos::atomic_fetch_add(&aggCount(), 1); + vertex2AggId(nodeIdx, 0) = aggIdx; + aggStat(nodeIdx) = AGGREGATED; + procWinner(nodeIdx, 0) = myRank; + } + }); + // Truely we wish to compute: numAggregatedNodes = aggCount - numLocalAggregates + // before updating the value of numLocalAggregates. + // But since we also do not want to create a host mirror of aggCount we do some trickery... 
+ numAggregatedNodes -= numLocalAggregates; + Kokkos::deep_copy(numLocalAggregates, aggCount); + numAggregatedNodes += numLocalAggregates; - // update aggregate object - aggregates.SetNumAggregates(numLocalAggregates); + // Compute the initial size of the aggregates. + // Note lbv 12-21-17: I am pretty sure that the aggregates will always be of size 1 + // at this point so we could simplify the code below a lot if this + // assumption is correct... + Kokkos::View aggSizesView("aggSizes", numLocalAggregates); + { + // Here there is a possibility that two vertices assigned to two different threads contribute + // to the same aggregate if somethings happened before phase 1? + auto aggSizesScatterView = Kokkos::Experimental::create_scatter_view(aggSizesView); + Kokkos::parallel_for( + "Aggregation Phase 1: compute initial aggregates size", + Kokkos::RangePolicy(0, numRows), + KOKKOS_LAMBDA(const LO nodeIdx) { + auto aggSizesScatterViewAccess = aggSizesScatterView.access(); + if (vertex2AggId(nodeIdx, 0) >= 0) + aggSizesScatterViewAccess(vertex2AggId(nodeIdx, 0)) += 1; + }); + Kokkos::Experimental::contribute(aggSizesView, aggSizesScatterView); } - template - void AggregationPhase1Algorithm_kokkos:: - BuildAggregatesDeterministic(const LO maxAggSize, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const - { - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); + LO tmpNumAggregatedNodes = 0; + Kokkos::parallel_reduce( + "Aggregation Phase 1: main parallel_reduce over aggSizes", + Kokkos::RangePolicy(0, numRows), + KOKKOS_LAMBDA(const size_t nodeIdx, LO& lNumAggregatedNodes) { + if (colors(nodeIdx) != 1 && (aggStat(nodeIdx) == READY || aggStat(nodeIdx) == NOTSEL)) { + // Get neighbors of vertex i and look for local, aggregated, + // color 1 neighbor (valid root). 
+ auto neighbors = lclLWGraph.getNeighborVertices(nodeIdx); + for (LO j = 0; j < neighbors.length; ++j) { + auto nei = neighbors.colidx(j); + if (lclLWGraph.isLocalNeighborVertex(nei) && colors(nei) == 1 && aggStat(nei) == AGGREGATED) { + // This atomic guarentees that any other node trying to + // join aggregate agg has the correct size. + LO agg = vertex2AggId(nei, 0); + const LO aggSize = Kokkos::atomic_fetch_add(&aggSizesView(agg), + 1); + if (aggSize < maxAggSize) { + //assign vertex i to aggregate with root j + vertex2AggId(nodeIdx, 0) = agg; + procWinner(nodeIdx, 0) = myRank; + aggStat(nodeIdx) = AGGREGATED; + ++lNumAggregatedNodes; + break; + } else { + // Decrement back the value of aggSizesView(agg) + Kokkos::atomic_decrement(&aggSizesView(agg)); + } + } + } + } + // if(aggStat(nodeIdx) != AGGREGATED) { + // lNumNonAggregatedNodes++; + if (aggStat(nodeIdx) == NOTSEL) { + aggStat(nodeIdx) = READY; + } + // } + }, + tmpNumAggregatedNodes); + numAggregatedNodes += tmpNumAggregatedNodes; + numNonAggregatedNodes -= numAggregatedNodes; - auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto colors = aggregates.GetGraphColors(); + // update aggregate object + aggregates.SetNumAggregates(numLocalAggregates); +} - auto lclLWGraph = graph.getLocalLWGraph(); +template +void AggregationPhase1Algorithm_kokkos:: + BuildAggregatesDeterministic(const LO maxAggSize, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const { + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); - LO numLocalAggregates = aggregates.GetNumAggregates(); - Kokkos::View numLocalAggregatesView("Num aggregates"); - { - auto h_nla = Kokkos::create_mirror_view(numLocalAggregatesView); - h_nla() = numLocalAggregates; - 
Kokkos::deep_copy(numLocalAggregatesView, h_nla); - } + auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto colors = aggregates.GetGraphColors(); - Kokkos::View newRoots("New root LIDs", numNonAggregatedNodes); - Kokkos::View numNewRoots("Number of new aggregates of current color"); - auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots); + auto lclLWGraph = graph.getLocalLWGraph(); - //first loop build the set of new roots - Kokkos::parallel_for("Aggregation Phase 1: building list of new roots", - Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA(const LO i) - { - if(colors(i) == 1 && aggStat(i) == READY) - { - //i will become a root - newRoots(Kokkos::atomic_fetch_add(&numNewRoots(), 1)) = i; - } - }); - Kokkos::deep_copy(h_numNewRoots, numNewRoots); - //sort new roots by LID to guarantee determinism in agg IDs - Kokkos::sort(newRoots, 0, h_numNewRoots()); - LO numAggregated = 0; - Kokkos::parallel_reduce("Aggregation Phase 1: aggregating nodes", - Kokkos::RangePolicy(0, h_numNewRoots()), - KOKKOS_LAMBDA(const LO rootIndex, LO& lnumAggregated) - { - LO root = newRoots(rootIndex); - LO aggID = numLocalAggregatesView() + rootIndex; - LO aggSize = 1; - vertex2AggId(root, 0) = aggID; - procWinner(root, 0) = myRank; - aggStat(root) = AGGREGATED; - auto neighOfRoot = lclLWGraph.getNeighborVertices(root); - for(LO n = 0; n < neighOfRoot.length; n++) - { - LO neigh = neighOfRoot(n); - if (lclLWGraph.isLocalNeighborVertex(neigh) && aggStat(neigh) == READY) - { - //add neigh to aggregate - vertex2AggId(neigh, 0) = aggID; - procWinner(neigh, 0) = myRank; - aggStat(neigh) = AGGREGATED; - aggSize++; - if(aggSize == maxAggSize) - { - //can't add any more nodes - break; - } - } - } - lnumAggregated += aggSize; - }, numAggregated); - numNonAggregatedNodes -= numAggregated; - // update aggregate object - 
aggregates.SetNumAggregates(numLocalAggregates + h_numNewRoots()); + LO numLocalAggregates = aggregates.GetNumAggregates(); + Kokkos::View numLocalAggregatesView("Num aggregates"); + { + auto h_nla = Kokkos::create_mirror_view(numLocalAggregatesView); + h_nla() = numLocalAggregates; + Kokkos::deep_copy(numLocalAggregatesView, h_nla); } -} // end namespace + Kokkos::View newRoots("New root LIDs", numNonAggregatedNodes); + Kokkos::View numNewRoots("Number of new aggregates of current color"); + auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots); + + //first loop build the set of new roots + Kokkos::parallel_for( + "Aggregation Phase 1: building list of new roots", + Kokkos::RangePolicy(0, numRows), + KOKKOS_LAMBDA(const LO i) { + if (colors(i) == 1 && aggStat(i) == READY) { + //i will become a root + newRoots(Kokkos::atomic_fetch_add(&numNewRoots(), 1)) = i; + } + }); + Kokkos::deep_copy(h_numNewRoots, numNewRoots); + //sort new roots by LID to guarantee determinism in agg IDs + Kokkos::sort(newRoots, 0, h_numNewRoots()); + LO numAggregated = 0; + Kokkos::parallel_reduce( + "Aggregation Phase 1: aggregating nodes", + Kokkos::RangePolicy(0, h_numNewRoots()), + KOKKOS_LAMBDA(const LO rootIndex, LO& lnumAggregated) { + LO root = newRoots(rootIndex); + LO aggID = numLocalAggregatesView() + rootIndex; + LO aggSize = 1; + vertex2AggId(root, 0) = aggID; + procWinner(root, 0) = myRank; + aggStat(root) = AGGREGATED; + auto neighOfRoot = lclLWGraph.getNeighborVertices(root); + for (LO n = 0; n < neighOfRoot.length; n++) { + LO neigh = neighOfRoot(n); + if (lclLWGraph.isLocalNeighborVertex(neigh) && aggStat(neigh) == READY) { + //add neigh to aggregate + vertex2AggId(neigh, 0) = aggID; + procWinner(neigh, 0) = myRank; + aggStat(neigh) = AGGREGATED; + aggSize++; + if (aggSize == maxAggSize) { + //can't add any more nodes + break; + } + } + } + lnumAggregated += aggSize; + }, + numAggregated); + numNonAggregatedNodes -= numAggregated; + // update aggregate object + 
aggregates.SetNumAggregates(numLocalAggregates + h_numNewRoots()); +} + +} // namespace MueLu -#endif // MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP +#endif // MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_decl.hpp index 0535904d2311..82f8898f192d 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_decl.hpp @@ -56,7 +56,7 @@ #include "MueLu_GraphBase.hpp" namespace MueLu { - /*! +/*! @class AggregationPhase2aAlgorithm class. @brief Among unaggregated points, see if we can make a reasonable size aggregate out of it. @ingroup Aggregation @@ -78,41 +78,38 @@ namespace MueLu { */ - template - class AggregationPhase2aAlgorithm : - public MueLu::AggregationAlgorithmBase { +template +class AggregationPhase2aAlgorithm : public MueLu::AggregationAlgorithmBase { #undef MUELU_AGGREGATIONPHASE2AALGORITHM_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - AggregationPhase2aAlgorithm(const RCP& /* graphFact */ = Teuchos::null) { } + //! Constructor. + AggregationPhase2aAlgorithm(const RCP& /* graphFact */ = Teuchos::null) {} - //! Destructor. - virtual ~AggregationPhase2aAlgorithm() { } + //! Destructor. + virtual ~AggregationPhase2aAlgorithm() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. 
*/ + void BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; + //@} - void BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; - //@} + std::string description() const { return "Phase 2a (secondary)"; } +}; - std::string description() const { return "Phase 2a (secondary)"; } - }; - -} //namespace MueLu +} //namespace MueLu #define MUELU_AGGREGATIONPHASE2AALGORITHM_SHORT - #endif /* MUELU_AGGREGATIONPHASE2AALGORITHM_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_def.hpp index fdd92afd4f8e..b9f2b394d01d 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_def.hpp @@ -46,7 +46,6 @@ #ifndef MUELU_AGGREGATIONPHASE2AALGORITHM_DEF_HPP_ #define MUELU_AGGREGATIONPHASE2AALGORITHM_DEF_HPP_ - #include #include @@ -61,139 +60,133 @@ namespace MueLu { - template - void AggregationPhase2aAlgorithm::BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildAggregates"); - - int minNodesPerAggregate = params.get("aggregation: min agg size"); - int maxNodesPerAggregate = params.get("aggregation: max agg size"); - bool matchMLbehavior = params.get("aggregation: match ML phase2a"); - - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); +template +void AggregationPhase2aAlgorithm::BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const { + Monitor m(*this, 
"BuildAggregates"); + + int minNodesPerAggregate = params.get("aggregation: min agg size"); + int maxNodesPerAggregate = params.get("aggregation: max agg size"); + bool matchMLbehavior = params.get("aggregation: match ML phase2a"); + + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); + + ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); + ArrayRCP procWinner = aggregates.GetProcWinner()->getDataNonConst(0); + + LO numLocalAggregates = aggregates.GetNumAggregates(); + + LO numLocalNodes = procWinner.size(); + LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes; + + const double aggFactor = params.get("aggregation: phase2a agg factor"); + double factor; + + if (matchMLbehavior) { + // Note: ML uses global counts to set the factor + // Passing # of nonaggregated nodes and # of nodes via aggStat + GO in_data[2] = {(GO)numNonAggregatedNodes, (GO)aggStat.size()}; + GO out_data[2]; + Teuchos::reduceAll(*graph.GetComm(), Teuchos::REDUCE_SUM, 2, in_data, out_data); + GO phase_one_aggregated = out_data[1] - out_data[0]; + factor = as(phase_one_aggregated) / (out_data[1] + 1); + + LO agg_stat_unaggregated = 0; + LO agg_stat_aggregated = 0; + LO agg_stat_bdry = 0; + for (LO i = 0; i < (LO)aggStat.size(); i++) { + if (aggStat[i] == AGGREGATED) + agg_stat_aggregated++; + else if (aggStat[i] == BOUNDARY) + agg_stat_bdry++; + else + agg_stat_unaggregated++; + } - ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); - ArrayRCP procWinner = aggregates.GetProcWinner() ->getDataNonConst(0); + // NOTE: ML always uses 3 as minNodesPerAggregate + minNodesPerAggregate = 3; - LO numLocalAggregates = aggregates.GetNumAggregates(); + } else { + // MueLu defaults to using local counts to set the factor + factor = as(numLocalAggregated) / (numLocalNodes + 1); + } - LO numLocalNodes = procWinner.size(); - LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes; + // Now apply aggFactor + 
factor = pow(factor, aggFactor); - const double aggFactor = params.get("aggregation: phase2a agg factor"); - double factor; + int aggIndex = -1; + size_t aggSize = 0; + std::vector aggList(graph.getLocalMaxNumRowEntries()); + for (LO rootCandidate = 0; rootCandidate < numRows; rootCandidate++) { + if (aggStat[rootCandidate] != READY) { + continue; + } + LO numNeighbors = 0; + aggSize = 0; if (matchMLbehavior) { - // Note: ML uses global counts to set the factor - // Passing # of nonaggregated nodes and # of nodes via aggStat - GO in_data[2] ={(GO)numNonAggregatedNodes,(GO) aggStat.size()}; - GO out_data[2]; - Teuchos::reduceAll(*graph.GetComm(), Teuchos::REDUCE_SUM, 2, in_data, out_data); - GO phase_one_aggregated = out_data[1] - out_data[0]; - factor = as(phase_one_aggregated) / (out_data[1]+1); - - LO agg_stat_unaggregated=0; - LO agg_stat_aggregated=0; - LO agg_stat_bdry=0; - for (LO i=0; i<(LO)aggStat.size(); i++) { - if (aggStat[i] == AGGREGATED) - agg_stat_aggregated++; - else if (aggStat[i] == BOUNDARY) - agg_stat_bdry++; - else - agg_stat_unaggregated++; - } - - // NOTE: ML always uses 3 as minNodesPerAggregate - minNodesPerAggregate=3; - - } - else { - // MueLu defaults to using local counts to set the factor - factor = as(numLocalAggregated)/(numLocalNodes+1); + aggList[aggSize++] = rootCandidate; + numNeighbors++; } - // Now apply aggFactor - factor = pow(factor, aggFactor); - - int aggIndex = -1; - size_t aggSize = 0; - std::vector aggList(graph.getLocalMaxNumRowEntries()); - - for (LO rootCandidate = 0; rootCandidate < numRows; rootCandidate++) { - if (aggStat[rootCandidate] != READY) { - continue; - } + ArrayView neighOfINode = graph.getNeighborVertices(rootCandidate); + + LO num_nonaggd_neighbors = 0, num_local_neighbors = 0; + for (int j = 0; j < neighOfINode.size(); j++) { + LO neigh = neighOfINode[j]; + if (graph.isLocalNeighborVertex(neigh)) + num_local_neighbors++; + + if (neigh != rootCandidate) { + if (graph.isLocalNeighborVertex(neigh) && 
aggStat[neigh] == READY) { + // If aggregate size does not exceed max size, add node to the tentative aggregate + // NOTE: We do not exit the loop over all neighbours since we have still + // to count all aggregated neighbour nodes for the aggregation criteria + // NOTE: We check here for the maximum aggregation size. If we would do it below + // with all the other check too big aggregates would not be accepted at all. + if (aggSize < as(maxNodesPerAggregate)) + aggList[aggSize++] = neigh; + num_nonaggd_neighbors++; + } - LO numNeighbors = 0; - aggSize = 0; - if (matchMLbehavior) { - aggList[aggSize++] = rootCandidate; numNeighbors++; } + } - ArrayView neighOfINode = graph.getNeighborVertices(rootCandidate); - - LO num_nonaggd_neighbors=0, num_local_neighbors=0; - for (int j = 0; j < neighOfINode.size(); j++) { - LO neigh = neighOfINode[j]; - if (graph.isLocalNeighborVertex(neigh)) - num_local_neighbors++; - + bool accept_aggregate; + if (matchMLbehavior) { + // ML does this calculation slightly differently than MueLu does by default, specifically it + // uses the *local* number of neigbors, regardless of what they are. + // NOTE: ML does zero compression here. Not sure if it matters + // NOTE: ML uses a hardcoded value 3 instead of minNodesPerAggregate. 
This has been set above + LO rowi_N = num_local_neighbors; + num_nonaggd_neighbors++; // ML counts the node itself as a nonaggd_neighbor + accept_aggregate = (rowi_N > as(minNodesPerAggregate)) && (num_nonaggd_neighbors > (factor * rowi_N)); + } else { + accept_aggregate = (aggSize > as(minNodesPerAggregate)) && (aggSize > factor * numNeighbors); + } - if (neigh != rootCandidate) { - if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == READY) { - // If aggregate size does not exceed max size, add node to the tentative aggregate - // NOTE: We do not exit the loop over all neighbours since we have still - // to count all aggregated neighbour nodes for the aggregation criteria - // NOTE: We check here for the maximum aggregation size. If we would do it below - // with all the other check too big aggregates would not be accepted at all. - if (aggSize < as(maxNodesPerAggregate)) - aggList[aggSize++] = neigh; - num_nonaggd_neighbors++; - } + if (accept_aggregate) { + // Accept new aggregate + // rootCandidate becomes the root of the newly formed aggregate + aggregates.SetIsRoot(rootCandidate); + aggIndex = numLocalAggregates++; - numNeighbors++; - } + for (size_t k = 0; k < aggSize; k++) { + aggStat[aggList[k]] = AGGREGATED; + vertex2AggId[aggList[k]] = aggIndex; + procWinner[aggList[k]] = myRank; } - - bool accept_aggregate; - if (matchMLbehavior) { - // ML does this calculation slightly differently than MueLu does by default, specifically it - // uses the *local* number of neigbors, regardless of what they are. - // NOTE: ML does zero compression here. Not sure if it matters - // NOTE: ML uses a hardcoded value 3 instead of minNodesPerAggregate. 
This has been set above - LO rowi_N = num_local_neighbors; - num_nonaggd_neighbors++; // ML counts the node itself as a nonaggd_neighbor - accept_aggregate = (rowi_N > as(minNodesPerAggregate)) && (num_nonaggd_neighbors > (factor*rowi_N)); - } - else { - accept_aggregate = (aggSize > as(minNodesPerAggregate)) && (aggSize > factor*numNeighbors); - } - - - if (accept_aggregate) { - // Accept new aggregate - // rootCandidate becomes the root of the newly formed aggregate - aggregates.SetIsRoot(rootCandidate); - aggIndex = numLocalAggregates++; - - for (size_t k = 0; k < aggSize; k++) { - aggStat [aggList[k]] = AGGREGATED; - vertex2AggId[aggList[k]] = aggIndex; - procWinner [aggList[k]] = myRank; - } - - numNonAggregatedNodes -= aggSize; - } + numNonAggregatedNodes -= aggSize; } - - // update aggregate object - aggregates.SetNumAggregates(numLocalAggregates); } -} // end namespace + // update aggregate object + aggregates.SetNumAggregates(numLocalAggregates); +} + +} // namespace MueLu #endif /* MUELU_AGGREGATIONPHASE2AALGORITHM_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_kokkos_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_kokkos_decl.hpp index d253e891cd71..5d564915d173 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_kokkos_decl.hpp @@ -57,7 +57,7 @@ #include "MueLu_FactoryBase_fwd.hpp" namespace MueLu { - /*! +/*! @class AggregationPhase2aAlgorithm class. @brief Among unaggregated points, see if we can make a reasonable size aggregate out of it. 
@ingroup Aggregation @@ -79,59 +79,57 @@ namespace MueLu { */ - template - class AggregationPhase2aAlgorithm_kokkos : - public MueLu::AggregationAlgorithmBase_kokkos { +template +class AggregationPhase2aAlgorithm_kokkos : public MueLu::AggregationAlgorithmBase_kokkos { #undef MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - using device_type = typename LWGraph_kokkos::device_type; - using execution_space = typename LWGraph_kokkos::execution_space; - using memory_space = typename LWGraph_kokkos::memory_space; + public: + using device_type = typename LWGraph_kokkos::device_type; + using execution_space = typename LWGraph_kokkos::execution_space; + using memory_space = typename LWGraph_kokkos::memory_space; - //! @name Constructors/Destructors. - //@{ + //! @name Constructors/Destructors. + //@{ - //! Constructor. - AggregationPhase2aAlgorithm_kokkos(const RCP& /* graphFact */ = Teuchos::null) { } + //! Constructor. + AggregationPhase2aAlgorithm_kokkos(const RCP& /* graphFact */ = Teuchos::null) {} - //! Destructor. - virtual ~AggregationPhase2aAlgorithm_kokkos() { } + //! Destructor. + virtual ~AggregationPhase2aAlgorithm_kokkos() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. 
*/ + void BuildAggregates(const Teuchos::ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const; - void BuildAggregates(const Teuchos::ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; + void BuildAggregatesRandom(const Teuchos::ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const; - void BuildAggregatesRandom(const Teuchos::ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; + void BuildAggregatesDeterministic(const Teuchos::ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const; + //@} - void BuildAggregatesDeterministic(const Teuchos::ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; - //@} + std::string description() const { return "Phase 2a (secondary)"; } +}; - std::string description() const { return "Phase 2a (secondary)"; } - }; - -} //namespace MueLu +} //namespace MueLu #define MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_SHORT -#endif // MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DECL_HPP +#endif // MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_kokkos_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_kokkos_def.hpp index 01fcb20a21b9..9692744f411d 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_kokkos_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_kokkos_def.hpp @@ -62,268 +62,266 @@ namespace MueLu { - template - 
void AggregationPhase2aAlgorithm_kokkos:: - BuildAggregates(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const { - - if(params.get("aggregation: deterministic")) { - Monitor m(*this, "BuildAggregatesDeterministic"); - BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes); - } else { - Monitor m(*this, "BuildAggregatesRandom"); - BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes); - } +template +void AggregationPhase2aAlgorithm_kokkos:: + BuildAggregates(const ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const { + if (params.get("aggregation: deterministic")) { + Monitor m(*this, "BuildAggregatesDeterministic"); + BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes); + } else { + Monitor m(*this, "BuildAggregatesRandom"); + BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes); + } - } // BuildAggregates - - template - void AggregationPhase2aAlgorithm_kokkos:: - BuildAggregatesRandom(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const - { - const int minNodesPerAggregate = params.get("aggregation: min agg size"); - const int maxNodesPerAggregate = params.get("aggregation: max agg size"); - bool matchMLbehavior = params.get("aggregation: match ML phase2a"); - - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); - - auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto colors = aggregates.GetGraphColors(); - const LO numColors = aggregates.GetGraphNumColors(); - - auto lclLWGraph = 
graph.getLocalLWGraph(); - - LO numLocalNodes = numRows; - LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes; - - const double aggFactor = 0.5; - double factor = static_cast(numLocalAggregated)/(numLocalNodes+1); - factor = pow(factor, aggFactor); - - // LBV on Sept 12, 2019: this looks a little heavy handed, - // I'm not sure a view is needed to perform atomic updates. - // If we can avoid this and use a simple LO that would be - // simpler for later maintenance. - Kokkos::View numLocalAggregates("numLocalAggregates"); - typename Kokkos::View::HostMirror h_numLocalAggregates = +} // BuildAggregates + +template +void AggregationPhase2aAlgorithm_kokkos:: + BuildAggregatesRandom(const ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const { + const int minNodesPerAggregate = params.get("aggregation: min agg size"); + const int maxNodesPerAggregate = params.get("aggregation: max agg size"); + bool matchMLbehavior = params.get("aggregation: match ML phase2a"); + + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); + + auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto colors = aggregates.GetGraphColors(); + const LO numColors = aggregates.GetGraphNumColors(); + + auto lclLWGraph = graph.getLocalLWGraph(); + + LO numLocalNodes = numRows; + LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes; + + const double aggFactor = 0.5; + double factor = static_cast(numLocalAggregated) / (numLocalNodes + 1); + factor = pow(factor, aggFactor); + + // LBV on Sept 12, 2019: this looks a little heavy handed, + // I'm not sure a view is needed to perform atomic updates. + // If we can avoid this and use a simple LO that would be + // simpler for later maintenance. 
+ Kokkos::View numLocalAggregates("numLocalAggregates"); + typename Kokkos::View::HostMirror h_numLocalAggregates = Kokkos::create_mirror_view(numLocalAggregates); - h_numLocalAggregates() = aggregates.GetNumAggregates(); - Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates); + h_numLocalAggregates() = aggregates.GetNumAggregates(); + Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates); + + // Now we create new aggregates using root nodes in all colors other than the first color, + // as the first color was already exhausted in Phase 1. + for (int color = 2; color < numColors + 1; ++color) { + LO tmpNumNonAggregatedNodes = 0; + Kokkos::parallel_reduce( + "Aggregation Phase 2a: loop over each individual color", + Kokkos::RangePolicy(0, numRows), + KOKKOS_LAMBDA(const LO rootCandidate, LO& lNumNonAggregatedNodes) { + if (aggStat(rootCandidate) == READY && + colors(rootCandidate) == color) { + LO numNeighbors = 0; + LO aggSize = 0; + if (matchMLbehavior) { + aggSize += 1; + numNeighbors += 1; + } + + auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate); + + // Loop over neighbors to count how many nodes could join + // the new aggregate + + for (int j = 0; j < neighbors.length; ++j) { + LO neigh = neighbors(j); + if (neigh != rootCandidate) { + if (lclLWGraph.isLocalNeighborVertex(neigh) && + (aggStat(neigh) == READY) && + (aggSize < maxNodesPerAggregate)) { + ++aggSize; + } + ++numNeighbors; + } + } + + // If a sufficient number of nodes can join the new aggregate + // then we actually create the aggregate. + if (aggSize > minNodesPerAggregate && + (aggSize > factor * numNeighbors)) { + // aggregates.SetIsRoot(rootCandidate); + LO aggIndex = Kokkos:: + atomic_fetch_add(&numLocalAggregates(), 1); + + LO numAggregated = 0; + + if (matchMLbehavior) { + // Add the root. 
+ aggStat(rootCandidate) = AGGREGATED; + vertex2AggId(rootCandidate, 0) = aggIndex; + procWinner(rootCandidate, 0) = myRank; + ++numAggregated; + --lNumNonAggregatedNodes; + } + + for (int neighIdx = 0; neighIdx < neighbors.length; ++neighIdx) { + LO neigh = neighbors(neighIdx); + if (neigh != rootCandidate) { + if (lclLWGraph.isLocalNeighborVertex(neigh) && + (aggStat(neigh) == READY) && + (numAggregated < aggSize)) { + aggStat(neigh) = AGGREGATED; + vertex2AggId(neigh, 0) = aggIndex; + procWinner(neigh, 0) = myRank; + + ++numAggregated; + --lNumNonAggregatedNodes; + } + } + } + } + } + }, + tmpNumNonAggregatedNodes); + numNonAggregatedNodes += tmpNumNonAggregatedNodes; + } - // Now we create new aggregates using root nodes in all colors other than the first color, - // as the first color was already exhausted in Phase 1. - for(int color = 2; color < numColors + 1; ++color) { - LO tmpNumNonAggregatedNodes = 0; - Kokkos::parallel_reduce("Aggregation Phase 2a: loop over each individual color", - Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA (const LO rootCandidate, LO& lNumNonAggregatedNodes) { - if(aggStat(rootCandidate) == READY && - colors(rootCandidate) == color) { - - LO numNeighbors = 0; - LO aggSize = 0; - if (matchMLbehavior) { - aggSize += 1; - numNeighbors +=1; - } - - auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate); - - // Loop over neighbors to count how many nodes could join - // the new aggregate - - for(int j = 0; j < neighbors.length; ++j) { - LO neigh = neighbors(j); - if(neigh != rootCandidate) { - if(lclLWGraph.isLocalNeighborVertex(neigh) && - (aggStat(neigh) == READY) && - (aggSize < maxNodesPerAggregate)) { - ++aggSize; - } - ++numNeighbors; - } - } - - // If a sufficient number of nodes can join the new aggregate - // then we actually create the aggregate. 
- if(aggSize > minNodesPerAggregate && - (aggSize > factor*numNeighbors)) { - - // aggregates.SetIsRoot(rootCandidate); - LO aggIndex = Kokkos:: - atomic_fetch_add(&numLocalAggregates(), 1); - - LO numAggregated = 0; - - if (matchMLbehavior) { - // Add the root. - aggStat(rootCandidate) = AGGREGATED; - vertex2AggId(rootCandidate, 0) = aggIndex; - procWinner(rootCandidate, 0) = myRank; - ++numAggregated; - --lNumNonAggregatedNodes; - } - - for(int neighIdx = 0; neighIdx < neighbors.length; ++neighIdx) { - LO neigh = neighbors(neighIdx); - if(neigh != rootCandidate) { - if(lclLWGraph.isLocalNeighborVertex(neigh) && - (aggStat(neigh) == READY) && - (numAggregated < aggSize)) { - aggStat(neigh) = AGGREGATED; - vertex2AggId(neigh, 0) = aggIndex; - procWinner(neigh, 0) = myRank; - - ++numAggregated; - --lNumNonAggregatedNodes; - } - } - } - } - } - }, tmpNumNonAggregatedNodes); - numNonAggregatedNodes += tmpNumNonAggregatedNodes; - } + // update aggregate object + Kokkos::deep_copy(h_numLocalAggregates, numLocalAggregates); + aggregates.SetNumAggregates(h_numLocalAggregates()); +} // BuildAggregatesRandom + +template +void AggregationPhase2aAlgorithm_kokkos:: + BuildAggregatesDeterministic(const ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const { + const int minNodesPerAggregate = params.get("aggregation: min agg size"); + const int maxNodesPerAggregate = params.get("aggregation: max agg size"); - // update aggregate object - Kokkos::deep_copy(h_numLocalAggregates, numLocalAggregates); - aggregates.SetNumAggregates(h_numLocalAggregates()); - } // BuildAggregatesRandom - - template - void AggregationPhase2aAlgorithm_kokkos:: - BuildAggregatesDeterministic(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const - { - const int minNodesPerAggregate = params.get("aggregation: min agg size"); - 
const int maxNodesPerAggregate = params.get("aggregation: max agg size"); - - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); - - auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto colors = aggregates.GetGraphColors(); - const LO numColors = aggregates.GetGraphNumColors(); - - auto lclLWGraph = graph.getLocalLWGraph(); - - LO numLocalNodes = procWinner.size(); - LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes; - - const double aggFactor = 0.5; - double factor = as(numLocalAggregated)/(numLocalNodes+1); - factor = pow(factor, aggFactor); - - Kokkos::View numLocalAggregates("numLocalAggregates"); - typename Kokkos::View::HostMirror h_numLocalAggregates = + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); + + auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto colors = aggregates.GetGraphColors(); + const LO numColors = aggregates.GetGraphNumColors(); + + auto lclLWGraph = graph.getLocalLWGraph(); + + LO numLocalNodes = procWinner.size(); + LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes; + + const double aggFactor = 0.5; + double factor = as(numLocalAggregated) / (numLocalNodes + 1); + factor = pow(factor, aggFactor); + + Kokkos::View numLocalAggregates("numLocalAggregates"); + typename Kokkos::View::HostMirror h_numLocalAggregates = Kokkos::create_mirror_view(numLocalAggregates); - h_numLocalAggregates() = aggregates.GetNumAggregates(); - Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates); - - // Now we create new aggregates using root nodes in all colors other than the first color, - // as the first color was already exhausted in Phase 1. 
- // - // In the deterministic version, exactly the same set of aggregates will be created - // (as the nondeterministic version) - // because no vertex V can be a neighbor of two vertices of the same color, so two root - // candidates can't fight over V - // - // But, the precise values in vertex2AggId need to match exactly, so just sort the new - // roots of each color before assigning aggregate IDs - - //numNonAggregatedNodes is the best available upper bound for the number of aggregates - //which may be created in this phase, so use it for the size of newRoots - Kokkos::View newRoots("New root LIDs", numNonAggregatedNodes); - Kokkos::View numNewRoots("Number of new aggregates of current color"); - auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots); - for(int color = 1; color < numColors + 1; ++color) { - h_numNewRoots() = 0; - Kokkos::deep_copy(numNewRoots, h_numNewRoots); - Kokkos::parallel_for("Aggregation Phase 2a: determining new roots of current color", - Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA(const LO rootCandidate) { - if(aggStat(rootCandidate) == READY && - colors(rootCandidate) == color) { - LO aggSize = 0; - auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate); - // Loop over neighbors to count how many nodes could join - // the new aggregate - LO numNeighbors = 0; - for(int j = 0; j < neighbors.length; ++j) { - LO neigh = neighbors(j); - if(neigh != rootCandidate) - { - if(lclLWGraph.isLocalNeighborVertex(neigh) && - aggStat(neigh) == READY && - aggSize < maxNodesPerAggregate) - { - ++aggSize; - } - ++numNeighbors; - } - } - // If a sufficient number of nodes can join the new aggregate - // then we mark rootCandidate as a future root. 
- if(aggSize > minNodesPerAggregate && aggSize > factor*numNeighbors) { - LO newRootIndex = Kokkos::atomic_fetch_add(&numNewRoots(), 1); - newRoots(newRootIndex) = rootCandidate; - } - } - }); - Kokkos::deep_copy(h_numNewRoots, numNewRoots); - - if(h_numNewRoots() > 0) { - //sort the new root indices - Kokkos::sort(newRoots, 0, h_numNewRoots()); - //now, loop over all new roots again and actually create the aggregates - LO tmpNumNonAggregatedNodes = 0; - //First, just find the set of color vertices which will become aggregate roots - Kokkos::parallel_reduce("Aggregation Phase 2a: create new aggregates", - Kokkos::RangePolicy(0, h_numNewRoots()), - KOKKOS_LAMBDA (const LO newRootIndex, LO& lNumNonAggregatedNodes) { - LO root = newRoots(newRootIndex); - LO newAggID = numLocalAggregates() + newRootIndex; - auto neighbors = lclLWGraph.getNeighborVertices(root); - // Loop over neighbors and add them to new aggregate - aggStat(root) = AGGREGATED; - vertex2AggId(root, 0) = newAggID; - LO aggSize = 1; - for(int j = 0; j < neighbors.length; ++j) { - LO neigh = neighbors(j); - if(neigh != root) { - if(lclLWGraph.isLocalNeighborVertex(neigh) && - aggStat(neigh) == READY && - aggSize < maxNodesPerAggregate) { - aggStat(neigh) = AGGREGATED; - vertex2AggId(neigh, 0) = newAggID; - procWinner(neigh, 0) = myRank; - aggSize++; - } - } - } - lNumNonAggregatedNodes -= aggSize; - }, tmpNumNonAggregatedNodes); - numNonAggregatedNodes += tmpNumNonAggregatedNodes; - h_numLocalAggregates() += h_numNewRoots(); - Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates); - } + h_numLocalAggregates() = aggregates.GetNumAggregates(); + Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates); + + // Now we create new aggregates using root nodes in all colors other than the first color, + // as the first color was already exhausted in Phase 1. 
+ // + // In the deterministic version, exactly the same set of aggregates will be created + // (as the nondeterministic version) + // because no vertex V can be a neighbor of two vertices of the same color, so two root + // candidates can't fight over V + // + // But, the precise values in vertex2AggId need to match exactly, so just sort the new + // roots of each color before assigning aggregate IDs + + //numNonAggregatedNodes is the best available upper bound for the number of aggregates + //which may be created in this phase, so use it for the size of newRoots + Kokkos::View newRoots("New root LIDs", numNonAggregatedNodes); + Kokkos::View numNewRoots("Number of new aggregates of current color"); + auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots); + for (int color = 1; color < numColors + 1; ++color) { + h_numNewRoots() = 0; + Kokkos::deep_copy(numNewRoots, h_numNewRoots); + Kokkos::parallel_for( + "Aggregation Phase 2a: determining new roots of current color", + Kokkos::RangePolicy(0, numRows), + KOKKOS_LAMBDA(const LO rootCandidate) { + if (aggStat(rootCandidate) == READY && + colors(rootCandidate) == color) { + LO aggSize = 0; + auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate); + // Loop over neighbors to count how many nodes could join + // the new aggregate + LO numNeighbors = 0; + for (int j = 0; j < neighbors.length; ++j) { + LO neigh = neighbors(j); + if (neigh != rootCandidate) { + if (lclLWGraph.isLocalNeighborVertex(neigh) && + aggStat(neigh) == READY && + aggSize < maxNodesPerAggregate) { + ++aggSize; + } + ++numNeighbors; + } + } + // If a sufficient number of nodes can join the new aggregate + // then we mark rootCandidate as a future root. 
+ if (aggSize > minNodesPerAggregate && aggSize > factor * numNeighbors) { + LO newRootIndex = Kokkos::atomic_fetch_add(&numNewRoots(), 1); + newRoots(newRootIndex) = rootCandidate; + } + } + }); + Kokkos::deep_copy(h_numNewRoots, numNewRoots); + + if (h_numNewRoots() > 0) { + //sort the new root indices + Kokkos::sort(newRoots, 0, h_numNewRoots()); + //now, loop over all new roots again and actually create the aggregates + LO tmpNumNonAggregatedNodes = 0; + //First, just find the set of color vertices which will become aggregate roots + Kokkos::parallel_reduce( + "Aggregation Phase 2a: create new aggregates", + Kokkos::RangePolicy(0, h_numNewRoots()), + KOKKOS_LAMBDA(const LO newRootIndex, LO& lNumNonAggregatedNodes) { + LO root = newRoots(newRootIndex); + LO newAggID = numLocalAggregates() + newRootIndex; + auto neighbors = lclLWGraph.getNeighborVertices(root); + // Loop over neighbors and add them to new aggregate + aggStat(root) = AGGREGATED; + vertex2AggId(root, 0) = newAggID; + LO aggSize = 1; + for (int j = 0; j < neighbors.length; ++j) { + LO neigh = neighbors(j); + if (neigh != root) { + if (lclLWGraph.isLocalNeighborVertex(neigh) && + aggStat(neigh) == READY && + aggSize < maxNodesPerAggregate) { + aggStat(neigh) = AGGREGATED; + vertex2AggId(neigh, 0) = newAggID; + procWinner(neigh, 0) = myRank; + aggSize++; + } + } + } + lNumNonAggregatedNodes -= aggSize; + }, + tmpNumNonAggregatedNodes); + numNonAggregatedNodes += tmpNumNonAggregatedNodes; + h_numLocalAggregates() += h_numNewRoots(); + Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates); } - aggregates.SetNumAggregates(h_numLocalAggregates()); } + aggregates.SetNumAggregates(h_numLocalAggregates()); +} -} // end namespace +} // namespace MueLu -#endif // MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP +#endif // MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_decl.hpp 
b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_decl.hpp index 88b156d23c14..0cf2ea335e00 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_decl.hpp @@ -56,7 +56,7 @@ #include "MueLu_AggregationPhase2bAlgorithm_fwd.hpp" namespace MueLu { - /*! +/*! @class AggregationPhase2bAlgorithm class. @brief Add leftovers to existing aggregates @ingroup Aggregation @@ -77,41 +77,38 @@ namespace MueLu { This is not a problem, since after the previous aggregation phases one should not be able to build too large aggregates. */ - template - class AggregationPhase2bAlgorithm : - public MueLu::AggregationAlgorithmBase { +template +class AggregationPhase2bAlgorithm : public MueLu::AggregationAlgorithmBase { #undef MUELU_AGGREGATIONPHASE2BALGORITHM_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - AggregationPhase2bAlgorithm(const RCP& /* graphFact */ = Teuchos::null) { } + //! Constructor. + AggregationPhase2bAlgorithm(const RCP& /* graphFact */ = Teuchos::null) {} - //! Destructor. - virtual ~AggregationPhase2bAlgorithm() { } + //! Destructor. + virtual ~AggregationPhase2bAlgorithm() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. 
*/ + void BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; + //@} - void BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; - //@} + std::string description() const { return "Phase 2b (expansion)"; } +}; - std::string description() const { return "Phase 2b (expansion)"; } - }; - -} //namespace MueLu +} //namespace MueLu #define MUELU_AGGREGATIONPHASE2BALGORITHM_SHORT - #endif /* MUELU_AGGREGATIONPHASE2BALGORITHM_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_def.hpp index 1dc4ac52c9f1..4e402903e6de 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_def.hpp @@ -60,91 +60,89 @@ namespace MueLu { - // Try to stick unaggregated nodes into a neighboring aggregate if they are - // not already too big - template - void AggregationPhase2bAlgorithm::BuildAggregates(const ParameterList& params , const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildAggregates"); - bool matchMLbehavior = params.get("aggregation: match ML phase2b"); - - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); - - ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); - ArrayRCP procWinner = aggregates.GetProcWinner() ->getDataNonConst(0); - - LO numLocalAggregates = aggregates.GetNumAggregates(); - - const int defaultConnectWeight = 100; - const int penaltyConnectWeight = 10; - - std::vector aggWeight (numLocalAggregates, 0); - std::vector connectWeight(numRows, defaultConnectWeight); - 
std::vector aggPenalties (numRows, 0); - - // We do this cycle twice. - // I don't know why, but ML does it too - // taw: by running the aggregation routine more than once there is a chance that also - // non-aggregated nodes with a node distance of two are added to existing aggregates. - // Assuming that the aggregate size is 3 in each direction running the algorithm only twice - // should be sufficient. - for (int k = 0; k < 2; k++) { - for (LO i = 0; i < numRows; i++) { - if (aggStat[i] != READY) - continue; - - ArrayView neighOfINode = graph.getNeighborVertices(i); - - for (int j = 0; j < neighOfINode.size(); j++) { - LO neigh = neighOfINode[j]; - - // We don't check (neigh != i), as it is covered by checking (aggStat[neigh] == AGGREGATED) - if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == AGGREGATED) - aggWeight[vertex2AggId[neigh]] += connectWeight[neigh]; - } - - int bestScore = -100000; - int bestAggId = -1; - int bestConnect = -1; - - for (int j = 0; j < neighOfINode.size(); j++) { - LO neigh = neighOfINode[j]; - int aggId = vertex2AggId[neigh]; +// Try to stick unaggregated nodes into a neighboring aggregate if they are +// not already too big +template +void AggregationPhase2bAlgorithm::BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const { + Monitor m(*this, "BuildAggregates"); + bool matchMLbehavior = params.get("aggregation: match ML phase2b"); + + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); + + ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); + ArrayRCP procWinner = aggregates.GetProcWinner()->getDataNonConst(0); + + LO numLocalAggregates = aggregates.GetNumAggregates(); + + const int defaultConnectWeight = 100; + const int penaltyConnectWeight = 10; + + std::vector aggWeight(numLocalAggregates, 0); + std::vector connectWeight(numRows, defaultConnectWeight); + 
std::vector aggPenalties(numRows, 0); + + // We do this cycle twice. + // I don't know why, but ML does it too + // taw: by running the aggregation routine more than once there is a chance that also + // non-aggregated nodes with a node distance of two are added to existing aggregates. + // Assuming that the aggregate size is 3 in each direction running the algorithm only twice + // should be sufficient. + for (int k = 0; k < 2; k++) { + for (LO i = 0; i < numRows; i++) { + if (aggStat[i] != READY) + continue; + + ArrayView neighOfINode = graph.getNeighborVertices(i); + + for (int j = 0; j < neighOfINode.size(); j++) { + LO neigh = neighOfINode[j]; + + // We don't check (neigh != i), as it is covered by checking (aggStat[neigh] == AGGREGATED) + if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == AGGREGATED) + aggWeight[vertex2AggId[neigh]] += connectWeight[neigh]; + } - // Note: The third condition is only relevant if the ML matching is enabled - if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == AGGREGATED - && (!matchMLbehavior || aggWeight[aggId] != 0) ) { + int bestScore = -100000; + int bestAggId = -1; + int bestConnect = -1; - int score = aggWeight[aggId] - aggPenalties[aggId]; + for (int j = 0; j < neighOfINode.size(); j++) { + LO neigh = neighOfINode[j]; + int aggId = vertex2AggId[neigh]; - if (score > bestScore) { - bestAggId = aggId; - bestScore = score; - bestConnect = connectWeight[neigh]; + // Note: The third condition is only relevant if the ML matching is enabled + if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == AGGREGATED && (!matchMLbehavior || aggWeight[aggId] != 0)) { + int score = aggWeight[aggId] - aggPenalties[aggId]; - } else if (aggId == bestAggId && connectWeight[neigh] > bestConnect) { - bestConnect = connectWeight[neigh]; - } + if (score > bestScore) { + bestAggId = aggId; + bestScore = score; + bestConnect = connectWeight[neigh]; - // Reset the weights for the next loop - aggWeight[aggId] = 0; + } else if 
(aggId == bestAggId && connectWeight[neigh] > bestConnect) { + bestConnect = connectWeight[neigh]; } + + // Reset the weights for the next loop + aggWeight[aggId] = 0; } + } - if (bestScore >= 0) { - aggStat [i] = AGGREGATED; - vertex2AggId[i] = bestAggId; - procWinner [i] = myRank; + if (bestScore >= 0) { + aggStat[i] = AGGREGATED; + vertex2AggId[i] = bestAggId; + procWinner[i] = myRank; - numNonAggregatedNodes--; + numNonAggregatedNodes--; - aggPenalties[bestAggId]++; - connectWeight[i] = bestConnect - penaltyConnectWeight; - } + aggPenalties[bestAggId]++; + connectWeight[i] = bestConnect - penaltyConnectWeight; } } } +} -} // end namespace +} // namespace MueLu #endif /* MUELU_AGGREGATIONPHASE2BALGORITHM_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_kokkos_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_kokkos_decl.hpp index 941f732e64d0..b0548aac4a4e 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_kokkos_decl.hpp @@ -58,7 +58,7 @@ #include "MueLu_LWGraph_kokkos.hpp" namespace MueLu { - /*! +/*! @class AggregationPhase2bAlgorithm class. @brief Add leftovers to existing aggregates @ingroup Aggregation @@ -79,59 +79,57 @@ namespace MueLu { This is not a problem, since after the previous aggregation phases one should not be able to build too large aggregates. 
*/ - template - class AggregationPhase2bAlgorithm_kokkos : - public MueLu::AggregationAlgorithmBase_kokkos { +template +class AggregationPhase2bAlgorithm_kokkos : public MueLu::AggregationAlgorithmBase_kokkos { #undef MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - using device_type = typename LWGraph_kokkos::device_type; - using execution_space = typename LWGraph_kokkos::execution_space; - using memory_space = typename LWGraph_kokkos::memory_space; + public: + using device_type = typename LWGraph_kokkos::device_type; + using execution_space = typename LWGraph_kokkos::execution_space; + using memory_space = typename LWGraph_kokkos::memory_space; - //! @name Constructors/Destructors. - //@{ + //! @name Constructors/Destructors. + //@{ - //! Constructor. - AggregationPhase2bAlgorithm_kokkos(const RCP& /* graphFact */ = Teuchos::null) { } + //! Constructor. + AggregationPhase2bAlgorithm_kokkos(const RCP& /* graphFact */ = Teuchos::null) {} - //! Destructor. - virtual ~AggregationPhase2bAlgorithm_kokkos() { } + //! Destructor. + virtual ~AggregationPhase2bAlgorithm_kokkos() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. 
*/ + void BuildAggregates(const ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const; - void BuildAggregates(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; + void BuildAggregatesRandom(const ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const; - void BuildAggregatesRandom(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; + void BuildAggregatesDeterministic(const ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const; + //@} - void BuildAggregatesDeterministic(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; - //@} + std::string description() const { return "Phase 2b (expansion)"; } +}; - std::string description() const { return "Phase 2b (expansion)"; } - }; - -} //namespace MueLu +} //namespace MueLu #define MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_SHORT -#endif // MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DECL_HPP +#endif // MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_kokkos_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_kokkos_def.hpp index bccdbf7f98ca..3e59460b5206 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_kokkos_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_kokkos_def.hpp @@ -60,179 +60,180 @@ namespace MueLu { - // Try to stick unaggregated nodes into a neighboring aggregate if 
they are - // not already too big - template - void AggregationPhase2bAlgorithm_kokkos:: - BuildAggregates(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const { - - if(params.get("aggregation: deterministic")) { - Monitor m(*this, "BuildAggregatesDeterministic"); - BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes); - } else { - Monitor m(*this, "BuildAggregatesRandom"); - BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes); - } +// Try to stick unaggregated nodes into a neighboring aggregate if they are +// not already too big +template +void AggregationPhase2bAlgorithm_kokkos:: + BuildAggregates(const ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const { + if (params.get("aggregation: deterministic")) { + Monitor m(*this, "BuildAggregatesDeterministic"); + BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes); + } else { + Monitor m(*this, "BuildAggregatesRandom"); + BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes); + } + +} // BuildAggregates + +template +void AggregationPhase2bAlgorithm_kokkos:: + BuildAggregatesRandom(const ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const { + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); + + auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto colors = aggregates.GetGraphColors(); + const LO numColors = aggregates.GetGraphNumColors(); + const LO numLocalAggregates = aggregates.GetNumAggregates(); + + auto lclLWGraph = 
graph.getLocalLWGraph(); + + const LO defaultConnectWeight = 100; + const LO penaltyConnectWeight = 10; + + Kokkos::View aggWeight(Kokkos::ViewAllocateWithoutInitializing("aggWeight"), numLocalAggregates); // This gets re-initialized at the start of each "color" loop + Kokkos::View connectWeight(Kokkos::ViewAllocateWithoutInitializing("connectWeight"), numRows); + Kokkos::View aggPenalties("aggPenalties", numLocalAggregates); // This gets initialized to zero here + + Kokkos::deep_copy(connectWeight, defaultConnectWeight); + + // taw: by running the aggregation routine more than once there is a chance that also + // non-aggregated nodes with a node distance of two are added to existing aggregates. + // Assuming that the aggregate size is 3 in each direction running the algorithm only twice + // should be sufficient. + // lbv: If the prior phase of aggregation where run without specifying an aggregate size, + // the distance 2 coloring and phase 1 aggregation actually guarantee that only one iteration + // is needed to reach distance 2 neighbors. 
+ int maxIters = 2; + int maxNodesPerAggregate = params.get("aggregation: max agg size"); + if (maxNodesPerAggregate == std::numeric_limits::max()) { + maxIters = 1; + } + for (int iter = 0; iter < maxIters; ++iter) { + for (LO color = 1; color <= numColors; ++color) { + Kokkos::deep_copy(aggWeight, 0); + + //the reduce counts how many nodes are aggregated by this phase, + //which will then be subtracted from numNonAggregatedNodes + LO numAggregated = 0; + Kokkos::parallel_reduce( + "Aggregation Phase 2b: aggregates expansion", + Kokkos::RangePolicy(0, numRows), + KOKKOS_LAMBDA(const LO i, LO& tmpNumAggregated) { + if (aggStat(i) != READY || colors(i) != color) + return; + + auto neighOfINode = lclLWGraph.getNeighborVertices(i); + for (int j = 0; j < neighOfINode.length; j++) { + LO neigh = neighOfINode(j); - } // BuildAggregates - - template - void AggregationPhase2bAlgorithm_kokkos:: - BuildAggregatesRandom(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const { - - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); - - auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto colors = aggregates.GetGraphColors(); - const LO numColors = aggregates.GetGraphNumColors(); - const LO numLocalAggregates = aggregates.GetNumAggregates(); - - auto lclLWGraph = graph.getLocalLWGraph(); - - const LO defaultConnectWeight = 100; - const LO penaltyConnectWeight = 10; - - Kokkos::View aggWeight (Kokkos::ViewAllocateWithoutInitializing("aggWeight"), numLocalAggregates); // This gets re-initialized at the start of each "color" loop - Kokkos::View connectWeight(Kokkos::ViewAllocateWithoutInitializing("connectWeight"), numRows); - Kokkos::View aggPenalties ("aggPenalties", numLocalAggregates);// This gets 
initialized to zero here - - Kokkos::deep_copy(connectWeight, defaultConnectWeight); - - // taw: by running the aggregation routine more than once there is a chance that also - // non-aggregated nodes with a node distance of two are added to existing aggregates. - // Assuming that the aggregate size is 3 in each direction running the algorithm only twice - // should be sufficient. - // lbv: If the prior phase of aggregation where run without specifying an aggregate size, - // the distance 2 coloring and phase 1 aggregation actually guarantee that only one iteration - // is needed to reach distance 2 neighbors. - int maxIters = 2; - int maxNodesPerAggregate = params.get("aggregation: max agg size"); - if(maxNodesPerAggregate == std::numeric_limits::max()) {maxIters = 1;} - for (int iter = 0; iter < maxIters; ++iter) { - for(LO color = 1; color <= numColors; ++color) { - Kokkos::deep_copy(aggWeight, 0); - - //the reduce counts how many nodes are aggregated by this phase, - //which will then be subtracted from numNonAggregatedNodes - LO numAggregated = 0; - Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion", - Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA (const LO i, LO& tmpNumAggregated) { - if (aggStat(i) != READY || colors(i) != color) - return; - - auto neighOfINode = lclLWGraph.getNeighborVertices(i); - for (int j = 0; j < neighOfINode.length; j++) { - LO neigh = neighOfINode(j); - - // We don't check (neigh != i), as it is covered by checking - // (aggStat[neigh] == AGGREGATED) - if (lclLWGraph.isLocalNeighborVertex(neigh) && - aggStat(neigh) == AGGREGATED) - Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)), - connectWeight(neigh)); - } - - int bestScore = -100000; - int bestAggId = -1; - int bestConnect = -1; - - for (int j = 0; j < neighOfINode.length; j++) { - LO neigh = neighOfINode(j); - - if (lclLWGraph.isLocalNeighborVertex(neigh) && - aggStat(neigh) == AGGREGATED) { - auto aggId = vertex2AggId(neigh, 0); - int score = 
aggWeight(aggId) - aggPenalties(aggId); - - if (score > bestScore) { - bestAggId = aggId; - bestScore = score; - bestConnect = connectWeight(neigh); - - } else if (aggId == bestAggId && - connectWeight(neigh) > bestConnect) { - bestConnect = connectWeight(neigh); - } - } - } - if (bestScore >= 0) { - aggStat(i) = AGGREGATED; - vertex2AggId(i, 0) = bestAggId; - procWinner(i, 0) = myRank; - - Kokkos::atomic_add(&aggPenalties(bestAggId), 1); - connectWeight(i) = bestConnect - penaltyConnectWeight; - tmpNumAggregated++; - } - }, numAggregated); //parallel_for - numNonAggregatedNodes -= numAggregated; - } - } // loop over maxIters - - } // BuildAggregatesRandom - - - - template - void AggregationPhase2bAlgorithm_kokkos:: - BuildAggregatesDeterministic(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const { - - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); - - auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto colors = aggregates.GetGraphColors(); - const LO numColors = aggregates.GetGraphNumColors(); - LO numLocalAggregates = aggregates.GetNumAggregates(); - - auto lclLWGraph = graph.getLocalLWGraph(); - - const int defaultConnectWeight = 100; - const int penaltyConnectWeight = 10; - - Kokkos::View connectWeight (Kokkos::ViewAllocateWithoutInitializing("connectWeight"), numRows); - Kokkos::View aggWeight (Kokkos::ViewAllocateWithoutInitializing("aggWeight"), numLocalAggregates);// This gets re-initialized at the start of each "color" loop - Kokkos::View aggPenaltyUpdates("aggPenaltyUpdates", numLocalAggregates); - Kokkos::View aggPenalties ("aggPenalties", numLocalAggregates); - - Kokkos::deep_copy(connectWeight, defaultConnectWeight); - - // We do this cycle twice. 
- // I don't know why, but ML does it too - // taw: by running the aggregation routine more than once there is a chance that also - // non-aggregated nodes with a node distance of two are added to existing aggregates. - // Assuming that the aggregate size is 3 in each direction running the algorithm only twice - // should be sufficient. - int maxIters = 2; - int maxNodesPerAggregate = params.get("aggregation: max agg size"); - if(maxNodesPerAggregate == std::numeric_limits::max()) {maxIters = 1;} - for (int iter = 0; iter < maxIters; ++iter) { - for(LO color = 1; color <= numColors; color++) { - Kokkos::deep_copy(aggWeight, 0); - - //the reduce counts how many nodes are aggregated by this phase, - //which will then be subtracted from numNonAggregatedNodes - LO numAggregated = 0; - Kokkos::parallel_for("Aggregation Phase 2b: updating agg weights", + // We don't check (neigh != i), as it is covered by checking + // (aggStat[neigh] == AGGREGATED) + if (lclLWGraph.isLocalNeighborVertex(neigh) && + aggStat(neigh) == AGGREGATED) + Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)), + connectWeight(neigh)); + } + + int bestScore = -100000; + int bestAggId = -1; + int bestConnect = -1; + + for (int j = 0; j < neighOfINode.length; j++) { + LO neigh = neighOfINode(j); + + if (lclLWGraph.isLocalNeighborVertex(neigh) && + aggStat(neigh) == AGGREGATED) { + auto aggId = vertex2AggId(neigh, 0); + int score = aggWeight(aggId) - aggPenalties(aggId); + + if (score > bestScore) { + bestAggId = aggId; + bestScore = score; + bestConnect = connectWeight(neigh); + + } else if (aggId == bestAggId && + connectWeight(neigh) > bestConnect) { + bestConnect = connectWeight(neigh); + } + } + } + if (bestScore >= 0) { + aggStat(i) = AGGREGATED; + vertex2AggId(i, 0) = bestAggId; + procWinner(i, 0) = myRank; + + Kokkos::atomic_add(&aggPenalties(bestAggId), 1); + connectWeight(i) = bestConnect - penaltyConnectWeight; + tmpNumAggregated++; + } + }, + numAggregated); //parallel_for + 
numNonAggregatedNodes -= numAggregated; + } + } // loop over maxIters + +} // BuildAggregatesRandom + +template +void AggregationPhase2bAlgorithm_kokkos:: + BuildAggregatesDeterministic(const ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const { + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); + + auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto colors = aggregates.GetGraphColors(); + const LO numColors = aggregates.GetGraphNumColors(); + LO numLocalAggregates = aggregates.GetNumAggregates(); + + auto lclLWGraph = graph.getLocalLWGraph(); + + const int defaultConnectWeight = 100; + const int penaltyConnectWeight = 10; + + Kokkos::View connectWeight(Kokkos::ViewAllocateWithoutInitializing("connectWeight"), numRows); + Kokkos::View aggWeight(Kokkos::ViewAllocateWithoutInitializing("aggWeight"), numLocalAggregates); // This gets re-initialized at the start of each "color" loop + Kokkos::View aggPenaltyUpdates("aggPenaltyUpdates", numLocalAggregates); + Kokkos::View aggPenalties("aggPenalties", numLocalAggregates); + + Kokkos::deep_copy(connectWeight, defaultConnectWeight); + + // We do this cycle twice. + // I don't know why, but ML does it too + // taw: by running the aggregation routine more than once there is a chance that also + // non-aggregated nodes with a node distance of two are added to existing aggregates. + // Assuming that the aggregate size is 3 in each direction running the algorithm only twice + // should be sufficient. 
+ int maxIters = 2; + int maxNodesPerAggregate = params.get("aggregation: max agg size"); + if (maxNodesPerAggregate == std::numeric_limits::max()) { + maxIters = 1; + } + for (int iter = 0; iter < maxIters; ++iter) { + for (LO color = 1; color <= numColors; color++) { + Kokkos::deep_copy(aggWeight, 0); + + //the reduce counts how many nodes are aggregated by this phase, + //which will then be subtracted from numNonAggregatedNodes + LO numAggregated = 0; + Kokkos::parallel_for( + "Aggregation Phase 2b: updating agg weights", Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA (const LO i) - { + KOKKOS_LAMBDA(const LO i) { if (aggStat(i) != READY || colors(i) != color) return; auto neighOfINode = lclLWGraph.getNeighborVertices(i); @@ -242,15 +243,15 @@ namespace MueLu { // (aggStat[neigh] == AGGREGATED) if (lclLWGraph.isLocalNeighborVertex(neigh) && aggStat(neigh) == AGGREGATED) - Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)), - connectWeight(neigh)); + Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)), + connectWeight(neigh)); } }); - Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion", + Kokkos::parallel_reduce( + "Aggregation Phase 2b: aggregates expansion", Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA (const LO i, LO& tmpNumAggregated) - { + KOKKOS_LAMBDA(const LO i, LO& tmpNumAggregated) { if (aggStat(i) != READY || colors(i) != color) return; int bestScore = -100000; @@ -264,7 +265,7 @@ namespace MueLu { if (lclLWGraph.isLocalNeighborVertex(neigh) && aggStat(neigh) == AGGREGATED) { auto aggId = vertex2AggId(neigh, 0); - int score = aggWeight(aggId) - aggPenalties(aggId); + int score = aggWeight(aggId) - aggPenalties(aggId); if (score > bestScore) { bestAggId = aggId; @@ -272,7 +273,7 @@ namespace MueLu { bestConnect = connectWeight(neigh); } else if (aggId == bestAggId && - connectWeight(neigh) > bestConnect) { + connectWeight(neigh) > bestConnect) { bestConnect = connectWeight(neigh); } } @@ -286,19 +287,20 @@ namespace MueLu { 
connectWeight(i) = bestConnect - penaltyConnectWeight; tmpNumAggregated++; } - }, numAggregated); //parallel_reduce + }, + numAggregated); //parallel_reduce - Kokkos::parallel_for("Aggregation Phase 2b: updating agg penalties", + Kokkos::parallel_for( + "Aggregation Phase 2b: updating agg penalties", Kokkos::RangePolicy(0, numLocalAggregates), - KOKKOS_LAMBDA (const LO agg) - { + KOKKOS_LAMBDA(const LO agg) { aggPenalties(agg) += aggPenaltyUpdates(agg); aggPenaltyUpdates(agg) = 0; }); - numNonAggregatedNodes -= numAggregated; - } - } // loop over k - } // BuildAggregatesDeterministic -} // end namespace + numNonAggregatedNodes -= numAggregated; + } + } // loop over k +} // BuildAggregatesDeterministic +} // namespace MueLu -#endif // MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP +#endif // MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_decl.hpp index 20e5fc8a7222..901d0532d019 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_decl.hpp @@ -55,7 +55,7 @@ #include "MueLu_GraphBase.hpp" namespace MueLu { - /*! +/*! @class AggregationPhase3Algorithm class. @brief Handle leftover nodes. Try to avoid singleton nodes @ingroup Aggregation @@ -72,41 +72,38 @@ namespace MueLu { */ - template - class AggregationPhase3Algorithm : - public MueLu::AggregationAlgorithmBase { +template +class AggregationPhase3Algorithm : public MueLu::AggregationAlgorithmBase { #undef MUELU_AGGREGATIONPHASE3ALGORITHM_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - AggregationPhase3Algorithm(const RCP& /* graphFact */ = Teuchos::null) { } + //! 
Constructor. + AggregationPhase3Algorithm(const RCP& /* graphFact */ = Teuchos::null) {} - //! Destructor. - virtual ~AggregationPhase3Algorithm() { } + //! Destructor. + virtual ~AggregationPhase3Algorithm() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. */ + void BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; + //@} - void BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; - //@} + std::string description() const { return "Phase 3 (cleanup)"; } +}; - std::string description() const { return "Phase 3 (cleanup)"; } - }; - -} //namespace MueLu +} //namespace MueLu #define MUELU_AGGREGATIONPHASE3ALGORITHM_SHORT - #endif /* MUELU_AGGREGATIONPHASE3ALGORITHM_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_def.hpp index ac462bcccf4b..0700e6e2869a 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_def.hpp @@ -60,150 +60,147 @@ namespace MueLu { - // Try to stick unaggregated nodes into a neighboring aggregate if they are - // not already too big. Otherwise, make a new aggregate - template - void AggregationPhase3Algorithm::BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildAggregates"); +// Try to stick unaggregated nodes into a neighboring aggregate if they are +// not already too big. 
Otherwise, make a new aggregate +template +void AggregationPhase3Algorithm::BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const { + Monitor m(*this, "BuildAggregates"); - bool makeNonAdjAggs = false; - bool error_on_isolated = false; - if(params.isParameter("aggregation: error on nodes with no on-rank neighbors")) - error_on_isolated = params.get("aggregation: error on nodes with no on-rank neighbors"); - if(params.isParameter("aggregation: phase3 avoid singletons")) - makeNonAdjAggs = params.get("aggregation: phase3 avoid singletons"); + bool makeNonAdjAggs = false; + bool error_on_isolated = false; + if (params.isParameter("aggregation: error on nodes with no on-rank neighbors")) + error_on_isolated = params.get("aggregation: error on nodes with no on-rank neighbors"); + if (params.isParameter("aggregation: phase3 avoid singletons")) + makeNonAdjAggs = params.get("aggregation: phase3 avoid singletons"); - size_t numSingletons=0; + size_t numSingletons = 0; - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); - ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); - ArrayRCP procWinner = aggregates.GetProcWinner() ->getDataNonConst(0); + ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); + ArrayRCP procWinner = aggregates.GetProcWinner()->getDataNonConst(0); - LO numLocalAggregates = aggregates.GetNumAggregates(); + LO numLocalAggregates = aggregates.GetNumAggregates(); - for (LO i = 0; i < numRows; i++) { - if (aggStat[i] == AGGREGATED || aggStat[i] == IGNORED) - continue; + for (LO i = 0; i < numRows; i++) { + if (aggStat[i] == AGGREGATED || aggStat[i] == IGNORED) + continue; - ArrayView neighOfINode = graph.getNeighborVertices(i); + ArrayView neighOfINode = 
graph.getNeighborVertices(i); - // We don't want a singleton. So lets see if there is an unaggregated - // neighbor that we can also put with this point. - bool isNewAggregate = false; - bool failedToAggregate = true; - for (int j = 0; j < neighOfINode.size(); j++) { - LO neigh = neighOfINode[j]; + // We don't want a singleton. So lets see if there is an unaggregated + // neighbor that we can also put with this point. + bool isNewAggregate = false; + bool failedToAggregate = true; + for (int j = 0; j < neighOfINode.size(); j++) { + LO neigh = neighOfINode[j]; - if (neigh != i && graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == READY) { - isNewAggregate = true; + if (neigh != i && graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == READY) { + isNewAggregate = true; - aggStat [neigh] = AGGREGATED; - vertex2AggId[neigh] = numLocalAggregates; - procWinner [neigh] = myRank; + aggStat[neigh] = AGGREGATED; + vertex2AggId[neigh] = numLocalAggregates; + procWinner[neigh] = myRank; - numNonAggregatedNodes--; - } + numNonAggregatedNodes--; } + } - if (isNewAggregate) { - // Create new aggregate (not singleton) - aggStat [i] = AGGREGATED; - procWinner [i] = myRank; - numNonAggregatedNodes--; - aggregates.SetIsRoot(i); - vertex2AggId[i] = numLocalAggregates++; + if (isNewAggregate) { + // Create new aggregate (not singleton) + aggStat[i] = AGGREGATED; + procWinner[i] = myRank; + numNonAggregatedNodes--; + aggregates.SetIsRoot(i); + vertex2AggId[i] = numLocalAggregates++; + + failedToAggregate = false; + } else { + // We do not want a singleton, but there are no non-aggregated + // neighbors. Lets see if we can connect to any other aggregates + // NOTE: This is very similar to phase 2b, but simplier: we stop with + // the first found aggregate + int j = 0; + for (; j < neighOfINode.size(); j++) { + LO neigh = neighOfINode[j]; - failedToAggregate = false; - } else { - // We do not want a singleton, but there are no non-aggregated - // neighbors. 
Lets see if we can connect to any other aggregates - // NOTE: This is very similar to phase 2b, but simplier: we stop with - // the first found aggregate - int j = 0; - for (; j < neighOfINode.size(); j++) { - LO neigh = neighOfINode[j]; - - // We don't check (neigh != rootCandidate), as it is covered by checking (aggStat[neigh] == AGGREGATED) - if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == AGGREGATED) - break; - } - - if (j < neighOfINode.size()) { - // Assign to an adjacent aggregate - vertex2AggId[i] = vertex2AggId[neighOfINode[j]]; - numNonAggregatedNodes--; - failedToAggregate = false; - } + // We don't check (neigh != rootCandidate), as it is covered by checking (aggStat[neigh] == AGGREGATED) + if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == AGGREGATED) + break; } - if (failedToAggregate && makeNonAdjAggs) { - // it we are still didn't find an aggregate home for i (i.e., we have - // a potential singleton), we are desperate. Basically, we seek to - // group i with any other local point to form an aggregate (even if - // it is not a neighbor of i. Either we find a vertex that is already - // aggregated or not aggregated. 
- // 1) if found vertex is aggregated, then assign i to this aggregate - // 2) if found vertex is not aggregated, create new aggregate - - - for (LO ii = 0; ii < numRows; ii++) { // look for anyone else - if ( (ii != i) && (aggStat[ii] != IGNORED) ) { - failedToAggregate = false; // found someone so start - aggStat[i] = AGGREGATED; // marking i as aggregated - procWinner[i]= myRank; - - if (aggStat[ii] == AGGREGATED) - vertex2AggId[i] = vertex2AggId[ii]; - else { - vertex2AggId[i] = numLocalAggregates; - vertex2AggId[ii] = numLocalAggregates; - aggStat [ii] = AGGREGATED; - procWinner [ii] = myRank; - numNonAggregatedNodes--; // acounts for ii now being aggregated - aggregates.SetIsRoot(i); - numLocalAggregates++; - } - numNonAggregatedNodes--; // accounts for i now being aggregated - break; - } //if ( (ii != i) && (aggStat[ii] != IGNORED ... - } //for (LO ii = 0; ... - } - if (failedToAggregate) { - if (error_on_isolated) { - // Error on this isolated node, as the user has requested - std::ostringstream oss; - oss<<"MueLu::AggregationPhase3Algorithm::BuildAggregates: MueLu has detected a non-Dirichlet node that has no on-rank neighbors and is terminating (by user request). "<GetOStream(Warnings1) << "Found singleton: " << i << std::endl; - numSingletons++; - - aggregates.SetIsRoot(i); - vertex2AggId[i] = numLocalAggregates++; - numNonAggregatedNodes--; - } + if (j < neighOfINode.size()) { + // Assign to an adjacent aggregate + vertex2AggId[i] = vertex2AggId[neighOfINode[j]]; + numNonAggregatedNodes--; + failedToAggregate = false; } + } + + if (failedToAggregate && makeNonAdjAggs) { + // it we are still didn't find an aggregate home for i (i.e., we have + // a potential singleton), we are desperate. Basically, we seek to + // group i with any other local point to form an aggregate (even if + // it is not a neighbor of i. Either we find a vertex that is already + // aggregated or not aggregated. 
+ // 1) if found vertex is aggregated, then assign i to this aggregate + // 2) if found vertex is not aggregated, create new aggregate + + for (LO ii = 0; ii < numRows; ii++) { // look for anyone else + if ((ii != i) && (aggStat[ii] != IGNORED)) { + failedToAggregate = false; // found someone so start + aggStat[i] = AGGREGATED; // marking i as aggregated + procWinner[i] = myRank; + + if (aggStat[ii] == AGGREGATED) + vertex2AggId[i] = vertex2AggId[ii]; + else { + vertex2AggId[i] = numLocalAggregates; + vertex2AggId[ii] = numLocalAggregates; + aggStat[ii] = AGGREGATED; + procWinner[ii] = myRank; + numNonAggregatedNodes--; // acounts for ii now being aggregated + aggregates.SetIsRoot(i); + numLocalAggregates++; + } + numNonAggregatedNodes--; // accounts for i now being aggregated + break; + } //if ( (ii != i) && (aggStat[ii] != IGNORED ... + } //for (LO ii = 0; ... + } + if (failedToAggregate) { + if (error_on_isolated) { + // Error on this isolated node, as the user has requested + std::ostringstream oss; + oss << "MueLu::AggregationPhase3Algorithm::BuildAggregates: MueLu has detected a non-Dirichlet node that has no on-rank neighbors and is terminating (by user request). " << std::endl; + oss << "If this error is being generated at level 0, this is due to an initial partitioning problem in your matrix." << std::endl; + oss << "If this error is being generated at any other level, try turning on repartitioning, which may fix this problem." 
<< std::endl; + throw Exceptions::RuntimeError(oss.str()); + } else { + // Create new aggregate (singleton) + // this->GetOStream(Warnings1) << "Found singleton: " << i << std::endl; + numSingletons++; - // One way or another, the node is aggregated (possibly into a singleton) - aggStat [i] = AGGREGATED; - procWinner[i] = myRank; + aggregates.SetIsRoot(i); + vertex2AggId[i] = numLocalAggregates++; + numNonAggregatedNodes--; + } + } - } // loop over numRows - + // One way or another, the node is aggregated (possibly into a singleton) + aggStat[i] = AGGREGATED; + procWinner[i] = myRank; - if(numSingletons > 0) - this->GetOStream(Runtime0)<<" WARNING Rank "< 0) + this->GetOStream(Runtime0) << " WARNING Rank " << myRank << " singletons :" << numSingletons << " (phase)" << std::endl; - // update aggregate object - aggregates.SetNumAggregates(numLocalAggregates); - } + // update aggregate object + aggregates.SetNumAggregates(numLocalAggregates); +} -} // end namespace +} // namespace MueLu #endif /* MUELU_AGGREGATIONPHASE3ALGORITHM_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_kokkos_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_kokkos_decl.hpp index 9911ac016c5e..e10b5117ef11 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_kokkos_decl.hpp @@ -58,7 +58,7 @@ #include "MueLu_LWGraph_kokkos_fwd.hpp" namespace MueLu { - /*! +/*! @class AggregationPhase3Algorithm class. @brief Handle leftover nodes. 
Try to avoid singleton nodes @ingroup Aggregation @@ -75,53 +75,51 @@ namespace MueLu { */ - template - class AggregationPhase3Algorithm_kokkos : - public MueLu::AggregationAlgorithmBase_kokkos { +template +class AggregationPhase3Algorithm_kokkos : public MueLu::AggregationAlgorithmBase_kokkos { #undef MUELU_AGGREGATIONPHASE3ALGORITHM_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - using device_type = typename LWGraph_kokkos::device_type; - using execution_space = typename LWGraph_kokkos::execution_space; - using memory_space = typename LWGraph_kokkos::memory_space; + public: + using device_type = typename LWGraph_kokkos::device_type; + using execution_space = typename LWGraph_kokkos::execution_space; + using memory_space = typename LWGraph_kokkos::memory_space; - //! @name Constructors/Destructors. - //@{ + //! @name Constructors/Destructors. + //@{ - //! Constructor. - AggregationPhase3Algorithm_kokkos(const RCP& /* graphFact */ = Teuchos::null) { } + //! Constructor. + AggregationPhase3Algorithm_kokkos(const RCP& /* graphFact */ = Teuchos::null) {} - //! Destructor. - virtual ~AggregationPhase3Algorithm_kokkos() { } + //! Destructor. + virtual ~AggregationPhase3Algorithm_kokkos() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. 
*/ + void BuildAggregates(const ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const; - void BuildAggregates(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; + void BuildAggregatesRandom(const ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const; + //@} - void BuildAggregatesRandom(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; - //@} + std::string description() const { return "Phase 3 (cleanup)"; } +}; - std::string description() const { return "Phase 3 (cleanup)"; } - }; - -} //namespace MueLu +} //namespace MueLu #define MUELU_AGGREGATIONPHASE3ALGORITHM_KOKKOS_SHORT -#endif // MUELU_AGGREGATIONPHASE3ALGORITHM_KOKKOS_DECL_HPP +#endif // MUELU_AGGREGATIONPHASE3ALGORITHM_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_kokkos_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_kokkos_def.hpp index 4bacd24017cd..a684149e179f 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_kokkos_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_kokkos_def.hpp @@ -62,171 +62,171 @@ namespace MueLu { - // Try to stick unaggregated nodes into a neighboring aggregate if they are - // not already too big. Otherwise, make a new aggregate - template - void AggregationPhase3Algorithm_kokkos:: - BuildAggregates(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const { - - // So far we only have the non-deterministic version of the algorithm... 
- if(params.get("aggregation: deterministic")) { - Monitor m(*this, "BuildAggregatesDeterministic"); - BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes); - } else { - Monitor m(*this, "BuildAggregatesRandom"); - BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes); - } - +// Try to stick unaggregated nodes into a neighboring aggregate if they are +// not already too big. Otherwise, make a new aggregate +template +void AggregationPhase3Algorithm_kokkos:: + BuildAggregates(const ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const { + // So far we only have the non-deterministic version of the algorithm... + if (params.get("aggregation: deterministic")) { + Monitor m(*this, "BuildAggregatesDeterministic"); + BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes); + } else { + Monitor m(*this, "BuildAggregatesRandom"); + BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes); } - - // Try to stick unaggregated nodes into a neighboring aggregate if they are - // not already too big. 
Otherwise, make a new aggregate - template - void AggregationPhase3Algorithm_kokkos:: - BuildAggregatesRandom(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const { - - bool error_on_isolated = params.get("aggregation: error on nodes with no on-rank neighbors"); - bool makeNonAdjAggs = params.get("aggregation: phase3 avoid singletons"); - - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); - - auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto colors = aggregates.GetGraphColors(); - const LO numColors = aggregates.GetGraphNumColors(); - - auto lclLWGraph = graph.getLocalLWGraph(); - - Kokkos::View numAggregates("numAggregates"); - Kokkos::deep_copy(numAggregates, aggregates.GetNumAggregates()); - - Kokkos::View aggStatOld(Kokkos::ViewAllocateWithoutInitializing("Initial aggregation status"), aggStat.extent(0)); +} + +// Try to stick unaggregated nodes into a neighboring aggregate if they are +// not already too big. 
Otherwise, make a new aggregate +template +void AggregationPhase3Algorithm_kokkos:: + BuildAggregatesRandom(const ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const { + bool error_on_isolated = params.get("aggregation: error on nodes with no on-rank neighbors"); + bool makeNonAdjAggs = params.get("aggregation: phase3 avoid singletons"); + + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); + + auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto colors = aggregates.GetGraphColors(); + const LO numColors = aggregates.GetGraphNumColors(); + + auto lclLWGraph = graph.getLocalLWGraph(); + + Kokkos::View numAggregates("numAggregates"); + Kokkos::deep_copy(numAggregates, aggregates.GetNumAggregates()); + + Kokkos::View aggStatOld(Kokkos::ViewAllocateWithoutInitializing("Initial aggregation status"), aggStat.extent(0)); + Kokkos::deep_copy(aggStatOld, aggStat); + Kokkos::View numNonAggregated("numNonAggregated"); + Kokkos::deep_copy(numNonAggregated, numNonAggregatedNodes); + for (int color = 1; color < numColors + 1; ++color) { + Kokkos::parallel_for( + "Aggregation Phase 3: aggregates clean-up", + Kokkos::RangePolicy(0, numRows), + KOKKOS_LAMBDA(const LO nodeIdx) { + // Check if node has already been treated? + if ((colors(nodeIdx) != color) || + (aggStatOld(nodeIdx) == AGGREGATED) || + (aggStatOld(nodeIdx) == IGNORED)) { + return; + } + + // Grab node neighbors + auto neighbors = lclLWGraph.getNeighborVertices(nodeIdx); + LO neighIdx; + + // We don't want a singleton. + // So lets see if any neighbors can be used to form a new aggregate? 
+ bool isNewAggregate = false; + for (int neigh = 0; neigh < neighbors.length; ++neigh) { + neighIdx = neighbors(neigh); + + if ((neighIdx != nodeIdx) && + lclLWGraph.isLocalNeighborVertex(neighIdx) && + (aggStatOld(neighIdx) == READY)) { + isNewAggregate = true; + break; + } + } + + // We can form a new non singleton aggregate! + if (isNewAggregate) { + // If this is the aggregate root + // we need to process the nodes in the aggregate + const LO aggId = Kokkos::atomic_fetch_add(&numAggregates(), 1); + aggStat(nodeIdx) = AGGREGATED; + procWinner(nodeIdx, 0) = myRank; + vertex2AggId(nodeIdx, 0) = aggId; + // aggregates.SetIsRoot(nodeIdx); + Kokkos::atomic_decrement(&numNonAggregated()); + for (int neigh = 0; neigh < neighbors.length; ++neigh) { + neighIdx = neighbors(neigh); + if ((neighIdx != nodeIdx) && + lclLWGraph.isLocalNeighborVertex(neighIdx) && + (aggStatOld(neighIdx) == READY)) { + aggStat(neighIdx) = AGGREGATED; + procWinner(neighIdx, 0) = myRank; + vertex2AggId(neighIdx, 0) = aggId; + Kokkos::atomic_decrement(&numNonAggregated()); + } + } + return; + } + + // Getting a little desperate! + // Let us try to aggregate into a neighboring aggregate + for (int neigh = 0; neigh < neighbors.length; ++neigh) { + neighIdx = neighbors(neigh); + if (lclLWGraph.isLocalNeighborVertex(neighIdx) && + (aggStatOld(neighIdx) == AGGREGATED)) { + aggStat(nodeIdx) = AGGREGATED; + procWinner(nodeIdx, 0) = myRank; + vertex2AggId(nodeIdx, 0) = vertex2AggId(neighIdx, 0); + Kokkos::atomic_decrement(&numNonAggregated()); + return; + } + } + + // Getting quite desperate! 
+ // Let us try to make a non contiguous aggregate + if (makeNonAdjAggs) { + for (LO otherNodeIdx = 0; otherNodeIdx < numRows; ++otherNodeIdx) { + if ((otherNodeIdx != nodeIdx) && + (aggStatOld(otherNodeIdx) == AGGREGATED)) { + aggStat(nodeIdx) = AGGREGATED; + procWinner(nodeIdx, 0) = myRank; + vertex2AggId(nodeIdx, 0) = vertex2AggId(otherNodeIdx, 0); + Kokkos::atomic_decrement(&numNonAggregated()); + return; + } + } + } + + // Total deperation! + // Let us make a singleton + if (!error_on_isolated) { + const LO aggId = Kokkos::atomic_fetch_add(&numAggregates(), 1); + aggStat(nodeIdx) = AGGREGATED; + procWinner(nodeIdx, 0) = myRank; + vertex2AggId(nodeIdx, 0) = aggId; + Kokkos::atomic_decrement(&numNonAggregated()); + } + }); + // LBV on 09/27/19: here we could copy numNonAggregated to host + // and check for it to be equal to 0 in which case we can stop + // looping over the different colors... Kokkos::deep_copy(aggStatOld, aggStat); - Kokkos::View numNonAggregated("numNonAggregated"); - Kokkos::deep_copy(numNonAggregated, numNonAggregatedNodes); - for(int color = 1; color < numColors + 1; ++color) { - Kokkos::parallel_for("Aggregation Phase 3: aggregates clean-up", - Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA(const LO nodeIdx) { - // Check if node has already been treated? - if( (colors(nodeIdx) != color) || - (aggStatOld(nodeIdx) == AGGREGATED) || - (aggStatOld(nodeIdx) == IGNORED) ){ return; } - - // Grab node neighbors - auto neighbors = lclLWGraph.getNeighborVertices(nodeIdx); - LO neighIdx; - - // We don't want a singleton. - // So lets see if any neighbors can be used to form a new aggregate? - bool isNewAggregate = false; - for(int neigh = 0; neigh < neighbors.length; ++neigh) { - neighIdx = neighbors(neigh); - - if((neighIdx != nodeIdx) && - lclLWGraph.isLocalNeighborVertex(neighIdx) && - (aggStatOld(neighIdx) == READY)) { - isNewAggregate = true; - break; - } - } - - // We can form a new non singleton aggregate! 
- if(isNewAggregate) { - // If this is the aggregate root - // we need to process the nodes in the aggregate - const LO aggId = Kokkos::atomic_fetch_add(&numAggregates(), 1); - aggStat(nodeIdx) = AGGREGATED; - procWinner(nodeIdx, 0) = myRank; - vertex2AggId(nodeIdx, 0) = aggId; - // aggregates.SetIsRoot(nodeIdx); - Kokkos::atomic_decrement(&numNonAggregated()); - for(int neigh = 0; neigh < neighbors.length; ++neigh) { - neighIdx = neighbors(neigh); - if((neighIdx != nodeIdx) && - lclLWGraph.isLocalNeighborVertex(neighIdx) && - (aggStatOld(neighIdx) == READY)) { - aggStat(neighIdx) = AGGREGATED; - procWinner(neighIdx, 0) = myRank; - vertex2AggId(neighIdx, 0) = aggId; - Kokkos::atomic_decrement(&numNonAggregated()); - } - } - return; - } - - // Getting a little desperate! - // Let us try to aggregate into a neighboring aggregate - for(int neigh = 0; neigh < neighbors.length; ++neigh) { - neighIdx = neighbors(neigh); - if (lclLWGraph.isLocalNeighborVertex(neighIdx) && - (aggStatOld(neighIdx) == AGGREGATED)) { - aggStat(nodeIdx) = AGGREGATED; - procWinner(nodeIdx, 0) = myRank; - vertex2AggId(nodeIdx, 0) = vertex2AggId(neighIdx, 0); - Kokkos::atomic_decrement(&numNonAggregated()); - return; - } - } - - // Getting quite desperate! - // Let us try to make a non contiguous aggregate - if(makeNonAdjAggs) { - for(LO otherNodeIdx = 0; otherNodeIdx < numRows; ++otherNodeIdx) { - if((otherNodeIdx != nodeIdx) && - (aggStatOld(otherNodeIdx) == AGGREGATED)) { - aggStat(nodeIdx) = AGGREGATED; - procWinner(nodeIdx, 0) = myRank; - vertex2AggId(nodeIdx, 0) = vertex2AggId(otherNodeIdx, 0); - Kokkos::atomic_decrement(&numNonAggregated()); - return; - } - } - } - - // Total deperation! 
- // Let us make a singleton - if(!error_on_isolated) { - const LO aggId = Kokkos::atomic_fetch_add(&numAggregates(), 1); - aggStat(nodeIdx) = AGGREGATED; - procWinner(nodeIdx, 0) = myRank; - vertex2AggId(nodeIdx, 0) = aggId; - Kokkos::atomic_decrement(&numNonAggregated()); - } - }); - // LBV on 09/27/19: here we could copy numNonAggregated to host - // and check for it to be equal to 0 in which case we can stop - // looping over the different colors... - Kokkos::deep_copy(aggStatOld, aggStat); - } // loop over colors - - auto numNonAggregated_h = Kokkos::create_mirror_view(numNonAggregated); - Kokkos::deep_copy(numNonAggregated_h, numNonAggregated); - numNonAggregatedNodes = numNonAggregated_h(); - if( (error_on_isolated) && (numNonAggregatedNodes > 0) ) { - // Error on this isolated node, as the user has requested - std::ostringstream oss; - oss<<"MueLu::AggregationPhase3Algorithm::BuildAggregates: MueLu has detected a non-Dirichlet node that has no on-rank neighbors and is terminating (by user request). "< 0)) { + // Error on this isolated node, as the user has requested + std::ostringstream oss; + oss << "MueLu::AggregationPhase3Algorithm::BuildAggregates: MueLu has detected a non-Dirichlet node that has no on-rank neighbors and is terminating (by user request). " << std::endl; + oss << "If this error is being generated at level 0, this is due to an initial partitioning problem in your matrix." << std::endl; + oss << "If this error is being generated at any other level, try turning on repartitioning, which may fix this problem." 
<< std::endl; + throw Exceptions::RuntimeError(oss.str()); } -} // end namespace + // update aggregate object + auto numAggregates_h = Kokkos::create_mirror_view(numAggregates); + Kokkos::deep_copy(numAggregates_h, numAggregates); + aggregates.SetNumAggregates(numAggregates_h()); +} + +} // namespace MueLu -#endif // MUELU_AGGREGATIONPHASE3ALGORITHM_KOKKOS_DEF_HPP +#endif // MUELU_AGGREGATIONPHASE3ALGORITHM_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_InterfaceAggregationAlgorithm_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_InterfaceAggregationAlgorithm_decl.hpp index fb3425d5058c..3766eefd52cf 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_InterfaceAggregationAlgorithm_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_InterfaceAggregationAlgorithm_decl.hpp @@ -63,7 +63,7 @@ #include "MueLu_GraphBase.hpp" namespace MueLu { - /*! +/*! @class InterfaceAggregationAlgorithm class. @brief Algorithm for coarsening a graph with uncoupled aggregation. creates aggregates along an interface using specified root nodes. @@ -78,39 +78,36 @@ namespace MueLu { */ - template - class InterfaceAggregationAlgorithm : - public MueLu::AggregationAlgorithmBase { +template +class InterfaceAggregationAlgorithm : public MueLu::AggregationAlgorithmBase { #undef MUELU_INTERFACEAGGREGATIONALGORITHM_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - InterfaceAggregationAlgorithm(RCP const &graphFact = Teuchos::null); + //! Constructor. + InterfaceAggregationAlgorithm(RCP const& graphFact = Teuchos::null); - //! Destructor. - virtual ~InterfaceAggregationAlgorithm() { } + //! Destructor. + virtual ~InterfaceAggregationAlgorithm() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! 
@brief Local aggregation. */ + void BuildAggregates(Teuchos::ParameterList const& params, GraphBase const& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; + //@} - void BuildAggregates(Teuchos::ParameterList const & params, GraphBase const & graph, Aggregates & aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; - //@} +}; //class InterfaceAggregationAlgorithm - - }; //class InterfaceAggregationAlgorithm - -} //namespace MueLu +} //namespace MueLu #define MUELU_INTERFACEAGGREGATIONALGORITHM_SHORT #endif /* MUELU_INTERFACEAGGREGATIONALGORITHM_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_InterfaceAggregationAlgorithm_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_InterfaceAggregationAlgorithm_def.hpp index 22dd58e56fad..0ec5f6842c22 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_InterfaceAggregationAlgorithm_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_InterfaceAggregationAlgorithm_def.hpp @@ -69,40 +69,37 @@ namespace MueLu { template -InterfaceAggregationAlgorithm::InterfaceAggregationAlgorithm(RCP const &/* graphFact */) -{ +InterfaceAggregationAlgorithm::InterfaceAggregationAlgorithm(RCP const& /* graphFact */) { } template -void InterfaceAggregationAlgorithm::BuildAggregates(Teuchos::ParameterList const & /* params */, GraphBase const & graph, Aggregates & aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const { +void InterfaceAggregationAlgorithm::BuildAggregates(Teuchos::ParameterList const& /* params */, GraphBase const& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const { Monitor m(*this, "BuildAggregates"); const LocalOrdinal nRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); + const int myRank = graph.GetComm()->getRank(); // vertex ids for output Teuchos::ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); 
Teuchos::ArrayRCP procWinner = aggregates.GetProcWinner()->getDataNonConst(0); // some internal variables - LocalOrdinal numLocalAggregates = aggregates.GetNumAggregates(); // number of local aggregates on current proc + LocalOrdinal numLocalAggregates = aggregates.GetNumAggregates(); // number of local aggregates on current proc // main loop over all local rows of graph(A) - for(int iNode1 = 0; iNode1 < nRows; ++iNode1) { - + for (int iNode1 = 0; iNode1 < nRows; ++iNode1) { if (aggStat[iNode1] == INTERFACE) { - - aggregates.SetIsRoot(iNode1); // mark iNode1 as root node for new aggregate 'agg' + aggregates.SetIsRoot(iNode1); // mark iNode1 as root node for new aggregate 'agg' int aggIndex = numLocalAggregates; std::vector aggList; aggList.push_back(iNode1); ArrayView neighOfINode = graph.getNeighborVertices(iNode1); - for(int j = 0; j < neighOfINode.size(); ++j) { + for (int j = 0; j < neighOfINode.size(); ++j) { LO neigh = neighOfINode[j]; - if(neigh != iNode1 && graph.isLocalNeighborVertex(neigh)) { - if(aggStat[neigh] != AGGREGATED && aggStat[neigh] != INTERFACE && - aggStat[neigh] != IGNORED) { + if (neigh != iNode1 && graph.isLocalNeighborVertex(neigh)) { + if (aggStat[neigh] != AGGREGATED && aggStat[neigh] != INTERFACE && + aggStat[neigh] != IGNORED) { aggList.push_back(neigh); } } @@ -117,13 +114,12 @@ void InterfaceAggregationAlgorithm::BuildAggr numNonAggregatedNodes -= aggList.size(); } - } // end for + } // end for // update aggregate object aggregates.SetNumAggregates(numLocalAggregates); } -} // end namespace - +} // namespace MueLu #endif /* MUELU_INTERFACEAGGREGATIONALGORITHM_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_decl.hpp index 47ebb8038952..fc1e688f3457 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_decl.hpp +++ 
b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_decl.hpp @@ -57,7 +57,7 @@ #include "MueLu_GraphBase.hpp" namespace MueLu { - /*! +/*! @class IsolatedNodeAggregationAlgorithm class. @brief Ignores isolated nodes during aggregation. Marks the node to be "aggregated" without adding real aggregates for them. @@ -76,42 +76,39 @@ namespace MueLu { */ - template - class IsolatedNodeAggregationAlgorithm : - public MueLu::AggregationAlgorithmBase { +template +class IsolatedNodeAggregationAlgorithm : public MueLu::AggregationAlgorithmBase { #undef MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - IsolatedNodeAggregationAlgorithm(const RCP& /* graphFact */ = Teuchos::null) { } + //! Constructor. + IsolatedNodeAggregationAlgorithm(const RCP& /* graphFact */ = Teuchos::null) {} - //! Destructor. - virtual ~IsolatedNodeAggregationAlgorithm() { } + //! Destructor. + virtual ~IsolatedNodeAggregationAlgorithm() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. 
*/ + void BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; + //@} - void BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; - //@} + std::string description() const { return "Phase - (isolated)"; } - std::string description() const { return "Phase - (isolated)"; } +}; //class MaxLinkAggregationAlgorithm - }; //class MaxLinkAggregationAlgorithm - -} //namespace MueLu +} //namespace MueLu #define MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_SHORT - #endif /* MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_def.hpp index 77147d7990a8..73dd4eddd6c0 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_def.hpp @@ -53,7 +53,6 @@ #ifndef MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_DEF_HPP_ #define MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_DEF_HPP_ - #include #include @@ -68,20 +67,20 @@ namespace MueLu { - template - void IsolatedNodeAggregationAlgorithm::BuildAggregates(const ParameterList& /* params */, const GraphBase& graph, Aggregates& /* aggregates */, std::vector& aggStat, LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildAggregates"); +template +void IsolatedNodeAggregationAlgorithm::BuildAggregates(const ParameterList& /* params */, const GraphBase& graph, Aggregates& /* aggregates */, std::vector& aggStat, LO& numNonAggregatedNodes) const { + Monitor m(*this, "BuildAggregates"); - const LO numRows = graph.GetNodeNumVertices(); + const LO numRows = graph.GetNodeNumVertices(); - // Remove all isolated nodes - for (LO i = 0; i < numRows; i++) 
- if (aggStat[i] != AGGREGATED && aggStat[i] != IGNORED && graph.getNeighborVertices(i).size() == 1) { - aggStat[i] = IGNORED; - numNonAggregatedNodes--; - } - } + // Remove all isolated nodes + for (LO i = 0; i < numRows; i++) + if (aggStat[i] != AGGREGATED && aggStat[i] != IGNORED && graph.getNeighborVertices(i).size() == 1) { + aggStat[i] = IGNORED; + numNonAggregatedNodes--; + } +} -} // end namespace +} // namespace MueLu #endif /* MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_kokkos_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_kokkos_decl.hpp index c5bbb6e9c4b9..2e018df7bba8 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_kokkos_decl.hpp @@ -58,7 +58,7 @@ #include "MueLu_LWGraph_kokkos.hpp" namespace MueLu { - /*! +/*! @class IsolatedNodeAggregationAlgorithm class. @brief Ignores isolated nodes during aggregation. Marks the node to be "aggregated" without adding real aggregates for them. @@ -77,46 +77,44 @@ namespace MueLu { */ - template - class IsolatedNodeAggregationAlgorithm_kokkos : - public MueLu::AggregationAlgorithmBase_kokkos { +template +class IsolatedNodeAggregationAlgorithm_kokkos : public MueLu::AggregationAlgorithmBase_kokkos { #undef MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - using device_type = typename LWGraph_kokkos::device_type; - using memory_space = typename LWGraph_kokkos::memory_space; - //! @name Constructors/Destructors. - //@{ + public: + using device_type = typename LWGraph_kokkos::device_type; + using memory_space = typename LWGraph_kokkos::memory_space; + //! @name Constructors/Destructors. + //@{ - //! Constructor. 
- IsolatedNodeAggregationAlgorithm_kokkos(const RCP& /* graphFact */ = Teuchos::null) { } + //! Constructor. + IsolatedNodeAggregationAlgorithm_kokkos(const RCP& /* graphFact */ = Teuchos::null) {} - //! Destructor. - virtual ~IsolatedNodeAggregationAlgorithm_kokkos() { } + //! Destructor. + virtual ~IsolatedNodeAggregationAlgorithm_kokkos() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. */ + void BuildAggregates(const ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const; + //@} - void BuildAggregates(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; - //@} + std::string description() const { return "Phase - (isolated)"; } - std::string description() const { return "Phase - (isolated)"; } +}; //class MaxLinkAggregationAlgorithm - }; //class MaxLinkAggregationAlgorithm - -} //namespace MueLu +} //namespace MueLu #define MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_KOKKOS_SHORT -#endif // MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_DECL_HPP +#endif // MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_DECL_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_kokkos_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_kokkos_def.hpp index 96ff102a447a..b37aa733ed03 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_kokkos_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_kokkos_def.hpp @@ -60,41 +60,40 @@ namespace MueLu { - template - void IsolatedNodeAggregationAlgorithm_kokkos:: - BuildAggregates(const ParameterList& /* params */, - const LWGraph_kokkos& graph, - Aggregates& /* aggregates */, - Kokkos::View& 
aggstat, - LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildAggregates"); +template +void IsolatedNodeAggregationAlgorithm_kokkos:: + BuildAggregates(const ParameterList& /* params */, + const LWGraph_kokkos& graph, + Aggregates& /* aggregates */, + Kokkos::View& aggstat, + LO& numNonAggregatedNodes) const { + Monitor m(*this, "BuildAggregates"); - typename Kokkos::View::HostMirror aggstatHost - = Kokkos::create_mirror(aggstat); - Kokkos::deep_copy(aggstatHost, aggstat); - std::vector aggStat; - aggStat.resize(aggstatHost.extent(0)); - for(size_t idx = 0; idx < aggstatHost.extent(0); ++idx) { - aggStat[idx] = aggstatHost(idx); - } - - auto lclLWGraph = graph.getLocalLWGraph(); + typename Kokkos::View::HostMirror aggstatHost = Kokkos::create_mirror(aggstat); + Kokkos::deep_copy(aggstatHost, aggstat); + std::vector aggStat; + aggStat.resize(aggstatHost.extent(0)); + for (size_t idx = 0; idx < aggstatHost.extent(0); ++idx) { + aggStat[idx] = aggstatHost(idx); + } - const LO numRows = graph.GetNodeNumVertices(); + auto lclLWGraph = graph.getLocalLWGraph(); - // Remove all isolated nodes - for (LO i = 0; i < numRows; i++) - if (aggStat[i] != AGGREGATED && aggStat[i] != IGNORED && lclLWGraph.getNeighborVertices(i).length == 1) { - aggStat[i] = IGNORED; - numNonAggregatedNodes--; - } + const LO numRows = graph.GetNodeNumVertices(); - for(size_t idx = 0; idx < aggstatHost.extent(0); ++idx) { - aggstatHost(idx) = aggStat[idx]; + // Remove all isolated nodes + for (LO i = 0; i < numRows; i++) + if (aggStat[i] != AGGREGATED && aggStat[i] != IGNORED && lclLWGraph.getNeighborVertices(i).length == 1) { + aggStat[i] = IGNORED; + numNonAggregatedNodes--; } - Kokkos::deep_copy(aggstat, aggstatHost); + + for (size_t idx = 0; idx < aggstatHost.extent(0); ++idx) { + aggstatHost(idx) = aggStat[idx]; } + Kokkos::deep_copy(aggstat, aggstatHost); +} -} // end namespace +} // namespace MueLu -#endif // MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_KOKKOS_DEF_HPP +#endif // 
MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_decl.hpp index 6a77eb0d4a29..fcd0e9a7df22 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_decl.hpp @@ -63,7 +63,7 @@ #include "MueLu_GraphBase.hpp" namespace MueLu { - /*! +/*! @class OnePtAggregationAlgorithm class. @brief Algorithm for coarsening a graph with uncoupled aggregation. keep special marked nodes as singleton node aggregates over all multigrid levels @@ -81,39 +81,36 @@ namespace MueLu { */ - template - class OnePtAggregationAlgorithm : - public MueLu::AggregationAlgorithmBase { +template +class OnePtAggregationAlgorithm : public MueLu::AggregationAlgorithmBase { #undef MUELU_ONEPTAGGREGATIONALGORITHM_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - OnePtAggregationAlgorithm(RCP const &graphFact = Teuchos::null); + //! Constructor. + OnePtAggregationAlgorithm(RCP const& graphFact = Teuchos::null); - //! Destructor. - virtual ~OnePtAggregationAlgorithm() { } + //! Destructor. + virtual ~OnePtAggregationAlgorithm() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. 
*/ + void BuildAggregates(Teuchos::ParameterList const& params, GraphBase const& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; + //@} - void BuildAggregates(Teuchos::ParameterList const & params, GraphBase const & graph, Aggregates & aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; - //@} +}; //class OnePtAggregationAlgorithm - - }; //class OnePtAggregationAlgorithm - -} //namespace MueLu +} //namespace MueLu #define MUELU_ONEPTAGGREGATIONALGORITHM_SHORT #endif /* MUELU_ONEPTAGGREGATIONALGORITHM_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_def.hpp index 687778c05654..b85b3cc3a927 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_def.hpp @@ -69,51 +69,47 @@ namespace MueLu { template -OnePtAggregationAlgorithm::OnePtAggregationAlgorithm(RCP const &/* graphFact */) -{ +OnePtAggregationAlgorithm::OnePtAggregationAlgorithm(RCP const& /* graphFact */) { } template -void OnePtAggregationAlgorithm::BuildAggregates(Teuchos::ParameterList const & /* params */, GraphBase const & graph, Aggregates & aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const { +void OnePtAggregationAlgorithm::BuildAggregates(Teuchos::ParameterList const& /* params */, GraphBase const& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const { Monitor m(*this, "BuildAggregates"); const LocalOrdinal nRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); + const int myRank = graph.GetComm()->getRank(); // vertex ids for output Teuchos::ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); Teuchos::ArrayRCP procWinner = aggregates.GetProcWinner()->getDataNonConst(0); // some 
internal variables - LocalOrdinal nLocalAggregates = aggregates.GetNumAggregates(); // number of local aggregates on current proc - LocalOrdinal iNode1 = 0; // current node + LocalOrdinal nLocalAggregates = aggregates.GetNumAggregates(); // number of local aggregates on current proc + LocalOrdinal iNode1 = 0; // current node // main loop over all local rows of graph(A) while (iNode1 < nRows) { - if (aggStat[iNode1] == ONEPT) { - - aggregates.SetIsRoot(iNode1); // mark iNode1 as root node for new aggregate 'ag' + aggregates.SetIsRoot(iNode1); // mark iNode1 as root node for new aggregate 'ag' std::vector aggList; aggList.push_back(iNode1); int aggIndex = nLocalAggregates++; for (size_t k = 0; k < aggList.size(); k++) { - aggStat[aggList[k]] = IGNORED; + aggStat[aggList[k]] = IGNORED; vertex2AggId[aggList[k]] = aggIndex; - procWinner[aggList[k]] = myRank; + procWinner[aggList[k]] = myRank; } numNonAggregatedNodes -= aggList.size(); } iNode1++; - } // end while + } // end while // update aggregate object aggregates.SetNumAggregates(nLocalAggregates); } -} // end namespace - +} // namespace MueLu #endif /* MUELU_ONEPTAGGREGATIONALGORITHM_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_kokkos_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_kokkos_decl.hpp index 1cae818205c2..4dac3f57a791 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_kokkos_decl.hpp @@ -58,7 +58,7 @@ #include "MueLu_LWGraph_kokkos.hpp" namespace MueLu { - /*! +/*! @class OnePtAggregationAlgorithm class. @brief Algorithm for coarsening a graph with uncoupled aggregation. 
keep special marked nodes as singleton node aggregates over all multigrid levels @@ -76,45 +76,42 @@ namespace MueLu { */ - template - class OnePtAggregationAlgorithm_kokkos : - public MueLu::AggregationAlgorithmBase_kokkos { +template +class OnePtAggregationAlgorithm_kokkos : public MueLu::AggregationAlgorithmBase_kokkos { #undef MUELU_ONEPTAGGREGATIONALGORITHM_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - using device_type = typename LWGraph_kokkos::device_type; - using memory_space = typename LWGraph_kokkos::memory_space; - //! @name Constructors/Destructors. - //@{ + public: + using device_type = typename LWGraph_kokkos::device_type; + using memory_space = typename LWGraph_kokkos::memory_space; + //! @name Constructors/Destructors. + //@{ - //! Constructor. - OnePtAggregationAlgorithm_kokkos(RCP const &graphFact = Teuchos::null); + //! Constructor. + OnePtAggregationAlgorithm_kokkos(RCP const& graphFact = Teuchos::null); - //! Destructor. - virtual ~OnePtAggregationAlgorithm_kokkos() { } + //! Destructor. + virtual ~OnePtAggregationAlgorithm_kokkos() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. 
*/ + void BuildAggregates(Teuchos::ParameterList const& params, + LWGraph_kokkos const& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const; + //@} - void BuildAggregates(Teuchos::ParameterList const & params, - LWGraph_kokkos const & graph, - Aggregates & aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; - //@} +}; //class OnePtAggregationAlgorithm_kokkos - - }; //class OnePtAggregationAlgorithm_kokkos - -} //namespace MueLu +} //namespace MueLu #define MUELU_ONEPTAGGREGATIONALGORITHM_KOKKOS_SHORT -#endif // MUELU_ONEPTAGGREGATIONALGORITHM_KOKKOS_DECL_HPP +#endif // MUELU_ONEPTAGGREGATIONALGORITHM_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_kokkos_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_kokkos_def.hpp index 82dd4881b96c..f016c974ffe4 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_kokkos_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_kokkos_def.hpp @@ -60,71 +60,67 @@ namespace MueLu { - template - OnePtAggregationAlgorithm_kokkos::OnePtAggregationAlgorithm_kokkos(RCP const &/* graphFact */) - { +template +OnePtAggregationAlgorithm_kokkos::OnePtAggregationAlgorithm_kokkos(RCP const& /* graphFact */) { +} + +template +void OnePtAggregationAlgorithm_kokkos:: + BuildAggregates(Teuchos::ParameterList const& /* params */, + LWGraph_kokkos const& graph, + Aggregates& aggregates, + Kokkos::View& aggstat, + LO& numNonAggregatedNodes) const { + Monitor m(*this, "BuildAggregates"); + + typename Kokkos::View::HostMirror aggstatHost = Kokkos::create_mirror(aggstat); + Kokkos::deep_copy(aggstatHost, aggstat); + std::vector aggStat; + aggStat.resize(aggstatHost.extent(0)); + for (size_t idx = 0; idx < aggstatHost.extent(0); ++idx) { + aggStat[idx] = aggstatHost(idx); } - template - void 
OnePtAggregationAlgorithm_kokkos:: - BuildAggregates(Teuchos::ParameterList const & /* params */, - LWGraph_kokkos const & graph, - Aggregates & aggregates, - Kokkos::View& aggstat, - LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildAggregates"); - - typename Kokkos::View::HostMirror aggstatHost - = Kokkos::create_mirror(aggstat); - Kokkos::deep_copy(aggstatHost, aggstat); - std::vector aggStat; - aggStat.resize(aggstatHost.extent(0)); - for(size_t idx = 0; idx < aggstatHost.extent(0); ++idx) { - aggStat[idx] = aggstatHost(idx); - } - - const LocalOrdinal nRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); - - // vertex ids for output - Teuchos::ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); - Teuchos::ArrayRCP procWinner = aggregates.GetProcWinner()->getDataNonConst(0); - - // some internal variables - LocalOrdinal nLocalAggregates = aggregates.GetNumAggregates(); // number of local aggregates on current proc - LocalOrdinal iNode1 = 0; // current node - - // main loop over all local rows of graph(A) - while (iNode1 < nRows) { - - if (aggStat[iNode1] == ONEPT) { - - aggregates.SetIsRoot(iNode1); // mark iNode1 as root node for new aggregate 'ag' - std::vector aggList; - aggList.push_back(iNode1); - int aggIndex = nLocalAggregates++; - - // finalize aggregate - for (size_t k = 0; k < aggList.size(); k++) { - aggStat[aggList[k]] = IGNORED; - vertex2AggId[aggList[k]] = aggIndex; - procWinner[aggList[k]] = myRank; - } - numNonAggregatedNodes -= aggList.size(); + const LocalOrdinal nRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); + + // vertex ids for output + Teuchos::ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); + Teuchos::ArrayRCP procWinner = aggregates.GetProcWinner()->getDataNonConst(0); + + // some internal variables + LocalOrdinal nLocalAggregates = aggregates.GetNumAggregates(); // number of local aggregates on current proc + 
LocalOrdinal iNode1 = 0; // current node + + // main loop over all local rows of graph(A) + while (iNode1 < nRows) { + if (aggStat[iNode1] == ONEPT) { + aggregates.SetIsRoot(iNode1); // mark iNode1 as root node for new aggregate 'ag' + std::vector aggList; + aggList.push_back(iNode1); + int aggIndex = nLocalAggregates++; + + // finalize aggregate + for (size_t k = 0; k < aggList.size(); k++) { + aggStat[aggList[k]] = IGNORED; + vertex2AggId[aggList[k]] = aggIndex; + procWinner[aggList[k]] = myRank; } - - iNode1++; - } // end while - - for(size_t idx = 0; idx < aggstatHost.extent(0); ++idx) { - aggstatHost(idx) = aggStat[idx]; + numNonAggregatedNodes -= aggList.size(); } - Kokkos::deep_copy(aggstat, aggstatHost); - // update aggregate object - aggregates.SetNumAggregates(nLocalAggregates); + iNode1++; + } // end while + + for (size_t idx = 0; idx < aggstatHost.extent(0); ++idx) { + aggstatHost(idx) = aggStat[idx]; } + Kokkos::deep_copy(aggstat, aggstatHost); + + // update aggregate object + aggregates.SetNumAggregates(nLocalAggregates); +} -} // end namespace +} // namespace MueLu -#endif // MUELU_ONEPTAGGREGATIONALGORITHM_KOKKOS_DEF_HPP +#endif // MUELU_ONEPTAGGREGATIONALGORITHM_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_decl.hpp index 8d1af9929a88..33fa9f5f8931 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_decl.hpp @@ -55,7 +55,7 @@ #include "MueLu_GraphBase.hpp" namespace MueLu { - /*! +/*! @class PreserveDirichletAggregationAlgorithm class. @brief Builds one-to-one aggregates for all Dirichlet boundary nodes. For some applications this might be necessary. 
(default = off) @@ -79,43 +79,39 @@ namespace MueLu { Only nodes with state BOUNDARY are changed to IGNORED. No other nodes are touched. */ - template - class PreserveDirichletAggregationAlgorithm : - public MueLu::AggregationAlgorithmBase { +template +class PreserveDirichletAggregationAlgorithm : public MueLu::AggregationAlgorithmBase { #undef MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - PreserveDirichletAggregationAlgorithm(const RCP& /* graphFact */ = Teuchos::null) { } + //! Constructor. + PreserveDirichletAggregationAlgorithm(const RCP& /* graphFact */ = Teuchos::null) {} - //! Destructor. - virtual ~PreserveDirichletAggregationAlgorithm() { } + //! Destructor. + virtual ~PreserveDirichletAggregationAlgorithm() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. 
*/ + void BuildAggregates(const Teuchos::ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; + //@} - void BuildAggregates(const Teuchos::ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; - //@} + std::string description() const { return "Phase - (Dirichlet)"; } - std::string description() const { return "Phase - (Dirichlet)"; } +}; //class PreserveDirichletAggregationAlgorithm - }; //class PreserveDirichletAggregationAlgorithm - -} //namespace MueLu +} //namespace MueLu #define MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_SHORT - - #endif /* MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_def.hpp index 5dbd6d0dbf7f..9362342deaba 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_def.hpp @@ -60,37 +60,37 @@ namespace MueLu { - template - void PreserveDirichletAggregationAlgorithm::BuildAggregates(Teuchos::ParameterList const & params, GraphBase const & graph, Aggregates & aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildAggregates"); +template +void PreserveDirichletAggregationAlgorithm::BuildAggregates(Teuchos::ParameterList const& params, GraphBase const& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const { + Monitor m(*this, "BuildAggregates"); - bool preserve = params.get("aggregation: preserve Dirichlet points"); + bool preserve = params.get("aggregation: preserve Dirichlet points"); - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = 
graph.GetComm()->getRank(); + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); - ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); - ArrayRCP procWinner = aggregates.GetProcWinner() ->getDataNonConst(0); + ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); + ArrayRCP procWinner = aggregates.GetProcWinner()->getDataNonConst(0); - LO numLocalAggregates = aggregates.GetNumAggregates(); + LO numLocalAggregates = aggregates.GetNumAggregates(); - for (LO i = 0; i < numRows; i++) - if (aggStat[i] == BOUNDARY) { - aggStat[i] = IGNORED; - numNonAggregatedNodes--; + for (LO i = 0; i < numRows; i++) + if (aggStat[i] == BOUNDARY) { + aggStat[i] = IGNORED; + numNonAggregatedNodes--; - if (preserve) { - aggregates.SetIsRoot(i); + if (preserve) { + aggregates.SetIsRoot(i); - vertex2AggId[i] = numLocalAggregates++; - procWinner [i] = myRank; - } + vertex2AggId[i] = numLocalAggregates++; + procWinner[i] = myRank; } + } - // update aggregate object - aggregates.SetNumAggregates(numLocalAggregates); - } + // update aggregate object + aggregates.SetNumAggregates(numLocalAggregates); +} -} // end namespace +} // namespace MueLu #endif /* MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_kokkos_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_kokkos_decl.hpp index 568889a49cb2..f6893965e39a 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_kokkos_decl.hpp @@ -58,7 +58,7 @@ #include "MueLu_LWGraph_kokkos.hpp" namespace MueLu { - /*! +/*! @class PreserveDirichletAggregationAlgorithm class. @brief Builds one-to-one aggregates for all Dirichlet boundary nodes. 
For some applications this might be necessary. (default = off) @@ -82,48 +82,46 @@ namespace MueLu { Only nodes with state BOUNDARY are changed to IGNORED. No other nodes are touched. */ - template - class PreserveDirichletAggregationAlgorithm_kokkos : - public MueLu::AggregationAlgorithmBase_kokkos { +template +class PreserveDirichletAggregationAlgorithm_kokkos : public MueLu::AggregationAlgorithmBase_kokkos { #undef MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - using device_type = typename LWGraph_kokkos::device_type; - using execution_space = typename LWGraph_kokkos::execution_space; - using memory_space = typename LWGraph_kokkos::memory_space; + public: + using device_type = typename LWGraph_kokkos::device_type; + using execution_space = typename LWGraph_kokkos::execution_space; + using memory_space = typename LWGraph_kokkos::memory_space; - //! @name Constructors/Destructors. - //@{ + //! @name Constructors/Destructors. + //@{ - //! Constructor. - PreserveDirichletAggregationAlgorithm_kokkos(const RCP& /* graphFact */ = Teuchos::null) { } + //! Constructor. + PreserveDirichletAggregationAlgorithm_kokkos(const RCP& /* graphFact */ = Teuchos::null) {} - //! Destructor. - virtual ~PreserveDirichletAggregationAlgorithm_kokkos() { } + //! Destructor. + virtual ~PreserveDirichletAggregationAlgorithm_kokkos() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. 
*/ + void BuildAggregates(const Teuchos::ParameterList& params, + const LWGraph_kokkos& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const; + //@} - void BuildAggregates(const Teuchos::ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; - //@} + std::string description() const { return "Phase - (Dirichlet)"; } - std::string description() const { return "Phase - (Dirichlet)"; } +}; //class PreserveDirichletAggregationAlgorithm - }; //class PreserveDirichletAggregationAlgorithm - -} //namespace MueLu +} //namespace MueLu #define MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_KOKKOS_SHORT -#endif // MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_KOKKOS_DECL_HPP +#endif // MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_kokkos_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_kokkos_def.hpp index 498640df9c43..b28171a695a3 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_kokkos_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_kokkos_def.hpp @@ -60,61 +60,61 @@ namespace MueLu { - template - void PreserveDirichletAggregationAlgorithm_kokkos:: - BuildAggregates(Teuchos::ParameterList const & params, - LWGraph_kokkos const & graph, - Aggregates & aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildAggregates"); - using local_ordinal_type = typename LWGraph_kokkos::local_ordinal_type; +template +void PreserveDirichletAggregationAlgorithm_kokkos:: + BuildAggregates(Teuchos::ParameterList const& params, + LWGraph_kokkos const& graph, + Aggregates& aggregates, + Kokkos::View& aggStat, + LO& numNonAggregatedNodes) const { + Monitor 
m(*this, "BuildAggregates"); + using local_ordinal_type = typename LWGraph_kokkos::local_ordinal_type; - // Extract parameters and data from: - // 1) the parameter list - const bool preserve = params.get("aggregation: preserve Dirichlet points"); + // Extract parameters and data from: + // 1) the parameter list + const bool preserve = params.get("aggregation: preserve Dirichlet points"); - // 2) the amalgamated graph - const LO numNodes = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); + // 2) the amalgamated graph + const LO numNodes = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); - // 3) the aggregates - auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite); + // 3) the aggregates + auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite); - // A view is needed to count on the fly the current number of local aggregates - Kokkos::View aggCount("aggCount"); - if(preserve) { - Kokkos::deep_copy(aggCount, aggregates.GetNumAggregates()); - } - Kokkos::parallel_for("MueLu - PreserveDirichlet: tagging ignored nodes", - Kokkos::RangePolicy(0, numNodes), - KOKKOS_LAMBDA(const local_ordinal_type nodeIdx) { - if (aggStat(nodeIdx) == BOUNDARY) { - aggStat(nodeIdx) = IGNORED; - const LO aggIdx = Kokkos::atomic_fetch_add(&aggCount(), 1); + // A view is needed to count on the fly the current number of local aggregates + Kokkos::View aggCount("aggCount"); + if (preserve) { + Kokkos::deep_copy(aggCount, aggregates.GetNumAggregates()); + } + Kokkos::parallel_for( + "MueLu - PreserveDirichlet: tagging ignored nodes", + Kokkos::RangePolicy(0, numNodes), + KOKKOS_LAMBDA(const local_ordinal_type nodeIdx) { + if (aggStat(nodeIdx) == BOUNDARY) { + 
aggStat(nodeIdx) = IGNORED; + const LO aggIdx = Kokkos::atomic_fetch_add(&aggCount(), 1); - if (preserve) { - // aggregates.SetIsRoot(nodeIdx); + if (preserve) { + // aggregates.SetIsRoot(nodeIdx); - vertex2AggId(nodeIdx, 0) = aggIdx; - procWinner(nodeIdx, 0) = myRank; - } - } - }); - typename Kokkos::View::HostMirror aggCount_h - = Kokkos::create_mirror_view(aggCount); - Kokkos::deep_copy(aggCount_h, aggCount); - // In this phase the number of new aggregates is the same - // as the number of newly aggregated nodes. - numNonAggregatedNodes -= (aggCount_h() - aggregates.GetNumAggregates()); + vertex2AggId(nodeIdx, 0) = aggIdx; + procWinner(nodeIdx, 0) = myRank; + } + } + }); + typename Kokkos::View::HostMirror aggCount_h = Kokkos::create_mirror_view(aggCount); + Kokkos::deep_copy(aggCount_h, aggCount); + // In this phase the number of new aggregates is the same + // as the number of newly aggregated nodes. + numNonAggregatedNodes -= (aggCount_h() - aggregates.GetNumAggregates()); - // update aggregate object - if(preserve) { - aggregates.SetNumAggregates(aggCount_h()); - } + // update aggregate object + if (preserve) { + aggregates.SetNumAggregates(aggCount_h()); } +} -} // end namespace +} // namespace MueLu -#endif // MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_KOKKOS_DEF_HPP +#endif // MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_decl.hpp index 21dd4ab2a9bb..1bd36cfd8af9 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_decl.hpp @@ -46,7 +46,6 @@ #ifndef MUELU_UNCOUPLEDAGGREGATIONFACTORY_DECL_HPP_ #define MUELU_UNCOUPLEDAGGREGATIONFACTORY_DECL_HPP_ - #include #include @@ -137,14 +136,14 @@ namespace MueLu { | Aggregates | 
UncoupledAggregationFactory | Container class with aggregation information. See also Aggregates. */ -template +template class UncoupledAggregationFactory : public SingleLevelFactoryBase { #undef MUELU_UNCOUPLEDAGGREGATIONFACTORY_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" -public: + public: //! @name Constructors/Destructors. //@{ @@ -152,7 +151,7 @@ class UncoupledAggregationFactory : public SingleLevelFactoryBase { UncoupledAggregationFactory(); //! Destructor. - virtual ~UncoupledAggregationFactory() { } + virtual ~UncoupledAggregationFactory() {} RCP GetValidParameterList() const; @@ -169,16 +168,16 @@ class UncoupledAggregationFactory : public SingleLevelFactoryBase { } // deprecated void SetMaxNeighAlreadySelected(int maxNeighAlreadySelected) { - SetParameter("aggregation: max selected neighbors", ParameterEntry(Teuchos::as(maxNeighAlreadySelected))); // revalidate + SetParameter("aggregation: max selected neighbors", ParameterEntry(Teuchos::as(maxNeighAlreadySelected))); // revalidate } // deprecated void SetMinNodesPerAggregate(int minNodesPerAggregate) { - SetParameter("aggregation: min agg size", ParameterEntry(Teuchos::as(minNodesPerAggregate))); // revalidate + SetParameter("aggregation: min agg size", ParameterEntry(Teuchos::as(minNodesPerAggregate))); // revalidate } // set information about 1-node aggregates (map name and generating factory) void SetOnePtMapName(const std::string name, Teuchos::RCP mapFact) { - SetParameter("OnePt aggregate map name", ParameterEntry(std::string(name))); // revalidate - SetFactory("OnePt aggregate map factory",mapFact); + SetParameter("OnePt aggregate map name", ParameterEntry(std::string(name))); // revalidate + SetFactory("OnePt aggregate map factory", mapFact); } // deprecated @@ -202,7 +201,7 @@ class UncoupledAggregationFactory : public SingleLevelFactoryBase { //! 
Input //@{ - void DeclareInput(Level ¤tLevel) const; + void DeclareInput(Level& currentLevel) const; //@} @@ -210,7 +209,7 @@ class UncoupledAggregationFactory : public SingleLevelFactoryBase { //@{ /*! @brief Build aggregates. */ - void Build(Level ¤tLevel) const; + void Build(Level& currentLevel) const; //@} @@ -224,8 +223,7 @@ class UncoupledAggregationFactory : public SingleLevelFactoryBase { //void ClearAggregationAlgorithms() { algos_.clear(); } //@} -private: - + private: //! aggregation algorithms // will be filled in Build routine mutable std::vector > > algos_; @@ -235,9 +233,9 @@ class UncoupledAggregationFactory : public SingleLevelFactoryBase { //! if false, no change in aggregation algorithms is possible any more mutable bool bDefinitionPhase_; -}; // class UncoupledAggregationFactory +}; // class UncoupledAggregationFactory -} +} // namespace MueLu #define MUELU_UNCOUPLEDAGGREGATIONFACTORY_SHORT #endif /* MUELU_UNCOUPLEDAGGREGATIONFACTORY_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_def.hpp index d93dc6dbcdb9..d5520d275ba6 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_def.hpp @@ -72,239 +72,233 @@ namespace MueLu { - template - UncoupledAggregationFactory::UncoupledAggregationFactory() - : bDefinitionPhase_(true) - { } +template +UncoupledAggregationFactory::UncoupledAggregationFactory() + : bDefinitionPhase_(true) {} - template - RCP UncoupledAggregationFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP UncoupledAggregationFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); - // Aggregation parameters (used in aggregation algorithms) - // TODO introduce local member 
function for each aggregation algorithm such that each aggregation algorithm can define its own parameters + // Aggregation parameters (used in aggregation algorithms) + // TODO introduce local member function for each aggregation algorithm such that each aggregation algorithm can define its own parameters - typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; + typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("aggregation: max agg size"); - SET_VALID_ENTRY("aggregation: min agg size"); - SET_VALID_ENTRY("aggregation: max selected neighbors"); - SET_VALID_ENTRY("aggregation: ordering"); - validParamList->getEntry("aggregation: ordering").setValidator( - rcp(new validatorType(Teuchos::tuple("natural", "graph", "random"), "aggregation: ordering"))); - SET_VALID_ENTRY("aggregation: enable phase 1"); - SET_VALID_ENTRY("aggregation: enable phase 2a"); - SET_VALID_ENTRY("aggregation: enable phase 2b"); - SET_VALID_ENTRY("aggregation: enable phase 3"); - SET_VALID_ENTRY("aggregation: match ML phase1"); - SET_VALID_ENTRY("aggregation: match ML phase2a"); - SET_VALID_ENTRY("aggregation: match ML phase2b"); - SET_VALID_ENTRY("aggregation: phase2a agg factor"); - SET_VALID_ENTRY("aggregation: preserve Dirichlet points"); - SET_VALID_ENTRY("aggregation: allow user-specified singletons"); - SET_VALID_ENTRY("aggregation: use interface aggregation"); - SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors"); - SET_VALID_ENTRY("aggregation: phase3 avoid singletons"); - SET_VALID_ENTRY("aggregation: compute aggregate qualities"); - SET_VALID_ENTRY("aggregation: phase 1 algorithm"); -#undef SET_VALID_ENTRY - - // general variables needed in AggregationFactory - validParamList->set< RCP >("Graph", null, "Generating factory of the graph"); - validParamList->set< RCP >("DofsPerNode", null, "Generating factory for variable 
\'DofsPerNode\', usually the same as for \'Graph\'"); - validParamList->set< RCP >("AggregateQualities", null, "Generating factory for variable \'AggregateQualities\'"); - - // special variables necessary for OnePtAggregationAlgorithm - validParamList->set< std::string > ("OnePt aggregate map name", "", "Name of input map for single node aggregates. (default='')"); - validParamList->set< std::string > ("OnePt aggregate map factory", "", "Generating factory of (DOF) map for single node aggregates."); - //validParamList->set< RCP >("OnePt aggregate map factory", NoFactory::getRCP(), "Generating factory of (DOF) map for single node aggregates."); - - // InterfaceAggregation parameters - //validParamList->set< bool > ("aggregation: use interface aggregation", "false", "Flag to trigger aggregation along an interface using specified aggregate seeds."); - validParamList->set< std::string > ("Interface aggregate map name", "", "Name of input map for interface aggregates. (default='')"); - validParamList->set< std::string > ("Interface aggregate map factory", "", "Generating factory of (DOF) map for interface aggregates."); - validParamList->set > ("nodeOnInterface", Teuchos::null, "Array specifying whether or not a node is on the interface (1 or 0)."); - - return validParamList; - } - - template - void UncoupledAggregationFactory::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "Graph"); - Input(currentLevel, "DofsPerNode"); - - const ParameterList& pL = GetParameterList(); - - // request special data necessary for OnePtAggregationAlgorithm - std::string mapOnePtName = pL.get("OnePt aggregate map name"); - if (mapOnePtName.length() > 0) { - std::string mapOnePtFactName = pL.get("OnePt aggregate map factory"); - if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { - currentLevel.DeclareInput(mapOnePtName, NoFactory::get()); - } else { - RCP mapOnePtFact = GetFactory(mapOnePtFactName); - currentLevel.DeclareInput(mapOnePtName, mapOnePtFact.get()); 
- } + SET_VALID_ENTRY("aggregation: max agg size"); + SET_VALID_ENTRY("aggregation: min agg size"); + SET_VALID_ENTRY("aggregation: max selected neighbors"); + SET_VALID_ENTRY("aggregation: ordering"); + validParamList->getEntry("aggregation: ordering").setValidator(rcp(new validatorType(Teuchos::tuple("natural", "graph", "random"), "aggregation: ordering"))); + SET_VALID_ENTRY("aggregation: enable phase 1"); + SET_VALID_ENTRY("aggregation: enable phase 2a"); + SET_VALID_ENTRY("aggregation: enable phase 2b"); + SET_VALID_ENTRY("aggregation: enable phase 3"); + SET_VALID_ENTRY("aggregation: match ML phase1"); + SET_VALID_ENTRY("aggregation: match ML phase2a"); + SET_VALID_ENTRY("aggregation: match ML phase2b"); + SET_VALID_ENTRY("aggregation: phase2a agg factor"); + SET_VALID_ENTRY("aggregation: preserve Dirichlet points"); + SET_VALID_ENTRY("aggregation: allow user-specified singletons"); + SET_VALID_ENTRY("aggregation: use interface aggregation"); + SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors"); + SET_VALID_ENTRY("aggregation: phase3 avoid singletons"); + SET_VALID_ENTRY("aggregation: compute aggregate qualities"); + SET_VALID_ENTRY("aggregation: phase 1 algorithm"); +#undef SET_VALID_ENTRY + + // general variables needed in AggregationFactory + validParamList->set>("Graph", null, "Generating factory of the graph"); + validParamList->set>("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'"); + validParamList->set>("AggregateQualities", null, "Generating factory for variable \'AggregateQualities\'"); + + // special variables necessary for OnePtAggregationAlgorithm + validParamList->set("OnePt aggregate map name", "", "Name of input map for single node aggregates. 
(default='')"); + validParamList->set("OnePt aggregate map factory", "", "Generating factory of (DOF) map for single node aggregates."); + //validParamList->set< RCP >("OnePt aggregate map factory", NoFactory::getRCP(), "Generating factory of (DOF) map for single node aggregates."); + + // InterfaceAggregation parameters + //validParamList->set< bool > ("aggregation: use interface aggregation", "false", "Flag to trigger aggregation along an interface using specified aggregate seeds."); + validParamList->set("Interface aggregate map name", "", "Name of input map for interface aggregates. (default='')"); + validParamList->set("Interface aggregate map factory", "", "Generating factory of (DOF) map for interface aggregates."); + validParamList->set>("nodeOnInterface", Teuchos::null, "Array specifying whether or not a node is on the interface (1 or 0)."); + + return validParamList; +} + +template +void UncoupledAggregationFactory::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "Graph"); + Input(currentLevel, "DofsPerNode"); + + const ParameterList& pL = GetParameterList(); + + // request special data necessary for OnePtAggregationAlgorithm + std::string mapOnePtName = pL.get("OnePt aggregate map name"); + if (mapOnePtName.length() > 0) { + std::string mapOnePtFactName = pL.get("OnePt aggregate map factory"); + if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { + currentLevel.DeclareInput(mapOnePtName, NoFactory::get()); + } else { + RCP mapOnePtFact = GetFactory(mapOnePtFactName); + currentLevel.DeclareInput(mapOnePtName, mapOnePtFact.get()); } + } - // request special data necessary for InterfaceAggregation - if (pL.get("aggregation: use interface aggregation") == true){ - if(currentLevel.GetLevelID() == 0) { - if(currentLevel.IsAvailable("nodeOnInterface", NoFactory::get())) { - currentLevel.DeclareInput("nodeOnInterface", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("nodeOnInterface", 
NoFactory::get()), - Exceptions::RuntimeError, - "nodeOnInterface was not provided by the user on level0!"); - } + // request special data necessary for InterfaceAggregation + if (pL.get("aggregation: use interface aggregation") == true) { + if (currentLevel.GetLevelID() == 0) { + if (currentLevel.IsAvailable("nodeOnInterface", NoFactory::get())) { + currentLevel.DeclareInput("nodeOnInterface", NoFactory::get(), this); } else { - Input(currentLevel, "nodeOnInterface"); + TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("nodeOnInterface", NoFactory::get()), + Exceptions::RuntimeError, + "nodeOnInterface was not provided by the user on level0!"); } - } - - if (pL.get("aggregation: compute aggregate qualities")) { - Input(currentLevel, "AggregateQualities"); + } else { + Input(currentLevel, "nodeOnInterface"); } } - template - void UncoupledAggregationFactory::Build(Level ¤tLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); - - ParameterList pL = GetParameterList(); - bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed - - if (pL.get("aggregation: max agg size") == -1) - pL.set("aggregation: max agg size", INT_MAX); - - // define aggregation algorithms - RCP graphFact = GetFactory("Graph"); - - // TODO Can we keep different aggregation algorithms over more Build calls? 
- algos_.clear(); - algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm(graphFact))); - if (pL.get("aggregation: use interface aggregation") == true) algos_.push_back(rcp(new InterfaceAggregationAlgorithm (graphFact))); - if (pL.get("aggregation: allow user-specified singletons") == true) algos_.push_back(rcp(new OnePtAggregationAlgorithm (graphFact))); - if (pL.get("aggregation: enable phase 1" ) == true) algos_.push_back(rcp(new AggregationPhase1Algorithm (graphFact))); - if (pL.get("aggregation: enable phase 2a") == true) algos_.push_back(rcp(new AggregationPhase2aAlgorithm (graphFact))); - if (pL.get("aggregation: enable phase 2b") == true) algos_.push_back(rcp(new AggregationPhase2bAlgorithm (graphFact))); - if (pL.get("aggregation: enable phase 3" ) == true) algos_.push_back(rcp(new AggregationPhase3Algorithm (graphFact))); - - // TODO: remove old aggregation mode - //if (pL.get("UseOnePtAggregationAlgorithm") == true) algos_.push_back(rcp(new OnePtAggregationAlgorithm (graphFact))); - //if (pL.get("UseUncoupledAggregationAlgorithm") == true) algos_.push_back(rcp(new AggregationPhase1Algorithm (graphFact))); - //if (pL.get("UseMaxLinkAggregationAlgorithm") == true) algos_.push_back(rcp(new MaxLinkAggregationAlgorithm (graphFact))); - //if (pL.get("UseEmergencyAggregationAlgorithm") == true) algos_.push_back(rcp(new EmergencyAggregationAlgorithm (graphFact))); - - std::string mapOnePtName = pL.get("OnePt aggregate map name"); - RCP OnePtMap = Teuchos::null; - if (mapOnePtName.length()) { - std::string mapOnePtFactName = pL.get("OnePt aggregate map factory"); - if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { - OnePtMap = currentLevel.Get >(mapOnePtName, NoFactory::get()); - } else { - RCP mapOnePtFact = GetFactory(mapOnePtFactName); - OnePtMap = currentLevel.Get >(mapOnePtName, mapOnePtFact.get()); - } + if (pL.get("aggregation: compute aggregate qualities")) { + Input(currentLevel, "AggregateQualities"); + } +} + +template +void 
UncoupledAggregationFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + + ParameterList pL = GetParameterList(); + bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed + + if (pL.get("aggregation: max agg size") == -1) + pL.set("aggregation: max agg size", INT_MAX); + + // define aggregation algorithms + RCP graphFact = GetFactory("Graph"); + + // TODO Can we keep different aggregation algorithms over more Build calls? + algos_.clear(); + algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm(graphFact))); + if (pL.get("aggregation: use interface aggregation") == true) algos_.push_back(rcp(new InterfaceAggregationAlgorithm(graphFact))); + if (pL.get("aggregation: allow user-specified singletons") == true) algos_.push_back(rcp(new OnePtAggregationAlgorithm(graphFact))); + if (pL.get("aggregation: enable phase 1") == true) algos_.push_back(rcp(new AggregationPhase1Algorithm(graphFact))); + if (pL.get("aggregation: enable phase 2a") == true) algos_.push_back(rcp(new AggregationPhase2aAlgorithm(graphFact))); + if (pL.get("aggregation: enable phase 2b") == true) algos_.push_back(rcp(new AggregationPhase2bAlgorithm(graphFact))); + if (pL.get("aggregation: enable phase 3") == true) algos_.push_back(rcp(new AggregationPhase3Algorithm(graphFact))); + + // TODO: remove old aggregation mode + //if (pL.get("UseOnePtAggregationAlgorithm") == true) algos_.push_back(rcp(new OnePtAggregationAlgorithm (graphFact))); + //if (pL.get("UseUncoupledAggregationAlgorithm") == true) algos_.push_back(rcp(new AggregationPhase1Algorithm (graphFact))); + //if (pL.get("UseMaxLinkAggregationAlgorithm") == true) algos_.push_back(rcp(new MaxLinkAggregationAlgorithm (graphFact))); + //if (pL.get("UseEmergencyAggregationAlgorithm") == true) algos_.push_back(rcp(new EmergencyAggregationAlgorithm (graphFact))); + + std::string mapOnePtName = pL.get("OnePt aggregate map name"); + RCP OnePtMap = 
Teuchos::null; + if (mapOnePtName.length()) { + std::string mapOnePtFactName = pL.get("OnePt aggregate map factory"); + if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { + OnePtMap = currentLevel.Get>(mapOnePtName, NoFactory::get()); + } else { + RCP mapOnePtFact = GetFactory(mapOnePtFactName); + OnePtMap = currentLevel.Get>(mapOnePtName, mapOnePtFact.get()); } + } - // Set map for interface aggregates - std::string mapInterfaceName = pL.get("Interface aggregate map name"); - RCP InterfaceMap = Teuchos::null; + // Set map for interface aggregates + std::string mapInterfaceName = pL.get("Interface aggregate map name"); + RCP InterfaceMap = Teuchos::null; - RCP graph = Get< RCP >(currentLevel, "Graph"); + RCP graph = Get>(currentLevel, "Graph"); - // Build - RCP aggregates = rcp(new Aggregates(*graph)); - aggregates->setObjectLabel("UC"); + // Build + RCP aggregates = rcp(new Aggregates(*graph)); + aggregates->setObjectLabel("UC"); - const LO numRows = graph->GetNodeNumVertices(); + const LO numRows = graph->GetNodeNumVertices(); - // construct aggStat information - std::vector aggStat(numRows, READY); + // construct aggStat information + std::vector aggStat(numRows, READY); - // interface - if (pL.get("aggregation: use interface aggregation") == true){ - Teuchos::Array nodeOnInterface = Get>(currentLevel,"nodeOnInterface"); - for (LO i = 0; i < numRows; i++) { - if (nodeOnInterface[i]) - aggStat[i] = INTERFACE; - } + // interface + if (pL.get("aggregation: use interface aggregation") == true) { + Teuchos::Array nodeOnInterface = Get>(currentLevel, "nodeOnInterface"); + for (LO i = 0; i < numRows; i++) { + if (nodeOnInterface[i]) + aggStat[i] = INTERFACE; } + } - ArrayRCP dirichletBoundaryMap = graph->GetBoundaryNodeMap(); - if (dirichletBoundaryMap != Teuchos::null) - for (LO i = 0; i < numRows; i++) - if (dirichletBoundaryMap[i] == true) - aggStat[i] = BOUNDARY; - - LO nDofsPerNode = Get(currentLevel, "DofsPerNode"); - GO indexBase = 
graph->GetDomainMap()->getIndexBase(); - if (OnePtMap != Teuchos::null) { - for (LO i = 0; i < numRows; i++) { - // reconstruct global row id (FIXME only works for contiguous maps) - GO grid = (graph->GetDomainMap()->getGlobalElement(i)-indexBase) * nDofsPerNode + indexBase; - - for (LO kr = 0; kr < nDofsPerNode; kr++) - if (OnePtMap->isNodeGlobalElement(grid + kr)) - aggStat[i] = ONEPT; - } + ArrayRCP dirichletBoundaryMap = graph->GetBoundaryNodeMap(); + if (dirichletBoundaryMap != Teuchos::null) + for (LO i = 0; i < numRows; i++) + if (dirichletBoundaryMap[i] == true) + aggStat[i] = BOUNDARY; + + LO nDofsPerNode = Get(currentLevel, "DofsPerNode"); + GO indexBase = graph->GetDomainMap()->getIndexBase(); + if (OnePtMap != Teuchos::null) { + for (LO i = 0; i < numRows; i++) { + // reconstruct global row id (FIXME only works for contiguous maps) + GO grid = (graph->GetDomainMap()->getGlobalElement(i) - indexBase) * nDofsPerNode + indexBase; + + for (LO kr = 0; kr < nDofsPerNode; kr++) + if (OnePtMap->isNodeGlobalElement(grid + kr)) + aggStat[i] = ONEPT; } + } - - - const RCP > comm = graph->GetComm(); - GO numGlobalRows = 0; - if (IsPrint(Statistics1)) - MueLu_sumAll(comm, as(numRows), numGlobalRows); - - LO numNonAggregatedNodes = numRows; - GO numGlobalAggregatedPrev = 0, numGlobalAggsPrev = 0; - for (size_t a = 0; a < algos_.size(); a++) { - std::string phase = algos_[a]->description(); - SubFactoryMonitor sfm(*this, "Algo " + phase, currentLevel); - - int oldRank = algos_[a]->SetProcRankVerbose(this->GetProcRankVerbose()); - algos_[a]->BuildAggregates(pL, *graph, *aggregates, aggStat, numNonAggregatedNodes); - algos_[a]->SetProcRankVerbose(oldRank); - - if (IsPrint(Statistics1)) { - GO numLocalAggregated = numRows - numNonAggregatedNodes, numGlobalAggregated = 0; - GO numLocalAggs = aggregates->GetNumAggregates(), numGlobalAggs = 0; - MueLu_sumAll(comm, numLocalAggregated, numGlobalAggregated); - MueLu_sumAll(comm, numLocalAggs, numGlobalAggs); - - double 
aggPercent = 100*as(numGlobalAggregated)/as(numGlobalRows); - if (aggPercent > 99.99 && aggPercent < 100.00) { - // Due to round off (for instance, for 140465733/140466897), we could - // get 100.00% display even if there are some remaining nodes. This - // is bad from the users point of view. It is much better to change - // it to display 99.99%. - aggPercent = 99.99; - } - GetOStream(Statistics1) << " aggregated : " << (numGlobalAggregated - numGlobalAggregatedPrev) << " (phase), " << std::fixed - << std::setprecision(2) << numGlobalAggregated << "/" << numGlobalRows << " [" << aggPercent << "%] (total)\n" - << " remaining : " << numGlobalRows - numGlobalAggregated << "\n" - << " aggregates : " << numGlobalAggs-numGlobalAggsPrev << " (phase), " << numGlobalAggs << " (total)" << std::endl; - numGlobalAggregatedPrev = numGlobalAggregated; - numGlobalAggsPrev = numGlobalAggs; + const RCP> comm = graph->GetComm(); + GO numGlobalRows = 0; + if (IsPrint(Statistics1)) + MueLu_sumAll(comm, as(numRows), numGlobalRows); + + LO numNonAggregatedNodes = numRows; + GO numGlobalAggregatedPrev = 0, numGlobalAggsPrev = 0; + for (size_t a = 0; a < algos_.size(); a++) { + std::string phase = algos_[a]->description(); + SubFactoryMonitor sfm(*this, "Algo " + phase, currentLevel); + + int oldRank = algos_[a]->SetProcRankVerbose(this->GetProcRankVerbose()); + algos_[a]->BuildAggregates(pL, *graph, *aggregates, aggStat, numNonAggregatedNodes); + algos_[a]->SetProcRankVerbose(oldRank); + + if (IsPrint(Statistics1)) { + GO numLocalAggregated = numRows - numNonAggregatedNodes, numGlobalAggregated = 0; + GO numLocalAggs = aggregates->GetNumAggregates(), numGlobalAggs = 0; + MueLu_sumAll(comm, numLocalAggregated, numGlobalAggregated); + MueLu_sumAll(comm, numLocalAggs, numGlobalAggs); + + double aggPercent = 100 * as(numGlobalAggregated) / as(numGlobalRows); + if (aggPercent > 99.99 && aggPercent < 100.00) { + // Due to round off (for instance, for 140465733/140466897), we could + // get 
100.00% display even if there are some remaining nodes. This + // is bad from the users point of view. It is much better to change + // it to display 99.99%. + aggPercent = 99.99; } + GetOStream(Statistics1) << " aggregated : " << (numGlobalAggregated - numGlobalAggregatedPrev) << " (phase), " << std::fixed + << std::setprecision(2) << numGlobalAggregated << "/" << numGlobalRows << " [" << aggPercent << "%] (total)\n" + << " remaining : " << numGlobalRows - numGlobalAggregated << "\n" + << " aggregates : " << numGlobalAggs - numGlobalAggsPrev << " (phase), " << numGlobalAggs << " (total)" << std::endl; + numGlobalAggregatedPrev = numGlobalAggregated; + numGlobalAggsPrev = numGlobalAggs; } + } - TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, "MueLu::UncoupledAggregationFactory::Build: Leftover nodes found! Error!"); + TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, "MueLu::UncoupledAggregationFactory::Build: Leftover nodes found! Error!"); - aggregates->AggregatesCrossProcessors(false); - aggregates->ComputeAggregateSizes(true/*forceRecompute*/); + aggregates->AggregatesCrossProcessors(false); + aggregates->ComputeAggregateSizes(true /*forceRecompute*/); - Set(currentLevel, "Aggregates", aggregates); - - if (pL.get("aggregation: compute aggregate qualities")) { - RCP> aggQualities = Get>>(currentLevel, "AggregateQualities"); - } + Set(currentLevel, "Aggregates", aggregates); + if (pL.get("aggregation: compute aggregate qualities")) { + RCP> aggQualities = Get>>(currentLevel, "AggregateQualities"); } +} -} //namespace MueLu - +} //namespace MueLu #endif /* MUELU_UNCOUPLEDAGGREGATIONFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_kokkos_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_kokkos_decl.hpp index 5bf4100d95bb..0058ca9b07e1 100644 --- 
a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_kokkos_decl.hpp @@ -133,107 +133,106 @@ namespace MueLu { | Aggregates | UncoupledAggregationFactory | Container class with aggregation information. See also Aggregates. */ - template - class UncoupledAggregationFactory_kokkos : public SingleLevelFactoryBase { +template +class UncoupledAggregationFactory_kokkos : public SingleLevelFactoryBase { #undef MUELU_UNCOUPLEDAGGREGATIONFACTORY_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - UncoupledAggregationFactory_kokkos(); + //! Constructor. + UncoupledAggregationFactory_kokkos(); - //! Destructor. - virtual ~UncoupledAggregationFactory_kokkos() { } + //! Destructor. + virtual ~UncoupledAggregationFactory_kokkos() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! @name Set/get methods. - //@{ + //! @name Set/get methods. 
+ //@{ - // Options shared by all aggregation algorithms + // Options shared by all aggregation algorithms - // deprecated - void SetOrdering(const std::string& ordering) { - SetParameter("aggregation: ordering", ParameterEntry(ordering)); - } - // deprecated - void SetMaxNeighAlreadySelected(int maxNeighAlreadySelected) { - SetParameter("aggregation: max selected neighbors", ParameterEntry(Teuchos::as(maxNeighAlreadySelected))); // revalidate - } - // deprecated - void SetMinNodesPerAggregate(int minNodesPerAggregate) { - SetParameter("aggregation: min agg size", ParameterEntry(Teuchos::as(minNodesPerAggregate))); // revalidate - } - // set information about 1-node aggregates (map name and generating factory) - void SetOnePtMapName(const std::string name, Teuchos::RCP mapFact) { - SetParameter("OnePt aggregate map name", ParameterEntry(std::string(name))); // revalidate - SetFactory("OnePt aggregate map factory",mapFact); - } + // deprecated + void SetOrdering(const std::string& ordering) { + SetParameter("aggregation: ordering", ParameterEntry(ordering)); + } + // deprecated + void SetMaxNeighAlreadySelected(int maxNeighAlreadySelected) { + SetParameter("aggregation: max selected neighbors", ParameterEntry(Teuchos::as(maxNeighAlreadySelected))); // revalidate + } + // deprecated + void SetMinNodesPerAggregate(int minNodesPerAggregate) { + SetParameter("aggregation: min agg size", ParameterEntry(Teuchos::as(minNodesPerAggregate))); // revalidate + } + // set information about 1-node aggregates (map name and generating factory) + void SetOnePtMapName(const std::string name, Teuchos::RCP mapFact) { + SetParameter("OnePt aggregate map name", ParameterEntry(std::string(name))); // revalidate + SetFactory("OnePt aggregate map factory", mapFact); + } - // deprecated - const std::string& GetOrdering() const { - const ParameterList& pL = GetParameterList(); - return pL.get("aggregation: ordering"); - } - // deprecated - int GetMaxNeighAlreadySelected() const { - const 
ParameterList& pL = GetParameterList(); - return Teuchos::as(pL.get("aggregation: max selected neighbors")); - } - // deprecated - int GetMinNodesPerAggregate() const { - const ParameterList& pL = GetParameterList(); - return Teuchos::as(pL.get("aggregation: min agg size")); - } + // deprecated + const std::string& GetOrdering() const { + const ParameterList& pL = GetParameterList(); + return pL.get("aggregation: ordering"); + } + // deprecated + int GetMaxNeighAlreadySelected() const { + const ParameterList& pL = GetParameterList(); + return Teuchos::as(pL.get("aggregation: max selected neighbors")); + } + // deprecated + int GetMinNodesPerAggregate() const { + const ParameterList& pL = GetParameterList(); + return Teuchos::as(pL.get("aggregation: min agg size")); + } - //@} + //@} - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level ¤tLevel) const; + void DeclareInput(Level& currentLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - /*! @brief Build aggregates. */ - void Build(Level ¤tLevel) const; + /*! @brief Build aggregates. */ + void Build(Level& currentLevel) const; - //@} + //@} - //! @name Definition methods - //@{ + //! @name Definition methods + //@{ - /*! @brief Append a new aggregation algorithm to list of aggregation algorithms */ - //void Append(const RCP > & alg); + /*! @brief Append a new aggregation algorithm to list of aggregation algorithms */ + //void Append(const RCP > & alg); - /*! @brief Remove all aggregation algorithms from list */ - //void ClearAggregationAlgorithms() { algos_.clear(); } - //@} + /*! @brief Remove all aggregation algorithms from list */ + //void ClearAggregationAlgorithms() { algos_.clear(); } + //@} - private: + private: + //! aggregation algorithms + // will be filled in Build routine + mutable std::vector > > algos_; - //! aggregation algorithms - // will be filled in Build routine - mutable std::vector > > algos_; + //! boolean flag: definition phase + //! 
if true, the aggregation algorithms still can be set and changed. + //! if false, no change in aggregation algorithms is possible any more + mutable bool bDefinitionPhase_; - //! boolean flag: definition phase - //! if true, the aggregation algorithms still can be set and changed. - //! if false, no change in aggregation algorithms is possible any more - mutable bool bDefinitionPhase_; +}; // class UncoupledAggregationFactory_kokkos - }; // class UncoupledAggregationFactory_kokkos - -} +} // namespace MueLu #define MUELU_UNCOUPLEDAGGREGATIONFACTORY_KOKKOS_SHORT -#endif // MUELU_UNCOUPLEDAGGREGATIONFACTORY_KOKKOS_DECL_HPP +#endif // MUELU_UNCOUPLEDAGGREGATIONFACTORY_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_kokkos_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_kokkos_def.hpp index 0c0daed773ac..2d6dc5fa46bb 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_kokkos_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_kokkos_def.hpp @@ -75,338 +75,328 @@ namespace MueLu { - template - UncoupledAggregationFactory_kokkos::UncoupledAggregationFactory_kokkos() - : bDefinitionPhase_(true) - { } +template +UncoupledAggregationFactory_kokkos::UncoupledAggregationFactory_kokkos() + : bDefinitionPhase_(true) {} - template - RCP UncoupledAggregationFactory_kokkos::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP UncoupledAggregationFactory_kokkos::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); - // Aggregation parameters (used in aggregation algorithms) - // TODO introduce local member function for each aggregation algorithm such that each aggregation algorithm can define its own parameters + // Aggregation parameters (used in aggregation algorithms) + // TODO introduce local member function for each 
aggregation algorithm such that each aggregation algorithm can define its own parameters - typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; + typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("aggregation: max agg size"); - SET_VALID_ENTRY("aggregation: min agg size"); - SET_VALID_ENTRY("aggregation: max selected neighbors"); - SET_VALID_ENTRY("aggregation: ordering"); - validParamList->getEntry("aggregation: ordering").setValidator( - rcp(new validatorType(Teuchos::tuple("natural", "graph", "random"), "aggregation: ordering"))); - SET_VALID_ENTRY("aggregation: deterministic"); - SET_VALID_ENTRY("aggregation: coloring algorithm"); - SET_VALID_ENTRY("aggregation: enable phase 1"); - SET_VALID_ENTRY("aggregation: enable phase 2a"); - SET_VALID_ENTRY("aggregation: enable phase 2b"); - SET_VALID_ENTRY("aggregation: enable phase 3"); - SET_VALID_ENTRY("aggregation: match ML phase1"); - SET_VALID_ENTRY("aggregation: match ML phase2a"); - SET_VALID_ENTRY("aggregation: match ML phase2b"); - SET_VALID_ENTRY("aggregation: phase3 avoid singletons"); - SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors"); - SET_VALID_ENTRY("aggregation: preserve Dirichlet points"); - SET_VALID_ENTRY("aggregation: allow user-specified singletons"); - SET_VALID_ENTRY("aggregation: phase 1 algorithm"); -#undef SET_VALID_ENTRY - - // general variables needed in AggregationFactory - validParamList->set< RCP >("Graph", null, "Generating factory of the graph"); - validParamList->set< RCP >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'"); - - // special variables necessary for OnePtAggregationAlgorithm - validParamList->set< std::string > ("OnePt aggregate map name", "", "Name of input map for single node aggregates. 
(default='')"); - validParamList->set< std::string > ("OnePt aggregate map factory", "", "Generating factory of (DOF) map for single node aggregates."); - //validParamList->set< RCP >("OnePt aggregate map factory", NoFactory::getRCP(), "Generating factory of (DOF) map for single node aggregates."); - - return validParamList; - } - - template - void UncoupledAggregationFactory_kokkos::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "Graph"); - Input(currentLevel, "DofsPerNode"); - - const ParameterList& pL = GetParameterList(); - - // request special data necessary for OnePtAggregationAlgorithm - std::string mapOnePtName = pL.get("OnePt aggregate map name"); - if (mapOnePtName.length() > 0) { - std::string mapOnePtFactName = pL.get("OnePt aggregate map factory"); - if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { - currentLevel.DeclareInput(mapOnePtName, NoFactory::get()); - } else { - RCP mapOnePtFact = GetFactory(mapOnePtFactName); - currentLevel.DeclareInput(mapOnePtName, mapOnePtFact.get()); - } + SET_VALID_ENTRY("aggregation: max agg size"); + SET_VALID_ENTRY("aggregation: min agg size"); + SET_VALID_ENTRY("aggregation: max selected neighbors"); + SET_VALID_ENTRY("aggregation: ordering"); + validParamList->getEntry("aggregation: ordering").setValidator(rcp(new validatorType(Teuchos::tuple("natural", "graph", "random"), "aggregation: ordering"))); + SET_VALID_ENTRY("aggregation: deterministic"); + SET_VALID_ENTRY("aggregation: coloring algorithm"); + SET_VALID_ENTRY("aggregation: enable phase 1"); + SET_VALID_ENTRY("aggregation: enable phase 2a"); + SET_VALID_ENTRY("aggregation: enable phase 2b"); + SET_VALID_ENTRY("aggregation: enable phase 3"); + SET_VALID_ENTRY("aggregation: match ML phase1"); + SET_VALID_ENTRY("aggregation: match ML phase2a"); + SET_VALID_ENTRY("aggregation: match ML phase2b"); + SET_VALID_ENTRY("aggregation: phase3 avoid singletons"); + SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors"); 
+ SET_VALID_ENTRY("aggregation: preserve Dirichlet points"); + SET_VALID_ENTRY("aggregation: allow user-specified singletons"); + SET_VALID_ENTRY("aggregation: phase 1 algorithm"); +#undef SET_VALID_ENTRY + + // general variables needed in AggregationFactory + validParamList->set >("Graph", null, "Generating factory of the graph"); + validParamList->set >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'"); + + // special variables necessary for OnePtAggregationAlgorithm + validParamList->set("OnePt aggregate map name", "", "Name of input map for single node aggregates. (default='')"); + validParamList->set("OnePt aggregate map factory", "", "Generating factory of (DOF) map for single node aggregates."); + //validParamList->set< RCP >("OnePt aggregate map factory", NoFactory::getRCP(), "Generating factory of (DOF) map for single node aggregates."); + + return validParamList; +} + +template +void UncoupledAggregationFactory_kokkos::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "Graph"); + Input(currentLevel, "DofsPerNode"); + + const ParameterList& pL = GetParameterList(); + + // request special data necessary for OnePtAggregationAlgorithm + std::string mapOnePtName = pL.get("OnePt aggregate map name"); + if (mapOnePtName.length() > 0) { + std::string mapOnePtFactName = pL.get("OnePt aggregate map factory"); + if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { + currentLevel.DeclareInput(mapOnePtName, NoFactory::get()); + } else { + RCP mapOnePtFact = GetFactory(mapOnePtFactName); + currentLevel.DeclareInput(mapOnePtName, mapOnePtFact.get()); } } - - template - void UncoupledAggregationFactory_kokkos:: - Build(Level ¤tLevel) const { - using execution_space = typename LWGraph_kokkos::execution_space; - using memory_space = typename LWGraph_kokkos::memory_space; - using local_ordinal_type = typename LWGraph_kokkos::local_ordinal_type; - FactoryMonitor m(*this, "Build", 
currentLevel); - - ParameterList pL = GetParameterList(); - bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed - - if (pL.get("aggregation: max agg size") == -1) - pL.set("aggregation: max agg size", INT_MAX); - - // define aggregation algorithms - RCP graphFact = GetFactory("Graph"); - - // TODO Can we keep different aggregation algorithms over more Build calls? - algos_.clear(); - algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm_kokkos(graphFact))); - if (pL.get("aggregation: allow user-specified singletons") == true) algos_.push_back(rcp(new OnePtAggregationAlgorithm_kokkos (graphFact))); - if (pL.get("aggregation: enable phase 1" ) == true) algos_.push_back(rcp(new AggregationPhase1Algorithm_kokkos (graphFact))); - if (pL.get("aggregation: enable phase 2a") == true) algos_.push_back(rcp(new AggregationPhase2aAlgorithm_kokkos (graphFact))); - if (pL.get("aggregation: enable phase 2b") == true) algos_.push_back(rcp(new AggregationPhase2bAlgorithm_kokkos (graphFact))); - if (pL.get("aggregation: enable phase 3" ) == true) algos_.push_back(rcp(new AggregationPhase3Algorithm_kokkos (graphFact))); - - // Sanity Checking: match ML behavior is not supported in UncoupledAggregation_Kokkos in Phase 1 or Phase 2b, but is in 2a - TEUCHOS_TEST_FOR_EXCEPTION( pL.get("aggregation: match ML phase1"),std::invalid_argument,"Option: 'aggregation: match ML phase1' is not supported in the Kokkos version of uncoupled aggregation"); - TEUCHOS_TEST_FOR_EXCEPTION( pL.get("aggregation: match ML phase2b"),std::invalid_argument,"Option: 'aggregation: match ML phase2b' is not supported in the Kokkos version of uncoupled aggregation"); - - std::string mapOnePtName = pL.get("OnePt aggregate map name"); - RCP OnePtMap = Teuchos::null; - if (mapOnePtName.length()) { - std::string mapOnePtFactName = pL.get("OnePt aggregate map factory"); - if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { - OnePtMap = 
currentLevel.Get >(mapOnePtName, NoFactory::get()); - } else { - RCP mapOnePtFact = GetFactory(mapOnePtFactName); - OnePtMap = currentLevel.Get >(mapOnePtName, mapOnePtFact.get()); - } +} + +template +void UncoupledAggregationFactory_kokkos:: + Build(Level& currentLevel) const { + using execution_space = typename LWGraph_kokkos::execution_space; + using memory_space = typename LWGraph_kokkos::memory_space; + using local_ordinal_type = typename LWGraph_kokkos::local_ordinal_type; + FactoryMonitor m(*this, "Build", currentLevel); + + ParameterList pL = GetParameterList(); + bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed + + if (pL.get("aggregation: max agg size") == -1) + pL.set("aggregation: max agg size", INT_MAX); + + // define aggregation algorithms + RCP graphFact = GetFactory("Graph"); + + // TODO Can we keep different aggregation algorithms over more Build calls? + algos_.clear(); + algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm_kokkos(graphFact))); + if (pL.get("aggregation: allow user-specified singletons") == true) algos_.push_back(rcp(new OnePtAggregationAlgorithm_kokkos(graphFact))); + if (pL.get("aggregation: enable phase 1") == true) algos_.push_back(rcp(new AggregationPhase1Algorithm_kokkos(graphFact))); + if (pL.get("aggregation: enable phase 2a") == true) algos_.push_back(rcp(new AggregationPhase2aAlgorithm_kokkos(graphFact))); + if (pL.get("aggregation: enable phase 2b") == true) algos_.push_back(rcp(new AggregationPhase2bAlgorithm_kokkos(graphFact))); + if (pL.get("aggregation: enable phase 3") == true) algos_.push_back(rcp(new AggregationPhase3Algorithm_kokkos(graphFact))); + + // Sanity Checking: match ML behavior is not supported in UncoupledAggregation_Kokkos in Phase 1 or Phase 2b, but is in 2a + TEUCHOS_TEST_FOR_EXCEPTION(pL.get("aggregation: match ML phase1"), std::invalid_argument, "Option: 'aggregation: match ML phase1' is not supported in the Kokkos version of 
uncoupled aggregation"); + TEUCHOS_TEST_FOR_EXCEPTION(pL.get("aggregation: match ML phase2b"), std::invalid_argument, "Option: 'aggregation: match ML phase2b' is not supported in the Kokkos version of uncoupled aggregation"); + + std::string mapOnePtName = pL.get("OnePt aggregate map name"); + RCP OnePtMap = Teuchos::null; + if (mapOnePtName.length()) { + std::string mapOnePtFactName = pL.get("OnePt aggregate map factory"); + if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { + OnePtMap = currentLevel.Get >(mapOnePtName, NoFactory::get()); + } else { + RCP mapOnePtFact = GetFactory(mapOnePtFactName); + OnePtMap = currentLevel.Get >(mapOnePtName, mapOnePtFact.get()); } + } - RCP graph = Get< RCP >(currentLevel, "Graph"); + RCP graph = Get >(currentLevel, "Graph"); + + // Build + RCP aggregates = rcp(new Aggregates(*graph)); + aggregates->setObjectLabel("UC"); + + const LO numRows = graph->GetNodeNumVertices(); + + // construct aggStat information + Kokkos::View aggStat(Kokkos::ViewAllocateWithoutInitializing("aggregation status"), + numRows); + Kokkos::deep_copy(aggStat, READY); + + // LBV on Sept 06 2019: re-commenting out the dirichlet boundary map + // even if the map is correctly extracted from the graph, aggStat is + // now a Kokkos::View and filling it will + // require a parallel_for or to copy it to the Host which is not really + // good from a performance point of view. + // If dirichletBoundaryMap was an actual Xpetra::Map, one could call + // getLocalMap to have a Kokkos::View on the appropriate memory_space + // instead of an ArrayRCP. 
+ { + typename LWGraph_kokkos::boundary_nodes_type dirichletBoundaryMap = graph->getLocalLWGraph().GetBoundaryNodeMap(); + Kokkos::parallel_for( + "MueLu - UncoupledAggregation: tagging boundary nodes in aggStat", + Kokkos::RangePolicy(0, numRows), + KOKKOS_LAMBDA(const local_ordinal_type nodeIdx) { + if (dirichletBoundaryMap(nodeIdx) == true) { + aggStat(nodeIdx) = BOUNDARY; + } + }); + } - // Build - RCP aggregates = rcp(new Aggregates(*graph)); - aggregates->setObjectLabel("UC"); + LO nDofsPerNode = Get(currentLevel, "DofsPerNode"); + GO indexBase = graph->GetDomainMap()->getIndexBase(); - const LO numRows = graph->GetNodeNumVertices(); + /* FIXME: This chunk of code is still executing on the host */ + if (OnePtMap != Teuchos::null) { + typename Kokkos::View::HostMirror aggStatHost = Kokkos::create_mirror_view(aggStat); + Kokkos::deep_copy(aggStatHost, aggStat); - // construct aggStat information - Kokkos::View aggStat(Kokkos::ViewAllocateWithoutInitializing("aggregation status"), - numRows); - Kokkos::deep_copy(aggStat, READY); + for (LO i = 0; i < numRows; i++) { + // reconstruct global row id (FIXME only works for contiguous maps) + GO grid = (graph->GetDomainMap()->getGlobalElement(i) - indexBase) * nDofsPerNode + indexBase; - // LBV on Sept 06 2019: re-commenting out the dirichlet boundary map - // even if the map is correctly extracted from the graph, aggStat is - // now a Kokkos::View and filling it will - // require a parallel_for or to copy it to the Host which is not really - // good from a performance point of view. - // If dirichletBoundaryMap was an actual Xpetra::Map, one could call - // getLocalMap to have a Kokkos::View on the appropriate memory_space - // instead of an ArrayRCP. 
- { - typename LWGraph_kokkos::boundary_nodes_type dirichletBoundaryMap = graph->getLocalLWGraph().GetBoundaryNodeMap(); - Kokkos::parallel_for("MueLu - UncoupledAggregation: tagging boundary nodes in aggStat", - Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA(const local_ordinal_type nodeIdx) { - if (dirichletBoundaryMap(nodeIdx) == true) { - aggStat(nodeIdx) = BOUNDARY; - } - }); + for (LO kr = 0; kr < nDofsPerNode; kr++) + if (OnePtMap->isNodeGlobalElement(grid + kr)) + aggStatHost(i) = ONEPT; } - LO nDofsPerNode = Get(currentLevel, "DofsPerNode"); - GO indexBase = graph->GetDomainMap()->getIndexBase(); - - /* FIXME: This chunk of code is still executing on the host */ - if (OnePtMap != Teuchos::null) { - typename Kokkos::View::HostMirror aggStatHost - = Kokkos::create_mirror_view(aggStat); - Kokkos::deep_copy(aggStatHost, aggStat); - - for (LO i = 0; i < numRows; i++) { - // reconstruct global row id (FIXME only works for contiguous maps) - GO grid = (graph->GetDomainMap()->getGlobalElement(i)-indexBase) * nDofsPerNode + indexBase; - - for (LO kr = 0; kr < nDofsPerNode; kr++) - if (OnePtMap->isNodeGlobalElement(grid + kr)) - aggStatHost(i) = ONEPT; - } + Kokkos::deep_copy(aggStat, aggStatHost); + } - Kokkos::deep_copy(aggStat, aggStatHost); + const RCP > comm = graph->GetComm(); + GO numGlobalRows = 0; + if (IsPrint(Statistics1)) + MueLu_sumAll(comm, as(numRows), numGlobalRows); + + LO numNonAggregatedNodes = numRows; + std::string aggAlgo = pL.get("aggregation: coloring algorithm"); + if (aggAlgo == "mis2 coarsening" || aggAlgo == "mis2 aggregation") { + SubFactoryMonitor sfm(*this, "Algo \"MIS2\"", currentLevel); + using graph_t = typename LWGraph_kokkos::local_graph_type; + using device_t = typename graph_t::device_type; + using exec_space = typename device_t::execution_space; + using rowmap_t = typename graph_t::row_map_type; + using colinds_t = typename graph_t::entries_type; + using lno_t = typename colinds_t::non_const_value_type; + rowmap_t aRowptrs = 
graph->getLocalLWGraph().getRowPtrs(); + colinds_t aColinds = graph->getLocalLWGraph().getEntries(); + lno_t numAggs = 0; + typename colinds_t::non_const_type labels; + + if (aggAlgo == "mis2 coarsening") { + if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: MIS-2 coarsening" << std::endl; + labels = KokkosGraph::graph_mis2_coarsen(aRowptrs, aColinds, numAggs); + } else if (aggAlgo == "mis2 aggregation") { + if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: MIS-2 aggregation" << std::endl; + labels = KokkosGraph::graph_mis2_aggregate(aRowptrs, aColinds, numAggs); } - - const RCP > comm = graph->GetComm(); - GO numGlobalRows = 0; - if (IsPrint(Statistics1)) - MueLu_sumAll(comm, as(numRows), numGlobalRows); - - LO numNonAggregatedNodes = numRows; - std::string aggAlgo = pL.get("aggregation: coloring algorithm"); - if(aggAlgo == "mis2 coarsening" || aggAlgo == "mis2 aggregation") - { - SubFactoryMonitor sfm(*this, "Algo \"MIS2\"", currentLevel); - using graph_t = typename LWGraph_kokkos::local_graph_type; - using device_t = typename graph_t::device_type; - using exec_space = typename device_t::execution_space; - using rowmap_t = typename graph_t::row_map_type; - using colinds_t = typename graph_t::entries_type; - using lno_t = typename colinds_t::non_const_value_type; - rowmap_t aRowptrs = graph->getLocalLWGraph().getRowPtrs(); - colinds_t aColinds = graph->getLocalLWGraph().getEntries(); - lno_t numAggs = 0; - typename colinds_t::non_const_type labels; - - if(aggAlgo == "mis2 coarsening") - { - if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: MIS-2 coarsening" << std::endl; - labels = KokkosGraph::graph_mis2_coarsen(aRowptrs, aColinds, numAggs); - } - else if(aggAlgo == "mis2 aggregation") - { - if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: MIS-2 aggregation" << std::endl; - labels = KokkosGraph::graph_mis2_aggregate(aRowptrs, aColinds, numAggs); - } - auto vertex2AggId = 
aggregates->GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto procWinner = aggregates->GetProcWinner() ->getDeviceLocalView(Xpetra::Access::OverwriteAll); - int rank = comm->getRank(); - Kokkos::parallel_for(Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA(lno_t i) - { + auto vertex2AggId = aggregates->GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto procWinner = aggregates->GetProcWinner()->getDeviceLocalView(Xpetra::Access::OverwriteAll); + int rank = comm->getRank(); + Kokkos::parallel_for( + Kokkos::RangePolicy(0, numRows), + KOKKOS_LAMBDA(lno_t i) { procWinner(i, 0) = rank; - if(aggStat(i) == READY) - { - aggStat(i) = AGGREGATED; + if (aggStat(i) == READY) { + aggStat(i) = AGGREGATED; vertex2AggId(i, 0) = labels(i); } }); - numNonAggregatedNodes = 0; - aggregates->SetNumAggregates(numAggs); - } - else + numNonAggregatedNodes = 0; + aggregates->SetNumAggregates(numAggs); + } else { { - { - SubFactoryMonitor sfm(*this, "Algo \"Graph Coloring\"", currentLevel); - - // LBV on Sept 06 2019: the note below is a little worrisome, - // can we guarantee that MueLu is never used on a non-symmetric - // graph? - // note: just using colinds_view in place of scalar_view_t type - // (it won't be used at all by symbolic SPGEMM) - using graph_t = typename LWGraph_kokkos::local_graph_type; - using KernelHandle = KokkosKernels::Experimental:: + SubFactoryMonitor sfm(*this, "Algo \"Graph Coloring\"", currentLevel); + + // LBV on Sept 06 2019: the note below is a little worrisome, + // can we guarantee that MueLu is never used on a non-symmetric + // graph? 
+ // note: just using colinds_view in place of scalar_view_t type + // (it won't be used at all by symbolic SPGEMM) + using graph_t = typename LWGraph_kokkos::local_graph_type; + using KernelHandle = KokkosKernels::Experimental:: KokkosKernelsHandle; - KernelHandle kh; - //leave gc algorithm choice as the default - kh.create_distance2_graph_coloring_handle(); - - // get the distance-2 graph coloring handle - auto coloringHandle = kh.get_distance2_graph_coloring_handle(); - - // Set the distance-2 graph coloring algorithm to use. - // Options: - // COLORING_D2_DEFAULT - Let the kernel handle pick the variation - // COLORING_D2_SERIAL - Use the legacy serial-only implementation - // COLORING_D2_VB - Use the parallel vertex based direct method - // COLORING_D2_VB_BIT - Same as VB but using the bitvector forbidden array - // COLORING_D2_VB_BIT_EF - Add experimental edge-filtering to VB_BIT - // COLORING_D2_NB_BIT - Net-based coloring (generally the fastest) - if(pL.get("aggregation: deterministic") == true) { - coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_SERIAL ); - if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: serial" << std::endl; - } else if(aggAlgo == "serial") { - coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_SERIAL ); - if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: serial" << std::endl; - } else if(aggAlgo == "default") { - coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_DEFAULT ); - if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: default" << std::endl; - } else if(aggAlgo == "vertex based") { - coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_VB ); - if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: vertex based" << std::endl; - } else if(aggAlgo == "vertex based bit set") { - coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_VB_BIT ); - if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: vertex based bit set" << std::endl; - } else 
if(aggAlgo == "edge filtering") { - coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_VB_BIT_EF ); - if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: edge filtering" << std::endl; - } else if(aggAlgo == "net based bit set") { - coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_NB_BIT ); - if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: net based bit set" << std::endl; - } else { - TEUCHOS_TEST_FOR_EXCEPTION(true,std::invalid_argument,"Unrecognized distance 2 coloring algorithm, valid options are: serial, default, matrix squared, vertex based, vertex based bit set, edge filtering") - } - - //Create device views for graph rowptrs/colinds - typename graph_t::row_map_type aRowptrs = graph->getLocalLWGraph().getRowPtrs(); - typename graph_t::entries_type aColinds = graph->getLocalLWGraph().getEntries(); - - //run d2 graph coloring - //graph is symmetric so row map/entries and col map/entries are the same - { - SubFactoryMonitor sfm2(*this, "Algo \"Graph Coloring\": KokkosGraph Call", currentLevel);//CMS HACK - KokkosGraph::Experimental::graph_color_distance2(&kh, numRows, aRowptrs, aColinds); - } + KernelHandle kh; + //leave gc algorithm choice as the default + kh.create_distance2_graph_coloring_handle(); + + // get the distance-2 graph coloring handle + auto coloringHandle = kh.get_distance2_graph_coloring_handle(); + + // Set the distance-2 graph coloring algorithm to use. 
+ // Options: + // COLORING_D2_DEFAULT - Let the kernel handle pick the variation + // COLORING_D2_SERIAL - Use the legacy serial-only implementation + // COLORING_D2_VB - Use the parallel vertex based direct method + // COLORING_D2_VB_BIT - Same as VB but using the bitvector forbidden array + // COLORING_D2_VB_BIT_EF - Add experimental edge-filtering to VB_BIT + // COLORING_D2_NB_BIT - Net-based coloring (generally the fastest) + if (pL.get("aggregation: deterministic") == true) { + coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_SERIAL); + if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: serial" << std::endl; + } else if (aggAlgo == "serial") { + coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_SERIAL); + if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: serial" << std::endl; + } else if (aggAlgo == "default") { + coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_DEFAULT); + if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: default" << std::endl; + } else if (aggAlgo == "vertex based") { + coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_VB); + if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: vertex based" << std::endl; + } else if (aggAlgo == "vertex based bit set") { + coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_VB_BIT); + if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: vertex based bit set" << std::endl; + } else if (aggAlgo == "edge filtering") { + coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_VB_BIT_EF); + if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: edge filtering" << std::endl; + } else if (aggAlgo == "net based bit set") { + coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_NB_BIT); + if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: net based bit set" << std::endl; + } else { + TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unrecognized distance 2 coloring algorithm, valid 
options are: serial, default, matrix squared, vertex based, vertex based bit set, edge filtering") + } - // extract the colors and store them in the aggregates - aggregates->SetGraphColors(coloringHandle->get_vertex_colors()); - aggregates->SetGraphNumColors(static_cast(coloringHandle->get_num_colors())); + //Create device views for graph rowptrs/colinds + typename graph_t::row_map_type aRowptrs = graph->getLocalLWGraph().getRowPtrs(); + typename graph_t::entries_type aColinds = graph->getLocalLWGraph().getEntries(); - //clean up coloring handle - kh.destroy_distance2_graph_coloring_handle(); + //run d2 graph coloring + //graph is symmetric so row map/entries and col map/entries are the same + { + SubFactoryMonitor sfm2(*this, "Algo \"Graph Coloring\": KokkosGraph Call", currentLevel); //CMS HACK + KokkosGraph::Experimental::graph_color_distance2(&kh, numRows, aRowptrs, aColinds); } + // extract the colors and store them in the aggregates + aggregates->SetGraphColors(coloringHandle->get_vertex_colors()); + aggregates->SetGraphNumColors(static_cast(coloringHandle->get_num_colors())); + + //clean up coloring handle + kh.destroy_distance2_graph_coloring_handle(); + } + + if (IsPrint(Statistics1)) { + GetOStream(Statistics1) << " num colors: " << aggregates->GetGraphNumColors() << std::endl; + } + GO numGlobalAggregatedPrev = 0, numGlobalAggsPrev = 0; + for (size_t a = 0; a < algos_.size(); a++) { + std::string phase = algos_[a]->description(); + SubFactoryMonitor sfm2(*this, "Algo \"" + phase + "\"", currentLevel); + + int oldRank = algos_[a]->SetProcRankVerbose(this->GetProcRankVerbose()); + algos_[a]->BuildAggregates(pL, *graph, *aggregates, aggStat, numNonAggregatedNodes); + algos_[a]->SetProcRankVerbose(oldRank); + if (IsPrint(Statistics1)) { - GetOStream(Statistics1) << " num colors: " << aggregates->GetGraphNumColors() << std::endl; - } - GO numGlobalAggregatedPrev = 0, numGlobalAggsPrev = 0; - for (size_t a = 0; a < algos_.size(); a++) { - std::string phase = 
algos_[a]->description(); - SubFactoryMonitor sfm2(*this, "Algo \"" + phase + "\"", currentLevel); - - int oldRank = algos_[a]->SetProcRankVerbose(this->GetProcRankVerbose()); - algos_[a]->BuildAggregates(pL, *graph, *aggregates, aggStat, numNonAggregatedNodes); - algos_[a]->SetProcRankVerbose(oldRank); - - if (IsPrint(Statistics1)) { - GO numLocalAggregated = numRows - numNonAggregatedNodes, numGlobalAggregated = 0; - GO numLocalAggs = aggregates->GetNumAggregates(), numGlobalAggs = 0; - MueLu_sumAll(comm, numLocalAggregated, numGlobalAggregated); - MueLu_sumAll(comm, numLocalAggs, numGlobalAggs); - - double aggPercent = 100*as(numGlobalAggregated)/as(numGlobalRows); - if (aggPercent > 99.99 && aggPercent < 100.00) { - // Due to round off (for instance, for 140465733/140466897), we could - // get 100.00% display even if there are some remaining nodes. This - // is bad from the users point of view. It is much better to change - // it to display 99.99%. - aggPercent = 99.99; - } - GetOStream(Statistics1) << " aggregated : " << (numGlobalAggregated - numGlobalAggregatedPrev) << " (phase), " << std::fixed - << std::setprecision(2) << numGlobalAggregated << "/" << numGlobalRows << " [" << aggPercent << "%] (total)\n" - << " remaining : " << numGlobalRows - numGlobalAggregated << "\n" - << " aggregates : " << numGlobalAggs-numGlobalAggsPrev << " (phase), " << numGlobalAggs << " (total)" << std::endl; - numGlobalAggregatedPrev = numGlobalAggregated; - numGlobalAggsPrev = numGlobalAggs; + GO numLocalAggregated = numRows - numNonAggregatedNodes, numGlobalAggregated = 0; + GO numLocalAggs = aggregates->GetNumAggregates(), numGlobalAggs = 0; + MueLu_sumAll(comm, numLocalAggregated, numGlobalAggregated); + MueLu_sumAll(comm, numLocalAggs, numGlobalAggs); + + double aggPercent = 100 * as(numGlobalAggregated) / as(numGlobalRows); + if (aggPercent > 99.99 && aggPercent < 100.00) { + // Due to round off (for instance, for 140465733/140466897), we could + // get 100.00% display 
even if there are some remaining nodes. This + // is bad from the users point of view. It is much better to change + // it to display 99.99%. + aggPercent = 99.99; } + GetOStream(Statistics1) << " aggregated : " << (numGlobalAggregated - numGlobalAggregatedPrev) << " (phase), " << std::fixed + << std::setprecision(2) << numGlobalAggregated << "/" << numGlobalRows << " [" << aggPercent << "%] (total)\n" + << " remaining : " << numGlobalRows - numGlobalAggregated << "\n" + << " aggregates : " << numGlobalAggs - numGlobalAggsPrev << " (phase), " << numGlobalAggs << " (total)" << std::endl; + numGlobalAggregatedPrev = numGlobalAggregated; + numGlobalAggsPrev = numGlobalAggs; } } + } - TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, "MueLu::UncoupledAggregationFactory::Build: Leftover nodes found! Error!"); - - aggregates->AggregatesCrossProcessors(false); - aggregates->ComputeAggregateSizes(true/*forceRecompute*/); + TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, "MueLu::UncoupledAggregationFactory::Build: Leftover nodes found! 
Error!"); - Set(currentLevel, "Aggregates", aggregates); + aggregates->AggregatesCrossProcessors(false); + aggregates->ComputeAggregateSizes(true /*forceRecompute*/); - } + Set(currentLevel, "Aggregates", aggregates); +} -} //namespace MueLu +} //namespace MueLu #endif /* MUELU_UNCOUPLEDAGGREGATIONFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UserAggregation/MueLu_UserAggregationFactory_decl.hpp b/packages/muelu/src/Graph/UserAggregation/MueLu_UserAggregationFactory_decl.hpp index 17c7a3297fd6..9fbcbbc45951 100644 --- a/packages/muelu/src/Graph/UserAggregation/MueLu_UserAggregationFactory_decl.hpp +++ b/packages/muelu/src/Graph/UserAggregation/MueLu_UserAggregationFactory_decl.hpp @@ -46,7 +46,6 @@ #ifndef MUELU_USERAGGREGATIONFACTORY_DECL_HPP_ #define MUELU_USERAGGREGATIONFACTORY_DECL_HPP_ - #include #include @@ -60,22 +59,22 @@ namespace MueLu { -template +template class UserAggregationFactory : public SingleLevelFactoryBase { #undef MUELU_USERAGGREGATIONFACTORY_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" -public: + public: //! @name Constructors/Destructors. //@{ //! Constructor. - UserAggregationFactory() { }; + UserAggregationFactory(){}; //! Destructor. 
- virtual ~UserAggregationFactory() { } + virtual ~UserAggregationFactory() {} RCP GetValidParameterList() const; @@ -101,10 +100,10 @@ class UserAggregationFactory : public SingleLevelFactoryBase { //@} -private: -}; // class UserAggregationFactory + private: +}; // class UserAggregationFactory -} +} // namespace MueLu #define MUELU_USERAGGREGATIONFACTORY_SHORT #endif /* MUELU_USERAGGREGATIONFACTORY_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/UserAggregation/MueLu_UserAggregationFactory_def.hpp b/packages/muelu/src/Graph/UserAggregation/MueLu_UserAggregationFactory_def.hpp index bc0a418a46c7..9e3df1c1a434 100644 --- a/packages/muelu/src/Graph/UserAggregation/MueLu_UserAggregationFactory_def.hpp +++ b/packages/muelu/src/Graph/UserAggregation/MueLu_UserAggregationFactory_def.hpp @@ -60,87 +60,87 @@ namespace MueLu { - template - RCP UserAggregationFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP UserAggregationFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); - // input parameters - validParamList->set("filePrefix", "", "The data is read from files of this name: _."); - validParamList->set("fileExt", "", "The data is read from files of this name: _."); + // input parameters + validParamList->set("filePrefix", "", "The data is read from files of this name: _."); + validParamList->set("fileExt", "", "The data is read from files of this name: _."); - return validParamList; - } + return validParamList; +} - template - void UserAggregationFactory::DeclareInput(Level& /* currentLevel */) const { } +template +void UserAggregationFactory::DeclareInput(Level& /* currentLevel */) const {} - /** +/** * The function reads aggregate information from a file. * The file structure is the following: * * line 1 : * * line 2+: ... 
*/ - template - void UserAggregationFactory::Build(Level& currentLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); +template +void UserAggregationFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); - const ParameterList& pL = GetParameterList(); + const ParameterList& pL = GetParameterList(); - RCP< const Teuchos::Comm > comm = Teuchos::DefaultComm::getComm(); - const int myRank = comm->getRank(); + RCP > comm = Teuchos::DefaultComm::getComm(); + const int myRank = comm->getRank(); - std::string fileName = pL.get("filePrefix") + toString(currentLevel.GetLevelID()) + "_" + toString(myRank) + "." + pL.get("fileExt"); - std::ifstream ifs(fileName.c_str()); - TEUCHOS_TEST_FOR_EXCEPTION(!ifs.good(), Exceptions::RuntimeError, "Cannot read data from \"" << fileName << "\""); + std::string fileName = pL.get("filePrefix") + toString(currentLevel.GetLevelID()) + "_" + toString(myRank) + "." + pL.get("fileExt"); + std::ifstream ifs(fileName.c_str()); + TEUCHOS_TEST_FOR_EXCEPTION(!ifs.good(), Exceptions::RuntimeError, "Cannot read data from \"" << fileName << "\""); - LO numVertices, numAggregates; - ifs >> numVertices; - TEUCHOS_TEST_FOR_EXCEPTION(!ifs.good(), Exceptions::RuntimeError, "Cannot read data from \"" << fileName << "\""); - ifs >> numAggregates; - TEUCHOS_TEST_FOR_EXCEPTION(numVertices <= 0, Exceptions::InvalidArgument, "Number of vertices must be > 0"); - TEUCHOS_TEST_FOR_EXCEPTION(numAggregates <= 0, Exceptions::InvalidArgument, "Number of aggregates must be > 0"); + LO numVertices, numAggregates; + ifs >> numVertices; + TEUCHOS_TEST_FOR_EXCEPTION(!ifs.good(), Exceptions::RuntimeError, "Cannot read data from \"" << fileName << "\""); + ifs >> numAggregates; + TEUCHOS_TEST_FOR_EXCEPTION(numVertices <= 0, Exceptions::InvalidArgument, "Number of vertices must be > 0"); + TEUCHOS_TEST_FOR_EXCEPTION(numAggregates <= 0, Exceptions::InvalidArgument, "Number of aggregates must be > 0"); - Xpetra::UnderlyingLib lib 
= currentLevel.lib(); - const int indexBase = 0; - RCP map = MapFactory::Build(lib, numVertices, indexBase, comm); + Xpetra::UnderlyingLib lib = currentLevel.lib(); + const int indexBase = 0; + RCP map = MapFactory::Build(lib, numVertices, indexBase, comm); - RCP aggregates = rcp(new Aggregates(map)); - aggregates->setObjectLabel("User"); + RCP aggregates = rcp(new Aggregates(map)); + aggregates->setObjectLabel("User"); - aggregates->SetNumAggregates(numAggregates); + aggregates->SetNumAggregates(numAggregates); - Teuchos::ArrayRCP vertex2AggId = aggregates->GetVertex2AggId()->getDataNonConst(0); - Teuchos::ArrayRCP procWinner = aggregates->GetProcWinner() ->getDataNonConst(0); + Teuchos::ArrayRCP vertex2AggId = aggregates->GetVertex2AggId()->getDataNonConst(0); + Teuchos::ArrayRCP procWinner = aggregates->GetProcWinner()->getDataNonConst(0); - for (LO i = 0; i < numAggregates; i++) { - int aggSize = 0; - ifs >> aggSize; + for (LO i = 0; i < numAggregates; i++) { + int aggSize = 0; + ifs >> aggSize; - std::vector list(aggSize); - for (int k = 0; k < aggSize; k++) { - // FIXME: File contains GIDs, we need LIDs - // for now, works on a single processor - ifs >> list[k]; - } + std::vector list(aggSize); + for (int k = 0; k < aggSize; k++) { + // FIXME: File contains GIDs, we need LIDs + // for now, works on a single processor + ifs >> list[k]; + } - // Mark first node as root node for the aggregate - aggregates->SetIsRoot(list[0]); + // Mark first node as root node for the aggregate + aggregates->SetIsRoot(list[0]); - // Fill vertex2AggId and procWinner structure with information - for (int k = 0; k < aggSize; k++) { - vertex2AggId[list[k]] = i; - procWinner [list[k]] = myRank; - } + // Fill vertex2AggId and procWinner structure with information + for (int k = 0; k < aggSize; k++) { + vertex2AggId[list[k]] = i; + procWinner[list[k]] = myRank; } + } - // FIXME: do the proper check whether aggregates cross interprocessor boundary - 
aggregates->AggregatesCrossProcessors(false); + // FIXME: do the proper check whether aggregates cross interprocessor boundary + aggregates->AggregatesCrossProcessors(false); - Set(currentLevel, "Aggregates", aggregates); + Set(currentLevel, "Aggregates", aggregates); - GetOStream(Statistics0) << aggregates->description() << std::endl; - } + GetOStream(Statistics0) << aggregates->description() << std::endl; +} -} //namespace MueLu +} //namespace MueLu #endif /* MUELU_USERAGGREGATIONFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Headers/MueLu_ConfigDefs.hpp b/packages/muelu/src/Headers/MueLu_ConfigDefs.hpp index 6d65bf28102c..799914095ee3 100644 --- a/packages/muelu/src/Headers/MueLu_ConfigDefs.hpp +++ b/packages/muelu/src/Headers/MueLu_ConfigDefs.hpp @@ -51,7 +51,7 @@ #include // Tpetra -#include // default template parameter of many MueLu classes +#include // default template parameter of many MueLu classes // Memory management #include @@ -83,33 +83,33 @@ //! Namespace for MueLu classes and methods namespace MueLu { - // import Teuchos memory management classes into MueLu - using Teuchos::arcp; - using Teuchos::arcpFromArrayView; - using Teuchos::arcp_reinterpret_cast; - using Teuchos::Array; - using Teuchos::ArrayRCP; - using Teuchos::ArrayView; - using Teuchos::as; - using Teuchos::null; - using Teuchos::ParameterList; - using Teuchos::rcp; - using Teuchos::RCP; - using Teuchos::rcp_const_cast; - using Teuchos::rcp_dynamic_cast; - using Teuchos::rcpFromRef; - using Teuchos::rcp_implicit_cast; - using Teuchos::rcp_static_cast; +// import Teuchos memory management classes into MueLu +using Teuchos::arcp; +using Teuchos::arcp_reinterpret_cast; +using Teuchos::arcpFromArrayView; +using Teuchos::Array; +using Teuchos::ArrayRCP; +using Teuchos::ArrayView; +using Teuchos::as; +using Teuchos::null; +using Teuchos::ParameterList; +using Teuchos::rcp; +using Teuchos::RCP; +using Teuchos::rcp_const_cast; +using Teuchos::rcp_dynamic_cast; +using 
Teuchos::rcp_implicit_cast; +using Teuchos::rcp_static_cast; +using Teuchos::rcpFromRef; - // verbose levels - using Teuchos::VERB_DEFAULT; - using Teuchos::VERB_NONE; - using Teuchos::VERB_LOW; - using Teuchos::VERB_MEDIUM; - using Teuchos::VERB_HIGH; - using Teuchos::VERB_EXTREME; +// verbose levels +using Teuchos::VERB_DEFAULT; +using Teuchos::VERB_EXTREME; +using Teuchos::VERB_HIGH; +using Teuchos::VERB_LOW; +using Teuchos::VERB_MEDIUM; +using Teuchos::VERB_NONE; -} +} // namespace MueLu // This include file defines macros to avoid warnings under CUDA. See github issue #1133. #include "Teuchos_CompilerCodeTweakMacros.hpp" diff --git a/packages/muelu/src/Headers/MueLu_Details_DefaultTypes.hpp b/packages/muelu/src/Headers/MueLu_Details_DefaultTypes.hpp index fa9745afbe37..b1d19f73d5e8 100644 --- a/packages/muelu/src/Headers/MueLu_Details_DefaultTypes.hpp +++ b/packages/muelu/src/Headers/MueLu_Details_DefaultTypes.hpp @@ -52,22 +52,21 @@ #include -namespace MueLu -{ +namespace MueLu { - typedef Tpetra::Details::DefaultTypes::scalar_type DefaultScalar; +typedef Tpetra::Details::DefaultTypes::scalar_type DefaultScalar; - typedef int DefaultLocalOrdinal; +typedef int DefaultLocalOrdinal; - #if defined HAVE_MUELU_DEFAULT_GO_LONG - typedef long DefaultGlobalOrdinal; - #elif defined HAVE_MUELU_DEFAULT_GO_LONGLONG - typedef long long DefaultGlobalOrdinal; - #else - typedef int DefaultGlobalOrdinal; - #endif +#if defined HAVE_MUELU_DEFAULT_GO_LONG +typedef long DefaultGlobalOrdinal; +#elif defined HAVE_MUELU_DEFAULT_GO_LONGLONG +typedef long long DefaultGlobalOrdinal; +#else +typedef int DefaultGlobalOrdinal; +#endif - typedef Tpetra::KokkosClassic::DefaultNode::DefaultNodeType DefaultNode; -} +typedef Tpetra::KokkosClassic::DefaultNode::DefaultNodeType DefaultNode; +} // namespace MueLu #endif diff --git a/packages/muelu/src/Headers/MueLu_Types.hpp b/packages/muelu/src/Headers/MueLu_Types.hpp index 6ea5d31711d7..c57bda789e0b 100644 --- 
a/packages/muelu/src/Headers/MueLu_Types.hpp +++ b/packages/muelu/src/Headers/MueLu_Types.hpp @@ -49,55 +49,55 @@ #include "MueLu_ConfigDefs.hpp" namespace MueLu { - enum CycleType { - VCYCLE, - WCYCLE - }; +enum CycleType { + VCYCLE, + WCYCLE +}; - enum PreOrPost { - PRE = 0x1, - POST = 0x2, - BOTH = 0x3 - }; +enum PreOrPost { + PRE = 0x1, + POST = 0x2, + BOTH = 0x3 +}; - // In the algorithm, aggStat[] = READY/NOTSEL/SELECTED indicates whether a node has been aggregated - enum NodeState { - READY = 1, // indicates that a node is available to be - // selected as a root node of an aggregate +// In the algorithm, aggStat[] = READY/NOTSEL/SELECTED indicates whether a node has been aggregated +enum NodeState { + READY = 1, // indicates that a node is available to be + // selected as a root node of an aggregate - NOTSEL = 2, // indicates that a node has been rejected as a root node. - // This could perhaps be because if this node had been - // selected a small aggregate would have resulted - // This is Phase 1 specific + NOTSEL = 2, // indicates that a node has been rejected as a root node. 
+ // This could perhaps be because if this node had been + // selected a small aggregate would have resulted + // This is Phase 1 specific - AGGREGATED = 3, // indicates that a node has been assigned - // to an aggregate + AGGREGATED = 3, // indicates that a node has been assigned + // to an aggregate - ONEPT = 4, // indicates that a node shall be preserved over - // all multigrid levels as 1 point aggregate + ONEPT = 4, // indicates that a node shall be preserved over + // all multigrid levels as 1 point aggregate - IGNORED = 5, // indicates that the node is removed from consideration, - // and is not aggregated + IGNORED = 5, // indicates that the node is removed from consideration, + // and is not aggregated - BOUNDARY = 6, // node is a Dirichlet node - // During aggregation, it is transformed either to AGGREGATED - // or to IGNORED - INTERFACE = 7 // node is chosen as root node on an interface where coordinated - // coarsening across the interface is required. - }; + BOUNDARY = 6, // node is a Dirichlet node + // During aggregation, it is transformed either to AGGREGATED + // or to IGNORED + INTERFACE = 7 // node is chosen as root node on an interface where coordinated + // coarsening across the interface is required. +}; - // This is use by the structured aggregation index manager to keep track of the underlying mesh - // layout. - enum IndexingType { - UNCOUPLED = 1, // indicates that the underlying mesh is treated independently from rank to rank +// This is use by the structured aggregation index manager to keep track of the underlying mesh +// layout. 
+enum IndexingType { + UNCOUPLED = 1, // indicates that the underlying mesh is treated independently from rank to rank - LOCALLEXI = 2, // local lexicographic indexing of the mesh, this is similar to uncoupled but - // extra data is used to compute indices accross ranks + LOCALLEXI = 2, // local lexicographic indexing of the mesh, this is similar to uncoupled but + // extra data is used to compute indices accross ranks - GLOBALLEXI = 3 // global lexicographic indexing of the mesh means that the mesh is ordered - // lexicographically accorss and subsequently split among ranks. - }; + GLOBALLEXI = 3 // global lexicographic indexing of the mesh means that the mesh is ordered + // lexicographically accorss and subsequently split among ranks. +}; -} +} // namespace MueLu -#endif //ifndef MUELU_TYPES_HPP +#endif //ifndef MUELU_TYPES_HPP diff --git a/packages/muelu/src/Headers/MueLu_UseShortNamesOrdinal.hpp b/packages/muelu/src/Headers/MueLu_UseShortNamesOrdinal.hpp index 9bc47bfe9b36..63fa18d9a1df 100644 --- a/packages/muelu/src/Headers/MueLu_UseShortNamesOrdinal.hpp +++ b/packages/muelu/src/Headers/MueLu_UseShortNamesOrdinal.hpp @@ -3,124 +3,124 @@ #include #ifdef MUELU_AGGREGATES_SHORT -using Aggregates [[maybe_unused]] = MueLu::Aggregates; +using Aggregates [[maybe_unused]] = MueLu::Aggregates; #endif #ifdef MUELU_AGGREGATIONPHASE1ALGORITHM_SHORT -using AggregationPhase1Algorithm [[maybe_unused]] = MueLu::AggregationPhase1Algorithm; +using AggregationPhase1Algorithm [[maybe_unused]] = MueLu::AggregationPhase1Algorithm; #endif #ifdef MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_SHORT -using AggregationPhase1Algorithm_kokkos [[maybe_unused]] = MueLu::AggregationPhase1Algorithm_kokkos; +using AggregationPhase1Algorithm_kokkos [[maybe_unused]] = MueLu::AggregationPhase1Algorithm_kokkos; #endif #ifdef MUELU_AGGREGATIONPHASE2AALGORITHM_SHORT -using AggregationPhase2aAlgorithm [[maybe_unused]] = MueLu::AggregationPhase2aAlgorithm; +using AggregationPhase2aAlgorithm 
[[maybe_unused]] = MueLu::AggregationPhase2aAlgorithm; #endif #ifdef MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_SHORT -using AggregationPhase2aAlgorithm_kokkos [[maybe_unused]] = MueLu::AggregationPhase2aAlgorithm_kokkos; +using AggregationPhase2aAlgorithm_kokkos [[maybe_unused]] = MueLu::AggregationPhase2aAlgorithm_kokkos; #endif #ifdef MUELU_AGGREGATIONPHASE2BALGORITHM_SHORT -using AggregationPhase2bAlgorithm [[maybe_unused]] = MueLu::AggregationPhase2bAlgorithm; +using AggregationPhase2bAlgorithm [[maybe_unused]] = MueLu::AggregationPhase2bAlgorithm; #endif #ifdef MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_SHORT -using AggregationPhase2bAlgorithm_kokkos [[maybe_unused]] = MueLu::AggregationPhase2bAlgorithm_kokkos; +using AggregationPhase2bAlgorithm_kokkos [[maybe_unused]] = MueLu::AggregationPhase2bAlgorithm_kokkos; #endif #ifdef MUELU_AGGREGATIONPHASE3ALGORITHM_SHORT -using AggregationPhase3Algorithm [[maybe_unused]] = MueLu::AggregationPhase3Algorithm; +using AggregationPhase3Algorithm [[maybe_unused]] = MueLu::AggregationPhase3Algorithm; #endif #ifdef MUELU_AGGREGATIONPHASE3ALGORITHM_KOKKOS_SHORT -using AggregationPhase3Algorithm_kokkos [[maybe_unused]] = MueLu::AggregationPhase3Algorithm_kokkos; +using AggregationPhase3Algorithm_kokkos [[maybe_unused]] = MueLu::AggregationPhase3Algorithm_kokkos; #endif #ifdef MUELU_AGGREGATIONSTRUCTUREDALGORITHM_SHORT -using AggregationStructuredAlgorithm [[maybe_unused]] = MueLu::AggregationStructuredAlgorithm; +using AggregationStructuredAlgorithm [[maybe_unused]] = MueLu::AggregationStructuredAlgorithm; #endif #ifdef MUELU_AGGREGATIONSTRUCTUREDALGORITHM_KOKKOS_SHORT -using AggregationStructuredAlgorithm_kokkos [[maybe_unused]] = MueLu::AggregationStructuredAlgorithm_kokkos; +using AggregationStructuredAlgorithm_kokkos [[maybe_unused]] = MueLu::AggregationStructuredAlgorithm_kokkos; #endif #ifdef MUELU_AMALGAMATIONINFO_SHORT -using AmalgamationInfo [[maybe_unused]] = MueLu::AmalgamationInfo; +using AmalgamationInfo 
[[maybe_unused]] = MueLu::AmalgamationInfo; #endif #ifdef MUELU_GLOBALLEXICOGRAPHICINDEXMANAGER_SHORT -using GlobalLexicographicIndexManager [[maybe_unused]] = MueLu::GlobalLexicographicIndexManager; +using GlobalLexicographicIndexManager [[maybe_unused]] = MueLu::GlobalLexicographicIndexManager; #endif #ifdef MUELU_GRAPH_SHORT -using Graph [[maybe_unused]] = MueLu::Graph; +using Graph [[maybe_unused]] = MueLu::Graph; #endif #ifdef MUELU_GRAPHBASE_SHORT -using GraphBase [[maybe_unused]] = MueLu::GraphBase; +using GraphBase [[maybe_unused]] = MueLu::GraphBase; #endif #ifdef MUELU_HYBRIDAGGREGATIONFACTORY_SHORT -using HybridAggregationFactory [[maybe_unused]] = MueLu::HybridAggregationFactory; +using HybridAggregationFactory [[maybe_unused]] = MueLu::HybridAggregationFactory; #endif #ifdef MUELU_INDEXMANAGER_SHORT -using IndexManager [[maybe_unused]] = MueLu::IndexManager; +using IndexManager [[maybe_unused]] = MueLu::IndexManager; #endif #ifdef MUELU_INDEXMANAGER_KOKKOS_SHORT -using IndexManager_kokkos [[maybe_unused]] = MueLu::IndexManager_kokkos; +using IndexManager_kokkos [[maybe_unused]] = MueLu::IndexManager_kokkos; #endif #ifdef MUELU_INTERFACEAGGREGATIONALGORITHM_SHORT -using InterfaceAggregationAlgorithm [[maybe_unused]] = MueLu::InterfaceAggregationAlgorithm; +using InterfaceAggregationAlgorithm [[maybe_unused]] = MueLu::InterfaceAggregationAlgorithm; #endif #ifdef MUELU_INTERFACEMAPPINGTRANSFERFACTORY_SHORT -using InterfaceMappingTransferFactory [[maybe_unused]] = MueLu::InterfaceMappingTransferFactory; +using InterfaceMappingTransferFactory [[maybe_unused]] = MueLu::InterfaceMappingTransferFactory; #endif #ifdef MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_SHORT -using IsolatedNodeAggregationAlgorithm [[maybe_unused]] = MueLu::IsolatedNodeAggregationAlgorithm; +using IsolatedNodeAggregationAlgorithm [[maybe_unused]] = MueLu::IsolatedNodeAggregationAlgorithm; #endif #ifdef MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_KOKKOS_SHORT -using 
IsolatedNodeAggregationAlgorithm_kokkos [[maybe_unused]] = MueLu::IsolatedNodeAggregationAlgorithm_kokkos; +using IsolatedNodeAggregationAlgorithm_kokkos [[maybe_unused]] = MueLu::IsolatedNodeAggregationAlgorithm_kokkos; #endif #ifdef MUELU_ISORROPIAINTERFACE_SHORT -using IsorropiaInterface [[maybe_unused]] = MueLu::IsorropiaInterface; +using IsorropiaInterface [[maybe_unused]] = MueLu::IsorropiaInterface; #endif #ifdef MUELU_LWGRAPH_SHORT -using LWGraph [[maybe_unused]] = MueLu::LWGraph; +using LWGraph [[maybe_unused]] = MueLu::LWGraph; #endif #ifdef MUELU_LWGRAPH_KOKKOS_SHORT -using LWGraph_kokkos [[maybe_unused]] = MueLu::LWGraph_kokkos; +using LWGraph_kokkos [[maybe_unused]] = MueLu::LWGraph_kokkos; #endif #ifdef MUELU_LOCALLWGRAPH_KOKKOS_SHORT -using LocalLWGraph_kokkos [[maybe_unused]] = MueLu::LocalLWGraph_kokkos; +using LocalLWGraph_kokkos [[maybe_unused]] = MueLu::LocalLWGraph_kokkos; #endif #ifdef MUELU_LOCALLEXICOGRAPHICINDEXMANAGER_SHORT -using LocalLexicographicIndexManager [[maybe_unused]] = MueLu::LocalLexicographicIndexManager; +using LocalLexicographicIndexManager [[maybe_unused]] = MueLu::LocalLexicographicIndexManager; #endif #ifdef MUELU_LOCALORDINALTRANSFERFACTORY_SHORT -using LocalOrdinalTransferFactory [[maybe_unused]] = MueLu::LocalOrdinalTransferFactory; +using LocalOrdinalTransferFactory [[maybe_unused]] = MueLu::LocalOrdinalTransferFactory; #endif #ifdef MUELU_ONEPTAGGREGATIONALGORITHM_SHORT -using OnePtAggregationAlgorithm [[maybe_unused]] = MueLu::OnePtAggregationAlgorithm; +using OnePtAggregationAlgorithm [[maybe_unused]] = MueLu::OnePtAggregationAlgorithm; #endif #ifdef MUELU_ONEPTAGGREGATIONALGORITHM_KOKKOS_SHORT -using OnePtAggregationAlgorithm_kokkos [[maybe_unused]] = MueLu::OnePtAggregationAlgorithm_kokkos; +using OnePtAggregationAlgorithm_kokkos [[maybe_unused]] = MueLu::OnePtAggregationAlgorithm_kokkos; #endif #ifdef MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_SHORT -using PreserveDirichletAggregationAlgorithm [[maybe_unused]] 
= MueLu::PreserveDirichletAggregationAlgorithm; +using PreserveDirichletAggregationAlgorithm [[maybe_unused]] = MueLu::PreserveDirichletAggregationAlgorithm; #endif #ifdef MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_KOKKOS_SHORT -using PreserveDirichletAggregationAlgorithm_kokkos [[maybe_unused]] = MueLu::PreserveDirichletAggregationAlgorithm_kokkos; +using PreserveDirichletAggregationAlgorithm_kokkos [[maybe_unused]] = MueLu::PreserveDirichletAggregationAlgorithm_kokkos; #endif #ifdef MUELU_PRFACTORY_SHORT -using PRFactory [[maybe_unused]] = MueLu::PRFactory; +using PRFactory [[maybe_unused]] = MueLu::PRFactory; #endif #ifdef MUELU_REBALANCEMAPFACTORY_SHORT -using RebalanceMapFactory [[maybe_unused]] = MueLu::RebalanceMapFactory; +using RebalanceMapFactory [[maybe_unused]] = MueLu::RebalanceMapFactory; #endif #ifdef MUELU_REPARTITIONINTERFACE_SHORT -using RepartitionInterface [[maybe_unused]] = MueLu::RepartitionInterface; +using RepartitionInterface [[maybe_unused]] = MueLu::RepartitionInterface; #endif #ifdef MUELU_STRUCTUREDAGGREGATIONFACTORY_KOKKOS_SHORT -using StructuredAggregationFactory_kokkos [[maybe_unused]] = MueLu::StructuredAggregationFactory_kokkos; +using StructuredAggregationFactory_kokkos [[maybe_unused]] = MueLu::StructuredAggregationFactory_kokkos; #endif #ifdef MUELU_UNCOUPLEDAGGREGATIONFACTORY_SHORT -using UncoupledAggregationFactory [[maybe_unused]] = MueLu::UncoupledAggregationFactory; +using UncoupledAggregationFactory [[maybe_unused]] = MueLu::UncoupledAggregationFactory; #endif #ifdef MUELU_UNCOUPLEDAGGREGATIONFACTORY_KOKKOS_SHORT -using UncoupledAggregationFactory_kokkos [[maybe_unused]] = MueLu::UncoupledAggregationFactory_kokkos; +using UncoupledAggregationFactory_kokkos [[maybe_unused]] = MueLu::UncoupledAggregationFactory_kokkos; #endif #ifdef MUELU_UNCOUPLEDINDEXMANAGER_SHORT -using UncoupledIndexManager [[maybe_unused]] = MueLu::UncoupledIndexManager; +using UncoupledIndexManager [[maybe_unused]] = MueLu::UncoupledIndexManager; 
#endif #ifdef MUELU_USERAGGREGATIONFACTORY_SHORT -using UserAggregationFactory [[maybe_unused]] = MueLu::UserAggregationFactory; +using UserAggregationFactory [[maybe_unused]] = MueLu::UserAggregationFactory; #endif #ifdef MUELU_FACTORY_SHORT using Factory [[maybe_unused]] = MueLu::Factory; diff --git a/packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp b/packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp index faeb451b7026..a3cbd53dbb67 100644 --- a/packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp +++ b/packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp @@ -3,449 +3,449 @@ #include #ifdef MUELU_AGGREGATIONEXPORTFACTORY_SHORT -using AggregationExportFactory [[maybe_unused]] = MueLu::AggregationExportFactory; +using AggregationExportFactory [[maybe_unused]] = MueLu::AggregationExportFactory; #endif #ifdef MUELU_AGGREGATEQUALITYESTIMATEFACTORY_SHORT -using AggregateQualityEstimateFactory [[maybe_unused]] = MueLu::AggregateQualityEstimateFactory; +using AggregateQualityEstimateFactory [[maybe_unused]] = MueLu::AggregateQualityEstimateFactory; #endif #ifdef MUELU_AMALGAMATIONFACTORY_SHORT -using AmalgamationFactory [[maybe_unused]] = MueLu::AmalgamationFactory; +using AmalgamationFactory [[maybe_unused]] = MueLu::AmalgamationFactory; #endif #ifdef MUELU_AMESOS2SMOOTHER_SHORT -using Amesos2Smoother [[maybe_unused]] = MueLu::Amesos2Smoother; +using Amesos2Smoother [[maybe_unused]] = MueLu::Amesos2Smoother; #endif #ifdef MUELU_AMGXOPERATOR_SHORT -using AMGXOperator [[maybe_unused]] = MueLu::AMGXOperator; +using AMGXOperator [[maybe_unused]] = MueLu::AMGXOperator; #endif #ifdef MUELU_ALGEBRAICPERMUTATIONSTRATEGY_SHORT -using AlgebraicPermutationStrategy [[maybe_unused]] = MueLu::AlgebraicPermutationStrategy; +using AlgebraicPermutationStrategy [[maybe_unused]] = MueLu::AlgebraicPermutationStrategy; #endif #ifdef MUELU_BELOSSMOOTHER_SHORT -using BelosSmoother [[maybe_unused]] = MueLu::BelosSmoother; +using BelosSmoother [[maybe_unused]] = 
MueLu::BelosSmoother; #endif #ifdef MUELU_BLACKBOXPFACTORY_SHORT -using BlackBoxPFactory [[maybe_unused]] = MueLu::BlackBoxPFactory; +using BlackBoxPFactory [[maybe_unused]] = MueLu::BlackBoxPFactory; #endif #ifdef MUELU_BLOCKEDCOARSEMAPFACTORY_SHORT -using BlockedCoarseMapFactory [[maybe_unused]] = MueLu::BlockedCoarseMapFactory; +using BlockedCoarseMapFactory [[maybe_unused]] = MueLu::BlockedCoarseMapFactory; #endif #ifdef MUELU_BLOCKEDCOORDINATESTRANSFERFACTORY_SHORT -using BlockedCoordinatesTransferFactory [[maybe_unused]] = MueLu::BlockedCoordinatesTransferFactory; +using BlockedCoordinatesTransferFactory [[maybe_unused]] = MueLu::BlockedCoordinatesTransferFactory; #endif #ifdef MUELU_BLOCKEDDIRECTSOLVER_SHORT -using BlockedDirectSolver [[maybe_unused]] = MueLu::BlockedDirectSolver; +using BlockedDirectSolver [[maybe_unused]] = MueLu::BlockedDirectSolver; #endif #ifdef MUELU_BLOCKEDGAUSSSEIDELSMOOTHER_SHORT -using BlockedGaussSeidelSmoother [[maybe_unused]] = MueLu::BlockedGaussSeidelSmoother; +using BlockedGaussSeidelSmoother [[maybe_unused]] = MueLu::BlockedGaussSeidelSmoother; #endif #ifdef MUELU_BLOCKEDJACOBISMOOTHER_SHORT -using BlockedJacobiSmoother [[maybe_unused]] = MueLu::BlockedJacobiSmoother; +using BlockedJacobiSmoother [[maybe_unused]] = MueLu::BlockedJacobiSmoother; #endif #ifdef MUELU_BLOCKEDPFACTORY_SHORT -using BlockedPFactory [[maybe_unused]] = MueLu::BlockedPFactory; +using BlockedPFactory [[maybe_unused]] = MueLu::BlockedPFactory; #endif #ifdef MUELU_BLOCKEDRAPFACTORY_SHORT -using BlockedRAPFactory [[maybe_unused]] = MueLu::BlockedRAPFactory; +using BlockedRAPFactory [[maybe_unused]] = MueLu::BlockedRAPFactory; #endif #ifdef MUELU_BRICKAGGREGATIONFACTORY_SHORT -using BrickAggregationFactory [[maybe_unused]] = MueLu::BrickAggregationFactory; +using BrickAggregationFactory [[maybe_unused]] = MueLu::BrickAggregationFactory; #endif #ifdef MUELU_BRAESSSARAZINSMOOTHER_SHORT -using BraessSarazinSmoother [[maybe_unused]] = 
MueLu::BraessSarazinSmoother; +using BraessSarazinSmoother [[maybe_unused]] = MueLu::BraessSarazinSmoother; #endif #ifdef MUELU_CGSOLVER_SHORT -using CGSolver [[maybe_unused]] = MueLu::CGSolver; +using CGSolver [[maybe_unused]] = MueLu::CGSolver; #endif #ifdef MUELU_CLASSICALMAPFACTORY_SHORT -using ClassicalMapFactory [[maybe_unused]] = MueLu::ClassicalMapFactory; +using ClassicalMapFactory [[maybe_unused]] = MueLu::ClassicalMapFactory; #endif #ifdef MUELU_CLASSICALPFACTORY_SHORT -using ClassicalPFactory [[maybe_unused]] = MueLu::ClassicalPFactory; +using ClassicalPFactory [[maybe_unused]] = MueLu::ClassicalPFactory; #endif #ifdef MUELU_CLONEREPARTITIONINTERFACE_SHORT -using CloneRepartitionInterface [[maybe_unused]] = MueLu::CloneRepartitionInterface; +using CloneRepartitionInterface [[maybe_unused]] = MueLu::CloneRepartitionInterface; #endif #ifdef MUELU_COALESCEDROPFACTORY_SHORT -using CoalesceDropFactory [[maybe_unused]] = MueLu::CoalesceDropFactory; +using CoalesceDropFactory [[maybe_unused]] = MueLu::CoalesceDropFactory; #endif #ifdef MUELU_COALESCEDROPFACTORY_KOKKOS_SHORT -using CoalesceDropFactory_kokkos [[maybe_unused]] = MueLu::CoalesceDropFactory_kokkos; +using CoalesceDropFactory_kokkos [[maybe_unused]] = MueLu::CoalesceDropFactory_kokkos; #endif #ifdef MUELU_COARSEMAPFACTORY_SHORT -using CoarseMapFactory [[maybe_unused]] = MueLu::CoarseMapFactory; +using CoarseMapFactory [[maybe_unused]] = MueLu::CoarseMapFactory; #endif #ifdef MUELU_COARSENINGVISUALIZATIONFACTORY_SHORT -using CoarseningVisualizationFactory [[maybe_unused]] = MueLu::CoarseningVisualizationFactory; +using CoarseningVisualizationFactory [[maybe_unused]] = MueLu::CoarseningVisualizationFactory; #endif #ifdef MUELU_CONSTRAINT_SHORT -using Constraint [[maybe_unused]] = MueLu::Constraint; +using Constraint [[maybe_unused]] = MueLu::Constraint; #endif #ifdef MUELU_CONSTRAINTFACTORY_SHORT -using ConstraintFactory [[maybe_unused]] = MueLu::ConstraintFactory; +using ConstraintFactory 
[[maybe_unused]] = MueLu::ConstraintFactory; #endif #ifdef MUELU_COORDINATESTRANSFERFACTORY_SHORT -using CoordinatesTransferFactory [[maybe_unused]] = MueLu::CoordinatesTransferFactory; +using CoordinatesTransferFactory [[maybe_unused]] = MueLu::CoordinatesTransferFactory; #endif #ifdef MUELU_COUPLEDRBMFACTORY_SHORT -using CoupledRBMFactory [[maybe_unused]] = MueLu::CoupledRBMFactory; +using CoupledRBMFactory [[maybe_unused]] = MueLu::CoupledRBMFactory; #endif #ifdef MUELU_DEMOFACTORY_SHORT -using DemoFactory [[maybe_unused]] = MueLu::DemoFactory; +using DemoFactory [[maybe_unused]] = MueLu::DemoFactory; #endif #ifdef MUELU_DIRECTSOLVER_SHORT -using DirectSolver [[maybe_unused]] = MueLu::DirectSolver; +using DirectSolver [[maybe_unused]] = MueLu::DirectSolver; #endif #ifdef MUELU_DROPNEGATIVEENTRIESFACTORY_SHORT -using DropNegativeEntriesFactory [[maybe_unused]] = MueLu::DropNegativeEntriesFactory; +using DropNegativeEntriesFactory [[maybe_unused]] = MueLu::DropNegativeEntriesFactory; #endif #ifdef MUELU_EMINPFACTORY_SHORT -using EminPFactory [[maybe_unused]] = MueLu::EminPFactory; +using EminPFactory [[maybe_unused]] = MueLu::EminPFactory; #endif #ifdef MUELU_FACADECLASSFACTORY_SHORT -using FacadeClassFactory [[maybe_unused]] = MueLu::FacadeClassFactory; +using FacadeClassFactory [[maybe_unused]] = MueLu::FacadeClassFactory; #endif #ifdef MUELU_FACTORYMANAGER_SHORT -using FactoryManager [[maybe_unused]] = MueLu::FactoryManager; +using FactoryManager [[maybe_unused]] = MueLu::FactoryManager; #endif #ifdef MUELU_FAKESMOOTHERPROTOTYPE_SHORT -using FakeSmootherPrototype [[maybe_unused]] = MueLu::FakeSmootherPrototype; +using FakeSmootherPrototype [[maybe_unused]] = MueLu::FakeSmootherPrototype; #endif #ifdef MUELU_FILTEREDAFACTORY_SHORT -using FilteredAFactory [[maybe_unused]] = MueLu::FilteredAFactory; +using FilteredAFactory [[maybe_unused]] = MueLu::FilteredAFactory; #endif #ifdef MUELU_FINELEVELINPUTDATAFACTORY_SHORT -using FineLevelInputDataFactory 
[[maybe_unused]] = MueLu::FineLevelInputDataFactory; +using FineLevelInputDataFactory [[maybe_unused]] = MueLu::FineLevelInputDataFactory; #endif #ifdef MUELU_GENERALGEOMETRICPFACTORY_SHORT -using GeneralGeometricPFactory [[maybe_unused]] = MueLu::GeneralGeometricPFactory; +using GeneralGeometricPFactory [[maybe_unused]] = MueLu::GeneralGeometricPFactory; #endif #ifdef MUELU_GENERICRFACTORY_SHORT -using GenericRFactory [[maybe_unused]] = MueLu::GenericRFactory; +using GenericRFactory [[maybe_unused]] = MueLu::GenericRFactory; #endif #ifdef MUELU_GEOMETRICINTERPOLATIONPFACTORY_SHORT -using GeometricInterpolationPFactory [[maybe_unused]] = MueLu::GeometricInterpolationPFactory; +using GeometricInterpolationPFactory [[maybe_unused]] = MueLu::GeometricInterpolationPFactory; #endif #ifdef MUELU_GEOMETRICINTERPOLATIONPFACTORY_KOKKOS_SHORT -using GeometricInterpolationPFactory_kokkos [[maybe_unused]] = MueLu::GeometricInterpolationPFactory_kokkos; +using GeometricInterpolationPFactory_kokkos [[maybe_unused]] = MueLu::GeometricInterpolationPFactory_kokkos; #endif #ifdef MUELU_GMRESSOLVER_SHORT -using GMRESSolver [[maybe_unused]] = MueLu::GMRESSolver; +using GMRESSolver [[maybe_unused]] = MueLu::GMRESSolver; #endif #ifdef MUELU_HIERARCHY_SHORT -using Hierarchy [[maybe_unused]] = MueLu::Hierarchy; +using Hierarchy [[maybe_unused]] = MueLu::Hierarchy; #endif #ifdef MUELU_HIERARCHYMANAGER_SHORT -using HierarchyManager [[maybe_unused]] = MueLu::HierarchyManager; +using HierarchyManager [[maybe_unused]] = MueLu::HierarchyManager; #endif #ifdef MUELU_HIERARCHYFACTORY_SHORT -using HierarchyFactory [[maybe_unused]] = MueLu::HierarchyFactory; +using HierarchyFactory [[maybe_unused]] = MueLu::HierarchyFactory; #endif #ifdef MUELU_HIERARCHYUTILS_SHORT -using HierarchyUtils [[maybe_unused]] = MueLu::HierarchyUtils; +using HierarchyUtils [[maybe_unused]] = MueLu::HierarchyUtils; #endif #ifdef MUELU_INTERFACEAGGREGATIONFACTORY_SHORT -using InterfaceAggregationFactory [[maybe_unused]] = 
MueLu::InterfaceAggregationFactory; +using InterfaceAggregationFactory [[maybe_unused]] = MueLu::InterfaceAggregationFactory; #endif #ifdef MUELU_IFPACK2SMOOTHER_SHORT -using Ifpack2Smoother [[maybe_unused]] = MueLu::Ifpack2Smoother; +using Ifpack2Smoother [[maybe_unused]] = MueLu::Ifpack2Smoother; #endif #ifdef MUELU_INDEFBLOCKEDDIAGONALSMOOTHER_SHORT -using IndefBlockedDiagonalSmoother [[maybe_unused]] = MueLu::IndefBlockedDiagonalSmoother; +using IndefBlockedDiagonalSmoother [[maybe_unused]] = MueLu::IndefBlockedDiagonalSmoother; #endif #ifdef MUELU_INITIALBLOCKNUMBERFACTORY_SHORT -using InitialBlockNumberFactory [[maybe_unused]] = MueLu::InitialBlockNumberFactory; +using InitialBlockNumberFactory [[maybe_unused]] = MueLu::InitialBlockNumberFactory; #endif #ifdef MUELU_INTREPIDPCOARSENFACTORY_SHORT -using IntrepidPCoarsenFactory [[maybe_unused]] = MueLu::IntrepidPCoarsenFactory; +using IntrepidPCoarsenFactory [[maybe_unused]] = MueLu::IntrepidPCoarsenFactory; #endif #ifdef MUELU_INVERSEAPPROXIMATIONFACTORY_SHORT -using InverseApproximationFactory [[maybe_unused]] = MueLu::InverseApproximationFactory; +using InverseApproximationFactory [[maybe_unused]] = MueLu::InverseApproximationFactory; #endif #ifdef MUELU_LINEDETECTIONFACTORY_SHORT -using LineDetectionFactory [[maybe_unused]] = MueLu::LineDetectionFactory; +using LineDetectionFactory [[maybe_unused]] = MueLu::LineDetectionFactory; #endif #ifdef MUELU_LOCALPERMUTATIONSTRATEGY_SHORT -using LocalPermutationStrategy [[maybe_unused]] = MueLu::LocalPermutationStrategy; +using LocalPermutationStrategy [[maybe_unused]] = MueLu::LocalPermutationStrategy; #endif #ifdef MUELU_LOWPRECISIONFACTORY_SHORT -using LowPrecisionFactory [[maybe_unused]] = MueLu::LowPrecisionFactory; +using LowPrecisionFactory [[maybe_unused]] = MueLu::LowPrecisionFactory; #endif #ifdef MUELU_MAPTRANSFERFACTORY_SHORT -using MapTransferFactory [[maybe_unused]] = MueLu::MapTransferFactory; +using MapTransferFactory [[maybe_unused]] = 
MueLu::MapTransferFactory; #endif #ifdef MUELU_MATRIXANALYSISFACTORY_SHORT -using MatrixAnalysisFactory [[maybe_unused]] = MueLu::MatrixAnalysisFactory; +using MatrixAnalysisFactory [[maybe_unused]] = MueLu::MatrixAnalysisFactory; #endif #ifdef MUELU_MERGEDBLOCKEDMATRIXFACTORY_SHORT -using MergedBlockedMatrixFactory [[maybe_unused]] = MueLu::MergedBlockedMatrixFactory; +using MergedBlockedMatrixFactory [[maybe_unused]] = MueLu::MergedBlockedMatrixFactory; #endif #ifdef MUELU_MERGEDSMOOTHER_SHORT -using MergedSmoother [[maybe_unused]] = MueLu::MergedSmoother; +using MergedSmoother [[maybe_unused]] = MueLu::MergedSmoother; #endif #ifdef MUELU_MULTIVECTORTRANSFERFACTORY_SHORT -using MultiVectorTransferFactory [[maybe_unused]] = MueLu::MultiVectorTransferFactory; +using MultiVectorTransferFactory [[maybe_unused]] = MueLu::MultiVectorTransferFactory; #endif #ifdef MUELU_NOTAYAGGREGATIONFACTORY_SHORT -using NotayAggregationFactory [[maybe_unused]] = MueLu::NotayAggregationFactory; +using NotayAggregationFactory [[maybe_unused]] = MueLu::NotayAggregationFactory; #endif #ifdef MUELU_NULLSPACEFACTORY_SHORT -using NullspaceFactory [[maybe_unused]] = MueLu::NullspaceFactory; +using NullspaceFactory [[maybe_unused]] = MueLu::NullspaceFactory; #endif #ifdef MUELU_NULLSPACEFACTORY_KOKKOS_SHORT -using NullspaceFactory_kokkos [[maybe_unused]] = MueLu::NullspaceFactory_kokkos; +using NullspaceFactory_kokkos [[maybe_unused]] = MueLu::NullspaceFactory_kokkos; #endif #ifdef MUELU_NULLSPACEPRESMOOTHFACTORY_SHORT -using NullspacePresmoothFactory [[maybe_unused]] = MueLu::NullspacePresmoothFactory; +using NullspacePresmoothFactory [[maybe_unused]] = MueLu::NullspacePresmoothFactory; #endif #ifdef MUELU_PATTERNFACTORY_SHORT -using PatternFactory [[maybe_unused]] = MueLu::PatternFactory; +using PatternFactory [[maybe_unused]] = MueLu::PatternFactory; #endif #ifdef MUELU_PERFUTILS_SHORT -using PerfUtils [[maybe_unused]] = MueLu::PerfUtils; +using PerfUtils [[maybe_unused]] = 
MueLu::PerfUtils; #endif #ifdef MUELU_PERFMODELS_SHORT -using PerfModels [[maybe_unused]] = MueLu::PerfModels; +using PerfModels [[maybe_unused]] = MueLu::PerfModels; #endif #ifdef MUELU_PERMUTATIONFACTORY_SHORT -using PermutationFactory [[maybe_unused]] = MueLu::PermutationFactory; +using PermutationFactory [[maybe_unused]] = MueLu::PermutationFactory; #endif #ifdef MUELU_PERMUTINGSMOOTHER_SHORT -using PermutingSmoother [[maybe_unused]] = MueLu::PermutingSmoother; +using PermutingSmoother [[maybe_unused]] = MueLu::PermutingSmoother; #endif #ifdef MUELU_PGPFACTORY_SHORT -using PgPFactory [[maybe_unused]] = MueLu::PgPFactory; +using PgPFactory [[maybe_unused]] = MueLu::PgPFactory; #endif #ifdef MUELU_PREDROPFUNCTIONBASECLASS_SHORT -using PreDropFunctionBaseClass [[maybe_unused]] = MueLu::PreDropFunctionBaseClass; +using PreDropFunctionBaseClass [[maybe_unused]] = MueLu::PreDropFunctionBaseClass; #endif #ifdef MUELU_PREDROPFUNCTIONCONSTVAL_SHORT -using PreDropFunctionConstVal [[maybe_unused]] = MueLu::PreDropFunctionConstVal; +using PreDropFunctionConstVal [[maybe_unused]] = MueLu::PreDropFunctionConstVal; #endif #ifdef MUELU_PROJECTORSMOOTHER_SHORT -using ProjectorSmoother [[maybe_unused]] = MueLu::ProjectorSmoother; +using ProjectorSmoother [[maybe_unused]] = MueLu::ProjectorSmoother; #endif #ifdef MUELU_RAPFACTORY_SHORT -using RAPFactory [[maybe_unused]] = MueLu::RAPFactory; +using RAPFactory [[maybe_unused]] = MueLu::RAPFactory; #endif #ifdef MUELU_RAPSHIFTFACTORY_SHORT -using RAPShiftFactory [[maybe_unused]] = MueLu::RAPShiftFactory; +using RAPShiftFactory [[maybe_unused]] = MueLu::RAPShiftFactory; #endif #ifdef MUELU_REBALANCEACFACTORY_SHORT -using RebalanceAcFactory [[maybe_unused]] = MueLu::RebalanceAcFactory; +using RebalanceAcFactory [[maybe_unused]] = MueLu::RebalanceAcFactory; #endif #ifdef MUELU_REBALANCEBLOCKACFACTORY_SHORT -using RebalanceBlockAcFactory [[maybe_unused]] = MueLu::RebalanceBlockAcFactory; +using RebalanceBlockAcFactory [[maybe_unused]] = 
MueLu::RebalanceBlockAcFactory; #endif #ifdef MUELU_REBALANCEBLOCKINTERPOLATIONFACTORY_SHORT -using RebalanceBlockInterpolationFactory [[maybe_unused]] = MueLu::RebalanceBlockInterpolationFactory; +using RebalanceBlockInterpolationFactory [[maybe_unused]] = MueLu::RebalanceBlockInterpolationFactory; #endif #ifdef MUELU_REBALANCEBLOCKRESTRICTIONFACTORY_SHORT -using RebalanceBlockRestrictionFactory [[maybe_unused]] = MueLu::RebalanceBlockRestrictionFactory; +using RebalanceBlockRestrictionFactory [[maybe_unused]] = MueLu::RebalanceBlockRestrictionFactory; #endif #ifdef MUELU_REBALANCETRANSFERFACTORY_SHORT -using RebalanceTransferFactory [[maybe_unused]] = MueLu::RebalanceTransferFactory; +using RebalanceTransferFactory [[maybe_unused]] = MueLu::RebalanceTransferFactory; #endif #ifdef MUELU_REFMAXWELLSMOOTHER_SHORT -using RefMaxwellSmoother [[maybe_unused]] = MueLu::RefMaxwellSmoother; +using RefMaxwellSmoother [[maybe_unused]] = MueLu::RefMaxwellSmoother; #endif #ifdef MUELU_REGIONRFACTORY_SHORT -using RegionRFactory [[maybe_unused]] = MueLu::RegionRFactory; +using RegionRFactory [[maybe_unused]] = MueLu::RegionRFactory; #endif #ifdef MUELU_REGIONRFACTORY_KOKKOS_SHORT -using RegionRFactory_kokkos [[maybe_unused]] = MueLu::RegionRFactory_kokkos; +using RegionRFactory_kokkos [[maybe_unused]] = MueLu::RegionRFactory_kokkos; #endif #ifdef MUELU_REITZINGERPFACTORY_SHORT -using ReitzingerPFactory [[maybe_unused]] = MueLu::ReitzingerPFactory; +using ReitzingerPFactory [[maybe_unused]] = MueLu::ReitzingerPFactory; #endif #ifdef MUELU_REORDERBLOCKAFACTORY_SHORT -using ReorderBlockAFactory [[maybe_unused]] = MueLu::ReorderBlockAFactory; +using ReorderBlockAFactory [[maybe_unused]] = MueLu::ReorderBlockAFactory; #endif #ifdef MUELU_REPARTITIONFACTORY_SHORT -using RepartitionFactory [[maybe_unused]] = MueLu::RepartitionFactory; +using RepartitionFactory [[maybe_unused]] = MueLu::RepartitionFactory; #endif #ifdef MUELU_REPARTITIONBLOCKDIAGONALFACTORY_SHORT -using 
RepartitionBlockDiagonalFactory [[maybe_unused]] = MueLu::RepartitionBlockDiagonalFactory; +using RepartitionBlockDiagonalFactory [[maybe_unused]] = MueLu::RepartitionBlockDiagonalFactory; #endif #ifdef MUELU_REPARTITIONHEURISTICFACTORY_SHORT -using RepartitionHeuristicFactory [[maybe_unused]] = MueLu::RepartitionHeuristicFactory; +using RepartitionHeuristicFactory [[maybe_unused]] = MueLu::RepartitionHeuristicFactory; #endif #ifdef MUELU_COMBINEPFACTORY_SHORT -using CombinePFactory [[maybe_unused]] = MueLu::CombinePFactory; +using CombinePFactory [[maybe_unused]] = MueLu::CombinePFactory; #endif #ifdef MUELU_REPLICATEPFACTORY_SHORT -using ReplicatePFactory [[maybe_unused]] = MueLu::ReplicatePFactory; +using ReplicatePFactory [[maybe_unused]] = MueLu::ReplicatePFactory; #endif #ifdef MUELU_RIGIDBODYMODEFACTORY_SHORT -using RigidBodyModeFactory [[maybe_unused]] = MueLu::RigidBodyModeFactory; +using RigidBodyModeFactory [[maybe_unused]] = MueLu::RigidBodyModeFactory; #endif #ifdef MUELU_SAPFACTORY_SHORT -using SaPFactory [[maybe_unused]] = MueLu::SaPFactory; +using SaPFactory [[maybe_unused]] = MueLu::SaPFactory; #endif #ifdef MUELU_SAPFACTORY_KOKKOS_SHORT -using SaPFactory_kokkos [[maybe_unused]] = MueLu::SaPFactory_kokkos; +using SaPFactory_kokkos [[maybe_unused]] = MueLu::SaPFactory_kokkos; #endif #ifdef MUELU_SCALEDNULLSPACEFACTORY_SHORT -using ScaledNullspaceFactory [[maybe_unused]] = MueLu::ScaledNullspaceFactory; +using ScaledNullspaceFactory [[maybe_unused]] = MueLu::ScaledNullspaceFactory; #endif #ifdef MUELU_SCHURCOMPLEMENTFACTORY_SHORT -using SchurComplementFactory [[maybe_unused]] = MueLu::SchurComplementFactory; +using SchurComplementFactory [[maybe_unused]] = MueLu::SchurComplementFactory; #endif #ifdef MUELU_SEGREGATEDAFACTORY_SHORT -using SegregatedAFactory [[maybe_unused]] = MueLu::SegregatedAFactory; +using SegregatedAFactory [[maybe_unused]] = MueLu::SegregatedAFactory; #endif #ifdef MUELU_SHIFTEDLAPLACIAN_SHORT -using ShiftedLaplacian 
[[maybe_unused]] = MueLu::ShiftedLaplacian; +using ShiftedLaplacian [[maybe_unused]] = MueLu::ShiftedLaplacian; #endif #ifdef MUELU_SHIFTEDLAPLACIANOPERATOR_SHORT -using ShiftedLaplacianOperator [[maybe_unused]] = MueLu::ShiftedLaplacianOperator; +using ShiftedLaplacianOperator [[maybe_unused]] = MueLu::ShiftedLaplacianOperator; #endif #ifdef MUELU_SIMPLESMOOTHER_SHORT -using SimpleSmoother [[maybe_unused]] = MueLu::SimpleSmoother; +using SimpleSmoother [[maybe_unused]] = MueLu::SimpleSmoother; #endif #ifdef MUELU_SMOOTHER_SHORT -using Smoother [[maybe_unused]] = MueLu::Smoother; +using Smoother [[maybe_unused]] = MueLu::Smoother; #endif #ifdef MUELU_SMOOTHERBASE_SHORT -using SmootherBase [[maybe_unused]] = MueLu::SmootherBase; +using SmootherBase [[maybe_unused]] = MueLu::SmootherBase; #endif #ifdef MUELU_SMOOTHERFACTORY_SHORT -using SmootherFactory [[maybe_unused]] = MueLu::SmootherFactory; +using SmootherFactory [[maybe_unused]] = MueLu::SmootherFactory; #endif #ifdef MUELU_SMOOTHERPROTOTYPE_SHORT -using SmootherPrototype [[maybe_unused]] = MueLu::SmootherPrototype; +using SmootherPrototype [[maybe_unused]] = MueLu::SmootherPrototype; #endif #ifdef MUELU_SMOOVECCOALESCEDROPFACTORY_SHORT -using SmooVecCoalesceDropFactory [[maybe_unused]] = MueLu::SmooVecCoalesceDropFactory; +using SmooVecCoalesceDropFactory [[maybe_unused]] = MueLu::SmooVecCoalesceDropFactory; #endif #ifdef MUELU_SOLVERBASE_SHORT -using SolverBase [[maybe_unused]] = MueLu::SolverBase; +using SolverBase [[maybe_unused]] = MueLu::SolverBase; #endif #ifdef MUELU_STEEPESTDESCENTSOLVER_SHORT -using SteepestDescentSolver [[maybe_unused]] = MueLu::SteepestDescentSolver; +using SteepestDescentSolver [[maybe_unused]] = MueLu::SteepestDescentSolver; #endif #ifdef MUELU_STRATIMIKOSSMOOTHER_SHORT -using StratimikosSmoother [[maybe_unused]] = MueLu::StratimikosSmoother; +using StratimikosSmoother [[maybe_unused]] = MueLu::StratimikosSmoother; #endif #ifdef MUELU_STRUCTUREDAGGREGATIONFACTORY_SHORT -using 
StructuredAggregationFactory [[maybe_unused]] = MueLu::StructuredAggregationFactory; +using StructuredAggregationFactory [[maybe_unused]] = MueLu::StructuredAggregationFactory; #endif #ifdef MUELU_STRUCTUREDLINEDETECTIONFACTORY_SHORT -using StructuredLineDetectionFactory [[maybe_unused]] = MueLu::StructuredLineDetectionFactory; +using StructuredLineDetectionFactory [[maybe_unused]] = MueLu::StructuredLineDetectionFactory; #endif #ifdef MUELU_SUBBLOCKAFACTORY_SHORT -using SubBlockAFactory [[maybe_unused]] = MueLu::SubBlockAFactory; +using SubBlockAFactory [[maybe_unused]] = MueLu::SubBlockAFactory; #endif #ifdef MUELU_TEKOSMOOTHER_SHORT -using TekoSmoother [[maybe_unused]] = MueLu::TekoSmoother; +using TekoSmoother [[maybe_unused]] = MueLu::TekoSmoother; #endif #ifdef MUELU_TENTATIVEPFACTORY_SHORT -using TentativePFactory [[maybe_unused]] = MueLu::TentativePFactory; +using TentativePFactory [[maybe_unused]] = MueLu::TentativePFactory; #endif #ifdef MUELU_TENTATIVEPFACTORY_KOKKOS_SHORT -using TentativePFactory_kokkos [[maybe_unused]] = MueLu::TentativePFactory_kokkos; +using TentativePFactory_kokkos [[maybe_unused]] = MueLu::TentativePFactory_kokkos; #endif #ifdef MUELU_MATRIXFREETENTATIVEP_SHORT -using MatrixFreeTentativeP [[maybe_unused]] = MueLu::MatrixFreeTentativeP; +using MatrixFreeTentativeP [[maybe_unused]] = MueLu::MatrixFreeTentativeP; #endif #ifdef MUELU_MATRIXFREETENTATIVEPFACTORY_SHORT -using MatrixFreeTentativePFactory [[maybe_unused]] = MueLu::MatrixFreeTentativePFactory; +using MatrixFreeTentativePFactory [[maybe_unused]] = MueLu::MatrixFreeTentativePFactory; #endif #ifdef MUELU_THRESHOLDAFILTERFACTORY_SHORT -using ThresholdAFilterFactory [[maybe_unused]] = MueLu::ThresholdAFilterFactory; +using ThresholdAFilterFactory [[maybe_unused]] = MueLu::ThresholdAFilterFactory; #endif #ifdef MUELU_TOGGLECOORDINATESTRANSFERFACTORY_SHORT -using ToggleCoordinatesTransferFactory [[maybe_unused]] = MueLu::ToggleCoordinatesTransferFactory; +using 
ToggleCoordinatesTransferFactory [[maybe_unused]] = MueLu::ToggleCoordinatesTransferFactory; #endif #ifdef MUELU_TOGGLEPFACTORY_SHORT -using TogglePFactory [[maybe_unused]] = MueLu::TogglePFactory; +using TogglePFactory [[maybe_unused]] = MueLu::TogglePFactory; #endif #ifdef MUELU_TOPRAPFACTORY_SHORT -using TopRAPFactory [[maybe_unused]] = MueLu::TopRAPFactory; +using TopRAPFactory [[maybe_unused]] = MueLu::TopRAPFactory; #endif #ifdef MUELU_TOPSMOOTHERFACTORY_SHORT -using TopSmootherFactory [[maybe_unused]] = MueLu::TopSmootherFactory; +using TopSmootherFactory [[maybe_unused]] = MueLu::TopSmootherFactory; #endif #ifdef MUELU_TPETRAOPERATOR_SHORT -using TpetraOperator [[maybe_unused]] = MueLu::TpetraOperator; +using TpetraOperator [[maybe_unused]] = MueLu::TpetraOperator; #endif #ifdef MUELU_TRANSPFACTORY_SHORT -using TransPFactory [[maybe_unused]] = MueLu::TransPFactory; +using TransPFactory [[maybe_unused]] = MueLu::TransPFactory; #endif #ifdef MUELU_RFROMP_OR_TRANSP_SHORT -using RfromP_Or_TransP [[maybe_unused]] = MueLu::RfromP_Or_TransP; +using RfromP_Or_TransP [[maybe_unused]] = MueLu::RfromP_Or_TransP; #endif #ifdef MUELU_TRILINOSSMOOTHER_SHORT -using TrilinosSmoother [[maybe_unused]] = MueLu::TrilinosSmoother; +using TrilinosSmoother [[maybe_unused]] = MueLu::TrilinosSmoother; #endif #ifdef MUELU_UNSMOOSHFACTORY_SHORT -using UnsmooshFactory [[maybe_unused]] = MueLu::UnsmooshFactory; +using UnsmooshFactory [[maybe_unused]] = MueLu::UnsmooshFactory; #endif #ifdef MUELU_USERPFACTORY_SHORT -using UserPFactory [[maybe_unused]] = MueLu::UserPFactory; +using UserPFactory [[maybe_unused]] = MueLu::UserPFactory; #endif #ifdef MUELU_UTILITIES_SHORT -using Utilities [[maybe_unused]] = MueLu::Utilities; +using Utilities [[maybe_unused]] = MueLu::Utilities; #endif #ifdef MUELU_UTILITIESBASE_SHORT -using UtilitiesBase [[maybe_unused]] = MueLu::UtilitiesBase; +using UtilitiesBase [[maybe_unused]] = MueLu::UtilitiesBase; #endif #ifdef 
MUELU_VARIABLEDOFLAPLACIANFACTORY_SHORT -using VariableDofLaplacianFactory [[maybe_unused]] = MueLu::VariableDofLaplacianFactory; +using VariableDofLaplacianFactory [[maybe_unused]] = MueLu::VariableDofLaplacianFactory; #endif #ifdef MUELU_SEMICOARSENPFACTORY_SHORT -using SemiCoarsenPFactory [[maybe_unused]] = MueLu::SemiCoarsenPFactory; +using SemiCoarsenPFactory [[maybe_unused]] = MueLu::SemiCoarsenPFactory; #endif #ifdef MUELU_SEMICOARSENPFACTORY_KOKKOS_SHORT -using SemiCoarsenPFactory_kokkos [[maybe_unused]] = MueLu::SemiCoarsenPFactory_kokkos; +using SemiCoarsenPFactory_kokkos [[maybe_unused]] = MueLu::SemiCoarsenPFactory_kokkos; #endif #ifdef MUELU_UZAWASMOOTHER_SHORT -using UzawaSmoother [[maybe_unused]] = MueLu::UzawaSmoother; +using UzawaSmoother [[maybe_unused]] = MueLu::UzawaSmoother; #endif #ifdef MUELU_VISUALIZATIONHELPERS_SHORT -using VisualizationHelpers [[maybe_unused]] = MueLu::VisualizationHelpers; +using VisualizationHelpers [[maybe_unused]] = MueLu::VisualizationHelpers; #endif #ifdef MUELU_ZEROSUBBLOCKAFACTORY_SHORT -using ZeroSubBlockAFactory [[maybe_unused]] = MueLu::ZeroSubBlockAFactory; +using ZeroSubBlockAFactory [[maybe_unused]] = MueLu::ZeroSubBlockAFactory; #endif #ifdef MUELU_ZOLTANINTERFACE_SHORT -using ZoltanInterface [[maybe_unused]] = MueLu::ZoltanInterface; +using ZoltanInterface [[maybe_unused]] = MueLu::ZoltanInterface; #endif #ifdef MUELU_ZOLTAN2INTERFACE_SHORT -using Zoltan2Interface [[maybe_unused]] = MueLu::Zoltan2Interface; +using Zoltan2Interface [[maybe_unused]] = MueLu::Zoltan2Interface; #endif #ifdef MUELU_NODEPARTITIONINTERFACE_SHORT -using NodePartitionInterface [[maybe_unused]] = MueLu::NodePartitionInterface; +using NodePartitionInterface [[maybe_unused]] = MueLu::NodePartitionInterface; #endif #ifdef MUELU_XPETRAOPERATOR_SHORT -using XpetraOperator [[maybe_unused]] = MueLu::XpetraOperator; +using XpetraOperator [[maybe_unused]] = MueLu::XpetraOperator; #endif #ifdef MUELU_REFMAXWELL_SHORT -using RefMaxwell 
[[maybe_unused]] = MueLu::RefMaxwell; +using RefMaxwell [[maybe_unused]] = MueLu::RefMaxwell; #endif #ifdef MUELU_MAXWELL1_SHORT -using Maxwell1 [[maybe_unused]] = MueLu::Maxwell1; +using Maxwell1 [[maybe_unused]] = MueLu::Maxwell1; #endif #ifdef MUELU_MAXWELL_UTILS_SHORT -using Maxwell_Utils [[maybe_unused]] = MueLu::Maxwell_Utils; +using Maxwell_Utils [[maybe_unused]] = MueLu::Maxwell_Utils; #endif #ifdef MUELU_ADAPTIVESAMLPARAMETERLISTINTERPRETER_SHORT -typedef MueLu::AdaptiveSaMLParameterListInterpreter AdaptiveSaMLParameterListInterpreter; +typedef MueLu::AdaptiveSaMLParameterListInterpreter AdaptiveSaMLParameterListInterpreter; #endif #ifdef MUELU_FACTORYFACTORY_SHORT -typedef MueLu::FactoryFactory FactoryFactory; +typedef MueLu::FactoryFactory FactoryFactory; #endif #ifdef MUELU_MLPARAMETERLISTINTERPRETER_SHORT -typedef MueLu::MLParameterListInterpreter MLParameterListInterpreter; +typedef MueLu::MLParameterListInterpreter MLParameterListInterpreter; #endif #ifdef MUELU_PARAMETERLISTINTERPRETER_SHORT -typedef MueLu::ParameterListInterpreter ParameterListInterpreter; +typedef MueLu::ParameterListInterpreter ParameterListInterpreter; #endif #ifdef MUELU_TWOLEVELMATLABFACTORY_SHORT -typedef MueLu::TwoLevelMatlabFactory TwoLevelMatlabFactory; +typedef MueLu::TwoLevelMatlabFactory TwoLevelMatlabFactory; #endif #ifdef MUELU_SINGLELEVELMATLABFACTORY_SHORT -typedef MueLu::SingleLevelMatlabFactory SingleLevelMatlabFactory; +typedef MueLu::SingleLevelMatlabFactory SingleLevelMatlabFactory; #endif #ifdef MUELU_MATLABSMOOTHER_SHORT -typedef MueLu::MatlabSmoother MatlabSmoother; +typedef MueLu::MatlabSmoother MatlabSmoother; #endif diff --git a/packages/muelu/src/Headers/MueLu_Version.hpp b/packages/muelu/src/Headers/MueLu_Version.hpp index 0fbda392c252..42996fc8d57d 100644 --- a/packages/muelu/src/Headers/MueLu_Version.hpp +++ b/packages/muelu/src/Headers/MueLu_Version.hpp @@ -55,10 +55,10 @@ namespace MueLu { - inline std::string const Version() { - return("MueLu 
development"); - } +inline std::string const Version() { + return ("MueLu development"); +} -} // namespace MueLu +} // namespace MueLu -#endif //ifndef MUELU_VERSION_HPP +#endif //ifndef MUELU_VERSION_HPP diff --git a/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassBase_decl.hpp b/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassBase_decl.hpp index f9d9d8b9ff5b..97fdaf1670b1 100644 --- a/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassBase_decl.hpp +++ b/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassBase_decl.hpp @@ -51,52 +51,48 @@ namespace MueLu { - template - class FacadeClassBase - : public virtual BaseClass{ +template +class FacadeClassBase + : public virtual BaseClass { #undef MUELU_FACADECLASSBASE_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors - //@{ + public: + //! @name Constructors/Destructors + //@{ - //! Constructor. - FacadeClassBase(); + //! Constructor. + FacadeClassBase(); - //! Destructor. - virtual ~FacadeClassBase() { } + //! Destructor. + virtual ~FacadeClassBase() {} - //@} + //@} - /*! @brief Set parameter list for FacadeClass (abstract member). + /*! @brief Set parameter list for FacadeClass (abstract member). @param[in] paramList: ParameterList containing the MueLu parameters. */ - virtual Teuchos::RCP SetParameterList(const Teuchos::ParameterList& paramList) = 0; + virtual Teuchos::RCP SetParameterList(const Teuchos::ParameterList& paramList) = 0; - protected: - - /*! @brief Replace all occurrences of search string "search" by the string in "replace" given the string "subject" + protected: + /*! 
@brief Replace all occurrences of search string "search" by the string in "replace" given the string "subject" */ - std::string ReplaceString(std::string& subject, const std::string& search, const std::string& replace) { - size_t pos = 0; - while ((pos = subject.find(search, pos)) != std::string::npos) { - subject.replace(pos, search.length(), replace); - pos += replace.length(); - } - return subject; + std::string ReplaceString(std::string& subject, const std::string& search, const std::string& replace) { + size_t pos = 0; + while ((pos = subject.find(search, pos)) != std::string::npos) { + subject.replace(pos, search.length(), replace); + pos += replace.length(); } + return subject; + } +}; - }; - -} // namespace MueLu +} // namespace MueLu #define MUELU_FACADECLASSBASE_SHORT - - #endif /* PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_MUELU_FACADECLASSBASE_DECL_HPP_ */ diff --git a/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassBase_def.hpp b/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassBase_def.hpp index fd2375c1ccc2..6ef823b40fcb 100644 --- a/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassBase_def.hpp +++ b/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassBase_def.hpp @@ -50,15 +50,14 @@ #include #include - #include "MueLu_Exceptions.hpp" #include "MueLu_FacadeClassBase_decl.hpp" namespace MueLu { - template - FacadeClassBase::FacadeClassBase() { - } +template +FacadeClassBase::FacadeClassBase() { } +} // namespace MueLu #endif /* PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_MUELU_FACADECLASSBASE_DEF_HPP_ */ diff --git a/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassFactory_decl.hpp b/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassFactory_decl.hpp index 96aa53ca3d7e..15ddc112f9c9 100644 --- a/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassFactory_decl.hpp +++ b/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassFactory_decl.hpp @@ -56,51 +56,47 @@ namespace MueLu { - 
template - class FacadeClassFactory - : public virtual BaseClass{ +template +class FacadeClassFactory + : public virtual BaseClass { #undef MUELU_FACADECLASSFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors - //@{ + public: + //! @name Constructors/Destructors + //@{ - //! Constructor. - FacadeClassFactory(); + //! Constructor. + FacadeClassFactory(); - //! Destructor. - virtual ~FacadeClassFactory() { } + //! Destructor. + virtual ~FacadeClassFactory() {} - //@} + //@} - /*! @brief Set parameter list for FacadeClassFactory interpreter. + /*! @brief Set parameter list for FacadeClassFactory interpreter. @param[in] paramList: ParameterList containing the MueLu parameters. */ - Teuchos::RCP SetParameterList(const Teuchos::ParameterList& paramList); + Teuchos::RCP SetParameterList(const Teuchos::ParameterList& paramList); - /*! @brief Register new facade class + /*! @brief Register new facade class * * Register new externally provided facade class in FacadeClassFactory * * @param[in] name: name that is used to access Facade class * @param[in] facadeclass: RCP pointer to facade class instance */ - void RegisterFacadeClass(std::string name, Teuchos::RCP > facadeclass) { - facadeClasses_[name] = facadeclass; - } + void RegisterFacadeClass(std::string name, Teuchos::RCP > facadeclass) { + facadeClasses_[name] = facadeclass; + } - private: + private: + std::map > > facadeClasses_; +}; - std::map > > facadeClasses_; - - }; - -} // namespace MueLu +} // namespace MueLu #define MUELU_FACADECLASSFACTORY_SHORT - - #endif /* PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_MUELU_FACADECLASSFACTORY_DECL_HPP_ */ diff --git a/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassFactory_def.hpp b/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassFactory_def.hpp index f093d7b7f11e..330df184b893 100644 --- a/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassFactory_def.hpp +++ 
b/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassFactory_def.hpp @@ -50,7 +50,6 @@ #include #include - #include "MueLu_Exceptions.hpp" #include "MueLu_FacadeClassBase.hpp" @@ -61,34 +60,32 @@ namespace MueLu { - template - FacadeClassFactory::FacadeClassFactory() { - facadeClasses_["Simple"] = Teuchos::rcp(new FacadeSimple()); - facadeClasses_["BGS2x2"] = Teuchos::rcp(new FacadeBGS2x2()); - } - +template +FacadeClassFactory::FacadeClassFactory() { + facadeClasses_["Simple"] = Teuchos::rcp(new FacadeSimple()); + facadeClasses_["BGS2x2"] = Teuchos::rcp(new FacadeBGS2x2()); +} - template - Teuchos::RCP FacadeClassFactory::SetParameterList(const ParameterList& paramList) { +template +Teuchos::RCP FacadeClassFactory::SetParameterList(const ParameterList& paramList) { + TEUCHOS_TEST_FOR_EXCEPTION(paramList.isParameter("MueLu preconditioner") == false, MueLu::Exceptions::RuntimeError, "FacadeClassFactory: undefined MueLu preconditioner. Set the \"MueLu preconditioner\" parameter correctly in your input file."); + TEUCHOS_TEST_FOR_EXCEPTION(paramList.get("MueLu preconditioner") == "undefined", MueLu::Exceptions::RuntimeError, "FacadeClassFactory: undefined MueLu preconditioner. Set the \"MueLu preconditioner\" parameter correctly in your input file."); - TEUCHOS_TEST_FOR_EXCEPTION(paramList.isParameter("MueLu preconditioner") == false, MueLu::Exceptions::RuntimeError, "FacadeClassFactory: undefined MueLu preconditioner. Set the \"MueLu preconditioner\" parameter correctly in your input file."); - TEUCHOS_TEST_FOR_EXCEPTION(paramList.get("MueLu preconditioner") == "undefined", MueLu::Exceptions::RuntimeError, "FacadeClassFactory: undefined MueLu preconditioner. 
Set the \"MueLu preconditioner\" parameter correctly in your input file."); + std::string precMueLu = paramList.get("MueLu preconditioner"); - std::string precMueLu = paramList.get("MueLu preconditioner"); - - // could not find requested facade class - if(facadeClasses_.find(precMueLu) == facadeClasses_.end()) { - GetOStream(Errors) << "FacadeClassFactory: Could not find facade class \"" << precMueLu << "\"!" << std::endl; - GetOStream(Errors) << "The available facade classes are:" << std::endl; - for(typename std::map > >::const_iterator it =facadeClasses_.begin(); it != facadeClasses_.end(); it++){ - GetOStream(Errors) << " " << it->first << std::endl; - } - TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "FacadeClassFactory: Could not find facade class \"" << precMueLu << "\"."); + // could not find requested facade class + if (facadeClasses_.find(precMueLu) == facadeClasses_.end()) { + GetOStream(Errors) << "FacadeClassFactory: Could not find facade class \"" << precMueLu << "\"!" 
<< std::endl; + GetOStream(Errors) << "The available facade classes are:" << std::endl; + for (typename std::map > >::const_iterator it = facadeClasses_.begin(); it != facadeClasses_.end(); it++) { + GetOStream(Errors) << " " << it->first << std::endl; } - - return facadeClasses_[precMueLu]->SetParameterList(paramList); + TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "FacadeClassFactory: Could not find facade class \"" << precMueLu << "\"."); } -} // end namespace MueLu + return facadeClasses_[precMueLu]->SetParameterList(paramList); +} + +} // end namespace MueLu #endif /* PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_MUELU_FACADECLASSFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Interface/FacadeClasses/MueLu_Facade_BGS2x2_decl.hpp b/packages/muelu/src/Interface/FacadeClasses/MueLu_Facade_BGS2x2_decl.hpp index 8885d500ed3c..83daeb0a32b2 100644 --- a/packages/muelu/src/Interface/FacadeClasses/MueLu_Facade_BGS2x2_decl.hpp +++ b/packages/muelu/src/Interface/FacadeClasses/MueLu_Facade_BGS2x2_decl.hpp @@ -55,37 +55,34 @@ namespace MueLu { - template - class FacadeBGS2x2 : public FacadeClassBase { +template +class FacadeBGS2x2 : public FacadeClassBase { #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors - //@{ + public: + //! @name Constructors/Destructors + //@{ - //! Constructor. - FacadeBGS2x2(); + //! Constructor. + FacadeBGS2x2(); - //! Destructor. - virtual ~FacadeBGS2x2() { } + //! Destructor. + virtual ~FacadeBGS2x2() {} - //@} + //@} - /*! @brief Set parameter list for FacadeClass interpreter. + /*! @brief Set parameter list for FacadeClass interpreter. @param[in] paramList: ParameterList containing the MueLu parameters for chosen facade class. 
*/ - Teuchos::RCP SetParameterList(const Teuchos::ParameterList& paramList); - - private: - - }; - -} // namespace MueLu + Teuchos::RCP SetParameterList(const Teuchos::ParameterList& paramList); + private: +}; +} // namespace MueLu #endif /* PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_BGS2x2_DECL_HPP_ */ diff --git a/packages/muelu/src/Interface/FacadeClasses/MueLu_Facade_BGS2x2_def.hpp b/packages/muelu/src/Interface/FacadeClasses/MueLu_Facade_BGS2x2_def.hpp index 3d7beb7d2909..6d5a1bc1f8e1 100644 --- a/packages/muelu/src/Interface/FacadeClasses/MueLu_Facade_BGS2x2_def.hpp +++ b/packages/muelu/src/Interface/FacadeClasses/MueLu_Facade_BGS2x2_def.hpp @@ -50,349 +50,343 @@ #include #include - #include "MueLu_Exceptions.hpp" #include "MueLu_Facade_BGS2x2_decl.hpp" namespace MueLu { - template - FacadeBGS2x2::FacadeBGS2x2() { - } - - - template - Teuchos::RCP FacadeBGS2x2::SetParameterList(const ParameterList& paramList) { - - // obtain ParameterList with default input parameters for this facade class - // Note all parameters are of type string (we use it for string replacement) - std::string defaultString = -"" -"" -"" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" -; - Teuchos::RCP defaultList = Teuchos::getParametersFromXmlString(defaultString); - // validate user input parameters (and set defaults if necessary) - Teuchos::ParameterList inputParameters = paramList; - inputParameters.validateParametersAndSetDefaults(*defaultList); - TEUCHOS_TEST_FOR_EXCEPTION(inputParameters.get("MueLu preconditioner") == "undefined", MueLu::Exceptions::RuntimeError, "FacadeBGS2x2: undefined MueLu preconditioner. 
Set the \"MueLu preconditioner\" parameter correctly in your input file."); - - // create copy of template string which is updated with in-place string replacements - // template string for preconditioner layout (factory based parameters) - std::string finalString = +template +FacadeBGS2x2::FacadeBGS2x2() { +}template +Teuchos::RCP FacadeBGS2x2::SetParameterList(const ParameterList& paramList) { + // obtain ParameterList with default input parameters for this facade class + // Note all parameters are of type string (we use it for string replacement) + std::string defaultString = + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + ""; + Teuchos::RCP defaultList = Teuchos::getParametersFromXmlString(defaultString); + // validate user input parameters (and set defaults if necessary) + Teuchos::ParameterList inputParameters = paramList; + inputParameters.validateParametersAndSetDefaults(*defaultList); + TEUCHOS_TEST_FOR_EXCEPTION(inputParameters.get("MueLu preconditioner") == "undefined", MueLu::Exceptions::RuntimeError, "FacadeBGS2x2: undefined MueLu preconditioner. 
Set the \"MueLu preconditioner\" parameter correctly in your input file."); - // logical code for more complicated distinctions + // create copy of template string which is updated with in-place string replacements + // template string for preconditioner layout (factory based parameters) + std::string finalString = + "" + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + "" + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + "" + " " + " " + "" + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + ""; - std::string smoother1 = inputParameters.get("Block 1: smoother"); - if(smoother1 == "ILU") { - this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooILUFact1"); - } else if (smoother1 == "Symmetric Gauss-Seidel" || smoother1 == "SGS") { - this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", "Symmetric Gauss-Seidel"); - this->ReplaceString(finalString, "XYZSmoother1XYZ", 
"mySmooFact1"); - } else if (smoother1 == "Symmetric Gauss-Seidel" || smoother1 == "GS") { - this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", "Gauss-Seidel"); - this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); - } else if (smoother1 == "Jacobi") { - this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", "Jacobi"); - this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); - } else if (smoother1 == "Direct") { - this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooDirectFact1"); - } else { - this->GetOStream(Errors) << "Invalid smoother type for block 1: " << smoother1 << ". Valid options are: \"SGS\", \"GS\", \"Jacobi\", \"ILU\" or \"Direct\"." << std::endl; - } + // logical code for more complicated distinctions - std::string smoother2 = inputParameters.get("Block 2: smoother"); - if(smoother2 == "ILU") { - this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooILUFact2"); - } else if (smoother2 == "Symmetric Gauss-Seidel" || smoother2 == "SGS") { - this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", "Symmetric Gauss-Seidel"); - this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); - } else if (smoother2 == "Symmetric Gauss-Seidel" || smoother2 == "GS") { - this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", "Gauss-Seidel"); - this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); - } else if (smoother2 == "Jacobi") { - this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", "Gauss-Seidel"); - this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); - } else if (smoother2 == "Direct") { - this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooDirectFact2"); - } else { - this->GetOStream(Errors) << "Invalid smoother type for block 2: " << smoother2 << ". Valid options are: \"SGS\", \"GS\", \"Jacobi\", \"ILU\" or \"Direct\"." 
<< std::endl; - } + std::string smoother1 = inputParameters.get("Block 1: smoother"); + if (smoother1 == "ILU") { + this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooILUFact1"); + } else if (smoother1 == "Symmetric Gauss-Seidel" || smoother1 == "SGS") { + this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", "Symmetric Gauss-Seidel"); + this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); + } else if (smoother1 == "Symmetric Gauss-Seidel" || smoother1 == "GS") { + this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", "Gauss-Seidel"); + this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); + } else if (smoother1 == "Jacobi") { + this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", "Jacobi"); + this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); + } else if (smoother1 == "Direct") { + this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooDirectFact1"); + } else { + this->GetOStream(Errors) << "Invalid smoother type for block 1: " << smoother1 << ". Valid options are: \"SGS\", \"GS\", \"Jacobi\", \"ILU\" or \"Direct\"." 
<< std::endl; + } - if(inputParameters.get("Block 1: transfer smoothing") == true) { - this->ReplaceString(finalString, "XXXBlock 1: prolongatorYYY", "myPFact1"); - this->ReplaceString(finalString, "XXXBlock 1: restrictor YYY", "myRFact1"); - } else { - this->ReplaceString(finalString, "XXXBlock 1: prolongatorYYY", "myTentativePFact1"); - this->ReplaceString(finalString, "XXXBlock 1: restrictor YYY", "myTransPFact1"); - } - if(inputParameters.get("Block 2: transfer smoothing") == true) { - this->ReplaceString(finalString, "XXXBlock 2: prolongatorYYY", "myPFact2"); - this->ReplaceString(finalString, "XXXBlock 2: restrictor YYY", "myRFact2"); - } else { - this->ReplaceString(finalString, "XXXBlock 2: prolongatorYYY", "myTentativePFact2"); - this->ReplaceString(finalString, "XXXBlock 2: restrictor YYY", "myTransPFact2"); - } + std::string smoother2 = inputParameters.get("Block 2: smoother"); + if (smoother2 == "ILU") { + this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooILUFact2"); + } else if (smoother2 == "Symmetric Gauss-Seidel" || smoother2 == "SGS") { + this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", "Symmetric Gauss-Seidel"); + this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); + } else if (smoother2 == "Symmetric Gauss-Seidel" || smoother2 == "GS") { + this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", "Gauss-Seidel"); + this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); + } else if (smoother2 == "Jacobi") { + this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", "Gauss-Seidel"); + this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); + } else if (smoother2 == "Direct") { + this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooDirectFact2"); + } else { + this->GetOStream(Errors) << "Invalid smoother type for block 2: " << smoother2 << ". Valid options are: \"SGS\", \"GS\", \"Jacobi\", \"ILU\" or \"Direct\"." 
<< std::endl; + } - // end logical code + if (inputParameters.get("Block 1: transfer smoothing") == true) { + this->ReplaceString(finalString, "XXXBlock 1: prolongatorYYY", "myPFact1"); + this->ReplaceString(finalString, "XXXBlock 1: restrictor YYY", "myRFact1"); + } else { + this->ReplaceString(finalString, "XXXBlock 1: prolongatorYYY", "myTentativePFact1"); + this->ReplaceString(finalString, "XXXBlock 1: restrictor YYY", "myTransPFact1"); + } + if (inputParameters.get("Block 2: transfer smoothing") == true) { + this->ReplaceString(finalString, "XXXBlock 2: prolongatorYYY", "myPFact2"); + this->ReplaceString(finalString, "XXXBlock 2: restrictor YYY", "myRFact2"); + } else { + this->ReplaceString(finalString, "XXXBlock 2: prolongatorYYY", "myTentativePFact2"); + this->ReplaceString(finalString, "XXXBlock 2: restrictor YYY", "myTransPFact2"); + } - // loop over all input parameters - for(Teuchos::ParameterList::ConstIterator it = inputParameters.begin(); it != inputParameters.end(); it++) { - // form replacement string - std::string par_name = inputParameters.name(it); - std::stringstream ss; - ss << "XXX" << par_name << "YYY"; + // end logical code - // update final string with parameters - Teuchos::ParameterEntry par_entry = inputParameters.entry(it); - this->ReplaceString(finalString, - ss.str(), Teuchos::toString(par_entry.getAny())); - } + // loop over all input parameters + for (Teuchos::ParameterList::ConstIterator it = inputParameters.begin(); it != inputParameters.end(); it++) { + // form replacement string + std::string par_name = inputParameters.name(it); + std::stringstream ss; + ss << "XXX" << par_name << "YYY"; - Teuchos::RCP ret = Teuchos::getParametersFromXmlString(finalString); - return ret; + // update final string with parameters + Teuchos::ParameterEntry par_entry = inputParameters.entry(it); + this->ReplaceString(finalString, + ss.str(), Teuchos::toString(par_entry.getAny())); } -} // end namespace MueLu -#endif // 
PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_BGS2x2_DEF_HPP_ + Teuchos::RCP ret = Teuchos::getParametersFromXmlString(finalString); + return ret; +} + +} // end namespace MueLu +#endif // PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_BGS2x2_DEF_HPP_ diff --git a/packages/muelu/src/Interface/FacadeClasses/MueLu_Facade_Simple_decl.hpp b/packages/muelu/src/Interface/FacadeClasses/MueLu_Facade_Simple_decl.hpp index 7aa865679bf6..2b16ece4c893 100644 --- a/packages/muelu/src/Interface/FacadeClasses/MueLu_Facade_Simple_decl.hpp +++ b/packages/muelu/src/Interface/FacadeClasses/MueLu_Facade_Simple_decl.hpp @@ -55,37 +55,34 @@ namespace MueLu { - template - class FacadeSimple : public FacadeClassBase { +template +class FacadeSimple : public FacadeClassBase { #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors - //@{ + public: + //! @name Constructors/Destructors + //@{ - //! Constructor. - FacadeSimple(); + //! Constructor. + FacadeSimple(); - //! Destructor. - virtual ~FacadeSimple() { } + //! Destructor. + virtual ~FacadeSimple() {} - //@} + //@} - /*! @brief Set parameter list for FacadeClass interpreter. + /*! @brief Set parameter list for FacadeClass interpreter. @param[in] paramList: ParameterList containing the MueLu parameters for chosen facade class. 
*/ - Teuchos::RCP SetParameterList(const Teuchos::ParameterList& paramList); - - private: - - }; - -} // namespace MueLu + Teuchos::RCP SetParameterList(const Teuchos::ParameterList& paramList); + private: +}; +} // namespace MueLu #endif /* PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_Simple_DECL_HPP_ */ diff --git a/packages/muelu/src/Interface/FacadeClasses/MueLu_Facade_Simple_def.hpp b/packages/muelu/src/Interface/FacadeClasses/MueLu_Facade_Simple_def.hpp index 7a69633e3341..24001ee83766 100644 --- a/packages/muelu/src/Interface/FacadeClasses/MueLu_Facade_Simple_def.hpp +++ b/packages/muelu/src/Interface/FacadeClasses/MueLu_Facade_Simple_def.hpp @@ -50,355 +50,349 @@ #include #include - #include "MueLu_Exceptions.hpp" #include "MueLu_Facade_Simple_decl.hpp" namespace MueLu { - template - FacadeSimple::FacadeSimple() { - } - - - template - Teuchos::RCP FacadeSimple::SetParameterList(const ParameterList& paramList) { - - // obtain ParameterList with default input parameters for this facade class - // Note all parameters are of type string (we use it for string replacement) - std::string defaultString = -"" -"" -"" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" -; - Teuchos::RCP defaultList = Teuchos::getParametersFromXmlString(defaultString); - // validate user input parameters (and set defaults if necessary) - Teuchos::ParameterList inputParameters = paramList; - inputParameters.validateParametersAndSetDefaults(*defaultList); - TEUCHOS_TEST_FOR_EXCEPTION(inputParameters.get("MueLu preconditioner") == "undefined", MueLu::Exceptions::RuntimeError, "FacadeSimple: undefined MueLu preconditioner. 
Set the \"MueLu preconditioner\" parameter correctly in your input file."); - - // create copy of template string which is updated with in-place string replacements - // template string for preconditioner layout (factory based parameters) - std::string finalString = +template +FacadeSimple::FacadeSimple() { +}template +Teuchos::RCP FacadeSimple::SetParameterList(const ParameterList& paramList) { + // obtain ParameterList with default input parameters for this facade class + // Note all parameters are of type string (we use it for string replacement) + std::string defaultString = + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + ""; + Teuchos::RCP defaultList = Teuchos::getParametersFromXmlString(defaultString); + // validate user input parameters (and set defaults if necessary) + Teuchos::ParameterList inputParameters = paramList; + inputParameters.validateParametersAndSetDefaults(*defaultList); + TEUCHOS_TEST_FOR_EXCEPTION(inputParameters.get("MueLu preconditioner") == "undefined", MueLu::Exceptions::RuntimeError, "FacadeSimple: undefined MueLu preconditioner. 
Set the \"MueLu preconditioner\" parameter correctly in your input file."); - // logical code for more complicated distinctions + // create copy of template string which is updated with in-place string replacements + // template string for preconditioner layout (factory based parameters) + std::string finalStringstd::string smoother1 = inputParameters.get("Block 1: smoother"); - if(smoother1 == "ILU") { - this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooILUFact1"); - } else if (smoother1 == "Symmetric Gauss-Seidel" || smoother1 == "SGS") { - this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", "Symmetric Gauss-Seidel"); - this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); - } else if (smoother1 == "Symmetric Gauss-Seidel" || smoother1 == "GS") { - this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", "Gauss-Seidel"); - this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); - } else if (smoother1 == "Jacobi") { - this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", "Jacobi"); - this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); - } else if (smoother1 == "Direct") { - this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooDirectFact1"); - } else { - this->GetOStream(Errors) << "Invalid smoother type for block 1: " << smoother1 << ". Valid options are: \"SGS\", \"GS\", \"Jacobi\", \"ILU\" or \"Direct\"." 
<< std::endl; - } + // logical code for more complicated distinctions - std::string smoother2 = inputParameters.get("Block 2: smoother"); - if(smoother2 == "ILU") { - this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooILUFact2"); - } else if (smoother2 == "Symmetric Gauss-Seidel" || smoother2 == "SGS") { - this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", "Symmetric Gauss-Seidel"); - this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); - } else if (smoother2 == "Symmetric Gauss-Seidel" || smoother2 == "GS") { - this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", "Gauss-Seidel"); - this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); - } else if (smoother2 == "Jacobi") { - this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", "Jacobi"); - this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); - } else if (smoother2 == "Direct") { - this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooDirectFact2"); - } else { - this->GetOStream(Errors) << "Invalid smoother type for block 2: " << smoother2 << ". Valid options are: \"SGS\", \"GS\", \"Jacobi\", \"ILU\" or \"Direct\"." 
<< std::endl; - } + std::string smoother1 = inputParameters.get("Block 1: smoother"); + if (smoother1 == "ILU") { + this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooILUFact1"); + } else if (smoother1 == "Symmetric Gauss-Seidel" || smoother1 == "SGS") { + this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", "Symmetric Gauss-Seidel"); + this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); + } else if (smoother1 == "Symmetric Gauss-Seidel" || smoother1 == "GS") { + this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", "Gauss-Seidel"); + this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); + } else if (smoother1 == "Jacobi") { + this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", "Jacobi"); + this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); + } else if (smoother1 == "Direct") { + this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooDirectFact1"); + } else { + this->GetOStream(Errors) << "Invalid smoother type for block 1: " << smoother1 << ". Valid options are: \"SGS\", \"GS\", \"Jacobi\", \"ILU\" or \"Direct\"." 
<< std::endl; + } - if(inputParameters.get("Block 1: transfer smoothing") == true) { - this->ReplaceString(finalString, "XXXBlock 1: prolongatorYYY", "myPFact1"); - this->ReplaceString(finalString, "XXXBlock 1: restrictor YYY", "myRFact1"); - } else { - this->ReplaceString(finalString, "XXXBlock 1: prolongatorYYY", "myTentativePFact1"); - this->ReplaceString(finalString, "XXXBlock 1: restrictor YYY", "myTransPFact1"); - } - if(inputParameters.get("Block 2: transfer smoothing") == true) { - this->ReplaceString(finalString, "XXXBlock 2: prolongatorYYY", "myPFact2"); - this->ReplaceString(finalString, "XXXBlock 2: restrictor YYY", "myRFact2"); - } else { - this->ReplaceString(finalString, "XXXBlock 2: prolongatorYYY", "myTentativePFact2"); - this->ReplaceString(finalString, "XXXBlock 2: restrictor YYY", "myTransPFact2"); - } + std::string smoother2 = inputParameters.get("Block 2: smoother"); + if (smoother2 == "ILU") { + this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooILUFact2"); + } else if (smoother2 == "Symmetric Gauss-Seidel" || smoother2 == "SGS") { + this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", "Symmetric Gauss-Seidel"); + this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); + } else if (smoother2 == "Symmetric Gauss-Seidel" || smoother2 == "GS") { + this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", "Gauss-Seidel"); + this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); + } else if (smoother2 == "Jacobi") { + this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", "Jacobi"); + this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); + } else if (smoother2 == "Direct") { + this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooDirectFact2"); + } else { + this->GetOStream(Errors) << "Invalid smoother type for block 2: " << smoother2 << ". Valid options are: \"SGS\", \"GS\", \"Jacobi\", \"ILU\" or \"Direct\"." 
<< std::endl; + } - // end logical code + if (inputParameters.get("Block 1: transfer smoothing") == true) { + this->ReplaceString(finalString, "XXXBlock 1: prolongatorYYY", "myPFact1"); + this->ReplaceString(finalString, "XXXBlock 1: restrictor YYY", "myRFact1"); + } else { + this->ReplaceString(finalString, "XXXBlock 1: prolongatorYYY", "myTentativePFact1"); + this->ReplaceString(finalString, "XXXBlock 1: restrictor YYY", "myTransPFact1"); + } + if (inputParameters.get("Block 2: transfer smoothing") == true) { + this->ReplaceString(finalString, "XXXBlock 2: prolongatorYYY", "myPFact2"); + this->ReplaceString(finalString, "XXXBlock 2: restrictor YYY", "myRFact2"); + } else { + this->ReplaceString(finalString, "XXXBlock 2: prolongatorYYY", "myTentativePFact2"); + this->ReplaceString(finalString, "XXXBlock 2: restrictor YYY", "myTransPFact2"); + } - // loop over all input parameters - for(Teuchos::ParameterList::ConstIterator it = inputParameters.begin(); it != inputParameters.end(); it++) { - // form replacement string - std::string par_name = inputParameters.name(it); - std::stringstream ss; - ss << "XXX" << par_name << "YYY"; + // end logical code - // update final string with parameters - Teuchos::ParameterEntry par_entry = inputParameters.entry(it); - this->ReplaceString(finalString, - ss.str(), Teuchos::toString(par_entry.getAny())); - } + // loop over all input parameters + for (Teuchos::ParameterList::ConstIterator it = inputParameters.begin(); it != inputParameters.end(); it++) { + // form replacement string + std::string par_name = inputParameters.name(it); + std::stringstream ss; + ss << "XXX" << par_name << "YYY"; - Teuchos::RCP ret = Teuchos::getParametersFromXmlString(finalString); - return ret; + // update final string with parameters + Teuchos::ParameterEntry par_entry = inputParameters.entry(it); + this->ReplaceString(finalString, + ss.str(), Teuchos::toString(par_entry.getAny())); } -} // end namespace MueLu -#endif // 
PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_Simple_DEF_HPP_ + Teuchos::RCP ret = Teuchos::getParametersFromXmlString(finalString); + return ret; +} + +} // end namespace MueLu +#endif // PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_Simple_DEF_HPP_ diff --git a/packages/muelu/src/Interface/MueLu_AdaptiveSaMLParameterListInterpreter_decl.hpp b/packages/muelu/src/Interface/MueLu_AdaptiveSaMLParameterListInterpreter_decl.hpp index e3dfc36150a6..ae3869cfa013 100644 --- a/packages/muelu/src/Interface/MueLu_AdaptiveSaMLParameterListInterpreter_decl.hpp +++ b/packages/muelu/src/Interface/MueLu_AdaptiveSaMLParameterListInterpreter_decl.hpp @@ -37,7 +37,7 @@ namespace MueLu { - /* +/* Utility that from an existing Teuchos::ParameterList creates a new list, in which level-specific parameters are replaced with sublists. @@ -54,154 +54,152 @@ namespace MueLu { smoother: type = symmetric Gauss-Seidel smoother: sweeps = 1 */ - // This function is a copy of ML_CreateSublists to avoid dependency on ML - // Throw exception on error instead of exit() - //void CreateSublists(const ParameterList &List, ParameterList &newList); - +// This function is a copy of ML_CreateSublists to avoid dependency on ML +// Throw exception on error instead of exit() +//void CreateSublists(const ParameterList &List, ParameterList &newList); - /*! +/*! @class AdaptiveSAMLParameterListInterpreter class. @brief Class that accepts ML-style parameters and builds a MueLu preconditioner. This interpreter uses the same default values as ML. This allows to compare ML/MueLu results */ - template - class AdaptiveSaMLParameterListInterpreter : - public HierarchyManager { +template +class AdaptiveSaMLParameterListInterpreter : public HierarchyManager { #undef MUELU_ADAPTIVESAMLPARAMETERLISTINTERPRETER_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. 
- AdaptiveSaMLParameterListInterpreter() : nullspace_(NULL), blksize_(1) { } + //! Constructor. + AdaptiveSaMLParameterListInterpreter() + : nullspace_(NULL) + , blksize_(1) {} - //! Constructor. - //! @param paramList: parameter list with ML parameters - //! @param nspVector: MultiVector with fine-level nullspace approximation - //! @param factoryList: vector with RCP of FactoryBase objects - //! - //! The factories in factoryList allow the user to add user-specific factories to the MueLu Hierarchy. - //! The idea is to be able to add some factories that write out some debug information etc. which are not handled by the ML - //! Parameter List itself. See information about the RAPFactory::AddTransferFactory method, too! - AdaptiveSaMLParameterListInterpreter(Teuchos::ParameterList & paramList,std::vector > factoryList = std::vector >(0)); + //! Constructor. + //! @param paramList: parameter list with ML parameters + //! @param nspVector: MultiVector with fine-level nullspace approximation + //! @param factoryList: vector with RCP of FactoryBase objects + //! + //! The factories in factoryList allow the user to add user-specific factories to the MueLu Hierarchy. + //! The idea is to be able to add some factories that write out some debug information etc. which are not handled by the ML + //! Parameter List itself. See information about the RAPFactory::AddTransferFactory method, too! + AdaptiveSaMLParameterListInterpreter(Teuchos::ParameterList& paramList, std::vector > factoryList = std::vector >(0)); - //! Constructor. - //! @param xmlFileName: file name for XML file with ML parameters - //! @param factoryList: vector with RCP of FactoryBase objects - //! - //! The factories in factoryList allow the user to add user-specific factories to the MueLu Hierarchy. - //! The idea is to be able to add some factories that write out some debug information etc. which are not handled by the ML - //! Parameter List itself. 
See information about the RAPFactory::AddTransferFactory method, too! - AdaptiveSaMLParameterListInterpreter(const std::string & xmlFileName,std::vector > factoryList = std::vector >(0)); + //! Constructor. + //! @param xmlFileName: file name for XML file with ML parameters + //! @param factoryList: vector with RCP of FactoryBase objects + //! + //! The factories in factoryList allow the user to add user-specific factories to the MueLu Hierarchy. + //! The idea is to be able to add some factories that write out some debug information etc. which are not handled by the ML + //! Parameter List itself. See information about the RAPFactory::AddTransferFactory method, too! + AdaptiveSaMLParameterListInterpreter(const std::string& xmlFileName, std::vector > factoryList = std::vector >(0)); - //! Destructor. - virtual ~AdaptiveSaMLParameterListInterpreter() { } + //! Destructor. + virtual ~AdaptiveSaMLParameterListInterpreter() {} - //@} + //@} - //@{ + //@{ - void SetParameterList(const Teuchos::ParameterList & paramList); + void SetParameterList(const Teuchos::ParameterList& paramList); - //@} + //@} - //@{ + //@{ - //! Setup Hierarchy object - virtual void SetupHierarchy(Hierarchy & H) const; + //! Setup Hierarchy object + virtual void SetupHierarchy(Hierarchy& H) const; - //@} + //@} - //@{ + //@{ - //! @name Handling of additional user-specific transfer factories - //@{ - /*! @brief Add transfer factory in the end of list of transfer factories for RAPFactory. + //! @name Handling of additional user-specific transfer factories + //@{ + /*! @brief Add transfer factory in the end of list of transfer factories for RAPFactory. This allows the user to add user-specific factories to the MueLu Hierarchy. The idea is to be able to add some factories that write out some debug information etc. which are not handled by the ML Parameter List itself. See information about the RAPFactory::AddTransferFactory method, too! */ - void AddTransferFactory(const RCP & factory); - - //! 
Returns number of transfer factories. - size_t NumTransferFactories() const; - //@} - - private: - - //! build multigrid hierarchy for improving nullspace - //! use ML settings that are also used for the final full multigrid - //! hierarchy. In contrary to the final multigrid hierarchy use - //! only nonsmoothed transfer operators (safe time of prolongator smoothing) - //! and cheap level smoothers (no direct solver on coarsest level). - void SetupInitHierarchy(Hierarchy & H) const; - - //! internal routine to add a new factory manager used for the initialization phase - void AddInitFactoryManager(int startLevel, int numDesiredLevel, RCP manager) { - const int lastLevel = startLevel + numDesiredLevel - 1; - if (init_levelManagers_.size() < lastLevel + 1) init_levelManagers_.resize(lastLevel + 1); - - for(int iLevel = startLevel; iLevel <= lastLevel; iLevel++) { - init_levelManagers_[iLevel] = manager; - } + void AddTransferFactory(const RCP& factory); + + //! Returns number of transfer factories. + size_t NumTransferFactories() const; + //@} + + private: + //! build multigrid hierarchy for improving nullspace + //! use ML settings that are also used for the final full multigrid + //! hierarchy. In contrary to the final multigrid hierarchy use + //! only nonsmoothed transfer operators (safe time of prolongator smoothing) + //! and cheap level smoothers (no direct solver on coarsest level). + void SetupInitHierarchy(Hierarchy& H) const; + + //! internal routine to add a new factory manager used for the initialization phase + void AddInitFactoryManager(int startLevel, int numDesiredLevel, RCP manager) { + const int lastLevel = startLevel + numDesiredLevel - 1; + if (init_levelManagers_.size() < lastLevel + 1) init_levelManagers_.resize(lastLevel + 1); + + for (int iLevel = startLevel; iLevel <= lastLevel; iLevel++) { + init_levelManagers_[iLevel] = manager; } + } - //! Used in SetupInitHierarchy() to access levelManagers_ - //! 
Inputs i=-1 and i=size() are allowed to simplify calls to hierarchy->Setup() - Teuchos::RCP InitLvlMngr(int levelID, int lastLevelID) const { - - // Please not that the order of the 'if' statements is important. + //! Used in SetupInitHierarchy() to access levelManagers_ + //! Inputs i=-1 and i=size() are allowed to simplify calls to hierarchy->Setup() + Teuchos::RCP InitLvlMngr(int levelID, int lastLevelID) const { + // Please not that the order of the 'if' statements is important. - if (levelID == -1) return Teuchos::null; // when this routine is called with levelID == '-1', it means that we are processing the finest Level (there is no finer level) - if (levelID == lastLevelID+1) return Teuchos::null; // when this routine is called with levelID == 'lastLevelID+1', it means that we are processing the last level (ie: there is no nextLevel...) + if (levelID == -1) return Teuchos::null; // when this routine is called with levelID == '-1', it means that we are processing the finest Level (there is no finer level) + if (levelID == lastLevelID + 1) return Teuchos::null; // when this routine is called with levelID == 'lastLevelID+1', it means that we are processing the last level (ie: there is no nextLevel...) - if (0 == init_levelManagers_.size()) { // default factory manager. - // the default manager is shared across levels, initialized only if needed and deleted with the HierarchyManager. - static RCP defaultMngr = rcp(new FactoryManager()); - return defaultMngr; - } - if (levelID >= init_levelManagers_.size()) return init_levelManagers_[init_levelManagers_.size()-1]; // last levelManager is used for all the remaining levels. - - return init_levelManagers_[levelID]; // throw exception if out of bound. + if (0 == init_levelManagers_.size()) { // default factory manager. + // the default manager is shared across levels, initialized only if needed and deleted with the HierarchyManager. 
+ static RCP defaultMngr = rcp(new FactoryManager()); + return defaultMngr; } + if (levelID >= init_levelManagers_.size()) return init_levelManagers_[init_levelManagers_.size() - 1]; // last levelManager is used for all the remaining levels. + + return init_levelManagers_[levelID]; // throw exception if out of bound. + } - //! nullspace can be embedded in the ML parameter list - int nullspaceDim_; - double* nullspace_; + //! nullspace can be embedded in the ML parameter list + int nullspaceDim_; + double* nullspace_; - //! export aggregates - bool bExportAggregates_; //!< if set to true an AggregationExportFactory is used to export aggregation information (default = false) + //! export aggregates + bool bExportAggregates_; //!< if set to true an AggregationExportFactory is used to export aggregation information (default = false) - //! list of user-defined transfer Factories - //! We use this vector to add some special user-given factories to the Hierarchy (RAPFactory) - //! This way the user can extend the standard functionality of the MLParameterListInterpreter beyond the - //! capabibilities of ML. - std::vector > TransferFacts_; + //! list of user-defined transfer Factories + //! We use this vector to add some special user-given factories to the Hierarchy (RAPFactory) + //! This way the user can extend the standard functionality of the MLParameterListInterpreter beyond the + //! capabibilities of ML. + std::vector > TransferFacts_; - //! list of levelManagers for adaptive smoothed aggregation - //! initialization phase - Array > init_levelManagers_; + //! list of levelManagers for adaptive smoothed aggregation + //! initialization phase + Array > init_levelManagers_; - //@{ Operator configuration + //@{ Operator configuration - //! Setup Operator object - //! overloaded from HierarchyManager to set nDofsPerNode - virtual void SetupOperator(Operator & Op) const; + //! Setup Operator object + //! 
overloaded from HierarchyManager to set nDofsPerNode + virtual void SetupOperator(Operator& Op) const; - //! Matrix configuration storage - int blksize_; - //@} + //! Matrix configuration storage + int blksize_; + //@} - }; // class AdaptiveSaMLParameterListInterpreter +}; // class AdaptiveSaMLParameterListInterpreter -} // namespace MueLu +} // namespace MueLu #define MUELU_ADAPTIVESAMLPARAMETERLISTINTERPRETER_SHORT #endif /* MUELU_ADAPTIVESAMLPARAMTERLISTINTERPRETER_DECL_HPP_ */ diff --git a/packages/muelu/src/Interface/MueLu_AdaptiveSaMLParameterListInterpreter_def.hpp b/packages/muelu/src/Interface/MueLu_AdaptiveSaMLParameterListInterpreter_def.hpp index c9444689155b..097a2f8f259c 100644 --- a/packages/muelu/src/Interface/MueLu_AdaptiveSaMLParameterListInterpreter_def.hpp +++ b/packages/muelu/src/Interface/MueLu_AdaptiveSaMLParameterListInterpreter_def.hpp @@ -54,396 +54,396 @@ // Read a parameter value from a parameter list and store it into a variable named 'varName' #define MUELU_READ_PARAM(paramList, paramStr, varType, defaultValue, varName) \ - varType varName = defaultValue; if (paramList.isParameter(paramStr)) varName = paramList.get(paramStr); + varType varName = defaultValue; \ + if (paramList.isParameter(paramStr)) varName = paramList.get(paramStr); // Read a parameter value from a paraeter list and copy it into a new parameter list (with another parameter name) #define MUELU_COPY_PARAM(paramList, paramStr, varType, defaultValue, outParamList, outParamStr) \ - if (paramList.isParameter(paramStr)) \ - outParamList.set(outParamStr, paramList.get(paramStr)); \ - else outParamList.set(outParamStr, defaultValue); \ + if (paramList.isParameter(paramStr)) \ + outParamList.set(outParamStr, paramList.get(paramStr)); \ + else \ + outParamList.set(outParamStr, defaultValue); namespace MueLu { - template - AdaptiveSaMLParameterListInterpreter::AdaptiveSaMLParameterListInterpreter(Teuchos::ParameterList & paramList, std::vector > factoryList) : 
TransferFacts_(factoryList), blksize_(1) { - SetParameterList(paramList); +template +AdaptiveSaMLParameterListInterpreter::AdaptiveSaMLParameterListInterpreter(Teuchos::ParameterList& paramList, std::vector > factoryList) + : TransferFacts_(factoryList) + , blksize_(1) { + SetParameterList(paramList); +} + +template +AdaptiveSaMLParameterListInterpreter::AdaptiveSaMLParameterListInterpreter(const std::string& xmlFileName, std::vector > factoryList) + : nullspace_(NULL) + , TransferFacts_(factoryList) + , blksize_(1) { + Teuchos::RCP paramList = Teuchos::getParametersFromXmlFile(xmlFileName); + SetParameterList(*paramList); +} + +template +void AdaptiveSaMLParameterListInterpreter::SetParameterList(const Teuchos::ParameterList& paramList_in) { + Teuchos::ParameterList paramList = paramList_in; + + RCP out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); // TODO: use internal out (GetOStream()) + + // + // Read top-level of the parameter list + // + + // hard-coded default values == ML defaults according to the manual + MUELU_READ_PARAM(paramList, "ML output", int, 0, verbosityLevel); + MUELU_READ_PARAM(paramList, "max levels", int, 10, maxLevels); + MUELU_READ_PARAM(paramList, "PDE equations", int, 1, nDofsPerNode); + + MUELU_READ_PARAM(paramList, "coarse: max size", int, 128, maxCoarseSize); + + MUELU_READ_PARAM(paramList, "aggregation: type", std::string, "Uncoupled", agg_type); + //MUELU_READ_PARAM(paramList, "aggregation: threshold", double, 0.0, agg_threshold); + MUELU_READ_PARAM(paramList, "aggregation: damping factor", double, (double)4 / (double)3, agg_damping); + //MUELU_READ_PARAM(paramList, "aggregation: smoothing sweeps", int, 1, agg_smoothingsweeps); + MUELU_READ_PARAM(paramList, "aggregation: nodes per aggregate", int, 1, minPerAgg); + + MUELU_READ_PARAM(paramList, "null space: type", std::string, "default vectors", nullspaceType); + MUELU_READ_PARAM(paramList, "null space: dimension", int, -1, nullspaceDim); // TODO: ML default not in 
documentation + MUELU_READ_PARAM(paramList, "null space: vectors", double*, NULL, nullspaceVec); // TODO: ML default not in documentation + + MUELU_READ_PARAM(paramList, "energy minimization: enable", bool, false, bEnergyMinimization); + + // + // Move smoothers/aggregation/coarse parameters to sublists + // + + // ML allows to have level-specific smoothers/aggregation/coarse parameters at the top level of the list or/and defined in sublists: + // See also: ML Guide section 6.4.1, MueLu::CreateSublists, ML_CreateSublists + ParameterList paramListWithSubList; + MueLu::CreateSublists(paramList, paramListWithSubList); + paramList = paramListWithSubList; // swap + + // std::cout << std::endl << "Parameter list after CreateSublists" << std::endl; + // std::cout << paramListWithSubList << std::endl; + + int maxNbrAlreadySelected = 0; + + // Matrix option + this->blksize_ = nDofsPerNode; + + // Translate verbosity parameter + Teuchos::EVerbosityLevel eVerbLevel = Teuchos::VERB_NONE; + if (verbosityLevel == 0) eVerbLevel = Teuchos::VERB_NONE; + if (verbosityLevel > 0) eVerbLevel = Teuchos::VERB_LOW; + if (verbosityLevel > 4) eVerbLevel = Teuchos::VERB_MEDIUM; + if (verbosityLevel > 7) eVerbLevel = Teuchos::VERB_HIGH; + if (verbosityLevel > 9) eVerbLevel = Teuchos::VERB_EXTREME; + + TEUCHOS_TEST_FOR_EXCEPTION(agg_type != "Uncoupled", Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter::Setup(): parameter \"aggregation: type\": only 'Uncoupled' aggregation is supported."); + + // Create MueLu factories + // RCP nspFact = rcp(new NullspaceFactory()); + RCP dropFact = rcp(new CoalesceDropFactory()); + //dropFact->SetVerbLevel(toMueLuVerbLevel(eVerbLevel)); + + // Uncoupled aggregation + RCP AggFact = rcp(new UncoupledAggregationFactory()); + AggFact->SetMinNodesPerAggregate(minPerAgg); //TODO should increase if run anything other than 1D + AggFact->SetMaxNeighAlreadySelected(maxNbrAlreadySelected); + AggFact->SetOrdering("natural"); + + if (verbosityLevel > 3) { // 
TODO fix me: Setup is a static function: we cannot use GetOStream without an object... + *out << "========================= Aggregate option summary =========================" << std::endl; + *out << "min Nodes per aggregate : " << minPerAgg << std::endl; + *out << "min # of root nbrs already aggregated : " << maxNbrAlreadySelected << std::endl; + *out << "aggregate ordering : natural" << std::endl; + *out << "=============================================================================" << std::endl; } - template - AdaptiveSaMLParameterListInterpreter::AdaptiveSaMLParameterListInterpreter(const std::string & xmlFileName, std::vector > factoryList) : nullspace_(NULL), TransferFacts_(factoryList), blksize_(1) { - Teuchos::RCP paramList = Teuchos::getParametersFromXmlFile(xmlFileName); - SetParameterList(*paramList); + RCP PFact; + RCP RFact; + RCP PtentFact = rcp(new TentativePFactory()); + if (agg_damping == 0.0 && bEnergyMinimization == false) { + // tentative prolongation operator (PA-AMG) + PFact = PtentFact; + RFact = rcp(new TransPFactory()); + } else if (agg_damping != 0.0 && bEnergyMinimization == false) { + // smoothed aggregation (SA-AMG) + RCP SaPFact = rcp(new SaPFactory()); + SaPFact->SetParameter("sa: damping factor", ParameterEntry(agg_damping)); + PFact = SaPFact; + RFact = rcp(new TransPFactory()); + } else if (bEnergyMinimization == true) { + // Petrov Galerkin PG-AMG smoothed aggregation (energy minimization in ML) + PFact = rcp(new PgPFactory()); + RFact = rcp(new GenericRFactory()); } - template - void AdaptiveSaMLParameterListInterpreter::SetParameterList(const Teuchos::ParameterList & paramList_in) { - Teuchos::ParameterList paramList = paramList_in; - - RCP out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); // TODO: use internal out (GetOStream()) - - // - // Read top-level of the parameter list - // - - // hard-coded default values == ML defaults according to the manual - MUELU_READ_PARAM(paramList, "ML output", int, 0, 
verbosityLevel); - MUELU_READ_PARAM(paramList, "max levels", int, 10, maxLevels); - MUELU_READ_PARAM(paramList, "PDE equations", int, 1, nDofsPerNode); + RCP AcFact = rcp(new RAPFactory()); + for (size_t i = 0; i < TransferFacts_.size(); i++) { + AcFact->AddTransferFactory(TransferFacts_[i]); // THIS WILL BE REPLACED with a call to the MLParamterListInterpreter + } - MUELU_READ_PARAM(paramList, "coarse: max size", int, 128, maxCoarseSize); + // + // Nullspace factory + // - MUELU_READ_PARAM(paramList, "aggregation: type", std::string, "Uncoupled", agg_type); - //MUELU_READ_PARAM(paramList, "aggregation: threshold", double, 0.0, agg_threshold); - MUELU_READ_PARAM(paramList, "aggregation: damping factor", double, (double)4/(double)3, agg_damping); - //MUELU_READ_PARAM(paramList, "aggregation: smoothing sweeps", int, 1, agg_smoothingsweeps); - MUELU_READ_PARAM(paramList, "aggregation: nodes per aggregate", int, 1, minPerAgg); + // Set fine level nullspace + // extract pre-computed nullspace from ML parameter list + // store it in nullspace_ and nullspaceDim_ + if (nullspaceType != "default vectors") { + TEUCHOS_TEST_FOR_EXCEPTION(nullspaceType != "pre-computed", Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (no pre-computed null space). error."); + TEUCHOS_TEST_FOR_EXCEPTION(nullspaceDim == -1, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (nullspace dim == -1). error."); + TEUCHOS_TEST_FOR_EXCEPTION(nullspaceVec == NULL, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (nullspace == NULL). 
You have to provide a valid fine-level nullspace in \'null space: vectors\'"); - MUELU_READ_PARAM(paramList, "null space: type", std::string, "default vectors", nullspaceType); - MUELU_READ_PARAM(paramList, "null space: dimension", int, -1, nullspaceDim); // TODO: ML default not in documentation - MUELU_READ_PARAM(paramList, "null space: vectors", double*, NULL, nullspaceVec); // TODO: ML default not in documentation + nullspaceDim_ = nullspaceDim; + nullspace_ = nullspaceVec; + } - MUELU_READ_PARAM(paramList, "energy minimization: enable", bool, false, bEnergyMinimization); + Teuchos::RCP nspFact = Teuchos::rcp(new NullspaceFactory()); + nspFact->SetFactory("Nullspace", PtentFact); + + // + // Hierarchy + FactoryManager + // + + // Hierarchy options + this->SetVerbLevel(toMueLuVerbLevel(eVerbLevel)); + this->numDesiredLevel_ = maxLevels; + this->maxCoarseSize_ = maxCoarseSize; + + // init smoother + RCP initSmootherFact = Teuchos::null; + if (paramList.isSublist("init smoother")) { + ParameterList& initList = paramList.sublist("init smoother"); // TODO move this before for loop + initSmootherFact = MLParameterListInterpreter::GetSmootherFactory(initList); // TODO: missing AFact input arg. 
+ } else { + std::string ifpackType = "RELAXATION"; + Teuchos::ParameterList smootherParamList; + smootherParamList.set("relaxation: type", "symmetric Gauss-Seidel"); + smootherParamList.set("smoother: sweeps", 1); + smootherParamList.set("smoother: damping factor", 1.0); + RCP smooProto = rcp(new TrilinosSmoother(ifpackType, smootherParamList, 0)); + + initSmootherFact = rcp(new SmootherFactory()); + initSmootherFact->SetSmootherPrototypes(smooProto, smooProto); + } + // + // Coarse Smoother + // + ParameterList& coarseList = paramList.sublist("coarse: list"); + // coarseList.get("smoother: type", "Amesos-KLU"); // set default + //RCP coarseFact = this->GetSmootherFactory(coarseList); + RCP coarseFact = MLParameterListInterpreter::GetSmootherFactory(coarseList); - // - // Move smoothers/aggregation/coarse parameters to sublists - // + // Smoothers Top Level Parameters - // ML allows to have level-specific smoothers/aggregation/coarse parameters at the top level of the list or/and defined in sublists: - // See also: ML Guide section 6.4.1, MueLu::CreateSublists, ML_CreateSublists - ParameterList paramListWithSubList; - MueLu::CreateSublists(paramList, paramListWithSubList); - paramList = paramListWithSubList; // swap - - // std::cout << std::endl << "Parameter list after CreateSublists" << std::endl; - // std::cout << paramListWithSubList << std::endl; - - int maxNbrAlreadySelected = 0; - - // Matrix option - this->blksize_ = nDofsPerNode; - - // Translate verbosity parameter - Teuchos::EVerbosityLevel eVerbLevel = Teuchos::VERB_NONE; - if (verbosityLevel == 0) eVerbLevel = Teuchos::VERB_NONE; - if (verbosityLevel > 0) eVerbLevel = Teuchos::VERB_LOW; - if (verbosityLevel > 4) eVerbLevel = Teuchos::VERB_MEDIUM; - if (verbosityLevel > 7) eVerbLevel = Teuchos::VERB_HIGH; - if (verbosityLevel > 9) eVerbLevel = Teuchos::VERB_EXTREME; - - TEUCHOS_TEST_FOR_EXCEPTION(agg_type != "Uncoupled", Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter::Setup(): parameter 
\"aggregation: type\": only 'Uncoupled' aggregation is supported."); - - // Create MueLu factories - // RCP nspFact = rcp(new NullspaceFactory()); - RCP dropFact = rcp(new CoalesceDropFactory()); - //dropFact->SetVerbLevel(toMueLuVerbLevel(eVerbLevel)); - - // Uncoupled aggregation - RCP AggFact = rcp(new UncoupledAggregationFactory()); - AggFact->SetMinNodesPerAggregate(minPerAgg); //TODO should increase if run anything other than 1D - AggFact->SetMaxNeighAlreadySelected(maxNbrAlreadySelected); - AggFact->SetOrdering("natural"); - - if (verbosityLevel > 3) { // TODO fix me: Setup is a static function: we cannot use GetOStream without an object... - *out << "========================= Aggregate option summary =========================" << std::endl; - *out << "min Nodes per aggregate : " << minPerAgg << std::endl; - *out << "min # of root nbrs already aggregated : " << maxNbrAlreadySelected << std::endl; - *out << "aggregate ordering : natural" << std::endl; - *out << "=============================================================================" << std::endl; - } + RCP topLevelSmootherParam = ExtractSetOfParameters(paramList, "smoother"); + // std::cout << std::endl << "Top level smoother parameters:" << std::endl; + // std::cout << *topLevelSmootherParam << std::endl; - RCP PFact; - RCP RFact; - RCP PtentFact = rcp( new TentativePFactory() ); - if (agg_damping == 0.0 && bEnergyMinimization == false) { - // tentative prolongation operator (PA-AMG) - PFact = PtentFact; - RFact = rcp( new TransPFactory() ); - } else if (agg_damping != 0.0 && bEnergyMinimization == false) { - // smoothed aggregation (SA-AMG) - RCP SaPFact = rcp( new SaPFactory() ); - SaPFact->SetParameter("sa: damping factor", ParameterEntry(agg_damping)); - PFact = SaPFact; - RFact = rcp( new TransPFactory() ); - } else if (bEnergyMinimization == true) { - // Petrov Galerkin PG-AMG smoothed aggregation (energy minimization in ML) - PFact = rcp( new PgPFactory() ); - RFact = rcp( new GenericRFactory() 
); - } + // - RCP AcFact = rcp( new RAPFactory() ); - for (size_t i = 0; iAddTransferFactory(TransferFacts_[i]); // THIS WILL BE REPLACED with a call to the MLParamterListInterpreter - } + // Prepare factory managers + // TODO: smootherFact can be reuse accross level if same parameters/no specific parameterList + for (int levelID = 0; levelID < maxLevels; levelID++) { // - // Nullspace factory + // Level FactoryManager // - // Set fine level nullspace - // extract pre-computed nullspace from ML parameter list - // store it in nullspace_ and nullspaceDim_ - if (nullspaceType != "default vectors") { - TEUCHOS_TEST_FOR_EXCEPTION(nullspaceType != "pre-computed", Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (no pre-computed null space). error."); - TEUCHOS_TEST_FOR_EXCEPTION(nullspaceDim == -1, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (nullspace dim == -1). error."); - TEUCHOS_TEST_FOR_EXCEPTION(nullspaceVec == NULL, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (nullspace == NULL). You have to provide a valid fine-level nullspace in \'null space: vectors\'"); - - nullspaceDim_ = nullspaceDim; - nullspace_ = nullspaceVec; - } - - Teuchos::RCP nspFact = Teuchos::rcp(new NullspaceFactory()); - nspFact->SetFactory("Nullspace", PtentFact); + RCP manager = rcp(new FactoryManager()); + RCP initmanager = rcp(new FactoryManager()); // - // Hierarchy + FactoryManager + // Smoothers // - // Hierarchy options - this->SetVerbLevel(toMueLuVerbLevel(eVerbLevel)); - this->numDesiredLevel_ = maxLevels; - this->maxCoarseSize_ = maxCoarseSize; - - // init smoother - RCP initSmootherFact = Teuchos::null; - if(paramList.isSublist("init smoother")) { - ParameterList& initList = paramList.sublist("init smoother"); // TODO move this before for loop - initSmootherFact = MLParameterListInterpreter::GetSmootherFactory(initList); // TODO: missing AFact input arg. 
- } else { - std::string ifpackType = "RELAXATION"; - Teuchos::ParameterList smootherParamList; - smootherParamList.set("relaxation: type", "symmetric Gauss-Seidel"); - smootherParamList.set("smoother: sweeps", 1); - smootherParamList.set("smoother: damping factor", 1.0); - RCP smooProto = rcp( new TrilinosSmoother(ifpackType, smootherParamList, 0) ); - - initSmootherFact = rcp( new SmootherFactory() ); - initSmootherFact->SetSmootherPrototypes(smooProto, smooProto); + { + // Merge level-specific parameters with global parameters. level-specific parameters takes precedence. + // TODO: unit-test this part alone + + ParameterList levelSmootherParam = GetMLSubList(paramList, "smoother", levelID); // copy + MergeParameterList(*topLevelSmootherParam, levelSmootherParam, false); /* false = do no overwrite levelSmootherParam parameters by topLevelSmootherParam parameters */ + // std::cout << std::endl << "Merged List for level " << levelID << std::endl; + // std::cout << levelSmootherParam << std::endl; + + //RCP smootherFact = this->GetSmootherFactory(levelSmootherParam); // TODO: missing AFact input arg. + RCP smootherFact = MLParameterListInterpreter::GetSmootherFactory(levelSmootherParam); // TODO: missing AFact input arg. 
+ manager->SetFactory("Smoother", smootherFact); + smootherFact->DisableMultipleCallCheck(); + + initmanager->SetFactory("Smoother", initSmootherFact); + initmanager->SetFactory("CoarseSolver", initSmootherFact); + initSmootherFact->DisableMultipleCallCheck(); } // - // Coarse Smoother - // - ParameterList& coarseList = paramList.sublist("coarse: list"); - // coarseList.get("smoother: type", "Amesos-KLU"); // set default - //RCP coarseFact = this->GetSmootherFactory(coarseList); - RCP coarseFact = MLParameterListInterpreter::GetSmootherFactory(coarseList); - - // Smoothers Top Level Parameters - - RCP topLevelSmootherParam = ExtractSetOfParameters(paramList, "smoother"); - // std::cout << std::endl << "Top level smoother parameters:" << std::endl; - // std::cout << *topLevelSmootherParam << std::endl; - + // Misc // - // Prepare factory managers - // TODO: smootherFact can be reuse accross level if same parameters/no specific parameterList - - for (int levelID=0; levelID < maxLevels; levelID++) { - - // - // Level FactoryManager - // - - RCP manager = rcp(new FactoryManager()); - RCP initmanager = rcp(new FactoryManager()); - - // - // Smoothers - // - - { - // Merge level-specific parameters with global parameters. level-specific parameters takes precedence. - // TODO: unit-test this part alone - - ParameterList levelSmootherParam = GetMLSubList(paramList, "smoother", levelID); // copy - MergeParameterList(*topLevelSmootherParam, levelSmootherParam, false); /* false = do no overwrite levelSmootherParam parameters by topLevelSmootherParam parameters */ - // std::cout << std::endl << "Merged List for level " << levelID << std::endl; - // std::cout << levelSmootherParam << std::endl; - - //RCP smootherFact = this->GetSmootherFactory(levelSmootherParam); // TODO: missing AFact input arg. - RCP smootherFact = MLParameterListInterpreter::GetSmootherFactory(levelSmootherParam); // TODO: missing AFact input arg. 
- manager->SetFactory("Smoother", smootherFact); - smootherFact->DisableMultipleCallCheck(); - - initmanager->SetFactory("Smoother", initSmootherFact); - initmanager->SetFactory("CoarseSolver", initSmootherFact); - initSmootherFact->DisableMultipleCallCheck(); - - } - - // - // Misc - // - - Teuchos::rcp_dynamic_cast(PFact)->DisableMultipleCallCheck(); - Teuchos::rcp_dynamic_cast(PtentFact)->DisableMultipleCallCheck(); - Teuchos::rcp_dynamic_cast(RFact)->DisableMultipleCallCheck(); - Teuchos::rcp_dynamic_cast(coarseFact)->DisableMultipleCallCheck(); - Teuchos::rcp_dynamic_cast(dropFact)->DisableMultipleCallCheck(); - Teuchos::rcp_dynamic_cast(AggFact)->DisableMultipleCallCheck(); - Teuchos::rcp_dynamic_cast(AcFact)->DisableMultipleCallCheck(); - Teuchos::rcp_dynamic_cast(nspFact)->DisableMultipleCallCheck(); - - manager->SetFactory("CoarseSolver", coarseFact); // TODO: should not be done in the loop - manager->SetFactory("Graph", dropFact); - manager->SetFactory("Aggregates", AggFact); - manager->SetFactory("DofsPerNode", dropFact); - manager->SetFactory("A", AcFact); - manager->SetFactory("P", PFact); - manager->SetFactory("Ptent", PtentFact); - manager->SetFactory("R", RFact); - manager->SetFactory("Nullspace", nspFact); - - //initmanager->SetFactory("CoarseSolver", coarseFact); - initmanager->SetFactory("Graph", dropFact); - initmanager->SetFactory("Aggregates", AggFact); - initmanager->SetFactory("DofsPerNode", dropFact); - initmanager->SetFactory("A", AcFact); - initmanager->SetFactory("P", PtentFact); // use nonsmoothed transfers - initmanager->SetFactory("Ptent", PtentFact); - initmanager->SetFactory("R", RFact); - initmanager->SetFactory("Nullspace", nspFact); - - this->AddFactoryManager(levelID, 1, manager); - this->AddInitFactoryManager(levelID, 1, initmanager); - } // for (level loop) + Teuchos::rcp_dynamic_cast(PFact)->DisableMultipleCallCheck(); + Teuchos::rcp_dynamic_cast(PtentFact)->DisableMultipleCallCheck(); + 
Teuchos::rcp_dynamic_cast(RFact)->DisableMultipleCallCheck(); + Teuchos::rcp_dynamic_cast(coarseFact)->DisableMultipleCallCheck(); + Teuchos::rcp_dynamic_cast(dropFact)->DisableMultipleCallCheck(); + Teuchos::rcp_dynamic_cast(AggFact)->DisableMultipleCallCheck(); + Teuchos::rcp_dynamic_cast(AcFact)->DisableMultipleCallCheck(); + Teuchos::rcp_dynamic_cast(nspFact)->DisableMultipleCallCheck(); + + manager->SetFactory("CoarseSolver", coarseFact); // TODO: should not be done in the loop + manager->SetFactory("Graph", dropFact); + manager->SetFactory("Aggregates", AggFact); + manager->SetFactory("DofsPerNode", dropFact); + manager->SetFactory("A", AcFact); + manager->SetFactory("P", PFact); + manager->SetFactory("Ptent", PtentFact); + manager->SetFactory("R", RFact); + manager->SetFactory("Nullspace", nspFact); + + //initmanager->SetFactory("CoarseSolver", coarseFact); + initmanager->SetFactory("Graph", dropFact); + initmanager->SetFactory("Aggregates", AggFact); + initmanager->SetFactory("DofsPerNode", dropFact); + initmanager->SetFactory("A", AcFact); + initmanager->SetFactory("P", PtentFact); // use nonsmoothed transfers + initmanager->SetFactory("Ptent", PtentFact); + initmanager->SetFactory("R", RFact); + initmanager->SetFactory("Nullspace", nspFact); + + this->AddFactoryManager(levelID, 1, manager); + this->AddInitFactoryManager(levelID, 1, initmanager); + } // for (level loop) +} + +template +void AdaptiveSaMLParameterListInterpreter::SetupInitHierarchy(Hierarchy& H) const { + TEUCHOS_TEST_FOR_EXCEPTION(!H.GetLevel(0)->IsAvailable("A"), Exceptions::RuntimeError, "No fine level operator"); + + RCP l = H.GetLevel(0); + RCP Op = l->Get >("A"); + SetupOperator(*Op); // use overloaded SetupMatrix routine + this->SetupExtra(H); + + // Setup Hierarchy + H.SetMaxCoarseSize(this->maxCoarseSize_); // TODO + + int levelID = 0; + int lastLevelID = this->numDesiredLevel_ - 1; + bool isLastLevel = false; + + while (!isLastLevel) { + bool r = H.Setup(levelID, + 
InitLvlMngr(levelID - 1, lastLevelID), + InitLvlMngr(levelID, lastLevelID), + InitLvlMngr(levelID + 1, lastLevelID)); + + isLastLevel = r || (levelID == lastLevelID); + levelID++; } - - template - void AdaptiveSaMLParameterListInterpreter::SetupInitHierarchy(Hierarchy & H) const { - TEUCHOS_TEST_FOR_EXCEPTION(!H.GetLevel(0)->IsAvailable("A"), Exceptions::RuntimeError, "No fine level operator"); - - RCP l = H.GetLevel(0); - RCP Op = l->Get >("A"); - SetupOperator(*Op); // use overloaded SetupMatrix routine - this->SetupExtra(H); - - // Setup Hierarchy - H.SetMaxCoarseSize(this->maxCoarseSize_); // TODO - - int levelID = 0; - int lastLevelID = this->numDesiredLevel_ - 1; - bool isLastLevel = false; - - while(!isLastLevel) { - bool r = H.Setup(levelID, - InitLvlMngr(levelID-1, lastLevelID), - InitLvlMngr(levelID, lastLevelID), - InitLvlMngr(levelID+1, lastLevelID)); - - isLastLevel = r || (levelID == lastLevelID); - levelID++; - } - } - - template - void AdaptiveSaMLParameterListInterpreter::SetupHierarchy(Hierarchy & H) const { - - // set fine level null space - // usually this null space is provided from outside (by the user) using - // the ML parameter lists. - if (this->nullspace_ != NULL) { - RCP fineLevel = H.GetLevel(0); - const RCP rowMap = fineLevel->Get< RCP >("A")->getRowMap(); - RCP nullspace = MultiVectorFactory::Build(rowMap, nullspaceDim_, true); - - for ( size_t i=0; i < Teuchos::as(nullspaceDim_); i++) { - Teuchos::ArrayRCP nullspacei = nullspace->getDataNonConst(i); - const size_t myLength = nullspace->getLocalLength(); - - for (size_t j = 0; j < myLength; j++) { - nullspacei[j] = nullspace_[i*myLength + j]; - } +} + +template +void AdaptiveSaMLParameterListInterpreter::SetupHierarchy(Hierarchy& H) const { + // set fine level null space + // usually this null space is provided from outside (by the user) using + // the ML parameter lists. 
+ if (this->nullspace_ != NULL) { + RCP fineLevel = H.GetLevel(0); + const RCP rowMap = fineLevel->Get >("A")->getRowMap(); + RCP nullspace = MultiVectorFactory::Build(rowMap, nullspaceDim_, true); + + for (size_t i = 0; i < Teuchos::as(nullspaceDim_); i++) { + Teuchos::ArrayRCP nullspacei = nullspace->getDataNonConst(i); + const size_t myLength = nullspace->getLocalLength(); + + for (size_t j = 0; j < myLength; j++) { + nullspacei[j] = nullspace_[i * myLength + j]; } - - fineLevel->Set("Nullspace", nullspace); } - // keep aggregates - H.Keep("Aggregates", HierarchyManager::GetFactoryManager(0)->GetFactory("Aggregates").get()); - - /////////////////////////////// - - // build hierarchy for initialization - SetupInitHierarchy(H); + fineLevel->Set("Nullspace", nullspace); + } - { - // do some iterations with the built hierarchy to improve the null space - Teuchos::RCP Finest = H.GetLevel(0); // get finest level,MueLu::NoFactory::get() - Teuchos::RCP nspVector2 = Finest->Get >("Nullspace"); + // keep aggregates + H.Keep("Aggregates", HierarchyManager::GetFactoryManager(0)->GetFactory("Aggregates").get()); - Xpetra::IO::Write("orig_nsp.vec", *nspVector2); + /////////////////////////////// - RCP Op = Finest->Get >("A"); - Xpetra::IO::Write("A.mat", *Op); + // build hierarchy for initialization + SetupInitHierarchy(H); + { + // do some iterations with the built hierarchy to improve the null space + Teuchos::RCP Finest = H.GetLevel(0); // get finest level,MueLu::NoFactory::get() + Teuchos::RCP nspVector2 = Finest->Get >("Nullspace"); - Teuchos::RCP homogRhsVec = MultiVectorFactory::Build(nspVector2->getMap(),nspVector2->getNumVectors(),true); - homogRhsVec->putScalar(0.0); + Xpetra::IO::Write("orig_nsp.vec", *nspVector2); - // do 1 multigrid cycle for improving the null space by "solving" - // A B_f = 0 - // where A is the system matrix and B_f the fine level null space vectors - H.Iterate(*homogRhsVec, *nspVector2, 1, false); + RCP Op = Finest->Get >("A"); + 
Xpetra::IO::Write("A.mat", *Op); - // store improved fine level null space - Finest->Set("Nullspace",nspVector2); + Teuchos::RCP homogRhsVec = MultiVectorFactory::Build(nspVector2->getMap(), nspVector2->getNumVectors(), true); + homogRhsVec->putScalar(0.0); - Xpetra::IO::Write("new_nsp.vec", *nspVector2); + // do 1 multigrid cycle for improving the null space by "solving" + // A B_f = 0 + // where A is the system matrix and B_f the fine level null space vectors + H.Iterate(*homogRhsVec, *nspVector2, 1, false); - //H.Delete("CoarseSolver", init_levelManagers_[0]->GetFactory("CoarseSolver").get()); - } + // store improved fine level null space + Finest->Set("Nullspace", nspVector2); - { - // do some clean up. - // remove all old default factories. Build new ones for the second build. - // this is a little bit tricky to understand - for(size_t k=0; k < HierarchyManager::getNumFactoryManagers(); k++) { - HierarchyManager::GetFactoryManager(k)->Clean(); - //Teuchos::rcp_dynamic_cast(HierarchyManager::GetFactoryManager(k)->GetFactory("Smoother"))->DisableMultipleCallCheck(); // after changing to MLParamterListInterpreter functions - } - // not sure about this. 
i only need it if Smoother is defined explicitely (not using default smoother) - // need this: otherwise RAPFactory::Build is complaining on level 0 - // and TentativePFactory::Build is complaining on level 1 - Teuchos::rcp_dynamic_cast(HierarchyManager::GetFactoryManager(0)->GetFactory("A"))->DisableMultipleCallCheck(); - Teuchos::rcp_dynamic_cast(HierarchyManager::GetFactoryManager(1)->GetFactory("P"))->DisableMultipleCallCheck(); - Teuchos::rcp_dynamic_cast(HierarchyManager::GetFactoryManager(1)->GetFactory("Ptent"))->DisableMultipleCallCheck(); - - HierarchyManager::SetupHierarchy(H); - } + Xpetra::IO::Write("new_nsp.vec", *nspVector2); + //H.Delete("CoarseSolver", init_levelManagers_[0]->GetFactory("CoarseSolver").get()); } - template - void AdaptiveSaMLParameterListInterpreter::AddTransferFactory(const RCP& factory) { - // check if it's a TwoLevelFactoryBase based transfer factory - TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::rcp_dynamic_cast(factory) == Teuchos::null, Exceptions::BadCast, "Transfer factory is not derived from TwoLevelFactoryBase. Since transfer factories will be handled by the RAPFactory they have to be derived from TwoLevelFactoryBase!"); - TransferFacts_.push_back(factory); - } - - template - size_t AdaptiveSaMLParameterListInterpreter::NumTransferFactories() const { - return TransferFacts_.size(); - } - - template - void AdaptiveSaMLParameterListInterpreter::SetupOperator(Operator & Op) const { - try { - Matrix& A = dynamic_cast(Op); - if (A.IsFixedBlockSizeSet() && (A.GetFixedBlockSize() != blksize_)) - this->GetOStream(Warnings0) << "Setting matrix block size to " << blksize_ << " (value of the parameter in the list) " - << "instead of " << A.GetFixedBlockSize() << " (provided matrix)." << std::endl; - - A.SetFixedBlockSize(blksize_); - - } catch (std::bad_cast& e) { - this->GetOStream(Warnings0) << "Skipping setting block size as the operator is not a matrix" << std::endl; + { + // do some clean up. + // remove all old default factories. 
Build new ones for the second build. + // this is a little bit tricky to understand + for (size_t k = 0; k < HierarchyManager::getNumFactoryManagers(); k++) { + HierarchyManager::GetFactoryManager(k)->Clean(); + //Teuchos::rcp_dynamic_cast(HierarchyManager::GetFactoryManager(k)->GetFactory("Smoother"))->DisableMultipleCallCheck(); // after changing to MLParamterListInterpreter functions } + // not sure about this. i only need it if Smoother is defined explicitely (not using default smoother) + // need this: otherwise RAPFactory::Build is complaining on level 0 + // and TentativePFactory::Build is complaining on level 1 + Teuchos::rcp_dynamic_cast(HierarchyManager::GetFactoryManager(0)->GetFactory("A"))->DisableMultipleCallCheck(); + Teuchos::rcp_dynamic_cast(HierarchyManager::GetFactoryManager(1)->GetFactory("P"))->DisableMultipleCallCheck(); + Teuchos::rcp_dynamic_cast(HierarchyManager::GetFactoryManager(1)->GetFactory("Ptent"))->DisableMultipleCallCheck(); + + HierarchyManager::SetupHierarchy(H); } +} + +template +void AdaptiveSaMLParameterListInterpreter::AddTransferFactory(const RCP& factory) { + // check if it's a TwoLevelFactoryBase based transfer factory + TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::rcp_dynamic_cast(factory) == Teuchos::null, Exceptions::BadCast, "Transfer factory is not derived from TwoLevelFactoryBase. 
Since transfer factories will be handled by the RAPFactory they have to be derived from TwoLevelFactoryBase!"); + TransferFacts_.push_back(factory); +} + +template +size_t AdaptiveSaMLParameterListInterpreter::NumTransferFactories() const { + return TransferFacts_.size(); +} + +template +void AdaptiveSaMLParameterListInterpreter::SetupOperator(Operator& Op) const { + try { + Matrix& A = dynamic_cast(Op); + if (A.IsFixedBlockSizeSet() && (A.GetFixedBlockSize() != blksize_)) + this->GetOStream(Warnings0) << "Setting matrix block size to " << blksize_ << " (value of the parameter in the list) " + << "instead of " << A.GetFixedBlockSize() << " (provided matrix)." << std::endl; + + A.SetFixedBlockSize(blksize_); + + } catch (std::bad_cast& e) { + this->GetOStream(Warnings0) << "Skipping setting block size as the operator is not a matrix" << std::endl; + } +} -} // namespace MueLu - +} // namespace MueLu #endif /* MUELU_ADAPTIVESAMLPARAMETERLISTINTERPRETER_DEF_HPP_ */ diff --git a/packages/muelu/src/Interface/MueLu_FactoryFactory_decl.hpp b/packages/muelu/src/Interface/MueLu_FactoryFactory_decl.hpp index 1832f0704030..24a99ceb45dc 100644 --- a/packages/muelu/src/Interface/MueLu_FactoryFactory_decl.hpp +++ b/packages/muelu/src/Interface/MueLu_FactoryFactory_decl.hpp @@ -159,7 +159,6 @@ #include "MueLu_Zoltan2Interface.hpp" #include "MueLu_NodePartitionInterface.hpp" - #include "MueLu_CoalesceDropFactory_kokkos.hpp" #include "MueLu_GeometricInterpolationPFactory_kokkos.hpp" #include "MueLu_NullspaceFactory_kokkos.hpp" @@ -187,777 +186,780 @@ namespace MueLu { - /*! class FactoryFactory +/*! 
class FactoryFactory @brief Factory that can generate other factories from */ - template - class FactoryFactory : public BaseClass { +template +class FactoryFactory : public BaseClass { #undef MUELU_FACTORYFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - typedef std::map > FactoryMap; // TODO: remove - typedef std::map > FactoryManagerMap; - - public: - - /// \brief: Interpret Factory parameter list and build new factory - /// - /// \param param [in]: ParameterEntry being either the parameter list containing the "factory" parameter declaring the factory type (e.g., "TrilinosSmoother") or being a plain Parameter containing the factory type as value - /// \param factoryMapIn [in]: FactoryMap containing a map between factory name (e.g., "smootherFact1") and corresponding factory of all previously defined factories - /// \param factoryManagersIn [in]: FactoryManagerMap containing a map between group names and Factory manager objects. Needed for factories with sub-factory managers. - /// - /// Parameter List Parsing: - /// --------- - /// - /// - /// or: - /// - /// - /// - /// ... 
- /// - /// - virtual RCP BuildFactory(const Teuchos::ParameterEntry& param, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - // Find factory - std::string factoryName; - Teuchos::ParameterList paramList; - if (!param.isList()) { - factoryName = Teuchos::getValue(param); - } else { - paramList = Teuchos::getValue(param); - factoryName = paramList.get("factory"); - } + typedef std::map > FactoryMap; // TODO: remove + typedef std::map > FactoryManagerMap; + + public: + /// \brief: Interpret Factory parameter list and build new factory + /// + /// \param param [in]: ParameterEntry being either the parameter list containing the "factory" parameter declaring the factory type (e.g., "TrilinosSmoother") or being a plain Parameter containing the factory type as value + /// \param factoryMapIn [in]: FactoryMap containing a map between factory name (e.g., "smootherFact1") and corresponding factory of all previously defined factories + /// \param factoryManagersIn [in]: FactoryManagerMap containing a map between group names and Factory manager objects. Needed for factories with sub-factory managers. + /// + /// Parameter List Parsing: + /// --------- + /// + /// + /// or: + /// + /// + /// + /// ... + /// + /// + virtual RCP BuildFactory(const Teuchos::ParameterEntry& param, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { + // Find factory + std::string factoryName; + Teuchos::ParameterList paramList; + if (!param.isList()) { + factoryName = Teuchos::getValue(param); + } else { + paramList = Teuchos::getValue(param); + factoryName = paramList.get("factory"); + } - // TODO: see how Teko handles this (=> register factories). 
- if (factoryName == "AggregateQualityEstimateFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "AggregationExportFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "AmalgamationFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "BlockedCoarseMapFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "BlockedRAPFactory") return BuildRAPFactory (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "BrickAggregationFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "ClassicalMapFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "ClassicalPFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "CloneRepartitionInterface") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "CoarseMapFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "CoarseningVisualizationFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "CoalesceDropFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "SmooVecCoalesceDropFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "ConstraintFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "CoordinatesTransferFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "DirectSolver") return BuildDirectSolver (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "DropNegativeEntriesFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "EminPFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "FilteredAFactory") return Build2 (paramList, 
factoryMapIn, factoryManagersIn); - if (factoryName == "FineLevelInputDataFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "GeneralGeometricPFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "ReplicatePFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "CombinePFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "GenericRFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "GeometricInterpolationPFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "HybridAggregationFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "InterfaceAggregationFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "InterfaceMappingTransferFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "InverseApproximationFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "InitialBlockNumberFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "LineDetectionFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - // LocalOrdinalTransferFactory is a utility factory that can be used for multiple things, so there is no default - // if (factoryName == "LocalOrdinalTransferFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "MapTransferFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "MatrixAnalysisFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "MultiVectorTransferFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "NoFactory") return MueLu::NoFactory::getRCP(); - if (factoryName == "NoSmoother") return 
rcp(new SmootherFactory(Teuchos::null)); - if (factoryName == "NotayAggregationFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "NullspaceFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "NullspacePresmoothFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "PatternFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "PgPFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "SaPFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RAPFactory") return BuildRAPFactory (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RAPShiftFactory") return BuildRAPFactory (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RebalanceAcFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RebalanceTransferFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RegionRFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RegionRFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "ReorderBlockAFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RepartitionInterface") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "ScaledNullspaceFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "SegregatedAFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "SemiCoarsenPFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "StructuredAggregationFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "StructuredLineDetectionFactory") return Build2 (paramList, 
factoryMapIn, factoryManagersIn); - if (factoryName == "SubBlockAFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "TentativePFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "ToggleCoordinatesTransferFactory") return BuildToggleCoordinatesTransferFactory (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "TogglePFactory") return BuildTogglePFactory (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "TransPFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RfromP_Or_TransP") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "TrilinosSmoother") return BuildTrilinosSmoother (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "UncoupledAggregationFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "UnsmooshFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "UserAggregationFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "UserPFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "VariableDofLaplacianFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "ZeroSubBlockAFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "CoalesceDropFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "GeometricInterpolationPFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "NullspaceFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "SaPFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "SemiCoarsenPFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); 
- if (factoryName == "StructuredAggregationFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "TentativePFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "MatrixFreeTentativePFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "UncoupledAggregationFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); - - // Handle removed Kokkos factories - if (factoryName == "CoarseMapFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "CoordinatesTransferFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); - - if (factoryName == "ZoltanInterface") { + // TODO: see how Teko handles this (=> register factories). + if (factoryName == "AggregateQualityEstimateFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "AggregationExportFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "AmalgamationFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "BlockedCoarseMapFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "BlockedRAPFactory") return BuildRAPFactory(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "BrickAggregationFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "ClassicalMapFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "ClassicalPFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "CloneRepartitionInterface") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "CoarseMapFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "CoarseningVisualizationFactory") return Build2(paramList, factoryMapIn, 
factoryManagersIn); + if (factoryName == "CoalesceDropFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "SmooVecCoalesceDropFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "ConstraintFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "CoordinatesTransferFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "DirectSolver") return BuildDirectSolver(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "DropNegativeEntriesFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "EminPFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "FilteredAFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "FineLevelInputDataFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "GeneralGeometricPFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "ReplicatePFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "CombinePFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "GenericRFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "GeometricInterpolationPFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "HybridAggregationFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "InterfaceAggregationFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "InterfaceMappingTransferFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "InverseApproximationFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "InitialBlockNumberFactory") return 
Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "LineDetectionFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + // LocalOrdinalTransferFactory is a utility factory that can be used for multiple things, so there is no default + // if (factoryName == "LocalOrdinalTransferFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "MapTransferFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "MatrixAnalysisFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "MultiVectorTransferFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "NoFactory") return MueLu::NoFactory::getRCP(); + if (factoryName == "NoSmoother") return rcp(new SmootherFactory(Teuchos::null)); + if (factoryName == "NotayAggregationFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "NullspaceFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "NullspacePresmoothFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "PatternFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "PgPFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "SaPFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "RAPFactory") return BuildRAPFactory(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "RAPShiftFactory") return BuildRAPFactory(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "RebalanceAcFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "RebalanceTransferFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "RegionRFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == 
"RegionRFactory_kokkos") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "ReorderBlockAFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "RepartitionInterface") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "ScaledNullspaceFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "SegregatedAFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "SemiCoarsenPFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "StructuredAggregationFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "StructuredLineDetectionFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "SubBlockAFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "TentativePFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "ToggleCoordinatesTransferFactory") return BuildToggleCoordinatesTransferFactory(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "TogglePFactory") return BuildTogglePFactory(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "TransPFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "RfromP_Or_TransP") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "TrilinosSmoother") return BuildTrilinosSmoother(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "UncoupledAggregationFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "UnsmooshFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "UserAggregationFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "UserPFactory") return Build2(paramList, factoryMapIn, 
factoryManagersIn); + if (factoryName == "VariableDofLaplacianFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "ZeroSubBlockAFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "CoalesceDropFactory_kokkos") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "GeometricInterpolationPFactory_kokkos") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "NullspaceFactory_kokkos") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "SaPFactory_kokkos") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "SemiCoarsenPFactory_kokkos") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "StructuredAggregationFactory_kokkos") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "TentativePFactory_kokkos") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "MatrixFreeTentativePFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "UncoupledAggregationFactory_kokkos") return Build2(paramList, factoryMapIn, factoryManagersIn); + + // Handle removed Kokkos factories + if (factoryName == "CoarseMapFactory_kokkos") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "CoordinatesTransferFactory_kokkos") return Build2(paramList, factoryMapIn, factoryManagersIn); + + if (factoryName == "ZoltanInterface") { #if defined(HAVE_MUELU_ZOLTAN) && defined(HAVE_MPI) - return Build2(paramList, factoryMapIn, factoryManagersIn); + return Build2(paramList, factoryMapIn, factoryManagersIn); #else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory:BuildFactory(): Cannot create a ZoltanInterface object: Zoltan is disabled: HAVE_MUELU_ZOLTAN && HAVE_MPI == false."); -#endif // HAVE_MUELU_ZOLTAN && HAVE_MPI - } - if (factoryName == 
"Zoltan2Interface") { + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory:BuildFactory(): Cannot create a ZoltanInterface object: Zoltan is disabled: HAVE_MUELU_ZOLTAN && HAVE_MPI == false."); +#endif // HAVE_MUELU_ZOLTAN && HAVE_MPI + } + if (factoryName == "Zoltan2Interface") { #if defined(HAVE_MUELU_ZOLTAN2) && defined(HAVE_MPI) - return Build2(paramList, factoryMapIn, factoryManagersIn); + return Build2(paramList, factoryMapIn, factoryManagersIn); #else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory:BuildFactory(): Cannot create a Zoltan2Interface object: Zoltan2 is disabled: HAVE_MUELU_ZOLTAN2 && HAVE_MPI == false."); -#endif // HAVE_MUELU_ZOLTAN2 && HAVE_MPI - } - if (factoryName == "IsorropiaInterface") { + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory:BuildFactory(): Cannot create a Zoltan2Interface object: Zoltan2 is disabled: HAVE_MUELU_ZOLTAN2 && HAVE_MPI == false."); +#endif // HAVE_MUELU_ZOLTAN2 && HAVE_MPI + } + if (factoryName == "IsorropiaInterface") { #if defined(HAVE_MUELU_ISORROPIA) && defined(HAVE_MPI) - return Build2(paramList, factoryMapIn, factoryManagersIn); + return Build2(paramList, factoryMapIn, factoryManagersIn); #else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory:BuildFactory(): Cannot create a IsorropiaInterface object: Isorropia is disabled: HAVE_MUELU_ISORROPIA && HAVE_MPI == false."); -#endif // HAVE_MUELU_ZOLTAN2 && HAVE_MPI - } + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory:BuildFactory(): Cannot create a IsorropiaInterface object: Isorropia is disabled: HAVE_MUELU_ISORROPIA && HAVE_MPI == false."); +#endif // HAVE_MUELU_ZOLTAN2 && HAVE_MPI + } - if (factoryName == "NodePartitionInterface") { + if (factoryName == "NodePartitionInterface") { #if defined(HAVE_MPI) - return Build2(paramList, factoryMapIn, factoryManagersIn); + return Build2(paramList, 
factoryMapIn, factoryManagersIn); #else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory:BuildFactory(): Cannot create a NodePartitionInterface object: HAVE_MPI == false."); -#endif // HAVE_MPI - } + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory:BuildFactory(): Cannot create a NodePartitionInterface object: HAVE_MPI == false."); +#endif // HAVE_MPI + } - if (factoryName == "RepartitionFactory") { + if (factoryName == "RepartitionFactory") { #ifdef HAVE_MPI - return Build2(paramList, factoryMapIn, factoryManagersIn); + return Build2(paramList, factoryMapIn, factoryManagersIn); #else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory:BuildFactory(): Cannot create a RepartitionFactory object: HAVE_MPI == false."); -#endif // HAVE_MPI - } - if (factoryName == "RepartitionHeuristicFactory") { + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory:BuildFactory(): Cannot create a RepartitionFactory object: HAVE_MPI == false."); +#endif // HAVE_MPI + } + if (factoryName == "RepartitionHeuristicFactory") { #ifdef HAVE_MPI - return Build2(paramList, factoryMapIn, factoryManagersIn); + return Build2(paramList, factoryMapIn, factoryManagersIn); #else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory:BuildFactory(): Cannot create a RepartitionHeuristicFactory object: HAVE_MPI == false."); -#endif // HAVE_MPI - } - // Blocked factories - if (factoryName == "BlockedCoordinatesTransferFactory") return BuildBlockedCoordFactory (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "BlockedDirectSolver") return BuildBlockedDirectSolver(paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "BlockedGaussSeidelSmoother") return BuildBlockedSmoother(paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "BlockedJacobiSmoother") return BuildBlockedSmoother(paramList, factoryMapIn, 
factoryManagersIn); - if (factoryName == "BlockedPFactory") return BuildBlockedFactory(paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "BraessSarazinSmoother") return BuildBlockedSmoother(paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "IndefiniteBlockDiagonalSmoother") return BuildBlockedSmoother(paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "SimpleSmoother") return BuildBlockedSmoother(paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "SchurComplementFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RebalanceBlockRestrictionFactory")return BuildBlockedFactory(paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RebalanceBlockAcFactory") return BuildBlockedFactory(paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RebalanceBlockInterpolationFactory") return BuildBlockedFactory(paramList, factoryMapIn, factoryManagersIn); + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory:BuildFactory(): Cannot create a RepartitionHeuristicFactory object: HAVE_MPI == false."); +#endif // HAVE_MPI + } + // Blocked factories + if (factoryName == "BlockedCoordinatesTransferFactory") return BuildBlockedCoordFactory(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "BlockedDirectSolver") return BuildBlockedDirectSolver(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "BlockedGaussSeidelSmoother") return BuildBlockedSmoother(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "BlockedJacobiSmoother") return BuildBlockedSmoother(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "BlockedPFactory") return BuildBlockedFactory(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "BraessSarazinSmoother") return BuildBlockedSmoother(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "IndefiniteBlockDiagonalSmoother") return 
BuildBlockedSmoother(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "SimpleSmoother") return BuildBlockedSmoother(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "SchurComplementFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "RebalanceBlockRestrictionFactory") return BuildBlockedFactory(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "RebalanceBlockAcFactory") return BuildBlockedFactory(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "RebalanceBlockInterpolationFactory") return BuildBlockedFactory(paramList, factoryMapIn, factoryManagersIn); #ifdef HAVE_MPI - if (factoryName == "RepartitionBlockDiagonalFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "RepartitionBlockDiagonalFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); #endif #ifdef HAVE_MUELU_TEKO - if (factoryName == "TekoSmoother") return BuildTekoSmoother(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "TekoSmoother") return BuildTekoSmoother(paramList, factoryMapIn, factoryManagersIn); #endif - if (factoryName == "UzawaSmoother") return BuildBlockedSmoother(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "UzawaSmoother") return BuildBlockedSmoother(paramList, factoryMapIn, factoryManagersIn); // Matlab factories #ifdef HAVE_MUELU_MATLAB - if (factoryName == "TwoLevelMatlabFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "SingleLevelMatlabFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "MatlabSmoother") return BuildMatlabSmoother (paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "TwoLevelMatlabFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "SingleLevelMatlabFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == 
"MatlabSmoother") return BuildMatlabSmoother(paramList, factoryMapIn, factoryManagersIn); #endif #ifdef HAVE_MUELU_INTREPID2 - if (factoryName == "IntrepidPCoarsenFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "IntrepidPCoarsenFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); #endif - // Use a user defined factories (in node) - if (factoryMapIn.find(factoryName) != factoryMapIn.end()) { - TEUCHOS_TEST_FOR_EXCEPTION((param.isList() && (++paramList.begin() != paramList.end())), Exceptions::RuntimeError, - "MueLu::FactoryFactory: Error during the parsing of: " << std::endl << paramList << std::endl - << "'" << factoryName << "' is not a factory name but an existing instance of a factory." << std::endl - << "Extra parameters cannot be specified after the creation of the object." << std::endl << std::endl - << "Correct syntaxes includes:" << std::endl - << " " << std::endl - << "or" << std::endl - << " " << std::endl - ); - - return factoryMapIn.find(factoryName)->second; - } + // Use a user defined factories (in node) + if (factoryMapIn.find(factoryName) != factoryMapIn.end()) { + TEUCHOS_TEST_FOR_EXCEPTION((param.isList() && (++paramList.begin() != paramList.end())), Exceptions::RuntimeError, + "MueLu::FactoryFactory: Error during the parsing of: " << std::endl + << paramList << std::endl + << "'" << factoryName << "' is not a factory name but an existing instance of a factory." << std::endl + << "Extra parameters cannot be specified after the creation of the object." 
<< std::endl + << std::endl + << "Correct syntaxes includes:" << std::endl + << " " << std::endl + << "or" << std::endl + << " " << std::endl); + + return factoryMapIn.find(factoryName)->second; + } - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory: unknown factory name : " << factoryName); + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory: unknown factory name : " << factoryName); - TEUCHOS_UNREACHABLE_RETURN(Teuchos::null); - } + TEUCHOS_UNREACHABLE_RETURN(Teuchos::null); + } - // - // - // + // + // + // - // FOLLOWING FUNCTIONS SHOULD LIVE WITH THE CORRESPONDING CLASS + // FOLLOWING FUNCTIONS SHOULD LIVE WITH THE CORRESPONDING CLASS - // - // - // + // + // + // -#define arraysize(ar) (sizeof(ar) / sizeof(ar[0])) +#define arraysize(ar) (sizeof(ar) / sizeof(ar[0])) - template // T must implement the Factory interface - RCP Build(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - RCP factory = rcp(new T()); + template // T must implement the Factory interface + RCP Build(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { + RCP factory = rcp(new T()); - const char* strarray[] = {"A", "P", "R", "Graph", "UnAmalgamationInfo", "Aggregates", "Nullspace", "TransferFactory", "DofsPerNode"}; - std::vector v(strarray, strarray + arraysize(strarray)); - for (size_t i = 0; i < v.size(); ++i) - if (paramList.isParameter(v[i])) - factory->SetFactory(v[i], BuildFactory(paramList.getEntry(v[i]), factoryMapIn, factoryManagersIn)); + const char* strarray[] = {"A", "P", "R", "Graph", "UnAmalgamationInfo", "Aggregates", "Nullspace", "TransferFactory", "DofsPerNode"}; + std::vector v(strarray, strarray + arraysize(strarray)); + for (size_t i = 0; i < v.size(); ++i) + if (paramList.isParameter(v[i])) + factory->SetFactory(v[i], BuildFactory(paramList.getEntry(v[i]), 
factoryMapIn, factoryManagersIn)); - return factory; - } + return factory; + } - template // T must implement the Factory interface - RCP Build2(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - RCP factory = rcp(new T()); + template // T must implement the Factory interface + RCP Build2(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { + RCP factory = rcp(new T()); - ParameterList paramListWithFactories; + ParameterList paramListWithFactories; - // Read the RCP parameters of the class T - RCP validParamList = factory->GetValidParameterList(); // TODO check for Teuchos::null (no parameter list validation) - TEUCHOS_TEST_FOR_EXCEPTION(validParamList == Teuchos::null, Exceptions::RuntimeError, "FactoryFactory::Build2: default parameter list is null. Please fix this."); - for (ParameterList::ConstIterator param = validParamList->begin(); param != validParamList->end(); ++param) { - const std::string& pName = validParamList->name(param); + // Read the RCP parameters of the class T + RCP validParamList = factory->GetValidParameterList(); // TODO check for Teuchos::null (no parameter list validation) + TEUCHOS_TEST_FOR_EXCEPTION(validParamList == Teuchos::null, Exceptions::RuntimeError, "FactoryFactory::Build2: default parameter list is null. 
Please fix this."); + for (ParameterList::ConstIterator param = validParamList->begin(); param != validParamList->end(); ++param) { + const std::string& pName = validParamList->name(param); - if (!paramList.isParameter(pName)) { - // Ignore unknown parameters - continue; - } + if (!paramList.isParameter(pName)) { + // Ignore unknown parameters + continue; + } - if (validParamList->isType< RCP >(pName)) { - // Generate or get factory described by param - RCP generatingFact = BuildFactory(paramList.getEntry(pName), factoryMapIn, factoryManagersIn); - paramListWithFactories.set(pName, generatingFact); - } else if (validParamList->isType >(pName)) { - if (pName == "ParameterList") { - // NOTE: we cannot use - // subList = sublist(rcpFromRef(paramList), pName) - // here as that would result in sublist also being a reference to a temporary object. - // The resulting dereferencing in the corresponding factory would then segfault - RCP subList = Teuchos::sublist(rcp(new ParameterList(paramList)), pName); - paramListWithFactories.set(pName, subList); - } - } else { - paramListWithFactories.setEntry(pName, paramList.getEntry(pName)); + if (validParamList->isType >(pName)) { + // Generate or get factory described by param + RCP generatingFact = BuildFactory(paramList.getEntry(pName), factoryMapIn, factoryManagersIn); + paramListWithFactories.set(pName, generatingFact); + } else if (validParamList->isType >(pName)) { + if (pName == "ParameterList") { + // NOTE: we cannot use + // subList = sublist(rcpFromRef(paramList), pName) + // here as that would result in sublist also being a reference to a temporary object. 
+ // The resulting dereferencing in the corresponding factory would then segfault + RCP subList = Teuchos::sublist(rcp(new ParameterList(paramList)), pName); + paramListWithFactories.set(pName, subList); } + } else { + paramListWithFactories.setEntry(pName, paramList.getEntry(pName)); } - - // Configure the factory - factory->SetParameterList(paramListWithFactories); - - return factory; } - template // T must implement the Factory interface - RCP BuildRAPFactory(const Teuchos::ParameterList & paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - RCP factory; - if (paramList.isSublist("TransferFactories") == false) { - factory = Build2(paramList, factoryMapIn, factoryManagersIn); - - } else { - RCP paramListNonConst = rcp(new Teuchos::ParameterList(paramList)); - RCP transferFactories = rcp(new Teuchos::ParameterList(*sublist(paramListNonConst, "TransferFactories"))); - - paramListNonConst->remove("TransferFactories"); - - factory = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); - - for (Teuchos::ParameterList::ConstIterator param = transferFactories->begin(); param != transferFactories->end(); ++param) { - RCP p = BuildFactory(transferFactories->entry(param), factoryMapIn, factoryManagersIn); - factory->AddTransferFactory(p); - } - } + // Configure the factory + factory->SetParameterList(paramListWithFactories); - return factory; - } + return factory; + } - template // T must implement the Factory interface - RCP BuildTogglePFactory(const Teuchos::ParameterList & paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - RCP factory; - if (paramList.isSublist("TransferFactories") == false) { - //TODO put in an error message: the TogglePFactory needs a TransferFactories sublist! 
- factory = Build2(paramList, factoryMapIn, factoryManagersIn); + template // T must implement the Factory interface + RCP BuildRAPFactory(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { + RCP factory; + if (paramList.isSublist("TransferFactories") == false) { + factory = Build2(paramList, factoryMapIn, factoryManagersIn); - } else { - RCP paramListNonConst = rcp(new Teuchos::ParameterList(paramList)); - RCP transferFactories = rcp(new Teuchos::ParameterList(*sublist(paramListNonConst, "TransferFactories"))); - - paramListNonConst->remove("TransferFactories"); - - // build TogglePFactory - factory = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); - - // count how many prolongation factories and how many coarse null space factories have been declared. - // the numbers must match! - int numProlongatorFactories = 0; - int numPtentFactories = 0; - int numCoarseNspFactories = 0; - for (Teuchos::ParameterList::ConstIterator param = transferFactories->begin(); param != transferFactories->end(); ++param) { - size_t foundNsp = transferFactories->name(param).find("Nullspace"); - if (foundNsp != std::string::npos && foundNsp == 0 && transferFactories->name(param).length()==10) { - numCoarseNspFactories++; - continue; - } - size_t foundPtent = transferFactories->name(param).find("Ptent"); - if (foundPtent != std::string::npos && foundPtent == 0 && transferFactories->name(param).length()==6) { - numPtentFactories++; - continue; - } - size_t foundP = transferFactories->name(param).find("P"); - if (foundP != std::string::npos && foundP == 0 && transferFactories->name(param).length()==2) { - numProlongatorFactories++; - continue; - } - } - TEUCHOS_TEST_FOR_EXCEPTION(numProlongatorFactories!=numCoarseNspFactories, Exceptions::RuntimeError, "FactoryFactory::BuildToggleP: The user has to provide the same number of prolongator and coarse nullspace factories!"); - 
TEUCHOS_TEST_FOR_EXCEPTION(numPtentFactories!=numCoarseNspFactories, Exceptions::RuntimeError, "FactoryFactory::BuildToggleP: The user has to provide the same number of ptent and coarse nullspace factories!"); - TEUCHOS_TEST_FOR_EXCEPTION(numProlongatorFactories < 2, Exceptions::RuntimeError, "FactoryFactory::BuildToggleP: The TogglePFactory needs at least two different prolongation operators. The factories have to be provided using the names P%i and Nullspace %i, where %i denotes a number between 1 and 9."); - - // create empty vectors with data - std::vector prolongatorFactoryNames(numProlongatorFactories); - std::vector coarseNspFactoryNames(numProlongatorFactories); - std::vector ptentFactoryNames(numProlongatorFactories); - - for (Teuchos::ParameterList::ConstIterator param = transferFactories->begin(); param != transferFactories->end(); ++param) { - size_t foundNsp = transferFactories->name(param).find("Nullspace"); - if (foundNsp != std::string::npos && foundNsp == 0 && transferFactories->name(param).length()==10) { - int number = atoi(&(transferFactories->name(param).at(9))); - TEUCHOS_TEST_FOR_EXCEPTION(number < 1 || number > numProlongatorFactories, Exceptions::RuntimeError, "FactoryFactory::BuildToggleP: Please use the format Nullspace%i with %i an integer between 1 and the maximum number of prolongation operators in TogglePFactory!"); - coarseNspFactoryNames[number-1] = transferFactories->entry(param); - continue; - } - size_t foundPtent = transferFactories->name(param).find("Ptent"); - if (foundPtent != std::string::npos && foundPtent == 0 && transferFactories->name(param).length()==6) { - int number = atoi(&(transferFactories->name(param).at(5))); - TEUCHOS_TEST_FOR_EXCEPTION(number < 1 || number > numPtentFactories, Exceptions::RuntimeError, "FactoryFactory::BuildToggleP: Please use the format Ptent%i with %i an integer between 1 and the maximum number of prolongation operators in TogglePFactory!"); - ptentFactoryNames[number-1] = 
transferFactories->entry(param); - continue; - } - size_t foundP = transferFactories->name(param).find("P"); - if (foundP != std::string::npos && foundP == 0 && transferFactories->name(param).length()==2) { - int number = atoi(&(transferFactories->name(param).at(1))); - TEUCHOS_TEST_FOR_EXCEPTION(number < 1 || number > numProlongatorFactories, Exceptions::RuntimeError, "FactoryFactory::BuildToggleP: Please use the format P%i with %i an integer between 1 and the maximum number of prolongation operators in TogglePFactory!"); - prolongatorFactoryNames[number-1] = transferFactories->entry(param); - continue; - } - } + } else { + RCP paramListNonConst = rcp(new Teuchos::ParameterList(paramList)); + RCP transferFactories = rcp(new Teuchos::ParameterList(*sublist(paramListNonConst, "TransferFactories"))); - // register all prolongation factories in TogglePFactory - for (std::vector::const_iterator it = prolongatorFactoryNames.begin(); it != prolongatorFactoryNames.end(); ++it) { - RCP p = BuildFactory(*it, factoryMapIn, factoryManagersIn); - factory->AddProlongatorFactory(p); - } + paramListNonConst->remove("TransferFactories"); - // register all tentative prolongation factories in TogglePFactory - for (std::vector::const_iterator it = ptentFactoryNames.begin(); it != ptentFactoryNames.end(); ++it) { - RCP p = BuildFactory(*it, factoryMapIn, factoryManagersIn); - factory->AddPtentFactory(p); - } + factory = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); - // register all coarse nullspace factories in TogglePFactory - for (std::vector::const_iterator it = coarseNspFactoryNames.begin(); it != coarseNspFactoryNames.end(); ++it) { - RCP p = BuildFactory(*it, factoryMapIn, factoryManagersIn); - factory->AddCoarseNullspaceFactory(p); - } + for (Teuchos::ParameterList::ConstIterator param = transferFactories->begin(); param != transferFactories->end(); ++param) { + RCP p = BuildFactory(transferFactories->entry(param), factoryMapIn, factoryManagersIn); + 
factory->AddTransferFactory(p); } - return factory; } - RCP BuildToggleCoordinatesTransferFactory(const Teuchos::ParameterList & paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - RCP factory; - TEUCHOS_TEST_FOR_EXCEPTION(paramList.isSublist("TransferFactories") == false, Exceptions::RuntimeError, "FactoryFactory::BuildToggleCoordinatesTransferFactory: the ToggleCoordinatesTransferFactory needs a sublist 'TransferFactories' containing information about the subfactories for coordinate transfer!"); + return factory; + } - RCP paramListNonConst = rcp(new Teuchos::ParameterList(paramList)); + template // T must implement the Factory interface + RCP BuildTogglePFactory(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { + RCP factory; + if (paramList.isSublist("TransferFactories") == false) { + //TODO put in an error message: the TogglePFactory needs a TransferFactories sublist! + factory = Build2(paramList, factoryMapIn, factoryManagersIn); + + } else { + RCP paramListNonConst = rcp(new Teuchos::ParameterList(paramList)); RCP transferFactories = rcp(new Teuchos::ParameterList(*sublist(paramListNonConst, "TransferFactories"))); + paramListNonConst->remove("TransferFactories"); - // build CoordinatesTransferFactory - factory = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); + // build TogglePFactory + factory = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); - // count how many coordinate transfer factories have been declared. + // count how many prolongation factories and how many coarse null space factories have been declared. // the numbers must match! 
- int numCoordTransferFactories = 0; + int numProlongatorFactories = 0; + int numPtentFactories = 0; + int numCoarseNspFactories = 0; for (Teuchos::ParameterList::ConstIterator param = transferFactories->begin(); param != transferFactories->end(); ++param) { - size_t foundCoordinates = transferFactories->name(param).find("Coordinates"); - if (foundCoordinates != std::string::npos && foundCoordinates == 0 && transferFactories->name(param).length()==12) { - numCoordTransferFactories++; + size_t foundNsp = transferFactories->name(param).find("Nullspace"); + if (foundNsp != std::string::npos && foundNsp == 0 && transferFactories->name(param).length() == 10) { + numCoarseNspFactories++; + continue; + } + size_t foundPtent = transferFactories->name(param).find("Ptent"); + if (foundPtent != std::string::npos && foundPtent == 0 && transferFactories->name(param).length() == 6) { + numPtentFactories++; + continue; + } + size_t foundP = transferFactories->name(param).find("P"); + if (foundP != std::string::npos && foundP == 0 && transferFactories->name(param).length() == 2) { + numProlongatorFactories++; continue; } } - TEUCHOS_TEST_FOR_EXCEPTION(numCoordTransferFactories != 2, Exceptions::RuntimeError, "FactoryFactory::BuildToggleCoordinatesTransfer: The ToggleCoordinatesTransferFactory needs two (different) coordinate transfer factories. 
The factories have to be provided using the names Coordinates%i, where %i denotes a number between 1 and 9."); + TEUCHOS_TEST_FOR_EXCEPTION(numProlongatorFactories != numCoarseNspFactories, Exceptions::RuntimeError, "FactoryFactory::BuildToggleP: The user has to provide the same number of prolongator and coarse nullspace factories!"); + TEUCHOS_TEST_FOR_EXCEPTION(numPtentFactories != numCoarseNspFactories, Exceptions::RuntimeError, "FactoryFactory::BuildToggleP: The user has to provide the same number of ptent and coarse nullspace factories!"); + TEUCHOS_TEST_FOR_EXCEPTION(numProlongatorFactories < 2, Exceptions::RuntimeError, "FactoryFactory::BuildToggleP: The TogglePFactory needs at least two different prolongation operators. The factories have to be provided using the names P%i and Nullspace %i, where %i denotes a number between 1 and 9."); // create empty vectors with data - std::vector coarseCoordsFactoryNames(numCoordTransferFactories); + std::vector prolongatorFactoryNames(numProlongatorFactories); + std::vector coarseNspFactoryNames(numProlongatorFactories); + std::vector ptentFactoryNames(numProlongatorFactories); for (Teuchos::ParameterList::ConstIterator param = transferFactories->begin(); param != transferFactories->end(); ++param) { - size_t foundCoords = transferFactories->name(param).find("Coordinates"); - if (foundCoords != std::string::npos && foundCoords == 0 && transferFactories->name(param).length()==12) { - int number = atoi(&(transferFactories->name(param).at(11))); - TEUCHOS_TEST_FOR_EXCEPTION(number < 1 || number > numCoordTransferFactories, Exceptions::RuntimeError, "FactoryFactory::BuildToggleCoordinatesTransfer: Please use the format Coordinates%i with %i an integer between 1 and the maximum number of coordinate transfer factories in ToggleCoordinatesTransferFactory!"); - coarseCoordsFactoryNames[number-1] = transferFactories->entry(param); - continue; + size_t foundNsp = transferFactories->name(param).find("Nullspace"); + if (foundNsp != 
std::string::npos && foundNsp == 0 && transferFactories->name(param).length() == 10) { + int number = atoi(&(transferFactories->name(param).at(9))); + TEUCHOS_TEST_FOR_EXCEPTION(number < 1 || number > numProlongatorFactories, Exceptions::RuntimeError, "FactoryFactory::BuildToggleP: Please use the format Nullspace%i with %i an integer between 1 and the maximum number of prolongation operators in TogglePFactory!"); + coarseNspFactoryNames[number - 1] = transferFactories->entry(param); + continue; + } + size_t foundPtent = transferFactories->name(param).find("Ptent"); + if (foundPtent != std::string::npos && foundPtent == 0 && transferFactories->name(param).length() == 6) { + int number = atoi(&(transferFactories->name(param).at(5))); + TEUCHOS_TEST_FOR_EXCEPTION(number < 1 || number > numPtentFactories, Exceptions::RuntimeError, "FactoryFactory::BuildToggleP: Please use the format Ptent%i with %i an integer between 1 and the maximum number of prolongation operators in TogglePFactory!"); + ptentFactoryNames[number - 1] = transferFactories->entry(param); + continue; + } + size_t foundP = transferFactories->name(param).find("P"); + if (foundP != std::string::npos && foundP == 0 && transferFactories->name(param).length() == 2) { + int number = atoi(&(transferFactories->name(param).at(1))); + TEUCHOS_TEST_FOR_EXCEPTION(number < 1 || number > numProlongatorFactories, Exceptions::RuntimeError, "FactoryFactory::BuildToggleP: Please use the format P%i with %i an integer between 1 and the maximum number of prolongation operators in TogglePFactory!"); + prolongatorFactoryNames[number - 1] = transferFactories->entry(param); + continue; } } - // register all coarse nullspace factories in TogglePFactory - for (std::vector::const_iterator it = coarseCoordsFactoryNames.begin(); it != coarseCoordsFactoryNames.end(); ++it) { + // register all prolongation factories in TogglePFactory + for (std::vector::const_iterator it = prolongatorFactoryNames.begin(); it != 
prolongatorFactoryNames.end(); ++it) { RCP p = BuildFactory(*it, factoryMapIn, factoryManagersIn); - factory->AddCoordTransferFactory(p); + factory->AddProlongatorFactory(p); } - return factory; - } + // register all tentative prolongation factories in TogglePFactory + for (std::vector::const_iterator it = ptentFactoryNames.begin(); it != ptentFactoryNames.end(); ++it) { + RCP p = BuildFactory(*it, factoryMapIn, factoryManagersIn); + factory->AddPtentFactory(p); + } - //! TrilinosSmoother - // Parameter List Parsing: - // - // - // - // - // - // ... - // - // - RCP BuildTrilinosSmoother(const Teuchos::ParameterList & paramList, const FactoryMap & factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - if (paramList.begin() == paramList.end()) - return rcp(new SmootherFactory(rcp(new TrilinosSmoother()))); - - TEUCHOS_TEST_FOR_EXCEPTION(paramList.get("factory") != "TrilinosSmoother", Exceptions::RuntimeError, ""); - - // Is it true? TEUCHOS_TEST_FOR_EXCEPTION(!paramList.isParameter("type"), Exceptions::RuntimeError, "TrilinosSmoother: parameter 'type' is mandatory"); - // type="" is default in TrilinosSmoother, but what happen then? - - std::string type=""; if(paramList.isParameter("type")) type = paramList.get("type"); - int overlap=0; if(paramList.isParameter("overlap")) overlap = paramList.get ("overlap"); - // std::string verbose; if(paramList.isParameter("verbose")) verbose = paramList.get("verbose"); - Teuchos::ParameterList params; if(paramList.isParameter("ParameterList")) params = paramList.get("ParameterList"); - - // parameters from SmootherFactory - //bool bKeepSmootherData = false; if(paramList.isParameter("keep smoother data")) bKeepSmootherData = paramList.get("keep smoother data"); - - // Read in factory information for smoothers (if available...) 
- // NOTE: only a selected number of factories can be used with the Trilinos smoother - // smoothers usually work with the global data available (which is A and the transfers P and R) - - Teuchos::RCP trilSmoo = Teuchos::rcp(new TrilinosSmoother(type, params, overlap)); - - if (paramList.isParameter("LineDetection_Layers")) { - RCP generatingFact = BuildFactory(paramList.getEntry("LineDetection_Layers"), factoryMapIn, factoryManagersIn); - trilSmoo->SetFactory("LineDetection_Layers", generatingFact); + // register all coarse nullspace factories in TogglePFactory + for (std::vector::const_iterator it = coarseNspFactoryNames.begin(); it != coarseNspFactoryNames.end(); ++it) { + RCP p = BuildFactory(*it, factoryMapIn, factoryManagersIn); + factory->AddCoarseNullspaceFactory(p); } - if (paramList.isParameter("LineDetection_VertLineIds")) { - RCP generatingFact = BuildFactory(paramList.getEntry("LineDetection_Layers"), factoryMapIn, factoryManagersIn); - trilSmoo->SetFactory("LineDetection_Layers", generatingFact); + } + return factory; + } + + RCP BuildToggleCoordinatesTransferFactory(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { + RCP factory; + TEUCHOS_TEST_FOR_EXCEPTION(paramList.isSublist("TransferFactories") == false, Exceptions::RuntimeError, "FactoryFactory::BuildToggleCoordinatesTransferFactory: the ToggleCoordinatesTransferFactory needs a sublist 'TransferFactories' containing information about the subfactories for coordinate transfer!"); + + RCP paramListNonConst = rcp(new Teuchos::ParameterList(paramList)); + RCP transferFactories = rcp(new Teuchos::ParameterList(*sublist(paramListNonConst, "TransferFactories"))); + paramListNonConst->remove("TransferFactories"); + + // build CoordinatesTransferFactory + factory = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); + + // count how many coordinate transfer factories have been declared. + // the numbers must match! 
+ int numCoordTransferFactories = 0; + for (Teuchos::ParameterList::ConstIterator param = transferFactories->begin(); param != transferFactories->end(); ++param) { + size_t foundCoordinates = transferFactories->name(param).find("Coordinates"); + if (foundCoordinates != std::string::npos && foundCoordinates == 0 && transferFactories->name(param).length() == 12) { + numCoordTransferFactories++; + continue; } - if (paramList.isParameter("CoarseNumZLayers")) { - RCP generatingFact = BuildFactory(paramList.getEntry("CoarseNumZLayers"), factoryMapIn, factoryManagersIn); - trilSmoo->SetFactory("CoarseNumZLayers", generatingFact); + } + TEUCHOS_TEST_FOR_EXCEPTION(numCoordTransferFactories != 2, Exceptions::RuntimeError, "FactoryFactory::BuildToggleCoordinatesTransfer: The ToggleCoordinatesTransferFactory needs two (different) coordinate transfer factories. The factories have to be provided using the names Coordinates%i, where %i denotes a number between 1 and 9."); + + // create empty vectors with data + std::vector coarseCoordsFactoryNames(numCoordTransferFactories); + + for (Teuchos::ParameterList::ConstIterator param = transferFactories->begin(); param != transferFactories->end(); ++param) { + size_t foundCoords = transferFactories->name(param).find("Coordinates"); + if (foundCoords != std::string::npos && foundCoords == 0 && transferFactories->name(param).length() == 12) { + int number = atoi(&(transferFactories->name(param).at(11))); + TEUCHOS_TEST_FOR_EXCEPTION(number < 1 || number > numCoordTransferFactories, Exceptions::RuntimeError, "FactoryFactory::BuildToggleCoordinatesTransfer: Please use the format Coordinates%i with %i an integer between 1 and the maximum number of coordinate transfer factories in ToggleCoordinatesTransferFactory!"); + coarseCoordsFactoryNames[number - 1] = transferFactories->entry(param); + continue; } - - RCP smooFact = rcp(new SmootherFactory(Teuchos::null)); - Teuchos::ParameterList smooFactParams; - //smooFactParams.setEntry("keep 
smoother data", paramList.getEntry("keep smoother data")); - smooFact->SetParameterList(smooFactParams); - smooFact->SetSmootherPrototypes(trilSmoo); - return smooFact; } -#ifdef HAVE_MUELU_MATLAB - //! MatlabSmoother - // Parameter List Parsing: - // - // - // - // - // - // - // - // - // - RCP BuildMatlabSmoother(const Teuchos::ParameterList & paramList, const FactoryMap & factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - if (paramList.begin() == paramList.end()) - return rcp(new SmootherFactory(rcp(new MatlabSmoother()))); - - TEUCHOS_TEST_FOR_EXCEPTION(paramList.get("factory") != "MatlabSmoother", Exceptions::RuntimeError, ""); - - // Read in factory information for smoothers (if available...) - // NOTE: only a selected number of factories can be used with the Trilinos smoother - // smoothers usually work with the global data available (which is A and the transfers P and R) - - Teuchos::RCP matSmoo = Teuchos::rcp(new MatlabSmoother(paramList)); - - return rcp(new SmootherFactory(matSmoo)); + // register all coarse nullspace factories in TogglePFactory + for (std::vector::const_iterator it = coarseCoordsFactoryNames.begin(); it != coarseCoordsFactoryNames.end(); ++it) { + RCP p = BuildFactory(*it, factoryMapIn, factoryManagersIn); + factory->AddCoordTransferFactory(p); } -#endif - - RCP BuildDirectSolver(const Teuchos::ParameterList& paramList, const FactoryMap& /* factoryMapIn */, const FactoryManagerMap& /* factoryManagersIn */) const { - if (paramList.begin() == paramList.end()) - return rcp(new SmootherFactory(rcp(new DirectSolver()), Teuchos::null)); - - TEUCHOS_TEST_FOR_EXCEPTION(paramList.get("factory") != "DirectSolver", Exceptions::RuntimeError, ""); - std::string type; if(paramList.isParameter("type")) type = paramList.get("type"); - // std::string verbose; if(paramList.isParameter("verbose")) verbose = paramList.get("verbose"); - Teuchos::ParameterList params; if(paramList.isParameter("ParameterList")) params = 
paramList.get("ParameterList"); - - return rcp(new SmootherFactory(rcp(new DirectSolver(type, params)), Teuchos::null)); + return factory; + } + + //! TrilinosSmoother + // Parameter List Parsing: + // + // + // + // + // + // ... + // + // + RCP BuildTrilinosSmoother(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { + if (paramList.begin() == paramList.end()) + return rcp(new SmootherFactory(rcp(new TrilinosSmoother()))); + + TEUCHOS_TEST_FOR_EXCEPTION(paramList.get("factory") != "TrilinosSmoother", Exceptions::RuntimeError, ""); + + // Is it true? TEUCHOS_TEST_FOR_EXCEPTION(!paramList.isParameter("type"), Exceptions::RuntimeError, "TrilinosSmoother: parameter 'type' is mandatory"); + // type="" is default in TrilinosSmoother, but what happen then? + + std::string type = ""; + if (paramList.isParameter("type")) type = paramList.get("type"); + int overlap = 0; + if (paramList.isParameter("overlap")) overlap = paramList.get("overlap"); + // std::string verbose; if(paramList.isParameter("verbose")) verbose = paramList.get("verbose"); + Teuchos::ParameterList params; + if (paramList.isParameter("ParameterList")) params = paramList.get("ParameterList"); + + // parameters from SmootherFactory + //bool bKeepSmootherData = false; if(paramList.isParameter("keep smoother data")) bKeepSmootherData = paramList.get("keep smoother data"); + + // Read in factory information for smoothers (if available...) 
+ // NOTE: only a selected number of factories can be used with the Trilinos smoother + // smoothers usually work with the global data available (which is A and the transfers P and R) + + Teuchos::RCP trilSmoo = Teuchos::rcp(new TrilinosSmoother(type, params, overlap)); + + if (paramList.isParameter("LineDetection_Layers")) { + RCP generatingFact = BuildFactory(paramList.getEntry("LineDetection_Layers"), factoryMapIn, factoryManagersIn); + trilSmoo->SetFactory("LineDetection_Layers", generatingFact); + } + if (paramList.isParameter("LineDetection_VertLineIds")) { + RCP generatingFact = BuildFactory(paramList.getEntry("LineDetection_Layers"), factoryMapIn, factoryManagersIn); + trilSmoo->SetFactory("LineDetection_Layers", generatingFact); + } + if (paramList.isParameter("CoarseNumZLayers")) { + RCP generatingFact = BuildFactory(paramList.getEntry("CoarseNumZLayers"), factoryMapIn, factoryManagersIn); + trilSmoo->SetFactory("CoarseNumZLayers", generatingFact); } - template // T must implement the Factory interface - RCP BuildBlockedSmoother(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - // read in sub lists - RCP paramListNonConst = rcp(new ParameterList(paramList)); - - // internal vector of factory managers - std::vector > facManagers; - - // loop over all "block%i" sublists in parameter list - int blockid = 1; - bool blockExists = true; - while (blockExists == true) { - std::stringstream ss; - ss << "block" << blockid; - - if(paramList.isSublist(ss.str()) == true) { - // we either have a parameter group or we have a list of factories in here - RCP b = rcp(new ParameterList(*sublist(paramListNonConst, ss.str()))); - - RCP M = Teuchos::null; - - if (b->isParameter("group")) { - // use a factory manager - std::string facManagerName = b->get< std::string >("group"); - TEUCHOS_TEST_FOR_EXCEPTION(factoryManagersIn.count(facManagerName) != 1, Exceptions::RuntimeError, "Factory manager has not 
been found. Please check the spelling of the factory managers in your xml file."); - RCP Mb = factoryManagersIn.find(facManagerName)->second; - M = Teuchos::rcp_dynamic_cast(Mb); - TEUCHOS_TEST_FOR_EXCEPTION(M==Teuchos::null, Exceptions::RuntimeError, "Failed to cast FactoryManagerBase object to FactoryManager."); - } else { - // read in the list of factories - M = rcp(new FactoryManager()); - for (ParameterList::ConstIterator param = b->begin(); param != b->end(); ++param) { - RCP p = BuildFactory(b->entry(param), factoryMapIn, factoryManagersIn); - M->SetFactory(b->name(param),p); - } - } + RCP smooFact = rcp(new SmootherFactory(Teuchos::null)); + Teuchos::ParameterList smooFactParams; + //smooFactParams.setEntry("keep smoother data", paramList.getEntry("keep smoother data")); + smooFact->SetParameterList(smooFactParams); + smooFact->SetSmootherPrototypes(trilSmoo); + return smooFact; + } - // add factory manager to internal vector of factory managers - M->SetIgnoreUserData(true); - facManagers.push_back(M); - paramListNonConst->remove(ss.str()); - blockid++; +#ifdef HAVE_MUELU_MATLAB + //! MatlabSmoother + // Parameter List Parsing: + // + // + // + // + // + // + // + // + // + RCP BuildMatlabSmoother(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { + if (paramList.begin() == paramList.end()) + return rcp(new SmootherFactory(rcp(new MatlabSmoother()))); + + TEUCHOS_TEST_FOR_EXCEPTION(paramList.get("factory") != "MatlabSmoother", Exceptions::RuntimeError, ""); + + // Read in factory information for smoothers (if available...) 
+ // NOTE: only a selected number of factories can be used with the Trilinos smoother + // smoothers usually work with the global data available (which is A and the transfers P and R) + + Teuchos::RCP matSmoo = Teuchos::rcp(new MatlabSmoother(paramList)); + + return rcp(new SmootherFactory(matSmoo)); + } +#endif + + RCP BuildDirectSolver(const Teuchos::ParameterList& paramList, const FactoryMap& /* factoryMapIn */, const FactoryManagerMap& /* factoryManagersIn */) const { + if (paramList.begin() == paramList.end()) + return rcp(new SmootherFactory(rcp(new DirectSolver()), Teuchos::null)); + + TEUCHOS_TEST_FOR_EXCEPTION(paramList.get("factory") != "DirectSolver", Exceptions::RuntimeError, ""); + + std::string type; + if (paramList.isParameter("type")) type = paramList.get("type"); + // std::string verbose; if(paramList.isParameter("verbose")) verbose = paramList.get("verbose"); + Teuchos::ParameterList params; + if (paramList.isParameter("ParameterList")) params = paramList.get("ParameterList"); + + return rcp(new SmootherFactory(rcp(new DirectSolver(type, params)), Teuchos::null)); + } + + template // T must implement the Factory interface + RCP BuildBlockedSmoother(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { + // read in sub lists + RCP paramListNonConst = rcp(new ParameterList(paramList)); + + // internal vector of factory managers + std::vector > facManagers; + + // loop over all "block%i" sublists in parameter list + int blockid = 1; + bool blockExists = true; + while (blockExists == true) { + std::stringstream ss; + ss << "block" << blockid; + + if (paramList.isSublist(ss.str()) == true) { + // we either have a parameter group or we have a list of factories in here + RCP b = rcp(new ParameterList(*sublist(paramListNonConst, ss.str()))); + + RCP M = Teuchos::null; + + if (b->isParameter("group")) { + // use a factory manager + std::string facManagerName = b->get("group"); + 
TEUCHOS_TEST_FOR_EXCEPTION(factoryManagersIn.count(facManagerName) != 1, Exceptions::RuntimeError, "Factory manager has not been found. Please check the spelling of the factory managers in your xml file."); + RCP Mb = factoryManagersIn.find(facManagerName)->second; + M = Teuchos::rcp_dynamic_cast(Mb); + TEUCHOS_TEST_FOR_EXCEPTION(M == Teuchos::null, Exceptions::RuntimeError, "Failed to cast FactoryManagerBase object to FactoryManager."); } else { - blockExists = false; - break; + // read in the list of factories + M = rcp(new FactoryManager()); + for (ParameterList::ConstIterator param = b->begin(); param != b->end(); ++param) { + RCP p = BuildFactory(b->entry(param), factoryMapIn, factoryManagersIn); + M->SetFactory(b->name(param), p); + } } + // add factory manager to internal vector of factory managers + M->SetIgnoreUserData(true); + facManagers.push_back(M); + paramListNonConst->remove(ss.str()); + blockid++; + } else { + blockExists = false; + break; } + } - // create a new blocked smoother - RCP bs = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); - - // important: set block factory for A here! - // TAW: 7/6/2016: We should not need to set/hardcode the blocked operator here. - // The user might want to overwrite this in the xml file, so just - // use what is declared as "A" - //bs->SetFactory("A", MueLu::NoFactory::getRCP()); + // create a new blocked smoother + RCP bs = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); - for (int i = 0; i(facManagers.size()); i++) { - bs->AddFactoryManager(facManagers[i],i); - } + // important: set block factory for A here! + // TAW: 7/6/2016: We should not need to set/hardcode the blocked operator here. 
+ // The user might want to overwrite this in the xml file, so just + // use what is declared as "A" + //bs->SetFactory("A", MueLu::NoFactory::getRCP()); - return rcp(new SmootherFactory(bs)); + for (int i = 0; i < Teuchos::as(facManagers.size()); i++) { + bs->AddFactoryManager(facManagers[i], i); } + return rcp(new SmootherFactory(bs)); + } + #ifdef HAVE_MUELU_TEKO - RCP BuildTekoSmoother(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - // read in sub lists - RCP paramListNonConst = rcp(new ParameterList(paramList)); - RCP tekoParams = rcp(new ParameterList(paramListNonConst->sublist("Inverse Factory Library"))); - paramListNonConst->remove("Inverse Factory Library"); + RCP BuildTekoSmoother(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { + // read in sub lists + RCP paramListNonConst = rcp(new ParameterList(paramList)); + RCP tekoParams = rcp(new ParameterList(paramListNonConst->sublist("Inverse Factory Library"))); + paramListNonConst->remove("Inverse Factory Library"); + + // create a new blocked smoother + RCP bs = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); + + // important: set block factory for A here! + // TAW: 7/6/2016: We should not need to set/hardcode the blocked operator here. 
+ // The user might want to overwrite this in the xml file, so just + // use what is declared as "A" + //bs->SetFactory("A", MueLu::NoFactory::getRCP()); + + // Set Teko parameters ("Inverse Factory Library") + bs->SetTekoParameters(tekoParams); + + return rcp(new SmootherFactory(bs)); + } +#endif - // create a new blocked smoother - RCP bs = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); + RCP BuildBlockedDirectSolver(const Teuchos::ParameterList& paramList, const FactoryMap& /* factoryMapIn */, const FactoryManagerMap& /* factoryManagersIn */) const { + if (paramList.numParams() == 0) + return rcp(new SmootherFactory(rcp(new BlockedDirectSolver()))); - // important: set block factory for A here! - // TAW: 7/6/2016: We should not need to set/hardcode the blocked operator here. - // The user might want to overwrite this in the xml file, so just - // use what is declared as "A" - //bs->SetFactory("A", MueLu::NoFactory::getRCP()); + TEUCHOS_TEST_FOR_EXCEPTION(paramList.get("factory") != "BlockedDirectSolver", Exceptions::RuntimeError, "FactoryFactory::BuildBlockedDirectSolver: Generating factory needs to be a BlockedDirectSolver."); - // Set Teko parameters ("Inverse Factory Library") - bs->SetTekoParameters(tekoParams); + std::string type; + if (paramList.isParameter("type")) type = paramList.get("type"); + // std::string verbose; if(paramList.isParameter("verbose")) verbose = paramList.get("verbose"); + Teuchos::ParameterList params; + if (paramList.isParameter("ParameterList")) params = paramList.get("ParameterList"); - return rcp(new SmootherFactory(bs)); - } -#endif + return rcp(new SmootherFactory(rcp(new BlockedDirectSolver(type, params)))); + } - RCP BuildBlockedDirectSolver(const Teuchos::ParameterList& paramList, const FactoryMap& /* factoryMapIn */, const FactoryManagerMap& /* factoryManagersIn */) const { - if (paramList.numParams() == 0) - return rcp(new SmootherFactory(rcp(new BlockedDirectSolver()))); + //RCP BuildBlockedPFactory(const 
Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { + // RCP pfac = rcp(new BlockedPFactory()); - TEUCHOS_TEST_FOR_EXCEPTION(paramList.get("factory") != "BlockedDirectSolver", Exceptions::RuntimeError, "FactoryFactory::BuildBlockedDirectSolver: Generating factory needs to be a BlockedDirectSolver."); + template // T must implement the Factory interface + RCP BuildBlockedFactory(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { + RCP pfac = Teuchos::null; - std::string type; if(paramList.isParameter("type")) type = paramList.get("type"); - // std::string verbose; if(paramList.isParameter("verbose")) verbose = paramList.get("verbose"); - Teuchos::ParameterList params; if(paramList.isParameter("ParameterList")) params = paramList.get("ParameterList"); + // read in sub lists + RCP paramListNonConst = rcp(new ParameterList(paramList)); - return rcp(new SmootherFactory(rcp(new BlockedDirectSolver(type, params)))); - } + // internal vector of factory managers + std::vector > facManagers; - //RCP BuildBlockedPFactory(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - // RCP pfac = rcp(new BlockedPFactory()); - - template // T must implement the Factory interface - RCP BuildBlockedFactory(const Teuchos::ParameterList & paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - RCP pfac = Teuchos::null; - - // read in sub lists - RCP paramListNonConst = rcp(new ParameterList(paramList)); - - // internal vector of factory managers - std::vector > facManagers; - - // loop over all "block%i" sublists in parameter list - int blockid = 1; - bool blockExists = true; - while (blockExists == true) { - std::stringstream ss; - ss << "block" << blockid; - - if(paramList.isSublist(ss.str()) == true) { - // we either have a parameter 
group or we have a list of factories in here - RCP b = rcp(new ParameterList(*sublist(paramListNonConst, ss.str()))); - - RCP M = Teuchos::null; - - if (b->isParameter("group")) { - // use a factory manager - std::string facManagerName = b->get< std::string >("group"); - TEUCHOS_TEST_FOR_EXCEPTION(factoryManagersIn.count(facManagerName) != 1, Exceptions::RuntimeError, "Factory manager has not been found. Please check the spelling of the factory managers in your xml file."); - RCP Mb = factoryManagersIn.find(facManagerName)->second; - M = Teuchos::rcp_dynamic_cast(Mb); - TEUCHOS_TEST_FOR_EXCEPTION(M==Teuchos::null, Exceptions::RuntimeError, "Failed to cast FactoryManagerBase object to FactoryManager."); - } else { - // read in the list of factories - M = rcp(new FactoryManager()); - for (ParameterList::ConstIterator param = b->begin(); param != b->end(); ++param) { - RCP p = BuildFactory(b->entry(param), factoryMapIn, factoryManagersIn); - M->SetFactory(b->name(param),p); - } - } + // loop over all "block%i" sublists in parameter list + int blockid = 1; + bool blockExists = true; + while (blockExists == true) { + std::stringstream ss; + ss << "block" << blockid; + + if (paramList.isSublist(ss.str()) == true) { + // we either have a parameter group or we have a list of factories in here + RCP b = rcp(new ParameterList(*sublist(paramListNonConst, ss.str()))); - // add factory manager to internal vector of factory managers - M->SetIgnoreUserData(true); - facManagers.push_back(M); - paramListNonConst->remove(ss.str()); - blockid++; + RCP M = Teuchos::null; + + if (b->isParameter("group")) { + // use a factory manager + std::string facManagerName = b->get("group"); + TEUCHOS_TEST_FOR_EXCEPTION(factoryManagersIn.count(facManagerName) != 1, Exceptions::RuntimeError, "Factory manager has not been found. 
Please check the spelling of the factory managers in your xml file."); + RCP Mb = factoryManagersIn.find(facManagerName)->second; + M = Teuchos::rcp_dynamic_cast(Mb); + TEUCHOS_TEST_FOR_EXCEPTION(M == Teuchos::null, Exceptions::RuntimeError, "Failed to cast FactoryManagerBase object to FactoryManager."); } else { - blockExists = false; - break; + // read in the list of factories + M = rcp(new FactoryManager()); + for (ParameterList::ConstIterator param = b->begin(); param != b->end(); ++param) { + RCP p = BuildFactory(b->entry(param), factoryMapIn, factoryManagersIn); + M->SetFactory(b->name(param), p); + } } + // add factory manager to internal vector of factory managers + M->SetIgnoreUserData(true); + facManagers.push_back(M); + paramListNonConst->remove(ss.str()); + blockid++; + } else { + blockExists = false; + break; } + } - // build BlockedPFactory (without sub block information) - pfac = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); - - // add FactoryManager objects - for(size_t i = 0; iAddFactoryManager(facManagers[i]); // add factory manager - } + // build BlockedPFactory (without sub block information) + pfac = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); - return pfac; + // add FactoryManager objects + for (size_t i = 0; i < facManagers.size(); i++) { + pfac->AddFactoryManager(facManagers[i]); // add factory manager } + return pfac; + } - template // T must implement the Factory interface - RCP BuildBlockedCoordFactory(const Teuchos::ParameterList & paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - RCP pfac = Teuchos::null; - - // read in sub lists - RCP paramListNonConst = rcp(new ParameterList(paramList)); + template // T must implement the Factory interface + RCP BuildBlockedCoordFactory(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { + RCP pfac = Teuchos::null; - // internal vector of factory 
managers - std::vector > facBase; + // read in sub lists + RCP paramListNonConst = rcp(new ParameterList(paramList)); - // loop over all "block%i" sublists in parameter list - int blockid = 1; - bool blockExists = true; - while (blockExists == true) { - std::stringstream ss; - ss << "block" << blockid; + // internal vector of factory managers + std::vector > facBase; - if(paramList.isSublist(ss.str()) == true) { - // we either have a parameter group or we have a list of factories in here - RCP b = rcp(new ParameterList(*sublist(paramListNonConst, ss.str()))); + // loop over all "block%i" sublists in parameter list + int blockid = 1; + bool blockExists = true; + while (blockExists == true) { + std::stringstream ss; + ss << "block" << blockid; - // read in the list of factories - for (ParameterList::ConstIterator param = b->begin(); param != b->end(); ++param) { - RCP p = BuildFactory(b->entry(param), factoryMapIn, factoryManagersIn); - facBase.push_back(p); - } + if (paramList.isSublist(ss.str()) == true) { + // we either have a parameter group or we have a list of factories in here + RCP b = rcp(new ParameterList(*sublist(paramListNonConst, ss.str()))); - // add factory manager to internal vector of factory managers - paramListNonConst->remove(ss.str()); - blockid++; - } else { - blockExists = false; - break; + // read in the list of factories + for (ParameterList::ConstIterator param = b->begin(); param != b->end(); ++param) { + RCP p = BuildFactory(b->entry(param), factoryMapIn, factoryManagersIn); + facBase.push_back(p); } + // add factory manager to internal vector of factory managers + paramListNonConst->remove(ss.str()); + blockid++; + } else { + blockExists = false; + break; } + } - // build BlockedPFactory (without sub block information) - pfac = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); - - // add FactoryManager objects - for(size_t i = 0; iAddFactory(facBase[i]); // add factory manager - } + // build BlockedPFactory (without sub block 
information) + pfac = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); - return pfac; + // add FactoryManager objects + for (size_t i = 0; i < facBase.size(); i++) { + pfac->AddFactory(facBase[i]); // add factory manager } - }; // class -} // namespace MueLu + return pfac; + } + +}; // class +} // namespace MueLu #define MUELU_FACTORYFACTORY_SHORT -#endif // MUELU_FACTORYFACTORY_DECL_HPP +#endif // MUELU_FACTORYFACTORY_DECL_HPP - // TODO: handle factory parameters - // TODO: parameter validator - // TODO: static - // TODO: default parameters should not be duplicated here and on the Factory (ex: default for overlap (=0) is defined both here and on TrilinosSmoother constructors) +// TODO: handle factory parameters +// TODO: parameter validator +// TODO: static +// TODO: default parameters should not be duplicated here and on the Factory (ex: default for overlap (=0) is defined both here and on TrilinosSmoother constructors) diff --git a/packages/muelu/src/Interface/MueLu_HierarchyFactory.hpp b/packages/muelu/src/Interface/MueLu_HierarchyFactory.hpp index 03e2596daf12..b04d1c967246 100644 --- a/packages/muelu/src/Interface/MueLu_HierarchyFactory.hpp +++ b/packages/muelu/src/Interface/MueLu_HierarchyFactory.hpp @@ -55,42 +55,42 @@ namespace MueLu { - //! - template - class HierarchyFactory : public BaseClass { +//! +template +class HierarchyFactory : public BaseClass { #undef MUELU_HIERARCHYFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //@{ Constructors/Destructors. + public: + //@{ Constructors/Destructors. - //! Destructor. - virtual ~HierarchyFactory() { } + //! Destructor. + virtual ~HierarchyFactory() {} - //@} + //@} - //! Create an empty Hierarchy object - // Note: This function is not very useful at the moment as MueLu only have on Hierarchy class. - // In the future, we might have an abstract Hierarchy class and several derived Hierarchy classes. - // Using this function will then be the recommended way to generate a Hierarchy. 
- // - // This method is called Create() instead of Build(), because it return an non-initialized - // object (ie: MG setup is not done). - // Build() function in MueLu returns initialized objects. - virtual RCP CreateHierarchy() const = 0; + //! Create an empty Hierarchy object + // Note: This function is not very useful at the moment as MueLu only have on Hierarchy class. + // In the future, we might have an abstract Hierarchy class and several derived Hierarchy classes. + // Using this function will then be the recommended way to generate a Hierarchy. + // + // This method is called Create() instead of Build(), because it return an non-initialized + // object (ie: MG setup is not done). + // Build() function in MueLu returns initialized objects. + virtual RCP CreateHierarchy() const = 0; - //! Create a labeled empty Hierarchy object - virtual RCP CreateHierarchy(const std::string& label) const = 0; + //! Create a labeled empty Hierarchy object + virtual RCP CreateHierarchy(const std::string& label) const = 0; - //! Setup Hierarchy object - virtual void SetupHierarchy(Hierarchy & H) const = 0; + //! Setup Hierarchy object + virtual void SetupHierarchy(Hierarchy& H) const = 0; - }; // class HierarchyFactoryBase +}; // class HierarchyFactoryBase -} // namespace MueLu +} // namespace MueLu #define MUELU_HIERARCHYFACTORY_SHORT -#endif //ifndef MUELU_HIERARCHYFACTORY_HPP +#endif //ifndef MUELU_HIERARCHYFACTORY_HPP diff --git a/packages/muelu/src/Interface/MueLu_HierarchyManager.hpp b/packages/muelu/src/Interface/MueLu_HierarchyManager.hpp index 04f89f47e6ae..6abe8fbb03a5 100644 --- a/packages/muelu/src/Interface/MueLu_HierarchyManager.hpp +++ b/packages/muelu/src/Interface/MueLu_HierarchyManager.hpp @@ -70,471 +70,456 @@ namespace MueLu { - // This class stores the configuration of a Hierarchy. - // The class also provides an algorithm to build a Hierarchy from the configuration. 
- // - // See also: FactoryManager - // - template - class HierarchyManager : public HierarchyFactory { +// This class stores the configuration of a Hierarchy. +// The class also provides an algorithm to build a Hierarchy from the configuration. +// +// See also: FactoryManager +// +template +class HierarchyManager : public HierarchyFactory { #undef MUELU_HIERARCHYMANAGER_SHORT #include "MueLu_UseShortNames.hpp" - typedef std::pair keep_pair; - - public: - - //! Constructor - HierarchyManager(int numDesiredLevel = MasterList::getDefault("max levels")) : - numDesiredLevel_(numDesiredLevel), - maxCoarseSize_(MasterList::getDefault("coarse: max size")), - verbosity_(Medium), - doPRrebalance_(MasterList::getDefault("repartition: rebalance P and R")), - doPRViaCopyrebalance_(MasterList::getDefault("repartition: explicit via new copy rebalance P and R")), - implicitTranspose_(MasterList::getDefault("transpose: use implicit")), - fuseProlongationAndUpdate_(MasterList::getDefault("fuse prolongation and update")), - suppressNullspaceDimensionCheck_(MasterList::getDefault("nullspace: suppress dimension check")), - sizeOfMultiVectors_(MasterList::getDefault("number of vectors")), - graphOutputLevel_(-2) { } - - //! Destructor - virtual ~HierarchyManager() = default; - - //! - void AddFactoryManager(int startLevel, int numDesiredLevel, RCP manager) { - const int lastLevel = startLevel + numDesiredLevel - 1; - if (levelManagers_.size() < lastLevel + 1) - levelManagers_.resize(lastLevel + 1); - - for (int iLevel = startLevel; iLevel <= lastLevel; iLevel++) - levelManagers_[iLevel] = manager; - } - - //! - RCP GetFactoryManager(int levelID) const { - // NOTE: last levelManager is used for all the remaining levels - return (levelID >= levelManagers_.size() ? levelManagers_[levelManagers_.size()-1] : levelManagers_[levelID]); - } - - //! returns number of factory managers stored in levelManagers_ vector. 
- size_t getNumFactoryManagers() const { - return levelManagers_.size(); - } - - //! - void CheckConfig() { - for (int i = 0; i < levelManagers_.size(); i++) - TEUCHOS_TEST_FOR_EXCEPTION(levelManagers_[i] == Teuchos::null, Exceptions::RuntimeError, "MueLu:HierarchyConfig::CheckConfig(): Undefined configuration for level:"); - } - - //@{ - - virtual RCP CreateHierarchy() const { - return rcp(new Hierarchy()); - } - - virtual RCP CreateHierarchy(const std::string& label) const { - return rcp(new Hierarchy(label)); - } - - //! Setup Hierarchy object - virtual void SetupHierarchy(Hierarchy& H) const { - TEUCHOS_TEST_FOR_EXCEPTION(!H.GetLevel(0)->IsAvailable("A"), Exceptions::RuntimeError, "No fine level operator"); - - RCP l0 = H.GetLevel(0); - RCP Op = l0->Get>("A"); - - // Compare nullspace dimension to NumPDEs and throw/warn based on user input - if (l0->IsAvailable("Nullspace")) { - RCP A = Teuchos::rcp_dynamic_cast(Op); - if (A != Teuchos::null) { - RCP nullspace = l0->Get>("Nullspace"); - - if (static_cast(A->GetFixedBlockSize()) > nullspace->getNumVectors()) - { - std::stringstream msg; - msg << "User-provided nullspace has fewer vectors (" - << nullspace->getNumVectors() << ") than number of PDE equations (" - << A->GetFixedBlockSize() << "). "; - - if (suppressNullspaceDimensionCheck_) - { - msg << "It depends on the PDE, if this is a problem or not."; - this->GetOStream(Warnings0) << msg.str() << std::endl; - } - else - { - msg << "Add the missing nullspace vectors! (You can suppress this check. See the MueLu user guide for details.)"; - TEUCHOS_TEST_FOR_EXCEPTION(static_cast(A->GetFixedBlockSize()) > nullspace->getNumVectors(), Exceptions::RuntimeError, msg.str()); - } + typedef std::pair keep_pair; + + public: + //! 
Constructor + HierarchyManager(int numDesiredLevel = MasterList::getDefault("max levels")) + : numDesiredLevel_(numDesiredLevel) + , maxCoarseSize_(MasterList::getDefault("coarse: max size")) + , verbosity_(Medium) + , doPRrebalance_(MasterList::getDefault("repartition: rebalance P and R")) + , doPRViaCopyrebalance_(MasterList::getDefault("repartition: explicit via new copy rebalance P and R")) + , implicitTranspose_(MasterList::getDefault("transpose: use implicit")) + , fuseProlongationAndUpdate_(MasterList::getDefault("fuse prolongation and update")) + , suppressNullspaceDimensionCheck_(MasterList::getDefault("nullspace: suppress dimension check")) + , sizeOfMultiVectors_(MasterList::getDefault("number of vectors")) + , graphOutputLevel_(-2) {} + + //! Destructor + virtual ~HierarchyManager() = default; + + //! + void AddFactoryManager(int startLevel, int numDesiredLevel, RCP manager) { + const int lastLevel = startLevel + numDesiredLevel - 1; + if (levelManagers_.size() < lastLevel + 1) + levelManagers_.resize(lastLevel + 1); + + for (int iLevel = startLevel; iLevel <= lastLevel; iLevel++) + levelManagers_[iLevel] = manager; + } + + //! + RCP GetFactoryManager(int levelID) const { + // NOTE: last levelManager is used for all the remaining levels + return (levelID >= levelManagers_.size() ? levelManagers_[levelManagers_.size() - 1] : levelManagers_[levelID]); + } + + //! returns number of factory managers stored in levelManagers_ vector. + size_t getNumFactoryManagers() const { + return levelManagers_.size(); + } + + //! + void CheckConfig() { + for (int i = 0; i < levelManagers_.size(); i++) + TEUCHOS_TEST_FOR_EXCEPTION(levelManagers_[i] == Teuchos::null, Exceptions::RuntimeError, "MueLu:HierarchyConfig::CheckConfig(): Undefined configuration for level:"); + } + + //@{ + + virtual RCP CreateHierarchy() const { + return rcp(new Hierarchy()); + } + + virtual RCP CreateHierarchy(const std::string& label) const { + return rcp(new Hierarchy(label)); + } + + //! 
Setup Hierarchy object + virtual void SetupHierarchy(Hierarchy& H) const { + TEUCHOS_TEST_FOR_EXCEPTION(!H.GetLevel(0)->IsAvailable("A"), Exceptions::RuntimeError, "No fine level operator"); + + RCP l0 = H.GetLevel(0); + RCP Op = l0->Get>("A"); + + // Compare nullspace dimension to NumPDEs and throw/warn based on user input + if (l0->IsAvailable("Nullspace")) { + RCP A = Teuchos::rcp_dynamic_cast(Op); + if (A != Teuchos::null) { + RCP nullspace = l0->Get>("Nullspace"); + + if (static_cast(A->GetFixedBlockSize()) > nullspace->getNumVectors()) { + std::stringstream msg; + msg << "User-provided nullspace has fewer vectors (" + << nullspace->getNumVectors() << ") than number of PDE equations (" + << A->GetFixedBlockSize() << "). "; + + if (suppressNullspaceDimensionCheck_) { + msg << "It depends on the PDE, if this is a problem or not."; + this->GetOStream(Warnings0) << msg.str() << std::endl; + } else { + msg << "Add the missing nullspace vectors! (You can suppress this check. See the MueLu user guide for details.)"; + TEUCHOS_TEST_FOR_EXCEPTION(static_cast(A->GetFixedBlockSize()) > nullspace->getNumVectors(), Exceptions::RuntimeError, msg.str()); } - } else { - this->GetOStream(Warnings0) << "Skipping dimension check of user-supplied nullspace because user-supplied operator is not a matrix" << std::endl; } + } else { + this->GetOStream(Warnings0) << "Skipping dimension check of user-supplied nullspace because user-supplied operator is not a matrix" << std::endl; } + } #ifdef HAVE_MUELU_DEBUG - // Reset factories' data used for debugging - for (int i = 0; i < levelManagers_.size(); i++) - levelManagers_[i]->ResetDebugData(); + // Reset factories' data used for debugging + for (int i = 0; i < levelManagers_.size(); i++) + levelManagers_[i]->ResetDebugData(); #endif - // Setup Matrix - // TODO: I should certainly undo this somewhere... + // Setup Matrix + // TODO: I should certainly undo this somewhere... 
- Xpetra::UnderlyingLib lib = Op->getDomainMap()->lib(); - H.setlib(lib); + Xpetra::UnderlyingLib lib = Op->getDomainMap()->lib(); + H.setlib(lib); - SetupOperator(*Op); - SetupExtra(H); + SetupOperator(*Op); + SetupExtra(H); - // Setup Hierarchy - H.SetMaxCoarseSize(maxCoarseSize_); - VerboseObject::SetDefaultVerbLevel(verbosity_); - if (graphOutputLevel_ >= 0 || graphOutputLevel_ == -1) - H.EnableGraphDumping("dep_graph", graphOutputLevel_); + // Setup Hierarchy + H.SetMaxCoarseSize(maxCoarseSize_); + VerboseObject::SetDefaultVerbLevel(verbosity_); + if (graphOutputLevel_ >= 0 || graphOutputLevel_ == -1) + H.EnableGraphDumping("dep_graph", graphOutputLevel_); - if (VerboseObject::IsPrint(Statistics2)) { - RCP Amat = rcp_dynamic_cast(Op); + if (VerboseObject::IsPrint(Statistics2)) { + RCP Amat = rcp_dynamic_cast(Op); - if (!Amat.is_null()) { - RCP params = rcp(new ParameterList()); - params->set("printLoadBalancingInfo", true); - params->set("printCommInfo", true); + if (!Amat.is_null()) { + RCP params = rcp(new ParameterList()); + params->set("printLoadBalancingInfo", true); + params->set("printCommInfo", true); - VerboseObject::GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*Amat, "A0", params); - } else { - VerboseObject::GetOStream(Warnings1) << "Fine level operator is not a matrix, statistics are not available" << std::endl; - } + VerboseObject::GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*Amat, "A0", params); + } else { + VerboseObject::GetOStream(Warnings1) << "Fine level operator is not a matrix, statistics are not available" << std::endl; } + } - H.SetPRrebalance(doPRrebalance_); - H.SetPRViaCopyrebalance(doPRViaCopyrebalance_); - H.SetImplicitTranspose(implicitTranspose_); - H.SetFuseProlongationAndUpdate(fuseProlongationAndUpdate_); - - H.Clear(); - - // There are few issues with using Keep in the interpreter: - // 1. Hierarchy::Keep interface takes a name and a factory. 
If - // factories are different on different levels, the AddNewLevel() call - // in Hierarchy does not work properly, as it assume that factories are - // the same. - // 2. FactoryManager does not have a Keep option, only Hierarchy and - // Level have it - // 3. Interpreter constructs factory managers, but not levels. So we - // cannot set up Keep flags there. - // - // The solution implemented here does the following: - // 1. Construct hierarchy with dummy levels. This avoids - // Hierarchy::AddNewLevel() calls which will propagate wrong - // inheritance. - // 2. Interpreter constructs keep_ array with names and factories for - // that level - // 3. For each level, we call Keep(name, factory) for each keep_ - for (int i = 0; i < numDesiredLevel_; i++) { - std::map >::const_iterator it = keep_.find(i); - if (it != keep_.end()) { - RCP l = H.GetLevel(i); - const std::vector& keeps = it->second; - for (size_t j = 0; j < keeps.size(); j++) - l->Keep(keeps[j].first, keeps[j].second); - } - if (i < numDesiredLevel_-1) { - RCP newLevel = rcp(new Level()); - H.AddLevel(newLevel); - } + H.SetPRrebalance(doPRrebalance_); + H.SetPRViaCopyrebalance(doPRViaCopyrebalance_); + H.SetImplicitTranspose(implicitTranspose_); + H.SetFuseProlongationAndUpdate(fuseProlongationAndUpdate_); + + H.Clear(); + + // There are few issues with using Keep in the interpreter: + // 1. Hierarchy::Keep interface takes a name and a factory. If + // factories are different on different levels, the AddNewLevel() call + // in Hierarchy does not work properly, as it assume that factories are + // the same. + // 2. FactoryManager does not have a Keep option, only Hierarchy and + // Level have it + // 3. Interpreter constructs factory managers, but not levels. So we + // cannot set up Keep flags there. + // + // The solution implemented here does the following: + // 1. Construct hierarchy with dummy levels. This avoids + // Hierarchy::AddNewLevel() calls which will propagate wrong + // inheritance. + // 2. 
Interpreter constructs keep_ array with names and factories for + // that level + // 3. For each level, we call Keep(name, factory) for each keep_ + for (int i = 0; i < numDesiredLevel_; i++) { + std::map>::const_iterator it = keep_.find(i); + if (it != keep_.end()) { + RCP l = H.GetLevel(i); + const std::vector& keeps = it->second; + for (size_t j = 0; j < keeps.size(); j++) + l->Keep(keeps[j].first, keeps[j].second); } + if (i < numDesiredLevel_ - 1) { + RCP newLevel = rcp(new Level()); + H.AddLevel(newLevel); + } + } - // Matrices to print - for(auto iter=matricesToPrint_.begin(); iter!=matricesToPrint_.end(); iter++) - ExportDataSetKeepFlags(H,iter->second,iter->first); + // Matrices to print + for (auto iter = matricesToPrint_.begin(); iter != matricesToPrint_.end(); iter++) + ExportDataSetKeepFlags(H, iter->second, iter->first); - // Vectors, aggregates and other things that need special case handling - ExportDataSetKeepFlags(H, nullspaceToPrint_, "Nullspace"); - ExportDataSetKeepFlags(H, coordinatesToPrint_, "Coordinates"); - // NOTE: Aggregates use the next level's Factory - ExportDataSetKeepFlagsNextLevel(H, aggregatesToPrint_, "Aggregates"); + // Vectors, aggregates and other things that need special case handling + ExportDataSetKeepFlags(H, nullspaceToPrint_, "Nullspace"); + ExportDataSetKeepFlags(H, coordinatesToPrint_, "Coordinates"); + // NOTE: Aggregates use the next level's Factory + ExportDataSetKeepFlagsNextLevel(H, aggregatesToPrint_, "Aggregates"); #ifdef HAVE_MUELU_INTREPID2 - ExportDataSetKeepFlags(H,elementToNodeMapsToPrint_, "pcoarsen: element to node map"); + ExportDataSetKeepFlags(H, elementToNodeMapsToPrint_, "pcoarsen: element to node map"); #endif - // Data to save only (these do not have a level, so we do all levels) - for(int i=0; iprint(H.GetOStream(Developer), verbosity_); - - isLastLevel = r || (levelID == lastLevelID); - levelID++; - } - if (!matvecParams_.is_null()) - H.SetMatvecParams(matvecParams_); - 
H.AllocateLevelMultiVectors(sizeOfMultiVectors_); - // Set hierarchy description. - // This is cached, but involves and MPI_Allreduce. - H.description(); - H.describe(H.GetOStream(Runtime0), verbosity_); - - // When we reuse hierarchy, it is necessary that we don't - // change the number of levels. We also cannot make requests - // for coarser levels, because we don't construct all the - // data on previous levels. For instance, let's say our first - // run constructed three levels. If we try to do requests during - // next setup for the fourth level, it would need Aggregates - // which we didn't construct for level 3 because we reused P. - // To fix this situation, we change the number of desired levels - // here. - numDesiredLevel_ = levelID; - - // Matrix prints - for(auto iter = matricesToPrint_.begin(); iter != matricesToPrint_.end(); iter++) { - WriteData(H,iter->second,iter->first); - } + int levelID = 0; + int lastLevelID = numDesiredLevel_ - 1; + bool isLastLevel = false; - // Vectors, aggregates and all things we need to print manually - WriteData(H, nullspaceToPrint_, "Nullspace"); - WriteData(H, coordinatesToPrint_, "Coordinates"); - WriteDataAggregates(H, aggregatesToPrint_, "Aggregates"); + while (!isLastLevel) { + bool r = H.Setup(levelID, + LvlMngr(levelID - 1, lastLevelID), + LvlMngr(levelID, lastLevelID), + LvlMngr(levelID + 1, lastLevelID)); + if (levelID < H.GetNumLevels()) + H.GetLevel(levelID)->print(H.GetOStream(Developer), verbosity_); + isLastLevel = r || (levelID == lastLevelID); + levelID++; + } + if (!matvecParams_.is_null()) + H.SetMatvecParams(matvecParams_); + H.AllocateLevelMultiVectors(sizeOfMultiVectors_); + // Set hierarchy description. + // This is cached, but involves and MPI_Allreduce. + H.description(); + H.describe(H.GetOStream(Runtime0), verbosity_); + + // When we reuse hierarchy, it is necessary that we don't + // change the number of levels. 
We also cannot make requests + // for coarser levels, because we don't construct all the + // data on previous levels. For instance, let's say our first + // run constructed three levels. If we try to do requests during + // next setup for the fourth level, it would need Aggregates + // which we didn't construct for level 3 because we reused P. + // To fix this situation, we change the number of desired levels + // here. + numDesiredLevel_ = levelID; + + // Matrix prints + for (auto iter = matricesToPrint_.begin(); iter != matricesToPrint_.end(); iter++) { + WriteData(H, iter->second, iter->first); + } + // Vectors, aggregates and all things we need to print manually + WriteData(H, nullspaceToPrint_, "Nullspace"); + WriteData(H, coordinatesToPrint_, "Coordinates"); + WriteDataAggregates(H, aggregatesToPrint_, "Aggregates"); #ifdef HAVE_MUELU_INTREPID2 - typedef Kokkos::DynRankView FCi; - WriteDataFC(H,elementToNodeMapsToPrint_, "pcoarsen: element to node map","el2node"); + typedef Kokkos::DynRankView FCi; + WriteDataFC(H, elementToNodeMapsToPrint_, "pcoarsen: element to node map", "el2node"); #endif + } //SetupHierarchy - } //SetupHierarchy - - //@} - - typedef std::map > FactoryMap; + //@} - protected: //TODO: access function + typedef std::map> FactoryMap; - //! Setup Matrix object - virtual void SetupOperator(Operator& /* Op */) const { } + protected: //TODO: access function + //! Setup Matrix object + virtual void SetupOperator(Operator& /* Op */) const {} - //! Setup extra data - // TODO: merge with SetupMatrix ? - virtual void SetupExtra(Hierarchy& /* H */) const { } + //! Setup extra data + // TODO: merge with SetupMatrix ? 
+ virtual void SetupExtra(Hierarchy& /* H */) const {} - // TODO this was private - // Used in SetupHierarchy() to access levelManagers_ - // Inputs i=-1 and i=size() are allowed to simplify calls to hierarchy->Setup() - Teuchos::RCP LvlMngr(int levelID, int lastLevelID) const { - // NOTE: the order of 'if' statements is important - if (levelID == -1) // levelID = -1 corresponds to the finest level - return Teuchos::null; + // TODO this was private + // Used in SetupHierarchy() to access levelManagers_ + // Inputs i=-1 and i=size() are allowed to simplify calls to hierarchy->Setup() + Teuchos::RCP LvlMngr(int levelID, int lastLevelID) const { + // NOTE: the order of 'if' statements is important + if (levelID == -1) // levelID = -1 corresponds to the finest level + return Teuchos::null; - if (levelID == lastLevelID+1) // levelID = 'lastLevelID+1' corresponds to the last level (i.e., no nextLevel) - return Teuchos::null; + if (levelID == lastLevelID + 1) // levelID = 'lastLevelID+1' corresponds to the last level (i.e., no nextLevel) + return Teuchos::null; - if (levelManagers_.size() == 0) { // default factory manager. - // The default manager is shared across levels, initialized only if needed and deleted with the HierarchyManager - static RCP defaultMngr = rcp(new FactoryManager()); - return defaultMngr; - } - - return GetFactoryManager(levelID); + if (levelManagers_.size() == 0) { // default factory manager. + // The default manager is shared across levels, initialized only if needed and deleted with the HierarchyManager + static RCP defaultMngr = rcp(new FactoryManager()); + return defaultMngr; } - //! @group Hierarchy parameters - //! @{ + return GetFactoryManager(levelID); + } + + //! @group Hierarchy parameters + //! 
@{ - mutable int numDesiredLevel_; - Xpetra::global_size_t maxCoarseSize_; - MsgType verbosity_; + mutable int numDesiredLevel_; + Xpetra::global_size_t maxCoarseSize_; + MsgType verbosity_; - bool doPRrebalance_; - bool doPRViaCopyrebalance_; - bool implicitTranspose_; - bool fuseProlongationAndUpdate_; + bool doPRrebalance_; + bool doPRViaCopyrebalance_; + bool implicitTranspose_; + bool fuseProlongationAndUpdate_; - /*! @brief Flag to indicate whether the check of the nullspace dimension is suppressed + /*! @brief Flag to indicate whether the check of the nullspace dimension is suppressed By default, we do not suppress such a check, as it acts as a safety mechanism. Yet, certain scenarios deliberately use nullspaces with less nullspace vectors than NumPDEs. Therefore, the user can suppress this check. Then, the error message is converted to a warning. */ - bool suppressNullspaceDimensionCheck_; + bool suppressNullspaceDimensionCheck_; - int sizeOfMultiVectors_; + int sizeOfMultiVectors_; - //! -2 = no output, -1 = all levels - int graphOutputLevel_; + //! -2 = no output, -1 = all levels + int graphOutputLevel_; - //! Lists of entities to be exported (or saved) - // Items here get handled manually - Teuchos::Array nullspaceToPrint_; - Teuchos::Array coordinatesToPrint_; - Teuchos::Array aggregatesToPrint_; - Teuchos::Array elementToNodeMapsToPrint_; + //! Lists of entities to be exported (or saved) + // Items here get handled manually + Teuchos::Array nullspaceToPrint_; + Teuchos::Array coordinatesToPrint_; + Teuchos::Array aggregatesToPrint_; + Teuchos::Array elementToNodeMapsToPrint_; - // Data we'll need to save, not necessarily print - Teuchos::Array dataToSave_; + // Data we'll need to save, not necessarily print + Teuchos::Array dataToSave_; - // Matrices we'll need to print - std::map > matricesToPrint_; + // Matrices we'll need to print + std::map> matricesToPrint_; - Teuchos::RCP matvecParams_; + Teuchos::RCP matvecParams_; - std::map > keep_; - //! 
@} + std::map> keep_; + //! @} - private: - // Set the keep flags for Export Data - void ExportDataSetKeepFlags(Hierarchy& H, const Teuchos::Array& data, const std::string& name) const { - for (int i = 0; i < data.size(); ++i) { - if (data[i] < H.GetNumLevels()) { - RCP L = H.GetLevel(data[i]); - if(!L.is_null() && data[i] < levelManagers_.size()) - L->AddKeepFlag(name, &*levelManagers_[data[i]]->GetFactory(name)); - } + private: + // Set the keep flags for Export Data + void ExportDataSetKeepFlags(Hierarchy& H, const Teuchos::Array& data, const std::string& name) const { + for (int i = 0; i < data.size(); ++i) { + if (data[i] < H.GetNumLevels()) { + RCP L = H.GetLevel(data[i]); + if (!L.is_null() && data[i] < levelManagers_.size()) + L->AddKeepFlag(name, &*levelManagers_[data[i]]->GetFactory(name)); } } - - void ExportDataSetKeepFlagsNextLevel(Hierarchy& H, const Teuchos::Array& data, const std::string& name) const { - for (int i = 0; i < data.size(); ++i) { - if (data[i] < H.GetNumLevels()) { - RCP L = H.GetLevel(data[i]); - if(!L.is_null() && data[i]+1 < levelManagers_.size()) - L->AddKeepFlag(name, &*levelManagers_[data[i]+1]->GetFactory(name)); - } + } + + void ExportDataSetKeepFlagsNextLevel(Hierarchy& H, const Teuchos::Array& data, const std::string& name) const { + for (int i = 0; i < data.size(); ++i) { + if (data[i] < H.GetNumLevels()) { + RCP L = H.GetLevel(data[i]); + if (!L.is_null() && data[i] + 1 < levelManagers_.size()) + L->AddKeepFlag(name, &*levelManagers_[data[i] + 1]->GetFactory(name)); } } - - // Set the keep flags for Export Data - void ExportDataSetKeepFlagsAll(Hierarchy& H, const std::string& name) const { - for (int i=0; i < H.GetNumLevels(); i++ ) { - RCP L = H.GetLevel(i); - if(!L.is_null() && i < levelManagers_.size()) - L->AddKeepFlag(name, &*levelManagers_[i]->GetFactory(name)); - } + } + + // Set the keep flags for Export Data + void ExportDataSetKeepFlagsAll(Hierarchy& H, const std::string& name) const { + for (int i = 0; i < 
H.GetNumLevels(); i++) { + RCP L = H.GetLevel(i); + if (!L.is_null() && i < levelManagers_.size()) + L->AddKeepFlag(name, &*levelManagers_[i]->GetFactory(name)); } - - - template - void WriteData(Hierarchy& H, const Teuchos::Array& data, const std::string& name) const { - for (int i = 0; i < data.size(); ++i) { - std::string fileName; - if (H.getObjectLabel() != "") - fileName = H.getObjectLabel() + "_" + name + "_" + Teuchos::toString(data[i]) + ".m"; - else - fileName = name + "_" + Teuchos::toString(data[i]) + ".m"; - - if (data[i] < H.GetNumLevels()) { - RCP L = H.GetLevel(data[i]); - if (data[i] < levelManagers_.size() && L->IsAvailable(name,&*levelManagers_[data[i]]->GetFactory(name))) { - // Try generating factory - RCP M = L->template Get< RCP >(name,&*levelManagers_[data[i]]->GetFactory(name)); - if (!M.is_null()) { - Xpetra::IO::Write(fileName,* M); - } + } + + template + void WriteData(Hierarchy& H, const Teuchos::Array& data, const std::string& name) const { + for (int i = 0; i < data.size(); ++i) { + std::string fileName; + if (H.getObjectLabel() != "") + fileName = H.getObjectLabel() + "_" + name + "_" + Teuchos::toString(data[i]) + ".m"; + else + fileName = name + "_" + Teuchos::toString(data[i]) + ".m"; + + if (data[i] < H.GetNumLevels()) { + RCP L = H.GetLevel(data[i]); + if (data[i] < levelManagers_.size() && L->IsAvailable(name, &*levelManagers_[data[i]]->GetFactory(name))) { + // Try generating factory + RCP M = L->template Get>(name, &*levelManagers_[data[i]]->GetFactory(name)); + if (!M.is_null()) { + Xpetra::IO::Write(fileName, *M); } - else if (L->IsAvailable(name)) { - // Try nofactory - RCP M = L->template Get< RCP >(name); - if (!M.is_null()) { - Xpetra::IO::Write(fileName,* M); - } + } else if (L->IsAvailable(name)) { + // Try nofactory + RCP M = L->template Get>(name); + if (!M.is_null()) { + Xpetra::IO::Write(fileName, *M); } } } } - - void WriteDataAggregates(Hierarchy& H, const Teuchos::Array& data, const std::string& name) const { - 
for (int i = 0; i < data.size(); ++i) { - const std::string fileName = name + "_" + Teuchos::toString(data[i]) + ".m"; - - if (data[i] < H.GetNumLevels()) { - RCP L = H.GetLevel(data[i]); - - // NOTE: Aggregates use the next level's factory - RCP agg; - if(data[i]+1 < H.GetNumLevels() && L->IsAvailable(name,&*levelManagers_[data[i]+1]->GetFactory(name))) { - // Try generating factory - agg = L->template Get< RCP >(name,&*levelManagers_[data[i]+1]->GetFactory(name)); - } - else if (L->IsAvailable(name)) { - agg = L->template Get >("Aggregates"); - } - if(!agg.is_null()) { - std::ofstream ofs(fileName); - Teuchos::FancyOStream fofs(rcp(&ofs,false)); - agg->print(fofs,Teuchos::VERB_EXTREME); - } + } + + void WriteDataAggregates(Hierarchy& H, const Teuchos::Array& data, const std::string& name) const { + for (int i = 0; i < data.size(); ++i) { + const std::string fileName = name + "_" + Teuchos::toString(data[i]) + ".m"; + + if (data[i] < H.GetNumLevels()) { + RCP L = H.GetLevel(data[i]); + + // NOTE: Aggregates use the next level's factory + RCP agg; + if (data[i] + 1 < H.GetNumLevels() && L->IsAvailable(name, &*levelManagers_[data[i] + 1]->GetFactory(name))) { + // Try generating factory + agg = L->template Get>(name, &*levelManagers_[data[i] + 1]->GetFactory(name)); + } else if (L->IsAvailable(name)) { + agg = L->template Get>("Aggregates"); + } + if (!agg.is_null()) { + std::ofstream ofs(fileName); + Teuchos::FancyOStream fofs(rcp(&ofs, false)); + agg->print(fofs, Teuchos::VERB_EXTREME); } } } - - template - void WriteDataFC(Hierarchy& H, const Teuchos::Array& data, const std::string& name, const std::string & ofname) const { - for (int i = 0; i < data.size(); ++i) { - const std::string fileName = ofname + "_" + Teuchos::toString(data[i]) + ".m"; - - if (data[i] < H.GetNumLevels()) { - RCP L = H.GetLevel(data[i]); - - if (L->IsAvailable(name)) { - RCP M = L->template Get< RCP >(name); - if (!M.is_null()) { - RCP A = L->template Get >("A"); - RCP AG = 
A->getCrsGraph(); - WriteFieldContainer(fileName,*M,*AG->getColMap()); - } + } + + template + void WriteDataFC(Hierarchy& H, const Teuchos::Array& data, const std::string& name, const std::string& ofname) const { + for (int i = 0; i < data.size(); ++i) { + const std::string fileName = ofname + "_" + Teuchos::toString(data[i]) + ".m"; + + if (data[i] < H.GetNumLevels()) { + RCP L = H.GetLevel(data[i]); + + if (L->IsAvailable(name)) { + RCP M = L->template Get>(name); + if (!M.is_null()) { + RCP A = L->template Get>("A"); + RCP AG = A->getCrsGraph(); + WriteFieldContainer(fileName, *M, *AG->getColMap()); } } } } + } - // For dumping an IntrepidPCoarsening element-to-node map to disk - template - void WriteFieldContainer(const std::string& fileName, T & fcont,const Map &colMap) const { - - size_t num_els = (size_t) fcont.extent(0); - size_t num_vecs =(size_t) fcont.extent(1); + // For dumping an IntrepidPCoarsening element-to-node map to disk + template + void WriteFieldContainer(const std::string& fileName, T& fcont, const Map& colMap) const { + size_t num_els = (size_t)fcont.extent(0); + size_t num_vecs = (size_t)fcont.extent(1); - // Generate rowMap - Teuchos::RCP rowMap = Xpetra::MapFactory::Build(colMap.lib(),Teuchos::OrdinalTraits::invalid(),fcont.extent(0),colMap.getIndexBase(),colMap.getComm()); + // Generate rowMap + Teuchos::RCP rowMap = Xpetra::MapFactory::Build(colMap.lib(), Teuchos::OrdinalTraits::invalid(), fcont.extent(0), colMap.getIndexBase(), colMap.getComm()); - // Fill multivector to use *petra dump routines - RCP vec = Xpetra::MultiVectorFactory::Build(rowMap,num_vecs); + // Fill multivector to use *petra dump routines + RCP vec = Xpetra::MultiVectorFactory::Build(rowMap, num_vecs); - for(size_t j=0; j v = vec->getDataNonConst(j); - for(size_t i=0; i::Write(fileName,*vec); + for (size_t j = 0; j < num_vecs; j++) { + Teuchos::ArrayRCP v = vec->getDataNonConst(j); + for (size_t i = 0; i < num_els; i++) + v[i] = colMap.getGlobalElement(fcont(i, j)); 
} + Xpetra::IO::Write(fileName, *vec); + } + // Levels + Array> levelManagers_; // one FactoryManager per level (the last levelManager is used for all the remaining levels) - // Levels - Array > levelManagers_; // one FactoryManager per level (the last levelManager is used for all the remaining levels) - - }; // class HierarchyManager +}; // class HierarchyManager -} // namespace MueLu +} // namespace MueLu #define MUELU_HIERARCHYMANAGER_SHORT -#endif // MUELU_HIERARCHYMANAGER_HPP +#endif // MUELU_HIERARCHYMANAGER_HPP //TODO: split into _decl/_def // TODO: default value for first param (FactoryManager()) should not be duplicated (code maintainability) diff --git a/packages/muelu/src/Interface/MueLu_ML2MueLuParameterTranslator.cpp b/packages/muelu/src/Interface/MueLu_ML2MueLuParameterTranslator.cpp index 540e556f62e4..ca71b2f0e4a5 100644 --- a/packages/muelu/src/Interface/MueLu_ML2MueLuParameterTranslator.cpp +++ b/packages/muelu/src/Interface/MueLu_ML2MueLuParameterTranslator.cpp @@ -46,448 +46,498 @@ #include "MueLu_ConfigDefs.hpp" #if defined(HAVE_MUELU_ML) -# include -# if defined(HAVE_ML_EPETRA) && defined(HAVE_ML_TEUCHOS) -# include -# include // for default values -# include -# endif +#include +#if defined(HAVE_ML_EPETRA) && defined(HAVE_ML_TEUCHOS) +#include +#include // for default values +#include +#endif #endif #include namespace MueLu { +std::string ML2MueLuParameterTranslator::GetSmootherFactory(const Teuchos::ParameterList& paramList, Teuchos::ParameterList& adaptingParamList, const std::string& pname, const std::string& value) { + TEUCHOS_TEST_FOR_EXCEPTION(pname != "coarse: type" && pname != "coarse: list" && pname != "smoother: type" && pname.find("smoother: list", 0) != 0, + Exceptions::RuntimeError, + "MueLu::MLParameterListInterpreter::Setup(): Only \"coarse: type\", \"smoother: type\" or \"smoother: list\" (\"coarse: list\") are " + "supported as ML parameters for transformation of smoother/solver parameters to MueLu"); + + // string stream 
containing the smoother/solver xml parameters + std::stringstream mueluss; + + // Check whether we are dealing with coarse level (solver) parameters or level smoother parameters + std::string mode = "smoother:"; + bool is_coarse = false; + if (pname.find("coarse:", 0) == 0) { + mode = "coarse:"; + is_coarse = true; + } - std::string ML2MueLuParameterTranslator::GetSmootherFactory(const Teuchos::ParameterList& paramList, Teuchos::ParameterList& adaptingParamList, const std::string& pname, const std::string& value) { - - TEUCHOS_TEST_FOR_EXCEPTION(pname != "coarse: type" && pname != "coarse: list" && pname != "smoother: type" && pname.find("smoother: list",0) != 0, - Exceptions::RuntimeError, - "MueLu::MLParameterListInterpreter::Setup(): Only \"coarse: type\", \"smoother: type\" or \"smoother: list\" (\"coarse: list\") are " - "supported as ML parameters for transformation of smoother/solver parameters to MueLu"); + // check whether pre and/or post smoothing + std::string PreOrPost = "both"; + if (paramList.isParameter(mode + " pre or post")) + PreOrPost = paramList.get(mode + " pre or post"); + + TEUCHOS_TEST_FOR_EXCEPTION(mode == "coarse:" && PreOrPost != "both", Exceptions::RuntimeError, + "MueLu::MLParameterListInterpreter::Setup(): The parameter \"coarse: pre or post\" is not supported by MueLu. " + "It does not make sense for direct solvers. For iterative solvers you obtain the same effect by increasing, " + "e.g., the number of sweeps for the coarse grid smoother. 
Please remove it from your parameters."); + + // select smoother type + std::string valuestr = value; // temporary variable + std::transform(valuestr.begin(), valuestr.end(), valuestr.begin(), ::tolower); + if (valuestr == "jacobi" || valuestr == "gauss-seidel" || valuestr == "symmetric gauss-seidel") { + std::string my_name; + if (PreOrPost == "both") + my_name = "\"" + pname + "\""; + else + my_name = "\"smoother: " + PreOrPost + " type\""; + mueluss << "" << std::endl; + + } else if (valuestr == "hiptmair") { + std::string my_name; + if (PreOrPost == "both") + my_name = "\"" + pname + "\""; + else + my_name = "\"smoother: " + PreOrPost + " type\""; + mueluss << "" << std::endl; + + } else if (valuestr == "ifpack") { + std::string my_name = "\"" + pname + "\""; + if (paramList.isParameter("smoother: ifpack type")) { + if (paramList.get("smoother: ifpack type") == "ILU") { + mueluss << "" << std::endl; + adaptingParamList.remove("smoother: ifpack type", false); + } + if (paramList.get("smoother: ifpack type") == "ILUT") { + mueluss << "" << std::endl; + adaptingParamList.remove("smoother: ifpack type", false); + } + } - + } else if ((valuestr == "chebyshev") || (valuestr == "mls")) { + std::string my_name = "\"" + pname + "\""; + mueluss << "" << std::endl; - // string stream containing the smoother/solver xml parameters - std::stringstream mueluss; + } else if (valuestr.length() > strlen("amesos") && valuestr.substr(0, strlen("amesos")) == "amesos") { /* catch Amesos-* */ + std::string solverType = valuestr.substr(strlen("amesos") + 1); /* ("amesos-klu" -> "klu") */ - // Check whether we are dealing with coarse level (solver) parameters or level smoother parameters - std::string mode = "smoother:"; - bool is_coarse = false; - if (pname.find("coarse:", 0) == 0) { - mode = "coarse:"; - is_coarse = true; - } + bool valid = false; + const int validatorSize = 5; + std::string validator[validatorSize] = {"superlu", "superludist", "klu", "umfpack", "mumps"}; + for (int 
i = 0; i < validatorSize; i++) + if (validator[i] == solverType) + valid = true; + TEUCHOS_TEST_FOR_EXCEPTION(!valid, Exceptions::RuntimeError, + "MueLu::MLParameterListInterpreter: unknown smoother type. '" << solverType << "' not supported."); - // check whether pre and/or post smoothing - std::string PreOrPost = "both"; - if (paramList.isParameter(mode + " pre or post")) - PreOrPost = paramList.get(mode + " pre or post"); - - TEUCHOS_TEST_FOR_EXCEPTION(mode == "coarse:" && PreOrPost != "both", Exceptions::RuntimeError, - "MueLu::MLParameterListInterpreter::Setup(): The parameter \"coarse: pre or post\" is not supported by MueLu. " - "It does not make sense for direct solvers. For iterative solvers you obtain the same effect by increasing, " - "e.g., the number of sweeps for the coarse grid smoother. Please remove it from your parameters."); - - // select smoother type - std::string valuestr = value; // temporary variable - std::transform(valuestr.begin(), valuestr.end(), valuestr.begin(), ::tolower); - if ( valuestr == "jacobi" || valuestr == "gauss-seidel" || valuestr == "symmetric gauss-seidel" ) { - std::string my_name; - if ( PreOrPost == "both" ) my_name = "\"" + pname + "\""; - else my_name = "\"smoother: " + PreOrPost + " type\""; - mueluss << "" << std::endl; - - } else if ( valuestr == "hiptmair" ) { - std::string my_name; - if ( PreOrPost == "both" ) my_name = "\"" + pname + "\""; - else my_name = "\"smoother: " + PreOrPost + " type\""; - mueluss << "" << std::endl; - - } else if ( valuestr == "ifpack" ) { - std::string my_name = "\"" + pname + "\""; - if ( paramList.isParameter("smoother: ifpack type") ) { - if ( paramList.get("smoother: ifpack type") == "ILU" ) { - mueluss << "" << std::endl; - adaptingParamList.remove("smoother: ifpack type",false); - } - if ( paramList.get("smoother: ifpack type") == "ILUT" ) { - mueluss << "" << std::endl; - adaptingParamList.remove("smoother: ifpack type",false); - } - } + mueluss << "" << std::endl; - } else if 
(( valuestr == "chebyshev" ) || ( valuestr == "mls" )) { - std::string my_name = "\"" + pname + "\""; - mueluss << "" << std::endl; + } else { + // TODO error message + std::cout << "error in " << __FILE__ << ":" << __LINE__ << " could not find valid smoother/solver" << std::endl; + } - } else if (valuestr.length() > strlen("amesos") && valuestr.substr(0, strlen("amesos")) == "amesos") { /* catch Amesos-* */ - std::string solverType = valuestr.substr(strlen("amesos")+1); /* ("amesos-klu" -> "klu") */ + // set smoother: pre or post parameter + // Note that there is no "coarse: pre or post" in MueLu! + if (paramList.isParameter("smoother: pre or post") && mode == "smoother:") { + //std::cout << "paramList" << paramList << std::endl; + //std::string smootherPreOrPost = paramList.get("smoother: pre or post"); + //std::cout << "Create pre or post parameter with " << smootherPreOrPost << std::endl; + mueluss << "" << std::endl; + adaptingParamList.remove("smoother: pre or post", false); + } - bool valid = false; - const int validatorSize = 5; - std::string validator[validatorSize] = {"superlu", "superludist", "klu", "umfpack", "mumps"}; - for (int i=0; i < validatorSize; i++) - if (validator[i] == solverType) - valid = true; - TEUCHOS_TEST_FOR_EXCEPTION(!valid, Exceptions::RuntimeError, - "MueLu::MLParameterListInterpreter: unknown smoother type. 
'" << solverType << "' not supported."); + // create smoother parameter list + if (PreOrPost != "both") { + mueluss << "" << std::endl; + } else { + mueluss << "" << std::endl; + } - mueluss << "" << std::endl; + // relaxation based smoothers: - } else { - // TODO error message - std::cout << "error in " << __FILE__ << ":" << __LINE__ << " could not find valid smoother/solver" << std::endl; + if (valuestr == "jacobi" || valuestr == "gauss-seidel" || valuestr == "symmetric gauss-seidel") { + if (valuestr == "jacobi") { + mueluss << "" << std::endl; + adaptingParamList.remove("relaxation: type", false); + } + if (valuestr == "gauss-seidel") { + mueluss << "" << std::endl; + adaptingParamList.remove("relaxation: type", false); + } + if (valuestr == "symmetric gauss-seidel") { + mueluss << "" << std::endl; + adaptingParamList.remove("relaxation: type", false); } - // set smoother: pre or post parameter - // Note that there is no "coarse: pre or post" in MueLu! - if ( paramList.isParameter("smoother: pre or post") && mode == "smoother:") { - //std::cout << "paramList" << paramList << std::endl; - //std::string smootherPreOrPost = paramList.get("smoother: pre or post"); - //std::cout << "Create pre or post parameter with " << smootherPreOrPost << std::endl; - mueluss << "" << std::endl; - adaptingParamList.remove("smoother: pre or post",false); + if (paramList.isParameter("smoother: sweeps")) { + mueluss << "("smoother: sweeps") << "\"/>" << std::endl; + adaptingParamList.remove("smoother: sweeps", false); + } + if (paramList.isParameter("smoother: damping factor")) { + mueluss << "("smoother: damping factor") << "\"/>" << std::endl; + adaptingParamList.remove("smoother: damping factor", false); + } + if (paramList.isParameter("smoother: use l1 Gauss-Seidel")) { + mueluss << "("smoother: use l1 Gauss-Seidel") << "\"/>" << std::endl; + adaptingParamList.remove("smoother: use l1 Gauss-Seidel", false); } + } - // create smoother parameter list - if (PreOrPost != "both") { - 
mueluss << "" << std::endl; + // Chebyshev + if (valuestr == "chebyshev") { + if (paramList.isParameter("smoother: polynomial order")) { + mueluss << "("smoother: polynomial order") << "\"/>" << std::endl; + adaptingParamList.remove("smoother: polynomial order", false); } else { - mueluss << "" << std::endl; + mueluss << "" << std::endl; } - - // relaxation based smoothers: - - if ( valuestr == "jacobi" || valuestr == "gauss-seidel" || valuestr == "symmetric gauss-seidel" ) { - if ( valuestr == "jacobi" ) { mueluss << "" << std::endl; adaptingParamList.remove("relaxation: type",false); } - if ( valuestr == "gauss-seidel" ) { mueluss << "" << std::endl; adaptingParamList.remove("relaxation: type",false); } - if ( valuestr == "symmetric gauss-seidel" ) { mueluss << "" << std::endl; adaptingParamList.remove("relaxation: type",false); } - - if ( paramList.isParameter("smoother: sweeps") ) { mueluss << "("smoother: sweeps") << "\"/>" << std::endl; adaptingParamList.remove("smoother: sweeps",false); } - if ( paramList.isParameter("smoother: damping factor") ) { mueluss << "("smoother: damping factor") << "\"/>" << std::endl; adaptingParamList.remove("smoother: damping factor",false); } - if ( paramList.isParameter("smoother: use l1 Gauss-Seidel") ) { mueluss << "("smoother: use l1 Gauss-Seidel") << "\"/>" << std::endl; adaptingParamList.remove("smoother: use l1 Gauss-Seidel",false); } + if (paramList.isParameter("smoother: Chebyshev alpha")) { + mueluss << "("smoother: Chebyshev alpha") << "\"/>" << std::endl; + adaptingParamList.remove("smoother: Chebyshev alpha", false); + } else { + mueluss << "" << std::endl; + adaptingParamList.remove("smoother: Chebyshev alpha", false); } - - // Chebyshev - if ( valuestr == "chebyshev") { - if ( paramList.isParameter("smoother: polynomial order") ) { mueluss << "("smoother: polynomial order") << "\"/>" << std::endl; adaptingParamList.remove("smoother: polynomial order",false); } - else { mueluss << "" << std::endl; } - if ( 
paramList.isParameter("smoother: Chebyshev alpha") ) { mueluss << "("smoother: Chebyshev alpha") << "\"/>" << std::endl; adaptingParamList.remove("smoother: Chebyshev alpha",false); } - else { mueluss << "" << std::endl; adaptingParamList.remove("smoother: Chebyshev alpha",false); } - if ( paramList.isParameter("eigen-analysis: type") ) { mueluss << "("eigen-analysis: type") << "\"/>" << std::endl; adaptingParamList.remove("eigen-analysis: type",false); } - else { mueluss << "" << std::endl; } + if (paramList.isParameter("eigen-analysis: type")) { + mueluss << "("eigen-analysis: type") << "\"/>" << std::endl; + adaptingParamList.remove("eigen-analysis: type", false); + } else { + mueluss << "" << std::endl; } + } - // MLS - if ( valuestr == "mls") { - if ( paramList.isParameter("smoother: MLS polynomial order") ) { mueluss << "("smoother: MLS polynomial order") << "\"/>" << std::endl; adaptingParamList.remove("smoother: MLS polynomial order",false); } - else if ( paramList.isParameter("smoother: polynomial order") ) { mueluss << "("smoother: polynomial order") << "\"/>" << std::endl; adaptingParamList.remove("smoother: polynomial order",false); } - else { mueluss << "" << std::endl; } - if ( paramList.isParameter("smoother: MLS alpha") ) { mueluss << "("smoother: MLS alpha") << "\"/>" << std::endl; adaptingParamList.remove("smoother: MLS alpha",false); } - else if ( paramList.isParameter("smoother: Chebyshev alpha") ) { mueluss << "("smoother: Chebyshev alpha") << "\"/>" << std::endl; adaptingParamList.remove("smoother: Chebyshev alpha",false); } - else { mueluss << "" << std::endl; } - if ( paramList.isParameter("eigen-analysis: type") ) { mueluss << "("eigen-analysis: type") << "\"/>" << std::endl; adaptingParamList.remove("eigen-analysis: type",false); } - else { mueluss << "" << std::endl; } + // MLS + if (valuestr == "mls") { + if (paramList.isParameter("smoother: MLS polynomial order")) { + mueluss << "("smoother: MLS polynomial order") << "\"/>" << 
std::endl; + adaptingParamList.remove("smoother: MLS polynomial order", false); + } else if (paramList.isParameter("smoother: polynomial order")) { + mueluss << "("smoother: polynomial order") << "\"/>" << std::endl; + adaptingParamList.remove("smoother: polynomial order", false); + } else { + mueluss << "" << std::endl; + } + if (paramList.isParameter("smoother: MLS alpha")) { + mueluss << "("smoother: MLS alpha") << "\"/>" << std::endl; + adaptingParamList.remove("smoother: MLS alpha", false); + } else if (paramList.isParameter("smoother: Chebyshev alpha")) { + mueluss << "("smoother: Chebyshev alpha") << "\"/>" << std::endl; + adaptingParamList.remove("smoother: Chebyshev alpha", false); + } else { + mueluss << "" << std::endl; } + if (paramList.isParameter("eigen-analysis: type")) { + mueluss << "("eigen-analysis: type") << "\"/>" << std::endl; + adaptingParamList.remove("eigen-analysis: type", false); + } else { + mueluss << "" << std::endl; + } + } - if ( valuestr == "hiptmair" ) { - std::string subSmootherType = "Chebyshev"; - if (!is_coarse && paramList.isParameter("subsmoother: type")) - subSmootherType = paramList.get("subsmoother: type"); - if (is_coarse && paramList.isParameter("smoother: subsmoother type")) - subSmootherType = paramList.get("smoother: subsmoother type"); - - std::string subSmootherIfpackType; - if (subSmootherType == "Chebyshev") - subSmootherIfpackType = "CHEBYSHEV"; - else if (subSmootherType == "Jacobi" || subSmootherType == "Gauss-Seidel" || subSmootherType == "symmetric Gauss-Seidel") { - if (subSmootherType == "symmetric Gauss-Seidel") subSmootherType = "Symmetric Gauss-Seidel"; // FIXME - subSmootherIfpackType = "RELAXATION"; - } else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListTranslator: unknown smoother type. 
'" << subSmootherType << "' not supported by MueLu."); - - mueluss << "" << std::endl; - mueluss << "" << std::endl; - - mueluss << "" << std::endl; - if (subSmootherType == "Chebyshev") { - std::string edge_sweeps = is_coarse ? "smoother: edge sweeps" : "subsmoother: edge sweeps"; - std::string cheby_alpha = is_coarse ? "smoother: Chebyshev alpha" : "subsmoother: Chebyshev_alpha"; - - if (paramList.isParameter(edge_sweeps)) { - mueluss << "(edge_sweeps) << "\"/>" << std::endl; - adaptingParamList.remove("subsmoother: edge sweeps", false); - } - if (paramList.isParameter(cheby_alpha)) { - mueluss << "(cheby_alpha) << "\"/>" << std::endl; - } - } else { - std::string edge_sweeps = is_coarse ? "smoother: edge sweeps" : "subsmoother: edge sweeps"; - std::string SGS_damping = is_coarse ? "smoother: SGS damping factor" : "subsmoother: SGS damping factor"; - - if (paramList.isParameter(edge_sweeps)) { - mueluss << "" << std::endl; - mueluss << "(edge_sweeps) << "\"/>" << std::endl; - adaptingParamList.remove(edge_sweeps, false); - } - if (paramList.isParameter(SGS_damping)) { - mueluss << "(SGS_damping) << "\"/>" << std::endl; - } + if (valuestr == "hiptmair") { + std::string subSmootherType = "Chebyshev"; + if (!is_coarse && paramList.isParameter("subsmoother: type")) + subSmootherType = paramList.get("subsmoother: type"); + if (is_coarse && paramList.isParameter("smoother: subsmoother type")) + subSmootherType = paramList.get("smoother: subsmoother type"); + + std::string subSmootherIfpackType; + if (subSmootherType == "Chebyshev") + subSmootherIfpackType = "CHEBYSHEV"; + else if (subSmootherType == "Jacobi" || subSmootherType == "Gauss-Seidel" || subSmootherType == "symmetric Gauss-Seidel") { + if (subSmootherType == "symmetric Gauss-Seidel") subSmootherType = "Symmetric Gauss-Seidel"; // FIXME + subSmootherIfpackType = "RELAXATION"; + } else + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListTranslator: unknown smoother type. 
'" << subSmootherType << "' not supported by MueLu."); + + mueluss << "" << std::endl; + mueluss << "" << std::endl; + + mueluss << "" << std::endl; + if (subSmootherType == "Chebyshev") { + std::string edge_sweeps = is_coarse ? "smoother: edge sweeps" : "subsmoother: edge sweeps"; + std::string cheby_alpha = is_coarse ? "smoother: Chebyshev alpha" : "subsmoother: Chebyshev_alpha"; + + if (paramList.isParameter(edge_sweeps)) { + mueluss << "(edge_sweeps) << "\"/>" << std::endl; + adaptingParamList.remove("subsmoother: edge sweeps", false); } - mueluss << "" << std::endl; - - mueluss << "" << std::endl; - if (subSmootherType == "Chebyshev") { - std::string node_sweeps = is_coarse ? "smoother: node sweeps" : "subsmoother: node sweeps"; - std::string cheby_alpha = is_coarse ? "smoother: Chebyshev alpha" : "subsmoother: Chebyshev_alpha"; - if (paramList.isParameter(node_sweeps)) { - mueluss << "(node_sweeps) << "\"/>" << std::endl; - adaptingParamList.remove("subsmoother: node sweeps", false); - } - if (paramList.isParameter(cheby_alpha)) { - mueluss << "(cheby_alpha) << "\"/>" << std::endl; - adaptingParamList.remove("subsmoother: Chebyshev alpha", false); - } - } else { - std::string node_sweeps = is_coarse ? "smoother: node sweeps" : "subsmoother: node sweeps"; - std::string SGS_damping = is_coarse ? "smoother: SGS damping factor" : "subsmoother: SGS damping factor"; - - if (paramList.isParameter(node_sweeps)) { - mueluss << "" << std::endl; - mueluss << "(node_sweeps) << "\"/>" << std::endl; - adaptingParamList.remove("subsmoother: node sweeps", false); - } - if (paramList.isParameter(SGS_damping)) { - mueluss << "(SGS_damping) << "\"/>" << std::endl; - adaptingParamList.remove("subsmoother: SGS damping factor", false); - } + if (paramList.isParameter(cheby_alpha)) { + mueluss << "(cheby_alpha) << "\"/>" << std::endl; } - mueluss << "" << std::endl; + } else { + std::string edge_sweeps = is_coarse ? 
"smoother: edge sweeps" : "subsmoother: edge sweeps"; + std::string SGS_damping = is_coarse ? "smoother: SGS damping factor" : "subsmoother: SGS damping factor"; + if (paramList.isParameter(edge_sweeps)) { + mueluss << "" << std::endl; + mueluss << "(edge_sweeps) << "\"/>" << std::endl; + adaptingParamList.remove(edge_sweeps, false); + } + if (paramList.isParameter(SGS_damping)) { + mueluss << "(SGS_damping) << "\"/>" << std::endl; + } } + mueluss << "" << std::endl; - // parameters for ILU based preconditioners - if ( valuestr == "ifpack") { + mueluss << "" << std::endl; + if (subSmootherType == "Chebyshev") { + std::string node_sweeps = is_coarse ? "smoother: node sweeps" : "subsmoother: node sweeps"; + std::string cheby_alpha = is_coarse ? "smoother: Chebyshev alpha" : "subsmoother: Chebyshev_alpha"; + if (paramList.isParameter(node_sweeps)) { + mueluss << "(node_sweeps) << "\"/>" << std::endl; + adaptingParamList.remove("subsmoother: node sweeps", false); + } + if (paramList.isParameter(cheby_alpha)) { + mueluss << "(cheby_alpha) << "\"/>" << std::endl; + adaptingParamList.remove("subsmoother: Chebyshev alpha", false); + } + } else { + std::string node_sweeps = is_coarse ? "smoother: node sweeps" : "subsmoother: node sweeps"; + std::string SGS_damping = is_coarse ? 
"smoother: SGS damping factor" : "subsmoother: SGS damping factor"; - // add Ifpack parameters - if ( paramList.isParameter("smoother: ifpack overlap") ) { mueluss << "("smoother: ifpack overlap") << "\"/>" << std::endl; adaptingParamList.remove("smoother: ifpack overlap",false); } - if ( paramList.isParameter("smoother: ifpack level-of-fill") ) { mueluss << "("smoother: ifpack level-of-fill") << "\"/>" << std::endl; adaptingParamList.remove("smoother: ifpack level-of-fill",false); } - if ( paramList.isParameter("smoother: ifpack absolute threshold") ) { mueluss << "("smoother: ifpack absolute threshold") << "\"/>" << std::endl; adaptingParamList.remove("smoother: ifpack absolute threshold",false); } - if ( paramList.isParameter("smoother: ifpack relative threshold") ) { mueluss << "("smoother: ifpack relative threshold") << "\"/>" << std::endl; adaptingParamList.remove("smoother: ifpack relative threshold",false); } + if (paramList.isParameter(node_sweeps)) { + mueluss << "" << std::endl; + mueluss << "(node_sweeps) << "\"/>" << std::endl; + adaptingParamList.remove("subsmoother: node sweeps", false); + } + if (paramList.isParameter(SGS_damping)) { + mueluss << "(SGS_damping) << "\"/>" << std::endl; + adaptingParamList.remove("subsmoother: SGS damping factor", false); + } } - mueluss << "" << std::endl; + } - // max coarse level size parameter (outside of smoother parameter lists) - if ( paramList.isParameter("smoother: max size") ) { - mueluss << "("smoother: max size") << "\"/>" << std::endl; adaptingParamList.remove("smoother: max size",false); + // parameters for ILU based preconditioners + if (valuestr == "ifpack") { + // add Ifpack parameters + if (paramList.isParameter("smoother: ifpack overlap")) { + mueluss << "("smoother: ifpack overlap") << "\"/>" << std::endl; + adaptingParamList.remove("smoother: ifpack overlap", false); + } + if (paramList.isParameter("smoother: ifpack level-of-fill")) { + mueluss << "("smoother: ifpack level-of-fill") << "\"/>" << 
std::endl; + adaptingParamList.remove("smoother: ifpack level-of-fill", false); + } + if (paramList.isParameter("smoother: ifpack absolute threshold")) { + mueluss << "("smoother: ifpack absolute threshold") << "\"/>" << std::endl; + adaptingParamList.remove("smoother: ifpack absolute threshold", false); + } + if (paramList.isParameter("smoother: ifpack relative threshold")) { + mueluss << "("smoother: ifpack relative threshold") << "\"/>" << std::endl; + adaptingParamList.remove("smoother: ifpack relative threshold", false); } - - return mueluss.str(); } - std::string ML2MueLuParameterTranslator::SetParameterList(const Teuchos::ParameterList & paramList_in, const std::string& defaultVals) { - Teuchos::ParameterList paramList = paramList_in; - - RCP out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); // TODO: use internal out (GetOStream()) + mueluss << "" << std::endl; -#if defined(HAVE_MUELU_ML) && defined(HAVE_ML_EPETRA) && defined(HAVE_ML_TEUCHOS) + // max coarse level size parameter (outside of smoother parameter lists) + if (paramList.isParameter("smoother: max size")) { + mueluss << "("smoother: max size") << "\"/>" << std::endl; + adaptingParamList.remove("smoother: max size", false); + } - // TODO alternative with standard parameterlist from ML user guide? 
+ return mueluss.str(); +} - if (defaultVals != "") { - TEUCHOS_TEST_FOR_EXCEPTION(defaultVals!="SA" && defaultVals!="NSSA" && defaultVals!="refmaxwell" && defaultVals!="Maxwell", Exceptions::RuntimeError, - "MueLu::MLParameterListInterpreter: only \"SA\", \"NSSA\", \"refmaxwell\" and \"Maxwell\" allowed as options for ML default parameters."); - Teuchos::ParameterList ML_defaultlist; - if (defaultVals == "refmaxwell") - ML_Epetra::SetDefaultsRefMaxwell(ML_defaultlist); - else - ML_Epetra::SetDefaults(defaultVals,ML_defaultlist); +std::string ML2MueLuParameterTranslator::SetParameterList(const Teuchos::ParameterList& paramList_in, const std::string& defaultVals) { + Teuchos::ParameterList paramList = paramList_in; - // merge user parameters with default parameters - MueLu::MergeParameterList(paramList_in, ML_defaultlist, true); - paramList = ML_defaultlist; - } -#else - if (defaultVals != "") { - // If no validator available: issue a warning and set parameter value to false in the output list - *out << "Warning: MueLu_ENABLE_ML=OFF, ML_ENABLE_Epetra=OFF or ML_ENABLE_TEUCHOS=OFF. No ML default values available." << std::endl; - } -#endif // HAVE_MUELU_ML && HAVE_ML_EPETRA && HAVE_ML_TEUCHOS + RCP out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); // TODO: use internal out (GetOStream()) - // - // Move smoothers/aggregation/coarse parameters to sublists - // +#if defined(HAVE_MUELU_ML) && defined(HAVE_ML_EPETRA) && defined(HAVE_ML_TEUCHOS) - // ML allows to have level-specific smoothers/aggregation/coarse parameters at the top level of the list or/and defined in sublists: - // See also: ML Guide section 6.4.1, MueLu::CreateSublists, ML_CreateSublists - ParameterList paramListWithSubList; - MueLu::CreateSublists(paramList, paramListWithSubList); + // TODO alternative with standard parameterlist from ML user guide? 
- paramList = paramListWithSubList; // swap - Teuchos::ParameterList adaptingParamList = paramList; // copy of paramList which is used to removed already interpreted parameters - - // - // Validate parameter list - // - { - bool validate = paramList.get("ML validate parameter list", true); /* true = default in ML */ - if (validate && defaultVals!="refmaxwell") { + if (defaultVals != "") { + TEUCHOS_TEST_FOR_EXCEPTION(defaultVals != "SA" && defaultVals != "NSSA" && defaultVals != "refmaxwell" && defaultVals != "Maxwell", Exceptions::RuntimeError, + "MueLu::MLParameterListInterpreter: only \"SA\", \"NSSA\", \"refmaxwell\" and \"Maxwell\" allowed as options for ML default parameters."); + Teuchos::ParameterList ML_defaultlist; + if (defaultVals == "refmaxwell") + ML_Epetra::SetDefaultsRefMaxwell(ML_defaultlist); + else + ML_Epetra::SetDefaults(defaultVals, ML_defaultlist); + // merge user parameters with default parameters + MueLu::MergeParameterList(paramList_in, ML_defaultlist, true); + paramList = ML_defaultlist; + } +#else + if (defaultVals != "") { + // If no validator available: issue a warning and set parameter value to false in the output list + *out << "Warning: MueLu_ENABLE_ML=OFF, ML_ENABLE_Epetra=OFF or ML_ENABLE_TEUCHOS=OFF. No ML default values available." 
<< std::endl; + } +#endif // HAVE_MUELU_ML && HAVE_ML_EPETRA && HAVE_ML_TEUCHOS + + // + // Move smoothers/aggregation/coarse parameters to sublists + // + + // ML allows to have level-specific smoothers/aggregation/coarse parameters at the top level of the list or/and defined in sublists: + // See also: ML Guide section 6.4.1, MueLu::CreateSublists, ML_CreateSublists + ParameterList paramListWithSubList; + MueLu::CreateSublists(paramList, paramListWithSubList); + + paramList = paramListWithSubList; // swap + Teuchos::ParameterList adaptingParamList = paramList; // copy of paramList which is used to removed already interpreted parameters + + // + // Validate parameter list + // + { + bool validate = paramList.get("ML validate parameter list", true); /* true = default in ML */ + if (validate && defaultVals != "refmaxwell") { #if defined(HAVE_MUELU_ML) && defined(HAVE_ML_EPETRA) && defined(HAVE_ML_TEUCHOS) - // Validate parameter list using ML validator - int depth = paramList.get("ML validate depth", 5); /* 5 = default in ML */ - TEUCHOS_TEST_FOR_EXCEPTION(! ML_Epetra::ValidateMLPParameters(paramList, depth), Exceptions::RuntimeError, - "ERROR: ML's Teuchos::ParameterList contains incorrect parameter!"); + // Validate parameter list using ML validator + int depth = paramList.get("ML validate depth", 5); /* 5 = default in ML */ + TEUCHOS_TEST_FOR_EXCEPTION(!ML_Epetra::ValidateMLPParameters(paramList, depth), Exceptions::RuntimeError, + "ERROR: ML's Teuchos::ParameterList contains incorrect parameter!"); #else - // If no validator available: issue a warning and set parameter value to false in the output list - *out << "Warning: MueLu_ENABLE_ML=OFF, ML_ENABLE_Epetra=OFF or ML_ENABLE_TEUCHOS=OFF. The parameter list cannot be validated." 
<< std::endl; - paramList.set("ML validate parameter list", false); - -#endif // HAVE_MUELU_ML && HAVE_ML_EPETRA && HAVE_ML_TEUCHOS - } // if(validate) - } // scope - - - { - // Special handling of ML's aux aggregation - // - // In ML, when "aggregation: aux: enable" == true, the threshold - // is set via "aggregation: aux: threshold" instead of - // "aggregation: threshold". In MueLu, we use "aggregation: drop - // tol" regardless of "sa: use filtering". So depending on - // "aggregation: aux: enable" we use either one or the other - // threshold to set "aggregation: drop tol". - if (paramListWithSubList.isParameter("aggregation: aux: enable") && paramListWithSubList.get("aggregation: aux: enable")) { - if (paramListWithSubList.isParameter("aggregation: aux: threshold")) { - paramListWithSubList.set("aggregation: threshold", paramListWithSubList.get("aggregation: aux: threshold")); - paramListWithSubList.remove("aggregation: aux: threshold"); - } - } - } - - // stringstream for concatenating xml parameter strings. 
- std::stringstream mueluss; - - // create surrounding MueLu parameter list - mueluss << "" << std::endl; - - // loop over all ML parameters in provided parameter list - for (ParameterList::ConstIterator param = paramListWithSubList.begin(); param != paramListWithSubList.end(); ++param) { - - // extract ML parameter name - const std::string & pname=paramListWithSubList.name(param); - - // extract corresponding (ML) value - // remove ParameterList specific information from result string - std::stringstream valuess; - valuess << paramList.entry(param); - std::string valuestr = valuess.str(); - replaceAll(valuestr, "[unused]", ""); - replaceAll(valuestr, "[default]", ""); - valuestr = trim(valuestr); - - // transform ML parameter to corresponding MueLu parameter and generate XML string - std::string valueInterpreterStr = "\"" + valuestr + "\""; - std::string ret = MasterList::interpretParameterName(MasterList::ML2MueLu(pname),valueInterpreterStr); - - // special handling for verbosity level - if (pname == "ML output") { - // Translate verbosity parameter - int verbosityLevel = std::stoi(valuestr); - std::string eVerbLevel = "none"; - if (verbosityLevel == 0) eVerbLevel = "none"; - if (verbosityLevel >= 1) eVerbLevel = "low"; - if (verbosityLevel >= 5) eVerbLevel = "medium"; - if (verbosityLevel >= 10) eVerbLevel = "high"; - if (verbosityLevel >= 11) eVerbLevel = "extreme"; - if (verbosityLevel >= 42) eVerbLevel = "test"; - if (verbosityLevel >= 666) eVerbLevel = "interfacetest"; - mueluss << "" << std::endl; - continue; - } + // If no validator available: issue a warning and set parameter value to false in the output list + *out << "Warning: MueLu_ENABLE_ML=OFF, ML_ENABLE_Epetra=OFF or ML_ENABLE_TEUCHOS=OFF. The parameter list cannot be validated." 
<< std::endl; + paramList.set("ML validate parameter list", false); - // add XML string - if (ret != "") { - mueluss << ret << std::endl; +#endif // HAVE_MUELU_ML && HAVE_ML_EPETRA && HAVE_ML_TEUCHOS + } // if(validate) + } // scope - // remove parameter from ML parameter list - adaptingParamList.remove(pname,false); + { + // Special handling of ML's aux aggregation + // + // In ML, when "aggregation: aux: enable" == true, the threshold + // is set via "aggregation: aux: threshold" instead of + // "aggregation: threshold". In MueLu, we use "aggregation: drop + // tol" regardless of "sa: use filtering". So depending on + // "aggregation: aux: enable" we use either one or the other + // threshold to set "aggregation: drop tol". + if (paramListWithSubList.isParameter("aggregation: aux: enable") && paramListWithSubList.get("aggregation: aux: enable")) { + if (paramListWithSubList.isParameter("aggregation: aux: threshold")) { + paramListWithSubList.set("aggregation: threshold", paramListWithSubList.get("aggregation: aux: threshold")); + paramListWithSubList.remove("aggregation: aux: threshold"); } + } + } - // make sure that MueLu's phase1 matches ML's - mueluss << "" << std::endl; - - // make sure that MueLu's phase2a matches ML's - mueluss << "" << std::endl; - - // make sure that MueLu's phase2b matches ML's - mueluss << "" << std::endl; + // stringstream for concatenating xml parameter strings. 
+ std::stringstream mueluss; + + // create surrounding MueLu parameter list + mueluss << "" << std::endl; + + // loop over all ML parameters in provided parameter list + for (ParameterList::ConstIterator param = paramListWithSubList.begin(); param != paramListWithSubList.end(); ++param) { + // extract ML parameter name + const std::string& pname = paramListWithSubList.name(param); + + // extract corresponding (ML) value + // remove ParameterList specific information from result string + std::stringstream valuess; + valuess << paramList.entry(param); + std::string valuestr = valuess.str(); + replaceAll(valuestr, "[unused]", ""); + replaceAll(valuestr, "[default]", ""); + valuestr = trim(valuestr); + + // transform ML parameter to corresponding MueLu parameter and generate XML string + std::string valueInterpreterStr = "\"" + valuestr + "\""; + std::string ret = MasterList::interpretParameterName(MasterList::ML2MueLu(pname), valueInterpreterStr); + + // special handling for verbosity level + if (pname == "ML output") { + // Translate verbosity parameter + int verbosityLevel = std::stoi(valuestr); + std::string eVerbLevel = "none"; + if (verbosityLevel == 0) eVerbLevel = "none"; + if (verbosityLevel >= 1) eVerbLevel = "low"; + if (verbosityLevel >= 5) eVerbLevel = "medium"; + if (verbosityLevel >= 10) eVerbLevel = "high"; + if (verbosityLevel >= 11) eVerbLevel = "extreme"; + if (verbosityLevel >= 42) eVerbLevel = "test"; + if (verbosityLevel >= 666) eVerbLevel = "interfacetest"; + mueluss << "" << std::endl; + continue; + } - // make sure that MueLu's drop tol matches ML's - mueluss << "" << std::endl; + // add XML string + if (ret != "") { + mueluss << ret << std::endl; - // special handling for energy minimization - // TAW: this is not optimal for symmetric problems but at least works. - // for symmetric problems the "energy minimization" parameter should not exist anyway... 
- if (pname == "energy minimization: enable") { - mueluss << "" << std::endl; - mueluss << "" << std::endl; - } + // remove parameter from ML parameter list + adaptingParamList.remove(pname, false); + } - // special handling for smoothers - if (pname == "smoother: type") { + // make sure that MueLu's phase1 matches ML's + mueluss << "" << std::endl; - mueluss << GetSmootherFactory(paramList, adaptingParamList, pname, valuestr); + // make sure that MueLu's phase2a matches ML's + mueluss << "" << std::endl; - } + // make sure that MueLu's phase2b matches ML's + mueluss << "" << std::endl; - // special handling for level-specific smoothers - if (pname.find("smoother: list (level",0) == 0) { - // Scan pname (ex: pname="smoother: type (level 2)") - std::string type, option; - int levelID=-1; - { - typedef Teuchos::ArrayRCP::size_type size_type; - Teuchos::Array ctype (size_type(pname.size()+1)); - Teuchos::Array coption(size_type(pname.size()+1)); - - int matched = sscanf(pname.c_str(),"%s %[^(](level %d)", ctype.getRawPtr(), coption.getRawPtr(), &levelID); // use [^(] instead of %s to allow for strings with white-spaces (ex: "ifpack list") - type = std::string(ctype.getRawPtr()); - option = std::string(coption.getRawPtr()); option.resize(option.size () - 1); // remove final white-space - - if (matched != 3 || (type != "smoother:")) { - TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "MueLu::CreateSublist(), Line " << __LINE__ << ". 
" - << "Error in creating level-specific sublists" << std::endl - << "Offending parameter: " << pname << std::endl); - } - - mueluss << "" << std::endl; - mueluss << GetSmootherFactory(paramList.sublist(pname),adaptingParamList.sublist(pname), "smoother: type", paramList.sublist(pname).get("smoother: type")); - mueluss << "" << std::endl; - } - } + // make sure that MueLu's drop tol matches ML's + mueluss << "" << std::endl; - // special handling for coarse level - TEUCHOS_TEST_FOR_EXCEPTION(paramList.isParameter("coarse: type"), Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter::Setup(): The parameter \"coarse: type\" should not exist but being stored in \"coarse: list\" instead."); - if ( pname == "coarse: list" ) { + // special handling for energy minimization + // TAW: this is not optimal for symmetric problems but at least works. + // for symmetric problems the "energy minimization" parameter should not exist anyway... + if (pname == "energy minimization: enable") { + mueluss << "" << std::endl; + mueluss << "" << std::endl; + } - // interpret smoother/coarse solver data. - // Note, that we inspect the "coarse: list" sublist to define the "coarse" smoother/solver - // Be aware, that MueLu::CreateSublists renames the prefix of the parameters in the "coarse: list" from "coarse" to "smoother". 
- // Therefore, we have to check the values of the "smoother" parameters - TEUCHOS_TEST_FOR_EXCEPTION(!paramList.sublist("coarse: list").isParameter("smoother: type"), Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter::Setup(): no coarse grid solver defined."); - mueluss << GetSmootherFactory(paramList.sublist("coarse: list"), adaptingParamList.sublist("coarse: list"), "coarse: type", paramList.sublist("coarse: list").get("smoother: type")); + // special handling for smoothers + if (pname == "smoother: type") { + mueluss << GetSmootherFactory(paramList, adaptingParamList, pname, valuestr); + } + // special handling for level-specific smoothers + if (pname.find("smoother: list (level", 0) == 0) { + // Scan pname (ex: pname="smoother: type (level 2)") + std::string type, option; + int levelID = -1; + { + typedef Teuchos::ArrayRCP::size_type size_type; + Teuchos::Array ctype(size_type(pname.size() + 1)); + Teuchos::Array coption(size_type(pname.size() + 1)); + + int matched = sscanf(pname.c_str(), "%s %[^(](level %d)", ctype.getRawPtr(), coption.getRawPtr(), &levelID); // use [^(] instead of %s to allow for strings with white-spaces (ex: "ifpack list") + type = std::string(ctype.getRawPtr()); + option = std::string(coption.getRawPtr()); + option.resize(option.size() - 1); // remove final white-space + + if (matched != 3 || (type != "smoother:")) { + TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "MueLu::CreateSublist(), Line " << __LINE__ << ". 
" + << "Error in creating level-specific sublists" << std::endl + << "Offending parameter: " << pname << std::endl); + } + mueluss << "" << std::endl; + mueluss << GetSmootherFactory(paramList.sublist(pname), adaptingParamList.sublist(pname), "smoother: type", paramList.sublist(pname).get("smoother: type")); + mueluss << "" << std::endl; } - } // for + } - mueluss << "" << std::endl; + // special handling for coarse level + TEUCHOS_TEST_FOR_EXCEPTION(paramList.isParameter("coarse: type"), Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter::Setup(): The parameter \"coarse: type\" should not exist but being stored in \"coarse: list\" instead."); + if (pname == "coarse: list") { + // interpret smoother/coarse solver data. + // Note, that we inspect the "coarse: list" sublist to define the "coarse" smoother/solver + // Be aware, that MueLu::CreateSublists renames the prefix of the parameters in the "coarse: list" from "coarse" to "smoother". + // Therefore, we have to check the values of the "smoother" parameters + TEUCHOS_TEST_FOR_EXCEPTION(!paramList.sublist("coarse: list").isParameter("smoother: type"), Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter::Setup(): no coarse grid solver defined."); + mueluss << GetSmootherFactory(paramList.sublist("coarse: list"), adaptingParamList.sublist("coarse: list"), "coarse: type", paramList.sublist("coarse: list").get("smoother: type")); + } + } // for - return mueluss.str(); - } + mueluss << "" << std::endl; + return mueluss.str(); +} -} // namespace MueLu +} // namespace MueLu diff --git a/packages/muelu/src/Interface/MueLu_ML2MueLuParameterTranslator.hpp b/packages/muelu/src/Interface/MueLu_ML2MueLuParameterTranslator.hpp index 34697f6fd4e9..145b0b980f58 100644 --- a/packages/muelu/src/Interface/MueLu_ML2MueLuParameterTranslator.hpp +++ b/packages/muelu/src/Interface/MueLu_ML2MueLuParameterTranslator.hpp @@ -59,98 +59,95 @@ namespace MueLu { - /*! +/*! @class ML2MueLuParameterTranslator class. 
@brief Class that accepts ML-style parameters and builds a MueLu parameter list (easy input deck) This interpreter class is meant to make the transition from ML to MueLu easier. */ - class ML2MueLuParameterTranslator { - public: - //! @name Constructors/Destructors. - //@{ - - //! Constructor. - ML2MueLuParameterTranslator() { } - - //! Destructor. - virtual ~ML2MueLuParameterTranslator() { } - - //@} - - //!@name Parameter translation from ML to MueLu - //@{ - - /// @brief: Translate ML parameters to MueLu parameter XML string - /// - /// @param [in] paramList_in: ML parameter list - /// @return std::string with MueLu XML parameters - static std::string translate(Teuchos::ParameterList & paramList, const std::string& defaultVals="") { - return SetParameterList(paramList, defaultVals); - } - - /// @brief: Translate ML parameters to MueLu parameter XML string - /// - /// @param [in] xmlFileName: file name with ML xml parameters - /// @return std::string with MueLu XML parameters - static std::string translate(const std::string & xmlFileName, const std::string& defaultVals="") { - Teuchos::RCP paramList = Teuchos::getParametersFromXmlFile(xmlFileName); - return SetParameterList(*paramList, defaultVals); - } - - //@} - - private: - - //! @name Parameter handling - //@{ - - /// @brief: Interpret parameter list - /// - /// @param [in] paramList_in: ML parameter list - /// @return std::string with MueLu XML parameters - static std::string SetParameterList(const Teuchos::ParameterList & paramList_in, const std::string& defaultVals); - - - /// @brief: Helper function which translates ML smoother/solver paramters to MueLu XML string - /// - /// @param [in] paramList: reference to Teuchos::ParameterList containing the ML smoother/solver parameters. - /// @param [in,out] adaptingParamList: reference to Teuchos::ParameterList containing the ML smoother/solver parameters. Note that the processed parameters are removed from the ParameterList. 
It can be used to detect non-interpreted ML parameters. - /// @param [in] pname: currently processed parameter TODO - /// @param [in] value: currently processed value TODO - static std::string GetSmootherFactory(const Teuchos::ParameterList& paramList, Teuchos::ParameterList& adaptingParamList, const std::string& pname, const std::string& value); - - //@} - - // - // helper routines - // - - // trim from start - static inline std::string <rim(std::string &s) { - s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int c){return !std::isspace(c);})); - return s; - } - - // trim from end - static inline std::string &rtrim(std::string &s) { - s.erase(std::find_if(s.rbegin(), s.rend(), [](int c){return !std::isspace(c);}).base(), s.end()); - return s; - } - - // trim from both ends - static inline std::string &trim(std::string &s) { - return ltrim(rtrim(s)); - } - - //! @name Member variables - //@{ - //std::string xmlString_; ///! string containing MueLu XML parameters corresponding to ML parameters - //@} - - }; // class MLParameterListInterpreter - - -} // end namespace MueLu +class ML2MueLuParameterTranslator { + public: + //! @name Constructors/Destructors. + //@{ + + //! Constructor. + ML2MueLuParameterTranslator() {} + + //! Destructor. 
+ virtual ~ML2MueLuParameterTranslator() {} + + //@} + + //!@name Parameter translation from ML to MueLu + //@{ + + /// @brief: Translate ML parameters to MueLu parameter XML string + /// + /// @param [in] paramList_in: ML parameter list + /// @return std::string with MueLu XML parameters + static std::string translate(Teuchos::ParameterList& paramList, const std::string& defaultVals = "") { + return SetParameterList(paramList, defaultVals); + } + + /// @brief: Translate ML parameters to MueLu parameter XML string + /// + /// @param [in] xmlFileName: file name with ML xml parameters + /// @return std::string with MueLu XML parameters + static std::string translate(const std::string& xmlFileName, const std::string& defaultVals = "") { + Teuchos::RCP paramList = Teuchos::getParametersFromXmlFile(xmlFileName); + return SetParameterList(*paramList, defaultVals); + } + + //@} + + private: + //! @name Parameter handling + //@{ + + /// @brief: Interpret parameter list + /// + /// @param [in] paramList_in: ML parameter list + /// @return std::string with MueLu XML parameters + static std::string SetParameterList(const Teuchos::ParameterList& paramList_in, const std::string& defaultVals); + + /// @brief: Helper function which translates ML smoother/solver paramters to MueLu XML string + /// + /// @param [in] paramList: reference to Teuchos::ParameterList containing the ML smoother/solver parameters. + /// @param [in,out] adaptingParamList: reference to Teuchos::ParameterList containing the ML smoother/solver parameters. Note that the processed parameters are removed from the ParameterList. It can be used to detect non-interpreted ML parameters. 
+ /// @param [in] pname: currently processed parameter TODO + /// @param [in] value: currently processed value TODO + static std::string GetSmootherFactory(const Teuchos::ParameterList& paramList, Teuchos::ParameterList& adaptingParamList, const std::string& pname, const std::string& value); + + //@} + + // + // helper routines + // + + // trim from start + static inline std::string& ltrim(std::string& s) { + s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int c) { return !std::isspace(c); })); + return s; + } + + // trim from end + static inline std::string& rtrim(std::string& s) { + s.erase(std::find_if(s.rbegin(), s.rend(), [](int c) { return !std::isspace(c); }).base(), s.end()); + return s; + } + + // trim from both ends + static inline std::string& trim(std::string& s) { + return ltrim(rtrim(s)); + } + + //! @name Member variables + //@{ + //std::string xmlString_; ///! string containing MueLu XML parameters corresponding to ML parameters + //@} + +}; // class MLParameterListInterpreter + +} // end namespace MueLu #endif /* MUELU_ML2MUELUPARAMETERTRANSLATOR_HPP */ diff --git a/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_decl.hpp b/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_decl.hpp index b03bb9e30e47..ebdd3561f14c 100644 --- a/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_decl.hpp +++ b/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_decl.hpp @@ -87,7 +87,7 @@ namespace MueLu { - /* +/* Utility that from an existing Teuchos::ParameterList creates a new list, in which level-specific parameters are replaced with sublists. 
@@ -104,12 +104,11 @@ namespace MueLu { smoother: type = symmetric Gauss-Seidel smoother: sweeps = 1 */ - // This function is a copy of ML_CreateSublists to avoid dependency on ML - // Throw exception on error instead of exit() - void CreateSublists(const ParameterList &List, ParameterList &newList); +// This function is a copy of ML_CreateSublists to avoid dependency on ML +// Throw exception on error instead of exit() +void CreateSublists(const ParameterList& List, ParameterList& newList); - - /*! +/*! @class MLParameterListInterpreter class. @brief Class that accepts ML-style parameters and builds a MueLu preconditioner. This interpreter uses the same default values as ML. This allows to compare ML/MueLu results @@ -118,115 +117,115 @@ namespace MueLu { TODO: A warning is issued if ML is not available */ - template - class MLParameterListInterpreter : public HierarchyManager { +template +class MLParameterListInterpreter : public HierarchyManager { #undef MUELU_MLPARAMETERLISTINTERPRETER_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ - - //! Constructor. - MLParameterListInterpreter() : nullspace_(NULL), blksize_(1) { } + public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - //! @param paramList: parameter list with ML parameters - //! @param[in] comm (RCP >): Optional RCP of a Teuchos communicator (default: Teuchos::null) - //! @param factoryList: vector with RCP of FactoryBase objects - //! - //! The factories in factoryList allow the user to add user-specific factories to the MueLu Hierarchy. - //! The idea is to be able to add some factories that write out some debug information etc. which are not handled by the ML - //! Parameter List itself. See information about the RAPFactory::AddTransferFactory method, too! - MLParameterListInterpreter(Teuchos::ParameterList & paramList, Teuchos::RCP > comm = Teuchos::null, std::vector > factoryList = std::vector >(0)); + //! Constructor. 
+ MLParameterListInterpreter() + : nullspace_(NULL) + , blksize_(1) {} - //! Constructor. - //! @param xmlFileName: file name for XML file with ML parameters - //! @param factoryList: vector with RCP of FactoryBase objects - //! - //! The factories in factoryList allow the user to add user-specific factories to the MueLu Hierarchy. - //! The idea is to be able to add some factories that write out some debug information etc. which are not handled by the ML - //! Parameter List itself. See information about the RAPFactory::AddTransferFactory method, too! - MLParameterListInterpreter(const std::string & xmlFileName,std::vector > factoryList = std::vector >(0)); + //! Constructor. + //! @param paramList: parameter list with ML parameters + //! @param[in] comm (RCP >): Optional RCP of a Teuchos communicator (default: Teuchos::null) + //! @param factoryList: vector with RCP of FactoryBase objects + //! + //! The factories in factoryList allow the user to add user-specific factories to the MueLu Hierarchy. + //! The idea is to be able to add some factories that write out some debug information etc. which are not handled by the ML + //! Parameter List itself. See information about the RAPFactory::AddTransferFactory method, too! + MLParameterListInterpreter(Teuchos::ParameterList& paramList, Teuchos::RCP > comm = Teuchos::null, std::vector > factoryList = std::vector >(0)); - //! Destructor. - virtual ~MLParameterListInterpreter() { } + //! Constructor. + //! @param xmlFileName: file name for XML file with ML parameters + //! @param factoryList: vector with RCP of FactoryBase objects + //! + //! The factories in factoryList allow the user to add user-specific factories to the MueLu Hierarchy. + //! The idea is to be able to add some factories that write out some debug information etc. which are not handled by the ML + //! Parameter List itself. See information about the RAPFactory::AddTransferFactory method, too! 
+ MLParameterListInterpreter(const std::string& xmlFileName, std::vector > factoryList = std::vector >(0)); - //@} + //! Destructor. + virtual ~MLParameterListInterpreter() {} - //@{ + //@} - void SetParameterList(const Teuchos::ParameterList & paramList); + //@{ - //@} + void SetParameterList(const Teuchos::ParameterList& paramList); - //@{ + //@} - //! Setup Hierarchy object - virtual void SetupHierarchy(Hierarchy & H) const; + //@{ - //@} + //! Setup Hierarchy object + virtual void SetupHierarchy(Hierarchy& H) const; - //@{ + //@} - //! @name static helper functions translating parameter list to factories - //! @brief static helper functions that also can be used from outside for translating ML parameters into MueLu objects - //@{ + //@{ - //! Read smoother options and build the corresponding smoother factory - // @param AFact: Factory used by smoother to find 'A' - static RCP GetSmootherFactory(const Teuchos::ParameterList & paramList, const RCP & AFact = Teuchos::null); + //! @name static helper functions translating parameter list to factories + //! @brief static helper functions that also can be used from outside for translating ML parameters into MueLu objects + //@{ - //@} + //! Read smoother options and build the corresponding smoother factory + // @param AFact: Factory used by smoother to find 'A' + static RCP GetSmootherFactory(const Teuchos::ParameterList& paramList, const RCP& AFact = Teuchos::null); + //@} - //! @name Handling of additional user-specific transfer factories - //@{ - /*! @brief Add transfer factory in the end of list of transfer factories for RAPFactory. + //! @name Handling of additional user-specific transfer factories + //@{ + /*! @brief Add transfer factory in the end of list of transfer factories for RAPFactory. This allows the user to add user-specific factories to the MueLu Hierarchy. The idea is to be able to add some factories that write out some debug information etc. which are not handled by the ML Parameter List itself. 
See information about the RAPFactory::AddTransferFactory method, too! */ - void AddTransferFactory(const RCP & factory); - - //! Returns number of transfer factories. - size_t NumTransferFactories() const; - //@} + void AddTransferFactory(const RCP& factory); - private: + //! Returns number of transfer factories. + size_t NumTransferFactories() const; + //@} - //! nullspace can be embedded in the ML parameter list - int nullspaceDim_; - double* nullspace_; //TODO: replace by Teuchos::ArrayRCP<> + private: + //! nullspace can be embedded in the ML parameter list + int nullspaceDim_; + double* nullspace_; //TODO: replace by Teuchos::ArrayRCP<> - //! coordinates can be embedded in the ML parameter list - double* xcoord_; - double* ycoord_; - double* zcoord_; + //! coordinates can be embedded in the ML parameter list + double* xcoord_; + double* ycoord_; + double* zcoord_; - //! list of user-defined transfer Factories - //! We use this vector to add some special user-given factories to the Hierarchy (RAPFactory) - //! This way the user can extend the standard functionality of the MLParameterListInterpreter beyond the - //! capabibilities of ML. - std::vector > TransferFacts_; + //! list of user-defined transfer Factories + //! We use this vector to add some special user-given factories to the Hierarchy (RAPFactory) + //! This way the user can extend the standard functionality of the MLParameterListInterpreter beyond the + //! capabibilities of ML. + std::vector > TransferFacts_; - //@{ Matrix configuration + //@{ Matrix configuration - //! Setup Operator object - virtual void SetupOperator(Operator & Op) const; + //! Setup Operator object + virtual void SetupOperator(Operator& Op) const; - //! Matrix configuration storage - int blksize_; + //! 
Matrix configuration storage + int blksize_; - //@} + //@} - }; // class MLParameterListInterpreter +}; // class MLParameterListInterpreter -} // namespace MueLu +} // namespace MueLu #define MUELU_MLPARAMETERLISTINTERPRETER_SHORT #endif /* MUELU_MLPARAMETERLISTINTERPRETER_DECL_HPP */ diff --git a/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_def.hpp b/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_def.hpp index 7001923b075e..b1ea66d7541f 100644 --- a/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_def.hpp +++ b/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_def.hpp @@ -107,672 +107,666 @@ // Read a parameter value from a parameter list and store it into a variable named 'varName' #define MUELU_READ_PARAM(paramList, paramStr, varType, defaultValue, varName) \ - varType varName = defaultValue; if (paramList.isParameter(paramStr)) varName = paramList.get(paramStr); + varType varName = defaultValue; \ + if (paramList.isParameter(paramStr)) varName = paramList.get(paramStr); // Read a parameter value from a paraeter list and copy it into a new parameter list (with another parameter name) #define MUELU_COPY_PARAM(paramList, paramStr, varType, defaultValue, outParamList, outParamStr) \ - if (paramList.isParameter(paramStr)) \ - outParamList.set(outParamStr, paramList.get(paramStr)); \ - else outParamList.set(outParamStr, static_cast(defaultValue)); \ + if (paramList.isParameter(paramStr)) \ + outParamList.set(outParamStr, paramList.get(paramStr)); \ + else \ + outParamList.set(outParamStr, static_cast(defaultValue)); namespace MueLu { - template - MLParameterListInterpreter::MLParameterListInterpreter(Teuchos::ParameterList & paramList, Teuchos::RCP > comm, std::vector > factoryList) : nullspace_(NULL), xcoord_(NULL), ycoord_(NULL), zcoord_(NULL),TransferFacts_(factoryList), blksize_(1) { - - if (paramList.isParameter("xml parameter file")){ - std::string filename = paramList.get("xml parameter file",""); - if 
(filename.length() != 0) { - TEUCHOS_TEST_FOR_EXCEPTION(comm.is_null(), Exceptions::RuntimeError, "xml parameter file requires a valid comm"); - Teuchos::ParameterList paramList2 = paramList; - Teuchos::updateParametersFromXmlFileAndBroadcast(filename, Teuchos::Ptr(¶mList2),*comm); - paramList2.remove("xml parameter file"); - SetParameterList(paramList2); - } - else - SetParameterList(paramList); - } - else +template +MLParameterListInterpreter::MLParameterListInterpreter(Teuchos::ParameterList& paramList, Teuchos::RCP > comm, std::vector > factoryList) + : nullspace_(NULL) + , xcoord_(NULL) + , ycoord_(NULL) + , zcoord_(NULL) + , TransferFacts_(factoryList) + , blksize_(1) { + if (paramList.isParameter("xml parameter file")) { + std::string filename = paramList.get("xml parameter file", ""); + if (filename.length() != 0) { + TEUCHOS_TEST_FOR_EXCEPTION(comm.is_null(), Exceptions::RuntimeError, "xml parameter file requires a valid comm"); + Teuchos::ParameterList paramList2 = paramList; + Teuchos::updateParametersFromXmlFileAndBroadcast(filename, Teuchos::Ptr(¶mList2), *comm); + paramList2.remove("xml parameter file"); + SetParameterList(paramList2); + } else SetParameterList(paramList); + } else + SetParameterList(paramList); +} + +template +MLParameterListInterpreter::MLParameterListInterpreter(const std::string& xmlFileName, std::vector > factoryList) + : nullspace_(NULL) + , TransferFacts_(factoryList) + , blksize_(1) { + Teuchos::RCP paramList = Teuchos::getParametersFromXmlFile(xmlFileName); + SetParameterList(*paramList); +} + +template +void MLParameterListInterpreter::SetParameterList(const Teuchos::ParameterList& paramList_in) { + Teuchos::ParameterList paramList = paramList_in; + + // + // Read top-level of the parameter list + // + + // hard-coded default values == ML defaults according to the manual + MUELU_READ_PARAM(paramList, "ML output", int, 0, verbosityLevel); + MUELU_READ_PARAM(paramList, "max levels", int, 10, maxLevels); + 
MUELU_READ_PARAM(paramList, "PDE equations", int, 1, nDofsPerNode); + + MUELU_READ_PARAM(paramList, "coarse: max size", int, 128, maxCoarseSize); + + MUELU_READ_PARAM(paramList, "aggregation: type", std::string, "Uncoupled", agg_type); + //MUELU_READ_PARAM(paramList, "aggregation: threshold", double, 0.0, agg_threshold); + MUELU_READ_PARAM(paramList, "aggregation: damping factor", double, (double)4 / (double)3, agg_damping); + //MUELU_READ_PARAM(paramList, "aggregation: smoothing sweeps", int, 1, agg_smoothingsweeps); + MUELU_READ_PARAM(paramList, "aggregation: nodes per aggregate", int, 1, minPerAgg); + MUELU_READ_PARAM(paramList, "aggregation: keep Dirichlet bcs", bool, false, bKeepDirichletBcs); // This is a MueLu specific extension that does not exist in ML + MUELU_READ_PARAM(paramList, "aggregation: max neighbours already aggregated", int, 0, maxNbrAlreadySelected); // This is a MueLu specific extension that does not exist in M + MUELU_READ_PARAM(paramList, "aggregation: aux: enable", bool, false, agg_use_aux); + MUELU_READ_PARAM(paramList, "aggregation: aux: threshold", double, false, agg_aux_thresh); + + MUELU_READ_PARAM(paramList, "null space: type", std::string, "default vectors", nullspaceType); + MUELU_READ_PARAM(paramList, "null space: dimension", int, -1, nullspaceDim); // TODO: ML default not in documentation + MUELU_READ_PARAM(paramList, "null space: vectors", double*, NULL, nullspaceVec); // TODO: ML default not in documentation + + MUELU_READ_PARAM(paramList, "energy minimization: enable", bool, false, bEnergyMinimization); + + MUELU_READ_PARAM(paramList, "RAP: fix diagonal", bool, false, bFixDiagonal); // This is a MueLu specific extension that does not exist in ML + + MUELU_READ_PARAM(paramList, "x-coordinates", double*, NULL, xcoord); + MUELU_READ_PARAM(paramList, "y-coordinates", double*, NULL, ycoord); + MUELU_READ_PARAM(paramList, "z-coordinates", double*, NULL, zcoord); + + // + // Move smoothers/aggregation/coarse parameters to sublists + 
// + + // ML allows to have level-specific smoothers/aggregation/coarse parameters at the top level of the list or/and defined in sublists: + // See also: ML Guide section 6.4.1, MueLu::CreateSublists, ML_CreateSublists + ParameterList paramListWithSubList; + MueLu::CreateSublists(paramList, paramListWithSubList); + paramList = paramListWithSubList; // swap + + // pull out "use kokkos refactor" + bool setKokkosRefactor = false; + bool useKokkosRefactor = !Node::is_serial; + if (paramList.isType("use kokkos refactor")) { + useKokkosRefactor = paramList.get("use kokkos refactor"); + setKokkosRefactor = true; + paramList.remove("use kokkos refactor"); } - template - MLParameterListInterpreter::MLParameterListInterpreter(const std::string & xmlFileName, std::vector > factoryList) : nullspace_(NULL), TransferFacts_(factoryList), blksize_(1) { - Teuchos::RCP paramList = Teuchos::getParametersFromXmlFile(xmlFileName); - SetParameterList(*paramList); - } - - template - void MLParameterListInterpreter::SetParameterList(const Teuchos::ParameterList & paramList_in) { - Teuchos::ParameterList paramList = paramList_in; - - // - // Read top-level of the parameter list - // + // + // Validate parameter list + // - // hard-coded default values == ML defaults according to the manual - MUELU_READ_PARAM(paramList, "ML output", int, 0, verbosityLevel); - MUELU_READ_PARAM(paramList, "max levels", int, 10, maxLevels); - MUELU_READ_PARAM(paramList, "PDE equations", int, 1, nDofsPerNode); - - MUELU_READ_PARAM(paramList, "coarse: max size", int, 128, maxCoarseSize); - - MUELU_READ_PARAM(paramList, "aggregation: type", std::string, "Uncoupled", agg_type); - //MUELU_READ_PARAM(paramList, "aggregation: threshold", double, 0.0, agg_threshold); - MUELU_READ_PARAM(paramList, "aggregation: damping factor", double, (double)4/(double)3, agg_damping); - //MUELU_READ_PARAM(paramList, "aggregation: smoothing sweeps", int, 1, agg_smoothingsweeps); - MUELU_READ_PARAM(paramList, "aggregation: nodes per 
aggregate", int, 1, minPerAgg); - MUELU_READ_PARAM(paramList, "aggregation: keep Dirichlet bcs", bool, false, bKeepDirichletBcs); // This is a MueLu specific extension that does not exist in ML - MUELU_READ_PARAM(paramList, "aggregation: max neighbours already aggregated", int, 0, maxNbrAlreadySelected); // This is a MueLu specific extension that does not exist in M - MUELU_READ_PARAM(paramList, "aggregation: aux: enable", bool, false, agg_use_aux); - MUELU_READ_PARAM(paramList, "aggregation: aux: threshold", double, false, agg_aux_thresh); - - MUELU_READ_PARAM(paramList, "null space: type", std::string, "default vectors", nullspaceType); - MUELU_READ_PARAM(paramList, "null space: dimension", int, -1, nullspaceDim); // TODO: ML default not in documentation - MUELU_READ_PARAM(paramList, "null space: vectors", double*, NULL, nullspaceVec); // TODO: ML default not in documentation - - MUELU_READ_PARAM(paramList, "energy minimization: enable", bool, false, bEnergyMinimization); + { + bool validate = paramList.get("ML validate parameter list", true); /* true = default in ML */ + if (validate) { +#if defined(HAVE_MUELU_ML) && defined(HAVE_MUELU_EPETRA) + // Validate parameter list using ML validator + int depth = paramList.get("ML validate depth", 5); /* 5 = default in ML */ + TEUCHOS_TEST_FOR_EXCEPTION(!ML_Epetra::ValidateMLPParameters(paramList, depth), Exceptions::RuntimeError, + "ERROR: ML's Teuchos::ParameterList contains incorrect parameter!"); +#else + // If no validator available: issue a warning and set parameter value to false in the output list + this->GetOStream(Warnings0) << "Warning: MueLu_ENABLE_ML=OFF. The parameter list cannot be validated." 
<< std::endl; + paramList.set("ML validate parameter list", false); + +#endif // HAVE_MUELU_ML + } // if(validate) + } // scope + + // Matrix option + blksize_ = nDofsPerNode; + + // Translate verbosity parameter + + // Translate verbosity parameter + MsgType eVerbLevel = None; + if (verbosityLevel == 0) eVerbLevel = None; + if (verbosityLevel >= 1) eVerbLevel = Low; + if (verbosityLevel >= 5) eVerbLevel = Medium; + if (verbosityLevel >= 10) eVerbLevel = High; + if (verbosityLevel >= 11) eVerbLevel = Extreme; + if (verbosityLevel >= 42) eVerbLevel = Test; + if (verbosityLevel >= 43) eVerbLevel = InterfaceTest; + this->verbosity_ = eVerbLevel; + + TEUCHOS_TEST_FOR_EXCEPTION(agg_type != "Uncoupled", Exceptions::RuntimeError, + "MueLu::MLParameterListInterpreter::SetParameterList(): parameter \"aggregation: type\": only 'Uncoupled' aggregation is supported."); + + // Create MueLu factories + RCP dropFact; + if (useKokkosRefactor) + dropFact = rcp(new CoalesceDropFactory_kokkos()); + else + dropFact = rcp(new CoalesceDropFactory()); + + if (agg_use_aux) { + dropFact->SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("distance laplacian"))); + dropFact->SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(agg_aux_thresh)); + } - MUELU_READ_PARAM(paramList, "RAP: fix diagonal", bool, false, bFixDiagonal); // This is a MueLu specific extension that does not exist in ML + // Uncoupled aggregation + RCP AggFact = Teuchos::null; + if (useKokkosRefactor) { + AggFact = rcp(new UncoupledAggregationFactory_kokkos()); + } else + AggFact = rcp(new UncoupledAggregationFactory()); + + AggFact->SetFactory("Graph", dropFact); + AggFact->SetFactory("DofsPerNode", dropFact); + AggFact->SetParameter("aggregation: preserve Dirichlet points", Teuchos::ParameterEntry(bKeepDirichletBcs)); + AggFact->SetParameter("aggregation: ordering", Teuchos::ParameterEntry(std::string("natural"))); + AggFact->SetParameter("aggregation: max selected neighbors", 
Teuchos::ParameterEntry(maxNbrAlreadySelected)); + AggFact->SetParameter("aggregation: min agg size", Teuchos::ParameterEntry(minPerAgg)); + + if (verbosityLevel > 3) { + std::ostringstream oss; + oss << "========================= Aggregate option summary  =========================" << std::endl; + oss << "min Nodes per aggregate :              " << minPerAgg << std::endl; + oss << "min # of root nbrs already aggregated : " << maxNbrAlreadySelected << std::endl; + oss << "aggregate ordering :                    natural" << std::endl; + oss << "=============================================================================" << std::endl; + this->GetOStream(Runtime1) << oss.str(); + } - MUELU_READ_PARAM(paramList, "x-coordinates", double*, NULL, xcoord); - MUELU_READ_PARAM(paramList, "y-coordinates", double*, NULL, ycoord); - MUELU_READ_PARAM(paramList, "z-coordinates", double*, NULL, zcoord); + RCP PFact; + RCP RFact; + RCP PtentFact; + if (useKokkosRefactor) + PtentFact = rcp(new TentativePFactory_kokkos()); + else + PtentFact = rcp(new TentativePFactory()); + if (agg_damping == 0.0 && bEnergyMinimization == false) { + // tentative prolongation operator (PA-AMG) + PFact = PtentFact; + RFact = rcp(new TransPFactory()); + } else if (agg_damping != 0.0 && bEnergyMinimization == false) { + // smoothed aggregation (SA-AMG) + RCP SaPFact; + if (useKokkosRefactor) + SaPFact = rcp(new SaPFactory_kokkos()); + else + SaPFact = rcp(new SaPFactory()); + SaPFact->SetParameter("sa: damping factor", ParameterEntry(agg_damping)); + PFact = SaPFact; + RFact = rcp(new TransPFactory()); + } else if (bEnergyMinimization == true) { + // Petrov Galerkin PG-AMG smoothed aggregation (energy minimization in ML) + PFact = rcp(new PgPFactory()); + RFact = rcp(new GenericRFactory()); + } + RCP AcFact = rcp(new RAPFactory()); + AcFact->SetParameter("RepairMainDiagonal", Teuchos::ParameterEntry(bFixDiagonal)); + for (size_t i = 0; i < TransferFacts_.size(); i++) { + 
AcFact->AddTransferFactory(TransferFacts_[i]); + } + // + // introduce rebalancing + // +#if defined(HAVE_MUELU_ISORROPIA) && defined(HAVE_MPI) + Teuchos::RCP RebalancedPFact = Teuchos::null; + Teuchos::RCP RebalancedRFact = Teuchos::null; + Teuchos::RCP RepartitionFact = Teuchos::null; + Teuchos::RCP RebalancedAFact = Teuchos::null; + + MUELU_READ_PARAM(paramList, "repartition: enable", int, 0, bDoRepartition); + if (bDoRepartition == 1) { + // The Factory Manager will be configured to return the rebalanced versions of P, R, A by default. + // Everytime we want to use the non-rebalanced versions, we need to explicitly define the generating factory. + RFact->SetFactory("P", PFact); // - // Move smoothers/aggregation/coarse parameters to sublists - // + AcFact->SetFactory("P", PFact); + AcFact->SetFactory("R", RFact); - // ML allows to have level-specific smoothers/aggregation/coarse parameters at the top level of the list or/and defined in sublists: - // See also: ML Guide section 6.4.1, MueLu::CreateSublists, ML_CreateSublists - ParameterList paramListWithSubList; - MueLu::CreateSublists(paramList, paramListWithSubList); - paramList = paramListWithSubList; // swap - - // pull out "use kokkos refactor" - bool setKokkosRefactor = false; - bool useKokkosRefactor = !Node::is_serial; - if (paramList.isType("use kokkos refactor")) { - useKokkosRefactor = paramList.get("use kokkos refactor"); - setKokkosRefactor = true; - paramList.remove("use kokkos refactor"); - } + // define rebalancing factory for coarse matrix + Teuchos::RCP > rebAmalgFact = Teuchos::rcp(new MueLu::AmalgamationFactory()); + rebAmalgFact->SetFactory("A", AcFact); - // - // Validate parameter list - // + MUELU_READ_PARAM(paramList, "repartition: max min ratio", double, 1.3, maxminratio); + MUELU_READ_PARAM(paramList, "repartition: min per proc", int, 512, minperproc); + // Repartitioning heuristic + RCP RepartitionHeuristicFact = Teuchos::rcp(new RepartitionHeuristicFactory()); { - bool validate = 
paramList.get("ML validate parameter list", true); /* true = default in ML */ - if (validate) { - -#if defined(HAVE_MUELU_ML) && defined(HAVE_MUELU_EPETRA) - // Validate parameter list using ML validator - int depth = paramList.get("ML validate depth", 5); /* 5 = default in ML */ - TEUCHOS_TEST_FOR_EXCEPTION(! ML_Epetra::ValidateMLPParameters(paramList, depth), Exceptions::RuntimeError, - "ERROR: ML's Teuchos::ParameterList contains incorrect parameter!"); -#else - // If no validator available: issue a warning and set parameter value to false in the output list - this->GetOStream(Warnings0) << "Warning: MueLu_ENABLE_ML=OFF. The parameter list cannot be validated." << std::endl; - paramList.set("ML validate parameter list", false); + Teuchos::ParameterList paramListRepFact; + paramListRepFact.set("repartition: min rows per proc", minperproc); + paramListRepFact.set("repartition: max imbalance", maxminratio); + RepartitionHeuristicFact->SetParameterList(paramListRepFact); + } + RepartitionHeuristicFact->SetFactory("A", AcFact); + + // create "Partition" + Teuchos::RCP > isoInterface = Teuchos::rcp(new MueLu::IsorropiaInterface()); + isoInterface->SetFactory("A", AcFact); + isoInterface->SetFactory("number of partitions", RepartitionHeuristicFact); + isoInterface->SetFactory("UnAmalgamationInfo", rebAmalgFact); + + // create "Partition" by unamalgamtion + Teuchos::RCP > repInterface = Teuchos::rcp(new MueLu::RepartitionInterface()); + repInterface->SetFactory("A", AcFact); + repInterface->SetFactory("number of partitions", RepartitionHeuristicFact); + repInterface->SetFactory("AmalgamatedPartition", isoInterface); + //repInterface->SetFactory("UnAmalgamationInfo", rebAmalgFact); // not necessary? 
+ + // Repartitioning (creates "Importer" from "Partition") + RepartitionFact = Teuchos::rcp(new RepartitionFactory()); + RepartitionFact->SetFactory("A", AcFact); + RepartitionFact->SetFactory("number of partitions", RepartitionHeuristicFact); + RepartitionFact->SetFactory("Partition", repInterface); + + // Reordering of the transfer operators + RebalancedPFact = Teuchos::rcp(new RebalanceTransferFactory()); + RebalancedPFact->SetParameter("type", Teuchos::ParameterEntry(std::string("Interpolation"))); + RebalancedPFact->SetFactory("P", PFact); + RebalancedPFact->SetFactory("Nullspace", PtentFact); + RebalancedPFact->SetFactory("Importer", RepartitionFact); + + RebalancedRFact = Teuchos::rcp(new RebalanceTransferFactory()); + RebalancedRFact->SetParameter("type", Teuchos::ParameterEntry(std::string("Restriction"))); + RebalancedRFact->SetFactory("R", RFact); + RebalancedRFact->SetFactory("Importer", RepartitionFact); + + // Compute Ac from rebalanced P and R + RebalancedAFact = Teuchos::rcp(new RebalanceAcFactory()); + RebalancedAFact->SetFactory("A", AcFact); + } +#else // #ifdef HAVE_MUELU_ISORROPIA + // Get rid of [-Wunused] warnings + //(void) + // + // ^^^ FIXME (mfh 17 Nov 2013) That definitely doesn't compile. +#endif -#endif // HAVE_MUELU_ML - } // if(validate) - } // scope + // + // Nullspace factory + // + // Set fine level nullspace + // extract pre-computed nullspace from ML parameter list + // store it in nullspace_ and nullspaceDim_ + if (nullspaceType != "default vectors") { + TEUCHOS_TEST_FOR_EXCEPTION(nullspaceType != "pre-computed", Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (no pre-computed null space). error."); + TEUCHOS_TEST_FOR_EXCEPTION(nullspaceDim == -1, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (nullspace dim == -1). 
error."); + TEUCHOS_TEST_FOR_EXCEPTION(nullspaceVec == NULL, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (nullspace == NULL). You have to provide a valid fine-level nullspace in \'null space: vectors\'"); - // Matrix option - blksize_ = nDofsPerNode; + nullspaceDim_ = nullspaceDim; + nullspace_ = nullspaceVec; + } - // Translate verbosity parameter + Teuchos::RCP nspFact = Teuchos::rcp(new NullspaceFactory("Nullspace")); + nspFact->SetFactory("Nullspace", PtentFact); - // Translate verbosity parameter - MsgType eVerbLevel = None; - if (verbosityLevel == 0) eVerbLevel = None; - if (verbosityLevel >= 1) eVerbLevel = Low; - if (verbosityLevel >= 5) eVerbLevel = Medium; - if (verbosityLevel >= 10) eVerbLevel = High; - if (verbosityLevel >= 11) eVerbLevel = Extreme; - if (verbosityLevel >= 42) eVerbLevel = Test; - if (verbosityLevel >= 43) eVerbLevel = InterfaceTest; - this->verbosity_ = eVerbLevel; + // Stash coordinates + xcoord_ = xcoord; + ycoord_ = ycoord; + zcoord_ = zcoord; + // + // Hierarchy + FactoryManager + // - TEUCHOS_TEST_FOR_EXCEPTION(agg_type != "Uncoupled", Exceptions::RuntimeError, - "MueLu::MLParameterListInterpreter::SetParameterList(): parameter \"aggregation: type\": only 'Uncoupled' aggregation is supported."); + // Hierarchy options + this->numDesiredLevel_ = maxLevels; + this->maxCoarseSize_ = maxCoarseSize; - // Create MueLu factories - RCP dropFact; - if(useKokkosRefactor) - dropFact = rcp( new CoalesceDropFactory_kokkos() ); - else - dropFact = rcp( new CoalesceDropFactory() ); + // + // Coarse Smoother + // + ParameterList& coarseList = paramList.sublist("coarse: list"); + // check whether coarse solver is set properly. If not, set default coarse solver. 
+ if (!coarseList.isParameter("smoother: type")) + coarseList.set("smoother: type", "Amesos-KLU"); // set default coarse solver according to ML 5.0 guide + RCP coarseFact = GetSmootherFactory(coarseList, Teuchos::null); - if (agg_use_aux) { - dropFact->SetParameter("aggregation: drop scheme",Teuchos::ParameterEntry(std::string("distance laplacian"))); - dropFact->SetParameter("aggregation: drop tol",Teuchos::ParameterEntry(agg_aux_thresh)); - } + // Smoothers Top Level Parameters - // Uncoupled aggregation - RCP AggFact = Teuchos::null; - if(useKokkosRefactor) { - AggFact = rcp( new UncoupledAggregationFactory_kokkos() ); - } - else - AggFact = rcp( new UncoupledAggregationFactory() ); - - AggFact->SetFactory("Graph", dropFact); - AggFact->SetFactory("DofsPerNode", dropFact); - AggFact->SetParameter("aggregation: preserve Dirichlet points", Teuchos::ParameterEntry(bKeepDirichletBcs)); - AggFact->SetParameter("aggregation: ordering", Teuchos::ParameterEntry(std::string("natural"))); - AggFact->SetParameter("aggregation: max selected neighbors", Teuchos::ParameterEntry(maxNbrAlreadySelected)); - AggFact->SetParameter("aggregation: min agg size", Teuchos::ParameterEntry(minPerAgg)); - - - if (verbosityLevel > 3) { - std::ostringstream oss; - oss << "========================= Aggregate option summary  =========================" << std::endl; - oss << "min Nodes per aggregate :              " << minPerAgg << std::endl; - oss << "min # of root nbrs already aggregated : " << maxNbrAlreadySelected << std::endl; - oss << "aggregate ordering :                    natural" << std::endl; - oss << "=============================================================================" << std::endl; - this->GetOStream(Runtime1) << oss.str(); - } + RCP topLevelSmootherParam = ExtractSetOfParameters(paramList, "smoother"); - RCP PFact; - RCP RFact; - RCP PtentFact; - if(useKokkosRefactor) - PtentFact = rcp( new TentativePFactory_kokkos() ); - else - PtentFact = rcp( new TentativePFactory() 
); - if (agg_damping == 0.0 && bEnergyMinimization == false) { - // tentative prolongation operator (PA-AMG) - PFact = PtentFact; - RFact = rcp( new TransPFactory() ); - } else if (agg_damping != 0.0 && bEnergyMinimization == false) { - // smoothed aggregation (SA-AMG) - RCP SaPFact; - if(useKokkosRefactor) - SaPFact = rcp( new SaPFactory_kokkos() ); - else - SaPFact = rcp( new SaPFactory() ); - SaPFact->SetParameter("sa: damping factor", ParameterEntry(agg_damping)); - PFact = SaPFact; - RFact = rcp( new TransPFactory() ); - } else if (bEnergyMinimization == true) { - // Petrov Galerkin PG-AMG smoothed aggregation (energy minimization in ML) - PFact = rcp( new PgPFactory() ); - RFact = rcp( new GenericRFactory() ); - } + // - RCP AcFact = rcp( new RAPFactory() ); - AcFact->SetParameter("RepairMainDiagonal", Teuchos::ParameterEntry(bFixDiagonal)); - for (size_t i = 0; iAddTransferFactory(TransferFacts_[i]); - } + // Prepare factory managers + // TODO: smootherFact can be reuse accross level if same parameters/no specific parameterList + for (int levelID = 0; levelID < maxLevels; levelID++) { // - // introduce rebalancing + // Level FactoryManager // -#if defined(HAVE_MUELU_ISORROPIA) && defined(HAVE_MPI) - Teuchos::RCP RebalancedPFact = Teuchos::null; - Teuchos::RCP RebalancedRFact = Teuchos::null; - Teuchos::RCP RepartitionFact = Teuchos::null; - Teuchos::RCP RebalancedAFact = Teuchos::null; - MUELU_READ_PARAM(paramList, "repartition: enable", int, 0, bDoRepartition); - if (bDoRepartition == 1) { - // The Factory Manager will be configured to return the rebalanced versions of P, R, A by default. - // Everytime we want to use the non-rebalanced versions, we need to explicitly define the generating factory. 
- RFact->SetFactory("P", PFact); - // - AcFact->SetFactory("P", PFact); - AcFact->SetFactory("R", RFact); - - // define rebalancing factory for coarse matrix - Teuchos::RCP > rebAmalgFact = Teuchos::rcp(new MueLu::AmalgamationFactory()); - rebAmalgFact->SetFactory("A", AcFact); - - MUELU_READ_PARAM(paramList, "repartition: max min ratio", double, 1.3, maxminratio); - MUELU_READ_PARAM(paramList, "repartition: min per proc", int, 512, minperproc); - - // Repartitioning heuristic - RCP RepartitionHeuristicFact = Teuchos::rcp(new RepartitionHeuristicFactory()); - { - Teuchos::ParameterList paramListRepFact; - paramListRepFact.set("repartition: min rows per proc", minperproc); - paramListRepFact.set("repartition: max imbalance", maxminratio); - RepartitionHeuristicFact->SetParameterList(paramListRepFact); - } - RepartitionHeuristicFact->SetFactory("A", AcFact); - - // create "Partition" - Teuchos::RCP > isoInterface = Teuchos::rcp(new MueLu::IsorropiaInterface()); - isoInterface->SetFactory("A", AcFact); - isoInterface->SetFactory("number of partitions", RepartitionHeuristicFact); - isoInterface->SetFactory("UnAmalgamationInfo", rebAmalgFact); - - // create "Partition" by unamalgamtion - Teuchos::RCP > repInterface = Teuchos::rcp(new MueLu::RepartitionInterface()); - repInterface->SetFactory("A", AcFact); - repInterface->SetFactory("number of partitions", RepartitionHeuristicFact); - repInterface->SetFactory("AmalgamatedPartition", isoInterface); - //repInterface->SetFactory("UnAmalgamationInfo", rebAmalgFact); // not necessary? 
- - // Repartitioning (creates "Importer" from "Partition") - RepartitionFact = Teuchos::rcp(new RepartitionFactory()); - RepartitionFact->SetFactory("A", AcFact); - RepartitionFact->SetFactory("number of partitions", RepartitionHeuristicFact); - RepartitionFact->SetFactory("Partition", repInterface); - - // Reordering of the transfer operators - RebalancedPFact = Teuchos::rcp(new RebalanceTransferFactory()); - RebalancedPFact->SetParameter("type", Teuchos::ParameterEntry(std::string("Interpolation"))); - RebalancedPFact->SetFactory("P", PFact); - RebalancedPFact->SetFactory("Nullspace", PtentFact); - RebalancedPFact->SetFactory("Importer", RepartitionFact); - - RebalancedRFact = Teuchos::rcp(new RebalanceTransferFactory()); - RebalancedRFact->SetParameter("type", Teuchos::ParameterEntry(std::string("Restriction"))); - RebalancedRFact->SetFactory("R", RFact); - RebalancedRFact->SetFactory("Importer", RepartitionFact); - - // Compute Ac from rebalanced P and R - RebalancedAFact = Teuchos::rcp(new RebalanceAcFactory()); - RebalancedAFact->SetFactory("A", AcFact); - } -#else // #ifdef HAVE_MUELU_ISORROPIA - // Get rid of [-Wunused] warnings - //(void) - // - // ^^^ FIXME (mfh 17 Nov 2013) That definitely doesn't compile. -#endif + RCP manager = rcp(new FactoryManager()); + if (setKokkosRefactor) + manager->SetKokkosRefactor(useKokkosRefactor); // - // Nullspace factory + // Smoothers // - // Set fine level nullspace - // extract pre-computed nullspace from ML parameter list - // store it in nullspace_ and nullspaceDim_ - if (nullspaceType != "default vectors") { - TEUCHOS_TEST_FOR_EXCEPTION(nullspaceType != "pre-computed", Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (no pre-computed null space). error."); - TEUCHOS_TEST_FOR_EXCEPTION(nullspaceDim == -1, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (nullspace dim == -1). 
error."); - TEUCHOS_TEST_FOR_EXCEPTION(nullspaceVec == NULL, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (nullspace == NULL). You have to provide a valid fine-level nullspace in \'null space: vectors\'"); - - nullspaceDim_ = nullspaceDim; - nullspace_ = nullspaceVec; - } - - Teuchos::RCP nspFact = Teuchos::rcp(new NullspaceFactory("Nullspace")); - nspFact->SetFactory("Nullspace", PtentFact); - - - // Stash coordinates - xcoord_ = xcoord; - ycoord_ = ycoord; - zcoord_ = zcoord; - + { + // Merge level-specific parameters with global parameters. level-specific parameters takes precedence. + // TODO: unit-test this part alone + ParameterList levelSmootherParam = GetMLSubList(paramList, "smoother", levelID); // copy + MergeParameterList(*topLevelSmootherParam, levelSmootherParam, false); /* false = do no overwrite levelSmootherParam parameters by topLevelSmootherParam parameters */ + // std::cout << std::endl << "Merged List for level " << levelID << std::endl; + // std::cout << levelSmootherParam << std::endl; - // - // Hierarchy + FactoryManager - // + RCP smootherFact = GetSmootherFactory(levelSmootherParam, Teuchos::null); // TODO: missing AFact input arg. - // Hierarchy options - this->numDesiredLevel_ = maxLevels; - this->maxCoarseSize_ = maxCoarseSize; + manager->SetFactory("Smoother", smootherFact); + } // - // Coarse Smoother + // Misc // - ParameterList& coarseList = paramList.sublist("coarse: list"); - // check whether coarse solver is set properly. If not, set default coarse solver. 
- if (!coarseList.isParameter("smoother: type")) - coarseList.set("smoother: type", "Amesos-KLU"); // set default coarse solver according to ML 5.0 guide - RCP coarseFact = GetSmootherFactory(coarseList, Teuchos::null); - - // Smoothers Top Level Parameters - - RCP topLevelSmootherParam = ExtractSetOfParameters(paramList, "smoother"); - - // - - // Prepare factory managers - // TODO: smootherFact can be reuse accross level if same parameters/no specific parameterList - - for (int levelID=0; levelID < maxLevels; levelID++) { - - // - // Level FactoryManager - // - - RCP manager = rcp(new FactoryManager()); - if (setKokkosRefactor) - manager->SetKokkosRefactor(useKokkosRefactor); - // - // Smoothers - // - - { - // Merge level-specific parameters with global parameters. level-specific parameters takes precedence. - // TODO: unit-test this part alone - - ParameterList levelSmootherParam = GetMLSubList(paramList, "smoother", levelID); // copy - MergeParameterList(*topLevelSmootherParam, levelSmootherParam, false); /* false = do no overwrite levelSmootherParam parameters by topLevelSmootherParam parameters */ - // std::cout << std::endl << "Merged List for level " << levelID << std::endl; - // std::cout << levelSmootherParam << std::endl; - - RCP smootherFact = GetSmootherFactory(levelSmootherParam, Teuchos::null); // TODO: missing AFact input arg. 
- - manager->SetFactory("Smoother", smootherFact); - } - - // - // Misc - // - - manager->SetFactory("CoarseSolver", coarseFact); // TODO: should not be done in the loop - manager->SetFactory("Graph", dropFact); - manager->SetFactory("Aggregates", AggFact); - manager->SetFactory("DofsPerNode", dropFact); - manager->SetFactory("Ptent", PtentFact); + manager->SetFactory("CoarseSolver", coarseFact); // TODO: should not be done in the loop + manager->SetFactory("Graph", dropFact); + manager->SetFactory("Aggregates", AggFact); + manager->SetFactory("DofsPerNode", dropFact); + manager->SetFactory("Ptent", PtentFact); #if defined(HAVE_MUELU_ISORROPIA) && defined(HAVE_MPI) if (bDoRepartition == 1) { manager->SetFactory("A", RebalancedAFact); manager->SetFactory("P", RebalancedPFact); manager->SetFactory("R", RebalancedRFact); - manager->SetFactory("Nullspace", RebalancedPFact); - manager->SetFactory("Importer", RepartitionFact); + manager->SetFactory("Nullspace", RebalancedPFact); + manager->SetFactory("Importer", RepartitionFact); } else { -#endif // #ifdef HAVE_MUELU_ISORROPIA - manager->SetFactory("Nullspace", nspFact); // use same nullspace factory throughout all multigrid levels - manager->SetFactory("A", AcFact); // same RAP factory for all levels - manager->SetFactory("P", PFact); // same prolongator and restrictor factories for all levels - manager->SetFactory("R", RFact); // same prolongator and restrictor factories for all levels +#endif // #ifdef HAVE_MUELU_ISORROPIA + manager->SetFactory("Nullspace", nspFact); // use same nullspace factory throughout all multigrid levels + manager->SetFactory("A", AcFact); // same RAP factory for all levels + manager->SetFactory("P", PFact); // same prolongator and restrictor factories for all levels + manager->SetFactory("R", RFact); // same prolongator and restrictor factories for all levels #if defined(HAVE_MUELU_ISORROPIA) && defined(HAVE_MPI) } #endif - this->AddFactoryManager(levelID, 1, manager); - } // for (level loop) 
- - } - - template - void MLParameterListInterpreter::SetupHierarchy(Hierarchy & H) const { - // if nullspace_ has already been extracted from ML parameter list - // make nullspace available for MueLu - if (nullspace_ != NULL) { - RCP fineLevel = H.GetLevel(0); - RCP Op = fineLevel->Get >("A"); - RCP A = rcp_dynamic_cast(Op); - if (!A.is_null()) { - const RCP rowMap = fineLevel->Get< RCP >("A")->getRowMap(); - RCP nullspace = MultiVectorFactory::Build(rowMap, nullspaceDim_, true); - - for ( size_t i=0; i < Teuchos::as(nullspaceDim_); i++) { - Teuchos::ArrayRCP nullspacei = nullspace->getDataNonConst(i); - const size_t myLength = nullspace->getLocalLength(); - - for (size_t j = 0; j < myLength; j++) { - nullspacei[j] = nullspace_[i*myLength + j]; - } + this->AddFactoryManager(levelID, 1, manager); + } // for (level loop) +} + +template +void MLParameterListInterpreter::SetupHierarchy(Hierarchy& H) const { + // if nullspace_ has already been extracted from ML parameter list + // make nullspace available for MueLu + if (nullspace_ != NULL) { + RCP fineLevel = H.GetLevel(0); + RCP Op = fineLevel->Get >("A"); + RCP A = rcp_dynamic_cast(Op); + if (!A.is_null()) { + const RCP rowMap = fineLevel->Get >("A")->getRowMap(); + RCP nullspace = MultiVectorFactory::Build(rowMap, nullspaceDim_, true); + + for (size_t i = 0; i < Teuchos::as(nullspaceDim_); i++) { + Teuchos::ArrayRCP nullspacei = nullspace->getDataNonConst(i); + const size_t myLength = nullspace->getLocalLength(); + + for (size_t j = 0; j < myLength; j++) { + nullspacei[j] = nullspace_[i * myLength + j]; } - - fineLevel->Set("Nullspace", nullspace); } + + fineLevel->Set("Nullspace", nullspace); } + } - // Do the same for coordinates - size_t num_coords = 0; - double * coordPTR[3]; - if (xcoord_) { - coordPTR[0] = xcoord_; + // Do the same for coordinates + size_t num_coords = 0; + double* coordPTR[3]; + if (xcoord_) { + coordPTR[0] = xcoord_; + num_coords++; + if (ycoord_) { + coordPTR[1] = ycoord_; num_coords++; - 
if (ycoord_) { - coordPTR[1] = ycoord_; + if (zcoord_) { + coordPTR[2] = zcoord_; num_coords++; - if (zcoord_) { - coordPTR[2] = zcoord_; - num_coords++; - } } } - if (num_coords){ - Teuchos::RCP fineLevel = H.GetLevel(0); - Teuchos::RCP Op = fineLevel->Get >("A"); - Teuchos::RCP A = rcp_dynamic_cast(Op); - if (!A.is_null()) { - const Teuchos::RCP rowMap = fineLevel->Get< RCP >("A")->getRowMap(); - Teuchos::RCP coordinates = MultiVectorFactory::Build(rowMap, num_coords, true); - - for ( size_t i=0; i < num_coords; i++) { - Teuchos::ArrayRCP coordsi = coordinates->getDataNonConst(i); - const size_t myLength = coordinates->getLocalLength(); - for (size_t j = 0; j < myLength; j++) { - coordsi[j] = coordPTR[i][j]; - } + } + if (num_coords) { + Teuchos::RCP fineLevel = H.GetLevel(0); + Teuchos::RCP Op = fineLevel->Get >("A"); + Teuchos::RCP A = rcp_dynamic_cast(Op); + if (!A.is_null()) { + const Teuchos::RCP rowMap = fineLevel->Get >("A")->getRowMap(); + Teuchos::RCP coordinates = MultiVectorFactory::Build(rowMap, num_coords, true); + + for (size_t i = 0; i < num_coords; i++) { + Teuchos::ArrayRCP coordsi = coordinates->getDataNonConst(i); + const size_t myLength = coordinates->getLocalLength(); + for (size_t j = 0; j < myLength; j++) { + coordsi[j] = coordPTR[i][j]; } - fineLevel->Set("Coordinates",coordinates); } + fineLevel->Set("Coordinates", coordinates); } - - HierarchyManager::SetupHierarchy(H); } - // TODO: code factorization with MueLu_ParameterListInterpreter. - template - RCP > - MLParameterListInterpreter:: - GetSmootherFactory (const Teuchos::ParameterList & paramList, - const RCP & AFact) - { - typedef Teuchos::ScalarTraits STS; - SC one = STS::one(); - - std::string type = "symmetric Gauss-Seidel"; // default - - // - // Get 'type' - // - -// //TODO: fix defaults!! 
- -// // Default coarse grid smoother -// std::string type; -// if ("smoother" == "coarse") { -// #if (defined(HAVE_MUELU_EPETRA) && defined( HAVE_MUELU_AMESOS)) || (defined(HAVE_MUELU_AMESOS2)) // FIXME: test is wrong (ex: compiled with Epetra&&Tpetra&&Amesos2 but without Amesos => error running Epetra problem) -// type = ""; // use default defined by AmesosSmoother or Amesos2Smoother -// #else -// type = "symmetric Gauss-Seidel"; // use a sym Gauss-Seidel (with no damping) as fallback "coarse solver" (TODO: needs Ifpack(2)) -// #endif -// } else { -// // TODO: default smoother? -// type = ""; -// } - - - if (paramList.isParameter("smoother: type")) type = paramList.get("smoother: type"); - TEUCHOS_TEST_FOR_EXCEPTION(type.empty(), Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no \"smoother: type\" in the smoother parameter list" << std::endl << paramList); - - // - // Create the smoother prototype - // - - RCP smooProto; - std::string ifpackType; - Teuchos::ParameterList smootherParamList; - - if (type == "Jacobi" || type == "Gauss-Seidel" || type == "symmetric Gauss-Seidel") { - if (type == "symmetric Gauss-Seidel") type = "Symmetric Gauss-Seidel"; // FIXME - - ifpackType = "RELAXATION"; - smootherParamList.set("relaxation: type", type); - - MUELU_COPY_PARAM(paramList, "smoother: sweeps", int, 2, smootherParamList, "relaxation: sweeps"); - MUELU_COPY_PARAM(paramList, "smoother: damping factor", Scalar, one, smootherParamList, "relaxation: damping factor"); - - smooProto = rcp( new TrilinosSmoother(ifpackType, smootherParamList, 0) ); - smooProto->SetFactory("A", AFact); - - } else if (type == "Chebyshev" || type == "MLS") { - - ifpackType = "CHEBYSHEV"; - - MUELU_COPY_PARAM(paramList, "smoother: sweeps", int, 2, smootherParamList, "chebyshev: degree"); - if (paramList.isParameter("smoother: MLS alpha")) { - MUELU_COPY_PARAM(paramList, "smoother: MLS alpha", double, 20, smootherParamList, "chebyshev: ratio eigenvalue"); - } else { - 
MUELU_COPY_PARAM(paramList, "smoother: Chebyshev alpha", double, 20, smootherParamList, "chebyshev: ratio eigenvalue"); - } - - - smooProto = rcp( new TrilinosSmoother(ifpackType, smootherParamList, 0) ); - smooProto->SetFactory("A", AFact); + HierarchyManager::SetupHierarchy(H); +} + +// TODO: code factorization with MueLu_ParameterListInterpreter. +template +RCP > +MLParameterListInterpreter:: + GetSmootherFactory(const Teuchos::ParameterList& paramList, + const RCP& AFact) { + typedef Teuchos::ScalarTraits STS; + SC one = STS::one(); + + std::string type = "symmetric Gauss-Seidel"; // default + + // + // Get 'type' + // + + // //TODO: fix defaults!! + + // // Default coarse grid smoother + // std::string type; + // if ("smoother" == "coarse") { + // #if (defined(HAVE_MUELU_EPETRA) && defined( HAVE_MUELU_AMESOS)) || (defined(HAVE_MUELU_AMESOS2)) // FIXME: test is wrong (ex: compiled with Epetra&&Tpetra&&Amesos2 but without Amesos => error running Epetra problem) + // type = ""; // use default defined by AmesosSmoother or Amesos2Smoother + // #else + // type = "symmetric Gauss-Seidel"; // use a sym Gauss-Seidel (with no damping) as fallback "coarse solver" (TODO: needs Ifpack(2)) + // #endif + // } else { + // // TODO: default smoother? 
+ // type = ""; + // } + + if (paramList.isParameter("smoother: type")) type = paramList.get("smoother: type"); + TEUCHOS_TEST_FOR_EXCEPTION(type.empty(), Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no \"smoother: type\" in the smoother parameter list" << std::endl + << paramList); + + // + // Create the smoother prototype + // + + RCP smooProto; + std::string ifpackType; + Teuchos::ParameterList smootherParamList; + + if (type == "Jacobi" || type == "Gauss-Seidel" || type == "symmetric Gauss-Seidel") { + if (type == "symmetric Gauss-Seidel") type = "Symmetric Gauss-Seidel"; // FIXME + + ifpackType = "RELAXATION"; + smootherParamList.set("relaxation: type", type); + + MUELU_COPY_PARAM(paramList, "smoother: sweeps", int, 2, smootherParamList, "relaxation: sweeps"); + MUELU_COPY_PARAM(paramList, "smoother: damping factor", Scalar, one, smootherParamList, "relaxation: damping factor"); + + smooProto = rcp(new TrilinosSmoother(ifpackType, smootherParamList, 0)); + smooProto->SetFactory("A", AFact); + + } else if (type == "Chebyshev" || type == "MLS") { + ifpackType = "CHEBYSHEV"; + + MUELU_COPY_PARAM(paramList, "smoother: sweeps", int, 2, smootherParamList, "chebyshev: degree"); + if (paramList.isParameter("smoother: MLS alpha")) { + MUELU_COPY_PARAM(paramList, "smoother: MLS alpha", double, 20, smootherParamList, "chebyshev: ratio eigenvalue"); + } else { + MUELU_COPY_PARAM(paramList, "smoother: Chebyshev alpha", double, 20, smootherParamList, "chebyshev: ratio eigenvalue"); + } - } else if (type == "Hiptmair") { - ifpackType = "HIPTMAIR"; - std::string subSmootherType = "Chebyshev"; - if (paramList.isParameter("subsmoother: type")) - subSmootherType = paramList.get("subsmoother: type"); - std::string subSmootherIfpackType; - if (subSmootherType == "Chebyshev") - subSmootherIfpackType = "CHEBYSHEV"; - else if (subSmootherType == "Jacobi" || subSmootherType == "Gauss-Seidel" || subSmootherType == "symmetric Gauss-Seidel") { - if (subSmootherType == 
"symmetric Gauss-Seidel") subSmootherType = "Symmetric Gauss-Seidel"; // FIXME - subSmootherIfpackType = "RELAXATION"; - } else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: unknown smoother type. '" << subSmootherType << "' not supported by MueLu."); - - smootherParamList.set("hiptmair: smoother type 1", subSmootherIfpackType); - smootherParamList.set("hiptmair: smoother type 2", subSmootherIfpackType); - - auto smoother1ParamList = smootherParamList.sublist("hiptmair: smoother list 1"); - auto smoother2ParamList = smootherParamList.sublist("hiptmair: smoother list 2"); - - if (subSmootherType == "Chebyshev") { - MUELU_COPY_PARAM(paramList, "subsmoother: edge sweeps", int, 2, smoother1ParamList, "chebyshev: degree"); - MUELU_COPY_PARAM(paramList, "subsmoother: node sweeps", int, 2, smoother2ParamList, "chebyshev: degree"); - - MUELU_COPY_PARAM(paramList, "subsmoother: Chebyshev", double, 20, smoother1ParamList, "chebyshev: ratio eigenvalue"); - MUELU_COPY_PARAM(paramList, "subsmoother: Chebyshev", double, 20, smoother2ParamList, "chebyshev: ratio eigenvalue"); - } else { - MUELU_COPY_PARAM(paramList, "subsmoother: edge sweeps", int, 2, smoother1ParamList, "relaxation: sweeps"); - MUELU_COPY_PARAM(paramList, "subsmoother: node sweeps", int, 2, smoother2ParamList, "relaxation: sweeps"); - - MUELU_COPY_PARAM(paramList, "subsmoother: SGS damping factor", double, 0.8, smoother2ParamList, "relaxation: damping factor"); - } + smooProto = rcp(new TrilinosSmoother(ifpackType, smootherParamList, 0)); + smooProto->SetFactory("A", AFact); + + } else if (type == "Hiptmair") { + ifpackType = "HIPTMAIR"; + std::string subSmootherType = "Chebyshev"; + if (paramList.isParameter("subsmoother: type")) + subSmootherType = paramList.get("subsmoother: type"); + std::string subSmootherIfpackType; + if (subSmootherType == "Chebyshev") + subSmootherIfpackType = "CHEBYSHEV"; + else if (subSmootherType == "Jacobi" || subSmootherType == 
"Gauss-Seidel" || subSmootherType == "symmetric Gauss-Seidel") { + if (subSmootherType == "symmetric Gauss-Seidel") subSmootherType = "Symmetric Gauss-Seidel"; // FIXME + subSmootherIfpackType = "RELAXATION"; + } else + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: unknown smoother type. '" << subSmootherType << "' not supported by MueLu."); + + smootherParamList.set("hiptmair: smoother type 1", subSmootherIfpackType); + smootherParamList.set("hiptmair: smoother type 2", subSmootherIfpackType); + + auto smoother1ParamList = smootherParamList.sublist("hiptmair: smoother list 1"); + auto smoother2ParamList = smootherParamList.sublist("hiptmair: smoother list 2"); + + if (subSmootherType == "Chebyshev") { + MUELU_COPY_PARAM(paramList, "subsmoother: edge sweeps", int, 2, smoother1ParamList, "chebyshev: degree"); + MUELU_COPY_PARAM(paramList, "subsmoother: node sweeps", int, 2, smoother2ParamList, "chebyshev: degree"); + + MUELU_COPY_PARAM(paramList, "subsmoother: Chebyshev", double, 20, smoother1ParamList, "chebyshev: ratio eigenvalue"); + MUELU_COPY_PARAM(paramList, "subsmoother: Chebyshev", double, 20, smoother2ParamList, "chebyshev: ratio eigenvalue"); + } else { + MUELU_COPY_PARAM(paramList, "subsmoother: edge sweeps", int, 2, smoother1ParamList, "relaxation: sweeps"); + MUELU_COPY_PARAM(paramList, "subsmoother: node sweeps", int, 2, smoother2ParamList, "relaxation: sweeps"); + MUELU_COPY_PARAM(paramList, "subsmoother: SGS damping factor", double, 0.8, smoother2ParamList, "relaxation: damping factor"); + } - smooProto = rcp( new TrilinosSmoother(ifpackType, smootherParamList, 0) ); - smooProto->SetFactory("A", AFact); + smooProto = rcp(new TrilinosSmoother(ifpackType, smootherParamList, 0)); + smooProto->SetFactory("A", AFact); - } else if (type == "IFPACK") { // TODO: this option is not described in the ML Guide v5.0 + } else if (type == "IFPACK") { // TODO: this option is not described in the ML Guide v5.0 #if 
defined(HAVE_MUELU_EPETRA) && defined(HAVE_MUELU_IFPACK) - ifpackType = paramList.get("smoother: ifpack type"); - - if (ifpackType == "ILU") { - // TODO fix this (type mismatch double vs. int) - //MUELU_COPY_PARAM(paramList, "smoother: ifpack level-of-fill", double /*int*/, 0.0 /*2*/, smootherParamList, "fact: level-of-fill"); - if (paramList.isParameter("smoother: ifpack level-of-fill")) - smootherParamList.set("fact: level-of-fill", Teuchos::as(paramList.get("smoother: ifpack level-of-fill"))); - else smootherParamList.set("fact: level-of-fill", as(0)); - - MUELU_COPY_PARAM(paramList, "smoother: ifpack overlap", int, 2, smootherParamList, "partitioner: overlap"); - - // TODO change to TrilinosSmoother as soon as Ifpack2 supports all preconditioners from Ifpack - smooProto = - MueLu::GetIfpackSmoother (ifpackType, - smootherParamList, - paramList.get ("smoother: ifpack overlap")); - smooProto->SetFactory("A", AFact); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: unknown ML smoother type " + type + " (IFPACK) not supported by MueLu. Only ILU is supported."); - } -#else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: MueLu compiled without Ifpack support"); -#endif - - } else if (type.length() > strlen("Amesos") && type.substr(0, strlen("Amesos")) == "Amesos") { /* catch Amesos-* */ - std::string solverType = type.substr(strlen("Amesos")+1); /* ("Amesos-KLU" -> "KLU") */ + ifpackType = paramList.get("smoother: ifpack type"); - // Validator: following upper/lower case is what is allowed by ML - bool valid = false; - const int validatorSize = 5; - std::string validator[validatorSize] = {"Superlu", "Superludist", "KLU", "UMFPACK", "MUMPS"}; /* TODO: should "" be allowed? 
*/ - for (int i=0; i < validatorSize; i++) { if (validator[i] == solverType) valid = true; } - TEUCHOS_TEST_FOR_EXCEPTION(!valid, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: unknown smoother type. '" << type << "' not supported."); + if (ifpackType == "ILU") { + // TODO fix this (type mismatch double vs. int) + //MUELU_COPY_PARAM(paramList, "smoother: ifpack level-of-fill", double /*int*/, 0.0 /*2*/, smootherParamList, "fact: level-of-fill"); + if (paramList.isParameter("smoother: ifpack level-of-fill")) + smootherParamList.set("fact: level-of-fill", Teuchos::as(paramList.get("smoother: ifpack level-of-fill"))); + else + smootherParamList.set("fact: level-of-fill", as(0)); - // FIXME: MueLu should accept any Upper/Lower case. Not the case for the moment - std::transform(solverType.begin()+1, solverType.end(), solverType.begin()+1, ::tolower); + MUELU_COPY_PARAM(paramList, "smoother: ifpack overlap", int, 2, smootherParamList, "partitioner: overlap"); - smooProto = Teuchos::rcp( new DirectSolver(solverType, Teuchos::ParameterList()) ); + // TODO change to TrilinosSmoother as soon as Ifpack2 supports all preconditioners from Ifpack + smooProto = + MueLu::GetIfpackSmoother(ifpackType, + smootherParamList, + paramList.get("smoother: ifpack overlap")); smooProto->SetFactory("A", AFact); - } else { + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: unknown ML smoother type " + type + " (IFPACK) not supported by MueLu. Only ILU is supported."); + } +#else + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: MueLu compiled without Ifpack support"); +#endif - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: unknown smoother type. 
'" << type << "' not supported by MueLu."); + } else if (type.length() > strlen("Amesos") && type.substr(0, strlen("Amesos")) == "Amesos") { /* catch Amesos-* */ + std::string solverType = type.substr(strlen("Amesos") + 1); /* ("Amesos-KLU" -> "KLU") */ + // Validator: following upper/lower case is what is allowed by ML + bool valid = false; + const int validatorSize = 5; + std::string validator[validatorSize] = {"Superlu", "Superludist", "KLU", "UMFPACK", "MUMPS"}; /* TODO: should "" be allowed? */ + for (int i = 0; i < validatorSize; i++) { + if (validator[i] == solverType) valid = true; } - TEUCHOS_TEST_FOR_EXCEPTION(smooProto == Teuchos::null, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no smoother prototype. fatal error."); - - // - // Create the smoother factory - // + TEUCHOS_TEST_FOR_EXCEPTION(!valid, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: unknown smoother type. '" << type << "' not supported."); - RCP SmooFact = rcp( new SmootherFactory() ); + // FIXME: MueLu should accept any Upper/Lower case. Not the case for the moment + std::transform(solverType.begin() + 1, solverType.end(), solverType.begin() + 1, ::tolower); - // Set parameters of the smoother factory - MUELU_READ_PARAM(paramList, "smoother: pre or post", std::string, "both", preOrPost); - if (preOrPost == "both") { - SmooFact->SetSmootherPrototypes(smooProto, smooProto); - } else if (preOrPost == "pre") { - SmooFact->SetSmootherPrototypes(smooProto, Teuchos::null); - } else if (preOrPost == "post") { - SmooFact->SetSmootherPrototypes(Teuchos::null, smooProto); - } + smooProto = Teuchos::rcp(new DirectSolver(solverType, Teuchos::ParameterList())); + smooProto->SetFactory("A", AFact); - return SmooFact; + } else { + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: unknown smoother type. 
'" << type << "' not supported by MueLu."); } - - template - void MLParameterListInterpreter::AddTransferFactory(const RCP& factory) { - // check if it's a TwoLevelFactoryBase based transfer factory - TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::rcp_dynamic_cast(factory) == Teuchos::null, Exceptions::BadCast, "Transfer factory is not derived from TwoLevelFactoryBase. Since transfer factories will be handled by the RAPFactory they have to be derived from TwoLevelFactoryBase!"); - TransferFacts_.push_back(factory); + TEUCHOS_TEST_FOR_EXCEPTION(smooProto == Teuchos::null, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no smoother prototype. fatal error."); + + // + // Create the smoother factory + // + + RCP SmooFact = rcp(new SmootherFactory()); + + // Set parameters of the smoother factory + MUELU_READ_PARAM(paramList, "smoother: pre or post", std::string, "both", preOrPost); + if (preOrPost == "both") { + SmooFact->SetSmootherPrototypes(smooProto, smooProto); + } else if (preOrPost == "pre") { + SmooFact->SetSmootherPrototypes(smooProto, Teuchos::null); + } else if (preOrPost == "post") { + SmooFact->SetSmootherPrototypes(Teuchos::null, smooProto); } - template - size_t MLParameterListInterpreter::NumTransferFactories() const { - return TransferFacts_.size(); - } + return SmooFact; +} + +template +void MLParameterListInterpreter::AddTransferFactory(const RCP& factory) { + // check if it's a TwoLevelFactoryBase based transfer factory + TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::rcp_dynamic_cast(factory) == Teuchos::null, Exceptions::BadCast, "Transfer factory is not derived from TwoLevelFactoryBase. 
Since transfer factories will be handled by the RAPFactory they have to be derived from TwoLevelFactoryBase!"); + TransferFacts_.push_back(factory); +} - template - void MLParameterListInterpreter::SetupOperator(Operator & Op) const { - try { - Matrix& A = dynamic_cast(Op); - if (A.IsFixedBlockSizeSet() && (A.GetFixedBlockSize() != blksize_)) - this->GetOStream(Warnings0) << "Setting matrix block size to " << blksize_ << " (value of the parameter in the list) " - << "instead of " << A.GetFixedBlockSize() << " (provided matrix)." << std::endl; +template +size_t MLParameterListInterpreter::NumTransferFactories() const { + return TransferFacts_.size(); +} - A.SetFixedBlockSize(blksize_); +template +void MLParameterListInterpreter::SetupOperator(Operator& Op) const { + try { + Matrix& A = dynamic_cast(Op); + if (A.IsFixedBlockSizeSet() && (A.GetFixedBlockSize() != blksize_)) + this->GetOStream(Warnings0) << "Setting matrix block size to " << blksize_ << " (value of the parameter in the list) " + << "instead of " << A.GetFixedBlockSize() << " (provided matrix)." 
<< std::endl; + + A.SetFixedBlockSize(blksize_); #ifdef HAVE_MUELU_DEBUG - MatrixUtils::checkLocalRowMapMatchesColMap(A); -#endif // HAVE_MUELU_DEBUG + MatrixUtils::checkLocalRowMapMatchesColMap(A); +#endif // HAVE_MUELU_DEBUG - } catch (std::bad_cast&) { - this->GetOStream(Warnings0) << "Skipping setting block size as the operator is not a matrix" << std::endl; - } + } catch (std::bad_cast&) { + this->GetOStream(Warnings0) << "Skipping setting block size as the operator is not a matrix" << std::endl; } +} -} // namespace MueLu +} // namespace MueLu #define MUELU_MLPARAMETERLISTINTERPRETER_SHORT #endif /* MUELU_MLPARAMETERLISTINTERPRETER_DEF_HPP */ diff --git a/packages/muelu/src/Interface/MueLu_ParameterListInterpreter.cpp b/packages/muelu/src/Interface/MueLu_ParameterListInterpreter.cpp index fe96d582432b..9801981af33c 100644 --- a/packages/muelu/src/Interface/MueLu_ParameterListInterpreter.cpp +++ b/packages/muelu/src/Interface/MueLu_ParameterListInterpreter.cpp @@ -51,44 +51,44 @@ namespace MueLu { - size_t LevenshteinDistance(const char* s, size_t len_s, const char* t, size_t len_t) { - // degenerate cases - if (len_s == 0) return len_t; - if (len_t == 0) return len_s; - if (!strncmp(s, t, std::min(len_s, len_t))) return 0; +size_t LevenshteinDistance(const char* s, size_t len_s, const char* t, size_t len_t) { + // degenerate cases + if (len_s == 0) return len_t; + if (len_t == 0) return len_s; + if (!strncmp(s, t, std::min(len_s, len_t))) return 0; - // create two work vectors of integer distances - size_t len = len_t + 1; - std::vector v0(len); - std::vector v1(len); + // create two work vectors of integer distances + size_t len = len_t + 1; + std::vector v0(len); + std::vector v1(len); - // initialize v0 (the previous row of distances) - // this row is A[0][i]: edit distance for an empty s - // the distance is just the number of characters to delete from t - for (size_t i = 0; i < len; i++) - v0[i] = i; + // initialize v0 (the previous row of distances) + 
// this row is A[0][i]: edit distance for an empty s + // the distance is just the number of characters to delete from t + for (size_t i = 0; i < len; i++) + v0[i] = i; - for (size_t i = 0; i < len_s; i++) { - // calculate v1 (current row distances) from the previous row v0 + for (size_t i = 0; i < len_s; i++) { + // calculate v1 (current row distances) from the previous row v0 - // first element of v1 is A[i+1][0] - // edit distance is delete (i+1) chars from s to match empty t - v1[0] = i + 1; + // first element of v1 is A[i+1][0] + // edit distance is delete (i+1) chars from s to match empty t + v1[0] = i + 1; - // use formula to fill in the rest of the row - for (size_t j = 0; j < len_t; j++) { - size_t cost = (s[i] == t[j]) ? 0 : 1; - v1[j+1] = std::min(v1[j] + 1, - std::min(v0[j + 1] + 1, - v0[j] + cost)); - } - - // copy v1 (current row) to v0 (previous row) for next iteration - for (size_t j = 0; j < len; j++) - v0[j] = v1[j]; + // use formula to fill in the rest of the row + for (size_t j = 0; j < len_t; j++) { + size_t cost = (s[i] == t[j]) ? 
0 : 1; + v1[j + 1] = std::min(v1[j] + 1, + std::min(v0[j + 1] + 1, + v0[j] + cost)); } - return v1[len_t]; + // copy v1 (current row) to v0 (previous row) for next iteration + for (size_t j = 0; j < len; j++) + v0[j] = v1[j]; } + return v1[len_t]; } + +} // namespace MueLu diff --git a/packages/muelu/src/Interface/MueLu_ParameterListInterpreter_decl.hpp b/packages/muelu/src/Interface/MueLu_ParameterListInterpreter_decl.hpp index 395dc4231ffc..f470e5bc8aaa 100644 --- a/packages/muelu/src/Interface/MueLu_ParameterListInterpreter_decl.hpp +++ b/packages/muelu/src/Interface/MueLu_ParameterListInterpreter_decl.hpp @@ -99,7 +99,6 @@ #include "MueLu_SingleLevelMatlabFactory_fwd.hpp" #endif - #include "MueLu_CoalesceDropFactory_kokkos_fwd.hpp" #include "MueLu_NullspaceFactory_kokkos_fwd.hpp" #include "MueLu_SaPFactory_kokkos_fwd.hpp" @@ -113,32 +112,31 @@ namespace MueLu { - template - class ParameterListInterpreter : - public HierarchyManager { +template +class ParameterListInterpreter : public HierarchyManager { #undef MUELU_PARAMETERLISTINTERPRETER_SHORT #include "MueLu_UseShortNames.hpp" - typedef std::pair keep_pair; + typedef std::pair keep_pair; - public: - //! @name Constructors/Destructors - //@{ + public: + //! @name Constructors/Destructors + //@{ - protected: - /*! @brief Empty constructor + protected: + /*! @brief Empty constructor * * Constructor for derived classes */ - ParameterListInterpreter() { - factFact_ = Teuchos::null; - facadeFact_ = Teuchos::rcp(new FacadeClassFactory()); - } + ParameterListInterpreter() { + factFact_ = Teuchos::null; + facadeFact_ = Teuchos::rcp(new FacadeClassFactory()); + } - public: - /*! @brief Constructor that accepts a user-provided ParameterList. + public: + /*! @brief Constructor that accepts a user-provided ParameterList. 
Constructor for parameter list interpreter which directly interprets Teuchos::ParameterLists @@ -150,9 +148,9 @@ namespace MueLu { @param[in] facadeFact (RCP): Optional parameter containing a FacadeFactory class. The user can register its own facade classes in the FacadeFactory and provide it to the ParameterListInterpreter. (default: Teuchos::null, means, only standard FacadeClass that come with MueLu are available) */ - ParameterListInterpreter(Teuchos::ParameterList& paramList, Teuchos::RCP > comm = Teuchos::null, Teuchos::RCP factFact = Teuchos::null, Teuchos::RCP facadeFact = Teuchos::null); + ParameterListInterpreter(Teuchos::ParameterList& paramList, Teuchos::RCP > comm = Teuchos::null, Teuchos::RCP factFact = Teuchos::null, Teuchos::RCP facadeFact = Teuchos::null); - /*! @brief Constructor that reads parameters from an XML file. + /*! @brief Constructor that reads parameters from an XML file. XML options are converted to ParameterList entries by Teuchos. @@ -162,14 +160,14 @@ namespace MueLu { @param[in] facadeFact (RCP): Optional parameter containing a FacadeFactory class. The user can register its own facade classes in the FacadeFactory and provide it to the ParameterListInterpreter. (default: Teuchos::null, means, only standard FacadeClass that come with MueLu are available) */ - ParameterListInterpreter(const std::string& xmlFileName, const Teuchos::Comm& comm, Teuchos::RCP factFact = Teuchos::null, Teuchos::RCP facadeFact = Teuchos::null); + ParameterListInterpreter(const std::string& xmlFileName, const Teuchos::Comm& comm, Teuchos::RCP factFact = Teuchos::null, Teuchos::RCP facadeFact = Teuchos::null); - //! Destructor. - virtual ~ParameterListInterpreter() { } + //! Destructor. + virtual ~ParameterListInterpreter() {} - //@} + //@} - /*! @brief Set parameter list for Parameter list interpreter. + /*! @brief Set parameter list for Parameter list interpreter. 
The routine checks whether it is a parameter list in the easy parameter format or the more advanced factory-based parameter format and calls the corresponding interpreter routine. @@ -185,112 +183,109 @@ namespace MueLu { @param[in] paramList: ParameterList containing the MueLu parameters. */ - void SetParameterList(const Teuchos::ParameterList& paramList); - - //! Call the SetupHierarchy routine from the HiearchyManager object. - void SetupHierarchy(Hierarchy& H) const; - - private: - //! Setup Operator object - virtual void SetupOperator(Operator& A) const; - - int blockSize_; ///< block size of matrix (fixed block size) - CycleType Cycle_; ///< multigrid cycle type (V-cycle or W-cycle) - int WCycleStartLevel_; ///< in case of W-cycle, level on which cycle should start - double scalingFactor_; ///< prolongator scaling factor - GlobalOrdinal dofOffset_; ///< global offset variable describing offset of DOFs in operator - - //! Easy interpreter stuff - //@{ - // These three variables are only needed to print out proper [default] - bool changedPRrebalance_; - bool changedPRViaCopyrebalance_; - bool changedImplicitTranspose_; - - void SetEasyParameterList(const Teuchos::ParameterList& paramList); - void Validate(const Teuchos::ParameterList& paramList) const; - - void UpdateFactoryManager(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - - // "Generic components" for UpdateFactoryManager - void UpdateFactoryManager_Smoothers(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_CoarseSolvers(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_Aggregation_TentativeP(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& 
defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_Restriction(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + void SetParameterList(const Teuchos::ParameterList& paramList); + + //! Call the SetupHierarchy routine from the HiearchyManager object. + void SetupHierarchy(Hierarchy& H) const; + + private: + //! Setup Operator object + virtual void SetupOperator(Operator& A) const; + + int blockSize_; ///< block size of matrix (fixed block size) + CycleType Cycle_; ///< multigrid cycle type (V-cycle or W-cycle) + int WCycleStartLevel_; ///< in case of W-cycle, level on which cycle should start + double scalingFactor_; ///< prolongator scaling factor + GlobalOrdinal dofOffset_; ///< global offset variable describing offset of DOFs in operator + + //! Easy interpreter stuff + //@{ + // These three variables are only needed to print out proper [default] + bool changedPRrebalance_; + bool changedPRViaCopyrebalance_; + bool changedImplicitTranspose_; + + void SetEasyParameterList(const Teuchos::ParameterList& paramList); + void Validate(const Teuchos::ParameterList& paramList) const; + + void UpdateFactoryManager(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const; + + // "Generic components" for UpdateFactoryManager + void UpdateFactoryManager_Smoothers(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const; + void UpdateFactoryManager_CoarseSolvers(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, int levelID, std::vector& keeps) const; - void UpdateFactoryManager_RAP(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void 
UpdateFactoryManager_Coordinates(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_Repartition(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps, RCP & nullSpaceFactory) const; - void UpdateFactoryManager_LowPrecision(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_Nullspace(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps, RCP & nullSpaceFactory) const; - void UpdateFactoryManager_BlockNumber(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, - FactoryManager& manager,int levelID, std::vector& keeps) const; - void UpdateFactoryManager_LocalOrdinalTransfer(const std::string& VarName, const std::string& multigridAlgo, Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, - FactoryManager& manager,int levelID, std::vector& keeps) const; - - // Algorithm-specific components for UpdateFactoryManager - void UpdateFactoryManager_SemiCoarsen(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_PCoarsen(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + void UpdateFactoryManager_Aggregation_TentativeP(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const; + void UpdateFactoryManager_Restriction(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const; + void 
UpdateFactoryManager_RAP(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const; + void UpdateFactoryManager_Coordinates(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const; + void UpdateFactoryManager_Repartition(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps, RCP& nullSpaceFactory) const; + void UpdateFactoryManager_LowPrecision(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const; + void UpdateFactoryManager_Nullspace(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps, RCP& nullSpaceFactory) const; + void UpdateFactoryManager_BlockNumber(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, + FactoryManager& manager, int levelID, std::vector& keeps) const; + void UpdateFactoryManager_LocalOrdinalTransfer(const std::string& VarName, const std::string& multigridAlgo, Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, + FactoryManager& manager, int levelID, std::vector& keeps) const; + + // Algorithm-specific components for UpdateFactoryManager + void UpdateFactoryManager_SemiCoarsen(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const; + void UpdateFactoryManager_PCoarsen(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const; + void UpdateFactoryManager_SA(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const; + void 
UpdateFactoryManager_Reitzinger(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, int levelID, std::vector& keeps) const; - void UpdateFactoryManager_SA(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + void UpdateFactoryManager_Emin(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, int levelID, std::vector& keeps) const; - void UpdateFactoryManager_Reitzinger(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_Emin(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + void UpdateFactoryManager_PG(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const; + void UpdateFactoryManager_Replicate(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const; + void UpdateFactoryManager_Combine(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const; + void UpdateFactoryManager_Matlab(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, int levelID, std::vector& keeps) const; - void UpdateFactoryManager_PG(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_Replicate(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_Combine(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, 
FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_Matlab(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - - - - bool useCoordinates_; - bool useBlockNumber_; - bool useKokkos_; - //@} + bool useCoordinates_; + bool useBlockNumber_; + bool useKokkos_; + //@} - //! Factory interpreter stuff - // TODO: - // - parameter list validator - // - SetParameterList - // - Set/Get directly Level manager - // - build per level - // - comments/docs - // - use FactoryManager instead of FactoryMap - //@{ - void SetFactoryParameterList(const Teuchos::ParameterList& paramList); + //! Factory interpreter stuff + // TODO: + // - parameter list validator + // - SetParameterList + // - Set/Get directly Level manager + // - build per level + // - comments/docs + // - use FactoryManager instead of FactoryMap + //@{ + void SetFactoryParameterList(const Teuchos::ParameterList& paramList); - typedef std::map > FactoryMap; //TODO: remove this line - typedef std::map > FactoryManagerMap; + typedef std::map > FactoryMap; //TODO: remove this line + typedef std::map > FactoryManagerMap; - void BuildFactoryMap(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, FactoryMap& factoryMapOut, FactoryManagerMap& factoryManagers) const; + void BuildFactoryMap(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, FactoryMap& factoryMapOut, FactoryManagerMap& factoryManagers) const; - //! Internal factory for factories - Teuchos::RCP factFact_; + //! Internal factory for factories + Teuchos::RCP factFact_; - //! FacadeClass factory - Teuchos::RCP > facadeFact_; + //! 
FacadeClass factory + Teuchos::RCP > facadeFact_; - //@} - }; + //@} +}; -} // namespace MueLu +} // namespace MueLu #define MUELU_PARAMETERLISTINTERPRETER_SHORT #endif /* MUELU_PARAMETERLISTINTERPRETER_DECL_HPP */ diff --git a/packages/muelu/src/Interface/MueLu_ParameterListInterpreter_def.hpp b/packages/muelu/src/Interface/MueLu_ParameterListInterpreter_def.hpp index d27cfdca878c..ea36029f9200 100644 --- a/packages/muelu/src/Interface/MueLu_ParameterListInterpreter_def.hpp +++ b/packages/muelu/src/Interface/MueLu_ParameterListInterpreter_def.hpp @@ -129,1838 +129,1789 @@ namespace MueLu { - template - ParameterListInterpreter::ParameterListInterpreter(ParameterList& paramList, Teuchos::RCP > comm, Teuchos::RCP factFact, Teuchos::RCP facadeFact) : factFact_(factFact) { - RCP tM = rcp(new Teuchos::TimeMonitor(*Teuchos::TimeMonitor::getNewTimer(std::string("MueLu: ParameterListInterpreter (ParameterList)")))); - if(facadeFact == Teuchos::null) - facadeFact_ = Teuchos::rcp(new FacadeClassFactory()); - else - facadeFact_ = facadeFact; - - if (paramList.isParameter("xml parameter file")) { - std::string filename = paramList.get("xml parameter file", ""); - if (filename.length() != 0) { - TEUCHOS_TEST_FOR_EXCEPTION(comm.is_null(), Exceptions::RuntimeError, "xml parameter file requires a valid comm"); - - ParameterList paramList2 = paramList; - Teuchos::updateParametersFromXmlFileAndBroadcast(filename, Teuchos::Ptr(¶mList2), *comm); - SetParameterList(paramList2); - - } else { - SetParameterList(paramList); - } +template +ParameterListInterpreter::ParameterListInterpreter(ParameterList& paramList, Teuchos::RCP > comm, Teuchos::RCP factFact, Teuchos::RCP facadeFact) + : factFact_(factFact) { + RCP tM = rcp(new Teuchos::TimeMonitor(*Teuchos::TimeMonitor::getNewTimer(std::string("MueLu: ParameterListInterpreter (ParameterList)")))); + if (facadeFact == Teuchos::null) + facadeFact_ = Teuchos::rcp(new FacadeClassFactory()); + else + facadeFact_ = facadeFact; + + if 
(paramList.isParameter("xml parameter file")) { + std::string filename = paramList.get("xml parameter file", ""); + if (filename.length() != 0) { + TEUCHOS_TEST_FOR_EXCEPTION(comm.is_null(), Exceptions::RuntimeError, "xml parameter file requires a valid comm"); + + ParameterList paramList2 = paramList; + Teuchos::updateParametersFromXmlFileAndBroadcast(filename, Teuchos::Ptr(¶mList2), *comm); + SetParameterList(paramList2); } else { SetParameterList(paramList); } - } - template - ParameterListInterpreter::ParameterListInterpreter(const std::string& xmlFileName, const Teuchos::Comm& comm, Teuchos::RCP factFact, Teuchos::RCP facadeFact) : factFact_(factFact) { - RCP tM = rcp(new Teuchos::TimeMonitor(*Teuchos::TimeMonitor::getNewTimer(std::string("MueLu: ParameterListInterpreter (XML)")))); - if(facadeFact == Teuchos::null) - facadeFact_ = Teuchos::rcp(new FacadeClassFactory()); - else - facadeFact_ = facadeFact; - - ParameterList paramList; - Teuchos::updateParametersFromXmlFileAndBroadcast(xmlFileName, Teuchos::Ptr(¶mList), comm); + } else { SetParameterList(paramList); } - - template - void ParameterListInterpreter::SetParameterList(const ParameterList& paramList) { - Cycle_ = Hierarchy::GetDefaultCycle(); - WCycleStartLevel_ = Hierarchy::GetDefaultCycleStartLevel(); - scalingFactor_= Teuchos::ScalarTraits::one(); - blockSize_ = 1; - dofOffset_ = 0; - - if (paramList.isSublist("Hierarchy")) { - SetFactoryParameterList(paramList); - - } else if (paramList.isParameter("MueLu preconditioner") == true) { - this->GetOStream(Runtime0) << "Use facade class: " << paramList.get("MueLu preconditioner") << std::endl; - Teuchos::RCP pp = facadeFact_->SetParameterList(paramList); - SetFactoryParameterList(*pp); - - } else { - // The validator doesn't work correctly for non-serializable data (Hint: template parameters), so strip it out - ParameterList serialList, nonSerialList; - - ExtractNonSerializableData(paramList, serialList, nonSerialList); - Validate(serialList); - 
SetEasyParameterList(paramList); - } +} + +template +ParameterListInterpreter::ParameterListInterpreter(const std::string& xmlFileName, const Teuchos::Comm& comm, Teuchos::RCP factFact, Teuchos::RCP facadeFact) + : factFact_(factFact) { + RCP tM = rcp(new Teuchos::TimeMonitor(*Teuchos::TimeMonitor::getNewTimer(std::string("MueLu: ParameterListInterpreter (XML)")))); + if (facadeFact == Teuchos::null) + facadeFact_ = Teuchos::rcp(new FacadeClassFactory()); + else + facadeFact_ = facadeFact; + + ParameterList paramList; + Teuchos::updateParametersFromXmlFileAndBroadcast(xmlFileName, Teuchos::Ptr(¶mList), comm); + SetParameterList(paramList); +} + +template +void ParameterListInterpreter::SetParameterList(const ParameterList& paramList) { + Cycle_ = Hierarchy::GetDefaultCycle(); + WCycleStartLevel_ = Hierarchy::GetDefaultCycleStartLevel(); + scalingFactor_ = Teuchos::ScalarTraits::one(); + blockSize_ = 1; + dofOffset_ = 0; + + if (paramList.isSublist("Hierarchy")) { + SetFactoryParameterList(paramList); + + } else if (paramList.isParameter("MueLu preconditioner") == true) { + this->GetOStream(Runtime0) << "Use facade class: " << paramList.get("MueLu preconditioner") << std::endl; + Teuchos::RCP pp = facadeFact_->SetParameterList(paramList); + SetFactoryParameterList(*pp); + + } else { + // The validator doesn't work correctly for non-serializable data (Hint: template parameters), so strip it out + ParameterList serialList, nonSerialList; + + ExtractNonSerializableData(paramList, serialList, nonSerialList); + Validate(serialList); + SetEasyParameterList(paramList); } +} - // ===================================================================================================== - // ====================================== EASY interpreter ============================================= - // ===================================================================================================== - //! 
Helper functions to compare two paramter lists - static inline bool areSame(const ParameterList& list1, const ParameterList& list2); +// ===================================================================================================== +// ====================================== EASY interpreter ============================================= +// ===================================================================================================== +//! Helper functions to compare two paramter lists +static inline bool areSame(const ParameterList& list1, const ParameterList& list2); - // Get value from one of the lists, or set it to default - // Use case: check for a parameter value in a level-specific sublist, then in a root level list; - // if it is absent from both, set it to default +// Get value from one of the lists, or set it to default +// Use case: check for a parameter value in a level-specific sublist, then in a root level list; +// if it is absent from both, set it to default #define MUELU_SET_VAR_2LIST(paramList, defaultList, paramName, paramType, varName) \ - paramType varName; \ - if (paramList.isParameter(paramName)) varName = paramList.get(paramName); \ - else if (defaultList.isParameter(paramName)) varName = defaultList.get(paramName); \ - else varName = MasterList::getDefault(paramName); + paramType varName; \ + if (paramList.isParameter(paramName)) \ + varName = paramList.get(paramName); \ + else if (defaultList.isParameter(paramName)) \ + varName = defaultList.get(paramName); \ + else \ + varName = MasterList::getDefault(paramName); #define MUELU_TEST_AND_SET_VAR(paramList, paramName, paramType, varName) \ (paramList.isParameter(paramName) ? 
varName = paramList.get(paramName), true : false) - // Set parameter in a list if it is present in any of two lists - // User case: set factory specific parameter, first checking for a level-specific value, then cheking root level value -#define MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, paramName, paramType, listWrite) \ - try { \ - if (paramList .isParameter(paramName)) listWrite.set(paramName, paramList .get(paramName)); \ - else if (defaultList.isParameter(paramName)) listWrite.set(paramName, defaultList.get(paramName)); \ - } \ - catch(Teuchos::Exceptions::InvalidParameterType&) { \ - TEUCHOS_TEST_FOR_EXCEPTION_PURE_MSG(true, Teuchos::Exceptions::InvalidParameterType, \ +// Set parameter in a list if it is present in any of two lists +// User case: set factory specific parameter, first checking for a level-specific value, then cheking root level value +#define MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, paramName, paramType, listWrite) \ + try { \ + if (paramList.isParameter(paramName)) \ + listWrite.set(paramName, paramList.get(paramName)); \ + else if (defaultList.isParameter(paramName)) \ + listWrite.set(paramName, defaultList.get(paramName)); \ + } catch (Teuchos::Exceptions::InvalidParameterType&) { \ + TEUCHOS_TEST_FOR_EXCEPTION_PURE_MSG(true, Teuchos::Exceptions::InvalidParameterType, \ "Error: parameter \"" << paramName << "\" must be of type " << Teuchos::TypeNameTraits::name()); \ - } \ + } #define MUELU_TEST_PARAM_2LIST(paramList, defaultList, paramName, paramType, cmpValue) \ - (cmpValue == ( \ - paramList.isParameter(paramName) ? paramList .get(paramName) : ( \ - defaultList.isParameter(paramName) ? defaultList.get(paramName) : \ - MasterList::getDefault(paramName) ) ) ) + (cmpValue == (paramList.isParameter(paramName) ? paramList.get(paramName) : (defaultList.isParameter(paramName) ? 
defaultList.get(paramName) : MasterList::getDefault(paramName)))) #define MUELU_KOKKOS_FACTORY(varName, oldFactory, newFactory) \ - RCP varName; \ - if (!useKokkos_) varName = rcp(new oldFactory()); \ - else varName = rcp(new newFactory()); + RCP varName; \ + if (!useKokkos_) \ + varName = rcp(new oldFactory()); \ + else \ + varName = rcp(new newFactory()); #define MUELU_KOKKOS_FACTORY_NO_DECL(varName, oldFactory, newFactory) \ - if (!useKokkos_) varName = rcp(new oldFactory()); \ - else varName = rcp(new newFactory()); - - template - void ParameterListInterpreter:: - SetEasyParameterList(const ParameterList& constParamList) { - ParameterList paramList; - - MUELU_SET_VAR_2LIST(constParamList, constParamList, "problem: type", std::string, problemType); - if (problemType != "unknown") { - paramList = *MasterList::GetProblemSpecificList(problemType); - paramList.setParameters(constParamList); - } else { - // Create a non const copy of the parameter list - // Working with a modifiable list is much much easier than with original one - paramList = constParamList; - } - - // Check for Kokkos - useKokkos_ = !Node::is_serial; - (void)MUELU_TEST_AND_SET_VAR(paramList, "use kokkos refactor", bool, useKokkos_); - - // Check for timer synchronization - MUELU_SET_VAR_2LIST(paramList, paramList, "synchronize factory timers", bool, syncTimers); - if (syncTimers) - Factory::EnableTimerSync(); - - // Translate cycle type parameter - if (paramList.isParameter("cycle type")) { - std::map cycleMap; - cycleMap["V"] = VCYCLE; - cycleMap["W"] = WCYCLE; - - auto cycleType = paramList.get("cycle type"); - TEUCHOS_TEST_FOR_EXCEPTION(cycleMap.count(cycleType) == 0, Exceptions::RuntimeError, - "Invalid cycle type: \"" << cycleType << "\""); - Cycle_ = cycleMap[cycleType]; - } - - if (paramList.isParameter("W cycle start level")) { - WCycleStartLevel_ = paramList.get("W cycle start level"); - } - - if (paramList.isParameter("coarse grid correction scaling factor")) - scalingFactor_ = 
paramList.get("coarse grid correction scaling factor"); - - this->maxCoarseSize_ = paramList.get ("coarse: max size", MasterList::getDefault("coarse: max size")); - this->numDesiredLevel_ = paramList.get ("max levels", MasterList::getDefault("max levels")); - blockSize_ = paramList.get ("number of equations", MasterList::getDefault("number of equations")); - - - (void)MUELU_TEST_AND_SET_VAR(paramList, "debug: graph level", int, this->graphOutputLevel_); - - // Generic data saving (this saves the data on all levels) - if(paramList.isParameter("save data")) - this->dataToSave_ = Teuchos::getArrayFromStringParameter(paramList,"save data"); + if (!useKokkos_) \ + varName = rcp(new oldFactory()); \ + else \ + varName = rcp(new newFactory()); + +template +void ParameterListInterpreter:: + SetEasyParameterList(const ParameterList& constParamList) { + ParameterList paramList; + + MUELU_SET_VAR_2LIST(constParamList, constParamList, "problem: type", std::string, problemType); + if (problemType != "unknown") { + paramList = *MasterList::GetProblemSpecificList(problemType); + paramList.setParameters(constParamList); + } else { + // Create a non const copy of the parameter list + // Working with a modifiable list is much much easier than with original one + paramList = constParamList; + } - // Save level data - if (paramList.isSublist("export data")) { - ParameterList printList = paramList.sublist("export data"); + // Check for Kokkos + useKokkos_ = !Node::is_serial; + (void)MUELU_TEST_AND_SET_VAR(paramList, "use kokkos refactor", bool, useKokkos_); + + // Check for timer synchronization + MUELU_SET_VAR_2LIST(paramList, paramList, "synchronize factory timers", bool, syncTimers); + if (syncTimers) + Factory::EnableTimerSync(); + + // Translate cycle type parameter + if (paramList.isParameter("cycle type")) { + std::map cycleMap; + cycleMap["V"] = VCYCLE; + cycleMap["W"] = WCYCLE; + + auto cycleType = paramList.get("cycle type"); + 
TEUCHOS_TEST_FOR_EXCEPTION(cycleMap.count(cycleType) == 0, Exceptions::RuntimeError, + "Invalid cycle type: \"" << cycleType << "\""); + Cycle_ = cycleMap[cycleType]; + } - // Vectors, aggregates and other things that need special handling - if (printList.isParameter("Nullspace")) - this->nullspaceToPrint_ = Teuchos::getArrayFromStringParameter(printList, "Nullspace"); - if (printList.isParameter("Coordinates")) - this->coordinatesToPrint_ = Teuchos::getArrayFromStringParameter(printList, "Coordinates"); - if (printList.isParameter("Aggregates")) - this->aggregatesToPrint_ = Teuchos::getArrayFromStringParameter(printList, "Aggregates"); - if (printList.isParameter("pcoarsen: element to node map")) - this->elementToNodeMapsToPrint_ = Teuchos::getArrayFromStringParameter(printList, "pcoarsen: element to node map"); + if (paramList.isParameter("W cycle start level")) { + WCycleStartLevel_ = paramList.get("W cycle start level"); + } - // If we asked for an arbitrary matrix to be printed, we do that here - for(auto iter = printList.begin(); iter != printList.end(); iter++) { - const std::string & name = printList.name(iter); - // Ignore the special cases - if(name == "Nullspace" || name == "Coordinates" || name == "Aggregates" || name == "pcoarsen: element to node map") - continue; + if (paramList.isParameter("coarse grid correction scaling factor")) + scalingFactor_ = paramList.get("coarse grid correction scaling factor"); + + this->maxCoarseSize_ = paramList.get("coarse: max size", MasterList::getDefault("coarse: max size")); + this->numDesiredLevel_ = paramList.get("max levels", MasterList::getDefault("max levels")); + blockSize_ = paramList.get("number of equations", MasterList::getDefault("number of equations")); + + (void)MUELU_TEST_AND_SET_VAR(paramList, "debug: graph level", int, this->graphOutputLevel_); + + // Generic data saving (this saves the data on all levels) + if (paramList.isParameter("save data")) + this->dataToSave_ = 
Teuchos::getArrayFromStringParameter(paramList, "save data"); + + // Save level data + if (paramList.isSublist("export data")) { + ParameterList printList = paramList.sublist("export data"); + + // Vectors, aggregates and other things that need special handling + if (printList.isParameter("Nullspace")) + this->nullspaceToPrint_ = Teuchos::getArrayFromStringParameter(printList, "Nullspace"); + if (printList.isParameter("Coordinates")) + this->coordinatesToPrint_ = Teuchos::getArrayFromStringParameter(printList, "Coordinates"); + if (printList.isParameter("Aggregates")) + this->aggregatesToPrint_ = Teuchos::getArrayFromStringParameter(printList, "Aggregates"); + if (printList.isParameter("pcoarsen: element to node map")) + this->elementToNodeMapsToPrint_ = Teuchos::getArrayFromStringParameter(printList, "pcoarsen: element to node map"); + + // If we asked for an arbitrary matrix to be printed, we do that here + for (auto iter = printList.begin(); iter != printList.end(); iter++) { + const std::string& name = printList.name(iter); + // Ignore the special cases + if (name == "Nullspace" || name == "Coordinates" || name == "Aggregates" || name == "pcoarsen: element to node map") + continue; - this->matricesToPrint_[name] = Teuchos::getArrayFromStringParameter(printList, name); - } + this->matricesToPrint_[name] = Teuchos::getArrayFromStringParameter(printList, name); } + } - // Set verbosity parameter - VerbLevel oldVerbLevel = VerboseObject::GetDefaultVerbLevel(); - { - MUELU_SET_VAR_2LIST(paramList, paramList, "verbosity", std::string, verbosityLevel); - this->verbosity_ = toVerbLevel(verbosityLevel); - VerboseObject::SetDefaultVerbLevel(this->verbosity_); - } + // Set verbosity parameter + VerbLevel oldVerbLevel = VerboseObject::GetDefaultVerbLevel(); + { + MUELU_SET_VAR_2LIST(paramList, paramList, "verbosity", std::string, verbosityLevel); + this->verbosity_ = toVerbLevel(verbosityLevel); + VerboseObject::SetDefaultVerbLevel(this->verbosity_); + } - 
MUELU_SET_VAR_2LIST(paramList, paramList, "output filename", std::string, outputFilename); - if (outputFilename != "") - VerboseObject::SetMueLuOFileStream(outputFilename); - - // Detect if we need to transfer coordinates to coarse levels. We do that iff - // - we use "distance laplacian" dropping on some level, or - // - we use a repartitioner on some level that needs coordinates - // - we use brick aggregation - // - we use Ifpack2 line partitioner - // This is not ideal, as we may have "repartition: enable" turned on by default - // and not present in the list, but it is better than nothing. - useCoordinates_ = false; - useBlockNumber_ = false; - if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "distance laplacian") || - MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: type", std::string, "brick") || - MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: export visualization data", bool, true)) { + MUELU_SET_VAR_2LIST(paramList, paramList, "output filename", std::string, outputFilename); + if (outputFilename != "") + VerboseObject::SetMueLuOFileStream(outputFilename); + + // Detect if we need to transfer coordinates to coarse levels. We do that iff + // - we use "distance laplacian" dropping on some level, or + // - we use a repartitioner on some level that needs coordinates + // - we use brick aggregation + // - we use Ifpack2 line partitioner + // This is not ideal, as we may have "repartition: enable" turned on by default + // and not present in the list, but it is better than nothing. 
+ useCoordinates_ = false; + useBlockNumber_ = false; + if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "distance laplacian") || + MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: type", std::string, "brick") || + MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: export visualization data", bool, true)) { + useCoordinates_ = true; + } else if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "block diagonal distance laplacian")) { + useCoordinates_ = true; + useBlockNumber_ = true; + } else if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "block diagonal") || + MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "block diagonal classical") || + MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "block diagonal signed classical") || + MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "block diagonal colored signed classical") || + MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "signed classical")) { + useBlockNumber_ = true; + } else if (paramList.isSublist("smoother: params")) { + const auto smooParamList = paramList.sublist("smoother: params"); + if (smooParamList.isParameter("partitioner: type") && + (smooParamList.get("partitioner: type") == "line")) { useCoordinates_ = true; - } else if(MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "block diagonal distance laplacian")) { - useCoordinates_ = true; - useBlockNumber_ = true; - } else if(MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "block diagonal") || - MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "block diagonal classical") || - MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, 
"block diagonal signed classical") || - MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "block diagonal colored signed classical") || - MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "signed classical")) { - useBlockNumber_ = true; - } else if(paramList.isSublist("smoother: params")) { - const auto smooParamList = paramList.sublist("smoother: params"); - if(smooParamList.isParameter("partitioner: type") && - (smooParamList.get("partitioner: type") == "line")) { - useCoordinates_ = true; - } - } else { - for (int levelID = 0; levelID < this->numDesiredLevel_; levelID++) { - std::string levelStr = "level " + toString(levelID); + } + } else { + for (int levelID = 0; levelID < this->numDesiredLevel_; levelID++) { + std::string levelStr = "level " + toString(levelID); - if (paramList.isSublist(levelStr)) { - const ParameterList& levelList = paramList.sublist(levelStr); + if (paramList.isSublist(levelStr)) { + const ParameterList& levelList = paramList.sublist(levelStr); - if (MUELU_TEST_PARAM_2LIST(levelList, paramList, "aggregation: drop scheme", std::string, "distance laplacian") || - MUELU_TEST_PARAM_2LIST(levelList, paramList, "aggregation: type", std::string, "brick") || - MUELU_TEST_PARAM_2LIST(levelList, paramList, "aggregation: export visualization data", bool, true)) { - useCoordinates_ = true; - } - else if (MUELU_TEST_PARAM_2LIST(levelList, paramList, "aggregation: drop scheme", std::string, "block diagonal distance laplacian")) { - useCoordinates_ = true; - useBlockNumber_ = true; - } - else if (MUELU_TEST_PARAM_2LIST(levelList, paramList, "aggregation: drop scheme", std::string, "block diagonal") || + if (MUELU_TEST_PARAM_2LIST(levelList, paramList, "aggregation: drop scheme", std::string, "distance laplacian") || + MUELU_TEST_PARAM_2LIST(levelList, paramList, "aggregation: type", std::string, "brick") || + MUELU_TEST_PARAM_2LIST(levelList, paramList, "aggregation: export 
visualization data", bool, true)) { + useCoordinates_ = true; + } else if (MUELU_TEST_PARAM_2LIST(levelList, paramList, "aggregation: drop scheme", std::string, "block diagonal distance laplacian")) { + useCoordinates_ = true; + useBlockNumber_ = true; + } else if (MUELU_TEST_PARAM_2LIST(levelList, paramList, "aggregation: drop scheme", std::string, "block diagonal") || MUELU_TEST_PARAM_2LIST(levelList, paramList, "aggregation: drop scheme", std::string, "block diagonal classical") || MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "block diagonal signed classical") || MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "block diagonal colored signed classical") || MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "signed classical")) { - useBlockNumber_ = true; - } + useBlockNumber_ = true; } } } + } - if(MUELU_TEST_PARAM_2LIST(paramList, paramList, "repartition: enable", bool, true)) { - // We don't need coordinates if we're doing the in-place restriction - if(MUELU_TEST_PARAM_2LIST(paramList, paramList, "repartition: use subcommunicators", bool, true) && - MUELU_TEST_PARAM_2LIST(paramList, paramList, "repartition: use subcommunicators in place", bool, true)) { - // do nothing --- these don't need coordinates - } else if (!paramList.isSublist("repartition: params")) { - useCoordinates_ = true; - } else { - const ParameterList& repParams = paramList.sublist("repartition: params"); - if (repParams.isType("algorithm")) { - const std::string algo = repParams.get("algorithm"); - if (algo == "multijagged" || algo == "rcb") { - useCoordinates_ = true; - } - } else { + if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "repartition: enable", bool, true)) { + // We don't need coordinates if we're doing the in-place restriction + if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "repartition: use subcommunicators", bool, true) && + MUELU_TEST_PARAM_2LIST(paramList, 
paramList, "repartition: use subcommunicators in place", bool, true)) { + // do nothing --- these don't need coordinates + } else if (!paramList.isSublist("repartition: params")) { + useCoordinates_ = true; + } else { + const ParameterList& repParams = paramList.sublist("repartition: params"); + if (repParams.isType("algorithm")) { + const std::string algo = repParams.get("algorithm"); + if (algo == "multijagged" || algo == "rcb") { useCoordinates_ = true; } + } else { + useCoordinates_ = true; } } - for (int levelID = 0; levelID < this->numDesiredLevel_; levelID++) { - std::string levelStr = "level " + toString(levelID); + } + for (int levelID = 0; levelID < this->numDesiredLevel_; levelID++) { + std::string levelStr = "level " + toString(levelID); - if (paramList.isSublist(levelStr)) { - const ParameterList& levelList = paramList.sublist(levelStr); + if (paramList.isSublist(levelStr)) { + const ParameterList& levelList = paramList.sublist(levelStr); - if (MUELU_TEST_PARAM_2LIST(levelList, paramList, "repartition: enable", bool, true)) { - if (!levelList.isSublist("repartition: params")) { - useCoordinates_ = true; - break; - } else { - const ParameterList& repParams = levelList.sublist("repartition: params"); - if (repParams.isType("algorithm")) { - const std::string algo = repParams.get("algorithm"); - if (algo == "multijagged" || algo == "rcb"){ - useCoordinates_ = true; - break; - } - } else { + if (MUELU_TEST_PARAM_2LIST(levelList, paramList, "repartition: enable", bool, true)) { + if (!levelList.isSublist("repartition: params")) { + useCoordinates_ = true; + break; + } else { + const ParameterList& repParams = levelList.sublist("repartition: params"); + if (repParams.isType("algorithm")) { + const std::string algo = repParams.get("algorithm"); + if (algo == "multijagged" || algo == "rcb") { useCoordinates_ = true; break; } + } else { + useCoordinates_ = true; + break; } } } } + } - // Detect if we do implicit P and R rebalance - changedPRrebalance_ = false; 
- changedPRViaCopyrebalance_ = false; - if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "repartition: enable", bool, true)) { - changedPRrebalance_ = MUELU_TEST_AND_SET_VAR(paramList, "repartition: rebalance P and R", bool, this->doPRrebalance_); - changedPRViaCopyrebalance_ = MUELU_TEST_AND_SET_VAR(paramList,"repartition: explicit via new copy rebalance P and R", bool, this->doPRViaCopyrebalance_); - } - - // Detect if we use implicit transpose - changedImplicitTranspose_ = MUELU_TEST_AND_SET_VAR(paramList, "transpose: use implicit", bool, this->implicitTranspose_); + // Detect if we do implicit P and R rebalance + changedPRrebalance_ = false; + changedPRViaCopyrebalance_ = false; + if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "repartition: enable", bool, true)) { + changedPRrebalance_ = MUELU_TEST_AND_SET_VAR(paramList, "repartition: rebalance P and R", bool, this->doPRrebalance_); + changedPRViaCopyrebalance_ = MUELU_TEST_AND_SET_VAR(paramList, "repartition: explicit via new copy rebalance P and R", bool, this->doPRViaCopyrebalance_); + } - // Detect if we use fuse prolongation and update - (void)MUELU_TEST_AND_SET_VAR(paramList, "fuse prolongation and update", bool, this->fuseProlongationAndUpdate_); + // Detect if we use implicit transpose + changedImplicitTranspose_ = MUELU_TEST_AND_SET_VAR(paramList, "transpose: use implicit", bool, this->implicitTranspose_); - // Detect if we suppress the dimension check of the user-given nullspace - (void)MUELU_TEST_AND_SET_VAR(paramList, "nullspace: suppress dimension check", bool, this->suppressNullspaceDimensionCheck_); + // Detect if we use fuse prolongation and update + (void)MUELU_TEST_AND_SET_VAR(paramList, "fuse prolongation and update", bool, this->fuseProlongationAndUpdate_); - if (paramList.isSublist("matvec params")) - this->matvecParams_ = Teuchos::parameterList(paramList.sublist("matvec params")); + // Detect if we suppress the dimension check of the user-given nullspace + 
(void)MUELU_TEST_AND_SET_VAR(paramList, "nullspace: suppress dimension check", bool, this->suppressNullspaceDimensionCheck_); - // Create default manager - // FIXME: should it be here, or higher up - RCP defaultManager = rcp(new FactoryManager()); - defaultManager->SetVerbLevel(this->verbosity_); - defaultManager->SetKokkosRefactor(useKokkos_); + if (paramList.isSublist("matvec params")) + this->matvecParams_ = Teuchos::parameterList(paramList.sublist("matvec params")); - // We will ignore keeps0 - std::vector keeps0; - UpdateFactoryManager(paramList, ParameterList(), *defaultManager, 0/*levelID*/, keeps0); + // Create default manager + // FIXME: should it be here, or higher up + RCP defaultManager = rcp(new FactoryManager()); + defaultManager->SetVerbLevel(this->verbosity_); + defaultManager->SetKokkosRefactor(useKokkos_); - // std::cout<<"*** Default Manager ***"<Print(); + // We will ignore keeps0 + std::vector keeps0; + UpdateFactoryManager(paramList, ParameterList(), *defaultManager, 0 /*levelID*/, keeps0); - // Create level specific factory managers - for (int levelID = 0; levelID < this->numDesiredLevel_; levelID++) { - // Note, that originally if there were no level specific parameters, we - // simply copied the defaultManager However, with the introduction of - // levelID to UpdateFactoryManager (required for reuse), we can no longer - // guarantee that the kept variables are the same for each level even if - // dependency structure does not change. 
- RCP levelManager = rcp(new FactoryManager(*defaultManager)); - levelManager->SetVerbLevel(defaultManager->GetVerbLevel()); - - std::vector keeps; - if (paramList.isSublist("level " + toString(levelID))) { - // We do this so the parameters on the level get flagged correctly as "used" - ParameterList& levelList = paramList.sublist("level " + toString(levelID), true/*mustAlreadyExist*/); - UpdateFactoryManager(levelList, paramList, *levelManager, levelID, keeps); - - } else { - ParameterList levelList; - UpdateFactoryManager(levelList, paramList, *levelManager, levelID, keeps); - } + // std::cout<<"*** Default Manager ***"<Print(); - this->keep_[levelID] = keeps; - this->AddFactoryManager(levelID, 1, levelManager); + // Create level specific factory managers + for (int levelID = 0; levelID < this->numDesiredLevel_; levelID++) { + // Note, that originally if there were no level specific parameters, we + // simply copied the defaultManager However, with the introduction of + // levelID to UpdateFactoryManager (required for reuse), we can no longer + // guarantee that the kept variables are the same for each level even if + // dependency structure does not change. + RCP levelManager = rcp(new FactoryManager(*defaultManager)); + levelManager->SetVerbLevel(defaultManager->GetVerbLevel()); - // std::cout<<"*** Level "<Print(); + std::vector keeps; + if (paramList.isSublist("level " + toString(levelID))) { + // We do this so the parameters on the level get flagged correctly as "used" + ParameterList& levelList = paramList.sublist("level " + toString(levelID), true /*mustAlreadyExist*/); + UpdateFactoryManager(levelList, paramList, *levelManager, levelID, keeps); + } else { + ParameterList levelList; + UpdateFactoryManager(levelList, paramList, *levelManager, levelID, keeps); } - // FIXME: parameters passed to packages, like Ifpack2, are not touched by us, resulting in "[unused]" flag - // being displayed. 
On the other hand, we don't want to simply iterate through them touching. I don't know - // what a good solution looks like - if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "print initial parameters", bool, true)) - this->GetOStream(static_cast(Runtime1), 0) << paramList << std::endl; + this->keep_[levelID] = keeps; + this->AddFactoryManager(levelID, 1, levelManager); - if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "print unused parameters", bool, true)) { - // Check unused parameters - ParameterList unusedParamList; + // std::cout<<"*** Level "<Print(); + } - // Check for unused parameters that aren't lists - for (ParameterList::ConstIterator it = paramList.begin(); it != paramList.end(); it++) { - const ParameterEntry& entry = paramList.entry(it); + // FIXME: parameters passed to packages, like Ifpack2, are not touched by us, resulting in "[unused]" flag + // being displayed. On the other hand, we don't want to simply iterate through them touching. I don't know + // what a good solution looks like + if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "print initial parameters", bool, true)) + this->GetOStream(static_cast(Runtime1), 0) << paramList << std::endl; - if (!entry.isList() && !entry.isUsed()) - unusedParamList.setEntry(paramList.name(it), entry); - } + if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "print unused parameters", bool, true)) { + // Check unused parameters + ParameterList unusedParamList; - // Check for unused parameters in level-specific sublists - for (int levelID = 0; levelID < this->numDesiredLevel_; levelID++) { - std::string levelStr = "level " + toString(levelID); + // Check for unused parameters that aren't lists + for (ParameterList::ConstIterator it = paramList.begin(); it != paramList.end(); it++) { + const ParameterEntry& entry = paramList.entry(it); - if (paramList.isSublist(levelStr)) { - const ParameterList& levelList = paramList.sublist(levelStr); + if (!entry.isList() && !entry.isUsed()) + 
unusedParamList.setEntry(paramList.name(it), entry); + } - for (ParameterList::ConstIterator itr = levelList.begin(); itr != levelList.end(); ++itr) { - const ParameterEntry& entry = levelList.entry(itr); + // Check for unused parameters in level-specific sublists + for (int levelID = 0; levelID < this->numDesiredLevel_; levelID++) { + std::string levelStr = "level " + toString(levelID); - if (!entry.isList() && !entry.isUsed()) - unusedParamList.sublist(levelStr).setEntry(levelList.name(itr), entry); - } - } - } + if (paramList.isSublist(levelStr)) { + const ParameterList& levelList = paramList.sublist(levelStr); - if (unusedParamList.numParams() > 0) { - std::ostringstream unusedParamsStream; - int indent = 4; - unusedParamList.print(unusedParamsStream, indent); + for (ParameterList::ConstIterator itr = levelList.begin(); itr != levelList.end(); ++itr) { + const ParameterEntry& entry = levelList.entry(itr); - this->GetOStream(Warnings1) << "The following parameters were not used:\n" << unusedParamsStream.str() << std::endl; + if (!entry.isList() && !entry.isUsed()) + unusedParamList.sublist(levelStr).setEntry(levelList.name(itr), entry); + } } } - VerboseObject::SetDefaultVerbLevel(oldVerbLevel); + if (unusedParamList.numParams() > 0) { + std::ostringstream unusedParamsStream; + int indent = 4; + unusedParamList.print(unusedParamsStream, indent); + this->GetOStream(Warnings1) << "The following parameters were not used:\n" + << unusedParamsStream.str() << std::endl; + } } + VerboseObject::SetDefaultVerbLevel(oldVerbLevel); +} - // ===================================================================================================== - // ==================================== UpdateFactoryManager =========================================== - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager(ParameterList& paramList, const ParameterList& 
defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const - { - // NOTE: Factory::SetParameterList must be called prior to Factory::SetFactory, as - // SetParameterList sets default values for non mentioned parameters, including factories +// ===================================================================================================== +// ==================================== UpdateFactoryManager =========================================== +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const { + // NOTE: Factory::SetParameterList must be called prior to Factory::SetFactory, as + // SetParameterList sets default values for non mentioned parameters, including factories - using strings = std::unordered_set; + using strings = std::unordered_set; - // shortcut - if (paramList.numParams() == 0 && defaultList.numParams() > 0) - paramList = ParameterList(defaultList); + // shortcut + if (paramList.numParams() == 0 && defaultList.numParams() > 0) + paramList = ParameterList(defaultList); - MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); - TEUCHOS_TEST_FOR_EXCEPTION(strings({"none", "tP", "RP", "emin", "RAP", "full", "S"}).count(reuseType) == 0, - Exceptions::RuntimeError, "Unknown \"reuse: type\" value: \"" << reuseType << "\". Please consult User's Guide."); + MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); + TEUCHOS_TEST_FOR_EXCEPTION(strings({"none", "tP", "RP", "emin", "RAP", "full", "S"}).count(reuseType) == 0, + Exceptions::RuntimeError, "Unknown \"reuse: type\" value: \"" << reuseType << "\". 
Please consult User's Guide."); - MUELU_SET_VAR_2LIST(paramList, defaultList, "multigrid algorithm", std::string, multigridAlgo); - TEUCHOS_TEST_FOR_EXCEPTION(strings({"unsmoothed", "sa", "pg", "emin", "matlab", "pcoarsen","classical","smoothed reitzinger","unsmoothed reitzinger","replicate","combine"}).count(multigridAlgo) == 0, - Exceptions::RuntimeError, "Unknown \"multigrid algorithm\" value: \"" << multigridAlgo << "\". Please consult User's Guide."); + MUELU_SET_VAR_2LIST(paramList, defaultList, "multigrid algorithm", std::string, multigridAlgo); + TEUCHOS_TEST_FOR_EXCEPTION(strings({"unsmoothed", "sa", "pg", "emin", "matlab", "pcoarsen", "classical", "smoothed reitzinger", "unsmoothed reitzinger", "replicate", "combine"}).count(multigridAlgo) == 0, + Exceptions::RuntimeError, "Unknown \"multigrid algorithm\" value: \"" << multigridAlgo << "\". Please consult User's Guide."); #ifndef HAVE_MUELU_MATLAB - TEUCHOS_TEST_FOR_EXCEPTION(multigridAlgo == "matlab", Exceptions::RuntimeError, - "Cannot use matlab for multigrid algorithm - MueLu was not configured with MATLAB support."); + TEUCHOS_TEST_FOR_EXCEPTION(multigridAlgo == "matlab", Exceptions::RuntimeError, + "Cannot use matlab for multigrid algorithm - MueLu was not configured with MATLAB support."); #endif #ifndef HAVE_MUELU_INTREPID2 - TEUCHOS_TEST_FOR_EXCEPTION(multigridAlgo == "pcoarsen", Exceptions::RuntimeError, - "Cannot use IntrepidPCoarsen prolongator factory - MueLu was not configured with Intrepid support."); + TEUCHOS_TEST_FOR_EXCEPTION(multigridAlgo == "pcoarsen", Exceptions::RuntimeError, + "Cannot use IntrepidPCoarsen prolongator factory - MueLu was not configured with Intrepid support."); #endif - // Only some combinations of reuse and multigrid algorithms are tested, all - // other are considered invalid at the moment - if (reuseType == "none" || reuseType == "S" || reuseType == "RP" || reuseType == "RAP") { - // This works for all kinds of multigrid algorithms - - } else if (reuseType == 
"tP" && (multigridAlgo != "sa" && multigridAlgo != "unsmoothed")) { - reuseType = "none"; - this->GetOStream(Warnings0) << "Ignoring \"tP\" reuse option as it is only compatible with \"sa\", " - "or \"unsmoothed\" multigrid algorithms" << std::endl; - - } else if (reuseType == "emin" && multigridAlgo != "emin") { - reuseType = "none"; - this->GetOStream(Warnings0) << "Ignoring \"emin\" reuse option it is only compatible with " - "\"emin\" multigrid algorithm" << std::endl; - } - - // == Non-serializable data === - // Check both the parameter and the type - bool have_userP = false; - if (paramList.isParameter("P") && !paramList.get >("P").is_null()) - have_userP = true; + // Only some combinations of reuse and multigrid algorithms are tested, all + // other are considered invalid at the moment + if (reuseType == "none" || reuseType == "S" || reuseType == "RP" || reuseType == "RAP") { + // This works for all kinds of multigrid algorithms + + } else if (reuseType == "tP" && (multigridAlgo != "sa" && multigridAlgo != "unsmoothed")) { + reuseType = "none"; + this->GetOStream(Warnings0) << "Ignoring \"tP\" reuse option as it is only compatible with \"sa\", " + "or \"unsmoothed\" multigrid algorithms" + << std::endl; + + } else if (reuseType == "emin" && multigridAlgo != "emin") { + reuseType = "none"; + this->GetOStream(Warnings0) << "Ignoring \"emin\" reuse option it is only compatible with " + "\"emin\" multigrid algorithm" + << std::endl; + } - // === Coarse solver === - UpdateFactoryManager_CoarseSolvers(paramList, defaultList, manager, levelID, keeps); + // == Non-serializable data === + // Check both the parameter and the type + bool have_userP = false; + if (paramList.isParameter("P") && !paramList.get >("P").is_null()) + have_userP = true; + + // === Coarse solver === + UpdateFactoryManager_CoarseSolvers(paramList, defaultList, manager, levelID, keeps); + + // == Smoothers == + UpdateFactoryManager_Smoothers(paramList, defaultList, manager, levelID, keeps); + + 
// === BlockNumber === + if (levelID == 0) + UpdateFactoryManager_BlockNumber(paramList, defaultList, manager, levelID, keeps); + + // === Aggregation === + if (multigridAlgo == "unsmoothed reitzinger" || multigridAlgo == "smoothed reitzinger") + UpdateFactoryManager_Reitzinger(paramList, defaultList, manager, levelID, keeps); + else + UpdateFactoryManager_Aggregation_TentativeP(paramList, defaultList, manager, levelID, keeps); + + // === Nullspace === + RCP nullSpaceFactory; // Cache thcAN is guy for the combination of semi-coarsening & repartitioning + UpdateFactoryManager_Nullspace(paramList, defaultList, manager, levelID, keeps, nullSpaceFactory); + + // === Prolongation === + // NOTE: None of the UpdateFactoryManager routines called here check the + // multigridAlgo. This is intentional, to allow for reuse of components + // underneath. Thus, the multigridAlgo was checked in the beginning of the + // function. + if (have_userP) { + // User prolongator + manager.SetFactory("P", NoFactory::getRCP()); + + } else if (multigridAlgo == "unsmoothed" || multigridAlgo == "unsmoothed reitzinger") { + // Unsmoothed aggregation + manager.SetFactory("P", manager.GetFactory("Ptent")); + + } else if (multigridAlgo == "classical") { + // Classical AMG + manager.SetFactory("P", manager.GetFactory("Ptent")); + + } else if (multigridAlgo == "sa" || multigridAlgo == "smoothed reitzinger") { + // Smoothed aggregation + UpdateFactoryManager_SA(paramList, defaultList, manager, levelID, keeps); - // == Smoothers == - UpdateFactoryManager_Smoothers(paramList, defaultList, manager, levelID, keeps); + } else if (multigridAlgo == "emin") { + // Energy minimization + UpdateFactoryManager_Emin(paramList, defaultList, manager, levelID, keeps); - // === BlockNumber === - if(levelID == 0) - UpdateFactoryManager_BlockNumber(paramList, defaultList, manager, levelID, keeps); + } else if (multigridAlgo == "replicate") { + UpdateFactoryManager_Replicate(paramList, defaultList, manager, levelID, 
keeps); - // === Aggregation === - if(multigridAlgo == "unsmoothed reitzinger" || multigridAlgo == "smoothed reitzinger") - UpdateFactoryManager_Reitzinger(paramList, defaultList, manager, levelID, keeps); - else - UpdateFactoryManager_Aggregation_TentativeP(paramList, defaultList, manager, levelID, keeps); - - // === Nullspace === - RCP nullSpaceFactory; // Cache thcAN is guy for the combination of semi-coarsening & repartitioning - UpdateFactoryManager_Nullspace(paramList, defaultList, manager, levelID, keeps, nullSpaceFactory); - - // === Prolongation === - // NOTE: None of the UpdateFactoryManager routines called here check the - // multigridAlgo. This is intentional, to allow for reuse of components - // underneath. Thus, the multigridAlgo was checked in the beginning of the - // function. - if (have_userP) { - // User prolongator - manager.SetFactory("P", NoFactory::getRCP()); - - } else if (multigridAlgo == "unsmoothed" || multigridAlgo == "unsmoothed reitzinger") { - // Unsmoothed aggregation - manager.SetFactory("P", manager.GetFactory("Ptent")); - - } else if (multigridAlgo == "classical") { - // Classical AMG - manager.SetFactory("P", manager.GetFactory("Ptent")); - - } else if (multigridAlgo == "sa" || multigridAlgo == "smoothed reitzinger") { - // Smoothed aggregation - UpdateFactoryManager_SA(paramList, defaultList, manager, levelID, keeps); - - } else if (multigridAlgo == "emin") { - // Energy minimization - UpdateFactoryManager_Emin(paramList, defaultList, manager, levelID, keeps); + } else if (multigridAlgo == "combine") { + UpdateFactoryManager_Combine(paramList, defaultList, manager, levelID, keeps); - } else if (multigridAlgo == "replicate") { - UpdateFactoryManager_Replicate(paramList, defaultList, manager, levelID, keeps); + } else if (multigridAlgo == "pg") { + // Petrov-Galerkin + UpdateFactoryManager_PG(paramList, defaultList, manager, levelID, keeps); - } else if (multigridAlgo == "combine") { - UpdateFactoryManager_Combine(paramList, 
defaultList, manager, levelID, keeps); + } else if (multigridAlgo == "matlab") { + // Matlab Coarsneing + UpdateFactoryManager_Matlab(paramList, defaultList, manager, levelID, keeps); - } else if (multigridAlgo == "pg") { - // Petrov-Galerkin - UpdateFactoryManager_PG(paramList, defaultList, manager, levelID, keeps); + } else if (multigridAlgo == "pcoarsen") { + // P-Coarsening + UpdateFactoryManager_PCoarsen(paramList, defaultList, manager, levelID, keeps); + } - } else if (multigridAlgo == "matlab") { - // Matlab Coarsneing - UpdateFactoryManager_Matlab(paramList, defaultList, manager, levelID, keeps); + // === Semi-coarsening === + UpdateFactoryManager_SemiCoarsen(paramList, defaultList, manager, levelID, keeps); - } else if (multigridAlgo == "pcoarsen") { - // P-Coarsening - UpdateFactoryManager_PCoarsen(paramList, defaultList, manager, levelID, keeps); - } + // === Restriction === + UpdateFactoryManager_Restriction(paramList, defaultList, manager, levelID, keeps); - // === Semi-coarsening === - UpdateFactoryManager_SemiCoarsen(paramList, defaultList, manager, levelID, keeps); + // === RAP === + UpdateFactoryManager_RAP(paramList, defaultList, manager, levelID, keeps); - // === Restriction === - UpdateFactoryManager_Restriction(paramList, defaultList, manager, levelID, keeps); + // == BlockNumber Transfer == + UpdateFactoryManager_LocalOrdinalTransfer("BlockNumber", multigridAlgo, paramList, defaultList, manager, levelID, keeps); - // === RAP === - UpdateFactoryManager_RAP(paramList, defaultList, manager, levelID, keeps); + // === Coordinates === + UpdateFactoryManager_Coordinates(paramList, defaultList, manager, levelID, keeps); - // == BlockNumber Transfer == - UpdateFactoryManager_LocalOrdinalTransfer("BlockNumber",multigridAlgo,paramList,defaultList,manager,levelID,keeps); + // === Pre-Repartition Keeps for Reuse === + if ((reuseType == "RP" || reuseType == "RAP" || reuseType == "full") && levelID) + keeps.push_back(keep_pair("Nullspace", 
manager.GetFactory("Nullspace").get())); + if (reuseType == "RP" && levelID) { + keeps.push_back(keep_pair("P", manager.GetFactory("P").get())); + if (!this->implicitTranspose_) + keeps.push_back(keep_pair("R", manager.GetFactory("R").get())); + } + if ((reuseType == "tP" || reuseType == "RP" || reuseType == "emin") && useCoordinates_ && levelID) + keeps.push_back(keep_pair("Coordinates", manager.GetFactory("Coordinates").get())); - // === Coordinates === - UpdateFactoryManager_Coordinates(paramList, defaultList, manager, levelID, keeps); + // === Repartitioning === + UpdateFactoryManager_Repartition(paramList, defaultList, manager, levelID, keeps, nullSpaceFactory); - // === Pre-Repartition Keeps for Reuse === - if ((reuseType == "RP" || reuseType == "RAP" || reuseType == "full") && levelID) - keeps.push_back(keep_pair("Nullspace", manager.GetFactory("Nullspace").get())); + // === Lower precision transfers === + UpdateFactoryManager_LowPrecision(paramList, defaultList, manager, levelID, keeps); - if (reuseType == "RP" && levelID) { - keeps.push_back(keep_pair("P", manager.GetFactory("P").get())); - if (!this->implicitTranspose_) - keeps.push_back(keep_pair("R", manager.GetFactory("R").get())); - } - if ((reuseType == "tP" || reuseType == "RP" || reuseType == "emin") && useCoordinates_ && levelID) - keeps.push_back(keep_pair("Coordinates", manager.GetFactory("Coordinates").get())); + // === Final Keeps for Reuse === + if ((reuseType == "RAP" || reuseType == "full") && levelID) { + keeps.push_back(keep_pair("P", manager.GetFactory("P").get())); + if (!this->implicitTranspose_) + keeps.push_back(keep_pair("R", manager.GetFactory("R").get())); + keeps.push_back(keep_pair("A", manager.GetFactory("A").get())); + } - // === Repartitioning === - UpdateFactoryManager_Repartition(paramList, defaultList, manager, levelID, keeps, nullSpaceFactory); + // In case you ever want to inspect the FactoryManager as it is generated for each level + /*std::cout<<"*** Factory Manager on 
level "< +void ParameterListInterpreter:: + UpdateFactoryManager_Smoothers(ParameterList& paramList, const ParameterList& defaultList, + FactoryManager& manager, int levelID, std::vector& keeps) const { + MUELU_SET_VAR_2LIST(paramList, defaultList, "multigrid algorithm", std::string, multigridAlgo); + MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); + bool useMaxAbsDiagonalScaling = false; + if (defaultList.isParameter("sa: use rowsumabs diagonal scaling")) + useMaxAbsDiagonalScaling = defaultList.get("sa: use rowsumabs diagonal scaling"); + + // === Smoothing === + // FIXME: should custom smoother check default list too? + bool isCustomSmoother = + paramList.isParameter("smoother: pre or post") || + paramList.isParameter("smoother: type") || paramList.isParameter("smoother: pre type") || paramList.isParameter("smoother: post type") || + paramList.isSublist("smoother: params") || paramList.isSublist("smoother: pre params") || paramList.isSublist("smoother: post params") || + paramList.isParameter("smoother: sweeps") || paramList.isParameter("smoother: pre sweeps") || paramList.isParameter("smoother: post sweeps") || + paramList.isParameter("smoother: overlap") || paramList.isParameter("smoother: pre overlap") || paramList.isParameter("smoother: post overlap"); + + MUELU_SET_VAR_2LIST(paramList, defaultList, "smoother: pre or post", std::string, PreOrPost); + if (PreOrPost == "none") { + manager.SetFactory("Smoother", Teuchos::null); + + } else if (isCustomSmoother) { + // FIXME: get default values from the factory + // NOTE: none of the smoothers at the moment use parameter validation framework, so we + // cannot get the default values from it. 
+#define TEST_MUTUALLY_EXCLUSIVE(arg1, arg2) \ + TEUCHOS_TEST_FOR_EXCEPTION(paramList.isParameter(#arg1) && paramList.isParameter(#arg2), \ + Exceptions::InvalidArgument, "You cannot specify both \"" #arg1 "\" and \"" #arg2 "\""); +#define TEST_MUTUALLY_EXCLUSIVE_S(arg1, arg2) \ + TEUCHOS_TEST_FOR_EXCEPTION(paramList.isSublist(#arg1) && paramList.isSublist(#arg2), \ + Exceptions::InvalidArgument, "You cannot specify both \"" #arg1 "\" and \"" #arg2 "\""); + + TEST_MUTUALLY_EXCLUSIVE("smoother: type", "smoother: pre type"); + TEST_MUTUALLY_EXCLUSIVE("smoother: type", "smoother: post type"); + TEST_MUTUALLY_EXCLUSIVE("smoother: sweeps", "smoother: pre sweeps"); + TEST_MUTUALLY_EXCLUSIVE("smoother: sweeps", "smoother: post sweeps"); + TEST_MUTUALLY_EXCLUSIVE("smoother: overlap", "smoother: pre overlap"); + TEST_MUTUALLY_EXCLUSIVE("smoother: overlap", "smoother: post overlap"); + TEST_MUTUALLY_EXCLUSIVE_S("smoother: params", "smoother: pre params"); + TEST_MUTUALLY_EXCLUSIVE_S("smoother: params", "smoother: post params"); + TEUCHOS_TEST_FOR_EXCEPTION(PreOrPost == "both" && (paramList.isParameter("smoother: pre type") != paramList.isParameter("smoother: post type")), + Exceptions::InvalidArgument, "You must specify both \"smoother: pre type\" and \"smoother: post type\""); + + // Default values + int overlap = 0; + ParameterList defaultSmootherParams; + defaultSmootherParams.set("relaxation: type", "Symmetric Gauss-Seidel"); + defaultSmootherParams.set("relaxation: sweeps", Teuchos::OrdinalTraits::one()); + defaultSmootherParams.set("relaxation: damping factor", Teuchos::ScalarTraits::one()); + + RCP preSmoother = Teuchos::null, postSmoother = Teuchos::null; + std::string preSmootherType, postSmootherType; + ParameterList preSmootherParams, postSmootherParams; + + if (paramList.isParameter("smoother: overlap")) + overlap = paramList.get("smoother: overlap"); + + if (PreOrPost == "pre" || PreOrPost == "both") { + if (paramList.isParameter("smoother: pre type")) { + 
preSmootherType = paramList.get("smoother: pre type"); + } else { + MUELU_SET_VAR_2LIST(paramList, defaultList, "smoother: type", std::string, preSmootherTypeTmp); + preSmootherType = preSmootherTypeTmp; + } + if (paramList.isParameter("smoother: pre overlap")) + overlap = paramList.get("smoother: pre overlap"); - // === Lower precision transfers === - UpdateFactoryManager_LowPrecision(paramList, defaultList, manager, levelID, keeps); + if (paramList.isSublist("smoother: pre params")) + preSmootherParams = paramList.sublist("smoother: pre params"); + else if (paramList.isSublist("smoother: params")) + preSmootherParams = paramList.sublist("smoother: params"); + else if (defaultList.isSublist("smoother: params")) + preSmootherParams = defaultList.sublist("smoother: params"); + else if (preSmootherType == "RELAXATION") + preSmootherParams = defaultSmootherParams; - // === Final Keeps for Reuse === - if ((reuseType == "RAP" || reuseType == "full") && levelID) { - keeps.push_back(keep_pair("P", manager.GetFactory("P").get())); - if (!this->implicitTranspose_) - keeps.push_back(keep_pair("R", manager.GetFactory("R").get())); - keeps.push_back(keep_pair("A", manager.GetFactory("A").get())); - } + if (preSmootherType == "CHEBYSHEV" && useMaxAbsDiagonalScaling) + preSmootherParams.set("chebyshev: use rowsumabs diagonal scaling", true); - // In case you ever want to inspect the FactoryManager as it is generated for each level - /*std::cout<<"*** Factory Manager on level "< - void ParameterListInterpreter:: - UpdateFactoryManager_Smoothers(ParameterList& paramList, const ParameterList& defaultList, - FactoryManager& manager, int levelID, std::vector& keeps) const - { - MUELU_SET_VAR_2LIST(paramList, defaultList, "multigrid algorithm", std::string, multigridAlgo); - MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); - bool useMaxAbsDiagonalScaling = false; - if (defaultList.isParameter("sa: use rowsumabs diagonal scaling")) - 
useMaxAbsDiagonalScaling = defaultList.get("sa: use rowsumabs diagonal scaling"); - - // === Smoothing === - // FIXME: should custom smoother check default list too? - bool isCustomSmoother = - paramList.isParameter("smoother: pre or post") || - paramList.isParameter("smoother: type") || paramList.isParameter("smoother: pre type") || paramList.isParameter("smoother: post type") || - paramList.isSublist ("smoother: params") || paramList.isSublist ("smoother: pre params") || paramList.isSublist ("smoother: post params") || - paramList.isParameter("smoother: sweeps") || paramList.isParameter("smoother: pre sweeps") || paramList.isParameter("smoother: post sweeps") || - paramList.isParameter("smoother: overlap") || paramList.isParameter("smoother: pre overlap") || paramList.isParameter("smoother: post overlap"); - - MUELU_SET_VAR_2LIST(paramList, defaultList, "smoother: pre or post", std::string, PreOrPost); - if (PreOrPost == "none") { - manager.SetFactory("Smoother", Teuchos::null); - - } else if (isCustomSmoother) { - // FIXME: get default values from the factory - // NOTE: none of the smoothers at the moment use parameter validation framework, so we - // cannot get the default values from it. 
- #define TEST_MUTUALLY_EXCLUSIVE(arg1,arg2) \ - TEUCHOS_TEST_FOR_EXCEPTION(paramList.isParameter(#arg1) && paramList.isParameter(#arg2), \ - Exceptions::InvalidArgument, "You cannot specify both \""#arg1"\" and \""#arg2"\""); - #define TEST_MUTUALLY_EXCLUSIVE_S(arg1,arg2) \ - TEUCHOS_TEST_FOR_EXCEPTION(paramList.isSublist(#arg1) && paramList.isSublist(#arg2), \ - Exceptions::InvalidArgument, "You cannot specify both \""#arg1"\" and \""#arg2"\""); - - TEST_MUTUALLY_EXCLUSIVE ("smoother: type", "smoother: pre type"); - TEST_MUTUALLY_EXCLUSIVE ("smoother: type", "smoother: post type"); - TEST_MUTUALLY_EXCLUSIVE ("smoother: sweeps", "smoother: pre sweeps"); - TEST_MUTUALLY_EXCLUSIVE ("smoother: sweeps", "smoother: post sweeps"); - TEST_MUTUALLY_EXCLUSIVE ("smoother: overlap", "smoother: pre overlap"); - TEST_MUTUALLY_EXCLUSIVE ("smoother: overlap", "smoother: post overlap"); - TEST_MUTUALLY_EXCLUSIVE_S("smoother: params", "smoother: pre params"); - TEST_MUTUALLY_EXCLUSIVE_S("smoother: params", "smoother: post params"); - TEUCHOS_TEST_FOR_EXCEPTION(PreOrPost == "both" && (paramList.isParameter("smoother: pre type") != paramList.isParameter("smoother: post type")), - Exceptions::InvalidArgument, "You must specify both \"smoother: pre type\" and \"smoother: post type\""); - - // Default values - int overlap = 0; - ParameterList defaultSmootherParams; - defaultSmootherParams.set("relaxation: type", "Symmetric Gauss-Seidel"); - defaultSmootherParams.set("relaxation: sweeps", Teuchos::OrdinalTraits::one()); - defaultSmootherParams.set("relaxation: damping factor", Teuchos::ScalarTraits::one()); - - RCP preSmoother = Teuchos::null, postSmoother = Teuchos::null; - std::string preSmootherType, postSmootherType; - ParameterList preSmootherParams, postSmootherParams; - - if (paramList.isParameter("smoother: overlap")) - overlap = paramList.get("smoother: overlap"); - - if (PreOrPost == "pre" || PreOrPost == "both") { - if (paramList.isParameter("smoother: pre type")) { - 
preSmootherType = paramList.get("smoother: pre type"); - } else { - MUELU_SET_VAR_2LIST(paramList, defaultList, "smoother: type", std::string, preSmootherTypeTmp); - preSmootherType = preSmootherTypeTmp; - } - if (paramList.isParameter("smoother: pre overlap")) - overlap = paramList.get("smoother: pre overlap"); - - if (paramList.isSublist("smoother: pre params")) - preSmootherParams = paramList.sublist("smoother: pre params"); - else if (paramList.isSublist("smoother: params")) - preSmootherParams = paramList.sublist("smoother: params"); - else if (defaultList.isSublist("smoother: params")) - preSmootherParams = defaultList.sublist("smoother: params"); - else if (preSmootherType == "RELAXATION") - preSmootherParams = defaultSmootherParams; - - if (preSmootherType == "CHEBYSHEV" && useMaxAbsDiagonalScaling) - preSmootherParams.set("chebyshev: use rowsumabs diagonal scaling",true); - - #ifdef HAVE_MUELU_INTREPID2 - // Propagate P-coarsening for Topo smoothing - if (multigridAlgo == "pcoarsen" && preSmootherType == "TOPOLOGICAL" && - defaultList.isParameter("pcoarsen: schedule") && defaultList.isParameter("pcoarsen: element")) { - // P-Coarsening by schedule (new interface) - // NOTE: levelID represents the *coarse* level in this case - auto pcoarsen_schedule = Teuchos::getArrayFromStringParameter(defaultList, "pcoarsen: schedule"); - auto pcoarsen_element = defaultList.get("pcoarsen: element"); - - if (levelID < (int)pcoarsen_schedule.size()) { - // Topo info for P-Coarsening - auto lo = pcoarsen_element + std::to_string(pcoarsen_schedule[levelID]); - preSmootherParams.set("pcoarsen: hi basis", lo); - } - } - #endif - - #ifdef HAVE_MUELU_MATLAB - if (preSmootherType == "matlab") - preSmoother = rcp(new SmootherFactory(rcp(new MatlabSmoother(preSmootherParams)))); - else - #endif - preSmoother = rcp(new SmootherFactory(rcp(new TrilinosSmoother(preSmootherType, preSmootherParams, overlap)))); - } - - if (PreOrPost == "post" || PreOrPost == "both") { - if 
(paramList.isParameter("smoother: post type")) - postSmootherType = paramList.get("smoother: post type"); - else { - MUELU_SET_VAR_2LIST(paramList, defaultList, "smoother: type", std::string, postSmootherTypeTmp); - postSmootherType = postSmootherTypeTmp; - } - - if (paramList.isSublist("smoother: post params")) - postSmootherParams = paramList.sublist("smoother: post params"); - else if (paramList.isSublist("smoother: params")) - postSmootherParams = paramList.sublist("smoother: params"); - else if (defaultList.isSublist("smoother: params")) - postSmootherParams = defaultList.sublist("smoother: params"); - else if (postSmootherType == "RELAXATION") - postSmootherParams = defaultSmootherParams; - if (paramList.isParameter("smoother: post overlap")) - overlap = paramList.get("smoother: post overlap"); - - if (postSmootherType == "CHEBYSHEV" && useMaxAbsDiagonalScaling) - postSmootherParams.set("chebyshev: use rowsumabs diagonal scaling",true); - - if (postSmootherType == preSmootherType && areSame(preSmootherParams, postSmootherParams)) - postSmoother = preSmoother; - else { - #ifdef HAVE_MUELU_INTREPID2 - // Propagate P-coarsening for Topo smoothing - if (multigridAlgo == "pcoarsen" && preSmootherType == "TOPOLOGICAL" && - defaultList.isParameter("pcoarsen: schedule") && defaultList.isParameter("pcoarsen: element")) { - // P-Coarsening by schedule (new interface) - // NOTE: levelID represents the *coarse* level in this case - auto pcoarsen_schedule = Teuchos::getArrayFromStringParameter(defaultList,"pcoarsen: schedule"); - auto pcoarsen_element = defaultList.get("pcoarsen: element"); - - if (levelID < (int)pcoarsen_schedule.size()) { - // Topo info for P-Coarsening - auto lo = pcoarsen_element + std::to_string(pcoarsen_schedule[levelID]); - postSmootherParams.set("pcoarsen: hi basis", lo); - } - } - #endif - - #ifdef HAVE_MUELU_MATLAB - if (postSmootherType == "matlab") - postSmoother = rcp(new SmootherFactory(rcp(new MatlabSmoother(postSmootherParams)))); - else - 
#endif - postSmoother = rcp(new SmootherFactory(rcp(new TrilinosSmoother(postSmootherType, postSmootherParams, overlap)))); - } - } - - if (preSmoother == postSmoother) - manager.SetFactory("Smoother", preSmoother); - else { - manager.SetFactory("PreSmoother", preSmoother); - manager.SetFactory("PostSmoother", postSmoother); - } - } - - // The first clause is not necessary, but it is here for clarity Smoothers - // are reused if smoother explicitly said to reuse them, or if any other - // reuse option is enabled - bool reuseSmoothers = (reuseType == "S" || reuseType != "none"); - if (reuseSmoothers) { - auto preSmootherFactory = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("PreSmoother"))); - - if (preSmootherFactory != Teuchos::null) { - ParameterList postSmootherFactoryParams; - postSmootherFactoryParams.set("keep smoother data", true); - preSmootherFactory->SetParameterList(postSmootherFactoryParams); - - keeps.push_back(keep_pair("PreSmoother data", preSmootherFactory.get())); - } - - auto postSmootherFactory = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("PostSmoother"))); - if (postSmootherFactory != Teuchos::null) { - ParameterList postSmootherFactoryParams; - postSmootherFactoryParams.set("keep smoother data", true); - postSmootherFactory->SetParameterList(postSmootherFactoryParams); - - keeps.push_back(keep_pair("PostSmoother data", postSmootherFactory.get())); - } - - auto coarseFactory = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("CoarseSolver"))); - if (coarseFactory != Teuchos::null) { - ParameterList coarseFactoryParams; - coarseFactoryParams.set("keep smoother data", true); - coarseFactory->SetParameterList(coarseFactoryParams); - - keeps.push_back(keep_pair("PreSmoother data", coarseFactory.get())); - } - } - - if ((reuseType == "RAP" && levelID) || (reuseType == "full")) { - // The difference between "RAP" and "full" is keeping smoothers. 
However, - // as in both cases we keep coarse matrices, we do not need to update - // coarse smoothers. On the other hand, if a user changes fine level - // matrix, "RAP" would update the fine level smoother, while "full" would - // not - keeps.push_back(keep_pair("PreSmoother", manager.GetFactory("PreSmoother") .get())); - keeps.push_back(keep_pair("PostSmoother", manager.GetFactory("PostSmoother").get())); - - // We do keep_pair("PreSmoother", manager.GetFactory("CoarseSolver").get()) - // as the coarse solver factory is in fact a smoothing factory, so the - // only pieces of data it generates are PreSmoother and PostSmoother - keeps.push_back(keep_pair("PreSmoother", manager.GetFactory("CoarseSolver").get())); - } - } - - // ===================================================================================================== - // ====================================== Coarse Solvers =============================================== - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_CoarseSolvers(ParameterList& paramList, const ParameterList& defaultList, - FactoryManager& manager, int /* levelID */, std::vector& /* keeps */) const - { - // FIXME: should custom coarse solver check default list too? - bool isCustomCoarseSolver = - paramList.isParameter("coarse: type") || - paramList.isParameter("coarse: params"); - if (MUELU_TEST_PARAM_2LIST(paramList, defaultList, "coarse: type", std::string, "none")) { - manager.SetFactory("CoarseSolver", Teuchos::null); - - } else if (isCustomCoarseSolver) { - // FIXME: get default values from the factory - // NOTE: none of the smoothers at the moment use parameter validation framework, so we - // cannot get the default values from it. 
- MUELU_SET_VAR_2LIST(paramList, defaultList, "coarse: type", std::string, coarseType); - - int overlap = 0; - if (paramList.isParameter("coarse: overlap")) - overlap = paramList.get("coarse: overlap"); - - ParameterList coarseParams; - if (paramList.isSublist("coarse: params")) - coarseParams = paramList.sublist("coarse: params"); - else if (defaultList.isSublist("coarse: params")) - coarseParams = defaultList.sublist("coarse: params"); - - using strings = std::unordered_set; - - RCP coarseSmoother; - // TODO: this is not a proper place to check. If we consider direct solver to be a special - // case of smoother, we would like to unify Amesos and Ifpack2 smoothers in src/Smoothers, and - // have a single factory responsible for those. Then, this check would belong there. - if (strings({"RELAXATION", "CHEBYSHEV", "ILUT", "ILU", "RILUK", "SCHWARZ", "Amesos", - "BLOCK RELAXATION", "BLOCK_RELAXATION", "BLOCKRELAXATION" , - "SPARSE BLOCK RELAXATION", "SPARSE_BLOCK_RELAXATION", "SPARSEBLOCKRELAXATION", - "LINESMOOTHING_BANDEDRELAXATION", "LINESMOOTHING_BANDED_RELAXATION", "LINESMOOTHING_BANDED RELAXATION", - "LINESMOOTHING_TRIDIRELAXATION", "LINESMOOTHING_TRIDI_RELAXATION", "LINESMOOTHING_TRIDI RELAXATION", - "LINESMOOTHING_TRIDIAGONALRELAXATION", "LINESMOOTHING_TRIDIAGONAL_RELAXATION", "LINESMOOTHING_TRIDIAGONAL RELAXATION", - "TOPOLOGICAL", "FAST_ILU", "FAST_IC", "FAST_ILDL","HIPTMAIR"}).count(coarseType)) { - coarseSmoother = rcp(new TrilinosSmoother(coarseType, coarseParams, overlap)); - } else { - #ifdef HAVE_MUELU_MATLAB - if (coarseType == "matlab") - coarseSmoother = rcp(new MatlabSmoother(coarseParams)); - else - #endif - coarseSmoother = rcp(new DirectSolver(coarseType, coarseParams)); - } - - manager.SetFactory("CoarseSolver", rcp(new SmootherFactory(coarseSmoother))); - } - } - - - // ===================================================================================================== - // ========================================= 
TentativeP================================================= - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_Reitzinger(ParameterList& paramList, const ParameterList& defaultList, - FactoryManager& manager, int levelID, std::vector& keeps) const - { - ParameterList rParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: enable", bool, rParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: use subcommunicators", bool, rParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "tentative: constant column sums", bool, rParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "tentative: calculate qr", bool, rParams); - - RCP rFactory = rcp(new ReitzingerPFactory()); - rFactory->SetParameterList(rParams); - - // These are all going to be user provided, so NoFactory - rFactory->SetFactory("Pnodal", NoFactory::getRCP()); - rFactory->SetFactory("NodeAggMatrix", NoFactory::getRCP()); - //rFactory->SetFactory("NodeMatrix", NoFactory::getRCP()); - - if(levelID > 1) - rFactory->SetFactory("D0", this->GetFactoryManager(levelID-1)->GetFactory("D0")); - else - rFactory->SetFactory("D0", NoFactory::getRCP()); - - manager.SetFactory("Ptent", rFactory); - manager.SetFactory("D0", rFactory); - manager.SetFactory("InPlaceMap", rFactory); - - } - - // ===================================================================================================== - // ========================================= TentativeP================================================= - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_Aggregation_TentativeP(ParameterList& paramList, const ParameterList& defaultList, - FactoryManager& manager, int levelID, std::vector& keeps) const - { - using strings = 
std::unordered_set; - - MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); - - MUELU_SET_VAR_2LIST(paramList, defaultList, "aggregation: type", std::string, aggType); - TEUCHOS_TEST_FOR_EXCEPTION(!strings({"uncoupled", "coupled", "brick", "matlab","notay","classical"}).count(aggType), - Exceptions::RuntimeError, "Unknown aggregation algorithm: \"" << aggType << "\". Please consult User's Guide."); - - - // Only doing this for classical because otherwise, the gold tests get broken badly - RCP amalgFact; - if(aggType == "classical") { - amalgFact = rcp(new AmalgamationFactory()); - manager.SetFactory("UnAmalgamationInfo",amalgFact); - } - - // Aggregation graph - RCP dropFactory; - - if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "matlab")) { - #ifdef HAVE_MUELU_MATLAB - dropFactory = rcp(new SingleLevelMatlabFactory()); - ParameterList socParams = paramList.sublist("strength-of-connection: params"); - dropFactory->SetParameterList(socParams); - #else - throw std::runtime_error("Cannot use MATLAB evolutionary strength-of-connection - MueLu was not configured with MATLAB support."); - #endif - } else if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "unsupported vector smoothing")) { - dropFactory = rcp(new MueLu::SmooVecCoalesceDropFactory()); - ParameterList dropParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: drop scheme", std::string, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: block diagonal: interleaved blocksize", int, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: number of random vectors", int, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: number of times to pre or post smooth", int, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: penalty parameters", Teuchos::Array, 
dropParams); - dropFactory->SetParameterList(dropParams); - } - else { - MUELU_KOKKOS_FACTORY_NO_DECL(dropFactory, CoalesceDropFactory, CoalesceDropFactory_kokkos); - ParameterList dropParams; - if (!rcp_dynamic_cast(dropFactory).is_null()) - dropParams.set("lightweight wrap", true); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: drop scheme", std::string, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: row sum drop tol", double, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: block diagonal: interleaved blocksize", int, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: drop tol", double, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: use ml scaling of drop tol", bool, dropParams); - - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: Dirichlet threshold", double, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: greedy Dirichlet", bool, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: distance laplacian algo", std::string, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: classical algo", std::string, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: distance laplacian directional weights",Teuchos::Array,dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: coloring: localize color graph", bool, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: dropping may create Dirichlet", bool, dropParams); - if (useKokkos_) { - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use lumping", bool, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: reuse graph", bool, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, 
"filtered matrix: reuse eigenvalue", bool, dropParams); - } - - if(!amalgFact.is_null()) - dropFactory->SetFactory("UnAmalgamationInfo", manager.GetFactory("UnAmalgamationInfo")); - - if(dropParams.isParameter("aggregation: drop scheme")) { - std::string drop_scheme = dropParams.get("aggregation: drop scheme"); - if(drop_scheme == "block diagonal colored signed classical") - manager.SetFactory("Coloring Graph",dropFactory); - if (drop_scheme.find("block diagonal") != std::string::npos || drop_scheme == "signed classical") { - if(levelID > 0) - dropFactory->SetFactory("BlockNumber", this->GetFactoryManager(levelID-1)->GetFactory("BlockNumber")); - else - dropFactory->SetFactory("BlockNumber", manager.GetFactory("BlockNumber")); - } - } - - dropFactory->SetParameterList(dropParams); - } - manager.SetFactory("Graph", dropFactory); - - - // Aggregation scheme - #ifndef HAVE_MUELU_MATLAB - if (aggType == "matlab") - throw std::runtime_error("Cannot use MATLAB aggregation - MueLu was not configured with MATLAB support."); - #endif - RCP aggFactory; - if (aggType == "uncoupled") { - MUELU_KOKKOS_FACTORY_NO_DECL(aggFactory, UncoupledAggregationFactory, UncoupledAggregationFactory_kokkos); - ParameterList aggParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: mode", std::string, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: ordering", std::string, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: min agg size", int, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: max agg size", int, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: max selected neighbors", int, aggParams); - if(useKokkos_) { - //if not using kokkos refactor Uncoupled, there is no algorithm option (always Serial) - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: phase 1 algorithm", std::string, aggParams); - 
MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: deterministic", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: coloring algorithm", std::string, aggParams); - } - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: enable phase 1", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: enable phase 2a", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: enable phase 2b", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: enable phase 3", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: match ML phase1", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: match ML phase2a", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: match ML phase2b", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: phase2a agg factor", double, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: preserve Dirichlet points", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: error on nodes with no on-rank neighbors", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: phase3 avoid singletons", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: compute aggregate qualities", bool, aggParams); - aggFactory->SetParameterList(aggParams); - // make sure that the aggregation factory has all necessary data - aggFactory->SetFactory("DofsPerNode", manager.GetFactory("Graph")); - aggFactory->SetFactory("Graph", manager.GetFactory("Graph")); - // aggFactory->SetFactory("UnAmalgamationInfo", manager.GetFactory("UnAmalgamationInfo")); - - } else if (aggType == "brick") { - aggFactory = rcp(new BrickAggregationFactory()); - 
ParameterList aggParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: brick x size", int, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: brick y size", int, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: brick z size", int, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: brick x Dirichlet", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: brick y Dirichlet", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: brick z Dirichlet", bool, aggParams); - aggFactory->SetParameterList(aggParams); - - // Unlike other factories, BrickAggregationFactory makes the Graph/DofsPerNode itself - manager.SetFactory("Graph", aggFactory); - manager.SetFactory("DofsPerNode", aggFactory); - manager.SetFactory("Filtering", aggFactory); - if (levelID > 1) { - // We check for levelID > 0, as in the interpreter aggFactory for - // levelID really corresponds to level 0. 
Managers are clunky, as they - // contain factories for two different levels - aggFactory->SetFactory("Coordinates", this->GetFactoryManager(levelID-1)->GetFactory("Coordinates")); +#ifdef HAVE_MUELU_INTREPID2 + // Propagate P-coarsening for Topo smoothing + if (multigridAlgo == "pcoarsen" && preSmootherType == "TOPOLOGICAL" && + defaultList.isParameter("pcoarsen: schedule") && defaultList.isParameter("pcoarsen: element")) { + // P-Coarsening by schedule (new interface) + // NOTE: levelID represents the *coarse* level in this case + auto pcoarsen_schedule = Teuchos::getArrayFromStringParameter(defaultList, "pcoarsen: schedule"); + auto pcoarsen_element = defaultList.get("pcoarsen: element"); + + if (levelID < (int)pcoarsen_schedule.size()) { + // Topo info for P-Coarsening + auto lo = pcoarsen_element + std::to_string(pcoarsen_schedule[levelID]); + preSmootherParams.set("pcoarsen: hi basis", lo); + } } +#endif + +#ifdef HAVE_MUELU_MATLAB + if (preSmootherType == "matlab") + preSmoother = rcp(new SmootherFactory(rcp(new MatlabSmoother(preSmootherParams)))); + else +#endif + preSmoother = rcp(new SmootherFactory(rcp(new TrilinosSmoother(preSmootherType, preSmootherParams, overlap)))); } - else if (aggType == "classical") { - // Map and coloring - RCP mapFact = rcp(new ClassicalMapFactory()); - ParameterList mapParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: deterministic", bool, mapParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: coloring algorithm", std::string, mapParams); - - ParameterList tempParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: drop scheme", std::string, tempParams); - std::string drop_algo = tempParams.get("aggregation: drop scheme"); - if(drop_algo == "block diagonal colored signed classical") { - mapParams.set("aggregation: coloring: use color graph",true); - mapFact->SetFactory("Coloring Graph", manager.GetFactory("Coloring Graph")); + if (PreOrPost == 
"post" || PreOrPost == "both") { + if (paramList.isParameter("smoother: post type")) + postSmootherType = paramList.get("smoother: post type"); + else { + MUELU_SET_VAR_2LIST(paramList, defaultList, "smoother: type", std::string, postSmootherTypeTmp); + postSmootherType = postSmootherTypeTmp; } - mapFact->SetParameterList(mapParams); - mapFact->SetFactory("Graph", manager.GetFactory("Graph")); - mapFact->SetFactory("UnAmalgamationInfo", manager.GetFactory("UnAmalgamationInfo")); - - manager.SetFactory("FC Splitting", mapFact); - manager.SetFactory("CoarseMap", mapFact); - - - aggFactory = rcp(new ClassicalPFactory()); - ParameterList aggParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: classical scheme", std::string, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: drop scheme", std::string, aggParams); - aggFactory->SetParameterList(aggParams); - aggFactory->SetFactory("FC Splitting",manager.GetFactory("FC Splitting")); - aggFactory->SetFactory("CoarseMap",manager.GetFactory("CoarseMap")); - aggFactory->SetFactory("DofsPerNode", manager.GetFactory("Graph")); - aggFactory->SetFactory("Graph", manager.GetFactory("Graph")); - - if (drop_algo.find("block diagonal") != std::string::npos || drop_algo == "signed classical") { - if(levelID > 0) - aggFactory->SetFactory("BlockNumber", this->GetFactoryManager(levelID-1)->GetFactory("BlockNumber")); - else - aggFactory->SetFactory("BlockNumber", manager.GetFactory("BlockNumber")); - } - - // Now we short-circuit, because we neither need nor want TentativePFactory here - manager.SetFactory("Ptent", aggFactory); - manager.SetFactory("P Graph", aggFactory); + if (paramList.isSublist("smoother: post params")) + postSmootherParams = paramList.sublist("smoother: post params"); + else if (paramList.isSublist("smoother: params")) + postSmootherParams = paramList.sublist("smoother: params"); + else if (defaultList.isSublist("smoother: params")) + postSmootherParams = 
defaultList.sublist("smoother: params"); + else if (postSmootherType == "RELAXATION") + postSmootherParams = defaultSmootherParams; + if (paramList.isParameter("smoother: post overlap")) + overlap = paramList.get("smoother: post overlap"); + + if (postSmootherType == "CHEBYSHEV" && useMaxAbsDiagonalScaling) + postSmootherParams.set("chebyshev: use rowsumabs diagonal scaling", true); + + if (postSmootherType == preSmootherType && areSame(preSmootherParams, postSmootherParams)) + postSmoother = preSmoother; + else { +#ifdef HAVE_MUELU_INTREPID2 + // Propagate P-coarsening for Topo smoothing + if (multigridAlgo == "pcoarsen" && preSmootherType == "TOPOLOGICAL" && + defaultList.isParameter("pcoarsen: schedule") && defaultList.isParameter("pcoarsen: element")) { + // P-Coarsening by schedule (new interface) + // NOTE: levelID represents the *coarse* level in this case + auto pcoarsen_schedule = Teuchos::getArrayFromStringParameter(defaultList, "pcoarsen: schedule"); + auto pcoarsen_element = defaultList.get("pcoarsen: element"); + + if (levelID < (int)pcoarsen_schedule.size()) { + // Topo info for P-Coarsening + auto lo = pcoarsen_element + std::to_string(pcoarsen_schedule[levelID]); + postSmootherParams.set("pcoarsen: hi basis", lo); + } + } +#endif - if (reuseType == "tP" && levelID) { - // keeps.push_back(keep_pair("Nullspace", Ptent.get())); - keeps.push_back(keep_pair("Ptent",aggFactory.get())); - } - return; - } - else if (aggType == "notay") { - aggFactory = rcp(new NotayAggregationFactory()); - ParameterList aggParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: pairwise: size", int, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: pairwise: tie threshold", double, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: Dirichlet threshold", double, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: ordering", std::string, aggParams); - 
MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: compute aggregate qualities",bool, aggParams); - aggFactory->SetParameterList(aggParams); - aggFactory->SetFactory("DofsPerNode", manager.GetFactory("Graph")); - aggFactory->SetFactory("Graph", manager.GetFactory("Graph")); - } #ifdef HAVE_MUELU_MATLAB - else if(aggType == "matlab") { - ParameterList aggParams = paramList.sublist("aggregation: params"); - aggFactory = rcp(new SingleLevelMatlabFactory()); - aggFactory->SetParameterList(aggParams); - } + if (postSmootherType == "matlab") + postSmoother = rcp(new SmootherFactory(rcp(new MatlabSmoother(postSmootherParams)))); + else #endif + postSmoother = rcp(new SmootherFactory(rcp(new TrilinosSmoother(postSmootherType, postSmootherParams, overlap)))); + } + } + if (preSmoother == postSmoother) + manager.SetFactory("Smoother", preSmoother); + else { + manager.SetFactory("PreSmoother", preSmoother); + manager.SetFactory("PostSmoother", postSmoother); + } + } + // The first clause is not necessary, but it is here for clarity Smoothers + // are reused if smoother explicitly said to reuse them, or if any other + // reuse option is enabled + bool reuseSmoothers = (reuseType == "S" || reuseType != "none"); + if (reuseSmoothers) { + auto preSmootherFactory = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("PreSmoother"))); - manager.SetFactory("Aggregates", aggFactory); + if (preSmootherFactory != Teuchos::null) { + ParameterList postSmootherFactoryParams; + postSmootherFactoryParams.set("keep smoother data", true); + preSmootherFactory->SetParameterList(postSmootherFactoryParams); - // Coarse map - RCP coarseMap = rcp(new CoarseMapFactory()); - coarseMap->SetFactory("Aggregates", manager.GetFactory("Aggregates")); - manager.SetFactory("CoarseMap", coarseMap); + keeps.push_back(keep_pair("PreSmoother data", preSmootherFactory.get())); + } - // Aggregate qualities - if (MUELU_TEST_PARAM_2LIST(paramList, defaultList, "aggregation: compute aggregate 
qualities", bool, true)) { - RCP aggQualityFact = rcp(new AggregateQualityEstimateFactory()); - ParameterList aggQualityParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: good aggregate threshold", double, aggQualityParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: file output", bool, aggQualityParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: file base", std::string, aggQualityParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: check symmetry", bool, aggQualityParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: algorithm", std::string, aggQualityParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: zero threshold", double, aggQualityParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: percentiles", Teuchos::Array,aggQualityParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: mode", std::string, aggQualityParams); - aggQualityFact->SetParameterList(aggQualityParams); - manager.SetFactory("AggregateQualities", aggQualityFact); + auto postSmootherFactory = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("PostSmoother"))); + if (postSmootherFactory != Teuchos::null) { + ParameterList postSmootherFactoryParams; + postSmootherFactoryParams.set("keep smoother data", true); + postSmootherFactory->SetParameterList(postSmootherFactoryParams); - assert(aggType == "uncoupled"); - aggFactory->SetFactory("AggregateQualities", aggQualityFact); + keeps.push_back(keep_pair("PostSmoother data", postSmootherFactory.get())); } + auto coarseFactory = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("CoarseSolver"))); + if (coarseFactory != Teuchos::null) { + ParameterList coarseFactoryParams; + coarseFactoryParams.set("keep smoother data", true); + 
coarseFactory->SetParameterList(coarseFactoryParams); - // Tentative P - MUELU_KOKKOS_FACTORY(Ptent, TentativePFactory, TentativePFactory_kokkos); - ParameterList ptentParams; - if (paramList.isSublist("matrixmatrix: kernel params")) - ptentParams.sublist("matrixmatrix: kernel params", false) = paramList.sublist("matrixmatrix: kernel params"); - if (defaultList.isSublist("matrixmatrix: kernel params")) - ptentParams.sublist("matrixmatrix: kernel params", false) = defaultList.sublist("matrixmatrix: kernel params"); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "tentative: calculate qr", bool, ptentParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "tentative: build coarse coordinates", bool, ptentParams); - Ptent->SetParameterList(ptentParams); - Ptent->SetFactory("Aggregates", manager.GetFactory("Aggregates")); - Ptent->SetFactory("CoarseMap", manager.GetFactory("CoarseMap")); - manager.SetFactory("Ptent", Ptent); - - if (reuseType == "tP" && levelID) { - keeps.push_back(keep_pair("Nullspace", Ptent.get())); - keeps.push_back(keep_pair("P", Ptent.get())); + keeps.push_back(keep_pair("PreSmoother data", coarseFactory.get())); } } - // ===================================================================================================== - // ============================================ RAP ==================================================== - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_RAP(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const - { - if (paramList.isParameter("A") && !paramList.get >("A").is_null()) { - // We have user matrix A - manager.SetFactory("A", NoFactory::getRCP()); - return; - } + if ((reuseType == "RAP" && levelID) || (reuseType == "full")) { + // The difference between "RAP" and "full" is keeping smoothers. 
However, + // as in both cases we keep coarse matrices, we do not need to update + // coarse smoothers. On the other hand, if a user changes fine level + // matrix, "RAP" would update the fine level smoother, while "full" would + // not + keeps.push_back(keep_pair("PreSmoother", manager.GetFactory("PreSmoother").get())); + keeps.push_back(keep_pair("PostSmoother", manager.GetFactory("PostSmoother").get())); + + // We do keep_pair("PreSmoother", manager.GetFactory("CoarseSolver").get()) + // as the coarse solver factory is in fact a smoothing factory, so the + // only pieces of data it generates are PreSmoother and PostSmoother + keeps.push_back(keep_pair("PreSmoother", manager.GetFactory("CoarseSolver").get())); + } +} + +// ===================================================================================================== +// ====================================== Coarse Solvers =============================================== +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_CoarseSolvers(ParameterList& paramList, const ParameterList& defaultList, + FactoryManager& manager, int /* levelID */, std::vector& /* keeps */) const { + // FIXME: should custom coarse solver check default list too? + bool isCustomCoarseSolver = + paramList.isParameter("coarse: type") || + paramList.isParameter("coarse: params"); + if (MUELU_TEST_PARAM_2LIST(paramList, defaultList, "coarse: type", std::string, "none")) { + manager.SetFactory("CoarseSolver", Teuchos::null); + + } else if (isCustomCoarseSolver) { + // FIXME: get default values from the factory + // NOTE: none of the smoothers at the moment use parameter validation framework, so we + // cannot get the default values from it. 
+ MUELU_SET_VAR_2LIST(paramList, defaultList, "coarse: type", std::string, coarseType); + + int overlap = 0; + if (paramList.isParameter("coarse: overlap")) + overlap = paramList.get("coarse: overlap"); + + ParameterList coarseParams; + if (paramList.isSublist("coarse: params")) + coarseParams = paramList.sublist("coarse: params"); + else if (defaultList.isSublist("coarse: params")) + coarseParams = defaultList.sublist("coarse: params"); - ParameterList RAPparams; - - RCP RAP; - RCP RAPs; - // Allow for Galerkin or shifted RAP - // FIXME: Should this not be some form of MUELU_SET_VAR_2LIST? - std::string alg = paramList.get("rap: algorithm", "galerkin"); - if (alg == "shift" || alg == "non-galerkin") { - RAPs = rcp(new RAPShiftFactory()); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: shift", double, RAPparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: shift diagonal M", bool, RAPparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: shift low storage", bool, RAPparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: shift array", Teuchos::Array, RAPparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: cfl array", Teuchos::Array, RAPparams); + using strings = std::unordered_set; + RCP coarseSmoother; + // TODO: this is not a proper place to check. If we consider direct solver to be a special + // case of smoother, we would like to unify Amesos and Ifpack2 smoothers in src/Smoothers, and + // have a single factory responsible for those. Then, this check would belong there. 
+ if (strings({"RELAXATION", "CHEBYSHEV", "ILUT", "ILU", "RILUK", "SCHWARZ", "Amesos", + "BLOCK RELAXATION", "BLOCK_RELAXATION", "BLOCKRELAXATION", + "SPARSE BLOCK RELAXATION", "SPARSE_BLOCK_RELAXATION", "SPARSEBLOCKRELAXATION", + "LINESMOOTHING_BANDEDRELAXATION", "LINESMOOTHING_BANDED_RELAXATION", "LINESMOOTHING_BANDED RELAXATION", + "LINESMOOTHING_TRIDIRELAXATION", "LINESMOOTHING_TRIDI_RELAXATION", "LINESMOOTHING_TRIDI RELAXATION", + "LINESMOOTHING_TRIDIAGONALRELAXATION", "LINESMOOTHING_TRIDIAGONAL_RELAXATION", "LINESMOOTHING_TRIDIAGONAL RELAXATION", + "TOPOLOGICAL", "FAST_ILU", "FAST_IC", "FAST_ILDL", "HIPTMAIR"}) + .count(coarseType)) { + coarseSmoother = rcp(new TrilinosSmoother(coarseType, coarseParams, overlap)); } else { - RAP = rcp(new RAPFactory()); +#ifdef HAVE_MUELU_MATLAB + if (coarseType == "matlab") + coarseSmoother = rcp(new MatlabSmoother(coarseParams)); + else +#endif + coarseSmoother = rcp(new DirectSolver(coarseType, coarseParams)); } - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: relative diagonal floor", Teuchos::Array, RAPparams); - - if (paramList.isSublist("matrixmatrix: kernel params")) - RAPparams.sublist("matrixmatrix: kernel params", false) = paramList.sublist("matrixmatrix: kernel params"); - if (defaultList.isSublist("matrixmatrix: kernel params")) - RAPparams.sublist("matrixmatrix: kernel params", false) = defaultList.sublist("matrixmatrix: kernel params"); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "transpose: use implicit", bool, RAPparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: fix zero diagonals", bool, RAPparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: fix zero diagonals threshold", double, RAPparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: fix zero diagonals replacement", Scalar, RAPparams); - - // if "rap: triple product" has not been set and algorithm is "unsmoothed" switch triple product on - if 
(!paramList.isParameter("rap: triple product") && - paramList.isType("multigrid algorithm") && - paramList.get("multigrid algorithm") == "unsmoothed") - paramList.set("rap: triple product", true); - else - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: triple product", bool, RAPparams); + manager.SetFactory("CoarseSolver", rcp(new SmootherFactory(coarseSmoother))); + } +} + +// ===================================================================================================== +// ========================================= TentativeP================================================= +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_Reitzinger(ParameterList& paramList, const ParameterList& defaultList, + FactoryManager& manager, int levelID, std::vector& keeps) const { + ParameterList rParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: enable", bool, rParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: use subcommunicators", bool, rParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "tentative: constant column sums", bool, rParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "tentative: calculate qr", bool, rParams); + + RCP rFactory = rcp(new ReitzingerPFactory()); + rFactory->SetParameterList(rParams); + + // These are all going to be user provided, so NoFactory + rFactory->SetFactory("Pnodal", NoFactory::getRCP()); + rFactory->SetFactory("NodeAggMatrix", NoFactory::getRCP()); + //rFactory->SetFactory("NodeMatrix", NoFactory::getRCP()); + + if (levelID > 1) + rFactory->SetFactory("D0", this->GetFactoryManager(levelID - 1)->GetFactory("D0")); + else + rFactory->SetFactory("D0", NoFactory::getRCP()); + + manager.SetFactory("Ptent", rFactory); + manager.SetFactory("D0", rFactory); + manager.SetFactory("InPlaceMap", rFactory); +} + +// 
===================================================================================================== +// ========================================= TentativeP================================================= +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_Aggregation_TentativeP(ParameterList& paramList, const ParameterList& defaultList, + FactoryManager& manager, int levelID, std::vector& keeps) const { + using strings = std::unordered_set; + + MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); + + MUELU_SET_VAR_2LIST(paramList, defaultList, "aggregation: type", std::string, aggType); + TEUCHOS_TEST_FOR_EXCEPTION(!strings({"uncoupled", "coupled", "brick", "matlab", "notay", "classical"}).count(aggType), + Exceptions::RuntimeError, "Unknown aggregation algorithm: \"" << aggType << "\". Please consult User's Guide."); + + // Only doing this for classical because otherwise, the gold tests get broken badly + RCP amalgFact; + if (aggType == "classical") { + amalgFact = rcp(new AmalgamationFactory()); + manager.SetFactory("UnAmalgamationInfo", amalgFact); + } - try { - if (paramList.isParameter("aggregation: allow empty prolongator columns")) { - RAPparams.set("CheckMainDiagonal", paramList.get("aggregation: allow empty prolongator columns")); - RAPparams.set("RepairMainDiagonal", paramList.get("aggregation: allow empty prolongator columns")); - } - else if (defaultList.isParameter("aggregation: allow empty prolongator columns")) { - RAPparams.set("CheckMainDiagonal", defaultList.get("aggregation: allow empty prolongator columns")); - RAPparams.set("RepairMainDiagonal", defaultList.get("aggregation: allow empty prolongator columns")); - } + // Aggregation graph + RCP dropFactory; - } catch (Teuchos::Exceptions::InvalidParameterType&) { - TEUCHOS_TEST_FOR_EXCEPTION_PURE_MSG(true, 
Teuchos::Exceptions::InvalidParameterType, - "Error: parameter \"aggregation: allow empty prolongator columns\" must be of type " << Teuchos::TypeNameTraits::name()); + if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "matlab")) { +#ifdef HAVE_MUELU_MATLAB + dropFactory = rcp(new SingleLevelMatlabFactory()); + ParameterList socParams = paramList.sublist("strength-of-connection: params"); + dropFactory->SetParameterList(socParams); +#else + throw std::runtime_error("Cannot use MATLAB evolutionary strength-of-connection - MueLu was not configured with MATLAB support."); +#endif + } else if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "unsupported vector smoothing")) { + dropFactory = rcp(new MueLu::SmooVecCoalesceDropFactory()); + ParameterList dropParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: drop scheme", std::string, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: block diagonal: interleaved blocksize", int, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: number of random vectors", int, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: number of times to pre or post smooth", int, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: penalty parameters", Teuchos::Array, dropParams); + dropFactory->SetParameterList(dropParams); + } else { + MUELU_KOKKOS_FACTORY_NO_DECL(dropFactory, CoalesceDropFactory, CoalesceDropFactory_kokkos); + ParameterList dropParams; + if (!rcp_dynamic_cast(dropFactory).is_null()) + dropParams.set("lightweight wrap", true); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: drop scheme", std::string, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: row sum drop tol", double, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, 
defaultList, "aggregation: block diagonal: interleaved blocksize", int, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: drop tol", double, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: use ml scaling of drop tol", bool, dropParams); + + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: Dirichlet threshold", double, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: greedy Dirichlet", bool, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: distance laplacian algo", std::string, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: classical algo", std::string, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: distance laplacian directional weights", Teuchos::Array, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: coloring: localize color graph", bool, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: dropping may create Dirichlet", bool, dropParams); + if (useKokkos_) { + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use lumping", bool, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: reuse graph", bool, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: reuse eigenvalue", bool, dropParams); } - if (!RAP.is_null()) { - RAP->SetParameterList(RAPparams); - RAP->SetFactory("P", manager.GetFactory("P")); - } else { - RAPs->SetParameterList(RAPparams); - RAPs->SetFactory("P", manager.GetFactory("P")); - } + if (!amalgFact.is_null()) + dropFactory->SetFactory("UnAmalgamationInfo", manager.GetFactory("UnAmalgamationInfo")); - if (!this->implicitTranspose_) { - if (!RAP.is_null()) - RAP->SetFactory("R", manager.GetFactory("R")); - else - RAPs->SetFactory("R", manager.GetFactory("R")); 
+ if (dropParams.isParameter("aggregation: drop scheme")) { + std::string drop_scheme = dropParams.get("aggregation: drop scheme"); + if (drop_scheme == "block diagonal colored signed classical") + manager.SetFactory("Coloring Graph", dropFactory); + if (drop_scheme.find("block diagonal") != std::string::npos || drop_scheme == "signed classical") { + if (levelID > 0) + dropFactory->SetFactory("BlockNumber", this->GetFactoryManager(levelID - 1)->GetFactory("BlockNumber")); + else + dropFactory->SetFactory("BlockNumber", manager.GetFactory("BlockNumber")); + } } - if (MUELU_TEST_PARAM_2LIST(paramList, defaultList, "aggregation: export visualization data", bool, true)) { - RCP aggExport = rcp(new AggregationExportFactory()); - ParameterList aggExportParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output filename", std::string, aggExportParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output file: agg style", std::string, aggExportParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output file: iter", int, aggExportParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output file: time step", int, aggExportParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output file: fine graph edges", bool, aggExportParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output file: coarse graph edges", bool, aggExportParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output file: build colormap", bool, aggExportParams); - aggExport->SetParameterList(aggExportParams); - aggExport->SetFactory("DofsPerNode", manager.GetFactory("DofsPerNode")); + dropFactory->SetParameterList(dropParams); + } + manager.SetFactory("Graph", dropFactory); - if (!RAP.is_null()) - RAP->AddTransferFactory(aggExport); +// Aggregation scheme +#ifndef HAVE_MUELU_MATLAB + if (aggType == "matlab") + throw 
std::runtime_error("Cannot use MATLAB aggregation - MueLu was not configured with MATLAB support."); +#endif + RCP aggFactory; + if (aggType == "uncoupled") { + MUELU_KOKKOS_FACTORY_NO_DECL(aggFactory, UncoupledAggregationFactory, UncoupledAggregationFactory_kokkos); + ParameterList aggParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: mode", std::string, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: ordering", std::string, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: min agg size", int, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: max agg size", int, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: max selected neighbors", int, aggParams); + if (useKokkos_) { + //if not using kokkos refactor Uncoupled, there is no algorithm option (always Serial) + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: phase 1 algorithm", std::string, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: deterministic", bool, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: coloring algorithm", std::string, aggParams); + } + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: enable phase 1", bool, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: enable phase 2a", bool, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: enable phase 2b", bool, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: enable phase 3", bool, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: match ML phase1", bool, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: match ML phase2a", bool, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: match ML 
phase2b", bool, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: phase2a agg factor", double, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: preserve Dirichlet points", bool, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: error on nodes with no on-rank neighbors", bool, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: phase3 avoid singletons", bool, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: compute aggregate qualities", bool, aggParams); + aggFactory->SetParameterList(aggParams); + // make sure that the aggregation factory has all necessary data + aggFactory->SetFactory("DofsPerNode", manager.GetFactory("Graph")); + aggFactory->SetFactory("Graph", manager.GetFactory("Graph")); + // aggFactory->SetFactory("UnAmalgamationInfo", manager.GetFactory("UnAmalgamationInfo")); + + } else if (aggType == "brick") { + aggFactory = rcp(new BrickAggregationFactory()); + ParameterList aggParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: brick x size", int, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: brick y size", int, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: brick z size", int, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: brick x Dirichlet", bool, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: brick y Dirichlet", bool, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: brick z Dirichlet", bool, aggParams); + aggFactory->SetParameterList(aggParams); + + // Unlike other factories, BrickAggregationFactory makes the Graph/DofsPerNode itself + manager.SetFactory("Graph", aggFactory); + manager.SetFactory("DofsPerNode", aggFactory); + manager.SetFactory("Filtering", aggFactory); + if (levelID > 
1) { + // We check for levelID > 0, as in the interpreter aggFactory for + // levelID really corresponds to level 0. Managers are clunky, as they + // contain factories for two different levels + aggFactory->SetFactory("Coordinates", this->GetFactoryManager(levelID - 1)->GetFactory("Coordinates")); + } + } else if (aggType == "classical") { + // Map and coloring + RCP mapFact = rcp(new ClassicalMapFactory()); + ParameterList mapParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: deterministic", bool, mapParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: coloring algorithm", std::string, mapParams); + + ParameterList tempParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: drop scheme", std::string, tempParams); + std::string drop_algo = tempParams.get("aggregation: drop scheme"); + if (drop_algo == "block diagonal colored signed classical") { + mapParams.set("aggregation: coloring: use color graph", true); + mapFact->SetFactory("Coloring Graph", manager.GetFactory("Coloring Graph")); + } + mapFact->SetParameterList(mapParams); + mapFact->SetFactory("Graph", manager.GetFactory("Graph")); + mapFact->SetFactory("UnAmalgamationInfo", manager.GetFactory("UnAmalgamationInfo")); + + manager.SetFactory("FC Splitting", mapFact); + manager.SetFactory("CoarseMap", mapFact); + + aggFactory = rcp(new ClassicalPFactory()); + ParameterList aggParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: classical scheme", std::string, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: drop scheme", std::string, aggParams); + aggFactory->SetParameterList(aggParams); + aggFactory->SetFactory("FC Splitting", manager.GetFactory("FC Splitting")); + aggFactory->SetFactory("CoarseMap", manager.GetFactory("CoarseMap")); + aggFactory->SetFactory("DofsPerNode", manager.GetFactory("Graph")); + aggFactory->SetFactory("Graph", manager.GetFactory("Graph")); + + if 
(drop_algo.find("block diagonal") != std::string::npos || drop_algo == "signed classical") { + if (levelID > 0) + aggFactory->SetFactory("BlockNumber", this->GetFactoryManager(levelID - 1)->GetFactory("BlockNumber")); else - RAPs->AddTransferFactory(aggExport); + aggFactory->SetFactory("BlockNumber", manager.GetFactory("BlockNumber")); } - if (!RAP.is_null()) - manager.SetFactory("A", RAP); - else - manager.SetFactory("A", RAPs); - - MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); - MUELU_SET_VAR_2LIST(paramList, defaultList, "sa: use filtered matrix", bool, useFiltering); - bool filteringChangesMatrix = useFiltering && !MUELU_TEST_PARAM_2LIST(paramList, defaultList, "aggregation: drop tol", double, 0); - if (reuseType == "RP" || (reuseType == "tP" && !filteringChangesMatrix)) { - if (!RAP.is_null()) { - keeps.push_back(keep_pair("AP reuse data", RAP.get())); - keeps.push_back(keep_pair("RAP reuse data", RAP.get())); + // Now we short-circuit, because we neither need nor want TentativePFactory here + manager.SetFactory("Ptent", aggFactory); + manager.SetFactory("P Graph", aggFactory); - } else { - keeps.push_back(keep_pair("AP reuse data", RAPs.get())); - keeps.push_back(keep_pair("RAP reuse data", RAPs.get())); - } + if (reuseType == "tP" && levelID) { + // keeps.push_back(keep_pair("Nullspace", Ptent.get())); + keeps.push_back(keep_pair("Ptent", aggFactory.get())); } + return; + } else if (aggType == "notay") { + aggFactory = rcp(new NotayAggregationFactory()); + ParameterList aggParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: pairwise: size", int, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: pairwise: tie threshold", double, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: Dirichlet threshold", double, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: ordering", std::string, aggParams); + 
MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: compute aggregate qualities", bool, aggParams); + aggFactory->SetParameterList(aggParams); + aggFactory->SetFactory("DofsPerNode", manager.GetFactory("Graph")); + aggFactory->SetFactory("Graph", manager.GetFactory("Graph")); } +#ifdef HAVE_MUELU_MATLAB + else if (aggType == "matlab") { + ParameterList aggParams = paramList.sublist("aggregation: params"); + aggFactory = rcp(new SingleLevelMatlabFactory()); + aggFactory->SetParameterList(aggParams); + } +#endif - // ===================================================================================================== - // ======================================= Coordinates ================================================= - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_Coordinates(ParameterList& paramList, const ParameterList& /* defaultList */, - FactoryManager& manager, int /* levelID */, std::vector& /* keeps */) const - { - bool have_userCO = false; - if (paramList.isParameter("Coordinates") && !paramList.get >("Coordinates").is_null()) - have_userCO = true; + manager.SetFactory("Aggregates", aggFactory); + + // Coarse map + RCP coarseMap = rcp(new CoarseMapFactory()); + coarseMap->SetFactory("Aggregates", manager.GetFactory("Aggregates")); + manager.SetFactory("CoarseMap", coarseMap); + + // Aggregate qualities + if (MUELU_TEST_PARAM_2LIST(paramList, defaultList, "aggregation: compute aggregate qualities", bool, true)) { + RCP aggQualityFact = rcp(new AggregateQualityEstimateFactory()); + ParameterList aggQualityParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: good aggregate threshold", double, aggQualityParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: file output", bool, aggQualityParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, 
defaultList, "aggregate qualities: file base", std::string, aggQualityParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: check symmetry", bool, aggQualityParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: algorithm", std::string, aggQualityParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: zero threshold", double, aggQualityParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: percentiles", Teuchos::Array, aggQualityParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: mode", std::string, aggQualityParams); + aggQualityFact->SetParameterList(aggQualityParams); + manager.SetFactory("AggregateQualities", aggQualityFact); + + assert(aggType == "uncoupled"); + aggFactory->SetFactory("AggregateQualities", aggQualityFact); + } - if (useCoordinates_) { - if (have_userCO) { - manager.SetFactory("Coordinates", NoFactory::getRCP()); + // Tentative P + MUELU_KOKKOS_FACTORY(Ptent, TentativePFactory, TentativePFactory_kokkos); + ParameterList ptentParams; + if (paramList.isSublist("matrixmatrix: kernel params")) + ptentParams.sublist("matrixmatrix: kernel params", false) = paramList.sublist("matrixmatrix: kernel params"); + if (defaultList.isSublist("matrixmatrix: kernel params")) + ptentParams.sublist("matrixmatrix: kernel params", false) = defaultList.sublist("matrixmatrix: kernel params"); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "tentative: calculate qr", bool, ptentParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "tentative: build coarse coordinates", bool, ptentParams); + Ptent->SetParameterList(ptentParams); + Ptent->SetFactory("Aggregates", manager.GetFactory("Aggregates")); + Ptent->SetFactory("CoarseMap", manager.GetFactory("CoarseMap")); + manager.SetFactory("Ptent", Ptent); + + if (reuseType == "tP" && levelID) { + keeps.push_back(keep_pair("Nullspace", 
Ptent.get())); + keeps.push_back(keep_pair("P", Ptent.get())); + } +} + +// ===================================================================================================== +// ============================================ RAP ==================================================== +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_RAP(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const { + if (paramList.isParameter("A") && !paramList.get >("A").is_null()) { + // We have user matrix A + manager.SetFactory("A", NoFactory::getRCP()); + return; + } - } else { - RCP coords = rcp(new CoordinatesTransferFactory()); - coords->SetFactory("Aggregates", manager.GetFactory("Aggregates")); - coords->SetFactory("CoarseMap", manager.GetFactory("CoarseMap")); - manager.SetFactory("Coordinates", coords); - - auto RAP = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("A"))); - if (!RAP.is_null()) { - RAP->AddTransferFactory(manager.GetFactory("Coordinates")); - } else { - auto RAPs = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("A"))); - RAPs->AddTransferFactory(manager.GetFactory("Coordinates")); - } - } + ParameterList RAPparams; + + RCP RAP; + RCP RAPs; + // Allow for Galerkin or shifted RAP + // FIXME: Should this not be some form of MUELU_SET_VAR_2LIST? 
+ std::string alg = paramList.get("rap: algorithm", "galerkin"); + if (alg == "shift" || alg == "non-galerkin") { + RAPs = rcp(new RAPShiftFactory()); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: shift", double, RAPparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: shift diagonal M", bool, RAPparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: shift low storage", bool, RAPparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: shift array", Teuchos::Array, RAPparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: cfl array", Teuchos::Array, RAPparams); + + } else { + RAP = rcp(new RAPFactory()); + } + + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: relative diagonal floor", Teuchos::Array, RAPparams); + + if (paramList.isSublist("matrixmatrix: kernel params")) + RAPparams.sublist("matrixmatrix: kernel params", false) = paramList.sublist("matrixmatrix: kernel params"); + if (defaultList.isSublist("matrixmatrix: kernel params")) + RAPparams.sublist("matrixmatrix: kernel params", false) = defaultList.sublist("matrixmatrix: kernel params"); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "transpose: use implicit", bool, RAPparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: fix zero diagonals", bool, RAPparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: fix zero diagonals threshold", double, RAPparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: fix zero diagonals replacement", Scalar, RAPparams); + + // if "rap: triple product" has not been set and algorithm is "unsmoothed" switch triple product on + if (!paramList.isParameter("rap: triple product") && + paramList.isType("multigrid algorithm") && + paramList.get("multigrid algorithm") == "unsmoothed") + paramList.set("rap: triple product", true); + else + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: triple product", bool, 
RAPparams); + + try { + if (paramList.isParameter("aggregation: allow empty prolongator columns")) { + RAPparams.set("CheckMainDiagonal", paramList.get("aggregation: allow empty prolongator columns")); + RAPparams.set("RepairMainDiagonal", paramList.get("aggregation: allow empty prolongator columns")); + } else if (defaultList.isParameter("aggregation: allow empty prolongator columns")) { + RAPparams.set("CheckMainDiagonal", defaultList.get("aggregation: allow empty prolongator columns")); + RAPparams.set("RepairMainDiagonal", defaultList.get("aggregation: allow empty prolongator columns")); } + + } catch (Teuchos::Exceptions::InvalidParameterType&) { + TEUCHOS_TEST_FOR_EXCEPTION_PURE_MSG(true, Teuchos::Exceptions::InvalidParameterType, + "Error: parameter \"aggregation: allow empty prolongator columns\" must be of type " << Teuchos::TypeNameTraits::name()); } - // ===================================================================================================== - // ================================= LocalOrdinalTransfer ============================================= - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_LocalOrdinalTransfer(const std::string & VarName, const std::string &multigridAlgo,ParameterList& paramList, const ParameterList& /* defaultList */, - FactoryManager& manager, int levelID, std::vector& /* keeps */) const - { - // NOTE: You would think this would be levelID > 0, but you'd be wrong, since the FactoryManager is basically - // offset by a level from the things which actually do the work. 
- if (useBlockNumber_ && (levelID > 0)) { - auto RAP = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("A"))); - auto RAPs = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("A"))); - if (!RAP.is_null() || !RAPs.is_null()) { - RCP fact = rcp(new LocalOrdinalTransferFactory(VarName,multigridAlgo)); - if(multigridAlgo == "classical") - fact->SetFactory("P Graph", manager.GetFactory("P Graph")); - else - fact->SetFactory("Aggregates", manager.GetFactory("Aggregates")); - fact->SetFactory("CoarseMap", manager.GetFactory("CoarseMap")); + if (!RAP.is_null()) { + RAP->SetParameterList(RAPparams); + RAP->SetFactory("P", manager.GetFactory("P")); + } else { + RAPs->SetParameterList(RAPparams); + RAPs->SetFactory("P", manager.GetFactory("P")); + } - fact->SetFactory(VarName, this->GetFactoryManager(levelID-1)->GetFactory(VarName)); + if (!this->implicitTranspose_) { + if (!RAP.is_null()) + RAP->SetFactory("R", manager.GetFactory("R")); + else + RAPs->SetFactory("R", manager.GetFactory("R")); + } - manager.SetFactory(VarName, fact); + if (MUELU_TEST_PARAM_2LIST(paramList, defaultList, "aggregation: export visualization data", bool, true)) { + RCP aggExport = rcp(new AggregationExportFactory()); + ParameterList aggExportParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output filename", std::string, aggExportParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output file: agg style", std::string, aggExportParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output file: iter", int, aggExportParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output file: time step", int, aggExportParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output file: fine graph edges", bool, aggExportParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output file: coarse graph edges", bool, aggExportParams); + 
MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output file: build colormap", bool, aggExportParams); + aggExport->SetParameterList(aggExportParams); + aggExport->SetFactory("DofsPerNode", manager.GetFactory("DofsPerNode")); - if (!RAP.is_null()) - RAP->AddTransferFactory(manager.GetFactory(VarName)); - else - RAPs->AddTransferFactory(manager.GetFactory(VarName)); - } - } + if (!RAP.is_null()) + RAP->AddTransferFactory(aggExport); + else + RAPs->AddTransferFactory(aggExport); } + if (!RAP.is_null()) + manager.SetFactory("A", RAP); + else + manager.SetFactory("A", RAPs); + MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); + MUELU_SET_VAR_2LIST(paramList, defaultList, "sa: use filtered matrix", bool, useFiltering); + bool filteringChangesMatrix = useFiltering && !MUELU_TEST_PARAM_2LIST(paramList, defaultList, "aggregation: drop tol", double, 0); - // ====================================================================================================== - // ====================================== BlockNumber ================================================= - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_BlockNumber(ParameterList& paramList, const ParameterList& defaultList, - FactoryManager& manager, int levelID , std::vector& keeps) const - { - if(useBlockNumber_) { - ParameterList myParams; - RCP fact = rcp(new InitialBlockNumberFactory()); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: block diagonal: interleaved blocksize", int, myParams); - fact->SetParameterList(myParams); - manager.SetFactory("BlockNumber",fact); - } + if (reuseType == "RP" || (reuseType == "tP" && !filteringChangesMatrix)) { + if (!RAP.is_null()) { + keeps.push_back(keep_pair("AP reuse data", RAP.get())); + keeps.push_back(keep_pair("RAP reuse data", RAP.get())); + } else { + 
keeps.push_back(keep_pair("AP reuse data", RAPs.get())); + keeps.push_back(keep_pair("RAP reuse data", RAPs.get())); + } } +} + +// ===================================================================================================== +// ======================================= Coordinates ================================================= +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_Coordinates(ParameterList& paramList, const ParameterList& /* defaultList */, + FactoryManager& manager, int /* levelID */, std::vector& /* keeps */) const { + bool have_userCO = false; + if (paramList.isParameter("Coordinates") && !paramList.get >("Coordinates").is_null()) + have_userCO = true; + + if (useCoordinates_) { + if (have_userCO) { + manager.SetFactory("Coordinates", NoFactory::getRCP()); + } else { + RCP coords = rcp(new CoordinatesTransferFactory()); + coords->SetFactory("Aggregates", manager.GetFactory("Aggregates")); + coords->SetFactory("CoarseMap", manager.GetFactory("CoarseMap")); + manager.SetFactory("Coordinates", coords); - // ===================================================================================================== - // =========================================== Restriction ============================================= - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_Restriction(ParameterList& paramList, const ParameterList& defaultList , FactoryManager& manager, - int levelID, std::vector& /* keeps */) const - { - MUELU_SET_VAR_2LIST(paramList, defaultList, "multigrid algorithm", std::string, multigridAlgo); - bool have_userR = false; - if (paramList.isParameter("R") && !paramList.get >("R").is_null()) - have_userR = true; - - // === Restriction === - RCP R; - if (!this->implicitTranspose_) { - 
MUELU_SET_VAR_2LIST(paramList, defaultList, "problem: symmetric", bool, isSymmetric); - - if (isSymmetric == false && (multigridAlgo == "unsmoothed" || multigridAlgo == "emin")) { - this->GetOStream(Warnings0) << - "Switching \"problem: symmetric\" parameter to symmetric as multigrid algorithm. " << - multigridAlgo << " is primarily supposed to be used for symmetric problems.\n\n" << - "Please note: if you are using \"unsmoothed\" transfer operators the \"problem: symmetric\" parameter " << - "has no real mathematical meaning, i.e. you can use it for non-symmetric\n" << - "problems, too. With \"problem: symmetric\"=\"symmetric\" you can use implicit transpose for building " << - "the restriction operators which may drastically reduce the amount of consumed memory." << std::endl; - isSymmetric = true; + auto RAP = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("A"))); + if (!RAP.is_null()) { + RAP->AddTransferFactory(manager.GetFactory("Coordinates")); + } else { + auto RAPs = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("A"))); + RAPs->AddTransferFactory(manager.GetFactory("Coordinates")); } - TEUCHOS_TEST_FOR_EXCEPTION(multigridAlgo == "pg" && isSymmetric == true, Exceptions::RuntimeError, - "Petrov-Galerkin smoothed transfer operators are only allowed for non-symmetric problems: Set \"problem: symmetric\" to false!\n" \ - "While PG smoothed transfer operators generally would also work for symmetric problems this is an unusual use case. 
" \ - "You can use the factory-based xml interface though if you need PG-AMG for symmetric problems."); + } + } +} + +// ===================================================================================================== +// ================================= LocalOrdinalTransfer ============================================= +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_LocalOrdinalTransfer(const std::string& VarName, const std::string& multigridAlgo, ParameterList& paramList, const ParameterList& /* defaultList */, + FactoryManager& manager, int levelID, std::vector& /* keeps */) const { + // NOTE: You would think this would be levelID > 0, but you'd be wrong, since the FactoryManager is basically + // offset by a level from the things which actually do the work. + if (useBlockNumber_ && (levelID > 0)) { + auto RAP = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("A"))); + auto RAPs = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("A"))); + if (!RAP.is_null() || !RAPs.is_null()) { + RCP fact = rcp(new LocalOrdinalTransferFactory(VarName, multigridAlgo)); + if (multigridAlgo == "classical") + fact->SetFactory("P Graph", manager.GetFactory("P Graph")); + else + fact->SetFactory("Aggregates", manager.GetFactory("Aggregates")); + fact->SetFactory("CoarseMap", manager.GetFactory("CoarseMap")); - if (have_userR) { - manager.SetFactory("R", NoFactory::getRCP()); - } else { - if (isSymmetric) R = rcp(new TransPFactory()); - else R = rcp(new GenericRFactory()); + fact->SetFactory(VarName, this->GetFactoryManager(levelID - 1)->GetFactory(VarName)); - R->SetFactory("P", manager.GetFactory("P")); - manager.SetFactory("R", R); - } + manager.SetFactory(VarName, fact); - } else { - manager.SetFactory("R", Teuchos::null); + if (!RAP.is_null()) + RAP->AddTransferFactory(manager.GetFactory(VarName)); + else + 
RAPs->AddTransferFactory(manager.GetFactory(VarName)); } - - // === Restriction: Nullspace Scaling === - if (paramList.isParameter("restriction: scale nullspace") && paramList.get("restriction: scale nullspace")) { - RCP tentPFactory = rcp(new TentativePFactory()); - Teuchos::ParameterList tentPlist; - tentPlist.set("Nullspace name","Scaled Nullspace"); - tentPFactory->SetParameterList(tentPlist); - tentPFactory->SetFactory("Aggregates",manager.GetFactory("Aggregates")); - tentPFactory->SetFactory("CoarseMap",manager.GetFactory("CoarseMap")); - - if(R.is_null()) R = rcp(new TransPFactory()); - R->SetFactory("P",tentPFactory); + } +} + +// ====================================================================================================== +// ====================================== BlockNumber ================================================= +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_BlockNumber(ParameterList& paramList, const ParameterList& defaultList, + FactoryManager& manager, int levelID, std::vector& keeps) const { + if (useBlockNumber_) { + ParameterList myParams; + RCP fact = rcp(new InitialBlockNumberFactory()); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: block diagonal: interleaved blocksize", int, myParams); + fact->SetParameterList(myParams); + manager.SetFactory("BlockNumber", fact); + } +} + +// ===================================================================================================== +// =========================================== Restriction ============================================= +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_Restriction(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& /* 
keeps */) const { + MUELU_SET_VAR_2LIST(paramList, defaultList, "multigrid algorithm", std::string, multigridAlgo); + bool have_userR = false; + if (paramList.isParameter("R") && !paramList.get >("R").is_null()) + have_userR = true; + + // === Restriction === + RCP R; + if (!this->implicitTranspose_) { + MUELU_SET_VAR_2LIST(paramList, defaultList, "problem: symmetric", bool, isSymmetric); + + if (isSymmetric == false && (multigridAlgo == "unsmoothed" || multigridAlgo == "emin")) { + this->GetOStream(Warnings0) << "Switching \"problem: symmetric\" parameter to symmetric as multigrid algorithm. " << multigridAlgo << " is primarily supposed to be used for symmetric problems.\n\n" + << "Please note: if you are using \"unsmoothed\" transfer operators the \"problem: symmetric\" parameter " + << "has no real mathematical meaning, i.e. you can use it for non-symmetric\n" + << "problems, too. With \"problem: symmetric\"=\"symmetric\" you can use implicit transpose for building " + << "the restriction operators which may drastically reduce the amount of consumed memory." << std::endl; + isSymmetric = true; } + TEUCHOS_TEST_FOR_EXCEPTION(multigridAlgo == "pg" && isSymmetric == true, Exceptions::RuntimeError, + "Petrov-Galerkin smoothed transfer operators are only allowed for non-symmetric problems: Set \"problem: symmetric\" to false!\n" + "While PG smoothed transfer operators generally would also work for symmetric problems this is an unusual use case. 
" + "You can use the factory-based xml interface though if you need PG-AMG for symmetric problems."); + if (have_userR) { + manager.SetFactory("R", NoFactory::getRCP()); + } else { + if (isSymmetric) + R = rcp(new TransPFactory()); + else + R = rcp(new GenericRFactory()); - } - - // ===================================================================================================== - // ========================================= Repartition =============================================== - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_Repartition(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps, RCP & nullSpaceFactory) const - { - // === Repartitioning === - MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); - MUELU_SET_VAR_2LIST(paramList, defaultList, "repartition: enable", bool, enableRepart); - if (enableRepart) { -#if defined(HAVE_MPI) && (defined(HAVE_MUELU_ZOLTAN) || defined(HAVE_MUELU_ZOLTAN2)) // skip to the end, print warning, and turn off repartitioning if we don't have MPI and Zoltan/Zoltan2 - MUELU_SET_VAR_2LIST(paramList, defaultList, "repartition: use subcommunicators in place", bool, enableInPlace); - // Short summary of the issue: RebalanceTransferFactory shares ownership - // of "P" with SaPFactory, and therefore, changes the stored version. - // That means that if SaPFactory generated P, and stored it on the level, - // then after rebalancing the value in that storage changed. It goes - // against the concept of factories (I think), that every factory is - // responsible for its own objects, and they are immutable outside. - // - // In reuse, this is what happens: as we reuse Importer across setups, - // the order of factories changes, and coupled with shared ownership - // leads to problems. 
- // *First setup* - // SaP builds P [and stores it] - // TransP builds R [and stores it] - // RAP builds A [and stores it] - // RebalanceTransfer rebalances P [and changes the P stored by SaP] (*) - // RebalanceTransfer rebalances R - // RebalanceAc rebalances A - // *Second setup* ("RP" reuse) - // RebalanceTransfer rebalances P [which is incorrect due to (*)] - // RebalanceTransfer rebalances R - // RAP builds A [which is incorrect due to (*)] - // RebalanceAc rebalances A [which throws due to map inconsistency] - // ... - // *Second setup* ("tP" reuse) - // SaP builds P [and stores it] - // RebalanceTransfer rebalances P [and changes the P stored by SaP] (**) - // TransP builds R [which is incorrect due to (**)] - // RebalanceTransfer rebalances R - // ... - // - // Couple solutions to this: - // 1. [implemented] Requre "tP" and "PR" reuse to only be used with - // implicit rebalancing. - // 2. Do deep copy of P, and changed domain map and importer there. - // Need to investigate how expensive this is. - TEUCHOS_TEST_FOR_EXCEPTION(this->doPRrebalance_ && (reuseType == "tP" || reuseType == "RP"), Exceptions::InvalidArgument, - "Reuse types \"tP\" and \"PR\" require \"repartition: rebalance P and R\" set to \"false\""); - - // TEUCHOS_TEST_FOR_EXCEPTION(aggType == "brick", Exceptions::InvalidArgument, - // "Aggregation type \"brick\" requires \"repartition: enable\" set to \"false\""); - - MUELU_SET_VAR_2LIST(paramList, defaultList, "repartition: partitioner", std::string, partName); - TEUCHOS_TEST_FOR_EXCEPTION(partName != "zoltan" && partName != "zoltan2", Exceptions::InvalidArgument, - "Invalid partitioner name: \"" << partName << "\". 
Valid options: \"zoltan\", \"zoltan2\""); - -# ifndef HAVE_MUELU_ZOLTAN - bool switched = false; - if (partName == "zoltan") { - this->GetOStream(Warnings0) << "Zoltan interface is not available, trying to switch to Zoltan2" << std::endl; - partName = "zoltan2"; - switched = true; - } -# else -# ifndef HAVE_MUELU_ZOLTAN2 - bool switched = false; -# endif // HAVE_MUELU_ZOLTAN2 -# endif // HAVE_MUELU_ZOLTAN - -# ifndef HAVE_MUELU_ZOLTAN2 - if (partName == "zoltan2" && !switched) { - this->GetOStream(Warnings0) << "Zoltan2 interface is not available, trying to switch to Zoltan" << std::endl; - partName = "zoltan"; - } -# endif // HAVE_MUELU_ZOLTAN2 - - MUELU_SET_VAR_2LIST(paramList, defaultList, "repartition: node repartition level",int,nodeRepartitionLevel); - - // RepartitionHeuristic - auto repartheurFactory = rcp(new RepartitionHeuristicFactory()); - ParameterList repartheurParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: node repartition level", int, repartheurParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: start level", int, repartheurParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: min rows per proc", int, repartheurParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: target rows per proc", int, repartheurParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: min rows per thread", int, repartheurParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: target rows per thread", int, repartheurParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: max imbalance", double, repartheurParams); - repartheurFactory->SetParameterList(repartheurParams); - repartheurFactory->SetFactory("A", manager.GetFactory("A")); - manager.SetFactory("number of partitions", repartheurFactory); - manager.SetFactory("repartition: heuristic target rows per process", repartheurFactory); - - // 
Partitioner - RCP partitioner; - if (levelID == nodeRepartitionLevel) { - // partitioner = rcp(new NodePartitionInterface()); - partitioner = rcp(new MueLu::NodePartitionInterface()); - ParameterList partParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: node id" ,int,repartheurParams); - partitioner->SetParameterList(partParams); - partitioner->SetFactory("Node Comm", manager.GetFactory("Node Comm")); - } - else if (partName == "zoltan") { -# ifdef HAVE_MUELU_ZOLTAN - partitioner = rcp(new ZoltanInterface()); - // NOTE: ZoltanInterface ("zoltan") does not support external parameters through ParameterList -# else - throw Exceptions::RuntimeError("Zoltan interface is not available"); -# endif // HAVE_MUELU_ZOLTAN - } else if (partName == "zoltan2") { -# ifdef HAVE_MUELU_ZOLTAN2 - partitioner = rcp(new Zoltan2Interface()); - ParameterList partParams; - RCP partpartParams = rcp(new ParameterList(paramList.sublist("repartition: params", false))); - partParams.set("ParameterList", partpartParams); - partitioner->SetParameterList(partParams); - partitioner->SetFactory("repartition: heuristic target rows per process", - manager.GetFactory("repartition: heuristic target rows per process")); -# else - throw Exceptions::RuntimeError("Zoltan2 interface is not available"); -# endif // HAVE_MUELU_ZOLTAN2 - } + R->SetFactory("P", manager.GetFactory("P")); + manager.SetFactory("R", R); + } - partitioner->SetFactory("A", manager.GetFactory("A")); - partitioner->SetFactory("number of partitions", manager.GetFactory("number of partitions")); - if (useCoordinates_) - partitioner->SetFactory("Coordinates", manager.GetFactory("Coordinates")); - manager.SetFactory("Partition", partitioner); - - // Repartitioner - auto repartFactory = rcp(new RepartitionFactory()); - ParameterList repartParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: print partition distribution", bool, repartParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, 
defaultList, "repartition: remap parts", bool, repartParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: remap num values", int, repartParams); - repartFactory->SetParameterList(repartParams); - repartFactory->SetFactory("A", manager.GetFactory("A")); - repartFactory->SetFactory("number of partitions", manager.GetFactory("number of partitions")); - repartFactory->SetFactory("Partition", manager.GetFactory("Partition")); - manager.SetFactory("Importer", repartFactory); - if (reuseType != "none" && reuseType != "S" && levelID) - keeps.push_back(keep_pair("Importer", manager.GetFactory("Importer").get())); - - - if(enableInPlace) { - // Rebalanced A (in place) - // NOTE: This is for when we want to constrain repartitioning to match some other idea of what's going on. - // The major application is the (1,1) hierarchy in the Maxwell1 preconditioner. - auto newA = rcp(new RebalanceAcFactory()); - ParameterList rebAcParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: use subcommunicators", bool, rebAcParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: use subcommunicators in place", bool, rebAcParams); - newA->SetParameterList(rebAcParams); - newA->SetFactory("A", manager.GetFactory("A")); - newA->SetFactory("InPlaceMap", manager.GetFactory("InPlaceMap")); - manager.SetFactory("A",newA); - } - else { - // Rebalanced A - auto newA = rcp(new RebalanceAcFactory()); - ParameterList rebAcParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: use subcommunicators", bool, rebAcParams); - newA->SetParameterList(rebAcParams); - newA->SetFactory("A", manager.GetFactory("A")); - newA->SetFactory("Importer", manager.GetFactory("Importer")); - manager.SetFactory("A", newA); - - // Rebalanced P - auto newP = rcp(new RebalanceTransferFactory()); - ParameterList newPparams; - newPparams.set("type", "Interpolation"); - if (changedPRrebalance_) - newPparams.set("repartition: rebalance 
P and R", this->doPRrebalance_); - if (changedPRViaCopyrebalance_) - newPparams.set("repartition: explicit via new copy rebalance P and R",true); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: use subcommunicators", bool, newPparams); - newP-> SetParameterList(newPparams); - newP-> SetFactory("Importer", manager.GetFactory("Importer")); - newP-> SetFactory("P", manager.GetFactory("P")); - if (!paramList.isParameter("semicoarsen: number of levels")) - newP->SetFactory("Nullspace", manager.GetFactory("Ptent")); - else - newP->SetFactory("Nullspace", manager.GetFactory("P")); // TogglePFactory - if (useCoordinates_) - newP-> SetFactory("Coordinates", manager.GetFactory("Coordinates")); - manager.SetFactory("P", newP); - if (useCoordinates_) - manager.SetFactory("Coordinates", newP); - if (useBlockNumber_ && (levelID > 0)) { - newP->SetFactory("BlockNumber", manager.GetFactory("BlockNumber")); - manager.SetFactory("BlockNumber", newP); - } - - // Rebalanced R - auto newR = rcp(new RebalanceTransferFactory()); - ParameterList newRparams; - newRparams.set("type", "Restriction"); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: use subcommunicators", bool, newRparams); - if (changedPRrebalance_) - newRparams.set("repartition: rebalance P and R", this->doPRrebalance_); - if (changedPRViaCopyrebalance_) - newPparams.set("repartition: explicit via new copy rebalance P and R",true); - if (changedImplicitTranspose_) - newRparams.set("transpose: use implicit", this->implicitTranspose_); - newR-> SetParameterList(newRparams); - newR-> SetFactory("Importer", manager.GetFactory("Importer")); - if (!this->implicitTranspose_) { - newR->SetFactory("R", manager.GetFactory("R")); - manager.SetFactory("R", newR); - } + } else { + manager.SetFactory("R", Teuchos::null); + } - // NOTE: the role of NullspaceFactory is to provide nullspace on the finest - // level if a user does not do that. 
For all other levels it simply passes - // nullspace from a real factory to whoever needs it. If we don't use - // repartitioning, that factory is "TentativePFactory"; if we do, it is - // "RebalanceTransferFactory". But we still have to have NullspaceFactory as - // the "Nullspace" of the manager - // NOTE: This really needs to be set on the *NullSpaceFactory*, not manager.get("Nullspace"). - ParameterList newNullparams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "nullspace: calculate rotations", bool, newNullparams); - nullSpaceFactory->SetFactory("Nullspace", newP); - nullSpaceFactory->SetParameterList(newNullparams); - } + // === Restriction: Nullspace Scaling === + if (paramList.isParameter("restriction: scale nullspace") && paramList.get("restriction: scale nullspace")) { + RCP tentPFactory = rcp(new TentativePFactory()); + Teuchos::ParameterList tentPlist; + tentPlist.set("Nullspace name", "Scaled Nullspace"); + tentPFactory->SetParameterList(tentPlist); + tentPFactory->SetFactory("Aggregates", manager.GetFactory("Aggregates")); + tentPFactory->SetFactory("CoarseMap", manager.GetFactory("CoarseMap")); + + if (R.is_null()) R = rcp(new TransPFactory()); + R->SetFactory("P", tentPFactory); + } +} + +// ===================================================================================================== +// ========================================= Repartition =============================================== +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_Repartition(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps, RCP& nullSpaceFactory) const { + // === Repartitioning === + MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); + MUELU_SET_VAR_2LIST(paramList, defaultList, "repartition: enable", bool, enableRepart); + if 
(enableRepart) { +#if defined(HAVE_MPI) && (defined(HAVE_MUELU_ZOLTAN) || defined(HAVE_MUELU_ZOLTAN2)) // skip to the end, print warning, and turn off repartitioning if we don't have MPI and Zoltan/Zoltan2 + MUELU_SET_VAR_2LIST(paramList, defaultList, "repartition: use subcommunicators in place", bool, enableInPlace); + // Short summary of the issue: RebalanceTransferFactory shares ownership + // of "P" with SaPFactory, and therefore, changes the stored version. + // That means that if SaPFactory generated P, and stored it on the level, + // then after rebalancing the value in that storage changed. It goes + // against the concept of factories (I think), that every factory is + // responsible for its own objects, and they are immutable outside. + // + // In reuse, this is what happens: as we reuse Importer across setups, + // the order of factories changes, and coupled with shared ownership + // leads to problems. + // *First setup* + // SaP builds P [and stores it] + // TransP builds R [and stores it] + // RAP builds A [and stores it] + // RebalanceTransfer rebalances P [and changes the P stored by SaP] (*) + // RebalanceTransfer rebalances R + // RebalanceAc rebalances A + // *Second setup* ("RP" reuse) + // RebalanceTransfer rebalances P [which is incorrect due to (*)] + // RebalanceTransfer rebalances R + // RAP builds A [which is incorrect due to (*)] + // RebalanceAc rebalances A [which throws due to map inconsistency] + // ... + // *Second setup* ("tP" reuse) + // SaP builds P [and stores it] + // RebalanceTransfer rebalances P [and changes the P stored by SaP] (**) + // TransP builds R [which is incorrect due to (**)] + // RebalanceTransfer rebalances R + // ... + // + // Couple solutions to this: + // 1. [implemented] Requre "tP" and "PR" reuse to only be used with + // implicit rebalancing. + // 2. Do deep copy of P, and changed domain map and importer there. + // Need to investigate how expensive this is. 
+ TEUCHOS_TEST_FOR_EXCEPTION(this->doPRrebalance_ && (reuseType == "tP" || reuseType == "RP"), Exceptions::InvalidArgument, + "Reuse types \"tP\" and \"PR\" require \"repartition: rebalance P and R\" set to \"false\""); + + // TEUCHOS_TEST_FOR_EXCEPTION(aggType == "brick", Exceptions::InvalidArgument, + // "Aggregation type \"brick\" requires \"repartition: enable\" set to \"false\""); + + MUELU_SET_VAR_2LIST(paramList, defaultList, "repartition: partitioner", std::string, partName); + TEUCHOS_TEST_FOR_EXCEPTION(partName != "zoltan" && partName != "zoltan2", Exceptions::InvalidArgument, + "Invalid partitioner name: \"" << partName << "\". Valid options: \"zoltan\", \"zoltan2\""); + +#ifndef HAVE_MUELU_ZOLTAN + bool switched = false; + if (partName == "zoltan") { + this->GetOStream(Warnings0) << "Zoltan interface is not available, trying to switch to Zoltan2" << std::endl; + partName = "zoltan2"; + switched = true; + } #else - paramList.set("repartition: enable",false); -# ifndef HAVE_MPI - this->GetOStream(Warnings0) << "No repartitioning available for a serial run\n"; -# else - this->GetOStream(Warnings0) << "Zoltan/Zoltan2 are unavailable for repartitioning\n"; -# endif // HAVE_MPI -#endif // defined(HAVE_MPI) && (defined(HAVE_MUELU_ZOLTAN) || defined(HAVE_MUELU_ZOLTAN2)) +#ifndef HAVE_MUELU_ZOLTAN2 + bool switched = false; +#endif // HAVE_MUELU_ZOLTAN2 +#endif // HAVE_MUELU_ZOLTAN + +#ifndef HAVE_MUELU_ZOLTAN2 + if (partName == "zoltan2" && !switched) { + this->GetOStream(Warnings0) << "Zoltan2 interface is not available, trying to switch to Zoltan" << std::endl; + partName = "zoltan"; + } +#endif // HAVE_MUELU_ZOLTAN2 + + MUELU_SET_VAR_2LIST(paramList, defaultList, "repartition: node repartition level", int, nodeRepartitionLevel); + + // RepartitionHeuristic + auto repartheurFactory = rcp(new RepartitionHeuristicFactory()); + ParameterList repartheurParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: node repartition level", int, 
repartheurParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: start level", int, repartheurParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: min rows per proc", int, repartheurParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: target rows per proc", int, repartheurParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: min rows per thread", int, repartheurParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: target rows per thread", int, repartheurParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: max imbalance", double, repartheurParams); + repartheurFactory->SetParameterList(repartheurParams); + repartheurFactory->SetFactory("A", manager.GetFactory("A")); + manager.SetFactory("number of partitions", repartheurFactory); + manager.SetFactory("repartition: heuristic target rows per process", repartheurFactory); + + // Partitioner + RCP partitioner; + if (levelID == nodeRepartitionLevel) { + // partitioner = rcp(new NodePartitionInterface()); + partitioner = rcp(new MueLu::NodePartitionInterface()); + ParameterList partParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: node id", int, repartheurParams); + partitioner->SetParameterList(partParams); + partitioner->SetFactory("Node Comm", manager.GetFactory("Node Comm")); + } else if (partName == "zoltan") { +#ifdef HAVE_MUELU_ZOLTAN + partitioner = rcp(new ZoltanInterface()); + // NOTE: ZoltanInterface ("zoltan") does not support external parameters through ParameterList +#else + throw Exceptions::RuntimeError("Zoltan interface is not available"); +#endif // HAVE_MUELU_ZOLTAN + } else if (partName == "zoltan2") { +#ifdef HAVE_MUELU_ZOLTAN2 + partitioner = rcp(new Zoltan2Interface()); + ParameterList partParams; + RCP partpartParams = rcp(new ParameterList(paramList.sublist("repartition: params", false))); + 
partParams.set("ParameterList", partpartParams); + partitioner->SetParameterList(partParams); + partitioner->SetFactory("repartition: heuristic target rows per process", + manager.GetFactory("repartition: heuristic target rows per process")); +#else + throw Exceptions::RuntimeError("Zoltan2 interface is not available"); +#endif // HAVE_MUELU_ZOLTAN2 } - } - - // ===================================================================================================== - // ========================================= Low precision transfers =================================== - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_LowPrecision(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const - { - MUELU_SET_VAR_2LIST(paramList, defaultList, "transfers: half precision", bool, enableLowPrecision); - if (enableLowPrecision) { - // Low precision P - auto newP = rcp(new LowPrecisionFactory()); + partitioner->SetFactory("A", manager.GetFactory("A")); + partitioner->SetFactory("number of partitions", manager.GetFactory("number of partitions")); + if (useCoordinates_) + partitioner->SetFactory("Coordinates", manager.GetFactory("Coordinates")); + manager.SetFactory("Partition", partitioner); + + // Repartitioner + auto repartFactory = rcp(new RepartitionFactory()); + ParameterList repartParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: print partition distribution", bool, repartParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: remap parts", bool, repartParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: remap num values", int, repartParams); + repartFactory->SetParameterList(repartParams); + repartFactory->SetFactory("A", manager.GetFactory("A")); + repartFactory->SetFactory("number of partitions", 
manager.GetFactory("number of partitions")); + repartFactory->SetFactory("Partition", manager.GetFactory("Partition")); + manager.SetFactory("Importer", repartFactory); + if (reuseType != "none" && reuseType != "S" && levelID) + keeps.push_back(keep_pair("Importer", manager.GetFactory("Importer").get())); + + if (enableInPlace) { + // Rebalanced A (in place) + // NOTE: This is for when we want to constrain repartitioning to match some other idea of what's going on. + // The major application is the (1,1) hierarchy in the Maxwell1 preconditioner. + auto newA = rcp(new RebalanceAcFactory()); + ParameterList rebAcParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: use subcommunicators", bool, rebAcParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: use subcommunicators in place", bool, rebAcParams); + newA->SetParameterList(rebAcParams); + newA->SetFactory("A", manager.GetFactory("A")); + newA->SetFactory("InPlaceMap", manager.GetFactory("InPlaceMap")); + manager.SetFactory("A", newA); + } else { + // Rebalanced A + auto newA = rcp(new RebalanceAcFactory()); + ParameterList rebAcParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: use subcommunicators", bool, rebAcParams); + newA->SetParameterList(rebAcParams); + newA->SetFactory("A", manager.GetFactory("A")); + newA->SetFactory("Importer", manager.GetFactory("Importer")); + manager.SetFactory("A", newA); + + // Rebalanced P + auto newP = rcp(new RebalanceTransferFactory()); ParameterList newPparams; - newPparams.set("matrix key", "P"); - newP-> SetParameterList(newPparams); - newP-> SetFactory("P", manager.GetFactory("P")); + newPparams.set("type", "Interpolation"); + if (changedPRrebalance_) + newPparams.set("repartition: rebalance P and R", this->doPRrebalance_); + if (changedPRViaCopyrebalance_) + newPparams.set("repartition: explicit via new copy rebalance P and R", true); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, 
"repartition: use subcommunicators", bool, newPparams); + newP->SetParameterList(newPparams); + newP->SetFactory("Importer", manager.GetFactory("Importer")); + newP->SetFactory("P", manager.GetFactory("P")); + if (!paramList.isParameter("semicoarsen: number of levels")) + newP->SetFactory("Nullspace", manager.GetFactory("Ptent")); + else + newP->SetFactory("Nullspace", manager.GetFactory("P")); // TogglePFactory + if (useCoordinates_) + newP->SetFactory("Coordinates", manager.GetFactory("Coordinates")); manager.SetFactory("P", newP); + if (useCoordinates_) + manager.SetFactory("Coordinates", newP); + if (useBlockNumber_ && (levelID > 0)) { + newP->SetFactory("BlockNumber", manager.GetFactory("BlockNumber")); + manager.SetFactory("BlockNumber", newP); + } + // Rebalanced R + auto newR = rcp(new RebalanceTransferFactory()); + ParameterList newRparams; + newRparams.set("type", "Restriction"); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: use subcommunicators", bool, newRparams); + if (changedPRrebalance_) + newRparams.set("repartition: rebalance P and R", this->doPRrebalance_); + if (changedPRViaCopyrebalance_) + newPparams.set("repartition: explicit via new copy rebalance P and R", true); + if (changedImplicitTranspose_) + newRparams.set("transpose: use implicit", this->implicitTranspose_); + newR->SetParameterList(newRparams); + newR->SetFactory("Importer", manager.GetFactory("Importer")); if (!this->implicitTranspose_) { - // Low precision R - auto newR = rcp(new LowPrecisionFactory()); - ParameterList newRparams; - newRparams.set("matrix key", "R"); - newR-> SetParameterList(newRparams); - newR-> SetFactory("R", manager.GetFactory("R")); + newR->SetFactory("R", manager.GetFactory("R")); manager.SetFactory("R", newR); } - } - } - - // ===================================================================================================== - // =========================================== Nullspace =============================================== 
- // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_Nullspace(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, - int /* levelID */, std::vector& /* keeps */, RCP & nullSpaceFactory) const - { - // Nullspace - MUELU_KOKKOS_FACTORY(nullSpace, NullspaceFactory, NullspaceFactory_kokkos); - bool have_userNS = false; - if (paramList.isParameter("Nullspace") && !paramList.get >("Nullspace").is_null()) - have_userNS = true; - - if (!have_userNS) { + // NOTE: the role of NullspaceFactory is to provide nullspace on the finest + // level if a user does not do that. For all other levels it simply passes + // nullspace from a real factory to whoever needs it. If we don't use + // repartitioning, that factory is "TentativePFactory"; if we do, it is + // "RebalanceTransferFactory". But we still have to have NullspaceFactory as + // the "Nullspace" of the manager + // NOTE: This really needs to be set on the *NullSpaceFactory*, not manager.get("Nullspace"). 
ParameterList newNullparams; MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "nullspace: calculate rotations", bool, newNullparams); - nullSpace->SetParameterList(newNullparams); - nullSpace->SetFactory("Nullspace", manager.GetFactory("Ptent")); - manager.SetFactory("Nullspace", nullSpace); - } - nullSpaceFactory = nullSpace; - - if (paramList.isParameter("restriction: scale nullspace") && paramList.get("restriction: scale nullspace")) { - RCP scaledNSfactory = rcp(new ScaledNullspaceFactory()); - scaledNSfactory->SetFactory("Nullspace",nullSpaceFactory); - manager.SetFactory("Scaled Nullspace",scaledNSfactory); + nullSpaceFactory->SetFactory("Nullspace", newP); + nullSpaceFactory->SetParameterList(newNullparams); } - +#else + paramList.set("repartition: enable", false); +#ifndef HAVE_MPI + this->GetOStream(Warnings0) << "No repartitioning available for a serial run\n"; +#else + this->GetOStream(Warnings0) << "Zoltan/Zoltan2 are unavailable for repartitioning\n"; +#endif // HAVE_MPI +#endif // defined(HAVE_MPI) && (defined(HAVE_MUELU_ZOLTAN) || defined(HAVE_MUELU_ZOLTAN2)) } +} + +// ===================================================================================================== +// ========================================= Low precision transfers =================================== +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_LowPrecision(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const { + MUELU_SET_VAR_2LIST(paramList, defaultList, "transfers: half precision", bool, enableLowPrecision); + + if (enableLowPrecision) { + // Low precision P + auto newP = rcp(new LowPrecisionFactory()); + ParameterList newPparams; + newPparams.set("matrix key", "P"); + newP->SetParameterList(newPparams); + newP->SetFactory("P", manager.GetFactory("P")); + 
manager.SetFactory("P", newP); - // ===================================================================================================== - // ================================= Algorithm: SemiCoarsening ========================================= - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_SemiCoarsen(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, - int /* levelID */, std::vector& /* keeps */) const - { - // === Semi-coarsening === - RCP semicoarsenFactory = Teuchos::null; - if (paramList.isParameter("semicoarsen: number of levels") && - paramList.get("semicoarsen: number of levels") > 0) { - - ParameterList togglePParams; - ParameterList semicoarsenPParams; - ParameterList linedetectionParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "semicoarsen: number of levels", int, togglePParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "semicoarsen: coarsen rate", int, semicoarsenPParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "semicoarsen: piecewise constant", bool, semicoarsenPParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "semicoarsen: piecewise linear", bool, semicoarsenPParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "semicoarsen: calculate nonsym restriction", bool, semicoarsenPParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "linedetection: orientation", std::string, linedetectionParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "linedetection: num layers", int, linedetectionParams); - - MUELU_KOKKOS_FACTORY_NO_DECL(semicoarsenFactory, SemiCoarsenPFactory, SemiCoarsenPFactory_kokkos); - RCP linedetectionFactory = rcp(new LineDetectionFactory()); - RCP togglePFactory = rcp(new TogglePFactory()); - - linedetectionFactory->SetParameterList(linedetectionParams); - semicoarsenFactory 
->SetParameterList(semicoarsenPParams); - togglePFactory ->SetParameterList(togglePParams); - - togglePFactory->AddCoarseNullspaceFactory (semicoarsenFactory); - togglePFactory->AddProlongatorFactory (semicoarsenFactory); - togglePFactory->AddPtentFactory (semicoarsenFactory); - togglePFactory->AddCoarseNullspaceFactory (manager.GetFactory("Ptent")); - togglePFactory->AddProlongatorFactory (manager.GetFactory("P")); - togglePFactory->AddPtentFactory (manager.GetFactory("Ptent")); - - manager.SetFactory("CoarseNumZLayers", linedetectionFactory); - manager.SetFactory("LineDetection_Layers", linedetectionFactory); - manager.SetFactory("LineDetection_VertLineIds", linedetectionFactory); - - manager.SetFactory("P", togglePFactory); - manager.SetFactory("Ptent", togglePFactory); - manager.SetFactory("Nullspace", togglePFactory); + if (!this->implicitTranspose_) { + // Low precision R + auto newR = rcp(new LowPrecisionFactory()); + ParameterList newRparams; + newRparams.set("matrix key", "R"); + newR->SetParameterList(newRparams); + newR->SetFactory("R", manager.GetFactory("R")); + manager.SetFactory("R", newR); } + } +} + +// ===================================================================================================== +// =========================================== Nullspace =============================================== +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_Nullspace(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, + int /* levelID */, std::vector& /* keeps */, RCP& nullSpaceFactory) const { + // Nullspace + MUELU_KOKKOS_FACTORY(nullSpace, NullspaceFactory, NullspaceFactory_kokkos); + + bool have_userNS = false; + if (paramList.isParameter("Nullspace") && !paramList.get >("Nullspace").is_null()) + have_userNS = true; + + if (!have_userNS) { + ParameterList newNullparams; + 
MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "nullspace: calculate rotations", bool, newNullparams); + nullSpace->SetParameterList(newNullparams); + nullSpace->SetFactory("Nullspace", manager.GetFactory("Ptent")); + manager.SetFactory("Nullspace", nullSpace); + } + nullSpaceFactory = nullSpace; - if (paramList.isParameter("semicoarsen: number of levels")) { - auto tf = rcp(new ToggleCoordinatesTransferFactory()); - tf->SetFactory("Chosen P", manager.GetFactory("P")); - tf->AddCoordTransferFactory(semicoarsenFactory); - - RCP coords = rcp(new CoordinatesTransferFactory()); - coords->SetFactory("Aggregates", manager.GetFactory("Aggregates")); - coords->SetFactory("CoarseMap", manager.GetFactory("CoarseMap")); - tf->AddCoordTransferFactory(coords); - manager.SetFactory("Coordinates", tf); - } + if (paramList.isParameter("restriction: scale nullspace") && paramList.get("restriction: scale nullspace")) { + RCP scaledNSfactory = rcp(new ScaledNullspaceFactory()); + scaledNSfactory->SetFactory("Nullspace", nullSpaceFactory); + manager.SetFactory("Scaled Nullspace", scaledNSfactory); + } +} + +// ===================================================================================================== +// ================================= Algorithm: SemiCoarsening ========================================= +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_SemiCoarsen(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, + int /* levelID */, std::vector& /* keeps */) const { + // === Semi-coarsening === + RCP semicoarsenFactory = Teuchos::null; + if (paramList.isParameter("semicoarsen: number of levels") && + paramList.get("semicoarsen: number of levels") > 0) { + ParameterList togglePParams; + ParameterList semicoarsenPParams; + ParameterList linedetectionParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, 
"semicoarsen: number of levels", int, togglePParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "semicoarsen: coarsen rate", int, semicoarsenPParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "semicoarsen: piecewise constant", bool, semicoarsenPParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "semicoarsen: piecewise linear", bool, semicoarsenPParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "semicoarsen: calculate nonsym restriction", bool, semicoarsenPParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "linedetection: orientation", std::string, linedetectionParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "linedetection: num layers", int, linedetectionParams); + + MUELU_KOKKOS_FACTORY_NO_DECL(semicoarsenFactory, SemiCoarsenPFactory, SemiCoarsenPFactory_kokkos); + RCP linedetectionFactory = rcp(new LineDetectionFactory()); + RCP togglePFactory = rcp(new TogglePFactory()); + + linedetectionFactory->SetParameterList(linedetectionParams); + semicoarsenFactory->SetParameterList(semicoarsenPParams); + togglePFactory->SetParameterList(togglePParams); + + togglePFactory->AddCoarseNullspaceFactory(semicoarsenFactory); + togglePFactory->AddProlongatorFactory(semicoarsenFactory); + togglePFactory->AddPtentFactory(semicoarsenFactory); + togglePFactory->AddCoarseNullspaceFactory(manager.GetFactory("Ptent")); + togglePFactory->AddProlongatorFactory(manager.GetFactory("P")); + togglePFactory->AddPtentFactory(manager.GetFactory("Ptent")); + + manager.SetFactory("CoarseNumZLayers", linedetectionFactory); + manager.SetFactory("LineDetection_Layers", linedetectionFactory); + manager.SetFactory("LineDetection_VertLineIds", linedetectionFactory); + + manager.SetFactory("P", togglePFactory); + manager.SetFactory("Ptent", togglePFactory); + manager.SetFactory("Nullspace", togglePFactory); } + if (paramList.isParameter("semicoarsen: number of levels")) { + auto tf = rcp(new 
ToggleCoordinatesTransferFactory()); + tf->SetFactory("Chosen P", manager.GetFactory("P")); + tf->AddCoordTransferFactory(semicoarsenFactory); - // ===================================================================================================== - // ================================== Algorithm: P-Coarsening ========================================== - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_PCoarsen(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const - { + RCP coords = rcp(new CoordinatesTransferFactory()); + coords->SetFactory("Aggregates", manager.GetFactory("Aggregates")); + coords->SetFactory("CoarseMap", manager.GetFactory("CoarseMap")); + tf->AddCoordTransferFactory(coords); + manager.SetFactory("Coordinates", tf); + } +} + +// ===================================================================================================== +// ================================== Algorithm: P-Coarsening ========================================== +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_PCoarsen(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, + int levelID, std::vector& keeps) const { #ifdef HAVE_MUELU_INTREPID2 - // This only makes sense to invoke from the default list. 
- if (defaultList.isParameter("pcoarsen: schedule") && defaultList.isParameter("pcoarsen: element")) { - // P-Coarsening by schedule (new interface) - // NOTE: levelID represents the *coarse* level in this case - auto pcoarsen_schedule = Teuchos::getArrayFromStringParameter(defaultList,"pcoarsen: schedule"); - auto pcoarsen_element = defaultList.get("pcoarsen: element"); - - if (levelID >= (int)pcoarsen_schedule.size()) { - // Past the p-coarsening levels, we do Smoothed Aggregation - // NOTE: We should probably consider allowing other options past p-coarsening - UpdateFactoryManager_SA(paramList, defaultList, manager, levelID, keeps); - - } else { - // P-Coarsening - ParameterList Pparams; - auto P = rcp(new IntrepidPCoarsenFactory()); - std::string lo = pcoarsen_element + std::to_string(pcoarsen_schedule[levelID]); - std::string hi = (levelID ? pcoarsen_element + std::to_string(pcoarsen_schedule[levelID-1]) : lo); - Pparams.set("pcoarsen: hi basis", hi); - Pparams.set("pcoarsen: lo basis", lo); - P->SetParameterList(Pparams); - manager.SetFactory("P", P); - - // Add special nullspace handling - rcp_dynamic_cast(manager.GetFactoryNonConst("Nullspace"))->SetFactory("Nullspace", manager.GetFactory("P")); - } + // This only makes sense to invoke from the default list. 
+ if (defaultList.isParameter("pcoarsen: schedule") && defaultList.isParameter("pcoarsen: element")) { + // P-Coarsening by schedule (new interface) + // NOTE: levelID represents the *coarse* level in this case + auto pcoarsen_schedule = Teuchos::getArrayFromStringParameter(defaultList, "pcoarsen: schedule"); + auto pcoarsen_element = defaultList.get("pcoarsen: element"); + + if (levelID >= (int)pcoarsen_schedule.size()) { + // Past the p-coarsening levels, we do Smoothed Aggregation + // NOTE: We should probably consider allowing other options past p-coarsening + UpdateFactoryManager_SA(paramList, defaultList, manager, levelID, keeps); } else { - // P-Coarsening by manual specification (old interface) + // P-Coarsening ParameterList Pparams; - auto P = rcp(new IntrepidPCoarsenFactory()); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "pcoarsen: hi basis", std::string, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "pcoarsen: lo basis", std::string, Pparams); + auto P = rcp(new IntrepidPCoarsenFactory()); + std::string lo = pcoarsen_element + std::to_string(pcoarsen_schedule[levelID]); + std::string hi = (levelID ? 
pcoarsen_element + std::to_string(pcoarsen_schedule[levelID - 1]) : lo); + Pparams.set("pcoarsen: hi basis", hi); + Pparams.set("pcoarsen: lo basis", lo); P->SetParameterList(Pparams); manager.SetFactory("P", P); @@ -1968,225 +1919,230 @@ namespace MueLu { rcp_dynamic_cast(manager.GetFactoryNonConst("Nullspace"))->SetFactory("Nullspace", manager.GetFactory("P")); } -#endif - } - - // ===================================================================================================== - // ============================== Algorithm: Smoothed Aggregation ====================================== - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_SA(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, int /* levelID */, std::vector& keeps) const { - // Smoothed aggregation - MUELU_KOKKOS_FACTORY(P, SaPFactory, SaPFactory_kokkos); + } else { + // P-Coarsening by manual specification (old interface) ParameterList Pparams; - if (paramList.isSublist("matrixmatrix: kernel params")) - Pparams.sublist("matrixmatrix: kernel params", false) = paramList.sublist("matrixmatrix: kernel params"); - if (defaultList.isSublist("matrixmatrix: kernel params")) - Pparams.sublist("matrixmatrix: kernel params", false) = defaultList.sublist("matrixmatrix: kernel params"); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: damping factor", double, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: calculate eigenvalue estimate", bool, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: max eigenvalue", double, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: eigenvalue estimate num iterations", int, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: use rowsumabs diagonal scaling", bool, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, 
defaultList, "sa: rowsumabs diagonal replacement tolerance", double, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: rowsumabs diagonal replacement value", double, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: rowsumabs use automatic diagonal tolerance", bool, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: enforce constraints", bool, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: eigen-analysis type", std::string, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "tentative: calculate qr", bool, Pparams); - + auto P = rcp(new IntrepidPCoarsenFactory()); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "pcoarsen: hi basis", std::string, Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "pcoarsen: lo basis", std::string, Pparams); P->SetParameterList(Pparams); - - - // Filtering - MUELU_SET_VAR_2LIST(paramList, defaultList, "sa: use filtered matrix", bool, useFiltering); - if (useFiltering) { - // NOTE: Here, non-Kokkos and Kokkos versions diverge in the way the - // dependency tree is setup. The Kokkos version has merged the the - // FilteredAFactory into the CoalesceDropFactory. 
- if (!useKokkos_) { - RCP filterFactory = rcp(new FilteredAFactory()); - - ParameterList fParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use lumping", bool, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: reuse graph", bool, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: reuse eigenvalue", bool, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use root stencil", bool, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: Dirichlet threshold", double, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use spread lumping", bool, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: spread lumping diag dom growth factor", double, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: spread lumping diag dom cap", double, fParams); - filterFactory->SetParameterList(fParams); - filterFactory->SetFactory("Graph", manager.GetFactory("Graph")); - filterFactory->SetFactory("Aggregates", manager.GetFactory("Aggregates")); - filterFactory->SetFactory("UnAmalgamationInfo", manager.GetFactory("UnAmalgamationInfo")); - // I'm not sure why we need this line. 
See comments for DofsPerNode for UncoupledAggregation above - filterFactory->SetFactory("Filtering", manager.GetFactory("Graph")); - - P->SetFactory("A", filterFactory); - - } else { - P->SetFactory("A", manager.GetFactory("Graph")); - } - } - - P->SetFactory("P", manager.GetFactory("Ptent")); manager.SetFactory("P", P); - bool filteringChangesMatrix = useFiltering && !MUELU_TEST_PARAM_2LIST(paramList, defaultList, "aggregation: drop tol", double, 0); - MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); - if (reuseType == "tP" && !filteringChangesMatrix) - keeps.push_back(keep_pair("AP reuse data", P.get())); + // Add special nullspace handling + rcp_dynamic_cast(manager.GetFactoryNonConst("Nullspace"))->SetFactory("Nullspace", manager.GetFactory("P")); } - // ===================================================================================================== - // =============================== Algorithm: Energy Minimization ====================================== - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_Emin(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, - int /* levelID */, std::vector& /* keeps */) const - { - MUELU_SET_VAR_2LIST(paramList, defaultList, "emin: pattern", std::string, patternType); - MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); - TEUCHOS_TEST_FOR_EXCEPTION(patternType != "AkPtent", Exceptions::InvalidArgument, - "Invalid pattern name: \"" << patternType << "\". 
Valid options: \"AkPtent\""); - // Pattern - auto patternFactory = rcp(new PatternFactory()); - ParameterList patternParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "emin: pattern order", int, patternParams); - patternFactory->SetParameterList(patternParams); - patternFactory->SetFactory("P", manager.GetFactory("Ptent")); - manager.SetFactory("Ppattern", patternFactory); - - // Constraint - auto constraintFactory = rcp(new ConstraintFactory()); - constraintFactory->SetFactory("Ppattern", manager.GetFactory("Ppattern")); - constraintFactory->SetFactory("CoarseNullspace", manager.GetFactory("Ptent")); - manager.SetFactory("Constraint", constraintFactory); - - // Emin Factory - auto P = rcp(new EminPFactory()); - // Filtering - MUELU_SET_VAR_2LIST(paramList, defaultList, "emin: use filtered matrix", bool, useFiltering); - if(useFiltering) { - // NOTE: Here, non-Kokkos and Kokkos versions diverge in the way the - // dependency tree is setup. The Kokkos version has merged the the - // FilteredAFactory into the CoalesceDropFactory. 
- if (!useKokkos_) { - RCP filterFactory = rcp(new FilteredAFactory()); - - ParameterList fParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use lumping", bool, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: reuse graph", bool, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: reuse eigenvalue", bool, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use root stencil", bool, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: Dirichlet threshold", double, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use spread lumping", bool, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: spread lumping diag dom growth factor", double, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: spread lumping diag dom cap", double, fParams); - filterFactory->SetParameterList(fParams); - filterFactory->SetFactory("Graph", manager.GetFactory("Graph")); - filterFactory->SetFactory("Aggregates", manager.GetFactory("Aggregates")); - filterFactory->SetFactory("UnAmalgamationInfo", manager.GetFactory("UnAmalgamationInfo")); - // I'm not sure why we need this line. 
See comments for DofsPerNode for UncoupledAggregation above - filterFactory->SetFactory("Filtering", manager.GetFactory("Graph")); - - P->SetFactory("A", filterFactory); - - } else { - P->SetFactory("A", manager.GetFactory("Graph")); - } - } +#endif +} + +// ===================================================================================================== +// ============================== Algorithm: Smoothed Aggregation ====================================== +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_SA(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, int /* levelID */, std::vector& keeps) const { + // Smoothed aggregation + MUELU_KOKKOS_FACTORY(P, SaPFactory, SaPFactory_kokkos); + ParameterList Pparams; + if (paramList.isSublist("matrixmatrix: kernel params")) + Pparams.sublist("matrixmatrix: kernel params", false) = paramList.sublist("matrixmatrix: kernel params"); + if (defaultList.isSublist("matrixmatrix: kernel params")) + Pparams.sublist("matrixmatrix: kernel params", false) = defaultList.sublist("matrixmatrix: kernel params"); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: damping factor", double, Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: calculate eigenvalue estimate", bool, Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: max eigenvalue", double, Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: eigenvalue estimate num iterations", int, Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: use rowsumabs diagonal scaling", bool, Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: rowsumabs diagonal replacement tolerance", double, Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: rowsumabs diagonal replacement value", double, Pparams); + 
MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: rowsumabs use automatic diagonal tolerance", bool, Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: enforce constraints", bool, Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: eigen-analysis type", std::string, Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "tentative: calculate qr", bool, Pparams); + + P->SetParameterList(Pparams); + + // Filtering + MUELU_SET_VAR_2LIST(paramList, defaultList, "sa: use filtered matrix", bool, useFiltering); + if (useFiltering) { + // NOTE: Here, non-Kokkos and Kokkos versions diverge in the way the + // dependency tree is setup. The Kokkos version has merged the the + // FilteredAFactory into the CoalesceDropFactory. + if (!useKokkos_) { + RCP filterFactory = rcp(new FilteredAFactory()); + + ParameterList fParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use lumping", bool, fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: reuse graph", bool, fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: reuse eigenvalue", bool, fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use root stencil", bool, fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: Dirichlet threshold", double, fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use spread lumping", bool, fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: spread lumping diag dom growth factor", double, fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: spread lumping diag dom cap", double, fParams); + filterFactory->SetParameterList(fParams); + filterFactory->SetFactory("Graph", manager.GetFactory("Graph")); + filterFactory->SetFactory("Aggregates", manager.GetFactory("Aggregates")); + 
filterFactory->SetFactory("UnAmalgamationInfo", manager.GetFactory("UnAmalgamationInfo")); + // I'm not sure why we need this line. See comments for DofsPerNode for UncoupledAggregation above + filterFactory->SetFactory("Filtering", manager.GetFactory("Graph")); + + P->SetFactory("A", filterFactory); - // Energy minimization - ParameterList Pparams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "emin: num iterations", int, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "emin: iterative method", std::string, Pparams); - if (reuseType == "emin") { - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "emin: num reuse iterations", int, Pparams); - Pparams.set("Keep P0", true); - Pparams.set("Keep Constraint0", true); + } else { + P->SetFactory("A", manager.GetFactory("Graph")); } - P->SetParameterList(Pparams); - P->SetFactory("P", manager.GetFactory("Ptent")); - P->SetFactory("Constraint", manager.GetFactory("Constraint")); - manager.SetFactory("P", P); - } - - // ===================================================================================================== - // ================================= Algorithm: Petrov-Galerkin ======================================== - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_PG(ParameterList& /* paramList */, const ParameterList& /* defaultList */, FactoryManager& manager, - int /* levelID */, std::vector& /* keeps */) const - { - TEUCHOS_TEST_FOR_EXCEPTION(this->implicitTranspose_, Exceptions::RuntimeError, - "Implicit transpose not supported with Petrov-Galerkin smoothed transfer operators: Set \"transpose: use implicit\" to false!\n" \ - "Petrov-Galerkin transfer operator smoothing for non-symmetric problems requires a separate handling of the restriction operator which " \ - "does not allow the usage of implicit transpose easily."); - - // Petrov-Galerkin - auto P 
= rcp(new PgPFactory()); - P->SetFactory("P", manager.GetFactory("Ptent")); - manager.SetFactory("P", P); } - // ===================================================================================================== - // ================================= Algorithm: Replicate ======================================== - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_Replicate(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, int /* levelID */, std::vector& keeps) const - { - auto P = rcp(new MueLu::ReplicatePFactory()); - - ParameterList Pparams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "replicate: npdes", int, Pparams); - - P->SetParameterList(Pparams); - manager.SetFactory("P", P); + P->SetFactory("P", manager.GetFactory("Ptent")); + manager.SetFactory("P", P); + + bool filteringChangesMatrix = useFiltering && !MUELU_TEST_PARAM_2LIST(paramList, defaultList, "aggregation: drop tol", double, 0); + MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); + if (reuseType == "tP" && !filteringChangesMatrix) + keeps.push_back(keep_pair("AP reuse data", P.get())); +} + +// ===================================================================================================== +// =============================== Algorithm: Energy Minimization ====================================== +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_Emin(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, + int /* levelID */, std::vector& /* keeps */) const { + MUELU_SET_VAR_2LIST(paramList, defaultList, "emin: pattern", std::string, patternType); + MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); + 
TEUCHOS_TEST_FOR_EXCEPTION(patternType != "AkPtent", Exceptions::InvalidArgument, + "Invalid pattern name: \"" << patternType << "\". Valid options: \"AkPtent\""); + // Pattern + auto patternFactory = rcp(new PatternFactory()); + ParameterList patternParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "emin: pattern order", int, patternParams); + patternFactory->SetParameterList(patternParams); + patternFactory->SetFactory("P", manager.GetFactory("Ptent")); + manager.SetFactory("Ppattern", patternFactory); + + // Constraint + auto constraintFactory = rcp(new ConstraintFactory()); + constraintFactory->SetFactory("Ppattern", manager.GetFactory("Ppattern")); + constraintFactory->SetFactory("CoarseNullspace", manager.GetFactory("Ptent")); + manager.SetFactory("Constraint", constraintFactory); + + // Emin Factory + auto P = rcp(new EminPFactory()); + // Filtering + MUELU_SET_VAR_2LIST(paramList, defaultList, "emin: use filtered matrix", bool, useFiltering); + if (useFiltering) { + // NOTE: Here, non-Kokkos and Kokkos versions diverge in the way the + // dependency tree is setup. The Kokkos version has merged the the + // FilteredAFactory into the CoalesceDropFactory. 
+ if (!useKokkos_) { + RCP filterFactory = rcp(new FilteredAFactory()); + + ParameterList fParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use lumping", bool, fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: reuse graph", bool, fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: reuse eigenvalue", bool, fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use root stencil", bool, fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: Dirichlet threshold", double, fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use spread lumping", bool, fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: spread lumping diag dom growth factor", double, fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: spread lumping diag dom cap", double, fParams); + filterFactory->SetParameterList(fParams); + filterFactory->SetFactory("Graph", manager.GetFactory("Graph")); + filterFactory->SetFactory("Aggregates", manager.GetFactory("Aggregates")); + filterFactory->SetFactory("UnAmalgamationInfo", manager.GetFactory("UnAmalgamationInfo")); + // I'm not sure why we need this line. 
See comments for DofsPerNode for UncoupledAggregation above + filterFactory->SetFactory("Filtering", manager.GetFactory("Graph")); + + P->SetFactory("A", filterFactory); + } else { + P->SetFactory("A", manager.GetFactory("Graph")); + } } - // ===================================================================================================== - // ====================================== Algorithm: Combine ============================================ - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_Combine(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, int /* levelID */, std::vector& keeps) const - { - auto P = rcp(new MueLu::CombinePFactory()); - - ParameterList Pparams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "combine: numBlks", int, Pparams); - - P->SetParameterList(Pparams); - manager.SetFactory("P", P); - + // Energy minimization + ParameterList Pparams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "emin: num iterations", int, Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "emin: iterative method", std::string, Pparams); + if (reuseType == "emin") { + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "emin: num reuse iterations", int, Pparams); + Pparams.set("Keep P0", true); + Pparams.set("Keep Constraint0", true); } - - - // ===================================================================================================== - // ====================================== Algorithm: Matlab ============================================ - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_Matlab(ParameterList& paramList, const ParameterList& /* defaultList */, FactoryManager& manager, - int /* levelID */, std::vector& /* keeps */) 
const { + P->SetParameterList(Pparams); + P->SetFactory("P", manager.GetFactory("Ptent")); + P->SetFactory("Constraint", manager.GetFactory("Constraint")); + manager.SetFactory("P", P); +} + +// ===================================================================================================== +// ================================= Algorithm: Petrov-Galerkin ======================================== +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_PG(ParameterList& /* paramList */, const ParameterList& /* defaultList */, FactoryManager& manager, + int /* levelID */, std::vector& /* keeps */) const { + TEUCHOS_TEST_FOR_EXCEPTION(this->implicitTranspose_, Exceptions::RuntimeError, + "Implicit transpose not supported with Petrov-Galerkin smoothed transfer operators: Set \"transpose: use implicit\" to false!\n" + "Petrov-Galerkin transfer operator smoothing for non-symmetric problems requires a separate handling of the restriction operator which " + "does not allow the usage of implicit transpose easily."); + + // Petrov-Galerkin + auto P = rcp(new PgPFactory()); + P->SetFactory("P", manager.GetFactory("Ptent")); + manager.SetFactory("P", P); +} + +// ===================================================================================================== +// ================================= Algorithm: Replicate ======================================== +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_Replicate(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, int /* levelID */, std::vector& keeps) const { + auto P = rcp(new MueLu::ReplicatePFactory()); + + ParameterList Pparams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "replicate: npdes", int, Pparams); + + 
P->SetParameterList(Pparams); + manager.SetFactory("P", P); +} + +// ===================================================================================================== +// ====================================== Algorithm: Combine ============================================ +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_Combine(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, int /* levelID */, std::vector& keeps) const { + auto P = rcp(new MueLu::CombinePFactory()); + + ParameterList Pparams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "combine: numBlks", int, Pparams); + + P->SetParameterList(Pparams); + manager.SetFactory("P", P); +} + +// ===================================================================================================== +// ====================================== Algorithm: Matlab ============================================ +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_Matlab(ParameterList& paramList, const ParameterList& /* defaultList */, FactoryManager& manager, + int /* levelID */, std::vector& /* keeps */) const { #ifdef HAVE_MUELU_MATLAB - ParameterList Pparams = paramList.sublist("transfer: params"); - auto P = rcp(new TwoLevelMatlabFactory()); - P->SetParameterList(Pparams); - P->SetFactory("P", manager.GetFactory("Ptent")); - manager.SetFactory("P", P); + ParameterList Pparams = paramList.sublist("transfer: params"); + auto P = rcp(new TwoLevelMatlabFactory()); + P->SetParameterList(Pparams); + P->SetFactory("P", manager.GetFactory("Ptent")); + manager.SetFactory("P", P); #else - (void)paramList; - (void)manager; + (void)paramList; + (void)manager; #endif - } +} #undef MUELU_SET_VAR_2LIST #undef MUELU_TEST_AND_SET_VAR @@ -2194,546 
+2150,550 @@ namespace MueLu { #undef MUELU_TEST_PARAM_2LIST #undef MUELU_KOKKOS_FACTORY - size_t LevenshteinDistance(const char* s, size_t len_s, const char* t, size_t len_t); - - template - void ParameterListInterpreter::Validate(const ParameterList& constParamList) const { - ParameterList paramList = constParamList; - const ParameterList& validList = *MasterList::List(); - // Validate up to maxLevels level specific parameter sublists - const int maxLevels = 100; - - // Extract level specific list - std::vector paramLists; - for (int levelID = 0; levelID < maxLevels; levelID++) { - std::string sublistName = "level " + toString(levelID); - if (paramList.isSublist(sublistName)) { - paramLists.push_back(paramList.sublist(sublistName)); - // paramLists.back().setName(sublistName); - paramList.remove(sublistName); - } +size_t LevenshteinDistance(const char* s, size_t len_s, const char* t, size_t len_t); + +template +void ParameterListInterpreter::Validate(const ParameterList& constParamList) const { + ParameterList paramList = constParamList; + const ParameterList& validList = *MasterList::List(); + // Validate up to maxLevels level specific parameter sublists + const int maxLevels = 100; + + // Extract level specific list + std::vector paramLists; + for (int levelID = 0; levelID < maxLevels; levelID++) { + std::string sublistName = "level " + toString(levelID); + if (paramList.isSublist(sublistName)) { + paramLists.push_back(paramList.sublist(sublistName)); + // paramLists.back().setName(sublistName); + paramList.remove(sublistName); } - paramLists.push_back(paramList); - // paramLists.back().setName("main"); + } + paramLists.push_back(paramList); + // paramLists.back().setName("main"); #ifdef HAVE_MUELU_MATLAB - // If Muemex is supported, hide custom level variables from validator by removing them from paramList's sublists - for (size_t i = 0; i < paramLists.size(); i++) { - std::vector customVars; // list of names (keys) to be removed from list - - 
for(Teuchos::ParameterList::ConstIterator it = paramLists[i].begin(); it != paramLists[i].end(); it++) { - std::string paramName = paramLists[i].name(it); + // If Muemex is supported, hide custom level variables from validator by removing them from paramList's sublists + for (size_t i = 0; i < paramLists.size(); i++) { + std::vector customVars; // list of names (keys) to be removed from list - if (IsParamMuemexVariable(paramName)) - customVars.push_back(paramName); - } + for (Teuchos::ParameterList::ConstIterator it = paramLists[i].begin(); it != paramLists[i].end(); it++) { + std::string paramName = paramLists[i].name(it); - // Remove the keys - for (size_t j = 0; j < customVars.size(); j++) - paramLists[i].remove(customVars[j], false); + if (IsParamMuemexVariable(paramName)) + customVars.push_back(paramName); } + + // Remove the keys + for (size_t j = 0; j < customVars.size(); j++) + paramLists[i].remove(customVars[j], false); + } #endif - const int maxDepth = 0; - for (size_t i = 0; i < paramLists.size(); i++) { - // validate every sublist - try { - paramLists[i].validateParameters(validList, maxDepth); - - } catch (const Teuchos::Exceptions::InvalidParameterName& e) { - std::string eString = e.what(); - - // Parse name from: - size_t nameStart = eString.find_first_of('"') + 1; - size_t nameEnd = eString.find_first_of('"', nameStart); - std::string name = eString.substr(nameStart, nameEnd - nameStart); - - size_t bestScore = 100; - std::string bestName = ""; - for (ParameterList::ConstIterator it = validList.begin(); it != validList.end(); it++) { - const std::string& pName = validList.name(it); - this->GetOStream(Runtime1) << "| " << pName; - size_t score = LevenshteinDistance(name.c_str(), name.length(), pName.c_str(), pName.length()); - this->GetOStream(Runtime1) << " -> " << score << std::endl; - if (score < bestScore) { - bestScore = score; - bestName = pName; - } + const int maxDepth = 0; + for (size_t i = 0; i < paramLists.size(); i++) { + // validate 
every sublist + try { + paramLists[i].validateParameters(validList, maxDepth); + + } catch (const Teuchos::Exceptions::InvalidParameterName& e) { + std::string eString = e.what(); + + // Parse name from: + size_t nameStart = eString.find_first_of('"') + 1; + size_t nameEnd = eString.find_first_of('"', nameStart); + std::string name = eString.substr(nameStart, nameEnd - nameStart); + + size_t bestScore = 100; + std::string bestName = ""; + for (ParameterList::ConstIterator it = validList.begin(); it != validList.end(); it++) { + const std::string& pName = validList.name(it); + this->GetOStream(Runtime1) << "| " << pName; + size_t score = LevenshteinDistance(name.c_str(), name.length(), pName.c_str(), pName.length()); + this->GetOStream(Runtime1) << " -> " << score << std::endl; + if (score < bestScore) { + bestScore = score; + bestName = pName; } - if (bestScore < 10 && bestName != "") { - TEUCHOS_TEST_FOR_EXCEPTION(true, Teuchos::Exceptions::InvalidParameterName, - eString << "The parameter name \"" + name + "\" is not valid. Did you mean \"" + bestName << "\"?\n"); + } + if (bestScore < 10 && bestName != "") { + TEUCHOS_TEST_FOR_EXCEPTION(true, Teuchos::Exceptions::InvalidParameterName, + eString << "The parameter name \"" + name + "\" is not valid. 
Did you mean \"" + bestName << "\"?\n"); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(true, Teuchos::Exceptions::InvalidParameterName, - eString << "The parameter name \"" + name + "\" is not valid.\n"); - } + } else { + TEUCHOS_TEST_FOR_EXCEPTION(true, Teuchos::Exceptions::InvalidParameterName, + eString << "The parameter name \"" + name + "\" is not valid.\n"); } } } +} + +// ===================================================================================================== +// ==================================== FACTORY interpreter ============================================ +// ===================================================================================================== +template +void ParameterListInterpreter:: + SetFactoryParameterList(const ParameterList& constParamList) { + // Create a non const copy of the parameter list + // Working with a modifiable list is much much easier than with original one + ParameterList paramList = constParamList; + + // Parameter List Parsing: + // --------- + // + // + // + if (paramList.isSublist("Matrix")) { + blockSize_ = paramList.sublist("Matrix").get("PDE equations", MasterList::getDefault("number of equations")); + dofOffset_ = paramList.sublist("Matrix").get("DOF offset", 0); // undocumented parameter allowing to define a DOF offset of the global dofs of an operator (defaul = 0) + } - // ===================================================================================================== - // ==================================== FACTORY interpreter ============================================ - // ===================================================================================================== - template - void ParameterListInterpreter:: - SetFactoryParameterList(const ParameterList& constParamList) { - // Create a non const copy of the parameter list - // Working with a modifiable list is much much easier than with original one - ParameterList paramList = constParamList; - - // Parameter List Parsing: - // 
--------- - // - // - // - if (paramList.isSublist("Matrix")) { - blockSize_ = paramList.sublist("Matrix").get("PDE equations", MasterList::getDefault("number of equations")); - dofOffset_ = paramList.sublist("Matrix").get("DOF offset", 0); // undocumented parameter allowing to define a DOF offset of the global dofs of an operator (defaul = 0) + // create new FactoryFactory object if necessary + if (factFact_ == Teuchos::null) + factFact_ = Teuchos::rcp(new FactoryFactory()); + + // Parameter List Parsing: + // --------- + // + // <== call BuildFactoryMap() on this parameter list + // ... + // + // + FactoryMap factoryMap; + FactoryManagerMap factoryManagers; + if (paramList.isSublist("Factories")) + this->BuildFactoryMap(paramList.sublist("Factories"), factoryMap, factoryMap, factoryManagers); + + // Parameter List Parsing: + // --------- + // + // + // <== get + // <== get + // + // <== parse first args and call BuildFactoryMap() on the rest of this parameter list + // ... + // + // + // + if (paramList.isSublist("Hierarchy")) { + ParameterList hieraList = paramList.sublist("Hierarchy"); // copy because list temporally modified (remove 'id') + + // Get hierarchy options + if (hieraList.isParameter("max levels")) { + this->numDesiredLevel_ = hieraList.get("max levels"); + hieraList.remove("max levels"); } - // create new FactoryFactory object if necessary - if (factFact_ == Teuchos::null) - factFact_ = Teuchos::rcp(new FactoryFactory()); - - // Parameter List Parsing: - // --------- - // - // <== call BuildFactoryMap() on this parameter list - // ... - // - // - FactoryMap factoryMap; - FactoryManagerMap factoryManagers; - if (paramList.isSublist("Factories")) - this->BuildFactoryMap(paramList.sublist("Factories"), factoryMap, factoryMap, factoryManagers); - - // Parameter List Parsing: - // --------- - // - // - // <== get - // <== get - // - // <== parse first args and call BuildFactoryMap() on the rest of this parameter list - // ... 
- // - // - // - if (paramList.isSublist("Hierarchy")) { - ParameterList hieraList = paramList.sublist("Hierarchy"); // copy because list temporally modified (remove 'id') - - // Get hierarchy options - if (hieraList.isParameter("max levels")) { - this->numDesiredLevel_ = hieraList.get("max levels"); - hieraList.remove("max levels"); - } + if (hieraList.isParameter("coarse: max size")) { + this->maxCoarseSize_ = hieraList.get("coarse: max size"); + hieraList.remove("coarse: max size"); + } - if (hieraList.isParameter("coarse: max size")) { - this->maxCoarseSize_ = hieraList.get("coarse: max size"); - hieraList.remove("coarse: max size"); - } + if (hieraList.isParameter("repartition: rebalance P and R")) { + this->doPRrebalance_ = hieraList.get("repartition: rebalance P and R"); + hieraList.remove("repartition: rebalance P and R"); + } - if (hieraList.isParameter("repartition: rebalance P and R")) { - this->doPRrebalance_ = hieraList.get("repartition: rebalance P and R"); - hieraList.remove("repartition: rebalance P and R"); - } + if (hieraList.isParameter("transpose: use implicit")) { + this->implicitTranspose_ = hieraList.get("transpose: use implicit"); + hieraList.remove("transpose: use implicit"); + } - if (hieraList.isParameter("transpose: use implicit")) { - this->implicitTranspose_ = hieraList.get("transpose: use implicit"); - hieraList.remove("transpose: use implicit"); - } + if (hieraList.isParameter("fuse prolongation and update")) { + this->fuseProlongationAndUpdate_ = hieraList.get("fuse prolongation and update"); + hieraList.remove("fuse prolongation and update"); + } - if (hieraList.isParameter("fuse prolongation and update")) { - this->fuseProlongationAndUpdate_ = hieraList.get("fuse prolongation and update"); - hieraList.remove("fuse prolongation and update"); - } + if (hieraList.isParameter("nullspace: suppress dimension check")) { + this->suppressNullspaceDimensionCheck_ = hieraList.get("nullspace: suppress dimension check"); + 
hieraList.remove("nullspace: suppress dimension check"); + } - if (hieraList.isParameter("nullspace: suppress dimension check")) { - this->suppressNullspaceDimensionCheck_ = hieraList.get("nullspace: suppress dimension check"); - hieraList.remove("nullspace: suppress dimension check"); - } + if (hieraList.isParameter("number of vectors")) { + this->numDesiredLevel_ = hieraList.get("number of vectors"); + hieraList.remove("number of vectors"); + } - if (hieraList.isParameter("number of vectors")) { - this->numDesiredLevel_ = hieraList.get("number of vectors"); - hieraList.remove("number of vectors"); - } + if (hieraList.isSublist("matvec params")) + this->matvecParams_ = Teuchos::parameterList(hieraList.sublist("matvec params")); - if (hieraList.isSublist("matvec params")) - this->matvecParams_ = Teuchos::parameterList(hieraList.sublist("matvec params")); + if (hieraList.isParameter("coarse grid correction scaling factor")) { + this->scalingFactor_ = hieraList.get("coarse grid correction scaling factor"); + hieraList.remove("coarse grid correction scaling factor"); + } + // Translate cycle type parameter + if (hieraList.isParameter("cycle type")) { + std::map cycleMap; + cycleMap["V"] = VCYCLE; + cycleMap["W"] = WCYCLE; - if (hieraList.isParameter("coarse grid correction scaling factor")) { - this->scalingFactor_ = hieraList.get("coarse grid correction scaling factor"); - hieraList.remove("coarse grid correction scaling factor"); - } + std::string cycleType = hieraList.get("cycle type"); + TEUCHOS_TEST_FOR_EXCEPTION(cycleMap.count(cycleType) == 0, Exceptions::RuntimeError, "Invalid cycle type: \"" << cycleType << "\""); + this->Cycle_ = cycleMap[cycleType]; + } - // Translate cycle type parameter - if (hieraList.isParameter("cycle type")) { - std::map cycleMap; - cycleMap["V"] = VCYCLE; - cycleMap["W"] = WCYCLE; + if (hieraList.isParameter("W cycle start level")) { + this->WCycleStartLevel_ = hieraList.get("W cycle start level"); + } - std::string cycleType = 
hieraList.get("cycle type"); - TEUCHOS_TEST_FOR_EXCEPTION(cycleMap.count(cycleType) == 0, Exceptions::RuntimeError, "Invalid cycle type: \"" << cycleType << "\""); - this->Cycle_ = cycleMap[cycleType]; - } + if (hieraList.isParameter("verbosity")) { + std::string vl = hieraList.get("verbosity"); + hieraList.remove("verbosity"); + this->verbosity_ = toVerbLevel(vl); + } - if (hieraList.isParameter("W cycle start level")) { - this->WCycleStartLevel_ = hieraList.get("W cycle start level"); - } + if (hieraList.isParameter("output filename")) + VerboseObject::SetMueLuOFileStream(hieraList.get("output filename")); + + if (hieraList.isParameter("dependencyOutputLevel")) + this->graphOutputLevel_ = hieraList.get("dependencyOutputLevel"); + + // Check for the reuse case + if (hieraList.isParameter("reuse")) + Factory::DisableMultipleCheckGlobally(); + + if (hieraList.isSublist("DataToWrite")) { + //TODO We should be able to specify any data. If it exists, write it. + //TODO This would requires something like std::set > + ParameterList foo = hieraList.sublist("DataToWrite"); + std::string dataName = "Matrices"; + if (foo.isParameter(dataName)) + this->matricesToPrint_["A"] = Teuchos::getArrayFromStringParameter(foo, dataName); + dataName = "Prolongators"; + if (foo.isParameter(dataName)) + this->matricesToPrint_["P"] = Teuchos::getArrayFromStringParameter(foo, dataName); + dataName = "Restrictors"; + if (foo.isParameter(dataName)) + this->matricesToPrint_["R"] = Teuchos::getArrayFromStringParameter(foo, dataName); + dataName = "D0"; + if (foo.isParameter(dataName)) + this->matricesToPrint_["D0"] = Teuchos::getArrayFromStringParameter(foo, dataName); + } - if (hieraList.isParameter("verbosity")) { - std::string vl = hieraList.get("verbosity"); - hieraList.remove("verbosity"); - this->verbosity_ = toVerbLevel(vl); - } + // Get level configuration + for (ParameterList::ConstIterator param = hieraList.begin(); param != hieraList.end(); ++param) { + const std::string& paramName = 
hieraList.name(param); - if (hieraList.isParameter("output filename")) - VerboseObject::SetMueLuOFileStream(hieraList.get("output filename")); - - if (hieraList.isParameter("dependencyOutputLevel")) - this->graphOutputLevel_ = hieraList.get("dependencyOutputLevel"); - - // Check for the reuse case - if (hieraList.isParameter("reuse")) - Factory::DisableMultipleCheckGlobally(); - - if (hieraList.isSublist("DataToWrite")) { - //TODO We should be able to specify any data. If it exists, write it. - //TODO This would requires something like std::set > - ParameterList foo = hieraList.sublist("DataToWrite"); - std::string dataName = "Matrices"; - if (foo.isParameter(dataName)) - this->matricesToPrint_["A"] = Teuchos::getArrayFromStringParameter(foo, dataName); - dataName = "Prolongators"; - if (foo.isParameter(dataName)) - this->matricesToPrint_["P"] = Teuchos::getArrayFromStringParameter(foo, dataName); - dataName = "Restrictors"; - if (foo.isParameter(dataName)) - this->matricesToPrint_["R"] = Teuchos::getArrayFromStringParameter(foo, dataName); - dataName = "D0"; - if (foo.isParameter(dataName)) - this->matricesToPrint_["D0"] = Teuchos::getArrayFromStringParameter(foo, dataName); - } + if (paramName != "DataToWrite" && hieraList.isSublist(paramName)) { + ParameterList levelList = hieraList.sublist(paramName); // copy because list temporally modified (remove 'id') - // Get level configuration - for (ParameterList::ConstIterator param = hieraList.begin(); param != hieraList.end(); ++param) { - const std::string & paramName = hieraList.name(param); - - if (paramName != "DataToWrite" && hieraList.isSublist(paramName)) { - ParameterList levelList = hieraList.sublist(paramName); // copy because list temporally modified (remove 'id') - - int startLevel = 0; if(levelList.isParameter("startLevel")) { startLevel = levelList.get("startLevel"); levelList.remove("startLevel"); } - int numDesiredLevel = 1; if(levelList.isParameter("numDesiredLevel")) { numDesiredLevel = 
levelList.get("numDesiredLevel"); levelList.remove("numDesiredLevel"); } - - // Parameter List Parsing: - // --------- - // - // - // - // - // - // [] <== call BuildFactoryMap() on the rest of the parameter list - // - // - FactoryMap levelFactoryMap; - BuildFactoryMap(levelList, factoryMap, levelFactoryMap, factoryManagers); - - RCP m = rcp(new FactoryManager(levelFactoryMap)); - if (hieraList.isParameter("use kokkos refactor")) - m->SetKokkosRefactor(hieraList.get("use kokkos refactor")); - - if (startLevel >= 0) - this->AddFactoryManager(startLevel, numDesiredLevel, m); - else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::ParameterListInterpreter():: invalid level id"); - } /* TODO: else { } */ - } + int startLevel = 0; + if (levelList.isParameter("startLevel")) { + startLevel = levelList.get("startLevel"); + levelList.remove("startLevel"); + } + int numDesiredLevel = 1; + if (levelList.isParameter("numDesiredLevel")) { + numDesiredLevel = levelList.get("numDesiredLevel"); + levelList.remove("numDesiredLevel"); + } + + // Parameter List Parsing: + // --------- + // + // + // + // + // + // [] <== call BuildFactoryMap() on the rest of the parameter list + // + // + FactoryMap levelFactoryMap; + BuildFactoryMap(levelList, factoryMap, levelFactoryMap, factoryManagers); + + RCP m = rcp(new FactoryManager(levelFactoryMap)); + if (hieraList.isParameter("use kokkos refactor")) + m->SetKokkosRefactor(hieraList.get("use kokkos refactor")); + + if (startLevel >= 0) + this->AddFactoryManager(startLevel, numDesiredLevel, m); + else + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::ParameterListInterpreter():: invalid level id"); + } /* TODO: else { } */ } } +} + +//TODO: static? +/// \brief Interpret "Factories" sublist +/// +/// \param paramList [in]: "Factories" ParameterList +/// \param factoryMapIn [in]: FactoryMap maps variable names to factories. 
This factory map is used to resolve data dependencies of previously defined factories. +/// \param factoryMapOut [out]: FactoryMap maps variable names to factories. New factory entries are added to that FactoryMap. Usually, factoryMapIn and factoryMapOut should use the same object, such that new factories are added. We have to distinguish input and output if we build sub-factory managers, though. +/// \param factoryManagers [in/out]: FacotryManagerMap maps group names to a FactoryManager object. +/// +/// Interpret "Factories" parameter list. For each "factory" entry, add a new entry in the factoryMapOut map or create a new FacotryManager +/// +/// Parameter List Parsing: +/// Create an entry in factoryMapOut for each parameter of the list paramList +/// --------- +/// +/// +/// +/// +/// +/// ... +/// +/// +/// +/// --------- +/// Group factories +/// We can group factories using parameter sublists with the "group" parameter +/// +/// +/// +/// +/// +/// +/// +/// +/// + +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// These factory groups can be used with factories for blocked operators (such as the BlockedPFactory) +/// to easily define the operations on the sub-blocks. +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// As an alternative one can also directly specify the factories in the sublists "block1", "block2", etc..., of course. +/// But using blocks has the advantage that one can reuse them in all blocked factories. +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// As an alternative one can also directly specify the factories in the sublists "block1", "block2", etc..., of course. +/// +/// + +/// --------- +/// add more dependencies (circular dependencies) +/// +/// The NullspaceFactory needs to know which factory generates the null space on the coarse level (e.g., the TentativePFactory or the RebalancedPFactory). 
+/// However, we cannot set the information in this place in the xml file, since the tentative prolongator facotry is typically defined later. +/// We have to add that dependency later to the NullspaceFactory: +/// +/// +/// +/// +/// +/// +/// +/// +/// <...> +/// +/// +/// +/// +/// +/// +/// <...> +/// +/// +/// +/// After the definition of the generating factory for the nullspace (in this case myRebalanceProlongatorFact) +/// we add that dependency to the NullspaceFactory instance myNspFact +/// +/// +/// +/// +/// +/// +/// We have to create a new block (with a different name than myNspFact). In the example we use "myNspFactDeps". +/// It should contain a parameter "dependency for" with the name of the factory that we want the dependencies to be addded to. +/// With above block we do not need the entry for the Nullspace in the global FactoryManager any more. +template +void ParameterListInterpreter:: + BuildFactoryMap(const ParameterList& paramList, const FactoryMap& factoryMapIn, FactoryMap& factoryMapOut, FactoryManagerMap& factoryManagers) const { + for (ParameterList::ConstIterator param = paramList.begin(); param != paramList.end(); ++param) { + const std::string& paramName = paramList.name(param); //< paramName contains the user chosen factory name (e.g., "smootherFact1") + const Teuchos::ParameterEntry& paramValue = paramList.entry(param); //< for factories, paramValue should be either a list or just a MueLu Factory (e.g., TrilinosSmoother) + + //TODO: do not allow name of existing MueLu classes (can be tested using FactoryFactory) + + if (paramValue.isList()) { + ParameterList paramList1 = Teuchos::getValue(paramValue); + if (paramList1.isParameter("factory")) { // default: just a factory definition + // New Factory is a sublist with internal parameters and/or data dependencies + TEUCHOS_TEST_FOR_EXCEPTION(paramList1.isParameter("dependency for") == true, Exceptions::RuntimeError, + "MueLu::ParameterListInterpreter(): It seems that in the parameter 
lists for defining " << paramName << " there is both a 'factory' and 'dependency for' parameter. This is not allowed. Please remove the 'dependency for' parameter."); + factoryMapOut[paramName] = factFact_->BuildFactory(paramValue, factoryMapIn, factoryManagers); - //TODO: static? - /// \brief Interpret "Factories" sublist - /// - /// \param paramList [in]: "Factories" ParameterList - /// \param factoryMapIn [in]: FactoryMap maps variable names to factories. This factory map is used to resolve data dependencies of previously defined factories. - /// \param factoryMapOut [out]: FactoryMap maps variable names to factories. New factory entries are added to that FactoryMap. Usually, factoryMapIn and factoryMapOut should use the same object, such that new factories are added. We have to distinguish input and output if we build sub-factory managers, though. - /// \param factoryManagers [in/out]: FacotryManagerMap maps group names to a FactoryManager object. - /// - /// Interpret "Factories" parameter list. For each "factory" entry, add a new entry in the factoryMapOut map or create a new FacotryManager - /// - /// Parameter List Parsing: - /// Create an entry in factoryMapOut for each parameter of the list paramList - /// --------- - /// - /// - /// - /// - /// - /// ... - /// - /// - /// - /// --------- - /// Group factories - /// We can group factories using parameter sublists with the "group" parameter - /// - /// - /// - /// - /// - /// - /// - /// - /// - - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// These factory groups can be used with factories for blocked operators (such as the BlockedPFactory) - /// to easily define the operations on the sub-blocks. - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// As an alternative one can also directly specify the factories in the sublists "block1", "block2", etc..., of course. - /// But using blocks has the advantage that one can reuse them in all blocked factories. 
- /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// As an alternative one can also directly specify the factories in the sublists "block1", "block2", etc..., of course. - /// - /// - - /// --------- - /// add more dependencies (circular dependencies) - /// - /// The NullspaceFactory needs to know which factory generates the null space on the coarse level (e.g., the TentativePFactory or the RebalancedPFactory). - /// However, we cannot set the information in this place in the xml file, since the tentative prolongator facotry is typically defined later. - /// We have to add that dependency later to the NullspaceFactory: - /// - /// - /// - /// - /// - /// - /// - /// - /// <...> - /// - /// - /// - /// - /// - /// - /// <...> - /// - /// - /// - /// After the definition of the generating factory for the nullspace (in this case myRebalanceProlongatorFact) - /// we add that dependency to the NullspaceFactory instance myNspFact - /// - /// - /// - /// - /// - /// - /// We have to create a new block (with a different name than myNspFact). In the example we use "myNspFactDeps". - /// It should contain a parameter "dependency for" with the name of the factory that we want the dependencies to be addded to. - /// With above block we do not need the entry for the Nullspace in the global FactoryManager any more. 
- template - void ParameterListInterpreter:: - BuildFactoryMap(const ParameterList& paramList, const FactoryMap& factoryMapIn, FactoryMap& factoryMapOut, FactoryManagerMap& factoryManagers) const { - for (ParameterList::ConstIterator param = paramList.begin(); param != paramList.end(); ++param) { - const std::string & paramName = paramList.name(param); //< paramName contains the user chosen factory name (e.g., "smootherFact1") - const Teuchos::ParameterEntry & paramValue = paramList.entry(param); //< for factories, paramValue should be either a list or just a MueLu Factory (e.g., TrilinosSmoother) - - //TODO: do not allow name of existing MueLu classes (can be tested using FactoryFactory) - - if (paramValue.isList()) { - ParameterList paramList1 = Teuchos::getValue(paramValue); - if (paramList1.isParameter("factory")) { // default: just a factory definition - // New Factory is a sublist with internal parameters and/or data dependencies - TEUCHOS_TEST_FOR_EXCEPTION(paramList1.isParameter("dependency for") == true, Exceptions::RuntimeError, - "MueLu::ParameterListInterpreter(): It seems that in the parameter lists for defining " << paramName << - " there is both a 'factory' and 'dependency for' parameter. This is not allowed. Please remove the 'dependency for' parameter."); - - factoryMapOut[paramName] = factFact_->BuildFactory(paramValue, factoryMapIn, factoryManagers); - - } else if (paramList1.isParameter("dependency for")) { // add more data dependencies to existing factory - TEUCHOS_TEST_FOR_EXCEPTION(paramList1.isParameter("factory") == true, Exceptions::RuntimeError, - "MueLu::ParameterListInterpreter(): It seems that in the parameter lists for defining " << paramName << - " there is both a 'factory' and 'dependency for' parameter. 
This is not allowed."); - - std::string factoryName = paramList1.get("dependency for"); - - RCP factbase = factoryMapIn.find(factoryName /*paramName*/)->second; // access previously defined factory - TEUCHOS_TEST_FOR_EXCEPTION(factbase.is_null() == true, Exceptions::RuntimeError, - "MueLu::ParameterListInterpreter(): could not find factory " + factoryName + " in factory map. Did you define it before?"); - - RCP factoryconst = Teuchos::rcp_dynamic_cast(factbase); - RCP< Factory> factory = Teuchos::rcp_const_cast(factoryconst); - - // Read the RCP parameters of the class T - RCP validParamList = factory->GetValidParameterList(); - for (ParameterList::ConstIterator vparam = validParamList->begin(); vparam != validParamList->end(); ++vparam) { - const std::string& pName = validParamList->name(vparam); - - if (!paramList1.isParameter(pName)) { - // Ignore unknown parameters - continue; - } + } else if (paramList1.isParameter("dependency for")) { // add more data dependencies to existing factory + TEUCHOS_TEST_FOR_EXCEPTION(paramList1.isParameter("factory") == true, Exceptions::RuntimeError, + "MueLu::ParameterListInterpreter(): It seems that in the parameter lists for defining " << paramName << " there is both a 'factory' and 'dependency for' parameter. This is not allowed."); - if (validParamList->isType< RCP >(pName)) { - // Generate or get factory described by pName and set dependency - RCP generatingFact = factFact_->BuildFactory(paramList1.getEntry(pName), factoryMapIn, factoryManagers); - factory->SetFactory(pName, generatingFact.create_weak()); - - } else if (validParamList->isType >(pName)) { - if (pName == "ParameterList") { - // NOTE: we cannot use - // subList = sublist(rcpFromRef(paramList), pName) - // here as that would result in sublist also being a reference to a temporary object. 
- // The resulting dereferencing in the corresponding factory would then segfault - RCP subList = Teuchos::sublist(rcp(new ParameterList(paramList1)), pName); - factory->SetParameter(pName, ParameterEntry(subList)); - } - } else { - factory->SetParameter(pName, paramList1.getEntry(pName)); - } + std::string factoryName = paramList1.get("dependency for"); + + RCP factbase = factoryMapIn.find(factoryName /*paramName*/)->second; // access previously defined factory + TEUCHOS_TEST_FOR_EXCEPTION(factbase.is_null() == true, Exceptions::RuntimeError, + "MueLu::ParameterListInterpreter(): could not find factory " + factoryName + " in factory map. Did you define it before?"); + + RCP factoryconst = Teuchos::rcp_dynamic_cast(factbase); + RCP factory = Teuchos::rcp_const_cast(factoryconst); + + // Read the RCP parameters of the class T + RCP validParamList = factory->GetValidParameterList(); + for (ParameterList::ConstIterator vparam = validParamList->begin(); vparam != validParamList->end(); ++vparam) { + const std::string& pName = validParamList->name(vparam); + + if (!paramList1.isParameter(pName)) { + // Ignore unknown parameters + continue; } - } else if (paramList1.isParameter("group")) { // definitiion of a factory group (for a factory manager) - // Define a new (sub) FactoryManager - std::string groupType = paramList1.get("group"); - TEUCHOS_TEST_FOR_EXCEPTION(groupType!="FactoryManager", Exceptions::RuntimeError, - "group must be of type \"FactoryManager\"."); - - ParameterList groupList = paramList1; // copy because list temporally modified (remove 'id') - groupList.remove("group"); - - bool setKokkosRefactor = false; - bool kokkosRefactor = useKokkos_; - if (groupList.isParameter("use kokkos refactor")) { - kokkosRefactor = groupList.get("use kokkos refactor"); - groupList.remove("use kokkos refactor"); - setKokkosRefactor = true; + if (validParamList->isType >(pName)) { + // Generate or get factory described by pName and set dependency + RCP generatingFact = 
factFact_->BuildFactory(paramList1.getEntry(pName), factoryMapIn, factoryManagers); + factory->SetFactory(pName, generatingFact.create_weak()); + + } else if (validParamList->isType >(pName)) { + if (pName == "ParameterList") { + // NOTE: we cannot use + // subList = sublist(rcpFromRef(paramList), pName) + // here as that would result in sublist also being a reference to a temporary object. + // The resulting dereferencing in the corresponding factory would then segfault + RCP subList = Teuchos::sublist(rcp(new ParameterList(paramList1)), pName); + factory->SetParameter(pName, ParameterEntry(subList)); + } + } else { + factory->SetParameter(pName, paramList1.getEntry(pName)); } + } - FactoryMap groupFactoryMap; - BuildFactoryMap(groupList, factoryMapIn, groupFactoryMap, factoryManagers); + } else if (paramList1.isParameter("group")) { // definitiion of a factory group (for a factory manager) + // Define a new (sub) FactoryManager + std::string groupType = paramList1.get("group"); + TEUCHOS_TEST_FOR_EXCEPTION(groupType != "FactoryManager", Exceptions::RuntimeError, + "group must be of type \"FactoryManager\"."); + + ParameterList groupList = paramList1; // copy because list temporally modified (remove 'id') + groupList.remove("group"); + + bool setKokkosRefactor = false; + bool kokkosRefactor = useKokkos_; + if (groupList.isParameter("use kokkos refactor")) { + kokkosRefactor = groupList.get("use kokkos refactor"); + groupList.remove("use kokkos refactor"); + setKokkosRefactor = true; + } - // do not store groupFactoryMap in factoryMapOut - // Create a factory manager object from groupFactoryMap - RCP m = rcp(new FactoryManager(groupFactoryMap)); - if (setKokkosRefactor) - m->SetKokkosRefactor(kokkosRefactor); - factoryManagers[paramName] = m; + FactoryMap groupFactoryMap; + BuildFactoryMap(groupList, factoryMapIn, groupFactoryMap, factoryManagers); + + // do not store groupFactoryMap in factoryMapOut + // Create a factory manager object from groupFactoryMap + RCP m 
= rcp(new FactoryManager(groupFactoryMap)); + if (setKokkosRefactor) + m->SetKokkosRefactor(kokkosRefactor); + factoryManagers[paramName] = m; - } else { - this->GetOStream(Warnings0) << "Could not interpret parameter list " << paramList1 << std::endl; - TEUCHOS_TEST_FOR_EXCEPTION(false, Exceptions::RuntimeError, - "XML Parameter list must either be of type \"factory\" or of type \"group\"."); - } } else { - // default: just a factory (no parameter list) - factoryMapOut[paramName] = factFact_->BuildFactory(paramValue, factoryMapIn, factoryManagers); + this->GetOStream(Warnings0) << "Could not interpret parameter list " << paramList1 << std::endl; + TEUCHOS_TEST_FOR_EXCEPTION(false, Exceptions::RuntimeError, + "XML Parameter list must either be of type \"factory\" or of type \"group\"."); } + } else { + // default: just a factory (no parameter list) + factoryMapOut[paramName] = factFact_->BuildFactory(paramValue, factoryMapIn, factoryManagers); } } - - // ===================================================================================================== - // ======================================= MISC functions ============================================== - // ===================================================================================================== - template - void ParameterListInterpreter::SetupOperator(Operator& Op) const { - try { - Matrix& A = dynamic_cast(Op); - if (A.IsFixedBlockSizeSet() && (A.GetFixedBlockSize() != blockSize_)) - this->GetOStream(Warnings0) << "Setting matrix block size to " << blockSize_ << " (value of the parameter in the list) " - << "instead of " << A.GetFixedBlockSize() << " (provided matrix)." << std::endl - << "You may want to check \"number of equations\" (or \"PDE equations\" for factory style list) parameter." 
<< std::endl; - - A.SetFixedBlockSize(blockSize_, dofOffset_); +} + +// ===================================================================================================== +// ======================================= MISC functions ============================================== +// ===================================================================================================== +template +void ParameterListInterpreter::SetupOperator(Operator& Op) const { + try { + Matrix& A = dynamic_cast(Op); + if (A.IsFixedBlockSizeSet() && (A.GetFixedBlockSize() != blockSize_)) + this->GetOStream(Warnings0) << "Setting matrix block size to " << blockSize_ << " (value of the parameter in the list) " + << "instead of " << A.GetFixedBlockSize() << " (provided matrix)." << std::endl + << "You may want to check \"number of equations\" (or \"PDE equations\" for factory style list) parameter." << std::endl; + + A.SetFixedBlockSize(blockSize_, dofOffset_); #ifdef HAVE_MUELU_DEBUG - MatrixUtils::checkLocalRowMapMatchesColMap(A); -#endif // HAVE_MUELU_DEBUG - - } catch (std::bad_cast&) { - this->GetOStream(Warnings0) << "Skipping setting block size as the operator is not a matrix" << std::endl; - } - } + MatrixUtils::checkLocalRowMapMatchesColMap(A); +#endif // HAVE_MUELU_DEBUG - template - void ParameterListInterpreter::SetupHierarchy(Hierarchy& H) const { - H.SetCycle(Cycle_); - H.SetCycleStartLevel(WCycleStartLevel_); - H.SetProlongatorScalingFactor(scalingFactor_); - HierarchyManager::SetupHierarchy(H); + } catch (std::bad_cast&) { + this->GetOStream(Warnings0) << "Skipping setting block size as the operator is not a matrix" << std::endl; } - - static bool compare(const ParameterList& list1, const ParameterList& list2) { - // First loop through and validate the parameters at this level. 
- // In addition, we generate a list of sublists that we will search next - for (ParameterList::ConstIterator it = list1.begin(); it != list1.end(); it++) { - const std::string& name = it->first; - const Teuchos::ParameterEntry& entry1 = it->second; - - const Teuchos::ParameterEntry *entry2 = list2.getEntryPtr(name); - if (!entry2) // entry is not present in the second list - return false; - if (entry1.isList() && entry2->isList()) { // sublist check - compare(Teuchos::getValue(entry1), Teuchos::getValue(*entry2)); - continue; - } - if (entry1.getAny(false) != entry2->getAny(false)) // entries have different types or different values - return false; +} + +template +void ParameterListInterpreter::SetupHierarchy(Hierarchy& H) const { + H.SetCycle(Cycle_); + H.SetCycleStartLevel(WCycleStartLevel_); + H.SetProlongatorScalingFactor(scalingFactor_); + HierarchyManager::SetupHierarchy(H); +} + +static bool compare(const ParameterList& list1, const ParameterList& list2) { + // First loop through and validate the parameters at this level. 
+ // In addition, we generate a list of sublists that we will search next + for (ParameterList::ConstIterator it = list1.begin(); it != list1.end(); it++) { + const std::string& name = it->first; + const Teuchos::ParameterEntry& entry1 = it->second; + + const Teuchos::ParameterEntry* entry2 = list2.getEntryPtr(name); + if (!entry2) // entry is not present in the second list + return false; + if (entry1.isList() && entry2->isList()) { // sublist check + compare(Teuchos::getValue(entry1), Teuchos::getValue(*entry2)); + continue; } - - return true; + if (entry1.getAny(false) != entry2->getAny(false)) // entries have different types or different values + return false; } - static inline bool areSame(const ParameterList& list1, const ParameterList& list2) { - return compare(list1, list2) && compare(list2, list1); - } + return true; +} + +static inline bool areSame(const ParameterList& list1, const ParameterList& list2) { + return compare(list1, list2) && compare(list2, list1); +} -} // namespace MueLu +} // namespace MueLu #define MUELU_PARAMETERLISTINTERPRETER_SHORT #endif /* MUELU_PARAMETERLISTINTERPRETER_DEF_HPP */ diff --git a/packages/muelu/src/Interface/MueLu_ParameterListUtils.cpp b/packages/muelu/src/Interface/MueLu_ParameterListUtils.cpp index 051a2dfd622e..aa120ca51725 100644 --- a/packages/muelu/src/Interface/MueLu_ParameterListUtils.cpp +++ b/packages/muelu/src/Interface/MueLu_ParameterListUtils.cpp @@ -49,137 +49,133 @@ namespace MueLu { - /* See also: ML_Epetra::UpdateList */ - //! @brief: merge two parameter lists - //! - //! @param source [in]: parameter lists with source parameters which are to be merged in into the dest parameter list - //! @param dest [in,out]: parameter list with, e.g., default parameters which is extended by parameters from source parameter list - //! 
@param overWrite (bool): if true, overwrite parameters in dest with entries from source - void MergeParameterList(const Teuchos::ParameterList &source, Teuchos::ParameterList &dest, bool overWrite){ - for(Teuchos::ParameterList::ConstIterator param=source.begin(); param!=source.end(); ++param) - if (dest.isParameter(source.name(param)) == false || overWrite) - dest.setEntry(source.name(param),source.entry(param)); - } - - void CreateSublists(const Teuchos::ParameterList &List, Teuchos::ParameterList &newList) - { - using Teuchos::ParameterList; - using std::string; +/* See also: ML_Epetra::UpdateList */ +//! @brief: merge two parameter lists +//! +//! @param source [in]: parameter lists with source parameters which are to be merged in into the dest parameter list +//! @param dest [in,out]: parameter list with, e.g., default parameters which is extended by parameters from source parameter list +//! @param overWrite (bool): if true, overwrite parameters in dest with entries from source +void MergeParameterList(const Teuchos::ParameterList &source, Teuchos::ParameterList &dest, bool overWrite) { + for (Teuchos::ParameterList::ConstIterator param = source.begin(); param != source.end(); ++param) + if (dest.isParameter(source.name(param)) == false || overWrite) + dest.setEntry(source.name(param), source.entry(param)); +} + +void CreateSublists(const Teuchos::ParameterList &List, Teuchos::ParameterList &newList) { + using std::string; + using Teuchos::ParameterList; + + newList.setName(List.name()); + + // Copy general (= not level-specific) options and sublists to the new list. + // - Coarse and level-specific parameters are not copied yet. They will be moved to sublists later. 
+ // - Already existing level-specific lists are copied to the new list but the coarse list is not copied + // yet because it has to be modified before copy (s/coarse/smoother/) + for (ParameterList::ConstIterator param = List.begin(); param != List.end(); ++param) { + const string &pname = List.name(param); + + if ((pname.find(" (level", 0) == string::npos || pname.find("smoother: list (level", 0) == 0 || pname.find("aggregation: list (level", 0) == 0) && + (pname.find("coarse: ", 0) == string::npos)) { + newList.setEntry(pname, List.entry(param)); + } + } // for + + // Copy of the sublist "coarse: list" to the new list. Change "coarse:" to "smoother:" along the way. + if (List.isSublist("coarse: list")) { + const ParameterList &coarseList = List.sublist("coarse: list"); + ParameterList &newCoarseList = newList.sublist("coarse: list"); + for (ParameterList::ConstIterator param = coarseList.begin(); param != coarseList.end(); ++param) { + const string &pname = coarseList.name(param); + + if (pname.find("coarse:", 0) == 0) { + // change "coarse: " to "smoother:" + newCoarseList.setEntry("smoother: " + pname.substr(8), coarseList.entry(param)); + } else { + newCoarseList.setEntry(pname, coarseList.entry(param)); + } + } + } // if - newList.setName(List.name()); + // Copy of level-specific parameters and coarse parameters to sublist + for (ParameterList::ConstIterator param = List.begin(); param != List.end(); ++param) { + const string &pname = List.name(param); + if (pname.find(" (level", 0) != string::npos && pname.find("smoother: list (level", 0) != 0 && pname.find("aggregation: list (level", 0) != 0) { + // Copy level-specific parameters (smoother and aggregation) - // Copy general (= not level-specific) options and sublists to the new list. - // - Coarse and level-specific parameters are not copied yet. They will be moved to sublists later. 
- // - Already existing level-specific lists are copied to the new list but the coarse list is not copied - // yet because it has to be modified before copy (s/coarse/smoother/) - for (ParameterList::ConstIterator param=List.begin(); param!=List.end(); ++param) + // Scan pname (ex: pname="smoother: type (level 2)") + string type, option; + int levelID = -1; { - const string & pname=List.name(param); - - if ((pname.find(" (level",0) == string::npos || pname.find("smoother: list (level",0) == 0 || pname.find("aggregation: list (level",0) == 0) && - (pname.find("coarse: ",0) == string::npos)) - { - newList.setEntry(pname,List.entry(param)); - } - } // for - - // Copy of the sublist "coarse: list" to the new list. Change "coarse:" to "smoother:" along the way. - if (List.isSublist("coarse: list")) { - const ParameterList &coarseList = List.sublist("coarse: list"); - ParameterList &newCoarseList = newList.sublist("coarse: list"); - for (ParameterList::ConstIterator param=coarseList.begin(); param!=coarseList.end() ; ++param) { - const string & pname=coarseList.name(param); - - if (pname.find("coarse:",0) == 0) { - // change "coarse: " to "smoother:" - newCoarseList.setEntry("smoother: "+pname.substr(8),coarseList.entry(param)); - } else { - newCoarseList.setEntry(pname,coarseList.entry(param)); + typedef Teuchos::ArrayRCP::size_type size_type; // (!) 
+ Teuchos::Array ctype(size_type(pname.size() + 1)); + Teuchos::Array coption(size_type(pname.size() + 1)); + + int matched = sscanf(pname.c_str(), "%s %[^(](level %d)", ctype.getRawPtr(), coption.getRawPtr(), &levelID); // use [^(] instead of %s to allow for strings with white-spaces (ex: "ifpack list") + type = string(ctype.getRawPtr()); + option = string(coption.getRawPtr()); + option.resize(option.size() - 1); // remove final white-space + + if (matched != 3 || (type != "smoother:" && type != "aggregation:")) { + TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "MueLu::CreateSublist(), Line " << __LINE__ << ". " + << "Error in creating level-specific sublists" << std::endl + << "Offending parameter: " << pname << std::endl); } } - } // if - // Copy of level-specific parameters and coarse parameters to sublist - for (ParameterList::ConstIterator param=List.begin(); param!=List.end(); ++param) - { - const string & pname=List.name(param); - if (pname.find(" (level",0) != string::npos && pname.find("smoother: list (level",0) != 0 && pname.find("aggregation: list (level",0) != 0) - { - // Copy level-specific parameters (smoother and aggregation) - - // Scan pname (ex: pname="smoother: type (level 2)") - string type, option; - int levelID=-1; - { - typedef Teuchos::ArrayRCP::size_type size_type; // (!) - Teuchos::Array ctype (size_type(pname.size()+1)); - Teuchos::Array coption(size_type(pname.size()+1)); - - int matched = sscanf(pname.c_str(),"%s %[^(](level %d)", ctype.getRawPtr(), coption.getRawPtr(), &levelID); // use [^(] instead of %s to allow for strings with white-spaces (ex: "ifpack list") - type = string(ctype.getRawPtr()); - option = string(coption.getRawPtr()); option.resize(option.size () - 1); // remove final white-space - - if (matched != 3 || (type != "smoother:" && type != "aggregation:")) { - TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "MueLu::CreateSublist(), Line " << __LINE__ << ". 
" - << "Error in creating level-specific sublists" << std::endl - << "Offending parameter: " << pname << std::endl); - } - } - - // Create/grab the corresponding sublist of newList - ParameterList &newSubList = newList.sublist(type + " list (level " + Teuchos::toString(levelID) + ")"); - // Shove option w/o level number into sublist - newSubList.setEntry(type + " " + option,List.entry(param)); - - } else if (pname.find("coarse:",0) == 0 && pname != "coarse: list") { - // Copy coarse parameters - ParameterList &newCoarseList = newList.sublist("coarse: list"); // the coarse sublist is created only if there is at least one "coarse:" parameter - newCoarseList.setEntry("smoother: "+pname.substr(8),List.entry(param)); // change "coarse: " to "smoother:" - } // end if - - } // for - - } //MueLu::CreateSublist() - - // Usage: GetMLSubList(paramList, "smoother", 2); - const Teuchos::ParameterList & GetMLSubList(const Teuchos::ParameterList & paramList, const std::string & type, int levelID) { - static const Teuchos::ParameterList emptyParamList; - - char levelChar[11]; - sprintf(levelChar, "(level %d)", levelID); - std::string levelStr(levelChar); - - if (paramList.isSublist(type + ": list " + levelStr)) { - return paramList.sublist(type + ": list " + levelStr); - } else { - return emptyParamList; - } - } + // Create/grab the corresponding sublist of newList + ParameterList &newSubList = newList.sublist(type + " list (level " + Teuchos::toString(levelID) + ")"); + // Shove option w/o level number into sublist + newSubList.setEntry(type + " " + option, List.entry(param)); - // Extract all the parameters that begin with "str:" (but skip sublist) - Teuchos::RCP ExtractSetOfParameters(const Teuchos::ParameterList & paramList, const std::string & str) { - Teuchos::RCP subList = rcp(new Teuchos::ParameterList()); + } else if (pname.find("coarse:", 0) == 0 && pname != "coarse: list") { + // Copy coarse parameters + ParameterList &newCoarseList = newList.sublist("coarse: list"); // 
the coarse sublist is created only if there is at least one "coarse:" parameter + newCoarseList.setEntry("smoother: " + pname.substr(8), List.entry(param)); // change "coarse: " to "smoother:" + } // end if - for (Teuchos::ParameterList::ConstIterator param = paramList.begin(); param != paramList.end(); ++param) { - const Teuchos::ParameterEntry & entry = paramList.entry(param); - const std::string & pname = paramList.name(param); - if (pname.find(str+":",0) == 0 && !entry.isList()) { - subList->setEntry(pname,entry); - } - } + } // for + +} //MueLu::CreateSublist() + +// Usage: GetMLSubList(paramList, "smoother", 2); +const Teuchos::ParameterList &GetMLSubList(const Teuchos::ParameterList ¶mList, const std::string &type, int levelID) { + static const Teuchos::ParameterList emptyParamList; - return subList; + char levelChar[11]; + sprintf(levelChar, "(level %d)", levelID); + std::string levelStr(levelChar); + + if (paramList.isSublist(type + ": list " + levelStr)) { + return paramList.sublist(type + ": list " + levelStr); + } else { + return emptyParamList; } +} + +// Extract all the parameters that begin with "str:" (but skip sublist) +Teuchos::RCP ExtractSetOfParameters(const Teuchos::ParameterList ¶mList, const std::string &str) { + Teuchos::RCP subList = rcp(new Teuchos::ParameterList()); - // replace all string occurrences "from" with "to" in "str" - void replaceAll(std::string& str, const std::string& from, const std::string& to) { - if(from.empty()) - return; - size_t start_pos = 0; - while((start_pos = str.find(from, start_pos)) != std::string::npos) { - str.replace(start_pos, from.length(), to); - start_pos += to.length(); // In case 'to' contains 'from', like replacing 'x' with 'yx' + for (Teuchos::ParameterList::ConstIterator param = paramList.begin(); param != paramList.end(); ++param) { + const Teuchos::ParameterEntry &entry = paramList.entry(param); + const std::string &pname = paramList.name(param); + if (pname.find(str + ":", 0) == 0 && 
!entry.isList()) { + subList->setEntry(pname, entry); } } -} // namespace MueLu + return subList; +} + +// replace all string occurrences "from" with "to" in "str" +void replaceAll(std::string &str, const std::string &from, const std::string &to) { + if (from.empty()) + return; + size_t start_pos = 0; + while ((start_pos = str.find(from, start_pos)) != std::string::npos) { + str.replace(start_pos, from.length(), to); + start_pos += to.length(); // In case 'to' contains 'from', like replacing 'x' with 'yx' + } +} + +} // namespace MueLu diff --git a/packages/muelu/src/Interface/MueLu_ParameterListUtils.hpp b/packages/muelu/src/Interface/MueLu_ParameterListUtils.hpp index 9d55b472ad02..5a0f508c43df 100644 --- a/packages/muelu/src/Interface/MueLu_ParameterListUtils.hpp +++ b/packages/muelu/src/Interface/MueLu_ParameterListUtils.hpp @@ -56,47 +56,46 @@ namespace MueLu { - /* See also: ML_Epetra::UpdateList */ - void MergeParameterList(const Teuchos::ParameterList &source, Teuchos::ParameterList &dest, bool overWrite); +/* See also: ML_Epetra::UpdateList */ +void MergeParameterList(const Teuchos::ParameterList& source, Teuchos::ParameterList& dest, bool overWrite); - void CreateSublists(const Teuchos::ParameterList &List, Teuchos::ParameterList &newList); +void CreateSublists(const Teuchos::ParameterList& List, Teuchos::ParameterList& newList); - // Usage: GetMLSubList(paramList, "smoother", 2); - const Teuchos::ParameterList & GetMLSubList(const Teuchos::ParameterList & paramList, const std::string & type, int levelID); +// Usage: GetMLSubList(paramList, "smoother", 2); +const Teuchos::ParameterList& GetMLSubList(const Teuchos::ParameterList& paramList, const std::string& type, int levelID); - // Extract all the parameters that begin with "str:" (but skip sublist) - Teuchos::RCP ExtractSetOfParameters(const Teuchos::ParameterList & paramList, const std::string & str); +// Extract all the parameters that begin with "str:" (but skip sublist) +Teuchos::RCP 
ExtractSetOfParameters(const Teuchos::ParameterList& paramList, const std::string& str); - //! replace all string occurrences "from" with "to" in "str" - //! - //! @param str: input and output string - //! @param from: search string - //! @param to: replace with "to" - void replaceAll(std::string& str, const std::string& from, const std::string& to); +//! replace all string occurrences "from" with "to" in "str" +//! +//! @param str: input and output string +//! @param from: search string +//! @param to: replace with "to" +void replaceAll(std::string& str, const std::string& from, const std::string& to); - //! templated version to replace placeholder by data in "str" - template - bool replacePlaceholder(std::string& str, const std::string& placeholder, Type data) { - std::stringstream s; - s << data; - replaceAll(str, placeholder, s.str()); - return true; - } +//! templated version to replace placeholder by data in "str" +template +bool replacePlaceholder(std::string& str, const std::string& placeholder, Type data) { + std::stringstream s; + s << data; + replaceAll(str, placeholder, s.str()); + return true; +} - template - bool actionInterpretParameter(Teuchos::ParameterList& mlParams, const std::string& paramName, std::string& str) { +template +bool actionInterpretParameter(Teuchos::ParameterList& mlParams, const std::string& paramName, std::string& str) { + //MUELU_READ_PARAM(mlParams, paramName, int, 0, data); - //MUELU_READ_PARAM(mlParams, paramName, int, 0, data); + Type varName; // = defaultValue; // extract from master list + if (mlParams.isParameter(paramName)) varName = mlParams.get(paramName); - Type varName; // = defaultValue; // extract from master list - if (mlParams.isParameter(paramName)) varName = mlParams.get(paramName); + std::stringstream placeholder; + placeholder << "$" << paramName << "$"; - std::stringstream placeholder; - placeholder << "$" << paramName << "$"; + return MueLu::replacePlaceholder(str, placeholder.str(), varName); +} - return 
MueLu::replacePlaceholder(str, placeholder.str(), varName); - } +} // namespace MueLu -} // namespace MueLu - -#endif // MUELU_PARAMETERLISTUTILS_HPP +#endif // MUELU_PARAMETERLISTUTILS_HPP diff --git a/packages/muelu/src/Misc/MueLu_AggregateQualityEstimateFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_AggregateQualityEstimateFactory_decl.hpp index a41ee8b9ae26..307ff50b0099 100644 --- a/packages/muelu/src/Misc/MueLu_AggregateQualityEstimateFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_AggregateQualityEstimateFactory_decl.hpp @@ -57,13 +57,12 @@ #include #include - #include "MueLu_Aggregates_fwd.hpp" #include "MueLu_Level_fwd.hpp" namespace MueLu { - /*! +/*! @class AggregateQualityEstimateFactory class. @brief An factory which assigns each aggregate a quality estimate. Originally developed by Napov and Notay in the @@ -78,82 +77,77 @@ namespace MueLu { computing, 34(2), A1079-A1109. */ - template - class AggregateQualityEstimateFactory : public SingleLevelFactoryBase { +template +class AggregateQualityEstimateFactory : public SingleLevelFactoryBase { #undef MUELU_AGGREGATEQUALITYESTIMATEFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - typedef typename Teuchos::ScalarTraits::magnitudeType magnitudeType; + typedef typename Teuchos::ScalarTraits::magnitudeType magnitudeType; - //! Constructor. - AggregateQualityEstimateFactory(); + //! Constructor. + AggregateQualityEstimateFactory(); - //! Destructor. - virtual ~AggregateQualityEstimateFactory(); + //! Destructor. + virtual ~AggregateQualityEstimateFactory(); - //@} + //@} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //! @name Input - //@{ + //! @name Input + //@{ - /*! @brief Specifies the data that this class needs, and the factories that generate that data. + /*! @brief Specifies the data that this class needs, and the factories that generate that data. 
If the Build method of this class requires some data, but the generating factory is not specified in DeclareInput, then this class will fall back to the settings in FactoryManager. */ - void DeclareInput(Level ¤tLevel) const; - - //@} - - //! @name Build methods. - //@{ - - //! Build aggregate quality esimates with this factory. - void Build(Level & currentLevel) const; - - //@} + void DeclareInput(Level& currentLevel) const; - //! @name Utility method to convert aggregate data to a convenient format. - //@{ + //@} - //! Build aggregate quality esimates with this factory. - static void ConvertAggregatesData(RCP aggs, ArrayRCP& aggSortedVertices, ArrayRCP& aggsToIndices, ArrayRCP& aggSizes); + //! @name Build methods. + //@{ - //@} + //! Build aggregate quality esimates with this factory. + void Build(Level& currentLevel) const; - private: + //@} - //! @name Internal method for computing aggregate quality. - //@{ + //! @name Utility method to convert aggregate data to a convenient format. + //@{ - void ComputeAggregateQualities(RCP A, RCP aggs, RCP> agg_qualities) const; + //! Build aggregate quality esimates with this factory. + static void ConvertAggregatesData(RCP aggs, ArrayRCP& aggSortedVertices, ArrayRCP& aggsToIndices, ArrayRCP& aggSizes); - void ComputeAggregateSizes(RCP A, RCP aggs, RCP agg_sizes) const; + //@} - //@} + private: + //! @name Internal method for computing aggregate quality. + //@{ - //! @name Internal method for outputting aggregate quality - //@{ + void ComputeAggregateQualities(RCP A, RCP aggs, RCP> agg_qualities) const; - void OutputAggQualities(const Level& level, RCP> agg_qualities) const; + void ComputeAggregateSizes(RCP A, RCP aggs, RCP agg_sizes) const; - - void OutputAggSizes(const Level& level, RCP agg_sizes) const; + //@} + //! 
@name Internal method for outputting aggregate quality + //@{ + void OutputAggQualities(const Level& level, RCP> agg_qualities) const; - //@} + void OutputAggSizes(const Level& level, RCP agg_sizes) const; + //@} - }; // class AggregateQualityEsimateFactory(); +}; // class AggregateQualityEsimateFactory(); -} // namespace MueLu +} // namespace MueLu #define MUELU_AGGREGATEQUALITYESTIMATEFACTORY_SHORT -#endif // MUELU_DEMOFACTORY_DECL_HPP +#endif // MUELU_DEMOFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_AggregateQualityEstimateFactory_def.hpp b/packages/muelu/src/Misc/MueLu_AggregateQualityEstimateFactory_def.hpp index 44b760ea9ee0..e06ce44c82e8 100644 --- a/packages/muelu/src/Misc/MueLu_AggregateQualityEstimateFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_AggregateQualityEstimateFactory_def.hpp @@ -63,481 +63,448 @@ namespace MueLu { - template - AggregateQualityEstimateFactory::AggregateQualityEstimateFactory() - { } - - template - AggregateQualityEstimateFactory::~AggregateQualityEstimateFactory() {} - - template - void AggregateQualityEstimateFactory::DeclareInput(Level& currentLevel) const { +template +AggregateQualityEstimateFactory::AggregateQualityEstimateFactory() {} - Input(currentLevel, "A"); - Input(currentLevel, "Aggregates"); - Input(currentLevel, "CoarseMap"); +template +AggregateQualityEstimateFactory::~AggregateQualityEstimateFactory() {} - } +template +void AggregateQualityEstimateFactory::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "A"); + Input(currentLevel, "Aggregates"); + Input(currentLevel, "CoarseMap"); +} - template - RCP AggregateQualityEstimateFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP AggregateQualityEstimateFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("aggregate qualities: good 
aggregate threshold"); - SET_VALID_ENTRY("aggregate qualities: file output"); - SET_VALID_ENTRY("aggregate qualities: file base"); - SET_VALID_ENTRY("aggregate qualities: check symmetry"); - SET_VALID_ENTRY("aggregate qualities: algorithm"); - SET_VALID_ENTRY("aggregate qualities: zero threshold"); - SET_VALID_ENTRY("aggregate qualities: percentiles"); - SET_VALID_ENTRY("aggregate qualities: mode"); - -#undef SET_VALID_ENTRY - - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A"); - validParamList->set< RCP >("Aggregates", Teuchos::null, "Generating factory of the aggregates"); - validParamList->set< RCP >("CoarseMap", Teuchos::null, "Generating factory of the coarse map"); - - return validParamList; - } + SET_VALID_ENTRY("aggregate qualities: good aggregate threshold"); + SET_VALID_ENTRY("aggregate qualities: file output"); + SET_VALID_ENTRY("aggregate qualities: file base"); + SET_VALID_ENTRY("aggregate qualities: check symmetry"); + SET_VALID_ENTRY("aggregate qualities: algorithm"); + SET_VALID_ENTRY("aggregate qualities: zero threshold"); + SET_VALID_ENTRY("aggregate qualities: percentiles"); + SET_VALID_ENTRY("aggregate qualities: mode"); + +#undef SET_VALID_ENTRY + + validParamList->set>("A", Teuchos::null, "Generating factory of the matrix A"); + validParamList->set>("Aggregates", Teuchos::null, "Generating factory of the aggregates"); + validParamList->set>("CoarseMap", Teuchos::null, "Generating factory of the coarse map"); + + return validParamList; +} +template +void AggregateQualityEstimateFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); - template - void AggregateQualityEstimateFactory::Build(Level & currentLevel) const { + RCP A = Get>(currentLevel, "A"); + RCP aggregates = Get>(currentLevel, "Aggregates"); - FactoryMonitor m(*this, "Build", currentLevel); + RCP map = Get>(currentLevel, "CoarseMap"); - RCP A = Get>(currentLevel, "A"); - RCP aggregates = Get>(currentLevel, 
"Aggregates"); + assert(!aggregates->AggregatesCrossProcessors()); + ParameterList pL = GetParameterList(); + std::string mode = pL.get("aggregate qualities: mode"); + GetOStream(Statistics1) << "AggregateQuality: mode " << mode << std::endl; - RCP map = Get< RCP >(currentLevel, "CoarseMap"); + RCP> aggregate_qualities; + if (mode == "eigenvalue" || mode == "both") { + aggregate_qualities = Xpetra::MultiVectorFactory::Build(map, 1); + ComputeAggregateQualities(A, aggregates, aggregate_qualities); + OutputAggQualities(currentLevel, aggregate_qualities); + } + if (mode == "size" || mode == "both") { + RCP aggregate_sizes = Xpetra::VectorFactory::Build(map); + ComputeAggregateSizes(A, aggregates, aggregate_sizes); + Set(currentLevel, "AggregateSizes", aggregate_sizes); + OutputAggSizes(currentLevel, aggregate_sizes); + } + Set(currentLevel, "AggregateQualities", aggregate_qualities); +} +template +void AggregateQualityEstimateFactory::ConvertAggregatesData(RCP aggs, ArrayRCP& aggSortedVertices, ArrayRCP& aggsToIndices, ArrayRCP& aggSizes) { + // Reorder local aggregate information into a format amenable to computing + // per-aggregate quantities. Specifically, we compute a format + // similar to compressed sparse row format for sparse matrices in which + // we store all the local vertices in a single array in blocks corresponding + // to aggregates. (This array is aggSortedVertices.) We then store a second + // array (aggsToIndices) whose k-th element stores the index of the first + // vertex in aggregate k in the array aggSortedVertices. 
- assert(!aggregates->AggregatesCrossProcessors()); - ParameterList pL = GetParameterList(); - std::string mode = pL.get("aggregate qualities: mode"); - GetOStream(Statistics1) << "AggregateQuality: mode "<::zero(); + const LO LO_ONE = Teuchos::OrdinalTraits::one(); - RCP> aggregate_qualities; - if(mode == "eigenvalue" || mode == "both") { - aggregate_qualities = Xpetra::MultiVectorFactory::Build(map, 1); - ComputeAggregateQualities(A, aggregates, aggregate_qualities); - OutputAggQualities(currentLevel, aggregate_qualities); - } - if(mode == "size" || mode =="both") { - RCP aggregate_sizes = Xpetra::VectorFactory::Build(map); - ComputeAggregateSizes(A,aggregates,aggregate_sizes); - Set(currentLevel, "AggregateSizes",aggregate_sizes); - OutputAggSizes(currentLevel, aggregate_sizes); - } - Set(currentLevel, "AggregateQualities", aggregate_qualities); + LO numAggs = aggs->GetNumAggregates(); + aggSizes = aggs->ComputeAggregateSizesArrayRCP(); + aggsToIndices = ArrayRCP(numAggs + LO_ONE, LO_ZERO); + for (LO i = 0; i < numAggs; ++i) { + aggsToIndices[i + LO_ONE] = aggsToIndices[i] + aggSizes[i]; } - template - void AggregateQualityEstimateFactory::ConvertAggregatesData(RCP aggs, ArrayRCP& aggSortedVertices, ArrayRCP& aggsToIndices, ArrayRCP& aggSizes) { + const RCP vertex2AggId = aggs->GetVertex2AggId(); + const ArrayRCP vertex2AggIdData = vertex2AggId->getData(0); - // Reorder local aggregate information into a format amenable to computing - // per-aggregate quantities. Specifically, we compute a format - // similar to compressed sparse row format for sparse matrices in which - // we store all the local vertices in a single array in blocks corresponding - // to aggregates. (This array is aggSortedVertices.) We then store a second - // array (aggsToIndices) whose k-th element stores the index of the first - // vertex in aggregate k in the array aggSortedVertices. 
+ LO numNodes = vertex2AggId->getLocalLength(); + aggSortedVertices = ArrayRCP(numNodes, -LO_ONE); + std::vector vertexInsertionIndexByAgg(numNodes, LO_ZERO); - const LO LO_ZERO = Teuchos::OrdinalTraits::zero(); - const LO LO_ONE = Teuchos::OrdinalTraits::one(); + for (LO i = 0; i < numNodes; ++i) { + LO aggId = vertex2AggIdData[i]; + if (aggId < 0 || aggId >= numAggs) continue; - LO numAggs = aggs->GetNumAggregates(); - aggSizes = aggs->ComputeAggregateSizesArrayRCP(); + aggSortedVertices[aggsToIndices[aggId] + vertexInsertionIndexByAgg[aggId]] = i; + vertexInsertionIndexByAgg[aggId]++; + } +} - aggsToIndices = ArrayRCP(numAggs+LO_ONE,LO_ZERO); +template +void AggregateQualityEstimateFactory::ComputeAggregateQualities(RCP A, RCP aggs, RCP> agg_qualities) const { + const SC SCALAR_ONE = Teuchos::ScalarTraits::one(); + const SC SCALAR_TWO = SCALAR_ONE + SCALAR_ONE; + + const LO LO_ZERO = Teuchos::OrdinalTraits::zero(); + const LO LO_ONE = Teuchos::OrdinalTraits::one(); + + using MT = magnitudeType; + const MT MT_ZERO = Teuchos::ScalarTraits::zero(); + const MT MT_ONE = Teuchos::ScalarTraits::one(); + ParameterList pL = GetParameterList(); + + RCP AT = A; + + // Algorithm check + std::string algostr = pL.get("aggregate qualities: algorithm"); + MT zeroThreshold = Teuchos::as(pL.get("aggregate qualities: zero threshold")); + enum AggAlgo { ALG_FORWARD = 0, + ALG_REVERSE }; + AggAlgo algo; + if (algostr == "forward") { + algo = ALG_FORWARD; + GetOStream(Statistics1) << "AggregateQuality: Using 'forward' algorithm" << std::endl; + } else if (algostr == "reverse") { + algo = ALG_REVERSE; + GetOStream(Statistics1) << "AggregateQuality: Using 'reverse' algorithm" << std::endl; + } else { + TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError, "\"algorithm\" must be one of (forward|reverse)"); + } - for (LO i=0;i("aggregate qualities: check symmetry"); + if (check_symmetry) { + RCP x = MultiVectorFactory::Build(A->getMap(), 1, false); + x->Xpetra_randomize(); - const RCP 
vertex2AggId = aggs->GetVertex2AggId(); - const ArrayRCP vertex2AggIdData = vertex2AggId->getData(0); + RCP tmp = MultiVectorFactory::Build(A->getMap(), 1, false); - LO numNodes = vertex2AggId->getLocalLength(); - aggSortedVertices = ArrayRCP(numNodes,-LO_ONE); - std::vector vertexInsertionIndexByAgg(numNodes,LO_ZERO); + A->apply(*x, *tmp, Teuchos::NO_TRANS); // tmp now stores A*x + A->apply(*x, *tmp, Teuchos::TRANS, -SCALAR_ONE, SCALAR_ONE); // tmp now stores A*x - A^T*x - for (LO i=0;i tmp_norm(1); + tmp->norm2(tmp_norm()); - LO aggId = vertex2AggIdData[i]; - if (aggId<0 || aggId>=numAggs) continue; + Array x_norm(1); + tmp->norm2(x_norm()); - aggSortedVertices[aggsToIndices[aggId]+vertexInsertionIndexByAgg[aggId]] = i; - vertexInsertionIndexByAgg[aggId]++; + if (tmp_norm[0] > 1e-10 * x_norm[0]) { + std::string transpose_string = "transpose"; + RCP whatever; + AT = Utilities::Transpose(*rcp_const_cast(A), true, transpose_string, whatever); + assert(A->getMap()->isSameAs(*(AT->getMap()))); } - - } - template - void AggregateQualityEstimateFactory::ComputeAggregateQualities(RCP A, RCP aggs, RCP> agg_qualities) const { + // Reorder local aggregate information into a format amenable to computing + // per-aggregate quantities. Specifically, we compute a format + // similar to compressed sparse row format for sparse matrices in which + // we store all the local vertices in a single array in blocks corresponding + // to aggregates. (This array is aggSortedVertices.) We then store a second + // array (aggsToIndices) whose k-th element stores the index of the first + // vertex in aggregate k in the array aggSortedVertices. 
- const SC SCALAR_ONE = Teuchos::ScalarTraits::one(); - const SC SCALAR_TWO = SCALAR_ONE + SCALAR_ONE; + ArrayRCP aggSortedVertices, aggsToIndices, aggSizes; + ConvertAggregatesData(aggs, aggSortedVertices, aggsToIndices, aggSizes); - const LO LO_ZERO = Teuchos::OrdinalTraits::zero(); - const LO LO_ONE = Teuchos::OrdinalTraits::one(); + LO numAggs = aggs->GetNumAggregates(); - using MT = magnitudeType; - const MT MT_ZERO = Teuchos::ScalarTraits::zero(); - const MT MT_ONE = Teuchos::ScalarTraits::one(); - ParameterList pL = GetParameterList(); + // Compute the per-aggregate quality estimate - RCP AT = A; + typedef Teuchos::SerialDenseMatrix DenseMatrix; + typedef Teuchos::SerialDenseVector DenseVector; - // Algorithm check - std::string algostr = pL.get("aggregate qualities: algorithm"); - MT zeroThreshold = Teuchos::as(pL.get("aggregate qualities: zero threshold")); - enum AggAlgo {ALG_FORWARD=0, ALG_REVERSE}; - AggAlgo algo; - if(algostr == "forward") {algo = ALG_FORWARD; GetOStream(Statistics1) << "AggregateQuality: Using 'forward' algorithm" << std::endl;} - else if(algostr == "reverse") {algo = ALG_REVERSE; GetOStream(Statistics1) << "AggregateQuality: Using 'reverse' algorithm" << std::endl;} - else { - TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError, "\"algorithm\" must be one of (forward|reverse)"); - } + ArrayView rowIndices; + ArrayView rowValues; + ArrayView colValues; + Teuchos::LAPACK myLapack; - bool check_symmetry = pL.get("aggregate qualities: check symmetry"); - if (check_symmetry) { + // Iterate over each aggregate to compute the quality estimate + for (LO aggId = LO_ZERO; aggId < numAggs; ++aggId) { + LO aggSize = aggSizes[aggId]; + DenseMatrix A_aggPart(aggSize, aggSize, true); + DenseVector offDiagonalAbsoluteSums(aggSize, true); - RCP x = MultiVectorFactory::Build(A->getMap(), 1, false); - x->Xpetra_randomize(); - - RCP tmp = MultiVectorFactory::Build(A->getMap(), 1, false); - - A->apply(*x, *tmp, Teuchos::NO_TRANS); // tmp now stores 
A*x - A->apply(*x, *tmp, Teuchos::TRANS, -SCALAR_ONE, SCALAR_ONE); // tmp now stores A*x - A^T*x + // Iterate over each node in the aggregate + for (LO idx = LO_ZERO; idx < aggSize; ++idx) { + LO nodeId = aggSortedVertices[idx + aggsToIndices[aggId]]; + A->getLocalRowView(nodeId, rowIndices, rowValues); + AT->getLocalRowView(nodeId, rowIndices, colValues); + + // Iterate over each element in the row corresponding to the current node + for (LO elem = LO_ZERO; elem < rowIndices.size(); ++elem) { + LO nodeId2 = rowIndices[elem]; + SC val = (rowValues[elem] + colValues[elem]) / SCALAR_TWO; + + LO idxInAgg = -LO_ONE; // -1 if element is not in aggregate + + // Check whether the element belongs in the aggregate. If it does + // find, its index. Otherwise, add it's value to the off diagonal + // sums + for (LO idx2 = LO_ZERO; idx2 < aggSize; ++idx2) { + if (aggSortedVertices[idx2 + aggsToIndices[aggId]] == nodeId2) { + // Element does belong to aggregate + idxInAgg = idx2; + break; + } + } - Array tmp_norm(1); - tmp->norm2(tmp_norm()); + if (idxInAgg == -LO_ONE) { // Element does not belong to aggregate - Array x_norm(1); - tmp->norm2(x_norm()); + offDiagonalAbsoluteSums[idx] += Teuchos::ScalarTraits::magnitude(val); - if (tmp_norm[0] > 1e-10*x_norm[0]) { - std::string transpose_string = "transpose"; - RCP whatever; - AT = Utilities::Transpose(*rcp_const_cast(A), true, transpose_string, whatever); + } else { // Element does belong to aggregate - assert(A->getMap()->isSameAs( *(AT->getMap()) )); + A_aggPart(idx, idxInAgg) = Teuchos::ScalarTraits::real(val); + } } - } - // Reorder local aggregate information into a format amenable to computing - // per-aggregate quantities. Specifically, we compute a format - // similar to compressed sparse row format for sparse matrices in which - // we store all the local vertices in a single array in blocks corresponding - // to aggregates. (This array is aggSortedVertices.) 
We then store a second - // array (aggsToIndices) whose k-th element stores the index of the first - // vertex in aggregate k in the array aggSortedVertices. - - ArrayRCP aggSortedVertices, aggsToIndices, aggSizes; - ConvertAggregatesData(aggs, aggSortedVertices, aggsToIndices, aggSizes); - - LO numAggs = aggs->GetNumAggregates(); - - // Compute the per-aggregate quality estimate - - typedef Teuchos::SerialDenseMatrix DenseMatrix; - typedef Teuchos::SerialDenseVector DenseVector; - - ArrayView rowIndices; - ArrayView rowValues; - ArrayView colValues; - Teuchos::LAPACK myLapack; - - // Iterate over each aggregate to compute the quality estimate - for (LO aggId=LO_ZERO; aggIdgetLocalRowView(nodeId, rowIndices, rowValues); - AT->getLocalRowView(nodeId, rowIndices, colValues); - - // Iterate over each element in the row corresponding to the current node - for (LO elem=LO_ZERO; elem::real(A_aggPart(i, i)); + diag_sum += Teuchos::ScalarTraits::real(A_aggPart(i, i)); + } - if (idxInAgg == -LO_ONE) { // Element does not belong to aggregate + DenseMatrix ones(aggSize, aggSize, false); + ones.putScalar(MT_ONE); - offDiagonalAbsoluteSums[idx] += Teuchos::ScalarTraits::magnitude(val); + // Compute matrix on top of generalized Rayleigh quotient + // topMatrix = A_aggPartDiagonal - A_aggPartDiagonal*ones*A_aggPartDiagonal/diag_sum; + DenseMatrix tmp(aggSize, aggSize, false); + DenseMatrix topMatrix(A_aggPartDiagonal); - } else { // Element does belong to aggregate + tmp.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, MT_ONE, ones, A_aggPartDiagonal, MT_ZERO); + topMatrix.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -MT_ONE / diag_sum, A_aggPartDiagonal, tmp, MT_ONE); - A_aggPart(idx,idxInAgg) = Teuchos::ScalarTraits::real(val); + // Compute matrix on bottom of generalized Rayleigh quotient + DenseMatrix bottomMatrix(A_aggPart); + MT matrixNorm = A_aggPart.normInf(); - } + // Forward mode: Include a small perturbation to the bottom matrix to make it nonsingular + const MT boost 
= (algo == ALG_FORWARD) ? (-1e4 * Teuchos::ScalarTraits::eps() * matrixNorm) : MT_ZERO; - } + for (int i = 0; i < aggSize; ++i) { + bottomMatrix(i, i) -= offDiagonalAbsoluteSums(i) + boost; + } + // Compute generalized eigenvalues + LO sdim, info; + DenseVector alpha_real(aggSize, false); + DenseVector alpha_imag(aggSize, false); + DenseVector beta(aggSize, false); + + DenseVector workArray(14 * (aggSize + 1), false); + + LO(*ptr2func) + (MT*, MT*, MT*); + ptr2func = NULL; + LO* bwork = NULL; + MT* vl = NULL; + MT* vr = NULL; + + const char compute_flag = 'N'; + if (algo == ALG_FORWARD) { + // Forward: Solve the generalized eigenvalue problem as is + myLapack.GGES(compute_flag, compute_flag, compute_flag, ptr2func, aggSize, + topMatrix.values(), aggSize, bottomMatrix.values(), aggSize, &sdim, + alpha_real.values(), alpha_imag.values(), beta.values(), vl, aggSize, + vr, aggSize, workArray.values(), workArray.length(), bwork, + &info); + TEUCHOS_ASSERT(info == LO_ZERO); + + MT maxEigenVal = MT_ZERO; + for (int i = LO_ZERO; i < aggSize; ++i) { + // NOTE: In theory, the eigenvalues should be nearly real + //TEUCHOS_ASSERT(fabs(alpha_imag[i]) <= 1e-8*fabs(alpha_real[i])); // Eigenvalues should be nearly real + maxEigenVal = std::max(maxEigenVal, alpha_real[i] / beta[i]); } - // Construct a diagonal matrix consisting of the diagonal - // of A_aggPart - DenseMatrix A_aggPartDiagonal(aggSize, aggSize, true); - MT diag_sum = MT_ZERO; - for (int i=0;i::real(A_aggPart(i,i)); - diag_sum += Teuchos::ScalarTraits::real(A_aggPart(i,i)); + (agg_qualities->getDataNonConst(0))[aggId] = (MT_ONE + MT_ONE) * maxEigenVal; + } else { + // Reverse: Swap the top and bottom matrices for the generalized eigenvalue problem + // This is trickier, since we need to grab the smallest non-zero eigenvalue and invert it. 
+ myLapack.GGES(compute_flag, compute_flag, compute_flag, ptr2func, aggSize, + bottomMatrix.values(), aggSize, topMatrix.values(), aggSize, &sdim, + alpha_real.values(), alpha_imag.values(), beta.values(), vl, aggSize, + vr, aggSize, workArray.values(), workArray.length(), bwork, + &info); + + TEUCHOS_ASSERT(info == LO_ZERO); + + MT minEigenVal = MT_ZERO; + + for (int i = LO_ZERO; i < aggSize; ++i) { + MT ev = alpha_real[i] / beta[i]; + if (ev > zeroThreshold) { + if (minEigenVal == MT_ZERO) + minEigenVal = ev; + else + minEigenVal = std::min(minEigenVal, ev); + } } + if (minEigenVal == MT_ZERO) + (agg_qualities->getDataNonConst(0))[aggId] = Teuchos::ScalarTraits::rmax(); + else + (agg_qualities->getDataNonConst(0))[aggId] = (MT_ONE + MT_ONE) / minEigenVal; + } + } //end aggId loop +} - DenseMatrix ones(aggSize, aggSize, false); - ones.putScalar(MT_ONE); - - // Compute matrix on top of generalized Rayleigh quotient - // topMatrix = A_aggPartDiagonal - A_aggPartDiagonal*ones*A_aggPartDiagonal/diag_sum; - DenseMatrix tmp(aggSize, aggSize, false); - DenseMatrix topMatrix(A_aggPartDiagonal); +template +void AggregateQualityEstimateFactory::OutputAggQualities(const Level& level, RCP> agg_qualities) const { + ParameterList pL = GetParameterList(); - tmp.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, MT_ONE, ones, A_aggPartDiagonal, MT_ZERO); - topMatrix.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -MT_ONE/diag_sum, A_aggPartDiagonal, tmp, MT_ONE); + magnitudeType good_agg_thresh = Teuchos::as(pL.get("aggregate qualities: good aggregate threshold")); + using MT = magnitudeType; - // Compute matrix on bottom of generalized Rayleigh quotient - DenseMatrix bottomMatrix(A_aggPart); - MT matrixNorm = A_aggPart.normInf(); + ArrayRCP data = agg_qualities->getData(0); - // Forward mode: Include a small perturbation to the bottom matrix to make it nonsingular - const MT boost = (algo == ALG_FORWARD) ? 
(-1e4*Teuchos::ScalarTraits::eps()*matrixNorm) : MT_ZERO; + LO num_bad_aggs = 0; + MT worst_agg = 0.0; - for (int i=0;igetDataNonConst(0))[aggId] = (MT_ONE+MT_ONE)*maxEigenVal; - } - else { - // Reverse: Swap the top and bottom matrices for the generalized eigenvalue problem - // This is trickier, since we need to grab the smallest non-zero eigenvalue and invert it. - myLapack.GGES(compute_flag,compute_flag,compute_flag,ptr2func,aggSize, - bottomMatrix.values(),aggSize,topMatrix.values(),aggSize,&sdim, - alpha_real.values(),alpha_imag.values(),beta.values(),vl,aggSize, - vr,aggSize,workArray.values(),workArray.length(),bwork, - &info); - - TEUCHOS_ASSERT(info == LO_ZERO); - - MT minEigenVal = MT_ZERO; - - for (int i=LO_ZERO;i zeroThreshold) { - if (minEigenVal == MT_ZERO) minEigenVal = ev; - else minEigenVal = std::min(minEigenVal,ev); - } - } - if(minEigenVal == MT_ZERO) (agg_qualities->getDataNonConst(0))[aggId] = Teuchos::ScalarTraits::rmax(); - else (agg_qualities->getDataNonConst(0))[aggId] = (MT_ONE+MT_ONE) / minEigenVal; - } - }//end aggId loop + for (size_t i = 0; i < agg_qualities->getLocalLength(); ++i) { + if (data[i] > good_agg_thresh) { + num_bad_aggs++; + mean_bad_agg += data[i]; + } else { + mean_good_agg += data[i]; + } + worst_agg = std::max(worst_agg, data[i]); } - template - void AggregateQualityEstimateFactory::OutputAggQualities(const Level& level, RCP> agg_qualities) const { - - ParameterList pL = GetParameterList(); + if (num_bad_aggs > 0) mean_bad_agg /= num_bad_aggs; + mean_good_agg /= agg_qualities->getLocalLength() - num_bad_aggs; - magnitudeType good_agg_thresh = Teuchos::as(pL.get("aggregate qualities: good aggregate threshold")); - using MT = magnitudeType; - - ArrayRCP data = agg_qualities->getData(0); - - LO num_bad_aggs = 0; - MT worst_agg = 0.0; + if (num_bad_aggs == 0) { + GetOStream(Statistics1) << "All aggregates passed the quality measure. Worst aggregate had quality " << worst_agg << ". 
Mean aggregate quality " << mean_good_agg << "." << std::endl; + } else { + GetOStream(Statistics1) << num_bad_aggs << " out of " << agg_qualities->getLocalLength() << " did not pass the quality measure. Worst aggregate had quality " << worst_agg << ". " + << "Mean bad aggregate quality " << mean_bad_agg << ". Mean good aggregate quality " << mean_good_agg << "." << std::endl; + } - MT mean_bad_agg = 0.0; - MT mean_good_agg = 0.0; + if (pL.get("aggregate qualities: file output")) { + std::string filename = pL.get("aggregate qualities: file base") + "." + std::to_string(level.GetLevelID()); + Xpetra::IO::Write(filename, *agg_qualities); + } + { + const auto n = size_t(agg_qualities->getLocalLength()); - for (size_t i=0;igetLocalLength();++i) { + std::vector tmp; + tmp.reserve(n); - if (data[i] > good_agg_thresh) { - num_bad_aggs++; - mean_bad_agg += data[i]; - } - else { - mean_good_agg += data[i]; - } - worst_agg = std::max(worst_agg, data[i]); + for (size_t i = 0; i < n; ++i) { + tmp.push_back(data[i]); } + std::sort(tmp.begin(), tmp.end()); - if (num_bad_aggs > 0) mean_bad_agg /= num_bad_aggs; - mean_good_agg /= agg_qualities->getLocalLength() - num_bad_aggs; + Teuchos::ArrayView percents = pL.get>("aggregate qualities: percentiles")(); - if (num_bad_aggs == 0) { - GetOStream(Statistics1) << "All aggregates passed the quality measure. Worst aggregate had quality " << worst_agg << ". Mean aggregate quality " << mean_good_agg << "." << std::endl; - } else { - GetOStream(Statistics1) << num_bad_aggs << " out of " << agg_qualities->getLocalLength() << " did not pass the quality measure. Worst aggregate had quality " << worst_agg << ". " - << "Mean bad aggregate quality " << mean_bad_agg << ". Mean good aggregate quality " << mean_good_agg << "." 
<< std::endl; + GetOStream(Statistics1) << "AGG QUALITY HEADER : | LEVEL | TOTAL |"; + for (auto percent : percents) { + GetOStream(Statistics1) << std::fixed << std::setprecision(4) << 100.0 * percent << "% |"; } - - if (pL.get("aggregate qualities: file output")) { - std::string filename = pL.get("aggregate qualities: file base")+"."+std::to_string(level.GetLevelID()); - Xpetra::IO::Write(filename, *agg_qualities); - } - - { - const auto n = size_t(agg_qualities->getLocalLength()); - - std::vector tmp; - tmp.reserve(n); - - for (size_t i=0; i percents = pL.get >("aggregate qualities: percentiles")(); - - GetOStream(Statistics1) << "AGG QUALITY HEADER : | LEVEL | TOTAL |"; - for (auto percent : percents) { - GetOStream(Statistics1) << std::fixed << std::setprecision(4) <<100.0*percent << "% |"; - } - GetOStream(Statistics1) << std::endl; - - GetOStream(Statistics1) << "AGG QUALITY PERCENTILES: | " << level.GetLevelID() << " | " << n << "|"; - for (auto percent : percents) { - size_t i = size_t(n*percent); - i = i < n ? i : n-1u; - i = i > 0u ? i : 0u; - GetOStream(Statistics1) << std::fixed < 0u ? 
i : 0u; + GetOStream(Statistics1) << std::fixed << std::setprecision(4) << tmp[i] << " |"; } + GetOStream(Statistics1) << std::endl; } - - +} template - void AggregateQualityEstimateFactory::ComputeAggregateSizes(RCP A, RCP aggs, RCP agg_sizes) const { - - ArrayRCP aggSortedVertices, aggsToIndices, aggSizes; - ConvertAggregatesData(aggs, aggSortedVertices, aggsToIndices, aggSizes); - - // Iterate over each node in the aggregate - auto data = agg_sizes->getDataNonConst(0); - for (LO i=0; i<(LO)aggSizes.size(); i++) - data[i] = aggSizes[i]; +void AggregateQualityEstimateFactory::ComputeAggregateSizes(RCP A, RCP aggs, RCP agg_sizes) const { + ArrayRCP aggSortedVertices, aggsToIndices, aggSizes; + ConvertAggregatesData(aggs, aggSortedVertices, aggsToIndices, aggSizes); + + // Iterate over each node in the aggregate + auto data = agg_sizes->getDataNonConst(0); + for (LO i = 0; i < (LO)aggSizes.size(); i++) + data[i] = aggSizes[i]; } - - template - void AggregateQualityEstimateFactory::OutputAggSizes(const Level& level, RCP agg_sizes) const { +void AggregateQualityEstimateFactory::OutputAggSizes(const Level& level, RCP agg_sizes) const { + ParameterList pL = GetParameterList(); + using MT = magnitudeType; - ParameterList pL = GetParameterList(); - using MT = magnitudeType; + ArrayRCP data = agg_sizes->getData(0); - ArrayRCP data = agg_sizes->getData(0); - - - if (pL.get("aggregate qualities: file output")) { - std::string filename = pL.get("aggregate qualities: file base")+".sizes."+std::to_string(level.GetLevelID()); - Xpetra::IO::Write(filename, *agg_sizes); - } + if (pL.get("aggregate qualities: file output")) { + std::string filename = pL.get("aggregate qualities: file base") + ".sizes." 
+ std::to_string(level.GetLevelID()); + Xpetra::IO::Write(filename, *agg_sizes); + } - { - size_t n = (size_t)agg_sizes->getLocalLength(); + { + size_t n = (size_t)agg_sizes->getLocalLength(); - std::vector tmp; - tmp.reserve(n); + std::vector tmp; + tmp.reserve(n); - for (size_t i=0; i(data[i])); - } + for (size_t i = 0; i < n; ++i) { + tmp.push_back(Teuchos::as(data[i])); + } - std::sort(tmp.begin(), tmp.end()); + std::sort(tmp.begin(), tmp.end()); - Teuchos::ArrayView percents = pL.get >("aggregate qualities: percentiles")(); - - GetOStream(Statistics1) << "AGG SIZE HEADER : | LEVEL | TOTAL |"; - for (auto percent : percents) { - GetOStream(Statistics1) << std::fixed << std::setprecision(4) <<100.0*percent << "% |"; - } - GetOStream(Statistics1) << std::endl; - - GetOStream(Statistics1) << "AGG SIZE PERCENTILES: | " << level.GetLevelID() << " | " << n << "|"; - for (auto percent : percents) { - size_t i = size_t(n*percent); - i = i < n ? i : n-1u; - i = i > 0u ? i : 0u; - GetOStream(Statistics1) << std::fixed < percents = pL.get>("aggregate qualities: percentiles")(); + GetOStream(Statistics1) << "AGG SIZE HEADER : | LEVEL | TOTAL |"; + for (auto percent : percents) { + GetOStream(Statistics1) << std::fixed << std::setprecision(4) << 100.0 * percent << "% |"; + } + GetOStream(Statistics1) << std::endl; + + GetOStream(Statistics1) << "AGG SIZE PERCENTILES: | " << level.GetLevelID() << " | " << n << "|"; + for (auto percent : percents) { + size_t i = size_t(n * percent); + i = i < n ? i : n - 1u; + i = i > 0u ? 
i : 0u; + GetOStream(Statistics1) << std::fixed << std::setprecision(4) << tmp[i] << " |"; } + GetOStream(Statistics1) << std::endl; } +} +} // namespace MueLu - -} // namespace MueLu - -#endif // MUELU_AGGREGATEQUALITYESTIMATE_DEF_HPP +#endif // MUELU_AGGREGATEQUALITYESTIMATE_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_BlockedCoordinatesTransferFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_BlockedCoordinatesTransferFactory_decl.hpp index 06c3a5ed6917..eb8f5af4059b 100644 --- a/packages/muelu/src/Misc/MueLu_BlockedCoordinatesTransferFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_BlockedCoordinatesTransferFactory_decl.hpp @@ -86,19 +86,19 @@ namespace MueLu { ----------|--------------|------------ | Coordinates | BlockedCoordinatesTransferFactory | coarse level coordinates (unified) */ - template - class BlockedCoordinatesTransferFactory : public TwoLevelFactoryBase { +template +class BlockedCoordinatesTransferFactory : public TwoLevelFactoryBase { #undef MUELU_BLOCKEDCOORDINATESTRANSFERFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - /*! @brief Constructor. + /*! @brief Constructor. @param vectorName The name of the quantity to be restricted. @param restrictionName The name of the restriction Matrix. @@ -106,53 +106,52 @@ namespace MueLu { The operator associated with projectionName will be applied to the MultiVector associated with vectorName. */ - BlockedCoordinatesTransferFactory() { } + BlockedCoordinatesTransferFactory() {} - //! Destructor. - virtual ~BlockedCoordinatesTransferFactory() { } + //! Destructor. + virtual ~BlockedCoordinatesTransferFactory() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! @name Input - //@{ + //! @name Input + //@{ - /*! @brief Specifies the data that this class needs, and the factories that generate that data. + /*! 
@brief Specifies the data that this class needs, and the factories that generate that data. If the Build method of this class requires some data, but the generating factory is not specified in DeclareInput, then this class will fall back to the settings in FactoryManager. */ - void DeclareInput(Level &finelevel, Level &coarseLevel) const; + void DeclareInput(Level &finelevel, Level &coarseLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - //! Build an object with this factory. - void Build(Level & fineLevel, Level &coarseLevel) const; + //! Build an object with this factory. + void Build(Level &fineLevel, Level &coarseLevel) const; - //@} + //@} - //@{ - /*! @brief Add (sub) coords factory in the end of list of factories in BlockedCoordinatesTransferFactory. + //@{ + /*! @brief Add (sub) coords factory in the end of list of factories in BlockedCoordinatesTransferFactory. */ - void AddFactory(const RCP& factory); + void AddFactory(const RCP &factory); + //! Returns number of sub factories. + size_t NumFactories() const { return subFactories_.size(); } - //! Returns number of sub factories. - size_t NumFactories() const { return subFactories_.size(); } + //@} + private: + //! list of user-defined sub Factories + std::vector > subFactories_; - //@} - private: - //! 
list of user-defined sub Factories - std::vector > subFactories_; +}; // class BlockedCoordinatesTransferFactory - }; // class BlockedCoordinatesTransferFactory - -} // namespace MueLu +} // namespace MueLu #define MUELU_BLOCKEDCOORDINATESTRANSFERFACTORY_SHORT -#endif // MUELU_BLOCKEDCOORDINATESTRANSFER_FACTORY_DECL_HPP +#endif // MUELU_BLOCKEDCOORDINATESTRANSFER_FACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_BlockedCoordinatesTransferFactory_def.hpp b/packages/muelu/src/Misc/MueLu_BlockedCoordinatesTransferFactory_def.hpp index e83e92696def..2c8150b4a969 100644 --- a/packages/muelu/src/Misc/MueLu_BlockedCoordinatesTransferFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_BlockedCoordinatesTransferFactory_def.hpp @@ -58,110 +58,108 @@ namespace MueLu { - template - RCP BlockedCoordinatesTransferFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - validParamList->set >("Coordinates", Teuchos::null, "Factory for coordinates generation"); - validParamList->set >("CoarseMap", Teuchos::null, "Generating factory of the coarse map"); - return validParamList; +template +RCP BlockedCoordinatesTransferFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + + validParamList->set >("Coordinates", Teuchos::null, "Factory for coordinates generation"); + validParamList->set >("CoarseMap", Teuchos::null, "Generating factory of the coarse map"); + return validParamList; +} + +template +void BlockedCoordinatesTransferFactory::DeclareInput(Level& /* fineLevel */, Level& coarseLevel) const { + Input(coarseLevel, "CoarseMap"); + + // Make sure the Level knows I need these sub-Factories + const size_t numSubFactories = NumFactories(); + for (size_t i = 0; i < numSubFactories; i++) { + const RCP& myFactory = subFactories_[i]; + coarseLevel.DeclareInput("Coordinates", myFactory.getRawPtr(), this); } - template - void BlockedCoordinatesTransferFactory::DeclareInput(Level& /* fineLevel */, Level& 
coarseLevel) const { - Input(coarseLevel, "CoarseMap"); - - // Make sure the Level knows I need these sub-Factories - const size_t numSubFactories = NumFactories(); - for(size_t i=0; i& myFactory = subFactories_[i]; - coarseLevel.DeclareInput("Coordinates", myFactory.getRawPtr(), this); - } - - // call DeclareInput of all user-given transfer factories - for (std::vector >::const_iterator it = subFactories_.begin(); it != subFactories_.end(); ++it) - (*it)->CallDeclareInput(coarseLevel); - } - - template - void BlockedCoordinatesTransferFactory::Build(Level & /* fineLevel */, Level &coarseLevel) const { - FactoryMonitor m(*this, "Build", coarseLevel); - - typedef Xpetra::MultiVector::coordinateType,LO,GO,NO> dMV; - typedef Xpetra::BlockedMultiVector::coordinateType,LO,GO,NO> dBV; + // call DeclareInput of all user-given transfer factories + for (std::vector >::const_iterator it = subFactories_.begin(); it != subFactories_.end(); ++it) + (*it)->CallDeclareInput(coarseLevel); +} - GetOStream(Runtime0) << "Transferring (blocked) coordinates" << std::endl; +template +void BlockedCoordinatesTransferFactory::Build(Level& /* fineLevel */, Level& coarseLevel) const { + FactoryMonitor m(*this, "Build", coarseLevel); - const size_t numSubFactories = NumFactories(); - std::vector > subBlockMaps(numSubFactories); - std::vector > subBlockCoords(numSubFactories); + typedef Xpetra::MultiVector::coordinateType, LO, GO, NO> dMV; + typedef Xpetra::BlockedMultiVector::coordinateType, LO, GO, NO> dBV; - if (coarseLevel.IsAvailable("Coordinates", this)) { - GetOStream(Runtime0) << "Reusing coordinates" << std::endl; - return; - } + GetOStream(Runtime0) << "Transferring (blocked) coordinates" << std::endl; - // Get components - for(size_t i=0; i& myFactory = subFactories_[i]; - myFactory->CallBuild(coarseLevel); - subBlockCoords[i] = coarseLevel.Get >("Coordinates", myFactory.get()); - subBlockMaps[i] = subBlockCoords[i]->getMap(); - } + const size_t numSubFactories = NumFactories(); + 
std::vector > subBlockMaps(numSubFactories); + std::vector > subBlockCoords(numSubFactories); - // Blocked Map - RCP coarseCoordMapBlocked; + if (coarseLevel.IsAvailable("Coordinates", this)) { + GetOStream(Runtime0) << "Reusing coordinates" << std::endl; + return; + } - { - // coarseMap is being used to set up the domain map of tentative P, and therefore, the row map of Ac - // Therefore, if we amalgamate coarseMap, logical nodes in the coordinates vector would correspond to - // logical blocks in the matrix - RCP coarseMap = Get< RCP >(coarseLevel, "CoarseMap"); - bool thyraMode = coarseMap->getThyraMode(); + // Get components + for (size_t i = 0; i < numSubFactories; i++) { + GetOStream(Runtime1) << "Generating Coordinates for block " << i << "/" << numSubFactories << std::endl; + const RCP& myFactory = subFactories_[i]; + myFactory->CallBuild(coarseLevel); + subBlockCoords[i] = coarseLevel.Get >("Coordinates", myFactory.get()); + subBlockMaps[i] = subBlockCoords[i]->getMap(); + } - ArrayView elementAList = coarseMap->getFullMap()->getLocalElementList(); + // Blocked Map + RCP coarseCoordMapBlocked; - LO blkSize = 1; - if (rcp_dynamic_cast(coarseMap->getMap(0, thyraMode)) != Teuchos::null) - blkSize = rcp_dynamic_cast(coarseMap->getMap(0, thyraMode))->getFixedBlockSize(); + { + // coarseMap is being used to set up the domain map of tentative P, and therefore, the row map of Ac + // Therefore, if we amalgamate coarseMap, logical nodes in the coordinates vector would correspond to + // logical blocks in the matrix + RCP coarseMap = Get >(coarseLevel, "CoarseMap"); + bool thyraMode = coarseMap->getThyraMode(); - for(size_t i=1; i(coarseMap->getMap(i, thyraMode)) != Teuchos::null) - otherBlkSize = rcp_dynamic_cast(coarseMap->getMap(i, thyraMode))->getFixedBlockSize(); - TEUCHOS_TEST_FOR_EXCEPTION(otherBlkSize != blkSize, Exceptions::RuntimeError, "BlockedCoordinatesTransferFactory: Subblocks have different Block sizes. 
This is not yet supported."); - } + ArrayView elementAList = coarseMap->getFullMap()->getLocalElementList(); - GO indexBase = coarseMap->getFullMap()->getIndexBase(); - size_t numElements = elementAList.size() / blkSize; - Array elementList(numElements); + LO blkSize = 1; + if (rcp_dynamic_cast(coarseMap->getMap(0, thyraMode)) != Teuchos::null) + blkSize = rcp_dynamic_cast(coarseMap->getMap(0, thyraMode))->getFixedBlockSize(); - // Amalgamate the map - for (LO i = 0; i < Teuchos::as(numElements); i++) - elementList[i] = (elementAList[i*blkSize]-indexBase)/blkSize + indexBase; + for (size_t i = 1; i < numSubFactories; i++) { + LO otherBlkSize = 1; + if (rcp_dynamic_cast(coarseMap->getMap(i, thyraMode)) != Teuchos::null) + otherBlkSize = rcp_dynamic_cast(coarseMap->getMap(i, thyraMode))->getFixedBlockSize(); + TEUCHOS_TEST_FOR_EXCEPTION(otherBlkSize != blkSize, Exceptions::RuntimeError, "BlockedCoordinatesTransferFactory: Subblocks have different Block sizes. This is not yet supported."); + } - RCP coarseCoordMap = MapFactory::Build(coarseMap->getFullMap()->lib(), - Teuchos::OrdinalTraits::invalid(), elementList, indexBase, coarseMap->getFullMap()->getComm()); + GO indexBase = coarseMap->getFullMap()->getIndexBase(); + size_t numElements = elementAList.size() / blkSize; + Array elementList(numElements); - coarseCoordMapBlocked = rcp(new BlockedMap(coarseCoordMap, subBlockMaps, thyraMode)); - } + // Amalgamate the map + for (LO i = 0; i < Teuchos::as(numElements); i++) + elementList[i] = (elementAList[i * blkSize] - indexBase) / blkSize + indexBase; - // Build blocked coordinates vector - RCP bcoarseCoords = rcp(new dBV(coarseCoordMapBlocked,subBlockCoords)); + RCP coarseCoordMap = MapFactory::Build(coarseMap->getFullMap()->lib(), + Teuchos::OrdinalTraits::invalid(), elementList, indexBase, coarseMap->getFullMap()->getComm()); - // Turn the blocked coordinates vector into an unblocked one - RCP coarseCoords = bcoarseCoords->Merge(); - Set >(coarseLevel, "Coordinates", 
coarseCoords); + coarseCoordMapBlocked = rcp(new BlockedMap(coarseCoordMap, subBlockMaps, thyraMode)); } - template - void BlockedCoordinatesTransferFactory::AddFactory(const RCP& factory) { - subFactories_.push_back(factory); - } + // Build blocked coordinates vector + RCP bcoarseCoords = rcp(new dBV(coarseCoordMapBlocked, subBlockCoords)); + // Turn the blocked coordinates vector into an unblocked one + RCP coarseCoords = bcoarseCoords->Merge(); + Set >(coarseLevel, "Coordinates", coarseCoords); +} +template +void BlockedCoordinatesTransferFactory::AddFactory(const RCP& factory) { + subFactories_.push_back(factory); +} -} // namespace MueLu +} // namespace MueLu -#endif // MUELU_BLOCKEDCOORDINATESTRANSFER_FACTORY_DEF_HPP +#endif // MUELU_BLOCKEDCOORDINATESTRANSFER_FACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_BlockedRAPFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_BlockedRAPFactory_decl.hpp index 63449a12d82b..387bbcba6dcc 100644 --- a/packages/muelu/src/Misc/MueLu_BlockedRAPFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_BlockedRAPFactory_decl.hpp @@ -60,95 +60,94 @@ #include "MueLu_TwoLevelFactoryBase.hpp" namespace MueLu { - /*! +/*! @class BlockedRAPFactory @brief Factory for building coarse matrices. */ - template - class BlockedRAPFactory : public TwoLevelFactoryBase { +template +class BlockedRAPFactory : public TwoLevelFactoryBase { #undef MUELU_BLOCKEDRAPFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - BlockedRAPFactory(); + BlockedRAPFactory(); - virtual ~BlockedRAPFactory() = default; - //@} + virtual ~BlockedRAPFactory() = default; + //@} - //! @name Input - //@{ + //! 
@name Input + //@{ - RCP GetValidParameterList() const override; + RCP GetValidParameterList() const override; - void DeclareInput(Level &fineLevel, Level &coarseLevel) const override; + void DeclareInput(Level &fineLevel, Level &coarseLevel) const override; - //@} + //@} - //! @name Build methods. - //@{ - void Build(Level &fineLevel, Level &coarseLevel) const override; - //@} + //! @name Build methods. + //@{ + void Build(Level &fineLevel, Level &coarseLevel) const override; + //@} - //! @name Handling of user-defined transfer factories - //@{ + //! @name Handling of user-defined transfer factories + //@{ - //! Indicate that zero entries on the diagonal of Ac shall be repaired (i.e. if A(i,i) == 0.0 set A(i,i) = 1.0) - void SetRepairZeroDiagonal(bool const &repair) { - repairZeroDiagonals_ = repair; - if(repair) checkAc_ = true; // make sure that plausibility check is performed. Otherwise SetRepairZeroDiagonal(true) has no effect. - } + //! Indicate that zero entries on the diagonal of Ac shall be repaired (i.e. if A(i,i) == 0.0 set A(i,i) = 1.0) + void SetRepairZeroDiagonal(bool const &repair) { + repairZeroDiagonals_ = repair; + if (repair) checkAc_ = true; // make sure that plausibility check is performed. Otherwise SetRepairZeroDiagonal(true) has no effect. + } - //! Indicate that a simple plausibility check shall be done for Ac after building RAP - void SetPlausibilityCheck(bool const &check) { - checkAc_ = check; - } + //! Indicate that a simple plausibility check shall be done for Ac after building RAP + void SetPlausibilityCheck(bool const &check) { + checkAc_ = check; + } - //@{ - /*! @brief Add transfer factory in the end of list of transfer factories in RepartitionAcFactory. + //@{ + /*! @brief Add transfer factory in the end of list of transfer factories in RepartitionAcFactory. Transfer factories are derived from TwoLevelFactoryBase and project some data from the fine level to the next coarser level. 
*/ - void AddTransferFactory(const RCP& factory); + void AddTransferFactory(const RCP &factory); - // TODO add a function to remove a specific transfer factory? + // TODO add a function to remove a specific transfer factory? - //! Returns number of transfer factories. - size_t NumTransferFactories() const { return transferFacts_.size(); } + //! Returns number of transfer factories. + size_t NumTransferFactories() const { return transferFacts_.size(); } - //@} + //@} - private: + private: + //! @name internal plausibility check methods + //! checks main diagonal entries of (0,0) block. Does not affect entries in (1,1) block! + static void CheckMainDiagonal(RCP &bAc, bool repairZeroDiagonals = false); - //! @name internal plausibility check methods - //! checks main diagonal entries of (0,0) block. Does not affect entries in (1,1) block! - static void CheckMainDiagonal(RCP & bAc, bool repairZeroDiagonals = false); + //! If true, perform a basic plausibility check on Ac (default = false) + //! note, that the repairZeroDiagonals_ flag only is valid for checkAc_ == true + bool checkAc_; - //! If true, perform a basic plausibility check on Ac (default = false) - //! note, that the repairZeroDiagonals_ flag only is valid for checkAc_ == true - bool checkAc_; + //! If true, the CheckMainDiagonal routine automatically repairs zero entries on main diagonal (default = false) + //! i.e. if A(i,i) == 0.0 set A(i,i) = 1.0 + //! note, that the repairZeroDiagonals_ flag only is valid for checkAc_ == true + bool repairZeroDiagonals_; - //! If true, the CheckMainDiagonal routine automatically repairs zero entries on main diagonal (default = false) - //! i.e. if A(i,i) == 0.0 set A(i,i) = 1.0 - //! note, that the repairZeroDiagonals_ flag only is valid for checkAc_ == true - bool repairZeroDiagonals_; + //@{ - //@{ + //! list of user-defined transfer Factories + std::vector > transferFacts_; - //! 
list of user-defined transfer Factories - std::vector > transferFacts_; + //@} - //@} +}; //class BlockedRAPFactory - }; //class BlockedRAPFactory - -} //namespace MueLu +} //namespace MueLu #define MUELU_BLOCKEDRAPFACTORY_SHORT -#endif // MUELU_BLOCKEDRAPFACTORY_DECL_HPP +#endif // MUELU_BLOCKEDRAPFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_BlockedRAPFactory_def.hpp b/packages/muelu/src/Misc/MueLu_BlockedRAPFactory_def.hpp index 7817372296cb..eeae6f0ed3b2 100644 --- a/packages/muelu/src/Misc/MueLu_BlockedRAPFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_BlockedRAPFactory_def.hpp @@ -60,174 +60,169 @@ namespace MueLu { - template - BlockedRAPFactory::BlockedRAPFactory() - : checkAc_(false), repairZeroDiagonals_(false) - { } +template +BlockedRAPFactory::BlockedRAPFactory() + : checkAc_(false) + , repairZeroDiagonals_(false) {} - template - RCP BlockedRAPFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP BlockedRAPFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("transpose: use implicit"); -#undef SET_VALID_ENTRY - validParamList->set< RCP >("A", null, "Generating factory of the matrix A used during the prolongator smoothing process"); - validParamList->set< RCP >("P", null, "Prolongator factory"); - validParamList->set< RCP >("R", null, "Restrictor factory"); + SET_VALID_ENTRY("transpose: use implicit"); +#undef SET_VALID_ENTRY + validParamList->set >("A", null, "Generating factory of the matrix A used during the prolongator smoothing process"); + validParamList->set >("P", null, "Prolongator factory"); + validParamList->set >("R", null, "Restrictor factory"); - return validParamList; - } - - template - void BlockedRAPFactory::DeclareInput(Level &fineLevel, Level &coarseLevel) const { - const Teuchos::ParameterList& pL = GetParameterList(); - if 
(pL.get("transpose: use implicit") == false) - Input(coarseLevel, "R"); + return validParamList; +} - Input(fineLevel, "A"); - Input(coarseLevel, "P"); - - // call DeclareInput of all user-given transfer factories - for (std::vector >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) - (*it)->CallDeclareInput(coarseLevel); - } +template +void BlockedRAPFactory::DeclareInput(Level &fineLevel, Level &coarseLevel) const { + const Teuchos::ParameterList &pL = GetParameterList(); + if (pL.get("transpose: use implicit") == false) + Input(coarseLevel, "R"); - template - void BlockedRAPFactory::Build(Level &fineLevel, Level &coarseLevel) const { //FIXME make fineLevel const!! - FactoryMonitor m(*this, "Computing Ac (block)", coarseLevel); + Input(fineLevel, "A"); + Input(coarseLevel, "P"); - const ParameterList& pL = GetParameterList(); + // call DeclareInput of all user-given transfer factories + for (std::vector >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) + (*it)->CallDeclareInput(coarseLevel); +} - RCP A = Get< RCP >(fineLevel, "A"); - RCP P = Get< RCP >(coarseLevel, "P"); +template +void BlockedRAPFactory::Build(Level &fineLevel, Level &coarseLevel) const { //FIXME make fineLevel const!! 
+ FactoryMonitor m(*this, "Computing Ac (block)", coarseLevel); + const ParameterList &pL = GetParameterList(); - RCP bA = rcp_dynamic_cast(A); - RCP bP = rcp_dynamic_cast(P); - TEUCHOS_TEST_FOR_EXCEPTION(bA.is_null() || bP.is_null(), Exceptions::BadCast, "Matrices A and P must be of type BlockedCrsMatrix."); + RCP A = Get >(fineLevel, "A"); + RCP P = Get >(coarseLevel, "P"); + RCP bA = rcp_dynamic_cast(A); + RCP bP = rcp_dynamic_cast(P); + TEUCHOS_TEST_FOR_EXCEPTION(bA.is_null() || bP.is_null(), Exceptions::BadCast, "Matrices A and P must be of type BlockedCrsMatrix."); - RCP bAP; - RCP bAc; - { - SubFactoryMonitor subM(*this, "MxM: A x P", coarseLevel); + RCP bAP; + RCP bAc; + { + SubFactoryMonitor subM(*this, "MxM: A x P", coarseLevel); - // Triple matrix product for BlockedCrsMatrixClass - TEUCHOS_TEST_FOR_EXCEPTION((bA->Cols() != bP->Rows()), Exceptions::BadCast, + // Triple matrix product for BlockedCrsMatrixClass + TEUCHOS_TEST_FOR_EXCEPTION((bA->Cols() != bP->Rows()), Exceptions::BadCast, "Block matrix dimensions do not match: " - "A is " << bA->Rows() << "x" << bA->Cols() << - "P is " << bP->Rows() << "x" << bP->Cols()); + "A is " + << bA->Rows() << "x" << bA->Cols() << "P is " << bP->Rows() << "x" << bP->Cols()); - bAP = MatrixMatrix::TwoMatrixMultiplyBlock(*bA, false, *bP, false, GetOStream(Statistics2), true, true); - } - - - // If we do not modify matrix later, allow optimization of storage. - // This is necessary for new faster Epetra MM kernels. 
- bool doOptimizeStorage = !checkAc_; + bAP = MatrixMatrix::TwoMatrixMultiplyBlock(*bA, false, *bP, false, GetOStream(Statistics2), true, true); + } - const bool doTranspose = true; - const bool doFillComplete = true; - if (pL.get("transpose: use implicit") == true) { - SubFactoryMonitor m2(*this, "MxM: P' x (AP) (implicit)", coarseLevel); - bAc = MatrixMatrix::TwoMatrixMultiplyBlock(*bP, doTranspose, *bAP, !doTranspose, GetOStream(Statistics2), doFillComplete, doOptimizeStorage); + // If we do not modify matrix later, allow optimization of storage. + // This is necessary for new faster Epetra MM kernels. + bool doOptimizeStorage = !checkAc_; - } else { - RCP R = Get< RCP >(coarseLevel, "R"); - RCP bR = rcp_dynamic_cast(R); - TEUCHOS_TEST_FOR_EXCEPTION(bR.is_null(), Exceptions::BadCast, "Matrix R must be of type BlockedCrsMatrix."); + const bool doTranspose = true; + const bool doFillComplete = true; + if (pL.get("transpose: use implicit") == true) { + SubFactoryMonitor m2(*this, "MxM: P' x (AP) (implicit)", coarseLevel); + bAc = MatrixMatrix::TwoMatrixMultiplyBlock(*bP, doTranspose, *bAP, !doTranspose, GetOStream(Statistics2), doFillComplete, doOptimizeStorage); - TEUCHOS_TEST_FOR_EXCEPTION(bA->Rows() != bR->Cols(), Exceptions::BadCast, - "Block matrix dimensions do not match: " - "R is " << bR->Rows() << "x" << bR->Cols() << - "A is " << bA->Rows() << "x" << bA->Cols()); + } else { + RCP R = Get >(coarseLevel, "R"); + RCP bR = rcp_dynamic_cast(R); + TEUCHOS_TEST_FOR_EXCEPTION(bR.is_null(), Exceptions::BadCast, "Matrix R must be of type BlockedCrsMatrix."); - SubFactoryMonitor m2(*this, "MxM: R x (AP) (explicit)", coarseLevel); - bAc = MatrixMatrix::TwoMatrixMultiplyBlock(*bR, !doTranspose, *bAP, !doTranspose, GetOStream(Statistics2), doFillComplete, doOptimizeStorage); - } + TEUCHOS_TEST_FOR_EXCEPTION(bA->Rows() != bR->Cols(), Exceptions::BadCast, + "Block matrix dimensions do not match: " + "R is " + << bR->Rows() << "x" << bR->Cols() << "A is " << bA->Rows() << 
"x" << bA->Cols()); + SubFactoryMonitor m2(*this, "MxM: R x (AP) (explicit)", coarseLevel); + bAc = MatrixMatrix::TwoMatrixMultiplyBlock(*bR, !doTranspose, *bAP, !doTranspose, GetOStream(Statistics2), doFillComplete, doOptimizeStorage); + } - if (checkAc_) - CheckMainDiagonal(bAc); + if (checkAc_) + CheckMainDiagonal(bAc); - GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*bAc, "Ac (blocked)"); + GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*bAc, "Ac (blocked)"); - Set >(coarseLevel, "A", bAc); + Set >(coarseLevel, "A", bAc); - if (transferFacts_.begin() != transferFacts_.end()) { - SubFactoryMonitor m1(*this, "Projections", coarseLevel); + if (transferFacts_.begin() != transferFacts_.end()) { + SubFactoryMonitor m1(*this, "Projections", coarseLevel); - // call Build of all user-given transfer factories - for (std::vector >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) { - RCP fac = *it; + // call Build of all user-given transfer factories + for (std::vector >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) { + RCP fac = *it; - GetOStream(Runtime0) << "BlockRAPFactory: call transfer factory: " << fac->description() << std::endl; + GetOStream(Runtime0) << "BlockRAPFactory: call transfer factory: " << fac->description() << std::endl; - fac->CallBuild(coarseLevel); + fac->CallBuild(coarseLevel); - // AP (11/11/13): I am not sure exactly why we need to call Release, but we do need it to get rid - // of dangling data for CoordinatesTransferFactory - coarseLevel.Release(*fac); - } + // AP (11/11/13): I am not sure exactly why we need to call Release, but we do need it to get rid + // of dangling data for CoordinatesTransferFactory + coarseLevel.Release(*fac); } } +} + +template +void BlockedRAPFactory::CheckMainDiagonal(RCP &bAc, bool repairZeroDiagonals) { + RCP c00 = bAc->getMatrix(0, 0); + RCP Aout = MatrixFactory::Build(c00->getRowMap(), c00->getGlobalMaxNumRowEntries()); + + RCP diagVec = 
VectorFactory::Build(c00->getRowMap()); + c00->getLocalDiagCopy(*diagVec); + ArrayRCP diagVal = diagVec->getDataNonConst(0); + + // loop over local rows + for (size_t row = 0; row < c00->getLocalNumRows(); row++) { + // get global row id + GO grid = c00->getRowMap()->getGlobalElement(row); // global row id + + ArrayView indices; + ArrayView vals; + c00->getLocalRowView(row, indices, vals); + + // just copy all values in output + ArrayRCP indout(indices.size(), Teuchos::OrdinalTraits::zero()); + ArrayRCP valout(indices.size(), Teuchos::ScalarTraits::zero()); + + // just copy values + for (size_t i = 0; i < as(indices.size()); i++) { + GO gcid = c00->getColMap()->getGlobalElement(indices[i]); // LID -> GID (column) + indout[i] = gcid; + valout[i] = vals[i]; + } - - template - void BlockedRAPFactory::CheckMainDiagonal(RCP & bAc, bool repairZeroDiagonals) { - RCP c00 = bAc->getMatrix(0, 0); - RCP Aout = MatrixFactory::Build(c00->getRowMap(), c00->getGlobalMaxNumRowEntries()); - - RCP diagVec = VectorFactory::Build(c00->getRowMap()); - c00->getLocalDiagCopy(*diagVec); - ArrayRCP diagVal = diagVec->getDataNonConst(0); - - // loop over local rows - for (size_t row = 0; row < c00->getLocalNumRows(); row++) { - // get global row id - GO grid = c00->getRowMap()->getGlobalElement(row); // global row id - - ArrayView indices; - ArrayView vals; - c00->getLocalRowView(row, indices, vals); - - // just copy all values in output - ArrayRCP indout(indices.size(), Teuchos::OrdinalTraits::zero()); - ArrayRCP valout(indices.size(), Teuchos::ScalarTraits::zero()); - - // just copy values - for (size_t i = 0; i < as(indices.size()); i++) { - GO gcid = c00->getColMap()->getGlobalElement(indices[i]); // LID -> GID (column) - indout [i] = gcid; - valout [i] = vals[i]; - } - - Aout->insertGlobalValues(grid, indout.view(0, indout.size()), valout.view(0, valout.size())); - if (diagVal[row] == Teuchos::ScalarTraits::zero() && repairZeroDiagonals) { - // always overwrite diagonal entry - 
Aout->insertGlobalValues(grid, Teuchos::tuple(grid), Teuchos::tuple(1.0)); - } + Aout->insertGlobalValues(grid, indout.view(0, indout.size()), valout.view(0, valout.size())); + if (diagVal[row] == Teuchos::ScalarTraits::zero() && repairZeroDiagonals) { + // always overwrite diagonal entry + Aout->insertGlobalValues(grid, Teuchos::tuple(grid), Teuchos::tuple(1.0)); } + } - Aout->fillComplete(c00->getDomainMap(), c00->getRangeMap()); + Aout->fillComplete(c00->getDomainMap(), c00->getRangeMap()); - bAc->setMatrix(0, 0, Aout); - } + bAc->setMatrix(0, 0, Aout); +} - template - void BlockedRAPFactory::AddTransferFactory(const RCP& factory) { - // check if it's a TwoLevelFactoryBase based transfer factory - TEUCHOS_TEST_FOR_EXCEPTION(rcp_dynamic_cast(factory) == Teuchos::null, Exceptions::BadCast, - "Transfer factory is not derived from TwoLevelFactoryBase. This is very strange. " - "(Note: you can remove this exception if there's a good reason for)"); - transferFacts_.push_back(factory); - } +template +void BlockedRAPFactory::AddTransferFactory(const RCP &factory) { + // check if it's a TwoLevelFactoryBase based transfer factory + TEUCHOS_TEST_FOR_EXCEPTION(rcp_dynamic_cast(factory) == Teuchos::null, Exceptions::BadCast, + "Transfer factory is not derived from TwoLevelFactoryBase. This is very strange. 
" + "(Note: you can remove this exception if there's a good reason for)"); + transferFacts_.push_back(factory); +} -} //namespace MueLu +} //namespace MueLu #define MUELU_BLOCKEDRAPFACTORY_SHORT -#endif // MUELU_BLOCKEDRAPFACTORY_DEF_HPP +#endif // MUELU_BLOCKEDRAPFACTORY_DEF_HPP // TODO add plausibility check // TODO add CheckMainDiagonal for Blocked operator diff --git a/packages/muelu/src/Misc/MueLu_CoordinatesTransferFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_CoordinatesTransferFactory_decl.hpp index 2ee51c482c54..82e8615c7f74 100644 --- a/packages/muelu/src/Misc/MueLu_CoordinatesTransferFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_CoordinatesTransferFactory_decl.hpp @@ -94,27 +94,27 @@ namespace MueLu { ----------|--------------|------------ | Coordinates | CoordinatesTransferFactory | coarse level coordinates */ - template - class CoordinatesTransferFactory : public TwoLevelFactoryBase { - public: - typedef Scalar scalar_type; - typedef LocalOrdinal local_ordinal_type; - typedef GlobalOrdinal global_ordinal_type; - typedef typename Node::device_type DeviceType; - typedef typename DeviceType::execution_space execution_space; - - private: +template +class CoordinatesTransferFactory : public TwoLevelFactoryBase { + public: + typedef Scalar scalar_type; + typedef LocalOrdinal local_ordinal_type; + typedef GlobalOrdinal global_ordinal_type; + typedef typename Node::device_type DeviceType; + typedef typename DeviceType::execution_space execution_space; + + private: #undef MUELU_COORDINATESTRANSFERFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - /*! @brief Constructor. + /*! @brief Constructor. @param vectorName The name of the quantity to be restricted. @param restrictionName The name of the restriction Matrix. 
@@ -122,40 +122,39 @@ namespace MueLu { The operator associated with projectionName will be applied to the MultiVector associated with vectorName. */ - CoordinatesTransferFactory() { } + CoordinatesTransferFactory() {} - //! Destructor. - virtual ~CoordinatesTransferFactory() { } + //! Destructor. + virtual ~CoordinatesTransferFactory() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! @name Input - //@{ + //! @name Input + //@{ - /*! @brief Specifies the data that this class needs, and the factories that generate that data. + /*! @brief Specifies the data that this class needs, and the factories that generate that data. If the Build method of this class requires some data, but the generating factory is not specified in DeclareInput, then this class will fall back to the settings in FactoryManager. */ - void DeclareInput(Level &finelevel, Level &coarseLevel) const; + void DeclareInput(Level &finelevel, Level &coarseLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - //! Build an object with this factory. - void Build(Level & fineLevel, Level &coarseLevel) const; + //! Build an object with this factory. 
+ void Build(Level &fineLevel, Level &coarseLevel) const; - //@} + //@} - private: + private: +}; // class CoordinatesTransferFactory - }; // class CoordinatesTransferFactory - -} // namespace MueLu +} // namespace MueLu #define MUELU_COORDINATESTRANSFERFACTORY_SHORT -#endif // MUELU_COORDINATESTRANSFER_FACTORY_DECL_HPP +#endif // MUELU_COORDINATESTRANSFER_FACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_CoordinatesTransferFactory_def.hpp b/packages/muelu/src/Misc/MueLu_CoordinatesTransferFactory_def.hpp index b8f0efbbdb2f..dc6bd4c6c3ec 100644 --- a/packages/muelu/src/Misc/MueLu_CoordinatesTransferFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_CoordinatesTransferFactory_def.hpp @@ -60,227 +60,224 @@ namespace MueLu { - template - RCP CoordinatesTransferFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - validParamList->set >("Coordinates", Teuchos::null, "Factory for coordinates generation"); - validParamList->set >("Aggregates", Teuchos::null, "Factory for coordinates generation"); - validParamList->set >("CoarseMap", Teuchos::null, "Generating factory of the coarse map"); - validParamList->set ("structured aggregation", false, "Flag specifying that the geometric data is transferred for StructuredAggregationFactory"); - validParamList->set ("aggregation coupled", false, "Flag specifying if the aggregation algorithm was used in coupled mode."); - validParamList->set ("Geometric", false, "Flag specifying that the coordinates are transferred for GeneralGeometricPFactory"); - validParamList->set >("coarseCoordinates", Teuchos::null, "Factory for coarse coordinates generation"); - validParamList->set >("gCoarseNodesPerDim", Teuchos::null, "Factory providing the global number of nodes per spatial dimensions of the mesh"); - validParamList->set >("lCoarseNodesPerDim", Teuchos::null, "Factory providing the local number of nodes per spatial dimensions of the mesh"); - validParamList->set >("numDimensions" , 
Teuchos::null, "Factory providing the number of spatial dimensions of the mesh"); - validParamList->set ("write start", -1, "first level at which coordinates should be written to file"); - validParamList->set ("write end", -1, "last level at which coordinates should be written to file"); - validParamList->set ("hybrid aggregation", false, "Flag specifying that hybrid aggregation data is transfered for HybridAggregationFactory"); - validParamList->set >("aggregationRegionTypeCoarse", Teuchos::null, "Factory indicating what aggregation type is to be used on the coarse level of the region"); - validParamList->set ("interface aggregation", false, "Flag specifying that interface aggregation data is transfered for HybridAggregationFactory"); - validParamList->set >("coarseInterfacesDimensions", Teuchos::null, "Factory providing coarseInterfacesDimensions"); - validParamList->set >("nodeOnCoarseInterface", Teuchos::null, "Factory providing nodeOnCoarseInterface"); - - - return validParamList; - } - - template - void CoordinatesTransferFactory::DeclareInput(Level& fineLevel, Level& coarseLevel) const { - static bool isAvailableCoords = false; - - const ParameterList& pL = GetParameterList(); - if(pL.get("structured aggregation") == true) { - if(pL.get("aggregation coupled") == true) { - Input(fineLevel, "gCoarseNodesPerDim"); - } - Input(fineLevel, "lCoarseNodesPerDim"); - Input(fineLevel, "numDimensions"); - } else if(pL.get("Geometric") == true) { - Input(coarseLevel, "coarseCoordinates"); - Input(coarseLevel, "gCoarseNodesPerDim"); - Input(coarseLevel, "lCoarseNodesPerDim"); - } else if(pL.get("hybrid aggregation") == true) { - Input(fineLevel, "aggregationRegionTypeCoarse"); - Input(fineLevel, "lCoarseNodesPerDim"); - Input(fineLevel, "numDimensions"); - if(pL.get("interface aggregation") == true) { - Input(fineLevel, "coarseInterfacesDimensions"); - Input(fineLevel, "nodeOnCoarseInterface"); - } - } else { - if (coarseLevel.GetRequestMode() == Level::REQUEST) - 
isAvailableCoords = coarseLevel.IsAvailable("Coordinates", this); - - if (isAvailableCoords == false) { - Input(fineLevel, "Coordinates"); - Input(fineLevel, "Aggregates"); - Input(fineLevel, "CoarseMap"); - } +template +RCP CoordinatesTransferFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + + validParamList->set >("Coordinates", Teuchos::null, "Factory for coordinates generation"); + validParamList->set >("Aggregates", Teuchos::null, "Factory for coordinates generation"); + validParamList->set >("CoarseMap", Teuchos::null, "Generating factory of the coarse map"); + validParamList->set("structured aggregation", false, "Flag specifying that the geometric data is transferred for StructuredAggregationFactory"); + validParamList->set("aggregation coupled", false, "Flag specifying if the aggregation algorithm was used in coupled mode."); + validParamList->set("Geometric", false, "Flag specifying that the coordinates are transferred for GeneralGeometricPFactory"); + validParamList->set >("coarseCoordinates", Teuchos::null, "Factory for coarse coordinates generation"); + validParamList->set >("gCoarseNodesPerDim", Teuchos::null, "Factory providing the global number of nodes per spatial dimensions of the mesh"); + validParamList->set >("lCoarseNodesPerDim", Teuchos::null, "Factory providing the local number of nodes per spatial dimensions of the mesh"); + validParamList->set >("numDimensions", Teuchos::null, "Factory providing the number of spatial dimensions of the mesh"); + validParamList->set("write start", -1, "first level at which coordinates should be written to file"); + validParamList->set("write end", -1, "last level at which coordinates should be written to file"); + validParamList->set("hybrid aggregation", false, "Flag specifying that hybrid aggregation data is transfered for HybridAggregationFactory"); + validParamList->set >("aggregationRegionTypeCoarse", Teuchos::null, "Factory indicating what aggregation type is to 
be used on the coarse level of the region"); + validParamList->set("interface aggregation", false, "Flag specifying that interface aggregation data is transfered for HybridAggregationFactory"); + validParamList->set >("coarseInterfacesDimensions", Teuchos::null, "Factory providing coarseInterfacesDimensions"); + validParamList->set >("nodeOnCoarseInterface", Teuchos::null, "Factory providing nodeOnCoarseInterface"); + + return validParamList; +} + +template +void CoordinatesTransferFactory::DeclareInput(Level& fineLevel, Level& coarseLevel) const { + static bool isAvailableCoords = false; + + const ParameterList& pL = GetParameterList(); + if (pL.get("structured aggregation") == true) { + if (pL.get("aggregation coupled") == true) { + Input(fineLevel, "gCoarseNodesPerDim"); + } + Input(fineLevel, "lCoarseNodesPerDim"); + Input(fineLevel, "numDimensions"); + } else if (pL.get("Geometric") == true) { + Input(coarseLevel, "coarseCoordinates"); + Input(coarseLevel, "gCoarseNodesPerDim"); + Input(coarseLevel, "lCoarseNodesPerDim"); + } else if (pL.get("hybrid aggregation") == true) { + Input(fineLevel, "aggregationRegionTypeCoarse"); + Input(fineLevel, "lCoarseNodesPerDim"); + Input(fineLevel, "numDimensions"); + if (pL.get("interface aggregation") == true) { + Input(fineLevel, "coarseInterfacesDimensions"); + Input(fineLevel, "nodeOnCoarseInterface"); + } + } else { + if (coarseLevel.GetRequestMode() == Level::REQUEST) + isAvailableCoords = coarseLevel.IsAvailable("Coordinates", this); + + if (isAvailableCoords == false) { + Input(fineLevel, "Coordinates"); + Input(fineLevel, "Aggregates"); + Input(fineLevel, "CoarseMap"); } } +} - template - void CoordinatesTransferFactory::Build(Level & fineLevel, Level &coarseLevel) const { - FactoryMonitor m(*this, "Build", coarseLevel); +template +void CoordinatesTransferFactory::Build(Level& fineLevel, Level& coarseLevel) const { + FactoryMonitor m(*this, "Build", coarseLevel); - using xdMV = 
Xpetra::MultiVector::magnitudeType,LO,GO,NO>; + using xdMV = Xpetra::MultiVector::magnitudeType, LO, GO, NO>; - GetOStream(Runtime0) << "Transferring coordinates" << std::endl; + GetOStream(Runtime0) << "Transferring coordinates" << std::endl; - int numDimensions; - RCP coarseCoords; - RCP fineCoords; - Array gCoarseNodesPerDir; - Array lCoarseNodesPerDir; + int numDimensions; + RCP coarseCoords; + RCP fineCoords; + Array gCoarseNodesPerDir; + Array lCoarseNodesPerDir; - const ParameterList& pL = GetParameterList(); + const ParameterList& pL = GetParameterList(); - if(pL.get("hybrid aggregation") == true) { - std::string regionType = Get(fineLevel,"aggregationRegionTypeCoarse"); - numDimensions = Get(fineLevel, "numDimensions"); - lCoarseNodesPerDir = Get >(fineLevel, "lCoarseNodesPerDim"); - Set(coarseLevel, "aggregationRegionType", regionType); - Set (coarseLevel, "numDimensions", numDimensions); - Set > (coarseLevel, "lNodesPerDim", lCoarseNodesPerDir); + if (pL.get("hybrid aggregation") == true) { + std::string regionType = Get(fineLevel, "aggregationRegionTypeCoarse"); + numDimensions = Get(fineLevel, "numDimensions"); + lCoarseNodesPerDir = Get >(fineLevel, "lCoarseNodesPerDim"); + Set(coarseLevel, "aggregationRegionType", regionType); + Set(coarseLevel, "numDimensions", numDimensions); + Set >(coarseLevel, "lNodesPerDim", lCoarseNodesPerDir); - if((pL.get("interface aggregation") == true) && (regionType == "uncoupled")) { - Array coarseInterfacesDimensions = Get >(fineLevel, "coarseInterfacesDimensions"); - Array nodeOnCoarseInterface = Get >(fineLevel, "nodeOnCoarseInterface"); - Set >(coarseLevel, "interfacesDimensions", coarseInterfacesDimensions); - Set >(coarseLevel, "nodeOnInterface", nodeOnCoarseInterface); - } + if ((pL.get("interface aggregation") == true) && (regionType == "uncoupled")) { + Array coarseInterfacesDimensions = Get >(fineLevel, "coarseInterfacesDimensions"); + Array nodeOnCoarseInterface = Get >(fineLevel, "nodeOnCoarseInterface"); + 
Set >(coarseLevel, "interfacesDimensions", coarseInterfacesDimensions); + Set >(coarseLevel, "nodeOnInterface", nodeOnCoarseInterface); + } - } else if(pL.get("structured aggregation") == true) { - if(pL.get("aggregation coupled") == true) { - gCoarseNodesPerDir = Get >(fineLevel, "gCoarseNodesPerDim"); - Set >(coarseLevel, "gNodesPerDim", gCoarseNodesPerDir); - } - lCoarseNodesPerDir = Get >(fineLevel, "lCoarseNodesPerDim"); - Set >(coarseLevel, "lNodesPerDim", lCoarseNodesPerDir); - numDimensions = Get(fineLevel, "numDimensions"); - Set(coarseLevel, "numDimensions", numDimensions); - - } else if(pL.get("Geometric") == true) { - coarseCoords = Get >(coarseLevel, "coarseCoordinates"); - gCoarseNodesPerDir = Get >(coarseLevel, "gCoarseNodesPerDim"); - lCoarseNodesPerDir = Get >(coarseLevel, "lCoarseNodesPerDim"); + } else if (pL.get("structured aggregation") == true) { + if (pL.get("aggregation coupled") == true) { + gCoarseNodesPerDir = Get >(fineLevel, "gCoarseNodesPerDim"); Set >(coarseLevel, "gNodesPerDim", gCoarseNodesPerDir); - Set >(coarseLevel, "lNodesPerDim", lCoarseNodesPerDir); + } + lCoarseNodesPerDir = Get >(fineLevel, "lCoarseNodesPerDim"); + Set >(coarseLevel, "lNodesPerDim", lCoarseNodesPerDir); + numDimensions = Get(fineLevel, "numDimensions"); + Set(coarseLevel, "numDimensions", numDimensions); + + } else if (pL.get("Geometric") == true) { + coarseCoords = Get >(coarseLevel, "coarseCoordinates"); + gCoarseNodesPerDir = Get >(coarseLevel, "gCoarseNodesPerDim"); + lCoarseNodesPerDir = Get >(coarseLevel, "lCoarseNodesPerDim"); + Set >(coarseLevel, "gNodesPerDim", gCoarseNodesPerDir); + Set >(coarseLevel, "lNodesPerDim", lCoarseNodesPerDir); + + Set >(coarseLevel, "Coordinates", coarseCoords); + + } else { + if (coarseLevel.IsAvailable("Coordinates", this)) { + GetOStream(Runtime0) << "Reusing coordinates" << std::endl; + return; + } - Set >(coarseLevel, "Coordinates", coarseCoords); + fineCoords = Get >(fineLevel, "Coordinates"); + RCP coarseMap = Get 
>(fineLevel, "CoarseMap"); + + // coarseMap is being used to set up the domain map of tentative P, and therefore, the row map of Ac + // Therefore, if we amalgamate coarseMap, logical nodes in the coordinates vector would correspond to + // logical blocks in the matrix + LO blkSize = 1; + if (rcp_dynamic_cast(coarseMap) != Teuchos::null) + blkSize = rcp_dynamic_cast(coarseMap)->getFixedBlockSize(); + + RCP coarseCoordMap; + RCP uniqueMap = fineCoords->getMap(); + if (blkSize > 1) { + // If the block size is greater than one, we need to create a coarse coordinate map + // FIXME: The amalgamation should really be done on device. + GO indexBase = coarseMap->getIndexBase(); + ArrayView elementAList = coarseMap->getLocalElementList(); + size_t numElements = elementAList.size() / blkSize; + Array elementList(numElements); + + // Amalgamate the map + for (LO i = 0; i < Teuchos::as(numElements); i++) + elementList[i] = (elementAList[i * blkSize] - indexBase) / blkSize + indexBase; - } else { - if (coarseLevel.IsAvailable("Coordinates", this)) { - GetOStream(Runtime0) << "Reusing coordinates" << std::endl; - return; - } - - fineCoords = Get< RCP >(fineLevel, "Coordinates"); - RCP coarseMap = Get< RCP > (fineLevel, "CoarseMap"); - - // coarseMap is being used to set up the domain map of tentative P, and therefore, the row map of Ac - // Therefore, if we amalgamate coarseMap, logical nodes in the coordinates vector would correspond to - // logical blocks in the matrix - LO blkSize = 1; - if (rcp_dynamic_cast(coarseMap) != Teuchos::null) - blkSize = rcp_dynamic_cast(coarseMap)->getFixedBlockSize(); - - RCP coarseCoordMap; - RCP uniqueMap = fineCoords->getMap(); - if(blkSize > 1) { - // If the block size is greater than one, we need to create a coarse coordinate map - // FIXME: The amalgamation should really be done on device. 
- GO indexBase = coarseMap->getIndexBase(); - ArrayView elementAList = coarseMap->getLocalElementList(); - size_t numElements = elementAList.size() / blkSize; - Array elementList(numElements); - - // Amalgamate the map - for (LO i = 0; i < Teuchos::as(numElements); i++) - elementList[i] = (elementAList[i*blkSize]-indexBase)/blkSize + indexBase; - - { - SubFactoryMonitor sfm(*this, "MapFactory: coarseCoordMap", fineLevel); - coarseCoordMap = MapFactory ::Build(coarseMap->lib(), Teuchos::OrdinalTraits::invalid(), elementList, indexBase, coarseMap->getComm()); - } - } - else { - // If the block size is one, we can just use the coarse map for coordinates - coarseCoordMap = coarseMap; + { + SubFactoryMonitor sfm(*this, "MapFactory: coarseCoordMap", fineLevel); + coarseCoordMap = MapFactory ::Build(coarseMap->lib(), Teuchos::OrdinalTraits::invalid(), elementList, indexBase, coarseMap->getComm()); } + } else { + // If the block size is one, we can just use the coarse map for coordinates + coarseCoordMap = coarseMap; + } - // Build the coarseCoords MultiVector - coarseCoords = Xpetra::MultiVectorFactory::magnitudeType,LO,GO,NO>::Build(coarseCoordMap, fineCoords->getNumVectors()); - + // Build the coarseCoords MultiVector + coarseCoords = Xpetra::MultiVectorFactory::magnitudeType, LO, GO, NO>::Build(coarseCoordMap, fineCoords->getNumVectors()); - RCP aggregates; - bool aggregatesCrossProcessors; - aggregates = Get >(fineLevel, "Aggregates"); - aggregatesCrossProcessors = aggregates->AggregatesCrossProcessors(); + RCP aggregates; + bool aggregatesCrossProcessors; + aggregates = Get >(fineLevel, "Aggregates"); + aggregatesCrossProcessors = aggregates->AggregatesCrossProcessors(); - // Create overlapped fine coordinates to reduce global communication - RCP ghostedCoords = fineCoords; - if (aggregatesCrossProcessors) { - RCP nonUniqueMap = aggregates->GetMap(); - RCP importer = ImportFactory::Build(uniqueMap, nonUniqueMap); + // Create overlapped fine coordinates to reduce 
global communication + RCP ghostedCoords = fineCoords; + if (aggregatesCrossProcessors) { + RCP nonUniqueMap = aggregates->GetMap(); + RCP importer = ImportFactory::Build(uniqueMap, nonUniqueMap); - ghostedCoords = Xpetra::MultiVectorFactory::magnitudeType,LO,GO,NO>::Build(nonUniqueMap, fineCoords->getNumVectors()); - ghostedCoords->doImport(*fineCoords, *importer, Xpetra::INSERT); - } + ghostedCoords = Xpetra::MultiVectorFactory::magnitudeType, LO, GO, NO>::Build(nonUniqueMap, fineCoords->getNumVectors()); + ghostedCoords->doImport(*fineCoords, *importer, Xpetra::INSERT); + } - // The good news is that this graph has already been constructed for the - // TentativePFactory and was cached in Aggregates. So this is a no-op. - auto aggGraph = aggregates->GetGraph(); - auto numAggs = aggGraph.numRows(); + // The good news is that this graph has already been constructed for the + // TentativePFactory and was cached in Aggregates. So this is a no-op. + auto aggGraph = aggregates->GetGraph(); + auto numAggs = aggGraph.numRows(); - auto fineCoordsView = ghostedCoords->getDeviceLocalView(Xpetra::Access::ReadOnly); - auto coarseCoordsView = coarseCoords->getDeviceLocalView(Xpetra::Access::OverwriteAll); + auto fineCoordsView = ghostedCoords->getDeviceLocalView(Xpetra::Access::ReadOnly); + auto coarseCoordsView = coarseCoords->getDeviceLocalView(Xpetra::Access::OverwriteAll); - // Fill in coarse coordinates - { - SubFactoryMonitor m2(*this, "AverageCoords", coarseLevel); + // Fill in coarse coordinates + { + SubFactoryMonitor m2(*this, "AverageCoords", coarseLevel); - const auto dim = ghostedCoords->getNumVectors(); + const auto dim = ghostedCoords->getNumVectors(); - typename AppendTrait::type fineCoordsRandomView = fineCoordsView; - for (size_t j = 0; j < dim; j++) { - Kokkos::parallel_for("MueLu:CoordinatesTransferF:Build:coord", Kokkos::RangePolicy(0, numAggs), - KOKKOS_LAMBDA(const LO i) { - // A row in this graph represents all node ids in the aggregate - // Therefore, 
averaging is very easy + typename AppendTrait::type fineCoordsRandomView = fineCoordsView; + for (size_t j = 0; j < dim; j++) { + Kokkos::parallel_for( + "MueLu:CoordinatesTransferF:Build:coord", Kokkos::RangePolicy(0, numAggs), + KOKKOS_LAMBDA(const LO i) { + // A row in this graph represents all node ids in the aggregate + // Therefore, averaging is very easy - auto aggregate = aggGraph.rowConst(i); + auto aggregate = aggGraph.rowConst(i); - typename Teuchos::ScalarTraits::magnitudeType sum = 0.0; // do not use Scalar here (Stokhos) - for (size_t colID = 0; colID < static_cast(aggregate.length); colID++) - sum += fineCoordsRandomView(aggregate(colID),j); + typename Teuchos::ScalarTraits::magnitudeType sum = 0.0; // do not use Scalar here (Stokhos) + for (size_t colID = 0; colID < static_cast(aggregate.length); colID++) + sum += fineCoordsRandomView(aggregate(colID), j); - coarseCoordsView(i,j) = sum / aggregate.length; - }); - } + coarseCoordsView(i, j) = sum / aggregate.length; + }); } - - Set >(coarseLevel, "Coordinates", coarseCoords); - } - int writeStart = pL.get("write start"), writeEnd = pL.get("write end"); - if (writeStart == 0 && fineLevel.GetLevelID() == 0 && writeStart <= writeEnd) { - std::ostringstream buf; - buf << fineLevel.GetLevelID(); - std::string fileName = "coordinates_before_rebalance_level_" + buf.str() + ".m"; - Xpetra::IO::magnitudeType,LO,GO,NO>::Write(fileName,*fineCoords); - } - if (writeStart <= coarseLevel.GetLevelID() && coarseLevel.GetLevelID() <= writeEnd) { - std::ostringstream buf; - buf << coarseLevel.GetLevelID(); - std::string fileName = "coordinates_before_rebalance_level_" + buf.str() + ".m"; - Xpetra::IO::magnitudeType,LO,GO,NO>::Write(fileName,*coarseCoords); - } + Set >(coarseLevel, "Coordinates", coarseCoords); + } + + int writeStart = pL.get("write start"), writeEnd = pL.get("write end"); + if (writeStart == 0 && fineLevel.GetLevelID() == 0 && writeStart <= writeEnd) { + std::ostringstream buf; + buf << 
fineLevel.GetLevelID(); + std::string fileName = "coordinates_before_rebalance_level_" + buf.str() + ".m"; + Xpetra::IO::magnitudeType, LO, GO, NO>::Write(fileName, *fineCoords); + } + if (writeStart <= coarseLevel.GetLevelID() && coarseLevel.GetLevelID() <= writeEnd) { + std::ostringstream buf; + buf << coarseLevel.GetLevelID(); + std::string fileName = "coordinates_before_rebalance_level_" + buf.str() + ".m"; + Xpetra::IO::magnitudeType, LO, GO, NO>::Write(fileName, *coarseCoords); } +} -} // namespace MueLu +} // namespace MueLu -#endif // MUELU_COORDINATESTRANSFER_FACTORY_DEF_HPP +#endif // MUELU_COORDINATESTRANSFER_FACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_DemoFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_DemoFactory_decl.hpp index 75931f90de75..dcbfe56a3015 100644 --- a/packages/muelu/src/Misc/MueLu_DemoFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_DemoFactory_decl.hpp @@ -52,57 +52,57 @@ namespace MueLu { - /*! +/*! @class DemoFactory class. @brief empty factory for demonstration */ - template - class DemoFactory : public SingleLevelFactoryBase { +template +class DemoFactory : public SingleLevelFactoryBase { #undef MUELU_DEMOFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - DemoFactory(); + //! Constructor. + DemoFactory(); - //! Destructor. - virtual ~DemoFactory(); + //! Destructor. + virtual ~DemoFactory(); - //@} + //@} - //! @name Input - //@{ + //! @name Input + //@{ - /*! @brief Specifies the data that this class needs, and the factories that generate that data. + /*! @brief Specifies the data that this class needs, and the factories that generate that data. If the Build method of this class requires some data, but the generating factory is not specified in DeclareInput, then this class will fall back to the settings in FactoryManager. 
*/ - void DeclareInput(Level ¤tLevel) const; + void DeclareInput(Level ¤tLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - //! Build an object with this factory. - void Build(Level & currentLevel) const; + //! Build an object with this factory. + void Build(Level ¤tLevel) const; - //@} + //@} - private: - // TODO add member variables + private: + // TODO add member variables - }; // class DemoFactory +}; // class DemoFactory -} // namespace MueLu +} // namespace MueLu #define MUELU_DEMOFACTORY_SHORT -#endif // MUELU_DEMOFACTORY_DECL_HPP +#endif // MUELU_DEMOFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_DemoFactory_def.hpp b/packages/muelu/src/Misc/MueLu_DemoFactory_def.hpp index b390142ca2f8..27d0f47cf699 100644 --- a/packages/muelu/src/Misc/MueLu_DemoFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_DemoFactory_def.hpp @@ -55,25 +55,24 @@ namespace MueLu { - template - DemoFactory::DemoFactory() - { } +template +DemoFactory::DemoFactory() {} - template - DemoFactory::~DemoFactory() {} +template +DemoFactory::~DemoFactory() {} - template - void DemoFactory::DeclareInput(Level &/* currentLevel */) const { - // TODO: declare input for factory - //Input(currentLevel, varName_); - } +template +void DemoFactory::DeclareInput(Level& /* currentLevel */) const { + // TODO: declare input for factory + //Input(currentLevel, varName_); +} - template - void DemoFactory::Build(Level & /* currentLevel */) const { - // TODO: implement factory - } +template +void DemoFactory::Build(Level& /* currentLevel */) const { + // TODO: implement factory +} -} // namespace MueLu +} // namespace MueLu #define MUELU_DEMOFACTORY_SHORT -#endif // MUELU_DEMOFACTORY_DEF_HPP +#endif // MUELU_DEMOFACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_DropNegativeEntriesFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_DropNegativeEntriesFactory_decl.hpp index ab99f2f9b964..e887a682a144 100644 --- 
a/packages/muelu/src/Misc/MueLu_DropNegativeEntriesFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_DropNegativeEntriesFactory_decl.hpp @@ -55,7 +55,7 @@ namespace MueLu { - /*! +/*! @class DropNegativeEntriesFactory class. @brief Application-specific filtering for A. Can be used in context of graph coarsening and aggregation. @@ -63,50 +63,49 @@ namespace MueLu { Do not use this kind of filtering for regular PDEs unless you have very good reasons. */ - template - class DropNegativeEntriesFactory : public SingleLevelFactoryBase { +template +class DropNegativeEntriesFactory : public SingleLevelFactoryBase { #undef MUELU_DROPNEGATIVEENTRIESFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - DropNegativeEntriesFactory() { } + DropNegativeEntriesFactory() {} - //! Destructor. - virtual ~DropNegativeEntriesFactory() { } + //! Destructor. + virtual ~DropNegativeEntriesFactory() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level& currentLevel) const; + void DeclareInput(Level& currentLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - /*! + /*! @brief Build method. Builds filtered matrix and returns it in currentLevel. 
*/ - void Build(Level& currentLevel) const; + void Build(Level& currentLevel) const; - //@} + //@} - }; //class DropNegativeEntriesFactory +}; //class DropNegativeEntriesFactory -} //namespace MueLu +} //namespace MueLu #define MUELU_DROPNEGATIVEENTRIESFACTORY_SHORT -#endif // MUELU_DROPNEGATIVEENTRIESFACTORY_DECL_HPP +#endif // MUELU_DROPNEGATIVEENTRIESFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_DropNegativeEntriesFactory_def.hpp b/packages/muelu/src/Misc/MueLu_DropNegativeEntriesFactory_def.hpp index 1bf7b0bfd4c8..362694cc8cbd 100644 --- a/packages/muelu/src/Misc/MueLu_DropNegativeEntriesFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_DropNegativeEntriesFactory_def.hpp @@ -60,76 +60,76 @@ namespace MueLu { - template - RCP DropNegativeEntriesFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP DropNegativeEntriesFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) -#undef SET_VALID_ENTRY +#undef SET_VALID_ENTRY - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A used for filtering"); + validParamList->set >("A", Teuchos::null, "Generating factory of the matrix A used for filtering"); - return validParamList; - } + return validParamList; +} - template - void DropNegativeEntriesFactory::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "A"); - } +template +void DropNegativeEntriesFactory::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "A"); +} - template - void DropNegativeEntriesFactory::Build(Level& currentLevel) const { - FactoryMonitor m(*this, "Matrix filtering (springs)", currentLevel); +template +void DropNegativeEntriesFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Matrix filtering (springs)", currentLevel); - RCP Ain = Get< RCP >(currentLevel, "A"); + RCP Ain = Get 
>(currentLevel, "A"); - LocalOrdinal nDofsPerNode = Ain->GetFixedBlockSize(); + LocalOrdinal nDofsPerNode = Ain->GetFixedBlockSize(); - // create new empty Operator - Teuchos::RCP Aout = MatrixFactory::Build(Ain->getRowMap(), Ain->getGlobalMaxNumRowEntries()); + // create new empty Operator + Teuchos::RCP Aout = MatrixFactory::Build(Ain->getRowMap(), Ain->getGlobalMaxNumRowEntries()); - size_t numLocalRows = Ain->getLocalNumRows(); - for(size_t row=0; rowgetRowMap()->getGlobalElement(row); + size_t numLocalRows = Ain->getLocalNumRows(); + for (size_t row = 0; row < numLocalRows; row++) { + GlobalOrdinal grid = Ain->getRowMap()->getGlobalElement(row); - int rDofID = Teuchos::as(grid % nDofsPerNode); + int rDofID = Teuchos::as(grid % nDofsPerNode); - // extract row information from input matrix - Teuchos::ArrayView indices; - Teuchos::ArrayView vals; - Ain->getLocalRowView(row, indices, vals); + // extract row information from input matrix + Teuchos::ArrayView indices; + Teuchos::ArrayView vals; + Ain->getLocalRowView(row, indices, vals); - // just copy all values in output - Teuchos::ArrayRCP indout(indices.size(),Teuchos::ScalarTraits::zero()); - Teuchos::ArrayRCP valout(indices.size(),Teuchos::ScalarTraits::zero()); + // just copy all values in output + Teuchos::ArrayRCP indout(indices.size(), Teuchos::ScalarTraits::zero()); + Teuchos::ArrayRCP valout(indices.size(), Teuchos::ScalarTraits::zero()); - size_t nNonzeros = 0; - for(size_t i=0; i<(size_t)indices.size(); i++) { - GlobalOrdinal gcid = Ain->getColMap()->getGlobalElement(indices[i]); // global column id + size_t nNonzeros = 0; + for (size_t i = 0; i < (size_t)indices.size(); i++) { + GlobalOrdinal gcid = Ain->getColMap()->getGlobalElement(indices[i]); // global column id - int cDofID = Teuchos::as(gcid % nDofsPerNode); - if(rDofID == cDofID && Teuchos::ScalarTraits::magnitude(vals[i]) >= Teuchos::ScalarTraits::magnitude(Teuchos::ScalarTraits::zero())) { - indout [nNonzeros] = gcid; - valout [nNonzeros] = 
vals[i]; - nNonzeros++; - } + int cDofID = Teuchos::as(gcid % nDofsPerNode); + if (rDofID == cDofID && Teuchos::ScalarTraits::magnitude(vals[i]) >= Teuchos::ScalarTraits::magnitude(Teuchos::ScalarTraits::zero())) { + indout[nNonzeros] = gcid; + valout[nNonzeros] = vals[i]; + nNonzeros++; } - indout.resize(nNonzeros); - valout.resize(nNonzeros); - - Aout->insertGlobalValues(Ain->getRowMap()->getGlobalElement(row), indout.view(0,indout.size()), valout.view(0,valout.size())); } + indout.resize(nNonzeros); + valout.resize(nNonzeros); - Aout->fillComplete(Ain->getDomainMap(), Ain->getRangeMap()); + Aout->insertGlobalValues(Ain->getRowMap()->getGlobalElement(row), indout.view(0, indout.size()), valout.view(0, valout.size())); + } - // copy block size information - Aout->SetFixedBlockSize(nDofsPerNode); + Aout->fillComplete(Ain->getDomainMap(), Ain->getRangeMap()); - GetOStream(Statistics0, 0) << "Nonzeros in A (input): " << Ain->getGlobalNumEntries() << ", Nonzeros after filtering A: " << Aout->getGlobalNumEntries() << std::endl; + // copy block size information + Aout->SetFixedBlockSize(nDofsPerNode); - Set(currentLevel, "A", Aout); - } + GetOStream(Statistics0, 0) << "Nonzeros in A (input): " << Ain->getGlobalNumEntries() << ", Nonzeros after filtering A: " << Aout->getGlobalNumEntries() << std::endl; + + Set(currentLevel, "A", Aout); +} -} //namespace MueLu +} //namespace MueLu -#endif // MUELU_DROPNEGATIVEENTRIESFACTORY_DEF_HPP +#endif // MUELU_DROPNEGATIVEENTRIESFACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_FilteredAFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_FilteredAFactory_decl.hpp index 23f56137596d..799c13cff3d5 100644 --- a/packages/muelu/src/Misc/MueLu_FilteredAFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_FilteredAFactory_decl.hpp @@ -58,60 +58,59 @@ #include "MueLu_Aggregates_fwd.hpp" namespace MueLu { - /*! +/*! @class FilteredAFactory class. @brief Factory for building filtered matrices using filtered graphs. 
*/ - template - class FilteredAFactory : public SingleLevelFactoryBase { +template +class FilteredAFactory : public SingleLevelFactoryBase { #undef MUELU_FILTEREDAFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - FilteredAFactory() { } + FilteredAFactory() {} - //! Destructor. - virtual ~FilteredAFactory() { } + //! Destructor. + virtual ~FilteredAFactory() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level& currentLevel) const; + void DeclareInput(Level& currentLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - /*! + /*! @brief Build method. Builds filtered matrix and returns it in currentLevel. */ - void Build(Level& currentLevel) const; + void Build(Level& currentLevel) const; - //@} - private: - void BuildReuse(const Matrix& A, const GraphBase& G, const bool lumping, double dirichletThresh, Matrix& filteredA) const; - void BuildNew (const Matrix& A, const GraphBase& G, const bool lumping, double dirichletThresh, Matrix& filteredA) const; - void BuildNewUsingRootStencil(const Matrix& A, const GraphBase& G, double dirichletThresh, Level& currentLevel, Matrix& filteredA, bool use_spread_lumping, double DdomAllowGrowthRate, double DdomCap) const; - void ExperimentalLumping(const Matrix& A, Matrix& filteredA, double rho, double rho2) const; + //@} + private: + void BuildReuse(const Matrix& A, const GraphBase& G, const bool lumping, double dirichletThresh, Matrix& filteredA) const; + void BuildNew(const Matrix& A, const GraphBase& G, const bool lumping, double dirichletThresh, Matrix& filteredA) const; + void BuildNewUsingRootStencil(const Matrix& A, const GraphBase& G, double dirichletThresh, Level& currentLevel, Matrix& filteredA, bool use_spread_lumping, double DdomAllowGrowthRate, double DdomCap) const; 
+ void ExperimentalLumping(const Matrix& A, Matrix& filteredA, double rho, double rho2) const; - }; //class FilteredAFactory +}; //class FilteredAFactory -} //namespace MueLu +} //namespace MueLu #define MUELU_FILTEREDAFACTORY_SHORT -#endif // MUELU_FILTEREDAFACTORY_DECL_HPP +#endif // MUELU_FILTEREDAFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_FilteredAFactory_def.hpp b/packages/muelu/src/Misc/MueLu_FilteredAFactory_def.hpp index 2828b3c0cb0e..a1328f0afae6 100644 --- a/packages/muelu/src/Misc/MueLu_FilteredAFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_FilteredAFactory_def.hpp @@ -62,174 +62,162 @@ // Variable to enable lots of debug output #define MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING 0 - namespace MueLu { - template - void sort_and_unique(T & array) { - std::sort(array.begin(),array.end()); - std::unique(array.begin(),array.end()); - } - +template +void sort_and_unique(T& array) { + std::sort(array.begin(), array.end()); + std::unique(array.begin(), array.end()); +} - - template - RCP FilteredAFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP FilteredAFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("filtered matrix: use lumping"); - SET_VALID_ENTRY("filtered matrix: reuse graph"); - SET_VALID_ENTRY("filtered matrix: reuse eigenvalue"); - SET_VALID_ENTRY("filtered matrix: use root stencil"); - SET_VALID_ENTRY("filtered matrix: use spread lumping"); - SET_VALID_ENTRY("filtered matrix: spread lumping diag dom growth factor"); - SET_VALID_ENTRY("filtered matrix: spread lumping diag dom cap"); - SET_VALID_ENTRY("filtered matrix: Dirichlet threshold"); -#undef SET_VALID_ENTRY - - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A used for filtering"); - validParamList->set< RCP >("Graph", Teuchos::null, 
"Generating factory for coalesced filtered graph"); - validParamList->set< RCP >("Filtering", Teuchos::null, "Generating factory for filtering boolean"); - - - // Only need these for the "use root stencil" option - validParamList->set< RCP >("Aggregates", Teuchos::null, "Generating factory of the aggregates"); - validParamList->set< RCP >("UnAmalgamationInfo", Teuchos::null, "Generating factory of UnAmalgamationInfo"); - return validParamList; + SET_VALID_ENTRY("filtered matrix: use lumping"); + SET_VALID_ENTRY("filtered matrix: reuse graph"); + SET_VALID_ENTRY("filtered matrix: reuse eigenvalue"); + SET_VALID_ENTRY("filtered matrix: use root stencil"); + SET_VALID_ENTRY("filtered matrix: use spread lumping"); + SET_VALID_ENTRY("filtered matrix: spread lumping diag dom growth factor"); + SET_VALID_ENTRY("filtered matrix: spread lumping diag dom cap"); + SET_VALID_ENTRY("filtered matrix: Dirichlet threshold"); +#undef SET_VALID_ENTRY + + validParamList->set >("A", Teuchos::null, "Generating factory of the matrix A used for filtering"); + validParamList->set >("Graph", Teuchos::null, "Generating factory for coalesced filtered graph"); + validParamList->set >("Filtering", Teuchos::null, "Generating factory for filtering boolean"); + + // Only need these for the "use root stencil" option + validParamList->set >("Aggregates", Teuchos::null, "Generating factory of the aggregates"); + validParamList->set >("UnAmalgamationInfo", Teuchos::null, "Generating factory of UnAmalgamationInfo"); + return validParamList; +} + +template +void FilteredAFactory::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "A"); + Input(currentLevel, "Filtering"); + Input(currentLevel, "Graph"); + const ParameterList& pL = GetParameterList(); + if (pL.isParameter("filtered matrix: use root stencil") && pL.get("filtered matrix: use root stencil") == true) { + Input(currentLevel, "Aggregates"); + Input(currentLevel, "UnAmalgamationInfo"); } +} - template - void 
FilteredAFactory::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "A"); - Input(currentLevel, "Filtering"); - Input(currentLevel, "Graph"); - const ParameterList& pL = GetParameterList(); - if(pL.isParameter("filtered matrix: use root stencil") && pL.get("filtered matrix: use root stencil") == true){ - Input(currentLevel, "Aggregates"); - Input(currentLevel, "UnAmalgamationInfo"); - } - } +template +void FilteredAFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Matrix filtering", currentLevel); - template - void FilteredAFactory::Build(Level& currentLevel) const { - FactoryMonitor m(*this, "Matrix filtering", currentLevel); - - RCP A = Get< RCP >(currentLevel, "A"); - if (Get(currentLevel, "Filtering") == false) { - GetOStream(Runtime0) << "Filtered matrix is not being constructed as no filtering is being done" << std::endl; - Set(currentLevel, "A", A); - return; - } + RCP A = Get >(currentLevel, "A"); + if (Get(currentLevel, "Filtering") == false) { + GetOStream(Runtime0) << "Filtered matrix is not being constructed as no filtering is being done" << std::endl; + Set(currentLevel, "A", A); + return; + } - const ParameterList& pL = GetParameterList(); - bool lumping = pL.get("filtered matrix: use lumping"); - if (lumping) - GetOStream(Runtime0) << "Lumping dropped entries" << std::endl; + const ParameterList& pL = GetParameterList(); + bool lumping = pL.get("filtered matrix: use lumping"); + if (lumping) + GetOStream(Runtime0) << "Lumping dropped entries" << std::endl; - bool use_spread_lumping = pL.get("filtered matrix: use spread lumping"); - if (use_spread_lumping && (!lumping) ) - throw std::runtime_error("Must also request 'filtered matrix: use lumping' in order to use spread lumping"); + bool use_spread_lumping = pL.get("filtered matrix: use spread lumping"); + if (use_spread_lumping && (!lumping)) + throw std::runtime_error("Must also request 'filtered matrix: use lumping' in order to use spread lumping"); - if 
(use_spread_lumping) { - GetOStream(Runtime0) << "using spread lumping " << std::endl; - } + if (use_spread_lumping) { + GetOStream(Runtime0) << "using spread lumping " << std::endl; + } - double DdomAllowGrowthRate = 1.1; - double DdomCap = 2.0; - if (use_spread_lumping) { - DdomAllowGrowthRate = pL.get("filtered matrix: spread lumping diag dom growth factor"); - DdomCap = pL.get("filtered matrix: spread lumping diag dom cap"); - } - bool use_root_stencil = lumping && pL.get("filtered matrix: use root stencil"); - if (use_root_stencil) - GetOStream(Runtime0) << "Using root stencil for dropping" << std::endl; - double dirichlet_threshold = pL.get("filtered matrix: Dirichlet threshold"); - if(dirichlet_threshold >= 0.0) - GetOStream(Runtime0) << "Filtering Dirichlet threshold of "<("filtered matrix: reuse graph")) - GetOStream(Runtime0) << "Reusing graph"< G = Get< RCP >(currentLevel, "Graph"); - if(MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING) - { - FILE * f = fopen("graph.dat","w"); - size_t numGRows = G->GetNodeNumVertices(); - for (size_t i = 0; i < numGRows; i++) { - // Set up filtering array - ArrayView indsG = G->getNeighborVertices(i); - for(size_t j=0; j<(size_t)indsG.size(); j++) { - fprintf(f,"%d %d 1.0\n",(int)i,(int)indsG[j]); - } + double DdomAllowGrowthRate = 1.1; + double DdomCap = 2.0; + if (use_spread_lumping) { + DdomAllowGrowthRate = pL.get("filtered matrix: spread lumping diag dom growth factor"); + DdomCap = pL.get("filtered matrix: spread lumping diag dom cap"); + } + bool use_root_stencil = lumping && pL.get("filtered matrix: use root stencil"); + if (use_root_stencil) + GetOStream(Runtime0) << "Using root stencil for dropping" << std::endl; + double dirichlet_threshold = pL.get("filtered matrix: Dirichlet threshold"); + if (dirichlet_threshold >= 0.0) + GetOStream(Runtime0) << "Filtering Dirichlet threshold of " << dirichlet_threshold << std::endl; + + if (use_root_stencil || pL.get("filtered matrix: reuse graph")) + GetOStream(Runtime0) << 
"Reusing graph" << std::endl; + else + GetOStream(Runtime0) << "Generating new graph" << std::endl; + + RCP G = Get >(currentLevel, "Graph"); + if (MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING) { + FILE* f = fopen("graph.dat", "w"); + size_t numGRows = G->GetNodeNumVertices(); + for (size_t i = 0; i < numGRows; i++) { + // Set up filtering array + ArrayView indsG = G->getNeighborVertices(i); + for (size_t j = 0; j < (size_t)indsG.size(); j++) { + fprintf(f, "%d %d 1.0\n", (int)i, (int)indsG[j]); } - fclose(f); } + fclose(f); + } - RCP fillCompleteParams(new ParameterList); - fillCompleteParams->set("No Nonlocal Changes", true); - - RCP filteredA; - if(use_root_stencil) { - filteredA = MatrixFactory::Build(A->getCrsGraph()); - filteredA->fillComplete(fillCompleteParams); - filteredA->resumeFill(); - BuildNewUsingRootStencil(*A, *G, dirichlet_threshold, currentLevel,*filteredA, use_spread_lumping,DdomAllowGrowthRate, DdomCap); - filteredA->fillComplete(fillCompleteParams); - - } - else if (pL.get("filtered matrix: reuse graph")) { - filteredA = MatrixFactory::Build(A->getCrsGraph()); - filteredA->resumeFill(); - BuildReuse(*A, *G, (lumping != use_spread_lumping), dirichlet_threshold,*filteredA); - // only lump inside BuildReuse if lumping is true and use_spread_lumping is false - // note: they use_spread_lumping cannot be true if lumping is false - - if (use_spread_lumping) ExperimentalLumping(*A, *filteredA, DdomAllowGrowthRate, DdomCap); - filteredA->fillComplete(fillCompleteParams); - - } else { - - filteredA = MatrixFactory::Build(A->getRowMap(), A->getColMap(), A->getLocalMaxNumRowEntries()); - BuildNew(*A, *G, (lumping != use_spread_lumping), dirichlet_threshold,*filteredA); - // only lump inside BuildNew if lumping is true and use_spread_lumping is false - // note: they use_spread_lumping cannot be true if lumping is false - if (use_spread_lumping) ExperimentalLumping(*A, *filteredA, DdomAllowGrowthRate, DdomCap); - filteredA->fillComplete(A->getDomainMap(), 
A->getRangeMap(), fillCompleteParams); - } - - - - if(MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING) - { - Xpetra::IO::Write("filteredA.dat", *filteredA); - - //original filtered A and actual A - Xpetra::IO::Write("A.dat", *A); - RCP origFilteredA = MatrixFactory::Build(A->getRowMap(), A->getColMap(), A->getLocalMaxNumRowEntries()); - BuildNew(*A, *G, lumping, dirichlet_threshold,*origFilteredA); - if (use_spread_lumping) ExperimentalLumping(*A, *origFilteredA, DdomAllowGrowthRate, DdomCap); - origFilteredA->fillComplete(A->getDomainMap(), A->getRangeMap(), fillCompleteParams); - Xpetra::IO::Write("origFilteredA.dat", *origFilteredA); - } + RCP fillCompleteParams(new ParameterList); + fillCompleteParams->set("No Nonlocal Changes", true); + + RCP filteredA; + if (use_root_stencil) { + filteredA = MatrixFactory::Build(A->getCrsGraph()); + filteredA->fillComplete(fillCompleteParams); + filteredA->resumeFill(); + BuildNewUsingRootStencil(*A, *G, dirichlet_threshold, currentLevel, *filteredA, use_spread_lumping, DdomAllowGrowthRate, DdomCap); + filteredA->fillComplete(fillCompleteParams); + + } else if (pL.get("filtered matrix: reuse graph")) { + filteredA = MatrixFactory::Build(A->getCrsGraph()); + filteredA->resumeFill(); + BuildReuse(*A, *G, (lumping != use_spread_lumping), dirichlet_threshold, *filteredA); + // only lump inside BuildReuse if lumping is true and use_spread_lumping is false + // note: they use_spread_lumping cannot be true if lumping is false + + if (use_spread_lumping) ExperimentalLumping(*A, *filteredA, DdomAllowGrowthRate, DdomCap); + filteredA->fillComplete(fillCompleteParams); + + } else { + filteredA = MatrixFactory::Build(A->getRowMap(), A->getColMap(), A->getLocalMaxNumRowEntries()); + BuildNew(*A, *G, (lumping != use_spread_lumping), dirichlet_threshold, *filteredA); + // only lump inside BuildNew if lumping is true and use_spread_lumping is false + // note: they use_spread_lumping cannot be true if lumping is false + if (use_spread_lumping) 
ExperimentalLumping(*A, *filteredA, DdomAllowGrowthRate, DdomCap); + filteredA->fillComplete(A->getDomainMap(), A->getRangeMap(), fillCompleteParams); + } + if (MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING) { + Xpetra::IO::Write("filteredA.dat", *filteredA); - filteredA->SetFixedBlockSize(A->GetFixedBlockSize()); + //original filtered A and actual A + Xpetra::IO::Write("A.dat", *A); + RCP origFilteredA = MatrixFactory::Build(A->getRowMap(), A->getColMap(), A->getLocalMaxNumRowEntries()); + BuildNew(*A, *G, lumping, dirichlet_threshold, *origFilteredA); + if (use_spread_lumping) ExperimentalLumping(*A, *origFilteredA, DdomAllowGrowthRate, DdomCap); + origFilteredA->fillComplete(A->getDomainMap(), A->getRangeMap(), fillCompleteParams); + Xpetra::IO::Write("origFilteredA.dat", *origFilteredA); + } - if (pL.get("filtered matrix: reuse eigenvalue")) { - // Reuse max eigenvalue from A - // It is unclear what eigenvalue is the best for the smoothing, but we already may have - // the D^{-1}A estimate in A, may as well use it. - // NOTE: ML does that too - filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate()); - } + filteredA->SetFixedBlockSize(A->GetFixedBlockSize()); - Set(currentLevel, "A", filteredA); + if (pL.get("filtered matrix: reuse eigenvalue")) { + // Reuse max eigenvalue from A + // It is unclear what eigenvalue is the best for the smoothing, but we already may have + // the D^{-1}A estimate in A, may as well use it. + // NOTE: ML does that too + filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate()); } + Set(currentLevel, "A", filteredA); +} + // Epetra's API allows direct access to row array. // Tpetra's API does not, providing only ArrayView // But in most situations we are currently interested in, it is safe to assume @@ -238,801 +226,794 @@ namespace MueLu { // replaceLocalValues() call which is quite expensive due to all the searches. 
//#define ASSUME_DIRECT_ACCESS_TO_ROW // See github issue 10883#issuecomment-1256676340 - // Both Epetra and Tpetra matrix-matrix multiply use the following trick: - // if an entry of the left matrix is zero, it does not compute or store the - // zero value. - // - // This trick allows us to bypass constructing a new matrix. Instead, we - // make a deep copy of the original one, and fill it in with zeros, which - // are ignored during the prolongator smoothing. - template - void FilteredAFactory:: - BuildReuse(const Matrix& A, const GraphBase& G, const bool lumping, double dirichletThresh, Matrix& filteredA) const { - using TST = typename Teuchos::ScalarTraits; - SC zero = TST::zero(); - - - size_t blkSize = A.GetFixedBlockSize(); - - ArrayView inds; - ArrayView valsA; +// Both Epetra and Tpetra matrix-matrix multiply use the following trick: +// if an entry of the left matrix is zero, it does not compute or store the +// zero value. +// +// This trick allows us to bypass constructing a new matrix. Instead, we +// make a deep copy of the original one, and fill it in with zeros, which +// are ignored during the prolongator smoothing. 
+template +void FilteredAFactory:: + BuildReuse(const Matrix& A, const GraphBase& G, const bool lumping, double dirichletThresh, Matrix& filteredA) const { + using TST = typename Teuchos::ScalarTraits; + SC zero = TST::zero(); + + size_t blkSize = A.GetFixedBlockSize(); + + ArrayView inds; + ArrayView valsA; #ifdef ASSUME_DIRECT_ACCESS_TO_ROW - ArrayView vals; + ArrayView vals; #else - Array vals; + Array vals; #endif - Array filter( std::max(blkSize*G.GetImportMap()->getLocalNumElements(), - A.getColMap()->getLocalNumElements()), - 0); + Array filter(std::max(blkSize * G.GetImportMap()->getLocalNumElements(), + A.getColMap()->getLocalNumElements()), + 0); - size_t numGRows = G.GetNodeNumVertices(); - for (size_t i = 0; i < numGRows; i++) { - // Set up filtering array - ArrayView indsG = G.getNeighborVertices(i); - for (size_t j = 0; j < as(indsG.size()); j++) - for (size_t k = 0; k < blkSize; k++) - filter[indsG[j]*blkSize+k] = 1; + size_t numGRows = G.GetNodeNumVertices(); + for (size_t i = 0; i < numGRows; i++) { + // Set up filtering array + ArrayView indsG = G.getNeighborVertices(i); + for (size_t j = 0; j < as(indsG.size()); j++) + for (size_t k = 0; k < blkSize; k++) + filter[indsG[j] * blkSize + k] = 1; - for (size_t k = 0; k < blkSize; k++) { - LO row = i*blkSize + k; + for (size_t k = 0; k < blkSize; k++) { + LO row = i * blkSize + k; - A.getLocalRowView(row, inds, valsA); + A.getLocalRowView(row, inds, valsA); - size_t nnz = inds.size(); - if (nnz == 0) - continue; + size_t nnz = inds.size(); + if (nnz == 0) + continue; #ifdef ASSUME_DIRECT_ACCESS_TO_ROW - // Transform ArrayView into ArrayView - ArrayView vals1; - filteredA.getLocalRowView(row, inds, vals1); - vals = ArrayView(const_cast(vals1.getRawPtr()), nnz); + // Transform ArrayView into ArrayView + ArrayView vals1; + filteredA.getLocalRowView(row, inds, vals1); + vals = ArrayView(const_cast(vals1.getRawPtr()), nnz); - memcpy(vals.getRawPtr(), valsA.getRawPtr(), nnz*sizeof(SC)); + 
memcpy(vals.getRawPtr(), valsA.getRawPtr(), nnz * sizeof(SC)); #else - vals = Array(valsA); + vals = Array(valsA); #endif - SC ZERO = Teuchos::ScalarTraits::zero(); - // SC ONE = Teuchos::ScalarTraits::one(); - SC A_rowsum = ZERO, F_rowsum = ZERO; - for(LO l = 0; l < (LO)inds.size(); l++) - A_rowsum += valsA[l]; + SC ZERO = Teuchos::ScalarTraits::zero(); + // SC ONE = Teuchos::ScalarTraits::one(); + SC A_rowsum = ZERO, F_rowsum = ZERO; + for (LO l = 0; l < (LO)inds.size(); l++) + A_rowsum += valsA[l]; - if (lumping == false) { - for (size_t j = 0; j < nnz; j++) - if (!filter[inds[j]]) - vals[j] = zero; + if (lumping == false) { + for (size_t j = 0; j < nnz; j++) + if (!filter[inds[j]]) + vals[j] = zero; - } else { - LO diagIndex = -1; - SC diagExtra = zero; - - for (size_t j = 0; j < nnz; j++) { - if (filter[inds[j]]) { - if (inds[j] == row) { - // Remember diagonal position - diagIndex = j; - } - continue; + } else { + LO diagIndex = -1; + SC diagExtra = zero; + + for (size_t j = 0; j < nnz; j++) { + if (filter[inds[j]]) { + if (inds[j] == row) { + // Remember diagonal position + diagIndex = j; } + continue; + } - diagExtra += vals[j]; + diagExtra += vals[j]; - vals[j] = zero; - } + vals[j] = zero; + } - // Lump dropped entries - // NOTE - // * Does it make sense to lump for elasticity? - // * Is it different for diffusion and elasticity? - //SC diagA = ZERO; - if (diagIndex != -1) { - //diagA = vals[diagIndex]; - vals[diagIndex] += diagExtra; - if(dirichletThresh >= 0.0 && TST::real(vals[diagIndex]) <= dirichletThresh) { - - // printf("WARNING: row %d diag(Afiltered) = %8.2e diag(A)=%8.2e\n",row,vals[diagIndex],diagA); - for(LO l = 0; l < (LO)nnz; l++) - F_rowsum += vals[l]; - // printf(" : A rowsum = %8.2e F rowsum = %8.2e\n",A_rowsum,F_rowsum); - vals[diagIndex] = TST::one(); - } + // Lump dropped entries + // NOTE + // * Does it make sense to lump for elasticity? + // * Is it different for diffusion and elasticity? 
+ //SC diagA = ZERO; + if (diagIndex != -1) { + //diagA = vals[diagIndex]; + vals[diagIndex] += diagExtra; + if (dirichletThresh >= 0.0 && TST::real(vals[diagIndex]) <= dirichletThresh) { + // printf("WARNING: row %d diag(Afiltered) = %8.2e diag(A)=%8.2e\n",row,vals[diagIndex],diagA); + for (LO l = 0; l < (LO)nnz; l++) + F_rowsum += vals[l]; + // printf(" : A rowsum = %8.2e F rowsum = %8.2e\n",A_rowsum,F_rowsum); + vals[diagIndex] = TST::one(); } - } + } #ifndef ASSUME_DIRECT_ACCESS_TO_ROW - // Because we used a column map in the construction of the matrix - // we can just use insertLocalValues here instead of insertGlobalValues - filteredA.replaceLocalValues(row, inds, vals); + // Because we used a column map in the construction of the matrix + // we can just use insertLocalValues here instead of insertGlobalValues + filteredA.replaceLocalValues(row, inds, vals); #endif - } - - // Reset filtering array - for (size_t j = 0; j < as (indsG.size()); j++) - for (size_t k = 0; k < blkSize; k++) - filter[indsG[j]*blkSize+k] = 0; } - } - - template - void FilteredAFactory:: - BuildNew(const Matrix& A, const GraphBase& G, const bool lumping, double dirichletThresh, Matrix& filteredA) const { - using TST = typename Teuchos::ScalarTraits; - SC zero = Teuchos::ScalarTraits::zero(); - - size_t blkSize = A.GetFixedBlockSize(); - ArrayView indsA; - ArrayView valsA; - Array inds; - Array vals; - - Array filter(blkSize * G.GetImportMap()->getLocalNumElements(), 0); - - size_t numGRows = G.GetNodeNumVertices(); - for (size_t i = 0; i < numGRows; i++) { - // Set up filtering array - ArrayView indsG = G.getNeighborVertices(i); - for (size_t j = 0; j < as(indsG.size()); j++) - for (size_t k = 0; k < blkSize; k++) - filter[indsG[j]*blkSize+k] = 1; - - for (size_t k = 0; k < blkSize; k++) { - LO row = i*blkSize + k; - - A.getLocalRowView(row, indsA, valsA); - - size_t nnz = indsA.size(); - if (nnz == 0) - continue; - - inds.resize(indsA.size()); - vals.resize(valsA.size()); + // Reset 
filtering array + for (size_t j = 0; j < as(indsG.size()); j++) + for (size_t k = 0; k < blkSize; k++) + filter[indsG[j] * blkSize + k] = 0; + } +} + +template +void FilteredAFactory:: + BuildNew(const Matrix& A, const GraphBase& G, const bool lumping, double dirichletThresh, Matrix& filteredA) const { + using TST = typename Teuchos::ScalarTraits; + SC zero = Teuchos::ScalarTraits::zero(); + + size_t blkSize = A.GetFixedBlockSize(); + + ArrayView indsA; + ArrayView valsA; + Array inds; + Array vals; + + Array filter(blkSize * G.GetImportMap()->getLocalNumElements(), 0); + + size_t numGRows = G.GetNodeNumVertices(); + for (size_t i = 0; i < numGRows; i++) { + // Set up filtering array + ArrayView indsG = G.getNeighborVertices(i); + for (size_t j = 0; j < as(indsG.size()); j++) + for (size_t k = 0; k < blkSize; k++) + filter[indsG[j] * blkSize + k] = 1; + + for (size_t k = 0; k < blkSize; k++) { + LO row = i * blkSize + k; + + A.getLocalRowView(row, indsA, valsA); + + size_t nnz = indsA.size(); + if (nnz == 0) + continue; + + inds.resize(indsA.size()); + vals.resize(valsA.size()); + + size_t numInds = 0; + if (lumping == false) { + for (size_t j = 0; j < nnz; j++) + if (filter[indsA[j]]) { + inds[numInds] = indsA[j]; + vals[numInds] = valsA[j]; + numInds++; + } - size_t numInds = 0; - if (lumping == false) { - for (size_t j = 0; j < nnz; j++) - if (filter[indsA[j]]) { - inds[numInds] = indsA[j]; - vals[numInds] = valsA[j]; - numInds++; - } + } else { + LO diagIndex = -1; + SC diagExtra = zero; - } else { - LO diagIndex = -1; - SC diagExtra = zero; - - for (size_t j = 0; j < nnz; j++) { - if (filter[indsA[j]]) { - inds[numInds] = indsA[j]; - vals[numInds] = valsA[j]; + for (size_t j = 0; j < nnz; j++) { + if (filter[indsA[j]]) { + inds[numInds] = indsA[j]; + vals[numInds] = valsA[j]; - // Remember diagonal position - if (inds[numInds] == row) - diagIndex = numInds; + // Remember diagonal position + if (inds[numInds] == row) + diagIndex = numInds; - numInds++; + 
numInds++; - } else { - diagExtra += valsA[j]; - } + } else { + diagExtra += valsA[j]; } + } - // Lump dropped entries - // NOTE - // * Does it make sense to lump for elasticity? - // * Is it different for diffusion and elasticity? - if (diagIndex != -1) { - vals[diagIndex] += diagExtra; - if(dirichletThresh >= 0.0 && TST::real(vals[diagIndex]) <= dirichletThresh) { - // SC A_rowsum = ZERO, F_rowsum = ZERO; - // printf("WARNING: row %d diag(Afiltered) = %8.2e diag(A)=%8.2e\n",row,vals[diagIndex],diagA); - // for(LO l = 0; l < (LO)nnz; l++) - // F_rowsum += vals[l]; - // printf(" : A rowsum = %8.2e F rowsum = %8.2e\n",A_rowsum,F_rowsum); - vals[diagIndex] = TST::one(); - } + // Lump dropped entries + // NOTE + // * Does it make sense to lump for elasticity? + // * Is it different for diffusion and elasticity? + if (diagIndex != -1) { + vals[diagIndex] += diagExtra; + if (dirichletThresh >= 0.0 && TST::real(vals[diagIndex]) <= dirichletThresh) { + // SC A_rowsum = ZERO, F_rowsum = ZERO; + // printf("WARNING: row %d diag(Afiltered) = %8.2e diag(A)=%8.2e\n",row,vals[diagIndex],diagA); + // for(LO l = 0; l < (LO)nnz; l++) + // F_rowsum += vals[l]; + // printf(" : A rowsum = %8.2e F rowsum = %8.2e\n",A_rowsum,F_rowsum); + vals[diagIndex] = TST::one(); } - } - inds.resize(numInds); - vals.resize(numInds); - - - - // Because we used a column map in the construction of the matrix - // we can just use insertLocalValues here instead of insertGlobalValues - filteredA.insertLocalValues(row, inds, vals); } + inds.resize(numInds); + vals.resize(numInds); - // Reset filtering array - for (size_t j = 0; j < as (indsG.size()); j++) - for (size_t k = 0; k < blkSize; k++) - filter[indsG[j]*blkSize+k] = 0; + // Because we used a column map in the construction of the matrix + // we can just use insertLocalValues here instead of insertGlobalValues + filteredA.insertLocalValues(row, inds, vals); } - } - template - void FilteredAFactory:: - BuildNewUsingRootStencil(const Matrix& A, const 
GraphBase& G, double dirichletThresh, Level& currentLevel, Matrix& filteredA, bool use_spread_lumping, double DdomAllowGrowthRate, double DdomCap) const { - using TST = typename Teuchos::ScalarTraits; - using Teuchos::arcp_const_cast; - SC ZERO = Teuchos::ScalarTraits::zero(); - SC ONE = Teuchos::ScalarTraits::one(); - LO INVALID = Teuchos::OrdinalTraits::invalid(); - - size_t numNodes = G.GetNodeNumVertices(); - size_t blkSize = A.GetFixedBlockSize(); - size_t numRows = A.getMap()->getLocalNumElements(); - ArrayView indsA; - ArrayView valsA; - ArrayRCP rowptr; - ArrayRCP inds; - ArrayRCP vals_const; - ArrayRCP vals; - - // We're going to grab the vals array from filteredA and then blitz it with NAN as a placeholder for "entries that have - // not yey been touched." If I see an entry in the primary loop that has a zero, then I assume it has been nuked by - // it's symmetric pair, so I add it to the diagonal. If it has a NAN, process as normal. - RCP filteredAcrs = dynamic_cast(&filteredA)->getCrsMatrix(); - filteredAcrs->getAllValues(rowptr,inds,vals_const); - vals = arcp_const_cast(vals_const); - Array vals_dropped_indicator(vals.size(),false); - - // In the badAggNeighbors loop, if the entry has any number besides NAN, I add it to the diagExtra and then zero the guy. 
- RCP aggregates = Get< RCP > (currentLevel, "Aggregates"); - RCP amalgInfo = Get< RCP > (currentLevel, "UnAmalgamationInfo"); - LO numAggs = aggregates->GetNumAggregates(); - - // Check map nesting - RCP rowMap = A.getRowMap(); - RCP colMap = A.getColMap(); - bool goodMap = MueLu::Utilities::MapsAreNested(*rowMap, *colMap); - TEUCHOS_TEST_FOR_EXCEPTION(!goodMap, Exceptions::RuntimeError,"FilteredAFactory: Maps are not nested"); - - // Since we're going to symmetrize this - Array diagIndex(numRows,INVALID); - Array diagExtra(numRows,ZERO); - - // Lists of nodes in each aggregate - struct { - // GH: For now, copy everything to host until we properly set this factory to run device code - // Instead, we'll copy data into HostMirrors and run the algorithms on host, saving optimization for later. - typename Aggregates::LO_view ptr, nodes, unaggregated; - typename Aggregates::LO_view::HostMirror ptr_h, nodes_h, unaggregated_h; - } nodesInAgg; - aggregates->ComputeNodesInAggregate(nodesInAgg.ptr, nodesInAgg.nodes, nodesInAgg.unaggregated); - nodesInAgg.ptr_h = Kokkos::create_mirror_view(nodesInAgg.ptr); - nodesInAgg.nodes_h = Kokkos::create_mirror_view(nodesInAgg.nodes); - nodesInAgg.unaggregated_h = Kokkos::create_mirror_view(nodesInAgg.unaggregated); - Kokkos::deep_copy(nodesInAgg.ptr_h, nodesInAgg.ptr); - Kokkos::deep_copy(nodesInAgg.nodes_h, nodesInAgg.nodes); - Kokkos::deep_copy(nodesInAgg.unaggregated_h, nodesInAgg.unaggregated); - Teuchos::ArrayRCP vertex2AggId = aggregates->GetVertex2AggId()->getData(0); // GH: this is needed on device, grab the pointer after we call ComputeNodesInAggregate - - LO graphNumCols = G.GetImportMap()->getLocalNumElements(); - Array filter(graphNumCols, false); - - // Loop over the unaggregated nodes. Blitz those rows. We don't want to smooth singletons. 
- for(LO i=0; i< (LO)nodesInAgg.unaggregated_h.extent(0); i++) { - for (LO m = 0; m < (LO)blkSize; m++) { - LO row = amalgInfo->ComputeLocalDOF(nodesInAgg.unaggregated_h(i),m); - if (row >= (LO)numRows) continue; - size_t index_start = rowptr[row]; - A.getLocalRowView(row, indsA, valsA); - for(LO k=0; k<(LO)indsA.size(); k++) { - if(row == indsA[k]) { - vals[index_start+k] = ONE; - diagIndex[row] = k; - } - else - vals[index_start+k] = ZERO; - } + // Reset filtering array + for (size_t j = 0; j < as(indsG.size()); j++) + for (size_t k = 0; k < blkSize; k++) + filter[indsG[j] * blkSize + k] = 0; + } +} + +template +void FilteredAFactory:: + BuildNewUsingRootStencil(const Matrix& A, const GraphBase& G, double dirichletThresh, Level& currentLevel, Matrix& filteredA, bool use_spread_lumping, double DdomAllowGrowthRate, double DdomCap) const { + using TST = typename Teuchos::ScalarTraits; + using Teuchos::arcp_const_cast; + SC ZERO = Teuchos::ScalarTraits::zero(); + SC ONE = Teuchos::ScalarTraits::one(); + LO INVALID = Teuchos::OrdinalTraits::invalid(); + + size_t numNodes = G.GetNodeNumVertices(); + size_t blkSize = A.GetFixedBlockSize(); + size_t numRows = A.getMap()->getLocalNumElements(); + ArrayView indsA; + ArrayView valsA; + ArrayRCP rowptr; + ArrayRCP inds; + ArrayRCP vals_const; + ArrayRCP vals; + + // We're going to grab the vals array from filteredA and then blitz it with NAN as a placeholder for "entries that have + // not yey been touched." If I see an entry in the primary loop that has a zero, then I assume it has been nuked by + // it's symmetric pair, so I add it to the diagonal. If it has a NAN, process as normal. + RCP filteredAcrs = dynamic_cast(&filteredA)->getCrsMatrix(); + filteredAcrs->getAllValues(rowptr, inds, vals_const); + vals = arcp_const_cast(vals_const); + Array vals_dropped_indicator(vals.size(), false); + + // In the badAggNeighbors loop, if the entry has any number besides NAN, I add it to the diagExtra and then zero the guy. 
+ RCP aggregates = Get >(currentLevel, "Aggregates"); + RCP amalgInfo = Get >(currentLevel, "UnAmalgamationInfo"); + LO numAggs = aggregates->GetNumAggregates(); + + // Check map nesting + RCP rowMap = A.getRowMap(); + RCP colMap = A.getColMap(); + bool goodMap = MueLu::Utilities::MapsAreNested(*rowMap, *colMap); + TEUCHOS_TEST_FOR_EXCEPTION(!goodMap, Exceptions::RuntimeError, "FilteredAFactory: Maps are not nested"); + + // Since we're going to symmetrize this + Array diagIndex(numRows, INVALID); + Array diagExtra(numRows, ZERO); + + // Lists of nodes in each aggregate + struct { + // GH: For now, copy everything to host until we properly set this factory to run device code + // Instead, we'll copy data into HostMirrors and run the algorithms on host, saving optimization for later. + typename Aggregates::LO_view ptr, nodes, unaggregated; + typename Aggregates::LO_view::HostMirror ptr_h, nodes_h, unaggregated_h; + } nodesInAgg; + aggregates->ComputeNodesInAggregate(nodesInAgg.ptr, nodesInAgg.nodes, nodesInAgg.unaggregated); + nodesInAgg.ptr_h = Kokkos::create_mirror_view(nodesInAgg.ptr); + nodesInAgg.nodes_h = Kokkos::create_mirror_view(nodesInAgg.nodes); + nodesInAgg.unaggregated_h = Kokkos::create_mirror_view(nodesInAgg.unaggregated); + Kokkos::deep_copy(nodesInAgg.ptr_h, nodesInAgg.ptr); + Kokkos::deep_copy(nodesInAgg.nodes_h, nodesInAgg.nodes); + Kokkos::deep_copy(nodesInAgg.unaggregated_h, nodesInAgg.unaggregated); + Teuchos::ArrayRCP vertex2AggId = aggregates->GetVertex2AggId()->getData(0); // GH: this is needed on device, grab the pointer after we call ComputeNodesInAggregate + + LO graphNumCols = G.GetImportMap()->getLocalNumElements(); + Array filter(graphNumCols, false); + + // Loop over the unaggregated nodes. Blitz those rows. We don't want to smooth singletons. 
+ for (LO i = 0; i < (LO)nodesInAgg.unaggregated_h.extent(0); i++) { + for (LO m = 0; m < (LO)blkSize; m++) { + LO row = amalgInfo->ComputeLocalDOF(nodesInAgg.unaggregated_h(i), m); + if (row >= (LO)numRows) continue; + size_t index_start = rowptr[row]; + A.getLocalRowView(row, indsA, valsA); + for (LO k = 0; k < (LO)indsA.size(); k++) { + if (row == indsA[k]) { + vals[index_start + k] = ONE; + diagIndex[row] = k; + } else + vals[index_start + k] = ZERO; } - }//end nodesInAgg.unaggregated.extent(0); - - - std::vector badCount(numAggs,0); - - // Find the biggest aggregate size in *nodes* - LO maxAggSize=0; - for(LO i=0; i goodAggNeighbors(G.getLocalMaxNumRowEntries()); - std::vector badAggNeighbors(std::min(G.getLocalMaxNumRowEntries()*maxAggSize,numNodes)); - - size_t numNewDrops=0; - size_t numOldDrops=0; - size_t numFixedDiags=0; - size_t numSymDrops = 0; - - for(LO i=0; iIsRoot(nodesInAgg.nodes_h(k))) { - root_node = nodesInAgg.nodes_h(k); break; - } + } + } //end nodesInAgg.unaggregated.extent(0); + + std::vector badCount(numAggs, 0); + + // Find the biggest aggregate size in *nodes* + LO maxAggSize = 0; + for (LO i = 0; i < numAggs; i++) + maxAggSize = std::max(maxAggSize, nodesInAgg.ptr_h(i + 1) - nodesInAgg.ptr_h(i)); + + // Loop over all the aggregates + std::vector goodAggNeighbors(G.getLocalMaxNumRowEntries()); + std::vector badAggNeighbors(std::min(G.getLocalMaxNumRowEntries() * maxAggSize, numNodes)); + + size_t numNewDrops = 0; + size_t numOldDrops = 0; + size_t numFixedDiags = 0; + size_t numSymDrops = 0; + + for (LO i = 0; i < numAggs; i++) { + LO numNodesInAggregate = nodesInAgg.ptr_h(i + 1) - nodesInAgg.ptr_h(i); + if (numNodesInAggregate == 0) continue; + + // Find the root *node* + LO root_node = INVALID; + for (LO k = nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i + 1); k++) { + if (aggregates->IsRoot(nodesInAgg.nodes_h(k))) { + root_node = nodesInAgg.nodes_h(k); + break; } + } - TEUCHOS_TEST_FOR_EXCEPTION(root_node == INVALID, - 
Exceptions::RuntimeError,"MueLu::FilteredAFactory::BuildNewUsingRootStencil: Cannot find root node"); + TEUCHOS_TEST_FOR_EXCEPTION(root_node == INVALID, + Exceptions::RuntimeError, "MueLu::FilteredAFactory::BuildNewUsingRootStencil: Cannot find root node"); - // Find the list of "good" node neighbors (aka nodes which border the root node in the Graph G) - ArrayView goodNodeNeighbors = G.getNeighborVertices(root_node); + // Find the list of "good" node neighbors (aka nodes which border the root node in the Graph G) + ArrayView goodNodeNeighbors = G.getNeighborVertices(root_node); - // Now find the list of "good" aggregate neighbors (aka the aggregates neighbor the root node in the Graph G) - goodAggNeighbors.resize(0); - for(LO k=0; k<(LO) goodNodeNeighbors.size(); k++) { - goodAggNeighbors.push_back(vertex2AggId[goodNodeNeighbors[k]]); - } - sort_and_unique(goodAggNeighbors); - - // Now we get the list of "bad" aggregate neighbors (aka aggregates which border the - // root node in the original matrix A, which are not goodNodeNeighbors). 
Since we - // don't have an amalgamated version of the original matrix, we use the matrix directly - badAggNeighbors.resize(0); - for(LO j = 0; j < (LO)blkSize; j++) { - LO row = amalgInfo->ComputeLocalDOF(root_node,j); - if (row >= (LO)numRows) continue; - A.getLocalRowView(row, indsA, valsA); - for(LO k=0; k<(LO)indsA.size(); k++) { - if ( (indsA[k] < (LO)numRows) && (TST::magnitude(valsA[k]) != TST::magnitude(ZERO))) { - LO node = amalgInfo->ComputeLocalNode(indsA[k]); - LO agg = vertex2AggId[node]; - if(!std::binary_search(goodAggNeighbors.begin(),goodAggNeighbors.end(),agg)) - badAggNeighbors.push_back(agg); - } + // Now find the list of "good" aggregate neighbors (aka the aggregates neighbor the root node in the Graph G) + goodAggNeighbors.resize(0); + for (LO k = 0; k < (LO)goodNodeNeighbors.size(); k++) { + goodAggNeighbors.push_back(vertex2AggId[goodNodeNeighbors[k]]); + } + sort_and_unique(goodAggNeighbors); + + // Now we get the list of "bad" aggregate neighbors (aka aggregates which border the + // root node in the original matrix A, which are not goodNodeNeighbors). Since we + // don't have an amalgamated version of the original matrix, we use the matrix directly + badAggNeighbors.resize(0); + for (LO j = 0; j < (LO)blkSize; j++) { + LO row = amalgInfo->ComputeLocalDOF(root_node, j); + if (row >= (LO)numRows) continue; + A.getLocalRowView(row, indsA, valsA); + for (LO k = 0; k < (LO)indsA.size(); k++) { + if ((indsA[k] < (LO)numRows) && (TST::magnitude(valsA[k]) != TST::magnitude(ZERO))) { + LO node = amalgInfo->ComputeLocalNode(indsA[k]); + LO agg = vertex2AggId[node]; + if (!std::binary_search(goodAggNeighbors.begin(), goodAggNeighbors.end(), agg)) + badAggNeighbors.push_back(agg); } } - sort_and_unique(badAggNeighbors); + } + sort_and_unique(badAggNeighbors); - // Go through the filtered graph and count the number of connections to the badAggNeighbors - // if there are 2 or more of these connections, remove them from the bad list. 
+ // Go through the filtered graph and count the number of connections to the badAggNeighbors + // if there are 2 or more of these connections, remove them from the bad list. - for (LO k=nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i+1); k++) { - ArrayView nodeNeighbors = G.getNeighborVertices(k); - for (LO kk=0; kk < nodeNeighbors.size(); kk++) { - if ( (vertex2AggId[nodeNeighbors[kk]] >= 0) && (vertex2AggId[nodeNeighbors[kk]] < numAggs)) - (badCount[vertex2AggId[nodeNeighbors[kk]]])++; - } - } - std::vector reallyBadAggNeighbors(std::min(G.getLocalMaxNumRowEntries()*maxAggSize,numNodes)); - reallyBadAggNeighbors.resize(0); - for (LO k=0; k < (LO) badAggNeighbors.size(); k++) { - if (badCount[badAggNeighbors[k]] <= 1 ) reallyBadAggNeighbors.push_back(badAggNeighbors[k]); + for (LO k = nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i + 1); k++) { + ArrayView nodeNeighbors = G.getNeighborVertices(k); + for (LO kk = 0; kk < nodeNeighbors.size(); kk++) { + if ((vertex2AggId[nodeNeighbors[kk]] >= 0) && (vertex2AggId[nodeNeighbors[kk]] < numAggs)) + (badCount[vertex2AggId[nodeNeighbors[kk]]])++; } - for (LO k=nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i+1); k++) { - ArrayView nodeNeighbors = G.getNeighborVertices(k); - for (LO kk=0; kk < nodeNeighbors.size(); kk++) { - if ( (vertex2AggId[nodeNeighbors[kk]] >= 0) && (vertex2AggId[nodeNeighbors[kk]] < numAggs)) - badCount[vertex2AggId[nodeNeighbors[kk]]] = 0; - } + } + std::vector reallyBadAggNeighbors(std::min(G.getLocalMaxNumRowEntries() * maxAggSize, numNodes)); + reallyBadAggNeighbors.resize(0); + for (LO k = 0; k < (LO)badAggNeighbors.size(); k++) { + if (badCount[badAggNeighbors[k]] <= 1) reallyBadAggNeighbors.push_back(badAggNeighbors[k]); + } + for (LO k = nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i + 1); k++) { + ArrayView nodeNeighbors = G.getNeighborVertices(k); + for (LO kk = 0; kk < nodeNeighbors.size(); kk++) { + if ((vertex2AggId[nodeNeighbors[kk]] >= 0) && (vertex2AggId[nodeNeighbors[kk]] < numAggs)) + 
badCount[vertex2AggId[nodeNeighbors[kk]]] = 0; } + } - // For each of the reallyBadAggNeighbors, we go and blitz their connections to dofs in this aggregate. - // We remove the INVALID marker when we do this so we don't wind up doubling this up later - for(LO b=0; b<(LO)reallyBadAggNeighbors.size(); b++) { - LO bad_agg = reallyBadAggNeighbors[b]; - for (LO k=nodesInAgg.ptr_h(bad_agg); k < nodesInAgg.ptr_h(bad_agg+1); k++) { - LO bad_node = nodesInAgg.nodes_h(k); - for(LO j = 0; j < (LO)blkSize; j++) { - LO bad_row = amalgInfo->ComputeLocalDOF(bad_node,j); - if (bad_row >= (LO)numRows) continue; - size_t index_start = rowptr[bad_row]; - A.getLocalRowView(bad_row, indsA, valsA); - for(LO l = 0; l < (LO)indsA.size(); l++) { - if(indsA[l] < (LO)numRows && vertex2AggId[amalgInfo->ComputeLocalNode(indsA[l])] == i && vals_dropped_indicator[index_start+l] == false) { - vals_dropped_indicator[index_start + l] = true; - vals[index_start + l] = ZERO; - diagExtra[bad_row] += valsA[l]; - numSymDrops++; - } + // For each of the reallyBadAggNeighbors, we go and blitz their connections to dofs in this aggregate. 
+ // We remove the INVALID marker when we do this so we don't wind up doubling this up later + for (LO b = 0; b < (LO)reallyBadAggNeighbors.size(); b++) { + LO bad_agg = reallyBadAggNeighbors[b]; + for (LO k = nodesInAgg.ptr_h(bad_agg); k < nodesInAgg.ptr_h(bad_agg + 1); k++) { + LO bad_node = nodesInAgg.nodes_h(k); + for (LO j = 0; j < (LO)blkSize; j++) { + LO bad_row = amalgInfo->ComputeLocalDOF(bad_node, j); + if (bad_row >= (LO)numRows) continue; + size_t index_start = rowptr[bad_row]; + A.getLocalRowView(bad_row, indsA, valsA); + for (LO l = 0; l < (LO)indsA.size(); l++) { + if (indsA[l] < (LO)numRows && vertex2AggId[amalgInfo->ComputeLocalNode(indsA[l])] == i && vals_dropped_indicator[index_start + l] == false) { + vals_dropped_indicator[index_start + l] = true; + vals[index_start + l] = ZERO; + diagExtra[bad_row] += valsA[l]; + numSymDrops++; } } } } + } - // Now lets fill the rows in this aggregate and figure out the diagonal lumping - // We loop over each node in the aggregate and then over the neighbors of that node - - for(LO k=nodesInAgg.ptr_h(i); k indsG = G.getNeighborVertices(row_node); - for (size_t j = 0; j < as(indsG.size()); j++) - filter[indsG[j]]=true; - - for (LO m = 0; m < (LO)blkSize; m++) { - LO row = amalgInfo->ComputeLocalDOF(row_node,m); - if (row >= (LO)numRows) continue; - size_t index_start = rowptr[row]; - A.getLocalRowView(row, indsA, valsA); - - for(LO l = 0; l < (LO)indsA.size(); l++) { - int col_node = amalgInfo->ComputeLocalNode(indsA[l]); - bool is_good = filter[col_node]; - if (indsA[l] == row) { - diagIndex[row] = l; - vals[index_start + l] = valsA[l]; - continue; - } + // Now lets fill the rows in this aggregate and figure out the diagonal lumping + // We loop over each node in the aggregate and then over the neighbors of that node - // If we've already dropped this guy (from symmetry above), then continue onward - if(vals_dropped_indicator[index_start +l] == true) { - if(is_good) numOldDrops++; - else numNewDrops++; - 
continue; - } + for (LO k = nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i + 1); k++) { + LO row_node = nodesInAgg.nodes_h(k); + // Set up filtering array + ArrayView indsG = G.getNeighborVertices(row_node); + for (size_t j = 0; j < as(indsG.size()); j++) + filter[indsG[j]] = true; - // FIXME: I'm assuming vertex2AggId is only length of the rowmap, so - // we won'd do secondary dropping on off-processor neighbors - if(is_good && indsA[l] < (LO)numRows) { - int agg = vertex2AggId[col_node]; - if(std::binary_search(reallyBadAggNeighbors.begin(),reallyBadAggNeighbors.end(),agg)) - is_good = false; - } + for (LO m = 0; m < (LO)blkSize; m++) { + LO row = amalgInfo->ComputeLocalDOF(row_node, m); + if (row >= (LO)numRows) continue; + size_t index_start = rowptr[row]; + A.getLocalRowView(row, indsA, valsA); - if(is_good){ - vals[index_start+l] = valsA[l]; - } - else { - if(!filter[col_node]) numOldDrops++; - else numNewDrops++; - diagExtra[row] += valsA[l]; - vals[index_start+l]=ZERO; - vals_dropped_indicator[index_start+l]=true; - } - } //end for l "indsA.size()" loop - - }//end m "blkSize" loop - - // Clear filtering array - for (size_t j = 0; j < as(indsG.size()); j++) - filter[indsG[j]]=false; - - }// end k loop over number of nodes in this agg - }//end i loop over numAggs - - if (!use_spread_lumping) { - // Now do the diagonal modifications in one, final pass - for(LO row=0; row <(LO)numRows; row++) { - if (diagIndex[row] != INVALID) { - size_t index_start = rowptr[row]; - size_t diagIndexInMatrix = index_start + diagIndex[row]; - // printf("diag_vals pre update = %8.2e\n", vals[diagIndex] ); - vals[diagIndexInMatrix] += diagExtra[row]; - SC A_rowsum=ZERO, A_absrowsum = ZERO, F_rowsum = ZERO; - - - if( (dirichletThresh >= 0.0 && TST::real(vals[diagIndexInMatrix]) <= dirichletThresh) || TST::real(vals[diagIndexInMatrix]) == ZERO) { - - if(MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING>0) { - A.getLocalRowView(row, indsA, valsA); - // SC diagA = valsA[diagIndex[row]]; - // 
printf("WARNING: row %d (diagIndex=%d) diag(Afiltered) = %8.2e diag(A)=%8.2e numInds = %d\n",row,diagIndex[row],vals[diagIndexInMatrix],diagA,(LO)indsA.size()); - - for(LO l = 0; l < (LO)indsA.size(); l++) { - A_rowsum += valsA[l]; - A_absrowsum+=std::abs(valsA[l]); - } - for(LO l = 0; l < (LO)indsA.size(); l++) - F_rowsum += vals[index_start+l]; - // printf(" : A rowsum = %8.2e |A| rowsum = %8.2e rowsum = %8.2e\n",A_rowsum,A_absrowsum,F_rowsum); - if(MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING > 1){ - // printf(" Avals ="); - // for(LO l = 0; l < (LO)indsA.size(); l++) - // printf("%d(%8.2e)[%d] ",(LO)indsA[l],valsA[l],(LO)l); - // printf("\n"); - // printf(" Fvals ="); - // for(LO l = 0; l < (LO)indsA.size(); l++) - // if(vals[index_start+l] != ZERO) - // printf("%d(%8.2e)[%d] ",(LO)indsA[l],vals[index_start+l],(LO)l); - } - } - // Don't know what to do, so blitz the row and dump a one on the diagonal - for(size_t l=rowptr[row]; lComputeLocalNode(indsA[l]); + bool is_good = filter[col_node]; + if (indsA[l] == row) { + diagIndex[row] = l; + vals[index_start + l] = valsA[l]; + continue; } - } - else { - GetOStream(Runtime0)<<"WARNING: Row "<getComm(), numNewDrops, g_newDrops); - MueLu_sumAll(A.getRowMap()->getComm(), numOldDrops, g_oldDrops); - MueLu_sumAll(A.getRowMap()->getComm(), numFixedDiags, g_fixedDiags); - GetOStream(Runtime0)<< "Filtering out "< - void FilteredAFactory:: - ExperimentalLumping(const Matrix& A, Matrix& filteredA, double irho, double irho2) const { - using TST = typename Teuchos::ScalarTraits; - SC zero = TST::zero(); - SC one = TST::one(); - - ArrayView inds; - ArrayView vals; - ArrayView finds; - ArrayView fvals; - - SC PosOffSum, NegOffSum, PosOffDropSum, NegOffDropSum; - SC diag, gamma, alpha; - LO NumPosKept, NumNegKept; - - SC noLumpDdom; - SC numer,denom; - SC PosFilteredSum, NegFilteredSum; - SC Target; - - SC rho = as(irho); - SC rho2 = as(irho2); - - for (LO row = 0; row < (LO) A.getRowMap()->getLocalNumElements(); row++) { - noLumpDdom 
= as(10000.0); // only used if diagonal is zero - // the whole idea sort of breaks down - // when the diagonal is zero. In particular, - // the old diag dominance ratio is infinity - // ... so what do we want for the new ddom - // ratio. Do we want to allow the diagonal - // to go negative, just to have a better ddom - // ratio? This current choice essentially - // changes 'Target' to a large number - // meaning that we will allow the new - // ddom number to be fairly large (because - // the old one was infinity) - - ArrayView tvals; - A.getLocalRowView(row, inds, vals); - size_t nnz = inds.size(); - if (nnz == 0) continue; - filteredA.getLocalRowView(row, finds, tvals);//assume 2 getLocalRowView()s - // have things in same order - fvals = ArrayView(const_cast(tvals.getRawPtr()), nnz); - - LO diagIndex = -1, fdiagIndex = -1; - - PosOffSum=zero; NegOffSum=zero; PosOffDropSum=zero; NegOffDropSum=zero; - diag=zero; NumPosKept=0; NumNegKept=0; - - // first record diagonal, offdiagonal sums and off diag dropped sums - for (size_t j = 0; j < nnz; j++) { - if (inds[j] == row) { - diagIndex = j; - diag = vals[j]; + // FIXME: I'm assuming vertex2AggId is only length of the rowmap, so + // we won'd do secondary dropping on off-processor neighbors + if (is_good && indsA[l] < (LO)numRows) { + int agg = vertex2AggId[col_node]; + if (std::binary_search(reallyBadAggNeighbors.begin(), reallyBadAggNeighbors.end(), agg)) + is_good = false; } - else { // offdiagonal - if (TST::real(vals[j]) > TST::real(zero) ) PosOffSum += vals[j]; - else NegOffSum += vals[j]; + + if (is_good) { + vals[index_start + l] = valsA[l]; + } else { + if (!filter[col_node]) + numOldDrops++; + else + numNewDrops++; + diagExtra[row] += valsA[l]; + vals[index_start + l] = ZERO; + vals_dropped_indicator[index_start + l] = true; } - } - PosOffDropSum = PosOffSum; - NegOffDropSum = NegOffSum; - NumPosKept = 0; - NumNegKept = 0; - LO j = 0; - for (size_t jj = 0; jj < (size_t) finds.size(); jj++) { - while( inds[j] 
!= finds[jj] ) j++; // assumes that finds is in the same order as - // inds ... but perhaps has some entries missing - if (finds[jj] == row) fdiagIndex = jj; - else { - if (TST::real(vals[j]) > TST::real(zero) ) { - PosOffDropSum -= fvals[jj]; - if (TST::real(fvals[jj]) != TST::real(zero) ) NumPosKept++; + } //end for l "indsA.size()" loop + + } //end m "blkSize" loop + + // Clear filtering array + for (size_t j = 0; j < as(indsG.size()); j++) + filter[indsG[j]] = false; + + } // end k loop over number of nodes in this agg + } //end i loop over numAggs + + if (!use_spread_lumping) { + // Now do the diagonal modifications in one, final pass + for (LO row = 0; row < (LO)numRows; row++) { + if (diagIndex[row] != INVALID) { + size_t index_start = rowptr[row]; + size_t diagIndexInMatrix = index_start + diagIndex[row]; + // printf("diag_vals pre update = %8.2e\n", vals[diagIndex] ); + vals[diagIndexInMatrix] += diagExtra[row]; + SC A_rowsum = ZERO, A_absrowsum = ZERO, F_rowsum = ZERO; + + if ((dirichletThresh >= 0.0 && TST::real(vals[diagIndexInMatrix]) <= dirichletThresh) || TST::real(vals[diagIndexInMatrix]) == ZERO) { + if (MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING > 0) { + A.getLocalRowView(row, indsA, valsA); + // SC diagA = valsA[diagIndex[row]]; + // printf("WARNING: row %d (diagIndex=%d) diag(Afiltered) = %8.2e diag(A)=%8.2e numInds = %d\n",row,diagIndex[row],vals[diagIndexInMatrix],diagA,(LO)indsA.size()); + + for (LO l = 0; l < (LO)indsA.size(); l++) { + A_rowsum += valsA[l]; + A_absrowsum += std::abs(valsA[l]); } - else { - NegOffDropSum -= fvals[jj]; - if (TST::real(fvals[jj]) != TST::real(zero) ) NumNegKept++; + for (LO l = 0; l < (LO)indsA.size(); l++) + F_rowsum += vals[index_start + l]; + // printf(" : A rowsum = %8.2e |A| rowsum = %8.2e rowsum = %8.2e\n",A_rowsum,A_absrowsum,F_rowsum); + if (MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING > 1) { + // printf(" Avals ="); + // for(LO l = 0; l < (LO)indsA.size(); l++) + // printf("%d(%8.2e)[%d] 
",(LO)indsA[l],valsA[l],(LO)l); + // printf("\n"); + // printf(" Fvals ="); + // for(LO l = 0; l < (LO)indsA.size(); l++) + // if(vals[index_start+l] != ZERO) + // printf("%d(%8.2e)[%d] ",(LO)indsA[l],vals[index_start+l],(LO)l); } } + // Don't know what to do, so blitz the row and dump a one on the diagonal + for (size_t l = rowptr[row]; l < rowptr[row + 1]; l++) { + vals[l] = ZERO; + } + vals[diagIndexInMatrix] = TST::one(); + numFixedDiags++; } + } else { + GetOStream(Runtime0) << "WARNING: Row " << row << " has no diagonal " << std::endl; + } + } /*end row "numRows" loop"*/ + } - // measure of diagonal dominance if no lumping is done. - if (TST::magnitude(diag) != TST::magnitude(zero) ) - noLumpDdom = (PosOffSum - NegOffSum)/diag; - - // Target is an acceptable diagonal dominance ratio - // which should really be larger than 1 - - Target = rho*noLumpDdom; - if (TST::magnitude(Target) <= TST::magnitude(rho)) Target = rho2; + // Copy all the goop out + for (LO row = 0; row < (LO)numRows; row++) { + filteredA.replaceLocalValues(row, inds(rowptr[row], rowptr[row + 1] - rowptr[row]), vals(rowptr[row], rowptr[row + 1] - rowptr[row])); + } + if (use_spread_lumping) ExperimentalLumping(A, filteredA, DdomAllowGrowthRate, DdomCap); - PosFilteredSum = PosOffSum - PosOffDropSum; - NegFilteredSum = NegOffSum - NegOffDropSum; - // Note: PosNotFilterdSum is not equal to the sum of the - // positive entries after lumping. It just reflects the - // pos offdiag sum of the filtered matrix before lumping - // and does not account for negative dropped terms lumped - // to the positive kept terms. + size_t g_newDrops = 0, g_oldDrops = 0, g_fixedDiags = 0; - // dropped positive offdiags always go to the diagonal as these - // always improve diagonal dominance. 
+ MueLu_sumAll(A.getRowMap()->getComm(), numNewDrops, g_newDrops); + MueLu_sumAll(A.getRowMap()->getComm(), numOldDrops, g_oldDrops); + MueLu_sumAll(A.getRowMap()->getComm(), numFixedDiags, g_fixedDiags); + GetOStream(Runtime0) << "Filtering out " << g_newDrops << " edges, in addition to the " << g_oldDrops << " edges dropped earlier" << std::endl; + GetOStream(Runtime0) << "Fixing " << g_fixedDiags << " zero diagonal values" << std::endl; +} - diag += PosOffDropSum; +// fancy lumping trying to not just move everything to the diagonal but to also consider moving +// some lumping to the kept off-diagonals. We basically aim to not increase the diagonal +// dominance in a row. In particular, the goal is that row i satisfies +// +// lumpedDiagDomMeasure_i <= rho2 +// or +// lumpedDiagDomMeasure <= rho*unlumpedDiagDomMeasure +// +// NOTE: THIS CODE assumes direct access to a row. See comments above concerning +// ASSUME_DIRECT_ACCESS_TO_ROW +// +template +void FilteredAFactory:: + ExperimentalLumping(const Matrix& A, Matrix& filteredA, double irho, double irho2) const { + using TST = typename Teuchos::ScalarTraits; + SC zero = TST::zero(); + SC one = TST::one(); + + ArrayView inds; + ArrayView vals; + ArrayView finds; + ArrayView fvals; + + SC PosOffSum, NegOffSum, PosOffDropSum, NegOffDropSum; + SC diag, gamma, alpha; + LO NumPosKept, NumNegKept; + + SC noLumpDdom; + SC numer, denom; + SC PosFilteredSum, NegFilteredSum; + SC Target; + + SC rho = as(irho); + SC rho2 = as(irho2); + + for (LO row = 0; row < (LO)A.getRowMap()->getLocalNumElements(); row++) { + noLumpDdom = as(10000.0); // only used if diagonal is zero + // the whole idea sort of breaks down + // when the diagonal is zero. In particular, + // the old diag dominance ratio is infinity + // ... so what do we want for the new ddom + // ratio. Do we want to allow the diagonal + // to go negative, just to have a better ddom + // ratio? 
This current choice essentially + // changes 'Target' to a large number + // meaning that we will allow the new + // ddom number to be fairly large (because + // the old one was infinity) + + ArrayView tvals; + A.getLocalRowView(row, inds, vals); + size_t nnz = inds.size(); + if (nnz == 0) continue; + filteredA.getLocalRowView(row, finds, tvals); //assume 2 getLocalRowView()s + // have things in same order + fvals = ArrayView(const_cast(tvals.getRawPtr()), nnz); + + LO diagIndex = -1, fdiagIndex = -1; + + PosOffSum = zero; + NegOffSum = zero; + PosOffDropSum = zero; + NegOffDropSum = zero; + diag = zero; + NumPosKept = 0; + NumNegKept = 0; + + // first record diagonal, offdiagonal sums and off diag dropped sums + for (size_t j = 0; j < nnz; j++) { + if (inds[j] == row) { + diagIndex = j; + diag = vals[j]; + } else { // offdiagonal + if (TST::real(vals[j]) > TST::real(zero)) + PosOffSum += vals[j]; + else + NegOffSum += vals[j]; + } + } + PosOffDropSum = PosOffSum; + NegOffDropSum = NegOffSum; + NumPosKept = 0; + NumNegKept = 0; + LO j = 0; + for (size_t jj = 0; jj < (size_t)finds.size(); jj++) { + while (inds[j] != finds[jj]) j++; // assumes that finds is in the same order as + // inds ... but perhaps has some entries missing + if (finds[jj] == row) + fdiagIndex = jj; + else { + if (TST::real(vals[j]) > TST::real(zero)) { + PosOffDropSum -= fvals[jj]; + if (TST::real(fvals[jj]) != TST::real(zero)) NumPosKept++; + } else { + NegOffDropSum -= fvals[jj]; + if (TST::real(fvals[jj]) != TST::real(zero)) NumNegKept++; + } + } + } - // now lets work on lumping dropped negative offdiags - gamma = -NegOffDropSum - PosFilteredSum; + // measure of diagonal dominance if no lumping is done. 
+ if (TST::magnitude(diag) != TST::magnitude(zero)) + noLumpDdom = (PosOffSum - NegOffSum) / diag; + + // Target is an acceptable diagonal dominance ratio + // which should really be larger than 1 + + Target = rho * noLumpDdom; + if (TST::magnitude(Target) <= TST::magnitude(rho)) Target = rho2; + + PosFilteredSum = PosOffSum - PosOffDropSum; + NegFilteredSum = NegOffSum - NegOffDropSum; + // Note: PosNotFilterdSum is not equal to the sum of the + // positive entries after lumping. It just reflects the + // pos offdiag sum of the filtered matrix before lumping + // and does not account for negative dropped terms lumped + // to the positive kept terms. + + // dropped positive offdiags always go to the diagonal as these + // always improve diagonal dominance. + + diag += PosOffDropSum; + + // now lets work on lumping dropped negative offdiags + gamma = -NegOffDropSum - PosFilteredSum; + + if (TST::real(gamma) < TST::real(zero)) { + // the total amount of negative dropping is less than PosFilteredSum, + // so we can distribute this dropping to pos offdiags. After lumping + // the sum of the pos offdiags is just -gamma so we just assign pos + // offdiags proportional to vals[j]/PosFilteredSum + // Note: in this case the diagonal is not changed as all lumping + // occurs to the pos offdiags + + if (fdiagIndex != -1) fvals[fdiagIndex] = diag; + j = 0; + for (LO jj = 0; jj < (LO)finds.size(); jj++) { + while (inds[j] != finds[jj]) j++; // assumes that finds is in the same order as + // inds ... but perhaps has some entries missing + if ((j != diagIndex) && (TST::real(vals[j]) > TST::real(zero)) && (TST::magnitude(fvals[jj]) != TST::magnitude(zero))) + fvals[jj] = -gamma * (vals[j] / PosFilteredSum); + } + } else { + // So there are more negative values that need lumping than kept + // positive offdiags. Meaning there is enough negative lumping to + // completely clear out all pos offdiags. If we lump all negs + // to pos off diags, we'd actually change them to negative. 
We + // only do this if we are desperate. Otherwise, we'll clear out + // all the positive kept offdiags and try to lump the rest + // somewhere else. We defer the clearing of pos off diags + // to see first if we are going to be desperate. + + bool flipPosOffDiagsToNeg = false; + + // Even if we lumped by zeroing positive offdiags, we are still + // going to have more lumping to distribute to either + // 1) the diagonal + // 2) the kept negative offdiags + // 3) the kept positive offdiags (desperate) + + // Let's first considering lumping the remaining neg offdiag stuff + // to the diagonal ... if this does not increase the diagonal + // dominance ratio too much (given by rho). + + if ((TST::real(diag) > TST::real(gamma)) && + (TST::real((-NegFilteredSum) / (diag - gamma)) <= TST::real(Target))) { + // 1st if term above insures that resulting diagonal (=diag-gamma) + // is positive. . The left side of 2nd term is the diagonal dominance + // if we lump the remaining stuff (gamma) to the diagonal. Recall, + // that now there are no positive off-diags so the sum(abs(offdiags)) + // is just the negative of NegFilteredSum + + if (fdiagIndex != -1) fvals[fdiagIndex] = diag - gamma; + } else if (NumNegKept > 0) { + // need to do some lumping to neg offdiags to avoid a large + // increase in diagonal dominance. We first compute alpha + // which measures how much gamma should go to the + // negative offdiags. The rest will go to the diagonal + + numer = -NegFilteredSum - Target * (diag - gamma); + denom = gamma * (Target - TST::one()); + + // make sure that alpha is between 0 and 1 ... and that it doesn't + // result in a sign flip + // Note: when alpha is set to 1, then the diagonal is not modified + // and the negative offdiags just get shifted from those + // removed and those kept, meaning that the digaonal dominance + // should be the same as before + // + // can alpha be negative? It looks like denom should always + // be positive. 
The 'if' statement above + // Normally, diag-gamma should also be positive (but if it + // is negative then numer is guaranteed to be positve). + // look at the 'if' above, + // if (( TST::real(diag) > TST::real(gamma)) && + // ( TST::real((-NegFilteredSum)/(diag - gamma)) <= TST::real(Target))) { + // + // Should guarantee that numer is positive. This is obvious when + // the second condition is false. When it is the first condition that + // is false, it follows that the two indiviudal terms in the numer + // formula must be positive. + + if (TST::magnitude(denom) < TST::magnitude(numer)) + alpha = TST::one(); + else + alpha = numer / denom; + if (TST::real(alpha) < TST::real(zero)) alpha = zero; + if (TST::real(diag) < TST::real((one - alpha) * gamma)) alpha = TST::one(); + + // first change the diagonal + + if (fdiagIndex != -1) fvals[fdiagIndex] = diag - (one - alpha) * gamma; + + // after lumping the sum of neg offdiags will be NegFilteredSum + // + alpha*gamma. That is the remaining negative entries altered + // by the percent (=alpha) of stuff (=gamma) that needs to be + // lumped after taking into account lumping to pos offdiags + + // Do this by assigning a fraction of NegFilteredSum+alpha*gamma + // proportional to vals[j]/NegFilteredSum + + SC temp = (NegFilteredSum + alpha * gamma) / NegFilteredSum; + j = 0; + for (LO jj = 0; jj < (LO)finds.size(); jj++) { + while (inds[j] != finds[jj]) j++; // assumes that finds is in the same order as + // inds ... but perhaps has some entries missing + if ((jj != fdiagIndex) && (TST::magnitude(fvals[jj]) != TST::magnitude(zero)) && + (TST::real(vals[j]) < TST::real(zero))) + fvals[jj] = temp * vals[j]; + } + } else { // desperate case + // So we don't have any kept negative offdiags ... - if (TST::real(gamma) < TST::real(zero) ) { - // the total amount of negative dropping is less than PosFilteredSum, - // so we can distribute this dropping to pos offdiags. 
After lumping - // the sum of the pos offdiags is just -gamma so we just assign pos - // offdiags proportional to vals[j]/PosFilteredSum - // Note: in this case the diagonal is not changed as all lumping - // occurs to the pos offdiags + if (NumPosKept > 0) { + // luckily we can push this stuff to the pos offdiags + // which now makes them negative + flipPosOffDiagsToNeg = true; - if (fdiagIndex != -1) fvals[fdiagIndex] = diag; j = 0; - for(LO jj = 0; jj < (LO)finds.size(); jj++) { - while( inds[j] != finds[jj] ) j++; // assumes that finds is in the same order as - // inds ... but perhaps has some entries missing - if ((j != diagIndex)&&(TST::real(vals[j]) > TST::real(zero) ) && (TST::magnitude(fvals[jj]) != TST::magnitude(zero))) - fvals[jj] = -gamma*(vals[j]/PosFilteredSum); - + for (LO jj = 0; jj < (LO)finds.size(); jj++) { + while (inds[j] != finds[jj]) j++; // assumes that finds is in the same order as + // inds ... but perhaps has some entries missing + if ((j != diagIndex) && (TST::magnitude(fvals[jj]) != TST::magnitude(zero)) && + (TST::real(vals[j]) > TST::real(zero))) + fvals[jj] = -gamma / ((SC)NumPosKept); } } - else { - // So there are more negative values that need lumping than kept - // positive offdiags. Meaning there is enough negative lumping to - // completely clear out all pos offdiags. If we lump all negs - // to pos off diags, we'd actually change them to negative. We - // only do this if we are desperate. Otherwise, we'll clear out - // all the positive kept offdiags and try to lump the rest - // somewhere else. We defer the clearing of pos off diags - // to see first if we are going to be desperate. 
- - bool flipPosOffDiagsToNeg = false; - - // Even if we lumped by zeroing positive offdiags, we are still - // going to have more lumping to distribute to either - // 1) the diagonal - // 2) the kept negative offdiags - // 3) the kept positive offdiags (desperate) - - // Let's first considering lumping the remaining neg offdiag stuff - // to the diagonal ... if this does not increase the diagonal - // dominance ratio too much (given by rho). - - if (( TST::real(diag) > TST::real(gamma)) && - ( TST::real((-NegFilteredSum)/(diag - gamma)) <= TST::real(Target))) { - // 1st if term above insures that resulting diagonal (=diag-gamma) - // is positive. . The left side of 2nd term is the diagonal dominance - // if we lump the remaining stuff (gamma) to the diagonal. Recall, - // that now there are no positive off-diags so the sum(abs(offdiags)) - // is just the negative of NegFilteredSum - - if (fdiagIndex != -1) fvals[fdiagIndex] = diag - gamma; - } - else if (NumNegKept > 0) { - // need to do some lumping to neg offdiags to avoid a large - // increase in diagonal dominance. We first compute alpha - // which measures how much gamma should go to the - // negative offdiags. The rest will go to the diagonal - - numer = -NegFilteredSum - Target*(diag-gamma); - denom = gamma*(Target - TST::one()); - - // make sure that alpha is between 0 and 1 ... and that it doesn't - // result in a sign flip - // Note: when alpha is set to 1, then the diagonal is not modified - // and the negative offdiags just get shifted from those - // removed and those kept, meaning that the digaonal dominance - // should be the same as before - // - // can alpha be negative? It looks like denom should always - // be positive. The 'if' statement above - // Normally, diag-gamma should also be positive (but if it - // is negative then numer is guaranteed to be positve). 
- // look at the 'if' above, - // if (( TST::real(diag) > TST::real(gamma)) && - // ( TST::real((-NegFilteredSum)/(diag - gamma)) <= TST::real(Target))) { - // - // Should guarantee that numer is positive. This is obvious when - // the second condition is false. When it is the first condition that - // is false, it follows that the two indiviudal terms in the numer - // formula must be positive. - - if ( TST::magnitude(denom) < TST::magnitude(numer) ) alpha = TST::one(); - else alpha = numer/denom; - if ( TST::real(alpha) < TST::real(zero)) alpha = zero; - if ( TST::real(diag) < TST::real((one-alpha)*gamma) ) alpha = TST::one(); - - // first change the diagonal - - if (fdiagIndex != -1) fvals[fdiagIndex] = diag - (one-alpha)*gamma; - - // after lumping the sum of neg offdiags will be NegFilteredSum - // + alpha*gamma. That is the remaining negative entries altered - // by the percent (=alpha) of stuff (=gamma) that needs to be - // lumped after taking into account lumping to pos offdiags - - // Do this by assigning a fraction of NegFilteredSum+alpha*gamma - // proportional to vals[j]/NegFilteredSum - - SC temp = (NegFilteredSum+alpha*gamma)/NegFilteredSum; - j = 0; - for(LO jj = 0; jj < (LO)finds.size(); jj++) { - while( inds[j] != finds[jj] ) j++; // assumes that finds is in the same order as - // inds ... but perhaps has some entries missing - if ( (jj != fdiagIndex)&&(TST::magnitude(fvals[jj]) != TST::magnitude(zero) ) && - ( TST::real(vals[j]) < TST::real(zero) ) ) - fvals[jj] = temp*vals[j]; - } - } - else { // desperate case - // So we don't have any kept negative offdiags ... - - if (NumPosKept > 0) { - // luckily we can push this stuff to the pos offdiags - // which now makes them negative - flipPosOffDiagsToNeg = true; - - j = 0; - for(LO jj = 0; jj < (LO)finds.size(); jj++) { - while( inds[j] != finds[jj] ) j++; // assumes that finds is in the same order as - // inds ... 
but perhaps has some entries missing - if ( (j != diagIndex)&&(TST::magnitude(fvals[jj]) != TST::magnitude(zero) ) && - (TST::real(vals[j]) > TST::real(zero) )) - fvals[jj] = -gamma/( (SC) NumPosKept); - } - } - // else abandon rowsum preservation and do nothing - - } - if (!flipPosOffDiagsToNeg) { // not desperate so we now zero out - // all pos terms including some - // not originally filtered - // but zeroed due to lumping - j = 0; - for(LO jj = 0; jj < (LO)finds.size(); jj++) { - while( inds[j] != finds[jj] ) j++; // assumes that finds is in the same order as - // inds ... but perhaps has some entries missing - if ((jj != fdiagIndex)&& (TST::real(vals[j]) > TST::real(zero))) fvals[jj] = zero; - } - } - } // positive gamma else - - } //loop over all rows - } + // else abandon rowsum preservation and do nothing + } + if (!flipPosOffDiagsToNeg) { // not desperate so we now zero out + // all pos terms including some + // not originally filtered + // but zeroed due to lumping + j = 0; + for (LO jj = 0; jj < (LO)finds.size(); jj++) { + while (inds[j] != finds[jj]) j++; // assumes that finds is in the same order as + // inds ... 
but perhaps has some entries missing + if ((jj != fdiagIndex) && (TST::real(vals[j]) > TST::real(zero))) fvals[jj] = zero; + } + } + } // positive gamma else + } //loop over all rows +} -} //namespace MueLu +} //namespace MueLu -#endif // MUELU_FILTEREDAFACTORY_DEF_HPP +#endif // MUELU_FILTEREDAFACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_FineLevelInputDataFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_FineLevelInputDataFactory_decl.hpp index 32b5389b0e1d..9940accf0925 100644 --- a/packages/muelu/src/Misc/MueLu_FineLevelInputDataFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_FineLevelInputDataFactory_decl.hpp @@ -47,7 +47,6 @@ #ifndef PACKAGES_MUELU_SRC_MISC_MUELU_FINELEVELINPUTDATAFACTORY_DECL_HPP_ #define PACKAGES_MUELU_SRC_MISC_MUELU_FINELEVELINPUTDATAFACTORY_DECL_HPP_ - #include #include "MueLu_ConfigDefs.hpp" @@ -62,63 +61,61 @@ #include "MueLu_Graph_fwd.hpp" namespace MueLuTests { - template - class FineLevelInputDataFactoryTester; +template +class FineLevelInputDataFactoryTester; } namespace MueLu { - /*! +/*! @class FineLevelInputData class. @brief Factory for piping in input data from the finest level into the MueLu data dependency system */ - template - class FineLevelInputDataFactory : public SingleLevelFactoryBase { - friend class MueLuTests::FineLevelInputDataFactoryTester; +template +class FineLevelInputDataFactory : public SingleLevelFactoryBase { + friend class MueLuTests::FineLevelInputDataFactoryTester; #undef MUELU_FINELEVELINPUTDATAFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - FineLevelInputDataFactory() { } + FineLevelInputDataFactory() {} - //! Destructor. - virtual ~FineLevelInputDataFactory() { } + //! Destructor. + virtual ~FineLevelInputDataFactory() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! Input - //@{ + //! 
Input + //@{ - void DeclareInput(Level& currentLevel) const; + void DeclareInput(Level& currentLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - /*! + /*! @brief Build method. */ - void Build(Level& currentLevel) const; - - //@} - private: + void Build(Level& currentLevel) const; - void test() const { std::cout << "TEST" << std::endl; } + //@} + private: + void test() const { std::cout << "TEST" << std::endl; } - }; //class FineLevelInputDataFactory +}; //class FineLevelInputDataFactory -} //namespace MueLu +} //namespace MueLu #define MUELU_FINELEVELINPUTDATAFACTORY_SHORT diff --git a/packages/muelu/src/Misc/MueLu_FineLevelInputDataFactory_def.hpp b/packages/muelu/src/Misc/MueLu_FineLevelInputDataFactory_def.hpp index a54c59a6bf1f..c9db978a2ba5 100644 --- a/packages/muelu/src/Misc/MueLu_FineLevelInputDataFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_FineLevelInputDataFactory_def.hpp @@ -55,131 +55,120 @@ namespace MueLu { - template - RCP FineLevelInputDataFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP FineLevelInputDataFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); - // Variable name (e.g. A or P or Coordinates) - validParamList->set< std::string >("Variable", std::string("A"), "Variable name on all coarse levels (except the finest level)."); + // Variable name (e.g. 
A or P or Coordinates) + validParamList->set("Variable", std::string("A"), "Variable name on all coarse levels (except the finest level)."); - // Names of generating factories (on finest level and coarse levels) - validParamList->set< RCP >("Fine level factory", Teuchos::null, "Generating factory of the fine level variable"); - validParamList->set< RCP >("Coarse level factory", Teuchos::null, "Generating factory for data on all coarse levels (except the finest)"); + // Names of generating factories (on finest level and coarse levels) + validParamList->set >("Fine level factory", Teuchos::null, "Generating factory of the fine level variable"); + validParamList->set >("Coarse level factory", Teuchos::null, "Generating factory for data on all coarse levels (except the finest)"); - // Type of variable (see source code for a complete list of all available types) - validParamList->set ("Variable type", std::string("Matrix"), "Type of variable"); + // Type of variable (see source code for a complete list of all available types) + validParamList->set("Variable type", std::string("Matrix"), "Type of variable"); - return validParamList; - } - - template - void FineLevelInputDataFactory::DeclareInput(Level& currentLevel) const { - - const ParameterList & pL = GetParameterList(); - - std::string variableName = ""; - if(pL.isParameter("Variable")) - variableName = pL.get("Variable"); + return validParamList; +} - std::string factoryName = "NoFactory"; - if (currentLevel.GetLevelID() == 0) { - factoryName = "Fine level factory"; - } else { - factoryName = "Coarse level factory"; - } +template +void FineLevelInputDataFactory::DeclareInput(Level& currentLevel) const { + const ParameterList& pL = GetParameterList(); - TEUCHOS_TEST_FOR_EXCEPTION(variableName == "", MueLu::Exceptions::RuntimeError, "FineLevelInputDataFactory: no variable name provided. 
Please set \'Variable\' parameter in your input deck."); + std::string variableName = ""; + if (pL.isParameter("Variable")) + variableName = pL.get("Variable"); - // data must be specified in factory! (not in factory manager) - RCP fact = GetFactory(factoryName); - currentLevel.DeclareInput(variableName, fact.get(), this); + std::string factoryName = "NoFactory"; + if (currentLevel.GetLevelID() == 0) { + factoryName = "Fine level factory"; + } else { + factoryName = "Coarse level factory"; } - template - void FineLevelInputDataFactory::Build(Level& currentLevel) const { - FactoryMonitor m(*this, "InputUserData", currentLevel); + TEUCHOS_TEST_FOR_EXCEPTION(variableName == "", MueLu::Exceptions::RuntimeError, "FineLevelInputDataFactory: no variable name provided. Please set \'Variable\' parameter in your input deck."); - const ParameterList& pL = GetParameterList(); + // data must be specified in factory! (not in factory manager) + RCP fact = GetFactory(factoryName); + currentLevel.DeclareInput(variableName, fact.get(), this); +} - std::string variableName = ""; - if (pL.isParameter("Variable")) - variableName = pL.get("Variable"); +template +void FineLevelInputDataFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "InputUserData", currentLevel); - std::string variableType = ""; - if(pL.isParameter("Variable type")) - variableType = pL.get("Variable type"); + const ParameterList& pL = GetParameterList(); - std::string factoryName = "NoFactory"; - if (currentLevel.GetLevelID() == 0) { - factoryName = "Fine level factory"; - } else { - factoryName = "Coarse level factory"; - } - RCP fact = GetFactory(factoryName); + std::string variableName = ""; + if (pL.isParameter("Variable")) + variableName = pL.get("Variable"); - GetOStream(Debug) << "Use " << variableName << " of type " << variableType << " from " << factoryName << "(" << fact.get() << ")" << std::endl; + std::string variableType = ""; + if (pL.isParameter("Variable type")) + variableType = 
pL.get("Variable type"); - // check data type - //std::string strType = currentLevel.GetTypeName(variableName, fact.get()); - if (variableType == "int") { - int data = currentLevel.Get(variableName, fact.get()); + std::string factoryName = "NoFactory"; + if (currentLevel.GetLevelID() == 0) { + factoryName = "Fine level factory"; + } else { + factoryName = "Coarse level factory"; + } + RCP fact = GetFactory(factoryName); + + GetOStream(Debug) << "Use " << variableName << " of type " << variableType << " from " << factoryName << "(" << fact.get() << ")" << std::endl; + + // check data type + //std::string strType = currentLevel.GetTypeName(variableName, fact.get()); + if (variableType == "int") { + int data = currentLevel.Get(variableName, fact.get()); + Set(currentLevel, variableName, data); + } else if (variableType == "double") { + double data = currentLevel.Get(variableName, fact.get()); + Set(currentLevel, variableName, data); + } else if (variableType == "string") { + std::string data = currentLevel.Get(variableName, fact.get()); + Set(currentLevel, variableName, data); + } else { + size_t npos = std::string::npos; + + if (variableType.find("Aggregates") != npos) { + RCP data = currentLevel.Get >(variableName, fact.get()); + Set(currentLevel, variableName, data); + } else if (variableType.find("Graph") != npos) { + RCP data = currentLevel.Get >(variableName, fact.get()); + Set(currentLevel, variableName, data); + } else if (variableType.find("SmootherBase") != npos) { + RCP data = currentLevel.Get >(variableName, fact.get()); + Set(currentLevel, variableName, data); + } else if (variableType.find("SmootherPrototype") != npos) { + RCP data = currentLevel.Get >(variableName, fact.get()); + Set(currentLevel, variableName, data); + } else if (variableType.find("Export") != npos) { + RCP data = currentLevel.Get >(variableName, fact.get()); + Set(currentLevel, variableName, data); + } else if (variableType.find("Import") != npos) { + RCP data = currentLevel.Get 
>(variableName, fact.get()); + Set(currentLevel, variableName, data); + } else if (variableType.find("Map") != npos) { + RCP data = currentLevel.Get >(variableName, fact.get()); + Set(currentLevel, variableName, data); + } else if (variableType.find("Matrix") != npos) { + RCP data = currentLevel.Get >(variableName, fact.get()); Set(currentLevel, variableName, data); - } else if (variableType == "double") { - double data = currentLevel.Get(variableName, fact.get()); + } else if (variableType.find("MultiVector") != npos) { + RCP data = currentLevel.Get >(variableName, fact.get()); Set(currentLevel, variableName, data); - } else if (variableType == "string") { - std::string data = currentLevel.Get(variableName, fact.get()); + } else if (variableType.find("Operator") != npos) { + RCP data = currentLevel.Get >(variableName, fact.get()); Set(currentLevel, variableName, data); } else { - size_t npos = std::string::npos; - - if (variableType.find("Aggregates") != npos) { - RCP data = currentLevel.Get >(variableName, fact.get()); - Set(currentLevel, variableName, data); - } - else if (variableType.find("Graph") != npos) { - RCP data = currentLevel.Get >(variableName, fact.get()); - Set(currentLevel, variableName, data); - } - else if (variableType.find("SmootherBase") != npos) { - RCP data = currentLevel.Get >(variableName, fact.get()); - Set(currentLevel, variableName, data); - } - else if (variableType.find("SmootherPrototype") != npos) { - RCP data = currentLevel.Get >(variableName, fact.get()); - Set(currentLevel, variableName, data); - } - else if (variableType.find("Export") != npos) { - RCP data = currentLevel.Get >(variableName, fact.get()); - Set(currentLevel, variableName, data); - } - else if (variableType.find("Import") != npos) { - RCP data = currentLevel.Get >(variableName, fact.get()); - Set(currentLevel, variableName, data); - } - else if (variableType.find("Map") != npos) { - RCP data = currentLevel.Get >(variableName, fact.get()); - Set(currentLevel, 
variableName, data); - } - else if (variableType.find("Matrix") != npos) { - RCP data = currentLevel.Get >(variableName, fact.get()); - Set(currentLevel, variableName, data); - } - else if (variableType.find("MultiVector") != npos) { - RCP data = currentLevel.Get >(variableName, fact.get()); - Set(currentLevel, variableName, data); - } - else if (variableType.find("Operator") != npos) { - RCP data = currentLevel.Get >(variableName, fact.get()); - Set(currentLevel, variableName, data); - } - else { - // TAW: is this working with empty procs? - TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "FineLevelInputDataFactory: cannot detect type of variable " << variableName << " generated by " << fact.get() << ". User provided type " << variableType ); - } + // TAW: is this working with empty procs? + TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "FineLevelInputDataFactory: cannot detect type of variable " << variableName << " generated by " << fact.get() << ". User provided type " << variableType); } } +} -} //namespace MueLu +} //namespace MueLu #endif /* PACKAGES_MUELU_SRC_MISC_MUELU_FINELEVELINPUTDATAFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Misc/MueLu_InitialBlockNumberFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_InitialBlockNumberFactory_decl.hpp index 8e28f8fc1c43..6d11f93c5ec2 100644 --- a/packages/muelu/src/Misc/MueLu_InitialBlockNumberFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_InitialBlockNumberFactory_decl.hpp @@ -54,7 +54,6 @@ #include "MueLu_InitialBlockNumberFactory_fwd.hpp" - namespace MueLu { /*! @@ -63,53 +62,52 @@ namespace MueLu { */ - template - class InitialBlockNumberFactory : public SingleLevelFactoryBase { +template +class InitialBlockNumberFactory : public SingleLevelFactoryBase { #undef MUELU_INITIALBLOCKNUMBERFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. + public: + //! @name Constructors/Destructors. - /*! @brief Constructor. + /*! 
@brief Constructor. */ - InitialBlockNumberFactory() { } + InitialBlockNumberFactory() {} - //! Destructor. - virtual ~InitialBlockNumberFactory() { } + //! Destructor. + virtual ~InitialBlockNumberFactory() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! @name Input - //@{ + //! @name Input + //@{ - /*! @brief Specifies the data that this class needs, and the factories that generate that data. + /*! @brief Specifies the data that this class needs, and the factories that generate that data. If the Build method of this class requires some data, but the generating factory is not specified in DeclareInput, then this class will fall back to the settings in FactoryManager. */ - void DeclareInput(Level ¤tLevel) const; - - //@} + void DeclareInput(Level ¤tLevel) const; - //! @name Build methods. - //@{ + //@} - //! Build an object with this factory. - void Build(Level & currentLevel) const; + //! @name Build methods. + //@{ - //@} + //! Build an object with this factory. 
+ void Build(Level ¤tLevel) const; - private: + //@} - }; // class InitialBlockNumberFactory + private: +}; // class InitialBlockNumberFactory -} // namespace MueLu +} // namespace MueLu #define MUELU_INITIALBLOCKNUMBERFACTORY_SHORT -#endif // MUELU_INITIALBLOCKNUMBER_FACTORY_DECL_HPP +#endif // MUELU_INITIALBLOCKNUMBER_FACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_InitialBlockNumberFactory_def.hpp b/packages/muelu/src/Misc/MueLu_InitialBlockNumberFactory_def.hpp index 8cfe22245925..ae774c838857 100644 --- a/packages/muelu/src/Misc/MueLu_InitialBlockNumberFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_InitialBlockNumberFactory_def.hpp @@ -59,40 +59,40 @@ namespace MueLu { - template - RCP InitialBlockNumberFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP InitialBlockNumberFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize"); -#undef SET_VALID_ENTRY + SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize"); +#undef SET_VALID_ENTRY - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A"); + validParamList->set >("A", Teuchos::null, "Generating factory of the matrix A"); - return validParamList; - } + return validParamList; +} - template - void InitialBlockNumberFactory::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "A"); - } +template +void InitialBlockNumberFactory::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "A"); +} - template - void InitialBlockNumberFactory::Build(Level & currentLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); - const ParameterList & pL = GetParameterList(); +template +void InitialBlockNumberFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Build", 
currentLevel); + const ParameterList& pL = GetParameterList(); - RCP A = Get< RCP >(currentLevel, "A"); - LO blocksize = as(pL.get("aggregation: block diagonal: interleaved blocksize")); + RCP A = Get >(currentLevel, "A"); + LO blocksize = as(pL.get("aggregation: block diagonal: interleaved blocksize")); - GetOStream(Statistics1) << "Generating interleaved blocking with "< BlockNumber = LocalOrdinalVectorFactory::Build(A->getRowMap(),false); - Teuchos::ArrayRCP bn_data = BlockNumber->getDataNonConst(0); - for(LO i=0; i<(LO)A->getRowMap()->getLocalNumElements();i++) - bn_data[i] = i % blocksize; - - Set(currentLevel,"BlockNumber",BlockNumber); - } + GetOStream(Statistics1) << "Generating interleaved blocking with " << blocksize << " equations" << std::endl; + RCP BlockNumber = LocalOrdinalVectorFactory::Build(A->getRowMap(), false); + Teuchos::ArrayRCP bn_data = BlockNumber->getDataNonConst(0); + for (LO i = 0; i < (LO)A->getRowMap()->getLocalNumElements(); i++) + bn_data[i] = i % blocksize; -} // namespace MueLu + Set(currentLevel, "BlockNumber", BlockNumber); +} -#endif // MUELU_INITIALBLOCKNUMBER_FACTORY_DEF_HPP +} // namespace MueLu + +#endif // MUELU_INITIALBLOCKNUMBER_FACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_InterfaceAggregationFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_InterfaceAggregationFactory_decl.hpp index b992723be328..3bbcf7c19fe9 100644 --- a/packages/muelu/src/Misc/MueLu_InterfaceAggregationFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_InterfaceAggregationFactory_decl.hpp @@ -48,8 +48,7 @@ #include "MueLu_SingleLevelFactoryBase.hpp" -namespace MueLu -{ +namespace MueLu { /*! @class InterfaceAggregationFactory class. @@ -118,23 +117,21 @@ namespace MueLu | CoarseDualNodeID2PrimalNodeID | InterfaceAggregationFactory | Coarsened mapping of dual node IDs two primal node IDs. 
*/ -template -class InterfaceAggregationFactory : public SingleLevelFactoryBase -{ + class Node = DefaultNode> +class InterfaceAggregationFactory : public SingleLevelFactoryBase { #undef MUELU_INTERFACEAGGREGATIONFACTORY_SHORT #include "MueLu_UseShortNames.hpp" -public: - + public: //! Input //@{ RCP GetValidParameterList() const override; - void DeclareInput(Level ¤tLevel) const override; + void DeclareInput(Level& currentLevel) const override; //@} @@ -142,11 +139,11 @@ class InterfaceAggregationFactory : public SingleLevelFactoryBase //@{ /*! @brief Build aggregates. */ - void Build(Level ¤tLevel) const override; + void Build(Level& currentLevel) const override; //@} -private: + private: /*! @brief Build dual aggregates based on a given dual-to-primal node mapping * * @param[in] prefix Prefix for screen output @@ -166,10 +163,9 @@ class InterfaceAggregationFactory : public SingleLevelFactoryBase * @param[in/out] currentLevel Level on which the aggregation needs to be performed */ void BuildBasedOnPrimalInterfaceDofMap(const std::string& prefix, Level& currentLevel) const; - }; -} // namespace MueLu +} // namespace MueLu #define MUELU_INTERFACEAGGREGATIONFACTORY_SHORT #endif /* MUELU_INTERFACEAGGREGATIONFACTORY_DECL_HPP_ */ diff --git a/packages/muelu/src/Misc/MueLu_InterfaceAggregationFactory_def.hpp b/packages/muelu/src/Misc/MueLu_InterfaceAggregationFactory_def.hpp index 4382cca2523b..30cb4dc0f6d5 100644 --- a/packages/muelu/src/Misc/MueLu_InterfaceAggregationFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_InterfaceAggregationFactory_def.hpp @@ -58,75 +58,64 @@ #include "MueLu_InterfaceAggregationFactory_decl.hpp" -namespace MueLu -{ +namespace MueLu { template -RCP InterfaceAggregationFactory::GetValidParameterList() const -{ +RCP InterfaceAggregationFactory::GetValidParameterList() const { RCP validParamList = rcp(new ParameterList()); validParamList->set>("A", Teuchos::null, "Generating factory of A (matrix block related to dual DOFs)"); 
validParamList->set>("Aggregates", Teuchos::null, "Generating factory of the Aggregates (for block 0,0)"); validParamList->set("Dual/primal mapping strategy", "vague", - "Strategy to represent mapping between dual and primal quantities [node-based, dof-based]"); + "Strategy to represent mapping between dual and primal quantities [node-based, dof-based]"); validParamList->set>("DualNodeID2PrimalNodeID", Teuchos::null, - "Generating factory of the DualNodeID2PrimalNodeID map as input data in a Moertel-compatible std::map to map local IDs of dual nodes to local IDs of primal nodes"); + "Generating factory of the DualNodeID2PrimalNodeID map as input data in a Moertel-compatible std::map to map local IDs of dual nodes to local IDs of primal nodes"); validParamList->set("number of DOFs per dual node", -Teuchos::ScalarTraits::one(), - "Number of DOFs per dual node"); + "Number of DOFs per dual node"); validParamList->set>("Primal interface DOF map", Teuchos::null, - "Generating factory of the primal DOF row map of slave side of the coupling surface"); + "Generating factory of the primal DOF row map of slave side of the coupling surface"); return validParamList; -} // GetValidParameterList() +} // GetValidParameterList() template -void InterfaceAggregationFactory::DeclareInput(Level ¤tLevel) const -{ - Input(currentLevel, "A"); // matrix block of dual variables +void InterfaceAggregationFactory::DeclareInput(Level ¤tLevel) const { + Input(currentLevel, "A"); // matrix block of dual variables Input(currentLevel, "Aggregates"); const ParameterList &pL = GetParameterList(); - TEUCHOS_TEST_FOR_EXCEPTION(pL.get("Dual/primal mapping strategy")=="vague", Exceptions::InvalidArgument, - "Strategy for dual/primal mapping not selected. 
Please select one of the available strategies.") - if (pL.get("Dual/primal mapping strategy") == "node-based") - { - if (currentLevel.GetLevelID() == 0) - { + TEUCHOS_TEST_FOR_EXCEPTION(pL.get("Dual/primal mapping strategy") == "vague", Exceptions::InvalidArgument, + "Strategy for dual/primal mapping not selected. Please select one of the available strategies.") + if (pL.get("Dual/primal mapping strategy") == "node-based") { + if (currentLevel.GetLevelID() == 0) { TEUCHOS_TEST_FOR_EXCEPTION(!currentLevel.IsAvailable("DualNodeID2PrimalNodeID", NoFactory::get()), - Exceptions::RuntimeError, "DualNodeID2PrimalNodeID was not provided by the user on level 0!"); + Exceptions::RuntimeError, "DualNodeID2PrimalNodeID was not provided by the user on level 0!"); currentLevel.DeclareInput("DualNodeID2PrimalNodeID", NoFactory::get(), this); - } - else - { + } else { Input(currentLevel, "DualNodeID2PrimalNodeID"); } - } - else if (pL.get("Dual/primal mapping strategy") == "dof-based") - { + } else if (pL.get("Dual/primal mapping strategy") == "dof-based") { if (currentLevel.GetLevelID() == 0) currentLevel.DeclareInput("Primal interface DOF map", NoFactory::get(), this); else Input(currentLevel, "Primal interface DOF map"); - } - else + } else TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::InvalidArgument, "Unknown strategy for dual/primal mapping.") -} // DeclareInput +} // DeclareInput template -void InterfaceAggregationFactory::Build(Level ¤tLevel) const -{ +void InterfaceAggregationFactory::Build(Level ¤tLevel) const { const std::string prefix = "MueLu::InterfaceAggregationFactory::Build: "; FactoryMonitor m(*this, "Build", currentLevel); // Call a specialized build routine based on the format of user-given input - const ParameterList &pL = GetParameterList(); + const ParameterList &pL = GetParameterList(); const std::string parameterName = "Dual/primal mapping strategy"; if (pL.get(parameterName) == "node-based") BuildBasedOnNodeMapping(prefix, currentLevel); @@ -134,23 
+123,22 @@ void InterfaceAggregationFactory::Bui BuildBasedOnPrimalInterfaceDofMap(prefix, currentLevel); else TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::InvalidArgument, - "MueLu::InterfaceAggregationFactory::Builld(): Unknown strategy for dual/primal mapping. Set a valid value for the parameter \"" << parameterName << "\".") + "MueLu::InterfaceAggregationFactory::Builld(): Unknown strategy for dual/primal mapping. Set a valid value for the parameter \"" << parameterName << "\".") } template -void InterfaceAggregationFactory::BuildBasedOnNodeMapping(const std::string& prefix, - Level ¤tLevel) const -{ +void InterfaceAggregationFactory::BuildBasedOnNodeMapping(const std::string &prefix, + Level ¤tLevel) const { using Dual2Primal_type = std::map; const ParameterList &pL = GetParameterList(); - RCP A = Get>(currentLevel, "A"); + RCP A = Get>(currentLevel, "A"); const LocalOrdinal numDofsPerDualNode = pL.get("number of DOFs per dual node"); - TEUCHOS_TEST_FOR_EXCEPTION(numDofsPerDualNode::one(), Exceptions::InvalidArgument, - "Number of dual DOFs per node < 0 (default value). Specify a valid \"number of DOFs per dual node\" in the parameter list for the InterfaceAggregationFactory."); + TEUCHOS_TEST_FOR_EXCEPTION(numDofsPerDualNode < Teuchos::ScalarTraits::one(), Exceptions::InvalidArgument, + "Number of dual DOFs per node < 0 (default value). 
Specify a valid \"number of DOFs per dual node\" in the parameter list for the InterfaceAggregationFactory."); - RCP primalAggregates = Get>(currentLevel, "Aggregates"); + RCP primalAggregates = Get>(currentLevel, "Aggregates"); ArrayRCP primalVertex2AggId = primalAggregates->GetVertex2AggId()->getData(0); // Get the user-prescribed mapping of dual to primal node IDs @@ -161,21 +149,20 @@ void InterfaceAggregationFactory::Bui mapNodesDualToPrimal = Get>(currentLevel, "DualNodeID2PrimalNodeID"); RCP operatorRangeMap = A->getRangeMap(); - const size_t myRank = operatorRangeMap->getComm()->getRank(); + const size_t myRank = operatorRangeMap->getComm()->getRank(); LocalOrdinal globalNumDualNodes = operatorRangeMap->getGlobalNumElements() / numDofsPerDualNode; - LocalOrdinal localNumDualNodes = operatorRangeMap->getLocalNumElements() / numDofsPerDualNode; + LocalOrdinal localNumDualNodes = operatorRangeMap->getLocalNumElements() / numDofsPerDualNode; TEUCHOS_TEST_FOR_EXCEPTION(localNumDualNodes != Teuchos::as(mapNodesDualToPrimal->size()), - std::runtime_error, prefix << " MueLu requires the range map and the DualNodeID2PrimalNodeID map to be compatible."); + std::runtime_error, prefix << " MueLu requires the range map and the DualNodeID2PrimalNodeID map to be compatible."); RCP dualNodeMap = Teuchos::null; if (numDofsPerDualNode == 1) dualNodeMap = operatorRangeMap; - else - { - GlobalOrdinal indexBase = operatorRangeMap->getIndexBase(); - auto comm = operatorRangeMap->getComm(); + else { + GlobalOrdinal indexBase = operatorRangeMap->getIndexBase(); + auto comm = operatorRangeMap->getComm(); std::vector myDualNodes = {}; for (size_t i = 0; i < operatorRangeMap->getLocalNumElements(); i += numDofsPerDualNode) @@ -184,7 +171,7 @@ void InterfaceAggregationFactory::Bui dualNodeMap = MapFactory::Build(operatorRangeMap->lib(), globalNumDualNodes, myDualNodes, indexBase, comm); } TEUCHOS_TEST_FOR_EXCEPTION(localNumDualNodes != Teuchos::as(dualNodeMap->getLocalNumElements()), 
- std::runtime_error, prefix << " Local number of dual nodes given by user is incompatible to the dual node map."); + std::runtime_error, prefix << " Local number of dual nodes given by user is incompatible to the dual node map."); RCP dualAggregates = rcp(new Aggregates(dualNodeMap)); dualAggregates->setObjectLabel("InterfaceAggregation"); @@ -193,7 +180,7 @@ void InterfaceAggregationFactory::Bui dualAggregates->AggregatesCrossProcessors(primalAggregates->AggregatesCrossProcessors()); ArrayRCP dualVertex2AggId = dualAggregates->GetVertex2AggId()->getDataNonConst(0); - ArrayRCP dualProcWinner = dualAggregates->GetProcWinner()->getDataNonConst(0); + ArrayRCP dualProcWinner = dualAggregates->GetProcWinner()->getDataNonConst(0); RCP coarseMapNodesDualToPrimal = rcp(new Dual2Primal_type()); RCP coarseMapNodesPrimalToDual = rcp(new Dual2Primal_type()); @@ -205,10 +192,9 @@ void InterfaceAggregationFactory::Bui * - assign dual nodes to dual aggregates * - recursively coarsen the dual-to-primal node mapping */ - LocalOrdinal localPrimalNodeID = - Teuchos::ScalarTraits::one(); - LocalOrdinal currentPrimalAggId = - Teuchos::ScalarTraits::one(); - for (LocalOrdinal localDualNodeID = 0; localDualNodeID < localNumDualNodes; ++localDualNodeID) - { + LocalOrdinal localPrimalNodeID = -Teuchos::ScalarTraits::one(); + LocalOrdinal currentPrimalAggId = -Teuchos::ScalarTraits::one(); + for (LocalOrdinal localDualNodeID = 0; localDualNodeID < localNumDualNodes; ++localDualNodeID) { // Get local ID of the primal node associated to the current dual node localPrimalNodeID = (*mapNodesDualToPrimal)[localDualNodeID]; @@ -217,17 +203,16 @@ void InterfaceAggregationFactory::Bui // Test if the current primal aggregate has no associated dual aggregate, yet. // Create new dual aggregate, if necessary. 
- if (coarseMapNodesPrimalToDual->count(currentPrimalAggId) == 0) - { + if (coarseMapNodesPrimalToDual->count(currentPrimalAggId) == 0) { // Associate a new dual aggregate w/ the current primal aggregate - (*coarseMapNodesPrimalToDual)[currentPrimalAggId] = numLocalDualAggregates; + (*coarseMapNodesPrimalToDual)[currentPrimalAggId] = numLocalDualAggregates; (*coarseMapNodesDualToPrimal)[numLocalDualAggregates] = currentPrimalAggId; ++numLocalDualAggregates; } // Fill the dual aggregate dualVertex2AggId[localDualNodeID] = (*coarseMapNodesPrimalToDual)[currentPrimalAggId]; - dualProcWinner[localDualNodeID] = myRank; + dualProcWinner[localDualNodeID] = myRank; } // Store dual aggregeate data as well as coarsening information @@ -235,25 +220,24 @@ void InterfaceAggregationFactory::Bui Set(currentLevel, "Aggregates", dualAggregates); Set(currentLevel, "CoarseDualNodeID2PrimalNodeID", coarseMapNodesDualToPrimal); GetOStream(Statistics1) << dualAggregates->description() << std::endl; -} // BuildBasedOnNodeMapping +} // BuildBasedOnNodeMapping template void InterfaceAggregationFactory::BuildBasedOnPrimalInterfaceDofMap( - const std::string& prefix, Level ¤tLevel) const -{ + const std::string &prefix, Level ¤tLevel) const { const GlobalOrdinal GO_ZERO = Teuchos::ScalarTraits::zero(); - const GlobalOrdinal GO_ONE = Teuchos::ScalarTraits::one(); + const GlobalOrdinal GO_ONE = Teuchos::ScalarTraits::one(); // filled with striding information from A01 - LocalOrdinal numDofsPerDualNode = 0; + LocalOrdinal numDofsPerDualNode = 0; LocalOrdinal numDofsPerPrimalNode = 0; // Grab the off-diagonal block (0,1) from the global blocked operator - RCP A01 = Get>(currentLevel, "A"); - RCP primalAggregates = Get>(currentLevel, "Aggregates"); + RCP A01 = Get>(currentLevel, "A"); + RCP primalAggregates = Get>(currentLevel, "Aggregates"); ArrayRCP primalVertex2AggId = primalAggregates->GetVertex2AggId()->getData(0); - auto comm = A01->getRowMap()->getComm(); + auto comm = 
A01->getRowMap()->getComm(); const int myRank = comm->getRank(); RCP primalInterfaceDofRowMap = Teuchos::null; @@ -266,22 +250,22 @@ void InterfaceAggregationFactory::Bui TEUCHOS_ASSERT(!primalInterfaceDofRowMap.is_null()); if (A01->IsView("stridedMaps") && rcp_dynamic_cast(A01->getRowMap("stridedMaps")) != Teuchos::null) { - auto stridedRowMap = rcp_dynamic_cast(A01->getRowMap("stridedMaps")); - auto stridedColMap = rcp_dynamic_cast(A01->getColMap("stridedMaps")); + auto stridedRowMap = rcp_dynamic_cast(A01->getRowMap("stridedMaps")); + auto stridedColMap = rcp_dynamic_cast(A01->getColMap("stridedMaps")); numDofsPerPrimalNode = Teuchos::as(stridedRowMap->getFixedBlockSize()); - numDofsPerDualNode = Teuchos::as(stridedColMap->getFixedBlockSize()); + numDofsPerDualNode = Teuchos::as(stridedColMap->getFixedBlockSize()); if (numDofsPerPrimalNode != numDofsPerDualNode) { - GetOStream(Warnings) << "InterfaceAggregation attempts to work with " - << numDofsPerPrimalNode << " primal DOFs per node and " << numDofsPerDualNode << " dual DOFs per node." - << "Be careful! Algorithm is not well-tested, if number of primal and dual DOFs per node differ." << std::endl; + GetOStream(Warnings) << "InterfaceAggregation attempts to work with " + << numDofsPerPrimalNode << " primal DOFs per node and " << numDofsPerDualNode << " dual DOFs per node." + << "Be careful! Algorithm is not well-tested, if number of primal and dual DOFs per node differ." << std::endl; } } - TEUCHOS_TEST_FOR_EXCEPTION(numDofsPerPrimalNode==0, Exceptions::RuntimeError, - "InterfaceAggregationFactory could not extract the number of primal DOFs per node from striding information. At least, make sure that StridedMap information has actually been provided."); - TEUCHOS_TEST_FOR_EXCEPTION(numDofsPerDualNode==0, Exceptions::RuntimeError, - "InterfaceAggregationFactory could not extract the number of dual DOFs per node from striding information. 
At least, make sure that StridedMap information has actually been provided."); + TEUCHOS_TEST_FOR_EXCEPTION(numDofsPerPrimalNode == 0, Exceptions::RuntimeError, + "InterfaceAggregationFactory could not extract the number of primal DOFs per node from striding information. At least, make sure that StridedMap information has actually been provided."); + TEUCHOS_TEST_FOR_EXCEPTION(numDofsPerDualNode == 0, Exceptions::RuntimeError, + "InterfaceAggregationFactory could not extract the number of dual DOFs per node from striding information. At least, make sure that StridedMap information has actually been provided."); /* Determine block information for primal block * @@ -291,7 +275,7 @@ void InterfaceAggregationFactory::Bui * - is 3 or 6 (for 2d or 3d problems) on coarser levels (# nullspace vectors) */ GlobalOrdinal primalDofOffset = GO_ZERO; - LocalOrdinal primalBlockDim = numDofsPerPrimalNode; + LocalOrdinal primalBlockDim = numDofsPerPrimalNode; /* Determine block information for Lagrange multipliers * @@ -303,22 +287,22 @@ void InterfaceAggregationFactory::Bui * are 3 or 6 displacement dofs per node) */ GlobalOrdinal dualDofOffset = A01->getColMap()->getMinAllGlobalIndex(); - LocalOrdinal dualBlockDim = numDofsPerDualNode; + LocalOrdinal dualBlockDim = numDofsPerDualNode; // Generate global replicated mapping "lagrNodeId -> dispNodeId" - RCP dualDofMap = A01->getDomainMap(); + RCP dualDofMap = A01->getDomainMap(); GlobalOrdinal gMaxDualNodeId = AmalgamationFactory::DOFGid2NodeId( dualDofMap->getMaxAllGlobalIndex(), dualBlockDim, dualDofOffset, dualDofMap->getIndexBase()); GlobalOrdinal gMinDualNodeId = AmalgamationFactory::DOFGid2NodeId( dualDofMap->getMinAllGlobalIndex(), dualBlockDim, dualDofOffset, dualDofMap->getIndexBase()); GetOStream(Runtime1) << " Dual DOF map: index base = " << dualDofMap->getIndexBase() - << ", block dim = " << dualBlockDim - << ", gid offset = " << dualDofOffset - << std::endl; + << ", block dim = " << dualBlockDim + << ", gid offset = " 
<< dualDofOffset + << std::endl; GetOStream(Runtime1) << " [primal / dual] DOFs per node = [" << numDofsPerPrimalNode - << "/" << numDofsPerDualNode << "]" << std::endl; + << "/" << numDofsPerDualNode << "]" << std::endl; // Generate locally replicated vector for mapping dual node IDs to primal node IDs Array dualNodeId2primalNodeId(gMaxDualNodeId - gMinDualNodeId + 1, -GO_ONE); @@ -333,16 +317,15 @@ void InterfaceAggregationFactory::Bui // Fill mapping of Lagrange Node IDs to displacement aggregate IDs const size_t numMyPrimalInterfaceDOFs = primalInterfaceDofRowMap->getLocalNumElements(); - for (size_t r = 0; r < numMyPrimalInterfaceDOFs; r += numDofsPerPrimalNode) - { + for (size_t r = 0; r < numMyPrimalInterfaceDOFs; r += numDofsPerPrimalNode) { GlobalOrdinal gPrimalRowId = primalInterfaceDofRowMap->getGlobalElement(r); - if (A01->getRowMap()->isNodeGlobalElement(gPrimalRowId)) // Remove this if? + if (A01->getRowMap()->isNodeGlobalElement(gPrimalRowId)) // Remove this if? { - const LocalOrdinal lPrimalRowId = A01->getRowMap()->getLocalElement(gPrimalRowId); + const LocalOrdinal lPrimalRowId = A01->getRowMap()->getLocalElement(gPrimalRowId); const GlobalOrdinal gPrimalNodeId = AmalgamationFactory::DOFGid2NodeId(gPrimalRowId, primalBlockDim, primalDofOffset, primalInterfaceDofRowMap->getIndexBase()); - const LocalOrdinal lPrimalNodeId = lPrimalRowId / numDofsPerPrimalNode; - const LocalOrdinal primalAggId = primalVertex2AggId[lPrimalNodeId]; + const LocalOrdinal lPrimalNodeId = lPrimalRowId / numDofsPerPrimalNode; + const LocalOrdinal primalAggId = primalVertex2AggId[lPrimalNodeId]; const GlobalOrdinal gDualDofId = A01->getColMap()->getGlobalElement(r); @@ -350,32 +333,30 @@ void InterfaceAggregationFactory::Bui if (local_dualNodeId2primalNodeId[gDualNodeId - gMinDualNodeId] == -GO_ONE) { local_dualNodeId2primalNodeId[gDualNodeId - gMinDualNodeId] = gPrimalNodeId; - local_dualNodeId2primalAggId[gDualNodeId - gMinDualNodeId] = primalAggId; + 
local_dualNodeId2primalAggId[gDualNodeId - gMinDualNodeId] = primalAggId; } else { GetOStream(Warnings) << "PROC: " << myRank << " gDualNodeId " << gDualNodeId << " is already connected to primal nodeId " - << local_dualNodeId2primalNodeId[gDualNodeId - gMinDualNodeId] - << ". Ignore new dispNodeId: " << gPrimalNodeId << std::endl; + << local_dualNodeId2primalNodeId[gDualNodeId - gMinDualNodeId] + << ". Ignore new dispNodeId: " << gPrimalNodeId << std::endl; } - } } const int dualNodeId2primalNodeIdSize = Teuchos::as(local_dualNodeId2primalNodeId.size()); Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, dualNodeId2primalNodeIdSize, - &local_dualNodeId2primalNodeId[0], &dualNodeId2primalNodeId[0]); + &local_dualNodeId2primalNodeId[0], &dualNodeId2primalNodeId[0]); Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, dualNodeId2primalNodeIdSize, - &local_dualNodeId2primalAggId[0], &dualNodeId2primalAggId[0]); + &local_dualNodeId2primalAggId[0], &dualNodeId2primalAggId[0]); // build node map for dual variables // generate "artificial nodes" for lagrange multipliers // the node map is also used for defining the Aggregates for the lagrange multipliers std::vector dualNodes; - for (size_t r = 0; r < A01->getDomainMap()->getLocalNumElements(); r++) - { + for (size_t r = 0; r < A01->getDomainMap()->getLocalNumElements(); r++) { // determine global Lagrange multiplier row Dof // generate a node id using the grid, lagr_blockdim and lagr_offset // todo make sure, that // nodeId is unique and does not interfer with the displacement nodes - GlobalOrdinal gDualDofId = A01->getDomainMap()->getGlobalElement(r); + GlobalOrdinal gDualDofId = A01->getDomainMap()->getGlobalElement(r); GlobalOrdinal gDualNodeId = AmalgamationFactory::DOFGid2NodeId(gDualDofId, dualBlockDim, dualDofOffset, 0); dualNodes.push_back(gDualNodeId); } @@ -385,7 +366,7 @@ void InterfaceAggregationFactory::Bui // define node map for Lagrange multipliers Teuchos::RCP dualNodeMap = 
MapFactory::Build(A01->getRowMap()->lib(), - Teuchos::OrdinalTraits::invalid(), dualNodes, A01->getRowMap()->getIndexBase(), comm); + Teuchos::OrdinalTraits::invalid(), dualNodes, A01->getRowMap()->getIndexBase(), comm); // Build aggregates using the lagrange multiplier node map Teuchos::RCP dualAggregates = Teuchos::rcp(new Aggregates(dualNodeMap)); @@ -393,30 +374,29 @@ void InterfaceAggregationFactory::Bui // extract aggregate data structures to fill Teuchos::ArrayRCP dualVertex2AggId = dualAggregates->GetVertex2AggId()->getDataNonConst(0); - Teuchos::ArrayRCP dualProcWinner = dualAggregates->GetProcWinner()->getDataNonConst(0); + Teuchos::ArrayRCP dualProcWinner = dualAggregates->GetProcWinner()->getDataNonConst(0); // loop over local lagrange multiplier node ids LocalOrdinal nLocalAggregates = 0; std::map primalAggId2localDualAggId; - for (size_t lDualNodeID = 0; lDualNodeID < dualNodeMap->getLocalNumElements(); ++lDualNodeID) - { + for (size_t lDualNodeID = 0; lDualNodeID < dualNodeMap->getLocalNumElements(); ++lDualNodeID) { const GlobalOrdinal gDualNodeId = dualNodeMap->getGlobalElement(lDualNodeID); const GlobalOrdinal primalAggId = dualNodeId2primalAggId[gDualNodeId - gMinDualNodeId]; if (primalAggId2localDualAggId.count(primalAggId) == 0) primalAggId2localDualAggId[primalAggId] = nLocalAggregates++; dualVertex2AggId[lDualNodeID] = primalAggId2localDualAggId[primalAggId]; - dualProcWinner[lDualNodeID] = myRank; + dualProcWinner[lDualNodeID] = myRank; } - const LocalOrdinal fullblocksize = numDofsPerDualNode; - const GlobalOrdinal offset = A01->getColMap()->getMinAllGlobalIndex(); - const LocalOrdinal blockid = -1; - const LocalOrdinal nStridedOffset = 0; + const LocalOrdinal fullblocksize = numDofsPerDualNode; + const GlobalOrdinal offset = A01->getColMap()->getMinAllGlobalIndex(); + const LocalOrdinal blockid = -1; + const LocalOrdinal nStridedOffset = 0; const LocalOrdinal stridedblocksize = fullblocksize; RCP> rowTranslation = rcp(new Array()); RCP> 
colTranslation = rcp(new Array()); - const size_t numMyDualNodes = dualNodeMap->getLocalNumElements(); + const size_t numMyDualNodes = dualNodeMap->getLocalNumElements(); for (size_t lDualNodeID = 0; lDualNodeID < numMyDualNodes; ++lDualNodeID) { for (LocalOrdinal dof = 0; dof < numDofsPerDualNode; ++dof) { rowTranslation->push_back(lDualNodeID); @@ -427,8 +407,8 @@ void InterfaceAggregationFactory::Bui TEUCHOS_ASSERT(A01->isFillComplete()); RCP dualAmalgamationInfo = rcp(new AmalgamationInfo(rowTranslation, colTranslation, - A01->getDomainMap(), A01->getDomainMap(), A01->getDomainMap(), - fullblocksize, offset, blockid, nStridedOffset, stridedblocksize)); + A01->getDomainMap(), A01->getDomainMap(), A01->getDomainMap(), + fullblocksize, offset, blockid, nStridedOffset, stridedblocksize)); dualAggregates->SetNumAggregates(nLocalAggregates); dualAggregates->AggregatesCrossProcessors(primalAggregates->AggregatesCrossProcessors()); @@ -441,8 +421,8 @@ void InterfaceAggregationFactory::Bui currentLevel.Set("Aggregates", dualAggregates, this); currentLevel.Set("UnAmalgamationInfo", dualAmalgamationInfo, this); -} // BuildBasedOnPrimalInterfaceDofMap +} // BuildBasedOnPrimalInterfaceDofMap -} // namespace MueLu +} // namespace MueLu #endif /* MUELU_INTERFACEAGGREGATIONFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Misc/MueLu_InterfaceMappingTransferFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_InterfaceMappingTransferFactory_decl.hpp index 0c22adb5e443..8da95fcdcc7d 100644 --- a/packages/muelu/src/Misc/MueLu_InterfaceMappingTransferFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_InterfaceMappingTransferFactory_decl.hpp @@ -52,8 +52,7 @@ #include "MueLu_ConfigDefs.hpp" #include "MueLu_TwoLevelFactoryBase.hpp" -namespace MueLu -{ +namespace MueLu { /*! 
@class InterfaceMappingTransferFactory @@ -73,14 +72,13 @@ namespace MueLu The * in the @c validated column means that the parameter is declared in the list of valid input parameters (see InterfaceAggregationFactory::GetValidParameters).
The * in the @c requested column states that the data is requested as input with all dependencies (see InterfaceAggregationFactory::DeclareInput). */ -template -class InterfaceMappingTransferFactory : public TwoLevelFactoryBase -{ + class Node = DefaultNode> +class InterfaceMappingTransferFactory : public TwoLevelFactoryBase { #undef MUELU_INTERFACEMAPPINGTRANSFERFACTORY_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" -public: + public: //! Constructor. InterfaceMappingTransferFactory() = default; @@ -92,6 +90,6 @@ class InterfaceMappingTransferFactory : public TwoLevelFactoryBase void Build(Level &fineLevel, Level &coarseLevel) const override; }; -} // namespace MueLu +} // namespace MueLu #define MUELU_INTERFACEMAPPINGTRANSFERFACTORY_SHORT #endif /* MUELU_INTERFACEMAPPINGTRANSFERFACTORY_DECL_HPP_ */ diff --git a/packages/muelu/src/Misc/MueLu_InterfaceMappingTransferFactory_def.hpp b/packages/muelu/src/Misc/MueLu_InterfaceMappingTransferFactory_def.hpp index 5daab13cbdfb..a5e7f0dd0129 100644 --- a/packages/muelu/src/Misc/MueLu_InterfaceMappingTransferFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_InterfaceMappingTransferFactory_def.hpp @@ -47,32 +47,28 @@ #include "MueLu_InterfaceMappingTransferFactory_decl.hpp" -namespace MueLu -{ +namespace MueLu { template -RCP InterfaceMappingTransferFactory::GetValidParameterList() const -{ +RCP InterfaceMappingTransferFactory::GetValidParameterList() const { RCP validParamList = rcp(new ParameterList()); validParamList->set>("CoarseDualNodeID2PrimalNodeID", Teuchos::null, "Generating factory of the CoarseDualNodeID2PrimalNodeID map"); return validParamList; } template -void InterfaceMappingTransferFactory::DeclareInput(Level &fineLevel, Level &coarseLevel) const -{ +void InterfaceMappingTransferFactory::DeclareInput(Level &fineLevel, Level &coarseLevel) const { Input(fineLevel, "CoarseDualNodeID2PrimalNodeID"); } template -void InterfaceMappingTransferFactory::Build(Level &fineLevel, Level &coarseLevel) const -{ +void 
InterfaceMappingTransferFactory::Build(Level &fineLevel, Level &coarseLevel) const { Monitor m(*this, "Interface Mapping transfer factory"); RCP> coarseLagr2Dof = Get>>(fineLevel, "CoarseDualNodeID2PrimalNodeID"); Set(coarseLevel, "DualNodeID2PrimalNodeID", coarseLagr2Dof); } -} // namespace MueLu +} // namespace MueLu #endif /* MUELU_INTERFACEMAPPINGTRANSFERFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Misc/MueLu_InverseApproximationFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_InverseApproximationFactory_decl.hpp index 91b50b0d5c1f..b24836aafa83 100644 --- a/packages/muelu/src/Misc/MueLu_InverseApproximationFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_InverseApproximationFactory_decl.hpp @@ -60,7 +60,7 @@ namespace MueLu { - /*! +/*! @class InverseApproximationFactory class. @brief Factory for building the approximate inverse of a matrix. @@ -93,45 +93,45 @@ namespace MueLu { | Ainv | InverseApproximationFactory | The approximate inverse of a given matrix. */ - template - class InverseApproximationFactory : public SingleLevelFactoryBase { +template +class InverseApproximationFactory : public SingleLevelFactoryBase { #undef MUELU_INVERSEAPPROXIMATIONFACTORY_SHORT - #include "MueLu_UseShortNames.hpp" +#include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - InverseApproximationFactory() = default; + //! Constructor. + InverseApproximationFactory() = default; - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level& currentLevel) const; + void DeclareInput(Level& currentLevel) const; - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //@{ - //! @name Build methods. + //@{ + //! @name Build methods. - //! Build an object with this factory. - void Build(Level& currentLevel) const; + //! Build an object with this factory. + void Build(Level& currentLevel) const; - //@} + //@} - private: - //! 
Sparse inverse calculation method. - RCP GetSparseInverse(const RCP& A, const RCP& sparsityPattern) const; + private: + //! Sparse inverse calculation method. + RCP GetSparseInverse(const RCP& A, const RCP& sparsityPattern) const; - }; // class InverseApproximationFactory +}; // class InverseApproximationFactory -} // namespace MueLu +} // namespace MueLu #define MUELU_INVERSEAPPROXIMATIONFACTORY_SHORT #endif /* MUELU_INVERSEAPPROXIMATIONFACTORY_DECL_HPP_ */ diff --git a/packages/muelu/src/Misc/MueLu_InverseApproximationFactory_def.hpp b/packages/muelu/src/Misc/MueLu_InverseApproximationFactory_def.hpp index 7ec193a94d78..30fad853d9ba 100644 --- a/packages/muelu/src/Misc/MueLu_InverseApproximationFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_InverseApproximationFactory_def.hpp @@ -69,147 +69,140 @@ namespace MueLu { - template - RCP InverseApproximationFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - using Magnitude = typename Teuchos::ScalarTraits::magnitudeType; - - validParamList->set >("A", NoFactory::getRCP(), "Matrix to build the approximate inverse on.\n"); - - validParamList->set ("inverse: approximation type", "diagonal", "Method used to approximate the inverse."); - validParamList->set ("inverse: drop tolerance", 0.0 , "Values below this threshold are dropped from the matrix (or fixed if diagonal fixing is active)."); - validParamList->set ("inverse: fixing", false , "Keep diagonal and fix small entries with 1.0"); - - return validParamList; +template +RCP InverseApproximationFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + using Magnitude = typename Teuchos::ScalarTraits::magnitudeType; + + validParamList->set>("A", NoFactory::getRCP(), "Matrix to build the approximate inverse on.\n"); + + validParamList->set("inverse: approximation type", "diagonal", "Method used to approximate the inverse."); + validParamList->set("inverse: drop tolerance", 0.0, "Values below 
this threshold are dropped from the matrix (or fixed if diagonal fixing is active)."); + validParamList->set("inverse: fixing", false, "Keep diagonal and fix small entries with 1.0"); + + return validParamList; +} + +template +void InverseApproximationFactory::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "A"); +} + +template +void InverseApproximationFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + + using STS = Teuchos::ScalarTraits; + const SC one = STS::one(); + using Magnitude = typename Teuchos::ScalarTraits::magnitudeType; + + const ParameterList& pL = GetParameterList(); + const bool fixing = pL.get("inverse: fixing"); + + // check which approximation type to use + const std::string method = pL.get("inverse: approximation type"); + TEUCHOS_TEST_FOR_EXCEPTION(method != "diagonal" && method != "lumping" && method != "sparseapproxinverse", Exceptions::RuntimeError, + "MueLu::InverseApproximationFactory::Build: Approximation type can be 'diagonal' or 'lumping' or " + "'sparseapproxinverse'."); + + RCP A = Get>(currentLevel, "A"); + RCP bA = Teuchos::rcp_dynamic_cast(A); + const bool isBlocked = (bA == Teuchos::null ? false : true); + + // if blocked operator is used, defaults to A(0,0) + if (isBlocked) A = bA->getMatrix(0, 0); + + const Magnitude tol = pL.get("inverse: drop tolerance"); + RCP Ainv = Teuchos::null; + + if (method == "diagonal") { + const auto diag = VectorFactory::Build(A->getRangeMap(), true); + A->getLocalDiagCopy(*diag); + const RCP D = (!fixing ? Utilities::GetInverse(diag) : Utilities::GetInverse(diag, tol, one)); + Ainv = MatrixFactory::Build(D); + } else if (method == "lumping") { + const auto diag = Utilities::GetLumpedMatrixDiagonal(*A); + const RCP D = (!fixing ? 
Utilities::GetInverse(diag) : Utilities::GetInverse(diag, tol, one)); + Ainv = MatrixFactory::Build(D); + } else if (method == "sparseapproxinverse") { + RCP sparsityPattern = Utilities::GetThresholdedGraph(A, tol, A->getGlobalMaxNumRowEntries()); + GetOStream(Statistics1) << "NNZ Graph(A): " << A->getCrsGraph()->getGlobalNumEntries() << " , NNZ Tresholded Graph(A): " << sparsityPattern->getGlobalNumEntries() << std::endl; + RCP pAinv = GetSparseInverse(A, sparsityPattern); + Ainv = Utilities::GetThresholdedMatrix(pAinv, tol, fixing, pAinv->getGlobalMaxNumRowEntries()); + GetOStream(Statistics1) << "NNZ Ainv: " << pAinv->getGlobalNumEntries() << ", NNZ Tresholded Ainv (parameter: " << tol << "): " << Ainv->getGlobalNumEntries() << std::endl; } - template - void InverseApproximationFactory::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "A"); - } - - template - void InverseApproximationFactory::Build(Level& currentLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); - - using STS = Teuchos::ScalarTraits; - const SC one = STS::one(); - using Magnitude = typename Teuchos::ScalarTraits::magnitudeType; - - const ParameterList& pL = GetParameterList(); - const bool fixing = pL.get("inverse: fixing"); - - // check which approximation type to use - const std::string method = pL.get("inverse: approximation type"); - TEUCHOS_TEST_FOR_EXCEPTION(method != "diagonal" && method != "lumping" && method != "sparseapproxinverse", Exceptions::RuntimeError, - "MueLu::InverseApproximationFactory::Build: Approximation type can be 'diagonal' or 'lumping' or " - "'sparseapproxinverse'."); - - RCP A = Get >(currentLevel, "A"); - RCP bA = Teuchos::rcp_dynamic_cast(A); - const bool isBlocked = (bA == Teuchos::null ? 
false : true); - - // if blocked operator is used, defaults to A(0,0) - if(isBlocked) A = bA->getMatrix(0,0); - - const Magnitude tol = pL.get("inverse: drop tolerance"); - RCP Ainv = Teuchos::null; - - if(method=="diagonal") - { - const auto diag = VectorFactory::Build(A->getRangeMap(), true); - A->getLocalDiagCopy(*diag); - const RCP D = (!fixing ? Utilities::GetInverse(diag) : Utilities::GetInverse(diag, tol, one)); - Ainv = MatrixFactory::Build(D); + GetOStream(Statistics1) << "Approximate inverse calculated by: " << method << "." << std::endl; + GetOStream(Statistics1) << "Ainv has " << Ainv->getGlobalNumRows() << "x" << Ainv->getGlobalNumCols() << " rows and columns." << std::endl; + + Set(currentLevel, "Ainv", Ainv); +} + +template +RCP> +InverseApproximationFactory::GetSparseInverse(const RCP& Aorg, const RCP& sparsityPattern) const { + // construct the inverse matrix with the given sparsity pattern + RCP Ainv = MatrixFactory::Build(sparsityPattern); + Ainv->resumeFill(); + + // gather missing rows from other procs to generate an overlapping map + RCP rowImport = ImportFactory::Build(sparsityPattern->getRowMap(), sparsityPattern->getColMap()); + RCP A = MatrixFactory::Build(Aorg, *rowImport); + + // loop over all rows of the inverse sparsity pattern (this can be done in parallel) + for (size_t k = 0; k < sparsityPattern->getLocalNumRows(); k++) { + // 1. get column indices Ik of local row k + ArrayView Ik; + sparsityPattern->getLocalRowView(k, Ik); + + // 2. get all local A(Ik,:) rows + Array> J(Ik.size()); + Array> Ak(Ik.size()); + Array Jk; + for (LO i = 0; i < Ik.size(); i++) { + A->getLocalRowView(Ik[i], J[i], Ak[i]); + for (LO j = 0; j < J[i].size(); j++) + Jk.append(J[i][j]); } - else if(method=="lumping") - { - const auto diag = Utilities::GetLumpedMatrixDiagonal(*A); - const RCP D = (!fixing ? 
Utilities::GetInverse(diag) : Utilities::GetInverse(diag, tol, one)); - Ainv = MatrixFactory::Build(D); - } - else if(method=="sparseapproxinverse") - { - RCP sparsityPattern = Utilities::GetThresholdedGraph(A, tol, A->getGlobalMaxNumRowEntries()); - GetOStream(Statistics1) << "NNZ Graph(A): " << A->getCrsGraph()->getGlobalNumEntries() << " , NNZ Tresholded Graph(A): " << sparsityPattern->getGlobalNumEntries() << std::endl; - RCP pAinv = GetSparseInverse(A, sparsityPattern); - Ainv = Utilities::GetThresholdedMatrix(pAinv, tol, fixing, pAinv->getGlobalMaxNumRowEntries()); - GetOStream(Statistics1) << "NNZ Ainv: " << pAinv->getGlobalNumEntries() << ", NNZ Tresholded Ainv (parameter: " << tol << "): " << Ainv->getGlobalNumEntries() << std::endl; - } - - GetOStream(Statistics1) << "Approximate inverse calculated by: " << method << "." << std::endl; - GetOStream(Statistics1) << "Ainv has " << Ainv->getGlobalNumRows() << "x" << Ainv->getGlobalNumCols() << " rows and columns." << std::endl; - - Set(currentLevel, "Ainv", Ainv); - } - - template - RCP> - InverseApproximationFactory::GetSparseInverse(const RCP& Aorg, const RCP& sparsityPattern) const { - - // construct the inverse matrix with the given sparsity pattern - RCP Ainv = MatrixFactory::Build(sparsityPattern); - Ainv->resumeFill(); - - // gather missing rows from other procs to generate an overlapping map - RCP rowImport = ImportFactory::Build(sparsityPattern->getRowMap(), sparsityPattern->getColMap()); - RCP A = MatrixFactory::Build(Aorg, *rowImport); - - // loop over all rows of the inverse sparsity pattern (this can be done in parallel) - for(size_t k=0; kgetLocalNumRows(); k++) { - - // 1. get column indices Ik of local row k - ArrayView Ik; - sparsityPattern->getLocalRowView(k, Ik); - - // 2. 
get all local A(Ik,:) rows - Array> J(Ik.size()); - Array> Ak(Ik.size()); - Array Jk; - for (LO i = 0; i < Ik.size(); i++) { - A->getLocalRowView(Ik[i], J[i], Ak[i]); - for (LO j = 0; j < J[i].size(); j++) - Jk.append(J[i][j]); + // set of unique column indices Jk + std::sort(Jk.begin(), Jk.end()); + Jk.erase(std::unique(Jk.begin(), Jk.end()), Jk.end()); + // create map + std::map G; + for (LO i = 0; i < Jk.size(); i++) G.insert(std::pair(Jk[i], i)); + + // 3. merge rows together + Teuchos::SerialDenseMatrix localA(Jk.size(), Ik.size(), true); + for (LO i = 0; i < Ik.size(); i++) { + for (LO j = 0; j < J[i].size(); j++) { + localA(G.at(J[i][j]), i) = Ak[i][j]; } - // set of unique column indices Jk - std::sort(Jk.begin(), Jk.end()); - Jk.erase(std::unique(Jk.begin(), Jk.end()), Jk.end()); - // create map - std::map G; - for (LO i = 0; i < Jk.size(); i++) G.insert(std::pair(Jk[i], i)); - - // 3. merge rows together - Teuchos::SerialDenseMatrix localA(Jk.size(), Ik.size(), true); - for (LO i = 0; i < Ik.size(); i++) { - for (LO j = 0; j < J[i].size(); j++) { - localA(G.at(J[i][j]), i) = Ak[i][j]; - } - } - - // 4. get direction-vector - // diagonal needs an entry! - Teuchos::SerialDenseVector ek(Jk.size(), true); - ek[std::find(Jk.begin(), Jk.end(), k) - Jk.begin()] = Teuchos::ScalarTraits::one();; - - // 5. solve linear system for x - Teuchos::SerialDenseVector localX(Ik.size()); - Teuchos::SerialQRDenseSolver qrSolver; - qrSolver.setMatrix(Teuchos::rcp(&localA, false)); - qrSolver.setVectors(Teuchos::rcp(&localX, false), Teuchos::rcp(&ek, false)); - const int err = qrSolver.solve(); - TEUCHOS_TEST_FOR_EXCEPTION(err != 0, Exceptions::RuntimeError, - "MueLu::InverseApproximationFactory::GetSparseInverse: Error in serial QR solve."); - - // 6. set calculated row into Ainv - ArrayView Mk(localX.values(), localX.length()); - Ainv->replaceLocalValues(k, Ik, Mk); - } - Ainv->fillComplete(); - return Ainv; + // 4. get direction-vector + // diagonal needs an entry! 
+ Teuchos::SerialDenseVector ek(Jk.size(), true); + ek[std::find(Jk.begin(), Jk.end(), k) - Jk.begin()] = Teuchos::ScalarTraits::one(); + ; + + // 5. solve linear system for x + Teuchos::SerialDenseVector localX(Ik.size()); + Teuchos::SerialQRDenseSolver qrSolver; + qrSolver.setMatrix(Teuchos::rcp(&localA, false)); + qrSolver.setVectors(Teuchos::rcp(&localX, false), Teuchos::rcp(&ek, false)); + const int err = qrSolver.solve(); + TEUCHOS_TEST_FOR_EXCEPTION(err != 0, Exceptions::RuntimeError, + "MueLu::InverseApproximationFactory::GetSparseInverse: Error in serial QR solve."); + + // 6. set calculated row into Ainv + ArrayView Mk(localX.values(), localX.length()); + Ainv->replaceLocalValues(k, Ik, Mk); } + Ainv->fillComplete(); + + return Ainv; +} -} // namespace MueLu +} // namespace MueLu #endif /* MUELU_INVERSEAPPROXIMATIONFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Misc/MueLu_LineDetectionFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_LineDetectionFactory_decl.hpp index 9a8207ae430a..fb5dea269c06 100644 --- a/packages/muelu/src/Misc/MueLu_LineDetectionFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_LineDetectionFactory_decl.hpp @@ -47,9 +47,9 @@ #define MUELU_LINEDETECTIONFACTORY_DECL_HPP // same as in SemiCoarsenPFactory (TODO rework this) -#define VERTICAL 1 -#define HORIZONTAL 2 -#define GRID_SUPPLIED -1 +#define VERTICAL 1 +#define HORIZONTAL 2 +#define GRID_SUPPLIED -1 #include "MueLu_ConfigDefs.hpp" #include "MueLu_LineDetectionFactory_fwd.hpp" @@ -59,82 +59,82 @@ namespace MueLu { - /*! +/*! @class LineDetectionFactory class. 
@brief Factory for building line detection information */ - template - class LineDetectionFactory : public SingleLevelFactoryBase { +template +class LineDetectionFactory : public SingleLevelFactoryBase { #undef MUELU_LINEDETECTIONFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - - using coordinate_type = typename Teuchos::ScalarTraits::coordinateType; - using CoordinateMultiVector = typename Xpetra::MultiVector; + public: + using coordinate_type = typename Teuchos::ScalarTraits::coordinateType; + using CoordinateMultiVector = typename Xpetra::MultiVector; - //! @name Constructors/Destructors. - //@{ + //! @name Constructors/Destructors. + //@{ - LineDetectionFactory() : Zorientation_(VERTICAL) { } + LineDetectionFactory() + : Zorientation_(VERTICAL) {} - //! Destructor. - virtual ~LineDetectionFactory() { } + //! Destructor. + virtual ~LineDetectionFactory() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level& currentLevel) const; + void DeclareInput(Level& currentLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - /*! + /*! @brief Build method. 
Builds line detection information and stores it in currentLevel */ - void Build(Level& currentLevel) const; + void Build(Level& currentLevel) const; - //@} + //@} - private: - void sort_coordinates(LO numCoords, LO* OrigLoc, - coordinate_type* xvals, - coordinate_type* yvals, - coordinate_type* zvals, - coordinate_type* xtemp, - coordinate_type* ytemp, - coordinate_type* ztemp, - bool flipXY = false) const; + private: + void sort_coordinates(LO numCoords, LO* OrigLoc, + coordinate_type* xvals, + coordinate_type* yvals, + coordinate_type* zvals, + coordinate_type* xtemp, + coordinate_type* ytemp, + coordinate_type* ztemp, + bool flipXY = false) const; - LO ML_compute_line_info(LO LayerId[], LO VertLineId[], - LO Ndof, LO DofsPerNode, - LO MeshNumbering, LO NumNodesPerVertLine, - coordinate_type *xvals, coordinate_type *yvals, coordinate_type *zvals, - const Teuchos::Comm& comm ) const ; + LO ML_compute_line_info(LO LayerId[], LO VertLineId[], + LO Ndof, LO DofsPerNode, + LO MeshNumbering, LO NumNodesPerVertLine, + coordinate_type* xvals, coordinate_type* yvals, coordinate_type* zvals, + const Teuchos::Comm& comm) const; - void ML_az_dsort2(coordinate_type dlist[], LO N, LO list2[]) const; + void ML_az_dsort2(coordinate_type dlist[], LO N, LO list2[]) const; - //! internally stores line detection mode - //! can be either vertical, horizontal or coordinates - //! for the first run. On the coarser levels we automatically - //! switch to vertical mode - mutable LO Zorientation_; + //! internally stores line detection mode + //! can be either vertical, horizontal or coordinates + //! for the first run. On the coarser levels we automatically + //! 
switch to vertical mode + mutable LO Zorientation_; - }; //class LineDetectionFactory +}; //class LineDetectionFactory -} //namespace MueLu +} //namespace MueLu #define MUELU_LINEDETECTIONFACTORY_SHORT -#endif // MUELU_LINEDETECTIONFACTORY_DECL_HPP +#endif // MUELU_LINEDETECTIONFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_LineDetectionFactory_def.hpp b/packages/muelu/src/Misc/MueLu_LineDetectionFactory_def.hpp index f16534229ce6..baf777696119 100644 --- a/packages/muelu/src/Misc/MueLu_LineDetectionFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_LineDetectionFactory_def.hpp @@ -57,452 +57,454 @@ namespace MueLu { - template - RCP LineDetectionFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP LineDetectionFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("linedetection: orientation"); - SET_VALID_ENTRY("linedetection: num layers"); -#undef SET_VALID_ENTRY - - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A"); - validParamList->set< RCP >("Coordinates", Teuchos::null, "Generating factory for coorindates"); - - return validParamList; - } - - template - void LineDetectionFactory::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "A"); - - // The factory needs the information about the number of z-layers. While this information is - // provided by the user for the finest level, the factory itself is responsible to provide the - // corresponding information on the coarser levels. Since a factory cannot be dependent on itself - // we use the NoFactory class as generator class, but remove the UserData keep flag, such that - // "NumZLayers" is part of the request/release mechanism. - // Please note, that this prevents us from having several (independent) CoarsePFactory instances! 
- // TODO: allow factory to dependent on self-generated data for TwoLevelFactories -> introduce ExpertRequest/Release in Level - currentLevel.DeclareInput("NumZLayers", NoFactory::get(), this); - currentLevel.RemoveKeepFlag("NumZLayers", NoFactory::get(), MueLu::UserData); + SET_VALID_ENTRY("linedetection: orientation"); + SET_VALID_ENTRY("linedetection: num layers"); +#undef SET_VALID_ENTRY + + validParamList->set >("A", Teuchos::null, "Generating factory of the matrix A"); + validParamList->set >("Coordinates", Teuchos::null, "Generating factory for coorindates"); + + return validParamList; +} + +template +void LineDetectionFactory::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "A"); + + // The factory needs the information about the number of z-layers. While this information is + // provided by the user for the finest level, the factory itself is responsible to provide the + // corresponding information on the coarser levels. Since a factory cannot be dependent on itself + // we use the NoFactory class as generator class, but remove the UserData keep flag, such that + // "NumZLayers" is part of the request/release mechanism. + // Please note, that this prevents us from having several (independent) CoarsePFactory instances! 
+ // TODO: allow factory to dependent on self-generated data for TwoLevelFactories -> introduce ExpertRequest/Release in Level + currentLevel.DeclareInput("NumZLayers", NoFactory::get(), this); + currentLevel.RemoveKeepFlag("NumZLayers", NoFactory::get(), MueLu::UserData); +} + +template +void LineDetectionFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Line detection (Ray style)", currentLevel); + + LO NumZDir = 0; + RCP fineCoords; + ArrayRCP x, y, z; + coordinate_type *xptr = NULL, *yptr = NULL, *zptr = NULL; + + // obtain general variables + RCP A = Get >(currentLevel, "A"); + LO BlkSize = A->GetFixedBlockSize(); + RCP rowMap = A->getRowMap(); + LO Ndofs = rowMap->getLocalNumElements(); + LO Nnodes = Ndofs / BlkSize; + + // collect information provided by user + const ParameterList& pL = GetParameterList(); + const std::string lineOrientation = pL.get("linedetection: orientation"); + + // interpret "line orientation" parameter provided by the user on the finest level + if (currentLevel.GetLevelID() == 0) { + if (lineOrientation == "vertical") + Zorientation_ = VERTICAL; + else if (lineOrientation == "horizontal") + Zorientation_ = HORIZONTAL; + else if (lineOrientation == "coordinates") + Zorientation_ = GRID_SUPPLIED; + else + TEUCHOS_TEST_FOR_EXCEPTION(false, Exceptions::RuntimeError, "LineDetectionFactory: The parameter 'semicoarsen: line orientation' must be either 'vertical', 'horizontal' or 'coordinates'."); } - template - void LineDetectionFactory::Build(Level& currentLevel) const { - FactoryMonitor m(*this, "Line detection (Ray style)", currentLevel); - - LO NumZDir = 0; - RCP fineCoords; - ArrayRCP x, y, z; - coordinate_type *xptr = NULL, *yptr = NULL, *zptr = NULL; - - // obtain general variables - RCP A = Get< RCP > (currentLevel, "A"); - LO BlkSize = A->GetFixedBlockSize(); - RCP rowMap = A->getRowMap(); - LO Ndofs = rowMap->getLocalNumElements(); - LO Nnodes = Ndofs/BlkSize; - - // collect information provided by user - const 
ParameterList& pL = GetParameterList(); - const std::string lineOrientation = pL.get("linedetection: orientation"); - - // interpret "line orientation" parameter provided by the user on the finest level - if(currentLevel.GetLevelID() == 0) { - if(lineOrientation=="vertical") - Zorientation_ = VERTICAL; - else if (lineOrientation=="horizontal") - Zorientation_ = HORIZONTAL; - else if (lineOrientation=="coordinates") - Zorientation_ = GRID_SUPPLIED; - else - TEUCHOS_TEST_FOR_EXCEPTION(false, Exceptions::RuntimeError, "LineDetectionFactory: The parameter 'semicoarsen: line orientation' must be either 'vertical', 'horizontal' or 'coordinates'."); - } - - //TEUCHOS_TEST_FOR_EXCEPTION(Zorientation_!=VERTICAL, Exceptions::RuntimeError, "LineDetectionFactory: The 'horizontal' or 'coordinates' have not been tested!!!. Please remove this exception check and carefully test these modes!"); + //TEUCHOS_TEST_FOR_EXCEPTION(Zorientation_!=VERTICAL, Exceptions::RuntimeError, "LineDetectionFactory: The 'horizontal' or 'coordinates' have not been tested!!!. Please remove this exception check and carefully test these modes!"); - // obtain number of z layers (variable over levels) - // This information is user-provided on the finest level and transferred to the coarser - // levels by the SemiCoarsenPFactor using the internal "NumZLayers" variable. 
- if(currentLevel.GetLevelID() == 0) { - if(currentLevel.IsAvailable("NumZLayers", NoFactory::get())) { - NumZDir = currentLevel.Get("NumZLayers", NoFactory::get()); //obtain info - GetOStream(Runtime1) << "Number of layers for line detection: " << NumZDir << " (information from Level(0))" << std::endl; - } else { - // check whether user provides information or it can be reconstructed from coordinates - NumZDir = pL.get("linedetection: num layers"); - if(NumZDir == -1) { - bool CoordsAvail = currentLevel.IsAvailable("Coordinates"); - - if (CoordsAvail == true) { - // try to reconstruct the number of layers from coordinates - fineCoords = Get< RCP > (currentLevel, "Coordinates"); - TEUCHOS_TEST_FOR_EXCEPTION(fineCoords->getNumVectors() != 3, Exceptions::RuntimeError, "Three coordinates arrays must be supplied if line detection orientation not given."); - x = fineCoords->getDataNonConst(0); - y = fineCoords->getDataNonConst(1); - z = fineCoords->getDataNonConst(2); - xptr = x.getRawPtr(); - yptr = y.getRawPtr(); - zptr = z.getRawPtr(); - - LO NumCoords = Ndofs/BlkSize; - - /* sort coordinates so that we can order things according to lines */ - Teuchos::ArrayRCP TOrigLoc= Teuchos::arcp(NumCoords); LO* OrigLoc= TOrigLoc.getRawPtr(); - Teuchos::ArrayRCP Txtemp = Teuchos::arcp(NumCoords); coordinate_type* xtemp = Txtemp.getRawPtr(); - Teuchos::ArrayRCP Tytemp = Teuchos::arcp(NumCoords); coordinate_type* ytemp = Tytemp.getRawPtr(); - Teuchos::ArrayRCP Tztemp = Teuchos::arcp(NumCoords); coordinate_type* ztemp = Tztemp.getRawPtr(); - - // sort coordinates in {x,y,z}vals (returned in {x,y,z}temp) so that we can order things according to lines - // switch x and y coordinates for semi-coarsening... 
- sort_coordinates(NumCoords, OrigLoc, xptr, yptr, zptr, xtemp, ytemp, ztemp, true); - - /* go through each vertical line and populate blockIndices so all */ - /* dofs within a PDE within a vertical line correspond to one block.*/ - LO NumBlocks = 0; - LO NumNodesPerVertLine = 0; - LO index = 0; - - while ( index < NumCoords ) { - coordinate_type xfirst = xtemp[index]; coordinate_type yfirst = ytemp[index]; - LO next = index+1; - while ( (next != NumCoords) && (xtemp[next] == xfirst) && - (ytemp[next] == yfirst)) - next++; - if (NumBlocks == 0) { - NumNodesPerVertLine = next-index; - } - // the number of vertical lines must be the same on all processors - // TAW: Sep 14 2015: or zero as we allow "empty" processors - //TEUCHOS_TEST_FOR_EXCEPTION(next-index != NumNodesPerVertLine,Exceptions::RuntimeError, "Error code only works for constant block size now!!!\n"); - NumBlocks++; - index = next; + // obtain number of z layers (variable over levels) + // This information is user-provided on the finest level and transferred to the coarser + // levels by the SemiCoarsenPFactor using the internal "NumZLayers" variable. 
+ if (currentLevel.GetLevelID() == 0) { + if (currentLevel.IsAvailable("NumZLayers", NoFactory::get())) { + NumZDir = currentLevel.Get("NumZLayers", NoFactory::get()); //obtain info + GetOStream(Runtime1) << "Number of layers for line detection: " << NumZDir << " (information from Level(0))" << std::endl; + } else { + // check whether user provides information or it can be reconstructed from coordinates + NumZDir = pL.get("linedetection: num layers"); + if (NumZDir == -1) { + bool CoordsAvail = currentLevel.IsAvailable("Coordinates"); + + if (CoordsAvail == true) { + // try to reconstruct the number of layers from coordinates + fineCoords = Get >(currentLevel, "Coordinates"); + TEUCHOS_TEST_FOR_EXCEPTION(fineCoords->getNumVectors() != 3, Exceptions::RuntimeError, "Three coordinates arrays must be supplied if line detection orientation not given."); + x = fineCoords->getDataNonConst(0); + y = fineCoords->getDataNonConst(1); + z = fineCoords->getDataNonConst(2); + xptr = x.getRawPtr(); + yptr = y.getRawPtr(); + zptr = z.getRawPtr(); + + LO NumCoords = Ndofs / BlkSize; + + /* sort coordinates so that we can order things according to lines */ + Teuchos::ArrayRCP TOrigLoc = Teuchos::arcp(NumCoords); + LO* OrigLoc = TOrigLoc.getRawPtr(); + Teuchos::ArrayRCP Txtemp = Teuchos::arcp(NumCoords); + coordinate_type* xtemp = Txtemp.getRawPtr(); + Teuchos::ArrayRCP Tytemp = Teuchos::arcp(NumCoords); + coordinate_type* ytemp = Tytemp.getRawPtr(); + Teuchos::ArrayRCP Tztemp = Teuchos::arcp(NumCoords); + coordinate_type* ztemp = Tztemp.getRawPtr(); + + // sort coordinates in {x,y,z}vals (returned in {x,y,z}temp) so that we can order things according to lines + // switch x and y coordinates for semi-coarsening... 
+ sort_coordinates(NumCoords, OrigLoc, xptr, yptr, zptr, xtemp, ytemp, ztemp, true); + + /* go through each vertical line and populate blockIndices so all */ + /* dofs within a PDE within a vertical line correspond to one block.*/ + LO NumBlocks = 0; + LO NumNodesPerVertLine = 0; + LO index = 0; + + while (index < NumCoords) { + coordinate_type xfirst = xtemp[index]; + coordinate_type yfirst = ytemp[index]; + LO next = index + 1; + while ((next != NumCoords) && (xtemp[next] == xfirst) && + (ytemp[next] == yfirst)) + next++; + if (NumBlocks == 0) { + NumNodesPerVertLine = next - index; } - - NumZDir = NumNodesPerVertLine; - GetOStream(Runtime1) << "Number of layers for line detection: " << NumZDir << " (information reconstructed from provided node coordinates)" << std::endl; - } else { - TEUCHOS_TEST_FOR_EXCEPTION(false, Exceptions::RuntimeError, "LineDetectionFactory: BuildP: User has to provide valid number of layers (e.g. using the 'line detection: num layers' parameter)."); + // the number of vertical lines must be the same on all processors + // TAW: Sep 14 2015: or zero as we allow "empty" processors + //TEUCHOS_TEST_FOR_EXCEPTION(next-index != NumNodesPerVertLine,Exceptions::RuntimeError, "Error code only works for constant block size now!!!\n"); + NumBlocks++; + index = next; } + + NumZDir = NumNodesPerVertLine; + GetOStream(Runtime1) << "Number of layers for line detection: " << NumZDir << " (information reconstructed from provided node coordinates)" << std::endl; } else { - GetOStream(Runtime1) << "Number of layers for line detection: " << NumZDir << " (information provided by user through 'line detection: num layers')" << std::endl; + TEUCHOS_TEST_FOR_EXCEPTION(false, Exceptions::RuntimeError, "LineDetectionFactory: BuildP: User has to provide valid number of layers (e.g. 
using the 'line detection: num layers' parameter)."); } - } // end else (user provides information or can be reconstructed) on finest level - } else { - // coarse level information - // TODO get rid of NoFactory here and use SemiCoarsenPFactory as source of NumZLayers instead. - if(currentLevel.IsAvailable("NumZLayers", NoFactory::get())) { - NumZDir = currentLevel.Get("NumZLayers", NoFactory::get()); //obtain info - GetOStream(Runtime1) << "Number of layers for line detection: " << NumZDir << std::endl; } else { - TEUCHOS_TEST_FOR_EXCEPTION(false, Exceptions::RuntimeError, "LineDetectionFactory: BuildP: No NumZLayers variable found. This cannot be."); + GetOStream(Runtime1) << "Number of layers for line detection: " << NumZDir << " (information provided by user through 'line detection: num layers')" << std::endl; } + } // end else (user provides information or can be reconstructed) on finest level + } else { + // coarse level information + // TODO get rid of NoFactory here and use SemiCoarsenPFactory as source of NumZLayers instead. + if (currentLevel.IsAvailable("NumZLayers", NoFactory::get())) { + NumZDir = currentLevel.Get("NumZLayers", NoFactory::get()); //obtain info + GetOStream(Runtime1) << "Number of layers for line detection: " << NumZDir << std::endl; + } else { + TEUCHOS_TEST_FOR_EXCEPTION(false, Exceptions::RuntimeError, "LineDetectionFactory: BuildP: No NumZLayers variable found. This cannot be."); } + } - // plausibility check and further variable collection - if (Zorientation_ == GRID_SUPPLIED) { // On finest level, fetch user-provided coordinates if available... 
- bool CoordsAvail = currentLevel.IsAvailable("Coordinates"); - - if (CoordsAvail == false) { - if (currentLevel.GetLevelID() == 0) - throw Exceptions::RuntimeError("Coordinates must be supplied if line detection orientation not given."); - else - throw Exceptions::RuntimeError("Coordinates not generated by previous invocation of LineDetectionFactory's BuildP() method."); - } - fineCoords = Get< RCP > (currentLevel, "Coordinates"); - TEUCHOS_TEST_FOR_EXCEPTION(fineCoords->getNumVectors() != 3, Exceptions::RuntimeError, "Three coordinates arrays must be supplied if line detection orientation not given."); - x = fineCoords->getDataNonConst(0); - y = fineCoords->getDataNonConst(1); - z = fineCoords->getDataNonConst(2); - xptr = x.getRawPtr(); - yptr = y.getRawPtr(); - zptr = z.getRawPtr(); - } + // plausibility check and further variable collection + if (Zorientation_ == GRID_SUPPLIED) { // On finest level, fetch user-provided coordinates if available... + bool CoordsAvail = currentLevel.IsAvailable("Coordinates"); - // perform line detection - if (NumZDir > 0) { - LO *LayerId, *VertLineId; - Teuchos::ArrayRCP TLayerId = Teuchos::arcp(Nnodes); LayerId = TLayerId.getRawPtr(); - Teuchos::ArrayRCP TVertLineId= Teuchos::arcp(Nnodes); VertLineId = TVertLineId.getRawPtr(); - - NumZDir = ML_compute_line_info(LayerId, VertLineId, Ndofs, BlkSize, - Zorientation_, NumZDir,xptr,yptr,zptr, *(rowMap->getComm())); - //it is NumZDir=NCLayers*NVertLines*DofsPerNode; - - // store output data on current level - // The line detection data is used by the SemiCoarsenPFactory and the line smoothers in Ifpack/Ifpack2 - Set(currentLevel, "CoarseNumZLayers", NumZDir); - Set(currentLevel, "LineDetection_Layers", TLayerId); - Set(currentLevel, "LineDetection_VertLineIds", TVertLineId); - } else { - Teuchos::ArrayRCP TLayerId = Teuchos::arcp(0); - Teuchos::ArrayRCP TVertLineId = Teuchos::arcp(0); - Teuchos::ArrayRCP TVertLineIdSmoo= Teuchos::arcp(0); - - // store output data on current level - 
// The line detection data is used by the SemiCoarsenPFactory and the line smoothers in Ifpack/Ifpack2 - Set(currentLevel, "CoarseNumZLayers", NumZDir); - Set(currentLevel, "LineDetection_Layers", TLayerId); - Set(currentLevel, "LineDetection_VertLineIds", TVertLineId); + if (CoordsAvail == false) { + if (currentLevel.GetLevelID() == 0) + throw Exceptions::RuntimeError("Coordinates must be supplied if line detection orientation not given."); + else + throw Exceptions::RuntimeError("Coordinates not generated by previous invocation of LineDetectionFactory's BuildP() method."); } - - // automatically switch to vertical mode on the coarser levels - if(Zorientation_ != VERTICAL) - Zorientation_ = VERTICAL; + fineCoords = Get >(currentLevel, "Coordinates"); + TEUCHOS_TEST_FOR_EXCEPTION(fineCoords->getNumVectors() != 3, Exceptions::RuntimeError, "Three coordinates arrays must be supplied if line detection orientation not given."); + x = fineCoords->getDataNonConst(0); + y = fineCoords->getDataNonConst(1); + z = fineCoords->getDataNonConst(2); + xptr = x.getRawPtr(); + yptr = y.getRawPtr(); + zptr = z.getRawPtr(); } - template - LocalOrdinal LineDetectionFactory::ML_compute_line_info(LocalOrdinal LayerId[], LocalOrdinal VertLineId[], LocalOrdinal Ndof, LocalOrdinal DofsPerNode, LocalOrdinal MeshNumbering, LocalOrdinal NumNodesPerVertLine, typename Teuchos::ScalarTraits::coordinateType *xvals, typename Teuchos::ScalarTraits::coordinateType *yvals, typename Teuchos::ScalarTraits::coordinateType *zvals, const Teuchos::Comm& /* comm */) const { - - LO Nnodes, NVertLines, MyNode; - LO NumCoords, next; //, subindex, subnext; - coordinate_type xfirst, yfirst; - coordinate_type *xtemp, *ytemp, *ztemp; - LO *OrigLoc; - LO i,j,count; - LO RetVal; + // perform line detection + if (NumZDir > 0) { + LO *LayerId, *VertLineId; + Teuchos::ArrayRCP TLayerId = Teuchos::arcp(Nnodes); + LayerId = TLayerId.getRawPtr(); + Teuchos::ArrayRCP TVertLineId = Teuchos::arcp(Nnodes); + VertLineId = 
TVertLineId.getRawPtr(); + + NumZDir = ML_compute_line_info(LayerId, VertLineId, Ndofs, BlkSize, + Zorientation_, NumZDir, xptr, yptr, zptr, *(rowMap->getComm())); + //it is NumZDir=NCLayers*NVertLines*DofsPerNode; + + // store output data on current level + // The line detection data is used by the SemiCoarsenPFactory and the line smoothers in Ifpack/Ifpack2 + Set(currentLevel, "CoarseNumZLayers", NumZDir); + Set(currentLevel, "LineDetection_Layers", TLayerId); + Set(currentLevel, "LineDetection_VertLineIds", TVertLineId); + } else { + Teuchos::ArrayRCP TLayerId = Teuchos::arcp(0); + Teuchos::ArrayRCP TVertLineId = Teuchos::arcp(0); + Teuchos::ArrayRCP TVertLineIdSmoo = Teuchos::arcp(0); + + // store output data on current level + // The line detection data is used by the SemiCoarsenPFactory and the line smoothers in Ifpack/Ifpack2 + Set(currentLevel, "CoarseNumZLayers", NumZDir); + Set(currentLevel, "LineDetection_Layers", TLayerId); + Set(currentLevel, "LineDetection_VertLineIds", TVertLineId); + } - RetVal = 0; - if ((MeshNumbering != VERTICAL) && (MeshNumbering != HORIZONTAL)) { - if ( (xvals == NULL) || (yvals == NULL) || (zvals == NULL)) RetVal = -1; - } - else { - if (NumNodesPerVertLine == -1) RetVal = -4; - if ( ((Ndof/DofsPerNode)%NumNodesPerVertLine) != 0) RetVal = -3; - } - if ( (Ndof%DofsPerNode) != 0) RetVal = -2; + // automatically switch to vertical mode on the coarser levels + if (Zorientation_ != VERTICAL) + Zorientation_ = VERTICAL; +} + +template +LocalOrdinal LineDetectionFactory::ML_compute_line_info(LocalOrdinal LayerId[], LocalOrdinal VertLineId[], LocalOrdinal Ndof, LocalOrdinal DofsPerNode, LocalOrdinal MeshNumbering, LocalOrdinal NumNodesPerVertLine, typename Teuchos::ScalarTraits::coordinateType* xvals, typename Teuchos::ScalarTraits::coordinateType* yvals, typename Teuchos::ScalarTraits::coordinateType* zvals, const Teuchos::Comm& /* comm */) const { + LO Nnodes, NVertLines, MyNode; + LO NumCoords, next; //, subindex, subnext; + 
coordinate_type xfirst, yfirst; + coordinate_type *xtemp, *ytemp, *ztemp; + LO* OrigLoc; + LO i, j, count; + LO RetVal; + + RetVal = 0; + if ((MeshNumbering != VERTICAL) && (MeshNumbering != HORIZONTAL)) { + if ((xvals == NULL) || (yvals == NULL) || (zvals == NULL)) RetVal = -1; + } else { + if (NumNodesPerVertLine == -1) RetVal = -4; + if (((Ndof / DofsPerNode) % NumNodesPerVertLine) != 0) RetVal = -3; + } + if ((Ndof % DofsPerNode) != 0) RetVal = -2; - TEUCHOS_TEST_FOR_EXCEPTION(RetVal == -1, Exceptions::RuntimeError, "Not semicoarsening as no mesh numbering information or coordinates are given\n"); - TEUCHOS_TEST_FOR_EXCEPTION(RetVal == -4, Exceptions::RuntimeError, "Not semicoarsening as the number of z nodes is not given.\n"); - TEUCHOS_TEST_FOR_EXCEPTION(RetVal == -3, Exceptions::RuntimeError, "Not semicoarsening as the total number of nodes is not evenly divisible by the number of z direction nodes .\n"); - TEUCHOS_TEST_FOR_EXCEPTION(RetVal == -2, Exceptions::RuntimeError, "Not semicoarsening as something is off with the number of degrees-of-freedom per node.\n"); + TEUCHOS_TEST_FOR_EXCEPTION(RetVal == -1, Exceptions::RuntimeError, "Not semicoarsening as no mesh numbering information or coordinates are given\n"); + TEUCHOS_TEST_FOR_EXCEPTION(RetVal == -4, Exceptions::RuntimeError, "Not semicoarsening as the number of z nodes is not given.\n"); + TEUCHOS_TEST_FOR_EXCEPTION(RetVal == -3, Exceptions::RuntimeError, "Not semicoarsening as the total number of nodes is not evenly divisible by the number of z direction nodes .\n"); + TEUCHOS_TEST_FOR_EXCEPTION(RetVal == -2, Exceptions::RuntimeError, "Not semicoarsening as something is off with the number of degrees-of-freedom per node.\n"); - Nnodes = Ndof/DofsPerNode; - for (MyNode = 0; MyNode < Nnodes; MyNode++) VertLineId[MyNode] = -1; - for (MyNode = 0; MyNode < Nnodes; MyNode++) LayerId[MyNode] = -1; + Nnodes = Ndof / DofsPerNode; + for (MyNode = 0; MyNode < Nnodes; MyNode++) VertLineId[MyNode] = -1; + for 
(MyNode = 0; MyNode < Nnodes; MyNode++) LayerId[MyNode] = -1; - if (MeshNumbering == VERTICAL) { - for (MyNode = 0; MyNode < Nnodes; MyNode++) { - LayerId[MyNode]= MyNode%NumNodesPerVertLine; - VertLineId[MyNode]= (MyNode- LayerId[MyNode])/NumNodesPerVertLine; - } + if (MeshNumbering == VERTICAL) { + for (MyNode = 0; MyNode < Nnodes; MyNode++) { + LayerId[MyNode] = MyNode % NumNodesPerVertLine; + VertLineId[MyNode] = (MyNode - LayerId[MyNode]) / NumNodesPerVertLine; } - else if (MeshNumbering == HORIZONTAL) { - NVertLines = Nnodes/NumNodesPerVertLine; - for (MyNode = 0; MyNode < Nnodes; MyNode++) { - VertLineId[MyNode] = MyNode%NVertLines; - LayerId[MyNode] = (MyNode- VertLineId[MyNode])/NVertLines; - } - } - else { - // coordinates mode: we distinguish between vertical line numbering for semi-coarsening and line smoothing - NumCoords = Ndof/DofsPerNode; - - // reserve temporary memory - Teuchos::ArrayRCP TOrigLoc= Teuchos::arcp(NumCoords); OrigLoc= TOrigLoc.getRawPtr(); - Teuchos::ArrayRCP Txtemp = Teuchos::arcp(NumCoords); xtemp = Txtemp.getRawPtr(); - Teuchos::ArrayRCP Tytemp = Teuchos::arcp(NumCoords); ytemp = Tytemp.getRawPtr(); - Teuchos::ArrayRCP Tztemp = Teuchos::arcp(NumCoords); ztemp = Tztemp.getRawPtr(); - - // build vertical line info for semi-coarsening - - // sort coordinates in {x,y,z}vals (returned in {x,y,z}temp) so that we can order things according to lines - // switch x and y coordinates for semi-coarsening... - sort_coordinates(NumCoords, OrigLoc, xvals, yvals, zvals, xtemp, ytemp, ztemp, /*true*/ true); - - LO NumBlocks = 0; - LO index = 0; - - while ( index < NumCoords ) { - xfirst = xtemp[index]; yfirst = ytemp[index]; - next = index+1; - while ( (next != NumCoords) && (xtemp[next] == xfirst) && - (ytemp[next] == yfirst)) - next++; - if (NumBlocks == 0) { - NumNodesPerVertLine = next-index; - } - // The number of vertical lines must be the same on all processors - // TAW: Sep 14, 2015: or zero as we allow for empty processors. 
- //TEUCHOS_TEST_FOR_EXCEPTION(next-index != NumNodesPerVertLine,Exceptions::RuntimeError, "Error code only works for constant block size now!!!\n"); - count = 0; - for (j= index; j < next; j++) { - VertLineId[OrigLoc[j]] = NumBlocks; - LayerId[OrigLoc[j]] = count++; - } - NumBlocks++; - index = next; - } + } else if (MeshNumbering == HORIZONTAL) { + NVertLines = Nnodes / NumNodesPerVertLine; + for (MyNode = 0; MyNode < Nnodes; MyNode++) { + VertLineId[MyNode] = MyNode % NVertLines; + LayerId[MyNode] = (MyNode - VertLineId[MyNode]) / NVertLines; } - - /* check that everyone was assigned */ - - for (i = 0; i < Nnodes; i++) { - if (VertLineId[i] == -1) { - GetOStream(Warnings1) << "Warning: did not assign " << i << " to a vertical line?????\n" << std::endl; + } else { + // coordinates mode: we distinguish between vertical line numbering for semi-coarsening and line smoothing + NumCoords = Ndof / DofsPerNode; + + // reserve temporary memory + Teuchos::ArrayRCP TOrigLoc = Teuchos::arcp(NumCoords); + OrigLoc = TOrigLoc.getRawPtr(); + Teuchos::ArrayRCP Txtemp = Teuchos::arcp(NumCoords); + xtemp = Txtemp.getRawPtr(); + Teuchos::ArrayRCP Tytemp = Teuchos::arcp(NumCoords); + ytemp = Tytemp.getRawPtr(); + Teuchos::ArrayRCP Tztemp = Teuchos::arcp(NumCoords); + ztemp = Tztemp.getRawPtr(); + + // build vertical line info for semi-coarsening + + // sort coordinates in {x,y,z}vals (returned in {x,y,z}temp) so that we can order things according to lines + // switch x and y coordinates for semi-coarsening... 
+ sort_coordinates(NumCoords, OrigLoc, xvals, yvals, zvals, xtemp, ytemp, ztemp, /*true*/ true); + + LO NumBlocks = 0; + LO index = 0; + + while (index < NumCoords) { + xfirst = xtemp[index]; + yfirst = ytemp[index]; + next = index + 1; + while ((next != NumCoords) && (xtemp[next] == xfirst) && + (ytemp[next] == yfirst)) + next++; + if (NumBlocks == 0) { + NumNodesPerVertLine = next - index; } - if (LayerId[i] == -1) { - GetOStream(Warnings1) << "Warning: did not assign " << i << " to a Layer?????\n" << std::endl; + // The number of vertical lines must be the same on all processors + // TAW: Sep 14, 2015: or zero as we allow for empty processors. + //TEUCHOS_TEST_FOR_EXCEPTION(next-index != NumNodesPerVertLine,Exceptions::RuntimeError, "Error code only works for constant block size now!!!\n"); + count = 0; + for (j = index; j < next; j++) { + VertLineId[OrigLoc[j]] = NumBlocks; + LayerId[OrigLoc[j]] = count++; } + NumBlocks++; + index = next; } - - // TAW: Sep 14 2015: relax plausibility checks as we allow for empty processors - //MueLu_maxAll(&comm, NumNodesPerVertLine, i); - //if (NumNodesPerVertLine == -1) NumNodesPerVertLine = i; - //TEUCHOS_TEST_FOR_EXCEPTION(NumNodesPerVertLine != i,Exceptions::RuntimeError, "Different processors have different z direction line lengths?\n"); - - return NumNodesPerVertLine; } - /* Private member function to sort coordinates in arrays. This is an expert routine. 
Do not use or change.*/ - template - void LineDetectionFactory::sort_coordinates(LO numCoords, LO* OrigLoc, - typename Teuchos::ScalarTraits::coordinateType* xvals, - typename Teuchos::ScalarTraits::coordinateType* yvals, - typename Teuchos::ScalarTraits::coordinateType* zvals, - typename Teuchos::ScalarTraits::coordinateType* xtemp, - typename Teuchos::ScalarTraits::coordinateType* ytemp, - typename Teuchos::ScalarTraits::coordinateType* ztemp, - bool flipXY) const { - - if( flipXY == false ) { // for line-smoothing - for (LO i = 0; i < numCoords; i++) xtemp[i]= xvals[i]; - } else { // for semi-coarsening - for (LO i = 0; i < numCoords; i++) xtemp[i]= yvals[i]; - } - for (LO i = 0; i < numCoords; i++) OrigLoc[i]= i; + /* check that everyone was assigned */ - ML_az_dsort2(xtemp,numCoords,OrigLoc); - if( flipXY == false ) { // for line-smoothing - for (LO i = 0; i < numCoords; i++) ytemp[i]= yvals[OrigLoc[i]]; - } else { - for (LO i = 0; i < numCoords; i++) ytemp[i]= xvals[OrigLoc[i]]; + for (i = 0; i < Nnodes; i++) { + if (VertLineId[i] == -1) { + GetOStream(Warnings1) << "Warning: did not assign " << i << " to a vertical line?????\n" + << std::endl; } - - LO index = 0; - - while ( index < numCoords ) { - coordinate_type xfirst = xtemp[index]; - LO next = index+1; - while ( (next != numCoords) && (xtemp[next] == xfirst)) - next++; - ML_az_dsort2(&(ytemp[index]),next-index,&(OrigLoc[index])); - for (LO i = index; i < next; i++) ztemp[i]= zvals[OrigLoc[i]]; - /* One final sort so that the ztemps are in order */ - LO subindex = index; - while (subindex != next) { - coordinate_type yfirst = ytemp[subindex]; - LO subnext = subindex+1; - while ( (subnext != next) && (ytemp[subnext] == yfirst)) subnext++; - ML_az_dsort2(&(ztemp[subindex]),subnext-subindex,&(OrigLoc[subindex])); - subindex = subnext; - } - index = next; + if (LayerId[i] == -1) { + GetOStream(Warnings1) << "Warning: did not assign " << i << " to a Layer?????\n" + << std::endl; } - } - /* Sort coordinates 
and additional array accordingly (if provided). This is an expert routine borrowed from ML. Do not change.*/ - template - void LineDetectionFactory::ML_az_dsort2(typename Teuchos::ScalarTraits::coordinateType dlist[], LocalOrdinal N, LocalOrdinal list2[]) const { - LO l, r, j, i, flag; - LO RR2; - coordinate_type dRR, dK; - - // note: we use that routine for sorting coordinates only. No complex coordinates are assumed... - typedef Teuchos::ScalarTraits STS; - - if (N <= 1) return; - - l = N / 2 + 1; - r = N - 1; - l = l - 1; - dRR = dlist[l - 1]; - dK = dlist[l - 1]; + // TAW: Sep 14 2015: relax plausibility checks as we allow for empty processors + //MueLu_maxAll(&comm, NumNodesPerVertLine, i); + //if (NumNodesPerVertLine == -1) NumNodesPerVertLine = i; + //TEUCHOS_TEST_FOR_EXCEPTION(NumNodesPerVertLine != i,Exceptions::RuntimeError, "Different processors have different z direction line lengths?\n"); + + return NumNodesPerVertLine; +} + +/* Private member function to sort coordinates in arrays. This is an expert routine. 
Do not use or change.*/ +template +void LineDetectionFactory::sort_coordinates(LO numCoords, LO* OrigLoc, + typename Teuchos::ScalarTraits::coordinateType* xvals, + typename Teuchos::ScalarTraits::coordinateType* yvals, + typename Teuchos::ScalarTraits::coordinateType* zvals, + typename Teuchos::ScalarTraits::coordinateType* xtemp, + typename Teuchos::ScalarTraits::coordinateType* ytemp, + typename Teuchos::ScalarTraits::coordinateType* ztemp, + bool flipXY) const { + if (flipXY == false) { // for line-smoothing + for (LO i = 0; i < numCoords; i++) xtemp[i] = xvals[i]; + } else { // for semi-coarsening + for (LO i = 0; i < numCoords; i++) xtemp[i] = yvals[i]; + } + for (LO i = 0; i < numCoords; i++) OrigLoc[i] = i; - if (list2 != NULL) { - RR2 = list2[l - 1]; - while (r != 0) { - j = l; - flag = 1; + ML_az_dsort2(xtemp, numCoords, OrigLoc); + if (flipXY == false) { // for line-smoothing + for (LO i = 0; i < numCoords; i++) ytemp[i] = yvals[OrigLoc[i]]; + } else { + for (LO i = 0; i < numCoords; i++) ytemp[i] = xvals[OrigLoc[i]]; + } - while (flag == 1) { - i = j; - j = j + j; + LO index = 0; + + while (index < numCoords) { + coordinate_type xfirst = xtemp[index]; + LO next = index + 1; + while ((next != numCoords) && (xtemp[next] == xfirst)) + next++; + ML_az_dsort2(&(ytemp[index]), next - index, &(OrigLoc[index])); + for (LO i = index; i < next; i++) ztemp[i] = zvals[OrigLoc[i]]; + /* One final sort so that the ztemps are in order */ + LO subindex = index; + while (subindex != next) { + coordinate_type yfirst = ytemp[subindex]; + LO subnext = subindex + 1; + while ((subnext != next) && (ytemp[subnext] == yfirst)) subnext++; + ML_az_dsort2(&(ztemp[subindex]), subnext - subindex, &(OrigLoc[subindex])); + subindex = subnext; + } + index = next; + } +} + +/* Sort coordinates and additional array accordingly (if provided). This is an expert routine borrowed from ML. 
Do not change.*/ +template +void LineDetectionFactory::ML_az_dsort2(typename Teuchos::ScalarTraits::coordinateType dlist[], LocalOrdinal N, LocalOrdinal list2[]) const { + LO l, r, j, i, flag; + LO RR2; + coordinate_type dRR, dK; + + // note: we use that routine for sorting coordinates only. No complex coordinates are assumed... + typedef Teuchos::ScalarTraits STS; + + if (N <= 1) return; + + l = N / 2 + 1; + r = N - 1; + l = l - 1; + dRR = dlist[l - 1]; + dK = dlist[l - 1]; + + if (list2 != NULL) { + RR2 = list2[l - 1]; + while (r != 0) { + j = l; + flag = 1; + + while (flag == 1) { + i = j; + j = j + j; + + if (j > r + 1) + flag = 0; + else { + if (j < r + 1) + if (STS::real(dlist[j]) > STS::real(dlist[j - 1])) j = j + 1; - if (j > r + 1) + if (STS::real(dlist[j - 1]) > STS::real(dK)) { + dlist[i - 1] = dlist[j - 1]; + list2[i - 1] = list2[j - 1]; + } else { flag = 0; - else { - if (j < r + 1) - if (STS::real(dlist[j]) > STS::real(dlist[j - 1])) j = j + 1; - - if (STS::real(dlist[j - 1]) > STS::real(dK)) { - dlist[ i - 1] = dlist[ j - 1]; - list2[i - 1] = list2[j - 1]; - } - else { - flag = 0; - } } } - dlist[ i - 1] = dRR; - list2[i - 1] = RR2; - - if (l == 1) { - dRR = dlist [r]; - RR2 = list2[r]; - dK = dlist[r]; - dlist[r ] = dlist[0]; - list2[r] = list2[0]; - r = r - 1; - } - else { - l = l - 1; - dRR = dlist[ l - 1]; - RR2 = list2[l - 1]; - dK = dlist[l - 1]; - } } - dlist[ 0] = dRR; - list2[0] = RR2; + dlist[i - 1] = dRR; + list2[i - 1] = RR2; + + if (l == 1) { + dRR = dlist[r]; + RR2 = list2[r]; + dK = dlist[r]; + dlist[r] = dlist[0]; + list2[r] = list2[0]; + r = r - 1; + } else { + l = l - 1; + dRR = dlist[l - 1]; + RR2 = list2[l - 1]; + dK = dlist[l - 1]; + } } - else { - while (r != 0) { - j = l; - flag = 1; - while (flag == 1) { - i = j; - j = j + j; - if (j > r + 1) + dlist[0] = dRR; + list2[0] = RR2; + } else { + while (r != 0) { + j = l; + flag = 1; + while (flag == 1) { + i = j; + j = j + j; + if (j > r + 1) + flag = 0; + else { + if (j < r + 1) + 
if (STS::real(dlist[j]) > STS::real(dlist[j - 1])) j = j + 1; + if (STS::real(dlist[j - 1]) > STS::real(dK)) { + dlist[i - 1] = dlist[j - 1]; + } else { flag = 0; - else { - if (j < r + 1) - if (STS::real(dlist[j]) > STS::real(dlist[j - 1])) j = j + 1; - if (STS::real(dlist[j - 1]) > STS::real(dK)) { - dlist[ i - 1] = dlist[ j - 1]; - } - else { - flag = 0; - } } } - dlist[ i - 1] = dRR; - if (l == 1) { - dRR = dlist [r]; - dK = dlist[r]; - dlist[r ] = dlist[0]; - r = r - 1; - } - else { - l = l - 1; - dRR = dlist[ l - 1]; - dK = dlist[l - 1]; - } } - dlist[ 0] = dRR; + dlist[i - 1] = dRR; + if (l == 1) { + dRR = dlist[r]; + dK = dlist[r]; + dlist[r] = dlist[0]; + r = r - 1; + } else { + l = l - 1; + dRR = dlist[l - 1]; + dK = dlist[l - 1]; + } } - + dlist[0] = dRR; } -} //namespace MueLu +} +} //namespace MueLu -#endif // MUELU_LINEDETECTIONFACTORY_DEF_HPP +#endif // MUELU_LINEDETECTIONFACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_LocalOrdinalTransferFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_LocalOrdinalTransferFactory_decl.hpp index 0fb3650ef6a2..25baa51cc279 100644 --- a/packages/muelu/src/Misc/MueLu_LocalOrdinalTransferFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_LocalOrdinalTransferFactory_decl.hpp @@ -90,23 +90,21 @@ namespace MueLu { | TransferVec | LocalOrdinalTransferFactory | coarse level transfervec */ - - - template - class LocalOrdinalTransferFactory : public TwoLevelFactoryBase { +template +class LocalOrdinalTransferFactory : public TwoLevelFactoryBase { #undef MUELU_LOCALORDINALTRANSFERFACTORY_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - // Default constructor is distabled - LocalOrdinalTransferFactory() = delete; + // Default constructor is distabled + LocalOrdinalTransferFactory() = delete; - /*! @brief Constructor. + /*! @brief Constructor. @param vectorName The name of the quantity to be restricted. 
@param restrictionName The name of the restriction Matrix. @@ -114,53 +112,55 @@ namespace MueLu { The operator associated with projectionName will be applied to the MultiVector associated with vectorName. */ - LocalOrdinalTransferFactory(const std::string & TransferVecName, const std::string & mode): TransferVecName_(TransferVecName) { - if(mode == "classical") useAggregatesMode_ = false; - else useAggregatesMode_ = true; - } + LocalOrdinalTransferFactory(const std::string &TransferVecName, const std::string &mode) + : TransferVecName_(TransferVecName) { + if (mode == "classical") + useAggregatesMode_ = false; + else + useAggregatesMode_ = true; + } - //! Destructor. - virtual ~LocalOrdinalTransferFactory() { } + //! Destructor. + virtual ~LocalOrdinalTransferFactory() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! @name Input - //@{ + //! @name Input + //@{ - /*! @brief Specifies the data that this class needs, and the factories that generate that data. + /*! @brief Specifies the data that this class needs, and the factories that generate that data. If the Build method of this class requires some data, but the generating factory is not specified in DeclareInput, then this class will fall back to the settings in FactoryManager. */ - void DeclareInput(Level &finelevel, Level &coarseLevel) const; + void DeclareInput(Level &finelevel, Level &coarseLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - //! Build an object with this factory. - void Build(Level & fineLevel, Level &coarseLevel) const; + //! Build an object with this factory. 
+ void Build(Level &fineLevel, Level &coarseLevel) const; - //@} + //@} - private: + private: + void BuildAggregates(Level &fineLevel, Level &coarseLevel) const; - void BuildAggregates(Level & fineLevel, Level &coarseLevel) const; + void BuildFC(Level &fineLevel, Level &coarseLevel) const; - void BuildFC(Level & fineLevel, Level &coarseLevel) const; - - //! Use aggregates mode (as opposed to FC mode) - bool useAggregatesMode_; + //! Use aggregates mode (as opposed to FC mode) + bool useAggregatesMode_; - //! The name for the vector to be transfered. This allows us to have multiple factories for different variables - std::string TransferVecName_; + //! The name for the vector to be transfered. This allows us to have multiple factories for different variables + std::string TransferVecName_; - }; // class LocalOrdinalTransferFactory +}; // class LocalOrdinalTransferFactory -} // namespace MueLu +} // namespace MueLu #define MUELU_LOCALORDINALTRANSFERFACTORY_SHORT -#endif // MUELU_LOCALORDINALTRANSFER_FACTORY_DECL_HPP +#endif // MUELU_LOCALORDINALTRANSFER_FACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_LocalOrdinalTransferFactory_def.hpp b/packages/muelu/src/Misc/MueLu_LocalOrdinalTransferFactory_def.hpp index d870306b54b6..40844e25fb15 100644 --- a/packages/muelu/src/Misc/MueLu_LocalOrdinalTransferFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_LocalOrdinalTransferFactory_def.hpp @@ -61,201 +61,196 @@ namespace MueLu { - template - RCP LocalOrdinalTransferFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP LocalOrdinalTransferFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); - validParamList->set >(TransferVecName_, Teuchos::null, "Factory for TransferVec generation"); - validParamList->set >("P Graph", Teuchos::null, "Factory for P generation"); - validParamList->set >("Aggregates", Teuchos::null, "Factory for aggregates generation"); - validParamList->set 
>("CoarseMap", Teuchos::null, "Generating factory of the coarse map"); + validParamList->set >(TransferVecName_, Teuchos::null, "Factory for TransferVec generation"); + validParamList->set >("P Graph", Teuchos::null, "Factory for P generation"); + validParamList->set >("Aggregates", Teuchos::null, "Factory for aggregates generation"); + validParamList->set >("CoarseMap", Teuchos::null, "Generating factory of the coarse map"); - return validParamList; - } + return validParamList; +} + +template +void LocalOrdinalTransferFactory::DeclareInput(Level &fineLevel, Level &coarseLevel) const { + static bool isAvailableXfer = false; + if (coarseLevel.GetRequestMode() == Level::REQUEST) { + isAvailableXfer = coarseLevel.IsAvailable(TransferVecName_, this); + if (isAvailableXfer == false) { + Input(fineLevel, TransferVecName_); + Input(fineLevel, "CoarseMap"); - template - void LocalOrdinalTransferFactory::DeclareInput(Level& fineLevel, Level& coarseLevel) const { - static bool isAvailableXfer = false; - if (coarseLevel.GetRequestMode() == Level::REQUEST) { - isAvailableXfer = coarseLevel.IsAvailable(TransferVecName_, this); - if (isAvailableXfer == false) { - Input(fineLevel, TransferVecName_); - Input(fineLevel, "CoarseMap"); - - if(useAggregatesMode_) - Input(fineLevel, "Aggregates"); - else { - Input(coarseLevel, "P Graph"); - } + if (useAggregatesMode_) + Input(fineLevel, "Aggregates"); + else { + Input(coarseLevel, "P Graph"); } } - } +} + +template +void LocalOrdinalTransferFactory::Build(Level &fineLevel, Level &coarseLevel) const { + if (useAggregatesMode_) + BuildAggregates(fineLevel, coarseLevel); + else + BuildFC(fineLevel, coarseLevel); +} + +template +void LocalOrdinalTransferFactory::BuildFC(Level &fineLevel, Level &coarseLevel) const { + FactoryMonitor m(*this, "Build", coarseLevel); - template - void LocalOrdinalTransferFactory::Build(Level & fineLevel, Level &coarseLevel) const { - if(useAggregatesMode_) BuildAggregates(fineLevel,coarseLevel); - else 
BuildFC(fineLevel,coarseLevel); + GetOStream(Runtime0) << "Transferring " << TransferVecName_ << std::endl; + LO LO_INVALID = Teuchos::OrdinalTraits::invalid(); + + if (coarseLevel.IsAvailable(TransferVecName_, this)) { + GetOStream(Runtime0) << "Reusing " << TransferVecName_ << std::endl; + return; } - template - void LocalOrdinalTransferFactory::BuildFC(Level & fineLevel, Level &coarseLevel) const { - FactoryMonitor m(*this, "Build", coarseLevel); + // Get everything we need + RCP P = Get >(coarseLevel, "P Graph"); + RCP fineTV = Get >(fineLevel, TransferVecName_); + RCP coarseMap = Get >(fineLevel, "CoarseMap"); + RCP uniqueMap = fineTV->getMap(); + ArrayRCP fineData = fineTV->getData(0); - GetOStream(Runtime0) << "Transferring " <::invalid(); + // Allocate new LO Vector + RCP coarseTV = LocalOrdinalVectorFactory::Build(coarseMap, 1); + ArrayRCP coarseData = coarseTV->getDataNonConst(0); - if (coarseLevel.IsAvailable(TransferVecName_, this)) { - GetOStream(Runtime0) << "Reusing "< P = Get< RCP >(coarseLevel,"P Graph"); - RCP fineTV = Get< RCP >(fineLevel, TransferVecName_); - RCP coarseMap = Get< RCP > (fineLevel, "CoarseMap"); - RCP uniqueMap = fineTV->getMap(); - ArrayRCP fineData = fineTV->getData(0); - - // Allocate new LO Vector - RCP coarseTV = LocalOrdinalVectorFactory::Build(coarseMap,1); - ArrayRCP coarseData = coarseTV->getDataNonConst(0); - - // Invalidate everything first, to check for errors - for(LO i=0; igetDomainMap()->getLocalNumElements(); - for (LO row=0; row<(LO)P->getLocalNumRows(); row++) { - LO fineNumber = fineData[row]; - ArrayView indices; - P->getLocalRowView(row,indices); - - for(LO j=0; j<(LO)indices.size(); j++) { - LO col = indices[j]; - if (col >= domMapNumElements) { - // skip off rank entries of P - } else { - coarseData[col] = fineNumber; - } + // Fill in coarse TV + LO domMapNumElements = P->getDomainMap()->getLocalNumElements(); + for (LO row = 0; row < (LO)P->getLocalNumRows(); row++) { + LO fineNumber = fineData[row]; + 
ArrayView indices; + P->getLocalRowView(row, indices); + + for (LO j = 0; j < (LO)indices.size(); j++) { + LO col = indices[j]; + if (col >= domMapNumElements) { + // skip off rank entries of P + } else { + coarseData[col] = fineNumber; } } + } #ifdef HAVE_MUELU_DEBUG - size_t error_count = 0; - { - RCP coarseTVghosted; - RCP importer = P->getImporter(); - if (!importer.is_null()) { - coarseTVghosted = LocalOrdinalVectorFactory::Build(P->getColMap(),1); - coarseTVghosted->doImport(*coarseTV, *importer, Xpetra::INSERT); - } else { - coarseTVghosted = coarseTV; - } - ArrayRCP coarseDataGhosted = coarseTVghosted->getDataNonConst(0); - for (LO col=0; col<(LO)P->getColMap()->getLocalNumElements(); col++) { - if (coarseDataGhosted[col] == LO_INVALID) + size_t error_count = 0; + { + RCP coarseTVghosted; + RCP importer = P->getImporter(); + if (!importer.is_null()) { + coarseTVghosted = LocalOrdinalVectorFactory::Build(P->getColMap(), 1); + coarseTVghosted->doImport(*coarseTV, *importer, Xpetra::INSERT); + } else { + coarseTVghosted = coarseTV; + } + ArrayRCP coarseDataGhosted = coarseTVghosted->getDataNonConst(0); + for (LO col = 0; col < (LO)P->getColMap()->getLocalNumElements(); col++) { + if (coarseDataGhosted[col] == LO_INVALID) + error_count++; + } + for (LO row = 0; row < (LO)P->getLocalNumRows(); row++) { + LO fineNumber = fineData[row]; + ArrayView indices; + P->getLocalRowView(row, indices); + for (LO j = 0; j < (LO)indices.size(); j++) { + if (coarseDataGhosted[indices[j]] != fineNumber) error_count++; } - for (LO row=0; row<(LO)P->getLocalNumRows(); row++) { - LO fineNumber = fineData[row]; - ArrayView indices; - P->getLocalRowView(row,indices); - for(LO j=0; j<(LO)indices.size(); j++) { - if (coarseDataGhosted[indices[j]] != fineNumber) - error_count++; - } - } } + } - // Error checking: All nodes in an aggregate must share a local ordinal - if(error_count > 0) { - std::ostringstream ofs; - ofs << "LocalOrdinalTransferFactory("< 0) { + std::ostringstream ofs; 
+ ofs << "LocalOrdinalTransferFactory(" << TransferVecName_ << "): ERROR: Each coarse dof must have a unique LO value. We had " << std::to_string(error_count) << " unknowns that did not match."; + throw std::runtime_error(ofs.str()); + } #endif - - Set >(coarseLevel, TransferVecName_, coarseTV); + Set >(coarseLevel, TransferVecName_, coarseTV); +} + +template +void LocalOrdinalTransferFactory::BuildAggregates(Level &fineLevel, Level &coarseLevel) const { + FactoryMonitor m(*this, "Build", coarseLevel); + + GetOStream(Runtime0) << "Transferring " << TransferVecName_ << std::endl; + RCP coarseTV; + RCP fineTV; + LO LO_INVALID = Teuchos::OrdinalTraits::invalid(); + + if (coarseLevel.IsAvailable(TransferVecName_, this)) { + GetOStream(Runtime0) << "Reusing " << TransferVecName_ << std::endl; + return; } - + RCP aggregates = Get >(fineLevel, "Aggregates"); + fineTV = Get >(fineLevel, TransferVecName_); + RCP coarseMap = Get >(fineLevel, "CoarseMap"); + RCP uniqueMap = fineTV->getMap(); - template - void LocalOrdinalTransferFactory::BuildAggregates(Level & fineLevel, Level &coarseLevel) const { - FactoryMonitor m(*this, "Build", coarseLevel); + ArrayView elementAList = coarseMap->getLocalElementList(); - GetOStream(Runtime0) << "Transferring " < coarseTV; - RCP fineTV; - LO LO_INVALID = Teuchos::OrdinalTraits::invalid(); + coarseTV = LocalOrdinalVectorFactory::Build(coarseMap, 1); - if (coarseLevel.IsAvailable(TransferVecName_, this)) { - GetOStream(Runtime0) << "Reusing "< aggregates = Get< RCP > (fineLevel, "Aggregates"); - fineTV = Get< RCP >(fineLevel, TransferVecName_); - RCP coarseMap = Get< RCP > (fineLevel, "CoarseMap"); - RCP uniqueMap = fineTV->getMap(); - - ArrayView elementAList = coarseMap->getLocalElementList(); - - coarseTV = LocalOrdinalVectorFactory::Build(coarseMap,1); - - // Create overlapped fine TV to reduce global communication - RCP ghostedTV = fineTV; - if (aggregates->AggregatesCrossProcessors()) { - - RCP nonUniqueMap = aggregates->GetMap(); - 
RCP importer = ImportFactory::Build(uniqueMap, nonUniqueMap); - - ghostedTV = LocalOrdinalVectorFactory::Build(nonUniqueMap, 1); - ghostedTV->doImport(*fineTV, *importer, Xpetra::INSERT); - } - - // Get some info about aggregates - int myPID = uniqueMap->getComm()->getRank(); - ArrayRCP aggSizes = aggregates->ComputeAggregateSizesArrayRCP(); - const ArrayRCP vertex2AggID = aggregates->GetVertex2AggId()->getData(0); - const ArrayRCP procWinner = aggregates->GetProcWinner()->getData(0); - - - ArrayRCP fineData = ghostedTV->getData(0); - ArrayRCP coarseData = coarseTV->getDataNonConst(0); - - // Invalidate everything first, to check for errors - for(LO i=0; i ghostedTV = fineTV; + if (aggregates->AggregatesCrossProcessors()) { + RCP nonUniqueMap = aggregates->GetMap(); + RCP importer = ImportFactory::Build(uniqueMap, nonUniqueMap); + + ghostedTV = LocalOrdinalVectorFactory::Build(nonUniqueMap, 1); + ghostedTV->doImport(*fineTV, *importer, Xpetra::INSERT); + } + + // Get some info about aggregates + int myPID = uniqueMap->getComm()->getRank(); + ArrayRCP aggSizes = aggregates->ComputeAggregateSizesArrayRCP(); + const ArrayRCP vertex2AggID = aggregates->GetVertex2AggId()->getData(0); + const ArrayRCP procWinner = aggregates->GetProcWinner()->getData(0); - // Error checking: All nodes in an aggregate must share a local ordinal - if(error_count > 0) { - std::ostringstream ofs; - ofs << "LocalOrdinalTransferFactory: ERROR: Each aggregate must have a unique LO value. 
We had "< fineData = ghostedTV->getData(0); + ArrayRCP coarseData = coarseTV->getDataNonConst(0); + + // Invalidate everything first, to check for errors + for (LO i = 0; i < coarseData.size(); i++) + coarseData[i] = LO_INVALID; + + // Fill in coarse TV + size_t error_count = 0; + for (LO lnode = 0; lnode < vertex2AggID.size(); lnode++) { + if (procWinner[lnode] == myPID && + //lnode < vertex2AggID.size() && + lnode < fineData.size() && // TAW do not access off-processor data + vertex2AggID[lnode] < coarseData.size()) { + if (coarseData[vertex2AggID[lnode]] == LO_INVALID) + coarseData[vertex2AggID[lnode]] = fineData[lnode]; + if (coarseData[vertex2AggID[lnode]] != fineData[lnode]) + error_count++; } - - Set >(coarseLevel, TransferVecName_, coarseTV); + } + // Error checking: All nodes in an aggregate must share a local ordinal + if (error_count > 0) { + std::ostringstream ofs; + ofs << "LocalOrdinalTransferFactory: ERROR: Each aggregate must have a unique LO value. We had " << std::to_string(error_count) << " unknowns that did not match."; + throw std::runtime_error(ofs.str()); } -} // namespace MueLu + Set >(coarseLevel, TransferVecName_, coarseTV); +} + +} // namespace MueLu -#endif // MUELU_LOCALORDINALTRANSFER_FACTORY_DEF_HPP +#endif // MUELU_LOCALORDINALTRANSFER_FACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_LowPrecisionFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_LowPrecisionFactory_decl.hpp index 37ff1ce0e56e..f8923152d657 100644 --- a/packages/muelu/src/Misc/MueLu_LowPrecisionFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_LowPrecisionFactory_decl.hpp @@ -56,141 +56,135 @@ namespace MueLu { - /*! +/*! @class LowPrecisionFactory class. @brief Factory for converting matrices to half precision operators */ - template - class LowPrecisionFactory : public SingleLevelFactoryBase { +template +class LowPrecisionFactory : public SingleLevelFactoryBase { #undef MUELU_LOWPRECISIONFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! 
@name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - LowPrecisionFactory() { } + LowPrecisionFactory() {} - //! Destructor. - virtual ~LowPrecisionFactory() { } + //! Destructor. + virtual ~LowPrecisionFactory() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level& currentLevel) const; + void DeclareInput(Level& currentLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - /*! + /*! @brief Build method. Converts a matrix to half precision operators and returns it in currentLevel. */ - void Build(Level& currentLevel) const; + void Build(Level& currentLevel) const; - //@} - - }; //class LowPrecisionFactory + //@} +}; //class LowPrecisionFactory #if defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_FLOAT) - template - class LowPrecisionFactory : public SingleLevelFactoryBase { - typedef double Scalar; +template +class LowPrecisionFactory : public SingleLevelFactoryBase { + typedef double Scalar; #undef MUELU_LOWPRECISIONFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - LowPrecisionFactory() { } + LowPrecisionFactory() {} - //! Destructor. - virtual ~LowPrecisionFactory() { } + //! Destructor. + virtual ~LowPrecisionFactory() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level& currentLevel) const; + void DeclareInput(Level& currentLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - /*! + /*! @brief Build method. Converts a matrix to half precision operators and returns it in currentLevel. 
*/ - void Build(Level& currentLevel) const; + void Build(Level& currentLevel) const; - //@} + //@} - }; //class LowPrecisionFactory +}; //class LowPrecisionFactory #endif - #if defined(HAVE_TPETRA_INST_COMPLEX_DOUBLE) && defined(HAVE_TPETRA_INST_COMPLEX_FLOAT) - template - class LowPrecisionFactory,LocalOrdinal,GlobalOrdinal,Node> : public SingleLevelFactoryBase { - typedef std::complex Scalar; +template +class LowPrecisionFactory, LocalOrdinal, GlobalOrdinal, Node> : public SingleLevelFactoryBase { + typedef std::complex Scalar; #undef MUELU_LOWPRECISIONFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - LowPrecisionFactory() { } + LowPrecisionFactory() {} - //! Destructor. - virtual ~LowPrecisionFactory() { } + //! Destructor. + virtual ~LowPrecisionFactory() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level& currentLevel) const; + void DeclareInput(Level& currentLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - /*! + /*! @brief Build method. Converts a matrix to half precision operators and returns it in currentLevel. 
*/ - void Build(Level& currentLevel) const; + void Build(Level& currentLevel) const; - //@} + //@} - }; //class LowPrecisionFactory +}; //class LowPrecisionFactory #endif - -} //namespace MueLu +} //namespace MueLu #define MUELU_LOWPRECISIONFACTORY_SHORT -#endif // MUELU_LOWPRECISIONFACTORY_DECL_HPP +#endif // MUELU_LOWPRECISIONFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_LowPrecisionFactory_def.hpp b/packages/muelu/src/Misc/MueLu_LowPrecisionFactory_def.hpp index 5182d762e949..2a2f9f13817e 100644 --- a/packages/muelu/src/Misc/MueLu_LowPrecisionFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_LowPrecisionFactory_def.hpp @@ -56,142 +56,136 @@ #include "MueLu_Level.hpp" #include "MueLu_Monitor.hpp" - namespace MueLu { - template - RCP LowPrecisionFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - validParamList->set("matrix key", "A", ""); - validParamList->set< RCP >("R", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); - validParamList->set< RCP >("P", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); - - return validParamList; - } +template +RCP LowPrecisionFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); - template - void LowPrecisionFactory::DeclareInput(Level& currentLevel) const { + validParamList->set("matrix key", "A", ""); + validParamList->set >("R", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); + validParamList->set >("A", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); + validParamList->set >("P", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); - const ParameterList& pL = GetParameterList(); - std::string matrixKey = pL.get("matrix 
key"); - Input(currentLevel, matrixKey); - } + return validParamList; +} - template - void LowPrecisionFactory::Build(Level& currentLevel) const { - using Teuchos::ParameterList; +template +void LowPrecisionFactory::DeclareInput(Level& currentLevel) const { + const ParameterList& pL = GetParameterList(); + std::string matrixKey = pL.get("matrix key"); + Input(currentLevel, matrixKey); +} - const ParameterList& pL = GetParameterList(); - std::string matrixKey = pL.get("matrix key"); +template +void LowPrecisionFactory::Build(Level& currentLevel) const { + using Teuchos::ParameterList; - FactoryMonitor m(*this, "Converting " + matrixKey + " to half precision", currentLevel); + const ParameterList& pL = GetParameterList(); + std::string matrixKey = pL.get("matrix key"); - RCP A = Get< RCP >(currentLevel, matrixKey); + FactoryMonitor m(*this, "Converting " + matrixKey + " to half precision", currentLevel); - GetOStream(Warnings) << "Matrix not converted to half precision. This only works for Tpetra and when both Scalar and HalfScalar have been instantiated." << std::endl; - Set(currentLevel, matrixKey, A); - } + RCP A = Get >(currentLevel, matrixKey); + GetOStream(Warnings) << "Matrix not converted to half precision. This only works for Tpetra and when both Scalar and HalfScalar have been instantiated." 
<< std::endl; + Set(currentLevel, matrixKey, A); +} #if defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_FLOAT) - template - RCP LowPrecisionFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - validParamList->set("matrix key", "A", ""); - validParamList->set< RCP >("R", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); - validParamList->set< RCP >("P", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); - - return validParamList; +template +RCP LowPrecisionFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + + validParamList->set("matrix key", "A", ""); + validParamList->set >("R", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); + validParamList->set >("A", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); + validParamList->set >("P", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); + + return validParamList; +} + +template +void LowPrecisionFactory::DeclareInput(Level& currentLevel) const { + const ParameterList& pL = GetParameterList(); + std::string matrixKey = pL.get("matrix key"); + Input(currentLevel, matrixKey); +} + +template +void LowPrecisionFactory::Build(Level& currentLevel) const { + using Teuchos::ParameterList; + using HalfScalar = typename Teuchos::ScalarTraits::halfPrecision; + + const ParameterList& pL = GetParameterList(); + std::string matrixKey = pL.get("matrix key"); + + FactoryMonitor m(*this, "Converting " + matrixKey + " to half precision", currentLevel); + + RCP A = Get >(currentLevel, matrixKey); + + if ((A->getRowMap()->lib() == Xpetra::UseTpetra) && std::is_same::value) { + auto tpA = 
rcp_dynamic_cast(rcp_dynamic_cast(A)->getCrsMatrix(), true)->getTpetra_CrsMatrix(); + auto tpLowA = tpA->template convert(); + auto tpLowOpA = rcp(new Tpetra::CrsMatrixMultiplyOp(tpLowA)); + auto xpTpLowOpA = rcp(new TpetraOperator(tpLowOpA)); + auto xpLowOpA = rcp_dynamic_cast(xpTpLowOpA); + Set(currentLevel, matrixKey, xpLowOpA); + return; } - template - void LowPrecisionFactory::DeclareInput(Level& currentLevel) const { - - const ParameterList& pL = GetParameterList(); - std::string matrixKey = pL.get("matrix key"); - Input(currentLevel, matrixKey); - } - - template - void LowPrecisionFactory::Build(Level& currentLevel) const { - using Teuchos::ParameterList; - using HalfScalar = typename Teuchos::ScalarTraits::halfPrecision; - - const ParameterList& pL = GetParameterList(); - std::string matrixKey = pL.get("matrix key"); - - FactoryMonitor m(*this, "Converting " + matrixKey + " to half precision", currentLevel); - - RCP A = Get< RCP >(currentLevel, matrixKey); - - if ((A->getRowMap()->lib() == Xpetra::UseTpetra) && std::is_same::value) { - auto tpA = rcp_dynamic_cast(rcp_dynamic_cast(A)->getCrsMatrix(), true)->getTpetra_CrsMatrix(); - auto tpLowA = tpA->template convert(); - auto tpLowOpA = rcp(new Tpetra::CrsMatrixMultiplyOp(tpLowA)); - auto xpTpLowOpA = rcp(new TpetraOperator(tpLowOpA)); - auto xpLowOpA = rcp_dynamic_cast(xpTpLowOpA); - Set(currentLevel, matrixKey, xpLowOpA); - return; - } - - GetOStream(Warnings) << "Matrix not converted to half precision. This only works for Tpetra and when both Scalar and HalfScalar have been instantiated." << std::endl; - Set(currentLevel, matrixKey, A); - } + GetOStream(Warnings) << "Matrix not converted to half precision. This only works for Tpetra and when both Scalar and HalfScalar have been instantiated." 
<< std::endl; + Set(currentLevel, matrixKey, A); +} #endif - #if defined(HAVE_TPETRA_INST_COMPLEX_DOUBLE) && defined(HAVE_TPETRA_INST_COMPLEX_FLOAT) - template - RCP LowPrecisionFactory, LocalOrdinal, GlobalOrdinal, Node>::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - validParamList->set("matrix key", "A", ""); - validParamList->set< RCP >("R", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); - validParamList->set< RCP >("P", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); - - return validParamList; - } - - template - void LowPrecisionFactory, LocalOrdinal, GlobalOrdinal, Node>::DeclareInput(Level& currentLevel) const { - - const ParameterList& pL = GetParameterList(); - std::string matrixKey = pL.get("matrix key"); - Input(currentLevel, matrixKey); +template +RCP LowPrecisionFactory, LocalOrdinal, GlobalOrdinal, Node>::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + + validParamList->set("matrix key", "A", ""); + validParamList->set >("R", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); + validParamList->set >("A", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); + validParamList->set >("P", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); + + return validParamList; +} + +template +void LowPrecisionFactory, LocalOrdinal, GlobalOrdinal, Node>::DeclareInput(Level& currentLevel) const { + const ParameterList& pL = GetParameterList(); + std::string matrixKey = pL.get("matrix key"); + Input(currentLevel, matrixKey); +} + +template +void LowPrecisionFactory, LocalOrdinal, GlobalOrdinal, Node>::Build(Level& currentLevel) const { + using Teuchos::ParameterList; + using HalfScalar 
= typename Teuchos::ScalarTraits::halfPrecision; + + const ParameterList& pL = GetParameterList(); + std::string matrixKey = pL.get("matrix key"); + + FactoryMonitor m(*this, "Converting " + matrixKey + " to half precision", currentLevel); + + RCP A = Get >(currentLevel, matrixKey); + + if ((A->getRowMap()->lib() == Xpetra::UseTpetra) && std::is_same >::value) { + auto tpA = rcp_dynamic_cast(rcp_dynamic_cast(A)->getCrsMatrix(), true)->getTpetra_CrsMatrix(); + auto tpLowA = tpA->template convert(); + auto tpLowOpA = rcp(new Tpetra::CrsMatrixMultiplyOp(tpLowA)); + auto xpTpLowOpA = rcp(new TpetraOperator(tpLowOpA)); + auto xpLowOpA = rcp_dynamic_cast(xpTpLowOpA); + Set(currentLevel, matrixKey, xpLowOpA); + return; } - template - void LowPrecisionFactory, LocalOrdinal, GlobalOrdinal, Node>::Build(Level& currentLevel) const { - using Teuchos::ParameterList; - using HalfScalar = typename Teuchos::ScalarTraits::halfPrecision; - - const ParameterList& pL = GetParameterList(); - std::string matrixKey = pL.get("matrix key"); - - FactoryMonitor m(*this, "Converting " + matrixKey + " to half precision", currentLevel); - - RCP A = Get< RCP >(currentLevel, matrixKey); - - if ((A->getRowMap()->lib() == Xpetra::UseTpetra) && std::is_same >::value) { - auto tpA = rcp_dynamic_cast(rcp_dynamic_cast(A)->getCrsMatrix(), true)->getTpetra_CrsMatrix(); - auto tpLowA = tpA->template convert(); - auto tpLowOpA = rcp(new Tpetra::CrsMatrixMultiplyOp(tpLowA)); - auto xpTpLowOpA = rcp(new TpetraOperator(tpLowOpA)); - auto xpLowOpA = rcp_dynamic_cast(xpTpLowOpA); - Set(currentLevel, matrixKey, xpLowOpA); - return; - } - - GetOStream(Warnings) << "Matrix not converted to half precision. This only works for Tpetra and when both Scalar and HalfScalar have been instantiated." << std::endl; - Set(currentLevel, matrixKey, A); - } + GetOStream(Warnings) << "Matrix not converted to half precision. This only works for Tpetra and when both Scalar and HalfScalar have been instantiated." 
<< std::endl; + Set(currentLevel, matrixKey, A); +} #endif -} //namespace MueLu +} //namespace MueLu -#endif // MUELU_LOWPRECISIONFACTORY_DEF_HPP +#endif // MUELU_LOWPRECISIONFACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_MapTransferFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_MapTransferFactory_decl.hpp index 5ca98e15b840..b2e6e42572ef 100644 --- a/packages/muelu/src/Misc/MueLu_MapTransferFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_MapTransferFactory_decl.hpp @@ -52,7 +52,7 @@ namespace MueLu { - /*! +/*! @class MapTransferFactory class. @brief Factory to transfer a map from a fine to a coarse level @@ -87,36 +87,34 @@ namespace MueLu { */ - template - class MapTransferFactory : public TwoLevelFactoryBase { +template +class MapTransferFactory : public TwoLevelFactoryBase { #undef MUELU_MAPTRANSFERFACTORY_SHORT - #include "MueLu_UseShortNames.hpp" - - public: +#include "MueLu_UseShortNames.hpp" - //! Input - //@{ + public: + //! Input + //@{ - RCP GetValidParameterList() const override; + RCP GetValidParameterList() const override; - void DeclareInput(Level& fineLevel, Level& coarseLevel) const override; + void DeclareInput(Level& fineLevel, Level& coarseLevel) const override; - //@} + //@} - //@{ - //! @name Build methods. + //@{ + //! @name Build methods. - //! Build an object with this factory. - void Build(Level& fineLevel, Level& coarseLevel) const override; + //! Build an object with this factory. + void Build(Level& fineLevel, Level& coarseLevel) const override; - //@} + //@} - private: - - /*! + private: + /*! @brief Get the max number of entries per row of P to be considered for map transfer To exclude some nullspace vectors (e.g. 
rotations in 2D or 3D elasticity), when doing the map transfer, @@ -127,14 +125,14 @@ namespace MueLu { @param[in] pL Parameter list with user-given configuration @return Number of entries per row of the prolongator to be used for the map transfer */ - int GetLimitOfProlongatorColumns(const ParameterList& pL) const; + int GetLimitOfProlongatorColumns(const ParameterList& pL) const; - //! Generating factory of input variable - mutable RCP mapFact_; + //! Generating factory of input variable + mutable RCP mapFact_; - }; // class MapTransferFactory +}; // class MapTransferFactory -} // namespace MueLu +} // namespace MueLu #define MUELU_MAPTRANSFERFACTORY_SHORT #endif /* MUELU_MAPTRANSFERFACTORY_DECL_HPP_ */ diff --git a/packages/muelu/src/Misc/MueLu_MapTransferFactory_def.hpp b/packages/muelu/src/Misc/MueLu_MapTransferFactory_def.hpp index b96ee324a0df..cd508ac82a50 100644 --- a/packages/muelu/src/Misc/MueLu_MapTransferFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_MapTransferFactory_def.hpp @@ -59,140 +59,133 @@ namespace MueLu { - template - RCP MapTransferFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - validParamList->setEntry("map: name", Teuchos::ParameterEntry(std::string(""))); - validParamList->setEntry("map: factory", Teuchos::ParameterEntry(std::string("null"))); - - validParamList->set>("P", Teuchos::null, "Tentative prolongator factory"); - validParamList->set("nullspace vectors: limit to", "all", "Limit the number of nullspace vectors to be used for the map transfer (especially to exclude rotational vectors)."); - - return validParamList; +template +RCP MapTransferFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + + validParamList->setEntry("map: name", Teuchos::ParameterEntry(std::string(""))); + validParamList->setEntry("map: factory", Teuchos::ParameterEntry(std::string("null"))); + + validParamList->set>("P", Teuchos::null, "Tentative prolongator factory"); + 
validParamList->set("nullspace vectors: limit to", "all", "Limit the number of nullspace vectors to be used for the map transfer (especially to exclude rotational vectors)."); + + return validParamList; +} + +template +void MapTransferFactory::DeclareInput(Level& fineLevel, Level& coarseLevel) const { + const ParameterList& pL = GetParameterList(); + const std::string mapFactName = pL.get("map: factory"); + const std::string mapName = pL.get("map: name"); + + if (fineLevel.GetLevelID() == 0) { + // Not needed, if the map is provided as user data + fineLevel.DeclareInput(mapName, NoFactory::get(), this); + } else { + // check whether user has provided a specific name for the MapFactory + if (mapFactName == "" || mapFactName == "NoFactory") + mapFact_ = MueLu::NoFactory::getRCP(); + else if (mapFactName != "null") + mapFact_ = coarseLevel.GetFactoryManager()->GetFactory(mapFactName); + + // request map generated by mapFact_ + fineLevel.DeclareInput(mapName, mapFact_.get(), this); } - template - void MapTransferFactory::DeclareInput(Level& fineLevel, Level& coarseLevel) const { - const ParameterList & pL = GetParameterList(); - const std::string mapFactName = pL.get("map: factory"); - const std::string mapName = pL.get("map: name"); - - if (fineLevel.GetLevelID() == 0) - { - // Not needed, if the map is provided as user data - fineLevel.DeclareInput(mapName, NoFactory::get(), this); - } - else - { - // check whether user has provided a specific name for the MapFactory - if (mapFactName == "" || mapFactName == "NoFactory") - mapFact_ = MueLu::NoFactory::getRCP(); - else if (mapFactName != "null") - mapFact_ = coarseLevel.GetFactoryManager()->GetFactory(mapFactName); - - // request map generated by mapFact_ - fineLevel.DeclareInput(mapName, mapFact_.get(), this); - } - - // request Ptent - // note that "P" provided by the user (through XML file) is supposed to be of type TentativePFactory - Teuchos::RCP tentPFact = GetFactory("P"); - if (tentPFact == Teuchos::null) - 
tentPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent"); - coarseLevel.DeclareInput("P", tentPFact.get(), this); + // request Ptent + // note that "P" provided by the user (through XML file) is supposed to be of type TentativePFactory + Teuchos::RCP tentPFact = GetFactory("P"); + if (tentPFact == Teuchos::null) + tentPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent"); + coarseLevel.DeclareInput("P", tentPFact.get(), this); +} + +template +void MapTransferFactory::Build(Level& fineLevel, Level& coarseLevel) const { + Monitor m(*this, "MapTransferFactory"); + + const ParameterList& pL = GetParameterList(); + const std::string mapName = pL.get("map: name"); + const int maxNumProlongCols = GetLimitOfProlongatorColumns(pL); + + // fetch map from level + RCP transferMap = Teuchos::null; + if (fineLevel.GetLevelID() == 0) { + transferMap = fineLevel.Get>(mapName, NoFactory::get()); + } else { + if (fineLevel.IsAvailable(mapName, mapFact_.get()) == false) + GetOStream(Runtime0) << "MapTransferFactory::Build: User provided map \"" << mapName << "\" not found in Level class on level " << fineLevel.GetLevelID() << "." << std::endl; + transferMap = fineLevel.Get>(mapName, mapFact_.get()); } - template - void MapTransferFactory::Build(Level& fineLevel, Level& coarseLevel) const { - Monitor m(*this, "MapTransferFactory"); - - const ParameterList & pL = GetParameterList(); - const std::string mapName = pL.get("map: name"); - const int maxNumProlongCols = GetLimitOfProlongatorColumns(pL); - - // fetch map from level - RCP transferMap = Teuchos::null; - if (fineLevel.GetLevelID() == 0) { - transferMap = fineLevel.Get>(mapName, NoFactory::get()); - } else { - if (fineLevel.IsAvailable(mapName, mapFact_.get()) == false) - GetOStream(Runtime0) << "MapTransferFactory::Build: User provided map \"" << mapName << "\" not found in Level class on level " << fineLevel.GetLevelID() << "." 
<< std::endl; - transferMap = fineLevel.Get>(mapName, mapFact_.get()); - } - - // Get default tentative prolongator factory - // Getting it that way ensures that the same factory instance will be used for both SaPFactory and NullspaceFactory. - RCP tentPFact = GetFactory("P"); - if (tentPFact == Teuchos::null) - tentPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent"); - TEUCHOS_TEST_FOR_EXCEPTION(!coarseLevel.IsAvailable("P", tentPFact.get()), Exceptions::RuntimeError, - "MueLu::MapTransferFactory::Build(): P (generated by TentativePFactory) not available."); - RCP Ptent = coarseLevel.Get >("P", tentPFact.get()); - - // loop over local rows of Ptent and figure out the corresponding coarse GIDs - Array coarseMapGids; - RCP prolongColMap = Ptent->getColMap(); - GO gRowID = -1; - int numColEntries = 0; - for (size_t row = 0; row < Ptent->getLocalNumRows(); ++row) { - gRowID = Ptent->getRowMap()->getGlobalElement(row); - - if (transferMap->isNodeGlobalElement(gRowID)) { - Teuchos::ArrayView indices; - Teuchos::ArrayView vals; - Ptent->getLocalRowView(row, indices, vals); - - numColEntries = as(indices.size()); - if (maxNumProlongCols > 0) - numColEntries = std::min(numColEntries, maxNumProlongCols); - - for (size_t col = 0; col < as(numColEntries); ++col) { - // mark all (selected) columns in Ptent(gRowID,*) to be coarse Dofs of next level transferMap - GO gcid = prolongColMap->getGlobalElement(indices[col]); - coarseMapGids.push_back(gcid); - } + // Get default tentative prolongator factory + // Getting it that way ensures that the same factory instance will be used for both SaPFactory and NullspaceFactory. 
+ RCP tentPFact = GetFactory("P"); + if (tentPFact == Teuchos::null) + tentPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent"); + TEUCHOS_TEST_FOR_EXCEPTION(!coarseLevel.IsAvailable("P", tentPFact.get()), Exceptions::RuntimeError, + "MueLu::MapTransferFactory::Build(): P (generated by TentativePFactory) not available."); + RCP Ptent = coarseLevel.Get>("P", tentPFact.get()); + + // loop over local rows of Ptent and figure out the corresponding coarse GIDs + Array coarseMapGids; + RCP prolongColMap = Ptent->getColMap(); + GO gRowID = -1; + int numColEntries = 0; + for (size_t row = 0; row < Ptent->getLocalNumRows(); ++row) { + gRowID = Ptent->getRowMap()->getGlobalElement(row); + + if (transferMap->isNodeGlobalElement(gRowID)) { + Teuchos::ArrayView indices; + Teuchos::ArrayView vals; + Ptent->getLocalRowView(row, indices, vals); + + numColEntries = as(indices.size()); + if (maxNumProlongCols > 0) + numColEntries = std::min(numColEntries, maxNumProlongCols); + + for (size_t col = 0; col < as(numColEntries); ++col) { + // mark all (selected) columns in Ptent(gRowID,*) to be coarse Dofs of next level transferMap + GO gcid = prolongColMap->getGlobalElement(indices[col]); + coarseMapGids.push_back(gcid); } } - - // build coarse version of the input map - const GO INVALID = Teuchos::OrdinalTraits::invalid(); - std::sort(coarseMapGids.begin(), coarseMapGids.end()); - coarseMapGids.erase(std::unique(coarseMapGids.begin(), coarseMapGids.end()), coarseMapGids.end()); - RCP coarseTransferMap = MapFactory::Build(prolongColMap->lib(), INVALID, coarseMapGids(), - prolongColMap->getIndexBase(), prolongColMap->getComm()); - - // store map in coarse level - if (fineLevel.GetLevelID() == 0) - { - const std::string mapFactName = pL.get("map: factory"); - RCP mapFact = coarseLevel.GetFactoryManager()->GetFactory(mapFactName); - coarseLevel.Set(mapName, coarseTransferMap, mapFact.get()); - } - else - coarseLevel.Set(mapName, coarseTransferMap, mapFact_.get()); - - } - - 
template - int MapTransferFactory::GetLimitOfProlongatorColumns(const ParameterList& pL) const - { - const std::string useTheseNspVectors = pL.get("nullspace vectors: limit to"); - - // Leave right away, if no limit is prescribed by the user - if (useTheseNspVectors == "all" || useTheseNspVectors == "") - return -1; - - // Simplify? Maybe replace by boolean flag "nullspace: exclude rotations" - int maxNumProlongCols = -1; - if (useTheseNspVectors == "translations") - maxNumProlongCols = 1; - else - TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::InvalidArgument, "Unknown subset of nullspace vectors to be used, when performing a map transfer.") - - return maxNumProlongCols; } -} // namespace MueLu + // build coarse version of the input map + const GO INVALID = Teuchos::OrdinalTraits::invalid(); + std::sort(coarseMapGids.begin(), coarseMapGids.end()); + coarseMapGids.erase(std::unique(coarseMapGids.begin(), coarseMapGids.end()), coarseMapGids.end()); + RCP coarseTransferMap = MapFactory::Build(prolongColMap->lib(), INVALID, coarseMapGids(), + prolongColMap->getIndexBase(), prolongColMap->getComm()); + + // store map in coarse level + if (fineLevel.GetLevelID() == 0) { + const std::string mapFactName = pL.get("map: factory"); + RCP mapFact = coarseLevel.GetFactoryManager()->GetFactory(mapFactName); + coarseLevel.Set(mapName, coarseTransferMap, mapFact.get()); + } else + coarseLevel.Set(mapName, coarseTransferMap, mapFact_.get()); +} + +template +int MapTransferFactory::GetLimitOfProlongatorColumns(const ParameterList& pL) const { + const std::string useTheseNspVectors = pL.get("nullspace vectors: limit to"); + + // Leave right away, if no limit is prescribed by the user + if (useTheseNspVectors == "all" || useTheseNspVectors == "") + return -1; + + // Simplify? 
Maybe replace by boolean flag "nullspace: exclude rotations" + int maxNumProlongCols = -1; + if (useTheseNspVectors == "translations") + maxNumProlongCols = 1; + else + TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::InvalidArgument, "Unknown subset of nullspace vectors to be used, when performing a map transfer.") + + return maxNumProlongCols; +} + +} // namespace MueLu #endif /* MUELU_MAPTRANSFERFACTORY_DEF_HPP_ */ \ No newline at end of file diff --git a/packages/muelu/src/Misc/MueLu_MergedBlockedMatrixFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_MergedBlockedMatrixFactory_decl.hpp index d5d4b459d55f..73de1b6b60cf 100644 --- a/packages/muelu/src/Misc/MueLu_MergedBlockedMatrixFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_MergedBlockedMatrixFactory_decl.hpp @@ -58,49 +58,45 @@ #include "MueLu_FactoryBase_fwd.hpp" namespace MueLu { - /*! +/*! @class MergedBlockedMatrix @brief Factory provides a merged version of a blocked matrix */ - template - class MergedBlockedMatrixFactory : public SingleLevelFactoryBase { +template +class MergedBlockedMatrixFactory : public SingleLevelFactoryBase { #undef MUELU_MERGEDBLOCKEDMATRIXFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - MergedBlockedMatrixFactory(); + MergedBlockedMatrixFactory(); - virtual ~MergedBlockedMatrixFactory() { } - //@} + virtual ~MergedBlockedMatrixFactory() {} + //@} - //! @name Input - //@{ + //! @name Input + //@{ - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - void DeclareInput(Level ¤tLevel) const; + void DeclareInput(Level ¤tLevel) const; - //@} + //@} - //! @name Build methods. - //@{ - void Build(Level ¤tLevel) const; - //@} + //! @name Build methods. 
+ //@{ + void Build(Level ¤tLevel) const; + //@} + private: +}; //class MergedBlockedMatrixFactory - - private: - - - }; //class MergedBlockedMatrixFactory - -} //namespace MueLu +} //namespace MueLu #define MUELU_MERGEDBLOCKEDMATRIXFACTORY_SHORT diff --git a/packages/muelu/src/Misc/MueLu_MergedBlockedMatrixFactory_def.hpp b/packages/muelu/src/Misc/MueLu_MergedBlockedMatrixFactory_def.hpp index ce3a8cf46d0e..b36bed3f75de 100644 --- a/packages/muelu/src/Misc/MueLu_MergedBlockedMatrixFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_MergedBlockedMatrixFactory_def.hpp @@ -56,28 +56,25 @@ namespace MueLu { template -MergedBlockedMatrixFactory::MergedBlockedMatrixFactory() -{ } +MergedBlockedMatrixFactory::MergedBlockedMatrixFactory() {} template RCP MergedBlockedMatrixFactory::GetValidParameterList() const { RCP validParamList = rcp(new ParameterList()); - validParamList->set< RCP >("A", MueLu::NoFactory::getRCP()/*Teuchos::null*/, "Generating factory of the matrix A used for building SchurComplement (must be a 2x2 blocked operator, default = MueLu::NoFactory::getRCP())"); + validParamList->set >("A", MueLu::NoFactory::getRCP() /*Teuchos::null*/, "Generating factory of the matrix A used for building SchurComplement (must be a 2x2 blocked operator, default = MueLu::NoFactory::getRCP())"); return validParamList; } - template void MergedBlockedMatrixFactory::DeclareInput(Level ¤tLevel) const { Input(currentLevel, "A"); } template -void MergedBlockedMatrixFactory::Build(Level & currentLevel) const -{ - FactoryMonitor m(*this, "MergedBlockedMatrix", currentLevel); +void MergedBlockedMatrixFactory::Build(Level ¤tLevel) const { + FactoryMonitor m(*this, "MergedBlockedMatrix", currentLevel); Teuchos::RCP A = Get >(currentLevel, "A"); RCP bA = Teuchos::rcp_dynamic_cast(A); @@ -93,6 +90,6 @@ void MergedBlockedMatrixFactory::Buil } } -} // namespace MueLu +} // namespace MueLu #endif /* MUELU_MERGEDBLOCKEDMATRIXFACTORY_DEF_HPP_ */ diff --git 
a/packages/muelu/src/Misc/MueLu_MultiVectorTransferFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_MultiVectorTransferFactory_decl.hpp index 75f8fdc123d2..527c0ae38fc9 100644 --- a/packages/muelu/src/Misc/MueLu_MultiVectorTransferFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_MultiVectorTransferFactory_decl.hpp @@ -55,26 +55,26 @@ namespace MueLu { - /*! +/*! @class MultiVectorTransferFactory class. @brief Class for restricting a MultiVector from a finer to a coarser level. This is to be used in conjunction with Muelu::RAPFactory::AddTransferFactory(). */ - template - class MultiVectorTransferFactory : public TwoLevelFactoryBase { +template +class MultiVectorTransferFactory : public TwoLevelFactoryBase { #undef MUELU_MULTIVECTORTRANSFERFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - /*! @brief Constructor. + /*! @brief Constructor. @param vectorName The name of the quantity to be restricted. @param restrictionName The name of the restriction Matrix. @@ -82,44 +82,43 @@ namespace MueLu { The operator associated with projectionName will be applied to the MultiVector associated with vectorName. */ - MultiVectorTransferFactory() { } + MultiVectorTransferFactory() {} - MultiVectorTransferFactory(std::string const & vectorName); // deprecated + MultiVectorTransferFactory(std::string const &vectorName); // deprecated - //! Destructor. - virtual ~MultiVectorTransferFactory() { } + //! Destructor. + virtual ~MultiVectorTransferFactory() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! @name Input - //@{ + //! @name Input + //@{ - /*! @brief Specifies the data that this class needs, and the factories that generate that data. + /*! @brief Specifies the data that this class needs, and the factories that generate that data. 
If the Build method of this class requires some data, but the generating factory is not specified in DeclareInput, then this class will fall back to the settings in FactoryManager. */ - void DeclareInput(Level &finelevel, Level &coarseLevel) const; + void DeclareInput(Level &finelevel, Level &coarseLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - //! Build an object with this factory. - void Build(Level & fineLevel, Level &coarseLevel) const; + //! Build an object with this factory. + void Build(Level &fineLevel, Level &coarseLevel) const; - //@} + //@} - private: + private: + static ArrayRCP expandCoordinates(ArrayRCP coord, LocalOrdinal blksize); - static ArrayRCP expandCoordinates(ArrayRCP coord, LocalOrdinal blksize); +}; // class MultiVectorTransferFactory - }; // class MultiVectorTransferFactory - -} // namespace MueLu +} // namespace MueLu #define MUELU_MULTIVECTORTRANSFERFACTORY_SHORT -#endif // MUELU_MULTIVECTORTRANSFER_FACTORY_DECL_HPP +#endif // MUELU_MULTIVECTORTRANSFER_FACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_MultiVectorTransferFactory_def.hpp b/packages/muelu/src/Misc/MueLu_MultiVectorTransferFactory_def.hpp index 4ecf3bedfc09..120dcd902ca0 100644 --- a/packages/muelu/src/Misc/MueLu_MultiVectorTransferFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_MultiVectorTransferFactory_def.hpp @@ -54,73 +54,73 @@ namespace MueLu { - template - RCP MultiVectorTransferFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP MultiVectorTransferFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); - validParamList->set< std::string > ("Vector name", "undefined", "Name of the vector that will be transferred on the coarse grid (level key)"); // TODO: how to set a validator without default value? 
- validParamList->set< RCP >("Vector factory", Teuchos::null, "Factory of the vector"); - validParamList->set< RCP >("R", Teuchos::null, "Factory of the transfer operator (restriction)"); + validParamList->set("Vector name", "undefined", "Name of the vector that will be transferred on the coarse grid (level key)"); // TODO: how to set a validator without default value? + validParamList->set >("Vector factory", Teuchos::null, "Factory of the vector"); + validParamList->set >("R", Teuchos::null, "Factory of the transfer operator (restriction)"); - return validParamList; - } + return validParamList; +} - template - MultiVectorTransferFactory::MultiVectorTransferFactory(std::string const & vectorName) { - SetParameter("Vector name", ParameterEntry(vectorName)); - } +template +MultiVectorTransferFactory::MultiVectorTransferFactory(std::string const &vectorName) { + SetParameter("Vector name", ParameterEntry(vectorName)); +} - template - void MultiVectorTransferFactory::DeclareInput(Level &fineLevel, Level &coarseLevel) const { - const ParameterList & pL = GetParameterList(); - std::string vectorName = pL.get("Vector name"); +template +void MultiVectorTransferFactory::DeclareInput(Level &fineLevel, Level &coarseLevel) const { + const ParameterList &pL = GetParameterList(); + std::string vectorName = pL.get("Vector name"); - fineLevel.DeclareInput(vectorName, GetFactory("Vector factory").get(), this); - Input(coarseLevel, "R"); - } + fineLevel.DeclareInput(vectorName, GetFactory("Vector factory").get(), this); + Input(coarseLevel, "R"); +} - template - void MultiVectorTransferFactory::Build(Level & fineLevel, Level &coarseLevel) const { - FactoryMonitor m(*this, "Build", coarseLevel); +template +void MultiVectorTransferFactory::Build(Level &fineLevel, Level &coarseLevel) const { + FactoryMonitor m(*this, "Build", coarseLevel); - const ParameterList & pL = GetParameterList(); - std::string vectorName = pL.get("Vector name"); + const ParameterList &pL = GetParameterList(); 
+ std::string vectorName = pL.get("Vector name"); - RCP fineVector = fineLevel.Get< RCP >(vectorName, GetFactory("Vector factory").get()); - RCP transferOp = Get >(coarseLevel, "R"); + RCP fineVector = fineLevel.Get >(vectorName, GetFactory("Vector factory").get()); + RCP transferOp = Get >(coarseLevel, "R"); - RCP coarseVector = MultiVectorFactory::Build(transferOp->getRangeMap(), fineVector->getNumVectors()); - GetOStream(Runtime0) << "Transferring multivector \"" << vectorName << "\"" << std::endl; + RCP coarseVector = MultiVectorFactory::Build(transferOp->getRangeMap(), fineVector->getNumVectors()); + GetOStream(Runtime0) << "Transferring multivector \"" << vectorName << "\"" << std::endl; - RCP onesVector = MultiVectorFactory::Build(transferOp->getDomainMap(), 1); - onesVector->putScalar(Teuchos::ScalarTraits::one()); - RCP rowSumVector = MultiVectorFactory::Build(transferOp->getRangeMap(), 1); - transferOp->apply(*onesVector, *rowSumVector); - transferOp->apply(*fineVector, *coarseVector); + RCP onesVector = MultiVectorFactory::Build(transferOp->getDomainMap(), 1); + onesVector->putScalar(Teuchos::ScalarTraits::one()); + RCP rowSumVector = MultiVectorFactory::Build(transferOp->getRangeMap(), 1); + transferOp->apply(*onesVector, *rowSumVector); + transferOp->apply(*fineVector, *coarseVector); - if (vectorName == "Coordinates") - TEUCHOS_TEST_FOR_EXCEPTION(true,Exceptions::RuntimeError,"Use CoordinatesTransferFactory to transfer coordinates instead of MultiVectorTransferFactory."); + if (vectorName == "Coordinates") + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "Use CoordinatesTransferFactory to transfer coordinates instead of MultiVectorTransferFactory."); - Set >(coarseLevel, vectorName, coarseVector); + Set >(coarseLevel, vectorName, coarseVector); - } // Build +} // Build - template - ArrayRCP MultiVectorTransferFactory::expandCoordinates(ArrayRCP coordinates, LocalOrdinal blksize) { - if (blksize == 1) - return coordinates; +template 
+ArrayRCP MultiVectorTransferFactory::expandCoordinates(ArrayRCP coordinates, LocalOrdinal blksize) { + if (blksize == 1) + return coordinates; - ArrayRCP expandCoord(coordinates.size()*blksize); //TODO: how to avoid automatic initialization of the vector? using arcp()? + ArrayRCP expandCoord(coordinates.size() * blksize); //TODO: how to avoid automatic initialization of the vector? using arcp()? - for(int i=0; i - class RAPFactory : public TwoLevelFactoryBase { +template +class RAPFactory : public TwoLevelFactoryBase { #undef MUELU_RAPFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - RAPFactory(); + RAPFactory(); - virtual ~RAPFactory() { } + virtual ~RAPFactory() {} - //@} + //@} - //! @name Input - //@{ + //! @name Input + //@{ - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - void DeclareInput(Level& fineLevel, Level& coarseLevel) const; + void DeclareInput(Level& fineLevel, Level& coarseLevel) const; - //@} + //@} - //! @name Build methods. - //@{ - void Build(Level& fineLevel, Level& coarseLevel) const; - //@} + //! @name Build methods. + //@{ + void Build(Level& fineLevel, Level& coarseLevel) const; + //@} - //@{ - /*! @brief Add transfer factory in the end of list of transfer factories in RepartitionAcFactory. + //@{ + /*! @brief Add transfer factory in the end of list of transfer factories in RepartitionAcFactory. Transfer factories are derived from TwoLevelFactoryBase and project some data from the fine level to the next coarser level. */ - void AddTransferFactory(const RCP& factory); + void AddTransferFactory(const RCP& factory); - // TODO add a function to remove a specific transfer factory? + // TODO add a function to remove a specific transfer factory? - //! Returns number of transfer factories. - size_t NumTransferFactories() const { return transferFacts_.size(); } + //! Returns number of transfer factories. 
+ size_t NumTransferFactories() const { return transferFacts_.size(); } - //@} + //@} - private: + private: + //@{ - //@{ - - mutable - bool hasDeclaredInput_; + mutable bool hasDeclaredInput_; - //@} + //@} - //@{ + //@{ - //! list of user-defined transfer Factories - std::vector > transferFacts_; + //! list of user-defined transfer Factories + std::vector > transferFacts_; - //@} + //@} - }; //class RAPFactory +}; //class RAPFactory -} //namespace MueLu +} //namespace MueLu #define MUELU_RAPFACTORY_SHORT -#endif // MUELU_RAPFACTORY_DECL_HPP +#endif // MUELU_RAPFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_RAPFactory_def.hpp b/packages/muelu/src/Misc/MueLu_RAPFactory_def.hpp index 5e67cc295d48..217f20fc5625 100644 --- a/packages/muelu/src/Misc/MueLu_RAPFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_RAPFactory_def.hpp @@ -46,7 +46,6 @@ #ifndef MUELU_RAPFACTORY_DEF_HPP #define MUELU_RAPFACTORY_DEF_HPP - #include #include @@ -67,342 +66,349 @@ namespace MueLu { - template - RAPFactory::RAPFactory() - : hasDeclaredInput_(false) { } +template +RAPFactory::RAPFactory() + : hasDeclaredInput_(false) {} - template - RCP RAPFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP RAPFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("transpose: use implicit"); - SET_VALID_ENTRY("rap: triple product"); - SET_VALID_ENTRY("rap: fix zero diagonals"); - SET_VALID_ENTRY("rap: fix zero diagonals threshold"); - SET_VALID_ENTRY("rap: fix zero diagonals replacement"); - SET_VALID_ENTRY("rap: relative diagonal floor"); -#undef SET_VALID_ENTRY - validParamList->set< RCP >("A", null, "Generating factory of the matrix A used during the prolongator smoothing process"); - validParamList->set< RCP >("P", null, "Prolongator factory"); - validParamList->set< RCP >("R", null, 
"Restrictor factory"); - - validParamList->set< bool > ("CheckMainDiagonal", false, "Check main diagonal for zeros"); - validParamList->set< bool > ("RepairMainDiagonal", false, "Repair zeros on main diagonal"); - - // Make sure we don't recursively validate options for the matrixmatrix kernels - ParameterList norecurse; - norecurse.disableRecursiveValidation(); - validParamList->set ("matrixmatrix: kernel params", norecurse, "MatrixMatrix kernel parameters"); - - return validParamList; - } + SET_VALID_ENTRY("transpose: use implicit"); + SET_VALID_ENTRY("rap: triple product"); + SET_VALID_ENTRY("rap: fix zero diagonals"); + SET_VALID_ENTRY("rap: fix zero diagonals threshold"); + SET_VALID_ENTRY("rap: fix zero diagonals replacement"); + SET_VALID_ENTRY("rap: relative diagonal floor"); +#undef SET_VALID_ENTRY + validParamList->set >("A", null, "Generating factory of the matrix A used during the prolongator smoothing process"); + validParamList->set >("P", null, "Prolongator factory"); + validParamList->set >("R", null, "Restrictor factory"); + + validParamList->set("CheckMainDiagonal", false, "Check main diagonal for zeros"); + validParamList->set("RepairMainDiagonal", false, "Repair zeros on main diagonal"); + + // Make sure we don't recursively validate options for the matrixmatrix kernels + ParameterList norecurse; + norecurse.disableRecursiveValidation(); + validParamList->set("matrixmatrix: kernel params", norecurse, "MatrixMatrix kernel parameters"); + + return validParamList; +} + +template +void RAPFactory::DeclareInput(Level& fineLevel, Level& coarseLevel) const { + const Teuchos::ParameterList& pL = GetParameterList(); + if (pL.get("transpose: use implicit") == false) + Input(coarseLevel, "R"); + + Input(fineLevel, "A"); + Input(coarseLevel, "P"); + + // call DeclareInput of all user-given transfer factories + for (std::vector >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) + (*it)->CallDeclareInput(coarseLevel); + + 
hasDeclaredInput_ = true; +} + +template +void RAPFactory::Build(Level& fineLevel, Level& coarseLevel) const { + const bool doTranspose = true; + const bool doFillComplete = true; + const bool doOptimizeStorage = true; + RCP Ac; + { + FactoryMonitor m(*this, "Computing Ac", coarseLevel); + std::ostringstream levelstr; + levelstr << coarseLevel.GetLevelID(); + std::string labelstr = FormattingHelper::getColonLabel(coarseLevel.getObjectLabel()); + + TEUCHOS_TEST_FOR_EXCEPTION(hasDeclaredInput_ == false, Exceptions::RuntimeError, + "MueLu::RAPFactory::Build(): CallDeclareInput has not been called before Build!"); - template - void RAPFactory::DeclareInput(Level &fineLevel, Level &coarseLevel) const { const Teuchos::ParameterList& pL = GetParameterList(); - if (pL.get("transpose: use implicit") == false) - Input(coarseLevel, "R"); - - Input(fineLevel, "A"); - Input(coarseLevel, "P"); - - // call DeclareInput of all user-given transfer factories - for (std::vector >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) - (*it)->CallDeclareInput(coarseLevel); - - hasDeclaredInput_ = true; - } - - template - void RAPFactory::Build(Level& fineLevel, Level& coarseLevel) const { - const bool doTranspose = true; - const bool doFillComplete = true; - const bool doOptimizeStorage = true; - RCP Ac; - { - FactoryMonitor m(*this, "Computing Ac", coarseLevel); - std::ostringstream levelstr; - levelstr << coarseLevel.GetLevelID(); - std::string labelstr = FormattingHelper::getColonLabel(coarseLevel.getObjectLabel()); - - TEUCHOS_TEST_FOR_EXCEPTION(hasDeclaredInput_ == false, Exceptions::RuntimeError, - "MueLu::RAPFactory::Build(): CallDeclareInput has not been called before Build!"); - - const Teuchos::ParameterList& pL = GetParameterList(); - RCP A = Get< RCP >(fineLevel, "A"); - RCP P = Get< RCP >(coarseLevel, "P"), AP; - // We don't have a valid P (e.g., # global aggregates = 0) so we bail. 
- // This level will ultimately be removed in MueLu_Hierarchy_defs.h via a resize() - if (P == Teuchos::null) { - Ac = Teuchos::null; - Set(coarseLevel, "A", Ac); - return; - } + RCP A = Get >(fineLevel, "A"); + RCP P = Get >(coarseLevel, "P"), AP; + // We don't have a valid P (e.g., # global aggregates = 0) so we bail. + // This level will ultimately be removed in MueLu_Hierarchy_defs.h via a resize() + if (P == Teuchos::null) { + Ac = Teuchos::null; + Set(coarseLevel, "A", Ac); + return; + } - bool isEpetra = A->getRowMap()->lib() == Xpetra::UseEpetra; - bool isGPU = + bool isEpetra = A->getRowMap()->lib() == Xpetra::UseEpetra; + bool isGPU = #ifdef KOKKOS_ENABLE_CUDA - (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosCudaWrapperNode).name()) || + (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosCudaWrapperNode).name()) || #endif #ifdef KOKKOS_ENABLE_HIP - (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosHIPWrapperNode).name()) || + (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosHIPWrapperNode).name()) || #endif #ifdef KOKKOS_ENABLE_SYCL - (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosSYCLWrapperNode).name()) || + (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosSYCLWrapperNode).name()) || #endif - false; + false; - if (pL.get("rap: triple product") == false || isEpetra || isGPU) { - if (pL.get("rap: triple product") && isEpetra) - GetOStream(Warnings1) << "Switching from triple product to R x (A x P) since triple product has not been implemented for Epetra.\n"; + if (pL.get("rap: triple product") == false || isEpetra || isGPU) { + if (pL.get("rap: triple product") && isEpetra) + GetOStream(Warnings1) << "Switching from triple product to R x (A x P) since triple product has not been implemented for Epetra.\n"; #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) || defined(KOKKOS_ENABLE_SYCL) - if (pL.get("rap: triple product") && isGPU) - GetOStream(Warnings1) << "Switching from triple 
product to R x (A x P) since triple product has not been implemented for " - << Node::execution_space::name() << std::endl; + if (pL.get("rap: triple product") && isGPU) + GetOStream(Warnings1) << "Switching from triple product to R x (A x P) since triple product has not been implemented for " + << Node::execution_space::name() << std::endl; #endif - // Reuse pattern if available (multiple solve) - RCP APparams = rcp(new ParameterList); - if(pL.isSublist("matrixmatrix: kernel params")) - APparams->sublist("matrixmatrix: kernel params") = pL.sublist("matrixmatrix: kernel params"); - - // By default, we don't need global constants for A*P - APparams->set("compute global constants: temporaries",APparams->get("compute global constants: temporaries",false)); - APparams->set("compute global constants",APparams->get("compute global constants",false)); - - if (coarseLevel.IsAvailable("AP reuse data", this)) { - GetOStream(static_cast(Runtime0 | Test)) << "Reusing previous AP data" << std::endl; - - APparams = coarseLevel.Get< RCP >("AP reuse data", this); - - if (APparams->isParameter("graph")) - AP = APparams->get< RCP >("graph"); - } - - { - SubFactoryMonitor subM(*this, "MxM: A x P", coarseLevel); - - AP = MatrixMatrix::Multiply(*A, !doTranspose, *P, !doTranspose, AP, GetOStream(Statistics2), - doFillComplete, doOptimizeStorage, labelstr+std::string("MueLu::A*P-")+levelstr.str(), APparams); - } - - // Reuse coarse matrix memory if available (multiple solve) - RCP RAPparams = rcp(new ParameterList); - if(pL.isSublist("matrixmatrix: kernel params")) - RAPparams->sublist("matrixmatrix: kernel params") = pL.sublist("matrixmatrix: kernel params"); - - if (coarseLevel.IsAvailable("RAP reuse data", this)) { - GetOStream(static_cast(Runtime0 | Test)) << "Reusing previous RAP data" << std::endl; - - RAPparams = coarseLevel.Get< RCP >("RAP reuse data", this); - - if (RAPparams->isParameter("graph")) - Ac = RAPparams->get< RCP >("graph"); - - // Some eigenvalue may have been 
cached with the matrix in the previous run. - // As the matrix values will be updated, we need to reset the eigenvalue. - Ac->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits::one()); - } - - // We *always* need global constants for the RAP, but not for the temps - RAPparams->set("compute global constants: temporaries",RAPparams->get("compute global constants: temporaries",false)); - RAPparams->set("compute global constants",true); - - // Allow optimization of storage. - // This is necessary for new faster Epetra MM kernels. - // Seems to work with matrix modifications to repair diagonal entries. - - if (pL.get("transpose: use implicit") == true) { - SubFactoryMonitor m2(*this, "MxM: P' x (AP) (implicit)", coarseLevel); - - Ac = MatrixMatrix::Multiply(*P, doTranspose, *AP, !doTranspose, Ac, GetOStream(Statistics2), - doFillComplete, doOptimizeStorage, labelstr+std::string("MueLu::R*(AP)-implicit-")+levelstr.str(), RAPparams); - - } else { - RCP R = Get< RCP >(coarseLevel, "R"); - - SubFactoryMonitor m2(*this, "MxM: R x (AP) (explicit)", coarseLevel); - - Ac = MatrixMatrix::Multiply(*R, !doTranspose, *AP, !doTranspose, Ac, GetOStream(Statistics2), - doFillComplete, doOptimizeStorage, labelstr+std::string("MueLu::R*(AP)-explicit-")+levelstr.str(), RAPparams); - } - - Teuchos::ArrayView relativeFloor = pL.get >("rap: relative diagonal floor")(); - if(relativeFloor.size() > 0) { - Xpetra::MatrixUtils::RelativeDiagonalBoost(Ac, relativeFloor,GetOStream(Statistics2)); - } - - bool repairZeroDiagonals = pL.get("RepairMainDiagonal") || pL.get("rap: fix zero diagonals"); - bool checkAc = pL.get("CheckMainDiagonal")|| pL.get("rap: fix zero diagonals"); ; - if (checkAc || repairZeroDiagonals) { - using magnitudeType = typename Teuchos::ScalarTraits::magnitudeType; - magnitudeType threshold; - if (pL.isType("rap: fix zero diagonals threshold")) - threshold = pL.get("rap: fix zero diagonals threshold"); - else - threshold = Teuchos::as(pL.get("rap: fix zero diagonals 
threshold")); - Scalar replacement = Teuchos::as(pL.get("rap: fix zero diagonals replacement")); - Xpetra::MatrixUtils::CheckRepairMainDiagonal(Ac, repairZeroDiagonals, GetOStream(Warnings1), threshold, replacement); - } - - if (IsPrint(Statistics2)) { - RCP params = rcp(new ParameterList());; - params->set("printLoadBalancingInfo", true); - params->set("printCommInfo", true); - GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*Ac, "Ac", params); - } - - if(!Ac.is_null()) {std::ostringstream oss; oss << "A_" << coarseLevel.GetLevelID(); Ac->setObjectLabel(oss.str());} - Set(coarseLevel, "A", Ac); - - if (!isGPU) { - APparams->set("graph", AP); - Set(coarseLevel, "AP reuse data", APparams); - } - if (!isGPU) { - RAPparams->set("graph", Ac); - Set(coarseLevel, "RAP reuse data", RAPparams); - } + // Reuse pattern if available (multiple solve) + RCP APparams = rcp(new ParameterList); + if (pL.isSublist("matrixmatrix: kernel params")) + APparams->sublist("matrixmatrix: kernel params") = pL.sublist("matrixmatrix: kernel params"); + + // By default, we don't need global constants for A*P + APparams->set("compute global constants: temporaries", APparams->get("compute global constants: temporaries", false)); + APparams->set("compute global constants", APparams->get("compute global constants", false)); + + if (coarseLevel.IsAvailable("AP reuse data", this)) { + GetOStream(static_cast(Runtime0 | Test)) << "Reusing previous AP data" << std::endl; + + APparams = coarseLevel.Get >("AP reuse data", this); + + if (APparams->isParameter("graph")) + AP = APparams->get >("graph"); + } + + { + SubFactoryMonitor subM(*this, "MxM: A x P", coarseLevel); + + AP = MatrixMatrix::Multiply(*A, !doTranspose, *P, !doTranspose, AP, GetOStream(Statistics2), + doFillComplete, doOptimizeStorage, labelstr + std::string("MueLu::A*P-") + levelstr.str(), APparams); + } + + // Reuse coarse matrix memory if available (multiple solve) + RCP RAPparams = rcp(new ParameterList); + if 
(pL.isSublist("matrixmatrix: kernel params")) + RAPparams->sublist("matrixmatrix: kernel params") = pL.sublist("matrixmatrix: kernel params"); + + if (coarseLevel.IsAvailable("RAP reuse data", this)) { + GetOStream(static_cast(Runtime0 | Test)) << "Reusing previous RAP data" << std::endl; + + RAPparams = coarseLevel.Get >("RAP reuse data", this); + + if (RAPparams->isParameter("graph")) + Ac = RAPparams->get >("graph"); + + // Some eigenvalue may have been cached with the matrix in the previous run. + // As the matrix values will be updated, we need to reset the eigenvalue. + Ac->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits::one()); + } + + // We *always* need global constants for the RAP, but not for the temps + RAPparams->set("compute global constants: temporaries", RAPparams->get("compute global constants: temporaries", false)); + RAPparams->set("compute global constants", true); + + // Allow optimization of storage. + // This is necessary for new faster Epetra MM kernels. + // Seems to work with matrix modifications to repair diagonal entries. 
+ + if (pL.get("transpose: use implicit") == true) { + SubFactoryMonitor m2(*this, "MxM: P' x (AP) (implicit)", coarseLevel); + + Ac = MatrixMatrix::Multiply(*P, doTranspose, *AP, !doTranspose, Ac, GetOStream(Statistics2), + doFillComplete, doOptimizeStorage, labelstr + std::string("MueLu::R*(AP)-implicit-") + levelstr.str(), RAPparams); + } else { - RCP RAPparams = rcp(new ParameterList); - if(pL.isSublist("matrixmatrix: kernel params")) - RAPparams->sublist("matrixmatrix: kernel params") = pL.sublist("matrixmatrix: kernel params"); + RCP R = Get >(coarseLevel, "R"); - if (coarseLevel.IsAvailable("RAP reuse data", this)) { - GetOStream(static_cast(Runtime0 | Test)) << "Reusing previous RAP data" << std::endl; + SubFactoryMonitor m2(*this, "MxM: R x (AP) (explicit)", coarseLevel); - RAPparams = coarseLevel.Get< RCP >("RAP reuse data", this); + Ac = MatrixMatrix::Multiply(*R, !doTranspose, *AP, !doTranspose, Ac, GetOStream(Statistics2), + doFillComplete, doOptimizeStorage, labelstr + std::string("MueLu::R*(AP)-explicit-") + levelstr.str(), RAPparams); + } + + Teuchos::ArrayView relativeFloor = pL.get >("rap: relative diagonal floor")(); + if (relativeFloor.size() > 0) { + Xpetra::MatrixUtils::RelativeDiagonalBoost(Ac, relativeFloor, GetOStream(Statistics2)); + } + + bool repairZeroDiagonals = pL.get("RepairMainDiagonal") || pL.get("rap: fix zero diagonals"); + bool checkAc = pL.get("CheckMainDiagonal") || pL.get("rap: fix zero diagonals"); + ; + if (checkAc || repairZeroDiagonals) { + using magnitudeType = typename Teuchos::ScalarTraits::magnitudeType; + magnitudeType threshold; + if (pL.isType("rap: fix zero diagonals threshold")) + threshold = pL.get("rap: fix zero diagonals threshold"); + else + threshold = Teuchos::as(pL.get("rap: fix zero diagonals threshold")); + Scalar replacement = Teuchos::as(pL.get("rap: fix zero diagonals replacement")); + Xpetra::MatrixUtils::CheckRepairMainDiagonal(Ac, repairZeroDiagonals, GetOStream(Warnings1), threshold, replacement); 
+ } + + if (IsPrint(Statistics2)) { + RCP params = rcp(new ParameterList()); + ; + params->set("printLoadBalancingInfo", true); + params->set("printCommInfo", true); + GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*Ac, "Ac", params); + } - if (RAPparams->isParameter("graph")) - Ac = RAPparams->get< RCP >("graph"); + if (!Ac.is_null()) { + std::ostringstream oss; + oss << "A_" << coarseLevel.GetLevelID(); + Ac->setObjectLabel(oss.str()); + } + Set(coarseLevel, "A", Ac); + + if (!isGPU) { + APparams->set("graph", AP); + Set(coarseLevel, "AP reuse data", APparams); + } + if (!isGPU) { + RAPparams->set("graph", Ac); + Set(coarseLevel, "RAP reuse data", RAPparams); + } + } else { + RCP RAPparams = rcp(new ParameterList); + if (pL.isSublist("matrixmatrix: kernel params")) + RAPparams->sublist("matrixmatrix: kernel params") = pL.sublist("matrixmatrix: kernel params"); - // Some eigenvalue may have been cached with the matrix in the previous run. - // As the matrix values will be updated, we need to reset the eigenvalue. - Ac->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits::one()); - } + if (coarseLevel.IsAvailable("RAP reuse data", this)) { + GetOStream(static_cast(Runtime0 | Test)) << "Reusing previous RAP data" << std::endl; - // We *always* need global constants for the RAP, but not for the temps - RAPparams->set("compute global constants: temporaries",RAPparams->get("compute global constants: temporaries",false)); - RAPparams->set("compute global constants",true); + RAPparams = coarseLevel.Get >("RAP reuse data", this); + + if (RAPparams->isParameter("graph")) + Ac = RAPparams->get >("graph"); + + // Some eigenvalue may have been cached with the matrix in the previous run. + // As the matrix values will be updated, we need to reset the eigenvalue. 
+ Ac->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits::one()); + } - if (pL.get("transpose: use implicit") == true) { + // We *always* need global constants for the RAP, but not for the temps + RAPparams->set("compute global constants: temporaries", RAPparams->get("compute global constants: temporaries", false)); + RAPparams->set("compute global constants", true); - Ac = MatrixFactory::Build(P->getDomainMap(), Teuchos::as(0)); + if (pL.get("transpose: use implicit") == true) { + Ac = MatrixFactory::Build(P->getDomainMap(), Teuchos::as(0)); - SubFactoryMonitor m2(*this, "MxMxM: R x A x P (implicit)", coarseLevel); + SubFactoryMonitor m2(*this, "MxMxM: R x A x P (implicit)", coarseLevel); - Xpetra::TripleMatrixMultiply:: + Xpetra::TripleMatrixMultiply:: MultiplyRAP(*P, doTranspose, *A, !doTranspose, *P, !doTranspose, *Ac, doFillComplete, - doOptimizeStorage, labelstr+std::string("MueLu::R*A*P-implicit-")+levelstr.str(), - RAPparams); - } else { - RCP R = Get< RCP >(coarseLevel, "R"); - Ac = MatrixFactory::Build(R->getRowMap(), Teuchos::as(0)); + doOptimizeStorage, labelstr + std::string("MueLu::R*A*P-implicit-") + levelstr.str(), + RAPparams); + } else { + RCP R = Get >(coarseLevel, "R"); + Ac = MatrixFactory::Build(R->getRowMap(), Teuchos::as(0)); - SubFactoryMonitor m2(*this, "MxMxM: R x A x P (explicit)", coarseLevel); + SubFactoryMonitor m2(*this, "MxMxM: R x A x P (explicit)", coarseLevel); - Xpetra::TripleMatrixMultiply:: + Xpetra::TripleMatrixMultiply:: MultiplyRAP(*R, !doTranspose, *A, !doTranspose, *P, !doTranspose, *Ac, doFillComplete, - doOptimizeStorage, labelstr+std::string("MueLu::R*A*P-explicit-")+levelstr.str(), + doOptimizeStorage, labelstr + std::string("MueLu::R*A*P-explicit-") + levelstr.str(), RAPparams); - } - - Teuchos::ArrayView relativeFloor = pL.get >("rap: relative diagonal floor")(); - if(relativeFloor.size() > 0) { - Xpetra::MatrixUtils::RelativeDiagonalBoost(Ac, relativeFloor,GetOStream(Statistics2)); - } - - bool repairZeroDiagonals = 
pL.get("RepairMainDiagonal") || pL.get("rap: fix zero diagonals"); - bool checkAc = pL.get("CheckMainDiagonal")|| pL.get("rap: fix zero diagonals"); ; - if (checkAc || repairZeroDiagonals) { - using magnitudeType = typename Teuchos::ScalarTraits::magnitudeType; - magnitudeType threshold; - if (pL.isType("rap: fix zero diagonals threshold")) - threshold = pL.get("rap: fix zero diagonals threshold"); - else - threshold = Teuchos::as(pL.get("rap: fix zero diagonals threshold")); - Scalar replacement = Teuchos::as(pL.get("rap: fix zero diagonals replacement")); - Xpetra::MatrixUtils::CheckRepairMainDiagonal(Ac, repairZeroDiagonals, GetOStream(Warnings1), threshold, replacement); - } - - - if (IsPrint(Statistics2)) { - RCP params = rcp(new ParameterList());; - params->set("printLoadBalancingInfo", true); - params->set("printCommInfo", true); - GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*Ac, "Ac", params); - } - - if(!Ac.is_null()) {std::ostringstream oss; oss << "A_" << coarseLevel.GetLevelID(); Ac->setObjectLabel(oss.str());} - Set(coarseLevel, "A", Ac); - - if (!isGPU) { - RAPparams->set("graph", Ac); - Set(coarseLevel, "RAP reuse data", RAPparams); - } } + Teuchos::ArrayView relativeFloor = pL.get >("rap: relative diagonal floor")(); + if (relativeFloor.size() > 0) { + Xpetra::MatrixUtils::RelativeDiagonalBoost(Ac, relativeFloor, GetOStream(Statistics2)); + } - } + bool repairZeroDiagonals = pL.get("RepairMainDiagonal") || pL.get("rap: fix zero diagonals"); + bool checkAc = pL.get("CheckMainDiagonal") || pL.get("rap: fix zero diagonals"); + ; + if (checkAc || repairZeroDiagonals) { + using magnitudeType = typename Teuchos::ScalarTraits::magnitudeType; + magnitudeType threshold; + if (pL.isType("rap: fix zero diagonals threshold")) + threshold = pL.get("rap: fix zero diagonals threshold"); + else + threshold = Teuchos::as(pL.get("rap: fix zero diagonals threshold")); + Scalar replacement = Teuchos::as(pL.get("rap: fix zero diagonals replacement")); + 
Xpetra::MatrixUtils::CheckRepairMainDiagonal(Ac, repairZeroDiagonals, GetOStream(Warnings1), threshold, replacement); + } -#ifdef HAVE_MUELU_DEBUG - MatrixUtils::checkLocalRowMapMatchesColMap(*Ac); -#endif // HAVE_MUELU_DEBUG - - if (transferFacts_.begin() != transferFacts_.end()) { - SubFactoryMonitor m(*this, "Projections", coarseLevel); - - // call Build of all user-given transfer factories - for (std::vector >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) { - RCP fac = *it; - GetOStream(Runtime0) << "RAPFactory: call transfer factory: " << fac->description() << std::endl; - fac->CallBuild(coarseLevel); - // Coordinates transfer is marginally different from all other operations - // because it is *optional*, and not required. For instance, we may need - // coordinates only on level 4 if we start repartitioning from that level, - // but we don't need them on level 1,2,3. As our current Hierarchy setup - // assumes propagation of dependencies only through three levels, this - // means that we need to rely on other methods to propagate optional data. - // - // The method currently used is through RAP transfer factories, which are - // simply factories which are called at the end of RAP with a single goal: - // transfer some fine data to coarser level. Because these factories are - // kind of outside of the mainline factories, they behave different. In - // particular, we call their Build method explicitly, rather than through - // Get calls. This difference is significant, as the Get call is smart - // enough to know when to release all factory dependencies, and Build is - // dumb. This led to the following CoordinatesTransferFactory sequence: - // 1. Request level 0 - // 2. Request level 1 - // 3. Request level 0 - // 4. Release level 0 - // 5. Release level 1 - // - // The problem is missing "6. Release level 0". Because it was missing, - // we had outstanding request on "Coordinates", "Aggregates" and - // "CoarseMap" on level 0. 
- // - // This was fixed by explicitly calling Release on transfer factories in - // RAPFactory. I am still unsure how exactly it works, but now we have - // clear data requests for all levels. - coarseLevel.Release(*fac); + if (IsPrint(Statistics2)) { + RCP params = rcp(new ParameterList()); + ; + params->set("printLoadBalancingInfo", true); + params->set("printCommInfo", true); + GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*Ac, "Ac", params); } - } + if (!Ac.is_null()) { + std::ostringstream oss; + oss << "A_" << coarseLevel.GetLevelID(); + Ac->setObjectLabel(oss.str()); + } + Set(coarseLevel, "A", Ac); + + if (!isGPU) { + RAPparams->set("graph", Ac); + Set(coarseLevel, "RAP reuse data", RAPparams); + } + } } - template - void RAPFactory::AddTransferFactory(const RCP& factory) { - // check if it's a TwoLevelFactoryBase based transfer factory - TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::rcp_dynamic_cast(factory) == Teuchos::null, Exceptions::BadCast, - "MueLu::RAPFactory::AddTransferFactory: Transfer factory is not derived from TwoLevelFactoryBase. " - "This is very strange. 
(Note: you can remove this exception if there's a good reason for)"); - TEUCHOS_TEST_FOR_EXCEPTION(hasDeclaredInput_, Exceptions::RuntimeError, "MueLu::RAPFactory::AddTransferFactory: Factory is being added after we have already declared input"); - transferFacts_.push_back(factory); +#ifdef HAVE_MUELU_DEBUG + MatrixUtils::checkLocalRowMapMatchesColMap(*Ac); +#endif // HAVE_MUELU_DEBUG + + if (transferFacts_.begin() != transferFacts_.end()) { + SubFactoryMonitor m(*this, "Projections", coarseLevel); + + // call Build of all user-given transfer factories + for (std::vector >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) { + RCP fac = *it; + GetOStream(Runtime0) << "RAPFactory: call transfer factory: " << fac->description() << std::endl; + fac->CallBuild(coarseLevel); + // Coordinates transfer is marginally different from all other operations + // because it is *optional*, and not required. For instance, we may need + // coordinates only on level 4 if we start repartitioning from that level, + // but we don't need them on level 1,2,3. As our current Hierarchy setup + // assumes propagation of dependencies only through three levels, this + // means that we need to rely on other methods to propagate optional data. + // + // The method currently used is through RAP transfer factories, which are + // simply factories which are called at the end of RAP with a single goal: + // transfer some fine data to coarser level. Because these factories are + // kind of outside of the mainline factories, they behave different. In + // particular, we call their Build method explicitly, rather than through + // Get calls. This difference is significant, as the Get call is smart + // enough to know when to release all factory dependencies, and Build is + // dumb. This led to the following CoordinatesTransferFactory sequence: + // 1. Request level 0 + // 2. Request level 1 + // 3. Request level 0 + // 4. Release level 0 + // 5. 
Release level 1 + // + // The problem is missing "6. Release level 0". Because it was missing, + // we had outstanding request on "Coordinates", "Aggregates" and + // "CoarseMap" on level 0. + // + // This was fixed by explicitly calling Release on transfer factories in + // RAPFactory. I am still unsure how exactly it works, but now we have + // clear data requests for all levels. + coarseLevel.Release(*fac); + } } +} + +template +void RAPFactory::AddTransferFactory(const RCP& factory) { + // check if it's a TwoLevelFactoryBase based transfer factory + TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::rcp_dynamic_cast(factory) == Teuchos::null, Exceptions::BadCast, + "MueLu::RAPFactory::AddTransferFactory: Transfer factory is not derived from TwoLevelFactoryBase. " + "This is very strange. (Note: you can remove this exception if there's a good reason for)"); + TEUCHOS_TEST_FOR_EXCEPTION(hasDeclaredInput_, Exceptions::RuntimeError, "MueLu::RAPFactory::AddTransferFactory: Factory is being added after we have already declared input"); + transferFacts_.push_back(factory); +} -} //namespace MueLu +} //namespace MueLu #define MUELU_RAPFACTORY_SHORT -#endif // MUELU_RAPFACTORY_DEF_HPP +#endif // MUELU_RAPFACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_RAPShiftFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_RAPShiftFactory_decl.hpp index 91cbf254d6fe..17433ebfe85b 100644 --- a/packages/muelu/src/Misc/MueLu_RAPShiftFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_RAPShiftFactory_decl.hpp @@ -62,90 +62,88 @@ #include "MueLu_TwoLevelFactoryBase.hpp" namespace MueLu { - /*! +/*! @class RAPShiftFactory @brief Factory for building coarse grid matrices, when the matrix is of the form K+a*M. Useful when you want to change the shift variable ("a") at every level. Each level must store the stiffness matrix K and mass matrix M separately. 
*/ - template - class RAPShiftFactory : public TwoLevelFactoryBase { +template +class RAPShiftFactory : public TwoLevelFactoryBase { #undef MUELU_RAPSHIFTFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - RAPShiftFactory(); + RAPShiftFactory(); - virtual ~RAPShiftFactory() { } + virtual ~RAPShiftFactory() {} - //@} + //@} - //! @name Input - //@{ + //! @name Input + //@{ - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - void DeclareInput(Level &fineLevel, Level &coarseLevel) const; + void DeclareInput(Level &fineLevel, Level &coarseLevel) const; - //@} + //@} - //! @name Build methods. - //@{ - void Build(Level &fineLevel, Level &coarseLevel) const; - //@} + //! @name Build methods. + //@{ + void Build(Level &fineLevel, Level &coarseLevel) const; + //@} - //! @name Handling of user-defined transfer factories - //@{ + //! @name Handling of user-defined transfer factories + //@{ - //! Indicate that the restriction operator action should be implicitly defined by the transpose of the prolongator. - void SetImplicitTranspose(bool const &implicit) { - implicitTranspose_ = implicit; - } + //! Indicate that the restriction operator action should be implicitly defined by the transpose of the prolongator. + void SetImplicitTranspose(bool const &implicit) { + implicitTranspose_ = implicit; + } - void SetShifts(std::vector& shifts) { - shifts_.clear(); - shifts_ = shifts; - } + void SetShifts(std::vector &shifts) { + shifts_.clear(); + shifts_ = shifts; + } - //@} + //@} - //@{ - /*! @brief Add transfer factory in the end of list of transfer factories in RepartitionAcFactory. + //@{ + /*! @brief Add transfer factory in the end of list of transfer factories in RepartitionAcFactory. Transfer factories are derived from TwoLevelFactoryBase and project some data from the fine level to the next coarser level. 
*/ - void AddTransferFactory(const RCP& factory); + void AddTransferFactory(const RCP &factory); - // TODO add a function to remove a specific transfer factory? + // TODO add a function to remove a specific transfer factory? - //! Returns number of transfer factories. - size_t NumTransferFactories() const { return transferFacts_.size(); } + //! Returns number of transfer factories. + size_t NumTransferFactories() const { return transferFacts_.size(); } - //@} + //@} - private: + private: + //! If true, the action of the restriction operator action is implicitly defined by the transpose of the prolongator. + bool implicitTranspose_; - //! If true, the action of the restriction operator action is implicitly defined by the transpose of the prolongator. - bool implicitTranspose_; + //! list of user-defined transfer Factories + std::vector > transferFacts_; + // vector of shifting terms + std::vector shifts_; - //! list of user-defined transfer Factories - std::vector > transferFacts_; +}; //class RAPShiftFactory - // vector of shifting terms - std::vector shifts_; - - }; //class RAPShiftFactory - -} //namespace MueLu +} //namespace MueLu #define MUELU_RAPSHIFTFACTORY_SHORT -#endif // MUELU_RAPSHIFTFACTORY_DECL_HPP +#endif // MUELU_RAPSHIFTFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_RAPShiftFactory_def.hpp b/packages/muelu/src/Misc/MueLu_RAPShiftFactory_def.hpp index b3c1c0833bb4..1c06b531bccf 100644 --- a/packages/muelu/src/Misc/MueLu_RAPShiftFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_RAPShiftFactory_def.hpp @@ -54,7 +54,6 @@ #include #include - #include "MueLu_RAPShiftFactory_decl.hpp" #include "MueLu_MasterList.hpp" #include "MueLu_Monitor.hpp" @@ -62,338 +61,342 @@ namespace MueLu { - /*********************************************************************************************************/ - template - RAPShiftFactory::RAPShiftFactory() - : implicitTranspose_(false) { } - 
+/*********************************************************************************************************/ +template +RAPShiftFactory::RAPShiftFactory() + : implicitTranspose_(false) {} - /*********************************************************************************************************/ - template - RCP RAPShiftFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +/*********************************************************************************************************/ +template +RCP RAPShiftFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("transpose: use implicit"); - SET_VALID_ENTRY("rap: fix zero diagonals"); - SET_VALID_ENTRY("rap: shift"); - SET_VALID_ENTRY("rap: shift array"); - SET_VALID_ENTRY("rap: cfl array"); - SET_VALID_ENTRY("rap: shift diagonal M"); - SET_VALID_ENTRY("rap: shift low storage"); - SET_VALID_ENTRY("rap: relative diagonal floor"); -#undef SET_VALID_ENTRY - - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A used during the prolongator smoothing process"); - validParamList->set< RCP >("M", Teuchos::null, "Generating factory of the matrix M used during the non-Galerkin RAP"); - validParamList->set< RCP >("Mdiag", Teuchos::null, "Generating factory of the matrix Mdiag used during the non-Galerkin RAP"); - validParamList->set< RCP >("K", Teuchos::null, "Generating factory of the matrix K used during the non-Galerkin RAP"); - validParamList->set< RCP >("P", Teuchos::null, "Prolongator factory"); - validParamList->set< RCP >("R", Teuchos::null, "Restrictor factory"); - - validParamList->set< bool > ("CheckMainDiagonal", false, "Check main diagonal for zeros"); - validParamList->set< bool > ("RepairMainDiagonal", false, "Repair zeros on main diagonal"); - - validParamList->set > ("deltaT", Teuchos::null, "user deltaT"); - 
validParamList->set > ("cfl", Teuchos::null, "user cfl"); - validParamList->set > ("cfl-based shift array", Teuchos::null, "MueLu-generated shift array for CFL-based shifting"); - - // Make sure we don't recursively validate options for the matrixmatrix kernels - ParameterList norecurse; - norecurse.disableRecursiveValidation(); - validParamList->set ("matrixmatrix: kernel params", norecurse, "MatrixMatrix kernel parameters"); - - return validParamList; + SET_VALID_ENTRY("transpose: use implicit"); + SET_VALID_ENTRY("rap: fix zero diagonals"); + SET_VALID_ENTRY("rap: shift"); + SET_VALID_ENTRY("rap: shift array"); + SET_VALID_ENTRY("rap: cfl array"); + SET_VALID_ENTRY("rap: shift diagonal M"); + SET_VALID_ENTRY("rap: shift low storage"); + SET_VALID_ENTRY("rap: relative diagonal floor"); +#undef SET_VALID_ENTRY + + validParamList->set >("A", Teuchos::null, "Generating factory of the matrix A used during the prolongator smoothing process"); + validParamList->set >("M", Teuchos::null, "Generating factory of the matrix M used during the non-Galerkin RAP"); + validParamList->set >("Mdiag", Teuchos::null, "Generating factory of the matrix Mdiag used during the non-Galerkin RAP"); + validParamList->set >("K", Teuchos::null, "Generating factory of the matrix K used during the non-Galerkin RAP"); + validParamList->set >("P", Teuchos::null, "Prolongator factory"); + validParamList->set >("R", Teuchos::null, "Restrictor factory"); + + validParamList->set("CheckMainDiagonal", false, "Check main diagonal for zeros"); + validParamList->set("RepairMainDiagonal", false, "Repair zeros on main diagonal"); + + validParamList->set >("deltaT", Teuchos::null, "user deltaT"); + validParamList->set >("cfl", Teuchos::null, "user cfl"); + validParamList->set >("cfl-based shift array", Teuchos::null, "MueLu-generated shift array for CFL-based shifting"); + + // Make sure we don't recursively validate options for the matrixmatrix kernels + ParameterList norecurse; + 
norecurse.disableRecursiveValidation(); + validParamList->set("matrixmatrix: kernel params", norecurse, "MatrixMatrix kernel parameters"); + + return validParamList; +} + +/*********************************************************************************************************/ +template +void RAPShiftFactory::DeclareInput(Level &fineLevel, Level &coarseLevel) const { + const Teuchos::ParameterList &pL = GetParameterList(); + + bool use_mdiag = false; + if (pL.isParameter("rap: shift diagonal M")) + use_mdiag = pL.get("rap: shift diagonal M"); + + // The low storage version requires mdiag + bool use_low_storage = false; + if (pL.isParameter("rap: shift low storage")) { + use_low_storage = pL.get("rap: shift low storage"); + use_mdiag = use_low_storage ? true : use_mdiag; + } + + if (implicitTranspose_ == false) { + Input(coarseLevel, "R"); + } + + if (!use_low_storage) + Input(fineLevel, "K"); + else + Input(fineLevel, "A"); + Input(coarseLevel, "P"); + + if (!use_mdiag) + Input(fineLevel, "M"); + else + Input(fineLevel, "Mdiag"); + + // CFL array stuff + if (pL.isParameter("rap: cfl array") && pL.get >("rap: cfl array").size() > 0) { + if (fineLevel.GetLevelID() == 0) { + if (fineLevel.IsAvailable("deltaT", NoFactory::get())) { + fineLevel.DeclareInput("deltaT", NoFactory::get(), this); + } else { + TEUCHOS_TEST_FOR_EXCEPTION(fineLevel.IsAvailable("fine deltaT", NoFactory::get()), + Exceptions::RuntimeError, + "deltaT was not provided by the user on level0!"); + } + + if (fineLevel.IsAvailable("cfl", NoFactory::get())) { + fineLevel.DeclareInput("cfl", NoFactory::get(), this); + } else { + TEUCHOS_TEST_FOR_EXCEPTION(fineLevel.IsAvailable("fine cfl", NoFactory::get()), + Exceptions::RuntimeError, + "cfl was not provided by the user on level0!"); + } + } else { + Input(fineLevel, "cfl-based shift array"); + } + } + + // call DeclareInput of all user-given transfer factories + for (std::vector >::const_iterator it = transferFacts_.begin(); it != 
transferFacts_.end(); ++it) { + (*it)->CallDeclareInput(coarseLevel); } +} - /*********************************************************************************************************/ - template - void RAPShiftFactory::DeclareInput(Level &fineLevel, Level &coarseLevel) const { - const Teuchos::ParameterList& pL = GetParameterList(); +template +void RAPShiftFactory::Build(Level &fineLevel, Level &coarseLevel) const { // FIXME make fineLevel const + { + FactoryMonitor m(*this, "Computing Ac", coarseLevel); + const Teuchos::ParameterList &pL = GetParameterList(); - bool use_mdiag = false; - if(pL.isParameter("rap: shift diagonal M")) - use_mdiag = pL.get("rap: shift diagonal M"); + bool M_is_diagonal = false; + if (pL.isParameter("rap: shift diagonal M")) + M_is_diagonal = pL.get("rap: shift diagonal M"); // The low storage version requires mdiag bool use_low_storage = false; - if(pL.isParameter("rap: shift low storage")) { + if (pL.isParameter("rap: shift low storage")) { use_low_storage = pL.get("rap: shift low storage"); - use_mdiag = use_low_storage ? true : use_mdiag; + M_is_diagonal = use_low_storage ? true : M_is_diagonal; } - if (implicitTranspose_ == false) { - Input(coarseLevel, "R"); + Teuchos::ArrayView doubleShifts; + Teuchos::ArrayRCP myshifts; + if (pL.isParameter("rap: shift array") && pL.get >("rap: shift array").size() > 0) { + // Do we have an array of shifts? 
If so, we set doubleShifts_ + doubleShifts = pL.get >("rap: shift array")(); } - - if(!use_low_storage) Input(fineLevel, "K"); - else Input(fineLevel, "A"); - Input(coarseLevel, "P"); - - if(!use_mdiag) Input(fineLevel, "M"); - else Input(fineLevel, "Mdiag"); - - // CFL array stuff - if(pL.isParameter("rap: cfl array") && pL.get >("rap: cfl array").size() > 0) { - if(fineLevel.GetLevelID() == 0) { - if(fineLevel.IsAvailable("deltaT", NoFactory::get())) { - fineLevel.DeclareInput("deltaT", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(fineLevel.IsAvailable("fine deltaT", NoFactory::get()), - Exceptions::RuntimeError, - "deltaT was not provided by the user on level0!"); + if (pL.isParameter("rap: cfl array") && pL.get >("rap: cfl array").size() > 0) { + // Do we have an array of CFLs? If so, we calculated the shifts from them. + Teuchos::ArrayView CFLs = pL.get >("rap: cfl array")(); + if (fineLevel.GetLevelID() == 0) { + double dt = Get(fineLevel, "deltaT"); + double cfl = Get(fineLevel, "cfl"); + double ts_at_cfl1 = dt / cfl; + myshifts.resize(CFLs.size()); + Teuchos::Array myCFLs(CFLs.size()); + myCFLs[0] = cfl; + + // Never make the CFL bigger + for (int i = 1; i < (int)CFLs.size(); i++) + myCFLs[i] = (CFLs[i] > cfl) ? 
cfl : CFLs[i]; + + { + std::ostringstream ofs; + ofs << "RAPShiftFactory: CFL schedule = "; + for (int i = 0; i < (int)CFLs.size(); i++) + ofs << " " << myCFLs[i]; + GetOStream(Statistics0) << ofs.str() << std::endl; } - - if(fineLevel.IsAvailable("cfl", NoFactory::get())) { - fineLevel.DeclareInput("cfl", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(fineLevel.IsAvailable("fine cfl", NoFactory::get()), - Exceptions::RuntimeError, - "cfl was not provided by the user on level0!"); - } - } - else { - Input(fineLevel,"cfl-based shift array"); + GetOStream(Statistics0) << "RAPShiftFactory: Timestep at CFL=1 is " << ts_at_cfl1 << " " << std::endl; + + // The shift array needs to be 1/dt + for (int i = 0; i < (int)myshifts.size(); i++) + myshifts[i] = 1.0 / (ts_at_cfl1 * myCFLs[i]); + doubleShifts = myshifts(); + + { + std::ostringstream ofs; + ofs << "RAPShiftFactory: shift schedule = "; + for (int i = 0; i < (int)doubleShifts.size(); i++) + ofs << " " << doubleShifts[i]; + GetOStream(Statistics0) << ofs.str() << std::endl; + } + Set(coarseLevel, "cfl-based shift array", myshifts); + } else { + myshifts = Get >(fineLevel, "cfl-based shift array"); + doubleShifts = myshifts(); + Set(coarseLevel, "cfl-based shift array", myshifts); + // NOTE: If we're not on level zero, then we should have a shift array } } - // call DeclareInput of all user-given transfer factories - for(std::vector >::const_iterator it = transferFacts_.begin(); it!=transferFacts_.end(); ++it) { - (*it)->CallDeclareInput(coarseLevel); - } - } + // Inputs: K, M, P + // Note: In the low-storage case we do not keep a separate "K", we just use A + RCP K; + RCP M; + RCP Mdiag; + + if (use_low_storage) + K = Get >(fineLevel, "A"); + else + K = Get >(fineLevel, "K"); + if (!M_is_diagonal) + M = Get >(fineLevel, "M"); + else + Mdiag = Get >(fineLevel, "Mdiag"); + + RCP P = Get >(coarseLevel, "P"); + + // Build Kc = RKP, Mc = RMP + RCP KP, MP; + + // Reuse pattern if available (multiple solve) 
+ // FIXME: Old style reuse doesn't work any more + // if (IsAvailable(coarseLevel, "AP Pattern")) { + // KP = Get< RCP >(coarseLevel, "AP Pattern"); + // MP = Get< RCP >(coarseLevel, "AP Pattern"); + // } - template - void RAPShiftFactory::Build(Level &fineLevel, Level &coarseLevel) const { // FIXME make fineLevel const { - FactoryMonitor m(*this, "Computing Ac", coarseLevel); - const Teuchos::ParameterList& pL = GetParameterList(); - - bool M_is_diagonal = false; - if(pL.isParameter("rap: shift diagonal M")) - M_is_diagonal = pL.get("rap: shift diagonal M"); - - // The low storage version requires mdiag - bool use_low_storage = false; - if(pL.isParameter("rap: shift low storage")) { - use_low_storage = pL.get("rap: shift low storage"); - M_is_diagonal = use_low_storage ? true : M_is_diagonal; + SubFactoryMonitor subM(*this, "MxM: K x P", coarseLevel); + KP = Xpetra::MatrixMatrix::Multiply(*K, false, *P, false, KP, GetOStream(Statistics2)); + if (!M_is_diagonal) { + MP = Xpetra::MatrixMatrix::Multiply(*M, false, *P, false, MP, GetOStream(Statistics2)); + } else { + MP = Xpetra::MatrixFactory2::BuildCopy(P); + MP->leftScale(*Mdiag); } - Teuchos::ArrayView doubleShifts; - Teuchos::ArrayRCP myshifts; - if(pL.isParameter("rap: shift array") && pL.get >("rap: shift array").size() > 0 ) { - // Do we have an array of shifts? If so, we set doubleShifts_ - doubleShifts = pL.get >("rap: shift array")(); - } - if(pL.isParameter("rap: cfl array") && pL.get >("rap: cfl array").size() > 0) { - // Do we have an array of CFLs? If so, we calculated the shifts from them. - Teuchos::ArrayView CFLs = pL.get >("rap: cfl array")(); - if(fineLevel.GetLevelID() == 0) { - double dt = Get(fineLevel,"deltaT"); - double cfl = Get(fineLevel,"cfl"); - double ts_at_cfl1 = dt / cfl; - myshifts.resize(CFLs.size()); - Teuchos::Array myCFLs(CFLs.size()); - myCFLs[0] = cfl; - - // Never make the CFL bigger - for(int i=1; i<(int)CFLs.size(); i++) - myCFLs[i] = (CFLs[i]> cfl) ? 
cfl : CFLs[i]; - - { - std::ostringstream ofs; - ofs<<"RAPShiftFactory: CFL schedule = "; - for(int i=0; i<(int)CFLs.size(); i++) - ofs<<" "< > (fineLevel,"cfl-based shift array"); - doubleShifts = myshifts(); - Set(coarseLevel,"cfl-based shift array",myshifts); - // NOTE: If we're not on level zero, then we should have a shift array - } - } - - // Inputs: K, M, P - // Note: In the low-storage case we do not keep a separate "K", we just use A - RCP K; - RCP M; - RCP Mdiag; - - if(use_low_storage) K = Get< RCP >(fineLevel, "A"); - else K = Get< RCP >(fineLevel, "K"); - if(!M_is_diagonal) M = Get< RCP >(fineLevel, "M"); - else Mdiag = Get< RCP >(fineLevel, "Mdiag"); - - RCP P = Get< RCP >(coarseLevel, "P"); - - // Build Kc = RKP, Mc = RMP - RCP KP, MP; - - // Reuse pattern if available (multiple solve) - // FIXME: Old style reuse doesn't work any more - // if (IsAvailable(coarseLevel, "AP Pattern")) { - // KP = Get< RCP >(coarseLevel, "AP Pattern"); - // MP = Get< RCP >(coarseLevel, "AP Pattern"); - // } - - { - SubFactoryMonitor subM(*this, "MxM: K x P", coarseLevel); - KP = Xpetra::MatrixMatrix::Multiply(*K, false, *P, false, KP, GetOStream(Statistics2)); - if(!M_is_diagonal) { - MP = Xpetra::MatrixMatrix::Multiply(*M, false, *P, false, MP, GetOStream(Statistics2)); - } - else { - MP = Xpetra::MatrixFactory2::BuildCopy(P); - MP->leftScale(*Mdiag); - } - - Set(coarseLevel, "AP Pattern", KP); - } + Set(coarseLevel, "AP Pattern", KP); + } - bool doOptimizedStorage = true; + bool doOptimizedStorage = true; - RCP Ac, Kc, Mc; + RCP Ac, Kc, Mc; - // Reuse pattern if available (multiple solve) - // if (IsAvailable(coarseLevel, "RAP Pattern")) - // Ac = Get< RCP >(coarseLevel, "RAP Pattern"); + // Reuse pattern if available (multiple solve) + // if (IsAvailable(coarseLevel, "RAP Pattern")) + // Ac = Get< RCP >(coarseLevel, "RAP Pattern"); - bool doFillComplete=true; - if (implicitTranspose_) { - SubFactoryMonitor m2(*this, "MxM: P' x (KP) (implicit)", coarseLevel); - Kc = 
Xpetra::MatrixMatrix::Multiply(*P, true, *KP, false, Kc, GetOStream(Statistics2), doFillComplete, doOptimizedStorage); - Mc = Xpetra::MatrixMatrix::Multiply(*P, true, *MP, false, Mc, GetOStream(Statistics2), doFillComplete, doOptimizedStorage); - } - else { - RCP R = Get< RCP >(coarseLevel, "R"); - SubFactoryMonitor m2(*this, "MxM: R x (KP) (explicit)", coarseLevel); - Kc = Xpetra::MatrixMatrix::Multiply(*R, false, *KP, false, Kc, GetOStream(Statistics2), doFillComplete, doOptimizedStorage); - Mc = Xpetra::MatrixMatrix::Multiply(*R, false, *MP, false, Mc, GetOStream(Statistics2), doFillComplete, doOptimizedStorage); - } + bool doFillComplete = true; + if (implicitTranspose_) { + SubFactoryMonitor m2(*this, "MxM: P' x (KP) (implicit)", coarseLevel); + Kc = Xpetra::MatrixMatrix::Multiply(*P, true, *KP, false, Kc, GetOStream(Statistics2), doFillComplete, doOptimizedStorage); + Mc = Xpetra::MatrixMatrix::Multiply(*P, true, *MP, false, Mc, GetOStream(Statistics2), doFillComplete, doOptimizedStorage); + } else { + RCP R = Get >(coarseLevel, "R"); + SubFactoryMonitor m2(*this, "MxM: R x (KP) (explicit)", coarseLevel); + Kc = Xpetra::MatrixMatrix::Multiply(*R, false, *KP, false, Kc, GetOStream(Statistics2), doFillComplete, doOptimizedStorage); + Mc = Xpetra::MatrixMatrix::Multiply(*R, false, *MP, false, Mc, GetOStream(Statistics2), doFillComplete, doOptimizedStorage); + } - // Get the shift - // FIXME - We should really get rid of the shifts array and drive this the same way everything else works - // If we're using the recursive "low storage" version, we need to shift by ( \prod_{i=1}^k shift[i] - \prod_{i=1}^{k-1} shift[i]) to - // get the recursive relationships correct - int level = coarseLevel.GetLevelID(); - Scalar shift = Teuchos::ScalarTraits::zero(); - if(!use_low_storage) { - // High Storage version - if(level < (int)shifts_.size()) shift = shifts_[level]; - else shift = Teuchos::as(pL.get("rap: shift")); - } - else { - // Low Storage Version - if(level < 
(int)shifts_.size()) { - if(level==1) shift = shifts_[level]; - else { - Scalar prod1 = Teuchos::ScalarTraits::one(); - for(int i=1; i < level-1; i++) { - prod1 *= shifts_[i]; - } - shift = (prod1 * shifts_[level] - prod1); - } - } - else if(doubleShifts.size() != 0) { - double d_shift = 0.0; - if(level < doubleShifts.size()) - d_shift = doubleShifts[level] - doubleShifts[level-1]; - - if(d_shift < 0.0) - GetOStream(Warnings1) << "WARNING: RAPShiftFactory has detected a negative shift... This implies a less stable coarse grid."<(d_shift); - } + // Get the shift + // FIXME - We should really get rid of the shifts array and drive this the same way everything else works + // If we're using the recursive "low storage" version, we need to shift by ( \prod_{i=1}^k shift[i] - \prod_{i=1}^{k-1} shift[i]) to + // get the recursive relationships correct + int level = coarseLevel.GetLevelID(); + Scalar shift = Teuchos::ScalarTraits::zero(); + if (!use_low_storage) { + // High Storage version + if (level < (int)shifts_.size()) + shift = shifts_[level]; + else + shift = Teuchos::as(pL.get("rap: shift")); + } else { + // Low Storage Version + if (level < (int)shifts_.size()) { + if (level == 1) + shift = shifts_[level]; else { - double base_shift = pL.get("rap: shift"); - if(level == 1) shift = Teuchos::as(base_shift); - else shift = Teuchos::as(pow(base_shift,level) - pow(base_shift,level-1)); + Scalar prod1 = Teuchos::ScalarTraits::one(); + for (int i = 1; i < level - 1; i++) { + prod1 *= shifts_[i]; + } + shift = (prod1 * shifts_[level] - prod1); } + } else if (doubleShifts.size() != 0) { + double d_shift = 0.0; + if (level < doubleShifts.size()) + d_shift = doubleShifts[level] - doubleShifts[level - 1]; + + if (d_shift < 0.0) + GetOStream(Warnings1) << "WARNING: RAPShiftFactory has detected a negative shift... This implies a less stable coarse grid." 
<< std::endl; + shift = Teuchos::as(d_shift); + } else { + double base_shift = pL.get("rap: shift"); + if (level == 1) + shift = Teuchos::as(base_shift); + else + shift = Teuchos::as(pow(base_shift, level) - pow(base_shift, level - 1)); } - GetOStream(Runtime0) << "RAPShiftFactory: Using shift " << shift << std::endl; - - - // recombine to get K+shift*M - { - SubFactoryMonitor m2(*this, "Add: RKP + s*RMP", coarseLevel); - Xpetra::MatrixMatrix::TwoMatrixAdd(*Kc, false, Teuchos::ScalarTraits::one(), *Mc, false, shift, Ac, GetOStream(Statistics2)); - Ac->fillComplete(); - } - - Teuchos::ArrayView relativeFloor = pL.get >("rap: relative diagonal floor")(); - if(relativeFloor.size() > 0) - Xpetra::MatrixUtils::RelativeDiagonalBoost(Ac, relativeFloor,GetOStream(Statistics2)); - - - bool repairZeroDiagonals = pL.get("RepairMainDiagonal") || pL.get("rap: fix zero diagonals"); - bool checkAc = pL.get("CheckMainDiagonal")|| pL.get("rap: fix zero diagonals"); ; - if (checkAc || repairZeroDiagonals) - Xpetra::MatrixUtils::CheckRepairMainDiagonal(Ac, repairZeroDiagonals, GetOStream(Warnings1)); - - RCP params = rcp(new ParameterList());; - params->set("printLoadBalancingInfo", true); - GetOStream(Statistics0) << PerfUtils::PrintMatrixInfo(*Ac, "Ac", params); - - Set(coarseLevel, "A", Ac); - // We only need K in the 'high storage' mode - if(!use_low_storage) - Set(coarseLevel, "K", Kc); - - if(!M_is_diagonal) { - Set(coarseLevel, "M", Mc); - } - else { - // If M is diagonal, then we only pass that part down the hierarchy - // NOTE: Should we be doing some kind of rowsum instead? 
- RCP Mcv = Xpetra::VectorFactory::Build(Mc->getRowMap(),false); - Mc->getLocalDiagCopy(*Mcv); - Set(coarseLevel, "Mdiag", Mcv); - } + } + GetOStream(Runtime0) << "RAPShiftFactory: Using shift " << shift << std::endl; - // Set(coarseLevel, "RAP Pattern", Ac); + // recombine to get K+shift*M + { + SubFactoryMonitor m2(*this, "Add: RKP + s*RMP", coarseLevel); + Xpetra::MatrixMatrix::TwoMatrixAdd(*Kc, false, Teuchos::ScalarTraits::one(), *Mc, false, shift, Ac, GetOStream(Statistics2)); + Ac->fillComplete(); } - if (transferFacts_.begin() != transferFacts_.end()) { - SubFactoryMonitor m(*this, "Projections", coarseLevel); - - // call Build of all user-given transfer factories - for (std::vector >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) { - RCP fac = *it; - GetOStream(Runtime0) << "RAPShiftFactory: call transfer factory: " << fac->description() << std::endl; - fac->CallBuild(coarseLevel); - // AP (11/11/13): I am not sure exactly why we need to call Release, but we do need it to get rid - // of dangling data for CoordinatesTransferFactory - coarseLevel.Release(*fac); - } + Teuchos::ArrayView relativeFloor = pL.get >("rap: relative diagonal floor")(); + if (relativeFloor.size() > 0) + Xpetra::MatrixUtils::RelativeDiagonalBoost(Ac, relativeFloor, GetOStream(Statistics2)); + + bool repairZeroDiagonals = pL.get("RepairMainDiagonal") || pL.get("rap: fix zero diagonals"); + bool checkAc = pL.get("CheckMainDiagonal") || pL.get("rap: fix zero diagonals"); + ; + if (checkAc || repairZeroDiagonals) + Xpetra::MatrixUtils::CheckRepairMainDiagonal(Ac, repairZeroDiagonals, GetOStream(Warnings1)); + + RCP params = rcp(new ParameterList()); + ; + params->set("printLoadBalancingInfo", true); + GetOStream(Statistics0) << PerfUtils::PrintMatrixInfo(*Ac, "Ac", params); + + Set(coarseLevel, "A", Ac); + // We only need K in the 'high storage' mode + if (!use_low_storage) + Set(coarseLevel, "K", Kc); + + if (!M_is_diagonal) { + Set(coarseLevel, "M", Mc); 
+ } else { + // If M is diagonal, then we only pass that part down the hierarchy + // NOTE: Should we be doing some kind of rowsum instead? + RCP Mcv = Xpetra::VectorFactory::Build(Mc->getRowMap(), false); + Mc->getLocalDiagCopy(*Mcv); + Set(coarseLevel, "Mdiag", Mcv); } + + // Set(coarseLevel, "RAP Pattern", Ac); } - template - void RAPShiftFactory::AddTransferFactory(const RCP& factory) { - // check if it's a TwoLevelFactoryBase based transfer factory - TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::rcp_dynamic_cast(factory) == Teuchos::null, Exceptions::BadCast, "MueLu::RAPShiftFactory::AddTransferFactory: Transfer factory is not derived from TwoLevelFactoryBase. This is very strange. (Note: you can remove this exception if there's a good reason for)"); - transferFacts_.push_back(factory); + if (transferFacts_.begin() != transferFacts_.end()) { + SubFactoryMonitor m(*this, "Projections", coarseLevel); + + // call Build of all user-given transfer factories + for (std::vector >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) { + RCP fac = *it; + GetOStream(Runtime0) << "RAPShiftFactory: call transfer factory: " << fac->description() << std::endl; + fac->CallBuild(coarseLevel); + // AP (11/11/13): I am not sure exactly why we need to call Release, but we do need it to get rid + // of dangling data for CoordinatesTransferFactory + coarseLevel.Release(*fac); + } } +} + +template +void RAPShiftFactory::AddTransferFactory(const RCP &factory) { + // check if it's a TwoLevelFactoryBase based transfer factory + TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::rcp_dynamic_cast(factory) == Teuchos::null, Exceptions::BadCast, "MueLu::RAPShiftFactory::AddTransferFactory: Transfer factory is not derived from TwoLevelFactoryBase. This is very strange. 
(Note: you can remove this exception if there's a good reason for)"); + transferFacts_.push_back(factory); +} -} //namespace MueLu +} //namespace MueLu #define MUELU_RAPSHIFTFACTORY_SHORT -#endif // MUELU_RAPSHIFTFACTORY_DEF_HPP +#endif // MUELU_RAPSHIFTFACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_SchurComplementFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_SchurComplementFactory_decl.hpp index e365ab23c75c..f5b01a2f79c3 100644 --- a/packages/muelu/src/Misc/MueLu_SchurComplementFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_SchurComplementFactory_decl.hpp @@ -60,14 +60,12 @@ #include #include - #include "MueLu_FactoryBase_fwd.hpp" #include "MueLu_SingleLevelFactoryBase.hpp" - namespace MueLu { - /*! +/*! @class SchurComplementFactory class. @brief Factory for building the Schur Complement for a 2x2 block matrix. @@ -101,46 +99,45 @@ namespace MueLu { | A | SchurComplementFactory | The schur complement of the given block matrix. */ - template - class SchurComplementFactory : public SingleLevelFactoryBase { +template +class SchurComplementFactory : public SingleLevelFactoryBase { #undef MUELU_SCHURCOMPLEMENTFACTORY_SHORT - #include "MueLu_UseShortNames.hpp" - - public: - //! @name Constructors/Destructors. - //@{ +#include "MueLu_UseShortNames.hpp" - //! Constructor. - SchurComplementFactory() = default; + public: + //! @name Constructors/Destructors. + //@{ - //! Input - //@{ + //! Constructor. + SchurComplementFactory() = default; - void DeclareInput(Level& currentLevel) const; + //! Input + //@{ - RCP GetValidParameterList() const; + void DeclareInput(Level& currentLevel) const; - //@} + RCP GetValidParameterList() const; - //@{ - //! @name Build methods. + //@} - //! Build an object with this factory. - void Build(Level& currentLevel) const; + //@{ + //! @name Build methods. - //@} + //! Build an object with this factory. + void Build(Level& currentLevel) const; + //@} - private: - //! Schur complement calculation method. 
- RCP ComputeSchurComplement(RCP& bA, RCP& Ainv) const; + private: + //! Schur complement calculation method. + RCP ComputeSchurComplement(RCP& bA, RCP& Ainv) const; - }; // class SchurComplementFactory +}; // class SchurComplementFactory -} // namespace MueLu +} // namespace MueLu #define MUELU_SCHURCOMPLEMENTFACTORY_SHORT #endif /* MUELU_SCHURCOMPLEMENTFACTORY_DECL_HPP_ */ diff --git a/packages/muelu/src/Misc/MueLu_SchurComplementFactory_def.hpp b/packages/muelu/src/Misc/MueLu_SchurComplementFactory_def.hpp index 66cbd74fa792..c4503eec0d17 100644 --- a/packages/muelu/src/Misc/MueLu_SchurComplementFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_SchurComplementFactory_def.hpp @@ -62,152 +62,148 @@ namespace MueLu { - template - RCP SchurComplementFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - const SC one = Teuchos::ScalarTraits::one(); - - validParamList->set >("A" , NoFactory::getRCP(), "Generating factory of the matrix A used for building Schur complement (must be a 2x2 blocked operator)"); - validParamList->set >("Ainv" , Teuchos::null, "Generating factory of the inverse matrix used in the Schur complement"); - - validParamList->set ("omega", one, "Scaling parameter in S = A(1,1) - 1/omega A(1,0) Ainv A(0,1)"); - - return validParamList; - } - - template - void SchurComplementFactory::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "A"); - - // Get default or user-given inverse approximation factory - RCP AinvFact = GetFactory("Ainv"); - currentLevel.DeclareInput("Ainv", AinvFact.get(), this); - } - - template - void SchurComplementFactory::Build(Level& currentLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); - - RCP A = Get >(currentLevel, "A"); - RCP bA = rcp_dynamic_cast(A); - - TEUCHOS_TEST_FOR_EXCEPTION(bA.is_null(), Exceptions::BadCast, - "MueLu::SchurComplementFactory::Build: input matrix A is not of type BlockedCrsMatrix!"); - TEUCHOS_TEST_FOR_EXCEPTION(bA->Rows() != 2 || 
bA->Cols() != 2, Exceptions::RuntimeError, - "MueLu::SchurComplementFactory::Build: input matrix A is a " << bA->Rows() << "x" << bA->Cols() << " block matrix. We expect a 2x2 blocked operator."); - - // Calculate Schur Complement - RCP Ainv = currentLevel.Get >("Ainv", this->GetFactory("Ainv").get()); - RCP S = ComputeSchurComplement(bA, Ainv); - - GetOStream(Statistics1) << "S has " << S->getGlobalNumRows() << "x" << S->getGlobalNumCols() << " rows and columns." << std::endl; - - // NOTE: "A" generated by this factory is actually the Schur complement - // matrix, but it is required as all smoothers expect "A" - Set(currentLevel, "A", S); - } - - template - RCP> - SchurComplementFactory::ComputeSchurComplement(RCP& bA, RCP& Ainv) const { - - using STS = Teuchos::ScalarTraits; - const SC zero = STS::zero(), one = STS::one(); - - RCP A01 = bA->getMatrix(0,1); - RCP A10 = bA->getMatrix(1,0); - RCP A11 = bA->getMatrix(1,1); - - RCP bA01 = Teuchos::rcp_dynamic_cast(A01); - const bool isBlocked = (bA01 == Teuchos::null ? 
false : true); - - const ParameterList& pL = GetParameterList(); - const SC omega = pL.get("omega"); - - TEUCHOS_TEST_FOR_EXCEPTION(omega == zero, Exceptions::RuntimeError, - "MueLu::SchurComplementFactory::Build: Scaling parameter omega must not be zero to avoid division by zero."); - - RCP S = Teuchos::null; // Schur complement - RCP D = Teuchos::null; // temporary result for A10*Ainv*A01 - - // only if the off-diagonal blocks A10 and A01 are non-zero we have to do the MM multiplication - if(A01.is_null() == false && A10.is_null() == false) { - // scale with -1/omega - Ainv->scale(Teuchos::as(-one/omega)); - - // build Schur complement operator - if (!isBlocked) { - RCP myparams = rcp(new ParameterList); - myparams->set("compute global constants", true); - - // -1/omega*Ainv*A01 - TEUCHOS_TEST_FOR_EXCEPTION(A01->getRangeMap()->isSameAs(*(Ainv->getDomainMap())) == false, Exceptions::RuntimeError, - "MueLu::SchurComplementFactory::Build: RangeMap of A01 and domain map of Ainv are not the same."); - RCP C = MatrixMatrix::Multiply(*Ainv, false, *A01, false, GetOStream(Statistics2), true, true, std::string("SchurComplementFactory"), myparams); - - // -1/omega*A10*Ainv*A01 - TEUCHOS_TEST_FOR_EXCEPTION(A01->getRangeMap()->isSameAs(*(A10->getDomainMap())) == false, Exceptions::RuntimeError, - "MueLu::SchurComplementFactory::Build: RangeMap of A10 and domain map A01 are not the same."); - D = MatrixMatrix::Multiply(*A10, false, *C, false, GetOStream(Statistics2), true, true, std::string("SchurComplementFactory"), myparams); - } - else { - // nested blocking - auto bA10 = Teuchos::rcp_dynamic_cast(A10); - auto bAinv = Teuchos::rcp_dynamic_cast(Ainv); - TEUCHOS_TEST_FOR_EXCEPTION(bAinv == Teuchos::null, Exceptions::RuntimeError, - "MueLu::SchurComplementFactory::Build: Casting Ainv to BlockedCrsMatrix not possible."); - - // -1/omega*bAinv*bA01 - TEUCHOS_TEST_FOR_EXCEPTION(bA01->Rows() != bAinv->Cols(), Exceptions::RuntimeError, - "MueLu::SchurComplementFactory::Build: 
Block rows and cols of bA01 and bAinv are not compatible."); - RCP C = MatrixMatrix::TwoMatrixMultiplyBlock(*bAinv, false, *bA01, false, GetOStream(Statistics2)); - - // -1/omega*A10*Ainv*A01 - TEUCHOS_TEST_FOR_EXCEPTION(bA10->Rows() != bA01->Cols(), Exceptions::RuntimeError, - "MueLu::SchurComplementFactory::Build: Block rows and cols of bA10 and bA01 are not compatible."); - D = MatrixMatrix::TwoMatrixMultiplyBlock(*bA10, false, *C, false, GetOStream(Statistics2)); - } - if (!A11.is_null()) { - MatrixMatrix::TwoMatrixAdd(*A11, false, one, *D, false, one, S, GetOStream(Statistics2)); - S->fillComplete(); - - TEUCHOS_TEST_FOR_EXCEPTION(A11->getRangeMap()->isSameAs(*(S->getRangeMap())) == false, Exceptions::RuntimeError, - "MueLu::SchurComplementFactory::Build: RangeMap of A11 and S are not the same."); - TEUCHOS_TEST_FOR_EXCEPTION(A11->getDomainMap()->isSameAs(*(S->getDomainMap())) == false, Exceptions::RuntimeError, - "MueLu::SchurComplementFactory::Build: DomainMap of A11 and S are not the same."); - } - else { - S = MatrixFactory::BuildCopy(D); - } +template +RCP SchurComplementFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + + const SC one = Teuchos::ScalarTraits::one(); + + validParamList->set>("A", NoFactory::getRCP(), "Generating factory of the matrix A used for building Schur complement (must be a 2x2 blocked operator)"); + validParamList->set>("Ainv", Teuchos::null, "Generating factory of the inverse matrix used in the Schur complement"); + + validParamList->set("omega", one, "Scaling parameter in S = A(1,1) - 1/omega A(1,0) Ainv A(0,1)"); + + return validParamList; +} + +template +void SchurComplementFactory::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "A"); + + // Get default or user-given inverse approximation factory + RCP AinvFact = GetFactory("Ainv"); + currentLevel.DeclareInput("Ainv", AinvFact.get(), this); +} + +template +void SchurComplementFactory::Build(Level& currentLevel) const { 
+ FactoryMonitor m(*this, "Build", currentLevel); + + RCP A = Get>(currentLevel, "A"); + RCP bA = rcp_dynamic_cast(A); + + TEUCHOS_TEST_FOR_EXCEPTION(bA.is_null(), Exceptions::BadCast, + "MueLu::SchurComplementFactory::Build: input matrix A is not of type BlockedCrsMatrix!"); + TEUCHOS_TEST_FOR_EXCEPTION(bA->Rows() != 2 || bA->Cols() != 2, Exceptions::RuntimeError, + "MueLu::SchurComplementFactory::Build: input matrix A is a " << bA->Rows() << "x" << bA->Cols() << " block matrix. We expect a 2x2 blocked operator."); + + // Calculate Schur Complement + RCP Ainv = currentLevel.Get>("Ainv", this->GetFactory("Ainv").get()); + RCP S = ComputeSchurComplement(bA, Ainv); + + GetOStream(Statistics1) << "S has " << S->getGlobalNumRows() << "x" << S->getGlobalNumCols() << " rows and columns." << std::endl; + + // NOTE: "A" generated by this factory is actually the Schur complement + // matrix, but it is required as all smoothers expect "A" + Set(currentLevel, "A", S); +} + +template +RCP> +SchurComplementFactory::ComputeSchurComplement(RCP& bA, RCP& Ainv) const { + using STS = Teuchos::ScalarTraits; + const SC zero = STS::zero(), one = STS::one(); + + RCP A01 = bA->getMatrix(0, 1); + RCP A10 = bA->getMatrix(1, 0); + RCP A11 = bA->getMatrix(1, 1); + + RCP bA01 = Teuchos::rcp_dynamic_cast(A01); + const bool isBlocked = (bA01 == Teuchos::null ? 
false : true); + + const ParameterList& pL = GetParameterList(); + const SC omega = pL.get("omega"); + + TEUCHOS_TEST_FOR_EXCEPTION(omega == zero, Exceptions::RuntimeError, + "MueLu::SchurComplementFactory::Build: Scaling parameter omega must not be zero to avoid division by zero."); + + RCP S = Teuchos::null; // Schur complement + RCP D = Teuchos::null; // temporary result for A10*Ainv*A01 + + // only if the off-diagonal blocks A10 and A01 are non-zero we have to do the MM multiplication + if (A01.is_null() == false && A10.is_null() == false) { + // scale with -1/omega + Ainv->scale(Teuchos::as(-one / omega)); + + // build Schur complement operator + if (!isBlocked) { + RCP myparams = rcp(new ParameterList); + myparams->set("compute global constants", true); + + // -1/omega*Ainv*A01 + TEUCHOS_TEST_FOR_EXCEPTION(A01->getRangeMap()->isSameAs(*(Ainv->getDomainMap())) == false, Exceptions::RuntimeError, + "MueLu::SchurComplementFactory::Build: RangeMap of A01 and domain map of Ainv are not the same."); + RCP C = MatrixMatrix::Multiply(*Ainv, false, *A01, false, GetOStream(Statistics2), true, true, std::string("SchurComplementFactory"), myparams); + + // -1/omega*A10*Ainv*A01 + TEUCHOS_TEST_FOR_EXCEPTION(A01->getRangeMap()->isSameAs(*(A10->getDomainMap())) == false, Exceptions::RuntimeError, + "MueLu::SchurComplementFactory::Build: RangeMap of A10 and domain map A01 are not the same."); + D = MatrixMatrix::Multiply(*A10, false, *C, false, GetOStream(Statistics2), true, true, std::string("SchurComplementFactory"), myparams); + } else { + // nested blocking + auto bA10 = Teuchos::rcp_dynamic_cast(A10); + auto bAinv = Teuchos::rcp_dynamic_cast(Ainv); + TEUCHOS_TEST_FOR_EXCEPTION(bAinv == Teuchos::null, Exceptions::RuntimeError, + "MueLu::SchurComplementFactory::Build: Casting Ainv to BlockedCrsMatrix not possible."); + + // -1/omega*bAinv*bA01 + TEUCHOS_TEST_FOR_EXCEPTION(bA01->Rows() != bAinv->Cols(), Exceptions::RuntimeError, + "MueLu::SchurComplementFactory::Build: 
Block rows and cols of bA01 and bAinv are not compatible."); + RCP C = MatrixMatrix::TwoMatrixMultiplyBlock(*bAinv, false, *bA01, false, GetOStream(Statistics2)); + + // -1/omega*A10*Ainv*A01 + TEUCHOS_TEST_FOR_EXCEPTION(bA10->Rows() != bA01->Cols(), Exceptions::RuntimeError, + "MueLu::SchurComplementFactory::Build: Block rows and cols of bA10 and bA01 are not compatible."); + D = MatrixMatrix::TwoMatrixMultiplyBlock(*bA10, false, *C, false, GetOStream(Statistics2)); } - else { - if (!A11.is_null()) { - S = MatrixFactory::BuildCopy(A11); - } else { - S = MatrixFactory::Build(A11->getRowMap(), 10 /*A11->getLocalMaxNumRowEntries()*/); - S->fillComplete(A11->getDomainMap(),A11->getRangeMap()); - } + if (!A11.is_null()) { + MatrixMatrix::TwoMatrixAdd(*A11, false, one, *D, false, one, S, GetOStream(Statistics2)); + S->fillComplete(); + + TEUCHOS_TEST_FOR_EXCEPTION(A11->getRangeMap()->isSameAs(*(S->getRangeMap())) == false, Exceptions::RuntimeError, + "MueLu::SchurComplementFactory::Build: RangeMap of A11 and S are not the same."); + TEUCHOS_TEST_FOR_EXCEPTION(A11->getDomainMap()->isSameAs(*(S->getDomainMap())) == false, Exceptions::RuntimeError, + "MueLu::SchurComplementFactory::Build: DomainMap of A11 and S are not the same."); + } else { + S = MatrixFactory::BuildCopy(D); } - - // Check whether Schur complement operator is a 1x1 block matrix. - // If so, unwrap it and return the CrsMatrix based Matrix object - // We need this, as single-block smoothers expect it this way. - // In case of Thyra GIDs we obtain a Schur complement operator in Thyra GIDs - // This may make some special handling in feeding the SchurComplement solver Apply routine - // necessary! 
- if (isBlocked) { - RCP bS = Teuchos::rcp_dynamic_cast(S); - - if (bS != Teuchos::null && bS->Rows() == 1 && bS->Cols() == 1) { - RCP temp = bS->getCrsMatrix(); - S.swap(temp); - } + } else { + if (!A11.is_null()) { + S = MatrixFactory::BuildCopy(A11); + } else { + S = MatrixFactory::Build(A11->getRowMap(), 10 /*A11->getLocalMaxNumRowEntries()*/); + S->fillComplete(A11->getDomainMap(), A11->getRangeMap()); } + } - return S; + // Check whether Schur complement operator is a 1x1 block matrix. + // If so, unwrap it and return the CrsMatrix based Matrix object + // We need this, as single-block smoothers expect it this way. + // In case of Thyra GIDs we obtain a Schur complement operator in Thyra GIDs + // This may make some special handling in feeding the SchurComplement solver Apply routine + // necessary! + if (isBlocked) { + RCP bS = Teuchos::rcp_dynamic_cast(S); + + if (bS != Teuchos::null && bS->Rows() == 1 && bS->Cols() == 1) { + RCP temp = bS->getCrsMatrix(); + S.swap(temp); + } } -} // namespace MueLu + return S; +} + +} // namespace MueLu #endif /* MUELU_SCHURCOMPLEMENTFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Misc/MueLu_SegregatedAFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_SegregatedAFactory_decl.hpp index 698be7f781aa..175f873b56b5 100644 --- a/packages/muelu/src/Misc/MueLu_SegregatedAFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_SegregatedAFactory_decl.hpp @@ -55,7 +55,7 @@ namespace MueLu { - /*! +/*! @class SegregatedAFactory class. @brief Factory for building a new "segregated" A operator. Here, "segregated" means that the user provides a map (containing a subset of the row gids of the input matrix A) and the factory @@ -69,48 +69,46 @@ namespace MueLu { does not distinguish between matrix entries which are zero and nonzero. 
*/ - template - class SegregatedAFactory : public SingleLevelFactoryBase { +template +class SegregatedAFactory : public SingleLevelFactoryBase { #undef MUELU_SEGREGATEDAFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! Constructor. - SegregatedAFactory() = default; + public: + //! Constructor. + SegregatedAFactory() = default; - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level& currentLevel) const; + void DeclareInput(Level& currentLevel) const; - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - /*! + /*! @brief Build method. Builds filtered matrix and returns it in currentLevel. */ - void Build(Level& currentLevel) const; - - //@} + void Build(Level& currentLevel) const; - private: + //@} - //! Generating factory of input variable - mutable RCP mapFact_; + private: + //! Generating factory of input variable + mutable RCP mapFact_; - }; //class SegregatedAFactory +}; //class SegregatedAFactory -} //namespace MueLu +} //namespace MueLu #define MUELU_SEGREGATEDAFACTORY_SHORT -#endif // MUELU_SEGREGATEDAFACTORY_DECL_HPP +#endif // MUELU_SEGREGATEDAFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_SegregatedAFactory_def.hpp b/packages/muelu/src/Misc/MueLu_SegregatedAFactory_def.hpp index c2993c84253b..65d800ac4bc3 100644 --- a/packages/muelu/src/Misc/MueLu_SegregatedAFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_SegregatedAFactory_def.hpp @@ -57,112 +57,109 @@ namespace MueLu { - template - RCP SegregatedAFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP SegregatedAFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) -#undef SET_VALID_ENTRY +#undef SET_VALID_ENTRY - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the 
matrix A used for filtering"); + validParamList->set>("A", Teuchos::null, "Generating factory of the matrix A used for filtering"); - validParamList->set< std::string > ("map: name", "", "Name of map (Xpetra::Map) provided by user containing the special DOFs."); - validParamList->set< std::string > ("map: factory", "", "Name of generating factory for 'map: name'"); + validParamList->set("map: name", "", "Name of map (Xpetra::Map) provided by user containing the special DOFs."); + validParamList->set("map: factory", "", "Name of generating factory for 'map: name'"); - return validParamList; - } + return validParamList; +} - template - void SegregatedAFactory::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "A"); +template +void SegregatedAFactory::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "A"); - const ParameterList& pL = GetParameterList(); - std::string mapName = pL.get ("map: name"); - std::string mapFactName = pL.get ("map: factory"); + const ParameterList& pL = GetParameterList(); + std::string mapName = pL.get("map: name"); + std::string mapFactName = pL.get("map: factory"); - if (currentLevel.GetLevelID() == 0) - { - // Not needed, if the map is provided as user data - currentLevel.DeclareInput(mapName, NoFactory::get(), this); - } - else - { - // check whether user has provided a specific name for the MapFactory - if (mapFactName == "" || mapFactName == "NoFactory") - mapFact_ = MueLu::NoFactory::getRCP(); - else if (mapFactName != "null") - mapFact_ = currentLevel.GetFactoryManager()->GetFactory(mapFactName); - - // request map generated by mapFact_ - currentLevel.DeclareInput(mapName, mapFact_.get(), this); - } + if (currentLevel.GetLevelID() == 0) { + // Not needed, if the map is provided as user data + currentLevel.DeclareInput(mapName, NoFactory::get(), this); + } else { + // check whether user has provided a specific name for the MapFactory + if (mapFactName == "" || mapFactName == "NoFactory") + mapFact_ = 
MueLu::NoFactory::getRCP(); + else if (mapFactName != "null") + mapFact_ = currentLevel.GetFactoryManager()->GetFactory(mapFactName); + + // request map generated by mapFact_ + currentLevel.DeclareInput(mapName, mapFact_.get(), this); + } +} + +template +void SegregatedAFactory::Build(Level& currentLevel) const { + FactoryMonitor m(*this, "Matrix filtering (segregation)", currentLevel); + + RCP Ain = Get>(currentLevel, "A"); + + const ParameterList& pL = GetParameterList(); + std::string mapName = pL.get("map: name"); + std::string mapFact = pL.get("map: factory"); + + // fetch map from level + RCP map = Teuchos::null; + if (currentLevel.GetLevelID() == 0) { + map = currentLevel.Get>(mapName, NoFactory::get()); + GetOStream(Statistics0) << "User provided map \"" << mapName << "\": length dimension=" << map->getGlobalNumElements() << std::endl; + } else { + if (currentLevel.IsAvailable(mapName, mapFact_.get()) == false) + GetOStream(Runtime0) << "User provided map \"" << mapName << "\" not found in Level class on level " << currentLevel.GetLevelID() << "." << std::endl; + map = currentLevel.Get>(mapName, mapFact_.get()); } - template - void SegregatedAFactory::Build(Level& currentLevel) const { - FactoryMonitor m(*this, "Matrix filtering (segregation)", currentLevel); - - RCP Ain = Get< RCP >(currentLevel, "A"); - - const ParameterList& pL = GetParameterList(); - std::string mapName = pL.get ("map: name"); - std::string mapFact = pL.get ("map: factory"); - - // fetch map from level - RCP map = Teuchos::null; - if (currentLevel.GetLevelID() == 0) { - map = currentLevel.Get>(mapName, NoFactory::get()); - GetOStream(Statistics0) << "User provided map \"" << mapName << "\": length dimension=" << map->getGlobalNumElements() << std::endl; - } else { - if (currentLevel.IsAvailable(mapName, mapFact_.get()) == false) - GetOStream(Runtime0) << "User provided map \"" << mapName << "\" not found in Level class on level " << currentLevel.GetLevelID() << "." 
<< std::endl; - map = currentLevel.Get>(mapName, mapFact_.get()); - } + // create new empty Operator + Teuchos::RCP Aout = MatrixFactory::Build(Ain->getRowMap(), Ain->getGlobalMaxNumRowEntries()); - // create new empty Operator - Teuchos::RCP Aout = MatrixFactory::Build(Ain->getRowMap(), Ain->getGlobalMaxNumRowEntries()); - - size_t numLocalRows = Ain->getLocalNumRows(); - for(size_t row=0; rowgetRowMap()->getGlobalElement(row); // global row id - bool isInMap = map->isNodeGlobalElement(grid); - - // extract row information from input matrix - Teuchos::ArrayView indices; - Teuchos::ArrayView vals; - Ain->getLocalRowView(row, indices, vals); - - // just copy all values in output - Teuchos::ArrayRCP indout(indices.size(),Teuchos::ScalarTraits::zero()); - Teuchos::ArrayRCP valout(indices.size(),Teuchos::ScalarTraits::zero()); - - size_t nNonzeros = 0; - for(size_t i=0; i<(size_t)indices.size(); i++) { // or can be parallelize this loop? - GlobalOrdinal gcid = Ain->getColMap()->getGlobalElement(indices[i]); // global column id - bool isInMap2 = map->isNodeGlobalElement(gcid); - - if (isInMap == isInMap2) { - indout [nNonzeros] = gcid; - valout [nNonzeros] = vals[i]; - nNonzeros++; - } - } - indout.resize(nNonzeros); - valout.resize(nNonzeros); + size_t numLocalRows = Ain->getLocalNumRows(); + for (size_t row = 0; row < numLocalRows; row++) { // how can i replace this by a parallel for? 
+ GlobalOrdinal grid = Ain->getRowMap()->getGlobalElement(row); // global row id + bool isInMap = map->isNodeGlobalElement(grid); - Aout->insertGlobalValues(Ain->getRowMap()->getGlobalElement(row), indout.view(0,indout.size()), valout.view(0,valout.size())); - } + // extract row information from input matrix + Teuchos::ArrayView indices; + Teuchos::ArrayView vals; + Ain->getLocalRowView(row, indices, vals); - Aout->fillComplete(Ain->getDomainMap(), Ain->getRangeMap()); + // just copy all values in output + Teuchos::ArrayRCP indout(indices.size(), Teuchos::ScalarTraits::zero()); + Teuchos::ArrayRCP valout(indices.size(), Teuchos::ScalarTraits::zero()); - // copy block size information - Aout->SetFixedBlockSize(Ain->GetFixedBlockSize()); + size_t nNonzeros = 0; + for (size_t i = 0; i < (size_t)indices.size(); i++) { // or can be parallelize this loop? + GlobalOrdinal gcid = Ain->getColMap()->getGlobalElement(indices[i]); // global column id + bool isInMap2 = map->isNodeGlobalElement(gcid); - GetOStream(Statistics0, 0) << "Nonzeros in A (input): " << Ain->getGlobalNumEntries() << ", Nonzeros after filtering A: " << Aout->getGlobalNumEntries() << std::endl; + if (isInMap == isInMap2) { + indout[nNonzeros] = gcid; + valout[nNonzeros] = vals[i]; + nNonzeros++; + } + } + indout.resize(nNonzeros); + valout.resize(nNonzeros); - Set(currentLevel, "A", Aout); + Aout->insertGlobalValues(Ain->getRowMap()->getGlobalElement(row), indout.view(0, indout.size()), valout.view(0, valout.size())); } -} //namespace MueLu + Aout->fillComplete(Ain->getDomainMap(), Ain->getRangeMap()); + + // copy block size information + Aout->SetFixedBlockSize(Ain->GetFixedBlockSize()); + + GetOStream(Statistics0, 0) << "Nonzeros in A (input): " << Ain->getGlobalNumEntries() << ", Nonzeros after filtering A: " << Aout->getGlobalNumEntries() << std::endl; + + Set(currentLevel, "A", Aout); +} + +} //namespace MueLu -#endif // MUELU_SEGREGATEDAFACTORY_DEF_HPP +#endif // MUELU_SEGREGATEDAFACTORY_DEF_HPP diff 
--git a/packages/muelu/src/Misc/MueLu_StructuredLineDetectionFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_StructuredLineDetectionFactory_decl.hpp index 86491d0ad5af..3c8845954f3c 100644 --- a/packages/muelu/src/Misc/MueLu_StructuredLineDetectionFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_StructuredLineDetectionFactory_decl.hpp @@ -54,57 +54,55 @@ namespace MueLu { - /*! +/*! @class StructuredLineDetectionFactory class. @brief Factory building line detection information on structured meshes */ - template - class StructuredLineDetectionFactory : public SingleLevelFactoryBase { +template +class StructuredLineDetectionFactory : public SingleLevelFactoryBase { #undef MUELU_STRUCTUREDLINEDETECTIONFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - StructuredLineDetectionFactory() { } + StructuredLineDetectionFactory() {} - //! Destructor. - virtual ~StructuredLineDetectionFactory() { } + //! Destructor. + virtual ~StructuredLineDetectionFactory() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level& currentLevel) const; + void DeclareInput(Level& currentLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - /*! + /*! @brief Build method. 
Builds line detection information and stores it in currentLevel */ - void Build(Level& currentLevel) const; - - //@} + void Build(Level& currentLevel) const; - private: + //@} - }; //class StructuredLineDetectionFactory + private: +}; //class StructuredLineDetectionFactory -} //namespace MueLu +} //namespace MueLu #define MUELU_STRUCTUREDLINEDETECTIONFACTORY_SHORT -#endif // MUELU_STRUCTUREDLINEDETECTIONFACTORY_DECL_HPP +#endif // MUELU_STRUCTUREDLINEDETECTIONFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_StructuredLineDetectionFactory_def.hpp b/packages/muelu/src/Misc/MueLu_StructuredLineDetectionFactory_def.hpp index 86c2e0493498..eabf8dd4b967 100644 --- a/packages/muelu/src/Misc/MueLu_StructuredLineDetectionFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_StructuredLineDetectionFactory_def.hpp @@ -54,76 +54,75 @@ namespace MueLu { - template - RCP StructuredLineDetectionFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A"); - validParamList->set< std::string > ("orientation", "Z", "Lines orientation"); - validParamList->set< RCP >("lNodesPerDim", Teuchos::null, "Number of nodes per spatial dimension provided by CoordinatesTransferFactory."); - - return validParamList; - } - - template - void StructuredLineDetectionFactory::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "A"); - // Request the global number of nodes per dimensions - if(currentLevel.GetLevelID() == 0) { - if(currentLevel.IsAvailable("lNodesPerDim", NoFactory::get())) { - currentLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("gNodesPerDim", NoFactory::get()), - Exceptions::RuntimeError, - "lNodesPerDim was not provided by the user on level0!"); - } +template +RCP StructuredLineDetectionFactory::GetValidParameterList() const { + RCP validParamList = rcp(new 
ParameterList()); + + validParamList->set >("A", Teuchos::null, "Generating factory of the matrix A"); + validParamList->set("orientation", "Z", "Lines orientation"); + validParamList->set >("lNodesPerDim", Teuchos::null, "Number of nodes per spatial dimension provided by CoordinatesTransferFactory."); + + return validParamList; +} + +template +void StructuredLineDetectionFactory::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "A"); + // Request the global number of nodes per dimensions + if (currentLevel.GetLevelID() == 0) { + if (currentLevel.IsAvailable("lNodesPerDim", NoFactory::get())) { + currentLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this); } else { - Input(currentLevel, "lNodesPerDim"); + TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("gNodesPerDim", NoFactory::get()), + Exceptions::RuntimeError, + "lNodesPerDim was not provided by the user on level0!"); } + } else { + Input(currentLevel, "lNodesPerDim"); + } +} + +template +void StructuredLineDetectionFactory::Build(Level& currentLevel) const { + // The following three variables are needed by the line smoothers in Ifpack/Ifpack2 + LO NumZDir = 0; + Teuchos::ArrayRCP VertLineId = Teuchos::arcp(0); + + // collect information provided by user + const ParameterList& pL = GetParameterList(); + const std::string lineOrientation = pL.get("orientation"); + + // Extract data from currentLevel + RCP A = Get >(currentLevel, "A"); + Array lNodesPerDir = Get >(currentLevel, "lNodesPerDim"); + LO numNodes = lNodesPerDir[0] * lNodesPerDir[1] * lNodesPerDir[2]; + VertLineId.resize(numNodes); + if (lineOrientation == "X") { + NumZDir = lNodesPerDir[0]; + } else if (lineOrientation == "Y") { + NumZDir = lNodesPerDir[1]; + } else if (lineOrientation == "Z") { + NumZDir = lNodesPerDir[2]; } - template - void StructuredLineDetectionFactory::Build(Level& currentLevel) const { - - // The following three variables are needed by the line smoothers in Ifpack/Ifpack2 - LO NumZDir = 0; - 
Teuchos::ArrayRCP VertLineId = Teuchos::arcp(0); - - // collect information provided by user - const ParameterList& pL = GetParameterList(); - const std::string lineOrientation = pL.get("orientation"); - - // Extract data from currentLevel - RCP A = Get< RCP >(currentLevel, "A"); - Array lNodesPerDir = Get > (currentLevel, "lNodesPerDim"); - LO numNodes = lNodesPerDir[0]*lNodesPerDir[1]*lNodesPerDir[2]; - VertLineId.resize(numNodes); - if(lineOrientation == "X") { - NumZDir = lNodesPerDir[0]; - } else if(lineOrientation == "Y") { - NumZDir = lNodesPerDir[1]; - } else if(lineOrientation == "Z") { - NumZDir = lNodesPerDir[2]; - } - - for(LO k = 0; k < lNodesPerDir[2]; ++k) { - for(LO j = 0; j < lNodesPerDir[1]; ++j) { - for(LO i = 0; i < lNodesPerDir[0]; ++i) { - if(lineOrientation == "X") { - VertLineId[k*lNodesPerDir[1]*lNodesPerDir[0] + j*lNodesPerDir[0] + i] = k*lNodesPerDir[1] + j; - } else if(lineOrientation == "Y") { - VertLineId[k*lNodesPerDir[1]*lNodesPerDir[0] + j*lNodesPerDir[0] + i] = k*lNodesPerDir[0] + i; - } else if(lineOrientation == "Z") { - VertLineId[k*lNodesPerDir[1]*lNodesPerDir[0] + j*lNodesPerDir[0] + i] = j*lNodesPerDir[0] + i; - } + for (LO k = 0; k < lNodesPerDir[2]; ++k) { + for (LO j = 0; j < lNodesPerDir[1]; ++j) { + for (LO i = 0; i < lNodesPerDir[0]; ++i) { + if (lineOrientation == "X") { + VertLineId[k * lNodesPerDir[1] * lNodesPerDir[0] + j * lNodesPerDir[0] + i] = k * lNodesPerDir[1] + j; + } else if (lineOrientation == "Y") { + VertLineId[k * lNodesPerDir[1] * lNodesPerDir[0] + j * lNodesPerDir[0] + i] = k * lNodesPerDir[0] + i; + } else if (lineOrientation == "Z") { + VertLineId[k * lNodesPerDir[1] * lNodesPerDir[0] + j * lNodesPerDir[0] + i] = j * lNodesPerDir[0] + i; } } } - - Set(currentLevel, "CoarseNumZLayers", NumZDir); - Set(currentLevel, "LineDetection_VertLineIds", VertLineId); } -} //namespace MueLu + Set(currentLevel, "CoarseNumZLayers", NumZDir); + Set(currentLevel, "LineDetection_VertLineIds", VertLineId); +} + +} 
//namespace MueLu -#endif // MUELU_STRUCTUREDLINEDETECTIONFACTORY_DEF_HPP +#endif // MUELU_STRUCTUREDLINEDETECTIONFACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_ThresholdAFilterFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_ThresholdAFilterFactory_decl.hpp index 5f798befa2e8..45a296a724eb 100644 --- a/packages/muelu/src/Misc/MueLu_ThresholdAFilterFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_ThresholdAFilterFactory_decl.hpp @@ -59,52 +59,51 @@ namespace MueLu { - /*! +/*! @class ThresholdAFilterFactory class. @brief Factory for building a thresholded operator. */ - template - class ThresholdAFilterFactory : public SingleLevelFactoryBase { +template +class ThresholdAFilterFactory : public SingleLevelFactoryBase { #undef MUELU_THRESHOLDAFILTERFACTORY_SHORT - #include "MueLu_UseShortNames.hpp" +#include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ + public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - ThresholdAFilterFactory(const std::string& ename, const Scalar threshold, const bool keepDiagonal=true, const GlobalOrdinal expectedNNZperRow=-1); + //! Constructor. + ThresholdAFilterFactory(const std::string& ename, const Scalar threshold, const bool keepDiagonal = true, const GlobalOrdinal expectedNNZperRow = -1); - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level ¤tLevel) const; + void DeclareInput(Level& currentLevel) const; - //@} + //@} - //@{ - //! @name Build methods. + //@{ + //! @name Build methods. - //! Build an object with this factory. - void Build(Level & currentLevel) const; + //! Build an object with this factory. 
+ void Build(Level& currentLevel) const; - //@} + //@} - private: - std::string varName_; ///< name of input and output variable - const Scalar threshold_; ///< threshold parameter - const bool keepDiagonal_; - const GlobalOrdinal expectedNNZperRow_; + private: + std::string varName_; ///< name of input and output variable + const Scalar threshold_; ///< threshold parameter + const bool keepDiagonal_; + const GlobalOrdinal expectedNNZperRow_; +}; // class ThresholdAFilterFactory - }; // class ThresholdAFilterFactory - -} // namespace MueLu +} // namespace MueLu #define MUELU_THRESHOLDAFILTERFACTORY_SHORT -#endif // MUELU_THRESHOLDAFILTERFACTORY_DECL_HPP +#endif // MUELU_THRESHOLDAFILTERFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_ThresholdAFilterFactory_def.hpp b/packages/muelu/src/Misc/MueLu_ThresholdAFilterFactory_def.hpp index 10da9befa53d..ad8621640bb4 100644 --- a/packages/muelu/src/Misc/MueLu_ThresholdAFilterFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_ThresholdAFilterFactory_def.hpp @@ -56,30 +56,31 @@ namespace MueLu { - template - ThresholdAFilterFactory::ThresholdAFilterFactory(const std::string& ename, const Scalar threshold, const bool keepDiagonal, const GlobalOrdinal expectedNNZperRow) - : varName_(ename), threshold_(threshold), keepDiagonal_(keepDiagonal), expectedNNZperRow_(expectedNNZperRow) - { } +template +ThresholdAFilterFactory::ThresholdAFilterFactory(const std::string& ename, const Scalar threshold, const bool keepDiagonal, const GlobalOrdinal expectedNNZperRow) + : varName_(ename) + , threshold_(threshold) + , keepDiagonal_(keepDiagonal) + , expectedNNZperRow_(expectedNNZperRow) {} - template - void ThresholdAFilterFactory::DeclareInput(Level ¤tLevel) const { - Input(currentLevel, varName_); - } +template +void ThresholdAFilterFactory::DeclareInput(Level& currentLevel) const { + Input(currentLevel, varName_); +} - template - void ThresholdAFilterFactory:: - Build (Level & currentLevel) const - { - FactoryMonitor m 
(*this, "A filter (thresholding)", currentLevel); +template +void ThresholdAFilterFactory:: + Build(Level& currentLevel) const { + FactoryMonitor m(*this, "A filter (thresholding)", currentLevel); - RCP Ain = Get< RCP >(currentLevel, varName_); - RCP Aout = - MueLu::Utilities::GetThresholdedMatrix(Ain, threshold_, keepDiagonal_, expectedNNZperRow_); + RCP Ain = Get >(currentLevel, varName_); + RCP Aout = + MueLu::Utilities::GetThresholdedMatrix(Ain, threshold_, keepDiagonal_, expectedNNZperRow_); - GetOStream(Statistics0) << "Nonzeros in " << varName_ << "(input): " << Ain->getGlobalNumEntries() << ", Nonzeros after filtering " << varName_ << " (parameter: " << threshold_ << "): " << Aout->getGlobalNumEntries() << std::endl; - currentLevel.Set(varName_, Teuchos::rcp_dynamic_cast(Aout), this); - } + GetOStream(Statistics0) << "Nonzeros in " << varName_ << "(input): " << Ain->getGlobalNumEntries() << ", Nonzeros after filtering " << varName_ << " (parameter: " << threshold_ << "): " << Aout->getGlobalNumEntries() << std::endl; + currentLevel.Set(varName_, Teuchos::rcp_dynamic_cast(Aout), this); +} -} // namespace MueLu +} // namespace MueLu -#endif // MUELU_THRESHOLDAFILTERFACTORY_DEF_HPP +#endif // MUELU_THRESHOLDAFILTERFACTORY_DEF_HPP diff --git a/packages/muelu/src/MueCentral/MueLu_BaseClass.hpp b/packages/muelu/src/MueCentral/MueLu_BaseClass.hpp index d291d05c4a9f..bccaa36c61d7 100644 --- a/packages/muelu/src/MueCentral/MueLu_BaseClass.hpp +++ b/packages/muelu/src/MueCentral/MueLu_BaseClass.hpp @@ -52,44 +52,42 @@ namespace MueLu { - /*! +/*! @class BaseClass class. @brief Base class for MueLu classes @ingroup MueLuBaseClasses */ - class BaseClass - : public VerboseObject, public Describable - { +class BaseClass + : public VerboseObject, + public Describable { + public: + //! @name Constructors/Destructors + //@{ - public: + //! Destructor. + virtual ~BaseClass() {} - //! @name Constructors/Destructors - //@{ + //@} - //! Destructor. 
- virtual ~BaseClass() {} +}; // class BaseClass - //@} - - }; // class BaseClass - -} // namespace MueLu +} // namespace MueLu //! Helper macro for implementing Describable::describe() for BaseClass objects. // This macro defines ostream out0 that print only on root node. It print description() and indent the ostream. // Note: Runtime1 displays basic parameter information when Parameters0 is not enabled. -#define MUELU_DESCRIBE \ - using std::endl; \ +#define MUELU_DESCRIBE \ + using std::endl; \ Teuchos::FancyOStream& out0 = (VerboseObject::GetProcRankVerbose() == 0) ? out : VerboseObject::GetBlackHole(); \ - \ - if ((verbLevel & Runtime1) && (!(verbLevel & Parameters0))) \ - out << description() << std::endl; \ - else if (verbLevel & Runtime0) \ - out << BaseClass::description() << std::endl; \ - \ - Teuchos::OSTab tab1(out); \ + \ + if ((verbLevel & Runtime1) && (!(verbLevel & Parameters0))) \ + out << description() << std::endl; \ + else if (verbLevel & Runtime0) \ + out << BaseClass::description() << std::endl; \ + \ + Teuchos::OSTab tab1(out); \ // #define MUELU_BASECLASS_SHORT -#endif // ifndef MUELU_BASECLASS_HPP +#endif // ifndef MUELU_BASECLASS_HPP diff --git a/packages/muelu/src/MueCentral/MueLu_Describable.cpp b/packages/muelu/src/MueCentral/MueLu_Describable.cpp index c2bba36bc122..02e648e9cb0e 100644 --- a/packages/muelu/src/MueCentral/MueLu_Describable.cpp +++ b/packages/muelu/src/MueCentral/MueLu_Describable.cpp @@ -50,51 +50,50 @@ namespace MueLu { - Describable::~Describable() { } +Describable::~Describable() {} - void Describable::describe(Teuchos::FancyOStream &out_arg, const VerbLevel /* verbLevel */) const { - Teuchos::RCP out = rcp(&out_arg,false); //JG: no idea why we have to do that, but it's how Teuchos::Describable::describe() is implemented - Teuchos::OSTab tab(out); - *out << this->description() << std::endl; - } +void Describable::describe(Teuchos::FancyOStream &out_arg, const VerbLevel /* verbLevel */) const { + Teuchos::RCP out = 
rcp(&out_arg, false); //JG: no idea why we have to do that, but it's how Teuchos::Describable::describe() is implemented + Teuchos::OSTab tab(out); + *out << this->description() << std::endl; +} - std::string Describable::description() const { - std::string str = Teuchos::Describable::description(); +std::string Describable::description() const { + std::string str = Teuchos::Describable::description(); - // remove template parameters - size_t found = str.find_first_of("<"); - if (found != std::string::npos) - return str.substr(0, found); + // remove template parameters + size_t found = str.find_first_of("<"); + if (found != std::string::npos) + return str.substr(0, found); - return str; - } + return str; +} - void Describable::describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const { describe(out, toMueLuVerbLevel(verbLevel)); } +void Describable::describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const { describe(out, toMueLuVerbLevel(verbLevel)); } - std::string Describable::ShortClassName() const { - if ( shortClassName_.empty() ) - { - std::string str = Teuchos::Describable::description(); +std::string Describable::ShortClassName() const { + if (shortClassName_.empty()) { + std::string str = Teuchos::Describable::description(); - // remove template parameters - { - size_t found = str.find_first_of("<"); - if (found != std::string::npos) - str = str.substr(0, found); - } + // remove template parameters + { + size_t found = str.find_first_of("<"); + if (found != std::string::npos) + str = str.substr(0, found); + } - // remove namespace - { - size_t found = str.find_last_of(":"); - if (found != std::string::npos) - str = str.substr(found+1); - } - shortClassName_ = str; - } - return shortClassName_; + // remove namespace + { + size_t found = str.find_last_of(":"); + if (found != std::string::npos) + str = str.substr(found + 1); } + shortClassName_ = str; + } + return shortClassName_; +} -} // namespace MueLu 
+} // namespace MueLu #define MUELU_DESCRIBABLE_SHORT -#endif // MUELU_DESCRIBABLE_HPP +#endif // MUELU_DESCRIBABLE_HPP diff --git a/packages/muelu/src/MueCentral/MueLu_Describable.hpp b/packages/muelu/src/MueCentral/MueLu_Describable.hpp index 7dbb4dc08811..00c970677b5a 100644 --- a/packages/muelu/src/MueCentral/MueLu_Describable.hpp +++ b/packages/muelu/src/MueCentral/MueLu_Describable.hpp @@ -46,55 +46,53 @@ #ifndef MUELU_DESCRIBABLE_DECL_HPP #define MUELU_DESCRIBABLE_DECL_HPP -#include // for string -#include "Teuchos_FancyOStream.hpp" // for FancyOStream -#include "Teuchos_VerbosityLevel.hpp" // for EVerbosityLevel +#include // for string +#include "Teuchos_FancyOStream.hpp" // for FancyOStream +#include "Teuchos_VerbosityLevel.hpp" // for EVerbosityLevel #include "Teuchos_Describable.hpp" #include "MueLu_VerbosityLevel.hpp" namespace MueLu { - /*! +/*! @class Describable @brief Base class for MueLu classes @ingroup MueLuBaseClasses */ - class Describable - : public Teuchos::Describable - { - mutable std::string shortClassName_ = ""; // cached so that we don't have to call demangleName() every time; mutable so that ShortClassName() can initialize lazily while remaining const +class Describable + : public Teuchos::Describable { + mutable std::string shortClassName_ = ""; // cached so that we don't have to call demangleName() every time; mutable so that ShortClassName() can initialize lazily while remaining const - public: + public: + //! Destructor. + virtual ~Describable(); - //! Destructor. - virtual ~Describable(); + //! @name MueLu Describe + //@{ - //! @name MueLu Describe - //@{ + virtual void describe(Teuchos::FancyOStream &out_arg, const VerbLevel verbLevel = Default) const; - virtual void describe(Teuchos::FancyOStream &out_arg, const VerbLevel verbLevel = Default) const; + //@} - //@} + //! @name Overridden from Teuchos::Describable + //@{ - //! @name Overridden from Teuchos::Describable - //@{ + //! 
Return a simple one-line description of this object. + virtual std::string description() const; - //! Return a simple one-line description of this object. - virtual std::string description() const; + //! Print the object with some verbosity level to an FancyOStream object. + void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel = Teuchos::Describable::verbLevel_default) const; - //! Print the object with some verbosity level to an FancyOStream object. - void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel = Teuchos::Describable::verbLevel_default) const; + //@} - //@} + //! Return the class name of the object, without template parameters and without namespace + virtual std::string ShortClassName() const; - //! Return the class name of the object, without template parameters and without namespace - virtual std::string ShortClassName() const; +}; // class Describable - }; // class Describable - -} // namespace MueLu +} // namespace MueLu #define MUELU_DESCRIBABLE_SHORT -#endif // MUELU_DESCRIBABLE_DECL_HPP +#endif // MUELU_DESCRIBABLE_DECL_HPP diff --git a/packages/muelu/src/MueCentral/MueLu_Factory.cpp b/packages/muelu/src/MueCentral/MueLu_Factory.cpp index 64c7032ce5bb..4b099c244891 100644 --- a/packages/muelu/src/MueCentral/MueLu_Factory.cpp +++ b/packages/muelu/src/MueCentral/MueLu_Factory.cpp @@ -48,9 +48,9 @@ namespace MueLu { - bool Factory::timerSync_ = false; +bool Factory::timerSync_ = false; #ifdef HAVE_MUELU_DEBUG - Factory::multipleCallCheckEnum Factory::multipleCallCheckGlobal_ = ENABLED; +Factory::multipleCallCheckEnum Factory::multipleCallCheckGlobal_ = ENABLED; #endif -} // namespace MueLu +} // namespace MueLu diff --git a/packages/muelu/src/MueCentral/MueLu_Factory.hpp b/packages/muelu/src/MueCentral/MueLu_Factory.hpp index 0be93d76bdff..e14e12c34953 100644 --- a/packages/muelu/src/MueCentral/MueLu_Factory.hpp +++ b/packages/muelu/src/MueCentral/MueLu_Factory.hpp @@ -47,14 +47,14 @@ #define 
MUELU_FACTORY_HPP #include -#include // for _Deque_iterator, operator!= -#include // for operator<<, etc -#include "Teuchos_ENull.hpp" // for ENull::null +#include // for _Deque_iterator, operator!= +#include // for operator<<, etc +#include "Teuchos_ENull.hpp" // for ENull::null #include "Teuchos_FilteredIterator.hpp" // for FilteredIterator, etc -#include "Teuchos_ParameterEntry.hpp" // for ParameterEntry -#include "Teuchos_ParameterList.hpp" // for ParameterList, etc -#include "Teuchos_RCPDecl.hpp" // for RCP -#include "Teuchos_RCPNode.hpp" // for operator<< +#include "Teuchos_ParameterEntry.hpp" // for ParameterEntry +#include "Teuchos_ParameterList.hpp" // for ParameterList, etc +#include "Teuchos_RCPDecl.hpp" // for RCP +#include "Teuchos_RCPNode.hpp" // for operator<< #include "Teuchos_StringIndexedOrderedValueObjectContainer.hpp" #include "Teuchos_RCP.hpp" @@ -66,171 +66,170 @@ namespace MueLu { - class Factory : public FactoryBase, public FactoryAcceptor, public ParameterListAcceptorImpl { +class Factory : public FactoryBase, public FactoryAcceptor, public ParameterListAcceptorImpl { + public: + //@{ Constructors/Destructors. - public: - //@{ Constructors/Destructors. - - //! Constructor. - Factory() + //! Constructor. + Factory() #ifdef HAVE_MUELU_DEBUG - : multipleCallCheck_(FIRSTCALL), lastLevelID_(-1) + : multipleCallCheck_(FIRSTCALL) + , lastLevelID_(-1) #endif - { } - - //! Destructor. - virtual ~Factory() { } - //@} - - //@{ - //! Configuration - - //! SetFactory is for expert users only. To change configuration of the preconditioner, use a factory manager. - virtual void SetFactory(const std::string& varName, const RCP& factory) { - RCP f = factory; - SetParameter(varName, ParameterEntry(f)); // parameter validation done in ParameterListAcceptorImpl + { + } + + //! Destructor. + virtual ~Factory() {} + //@} + + //@{ + //! Configuration + + //! SetFactory is for expert users only. To change configuration of the preconditioner, use a factory manager. 
+ virtual void SetFactory(const std::string& varName, const RCP& factory) { + RCP f = factory; + SetParameter(varName, ParameterEntry(f)); // parameter validation done in ParameterListAcceptorImpl + } + + //! Default implementation of FactoryAcceptor::GetFactory() + const RCP GetFactory(const std::string& varName) const { + // Special treatment for "NoFactory" + if (varName == "NoFactory") + return MueLu::NoFactory::getRCP(); + + if (!GetParameterList().isParameter(varName) && GetValidParameterList() == Teuchos::null) { + // If the parameter is not on the list and there is not validator, the defaults values for 'varName' is not set. + // Failback by using directly the FactoryManager + // NOTE: call to GetValidParameterList() can be costly for classes that validate parameters. + // But it get called only (lazy '&&' operator) if the parameter 'varName' is not on the paramlist and + // the parameter 'varName' is always on the list when validator is present and 'varName' is valid (at least the default value is set). + return Teuchos::null; } - //! Default implementation of FactoryAcceptor::GetFactory() - const RCP GetFactory(const std::string& varName) const { - - // Special treatment for "NoFactory" - if (varName == "NoFactory") - return MueLu::NoFactory::getRCP(); - - if (!GetParameterList().isParameter(varName)&& GetValidParameterList() == Teuchos::null) { - // If the parameter is not on the list and there is not validator, the defaults values for 'varName' is not set. - // Failback by using directly the FactoryManager - // NOTE: call to GetValidParameterList() can be costly for classes that validate parameters. - // But it get called only (lazy '&&' operator) if the parameter 'varName' is not on the paramlist and - // the parameter 'varName' is always on the list when validator is present and 'varName' is valid (at least the default value is set). 
- return Teuchos::null; - } - - return GetParameterList().get< RCP >(varName); + return GetParameterList().get >(varName); + } + + RCP RemoveFactoriesFromList(const ParameterList& list) const { + RCP paramList = rcp(new ParameterList(list)); + // Remove FactoryBase entries from the list + // The solution would be much more elegant if ParameterList support std::list like operations + // In that case, we could simply write: + // for (ParameterList::ConstIterator it = paramList.begin(); it != paramList.end(); it++) + // if (paramList.isType >(it->first)) + // it = paramList.erase(it); + // else + // it++; + ParameterList::ConstIterator it = paramList->begin(); + while (it != paramList->end()) { + it = paramList->begin(); + + for (; it != paramList->end(); it++) + if (paramList->isType >(it->first)) + paramList->remove(it->first); } - - RCP RemoveFactoriesFromList(const ParameterList& list) const { - RCP paramList = rcp(new ParameterList(list)); - // Remove FactoryBase entries from the list - // The solution would be much more elegant if ParameterList support std::list like operations - // In that case, we could simply write: - // for (ParameterList::ConstIterator it = paramList.begin(); it != paramList.end(); it++) - // if (paramList.isType >(it->first)) - // it = paramList.erase(it); - // else - // it++; - ParameterList::ConstIterator it = paramList->begin(); - while (it != paramList->end()) { - it = paramList->begin(); - - for (; it != paramList->end(); it++) - if (paramList->isType >(it->first)) - paramList->remove(it->first); - } - return paramList; - } - - // SetParameterList(...); - - // GetParameterList(...); - - //@} - - virtual RCP GetValidParameterList() const { - return Teuchos::null; // Teuchos::null == GetValidParameterList() not implemented == skip validation and no default values (dangerous) - } - - protected: - - void Input(Level& level, const std::string& varName) const { - level.DeclareInput(varName, GetFactory(varName).get(), this); - } - // Similar 
to the other Input, but we have an alias (varParamName) to the generated data name (varName) - void Input(Level& level, const std::string& varName, const std::string& varParamName) const { - level.DeclareInput(varName, GetFactory(varParamName).get(), this); - } - - template - T Get(Level& level, const std::string& varName) const { - return level.Get(varName, GetFactory(varName).get()); - } - // Similar to the other Get, but we have an alias (varParamName) to the generated data name (varName) - template - T Get(Level& level, const std::string& varName, const std::string& varParamName) const { - return level.Get(varName, GetFactory(varParamName).get()); - } - - template - void Set(Level& level, const std::string& varName, const T& data) const { - return level.Set(varName, data, this); - } - - template - bool IsType(Level& level, const std::string& varName) const { - return level.IsType(varName, GetFactory(varName).get()); - } - - bool IsAvailable(Level& level, const std::string& varName) const { - return level.IsAvailable(varName, GetFactory(varName).get()); - } - - public: - static void EnableTimerSync() { timerSync_ = true; } - static void DisableTimerSync() { timerSync_ = false; } - - protected: - static bool timerSync_; + return paramList; + } + + // SetParameterList(...); + + // GetParameterList(...); + + //@} + + virtual RCP GetValidParameterList() const { + return Teuchos::null; // Teuchos::null == GetValidParameterList() not implemented == skip validation and no default values (dangerous) + } + + protected: + void Input(Level& level, const std::string& varName) const { + level.DeclareInput(varName, GetFactory(varName).get(), this); + } + // Similar to the other Input, but we have an alias (varParamName) to the generated data name (varName) + void Input(Level& level, const std::string& varName, const std::string& varParamName) const { + level.DeclareInput(varName, GetFactory(varParamName).get(), this); + } + + template + T Get(Level& level, const std::string& 
varName) const { + return level.Get(varName, GetFactory(varName).get()); + } + // Similar to the other Get, but we have an alias (varParamName) to the generated data name (varName) + template + T Get(Level& level, const std::string& varName, const std::string& varParamName) const { + return level.Get(varName, GetFactory(varParamName).get()); + } + + template + void Set(Level& level, const std::string& varName, const T& data) const { + return level.Set(varName, data, this); + } + + template + bool IsType(Level& level, const std::string& varName) const { + return level.IsType(varName, GetFactory(varName).get()); + } + + bool IsAvailable(Level& level, const std::string& varName) const { + return level.IsAvailable(varName, GetFactory(varName).get()); + } + + public: + static void EnableTimerSync() { timerSync_ = true; } + static void DisableTimerSync() { timerSync_ = false; } + + protected: + static bool timerSync_; #ifdef HAVE_MUELU_DEBUG - public: - enum multipleCallCheckEnum { ENABLED, DISABLED, FIRSTCALL }; - - void EnableMultipleCallCheck() const { multipleCallCheck_ = ENABLED; } - void DisableMultipleCallCheck() const { multipleCallCheck_ = DISABLED; } - void ResetDebugData() const { - if (multipleCallCheck_ == FIRSTCALL && lastLevelID_ == -1) - return; - - multipleCallCheck_ = FIRSTCALL; - lastLevelID_ = -1; - - const ParameterList& paramList = GetParameterList(); - - // We cannot use just FactoryManager to specify which factories call ResetDebugData(). - // The problem is that some factories are not present in the manager, but - // instead are only accessible through a parameter list of some factory. - // For instance, FilteredAFactory is only accessible from SaPFactory but - // nowhere else. So we miss those, and do not reset the data, resulting - // in problems. - // Therefore, for each factory we need to go through its dependent - // factories, and call reset on them. 
- for (ParameterList::ConstIterator it = paramList.begin(); it != paramList.end(); it++) - if (paramList.isType >(it->first)) { - RCP fact = rcp_dynamic_cast(paramList.get >(it->first)); - if (fact != Teuchos::null && fact != NoFactory::getRCP()) - fact->ResetDebugData(); - } - } + public: + enum multipleCallCheckEnum{ENABLED, DISABLED, FIRSTCALL}; + + void EnableMultipleCallCheck() const { multipleCallCheck_ = ENABLED; } + void DisableMultipleCallCheck() const { multipleCallCheck_ = DISABLED; } + void ResetDebugData() const { + if (multipleCallCheck_ == FIRSTCALL && lastLevelID_ == -1) + return; + + multipleCallCheck_ = FIRSTCALL; + lastLevelID_ = -1; + + const ParameterList& paramList = GetParameterList(); + + // We cannot use just FactoryManager to specify which factories call ResetDebugData(). + // The problem is that some factories are not present in the manager, but + // instead are only accessible through a parameter list of some factory. + // For instance, FilteredAFactory is only accessible from SaPFactory but + // nowhere else. So we miss those, and do not reset the data, resulting + // in problems. + // Therefore, for each factory we need to go through its dependent + // factories, and call reset on them. 
+ for (ParameterList::ConstIterator it = paramList.begin(); it != paramList.end(); it++) + if (paramList.isType >(it->first)) { + RCP fact = rcp_dynamic_cast(paramList.get >(it->first)); + if (fact != Teuchos::null && fact != NoFactory::getRCP()) + fact->ResetDebugData(); + } + } - static void EnableMultipleCheckGlobally() { multipleCallCheckGlobal_ = ENABLED; } - static void DisableMultipleCheckGlobally() { multipleCallCheckGlobal_ = DISABLED; } + static void EnableMultipleCheckGlobally() { multipleCallCheckGlobal_ = ENABLED; } + static void DisableMultipleCheckGlobally() { multipleCallCheckGlobal_ = DISABLED; } - protected: - mutable multipleCallCheckEnum multipleCallCheck_; - static multipleCallCheckEnum multipleCallCheckGlobal_; - mutable int lastLevelID_; + protected: + mutable multipleCallCheckEnum multipleCallCheck_; + static multipleCallCheckEnum multipleCallCheckGlobal_; + mutable int lastLevelID_; #else - public: - void EnableMultipleCallCheck() const { } - void DisableMultipleCallCheck() const { } - void ResetDebugData() const { } - static void EnableMultipleCheckGlobally() { } - static void DisableMultipleCheckGlobally() { } + public: + void EnableMultipleCallCheck() const {} + void DisableMultipleCallCheck() const {} + void ResetDebugData() const {} + static void EnableMultipleCheckGlobally() {} + static void DisableMultipleCheckGlobally() {} #endif - }; //class Factory +}; //class Factory -} //namespace MueLu +} //namespace MueLu #define MUELU_FACTORY_SHORT -#endif //ifndef MUELU_FACTORY_HPP +#endif //ifndef MUELU_FACTORY_HPP diff --git a/packages/muelu/src/MueCentral/MueLu_FactoryAcceptor.hpp b/packages/muelu/src/MueCentral/MueLu_FactoryAcceptor.hpp index efe14effc27a..78707751a1c4 100644 --- a/packages/muelu/src/MueCentral/MueLu_FactoryAcceptor.hpp +++ b/packages/muelu/src/MueCentral/MueLu_FactoryAcceptor.hpp @@ -54,29 +54,27 @@ namespace MueLu { - class FactoryAcceptor { +class FactoryAcceptor { + public: + virtual ~FactoryAcceptor() {} - public: + 
//@{ + //! Configuration - virtual ~FactoryAcceptor() { } + //! SetFactory is for expert users only. To change configuration of the preconditioner, use a factory manager. + virtual void SetFactory(const std::string& varName, const RCP& factory) = 0; - //@{ - //! Configuration + virtual const RCP GetFactory(const std::string& varName) const = 0; - //! SetFactory is for expert users only. To change configuration of the preconditioner, use a factory manager. - virtual void SetFactory(const std::string & varName, const RCP & factory) = 0; + // SetParameterList(...); - virtual const RCP GetFactory(const std::string & varName) const = 0; + // GetParameterList(...); - // SetParameterList(...); + //@} - // GetParameterList(...); +}; //class FactoryAcceptor - //@} - - }; //class FactoryAcceptor - -} //namespace MueLu +} //namespace MueLu #define MUELU_FACTORYACCEPTOR_SHORT -#endif //ifndef MUELU_FACTORYACCEPTOR_HPP +#endif //ifndef MUELU_FACTORYACCEPTOR_HPP diff --git a/packages/muelu/src/MueCentral/MueLu_FactoryBase.cpp b/packages/muelu/src/MueCentral/MueLu_FactoryBase.cpp index 07c413c94e8f..ab5d21912b59 100644 --- a/packages/muelu/src/MueCentral/MueLu_FactoryBase.cpp +++ b/packages/muelu/src/MueCentral/MueLu_FactoryBase.cpp @@ -48,9 +48,9 @@ namespace MueLu { - int FactoryBase::GenerateUniqueId() { - static int i = 0; - return i++; - } +int FactoryBase::GenerateUniqueId() { + static int i = 0; + return i++; +} -} // namespace MueLu +} // namespace MueLu diff --git a/packages/muelu/src/MueCentral/MueLu_FactoryBase.hpp b/packages/muelu/src/MueCentral/MueLu_FactoryBase.hpp index b962d7f6f510..ab16ff52e358 100644 --- a/packages/muelu/src/MueCentral/MueLu_FactoryBase.hpp +++ b/packages/muelu/src/MueCentral/MueLu_FactoryBase.hpp @@ -52,54 +52,51 @@ namespace MueLu { - /*! +/*! @class FactoryBase @brief Base class for factories (e.g., R, P, and A_coarse). 
@ingroup MueLuBaseClasses */ - class FactoryBase : public virtual BaseClass { +class FactoryBase : public virtual BaseClass { + public: + //@{ Constructors/Destructors. - public: - //@{ Constructors/Destructors. + //! Constructor. + FactoryBase() + : id_(FactoryBase::GenerateUniqueId()) {} - //! Constructor. - FactoryBase() - : id_(FactoryBase::GenerateUniqueId()) - { } + //! Destructor. + virtual ~FactoryBase() {} + //@} - //! Destructor. - virtual ~FactoryBase() { } - //@} - - //@{ - //! @name Build methods. + //@{ + //! @name Build methods. - virtual void CallBuild(Level & requestedLevel) const = 0; + virtual void CallBuild(Level& requestedLevel) const = 0; - virtual void CallDeclareInput(Level & requestedLevel) const = 0; - //@} + virtual void CallDeclareInput(Level& requestedLevel) const = 0; + //@} - //@{ - //! @name Access factory properties + //@{ + //! @name Access factory properties - /// return unique factory id - int GetID() const { return id_; }; + /// return unique factory id + int GetID() const { return id_; }; //@} #ifdef HAVE_MUELU_DEBUG - virtual void ResetDebugData() const = 0; + virtual void ResetDebugData() const = 0; #endif - private: - - static int GenerateUniqueId(); + private: + static int GenerateUniqueId(); - const int id_; + const int id_; - }; //class FactoryBase +}; //class FactoryBase -} //namespace MueLu +} //namespace MueLu #define MUELU_FACTORYBASE_SHORT -#endif //ifndef MUELU_FACTORYBASE_HPP +#endif //ifndef MUELU_FACTORYBASE_HPP diff --git a/packages/muelu/src/MueCentral/MueLu_FactoryManagerBase.hpp b/packages/muelu/src/MueCentral/MueLu_FactoryManagerBase.hpp index 90b2c8f86089..01c6cce8ec86 100644 --- a/packages/muelu/src/MueCentral/MueLu_FactoryManagerBase.hpp +++ b/packages/muelu/src/MueCentral/MueLu_FactoryManagerBase.hpp @@ -55,57 +55,57 @@ namespace MueLu { - /*! +/*! @class FactoryManagerBase @brief Class that provides default factories within Needs class. 
@ingroup MueLuBaseClasses */ - class FactoryManagerBase : public BaseClass { +class FactoryManagerBase : public BaseClass { + public: + //@{ Constructors/Destructors. + FactoryManagerBase() + : bIgnoreUserData_(false) {} - public: - //@{ Constructors/Destructors. - FactoryManagerBase() : bIgnoreUserData_(false) { } + //! Destructor. + virtual ~FactoryManagerBase() {} - //! Destructor. - virtual ~FactoryManagerBase() { } + //@} - //@} + //@{ Get/Set functions. - //@{ Get/Set functions. + //! Get + // Return ref because user also give ref to the Hierarchy. + const virtual RCP GetFactory(const std::string& varName) const = 0; + //@} - //! Get - // Return ref because user also give ref to the Hierarchy. - const virtual RCP GetFactory(const std::string& varName) const = 0; - //@} + //! Check + // Return true if Factory associated with varName is registered + virtual bool hasFactory(const std::string& varName) const = 0; - //! Check - // Return true if Factory associated with varName is registered - virtual bool hasFactory(const std::string& varName) const = 0; - - // Free temporarily hold data at the end of Hierarchy::Setup() - // This method is const because the clean concerns only mutable data. - virtual void Clean() const { } // TODO: should be used inside of MueLu::Hierarchy + // Free temporarily hold data at the end of Hierarchy::Setup() + // This method is const because the clean concerns only mutable data. + virtual void Clean() const {} // TODO: should be used inside of MueLu::Hierarchy #ifdef HAVE_MUELU_DEBUG - virtual void ResetDebugData() const = 0; + virtual void ResetDebugData() const = 0; #endif - //! get IgnoreUserData flag - bool IgnoreUserData() const { return bIgnoreUserData_; } + //! get IgnoreUserData flag + bool IgnoreUserData() const { return bIgnoreUserData_; } - //! set IgnoreUserData flag - void SetIgnoreUserData(bool bIgnoreUserData = false) { bIgnoreUserData_ = bIgnoreUserData; } + //! 
set IgnoreUserData flag + void SetIgnoreUserData(bool bIgnoreUserData = false) { bIgnoreUserData_ = bIgnoreUserData; } - private: - //! boolean flag that controls behaviour of Level::GetFactory - //! if bIgnoreUserData == true, the Level::GetFactory function always asks the Factory manager for a valid factory given a variable name - //! if bIgnoreUserData == false, the Level::GetFactory prefers user-provided data for a variable name if available. Otherwise the factory manager is asked for a valid factory - //! default: bIgnoreUserData = false; - bool bIgnoreUserData_; + private: + //! boolean flag that controls behaviour of Level::GetFactory + //! if bIgnoreUserData == true, the Level::GetFactory function always asks the Factory manager for a valid factory given a variable name + //! if bIgnoreUserData == false, the Level::GetFactory prefers user-provided data for a variable name if available. Otherwise the factory manager is asked for a valid factory + //! default: bIgnoreUserData = false; + bool bIgnoreUserData_; - }; // class FactoryManagerBase +}; // class FactoryManagerBase -} // namespace MueLu +} // namespace MueLu #define MUELU_FACTORYMANAGERBASE_SHORT -#endif //ifndef MUELU_FACTORYMANAGERBASE_HPP +#endif //ifndef MUELU_FACTORYMANAGERBASE_HPP diff --git a/packages/muelu/src/MueCentral/MueLu_FactoryManager_decl.hpp b/packages/muelu/src/MueCentral/MueLu_FactoryManager_decl.hpp index 2f4618bb44c0..9ec0f04ffef3 100644 --- a/packages/muelu/src/MueCentral/MueLu_FactoryManager_decl.hpp +++ b/packages/muelu/src/MueCentral/MueLu_FactoryManager_decl.hpp @@ -75,7 +75,6 @@ #include "MueLu_InterfaceMappingTransferFactory_fwd.hpp" #include "MueLu_InterfaceAggregationFactory_fwd.hpp" - #include "MueLu_CoalesceDropFactory_kokkos_fwd.hpp" #include "MueLu_NullspaceFactory_kokkos_fwd.hpp" #include "MueLu_SaPFactory_kokkos_fwd.hpp" @@ -84,7 +83,7 @@ namespace MueLu { - /*! +/*! @class FactoryManager class. 
@brief This class specifies the default factory that should generate some data on a Level if the data does not exist and the generating factory has not been specified. @@ -103,124 +102,120 @@ namespace MueLu { the Get call. If "no", then the FactoryManager will throw an exception indicating that it does not know how to generate A. */ - template - class FactoryManager : public FactoryManagerBase { +template +class FactoryManager : public FactoryManagerBase { #undef MUELU_FACTORYMANAGER_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! @name Constructor/Destructors - //@{ + public: + //! @name Constructor/Destructors + //@{ - //! @brief Constructor. - FactoryManager() { - SetIgnoreUserData(false); // set IgnorUserData flag to false (default behaviour) - useKokkos_ = !Node::is_serial; - } + //! @brief Constructor. + FactoryManager() { + SetIgnoreUserData(false); // set IgnorUserData flag to false (default behaviour) + useKokkos_ = !Node::is_serial; + } - //! Constructor used by HierarchyFactory (temporary, will be removed) - FactoryManager(const std::map >& factoryTable) { - factoryTable_ = factoryTable; - SetIgnoreUserData(false); // set IgnorUserData flag to false (default behaviour) //TODO: use parent class constructor instead - useKokkos_ = !Node::is_serial; - } + //! Constructor used by HierarchyFactory (temporary, will be removed) + FactoryManager(const std::map >& factoryTable) { + factoryTable_ = factoryTable; + SetIgnoreUserData(false); // set IgnorUserData flag to false (default behaviour) //TODO: use parent class constructor instead + useKokkos_ = !Node::is_serial; + } - //! Destructor. - virtual ~FactoryManager() { } + //! Destructor. + virtual ~FactoryManager() {} - //@} + //@} - //! @name Get/Set functions. - //@{ + //! @name Get/Set functions. + //@{ - /*! @brief Set Factory + /*! @brief Set Factory Register the factory that should generate data if said factory is not specified in the request. 
@param[in] name of variable @param[in] factory that generates the data */ - void SetFactory(const std::string & varName, const RCP& factory); + void SetFactory(const std::string& varName, const RCP& factory); - /*! @brief Get factory associated with a particular data name. + /*! @brief Get factory associated with a particular data name. @param[in] varName name of variable. */ - const RCP GetFactory(const std::string& varName) const; + const RCP GetFactory(const std::string& varName) const; - /*! @brief Get factory associated with a particular data name (NONCONST version) + /*! @brief Get factory associated with a particular data name (NONCONST version) @param[in] varName name of variable. */ - const RCP GetFactoryNonConst(const std::string& varName); + const RCP GetFactoryNonConst(const std::string& varName); - //! Check - // Return true if Factory associated with varName is registered - bool hasFactory(const std::string& varName) const; + //! Check + // Return true if Factory associated with varName is registered + bool hasFactory(const std::string& varName) const; + //! + const RCP GetDefaultFactory(const std::string& varName) const; - //! - const RCP GetDefaultFactory(const std::string& varName) const; + void SetKokkosRefactor(const bool useKokkos) { + useKokkos_ = useKokkos; + } - void SetKokkosRefactor(const bool useKokkos) { - useKokkos_ = useKokkos; - } + bool GetKokkosRefactor() const { return useKokkos_; } - bool GetKokkosRefactor() const { return useKokkos_; } + //@} - //@} - - void Clean() const { defaultFactoryTable_.clear(); } + void Clean() const { defaultFactoryTable_.clear(); } #ifdef HAVE_MUELU_DEBUG - void ResetDebugData() const; + void ResetDebugData() const; #endif - void Print() const; - - private: + void Print() const; - //! @name Helper functions - //@{ + private: + //! @name Helper functions + //@{ - /*! Add a factory to the default factory list and return it. This helper function is used by GetDefaultFactory() + /*! 
Add a factory to the default factory list and return it. This helper function is used by GetDefaultFactory() @todo TODO factory->setObjectLabel("Default " + varName + "Factory"); */ - const RCP SetAndReturnDefaultFactory(const std::string& varName, const RCP& factory) const; - //@} + const RCP SetAndReturnDefaultFactory(const std::string& varName, const RCP& factory) const; + //@} - /*! @brief User-defined factories. + /*! @brief User-defined factories. * * User may overwrite default behaviour. The user provided factories are stored in a separate table. When we try to determine * which factory generates the data, this table is searched first. Note: we distinguish 'user defined factory' and 'default factory' to allow the deallocation of default factories separately. */ - std::map > factoryTable_; + std::map > factoryTable_; - /*! @brief Table that holds default factories. + /*! @brief Table that holds default factories. -# We distinguish 'user defined factory' and 'default factory' to allow the deallocation of default factories separately. -# defaultFactoryTable_ is mutable because default factories are only added to the list when they are requested to avoid allocation of unused factories. */ - mutable - std::map > defaultFactoryTable_; + mutable std::map > defaultFactoryTable_; - //! Whether or not to use kokkos factories. - bool useKokkos_; + //! Whether or not to use kokkos factories. 
+ bool useKokkos_; - }; // class +}; // class -} // namespace MueLu +} // namespace MueLu #define MUELU_FACTORYMANAGER_SHORT -#endif // MUELU_FACTORYMANAGER_DECL_HPP +#endif // MUELU_FACTORYMANAGER_DECL_HPP diff --git a/packages/muelu/src/MueCentral/MueLu_FactoryManager_def.hpp b/packages/muelu/src/MueCentral/MueLu_FactoryManager_def.hpp index f7004c043c69..02be7c2ccf8c 100644 --- a/packages/muelu/src/MueCentral/MueLu_FactoryManager_def.hpp +++ b/packages/muelu/src/MueCentral/MueLu_FactoryManager_def.hpp @@ -85,229 +85,227 @@ #include "MueLu_FactoryManager_decl.hpp" - namespace MueLu { -#define MUELU_KOKKOS_FACTORY(varName, oldFactory, newFactory) \ - (!useKokkos_) ? SetAndReturnDefaultFactory(varName, rcp(new oldFactory())) : \ - SetAndReturnDefaultFactory(varName, rcp(new newFactory())); - - template - void FactoryManager::SetFactory(const std::string& varName, const RCP& factory) { - factoryTable_[varName] = factory; - } +#define MUELU_KOKKOS_FACTORY(varName, oldFactory, newFactory) \ + (!useKokkos_) ? 
SetAndReturnDefaultFactory(varName, rcp(new oldFactory())) : SetAndReturnDefaultFactory(varName, rcp(new newFactory())); - template - const RCP FactoryManager::GetFactory(const std::string& varName) const { - if (factoryTable_.count(varName)) { - // Search user provided factories - return factoryTable_.find(varName)->second; - } +template +void FactoryManager::SetFactory(const std::string& varName, const RCP& factory) { + factoryTable_[varName] = factory; +} - // Search/create default factory for this name - return GetDefaultFactory(varName); +template +const RCP FactoryManager::GetFactory(const std::string& varName) const { + if (factoryTable_.count(varName)) { + // Search user provided factories + return factoryTable_.find(varName)->second; } - template - const RCP FactoryManager::GetFactoryNonConst(const std::string& varName) { - return Teuchos::rcp_const_cast(GetFactory(varName)); - } + // Search/create default factory for this name + return GetDefaultFactory(varName); +} + +template +const RCP FactoryManager::GetFactoryNonConst(const std::string& varName) { + return Teuchos::rcp_const_cast(GetFactory(varName)); +} + +template +bool FactoryManager::hasFactory(const std::string& varName) const { + if (factoryTable_.count(varName)) return true; + return false; +} + +template +const RCP FactoryManager::GetDefaultFactory(const std::string& varName) const { + if (defaultFactoryTable_.count(varName)) { + // The factory for this name was already created (possibly, for previous level, if we reuse factory manager) + return defaultFactoryTable_.find(varName)->second; + + } else { + // No factory was created for this name, but we may know which one to create + if (varName == "A") return SetAndReturnDefaultFactory(varName, rcp(new RAPFactory())); + if (varName == "Ainv") return SetAndReturnDefaultFactory(varName, rcp(new InverseApproximationFactory())); + if (varName == "RAP Pattern") return GetFactory("A"); + if (varName == "AP Pattern") return GetFactory("A"); + if 
(varName == "Ptent") return MUELU_KOKKOS_FACTORY(varName, TentativePFactory, TentativePFactory_kokkos); + if (varName == "P") { + // GetFactory("Ptent"): we need to use the same factory instance for both "P" and "Nullspace" + RCP factory; + if (useKokkos_) + factory = rcp(new SaPFactory_kokkos()); + else + factory = rcp(new SaPFactory()); + factory->SetFactory("P", GetFactory("Ptent")); + return SetAndReturnDefaultFactory(varName, factory); + } + if (varName == "Nullspace") { + // GetFactory("Ptent"): we need to use the same factory instance for both "P" and "Nullspace" + RCP factory; + if (useKokkos_) + factory = rcp(new NullspaceFactory_kokkos()); + else + factory = rcp(new NullspaceFactory()); + factory->SetFactory("Nullspace", GetFactory("Ptent")); + return SetAndReturnDefaultFactory(varName, factory); + } + if (varName == "Scaled Nullspace") return SetAndReturnDefaultFactory(varName, rcp(new ScaledNullspaceFactory())); - template - bool FactoryManager::hasFactory(const std::string& varName) const { - if (factoryTable_.count(varName)) return true; - return false; - } + if (varName == "Coordinates") return GetFactory("Ptent"); + if (varName == "Node Comm") return GetFactory("Ptent"); - template - const RCP FactoryManager::GetDefaultFactory(const std::string& varName) const { - if (defaultFactoryTable_.count(varName)) { - // The factory for this name was already created (possibly, for previous level, if we reuse factory manager) - return defaultFactoryTable_.find(varName)->second; - - } else { - // No factory was created for this name, but we may know which one to create - if (varName == "A") return SetAndReturnDefaultFactory(varName, rcp(new RAPFactory())); - if (varName == "Ainv") return SetAndReturnDefaultFactory(varName, rcp(new InverseApproximationFactory())); - if (varName == "RAP Pattern") return GetFactory("A"); - if (varName == "AP Pattern") return GetFactory("A"); - if (varName == "Ptent") return MUELU_KOKKOS_FACTORY(varName, TentativePFactory, 
TentativePFactory_kokkos); - if (varName == "P") { - // GetFactory("Ptent"): we need to use the same factory instance for both "P" and "Nullspace" - RCP factory; - if (useKokkos_) - factory = rcp(new SaPFactory_kokkos()); - else - factory = rcp(new SaPFactory()); - factory->SetFactory("P", GetFactory("Ptent")); - return SetAndReturnDefaultFactory(varName, factory); - } - if (varName == "Nullspace") { - // GetFactory("Ptent"): we need to use the same factory instance for both "P" and "Nullspace" - RCP factory; - if (useKokkos_) - factory = rcp(new NullspaceFactory_kokkos()); - else - factory = rcp(new NullspaceFactory()); - factory->SetFactory("Nullspace", GetFactory("Ptent")); - return SetAndReturnDefaultFactory(varName, factory); - } - if (varName == "Scaled Nullspace") return SetAndReturnDefaultFactory(varName, rcp(new ScaledNullspaceFactory())); - - if (varName == "Coordinates") return GetFactory("Ptent"); - if (varName == "Node Comm") return GetFactory("Ptent"); - - if (varName == "R") return SetAndReturnDefaultFactory(varName, rcp(new TransPFactory())); - if (varName == "RfromPfactory") return GetFactory("P"); + if (varName == "R") return SetAndReturnDefaultFactory(varName, rcp(new TransPFactory())); + if (varName == "RfromPfactory") return GetFactory("P"); #if defined(HAVE_MUELU_ZOLTAN) && defined(HAVE_MPI) - if (varName == "Partition") return SetAndReturnDefaultFactory(varName, rcp(new ZoltanInterface())); -#endif //ifdef HAVE_MPI + if (varName == "Partition") return SetAndReturnDefaultFactory(varName, rcp(new ZoltanInterface())); +#endif //ifdef HAVE_MPI - if (varName == "Importer") { + if (varName == "Importer") { #ifdef HAVE_MPI - return SetAndReturnDefaultFactory(varName, rcp(new RepartitionFactory())); + return SetAndReturnDefaultFactory(varName, rcp(new RepartitionFactory())); #else - return SetAndReturnDefaultFactory(varName, NoFactory::getRCP()); + return SetAndReturnDefaultFactory(varName, NoFactory::getRCP()); #endif - } - if (varName == "number of 
partitions") { + } + if (varName == "number of partitions") { #ifdef HAVE_MPI - return SetAndReturnDefaultFactory(varName, rcp(new RepartitionHeuristicFactory())); + return SetAndReturnDefaultFactory(varName, rcp(new RepartitionHeuristicFactory())); #else - return SetAndReturnDefaultFactory(varName, NoFactory::getRCP()); -#endif - } - if (varName == "repartition: heuristic target rows per process") return GetFactory("number of partitions"); - - if (varName == "Graph") return MUELU_KOKKOS_FACTORY(varName, CoalesceDropFactory, CoalesceDropFactory_kokkos); - if (varName == "UnAmalgamationInfo") return SetAndReturnDefaultFactory(varName, rcp(new AmalgamationFactory())); - if (varName == "Aggregates") return MUELU_KOKKOS_FACTORY(varName, UncoupledAggregationFactory, UncoupledAggregationFactory_kokkos); - if (varName == "AggregateQualities") return SetAndReturnDefaultFactory(varName, rcp(new AggregateQualityEstimateFactory())); - if (varName == "CoarseMap") return SetAndReturnDefaultFactory(varName, rcp(new CoarseMapFactory())); - if (varName == "DofsPerNode") return GetFactory("Graph"); - if (varName == "Filtering") return GetFactory("Graph"); - if (varName == "BlockNumber") return SetAndReturnDefaultFactory(varName, rcp(new InitialBlockNumberFactory())); - if (varName == "LineDetection_VertLineIds") return SetAndReturnDefaultFactory(varName, rcp(new LineDetectionFactory())); - if (varName == "LineDetection_Layers") return GetFactory("LineDetection_VertLineIds"); - if (varName == "CoarseNumZLayers") return GetFactory("LineDetection_VertLineIds"); - - // Structured - if (varName == "structuredInterpolationOrder") return SetAndReturnDefaultFactory(varName, rcp(new StructuredAggregationFactory())); - - // Non-Galerkin - if (varName == "K") return GetFactory("A"); - if (varName == "M") return GetFactory("A"); - if (varName == "Mdiag") return GetFactory("A"); - if (varName == "cfl-based shift array") return GetFactory("A"); - - // Same factory for both Pre and Post Smoother. 
Factory for key "Smoother" can be set by users. - if (varName == "PreSmoother") return GetFactory("Smoother"); - if (varName == "PostSmoother") return GetFactory("Smoother"); - - if (varName == "Ppattern") { - RCP PpFact = rcp(new PatternFactory); - PpFact->SetFactory("P", GetFactory("Ptent")); - return SetAndReturnDefaultFactory(varName, PpFact); - } - if (varName == "Constraint") return SetAndReturnDefaultFactory(varName, rcp(new ConstraintFactory())); - - if (varName == "Smoother") { - Teuchos::ParameterList smootherParamList; - smootherParamList.set("relaxation: type", "Symmetric Gauss-Seidel"); - smootherParamList.set("relaxation: sweeps", Teuchos::OrdinalTraits::one()); - smootherParamList.set("relaxation: damping factor", Teuchos::ScalarTraits::one()); - return SetAndReturnDefaultFactory(varName, rcp(new SmootherFactory(rcp(new TrilinosSmoother("RELAXATION", smootherParamList))))); - } - if (varName == "CoarseSolver") return SetAndReturnDefaultFactory(varName, rcp(new SmootherFactory(rcp(new DirectSolver()), Teuchos::null))); - - if (varName == "DualNodeID2PrimalNodeID") return SetAndReturnDefaultFactory(varName, rcp(new InterfaceMappingTransferFactory())); - if (varName == "CoarseDualNodeID2PrimalNodeID") return SetAndReturnDefaultFactory(varName, rcp(new InterfaceAggregationFactory())); -#ifdef HAVE_MUELU_INTREPID2 - // If we're asking for it, find who made P - if (varName == "pcoarsen: element to node map") return GetFactory("P"); + return SetAndReturnDefaultFactory(varName, NoFactory::getRCP()); #endif - - // NOTE: These are user data, but we might want to print them, so they need a default factory - if (varName == "Pnodal") return NoFactory::getRCP(); - if (varName == "NodeMatrix") return NoFactory::getRCP(); - if (varName == "NodeAggMatrix") return NoFactory::getRCP(); - - - TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "MueLu::FactoryManager::GetDefaultFactory(): No default factory available for building '" + varName + "'."); } - } 
- - template - const RCP FactoryManager::SetAndReturnDefaultFactory(const std::string& varName, const RCP& factory) const { - TEUCHOS_TEST_FOR_EXCEPTION(factory.is_null(), Exceptions::RuntimeError, "The default factory for building '" << varName << "' is null"); + if (varName == "repartition: heuristic target rows per process") return GetFactory("number of partitions"); + + if (varName == "Graph") return MUELU_KOKKOS_FACTORY(varName, CoalesceDropFactory, CoalesceDropFactory_kokkos); + if (varName == "UnAmalgamationInfo") return SetAndReturnDefaultFactory(varName, rcp(new AmalgamationFactory())); + if (varName == "Aggregates") return MUELU_KOKKOS_FACTORY(varName, UncoupledAggregationFactory, UncoupledAggregationFactory_kokkos); + if (varName == "AggregateQualities") return SetAndReturnDefaultFactory(varName, rcp(new AggregateQualityEstimateFactory())); + if (varName == "CoarseMap") return SetAndReturnDefaultFactory(varName, rcp(new CoarseMapFactory())); + if (varName == "DofsPerNode") return GetFactory("Graph"); + if (varName == "Filtering") return GetFactory("Graph"); + if (varName == "BlockNumber") return SetAndReturnDefaultFactory(varName, rcp(new InitialBlockNumberFactory())); + if (varName == "LineDetection_VertLineIds") return SetAndReturnDefaultFactory(varName, rcp(new LineDetectionFactory())); + if (varName == "LineDetection_Layers") return GetFactory("LineDetection_VertLineIds"); + if (varName == "CoarseNumZLayers") return GetFactory("LineDetection_VertLineIds"); + + // Structured + if (varName == "structuredInterpolationOrder") return SetAndReturnDefaultFactory(varName, rcp(new StructuredAggregationFactory())); + + // Non-Galerkin + if (varName == "K") return GetFactory("A"); + if (varName == "M") return GetFactory("A"); + if (varName == "Mdiag") return GetFactory("A"); + if (varName == "cfl-based shift array") return GetFactory("A"); + + // Same factory for both Pre and Post Smoother. Factory for key "Smoother" can be set by users. 
+ if (varName == "PreSmoother") return GetFactory("Smoother"); + if (varName == "PostSmoother") return GetFactory("Smoother"); + + if (varName == "Ppattern") { + RCP PpFact = rcp(new PatternFactory); + PpFact->SetFactory("P", GetFactory("Ptent")); + return SetAndReturnDefaultFactory(varName, PpFact); + } + if (varName == "Constraint") return SetAndReturnDefaultFactory(varName, rcp(new ConstraintFactory())); + + if (varName == "Smoother") { + Teuchos::ParameterList smootherParamList; + smootherParamList.set("relaxation: type", "Symmetric Gauss-Seidel"); + smootherParamList.set("relaxation: sweeps", Teuchos::OrdinalTraits::one()); + smootherParamList.set("relaxation: damping factor", Teuchos::ScalarTraits::one()); + return SetAndReturnDefaultFactory(varName, rcp(new SmootherFactory(rcp(new TrilinosSmoother("RELAXATION", smootherParamList))))); + } + if (varName == "CoarseSolver") return SetAndReturnDefaultFactory(varName, rcp(new SmootherFactory(rcp(new DirectSolver()), Teuchos::null))); - GetOStream(Runtime1) << "Using default factory (" << factory->ShortClassName() <<"["<GetID()<<"]) for building '" << varName << "'." 
<< std::endl; + if (varName == "DualNodeID2PrimalNodeID") return SetAndReturnDefaultFactory(varName, rcp(new InterfaceMappingTransferFactory())); + if (varName == "CoarseDualNodeID2PrimalNodeID") return SetAndReturnDefaultFactory(varName, rcp(new InterfaceAggregationFactory())); +#ifdef HAVE_MUELU_INTREPID2 + // If we're asking for it, find who made P + if (varName == "pcoarsen: element to node map") return GetFactory("P"); +#endif - defaultFactoryTable_[varName] = factory; + // NOTE: These are user data, but we might want to print them, so they need a default factory + if (varName == "Pnodal") return NoFactory::getRCP(); + if (varName == "NodeMatrix") return NoFactory::getRCP(); + if (varName == "NodeAggMatrix") return NoFactory::getRCP(); - return defaultFactoryTable_[varName]; + TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "MueLu::FactoryManager::GetDefaultFactory(): No default factory available for building '" + varName + "'."); } - - template - void FactoryManager::Print() const { - std::map >::const_iterator it; - Teuchos::FancyOStream& fancy = GetOStream(Debug); - //auto & fancy = std::cout;// For debugging - - - fancy << "Users factory table (factoryTable_):" << std::endl; - for (it = factoryTable_.begin(); it != factoryTable_.end(); it++) { - fancy << " " << it->first << " -> "; - if (it->second.get() == NoFactory::get()) fancy << "NoFactory"; - else if (!it->second.get()) fancy<< "NULL"; - else { - fancy << it->second.get()->ShortClassName()<<"["<second.get()->GetID()<<"]"; +} + +template +const RCP FactoryManager::SetAndReturnDefaultFactory(const std::string& varName, const RCP& factory) const { + TEUCHOS_TEST_FOR_EXCEPTION(factory.is_null(), Exceptions::RuntimeError, "The default factory for building '" << varName << "' is null"); + + GetOStream(Runtime1) << "Using default factory (" << factory->ShortClassName() << "[" << factory->GetID() << "]) for building '" << varName << "'." 
<< std::endl; + + defaultFactoryTable_[varName] = factory; + + return defaultFactoryTable_[varName]; +} + +template +void FactoryManager::Print() const { + std::map >::const_iterator it; + Teuchos::FancyOStream& fancy = GetOStream(Debug); + //auto & fancy = std::cout;// For debugging + + fancy << "Users factory table (factoryTable_):" << std::endl; + for (it = factoryTable_.begin(); it != factoryTable_.end(); it++) { + fancy << " " << it->first << " -> "; + if (it->second.get() == NoFactory::get()) + fancy << "NoFactory"; + else if (!it->second.get()) + fancy << "NULL"; + else { + fancy << it->second.get()->ShortClassName() << "[" << it->second.get()->GetID() << "]"; #ifdef HAVE_MUELU_DEBUG - fancy<<"("<second.get()) <<")"; + fancy << "(" << Teuchos::toString(it->second.get()) << ")"; #endif - } - fancy<< std::endl; } + fancy << std::endl; + } - fancy << "Default factory table (defaultFactoryTable_):" << std::endl; - for (it = defaultFactoryTable_.begin(); it != defaultFactoryTable_.end(); it++) { - fancy << " " << it->first << " -> "; - if (it->second.get() == NoFactory::get()) fancy << "NoFactory"; - else if (!it->second.get()) fancy<< "NULL"; - else { - fancy << it->second.get()->ShortClassName()<<"["<second.get()->GetID()<<"]"; + fancy << "Default factory table (defaultFactoryTable_):" << std::endl; + for (it = defaultFactoryTable_.begin(); it != defaultFactoryTable_.end(); it++) { + fancy << " " << it->first << " -> "; + if (it->second.get() == NoFactory::get()) + fancy << "NoFactory"; + else if (!it->second.get()) + fancy << "NULL"; + else { + fancy << it->second.get()->ShortClassName() << "[" << it->second.get()->GetID() << "]"; #ifdef HAVE_MUELU_DEBUG - fancy<<"("<second.get()) <<")"; + fancy << "(" << Teuchos::toString(it->second.get()) << ")"; #endif - } - fancy<< std::endl; } - + fancy << std::endl; } +} #ifdef HAVE_MUELU_DEBUG - template - void FactoryManager::ResetDebugData() const { - std::map >::const_iterator it; - - for (it = factoryTable_.begin(); 
it != factoryTable_.end(); it++) - if (!it->second.is_null()) - it->second->ResetDebugData(); - - for (it = defaultFactoryTable_.begin(); it != defaultFactoryTable_.end(); it++) - if (!it->second.is_null()) - it->second->ResetDebugData(); - } +template +void FactoryManager::ResetDebugData() const { + std::map >::const_iterator it; + + for (it = factoryTable_.begin(); it != factoryTable_.end(); it++) + if (!it->second.is_null()) + it->second->ResetDebugData(); + + for (it = defaultFactoryTable_.begin(); it != defaultFactoryTable_.end(); it++) + if (!it->second.is_null()) + it->second->ResetDebugData(); +} #endif - #undef MUELU_KOKKOS_FACTORY -} // namespace MueLu +} // namespace MueLu //TODO: add operator[] //TODO: should we use a parameterList instead of a std::map? It might be useful to tag which factory have been used and report unused factory. //TODO: add an option 'NoDefault' to check if we are using any default factory. //TODO: use Teuchos::ConstNonConstObjectContainer to allow user to modify factories after a GetFactory() -#endif // MUELU_FACTORYMANAGER_DEF_HPP +#endif // MUELU_FACTORYMANAGER_DEF_HPP diff --git a/packages/muelu/src/MueCentral/MueLu_HierarchyUtils_decl.hpp b/packages/muelu/src/MueCentral/MueLu_HierarchyUtils_decl.hpp index 2bfb4b97378b..ff560d2d746d 100644 --- a/packages/muelu/src/MueCentral/MueLu_HierarchyUtils_decl.hpp +++ b/packages/muelu/src/MueCentral/MueLu_HierarchyUtils_decl.hpp @@ -61,51 +61,46 @@ namespace MueLu { - //! An exception safe way to call the method 'Level::SetFactoryManager()' - class SetFactoryManager { +//! An exception safe way to call the method 'Level::SetFactoryManager()' +class SetFactoryManager { + public: + //@{ - public: - - //@{ - - /*! + /*! 
@brief Constructor Set a given factory manager on a specific level */ - SetFactoryManager(const RCP & level, const RCP & factoryManager) - : level_(level), prevFactoryManager_(level->GetFactoryManager()) - { - // set new factory manager - level->SetFactoryManager(factoryManager); - } - - //! Destructor. - virtual ~SetFactoryManager() { - // restore previous factory manager - level_->SetFactoryManager(prevFactoryManager_); - } - - //@} - - private: - //! needed to save & restore previous factoryManager - const RCP level_; - const RCP prevFactoryManager_; - }; - - - - - template - class HierarchyUtils { + SetFactoryManager(const RCP& level, const RCP& factoryManager) + : level_(level) + , prevFactoryManager_(level->GetFactoryManager()) { + // set new factory manager + level->SetFactoryManager(factoryManager); + } + + //! Destructor. + virtual ~SetFactoryManager() { + // restore previous factory manager + level_->SetFactoryManager(prevFactoryManager_); + } + + //@} + + private: + //! needed to save & restore previous factoryManager + const RCP level_; + const RCP prevFactoryManager_; +}; + +template +class HierarchyUtils { #undef MUELU_HIERARCHYUTILS_SHORT #include "MueLu_UseShortNames.hpp" - public: - /*! + public: + /*! \brief Add non-serializable data to Hierarchy Add non-serializable data given level-specific sublist \c nonSerialList to the Hierarchy \c H. 
@@ -131,14 +126,11 @@ namespace MueLu { @param H Hierarchy, where non-serializable data needs to be added @param nonSerialList Parameter list containing non-serializable data */ - static void AddNonSerializableDataToHierarchy(HierarchyManager& HM, Hierarchy& H, const ParameterList& nonSerialList); - static void CopyBetweenHierarchies(Hierarchy& fromHierarchy, Hierarchy& toHierarchy, const std::string fromLabel, const std::string toLabel, const std::string dataType); - }; - - - + static void AddNonSerializableDataToHierarchy(HierarchyManager& HM, Hierarchy& H, const ParameterList& nonSerialList); + static void CopyBetweenHierarchies(Hierarchy& fromHierarchy, Hierarchy& toHierarchy, const std::string fromLabel, const std::string toLabel, const std::string dataType); +}; -} // namespace MueLu +} // namespace MueLu #define MUELU_HIERARCHYUTILS_SHORT -#endif // MUELU_HIERARCHYUTILS_DECL_HPP +#endif // MUELU_HIERARCHYUTILS_DECL_HPP diff --git a/packages/muelu/src/MueCentral/MueLu_HierarchyUtils_def.hpp b/packages/muelu/src/MueCentral/MueLu_HierarchyUtils_def.hpp index 788081aada5d..16e8daf9f842 100644 --- a/packages/muelu/src/MueCentral/MueLu_HierarchyUtils_def.hpp +++ b/packages/muelu/src/MueCentral/MueLu_HierarchyUtils_def.hpp @@ -62,356 +62,336 @@ namespace MueLu { - // Copy object from one hierarchy to another calling AddNewLevel as appropriate. - template - void HierarchyUtils::CopyBetweenHierarchies(Hierarchy& fromHierarchy, Hierarchy& toHierarchy, const std::string fromLabel, const std::string toLabel, const std::string dataType) { +// Copy object from one hierarchy to another calling AddNewLevel as appropriate. 
+template +void HierarchyUtils::CopyBetweenHierarchies(Hierarchy& fromHierarchy, Hierarchy& toHierarchy, const std::string fromLabel, const std::string toLabel, const std::string dataType) { + // add any necessary levels + for (int i = toHierarchy.GetNumLevels(); i < fromHierarchy.GetNumLevels(); i++) + toHierarchy.AddNewLevel(); - // add any necessary levels - for (int i = toHierarchy.GetNumLevels(); i < fromHierarchy.GetNumLevels(); i++) - toHierarchy.AddNewLevel(); - - for (int i = 0; i < fromHierarchy.GetNumLevels(); i++) { - RCP fromLevel = fromHierarchy.GetLevel(i); - RCP toLevel = toHierarchy.GetLevel(i); - - TEUCHOS_TEST_FOR_EXCEPTION(dataType != "RCP" && dataType != "RCP" - , Exceptions::InvalidArgument, - std::string("MueLu::Utils::CopyBetweenHierarchies: unknown data type(") + dataType + ")"); - if (fromLevel->IsAvailable(fromLabel)) { - if (dataType == "RCP" ) { - // Normally, we should only do - // toLevel->Set(toLabel,fromLevel->Get >(fromLabel)); - // The logic below is meant to handle a special case when we - // repartition a processor away, leaving behind a RCP on - // on the level instead of an RCP + for (int i = 0; i < fromHierarchy.GetNumLevels(); i++) { + RCP fromLevel = fromHierarchy.GetLevel(i); + RCP toLevel = toHierarchy.GetLevel(i); - auto tempOp = fromLevel->Get >(fromLabel); - auto tempMatrix = rcp_dynamic_cast(tempOp); - if(!tempMatrix.is_null()) toLevel->Set(toLabel,tempMatrix); - else toLevel->Set(toLabel,tempOp); - } - if (dataType == "RCP") { - toLevel->Set(toLabel,fromLevel->Get >(fromLabel)); - } + TEUCHOS_TEST_FOR_EXCEPTION(dataType != "RCP" && dataType != "RCP", Exceptions::InvalidArgument, + std::string("MueLu::Utils::CopyBetweenHierarchies: unknown data type(") + dataType + ")"); + if (fromLevel->IsAvailable(fromLabel)) { + if (dataType == "RCP") { + // Normally, we should only do + // toLevel->Set(toLabel,fromLevel->Get >(fromLabel)); + // The logic below is meant to handle a special case when we + // repartition a processor 
away, leaving behind a RCP on + // on the level instead of an RCP + + auto tempOp = fromLevel->Get>(fromLabel); + auto tempMatrix = rcp_dynamic_cast(tempOp); + if (!tempMatrix.is_null()) + toLevel->Set(toLabel, tempMatrix); + else + toLevel->Set(toLabel, tempOp); + } + if (dataType == "RCP") { + toLevel->Set(toLabel, fromLevel->Get>(fromLabel)); } } } +} - // Adds the following non-serializable data (A,P,R,Nullspace,Coordinates) from level-specific sublist nonSerialList, - // calling AddNewLevel as appropriate. - template - void HierarchyUtils::AddNonSerializableDataToHierarchy(HierarchyManager& HM, Hierarchy& H, const ParameterList& nonSerialList) { - typedef typename Xpetra::MultiVector::coordinateType, - LocalOrdinal, GlobalOrdinal, Node> realvaluedmultivector_type; +// Adds the following non-serializable data (A,P,R,Nullspace,Coordinates) from level-specific sublist nonSerialList, +// calling AddNewLevel as appropriate. +template +void HierarchyUtils::AddNonSerializableDataToHierarchy(HierarchyManager& HM, Hierarchy& H, const ParameterList& nonSerialList) { + typedef typename Xpetra::MultiVector::coordinateType, + LocalOrdinal, GlobalOrdinal, Node> + realvaluedmultivector_type; - for (ParameterList::ConstIterator nonSerialEntry = nonSerialList.begin(); nonSerialEntry != nonSerialList.end(); nonSerialEntry++) { - const std::string& levelName = nonSerialEntry->first; - // Check for match of the form "level X" where X is a positive integer - if (nonSerialList.isSublist(levelName) && levelName.find("level ") == 0 && levelName.size() > 6) { - int levelID = strtol(levelName.substr(6).c_str(), 0, 0); - if (levelID > 0) - { - // Do enough level adding so we can be sure to add the data to the right place - for (int i = H.GetNumLevels(); i <= levelID; i++) - H.AddNewLevel(); - } - RCP level = H.GetLevel(levelID); + for (ParameterList::ConstIterator nonSerialEntry = nonSerialList.begin(); nonSerialEntry != nonSerialList.end(); nonSerialEntry++) { + const std::string& 
levelName = nonSerialEntry->first; + // Check for match of the form "level X" where X is a positive integer + if (nonSerialList.isSublist(levelName) && levelName.find("level ") == 0 && levelName.size() > 6) { + int levelID = strtol(levelName.substr(6).c_str(), 0, 0); + if (levelID > 0) { + // Do enough level adding so we can be sure to add the data to the right place + for (int i = H.GetNumLevels(); i <= levelID; i++) + H.AddNewLevel(); + } + RCP level = H.GetLevel(levelID); - RCP M = Teuchos::rcp_dynamic_cast(HM.GetFactoryManager(levelID)); - TEUCHOS_TEST_FOR_EXCEPTION(M.is_null(), Exceptions::InvalidArgument, "MueLu::Utils::AddNonSerializableDataToHierarchy: cannot get FactoryManager"); + RCP M = Teuchos::rcp_dynamic_cast(HM.GetFactoryManager(levelID)); + TEUCHOS_TEST_FOR_EXCEPTION(M.is_null(), Exceptions::InvalidArgument, "MueLu::Utils::AddNonSerializableDataToHierarchy: cannot get FactoryManager"); - // Grab the level sublist & loop over parameters - const ParameterList& levelList = nonSerialList.sublist(levelName); - for (ParameterList::ConstIterator levelListEntry = levelList.begin(); levelListEntry != levelList.end(); levelListEntry++) { - const std::string& name = levelListEntry->first; - TEUCHOS_TEST_FOR_EXCEPTION(name != "A" && name != "P" && name != "R" && name != "K" && name != "M" && name != "Mdiag" && - name != "D0" && name != "M1" && name != "Ms" && name != "M0inv" && - name != "Pnodal" && name != "NodeMatrix" && name != "NodeAggMatrix" && - name != "Nullspace" && name != "Coordinates" && name != "pcoarsen: element to node map" && - name != "Node Comm" && name != "DualNodeID2PrimalNodeID" && name != "Primal interface DOF map" && - !IsParamMuemexVariable(name), Exceptions::InvalidArgument, - std::string("MueLu::Utils::AddNonSerializableDataToHierarchy: parameter list contains unknown data type(") + name + ")"); + // Grab the level sublist & loop over parameters + const ParameterList& levelList = nonSerialList.sublist(levelName); + for 
(ParameterList::ConstIterator levelListEntry = levelList.begin(); levelListEntry != levelList.end(); levelListEntry++) { + const std::string& name = levelListEntry->first; + TEUCHOS_TEST_FOR_EXCEPTION(name != "A" && name != "P" && name != "R" && name != "K" && name != "M" && name != "Mdiag" && + name != "D0" && name != "M1" && name != "Ms" && name != "M0inv" && + name != "Pnodal" && name != "NodeMatrix" && name != "NodeAggMatrix" && + name != "Nullspace" && name != "Coordinates" && name != "pcoarsen: element to node map" && + name != "Node Comm" && name != "DualNodeID2PrimalNodeID" && name != "Primal interface DOF map" && + !IsParamMuemexVariable(name), + Exceptions::InvalidArgument, + std::string("MueLu::Utils::AddNonSerializableDataToHierarchy: parameter list contains unknown data type(") + name + ")"); - // Get a valid communicator and lib - RCP > comm; - if (!level->GetComm().is_null()) - comm = level->GetComm(); - else if (level->IsAvailable("A")) { + // Get a valid communicator and lib + RCP> comm; + if (!level->GetComm().is_null()) + comm = level->GetComm(); + else if (level->IsAvailable("A")) { + RCP mat; + level->Get("A", mat); + comm = mat->getMap()->getComm(); + } else { + RCP level0 = H.GetLevel(0); + if (!level0->GetComm().is_null()) + comm = level0->GetComm(); + else { RCP mat; - level->Get("A", mat); + level0->Get("A", mat); comm = mat->getMap()->getComm(); - } else { - RCP level0 = H.GetLevel(0); - if (!level0->GetComm().is_null()) - comm = level0->GetComm(); - else { - RCP mat; - level0->Get("A", mat); - comm = mat->getMap()->getComm(); - } } - Xpetra::UnderlyingLib lib = level->lib(); + } + Xpetra::UnderlyingLib lib = level->lib(); + + if (name == "A") { + RCP mat; + if (levelListEntry->second.isType()) + // We might also want to read maps here. 
+ mat = Xpetra::IO::Read(Teuchos::getValue(levelListEntry->second), lib, comm); + else + mat = Teuchos::getValue>(levelListEntry->second); + level->Set(name, mat, NoFactory::get()); + M->SetFactory(name, NoFactory::getRCP()); // TAW: not sure about this: be aware that this affects all levels + // However, A is accessible through NoFactory anyway, so it should + // be fine here. + } else if (name == "P" || name == "R" || name == "K" || name == "M") { + if (levelListEntry->second.isType>()) { + RCP mat; + mat = Teuchos::getValue>(levelListEntry->second); + + RCP fact = M->GetFactory(name); + level->AddKeepFlag(name, fact.get(), MueLu::UserData); + level->Set(name, mat, fact.get()); - if (name == "A") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, mat, NoFactory::get()); + } else { RCP mat; if (levelListEntry->second.isType()) // We might also want to read maps here. - mat = Xpetra::IO::Read(Teuchos::getValue(levelListEntry->second), lib, comm); + mat = Xpetra::IO::Read(Teuchos::getValue(levelListEntry->second), lib, comm); else - mat = Teuchos::getValue > (levelListEntry->second); - level->Set(name, mat, NoFactory::get()); - M->SetFactory(name, NoFactory::getRCP()); // TAW: not sure about this: be aware that this affects all levels - // However, A is accessible through NoFactory anyway, so it should - // be fine here. - } - else if(name == "P" || name == "R" || name == "K" || name == "M" ) { - if (levelListEntry->second.isType >()) { - RCP mat; - mat = Teuchos::getValue > (levelListEntry->second); - - RCP fact = M->GetFactory(name); - level->AddKeepFlag(name,fact.get(),MueLu::UserData); - level->Set(name, mat, fact.get()); + mat = Teuchos::getValue>(levelListEntry->second); - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, mat, NoFactory::get()); - } else { - RCP mat; - if (levelListEntry->second.isType()) - // We might also want to read maps here. 
- mat = Xpetra::IO::Read(Teuchos::getValue(levelListEntry->second), lib, comm); - else - mat = Teuchos::getValue > (levelListEntry->second); + RCP fact = M->GetFactory(name); + level->AddKeepFlag(name, fact.get(), MueLu::UserData); + level->Set(name, mat, fact.get()); - RCP fact = M->GetFactory(name); - level->AddKeepFlag(name,fact.get(),MueLu::UserData); - level->Set(name, mat, fact.get()); - - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, mat, NoFactory::get()); - } - } - else if (name == "D0" || name == "M1" || name == "Ms" || name == "M0inv" || name == "Pnodal" || name == "NodeMatrix" || name == "NodeAggMatrix") { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - if (levelListEntry->second.isType >()) - level->Set(name, Teuchos::getValue > (levelListEntry->second), NoFactory::get()); - else - level->Set(name, Teuchos::getValue > (levelListEntry->second), NoFactory::get()); - } - else if (name == "Mdiag") - { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue > (levelListEntry->second), NoFactory::get()); - } - else if (name == "Nullspace") - { - RCP vec; - if (levelListEntry->second.isType()) { - TEUCHOS_ASSERT(level->IsAvailable("A")); - RCP mat; - level->Get("A", mat); - auto map = mat->getMap(); - vec = Xpetra::IO::ReadMultiVector(Teuchos::getValue(levelListEntry->second), map); - } else - vec = Teuchos::getValue > (levelListEntry->second); level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); - level->Set(name, vec, NoFactory::get()); - //M->SetFactory(name, NoFactory::getRCP()); // TAW: generally it is a bad idea to overwrite the factory manager data here - // One should do this only in very special cases + level->Set(name, mat, NoFactory::get()); } - else if(name == "Coordinates") //Scalar of Coordinates MV is always double - { - RCP vec; - if (levelListEntry->second.isType()) { - TEUCHOS_ASSERT(level->IsAvailable("A")); - RCP mat; - level->Get("A", mat); - 
size_t blkSize = mat->GetFixedBlockSize(); - RCP nodeMap = mat->getRowMap(); - if (blkSize > 1) { - // Create a nodal map, as coordinates have not been expanded to a DOF map yet. - RCP dofMap = mat->getRowMap(); - GO indexBase = dofMap->getIndexBase(); - size_t numLocalDOFs = dofMap->getLocalNumElements(); - TEUCHOS_TEST_FOR_EXCEPTION(numLocalDOFs % blkSize, Exceptions::RuntimeError, - "HierarchyUtils: block size (" << blkSize << ") is incompatible with the number of local dofs in a row map (" << numLocalDOFs); - ArrayView GIDs = dofMap->getLocalElementList(); + } else if (name == "D0" || name == "M1" || name == "Ms" || name == "M0inv" || name == "Pnodal" || name == "NodeMatrix" || name == "NodeAggMatrix") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + if (levelListEntry->second.isType>()) + level->Set(name, Teuchos::getValue>(levelListEntry->second), NoFactory::get()); + else + level->Set(name, Teuchos::getValue>(levelListEntry->second), NoFactory::get()); + } else if (name == "Mdiag") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, Teuchos::getValue>(levelListEntry->second), NoFactory::get()); + } else if (name == "Nullspace") { + RCP vec; + if (levelListEntry->second.isType()) { + TEUCHOS_ASSERT(level->IsAvailable("A")); + RCP mat; + level->Get("A", mat); + auto map = mat->getMap(); + vec = Xpetra::IO::ReadMultiVector(Teuchos::getValue(levelListEntry->second), map); + } else + vec = Teuchos::getValue>(levelListEntry->second); + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, vec, NoFactory::get()); + //M->SetFactory(name, NoFactory::getRCP()); // TAW: generally it is a bad idea to overwrite the factory manager data here + // One should do this only in very special cases + } else if (name == "Coordinates") //Scalar of Coordinates MV is always double + { + RCP vec; + if (levelListEntry->second.isType()) { + TEUCHOS_ASSERT(level->IsAvailable("A")); + RCP mat; + level->Get("A", 
mat); + size_t blkSize = mat->GetFixedBlockSize(); + RCP nodeMap = mat->getRowMap(); + if (blkSize > 1) { + // Create a nodal map, as coordinates have not been expanded to a DOF map yet. + RCP dofMap = mat->getRowMap(); + GO indexBase = dofMap->getIndexBase(); + size_t numLocalDOFs = dofMap->getLocalNumElements(); + TEUCHOS_TEST_FOR_EXCEPTION(numLocalDOFs % blkSize, Exceptions::RuntimeError, + "HierarchyUtils: block size (" << blkSize << ") is incompatible with the number of local dofs in a row map (" << numLocalDOFs); + ArrayView GIDs = dofMap->getLocalElementList(); - Array nodeGIDs(numLocalDOFs/blkSize); - for (size_t i = 0; i < numLocalDOFs; i += blkSize) - nodeGIDs[i/blkSize] = (GIDs[i] - indexBase)/blkSize + indexBase; + Array nodeGIDs(numLocalDOFs / blkSize); + for (size_t i = 0; i < numLocalDOFs; i += blkSize) + nodeGIDs[i / blkSize] = (GIDs[i] - indexBase) / blkSize + indexBase; - Xpetra::global_size_t INVALID = Teuchos::OrdinalTraits::invalid(); - nodeMap = MapFactory::Build(dofMap->lib(), INVALID, nodeGIDs(), indexBase, dofMap->getComm()); - } - vec = Xpetra::IO::coordinateType,LocalOrdinal,GlobalOrdinal,Node>::ReadMultiVector(Teuchos::getValue(levelListEntry->second), nodeMap); - } else - vec = Teuchos::getValue > (levelListEntry->second); - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, vec, NoFactory::get()); - //M->SetFactory(name, NoFactory::getRCP()); // TAW: generally it is a bad idea to overwrite the factory manager data here - } - else if(name == "Node Comm") - { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue > >(levelListEntry->second), NoFactory::get()); - } - else if(name == "DualNodeID2PrimalNodeID") - { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue>>(levelListEntry->second), NoFactory::get()); - } - else if(name == "Primal interface DOF map") - { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - 
level->Set(name, Teuchos::getValue>(levelListEntry->second), NoFactory::get()); - } + Xpetra::global_size_t INVALID = Teuchos::OrdinalTraits::invalid(); + nodeMap = MapFactory::Build(dofMap->lib(), INVALID, nodeGIDs(), indexBase, dofMap->getComm()); + } + vec = Xpetra::IO::coordinateType, LocalOrdinal, GlobalOrdinal, Node>::ReadMultiVector(Teuchos::getValue(levelListEntry->second), nodeMap); + } else + vec = Teuchos::getValue>(levelListEntry->second); + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, vec, NoFactory::get()); + //M->SetFactory(name, NoFactory::getRCP()); // TAW: generally it is a bad idea to overwrite the factory manager data here + } else if (name == "Node Comm") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, Teuchos::getValue>>(levelListEntry->second), NoFactory::get()); + } else if (name == "DualNodeID2PrimalNodeID") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, Teuchos::getValue>>(levelListEntry->second), NoFactory::get()); + } else if (name == "Primal interface DOF map") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, Teuchos::getValue>(levelListEntry->second), NoFactory::get()); + } #ifdef HAVE_MUELU_INTREPID2 - else if (name == "pcoarsen: element to node map") - { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue > >(levelListEntry->second), NoFactory::get()); - } + else if (name == "pcoarsen: element to node map") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, Teuchos::getValue>>(levelListEntry->second), NoFactory::get()); + } #endif - else + else #ifdef HAVE_MUELU_MATLAB - { - //Custom variable for Muemex - size_t typeNameStart = name.find_first_not_of(' '); - size_t typeNameEnd = name.find(' ', typeNameStart); - std::string typeName = name.substr(typeNameStart, typeNameEnd - typeNameStart); - 
std::transform(typeName.begin(), typeName.end(), typeName.begin(), ::tolower); - level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); - if(typeName == "matrix") - level->Set(name, Teuchos::getValue >(levelListEntry->second), NoFactory::get()); - else if(typeName == "multivector") - level->Set(name, Teuchos::getValue >(levelListEntry->second), NoFactory::get()); - else if(typeName == "map") - level->Set(name, Teuchos::getValue > >(levelListEntry->second), NoFactory::get()); - else if(typeName == "ordinalvector") - level->Set(name, Teuchos::getValue > >(levelListEntry->second), NoFactory::get()); - else if(typeName == "scalar") - level->Set(name, Teuchos::getValue(levelListEntry->second), NoFactory::get()); - else if(typeName == "double") - level->Set(name, Teuchos::getValue(levelListEntry->second), NoFactory::get()); - else if(typeName == "complex") - level->Set(name, Teuchos::getValue >(levelListEntry->second), NoFactory::get()); - else if(typeName == "int") - level->Set(name, Teuchos::getValue(levelListEntry->second), NoFactory::get()); - else if(typeName == "string") - level->Set(name, Teuchos::getValue(levelListEntry->second), NoFactory::get()); - } + { + //Custom variable for Muemex + size_t typeNameStart = name.find_first_not_of(' '); + size_t typeNameEnd = name.find(' ', typeNameStart); + std::string typeName = name.substr(typeNameStart, typeNameEnd - typeNameStart); + std::transform(typeName.begin(), typeName.end(), typeName.begin(), ::tolower); + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + if (typeName == "matrix") + level->Set(name, Teuchos::getValue>(levelListEntry->second), NoFactory::get()); + else if (typeName == "multivector") + level->Set(name, Teuchos::getValue>(levelListEntry->second), NoFactory::get()); + else if (typeName == "map") + level->Set(name, Teuchos::getValue>>(levelListEntry->second), NoFactory::get()); + else if (typeName == "ordinalvector") + level->Set(name, Teuchos::getValue>>(levelListEntry->second), 
NoFactory::get()); + else if (typeName == "scalar") + level->Set(name, Teuchos::getValue(levelListEntry->second), NoFactory::get()); + else if (typeName == "double") + level->Set(name, Teuchos::getValue(levelListEntry->second), NoFactory::get()); + else if (typeName == "complex") + level->Set(name, Teuchos::getValue>(levelListEntry->second), NoFactory::get()); + else if (typeName == "int") + level->Set(name, Teuchos::getValue(levelListEntry->second), NoFactory::get()); + else if (typeName == "string") + level->Set(name, Teuchos::getValue(levelListEntry->second), NoFactory::get()); + } #else - { - throw std::runtime_error("Invalid non-serializable data on list"); - } -#endif + { + throw std::runtime_error("Invalid non-serializable data on list"); } - } else if (nonSerialList.isSublist(levelName) && levelName.find("user data") != std::string::npos) { - // So far only put data on level 0 - int levelID = 0; - RCP level = H.GetLevel(levelID); +#endif + } + } else if (nonSerialList.isSublist(levelName) && levelName.find("user data") != std::string::npos) { + // So far only put data on level 0 + int levelID = 0; + RCP level = H.GetLevel(levelID); - RCP M = Teuchos::rcp_dynamic_cast(HM.GetFactoryManager(levelID)); - TEUCHOS_TEST_FOR_EXCEPTION(M.is_null(), Exceptions::InvalidArgument, "MueLu::Utils::AddNonSerializableDataToHierarchy: cannot get FactoryManager"); + RCP M = Teuchos::rcp_dynamic_cast(HM.GetFactoryManager(levelID)); + TEUCHOS_TEST_FOR_EXCEPTION(M.is_null(), Exceptions::InvalidArgument, "MueLu::Utils::AddNonSerializableDataToHierarchy: cannot get FactoryManager"); - // Grab the user data sublist & loop over parameters - const ParameterList& userList = nonSerialList.sublist(levelName); - for (ParameterList::ConstIterator userListEntry = userList.begin(); userListEntry != userList.end(); userListEntry++) { - const std::string& name = userListEntry->first; - TEUCHOS_TEST_FOR_EXCEPTION(name != "P" && name != "R" && name != "K" && name != "M" && name != "Mdiag" && - 
name != "D0" && name != "M1" && name != "Ms" && name != "M0inv" && - name != "Nullspace" && name != "Coordinates" && name != "pcoarsen: element to node map" && - name != "Node Comm" && name != "DualNodeID2PrimalNodeID" && name != "Primal interface DOF map" && - name != "output stream" && - !IsParamValidVariable(name), Exceptions::InvalidArgument, - std::string("MueLu::Utils::AddNonSerializableDataToHierarchy: user data parameter list contains unknown data type (") + name + ")"); - if( name == "P" || name == "R" || name == "K" || name == "M" || name == "D0" || name == "M1" || name == "Ms" || name == "M0inv" ) { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue > (userListEntry->second), NoFactory::get()); - } else if (name == "Mdiag") { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue >(userListEntry->second), NoFactory::get()); - } else if (name == "Nullspace") { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue >(userListEntry->second), NoFactory::get()); - //M->SetFactory(name, NoFactory::getRCP()); // TAW: generally it is a bad idea to overwrite the factory manager data here - // One should do this only in very special cases - } else if(name == "Coordinates") {//Scalar of Coordinates MV is always double - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue >(userListEntry->second), NoFactory::get()); - } - else if(name == "Node Comm") { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue > >(userListEntry->second), NoFactory::get()); - } - else if(name == "DualNodeID2PrimalNodeID") - { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue>>(userListEntry->second), NoFactory::get()); - } - else if(name == "Primal interface DOF map") - { - 
level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue>(userListEntry->second), NoFactory::get()); - } + // Grab the user data sublist & loop over parameters + const ParameterList& userList = nonSerialList.sublist(levelName); + for (ParameterList::ConstIterator userListEntry = userList.begin(); userListEntry != userList.end(); userListEntry++) { + const std::string& name = userListEntry->first; + TEUCHOS_TEST_FOR_EXCEPTION(name != "P" && name != "R" && name != "K" && name != "M" && name != "Mdiag" && + name != "D0" && name != "M1" && name != "Ms" && name != "M0inv" && + name != "Nullspace" && name != "Coordinates" && name != "pcoarsen: element to node map" && + name != "Node Comm" && name != "DualNodeID2PrimalNodeID" && name != "Primal interface DOF map" && + name != "output stream" && + !IsParamValidVariable(name), + Exceptions::InvalidArgument, + std::string("MueLu::Utils::AddNonSerializableDataToHierarchy: user data parameter list contains unknown data type (") + name + ")"); + if (name == "P" || name == "R" || name == "K" || name == "M" || name == "D0" || name == "M1" || name == "Ms" || name == "M0inv") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, Teuchos::getValue>(userListEntry->second), NoFactory::get()); + } else if (name == "Mdiag") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, Teuchos::getValue>(userListEntry->second), NoFactory::get()); + } else if (name == "Nullspace") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, Teuchos::getValue>(userListEntry->second), NoFactory::get()); + //M->SetFactory(name, NoFactory::getRCP()); // TAW: generally it is a bad idea to overwrite the factory manager data here + // One should do this only in very special cases + } else if (name == "Coordinates") { //Scalar of Coordinates MV is always double + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + 
level->Set(name, Teuchos::getValue>(userListEntry->second), NoFactory::get()); + } else if (name == "Node Comm") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, Teuchos::getValue>>(userListEntry->second), NoFactory::get()); + } else if (name == "DualNodeID2PrimalNodeID") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, Teuchos::getValue>>(userListEntry->second), NoFactory::get()); + } else if (name == "Primal interface DOF map") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, Teuchos::getValue>(userListEntry->second), NoFactory::get()); + } #ifdef HAVE_MUELU_INTREPID2 - else if (name == "pcoarsen: element to node map") - { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue > >(userListEntry->second), NoFactory::get()); - } + else if (name == "pcoarsen: element to node map") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, Teuchos::getValue>>(userListEntry->second), NoFactory::get()); + } #endif - else if (name == "output stream") - { - H.SetMueLuOStream(Teuchos::getValue >(userListEntry->second)); - } - else { - //Custom variable - size_t typeNameStart = name.find_first_not_of(' '); - size_t typeNameEnd = name.find(' ', typeNameStart); - std::string typeName = name.substr(typeNameStart, typeNameEnd - typeNameStart); - size_t varNameStart = name.find_first_not_of(' ', typeNameEnd); - std::string varName = name.substr(varNameStart, name.size()); - std::transform(typeName.begin(), typeName.end(), typeName.begin(), ::tolower); - level->AddKeepFlag(varName, NoFactory::get(), MueLu::UserData); - if(typeName == "matrix") - level->Set(varName, Teuchos::getValue >(userListEntry->second), NoFactory::get()); - else if(typeName == "multivector") - level->Set(varName, Teuchos::getValue >(userListEntry->second), NoFactory::get()); - else if(typeName == "vector") - level->Set(varName, 
Teuchos::getValue >(userListEntry->second), NoFactory::get()); - else if(typeName == "map") - level->Set(varName, Teuchos::getValue > >(userListEntry->second), NoFactory::get()); - else if(typeName == "ordinalvector") - level->Set(varName, Teuchos::getValue > >(userListEntry->second), NoFactory::get()); - else if(typeName == "scalar") - level->Set(varName, Teuchos::getValue(userListEntry->second), NoFactory::get()); - else if(typeName == "double") - level->Set(varName, Teuchos::getValue(userListEntry->second), NoFactory::get()); - else if(typeName == "complex") - level->Set(varName, Teuchos::getValue >(userListEntry->second), NoFactory::get()); - else if(typeName == "int") - level->Set(varName, Teuchos::getValue(userListEntry->second), NoFactory::get()); - else if(typeName == "string") - level->Set(varName, Teuchos::getValue(userListEntry->second), NoFactory::get()); - else if(typeName == "array") - level->Set(varName, Teuchos::getValue > (userListEntry->second), NoFactory::get()); - else if(typeName == "array") - level->Set(varName, Teuchos::getValue >(userListEntry->second), NoFactory::get()); - else if(typeName == "arrayrcp") - level->Set(varName, Teuchos::getValue >(userListEntry->second), NoFactory::get()); - else if(typeName == "arrayrcp") - level->Set(varName, Teuchos::getValue >(userListEntry->second), NoFactory::get()); - else - throw std::runtime_error("Invalid non-serializable data on list"); - } + else if (name == "output stream") { + H.SetMueLuOStream(Teuchos::getValue>(userListEntry->second)); + } else { + //Custom variable + size_t typeNameStart = name.find_first_not_of(' '); + size_t typeNameEnd = name.find(' ', typeNameStart); + std::string typeName = name.substr(typeNameStart, typeNameEnd - typeNameStart); + size_t varNameStart = name.find_first_not_of(' ', typeNameEnd); + std::string varName = name.substr(varNameStart, name.size()); + std::transform(typeName.begin(), typeName.end(), typeName.begin(), ::tolower); + level->AddKeepFlag(varName, 
NoFactory::get(), MueLu::UserData); + if (typeName == "matrix") + level->Set(varName, Teuchos::getValue>(userListEntry->second), NoFactory::get()); + else if (typeName == "multivector") + level->Set(varName, Teuchos::getValue>(userListEntry->second), NoFactory::get()); + else if (typeName == "vector") + level->Set(varName, Teuchos::getValue>(userListEntry->second), NoFactory::get()); + else if (typeName == "map") + level->Set(varName, Teuchos::getValue>>(userListEntry->second), NoFactory::get()); + else if (typeName == "ordinalvector") + level->Set(varName, Teuchos::getValue>>(userListEntry->second), NoFactory::get()); + else if (typeName == "scalar") + level->Set(varName, Teuchos::getValue(userListEntry->second), NoFactory::get()); + else if (typeName == "double") + level->Set(varName, Teuchos::getValue(userListEntry->second), NoFactory::get()); + else if (typeName == "complex") + level->Set(varName, Teuchos::getValue>(userListEntry->second), NoFactory::get()); + else if (typeName == "int") + level->Set(varName, Teuchos::getValue(userListEntry->second), NoFactory::get()); + else if (typeName == "string") + level->Set(varName, Teuchos::getValue(userListEntry->second), NoFactory::get()); + else if (typeName == "array") + level->Set(varName, Teuchos::getValue>(userListEntry->second), NoFactory::get()); + else if (typeName == "array") + level->Set(varName, Teuchos::getValue>(userListEntry->second), NoFactory::get()); + else if (typeName == "arrayrcp") + level->Set(varName, Teuchos::getValue>(userListEntry->second), NoFactory::get()); + else if (typeName == "arrayrcp") + level->Set(varName, Teuchos::getValue>(userListEntry->second), NoFactory::get()); + else + throw std::runtime_error("Invalid non-serializable data on list"); } - // level->print(std::cout, MueLu::Debug); } + // level->print(std::cout, MueLu::Debug); } } -} // namespace MueLu +} +} // namespace MueLu #define MUELU_HIERARCHY_UTILS_SHORT -#endif // MUELU_HIERARCHYHELPERS_DEF_HPP +#endif // 
MUELU_HIERARCHYHELPERS_DEF_HPP diff --git a/packages/muelu/src/MueCentral/MueLu_Hierarchy_decl.hpp b/packages/muelu/src/MueCentral/MueLu_Hierarchy_decl.hpp index 4c345a4e3c49..50c36f0beb8d 100644 --- a/packages/muelu/src/MueCentral/MueLu_Hierarchy_decl.hpp +++ b/packages/muelu/src/MueCentral/MueLu_Hierarchy_decl.hpp @@ -63,7 +63,7 @@ #include "MueLu_Types.hpp" #include "MueLu_FactoryBase_fwd.hpp" -#include "MueLu_FactoryManager.hpp" // no fwd declaration because constructor of FactoryManager is used as a default parameter of Setup() +#include "MueLu_FactoryManager.hpp" // no fwd declaration because constructor of FactoryManager is used as a default parameter of Setup() #include "MueLu_KeepType.hpp" #include "MueLu_Level_fwd.hpp" #include "MueLu_MasterList.hpp" @@ -76,13 +76,13 @@ namespace MueLu { - enum class ConvergenceStatus { - Converged, - Unconverged, - Undefined - }; +enum class ConvergenceStatus { + Converged, + Unconverged, + Undefined +}; - /*! +/*! @class Hierarchy @brief Provides methods to build a multigrid hierarchy and apply multigrid cycles. @@ -92,111 +92,117 @@ namespace MueLu { restrictors, and coarse level discretizations. Additionally, this class contains an apply method that supports V and W cycles. */ - template - class Hierarchy : public BaseClass { +template +class Hierarchy : public BaseClass { #undef MUELU_HIERARCHY_SHORT #include "MueLu_UseShortNames.hpp" - typedef Teuchos::ScalarTraits STS; - typedef typename STS::magnitudeType MagnitudeType; - - //! Data struct for defining stopping criteria of multigrid iteration - struct ConvData { - ConvData() : maxIts_(1), tol_(-STS::magnitude(STS::one())) { } - ConvData(LO maxIts) : maxIts_(maxIts), tol_(-STS::magnitude(STS::one())) { } - ConvData(MagnitudeType tol) : maxIts_(10000), tol_(tol) { } - ConvData(std::pair p) : maxIts_(p.first), tol_(p.second) { } - - LO maxIts_; - MagnitudeType tol_; - }; - - public: - - //! 
@name Constructors/Destructors - //@{ + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType MagnitudeType; + + //! Data struct for defining stopping criteria of multigrid iteration + struct ConvData { + ConvData() + : maxIts_(1) + , tol_(-STS::magnitude(STS::one())) {} + ConvData(LO maxIts) + : maxIts_(maxIts) + , tol_(-STS::magnitude(STS::one())) {} + ConvData(MagnitudeType tol) + : maxIts_(10000) + , tol_(tol) {} + ConvData(std::pair p) + : maxIts_(p.first) + , tol_(p.second) {} + + LO maxIts_; + MagnitudeType tol_; + }; - //! Default constructor. - Hierarchy(); - //! Constructor that labels the hierarchy. - Hierarchy(const std::string& label); + public: + //! @name Constructors/Destructors + //@{ - //! Constructor - Hierarchy(const RCP & A); + //! Default constructor. + Hierarchy(); + //! Constructor that labels the hierarchy. + Hierarchy(const std::string& label); - //! Constructor - Hierarchy(const RCP & A, const std::string& label); + //! Constructor + Hierarchy(const RCP& A); - //! Destructor. - virtual ~Hierarchy() { } + //! Constructor + Hierarchy(const RCP& A, const std::string& label); - //@} + //! Destructor. + virtual ~Hierarchy() {} - //! @name Set/Get Methods. - //@{ + //@} - //! - static CycleType GetDefaultCycle() { return MasterList::getDefault("cycle type") == "V" ? VCYCLE : WCYCLE; } - static int GetDefaultCycleStartLevel() { return MasterList::getDefault("W cycle start level"); } - static bool GetDefaultImplicitTranspose() { return MasterList::getDefault("transpose: use implicit"); } - static bool GetDefaultFuseProlongationAndUpdate() { return MasterList::getDefault("fuse prolongation and update"); } - static Xpetra::global_size_t GetDefaultMaxCoarseSize() { return MasterList::getDefault("coarse: max size"); } - static int GetDefaultMaxLevels() { return MasterList::getDefault("max levels"); } - static bool GetDefaultPRrebalance() { return MasterList::getDefault("repartition: rebalance P and R"); } + //! 
@name Set/Get Methods. + //@{ - Xpetra::global_size_t GetMaxCoarseSize() const { return maxCoarseSize_; } - bool GetImplicitTranspose() const { return implicitTranspose_; } - bool GetFuseProlongationAndUpdate() const { return fuseProlongationAndUpdate_; } + //! + static CycleType GetDefaultCycle() { return MasterList::getDefault("cycle type") == "V" ? VCYCLE : WCYCLE; } + static int GetDefaultCycleStartLevel() { return MasterList::getDefault("W cycle start level"); } + static bool GetDefaultImplicitTranspose() { return MasterList::getDefault("transpose: use implicit"); } + static bool GetDefaultFuseProlongationAndUpdate() { return MasterList::getDefault("fuse prolongation and update"); } + static Xpetra::global_size_t GetDefaultMaxCoarseSize() { return MasterList::getDefault("coarse: max size"); } + static int GetDefaultMaxLevels() { return MasterList::getDefault("max levels"); } + static bool GetDefaultPRrebalance() { return MasterList::getDefault("repartition: rebalance P and R"); } - void SetMaxCoarseSize(Xpetra::global_size_t maxCoarseSize) { maxCoarseSize_ = maxCoarseSize; } - void SetPRrebalance(bool doPRrebalance) { doPRrebalance_ = doPRrebalance; } - void SetPRViaCopyrebalance(bool doPRViaCopyrebalance) { doPRViaCopyrebalance_ = doPRViaCopyrebalance; } - void SetImplicitTranspose(const bool& implicit) { implicitTranspose_ = implicit; } - void SetFuseProlongationAndUpdate(const bool& fuse) { fuseProlongationAndUpdate_ = fuse; } + Xpetra::global_size_t GetMaxCoarseSize() const { return maxCoarseSize_; } + bool GetImplicitTranspose() const { return implicitTranspose_; } + bool GetFuseProlongationAndUpdate() const { return fuseProlongationAndUpdate_; } - //@} + void SetMaxCoarseSize(Xpetra::global_size_t maxCoarseSize) { maxCoarseSize_ = maxCoarseSize; } + void SetPRrebalance(bool doPRrebalance) { doPRrebalance_ = doPRrebalance; } + void SetPRViaCopyrebalance(bool doPRViaCopyrebalance) { doPRViaCopyrebalance_ = doPRViaCopyrebalance; } + void 
SetImplicitTranspose(const bool& implicit) { implicitTranspose_ = implicit; } + void SetFuseProlongationAndUpdate(const bool& fuse) { fuseProlongationAndUpdate_ = fuse; } - //! + //@} - template - friend class Hierarchy; + //! - private: - int LastLevelID() const { return Levels_.size() - 1; } - void DumpCurrentGraph(int level) const; + template + friend class Hierarchy; - public: + private: + int LastLevelID() const { return Levels_.size() - 1; } + void DumpCurrentGraph(int level) const; - //! Add a level at the end of the hierarchy - void AddLevel(const RCP & level); + public: + //! Add a level at the end of the hierarchy + void AddLevel(const RCP& level); - //! Add a new level at the end of the hierarchy - void AddNewLevel(); + //! Add a new level at the end of the hierarchy + void AddNewLevel(); - //! Retrieve a certain level from hierarchy. - RCP & GetLevel(const int levelID = 0); + //! Retrieve a certain level from hierarchy. + RCP& GetLevel(const int levelID = 0); - int GetNumLevels() const; - int GetGlobalNumLevels() const; + int GetNumLevels() const; + int GetGlobalNumLevels() const; - MagnitudeType GetRate() const { return rate_; } + MagnitudeType GetRate() const { return rate_; } - // This function is global - double GetOperatorComplexity() const; + // This function is global + double GetOperatorComplexity() const; - // This function is global - double GetSmootherComplexity() const; + // This function is global + double GetSmootherComplexity() const; - //! Helper function - void CheckLevel(Level& level, int levelID); + //! Helper function + void CheckLevel(Level& level, int levelID); - void SetMatvecParams(RCP matvecParams); + void SetMatvecParams(RCP matvecParams); - //! Multi-level setup phase: build a new level of the hierarchy. - /*! This method is aimed to be used in a loop building the hierarchy level by level. See Hierarchy::Setup(manager, startLevel, numDesiredLevels) for an example of usage. + //! 
Multi-level setup phase: build a new level of the hierarchy. + /*! This method is aimed to be used in a loop building the hierarchy level by level. See Hierarchy::Setup(manager, startLevel, numDesiredLevels) for an example of usage. * * @param coarseLevelID ID of the level to be built. * @param fineLevelManager defines how to build missing data of the fineLevel (example: aggregates) @@ -233,30 +239,30 @@ namespace MueLu { - have been allocated - requests already posted. */ - bool Setup(int coarseLevelID, const RCP fineLevelManager /* = Teuchos::null */, const RCP coarseLevelManager, - const RCP nextLevelManager = Teuchos::null); + bool Setup(int coarseLevelID, const RCP fineLevelManager /* = Teuchos::null */, const RCP coarseLevelManager, + const RCP nextLevelManager = Teuchos::null); - //! - void Setup(const FactoryManagerBase& manager = FactoryManager(), int startLevel = 0, int numDesiredLevels = GetDefaultMaxLevels()); + //! + void Setup(const FactoryManagerBase& manager = FactoryManager(), int startLevel = 0, int numDesiredLevels = GetDefaultMaxLevels()); - void SetupRe(); + void SetupRe(); - //! Clear impermanent data from previous setup - void Clear(int startLevel = 0); - void ExpertClear(); + //! Clear impermanent data from previous setup + void Clear(int startLevel = 0); + void ExpertClear(); - //! Returns multigrid cycle type (supports VCYCLE and WCYCLE) - CycleType GetCycle() const { return Cycle_; } + //! Returns multigrid cycle type (supports VCYCLE and WCYCLE) + CycleType GetCycle() const { return Cycle_; } - //! Supports VCYCLE and WCYCLE types. - void SetCycle(CycleType Cycle) { Cycle_ = Cycle; } + //! Supports VCYCLE and WCYCLE types. + void SetCycle(CycleType Cycle) { Cycle_ = Cycle; } - void SetCycleStartLevel(int cycleStart) { WCycleStartLevel_ = cycleStart; } + void SetCycleStartLevel(int cycleStart) { WCycleStartLevel_ = cycleStart; } - //! Specify damping factor alpha such that x = x + alpha*P*c, where c is the coarse grid correction. 
- void SetProlongatorScalingFactor(double scalingFactor) { scalingFactor_ = scalingFactor; } + //! Specify damping factor alpha such that x = x + alpha*P*c, where c is the coarse grid correction. + void SetProlongatorScalingFactor(double scalingFactor) { scalingFactor_ = scalingFactor; } - /*! + /*! @brief Apply the multigrid preconditioner. In theory, more general cycle types than just V- and W-cycles are possible. However, @@ -268,10 +274,10 @@ namespace MueLu { @param InitialGuessIsZero Indicates whether the initial guess is zero @param startLevel index of starting level to build multigrid hierarchy (default = 0) */ - ConvergenceStatus Iterate(const MultiVector& B, MultiVector& X, ConvData conv = ConvData(), - bool InitialGuessIsZero = false, LO startLevel = 0); + ConvergenceStatus Iterate(const MultiVector& B, MultiVector& X, ConvData conv = ConvData(), + bool InitialGuessIsZero = false, LO startLevel = 0); - /*! + /*! @brief Print matrices in the multigrid hierarchy to file. @param[in] start start level @@ -280,168 +286,167 @@ namespace MueLu { Default behavior is to print system and transfer matrices from the entire hierarchy. Files are named "A_0.m", "P_1.m", "R_1.m", etc, and are in matrix market coordinate format. */ - void Write(const LO &start=-1, const LO &end=-1, const std::string &suffix=""); + void Write(const LO& start = -1, const LO& end = -1, const std::string& suffix = ""); - //@} + //@} - //! @name Permanent storage - //@{ + //! @name Permanent storage + //@{ - //! Call Level::Keep(ename, factory) for each level of the Hierarchy. - void Keep(const std::string & ename, const FactoryBase* factory = NoFactory::get()); + //! Call Level::Keep(ename, factory) for each level of the Hierarchy. + void Keep(const std::string& ename, const FactoryBase* factory = NoFactory::get()); - //! Call Level::Delete(ename, factory) for each level of the Hierarchy. - void Delete(const std::string& ename, const FactoryBase* factory = NoFactory::get()); + //! 
Call Level::Delete(ename, factory) for each level of the Hierarchy. + void Delete(const std::string& ename, const FactoryBase* factory = NoFactory::get()); - //! Call Level::AddKeepFlag for each level of the Hierarchy. - void AddKeepFlag(const std::string & ename, const FactoryBase* factory = NoFactory::get(), KeepType keep = MueLu::Keep); + //! Call Level::AddKeepFlag for each level of the Hierarchy. + void AddKeepFlag(const std::string& ename, const FactoryBase* factory = NoFactory::get(), KeepType keep = MueLu::Keep); - //! Call Level::RemoveKeepFlag for each level of the Hierarchy - void RemoveKeepFlag(const std::string & ename, const FactoryBase* factory, KeepType keep = MueLu::All); + //! Call Level::RemoveKeepFlag for each level of the Hierarchy + void RemoveKeepFlag(const std::string& ename, const FactoryBase* factory, KeepType keep = MueLu::All); - //@} + //@} - //! @name Overridden from Teuchos::Describable - //@{ + //! @name Overridden from Teuchos::Describable + //@{ - //! Return a simple one-line description of this object. - std::string description() const; + //! Return a simple one-line description of this object. + std::string description() const; - /*! @brief Print the Hierarchy with some verbosity level to a FancyOStream object. + /*! @brief Print the Hierarchy with some verbosity level to a FancyOStream object. @param[in] out The Teuchos::FancyOstream. @param[in] verbLevel Controls amount of output. */ - void describe(Teuchos::FancyOStream& out, const VerbLevel verbLevel = Default) const; - void describe(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel verbLevel = Teuchos::VERB_HIGH) const; + void describe(Teuchos::FancyOStream& out, const VerbLevel verbLevel = Default) const; + void describe(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel verbLevel = Teuchos::VERB_HIGH) const; - //! 
Hierarchy::print is local hierarchy function, thus the statistics can be different from global ones - void print(std::ostream& out = std::cout, const VerbLevel verbLevel = (MueLu::Parameters | MueLu::Statistics0)) const; + //! Hierarchy::print is local hierarchy function, thus the statistics can be different from global ones + void print(std::ostream& out = std::cout, const VerbLevel verbLevel = (MueLu::Parameters | MueLu::Statistics0)) const; - /*! Indicate whether the multigrid method is a preconditioner or a solver. + /*! Indicate whether the multigrid method is a preconditioner or a solver. This is used in conjunction with the verbosity level to determine whether the residuals can be printed. */ - void IsPreconditioner(const bool flag); + void IsPreconditioner(const bool flag); - //@} + //@} - void EnableGraphDumping(const std::string& filename, int levelID = 1) { - isDumpingEnabled_ = true; - dumpLevel_ = levelID; - dumpFile_ = filename; - } + void EnableGraphDumping(const std::string& filename, int levelID = 1) { + isDumpingEnabled_ = true; + dumpLevel_ = levelID; + dumpFile_ = filename; + } - void setlib(Xpetra::UnderlyingLib inlib) { lib_ = inlib; } - Xpetra::UnderlyingLib lib() { return lib_; } + void setlib(Xpetra::UnderlyingLib inlib) { lib_ = inlib; } + Xpetra::UnderlyingLib lib() { return lib_; } - //! force recreation of cached description_ next time description() is called: - void ResetDescription() { - description_ = ""; - } + //! 
force recreation of cached description_ next time description() is called: + void ResetDescription() { + description_ = ""; + } - void AllocateLevelMultiVectors(int numvecs, bool forceMapCheck=false); - void DeleteLevelMultiVectors(); + void AllocateLevelMultiVectors(int numvecs, bool forceMapCheck = false); + void DeleteLevelMultiVectors(); - protected: - const RCP& GetLevelManager(const int levelID) const { - return levelManagers_[levelID]; - } + protected: + const RCP& GetLevelManager(const int levelID) const { + return levelManagers_[levelID]; + } - private: - //! Copy constructor is not implemented. - Hierarchy(const Hierarchy &h); + private: + //! Copy constructor is not implemented. + Hierarchy(const Hierarchy& h); - //! Decide if the residual needs to be computed - bool IsCalculationOfResidualRequired(const LO startLevel, const ConvData& conv) const; + //! Decide if the residual needs to be computed + bool IsCalculationOfResidualRequired(const LO startLevel, const ConvData& conv) const; - /*! + /*! \brief Decide if the multigrid iteration is converged We judge convergence by comparing the current \c residualNorm to the user given \c convergenceTolerance and then return the appropriate \c ConvergenceStatus */ - ConvergenceStatus IsConverged(const Teuchos::Array& residualNorm, - const MagnitudeType convergenceTolerance) const; + ConvergenceStatus IsConverged(const Teuchos::Array& residualNorm, + const MagnitudeType convergenceTolerance) const; - //! Print \c residualNorm for this \c iteration to the screen - void PrintResidualHistory(const LO iteration, - const Teuchos::Array& residualNorm) const; + //! Print \c residualNorm for this \c iteration to the screen + void PrintResidualHistory(const LO iteration, + const Teuchos::Array& residualNorm) const; - //! 
Compute the residual norm and print it depending on the verbosity level - ConvergenceStatus ComputeResidualAndPrintHistory(const Operator& A, const MultiVector& X, - const MultiVector& B, const LO iteration, - const LO startLevel, const ConvData& conv, MagnitudeType& previousResidualNorm); + //! Compute the residual norm and print it depending on the verbosity level + ConvergenceStatus ComputeResidualAndPrintHistory(const Operator& A, const MultiVector& X, + const MultiVector& B, const LO iteration, + const LO startLevel, const ConvData& conv, MagnitudeType& previousResidualNorm); - //! Container for Level objects - Array > Levels_; + //! Container for Level objects + Array > Levels_; - //! We replace coordinates GIDs to make them consistent with matrix GIDs, - //! even if user does not do that. Ideally, though, we should completely - //! remove any notion of coordinate GIDs, and deal only with LIDs, assuming - //! that they are consistent with matrix block IDs - void ReplaceCoordinateMap(Level& level); + //! We replace coordinates GIDs to make them consistent with matrix GIDs, + //! even if user does not do that. Ideally, though, we should completely + //! remove any notion of coordinate GIDs, and deal only with LIDs, assuming + //! that they are consistent with matrix block IDs + void ReplaceCoordinateMap(Level& level); - //! Minimum size of a matrix on any level. If we fall below that, we stop - //! the coarsening - Xpetra::global_size_t maxCoarseSize_; + //! Minimum size of a matrix on any level. If we fall below that, we stop + //! the coarsening + Xpetra::global_size_t maxCoarseSize_; - //! Potential speed up of the setup by skipping R construction, and using - //! transpose matrix-matrix product for RAP - bool implicitTranspose_; + //! Potential speed up of the setup by skipping R construction, and using + //! transpose matrix-matrix product for RAP + bool implicitTranspose_; - //! Potential speed up of the solve by fusing prolongation and update steps. 
- //! This can lead to more iterations to round-off error accumulation. - bool fuseProlongationAndUpdate_; + //! Potential speed up of the solve by fusing prolongation and update steps. + //! This can lead to more iterations to round-off error accumulation. + bool fuseProlongationAndUpdate_; - //! Potential speed up of the setup by skipping rebalancing of P and R, and - //! doing extra import during solve - bool doPRrebalance_; - bool doPRViaCopyrebalance_; // fully explicit, needed for CombinePFactory + //! Potential speed up of the setup by skipping rebalancing of P and R, and + //! doing extra import during solve + bool doPRrebalance_; + bool doPRViaCopyrebalance_; // fully explicit, needed for CombinePFactory - //! Hierarchy may be used in a standalone mode, or as a preconditioner - bool isPreconditioner_; + //! Hierarchy may be used in a standalone mode, or as a preconditioner + bool isPreconditioner_; - //! V- or W-cycle - CycleType Cycle_; + //! V- or W-cycle + CycleType Cycle_; - //! Level at which to start W-cycle - int WCycleStartLevel_; + //! Level at which to start W-cycle + int WCycleStartLevel_; - //! Scaling factor to be applied to coarse grid correction. - double scalingFactor_; + //! Scaling factor to be applied to coarse grid correction. + double scalingFactor_; - //! Epetra/Tpetra mode - Xpetra::UnderlyingLib lib_; + //! Epetra/Tpetra mode + Xpetra::UnderlyingLib lib_; - //! cache description to avoid recreating in each call to description() - use ResetDescription() to force recreation in Setup, SetupRe, etc. - mutable std::string description_ = ""; // mutable so that we can lazily initialize in description(), which is declared const + //! cache description to avoid recreating in each call to description() - use ResetDescription() to force recreation in Setup, SetupRe, etc. + mutable std::string description_ = ""; // mutable so that we can lazily initialize in description(), which is declared const - /*! + /*! 
@brief Graph dumping If enabled, we dump the graph on a specified level into a specified file */ - bool isDumpingEnabled_; - // -1 = dump all levels, -2 = dump nothing - int dumpLevel_; - std::string dumpFile_; - - //! Convergece rate - MagnitudeType rate_; + bool isDumpingEnabled_; + // -1 = dump all levels, -2 = dump nothing + int dumpLevel_; + std::string dumpFile_; - //! Level managers used during the Setup - Array > levelManagers_; + //! Convergece rate + MagnitudeType rate_; - //! Caching (Multi)Vectors used in Hierarchy::Iterate() - int sizeOfAllocatedLevelMultiVectors_; - Array > residual_, coarseRhs_, coarseX_, coarseImport_, coarseExport_, correction_; + //! Level managers used during the Setup + Array > levelManagers_; + //! Caching (Multi)Vectors used in Hierarchy::Iterate() + int sizeOfAllocatedLevelMultiVectors_; + Array > residual_, coarseRhs_, coarseX_, coarseImport_, coarseExport_, correction_; - }; //class Hierarchy +}; //class Hierarchy -} //namespace MueLu +} //namespace MueLu #define MUELU_HIERARCHY_SHORT -#endif // MUELU_HIERARCHY_DECL_HPP +#endif // MUELU_HIERARCHY_DECL_HPP diff --git a/packages/muelu/src/MueCentral/MueLu_Hierarchy_def.hpp b/packages/muelu/src/MueCentral/MueLu_Hierarchy_def.hpp index 13a40368f1c4..a0e250d889f6 100644 --- a/packages/muelu/src/MueCentral/MueLu_Hierarchy_def.hpp +++ b/packages/muelu/src/MueCentral/MueLu_Hierarchy_def.hpp @@ -72,895 +72,899 @@ #include "Teuchos_TimeMonitor.hpp" - - namespace MueLu { - template - Hierarchy::Hierarchy() - : maxCoarseSize_(GetDefaultMaxCoarseSize()), implicitTranspose_(GetDefaultImplicitTranspose()), - fuseProlongationAndUpdate_(GetDefaultFuseProlongationAndUpdate()), - doPRrebalance_(GetDefaultPRrebalance()), doPRViaCopyrebalance_(false), isPreconditioner_(true), Cycle_(GetDefaultCycle()), WCycleStartLevel_(0), - scalingFactor_(Teuchos::ScalarTraits::one()), lib_(Xpetra::UseTpetra), isDumpingEnabled_(false), dumpLevel_(-2), rate_(-1), - sizeOfAllocatedLevelMultiVectors_(0) - { - 
AddLevel(rcp(new Level)); - } - - template - Hierarchy::Hierarchy(const std::string& label) - : Hierarchy() - { - setObjectLabel(label); - Levels_[0]->setObjectLabel(label); - } - - template - Hierarchy::Hierarchy(const RCP& A) - : maxCoarseSize_(GetDefaultMaxCoarseSize()), implicitTranspose_(GetDefaultImplicitTranspose()), - fuseProlongationAndUpdate_(GetDefaultFuseProlongationAndUpdate()), - doPRrebalance_(GetDefaultPRrebalance()), doPRViaCopyrebalance_(false), isPreconditioner_(true), Cycle_(GetDefaultCycle()), WCycleStartLevel_(0), - scalingFactor_(Teuchos::ScalarTraits::one()), isDumpingEnabled_(false), dumpLevel_(-2), rate_(-1), - sizeOfAllocatedLevelMultiVectors_(0) - { - lib_ = A->getDomainMap()->lib(); - - RCP Finest = rcp(new Level); - AddLevel(Finest); +template +Hierarchy::Hierarchy() + : maxCoarseSize_(GetDefaultMaxCoarseSize()) + , implicitTranspose_(GetDefaultImplicitTranspose()) + , fuseProlongationAndUpdate_(GetDefaultFuseProlongationAndUpdate()) + , doPRrebalance_(GetDefaultPRrebalance()) + , doPRViaCopyrebalance_(false) + , isPreconditioner_(true) + , Cycle_(GetDefaultCycle()) + , WCycleStartLevel_(0) + , scalingFactor_(Teuchos::ScalarTraits::one()) + , lib_(Xpetra::UseTpetra) + , isDumpingEnabled_(false) + , dumpLevel_(-2) + , rate_(-1) + , sizeOfAllocatedLevelMultiVectors_(0) { + AddLevel(rcp(new Level)); +} - Finest->Set("A", A); - } +template +Hierarchy::Hierarchy(const std::string& label) + : Hierarchy() { + setObjectLabel(label); + Levels_[0]->setObjectLabel(label); +} - template - Hierarchy::Hierarchy(const RCP& A, const std::string& label) - : Hierarchy(A) - { - setObjectLabel(label); - Levels_[0]->setObjectLabel(label); - } +template +Hierarchy::Hierarchy(const RCP& A) + : maxCoarseSize_(GetDefaultMaxCoarseSize()) + , implicitTranspose_(GetDefaultImplicitTranspose()) + , fuseProlongationAndUpdate_(GetDefaultFuseProlongationAndUpdate()) + , doPRrebalance_(GetDefaultPRrebalance()) + , doPRViaCopyrebalance_(false) + , 
isPreconditioner_(true) + , Cycle_(GetDefaultCycle()) + , WCycleStartLevel_(0) + , scalingFactor_(Teuchos::ScalarTraits::one()) + , isDumpingEnabled_(false) + , dumpLevel_(-2) + , rate_(-1) + , sizeOfAllocatedLevelMultiVectors_(0) { + lib_ = A->getDomainMap()->lib(); + + RCP Finest = rcp(new Level); + AddLevel(Finest); + + Finest->Set("A", A); +} - template - void Hierarchy::AddLevel(const RCP& level) { - int levelID = LastLevelID() + 1; // ID of the inserted level +template +Hierarchy::Hierarchy(const RCP& A, const std::string& label) + : Hierarchy(A) { + setObjectLabel(label); + Levels_[0]->setObjectLabel(label); +} - if (level->GetLevelID() != -1 && (level->GetLevelID() != levelID)) - GetOStream(Warnings1) << "Hierarchy::AddLevel(): Level with ID=" << level->GetLevelID() << - " have been added at the end of the hierarchy\n but its ID have been redefined" << - " because last level ID of the hierarchy was " << LastLevelID() << "." << std::endl; +template +void Hierarchy::AddLevel(const RCP& level) { + int levelID = LastLevelID() + 1; // ID of the inserted level - Levels_.push_back(level); - level->SetLevelID(levelID); - level->setlib(lib_); + if (level->GetLevelID() != -1 && (level->GetLevelID() != levelID)) + GetOStream(Warnings1) << "Hierarchy::AddLevel(): Level with ID=" << level->GetLevelID() << " have been added at the end of the hierarchy\n but its ID have been redefined" + << " because last level ID of the hierarchy was " << LastLevelID() << "." << std::endl; - level->SetPreviousLevel( (levelID == 0) ? Teuchos::null : Levels_[LastLevelID() - 1] ); - level->setObjectLabel(this->getObjectLabel()); - } + Levels_.push_back(level); + level->SetLevelID(levelID); + level->setlib(lib_); - template - void Hierarchy::AddNewLevel() { - RCP newLevel = Levels_[LastLevelID()]->Build(); // new coarse level, using copy constructor - newLevel->setlib(lib_); - this->AddLevel(newLevel); // add to hierarchy - } + level->SetPreviousLevel((levelID == 0) ? 
Teuchos::null : Levels_[LastLevelID() - 1]); + level->setObjectLabel(this->getObjectLabel()); +} - template - RCP & Hierarchy::GetLevel(const int levelID) { - TEUCHOS_TEST_FOR_EXCEPTION(levelID < 0 || levelID > LastLevelID(), Exceptions::RuntimeError, - "MueLu::Hierarchy::GetLevel(): invalid input parameter value: LevelID = " << levelID); - return Levels_[levelID]; - } +template +void Hierarchy::AddNewLevel() { + RCP newLevel = Levels_[LastLevelID()]->Build(); // new coarse level, using copy constructor + newLevel->setlib(lib_); + this->AddLevel(newLevel); // add to hierarchy +} - template - int Hierarchy::GetNumLevels() const { - return Levels_.size(); - } +template +RCP& Hierarchy::GetLevel(const int levelID) { + TEUCHOS_TEST_FOR_EXCEPTION(levelID < 0 || levelID > LastLevelID(), Exceptions::RuntimeError, + "MueLu::Hierarchy::GetLevel(): invalid input parameter value: LevelID = " << levelID); + return Levels_[levelID]; +} - template - int Hierarchy::GetGlobalNumLevels() const { - RCP A = Levels_[0]->template Get >("A"); - RCP > comm = A->getDomainMap()->getComm(); +template +int Hierarchy::GetNumLevels() const { + return Levels_.size(); +} - int numLevels = GetNumLevels(); - int numGlobalLevels; - Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numLevels, Teuchos::ptr(&numGlobalLevels)); +template +int Hierarchy::GetGlobalNumLevels() const { + RCP A = Levels_[0]->template Get >("A"); + RCP > comm = A->getDomainMap()->getComm(); - return numGlobalLevels; - } + int numLevels = GetNumLevels(); + int numGlobalLevels; + Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numLevels, Teuchos::ptr(&numGlobalLevels)); - template - double Hierarchy::GetOperatorComplexity() const { - double totalNnz = 0, lev0Nnz = 1; - for (int i = 0; i < GetNumLevels(); ++i) { - TEUCHOS_TEST_FOR_EXCEPTION(!(Levels_[i]->IsAvailable("A")) , Exceptions::RuntimeError, - "Operator complexity cannot be calculated because A is unavailable on level " << i); - RCP A = Levels_[i]->template Get >("A"); - if 
(A.is_null()) - break; + return numGlobalLevels; +} - RCP Am = rcp_dynamic_cast(A); - if (Am.is_null()) { - GetOStream(Warnings0) << "Some level operators are not matrices, operator complexity calculation aborted" << std::endl; - return 0.0; - } +template +double Hierarchy::GetOperatorComplexity() const { + double totalNnz = 0, lev0Nnz = 1; + for (int i = 0; i < GetNumLevels(); ++i) { + TEUCHOS_TEST_FOR_EXCEPTION(!(Levels_[i]->IsAvailable("A")), Exceptions::RuntimeError, + "Operator complexity cannot be calculated because A is unavailable on level " << i); + RCP A = Levels_[i]->template Get >("A"); + if (A.is_null()) + break; - totalNnz += as(Am->getGlobalNumEntries()); - if (i == 0) - lev0Nnz = totalNnz; + RCP Am = rcp_dynamic_cast(A); + if (Am.is_null()) { + GetOStream(Warnings0) << "Some level operators are not matrices, operator complexity calculation aborted" << std::endl; + return 0.0; } - return totalNnz / lev0Nnz; + + totalNnz += as(Am->getGlobalNumEntries()); + if (i == 0) + lev0Nnz = totalNnz; } + return totalNnz / lev0Nnz; +} - template - double Hierarchy::GetSmootherComplexity() const { - double node_sc = 0, global_sc=0; - double a0_nnz =0; - const size_t INVALID = Teuchos::OrdinalTraits::invalid(); - // Get cost of fine matvec - if (GetNumLevels() <= 0) return -1.0; - if (!Levels_[0]->IsAvailable("A")) return -1.0; - - RCP A = Levels_[0]->template Get >("A"); - if (A.is_null()) return -1.0; - RCP Am = rcp_dynamic_cast(A); - if(Am.is_null()) return -1.0; - a0_nnz = as(Am->getGlobalNumEntries()); - - // Get smoother complexity at each level - for (int i = 0; i < GetNumLevels(); ++i) { - size_t level_sc=0; - if(!Levels_[i]->IsAvailable("PreSmoother")) continue; - RCP S = Levels_[i]->template Get >("PreSmoother"); - if (S.is_null()) continue; - level_sc = S->getNodeSmootherComplexity(); - if(level_sc == INVALID) {global_sc=-1.0;break;} - - node_sc += as(level_sc); +template +double Hierarchy::GetSmootherComplexity() const { + double node_sc = 0, global_sc 
= 0; + double a0_nnz = 0; + const size_t INVALID = Teuchos::OrdinalTraits::invalid(); + // Get cost of fine matvec + if (GetNumLevels() <= 0) return -1.0; + if (!Levels_[0]->IsAvailable("A")) return -1.0; + + RCP A = Levels_[0]->template Get >("A"); + if (A.is_null()) return -1.0; + RCP Am = rcp_dynamic_cast(A); + if (Am.is_null()) return -1.0; + a0_nnz = as(Am->getGlobalNumEntries()); + + // Get smoother complexity at each level + for (int i = 0; i < GetNumLevels(); ++i) { + size_t level_sc = 0; + if (!Levels_[i]->IsAvailable("PreSmoother")) continue; + RCP S = Levels_[i]->template Get >("PreSmoother"); + if (S.is_null()) continue; + level_sc = S->getNodeSmootherComplexity(); + if (level_sc == INVALID) { + global_sc = -1.0; + break; } - double min_sc=0.0; - RCP > comm =A->getDomainMap()->getComm(); - Teuchos::reduceAll(*comm,Teuchos::REDUCE_SUM,node_sc,Teuchos::ptr(&global_sc)); - Teuchos::reduceAll(*comm,Teuchos::REDUCE_MIN,node_sc,Teuchos::ptr(&min_sc)); - - if(min_sc < 0.0) return -1.0; - else return global_sc / a0_nnz; + node_sc += as(level_sc); } + double min_sc = 0.0; + RCP > comm = A->getDomainMap()->getComm(); + Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, node_sc, Teuchos::ptr(&global_sc)); + Teuchos::reduceAll(*comm, Teuchos::REDUCE_MIN, node_sc, Teuchos::ptr(&min_sc)); + if (min_sc < 0.0) + return -1.0; + else + return global_sc / a0_nnz; +} +// Coherence checks todo in Setup() (using an helper function): +template +void Hierarchy::CheckLevel(Level& level, int levelID) { + TEUCHOS_TEST_FOR_EXCEPTION(level.lib() != lib_, Exceptions::RuntimeError, + "MueLu::Hierarchy::CheckLevel(): wrong underlying linear algebra library."); + TEUCHOS_TEST_FOR_EXCEPTION(level.GetLevelID() != levelID, Exceptions::RuntimeError, + "MueLu::Hierarchy::CheckLevel(): wrong level ID"); + TEUCHOS_TEST_FOR_EXCEPTION(levelID != 0 && level.GetPreviousLevel() != Levels_[levelID - 1], Exceptions::RuntimeError, + "MueLu::Hierarchy::Setup(): wrong level parent"); +} - // Coherence 
checks todo in Setup() (using an helper function): - template - void Hierarchy::CheckLevel(Level& level, int levelID) { - TEUCHOS_TEST_FOR_EXCEPTION(level.lib() != lib_, Exceptions::RuntimeError, - "MueLu::Hierarchy::CheckLevel(): wrong underlying linear algebra library."); - TEUCHOS_TEST_FOR_EXCEPTION(level.GetLevelID() != levelID, Exceptions::RuntimeError, - "MueLu::Hierarchy::CheckLevel(): wrong level ID"); - TEUCHOS_TEST_FOR_EXCEPTION(levelID != 0 && level.GetPreviousLevel() != Levels_[levelID-1], Exceptions::RuntimeError, - "MueLu::Hierarchy::Setup(): wrong level parent"); - } - - template - void Hierarchy::SetMatvecParams(RCP matvecParams) { - for (int i = 0; i < GetNumLevels(); ++i) { - RCP level = Levels_[i]; - if (level->IsAvailable("A")) { - RCP Aop = level->Get >("A"); - RCP A = rcp_dynamic_cast(Aop); - if (!A.is_null()) { - RCP xpImporter = A->getCrsGraph()->getImporter(); - if (!xpImporter.is_null()) - xpImporter->setDistributorParameters(matvecParams); - RCP xpExporter = A->getCrsGraph()->getExporter(); - if (!xpExporter.is_null()) - xpExporter->setDistributorParameters(matvecParams); - } - } - if (level->IsAvailable("P")) { - RCP P = level->Get >("P"); - RCP xpImporter = P->getCrsGraph()->getImporter(); - if (!xpImporter.is_null()) - xpImporter->setDistributorParameters(matvecParams); - RCP xpExporter = P->getCrsGraph()->getExporter(); - if (!xpExporter.is_null()) - xpExporter->setDistributorParameters(matvecParams); - } - if (level->IsAvailable("R")) { - RCP R = level->Get >("R"); - RCP xpImporter = R->getCrsGraph()->getImporter(); +template +void Hierarchy::SetMatvecParams(RCP matvecParams) { + for (int i = 0; i < GetNumLevels(); ++i) { + RCP level = Levels_[i]; + if (level->IsAvailable("A")) { + RCP Aop = level->Get >("A"); + RCP A = rcp_dynamic_cast(Aop); + if (!A.is_null()) { + RCP xpImporter = A->getCrsGraph()->getImporter(); if (!xpImporter.is_null()) xpImporter->setDistributorParameters(matvecParams); - RCP xpExporter = 
R->getCrsGraph()->getExporter(); + RCP xpExporter = A->getCrsGraph()->getExporter(); if (!xpExporter.is_null()) xpExporter->setDistributorParameters(matvecParams); } - if (level->IsAvailable("Importer")) { - RCP xpImporter = level->Get< RCP >("Importer"); - if (!xpImporter.is_null()) - xpImporter->setDistributorParameters(matvecParams); - } + } + if (level->IsAvailable("P")) { + RCP P = level->Get >("P"); + RCP xpImporter = P->getCrsGraph()->getImporter(); + if (!xpImporter.is_null()) + xpImporter->setDistributorParameters(matvecParams); + RCP xpExporter = P->getCrsGraph()->getExporter(); + if (!xpExporter.is_null()) + xpExporter->setDistributorParameters(matvecParams); + } + if (level->IsAvailable("R")) { + RCP R = level->Get >("R"); + RCP xpImporter = R->getCrsGraph()->getImporter(); + if (!xpImporter.is_null()) + xpImporter->setDistributorParameters(matvecParams); + RCP xpExporter = R->getCrsGraph()->getExporter(); + if (!xpExporter.is_null()) + xpExporter->setDistributorParameters(matvecParams); + } + if (level->IsAvailable("Importer")) { + RCP xpImporter = level->Get >("Importer"); + if (!xpImporter.is_null()) + xpImporter->setDistributorParameters(matvecParams); } } +} - // The function uses three managers: fine, coarse and next coarse - // We construct the data for the coarse level, and do requests for the next coarse - template - bool Hierarchy::Setup(int coarseLevelID, - const RCP fineLevelManager, - const RCP coarseLevelManager, - const RCP nextLevelManager) { - // Use PrintMonitor/TimerMonitor instead of just a FactoryMonitor to print "Level 0" instead of Hierarchy(0) - // Print is done after the requests for next coarse level - - TEUCHOS_TEST_FOR_EXCEPTION(LastLevelID() < coarseLevelID, Exceptions::RuntimeError, - "MueLu::Hierarchy:Setup(): level " << coarseLevelID << " (specified by coarseLevelID argument) " - "must be built before calling this function."); - - Level& level = *Levels_[coarseLevelID]; - - std::string label = 
FormattingHelper::getColonLabel(level.getObjectLabel()); - TimeMonitor m1(*this, label + this->ShortClassName() + ": " + "Setup (total)"); - TimeMonitor m2(*this, label + this->ShortClassName() + ": " + "Setup" + " (total, level=" + Teuchos::toString(coarseLevelID) + ")"); - - // TODO: pass coarseLevelManager by reference - TEUCHOS_TEST_FOR_EXCEPTION(coarseLevelManager == Teuchos::null, Exceptions::RuntimeError, - "MueLu::Hierarchy::Setup(): argument coarseLevelManager cannot be null"); +// The function uses three managers: fine, coarse and next coarse +// We construct the data for the coarse level, and do requests for the next coarse +template +bool Hierarchy::Setup(int coarseLevelID, + const RCP fineLevelManager, + const RCP coarseLevelManager, + const RCP nextLevelManager) { + // Use PrintMonitor/TimerMonitor instead of just a FactoryMonitor to print "Level 0" instead of Hierarchy(0) + // Print is done after the requests for next coarse level - typedef MueLu::TopRAPFactory TopRAPFactory; - typedef MueLu::TopSmootherFactory TopSmootherFactory; + TEUCHOS_TEST_FOR_EXCEPTION(LastLevelID() < coarseLevelID, Exceptions::RuntimeError, + "MueLu::Hierarchy:Setup(): level " << coarseLevelID << " (specified by coarseLevelID argument) " + "must be built before calling this function."); - if (levelManagers_.size() < coarseLevelID+1) - levelManagers_.resize(coarseLevelID+1); - levelManagers_[coarseLevelID] = coarseLevelManager; + Level& level = *Levels_[coarseLevelID]; - bool isFinestLevel = (fineLevelManager.is_null()); - bool isLastLevel = (nextLevelManager.is_null()); + std::string label = FormattingHelper::getColonLabel(level.getObjectLabel()); + TimeMonitor m1(*this, label + this->ShortClassName() + ": " + "Setup (total)"); + TimeMonitor m2(*this, label + this->ShortClassName() + ": " + "Setup" + " (total, level=" + Teuchos::toString(coarseLevelID) + ")"); - int oldRank = -1; - if (isFinestLevel) { - RCP A = level.Get< RCP >("A"); - RCP domainMap = A->getDomainMap(); - 
RCP > comm = domainMap->getComm(); + // TODO: pass coarseLevelManager by reference + TEUCHOS_TEST_FOR_EXCEPTION(coarseLevelManager == Teuchos::null, Exceptions::RuntimeError, + "MueLu::Hierarchy::Setup(): argument coarseLevelManager cannot be null"); - // Initialize random seed for reproducibility - Utilities::SetRandomSeed(*comm); + typedef MueLu::TopRAPFactory TopRAPFactory; + typedef MueLu::TopSmootherFactory TopSmootherFactory; - // Record the communicator on the level (used for timers sync) - level.SetComm(comm); - oldRank = SetProcRankVerbose(comm->getRank()); + if (levelManagers_.size() < coarseLevelID + 1) + levelManagers_.resize(coarseLevelID + 1); + levelManagers_[coarseLevelID] = coarseLevelManager; - // Set the Hierarchy library to match that of the finest level matrix, - // even if it was already set - lib_ = domainMap->lib(); - level.setlib(lib_); + bool isFinestLevel = (fineLevelManager.is_null()); + bool isLastLevel = (nextLevelManager.is_null()); - } else { - // Permeate library to a coarser level - level.setlib(lib_); + int oldRank = -1; + if (isFinestLevel) { + RCP A = level.Get >("A"); + RCP domainMap = A->getDomainMap(); + RCP > comm = domainMap->getComm(); - Level& prevLevel = *Levels_[coarseLevelID-1]; - oldRank = SetProcRankVerbose(prevLevel.GetComm()->getRank()); - } + // Initialize random seed for reproducibility + Utilities::SetRandomSeed(*comm); - CheckLevel(level, coarseLevelID); + // Record the communicator on the level (used for timers sync) + level.SetComm(comm); + oldRank = SetProcRankVerbose(comm->getRank()); - // Attach FactoryManager to the fine level - RCP SFMFine; - if (!isFinestLevel) - SFMFine = rcp(new SetFactoryManager(Levels_[coarseLevelID-1], fineLevelManager)); + // Set the Hierarchy library to match that of the finest level matrix, + // even if it was already set + lib_ = domainMap->lib(); + level.setlib(lib_); - if (isFinestLevel && Levels_[coarseLevelID]->IsAvailable("Coordinates")) - 
ReplaceCoordinateMap(*Levels_[coarseLevelID]); + } else { + // Permeate library to a coarser level + level.setlib(lib_); - // Attach FactoryManager to the coarse level - SetFactoryManager SFMCoarse(Levels_[coarseLevelID], coarseLevelManager); + Level& prevLevel = *Levels_[coarseLevelID - 1]; + oldRank = SetProcRankVerbose(prevLevel.GetComm()->getRank()); + } - if (isDumpingEnabled_ && (dumpLevel_ == 0 || dumpLevel_ == -1) && coarseLevelID == 1) - DumpCurrentGraph(0); + CheckLevel(level, coarseLevelID); + + // Attach FactoryManager to the fine level + RCP SFMFine; + if (!isFinestLevel) + SFMFine = rcp(new SetFactoryManager(Levels_[coarseLevelID - 1], fineLevelManager)); + + if (isFinestLevel && Levels_[coarseLevelID]->IsAvailable("Coordinates")) + ReplaceCoordinateMap(*Levels_[coarseLevelID]); + + // Attach FactoryManager to the coarse level + SetFactoryManager SFMCoarse(Levels_[coarseLevelID], coarseLevelManager); + + if (isDumpingEnabled_ && (dumpLevel_ == 0 || dumpLevel_ == -1) && coarseLevelID == 1) + DumpCurrentGraph(0); + + RCP coarseFact; + RCP smootherFact = rcp(new TopSmootherFactory(coarseLevelManager, "Smoother")); + + int nextLevelID = coarseLevelID + 1; + + RCP SFMNext; + if (isLastLevel == false) { + // We are not at the coarsest level, so there is going to be another level ("next coarse") after this one ("coarse") + if (nextLevelID > LastLevelID()) + AddNewLevel(); + CheckLevel(*Levels_[nextLevelID], nextLevelID); + + // Attach FactoryManager to the next level (level after coarse) + SFMNext = rcp(new SetFactoryManager(Levels_[nextLevelID], nextLevelManager)); + Levels_[nextLevelID]->Request(TopRAPFactory(coarseLevelManager, nextLevelManager)); + + // Do smoother requests here. 
We don't know whether this is going to be + // the coarsest level or not, but we need to DeclareInput before we call + // coarseRAPFactory.Build(), otherwise some stuff may be erased after + // level releases + level.Request(*smootherFact); + + } else { + // Similar to smoother above, do the coarse solver request here. We don't + // know whether this is going to be the coarsest level or not, but we + // need to DeclareInput before we call coarseRAPFactory.Build(), + // otherwise some stuff may be erased after level releases. This is + // actually evident on ProjectorSmoother. It requires both "A" and + // "Nullspace". However, "Nullspace" is erased after all releases, so if + // we call the coarse factory request after RAP build we would not have + // any data, and cannot get it as we don't have previous managers. The + // typical trace looks like this: + // + // MueLu::Level(0)::GetFactory(Aggregates, 0): No FactoryManager + // during request for data " Aggregates" on level 0 by factory TentativePFactory + // during request for data " P" on level 1 by factory EminPFactory + // during request for data " P" on level 1 by factory TransPFactory + // during request for data " R" on level 1 by factory RAPFactory + // during request for data " A" on level 1 by factory TentativePFactory + // during request for data " Nullspace" on level 2 by factory NullspaceFactory + // during request for data " Nullspace" on level 2 by factory NullspacePresmoothFactory + // during request for data " Nullspace" on level 2 by factory ProjectorSmoother + // during request for data " PreSmoother" on level 2 by factory NoFactory + if (coarseFact.is_null()) + coarseFact = rcp(new TopSmootherFactory(coarseLevelManager, "CoarseSolver")); + level.Request(*coarseFact); + } - RCP coarseFact; - RCP smootherFact = rcp(new TopSmootherFactory(coarseLevelManager, "Smoother")); + GetOStream(Runtime0) << std::endl; + PrintMonitor m0(*this, "Level " + Teuchos::toString(coarseLevelID), static_cast(Runtime0 
| Test)); - int nextLevelID = coarseLevelID + 1; + // Build coarse level hierarchy + RCP Ac = Teuchos::null; + TopRAPFactory coarseRAPFactory(fineLevelManager, coarseLevelManager); - RCP SFMNext; - if (isLastLevel == false) { - // We are not at the coarsest level, so there is going to be another level ("next coarse") after this one ("coarse") - if (nextLevelID > LastLevelID()) - AddNewLevel(); - CheckLevel(*Levels_[nextLevelID], nextLevelID); + if (level.IsAvailable("A")) { + Ac = level.Get >("A"); + } else if (!isFinestLevel) { + // We only build here, the release is done later + coarseRAPFactory.Build(*level.GetPreviousLevel(), level); + } - // Attach FactoryManager to the next level (level after coarse) - SFMNext = rcp(new SetFactoryManager(Levels_[nextLevelID], nextLevelManager)); - Levels_[nextLevelID]->Request(TopRAPFactory(coarseLevelManager, nextLevelManager)); + bool setLastLevelviaMaxCoarseSize = false; + if (level.IsAvailable("A")) + Ac = level.Get >("A"); + RCP Acm = rcp_dynamic_cast(Ac); + + // Record the communicator on the level + if (!Ac.is_null()) + level.SetComm(Ac->getDomainMap()->getComm()); + + // Test if we reach the end of the hierarchy + bool isOrigLastLevel = isLastLevel; + if (isLastLevel) { + // Last level as we have achieved the max limit + isLastLevel = true; + + } else if (Ac.is_null()) { + // Last level for this processor, as it does not belong to the next + // subcommunicator. Other processors may continue working on the + // hierarchy + isLastLevel = true; + + } else { + if (!Acm.is_null() && Acm->getGlobalNumRows() <= maxCoarseSize_) { + // Last level as the size of the coarse matrix became too small + GetOStream(Runtime0) << "Max coarse size (<= " << maxCoarseSize_ << ") achieved" << std::endl; + isLastLevel = true; + if (Acm->getGlobalNumRows() != 0) setLastLevelviaMaxCoarseSize = true; + } + } - // Do smoother requests here. 
We don't know whether this is going to be - // the coarsest level or not, but we need to DeclareInput before we call - // coarseRAPFactory.Build(), otherwise some stuff may be erased after - // level releases - level.Request(*smootherFact); + if (!Ac.is_null() && !isFinestLevel) { + RCP A = Levels_[coarseLevelID - 1]->template Get >("A"); + RCP Am = rcp_dynamic_cast(A); + + const double maxCoarse2FineRatio = 0.8; + if (!Acm.is_null() && !Am.is_null() && Acm->getGlobalNumRows() > maxCoarse2FineRatio * Am->getGlobalNumRows()) { + // We could abort here, but for now we simply notify user. + // Couple of additional points: + // - if repartitioning is delayed until level K, but the aggregation + // procedure stagnates between levels K-1 and K. In this case, + // repartitioning could enable faster coarsening once again, but the + // hierarchy construction will abort due to the stagnation check. + // - if the matrix is small enough, we could move it to one processor. + GetOStream(Warnings0) << "Aggregation stagnated. Please check your matrix and/or adjust your configuration file." + << "Possible fixes:\n" + << " - reduce the maximum number of levels\n" + << " - enable repartitioning\n" + << " - increase the minimum coarse size." << std::endl; + } + } - } else { - // Similar to smoother above, do the coarse solver request here. We don't - // know whether this is going to be the coarsest level or not, but we - // need to DeclareInput before we call coarseRAPFactory.Build(), - // otherwise some stuff may be erased after level releases. This is - // actually evident on ProjectorSmoother. It requires both "A" and - // "Nullspace". However, "Nullspace" is erased after all releases, so if - // we call the coarse factory request after RAP build we would not have - // any data, and cannot get it as we don't have previous managers. 
The - // typical trace looks like this: - // - // MueLu::Level(0)::GetFactory(Aggregates, 0): No FactoryManager - // during request for data " Aggregates" on level 0 by factory TentativePFactory - // during request for data " P" on level 1 by factory EminPFactory - // during request for data " P" on level 1 by factory TransPFactory - // during request for data " R" on level 1 by factory RAPFactory - // during request for data " A" on level 1 by factory TentativePFactory - // during request for data " Nullspace" on level 2 by factory NullspaceFactory - // during request for data " Nullspace" on level 2 by factory NullspacePresmoothFactory - // during request for data " Nullspace" on level 2 by factory ProjectorSmoother - // during request for data " PreSmoother" on level 2 by factory NoFactory + if (isLastLevel) { + if (!isOrigLastLevel) { + // We did not expect to finish this early so we did request a smoother. + // We need a coarse solver instead. Do the magic. + level.Release(*smootherFact); if (coarseFact.is_null()) coarseFact = rcp(new TopSmootherFactory(coarseLevelManager, "CoarseSolver")); level.Request(*coarseFact); } - GetOStream(Runtime0) << std::endl; - PrintMonitor m0(*this, "Level " + Teuchos::toString(coarseLevelID), static_cast(Runtime0 | Test)); - - // Build coarse level hierarchy - RCP Ac = Teuchos::null; - TopRAPFactory coarseRAPFactory(fineLevelManager, coarseLevelManager); - - if (level.IsAvailable("A")) { - Ac = level.Get >("A"); - } else if (!isFinestLevel) { - // We only build here, the release is done later - coarseRAPFactory.Build(*level.GetPreviousLevel(), level); - } + // Do the actual build, if we have any data. + // NOTE: this is not a great check, we may want to call Build() regardless. + if (!Ac.is_null()) + coarseFact->Build(level); - bool setLastLevelviaMaxCoarseSize = false; - if (level.IsAvailable("A")) - Ac = level.Get >("A"); - RCP Acm = rcp_dynamic_cast(Ac); + // Once the dirty deed is done, release stuff. 
The smoother has already + // been released. + level.Release(*coarseFact); - // Record the communicator on the level + } else { + // isLastLevel = false => isOrigLastLevel = false, meaning that we have + // requested the smoother. Now we need to build it and to release it. + // We don't need to worry about the coarse solver, as we didn't request it. if (!Ac.is_null()) - level.SetComm(Ac->getDomainMap()->getComm()); + smootherFact->Build(level); - // Test if we reach the end of the hierarchy - bool isOrigLastLevel = isLastLevel; - if (isLastLevel) { - // Last level as we have achieved the max limit - isLastLevel = true; - - } else if (Ac.is_null()) { - // Last level for this processor, as it does not belong to the next - // subcommunicator. Other processors may continue working on the - // hierarchy - isLastLevel = true; + level.Release(*smootherFact); + } - } else { - if (!Acm.is_null() && Acm->getGlobalNumRows() <= maxCoarseSize_) { - // Last level as the size of the coarse matrix became too small - GetOStream(Runtime0) << "Max coarse size (<= " << maxCoarseSize_ << ") achieved" << std::endl; - isLastLevel = true; - if (Acm->getGlobalNumRows() != 0) setLastLevelviaMaxCoarseSize = true; + if (isLastLevel == true) { + int actualNumLevels = nextLevelID; + if (isOrigLastLevel == false) { + // Earlier in the function, we constructed the next coarse level, and requested data for the that level, + // assuming that we are not at the coarsest level. Now, we changed our mind, so we have to release those. + Levels_[nextLevelID]->Release(TopRAPFactory(coarseLevelManager, nextLevelManager)); + + // We truncate/resize the hierarchy and possibly remove the last created level if there is + // something wrong with it as indicated by its P not being valid. 
This might happen + // if the global number of aggregates turns out to be zero + + if (!setLastLevelviaMaxCoarseSize) { + if (Levels_[nextLevelID - 1]->IsAvailable("P")) { + if (Levels_[nextLevelID - 1]->template Get >("P") == Teuchos::null) actualNumLevels = nextLevelID - 1; + } else + actualNumLevels = nextLevelID - 1; } } + if (actualNumLevels == nextLevelID - 1) { + // Didn't expect to finish early so we requested smoother but need coarse solver instead. + Levels_[nextLevelID - 2]->Release(*smootherFact); - if (!Ac.is_null() && !isFinestLevel) { - RCP A = Levels_[coarseLevelID-1]->template Get< RCP >("A"); - RCP Am = rcp_dynamic_cast(A); - - const double maxCoarse2FineRatio = 0.8; - if (!Acm.is_null() && !Am.is_null() && Acm->getGlobalNumRows() > maxCoarse2FineRatio * Am->getGlobalNumRows()) { - // We could abort here, but for now we simply notify user. - // Couple of additional points: - // - if repartitioning is delayed until level K, but the aggregation - // procedure stagnates between levels K-1 and K. In this case, - // repartitioning could enable faster coarsening once again, but the - // hierarchy construction will abort due to the stagnation check. - // - if the matrix is small enough, we could move it to one processor. - GetOStream(Warnings0) << "Aggregation stagnated. Please check your matrix and/or adjust your configuration file." - << "Possible fixes:\n" - << " - reduce the maximum number of levels\n" - << " - enable repartitioning\n" - << " - increase the minimum coarse size." 
<< std::endl; - - } + if (Levels_[nextLevelID - 2]->IsAvailable("PreSmoother")) Levels_[nextLevelID - 2]->RemoveKeepFlag("PreSmoother", NoFactory::get()); + if (Levels_[nextLevelID - 2]->IsAvailable("PostSmoother")) Levels_[nextLevelID - 2]->RemoveKeepFlag("PostSmoother", NoFactory::get()); + if (coarseFact.is_null()) + coarseFact = rcp(new TopSmootherFactory(coarseLevelManager, "CoarseSolver")); + Levels_[nextLevelID - 2]->Request(*coarseFact); + if (!(Levels_[nextLevelID - 2]->template Get >("A").is_null())) + coarseFact->Build(*(Levels_[nextLevelID - 2])); + Levels_[nextLevelID - 2]->Release(*coarseFact); } + Levels_.resize(actualNumLevels); + } - if (isLastLevel) { - if (!isOrigLastLevel) { - // We did not expect to finish this early so we did request a smoother. - // We need a coarse solver instead. Do the magic. - level.Release(*smootherFact); - if (coarseFact.is_null()) - coarseFact = rcp(new TopSmootherFactory(coarseLevelManager, "CoarseSolver")); - level.Request(*coarseFact); - } + // I think this is the proper place for graph so that it shows every dependence + if (isDumpingEnabled_ && ((dumpLevel_ > 0 && coarseLevelID == dumpLevel_) || dumpLevel_ == -1)) + DumpCurrentGraph(coarseLevelID); - // Do the actual build, if we have any data. - // NOTE: this is not a great check, we may want to call Build() regardless. - if (!Ac.is_null()) - coarseFact->Build(level); + if (!isFinestLevel) { + // Release the hierarchy data + // We release so late to help blocked solvers, as the smoothers for them need A blocks + // which we construct in RAPFactory + level.Release(coarseRAPFactory); + } - // Once the dirty deed is done, release stuff. The smoother has already - // been released. - level.Release(*coarseFact); + if (oldRank != -1) + SetProcRankVerbose(oldRank); - } else { - // isLastLevel = false => isOrigLastLevel = false, meaning that we have - // requested the smoother. Now we need to build it and to release it. 
- // We don't need to worry about the coarse solver, as we didn't request it. - if (!Ac.is_null()) - smootherFact->Build(level); + return isLastLevel; +} - level.Release(*smootherFact); - } +template +void Hierarchy::SetupRe() { + int numLevels = Levels_.size(); + TEUCHOS_TEST_FOR_EXCEPTION(levelManagers_.size() != numLevels, Exceptions::RuntimeError, + "Hierarchy::SetupRe: " << Levels_.size() << " levels, but " << levelManagers_.size() << " level factory managers"); - if (isLastLevel == true) { - int actualNumLevels = nextLevelID; - if (isOrigLastLevel == false) { - // Earlier in the function, we constructed the next coarse level, and requested data for the that level, - // assuming that we are not at the coarsest level. Now, we changed our mind, so we have to release those. - Levels_[nextLevelID]->Release(TopRAPFactory(coarseLevelManager, nextLevelManager)); + const int startLevel = 0; + Clear(startLevel); - // We truncate/resize the hierarchy and possibly remove the last created level if there is - // something wrong with it as indicated by its P not being valid. This might happen - // if the global number of aggregates turns out to be zero +#ifdef HAVE_MUELU_DEBUG + // Reset factories' data used for debugging + for (int i = 0; i < numLevels; i++) + levelManagers_[i]->ResetDebugData(); +#endif - if (!setLastLevelviaMaxCoarseSize) { - if (Levels_[nextLevelID-1]->IsAvailable("P")) { - if (Levels_[nextLevelID-1]->template Get >("P") == Teuchos::null) actualNumLevels = nextLevelID-1; - } - else actualNumLevels = nextLevelID-1; - } - } - if (actualNumLevels == nextLevelID-1) { - // Didn't expect to finish early so we requested smoother but need coarse solver instead. 
- Levels_[nextLevelID-2]->Release(*smootherFact); - - if (Levels_[nextLevelID-2]->IsAvailable("PreSmoother") ) Levels_[nextLevelID-2]->RemoveKeepFlag("PreSmoother" ,NoFactory::get()); - if (Levels_[nextLevelID-2]->IsAvailable("PostSmoother")) Levels_[nextLevelID-2]->RemoveKeepFlag("PostSmoother",NoFactory::get()); - if (coarseFact.is_null()) - coarseFact = rcp(new TopSmootherFactory(coarseLevelManager, "CoarseSolver")); - Levels_[nextLevelID-2]->Request(*coarseFact); - if ( !(Levels_[nextLevelID-2]->template Get >("A").is_null() )) - coarseFact->Build( *(Levels_[nextLevelID-2])); - Levels_[nextLevelID-2]->Release(*coarseFact); - } - Levels_.resize(actualNumLevels); - } + int levelID; + for (levelID = startLevel; levelID < numLevels;) { + bool r = Setup(levelID, + (levelID != 0 ? levelManagers_[levelID - 1] : Teuchos::null), + levelManagers_[levelID], + (levelID + 1 != numLevels ? levelManagers_[levelID + 1] : Teuchos::null)); + levelID++; + if (r) break; + } + // We may construct fewer levels for some reason, make sure we continue + // doing that in the future + Levels_.resize(levelID); + levelManagers_.resize(levelID); - // I think this is the proper place for graph so that it shows every dependence - if (isDumpingEnabled_ && ( (dumpLevel_ > 0 && coarseLevelID == dumpLevel_) || dumpLevel_ == -1 ) ) - DumpCurrentGraph(coarseLevelID); + int sizeOfVecs = sizeOfAllocatedLevelMultiVectors_; - if (!isFinestLevel) { - // Release the hierarchy data - // We release so late to help blocked solvers, as the smoothers for them need A blocks - // which we construct in RAPFactory - level.Release(coarseRAPFactory); - } + AllocateLevelMultiVectors(sizeOfVecs, true); - if (oldRank != -1) - SetProcRankVerbose(oldRank); + // since the # of levels, etc. 
may have changed, force re-determination of description during next call to description() + ResetDescription(); - return isLastLevel; - } + describe(GetOStream(Statistics0), GetVerbLevel()); +} - template - void Hierarchy::SetupRe() { - int numLevels = Levels_.size(); - TEUCHOS_TEST_FOR_EXCEPTION(levelManagers_.size() != numLevels, Exceptions::RuntimeError, - "Hierarchy::SetupRe: " << Levels_.size() << " levels, but " << levelManagers_.size() << " level factory managers"); +template +void Hierarchy::Setup(const FactoryManagerBase& manager, int startLevel, int numDesiredLevels) { + // Use MueLu::BaseClass::description() to avoid printing "{numLevels = 1}" (numLevels is increasing...) + PrintMonitor m0(*this, "Setup (" + this->MueLu::BaseClass::description() + ")", Runtime0); - const int startLevel = 0; - Clear(startLevel); + Clear(startLevel); -#ifdef HAVE_MUELU_DEBUG - // Reset factories' data used for debugging - for (int i = 0; i < numLevels; i++) - levelManagers_[i]->ResetDebugData(); + // Check Levels_[startLevel] exists. + TEUCHOS_TEST_FOR_EXCEPTION(Levels_.size() <= startLevel, Exceptions::RuntimeError, + "MueLu::Hierarchy::Setup(): fine level (" << startLevel << ") does not exist"); -#endif + TEUCHOS_TEST_FOR_EXCEPTION(numDesiredLevels <= 0, Exceptions::RuntimeError, + "Constructing non-positive (" << numDesiredLevels << ") number of levels does not make sense."); - int levelID; - for (levelID = startLevel; levelID < numLevels;) { - bool r = Setup(levelID, - (levelID != 0 ? levelManagers_[levelID-1] : Teuchos::null), - levelManagers_[levelID], - (levelID+1 != numLevels ? 
levelManagers_[levelID+1] : Teuchos::null)); - levelID++; - if (r) break; - } - // We may construct fewer levels for some reason, make sure we continue - // doing that in the future - Levels_ .resize(levelID); - levelManagers_.resize(levelID); + // Check for fine level matrix A + TEUCHOS_TEST_FOR_EXCEPTION(!Levels_[startLevel]->IsAvailable("A"), Exceptions::RuntimeError, + "MueLu::Hierarchy::Setup(): fine level (" << startLevel << ") has no matrix A! " + "Set fine level matrix A using Level.Set()"); - int sizeOfVecs = sizeOfAllocatedLevelMultiVectors_; + RCP A = Levels_[startLevel]->template Get >("A"); + lib_ = A->getDomainMap()->lib(); - AllocateLevelMultiVectors(sizeOfVecs, true); + if (IsPrint(Statistics2)) { + RCP Amat = rcp_dynamic_cast(A); - // since the # of levels, etc. may have changed, force re-determination of description during next call to description() - ResetDescription(); + if (!Amat.is_null()) { + RCP params = rcp(new ParameterList()); + params->set("printLoadBalancingInfo", true); + params->set("printCommInfo", true); - describe(GetOStream(Statistics0), GetVerbLevel()); + GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*Amat, "A0", params); + } else { + GetOStream(Warnings1) << "Fine level operator is not a matrix, statistics are not available" << std::endl; + } } - template - void Hierarchy::Setup(const FactoryManagerBase& manager, int startLevel, int numDesiredLevels) { - // Use MueLu::BaseClass::description() to avoid printing "{numLevels = 1}" (numLevels is increasing...) 
- PrintMonitor m0(*this, "Setup (" + this->MueLu::BaseClass::description() + ")", Runtime0); + RCP rcpmanager = rcpFromRef(manager); + + const int lastLevel = startLevel + numDesiredLevels - 1; + GetOStream(Runtime0) << "Setup loop: startLevel = " << startLevel << ", lastLevel = " << lastLevel + << " (stop if numLevels = " << numDesiredLevels << " or Ac.size() < " << maxCoarseSize_ << ")" << std::endl; + + // Setup multigrid levels + int iLevel = 0; + if (numDesiredLevels == 1) { + iLevel = 0; + Setup(startLevel, Teuchos::null, rcpmanager, Teuchos::null); // setup finest==coarsest level (first and last managers are Teuchos::null) + + } else { + bool bIsLastLevel = Setup(startLevel, Teuchos::null, rcpmanager, rcpmanager); // setup finest level (level 0) (first manager is Teuchos::null) + if (bIsLastLevel == false) { + for (iLevel = startLevel + 1; iLevel < lastLevel; iLevel++) { + bIsLastLevel = Setup(iLevel, rcpmanager, rcpmanager, rcpmanager); // setup intermediate levels + if (bIsLastLevel == true) + break; + } + if (bIsLastLevel == false) + Setup(lastLevel, rcpmanager, rcpmanager, Teuchos::null); // setup coarsest level (last manager is Teuchos::null) + } + } - Clear(startLevel); + // TODO: some check like this should be done at the beginning of the routine + TEUCHOS_TEST_FOR_EXCEPTION(iLevel != Levels_.size() - 1, Exceptions::RuntimeError, + "MueLu::Hierarchy::Setup(): number of level"); - // Check Levels_[startLevel] exists. 
- TEUCHOS_TEST_FOR_EXCEPTION(Levels_.size() <= startLevel, Exceptions::RuntimeError, - "MueLu::Hierarchy::Setup(): fine level (" << startLevel << ") does not exist"); + // TODO: this is not exception safe: manager will still hold default + // factories if you exit this function with an exception + manager.Clean(); - TEUCHOS_TEST_FOR_EXCEPTION(numDesiredLevels <= 0, Exceptions::RuntimeError, - "Constructing non-positive (" << numDesiredLevels << ") number of levels does not make sense."); + describe(GetOStream(Statistics0), GetVerbLevel()); +} - // Check for fine level matrix A - TEUCHOS_TEST_FOR_EXCEPTION(!Levels_[startLevel]->IsAvailable("A"), Exceptions::RuntimeError, - "MueLu::Hierarchy::Setup(): fine level (" << startLevel << ") has no matrix A! " - "Set fine level matrix A using Level.Set()"); +template +void Hierarchy::Clear(int startLevel) { + if (startLevel < GetNumLevels()) + GetOStream(Runtime0) << "Clearing old data (if any)" << std::endl; - RCP A = Levels_[startLevel]->template Get >("A"); - lib_ = A->getDomainMap()->lib(); + for (int iLevel = startLevel; iLevel < GetNumLevels(); iLevel++) + Levels_[iLevel]->Clear(); +} - if (IsPrint(Statistics2)) { - RCP Amat = rcp_dynamic_cast(A); +template +void Hierarchy::ExpertClear() { + GetOStream(Runtime0) << "Clearing old data (expert)" << std::endl; + for (int iLevel = 0; iLevel < GetNumLevels(); iLevel++) + Levels_[iLevel]->ExpertClear(); +} - if (!Amat.is_null()) { - RCP params = rcp(new ParameterList()); - params->set("printLoadBalancingInfo", true); - params->set("printCommInfo", true); +#if defined(HAVE_MUELU_EXPERIMENTAL) && defined(HAVE_MUELU_ADDITIVE_VARIANT) +template +ConvergenceStatus Hierarchy::Iterate(const MultiVector& B, MultiVector& X, ConvData conv, + bool InitialGuessIsZero, LO startLevel) { + LO nIts = conv.maxIts_; + MagnitudeType tol = conv.tol_; - GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*Amat, "A0", params); - } else { - GetOStream(Warnings1) << "Fine level operator is not a 
matrix, statistics are not available" << std::endl; - } - } + std::string prefix = this->ShortClassName() + ": "; + std::string levelSuffix = " (level=" + toString(startLevel) + ")"; + std::string levelSuffix1 = " (level=" + toString(startLevel + 1) + ")"; - RCP rcpmanager = rcpFromRef(manager); + using namespace Teuchos; + RCP