From f09e897e7349cbb12e0c1d0f12183e3e3984c152 Mon Sep 17 00:00:00 2001 From: Graham Harper Date: Mon, 17 Jul 2023 11:05:01 -0600 Subject: [PATCH] Run a clangd format on all of MueLu's src directory --- .../MueLu_BrickAggregationFactory_decl.hpp | 161 +- .../MueLu_BrickAggregationFactory_def.hpp | 1034 +-- .../Containers/MueLu_Aggregates_decl.hpp | 600 +- .../Graph/Containers/MueLu_Aggregates_def.hpp | 441 +- .../src/Graph/Containers/MueLu_GraphBase.hpp | 93 +- .../src/Graph/Containers/MueLu_Graph_decl.hpp | 137 +- .../src/Graph/Containers/MueLu_Graph_def.hpp | 83 +- .../Graph/Containers/MueLu_LWGraph_decl.hpp | 237 +- .../Graph/Containers/MueLu_LWGraph_def.hpp | 72 +- .../Containers/MueLu_LWGraph_kokkos_decl.hpp | 204 +- .../Containers/MueLu_LWGraph_kokkos_def.hpp | 46 +- .../src/Graph/Containers/MueLu_LinkedList.cpp | 68 +- .../src/Graph/Containers/MueLu_LinkedList.hpp | 36 +- .../MueLu_LocalLWGraph_kokkos_decl.hpp | 238 +- .../MueLu_LocalLWGraph_kokkos_def.hpp | 70 +- .../Containers/MueLu_Zoltan2GraphAdapter.hpp | 264 +- .../MueLu_HybridAggregationFactory_decl.hpp | 163 +- .../MueLu_HybridAggregationFactory_def.hpp | 971 +-- .../MueLu_AmalgamationFactory_decl.hpp | 142 +- .../MueLu_AmalgamationFactory_def.hpp | 360 +- .../MueLu_AmalgamationInfo_decl.hpp | 316 +- .../MueLu_AmalgamationInfo_def.hpp | 482 +- .../MueLu_CoalesceDropFactory_decl.hpp | 233 +- .../MueLu_CoalesceDropFactory_def.hpp | 3550 ++++++----- .../MueLu_CoalesceDropFactory_kokkos_decl.hpp | 216 +- .../MueLu_CoalesceDropFactory_kokkos_def.hpp | 1645 ++--- .../MueLu_PreDropFunctionBaseClass_decl.hpp | 39 +- .../MueLu_PreDropFunctionConstVal_decl.hpp | 95 +- .../MueLu_PreDropFunctionConstVal_def.hpp | 63 +- .../MueLu_SmooVecCoalesceDropFactory_decl.hpp | 191 +- .../MueLu_SmooVecCoalesceDropFactory_def.hpp | 774 ++- .../MueLu_UnsmooshFactory_decl.hpp | 149 +- .../MueLu_UnsmooshFactory_def.hpp | 392 +- ...MueLu_VariableDofLaplacianFactory_decl.hpp | 555 +- .../MueLu_VariableDofLaplacianFactory_def.hpp | 
1137 ++-- .../Graph/MueLu_AggregationAlgorithmBase.hpp | 52 +- .../MueLu_AggregationAlgorithmBase_kokkos.hpp | 53 +- .../MueLu_NotayAggregationFactory_decl.hpp | 96 +- .../MueLu_NotayAggregationFactory_def.hpp | 1698 ++--- ...Lu_AggregationStructuredAlgorithm_decl.hpp | 124 +- ...eLu_AggregationStructuredAlgorithm_def.hpp | 727 ++- ...egationStructuredAlgorithm_kokkos_decl.hpp | 314 +- ...regationStructuredAlgorithm_kokkos_def.hpp | 684 +- .../MueLu_IndexManager_decl.hpp | 264 +- .../MueLu_IndexManager_def.hpp | 393 +- .../MueLu_IndexManager_kokkos_decl.hpp | 208 +- .../MueLu_IndexManager_kokkos_def.hpp | 300 +- ...ueLu_StructuredAggregationFactory_decl.hpp | 132 +- ...MueLu_StructuredAggregationFactory_def.hpp | 563 +- ...ructuredAggregationFactory_kokkos_decl.hpp | 147 +- ...tructuredAggregationFactory_kokkos_def.hpp | 396 +- ...u_GlobalLexicographicIndexManager_decl.hpp | 84 +- ...Lu_GlobalLexicographicIndexManager_def.hpp | 663 +- ...Lu_LocalLexicographicIndexManager_decl.hpp | 121 +- ...eLu_LocalLexicographicIndexManager_def.hpp | 862 +-- .../MueLu_UncoupledIndexManager_decl.hpp | 94 +- .../MueLu_UncoupledIndexManager_def.hpp | 353 +- .../MueLu_AggregationPhase1Algorithm_decl.hpp | 121 +- .../MueLu_AggregationPhase1Algorithm_def.hpp | 332 +- ...AggregationPhase1Algorithm_kokkos_decl.hpp | 129 +- ..._AggregationPhase1Algorithm_kokkos_def.hpp | 404 +- ...MueLu_AggregationPhase2aAlgorithm_decl.hpp | 97 +- .../MueLu_AggregationPhase2aAlgorithm_def.hpp | 141 +- ...ggregationPhase2aAlgorithm_kokkos_decl.hpp | 122 +- ...AggregationPhase2aAlgorithm_kokkos_def.hpp | 517 +- ...MueLu_AggregationPhase2bAlgorithm_decl.hpp | 91 +- .../MueLu_AggregationPhase2bAlgorithm_def.hpp | 141 +- ...ggregationPhase2bAlgorithm_kokkos_decl.hpp | 120 +- ...AggregationPhase2bAlgorithm_kokkos_def.hpp | 397 +- .../MueLu_AggregationPhase3Algorithm_decl.hpp | 85 +- .../MueLu_AggregationPhase3Algorithm_def.hpp | 272 +- ...AggregationPhase3Algorithm_kokkos_decl.hpp | 101 +- 
..._AggregationPhase3Algorithm_kokkos_def.hpp | 340 +- ...eLu_InterfaceAggregationAlgorithm_decl.hpp | 81 +- ...ueLu_InterfaceAggregationAlgorithm_def.hpp | 44 +- ..._IsolatedNodeAggregationAlgorithm_decl.hpp | 91 +- ...u_IsolatedNodeAggregationAlgorithm_def.hpp | 32 +- ...edNodeAggregationAlgorithm_kokkos_decl.hpp | 95 +- ...tedNodeAggregationAlgorithm_kokkos_def.hpp | 62 +- .../MueLu_OnePtAggregationAlgorithm_decl.hpp | 88 +- .../MueLu_OnePtAggregationAlgorithm_def.hpp | 31 +- ..._OnePtAggregationAlgorithm_kokkos_decl.hpp | 92 +- ...u_OnePtAggregationAlgorithm_kokkos_def.hpp | 125 +- ...erveDirichletAggregationAlgorithm_decl.hpp | 105 +- ...serveDirichletAggregationAlgorithm_def.hpp | 50 +- ...ichletAggregationAlgorithm_kokkos_decl.hpp | 109 +- ...richletAggregationAlgorithm_kokkos_def.hpp | 101 +- ...MueLu_UncoupledAggregationFactory_decl.hpp | 154 +- .../MueLu_UncoupledAggregationFactory_def.hpp | 472 +- ...ncoupledAggregationFactory_kokkos_decl.hpp | 305 +- ...UncoupledAggregationFactory_kokkos_def.hpp | 662 +- .../MueLu_UserAggregationFactory_decl.hpp | 16 +- .../MueLu_UserAggregationFactory_def.hpp | 173 +- packages/muelu/src/Headers/MueLu.hpp | 10 +- .../muelu/src/Headers/MueLu_ConfigDefs.hpp | 62 +- .../Headers/MueLu_Details_DefaultTypes.hpp | 27 +- packages/muelu/src/Headers/MueLu_Types.hpp | 77 +- .../muelu/src/Headers/MueLu_UseShortNames.hpp | 23 +- .../Headers/MueLu_UseShortNamesOrdinal.hpp | 130 +- .../src/Headers/MueLu_UseShortNamesScalar.hpp | 480 +- packages/muelu/src/Headers/MueLu_Version.hpp | 6 +- .../MueLu_FacadeClassBase_decl.hpp | 66 +- .../MueLu_FacadeClassBase_def.hpp | 13 +- .../MueLu_FacadeClassFactory_decl.hpp | 83 +- .../MueLu_FacadeClassFactory_def.hpp | 71 +- .../MueLu_Facade_BGS2x2_decl.hpp | 43 +- .../FacadeClasses/MueLu_Facade_BGS2x2_def.hpp | 767 ++- .../MueLu_Facade_Simple_decl.hpp | 43 +- .../FacadeClasses/MueLu_Facade_Simple_def.hpp | 781 ++- ...ptiveSaMLParameterListInterpreter_decl.hpp | 348 +- 
...aptiveSaMLParameterListInterpreter_def.hpp | 832 +-- .../Interface/MueLu_FactoryFactory_decl.hpp | 1832 ++++-- .../src/Interface/MueLu_HierarchyFactory.hpp | 55 +- .../src/Interface/MueLu_HierarchyManager.hpp | 808 +-- .../MueLu_ML2MueLuParameterTranslator.cpp | 970 +-- .../MueLu_ML2MueLuParameterTranslator.hpp | 199 +- .../MueLu_MLParameterListInterpreter_decl.hpp | 294 +- .../MueLu_MLParameterListInterpreter_def.hpp | 1414 +++-- .../MueLu_ParameterListInterpreter.cpp | 66 +- .../MueLu_ParameterListInterpreter_decl.hpp | 439 +- .../MueLu_ParameterListInterpreter_def.hpp | 5590 ++++++++++------- .../Interface/MueLu_ParameterListUtils.cpp | 279 +- .../Interface/MueLu_ParameterListUtils.hpp | 94 +- ...u_AggregateQualityEstimateFactory_decl.hpp | 143 +- ...Lu_AggregateQualityEstimateFactory_def.hpp | 826 +-- ...BlockedCoordinatesTransferFactory_decl.hpp | 131 +- ..._BlockedCoordinatesTransferFactory_def.hpp | 214 +- .../src/Misc/MueLu_BlockedRAPFactory_decl.hpp | 137 +- .../src/Misc/MueLu_BlockedRAPFactory_def.hpp | 319 +- .../MueLu_CoordinatesTransferFactory_decl.hpp | 129 +- .../MueLu_CoordinatesTransferFactory_def.hpp | 472 +- .../muelu/src/Misc/MueLu_DemoFactory_decl.hpp | 69 +- .../muelu/src/Misc/MueLu_DemoFactory_def.hpp | 29 +- .../MueLu_DropNegativeEntriesFactory_decl.hpp | 72 +- .../MueLu_DropNegativeEntriesFactory_def.hpp | 151 +- .../src/Misc/MueLu_FilteredAFactory_decl.hpp | 93 +- .../src/Misc/MueLu_FilteredAFactory_def.hpp | 1856 +++--- .../MueLu_FineLevelInputDataFactory_decl.hpp | 80 +- .../MueLu_FineLevelInputDataFactory_def.hpp | 241 +- .../MueLu_InitialBlockNumberFactory_decl.hpp | 68 +- .../MueLu_InitialBlockNumberFactory_def.hpp | 69 +- ...MueLu_InterfaceAggregationFactory_decl.hpp | 125 +- .../MueLu_InterfaceAggregationFactory_def.hpp | 455 +- ...u_InterfaceMappingTransferFactory_decl.hpp | 23 +- ...Lu_InterfaceMappingTransferFactory_def.hpp | 25 +- ...MueLu_InverseApproximationFactory_decl.hpp | 125 +- 
.../MueLu_InverseApproximationFactory_def.hpp | 313 +- .../Misc/MueLu_LineDetectionFactory_decl.hpp | 111 +- .../Misc/MueLu_LineDetectionFactory_def.hpp | 928 +-- ...MueLu_LocalOrdinalTransferFactory_decl.hpp | 142 +- .../MueLu_LocalOrdinalTransferFactory_def.hpp | 346 +- .../Misc/MueLu_LowPrecisionFactory_decl.hpp | 170 +- .../Misc/MueLu_LowPrecisionFactory_def.hpp | 302 +- .../Misc/MueLu_MapTransferFactory_decl.hpp | 143 +- .../src/Misc/MueLu_MapTransferFactory_def.hpp | 272 +- .../MueLu_MergedBlockedMatrixFactory_decl.hpp | 63 +- .../MueLu_MergedBlockedMatrixFactory_def.hpp | 40 +- .../MueLu_MultiVectorTransferFactory_decl.hpp | 92 +- .../MueLu_MultiVectorTransferFactory_def.hpp | 154 +- .../muelu/src/Misc/MueLu_RAPFactory_decl.hpp | 92 +- .../muelu/src/Misc/MueLu_RAPFactory_def.hpp | 728 ++- .../src/Misc/MueLu_RAPShiftFactory_decl.hpp | 122 +- .../src/Misc/MueLu_RAPShiftFactory_def.hpp | 693 +- .../MueLu_SchurComplementFactory_decl.hpp | 108 +- .../Misc/MueLu_SchurComplementFactory_def.hpp | 336 +- .../Misc/MueLu_SegregatedAFactory_decl.hpp | 84 +- .../src/Misc/MueLu_SegregatedAFactory_def.hpp | 217 +- ...Lu_StructuredLineDetectionFactory_decl.hpp | 67 +- ...eLu_StructuredLineDetectionFactory_def.hpp | 138 +- .../MueLu_ThresholdAFilterFactory_decl.hpp | 62 +- .../MueLu_ThresholdAFilterFactory_def.hpp | 50 +- .../muelu/src/MueCentral/MueLu_BaseClass.hpp | 58 +- .../src/MueCentral/MueLu_Describable.cpp | 73 +- .../src/MueCentral/MueLu_Describable.hpp | 64 +- .../muelu/src/MueCentral/MueLu_Factory.cpp | 4 +- .../muelu/src/MueCentral/MueLu_Factory.hpp | 345 +- .../src/MueCentral/MueLu_FactoryAcceptor.hpp | 34 +- .../src/MueCentral/MueLu_FactoryBase.cpp | 8 +- .../src/MueCentral/MueLu_FactoryBase.hpp | 63 +- .../MueCentral/MueLu_FactoryManagerBase.hpp | 80 +- .../MueCentral/MueLu_FactoryManager_decl.hpp | 339 +- .../MueCentral/MueLu_FactoryManager_def.hpp | 482 +- .../MueCentral/MueLu_HierarchyUtils_decl.hpp | 154 +- .../MueCentral/MueLu_HierarchyUtils_def.hpp | 787 
++- .../src/MueCentral/MueLu_Hierarchy_decl.hpp | 686 +- .../src/MueCentral/MueLu_Hierarchy_def.hpp | 2995 +++++---- .../muelu/src/MueCentral/MueLu_KeepType.hpp | 35 +- packages/muelu/src/MueCentral/MueLu_Level.cpp | 948 +-- packages/muelu/src/MueCentral/MueLu_Level.hpp | 1027 +-- .../muelu/src/MueCentral/MueLu_MasterList.cpp | 2478 +++++--- .../muelu/src/MueCentral/MueLu_MasterList.hpp | 209 +- .../muelu/src/MueCentral/MueLu_NoFactory.cpp | 27 +- .../muelu/src/MueCentral/MueLu_NoFactory.hpp | 78 +- .../MueLu_ParameterListAcceptor.cpp | 156 +- .../MueLu_ParameterListAcceptor.hpp | 233 +- .../MueLu_SingleLevelFactoryBase.hpp | 179 +- .../MueCentral/MueLu_TopRAPFactory_decl.hpp | 35 +- .../MueCentral/MueLu_TopRAPFactory_def.hpp | 129 +- .../MueLu_TopSmootherFactory_decl.hpp | 35 +- .../MueLu_TopSmootherFactory_def.hpp | 202 +- .../MueCentral/MueLu_TwoLevelFactoryBase.hpp | 199 +- .../MueCentral/MueLu_VariableContainer.hpp | 539 +- .../src/MueCentral/MueLu_VerboseObject.cpp | 178 +- .../src/MueCentral/MueLu_VerboseObject.hpp | 154 +- .../src/MueCentral/MueLu_VerbosityLevel.cpp | 149 +- .../src/MueCentral/MueLu_VerbosityLevel.hpp | 141 +- .../MueLu_CreateXpetraPreconditioner.hpp | 382 +- .../src/Operators/MueLu_Maxwell1_decl.hpp | 528 +- .../src/Operators/MueLu_Maxwell1_def.hpp | 1822 +++--- .../Operators/MueLu_Maxwell_Utils_decl.hpp | 128 +- .../src/Operators/MueLu_Maxwell_Utils_def.hpp | 490 +- .../src/Operators/MueLu_MultiPhys_decl.hpp | 308 +- .../src/Operators/MueLu_MultiPhys_def.hpp | 552 +- .../src/Operators/MueLu_RefMaxwell_decl.hpp | 709 +-- .../src/Operators/MueLu_RefMaxwell_def.hpp | 4870 +++++++------- .../Operators/MueLu_XpetraOperator_decl.hpp | 136 +- .../Operators/MueLu_XpetraOperator_def.hpp | 118 +- .../MueLu_CloneRepartitionInterface_decl.hpp | 142 +- .../MueLu_CloneRepartitionInterface_def.hpp | 220 +- .../MueLu_IsorropiaInterface_decl.hpp | 101 +- .../MueLu_IsorropiaInterface_def.hpp | 407 +- .../MueLu_NodePartitionInterface_decl.hpp | 159 +- 
.../MueLu_NodePartitionInterface_def.hpp | 135 +- .../MueLu_RebalanceAcFactory_decl.hpp | 82 +- .../MueLu_RebalanceAcFactory_def.hpp | 231 +- .../MueLu_RebalanceBlockAcFactory_decl.hpp | 133 +- .../MueLu_RebalanceBlockAcFactory_def.hpp | 745 ++- ...ebalanceBlockInterpolationFactory_decl.hpp | 95 +- ...RebalanceBlockInterpolationFactory_def.hpp | 610 +- ..._RebalanceBlockRestrictionFactory_decl.hpp | 101 +- ...u_RebalanceBlockRestrictionFactory_def.hpp | 415 +- .../MueLu_RebalanceMapFactory_decl.hpp | 66 +- .../MueLu_RebalanceMapFactory_def.hpp | 175 +- .../MueLu_RebalanceTransferFactory_decl.hpp | 81 +- .../MueLu_RebalanceTransferFactory_def.hpp | 683 +- ...u_RepartitionBlockDiagonalFactory_decl.hpp | 61 +- ...Lu_RepartitionBlockDiagonalFactory_def.hpp | 77 +- .../MueLu_RepartitionFactory_decl.hpp | 199 +- .../MueLu_RepartitionFactory_def.hpp | 1022 +-- ...MueLu_RepartitionHeuristicFactory_decl.hpp | 172 +- .../MueLu_RepartitionHeuristicFactory_def.hpp | 513 +- .../MueLu_RepartitionInterface_decl.hpp | 171 +- .../MueLu_RepartitionInterface_def.hpp | 266 +- .../MueLu_Zoltan2Interface_decl.hpp | 333 +- .../MueLu_Zoltan2Interface_def.hpp | 411 +- .../MueLu_ZoltanInterface_decl.hpp | 229 +- .../Rebalancing/MueLu_ZoltanInterface_def.hpp | 504 +- .../MueLu_BlockedDirectSolver_decl.hpp | 139 +- .../MueLu_BlockedDirectSolver_def.hpp | 299 +- .../MueLu_BlockedGaussSeidelSmoother_decl.hpp | 252 +- .../MueLu_BlockedGaussSeidelSmoother_def.hpp | 564 +- .../MueLu_BlockedJacobiSmoother_decl.hpp | 256 +- .../MueLu_BlockedJacobiSmoother_def.hpp | 546 +- .../MueLu_BraessSarazinSmoother_decl.hpp | 158 +- .../MueLu_BraessSarazinSmoother_def.hpp | 608 +- ...ueLu_IndefBlockedDiagonalSmoother_decl.hpp | 177 +- ...MueLu_IndefBlockedDiagonalSmoother_def.hpp | 655 +- .../MueLu_SimpleSmoother_decl.hpp | 222 +- .../MueLu_SimpleSmoother_def.hpp | 709 ++- .../MueLu_TekoSmoother_decl.hpp | 625 +- .../MueLu_TekoSmoother_def.hpp | 12 +- .../MueLu_UzawaSmoother_decl.hpp | 180 +- 
.../MueLu_UzawaSmoother_def.hpp | 550 +- .../Smoothers/MueLu_AdvSmootherPrototype.hpp | 89 +- .../Smoothers/MueLu_Amesos2Smoother_decl.hpp | 215 +- .../Smoothers/MueLu_Amesos2Smoother_def.hpp | 577 +- .../src/Smoothers/MueLu_AmesosSmoother.cpp | 310 +- .../src/Smoothers/MueLu_AmesosSmoother.hpp | 258 +- .../Smoothers/MueLu_BelosSmoother_decl.hpp | 252 +- .../src/Smoothers/MueLu_BelosSmoother_def.hpp | 288 +- .../src/Smoothers/MueLu_DirectSolver_decl.hpp | 160 +- .../src/Smoothers/MueLu_DirectSolver_def.hpp | 512 +- .../Smoothers/MueLu_Ifpack2Smoother_decl.hpp | 345 +- .../Smoothers/MueLu_Ifpack2Smoother_def.hpp | 1939 +++--- .../src/Smoothers/MueLu_IfpackSmoother.cpp | 720 ++- .../src/Smoothers/MueLu_IfpackSmoother.hpp | 274 +- .../Smoothers/MueLu_MergedSmoother_decl.hpp | 134 +- .../Smoothers/MueLu_MergedSmoother_def.hpp | 340 +- .../MueLu_PermutingSmoother_decl.hpp | 190 +- .../Smoothers/MueLu_PermutingSmoother_def.hpp | 225 +- .../MueLu_ProjectorSmoother_decl.hpp | 136 +- .../Smoothers/MueLu_ProjectorSmoother_def.hpp | 199 +- .../MueLu_RefMaxwellSmoother_decl.hpp | 149 +- .../MueLu_RefMaxwellSmoother_def.hpp | 234 +- .../src/Smoothers/MueLu_SmootherBase.hpp | 110 +- .../src/Smoothers/MueLu_SmootherCloner.hpp | 9 +- .../Smoothers/MueLu_SmootherFactoryBase.hpp | 50 +- .../Smoothers/MueLu_SmootherFactory_decl.hpp | 252 +- .../Smoothers/MueLu_SmootherFactory_def.hpp | 479 +- .../MueLu_SmootherPrototype_decl.hpp | 140 +- .../Smoothers/MueLu_SmootherPrototype_def.hpp | 29 +- .../MueLu_StratimikosSmoother_decl.hpp | 294 +- .../MueLu_StratimikosSmoother_def.hpp | 523 +- .../Smoothers/MueLu_TrilinosSmoother_decl.hpp | 224 +- .../Smoothers/MueLu_TrilinosSmoother_def.hpp | 672 +- .../BaseClass/MueLu_PFactory_decl.hpp | 78 +- .../BlackBox/MueLu_BlackBoxPFactory_decl.hpp | 294 +- .../BlackBox/MueLu_BlackBoxPFactory_def.hpp | 3776 ++++++----- .../MueLu_BlockedCoarseMapFactory_decl.hpp | 65 +- .../MueLu_BlockedCoarseMapFactory_def.hpp | 113 +- 
.../MueLu_BlockedPFactory_decl.hpp | 239 +- .../MueLu_BlockedPFactory_def.hpp | 496 +- .../MueLu_ReorderBlockAFactory_decl.hpp | 60 +- .../MueLu_ReorderBlockAFactory_def.hpp | 292 +- .../MueLu_SubBlockAFactory_decl.hpp | 152 +- .../MueLu_SubBlockAFactory_def.hpp | 428 +- .../MueLu_ZeroSubBlockAFactory_decl.hpp | 84 +- .../MueLu_ZeroSubBlockAFactory_def.hpp | 179 +- .../MueLu_ClassicalMapFactory_decl.hpp | 125 +- .../MueLu_ClassicalMapFactory_def.hpp | 872 +-- .../MueLu_ClassicalPFactory_decl.hpp | 156 +- .../Classical/MueLu_ClassicalPFactory_def.hpp | 2022 +++--- .../MueLu_ConstraintFactory_decl.hpp | 76 +- .../MueLu_ConstraintFactory_def.hpp | 59 +- .../MueLu_Constraint_decl.hpp | 144 +- .../MueLu_Constraint_def.hpp | 306 +- .../MueLu_EminPFactory_decl.hpp | 69 +- .../MueLu_EminPFactory_def.hpp | 343 +- .../MueLu_NullspacePresmoothFactory_decl.hpp | 63 +- .../MueLu_NullspacePresmoothFactory_def.hpp | 107 +- .../MueLu_PatternFactory_decl.hpp | 67 +- .../MueLu_PatternFactory_def.hpp | 100 +- .../Solvers/MueLu_CGSolver_decl.hpp | 79 +- .../Solvers/MueLu_CGSolver_def.hpp | 232 +- .../Solvers/MueLu_GMRESSolver_decl.hpp | 80 +- .../Solvers/MueLu_GMRESSolver_def.hpp | 274 +- .../Solvers/MueLu_SolverBase.hpp | 60 +- .../MueLu_SteepestDescentSolver_decl.hpp | 66 +- .../MueLu_SteepestDescentSolver_def.hpp | 64 +- .../MueLu_GeneralGeometricPFactory_decl.hpp | 393 +- .../MueLu_GeneralGeometricPFactory_def.hpp | 3736 ++++++----- ...Lu_GeometricInterpolationPFactory_decl.hpp | 89 +- ...eLu_GeometricInterpolationPFactory_def.hpp | 1161 ++-- ...etricInterpolationPFactory_kokkos_decl.hpp | 122 +- ...metricInterpolationPFactory_kokkos_def.hpp | 1074 ++-- .../MueLu_RegionRFactory_decl.hpp | 73 +- .../MueLu_RegionRFactory_def.hpp | 2234 ++++--- .../MueLu_RegionRFactory_kokkos_decl.hpp | 85 +- .../MueLu_RegionRFactory_kokkos_def.hpp | 2328 ++++--- .../Generic/MueLu_CombinePFactory_decl.hpp | 151 +- .../Generic/MueLu_CombinePFactory_def.hpp | 431 +- 
.../Generic/MueLu_GenericRFactory_decl.hpp | 57 +- .../Generic/MueLu_GenericRFactory_def.hpp | 110 +- .../Generic/MueLu_ReplicatePFactory_decl.hpp | 109 +- .../Generic/MueLu_ReplicatePFactory_def.hpp | 224 +- .../Generic/MueLu_RfromP_Or_TransP_decl.hpp | 64 +- .../Generic/MueLu_RfromP_Or_TransP_def.hpp | 273 +- .../Generic/MueLu_TransPFactory_decl.hpp | 64 +- .../Generic/MueLu_TransPFactory_def.hpp | 124 +- .../MueLu_GeoInterpFactory_decl.hpp | 104 +- .../MueLu_GeoInterpFactory_def.hpp | 1891 +++--- .../MueLu_MHDRAPFactory_decl.hpp | 88 +- .../MueLu_MHDRAPFactory_def.hpp | 414 +- .../MueLu_Q2Q1Q2CoarseGridFactory_decl.hpp | 98 +- .../MueLu_Q2Q1Q2CoarseGridFactory_def.hpp | 369 +- ...MueLu_MatrixFreeTentativePFactory_decl.hpp | 174 +- .../MueLu_MatrixFreeTentativePFactory_def.hpp | 178 +- .../MueLu_MatrixFreeTentativeP_decl.hpp | 125 +- .../MueLu_MatrixFreeTentativeP_def.hpp | 127 +- .../MueLu_IntrepidPCoarsenFactory_decl.hpp | 341 +- .../MueLu_IntrepidPCoarsenFactory_def.hpp | 1325 ++-- .../MueLu_PgPFactory_decl.hpp | 145 +- .../MueLu_PgPFactory_def.hpp | 1144 ++-- .../MueLu_SemiCoarsenPFactory_decl.hpp | 145 +- .../MueLu_SemiCoarsenPFactory_def.hpp | 1861 +++--- .../MueLu_SemiCoarsenPFactory_kokkos_decl.hpp | 62 +- .../MueLu_SemiCoarsenPFactory_kokkos_def.hpp | 39 +- ..._ToggleCoordinatesTransferFactory_decl.hpp | 87 +- ...u_ToggleCoordinatesTransferFactory_def.hpp | 160 +- .../SemiCoarsen/MueLu_TogglePFactory_decl.hpp | 184 +- .../SemiCoarsen/MueLu_TogglePFactory_def.hpp | 353 +- .../MueLu_CoarseMapFactory_decl.hpp | 304 +- .../MueLu_CoarseMapFactory_def.hpp | 298 +- .../MueLu_NullspaceFactory_decl.hpp | 194 +- .../MueLu_NullspaceFactory_def.hpp | 572 +- .../MueLu_NullspaceFactory_kokkos_decl.hpp | 203 +- .../MueLu_NullspaceFactory_kokkos_def.hpp | 302 +- .../MueLu_ReitzingerPFactory_decl.hpp | 117 +- .../MueLu_ReitzingerPFactory_def.hpp | 861 +-- .../MueLu_SaPFactory_decl.hpp | 142 +- .../MueLu_SaPFactory_def.hpp | 1271 ++-- .../MueLu_SaPFactory_kokkos_decl.hpp 
| 164 +- .../MueLu_SaPFactory_kokkos_def.hpp | 1125 ++-- .../MueLu_ScaledNullspaceFactory_decl.hpp | 176 +- .../MueLu_ScaledNullspaceFactory_def.hpp | 153 +- .../MueLu_TentativePFactory_decl.hpp | 186 +- .../MueLu_TentativePFactory_def.hpp | 2177 ++++--- .../MueLu_TentativePFactory_kokkos_decl.hpp | 217 +- .../MueLu_TentativePFactory_kokkos_def.hpp | 2265 ++++--- .../User/MueLu_UserPFactory_decl.hpp | 51 +- .../Transfers/User/MueLu_UserPFactory_def.hpp | 167 +- ...u_AdaptiveSaMLParameterListInterpreter.cpp | 8 +- .../ETI_MueLu_FacadeBGS2x2.cpp | 7 +- .../ETI_MueLu_FacadeClassBase.cpp | 8 +- .../ETI_MueLu_FacadeClassFactory.cpp | 8 +- .../ETI_MueLu_FacadeSimple.cpp | 7 +- .../ETI_MueLu_FactoryFactory.cpp | 8 +- .../ETI_MueLu_MLParameterListInterpreter.cpp | 8 +- .../ETI_MueLu_ParameterListInterpreter.cpp | 8 +- .../MueLu_AMGXOperator_fwd.hpp | 4 +- ...aptiveSaMLParameterListInterpreter_fwd.hpp | 9 +- ...Lu_AggregateQualityEstimateFactory_fwd.hpp | 9 +- .../MueLu_Aggregates_fwd.hpp | 8 +- .../MueLu_AggregationExportFactory_fwd.hpp | 9 +- .../MueLu_AggregationPhase1Algorithm_fwd.hpp | 9 +- ..._AggregationPhase1Algorithm_kokkos_fwd.hpp | 9 +- .../MueLu_AggregationPhase2aAlgorithm_fwd.hpp | 9 +- ...AggregationPhase2aAlgorithm_kokkos_fwd.hpp | 9 +- .../MueLu_AggregationPhase2bAlgorithm_fwd.hpp | 9 +- ...AggregationPhase2bAlgorithm_kokkos_fwd.hpp | 9 +- .../MueLu_AggregationPhase3Algorithm_fwd.hpp | 9 +- ..._AggregationPhase3Algorithm_kokkos_fwd.hpp | 9 +- ...eLu_AggregationStructuredAlgorithm_fwd.hpp | 9 +- ...regationStructuredAlgorithm_kokkos_fwd.hpp | 9 +- ...MueLu_AlgebraicPermutationStrategy_fwd.hpp | 9 +- .../MueLu_AmalgamationFactory_fwd.hpp | 9 +- .../MueLu_AmalgamationInfo_fwd.hpp | 9 +- .../MueLu_Amesos2Smoother_fwd.hpp | 4 +- .../MueLu_AmesosSmoother_fwd.hpp | 10 +- .../MueLu_BelosSmoother_fwd.hpp | 4 +- .../MueLu_BlackBoxPFactory_fwd.hpp | 9 +- .../MueLu_BlockedCoarseMapFactory_fwd.hpp | 9 +- ..._BlockedCoordinatesTransferFactory_fwd.hpp | 9 +- 
.../MueLu_BlockedDirectSolver_fwd.hpp | 9 +- .../MueLu_BlockedGaussSeidelSmoother_fwd.hpp | 9 +- .../MueLu_BlockedJacobiSmoother_fwd.hpp | 9 +- .../MueLu_BlockedPFactory_fwd.hpp | 9 +- .../MueLu_BlockedRAPFactory_fwd.hpp | 9 +- .../MueLu_BraessSarazinSmoother_fwd.hpp | 9 +- .../MueLu_BrickAggregationFactory_fwd.hpp | 9 +- .../ForwardDeclaration/MueLu_CGSolver_fwd.hpp | 9 +- .../MueLu_ClassicalMapFactory_fwd.hpp | 9 +- .../MueLu_ClassicalPFactory_fwd.hpp | 9 +- .../MueLu_CloneRepartitionInterface_fwd.hpp | 9 +- .../MueLu_CoalesceDropFactory_fwd.hpp | 9 +- .../MueLu_CoalesceDropFactory_kokkos_fwd.hpp | 9 +- .../MueLu_CoarseMapFactory_fwd.hpp | 9 +- ...eLu_CoarseningVisualizationFactory_fwd.hpp | 9 +- .../MueLu_CombinePFactory_fwd.hpp | 9 +- .../MueLu_ConstraintFactory_fwd.hpp | 9 +- .../MueLu_Constraint_fwd.hpp | 9 +- .../MueLu_CoordinatesTransferFactory_fwd.hpp | 9 +- .../MueLu_CoupledRBMFactory_fwd.hpp | 9 +- .../MueLu_DemoFactory_fwd.hpp | 9 +- .../MueLu_DirectSolver_fwd.hpp | 9 +- .../MueLu_DropNegativeEntriesFactory_fwd.hpp | 9 +- .../MueLu_EminPFactory_fwd.hpp | 9 +- .../MueLu_FacadeClassFactory_fwd.hpp | 9 +- .../MueLu_FacadeSimple_fwd.hpp | 7 +- .../MueLu_FactoryBase_fwd.hpp | 7 +- .../MueLu_FactoryFactory_fwd.hpp | 9 +- .../MueLu_FactoryManagerBase_fwd.hpp | 7 +- .../MueLu_FactoryManager_fwd.hpp | 9 +- .../ForwardDeclaration/MueLu_Factory_fwd.hpp | 7 +- .../MueLu_FakeSmootherPrototype_fwd.hpp | 9 +- .../MueLu_FilteredAFactory_fwd.hpp | 9 +- .../MueLu_FineLevelInputDataFactory_fwd.hpp | 9 +- .../MueLu_GMRESSolver_fwd.hpp | 9 +- .../MueLu_GeneralGeometricPFactory_fwd.hpp | 9 +- .../MueLu_GenericRFactory_fwd.hpp | 9 +- ...eLu_GeometricInterpolationPFactory_fwd.hpp | 9 +- ...metricInterpolationPFactory_kokkos_fwd.hpp | 9 +- ...Lu_GlobalLexicographicIndexManager_fwd.hpp | 9 +- .../MueLu_GraphBase_fwd.hpp | 8 +- .../ForwardDeclaration/MueLu_Graph_fwd.hpp | 8 +- .../MueLu_HierarchyFactory_fwd.hpp | 9 +- .../MueLu_HierarchyHelpers_fwd.hpp | 10 +- 
.../MueLu_HierarchyManager_fwd.hpp | 9 +- .../MueLu_HierarchyUtils_fwd.hpp | 9 +- .../MueLu_Hierarchy_fwd.hpp | 9 +- .../MueLu_HybridAggregationFactory_fwd.hpp | 9 +- .../MueLu_Ifpack2Smoother_fwd.hpp | 4 +- .../MueLu_IfpackSmoother_fwd.hpp | 8 +- ...MueLu_IndefBlockedDiagonalSmoother_fwd.hpp | 9 +- .../MueLu_IndexManager_fwd.hpp | 9 +- .../MueLu_IndexManager_kokkos_fwd.hpp | 9 +- .../MueLu_InitialBlockNumberFactory_fwd.hpp | 9 +- ...ueLu_InterfaceAggregationAlgorithm_fwd.hpp | 9 +- .../MueLu_InterfaceAggregationFactory_fwd.hpp | 9 +- ...Lu_InterfaceMappingTransferFactory_fwd.hpp | 9 +- .../MueLu_IntrepidPCoarsenFactory_fwd.hpp | 4 +- .../MueLu_InverseApproximationFactory_fwd.hpp | 9 +- ...u_IsolatedNodeAggregationAlgorithm_fwd.hpp | 9 +- ...tedNodeAggregationAlgorithm_kokkos_fwd.hpp | 9 +- .../MueLu_IsorropiaInterface_fwd.hpp | 4 +- .../ForwardDeclaration/MueLu_LWGraph_fwd.hpp | 8 +- .../MueLu_LWGraph_kokkos_fwd.hpp | 9 +- ...MueLu_LeftoverAggregationAlgorithm_fwd.hpp | 9 +- .../ForwardDeclaration/MueLu_Level_fwd.hpp | 7 +- .../MueLu_LineDetectionFactory_fwd.hpp | 9 +- .../MueLu_LocalAggregationAlgorithm_fwd.hpp | 9 +- .../MueLu_LocalLWGraph_kokkos_fwd.hpp | 9 +- ...eLu_LocalLexicographicIndexManager_fwd.hpp | 9 +- .../MueLu_LocalOrdinalTransferFactory_fwd.hpp | 9 +- .../MueLu_LocalPermutationStrategy_fwd.hpp | 9 +- .../MueLu_LowPrecisionFactory_fwd.hpp | 9 +- .../MueLu_MLParameterListInterpreter_fwd.hpp | 9 +- .../MueLu_MapTransferFactory_fwd.hpp | 9 +- .../MueLu_MatlabSmoother_fwd.hpp | 4 +- .../MueLu_MatrixAnalysisFactory_fwd.hpp | 9 +- .../MueLu_MatrixFreeTentativePFactory_fwd.hpp | 9 +- .../MueLu_MatrixFreeTentativeP_fwd.hpp | 9 +- .../ForwardDeclaration/MueLu_Maxwell1_fwd.hpp | 9 +- .../MueLu_Maxwell_Utils_fwd.hpp | 9 +- .../MueLu_MergedBlockedMatrixFactory_fwd.hpp | 9 +- .../MueLu_MergedSmoother_fwd.hpp | 9 +- .../MueLu_MultiVectorTransferFactory_fwd.hpp | 9 +- .../MueLu_NodePartitionInterface_fwd.hpp | 4 +- .../MueLu_NotayAggregationFactory_fwd.hpp | 9 +- 
.../MueLu_NullspaceFactory_fwd.hpp | 9 +- .../MueLu_NullspaceFactory_kokkos_fwd.hpp | 9 +- .../MueLu_NullspacePresmoothFactory_fwd.hpp | 9 +- .../MueLu_OnePtAggregationAlgorithm_fwd.hpp | 9 +- ...u_OnePtAggregationAlgorithm_kokkos_fwd.hpp | 9 +- .../ForwardDeclaration/MueLu_PFactory_fwd.hpp | 7 +- .../MueLu_PRFactory_fwd.hpp | 8 +- .../MueLu_ParameterListInterpreter_fwd.hpp | 9 +- .../MueLu_PatternFactory_fwd.hpp | 9 +- .../MueLu_PerfModels_fwd.hpp | 9 +- .../MueLu_PerfUtils_fwd.hpp | 9 +- .../MueLu_PermutationFactory_fwd.hpp | 9 +- .../MueLu_PermutingSmoother_fwd.hpp | 9 +- .../MueLu_PgPFactory_fwd.hpp | 9 +- .../MueLu_PreDropFunctionBaseClass_fwd.hpp | 9 +- .../MueLu_PreDropFunctionConstVal_fwd.hpp | 9 +- ...serveDirichletAggregationAlgorithm_fwd.hpp | 9 +- ...richletAggregationAlgorithm_kokkos_fwd.hpp | 9 +- .../MueLu_ProjectorSmoother_fwd.hpp | 9 +- .../MueLu_RAPFactory_fwd.hpp | 9 +- .../MueLu_RAPShiftFactory_fwd.hpp | 9 +- .../ForwardDeclaration/MueLu_RFactory_fwd.hpp | 7 +- .../MueLu_RebalanceAcFactory_fwd.hpp | 9 +- .../MueLu_RebalanceBlockAcFactory_fwd.hpp | 9 +- ...RebalanceBlockInterpolationFactory_fwd.hpp | 9 +- ...u_RebalanceBlockRestrictionFactory_fwd.hpp | 9 +- .../MueLu_RebalanceMapFactory_fwd.hpp | 9 +- .../MueLu_RebalanceTransferFactory_fwd.hpp | 9 +- .../MueLu_RefMaxwellSmoother_fwd.hpp | 9 +- .../MueLu_RefMaxwell_fwd.hpp | 9 +- .../MueLu_RegionRFactory_fwd.hpp | 9 +- .../MueLu_RegionRFactory_kokkos_fwd.hpp | 9 +- .../MueLu_ReitzingerPFactory_fwd.hpp | 9 +- .../MueLu_ReorderBlockAFactory_fwd.hpp | 9 +- ...Lu_RepartitionBlockDiagonalFactory_fwd.hpp | 4 +- .../MueLu_RepartitionFactory_fwd.hpp | 4 +- .../MueLu_RepartitionHeuristicFactory_fwd.hpp | 4 +- .../MueLu_RepartitionInterface_fwd.hpp | 9 +- .../MueLu_ReplicatePFactory_fwd.hpp | 9 +- .../MueLu_RfromP_Or_TransP_fwd.hpp | 9 +- .../MueLu_RigidBodyModeFactory_fwd.hpp | 9 +- .../MueLu_SaPFactory_fwd.hpp | 9 +- .../MueLu_SaPFactory_kokkos_fwd.hpp | 9 +- .../MueLu_ScaledNullspaceFactory_fwd.hpp | 9 
+- .../MueLu_SchurComplementFactory_fwd.hpp | 9 +- .../MueLu_SegregatedAFactory_fwd.hpp | 9 +- .../MueLu_SemiCoarsenPFactory_fwd.hpp | 9 +- .../MueLu_SemiCoarsenPFactory_kokkos_fwd.hpp | 9 +- .../MueLu_ShiftedLaplacianOperator_fwd.hpp | 4 +- .../MueLu_ShiftedLaplacian_fwd.hpp | 7 +- .../MueLu_SimpleSmoother_fwd.hpp | 9 +- .../MueLu_SingleLevelFactoryBase_fwd.hpp | 7 +- .../MueLu_SingleLevelMatlabFactory_fwd.hpp | 4 +- .../MueLu_SmooVecCoalesceDropFactory_fwd.hpp | 9 +- .../MueLu_SmootherBase_fwd.hpp | 9 +- .../MueLu_SmootherFactoryBase_fwd.hpp | 7 +- .../MueLu_SmootherFactory_fwd.hpp | 9 +- .../MueLu_SmootherPrototype_fwd.hpp | 9 +- .../ForwardDeclaration/MueLu_Smoother_fwd.hpp | 9 +- .../MueLu_SolverBase_fwd.hpp | 9 +- .../MueLu_SteepestDescentSolver_fwd.hpp | 9 +- .../MueLu_StratimikosSmoother_fwd.hpp | 4 +- ...MueLu_StructuredAggregationFactory_fwd.hpp | 9 +- ...tructuredAggregationFactory_kokkos_fwd.hpp | 9 +- ...eLu_StructuredLineDetectionFactory_fwd.hpp | 9 +- .../MueLu_SubBlockAFactory_fwd.hpp | 9 +- .../MueLu_TekoSmoother_fwd.hpp | 4 +- .../MueLu_TentativePFactory_fwd.hpp | 9 +- .../MueLu_TentativePFactory_kokkos_fwd.hpp | 9 +- .../MueLu_ThresholdAFilterFactory_fwd.hpp | 9 +- ...u_ToggleCoordinatesTransferFactory_fwd.hpp | 9 +- .../MueLu_TogglePFactory_fwd.hpp | 9 +- .../MueLu_TopRAPFactory_fwd.hpp | 9 +- .../MueLu_TopSmootherFactory_fwd.hpp | 9 +- .../MueLu_TpetraOperator_fwd.hpp | 9 +- .../MueLu_TransPFactory_fwd.hpp | 9 +- .../MueLu_TrilinosSmoother_fwd.hpp | 9 +- .../MueLu_TwoLevelFactoryBase_fwd.hpp | 7 +- .../MueLu_TwoLevelMatlabFactory_fwd.hpp | 4 +- .../MueLu_UncoupledAggregationFactory_fwd.hpp | 9 +- ...UncoupledAggregationFactory_kokkos_fwd.hpp | 9 +- .../MueLu_UncoupledIndexManager_fwd.hpp | 9 +- .../MueLu_UnsmooshFactory_fwd.hpp | 9 +- .../MueLu_UserAggregationFactory_fwd.hpp | 9 +- .../MueLu_UserPFactory_fwd.hpp | 9 +- .../MueLu_UtilitiesBase_fwd.hpp | 9 +- .../MueLu_Utilities_fwd.hpp | 9 +- .../MueLu_UzawaSmoother_fwd.hpp | 9 +- 
.../MueLu_VariableContainer_fwd.hpp | 7 +- .../MueLu_VariableDofLaplacianFactory_fwd.hpp | 9 +- .../MueLu_VisualizationHelpers_fwd.hpp | 9 +- .../MueLu_XpetraOperator_fwd.hpp | 9 +- .../MueLu_ZeroSubBlockAFactory_fwd.hpp | 9 +- .../MueLu_Zoltan2Interface_fwd.hpp | 4 +- .../MueLu_ZoltanInterface_fwd.hpp | 4 +- packages/muelu/src/Utils/ML_Linker.hpp | 135 +- .../MueLu_AggregationExportFactory_decl.hpp | 253 +- .../MueLu_AggregationExportFactory_def.hpp | 1525 ++--- ...ueLu_AlgebraicPermutationStrategy_decl.hpp | 118 +- ...MueLu_AlgebraicPermutationStrategy_def.hpp | 1376 ++-- .../muelu/src/Utils/MueLu_AvatarInterface.cpp | 438 +- .../muelu/src/Utils/MueLu_AvatarInterface.hpp | 125 +- .../muelu/src/Utils/MueLu_BoostGraphviz.hpp | 23 +- ...Lu_CoarseningVisualizationFactory_decl.hpp | 121 +- ...eLu_CoarseningVisualizationFactory_def.hpp | 617 +- .../Utils/MueLu_CoupledRBMFactory_decl.hpp | 109 +- .../src/Utils/MueLu_CoupledRBMFactory_def.hpp | 303 +- .../Utils/MueLu_DisableMultipleCallCheck.hpp | 23 +- packages/muelu/src/Utils/MueLu_ETI_3arg.hpp | 40 +- packages/muelu/src/Utils/MueLu_ETI_4arg.hpp | 40 +- .../muelu/src/Utils/MueLu_ETI_4arg_Xpetra.hpp | 43 +- packages/muelu/src/Utils/MueLu_Exceptions.cpp | 29 +- packages/muelu/src/Utils/MueLu_Exceptions.hpp | 88 +- .../MueLu_LocalPermutationStrategy_decl.hpp | 95 +- .../MueLu_LocalPermutationStrategy_def.hpp | 627 +- .../MueLu_MatrixAnalysisFactory_decl.hpp | 110 +- .../Utils/MueLu_MatrixAnalysisFactory_def.hpp | 270 +- packages/muelu/src/Utils/MueLu_Memory.cpp | 137 +- packages/muelu/src/Utils/MueLu_Memory.hpp | 23 +- packages/muelu/src/Utils/MueLu_Monitor.cpp | 170 +- packages/muelu/src/Utils/MueLu_Monitor.hpp | 490 +- .../src/Utils/MueLu_MutuallyExclusiveTime.cpp | 302 +- .../src/Utils/MueLu_MutuallyExclusiveTime.hpp | 191 +- .../muelu/src/Utils/MueLu_PerfModels_decl.hpp | 224 +- .../muelu/src/Utils/MueLu_PerfModels_def.hpp | 1127 ++-- .../muelu/src/Utils/MueLu_PerfUtils_decl.hpp | 41 +- 
.../muelu/src/Utils/MueLu_PerfUtils_def.hpp | 763 ++- .../Utils/MueLu_PermutationFactory_decl.hpp | 75 +- .../Utils/MueLu_PermutationFactory_def.hpp | 100 +- .../Utils/MueLu_RigidBodyModeFactory_decl.hpp | 94 +- .../Utils/MueLu_RigidBodyModeFactory_def.hpp | 194 +- .../muelu/src/Utils/MueLu_TimeMonitor.cpp | 139 +- .../muelu/src/Utils/MueLu_TimeMonitor.hpp | 123 +- packages/muelu/src/Utils/MueLu_Utilities.cpp | 656 +- .../src/Utils/MueLu_UtilitiesBase_decl.hpp | 849 +-- .../src/Utils/MueLu_UtilitiesBase_def.hpp | 3670 ++++++----- .../muelu/src/Utils/MueLu_Utilities_decl.hpp | 2353 ++++--- .../muelu/src/Utils/MueLu_Utilities_def.hpp | 1397 ++-- .../Utils/MueLu_VisualizationHelpers_decl.hpp | 291 +- .../Utils/MueLu_VisualizationHelpers_def.hpp | 2373 +++---- 649 files changed, 95439 insertions(+), 79096 deletions(-) diff --git a/packages/muelu/src/Graph/BrickAggregation/MueLu_BrickAggregationFactory_decl.hpp b/packages/muelu/src/Graph/BrickAggregation/MueLu_BrickAggregationFactory_decl.hpp index 6b38a9f96750..c618fa5cb0ae 100644 --- a/packages/muelu/src/Graph/BrickAggregation/MueLu_BrickAggregationFactory_decl.hpp +++ b/packages/muelu/src/Graph/BrickAggregation/MueLu_BrickAggregationFactory_decl.hpp @@ -48,127 +48,130 @@ #include "MueLu_ConfigDefs.hpp" -#include #include -#include +#include #include +#include #include -#include #include +#include -#include "MueLu_SingleLevelFactoryBase.hpp" #include "MueLu_BrickAggregationFactory_fwd.hpp" +#include "MueLu_SingleLevelFactoryBase.hpp" +#include "MueLu_Aggregates_fwd.hpp" +#include "MueLu_Exceptions.hpp" #include "MueLu_GraphBase_fwd.hpp" #include "MueLu_Graph_fwd.hpp" #include "MueLu_LWGraph_fwd.hpp" #include "MueLu_Level_fwd.hpp" -#include "MueLu_Aggregates_fwd.hpp" -#include "MueLu_Exceptions.hpp" #include "MueLu_Utilities_fwd.hpp" /*! @class BrickAggregationFactory - @brief Aggregation method for generating "brick" aggregates. It also does "hotdogs" and "pancakes." 
- - This factory can generate aggregates of size 1, 2 or 3 in each dimension, in any combination. + @brief Aggregation method for generating "brick" aggregates. It also does + "hotdogs" and "pancakes." + + This factory can generate aggregates of size 1, 2 or 3 in each dimension, in + any combination. */ namespace MueLu { - template - class BrickAggregationFactory : public SingleLevelFactoryBase { +template +class BrickAggregationFactory : public SingleLevelFactoryBase { #undef MUELU_BRICKAGGREGATIONFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - private: - typedef Teuchos::ScalarTraits STS; - - // Comparator for doubles - // Generally, the coordinates for coarser levels would come out of averaging of fine level coordinates - // It is possible that the result of the averaging differs slightly between clusters, as we might have - // 3x2 and 2x2 cluster which would result in averaging 6 and 4 y-coordinates respectively, leading to - // slightly different results. - // Therefore, we hardcode a constant so that close points are considered the same. - class compare { - public: - bool operator()(const Scalar& x, const Scalar& y) const { - if (STS::magnitude(x - y) < 1e-14) - return false; - return STS::real(x) < STS::real(y); - } - }; - typedef std::map container; - +private: + typedef Teuchos::ScalarTraits STS; + + // Comparator for doubles + // Generally, the coordinates for coarser levels would come out of averaging + // of fine level coordinates It is possible that the result of the averaging + // differs slightly between clusters, as we might have 3x2 and 2x2 cluster + // which would result in averaging 6 and 4 y-coordinates respectively, leading + // to slightly different results. Therefore, we hardcode a constant so that + // close points are considered the same. + class compare { public: - //! @name Constructors/Destructors. 
- //@{ + bool operator()(const Scalar &x, const Scalar &y) const { + if (STS::magnitude(x - y) < 1e-14) + return false; + return STS::real(x) < STS::real(y); + } + }; + typedef std::map container; - //! Constructor. - BrickAggregationFactory() : nDim_(-1), nx_(-1), ny_(-1), nz_(-1), bx_(-1), by_(-1), bz_(-1) { }; +public: + //! @name Constructors/Destructors. + //@{ - //! Destructor. - virtual ~BrickAggregationFactory() { } + //! Constructor. + BrickAggregationFactory() + : nDim_(-1), nx_(-1), ny_(-1), nz_(-1), bx_(-1), by_(-1), bz_(-1){}; - RCP GetValidParameterList() const; + //! Destructor. + virtual ~BrickAggregationFactory() {} - //@} + RCP GetValidParameterList() const; - // Options shared by all aggregation algorithms + //@} - //! Input - //@{ + // Options shared by all aggregation algorithms - void DeclareInput(Level ¤tLevel) const; + //! Input + //@{ - //@} + void DeclareInput(Level ¤tLevel) const; - //! @name Build methods. - //@{ + //@} - /*! @brief Build aggregates. */ - void Build(Level ¤tLevel) const; + //! @name Build methods. + //@{ - //@} + /*! @brief Build aggregates. 
*/ + void Build(Level ¤tLevel) const; - private: - void Setup(const RCP >& comm, const RCP::magnitudeType,LO,GO,NO> >& coords, const RCP& map) const; - RCP Construct1DMap(const RCP >& comm, const ArrayRCP::magnitudeType>& x) const; + //@} - void BuildGraph(Level& currentLevel, const RCP& A) const; +private: + void + Setup(const RCP> &comm, + const RCP::magnitudeType, LO, GO, NO>> + &coords, + const RCP &map) const; + RCP + Construct1DMap(const RCP> &comm, + const ArrayRCP::magnitudeType> &x) const; + void BuildGraph(Level ¤tLevel, const RCP &A) const; - bool isDirichlet(LocalOrdinal LID) const; - bool isRoot (LocalOrdinal LID) const; - GlobalOrdinal getRoot (LocalOrdinal LID) const; - GlobalOrdinal getAggGID(LocalOrdinal LID) const; + bool isDirichlet(LocalOrdinal LID) const; + bool isRoot(LocalOrdinal LID) const; + GlobalOrdinal getRoot(LocalOrdinal LID) const; + GlobalOrdinal getAggGID(LocalOrdinal LID) const; - void getIJK(LocalOrdinal LID, int &i, int &j, int &k) const; - void getAggIJK(LocalOrdinal LID, int &i, int &j, int &k) const; + void getIJK(LocalOrdinal LID, int &i, int &j, int &k) const; + void getAggIJK(LocalOrdinal LID, int &i, int &j, int &k) const; - mutable - int nDim_; - mutable - RCP xMap_, yMap_, zMap_; - mutable - ArrayRCP::magnitudeType> x_, y_, z_; - mutable - int nx_, ny_, nz_; - mutable - int bx_, by_, bz_; - mutable - bool dirichletX_,dirichletY_,dirichletZ_; - mutable - int naggx_, naggy_, naggz_; + mutable int nDim_; + mutable RCP xMap_, yMap_, zMap_; + mutable ArrayRCP::magnitudeType> + x_, y_, z_; + mutable int nx_, ny_, nz_; + mutable int bx_, by_, bz_; + mutable bool dirichletX_, dirichletY_, dirichletZ_; + mutable int naggx_, naggy_, naggz_; - mutable - std::map revMap_; - }; // class BrickAggregationFactory + mutable std::map revMap_; +}; // class BrickAggregationFactory -} +} // namespace MueLu #define MUELU_BRICKAGGREGATIONFACTORY_SHORT #endif /* MUELU_BRICKAGGREGATIONFACTORY_DECL_HPP_ */ diff --git 
a/packages/muelu/src/Graph/BrickAggregation/MueLu_BrickAggregationFactory_def.hpp b/packages/muelu/src/Graph/BrickAggregation/MueLu_BrickAggregationFactory_def.hpp index aa6863becd89..06f65af9638d 100644 --- a/packages/muelu/src/Graph/BrickAggregation/MueLu_BrickAggregationFactory_def.hpp +++ b/packages/muelu/src/Graph/BrickAggregation/MueLu_BrickAggregationFactory_def.hpp @@ -48,8 +48,8 @@ #include "MueLu_BrickAggregationFactory_decl.hpp" #ifdef HAVE_MPI -#include #include +#include #endif #include @@ -62,526 +62,592 @@ #include #include "MueLu_Aggregates.hpp" +#include "MueLu_Graph.hpp" +#include "MueLu_GraphBase.hpp" +#include "MueLu_LWGraph.hpp" #include "MueLu_Level.hpp" #include "MueLu_MasterList.hpp" #include "MueLu_Monitor.hpp" #include "MueLu_Utilities.hpp" -#include "MueLu_GraphBase.hpp" -#include "MueLu_Graph.hpp" -#include "MueLu_LWGraph.hpp" - namespace MueLu { - template - RCP BrickAggregationFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - -#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("aggregation: brick x size"); - SET_VALID_ENTRY("aggregation: brick y size"); - SET_VALID_ENTRY("aggregation: brick z size"); - SET_VALID_ENTRY("aggregation: brick x Dirichlet"); - SET_VALID_ENTRY("aggregation: brick y Dirichlet"); - SET_VALID_ENTRY("aggregation: brick z Dirichlet"); -#undef SET_VALID_ENTRY - - validParamList->set< RCP >("A", Teuchos::null, "Generating factory for matrix"); - validParamList->set< RCP >("Coordinates", Teuchos::null, "Generating factory for coordinates"); - return validParamList; +template +RCP +BrickAggregationFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + +#define SET_VALID_ENTRY(name) \ + validParamList->setEntry(name, MasterList::getEntry(name)) + SET_VALID_ENTRY("aggregation: brick x size"); + SET_VALID_ENTRY("aggregation: brick y size"); + SET_VALID_ENTRY("aggregation: brick z size"); 
+ SET_VALID_ENTRY("aggregation: brick x Dirichlet"); + SET_VALID_ENTRY("aggregation: brick y Dirichlet"); + SET_VALID_ENTRY("aggregation: brick z Dirichlet"); +#undef SET_VALID_ENTRY + + validParamList->set>("A", Teuchos::null, + "Generating factory for matrix"); + validParamList->set>( + "Coordinates", Teuchos::null, "Generating factory for coordinates"); + return validParamList; +} + +template +void BrickAggregationFactory::DeclareInput(Level ¤tLevel) const { + Input(currentLevel, "A"); + Input(currentLevel, "Coordinates"); +} + +// The current implementation cannot deal with bricks larger than 3x3(x3) in +// parallel. The reason is that aggregation infrastructure in place has +// major drawbacks. +// +// Aggregates class is constructed with a help of a provided map, either +// taken from a graph, or provided directly. This map is usually taken to be +// a column map of a matrix. The reason for that is that if we have an +// overlapped aggregation, we want the processor owning aggregates to store +// agg id for all nodes in this aggregate. If we used row map, there would +// be no way for the processor to know whether there are some other nodes on +// a different processor which belong to its aggregate. On the other hand, +// using column map allows both vertex2AggId and procWinner arrays in +// Aggregates class to store some extra data, such as whether nodes belonging +// to a different processor belong to this processor aggregate. +// +// The drawback of this is that it stores only overlap=1 data. For aggressive +// coarsening, such a brick aggregation with a large single dimension of +// brick, it could happen that we need to know depth two or more extra nodes +// in the other processor subdomain. +// +// Another issue is that we may have some implicit connection between +// aggregate map and maps of A used in the construction of a tentative +// prolongator. +// +// Another issue is that it seems that some info is unused or not required. 
+// Specifically, it seems that if a node belongs to an aggregate on a +// different processor, we don't actually need to set vertex2AggId and +// procWinner, despite the following comment in +// Aggregates decl: +// vertex2AggId[k] gives a local id +// corresponding to the aggregate to which +// local id k has been assigned. While k +// is the local id on my processor (MyPID) +// vertex2AggId[k] is the local id on the +// processor which actually owns the +// aggregate. This owning processor has id +// given by procWinner[k]. +// It is possible that that info is only used during arbitration in +// CoupledAggregationFactory. +// +// The steps that we need to do to resolve this issue: +// - Break the link between maps in TentativePFactory, allowing any maps in +// Aggregates +// - Allow Aggregates to construct their own maps, if necessary, OR +// - construct aggregates based on row map +template +void BrickAggregationFactory::Build( + Level ¤tLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + + typedef Xpetra::MultiVector< + typename Teuchos::ScalarTraits::magnitudeType, LO, GO, NO> + MultiVector_d; + + const ParameterList &pL = GetParameterList(); + RCP coords = + Get>(currentLevel, "Coordinates"); + RCP A = Get>(currentLevel, "A"); + RCP rowMap = A->getRowMap(); + RCP colMap = A->getColMap(); + GO GO_INVALID = Teuchos::OrdinalTraits::invalid(); + + RCP> comm = rowMap->getComm(); + int numProcs = comm->getSize(); + int myRank = comm->getRank(); + + int numPoints = colMap->getLocalNumElements(); + + bx_ = pL.get("aggregation: brick x size"); + by_ = pL.get("aggregation: brick y size"); + bz_ = pL.get("aggregation: brick z size"); + + dirichletX_ = pL.get("aggregation: brick x Dirichlet"); + dirichletY_ = pL.get("aggregation: brick y Dirichlet"); + dirichletZ_ = pL.get("aggregation: brick z Dirichlet"); + if (dirichletX_) + GetOStream(Runtime0) << "Dirichlet boundaries in the x direction" + << std::endl; + if (dirichletY_) + GetOStream(Runtime0) << 
"Dirichlet boundaries in the y direction" + << std::endl; + if (dirichletZ_) + GetOStream(Runtime0) << "Dirichlet boundaries in the z direction" + << std::endl; + + if (numProcs > 1) { + // TODO: deal with block size > 1 (see comments above) + // TEUCHOS_TEST_FOR_EXCEPTION(bx_ > 3 || by_ > 3 || bz_ > 3, + // Exceptions::RuntimeError, "Currently cannot deal with brick size > 3"); } - template - void BrickAggregationFactory::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "A"); - Input(currentLevel, "Coordinates"); + RCP overlappedCoords = coords; + RCP importer = ImportFactory::Build(coords->getMap(), colMap); + if (!importer.is_null()) { + overlappedCoords = Xpetra::MultiVectorFactory< + typename Teuchos::ScalarTraits::magnitudeType, LO, GO, + NO>::Build(colMap, coords->getNumVectors()); + overlappedCoords->doImport(*coords, *importer, Xpetra::INSERT); } - // The current implementation cannot deal with bricks larger than 3x3(x3) in - // parallel. The reason is that aggregation infrastructure in place has - // major drawbacks. - // - // Aggregates class is constructed with a help of a provided map, either - // taken from a graph, or provided directly. This map is usually taken to be - // a column map of a matrix. The reason for that is that if we have an - // overlapped aggregation, we want the processor owning aggregates to store - // agg id for all nodes in this aggregate. If we used row map, there would - // be no way for the processor to know whether there are some other nodes on - // a different processor which belong to its aggregate. On the other hand, - // using column map allows both vertex2AggId and procWinner arrays in - // Aggregates class to store some extra data, such as whether nodes belonging - // to a different processor belong to this processor aggregate. - // - // The drawback of this is that it stores only overlap=1 data. 
For aggressive - // coarsening, such a brick aggregation with a large single dimension of - // brick, it could happen that we need to know depth two or more extra nodes - // in the other processor subdomain. - // - // Another issue is that we may have some implicit connection between - // aggregate map and maps of A used in the construction of a tentative - // prolongator. - // - // Another issue is that it seems that some info is unused or not required. - // Specifically, it seems that if a node belongs to an aggregate on a - // different processor, we don't actually need to set vertex2AggId and - // procWinner, despite the following comment in - // Aggregates decl: - // vertex2AggId[k] gives a local id - // corresponding to the aggregate to which - // local id k has been assigned. While k - // is the local id on my processor (MyPID) - // vertex2AggId[k] is the local id on the - // processor which actually owns the - // aggregate. This owning processor has id - // given by procWinner[k]. - // It is possible that that info is only used during arbitration in - // CoupledAggregationFactory. 
- // - // The steps that we need to do to resolve this issue: - // - Break the link between maps in TentativePFactory, allowing any maps in Aggregates - // - Allow Aggregates to construct their own maps, if necessary, OR - // - construct aggregates based on row map - template - void BrickAggregationFactory::Build(Level& currentLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); - - typedef Xpetra::MultiVector::magnitudeType,LO,GO,NO> MultiVector_d; - - const ParameterList& pL = GetParameterList(); - RCP coords = Get >(currentLevel, "Coordinates"); - RCP A = Get< RCP > (currentLevel, "A"); - RCP rowMap = A->getRowMap(); - RCP colMap = A->getColMap(); - GO GO_INVALID = Teuchos::OrdinalTraits::invalid(); - - RCP > comm = rowMap->getComm(); - int numProcs = comm->getSize(); - int myRank = comm->getRank(); - - int numPoints = colMap->getLocalNumElements(); - - bx_ = pL.get("aggregation: brick x size"); - by_ = pL.get("aggregation: brick y size"); - bz_ = pL.get("aggregation: brick z size"); - - dirichletX_ = pL.get("aggregation: brick x Dirichlet"); - dirichletY_ = pL.get("aggregation: brick y Dirichlet"); - dirichletZ_ = pL.get("aggregation: brick z Dirichlet"); - if(dirichletX_) GetOStream(Runtime0) << "Dirichlet boundaries in the x direction"< 1) { - // TODO: deal with block size > 1 (see comments above) - //TEUCHOS_TEST_FOR_EXCEPTION(bx_ > 3 || by_ > 3 || bz_ > 3, Exceptions::RuntimeError, "Currently cannot deal with brick size > 3"); + // Setup misc structures + // Logically, we construct enough data to query topological information of a + // rectangular grid + Setup(comm, overlappedCoords, colMap); + + GetOStream(Runtime0) << "Using brick size: " << bx_ + << (nDim_ > 1 ? "x " + toString(by_) : "") + << (nDim_ > 2 ? 
"x " + toString(bz_) : "") << std::endl; + + // Build the graph + BuildGraph(currentLevel, A); + + // Construct aggregates + RCP aggregates = rcp(new Aggregates(colMap)); + aggregates->setObjectLabel("Brick"); + + ArrayRCP vertex2AggId = aggregates->GetVertex2AggId()->getDataNonConst(0); + ArrayRCP procWinner = aggregates->GetProcWinner()->getDataNonConst(0); + + // In the first pass, we set a mapping from a vertex to aggregate global id. + // We deal with a structured rectangular mesh, therefore we know the structure + // of aggregates. For each vertex we can tell exactly which aggregate it + // belongs to. If we determine that the aggregate does not belong to us (i.e. + // the root vertex does not belong to this processor, or is outside and we + // lost "" arbitration), we record the global aggregate id in order to fetch + // the local info from the processor owning the aggregate. This is required + // for aggregates, as it uses the local aggregate ids of the owning processor. + std::set myAggGIDs, remoteAggGIDs; + for (LO LID = 0; LID < numPoints; LID++) { + GO aggGID = getAggGID(LID); + // printf("[%d] (%d,%d,%d) => agg + // %d\n",LID,(int)(*xMap_)[x_[LID]],nDim_ > 1 ? (int)(*yMap_)[y_[LID]] + // : -1,nDim_ > 2 ? 
(int)(*zMap_)[z_[LID]] : -1,(int)aggGID); + if (aggGID == GO_INVALID) + continue; + // printf("[%d] getRoot = %d\n",(int)LID,(int)getRoot(LID)); + + if ((revMap_.find(getRoot(LID)) != revMap_.end()) && + rowMap->isNodeGlobalElement( + colMap->getGlobalElement(revMap_[getRoot(LID)]))) { + // Root of the brick aggregate containing GID (<- LID) belongs to us + vertex2AggId[LID] = aggGID; + myAggGIDs.insert(aggGID); + + if (isRoot(LID)) + aggregates->SetIsRoot(LID); + // printf("[%d] initial vertex2AggId = + //%d\n",(int)LID,(int)vertex2AggId[LID]); + } else { + remoteAggGIDs.insert(aggGID); } - - RCP overlappedCoords = coords; - RCP importer = ImportFactory::Build(coords->getMap(), colMap); - if (!importer.is_null()) { - overlappedCoords = Xpetra::MultiVectorFactory::magnitudeType,LO,GO,NO>::Build(colMap, coords->getNumVectors()); - overlappedCoords->doImport(*coords, *importer, Xpetra::INSERT); - } - - // Setup misc structures - // Logically, we construct enough data to query topological information of a rectangular grid - Setup(comm, overlappedCoords, colMap); - - GetOStream(Runtime0) << "Using brick size: " << bx_ - << (nDim_ > 1 ? "x " + toString(by_) : "") - << (nDim_ > 2 ? "x " + toString(bz_) : "") << std::endl; - - // Build the graph - BuildGraph(currentLevel,A); - - // Construct aggregates - RCP aggregates = rcp(new Aggregates(colMap)); - aggregates->setObjectLabel("Brick"); - - ArrayRCP vertex2AggId = aggregates->GetVertex2AggId()->getDataNonConst(0); - ArrayRCP procWinner = aggregates->GetProcWinner() ->getDataNonConst(0); - - // In the first pass, we set a mapping from a vertex to aggregate global id. We deal with a structured - // rectangular mesh, therefore we know the structure of aggregates. For each vertex we can tell exactly - // which aggregate it belongs to. - // If we determine that the aggregate does not belong to us (i.e. 
the root vertex does not belong to this - // processor, or is outside and we lost "" arbitration), we record the global aggregate id in order to - // fetch the local info from the processor owning the aggregate. This is required for aggregates, as it - // uses the local aggregate ids of the owning processor. - std::set myAggGIDs, remoteAggGIDs; - for (LO LID = 0; LID < numPoints; LID++) { - GO aggGID = getAggGID(LID); - // printf("[%d] (%d,%d,%d) => agg %d\n",LID,(int)(*xMap_)[x_[LID]],nDim_ > 1 ? (int)(*yMap_)[y_[LID]] : -1,nDim_ > 2 ? (int)(*zMap_)[z_[LID]] : -1,(int)aggGID); - if(aggGID == GO_INVALID) continue; - // printf("[%d] getRoot = %d\n",(int)LID,(int)getRoot(LID)); - - if ((revMap_.find(getRoot(LID)) != revMap_.end()) && rowMap->isNodeGlobalElement(colMap->getGlobalElement(revMap_[getRoot(LID)]))) { - // Root of the brick aggregate containing GID (<- LID) belongs to us - vertex2AggId[LID] = aggGID; - myAggGIDs.insert(aggGID); - - if (isRoot(LID)) - aggregates->SetIsRoot(LID); - // printf("[%d] initial vertex2AggId = %d\n",(int)LID,(int)vertex2AggId[LID]); - } else { - remoteAggGIDs.insert(aggGID); - } - } - size_t numAggregates = myAggGIDs .size(); - size_t numRemote = remoteAggGIDs.size(); - aggregates->SetNumAggregates(numAggregates); - - std::map AggG2L; // Map: Agg GID -> Agg LID (possibly on a different processor) - std::map AggG2R; // Map: Agg GID -> processor rank owning aggregate - - Array myAggGIDsArray(numAggregates), remoteAggGIDsArray(numRemote); - - // Fill in the maps for aggregates that we own - size_t ind = 0; - for (typename std::set::const_iterator it = myAggGIDs.begin(); it != myAggGIDs.end(); it++) { - AggG2L[*it] = ind; - AggG2R[*it] = myRank; - - myAggGIDsArray[ind++] = *it; - } - - // The map is a convenient way to fetch remote local indices from global indices. 
- RCP aggMap = MapFactory::Build(rowMap->lib(), Teuchos::OrdinalTraits::invalid(), - myAggGIDsArray, 0, comm); - - ind = 0; - for (typename std::set::const_iterator it = remoteAggGIDs.begin(); it != remoteAggGIDs.end(); it++) - remoteAggGIDsArray[ind++] = *it; - - // Fetch the required aggregate local ids and ranks - Array remoteProcIDs(numRemote); - Array remoteLIDs (numRemote); - aggMap->getRemoteIndexList(remoteAggGIDsArray, remoteProcIDs, remoteLIDs); - - // Fill in the maps for aggregates that we don't own but which have some of our vertices - for (size_t i = 0; i < numRemote; i++) { - AggG2L[remoteAggGIDsArray[i]] = remoteLIDs [i]; - AggG2R[remoteAggGIDsArray[i]] = remoteProcIDs[i]; - } - - // Remap aggregate GIDs to LIDs and set up owning processors - for (LO LID = 0; LID < numPoints; LID++) { - if (revMap_.find(getRoot(LID)) != revMap_.end() && rowMap->isNodeGlobalElement(colMap->getGlobalElement(revMap_[getRoot(LID)]))) { - GO aggGID = vertex2AggId[LID]; - if(aggGID != MUELU_UNAGGREGATED) { - vertex2AggId[LID] = AggG2L[aggGID]; - procWinner [LID] = AggG2R[aggGID]; - } - } - } - - - GO numGlobalRemote; - MueLu_sumAll(comm, as(numRemote), numGlobalRemote); - aggregates->AggregatesCrossProcessors(numGlobalRemote); - - Set(currentLevel, "Aggregates", aggregates); - - GetOStream(Statistics1) << aggregates->description() << std::endl; } + size_t numAggregates = myAggGIDs.size(); + size_t numRemote = remoteAggGIDs.size(); + aggregates->SetNumAggregates(numAggregates); - template - void BrickAggregationFactory:: - Setup(const RCP >& comm, const RCP::magnitudeType,LO,GO,NO> >& coords, const RCP& /* map */) const { - nDim_ = coords->getNumVectors(); - - x_ = coords->getData(0); - xMap_ = Construct1DMap(comm, x_); - nx_ = xMap_->size(); - - ny_ = 1; - if (nDim_ > 1) { - y_ = coords->getData(1); - yMap_ = Construct1DMap(comm, y_); - ny_ = yMap_->size(); - } - - nz_ = 1; - if (nDim_ > 2) { - z_ = coords->getData(2); - zMap_ = Construct1DMap(comm, z_); - nz_ = 
zMap_->size(); - } - - for (size_t ind = 0; ind < coords->getLocalLength(); ind++) { - GO i = (*xMap_)[(coords->getData(0))[ind]], j = 0, k = 0; - if (nDim_ > 1) - j = (*yMap_)[(coords->getData(1))[ind]]; - if (nDim_ > 2) - k = (*zMap_)[(coords->getData(2))[ind]]; - - revMap_[k*ny_*nx_ + j*nx_ + i] = ind; - } - - - // Get the number of aggregates in each direction, correcting for Dirichlet - int xboost = dirichletX_ ? 1 : 0; - int yboost = dirichletY_ ? 1 : 0; - int zboost = dirichletZ_ ? 1 : 0; - naggx_ = (nx_-2*xboost)/bx_ + ((nx_-2*xboost) % bx_ ? 1 : 0); + std::map + AggG2L; // Map: Agg GID -> Agg LID (possibly on a different processor) + std::map AggG2R; // Map: Agg GID -> processor rank owning aggregate - if(nDim_ > 1) - naggy_ = (ny_-2*yboost)/by_ + ( (ny_-2*yboost) % by_ ? 1 : 0); - else - naggy_ = 1; + Array myAggGIDsArray(numAggregates), remoteAggGIDsArray(numRemote); - if(nDim_ > 2) - naggz_ = (nz_-2*zboost)/bz_ + ( (nz_-2*zboost) % bz_ ? 1 : 0); - else - naggz_ = 1; + // Fill in the maps for aggregates that we own + size_t ind = 0; + for (typename std::set::const_iterator it = myAggGIDs.begin(); + it != myAggGIDs.end(); it++) { + AggG2L[*it] = ind; + AggG2R[*it] = myRank; + myAggGIDsArray[ind++] = *it; } - template - RCP::container> - BrickAggregationFactory:: - Construct1DMap (const RCP >& comm, - const ArrayRCP::magnitudeType>& x) const - { - int n = x.size(); - - // Step 1: Create a local vector with unique coordinate points - RCP gMap = rcp(new container); - for (int i = 0; i < n; i++) - (*gMap)[x[i]] = 0; + // The map is a convenient way to fetch remote local indices from global + // indices. 
+ RCP aggMap = MapFactory::Build( + rowMap->lib(), Teuchos::OrdinalTraits::invalid(), + myAggGIDsArray, 0, comm); + + ind = 0; + for (typename std::set::const_iterator it = remoteAggGIDs.begin(); + it != remoteAggGIDs.end(); it++) + remoteAggGIDsArray[ind++] = *it; + + // Fetch the required aggregate local ids and ranks + Array remoteProcIDs(numRemote); + Array remoteLIDs(numRemote); + aggMap->getRemoteIndexList(remoteAggGIDsArray, remoteProcIDs, remoteLIDs); + + // Fill in the maps for aggregates that we don't own but which have some of + // our vertices + for (size_t i = 0; i < numRemote; i++) { + AggG2L[remoteAggGIDsArray[i]] = remoteLIDs[i]; + AggG2R[remoteAggGIDsArray[i]] = remoteProcIDs[i]; + } -#ifdef HAVE_MPI - // Step 2: exchange coordinates - // NOTE: we assume the coordinates are double, or double compatible - // That means that for complex case, we assume that all imaginary parts are zeros - int numProcs = comm->getSize(); - if (numProcs > 1) { - RCP > dupMpiComm = rcp_dynamic_cast >(comm->duplicate()); - - MPI_Comm rawComm = (*dupMpiComm->getRawMpiComm())(); - - int sendCnt = gMap->size(), cnt = 0, recvSize; - Array recvCnt(numProcs), Displs(numProcs); - Array sendBuf, recvBuf; - - sendBuf.resize(sendCnt); - for (typename container::const_iterator cit = gMap->begin(); cit != gMap->end(); cit++) - sendBuf[cnt++] = Teuchos::as(STS::real(cit->first)); - - MPI_Allgather(&sendCnt, 1, MPI_INT, recvCnt.getRawPtr(), 1, MPI_INT, rawComm); - Displs[0] = 0; - for (int i = 0; i < numProcs-1; i++) - Displs[i+1] = Displs[i] + recvCnt[i]; - recvSize = Displs[numProcs-1] + recvCnt[numProcs-1]; - recvBuf.resize(recvSize); - MPI_Allgatherv(sendBuf.getRawPtr(), sendCnt, MPI_DOUBLE, recvBuf.getRawPtr(), recvCnt.getRawPtr(), Displs.getRawPtr(), MPI_DOUBLE, rawComm); - - for (int i = 0; i < recvSize; i++) - (*gMap)[as(recvBuf[i])] = 0; + // Remap aggregate GIDs to LIDs and set up owning processors + for (LO LID = 0; LID < numPoints; LID++) { + if (revMap_.find(getRoot(LID)) 
!= revMap_.end() && + rowMap->isNodeGlobalElement( + colMap->getGlobalElement(revMap_[getRoot(LID)]))) { + GO aggGID = vertex2AggId[LID]; + if (aggGID != MUELU_UNAGGREGATED) { + vertex2AggId[LID] = AggG2L[aggGID]; + procWinner[LID] = AggG2R[aggGID]; + } } -#endif - - GO cnt = 0; - for (typename container::iterator it = gMap->begin(); it != gMap->end(); it++) - it->second = cnt++; - - return gMap; } - template - bool BrickAggregationFactory::isRoot(LocalOrdinal LID) const { - int i,j,k; - getIJK(LID,i,j,k); - - return (k*ny_*nx_ + j*nx_ + i) == getRoot(LID); + GO numGlobalRemote; + MueLu_sumAll(comm, as(numRemote), numGlobalRemote); + aggregates->AggregatesCrossProcessors(numGlobalRemote); + + Set(currentLevel, "Aggregates", aggregates); + + GetOStream(Statistics1) << aggregates->description() << std::endl; +} + +template +void BrickAggregationFactory::Setup( + const RCP> &comm, + const RCP::magnitudeType, LO, GO, NO>> + &coords, + const RCP & /* map */) const { + nDim_ = coords->getNumVectors(); + + x_ = coords->getData(0); + xMap_ = Construct1DMap(comm, x_); + nx_ = xMap_->size(); + + ny_ = 1; + if (nDim_ > 1) { + y_ = coords->getData(1); + yMap_ = Construct1DMap(comm, y_); + ny_ = yMap_->size(); } - template - bool BrickAggregationFactory::isDirichlet(LocalOrdinal LID) const { - bool boundary = false; - int i,j,k; - getIJK(LID,i,j,k); - if( dirichletX_ && (i == 0 || i == nx_-1) ) - boundary = true; - if(nDim_ > 1 && dirichletY_ && (j == 0 || j == ny_-1) ) - boundary = true; - if(nDim_ > 2 && dirichletZ_ && (k == 0 || k == nz_-1) ) - boundary = true; - - return boundary; + nz_ = 1; + if (nDim_ > 2) { + z_ = coords->getData(2); + zMap_ = Construct1DMap(comm, z_); + nz_ = zMap_->size(); } + for (size_t ind = 0; ind < coords->getLocalLength(); ind++) { + GO i = (*xMap_)[(coords->getData(0))[ind]], j = 0, k = 0; + if (nDim_ > 1) + j = (*yMap_)[(coords->getData(1))[ind]]; + if (nDim_ > 2) + k = (*zMap_)[(coords->getData(2))[ind]]; - template - GlobalOrdinal 
BrickAggregationFactory::getRoot(LocalOrdinal LID) const { - if(isDirichlet(LID)) - return Teuchos::OrdinalTraits::invalid(); - - int aggI,aggJ,aggK; - getAggIJK(LID,aggI,aggJ,aggK); - int xboost = dirichletX_ ? 1 : 0; - int yboost = dirichletY_ ? 1 : 0; - int zboost = dirichletZ_ ? 1 : 0; - - int i = xboost + aggI*bx_ + (bx_-1)/2; - int j = (nDim_>1) ? yboost + aggJ*by_ + (by_-1)/2 : 0; - int k = (nDim_>2) ? zboost + aggK*bz_ + (bz_-1)/2 : 0; - - return k*ny_*nx_ + j*nx_ + i; - } - - template - void BrickAggregationFactory::getIJK(LocalOrdinal LID, int &i, int &j, int &k) const { - i = (*xMap_)[x_[LID]]; - j = (nDim_>1) ? (*yMap_)[y_[LID]] : 0; - k = (nDim_>2) ? (*zMap_)[z_[LID]] : 0; + revMap_[k * ny_ * nx_ + j * nx_ + i] = ind; } + // Get the number of aggregates in each direction, correcting for Dirichlet + int xboost = dirichletX_ ? 1 : 0; + int yboost = dirichletY_ ? 1 : 0; + int zboost = dirichletZ_ ? 1 : 0; + naggx_ = (nx_ - 2 * xboost) / bx_ + ((nx_ - 2 * xboost) % bx_ ? 1 : 0); + + if (nDim_ > 1) + naggy_ = (ny_ - 2 * yboost) / by_ + ((ny_ - 2 * yboost) % by_ ? 1 : 0); + else + naggy_ = 1; + + if (nDim_ > 2) + naggz_ = (nz_ - 2 * zboost) / bz_ + ((nz_ - 2 * zboost) % bz_ ? 1 : 0); + else + naggz_ = 1; +} + +template +RCP::container> +BrickAggregationFactory:: + Construct1DMap(const RCP> &comm, + const ArrayRCP::magnitudeType> &x) const { + int n = x.size(); + + // Step 1: Create a local vector with unique coordinate points + RCP gMap = rcp(new container); + for (int i = 0; i < n; i++) + (*gMap)[x[i]] = 0; - template - void BrickAggregationFactory::getAggIJK(LocalOrdinal LID, int &i, int &j, int &k) const { - int xboost = dirichletX_ ? 1 : 0; - int yboost = dirichletY_ ? 1 : 0; - int zboost = dirichletZ_ ? 
1 : 0; - int pointI, pointJ, pointK; - getIJK(LID,pointI,pointJ,pointK); - i = (pointI-xboost)/bx_; - - if (nDim_ > 1) j = (pointJ-yboost)/by_; - else j = 0; - - if (nDim_ > 2) k = (pointK-zboost)/bz_; - else k = 0; - } - - template - GlobalOrdinal BrickAggregationFactory::getAggGID(LocalOrdinal LID) const { - bool boundary = false; - - int i, j, k; - getIJK(LID,i,j,k); - int ii , jj, kk; - getAggIJK(LID,ii,jj,kk); - - if( dirichletX_ && (i == 0 || i == nx_ - 1)) boundary = true; - if (nDim_ > 1 && dirichletY_ && (j == 0 || j == ny_ - 1)) boundary = true; - if (nDim_ > 2 && dirichletZ_ && (k == 0 || k == nz_ - 1)) boundary = true; - - /* - if(boundary) - printf("[%d] coord = (%d,%d,%d) {%d,%d,%d} agg = (%d,%d,%d) {%d,%d,%d} => agg %s\n",LID,i,j,k,nx_,ny_,nz_,ii,jj,kk,naggx_,naggy_,naggz_,"BOUNDARY"); - else - printf("[%d] coord = (%d,%d,%d) {%d,%d,%d} agg = (%d,%d,%d) {%d,%d,%d} => agg %d\n",LID,i,j,k,nx_,ny_,nz_,ii,jj,kk,naggx_,naggy_,naggz_,kk*naggy_*naggx_ + jj*naggx_ + ii); - */ - - if (boundary) - return Teuchos::OrdinalTraits::invalid(); - else - return Teuchos::as(kk*naggy_*naggx_) + Teuchos::as(jj*naggx_) + ii; - +#ifdef HAVE_MPI + // Step 2: exchange coordinates + // NOTE: we assume the coordinates are double, or double compatible + // That means that for complex case, we assume that all imaginary parts are + // zeros + int numProcs = comm->getSize(); + if (numProcs > 1) { + RCP> dupMpiComm = + rcp_dynamic_cast>(comm->duplicate()); + + MPI_Comm rawComm = (*dupMpiComm->getRawMpiComm())(); + + int sendCnt = gMap->size(), cnt = 0, recvSize; + Array recvCnt(numProcs), Displs(numProcs); + Array sendBuf, recvBuf; + + sendBuf.resize(sendCnt); + for (typename container::const_iterator cit = gMap->begin(); + cit != gMap->end(); cit++) + sendBuf[cnt++] = Teuchos::as(STS::real(cit->first)); + + MPI_Allgather(&sendCnt, 1, MPI_INT, recvCnt.getRawPtr(), 1, MPI_INT, + rawComm); + Displs[0] = 0; + for (int i = 0; i < numProcs - 1; i++) + Displs[i + 1] = Displs[i] + 
recvCnt[i]; + recvSize = Displs[numProcs - 1] + recvCnt[numProcs - 1]; + recvBuf.resize(recvSize); + MPI_Allgatherv(sendBuf.getRawPtr(), sendCnt, MPI_DOUBLE, + recvBuf.getRawPtr(), recvCnt.getRawPtr(), Displs.getRawPtr(), + MPI_DOUBLE, rawComm); + + for (int i = 0; i < recvSize; i++) + (*gMap)[as(recvBuf[i])] = 0; } +#endif - - template - void BrickAggregationFactory::BuildGraph(Level& currentLevel, const RCP& A) const { - // TODO: Currently only works w/ 1 DOF per node - double dirichletThreshold = 0.0; - - if(bx_ > 1 && (nDim_ <= 1 || by_ > 1) && (nDim_ <=2 || bz_>1) ) { - FactoryMonitor m(*this, "Generating Graph (trivial)", currentLevel); - /*** Case 1: Use the matrix is the graph ***/ - // Bricks are of non-trivial size in all active dimensions - RCP graph = rcp(new Graph(A->getCrsGraph(), "graph of A")); - ArrayRCP boundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); - graph->SetBoundaryNodeMap(boundaryNodes); - - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; - for (LO i = 0; i < boundaryNodes.size(); ++i) - if (boundaryNodes[i]) - numLocalBoundaryNodes++; - RCP > comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; - } - Set(currentLevel, "DofsPerNode", 1); - Set(currentLevel, "Graph", graph); - Set(currentLevel, "Filtering",false); + GO cnt = 0; + for (typename container::iterator it = gMap->begin(); it != gMap->end(); it++) + it->second = cnt++; + + return gMap; +} + +template +bool BrickAggregationFactory::isRoot( + LocalOrdinal LID) const { + int i, j, k; + getIJK(LID, i, j, k); + + return (k * ny_ * nx_ + j * nx_ + i) == getRoot(LID); +} + +template +bool BrickAggregationFactory::isDirichlet(LocalOrdinal LID) const { + bool boundary = false; + int i, j, k; + getIJK(LID, i, j, k); + if (dirichletX_ 
&& (i == 0 || i == nx_ - 1)) + boundary = true; + if (nDim_ > 1 && dirichletY_ && (j == 0 || j == ny_ - 1)) + boundary = true; + if (nDim_ > 2 && dirichletZ_ && (k == 0 || k == nz_ - 1)) + boundary = true; + + return boundary; +} + +template +GlobalOrdinal +BrickAggregationFactory::getRoot( + LocalOrdinal LID) const { + if (isDirichlet(LID)) + return Teuchos::OrdinalTraits::invalid(); + + int aggI, aggJ, aggK; + getAggIJK(LID, aggI, aggJ, aggK); + int xboost = dirichletX_ ? 1 : 0; + int yboost = dirichletY_ ? 1 : 0; + int zboost = dirichletZ_ ? 1 : 0; + + int i = xboost + aggI * bx_ + (bx_ - 1) / 2; + int j = (nDim_ > 1) ? yboost + aggJ * by_ + (by_ - 1) / 2 : 0; + int k = (nDim_ > 2) ? zboost + aggK * bz_ + (bz_ - 1) / 2 : 0; + + return k * ny_ * nx_ + j * nx_ + i; +} + +template +void BrickAggregationFactory::getIJK( + LocalOrdinal LID, int &i, int &j, int &k) const { + i = (*xMap_)[x_[LID]]; + j = (nDim_ > 1) ? (*yMap_)[y_[LID]] : 0; + k = (nDim_ > 2) ? (*zMap_)[z_[LID]] : 0; +} + +template +void BrickAggregationFactory::getAggIJK(LocalOrdinal LID, int &i, int &j, + int &k) const { + int xboost = dirichletX_ ? 1 : 0; + int yboost = dirichletY_ ? 1 : 0; + int zboost = dirichletZ_ ? 
1 : 0; + int pointI, pointJ, pointK; + getIJK(LID, pointI, pointJ, pointK); + i = (pointI - xboost) / bx_; + + if (nDim_ > 1) + j = (pointJ - yboost) / by_; + else + j = 0; + + if (nDim_ > 2) + k = (pointK - zboost) / bz_; + else + k = 0; +} + +template +GlobalOrdinal +BrickAggregationFactory::getAggGID( + LocalOrdinal LID) const { + bool boundary = false; + + int i, j, k; + getIJK(LID, i, j, k); + int ii, jj, kk; + getAggIJK(LID, ii, jj, kk); + + if (dirichletX_ && (i == 0 || i == nx_ - 1)) + boundary = true; + if (nDim_ > 1 && dirichletY_ && (j == 0 || j == ny_ - 1)) + boundary = true; + if (nDim_ > 2 && dirichletZ_ && (k == 0 || k == nz_ - 1)) + boundary = true; + + /* + if(boundary) + printf("[%d] coord = (%d,%d,%d) {%d,%d,%d} agg = (%d,%d,%d) {%d,%d,%d} => + agg %s\n",LID,i,j,k,nx_,ny_,nz_,ii,jj,kk,naggx_,naggy_,naggz_,"BOUNDARY"); + else + printf("[%d] coord = (%d,%d,%d) {%d,%d,%d} agg = (%d,%d,%d) {%d,%d,%d} => + agg %d\n",LID,i,j,k,nx_,ny_,nz_,ii,jj,kk,naggx_,naggy_,naggz_,kk*naggy_*naggx_ + + jj*naggx_ + ii); + */ + + if (boundary) + return Teuchos::OrdinalTraits::invalid(); + else + return Teuchos::as(kk * naggy_ * naggx_) + + Teuchos::as(jj * naggx_) + ii; +} + +template +void BrickAggregationFactory::BuildGraph(Level ¤tLevel, + const RCP &A) const { + // TODO: Currently only works w/ 1 DOF per node + double dirichletThreshold = 0.0; + + if (bx_ > 1 && (nDim_ <= 1 || by_ > 1) && (nDim_ <= 2 || bz_ > 1)) { + FactoryMonitor m(*this, "Generating Graph (trivial)", currentLevel); + /*** Case 1: Use the matrix is the graph ***/ + // Bricks are of non-trivial size in all active dimensions + RCP graph = rcp(new Graph(A->getCrsGraph(), "graph of A")); + ArrayRCP boundaryNodes = Teuchos::arcp_const_cast( + MueLu::Utilities::DetectDirichletRows( + *A, dirichletThreshold)); + graph->SetBoundaryNodeMap(boundaryNodes); + + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; + for (LO i = 0; i < boundaryNodes.size(); 
++i) + if (boundaryNodes[i]) + numLocalBoundaryNodes++; + RCP> comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes + << " Dirichlet nodes" << std::endl; } - else { - FactoryMonitor m(*this, "Generating Graph", currentLevel); - /*** Case 2: Dropping required ***/ - // There is at least one active dimension in which we are not coarsening. - // Those connections need to be dropped - bool drop_x = (bx_ == 1); - bool drop_y = (nDim_> 1 && by_ == 1); - bool drop_z = (nDim_> 2 && bz_ == 1); - - ArrayRCP rows (A->getLocalNumRows()+1); - ArrayRCP columns(A->getLocalNumEntries()); - - size_t N = A->getRowMap()->getLocalNumElements(); - - // FIXME: Do this on the host because indexing functions are host functions - auto G = A->getLocalMatrixHost().graph; - auto rowptr = G.row_map; - auto colind = G.entries; - - int ct=0; - rows[0] = 0; - for(size_t row=0; rowgetColMap()->getLocalElement(A->getRowMap()->getGlobalElement(row)); - getIJK(row2,ir,jr,kr); - - for(size_t cidx=rowptr[row]; cidx 1 && by_ == 1); + bool drop_z = (nDim_ > 2 && bz_ == 1); + + ArrayRCP rows(A->getLocalNumRows() + 1); + ArrayRCP columns(A->getLocalNumEntries()); + + size_t N = A->getRowMap()->getLocalNumElements(); + + // FIXME: Do this on the host because indexing functions are host functions + auto G = A->getLocalMatrixHost().graph; + auto rowptr = G.row_map; + auto colind = G.entries; + + int ct = 0; + rows[0] = 0; + for (size_t row = 0; row < N; row++) { + // NOTE: Assumes that the first part of the colmap is the rowmap + int ir, jr, kr; + LO row2 = A->getColMap()->getLocalElement( + A->getRowMap()->getGlobalElement(row)); + getIJK(row2, ir, jr, kr); + + for (size_t cidx = rowptr[row]; cidx < rowptr[row + 1]; cidx++) { + int ic, jc, kc; + LO col = colind[cidx]; + getIJK(col, ic, jc, kc); + + if ((row2 != col) && ((drop_x && ir != ic) || (drop_y && jr != jc) || + (drop_z && kr != kc))) { 
+ // Drop it + // printf("[%4d] DROP row = (%d,%d,%d) col = + // (%d,%d,%d)\n",(int)row,ir,jr,kr,ic,jc,kc); + } else { + // Keep it + // printf("[%4d] KEEP row = (%d,%d,%d) col = + // (%d,%d,%d)\n",(int)row,ir,jr,kr,ic,jc,kc); + columns[ct] = col; + ct++; } - rows[row+1] = ct; - }//end for - - RCP graph = rcp(new LWGraph(rows, columns, A->getRowMap(), A->getColMap(), "thresholded graph of A")); - - - ArrayRCP boundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); - graph->SetBoundaryNodeMap(boundaryNodes); - - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; - for (LO i = 0; i < boundaryNodes.size(); ++i) - if (boundaryNodes[i]) - numLocalBoundaryNodes++; - RCP > comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; } - Set(currentLevel, "DofsPerNode", 1); - Set(currentLevel, "Graph", graph); - Set(currentLevel, "Filtering",true); - }//end else - - - }//end BuildGraph - - + rows[row + 1] = ct; + } // end for + + RCP graph = + rcp(new LWGraph(rows, columns, A->getRowMap(), A->getColMap(), + "thresholded graph of A")); + + ArrayRCP boundaryNodes = Teuchos::arcp_const_cast( + MueLu::Utilities::DetectDirichletRows( + *A, dirichletThreshold)); + graph->SetBoundaryNodeMap(boundaryNodes); + + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; + for (LO i = 0; i < boundaryNodes.size(); ++i) + if (boundaryNodes[i]) + numLocalBoundaryNodes++; + RCP> comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes + << " Dirichlet nodes" << std::endl; + } + Set(currentLevel, "DofsPerNode", 1); + Set(currentLevel, "Graph", graph); + Set(currentLevel, "Filtering", true); + } 
// end else +} // end BuildGraph -} //namespace MueLu +} // namespace MueLu #endif /* MUELU_BRICKAGGREGATIONFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/Containers/MueLu_Aggregates_decl.hpp b/packages/muelu/src/Graph/Containers/MueLu_Aggregates_decl.hpp index 056673b7b9db..042f6a720b14 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_Aggregates_decl.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_Aggregates_decl.hpp @@ -54,32 +54,32 @@ #include "MueLu_Aggregates_fwd.hpp" #include -#include -#include #include +#include +#include #include "MueLu_BaseClass.hpp" -#include "MueLu_LWGraph_kokkos.hpp" #include "MueLu_Graph.hpp" #include "MueLu_GraphBase.hpp" #include "MueLu_IndexManager.hpp" #include "MueLu_IndexManager_kokkos.hpp" +#include "MueLu_LWGraph_kokkos.hpp" -#define MUELU_UNAGGREGATED -1 /* indicates that a node is unassigned to */ - /* any aggregate. */ - -#define MUELU_UNASSIGNED -1 /* indicates a vertex is not yet claimed */ - /* by a processor during aggregation. */ - /* Note, it is possible at */ - /* this stage that some processors may have*/ - /* claimed their copy of a vertex for one */ - /* of their aggregates. However, some */ - /* arbitration still needs to occur. */ - /* The corresponding procWinner[]'s remain */ - /* as MUELU_UNASSIGNED until */ - /* ArbitrateAndCommunicate() is */ - /* invoked to arbitrate. */ +#define MUELU_UNAGGREGATED -1 /* indicates that a node is unassigned to */ + /* any aggregate. */ + +#define MUELU_UNASSIGNED -1 /* indicates a vertex is not yet claimed */ + /* by a processor during aggregation. */ + /* Note, it is possible at */ + /* this stage that some processors may have*/ + /* claimed their copy of a vertex for one */ + /* of their aggregates. However, some */ + /* arbitration still needs to occur. */ + /* The corresponding procWinner[]'s remain */ + /* as MUELU_UNASSIGNED until */ + /* ArbitrateAndCommunicate() is */ + /* invoked to arbitrate. 
*/ /***************************************************************************** @@ -102,275 +102,309 @@ namespace MueLu { where rows (or vertices) correspond to aggregates and colunmns (or edges) correspond to nodes. While not strictly necessary, it might be convenient. */ - template - class Aggregates; - - template - class Aggregates > : public BaseClass { - public: - using local_ordinal_type = LocalOrdinal; - using global_ordinal_type = GlobalOrdinal; - using execution_space = typename DeviceType::execution_space; - using node_type = Tpetra::KokkosCompat::KokkosDeviceWrapperNode; - using device_type = DeviceType; - using range_type = Kokkos::RangePolicy; - using LO_view = Kokkos::View; - - using aggregates_sizes_type = Kokkos::View; - - private: - // For compatibility - typedef node_type Node; +template class Aggregates; + +template +class Aggregates> + : public BaseClass { +public: + using local_ordinal_type = LocalOrdinal; + using global_ordinal_type = GlobalOrdinal; + using execution_space = typename DeviceType::execution_space; + using node_type = Tpetra::KokkosCompat::KokkosDeviceWrapperNode; + using device_type = DeviceType; + using range_type = Kokkos::RangePolicy; + using LO_view = Kokkos::View; + + using aggregates_sizes_type = Kokkos::View; + +private: + // For compatibility + typedef node_type Node; #undef MUELU_AGGREGATES_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - - // Defining types that require the short names included above - using local_graph_type = typename LWGraph_kokkos::local_graph_type; - using colors_view_type = Kokkos::View; - - /*! @brief Standard constructor for Aggregates structure - * - * Standard constructor of aggregates takes a Graph object as parameter. - * Uses the graph.GetImportMap() to initialize the internal vector for mapping nodes to (local) aggregate ids as well as - * the mapping of node to the owning processor id. - * - */ - Aggregates(const GraphBase & graph); - - /*! 
@brief Standard constructor for Aggregates structure - * - * Standard constructor of aggregates takes a LWGraph object as parameter. - * Uses the graph.GetImportMap() to initialize the internal vector for mapping nodes to (local) aggregate ids as well as - * the mapping of node to the owning processor id. - * - */ - Aggregates(LWGraph_kokkos graph); - - /*! @brief Constructor for Aggregates structure - * - * This constructor takes a RCP pointer to a map which is used for the internal mappings of nodes to the (local) aggregate ids and the owning processor. - * - */ - Aggregates(const RCP& map); - - /*! @brief Destructor - * - */ - virtual ~Aggregates() { } - - //! @name Set/Get Methods for specific aggregation data - //@{ - - /*! @brief Get the index manager used by structured aggregation algorithms. - This has to be done by the aggregation factory. - */ - RCP& GetIndexManagerKokkos() { return geoDataKokkos_; } - - /*! @brief Set the index manager used by structured aggregation algorithms. - This has to be done by the aggregation factory. - */ - void SetIndexManagerKokkos(RCP & geoDataKokkos) { geoDataKokkos_ = geoDataKokkos; } - - /*! @brief Get the index manager used by various aggregation algorithms. - This has to be done by the aggregation factory. - */ - RCP& GetIndexManager() { return geoData_; } - - /*! @brief Set the index manager used by various aggregation algorithms. - This has to be done by the aggregation factory. - */ - void SetIndexManager(RCP & geoData) { geoData_ = geoData; } - - /*! @brief Get a distance 2 coloring of the underlying graph. - The coloring is computed and set during Phase1 of aggregation. - */ - colors_view_type& GetGraphColors() { return graphColors_; } - - /*! @brief Set a distance 2 coloring of the underlying graph. - The coloring is computed and set during Phase1 of aggregation. - */ - void SetGraphColors(colors_view_type graphColors) { graphColors_ = graphColors; } - - /*! 
@brief Get the number of colors needed by the distance 2 coloring. - */ - LO GetGraphNumColors() { return graphNumColors_; } - - /*! @brief Set the number of colors needed by the distance 2 coloring. - */ - void SetGraphNumColors(const LO graphNumColors) { graphNumColors_ = graphNumColors; } - - //@} - - /*! @brief Set number of local aggregates on current processor. - - This has to be done by the aggregation routines. - */ - void SetNumAggregates(LO nAggregates) { numAggregates_ = nAggregates; } - - /*! @brief Set number of global aggregates on current processor. - - This has to be done by the aggregation routines. - */ - void SetNumGlobalAggregates(GO nGlobalAggregates) { numGlobalAggregates_ = nGlobalAggregates; } - - ///< returns the number of aggregates of the current processor. Note: could/should be renamed to GetNumLocalAggregates? - KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const { - return numAggregates_; - } - - //! @brief Record whether aggregates include DOFs from other processes. - KOKKOS_INLINE_FUNCTION void AggregatesCrossProcessors(const bool& flag) { - aggregatesIncludeGhosts_ = flag; - } - - /*! @brief Return false if and only if no aggregates include DOFs from other processes. - - Used in construction of tentative prolongator to skip a communication phase. - */ - KOKKOS_INLINE_FUNCTION bool AggregatesCrossProcessors() const { - return aggregatesIncludeGhosts_; - } - - /*! @brief Returns a nonconstant vector that maps local node IDs to local aggregates IDs. - - For local node ID i, the corresponding vector entry v[i] is the local aggregate id to which i belongs on the current processor. - */ - RCP& GetVertex2AggIdNonConst() { return vertex2AggId_; } - - /*! @brief Returns nonconstant vector that maps local node IDs to owning processor IDs. - - For local node ID i, the corresponding vector entry v[i] is the owning processor ID. - */ - RCP& GetProcWinnerNonConst() { return procWinner_; } - /*! 
@brief Returns constant vector that maps local node IDs to local aggregates IDs. - - For local node ID i, the corresponding vector entry v[i] is the local aggregate id to which i belongs on the current processor. - */ - const RCP& GetVertex2AggId() const { return vertex2AggId_; } - - /*! @brief Returns constant vector that maps local node IDs to owning processor IDs. - - For local node ID i, the corresponding vector entry v[i] is the owning processor ID. - */ - const RCP& GetProcWinner() const { return procWinner_; } - - //! Returns true if node with given local node id is marked to be a root node - inline bool IsRoot(LO i) const { return isRoot_[i]; } - - /*! @brief Set root node information. - - Used by aggregation methods only. - */ - inline void SetIsRoot(LO i, bool value = true) { isRoot_[i] = value; } - - const RCP GetMap() const; ///< returns (overlapping) map of aggregate/node distribution - - /*! @brief Compute sizes of aggregates - - Returns the number of nodes in each aggregate in an array. - If the aggregate sizes are not stored internally (which is the default), they are computed and returned. - If the aggregate sizes have been stored internally, then they are *not* recomputed, but instead the - stored sizes are returned. - - @param[in] forceRecompute if true, force recomputation of the aggregate sizes. - */ - typename aggregates_sizes_type::const_type ComputeAggregateSizes(bool forceRecompute = false) const; - - /*! @brief Compute sizes of aggregates - - Returns the number of nodes in each aggregate in an array. - If the aggregate sizes are not stored internally (which is the default), they are computed and returned. - If the aggregate sizes have been stored internally, then they are *not* recomputed, but instead the - stored sizes are returned. - - @param[in] forceRecompute if true, force recomputation of the aggregate sizes. 
- */ - Teuchos::ArrayRCP ComputeAggregateSizesArrayRCP(bool forceRecompute = false) const; - - local_graph_type GetGraph() const; - - /*! @brief Generates a compressed list of nodes in each aggregate, where - the entries in aggNodes[aggPtr[i]] up to aggNodes[aggPtr[i+1]-1] contain the nodes in aggregate i. - unaggregated contains the list of nodes which are, for whatever reason, not aggregated (e.g. Dirichlet) - */ - void ComputeNodesInAggregate(LO_view & aggPtr, LO_view & aggNodes, LO_view & unaggregated) const; - - //! Get global number of aggregates - // If # of global aggregates is unknown, this method does coummunication and internally record the value - GO GetNumGlobalAggregatesComputeIfNeeded(); - - //! @name Overridden from Teuchos::Describable - //@{ - - //! Return a simple one-line description of this object. - std::string description() const; - - //! Print the object with some verbosity level to an FancyOStream object. - //using MueLu::Describable::describe; // overloading, not hiding - void print(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel = verbLevel_default) const; - - private: - LO numAggregates_; ///< Number of aggregates on this processor - GO numGlobalAggregates_; ///< Number of global aggregates - - /*! vertex2AggId[k] gives a local id corresponding to the aggregate to which - * local id k has been assigned. While k is the local id on my processor (MyPID), - * vertex2AggId[k] is the local id on the processor which actually owns the aggregate. - */ - RCP vertex2AggId_; - - /*! - * If k is the local id on my processor (MyPID), the owning processor has the - * id given by procWinner[k] - */ - RCP procWinner_; - - /*! geoData stores an index manager object that is used to perform structured aggreation - * on a problem. - */ - RCP geoDataKokkos_; - - /*! geoData stores an index manager object that is used to perform structured aggreation - * on a problem. - */ - RCP geoData_; - - /*! 
graphColors_ stores a view that assigns a color to each node in the graph - * These colors are used to parallelize the aggregation process in UncoupledAggregation - */ - colors_view_type graphColors_; - - /*! graphNumColors_ stores the number of colors that are needed to perform a distance 2 - * coloring of the underlying graph. - */ - LO graphNumColors_; - - //! An ArrayRCP of booleans specifying if a local entry is an aggregate root. - Teuchos::ArrayRCP isRoot_; - - //! Set to false iff aggregates do not include any DOFs belong to other processes. - bool aggregatesIncludeGhosts_; - - //! Array of sizes of each local aggregate. - mutable - aggregates_sizes_type aggregateSizes_; - - /*! aggragateSizesHost_ is a host copy of aggregate sizes, which - * helps slightly reduce the cost of calling ComputeAggregateSizes - * from different parts of MueLu that require such data on the host device. - */ - mutable - typename aggregates_sizes_type::HostMirror aggregateSizesHost_; - - //! Aggregates represented as Kokkos graph type - mutable - local_graph_type graph_; - }; - -} //namespace MueLu +public: + // Defining types that require the short names included above + using local_graph_type = typename LWGraph_kokkos::local_graph_type; + using colors_view_type = + Kokkos::View; + + /*! @brief Standard constructor for Aggregates structure + * + * Standard constructor of aggregates takes a Graph object as parameter. + * Uses the graph.GetImportMap() to initialize the internal vector for mapping + * nodes to (local) aggregate ids as well as the mapping of node to the owning + * processor id. + * + */ + Aggregates(const GraphBase &graph); + + /*! @brief Standard constructor for Aggregates structure + * + * Standard constructor of aggregates takes a LWGraph object as parameter. + * Uses the graph.GetImportMap() to initialize the internal vector for mapping + * nodes to (local) aggregate ids as well as the mapping of node to the owning + * processor id. 
+ * + */ + Aggregates(LWGraph_kokkos graph); + + /*! @brief Constructor for Aggregates structure + * + * This constructor takes a RCP pointer to a map which is used for the + * internal mappings of nodes to the (local) aggregate ids and the owning + * processor. + * + */ + Aggregates(const RCP &map); + + /*! @brief Destructor + * + */ + virtual ~Aggregates() {} + + //! @name Set/Get Methods for specific aggregation data + //@{ + + /*! @brief Get the index manager used by structured aggregation algorithms. + This has to be done by the aggregation factory. + */ + RCP &GetIndexManagerKokkos() { return geoDataKokkos_; } + + /*! @brief Set the index manager used by structured aggregation algorithms. + This has to be done by the aggregation factory. + */ + void SetIndexManagerKokkos(RCP &geoDataKokkos) { + geoDataKokkos_ = geoDataKokkos; + } + + /*! @brief Get the index manager used by various aggregation algorithms. + This has to be done by the aggregation factory. + */ + RCP &GetIndexManager() { return geoData_; } + + /*! @brief Set the index manager used by various aggregation algorithms. + This has to be done by the aggregation factory. + */ + void SetIndexManager(RCP &geoData) { geoData_ = geoData; } + + /*! @brief Get a distance 2 coloring of the underlying graph. + The coloring is computed and set during Phase1 of aggregation. + */ + colors_view_type &GetGraphColors() { return graphColors_; } + + /*! @brief Set a distance 2 coloring of the underlying graph. + The coloring is computed and set during Phase1 of aggregation. + */ + void SetGraphColors(colors_view_type graphColors) { + graphColors_ = graphColors; + } + + /*! @brief Get the number of colors needed by the distance 2 coloring. + */ + LO GetGraphNumColors() { return graphNumColors_; } + + /*! @brief Set the number of colors needed by the distance 2 coloring. + */ + void SetGraphNumColors(const LO graphNumColors) { + graphNumColors_ = graphNumColors; + } + + //@} + + /*! 
@brief Set number of local aggregates on current processor. + + This has to be done by the aggregation routines. + */ + void SetNumAggregates(LO nAggregates) { numAggregates_ = nAggregates; } + + /*! @brief Set number of global aggregates on current processor. + + This has to be done by the aggregation routines. + */ + void SetNumGlobalAggregates(GO nGlobalAggregates) { + numGlobalAggregates_ = nGlobalAggregates; + } + + ///< returns the number of aggregates of the current processor. Note: + ///< could/should be renamed to GetNumLocalAggregates? + KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const { return numAggregates_; } + + //! @brief Record whether aggregates include DOFs from other processes. + KOKKOS_INLINE_FUNCTION void AggregatesCrossProcessors(const bool &flag) { + aggregatesIncludeGhosts_ = flag; + } + + /*! @brief Return false if and only if no aggregates include DOFs from other + processes. + + Used in construction of tentative prolongator to skip a communication + phase. + */ + KOKKOS_INLINE_FUNCTION bool AggregatesCrossProcessors() const { + return aggregatesIncludeGhosts_; + } + + /*! @brief Returns a nonconstant vector that maps local node IDs to local + aggregates IDs. + + For local node ID i, the corresponding vector entry v[i] is the local + aggregate id to which i belongs on the current processor. + */ + RCP &GetVertex2AggIdNonConst() { return vertex2AggId_; } + + /*! @brief Returns nonconstant vector that maps local node IDs to owning + processor IDs. + + For local node ID i, the corresponding vector entry v[i] is the owning + processor ID. + */ + RCP &GetProcWinnerNonConst() { return procWinner_; } + /*! @brief Returns constant vector that maps local node IDs to local + aggregates IDs. + + For local node ID i, the corresponding vector entry v[i] is the local + aggregate id to which i belongs on the current processor. + */ + const RCP &GetVertex2AggId() const { return vertex2AggId_; } + + /*! 
@brief Returns constant vector that maps local node IDs to owning + processor IDs. + + For local node ID i, the corresponding vector entry v[i] is the owning + processor ID. + */ + const RCP &GetProcWinner() const { return procWinner_; } + + //! Returns true if node with given local node id is marked to be a root node + inline bool IsRoot(LO i) const { return isRoot_[i]; } + + /*! @brief Set root node information. + + Used by aggregation methods only. + */ + inline void SetIsRoot(LO i, bool value = true) { isRoot_[i] = value; } + + const RCP + GetMap() const; ///< returns (overlapping) map of aggregate/node distribution + + /*! @brief Compute sizes of aggregates + + Returns the number of nodes in each aggregate in an array. + If the aggregate sizes are not stored internally (which is the default), + they are computed and returned. If the aggregate sizes have been stored + internally, then they are *not* recomputed, but instead the stored sizes are + returned. + + @param[in] forceRecompute if true, force recomputation of the aggregate + sizes. + */ + typename aggregates_sizes_type::const_type + ComputeAggregateSizes(bool forceRecompute = false) const; + + /*! @brief Compute sizes of aggregates + + Returns the number of nodes in each aggregate in an array. + If the aggregate sizes are not stored internally (which is the default), + they are computed and returned. If the aggregate sizes have been stored + internally, then they are *not* recomputed, but instead the stored sizes are + returned. + + @param[in] forceRecompute if true, force recomputation of the aggregate + sizes. + */ + Teuchos::ArrayRCP + ComputeAggregateSizesArrayRCP(bool forceRecompute = false) const; + + local_graph_type GetGraph() const; + + /*! @brief Generates a compressed list of nodes in each aggregate, where + the entries in aggNodes[aggPtr[i]] up to aggNodes[aggPtr[i+1]-1] contain the + nodes in aggregate i. 
unaggregated contains the list of nodes which are, for + whatever reason, not aggregated (e.g. Dirichlet) + */ + void ComputeNodesInAggregate(LO_view &aggPtr, LO_view &aggNodes, + LO_view &unaggregated) const; + + //! Get global number of aggregates + // If # of global aggregates is unknown, this method does coummunication and + // internally record the value + GO GetNumGlobalAggregatesComputeIfNeeded(); + + //! @name Overridden from Teuchos::Describable + //@{ + + //! Return a simple one-line description of this object. + std::string description() const; + + //! Print the object with some verbosity level to an FancyOStream object. + // using MueLu::Describable::describe; // overloading, not hiding + void + print(Teuchos::FancyOStream &out, + const Teuchos::EVerbosityLevel verbLevel = verbLevel_default) const; + +private: + LO numAggregates_; ///< Number of aggregates on this processor + GO numGlobalAggregates_; ///< Number of global aggregates + + /*! vertex2AggId[k] gives a local id corresponding to the aggregate to which + * local id k has been assigned. While k is the local id on my processor + * (MyPID), vertex2AggId[k] is the local id on the processor which actually + * owns the aggregate. + */ + RCP vertex2AggId_; + + /*! + * If k is the local id on my processor (MyPID), the owning processor has the + * id given by procWinner[k] + */ + RCP procWinner_; + + /*! geoData stores an index manager object that is used to perform structured + * aggreation on a problem. + */ + RCP geoDataKokkos_; + + /*! geoData stores an index manager object that is used to perform structured + * aggreation on a problem. + */ + RCP geoData_; + + /*! graphColors_ stores a view that assigns a color to each node in the graph + * These colors are used to parallelize the aggregation process in + * UncoupledAggregation + */ + colors_view_type graphColors_; + + /*! graphNumColors_ stores the number of colors that are needed to perform a + * distance 2 coloring of the underlying graph. 
+ */ + LO graphNumColors_; + + //! An ArrayRCP of booleans specifying if a local entry is an aggregate root. + Teuchos::ArrayRCP isRoot_; + + //! Set to false iff aggregates do not include any DOFs belong to other + //! processes. + bool aggregatesIncludeGhosts_; + + //! Array of sizes of each local aggregate. + mutable aggregates_sizes_type aggregateSizes_; + + /*! aggragateSizesHost_ is a host copy of aggregate sizes, which + * helps slightly reduce the cost of calling ComputeAggregateSizes + * from different parts of MueLu that require such data on the host device. + */ + mutable typename aggregates_sizes_type::HostMirror aggregateSizesHost_; + + //! Aggregates represented as Kokkos graph type + mutable local_graph_type graph_; +}; + +} // namespace MueLu #define MUELU_AGGREGATES_SHORT #endif // MUELU_AGGREGATES_DECL_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_Aggregates_def.hpp b/packages/muelu/src/Graph/Containers/MueLu_Aggregates_def.hpp index c9940f5524ac..085d8f96610b 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_Aggregates_def.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_Aggregates_def.hpp @@ -47,266 +47,329 @@ #define MUELU_AGGREGATES_DEF_HPP #include -#include #include +#include #include -#include "MueLu_LWGraph_kokkos.hpp" +#include "MueLu_Aggregates_decl.hpp" #include "MueLu_Graph.hpp" #include "MueLu_GraphBase.hpp" +#include "MueLu_LWGraph_kokkos.hpp" #include "MueLu_Utilities_decl.hpp" -#include "MueLu_Aggregates_decl.hpp" namespace MueLu { - template - Aggregates >::Aggregates(const GraphBase & graph) { - numAggregates_ = 0; - numGlobalAggregates_ = 0; +template +Aggregates>:: + Aggregates(const GraphBase &graph) { + numAggregates_ = 0; + numGlobalAggregates_ = 0; - vertex2AggId_ = LOMultiVectorFactory::Build(graph.GetImportMap(), 1); - vertex2AggId_->putScalar(MUELU_UNAGGREGATED); + vertex2AggId_ = LOMultiVectorFactory::Build(graph.GetImportMap(), 1); + vertex2AggId_->putScalar(MUELU_UNAGGREGATED); - procWinner_ = 
LOVectorFactory::Build(graph.GetImportMap()); - procWinner_->putScalar(MUELU_UNASSIGNED); + procWinner_ = LOVectorFactory::Build(graph.GetImportMap()); + procWinner_->putScalar(MUELU_UNASSIGNED); - isRoot_ = Teuchos::ArrayRCP(graph.GetImportMap()->getLocalNumElements(), false); + isRoot_ = Teuchos::ArrayRCP(graph.GetImportMap()->getLocalNumElements(), + false); - // slow but safe, force TentativePFactory to build column map for P itself - aggregatesIncludeGhosts_ = true; - } + // slow but safe, force TentativePFactory to build column map for P itself + aggregatesIncludeGhosts_ = true; +} - template - Aggregates >:: - Aggregates(LWGraph_kokkos graph) { - numAggregates_ = 0; - numGlobalAggregates_ = 0; +template +Aggregates>:: + Aggregates(LWGraph_kokkos graph) { + numAggregates_ = 0; + numGlobalAggregates_ = 0; - vertex2AggId_ = LOMultiVectorFactory::Build(graph.GetImportMap(), 1); - vertex2AggId_->putScalar(MUELU_UNAGGREGATED); + vertex2AggId_ = LOMultiVectorFactory::Build(graph.GetImportMap(), 1); + vertex2AggId_->putScalar(MUELU_UNAGGREGATED); - procWinner_ = LOVectorFactory::Build(graph.GetImportMap()); - procWinner_->putScalar(MUELU_UNASSIGNED); + procWinner_ = LOVectorFactory::Build(graph.GetImportMap()); + procWinner_->putScalar(MUELU_UNASSIGNED); - isRoot_ = Teuchos::ArrayRCP(graph.GetImportMap()->getLocalNumElements(), false); + isRoot_ = Teuchos::ArrayRCP(graph.GetImportMap()->getLocalNumElements(), + false); - // slow but safe, force TentativePFactory to build column map for P itself - aggregatesIncludeGhosts_ = true; - } + // slow but safe, force TentativePFactory to build column map for P itself + aggregatesIncludeGhosts_ = true; +} - template - Aggregates >:: - Aggregates(const RCP& map) { - numAggregates_ = 0; - numGlobalAggregates_ = 0; +template +Aggregates>:: + Aggregates(const RCP &map) { + numAggregates_ = 0; + numGlobalAggregates_ = 0; - vertex2AggId_ = LOMultiVectorFactory::Build(map, 1); - vertex2AggId_->putScalar(MUELU_UNAGGREGATED); + 
vertex2AggId_ = LOMultiVectorFactory::Build(map, 1); + vertex2AggId_->putScalar(MUELU_UNAGGREGATED); - procWinner_ = LOVectorFactory::Build(map); - procWinner_->putScalar(MUELU_UNASSIGNED); + procWinner_ = LOVectorFactory::Build(map); + procWinner_->putScalar(MUELU_UNASSIGNED); - isRoot_ = Teuchos::ArrayRCP(map->getLocalNumElements(), false); + isRoot_ = Teuchos::ArrayRCP(map->getLocalNumElements(), false); - // slow but safe, force TentativePFactory to build column map for P itself - aggregatesIncludeGhosts_ = true; - } + // slow but safe, force TentativePFactory to build column map for P itself + aggregatesIncludeGhosts_ = true; +} - template - typename Aggregates >::aggregates_sizes_type::const_type - Aggregates >::ComputeAggregateSizes(bool forceRecompute) const { - if (aggregateSizes_.size() && !forceRecompute) { - return aggregateSizes_; +template +typename Aggregates>:: + aggregates_sizes_type::const_type + Aggregates>:: + ComputeAggregateSizes(bool forceRecompute) const { + if (aggregateSizes_.size() && !forceRecompute) { + return aggregateSizes_; - } else { - // It is necessary to initialize this to 0 - aggregates_sizes_type aggregateSizes("aggregates", numAggregates_); + } else { + // It is necessary to initialize this to 0 + aggregates_sizes_type aggregateSizes("aggregates", numAggregates_); - int myPID = GetMap()->getComm()->getRank(); + int myPID = GetMap()->getComm()->getRank(); - auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly); - auto procWinner = procWinner_ ->getDeviceLocalView(Xpetra::Access::ReadOnly); + auto vertex2AggId = + vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly); + auto procWinner = procWinner_->getDeviceLocalView(Xpetra::Access::ReadOnly); - typename AppendTrait::type aggregateSizesAtomic = aggregateSizes; - Kokkos::parallel_for("MueLu:Aggregates:ComputeAggregateSizes:for", range_type(0,procWinner.size()), - KOKKOS_LAMBDA(const LO i) { + typename AppendTrait::type + aggregateSizesAtomic = 
aggregateSizes; + Kokkos::parallel_for( + "MueLu:Aggregates:ComputeAggregateSizes:for", + range_type(0, procWinner.size()), KOKKOS_LAMBDA(const LO i) { if (procWinner(i, 0) == myPID) aggregateSizesAtomic(vertex2AggId(i, 0))++; }); - aggregateSizes_ = aggregateSizes; - - return aggregateSizes; - } + aggregateSizes_ = aggregateSizes; + return aggregateSizes; } - - template - typename Teuchos::ArrayRCP - Aggregates >:: - ComputeAggregateSizesArrayRCP(bool forceRecompute) const { - auto aggregateSizes = this->ComputeAggregateSizes(forceRecompute); - - // if this is the first time this is called, setup the host mirror and fill it - if(!aggregateSizesHost_.is_allocated()) { - aggregateSizesHost_ = Kokkos::create_mirror_view(aggregateSizes); +} + +template +typename Teuchos::ArrayRCP +Aggregates>:: + ComputeAggregateSizesArrayRCP(bool forceRecompute) const { + auto aggregateSizes = this->ComputeAggregateSizes(forceRecompute); + + // if this is the first time this is called, setup the host mirror and fill it + if (!aggregateSizesHost_.is_allocated()) { + aggregateSizesHost_ = Kokkos::create_mirror_view(aggregateSizes); + Kokkos::deep_copy(aggregateSizesHost_, aggregateSizes); + } else { + // otherwise, only update if we forced a recompute + if (forceRecompute) Kokkos::deep_copy(aggregateSizesHost_, aggregateSizes); - } else { - // otherwise, only update if we forced a recompute - if(forceRecompute) - Kokkos::deep_copy(aggregateSizesHost_, aggregateSizes); - } + } - // put the data in an ArrayRCP, but do not give it ownership of the data - Teuchos::ArrayRCP aggregateSizesArrayRCP(aggregateSizesHost_.data(),0,aggregateSizesHost_.extent(0),false); + // put the data in an ArrayRCP, but do not give it ownership of the data + Teuchos::ArrayRCP aggregateSizesArrayRCP( + aggregateSizesHost_.data(), 0, aggregateSizesHost_.extent(0), false); - return aggregateSizesArrayRCP; - } + return aggregateSizesArrayRCP; +} - template - typename Aggregates >::local_graph_type - Aggregates 
>::GetGraph() const { - using row_map_type = typename local_graph_type::row_map_type; - using entries_type = typename local_graph_type::entries_type; - using size_type = typename local_graph_type::size_type; +template +typename Aggregates< + LocalOrdinal, GlobalOrdinal, + Tpetra::KokkosCompat::KokkosDeviceWrapperNode>::local_graph_type +Aggregates>:: + GetGraph() const { + using row_map_type = typename local_graph_type::row_map_type; + using entries_type = typename local_graph_type::entries_type; + using size_type = typename local_graph_type::size_type; - auto numAggregates = numAggregates_; + auto numAggregates = numAggregates_; - if (static_cast(graph_.numRows()) == numAggregates) - return graph_; + if (static_cast(graph_.numRows()) == numAggregates) + return graph_; - auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly); - auto procWinner = procWinner_ ->getDeviceLocalView(Xpetra::Access::ReadOnly); - auto sizes = ComputeAggregateSizes(); + auto vertex2AggId = + vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly); + auto procWinner = procWinner_->getDeviceLocalView(Xpetra::Access::ReadOnly); + auto sizes = ComputeAggregateSizes(); - // FIXME_KOKKOS: replace by ViewAllocateWithoutInitializing + rows(0) = 0. - typename row_map_type::non_const_type rows("Agg_rows", numAggregates+1); // rows(0) = 0 automatically + // FIXME_KOKKOS: replace by ViewAllocateWithoutInitializing + rows(0) = 0. 
+ typename row_map_type::non_const_type rows( + "Agg_rows", numAggregates + 1); // rows(0) = 0 automatically - // parallel_scan (exclusive) - Kokkos::parallel_scan("MueLu:Aggregates:GetGraph:compute_rows", range_type(0, numAggregates), - KOKKOS_LAMBDA(const LO i, LO& update, const bool& final_pass) { + // parallel_scan (exclusive) + Kokkos::parallel_scan( + "MueLu:Aggregates:GetGraph:compute_rows", range_type(0, numAggregates), + KOKKOS_LAMBDA(const LO i, LO &update, const bool &final_pass) { update += sizes(i); if (final_pass) - rows(i+1) = update; + rows(i + 1) = update; }); - decltype(rows) offsets(Kokkos::ViewAllocateWithoutInitializing("Agg_offsets"), numAggregates+1); // +1 is just for ease - Kokkos::deep_copy(offsets, rows); + decltype(rows) offsets(Kokkos::ViewAllocateWithoutInitializing("Agg_offsets"), + numAggregates + 1); // +1 is just for ease + Kokkos::deep_copy(offsets, rows); - int myPID = GetMap()->getComm()->getRank(); + int myPID = GetMap()->getComm()->getRank(); - size_type numNNZ; - { - Kokkos::View numNNZ_device = Kokkos::subview(rows, numAggregates); - typename Kokkos::View::HostMirror numNNZ_host = Kokkos::create_mirror_view(numNNZ_device); - Kokkos::deep_copy(numNNZ_host, numNNZ_device); - numNNZ = numNNZ_host(); - } - typename entries_type::non_const_type cols(Kokkos::ViewAllocateWithoutInitializing("Agg_cols"), numNNZ); - size_t realnnz = 0; - Kokkos::parallel_reduce("MueLu:Aggregates:GetGraph:compute_cols", range_type(0, procWinner.size()), - KOKKOS_LAMBDA(const LO i, size_t& nnz) { + size_type numNNZ; + { + Kokkos::View numNNZ_device = + Kokkos::subview(rows, numAggregates); + typename Kokkos::View::HostMirror numNNZ_host = + Kokkos::create_mirror_view(numNNZ_device); + Kokkos::deep_copy(numNNZ_host, numNNZ_device); + numNNZ = numNNZ_host(); + } + typename entries_type::non_const_type cols( + Kokkos::ViewAllocateWithoutInitializing("Agg_cols"), numNNZ); + size_t realnnz = 0; + Kokkos::parallel_reduce( + 
"MueLu:Aggregates:GetGraph:compute_cols", + range_type(0, procWinner.size()), + KOKKOS_LAMBDA(const LO i, size_t &nnz) { if (procWinner(i, 0) == myPID) { - typedef typename std::remove_reference< decltype( offsets(0) ) >::type atomic_incr_type; - auto idx = Kokkos::atomic_fetch_add( &offsets(vertex2AggId(i,0)), atomic_incr_type(1)); + typedef typename std::remove_reference::type + atomic_incr_type; + auto idx = Kokkos::atomic_fetch_add(&offsets(vertex2AggId(i, 0)), + atomic_incr_type(1)); cols(idx) = i; nnz++; } - }, realnnz); - TEUCHOS_TEST_FOR_EXCEPTION(realnnz != numNNZ, Exceptions::RuntimeError, - "MueLu: Internal error: Something is wrong with aggregates graph construction: numNNZ = " << numNNZ << " != " << realnnz << " = realnnz"); - - graph_ = local_graph_type(cols, rows); - - return graph_; - } - - template - void - Aggregates >::ComputeNodesInAggregate(LO_view & aggPtr, LO_view & aggNodes, LO_view & unaggregated) const { - LO numAggs = GetNumAggregates(); - LO numNodes = vertex2AggId_->getLocalLength(); - auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly); - typename aggregates_sizes_type::const_type aggSizes = ComputeAggregateSizes(true); - LO INVALID = Teuchos::OrdinalTraits::invalid(); - - aggPtr = LO_view("aggPtr",numAggs+1); - aggNodes = LO_view("aggNodes",numNodes); - LO_view aggCurr("agg curr",numAggs+1); - - // Construct the "rowptr" and the counter - Kokkos::parallel_scan("MueLu:Aggregates:ComputeNodesInAggregate:scan", range_type(0,numAggs+1), - KOKKOS_LAMBDA(const LO aggIdx, LO& aggOffset, bool final_pass) { + }, + realnnz); + TEUCHOS_TEST_FOR_EXCEPTION(realnnz != numNNZ, Exceptions::RuntimeError, + "MueLu: Internal error: Something is wrong with " + "aggregates graph construction: numNNZ = " + << numNNZ << " != " << realnnz + << " = realnnz"); + + graph_ = local_graph_type(cols, rows); + + return graph_; +} + +template +void Aggregates>:: + ComputeNodesInAggregate(LO_view &aggPtr, LO_view &aggNodes, + LO_view 
&unaggregated) const { + LO numAggs = GetNumAggregates(); + LO numNodes = vertex2AggId_->getLocalLength(); + auto vertex2AggId = + vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly); + typename aggregates_sizes_type::const_type aggSizes = + ComputeAggregateSizes(true); + LO INVALID = Teuchos::OrdinalTraits::invalid(); + + aggPtr = LO_view("aggPtr", numAggs + 1); + aggNodes = LO_view("aggNodes", numNodes); + LO_view aggCurr("agg curr", numAggs + 1); + + // Construct the "rowptr" and the counter + Kokkos::parallel_scan( + "MueLu:Aggregates:ComputeNodesInAggregate:scan", + range_type(0, numAggs + 1), + KOKKOS_LAMBDA(const LO aggIdx, LO &aggOffset, bool final_pass) { LO count = 0; - if(aggIdx < numAggs) + if (aggIdx < numAggs) count = aggSizes(aggIdx); - if(final_pass) { + if (final_pass) { aggPtr(aggIdx) = aggOffset; aggCurr(aggIdx) = aggOffset; - if(aggIdx==numAggs) + if (aggIdx == numAggs) aggCurr(numAggs) = 0; // use this for counting unaggregated nodes } aggOffset += count; }); - // Preallocate unaggregated to the correct size - LO numUnaggregated = 0; - Kokkos::parallel_reduce("MueLu:Aggregates:ComputeNodesInAggregate:unaggregatedSize", range_type(0,numNodes), - KOKKOS_LAMBDA(const LO nodeIdx, LO & count) { - if(vertex2AggId(nodeIdx,0)==INVALID) + // Preallocate unaggregated to the correct size + LO numUnaggregated = 0; + Kokkos::parallel_reduce( + "MueLu:Aggregates:ComputeNodesInAggregate:unaggregatedSize", + range_type(0, numNodes), + KOKKOS_LAMBDA(const LO nodeIdx, LO &count) { + if (vertex2AggId(nodeIdx, 0) == INVALID) count++; - }, numUnaggregated); - unaggregated = LO_view("unaggregated",numUnaggregated); + }, + numUnaggregated); + unaggregated = LO_view("unaggregated", numUnaggregated); - // Stick the nodes in each aggregate's spot - Kokkos::parallel_for("MueLu:Aggregates:ComputeNodesInAggregate:for", range_type(0,numNodes), + // Stick the nodes in each aggregate's spot + Kokkos::parallel_for( + "MueLu:Aggregates:ComputeNodesInAggregate:for", 
range_type(0, numNodes), KOKKOS_LAMBDA(const LO nodeIdx) { - LO aggIdx = vertex2AggId(nodeIdx,0); - if(aggIdx != INVALID) { + LO aggIdx = vertex2AggId(nodeIdx, 0); + if (aggIdx != INVALID) { // atomic postincrement aggCurr(aggIdx) each time - aggNodes(Kokkos::atomic_fetch_add(&aggCurr(aggIdx),1)) = nodeIdx; + aggNodes(Kokkos::atomic_fetch_add(&aggCurr(aggIdx), 1)) = nodeIdx; } else { // same, but using last entry of aggCurr for unaggregated nodes - unaggregated(Kokkos::atomic_fetch_add(&aggCurr(numAggs),1)) = nodeIdx; + unaggregated(Kokkos::atomic_fetch_add(&aggCurr(numAggs), 1)) = + nodeIdx; } }); - +} + +template +std::string Aggregates< + LocalOrdinal, GlobalOrdinal, + Tpetra::KokkosCompat::KokkosDeviceWrapperNode>::description() + const { + if (numGlobalAggregates_ == -1) + return BaseClass::description() + "{nGlobalAggregates = not computed}"; + else + return BaseClass::description() + + "{nGlobalAggregates = " + toString(numGlobalAggregates_) + "}"; +} + +template +void Aggregates>:: + print(Teuchos::FancyOStream &out, + const Teuchos::EVerbosityLevel verbLevel) const { + MUELU_DESCRIBE; + + if (verbLevel & Statistics1) { + if (numGlobalAggregates_ == -1) + out0 << "Global number of aggregates: not computed " << std::endl; + else + out0 << "Global number of aggregates: " << numGlobalAggregates_ + << std::endl; } - - template - std::string Aggregates >::description() const { - if (numGlobalAggregates_ == -1) return BaseClass::description() + "{nGlobalAggregates = not computed}"; - else return BaseClass::description() + "{nGlobalAggregates = " + toString(numGlobalAggregates_) + "}"; +} + +template +GlobalOrdinal +Aggregates>:: + GetNumGlobalAggregatesComputeIfNeeded() { + + if (numGlobalAggregates_ != -1) { + LO nAggregates = GetNumAggregates(); + GO nGlobalAggregates; + MueLu_sumAll(vertex2AggId_->getMap()->getComm(), (GO)nAggregates, + nGlobalAggregates); + SetNumGlobalAggregates(nGlobalAggregates); } - - template - void Aggregates 
>::print(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel verbLevel) const { - MUELU_DESCRIBE; - - if (verbLevel & Statistics1) { - if (numGlobalAggregates_ == -1) out0 << "Global number of aggregates: not computed " << std::endl; - else out0 << "Global number of aggregates: " << numGlobalAggregates_ << std::endl; - } - } - - template - GlobalOrdinal Aggregates >::GetNumGlobalAggregatesComputeIfNeeded() { - - if (numGlobalAggregates_ != -1) { - LO nAggregates = GetNumAggregates(); - GO nGlobalAggregates; - MueLu_sumAll(vertex2AggId_->getMap()->getComm(), (GO)nAggregates, nGlobalAggregates); - SetNumGlobalAggregates(nGlobalAggregates); - } - return numGlobalAggregates_; - } - - template - const RCP> > - Aggregates>::GetMap() const { - return vertex2AggId_->getMap(); - } - -} //namespace MueLu + return numGlobalAggregates_; +} + +template +const RCP>> +Aggregates>::GetMap() + const { + return vertex2AggId_->getMap(); +} + +} // namespace MueLu #endif // MUELU_AGGREGATES_DEF_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_GraphBase.hpp b/packages/muelu/src/Graph/Containers/MueLu_GraphBase.hpp index 462f8aef3153..44dd210fc876 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_GraphBase.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_GraphBase.hpp @@ -46,7 +46,7 @@ #ifndef MUELU_GRAPHBASE_HPP #define MUELU_GRAPHBASE_HPP -#include // global_size_t +#include // global_size_t #include #include "MueLu_ConfigDefs.hpp" @@ -61,67 +61,68 @@ namespace MueLu { Pure virtual base class for MueLu representations of graphs. */ - template - class GraphBase - : public BaseClass { +template +class GraphBase : public BaseClass { #undef MUELU_GRAPHBASE_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - // For Zoltan2 compatibility - using lno_t = LocalOrdinal; - using gno_t = GlobalOrdinal; - using node_t = Node; +public: + // For Zoltan2 compatibility + using lno_t = LocalOrdinal; + using gno_t = GlobalOrdinal; + using node_t = Node; - //! 
@name Constructors/Destructors. - //@{ - virtual ~GraphBase() {}; - //@} + //! @name Constructors/Destructors. + //@{ + virtual ~GraphBase(){}; + //@} - virtual const RCP > GetComm() const = 0; - virtual const RCP GetDomainMap() const = 0; - virtual const RCP GetImportMap() const = 0; + virtual const RCP> GetComm() const = 0; + virtual const RCP GetDomainMap() const = 0; + virtual const RCP GetImportMap() const = 0; - //! @name Query graph attributes. - //@{ + //! @name Query graph attributes. + //@{ - //! Return number of vertices owned by the calling node. - virtual size_t GetNodeNumVertices() const = 0; + //! Return number of vertices owned by the calling node. + virtual size_t GetNodeNumVertices() const = 0; - //! Return number of edges owned by the calling node. - virtual size_t GetNodeNumEdges() const = 0; + //! Return number of edges owned by the calling node. + virtual size_t GetNodeNumEdges() const = 0; - virtual void SetBoundaryNodeMap(const ArrayRCP & boundaryArray) = 0; + virtual void + SetBoundaryNodeMap(const ArrayRCP &boundaryArray) = 0; - virtual size_t getLocalMaxNumRowEntries() const = 0; + virtual size_t getLocalMaxNumRowEntries() const = 0; - virtual const ArrayRCP GetBoundaryNodeMap() const = 0; + virtual const ArrayRCP GetBoundaryNodeMap() const = 0; - //FIXME is this necessary? - //! Return number of global edges in the graph. - virtual Xpetra::global_size_t GetGlobalNumEdges() const = 0; + // FIXME is this necessary? + //! Return number of global edges in the graph. + virtual Xpetra::global_size_t GetGlobalNumEdges() const = 0; - //! Return the list of vertices adjacent to the vertex 'v'. - virtual Teuchos::ArrayView getNeighborVertices(LocalOrdinal v) const = 0; + //! Return the list of vertices adjacent to the vertex 'v'. + virtual Teuchos::ArrayView + getNeighborVertices(LocalOrdinal v) const = 0; - //! Return true if vertex with local id 'v' is on current process. 
- virtual bool isLocalNeighborVertex(LocalOrdinal v) const = 0; - //@} + //! Return true if vertex with local id 'v' is on current process. + virtual bool isLocalNeighborVertex(LocalOrdinal v) const = 0; + //@} - //! @name Print graph. - //@{ - /// Return a simple one-line description of the Graph. - virtual std::string description() const = 0; + //! @name Print graph. + //@{ + /// Return a simple one-line description of the Graph. + virtual std::string description() const = 0; - //! Print the Graph with some verbosity level to an FancyOStream object. - //using MueLu::Describable::describe; // overloading, not hiding - //void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const;; - virtual void print(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const = 0; - //@} - - }; + //! Print the Graph with some verbosity level to an FancyOStream object. + // using MueLu::Describable::describe; // overloading, not hiding + // void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = + // Default) const;; + virtual void print(Teuchos::FancyOStream &out, + const VerbLevel verbLevel = Default) const = 0; + //@} +}; } // namespace MueLu diff --git a/packages/muelu/src/Graph/Containers/MueLu_Graph_decl.hpp b/packages/muelu/src/Graph/Containers/MueLu_Graph_decl.hpp index 5ecc7c87e000..7b6c31bda060 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_Graph_decl.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_Graph_decl.hpp @@ -47,13 +47,13 @@ #define MUELU_GRAPH_DECL_HPP #include // global_size_t -#include // inline functions requires class declaration +#include // inline functions requires class declaration #include #include "MueLu_ConfigDefs.hpp" -#include "MueLu_Graph_fwd.hpp" #include "MueLu_GraphBase.hpp" +#include "MueLu_Graph_fwd.hpp" namespace MueLu { @@ -62,78 +62,91 @@ namespace MueLu { @brief MueLu representation of a compressed row storage graph. This class holds an underlying Xpetra_CrsGraph. 
- This class can be considered a facade, as MueLu needs only limited functionality for aggregation. + This class can be considered a facade, as MueLu needs only limited + functionality for aggregation. */ - template - class Graph - : public MueLu::GraphBase { //FIXME shortnames isn't working +template +class Graph : public MueLu::GraphBase { // FIXME shortnames isn't working #undef MUELU_GRAPH_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - - //! @name Constructors/Destructors. - //@{ - Graph(const RCP & graph, const std::string & /* objectLabel */=""); - - virtual ~Graph() {} - //@} - - size_t GetNodeNumVertices() const { return graph_->getLocalNumRows(); } - size_t GetNodeNumEdges() const { return graph_->getLocalNumEntries(); } - - Xpetra::global_size_t GetGlobalNumEdges() const { return graph_->getGlobalNumEntries(); } - - const RCP > GetComm() const { return graph_->getComm(); } - const RCP GetDomainMap() const { return graph_->getDomainMap(); } - //! Returns overlapping import map (nodes). - const RCP GetImportMap() const { return graph_->getColMap(); } - - const RCP GetGraph() const {return graph_;} - - //! Set map with local ids of boundary nodes. - void SetBoundaryNodeMap(const ArrayRCP& localDirichletNodes) { localDirichletNodes_ = localDirichletNodes; } - - //! Returns map with local ids of boundary nodes. - const ArrayRCP GetBoundaryNodeMap() const { return localDirichletNodes_; } - - //! Returns the maximum number of entries across all rows/columns on this node - size_t getLocalMaxNumRowEntries () const { return graph_->getLocalMaxNumRowEntries(); } - - //! Return the list of vertices adjacent to the vertex 'v'. - ArrayView getNeighborVertices(LO i) const { - ArrayView rowView; - graph_->getLocalRowView(i, rowView); - return rowView; - } - - //! Return true if vertex with local id 'v' is on current process. - bool isLocalNeighborVertex(LO i) const { return i >= minLocalIndex_ && i <= maxLocalIndex_; } +public: + //! 
@name Constructors/Destructors. + //@{ + Graph(const RCP &graph, + const std::string & /* objectLabel */ = ""); + + virtual ~Graph() {} + //@} + + size_t GetNodeNumVertices() const { return graph_->getLocalNumRows(); } + size_t GetNodeNumEdges() const { return graph_->getLocalNumEntries(); } + + Xpetra::global_size_t GetGlobalNumEdges() const { + return graph_->getGlobalNumEntries(); + } + + const RCP> GetComm() const { + return graph_->getComm(); + } + const RCP GetDomainMap() const { return graph_->getDomainMap(); } + //! Returns overlapping import map (nodes). + const RCP GetImportMap() const { return graph_->getColMap(); } + + const RCP GetGraph() const { return graph_; } + + //! Set map with local ids of boundary nodes. + void SetBoundaryNodeMap(const ArrayRCP &localDirichletNodes) { + localDirichletNodes_ = localDirichletNodes; + } + + //! Returns map with local ids of boundary nodes. + const ArrayRCP GetBoundaryNodeMap() const { + return localDirichletNodes_; + } + + //! Returns the maximum number of entries across all rows/columns on this node + size_t getLocalMaxNumRowEntries() const { + return graph_->getLocalMaxNumRowEntries(); + } + + //! Return the list of vertices adjacent to the vertex 'v'. + ArrayView getNeighborVertices(LO i) const { + ArrayView rowView; + graph_->getLocalRowView(i, rowView); + return rowView; + } + + //! Return true if vertex with local id 'v' is on current process. + bool isLocalNeighborVertex(LO i) const { + return i >= minLocalIndex_ && i <= maxLocalIndex_; + } #ifdef MUELU_UNUSED - size_t GetNodeNumGhost() const; + size_t GetNodeNumGhost() const; #endif - /// Return a simple one-line description of the Graph. - std::string description() const { return "MueLu.description()"; } - - //! Print the Graph with some verbosity level to an FancyOStream object. 
- //using MueLu::Describable::describe; // overloading, not hiding - //void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const;; - void print(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const; + /// Return a simple one-line description of the Graph. + std::string description() const { return "MueLu.description()"; } - private: + //! Print the Graph with some verbosity level to an FancyOStream object. + // using MueLu::Describable::describe; // overloading, not hiding + // void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = + // Default) const;; + void print(Teuchos::FancyOStream &out, + const VerbLevel verbLevel = Default) const; - RCP graph_; +private: + RCP graph_; - //! Vector of Dirichlet boundary node IDs on current process. - ArrayRCP localDirichletNodes_; + //! Vector of Dirichlet boundary node IDs on current process. + ArrayRCP localDirichletNodes_; - // local index boundaries (cached from domain map) - LO minLocalIndex_, maxLocalIndex_; - }; + // local index boundaries (cached from domain map) + LO minLocalIndex_, maxLocalIndex_; +}; } // namespace MueLu diff --git a/packages/muelu/src/Graph/Containers/MueLu_Graph_def.hpp b/packages/muelu/src/Graph/Containers/MueLu_Graph_def.hpp index 7d9bf76e1cf3..2bd8cd8e8989 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_Graph_def.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_Graph_def.hpp @@ -46,59 +46,66 @@ #ifndef MUELU_GRAPH_DEF_HPP #define MUELU_GRAPH_DEF_HPP -#include "Xpetra_Map.hpp" #include "Xpetra_CrsGraph.hpp" +#include "Xpetra_Map.hpp" -#include "MueLu_Graph_decl.hpp" #include "MueLu_Exceptions.hpp" +#include "MueLu_Graph_decl.hpp" namespace MueLu { - template - Graph::Graph(const RCP & graph, const std::string & /* objectLabel */) : graph_(graph) { - minLocalIndex_ = graph_->getDomainMap()->getMinLocalIndex(); - maxLocalIndex_ = graph_->getDomainMap()->getMaxLocalIndex(); - } +template +Graph::Graph( + const RCP &graph, const 
std::string & /* objectLabel */) + : graph_(graph) { + minLocalIndex_ = graph_->getDomainMap()->getMinLocalIndex(); + maxLocalIndex_ = graph_->getDomainMap()->getMaxLocalIndex(); +} #ifdef MUELU_UNUSED - template - size_t Graph::GetNodeNumGhost() const { - /* - Ray's comments about nGhost: - Graph->NGhost == graph_->RowMatrixColMap()->NumMyElements() - graph_->MatrixDomainMap()->NumMyElements() - is basically right. But we've had some issues about how epetra handles empty columns. - Probably worth discussing this with Jonathan and Chris to see if this is ALWAYS right. - */ - size_t nGhost = graph_->getColMap()->getLocalNumElements() - graph_->getDomainMap()->getLocalNumElements(); - if (nGhost < 0) nGhost = 0; // FIXME: size_t is unsigned. +template +size_t Graph::GetNodeNumGhost() const { + /* + Ray's comments about nGhost: + Graph->NGhost == graph_->RowMatrixColMap()->NumMyElements() - + graph_->MatrixDomainMap()->NumMyElements() is basically right. But we've had + some issues about how epetra handles empty columns. Probably worth + discussing this with Jonathan and Chris to see if this is ALWAYS right. + */ + size_t nGhost = graph_->getColMap()->getLocalNumElements() - + graph_->getDomainMap()->getLocalNumElements(); + if (nGhost < 0) + nGhost = 0; // FIXME: size_t is unsigned. - return nGhost; - } + return nGhost; +} #endif - //! Print the object with some verbosity level to an FancyOStream object. - //using MueLu::Describable::describe; // overloading, not hiding - //void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const { - template - void Graph::print(Teuchos::FancyOStream &out, const VerbLevel verbLevel) const { - MUELU_DESCRIBE; - - if (verbLevel & Parameters0) { - //out0 << "Prec. type: " << type_ << std::endl; - } +//! Print the object with some verbosity level to an FancyOStream object. 
+// using MueLu::Describable::describe; // overloading, not hiding +// void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = +// Default) const { +template +void Graph::print( + Teuchos::FancyOStream &out, const VerbLevel verbLevel) const { + MUELU_DESCRIBE; - if (verbLevel & Parameters1) { - //out0 << "Linear Algebra: " << toString(lib_) << std::endl; - //out0 << "PrecType: " << type_ << std::endl; - //out0 << "Parameter list: " << std::endl; { Teuchos::OSTab tab2(out); out << paramList_; } - //out0 << "Overlap: " << overlap_ << std::endl; - } + if (verbLevel & Parameters0) { + // out0 << "Prec. type: " << type_ << std::endl; + } - if (verbLevel & Debug) { - graph_->describe(out0, Teuchos::VERB_EXTREME); - } + if (verbLevel & Parameters1) { + // out0 << "Linear Algebra: " << toString(lib_) << std::endl; + // out0 << "PrecType: " << type_ << std::endl; + // out0 << "Parameter list: " << std::endl; { Teuchos::OSTab tab2(out); out + // << paramList_; } out0 << "Overlap: " << overlap_ << std::endl; } + if (verbLevel & Debug) { + graph_->describe(out0, Teuchos::VERB_EXTREME); + } } +} // namespace MueLu + #endif // MUELU_GRAPH_DEF_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_LWGraph_decl.hpp b/packages/muelu/src/Graph/Containers/MueLu_LWGraph_decl.hpp index f6b3c8338cbe..3a0a1f2a3577 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_LWGraph_decl.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_LWGraph_decl.hpp @@ -47,14 +47,14 @@ #define MUELU_LWGRAPH_DECL_HPP #include // global_size_t -#include // inline functions requires class declaration +#include // inline functions requires class declaration #include #include "MueLu_ConfigDefs.hpp" -#include "MueLu_LWGraph_fwd.hpp" -#include "MueLu_GraphBase.hpp" #include "MueLu_Exceptions.hpp" +#include "MueLu_GraphBase.hpp" +#include "MueLu_LWGraph_fwd.hpp" namespace MueLu { @@ -62,119 +62,134 @@ namespace MueLu { @class LWGraph @brief Lightweight MueLu representation of a compressed row 
storage graph. - This class is lightweight in the sense that it holds to local graph information. These were built without using - fillComplete. + This class is lightweight in the sense that it holds to local graph + information. These were built without using fillComplete. TODO handle systems */ - template - class LWGraph : public MueLu::GraphBase { +template +class LWGraph : public MueLu::GraphBase { #undef MUELU_LWGRAPH_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - - //! @name Constructors/Destructors. - //@{ - - //! LWGraph constructor - // - // @param[in] rowPtrs: Array containing row offsets (CSR format) - // @param[in] colPtrs: Array containing local column indices (CSR format) - // @param[in] domainMap: non-overlapping (domain) map for graph. Usually provided by AmalgamationFactory stored in UnAmalgamationInfo container - // @param[in] importMap: overlapping map for graph. Usually provided by AmalgamationFactory stored in UnAmalgamationInfo container - // @param[in] objectLabel: label string - LWGraph(const ArrayRCP& rowPtrs, const ArrayRCP& colPtrs, - const RCP& domainMap, const RCP& importMap, const std::string& objectLabel = "") - : rows_(rowPtrs), columns_(colPtrs), domainMap_(domainMap), importMap_(importMap), domainMapRef_(*domainMap), objectLabel_(objectLabel) - { - minLocalIndex_ = domainMapRef_.getMinLocalIndex(); - maxLocalIndex_ = domainMapRef_.getMaxLocalIndex(); - - maxNumRowEntries_ = 0; - - LO nRows = as(rowPtrs.size()-1); - for (LO i = 0; i < nRows; i++) - maxNumRowEntries_ = std::max(maxNumRowEntries_, as(rowPtrs[i+1] - rowPtrs[i])); - } - - virtual ~LWGraph() {} - //@} - - size_t GetNodeNumVertices() const { return rows_.size()-1; } - size_t GetNodeNumEdges() const { return rows_[rows_.size()-1]; } - - // TODO: do we really need this function - // It is being called from CoupledAggregation, but do we need it there? 
- Xpetra::global_size_t GetGlobalNumEdges() const { - Xpetra::global_size_t in = GetNodeNumEdges(), out; - Teuchos::reduceAll(*domainMap_->getComm(), Teuchos::REDUCE_SUM, in, Teuchos::outArg(out)); - return out; - } - - const RCP > GetComm() const { return domainMap_->getComm(); } - const RCP GetDomainMap() const { return domainMap_; } - //! Returns overlapping import map (nodes). - const RCP GetImportMap() const { return importMap_; } - - void SetBoundaryNodeMap(RCP const &/* map */) { throw Exceptions::NotImplemented("LWGraph: Boundary node map not implemented."); } - - //! Return the list of vertices adjacent to the vertex 'v'. - Teuchos::ArrayView getNeighborVertices(LO i) const { return columns_.view(rows_[i], rows_[i+1]-rows_[i]); } - - //! Return true if vertex with local id 'v' is on current process. - bool isLocalNeighborVertex(LO i) const { return i >= minLocalIndex_ && i <= maxLocalIndex_; } - - //! Set boolean array indicating which rows correspond to Dirichlet boundaries. - void SetBoundaryNodeMap(const ArrayRCP& bndry) { dirichletBoundaries_ = bndry; } - - //! Returns the maximum number of entries across all rows/columns on this node - size_t getLocalMaxNumRowEntries () const { return maxNumRowEntries_; } - - //! Returns map with global ids of boundary nodes. - const ArrayRCP GetBoundaryNodeMap() const { return dirichletBoundaries_; } - - - /// Return a simple one-line description of the Graph. - std::string description() const { return "MueLu.description()"; } //FIXME use object's label - - //! Return the row pointers of the local graph - const ArrayRCP getRowPtrs() const { - return rows_; - } - - //! Return the list entries in the local graph - const ArrayRCP getEntries() const { - return columns_; - } - - //! Print the Graph with some verbosity level to an FancyOStream object. 
- //using MueLu::Describable::describe; // overloading, not hiding - //void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const;; - void print(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const; - - - RCP GetCrsGraph() const; - - private: - - //! Indices into columns_ array. Part of local graph information. - const ArrayRCP rows_; - //! Columns corresponding to connections. Part of local graph information. - const ArrayRCP columns_; - //! Graph maps - const RCP domainMap_, importMap_; - const Map& domainMapRef_; - //! Name of this graph. - const std::string objectLabel_; - //! Boolean array marking Dirichlet rows. - ArrayRCP dirichletBoundaries_; - - // local index boundaries (cached from domain map) - LO minLocalIndex_, maxLocalIndex_; - size_t maxNumRowEntries_; - }; +public: + //! @name Constructors/Destructors. + //@{ + + //! LWGraph constructor + // + // @param[in] rowPtrs: Array containing row offsets (CSR format) + // @param[in] colPtrs: Array containing local column indices (CSR format) + // @param[in] domainMap: non-overlapping (domain) map for graph. Usually + // provided by AmalgamationFactory stored in UnAmalgamationInfo container + // @param[in] importMap: overlapping map for graph. 
Usually provided by + // AmalgamationFactory stored in UnAmalgamationInfo container + // @param[in] objectLabel: label string + LWGraph(const ArrayRCP &rowPtrs, const ArrayRCP &colPtrs, + const RCP &domainMap, const RCP &importMap, + const std::string &objectLabel = "") + : rows_(rowPtrs), columns_(colPtrs), domainMap_(domainMap), + importMap_(importMap), domainMapRef_(*domainMap), + objectLabel_(objectLabel) { + minLocalIndex_ = domainMapRef_.getMinLocalIndex(); + maxLocalIndex_ = domainMapRef_.getMaxLocalIndex(); + + maxNumRowEntries_ = 0; + + LO nRows = as(rowPtrs.size() - 1); + for (LO i = 0; i < nRows; i++) + maxNumRowEntries_ = + std::max(maxNumRowEntries_, as(rowPtrs[i + 1] - rowPtrs[i])); + } + + virtual ~LWGraph() {} + //@} + + size_t GetNodeNumVertices() const { return rows_.size() - 1; } + size_t GetNodeNumEdges() const { return rows_[rows_.size() - 1]; } + + // TODO: do we really need this function + // It is being called from CoupledAggregation, but do we need it there? + Xpetra::global_size_t GetGlobalNumEdges() const { + Xpetra::global_size_t in = GetNodeNumEdges(), out; + Teuchos::reduceAll(*domainMap_->getComm(), Teuchos::REDUCE_SUM, in, + Teuchos::outArg(out)); + return out; + } + + const RCP> GetComm() const { + return domainMap_->getComm(); + } + const RCP GetDomainMap() const { return domainMap_; } + //! Returns overlapping import map (nodes). + const RCP GetImportMap() const { return importMap_; } + + void SetBoundaryNodeMap(RCP const & /* map */) { + throw Exceptions::NotImplemented( + "LWGraph: Boundary node map not implemented."); + } + + //! Return the list of vertices adjacent to the vertex 'v'. + Teuchos::ArrayView getNeighborVertices(LO i) const { + return columns_.view(rows_[i], rows_[i + 1] - rows_[i]); + } + + //! Return true if vertex with local id 'v' is on current process. + bool isLocalNeighborVertex(LO i) const { + return i >= minLocalIndex_ && i <= maxLocalIndex_; + } + + //! 
Set boolean array indicating which rows correspond to Dirichlet + //! boundaries. + void SetBoundaryNodeMap(const ArrayRCP &bndry) { + dirichletBoundaries_ = bndry; + } + + //! Returns the maximum number of entries across all rows/columns on this node + size_t getLocalMaxNumRowEntries() const { return maxNumRowEntries_; } + + //! Returns map with global ids of boundary nodes. + const ArrayRCP GetBoundaryNodeMap() const { + return dirichletBoundaries_; + } + + /// Return a simple one-line description of the Graph. + std::string description() const { + return "MueLu.description()"; + } // FIXME use object's label + + //! Return the row pointers of the local graph + const ArrayRCP getRowPtrs() const { return rows_; } + + //! Return the list entries in the local graph + const ArrayRCP getEntries() const { return columns_; } + + //! Print the Graph with some verbosity level to an FancyOStream object. + // using MueLu::Describable::describe; // overloading, not hiding + // void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = + // Default) const;; + void print(Teuchos::FancyOStream &out, + const VerbLevel verbLevel = Default) const; + + RCP GetCrsGraph() const; + +private: + //! Indices into columns_ array. Part of local graph information. + const ArrayRCP rows_; + //! Columns corresponding to connections. Part of local graph information. + const ArrayRCP columns_; + //! Graph maps + const RCP domainMap_, importMap_; + const Map &domainMapRef_; + //! Name of this graph. + const std::string objectLabel_; + //! Boolean array marking Dirichlet rows. 
+ ArrayRCP dirichletBoundaries_; + + // local index boundaries (cached from domain map) + LO minLocalIndex_, maxLocalIndex_; + size_t maxNumRowEntries_; +}; } // namespace MueLu diff --git a/packages/muelu/src/Graph/Containers/MueLu_LWGraph_def.hpp b/packages/muelu/src/Graph/Containers/MueLu_LWGraph_def.hpp index 230a6c908587..eb6b8cab5196 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_LWGraph_def.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_LWGraph_def.hpp @@ -52,46 +52,52 @@ namespace MueLu { - //! Print the object with some verbosity level to an FancyOStream object. - //using MueLu::Describable::describe; // overloading, not hiding - //void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const { - template - void LWGraph::print(Teuchos::FancyOStream &out, const VerbLevel verbLevel) const { - // MUELU_DESCRIBE; +//! Print the object with some verbosity level to an FancyOStream object. +// using MueLu::Describable::describe; // overloading, not hiding +// void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = +// Default) const { +template +void LWGraph::print( + Teuchos::FancyOStream &out, const VerbLevel verbLevel) const { + // MUELU_DESCRIBE; - if (verbLevel & Parameters0) { - //out0 << "Prec. type: " << type_ << std::endl; - } - - if (verbLevel & Parameters1) { - //out0 << "Linear Algebra: " << toString(lib_) << std::endl; - //out0 << "PrecType: " << type_ << std::endl; - //out0 << "Parameter list: " << std::endl; { Teuchos::OSTab tab2(out); out << paramList_; } - //out0 << "Overlap: " << overlap_ << std::endl; - } - - if (verbLevel & Debug) { - RCP col_map = importMap_.is_null() ? domainMap_ : importMap_; + if (verbLevel & Parameters0) { + // out0 << "Prec. type: " << type_ << std::endl; + } - for (LO i = 0; i < rows_.size()-1; i++) { - for (LO j = rows_[i]; j < rows_[i+1]; j++) - out<< domainMap_->getGlobalElement(i) << " " << col_map->getGlobalElement(columns_[j])< col_map = importMap_.is_null() ? 
domainMap_ : importMap_; - template - RCP > LWGraph::GetCrsGraph() const { - ArrayRCP rowPtrs; - rowPtrs.resize(rows_.size()); - for (size_t i=0; i(rows_.size()); i++) - rowPtrs[i] = rows_[i]; - auto graph = Xpetra::CrsGraphFactory::Build(GetDomainMap(), GetImportMap(), rowPtrs, Teuchos::arcp_const_cast(getEntries())); - graph->fillComplete(); - return graph; + for (LO i = 0; i < rows_.size() - 1; i++) { + for (LO j = rows_[i]; j < rows_[i + 1]; j++) + out << domainMap_->getGlobalElement(i) << " " + << col_map->getGlobalElement(columns_[j]) << std::endl; } + } +} +template +RCP> +LWGraph::GetCrsGraph() const { + ArrayRCP rowPtrs; + rowPtrs.resize(rows_.size()); + for (size_t i = 0; i < Teuchos::as(rows_.size()); i++) + rowPtrs[i] = rows_[i]; + auto graph = + Xpetra::CrsGraphFactory::Build( + GetDomainMap(), GetImportMap(), rowPtrs, + Teuchos::arcp_const_cast(getEntries())); + graph->fillComplete(); + return graph; } +} // namespace MueLu + #endif // MUELU_LWGRAPH_DEF_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_LWGraph_kokkos_decl.hpp b/packages/muelu/src/Graph/Containers/MueLu_LWGraph_kokkos_decl.hpp index dd8a0231b15a..bcfa83db1718 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_LWGraph_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_LWGraph_kokkos_decl.hpp @@ -51,121 +51,119 @@ #include #include -#include // global_size_t +#include // global_size_t #include -#include "MueLu_VerbosityLevel.hpp" #include "MueLu_LWGraph_kokkos_fwd.hpp" +#include "MueLu_VerbosityLevel.hpp" #include #include "MueLu_Exceptions.hpp" namespace MueLu { - /*! - @class LWGraph_kokkos - @brief Lightweight MueLu representation of a compressed row storage graph - - This class is lightweight in the sense that it holds to local graph - information. These were built without using fillComplete. 
- */ - template - class LWGraph_kokkos; - - // Partial specialization for DeviceType - template - class LWGraph_kokkos> { - public: - using local_ordinal_type = LocalOrdinal; - using global_ordinal_type = GlobalOrdinal; - using execution_space = typename DeviceType::execution_space; - using memory_space = typename DeviceType::memory_space; - using device_type = Kokkos::Device; - using node_type = Tpetra::KokkosCompat::KokkosDeviceWrapperNode; - using local_lw_graph_type = MueLu::LocalLWGraph_kokkos; - using size_type = size_t; - - using map_type = Xpetra::Map; - using local_graph_type = typename local_lw_graph_type::local_graph_type; - using boundary_nodes_type = typename local_lw_graph_type::boundary_nodes_type; - - private: - // For compatibility - typedef node_type Node; +/*! + @class LWGraph_kokkos + @brief Lightweight MueLu representation of a compressed row storage graph + + This class is lightweight in the sense that it holds to local graph + information. These were built without using fillComplete. + */ +template +class LWGraph_kokkos; + +// Partial specialization for DeviceType +template +class LWGraph_kokkos< + LocalOrdinal, GlobalOrdinal, + Tpetra::KokkosCompat::KokkosDeviceWrapperNode> { +public: + using local_ordinal_type = LocalOrdinal; + using global_ordinal_type = GlobalOrdinal; + using execution_space = typename DeviceType::execution_space; + using memory_space = typename DeviceType::memory_space; + using device_type = Kokkos::Device; + using node_type = Tpetra::KokkosCompat::KokkosDeviceWrapperNode; + using local_lw_graph_type = + MueLu::LocalLWGraph_kokkos; + using size_type = size_t; + + using map_type = Xpetra::Map; + using local_graph_type = typename local_lw_graph_type::local_graph_type; + using boundary_nodes_type = typename local_lw_graph_type::boundary_nodes_type; + +private: + // For compatibility + typedef node_type Node; #undef MUELU_LWGRAPH_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - - //! 
@name Constructors/Destructors. - //@{ - - //! LWGraph constructor - // - // @param[in] graph: local graph of type Kokkos::StaticCrsGraph containing CRS data - // @param[in] domainMap: non-overlapping (domain) map for graph. Usually provided by AmalgamationFactory stored in UnAmalgamationInfo container - // @param[in] importMap: overlapping map for graph. Usually provided by AmalgamationFactory stored in UnAmalgamationInfo container - // @param[in] objectLabel: label string - LWGraph_kokkos(const local_graph_type& graph, - const RCP& domainMap, - const RCP& importMap, - const std::string& objectLabel = "") - : lclLWGraph_(graph, domainMap), domainMap_(domainMap), importMap_(importMap), objectLabel_(objectLabel) { } - - ~LWGraph_kokkos() = default; - //@} - - const RCP > GetComm() const { - return domainMap_->getComm(); - } - const RCP GetDomainMap() const { - return domainMap_; - } - //! Return overlapping import map (nodes). - const RCP GetImportMap() const { - return importMap_; - } - - //! Return number of graph vertices - KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const { - return lclLWGraph_.GetNodeNumVertices(); - } - //! Return number of graph edges - KOKKOS_INLINE_FUNCTION size_type GetNodeNumEdges() const { - return lclLWGraph_.GetNodeNumEdges(); - } - - //! Returns the maximum number of entries across all rows/columns on this node - KOKKOS_INLINE_FUNCTION size_type getLocalMaxNumRowEntries () const { - return lclLWGraph_.getLocalMaxNumRowEntries(); - } - - /// Return a simple one-line description of the Graph. - std::string description() const { - return "LWGraph (" + objectLabel_ + ")"; - } - - //! Print the Graph with some verbosity level to an FancyOStream object. - void print(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const; - - local_lw_graph_type& getLocalLWGraph() const { - return lclLWGraph_; - } - - private: - - //! Underlying graph (with label) - mutable local_lw_graph_type lclLWGraph_; - - //! 
Graph maps - const RCP domainMap_; - const RCP importMap_; - - //! Name of this graph. - const std::string objectLabel_; - }; - -} +public: + //! @name Constructors/Destructors. + //@{ + + //! LWGraph constructor + // + // @param[in] graph: local graph of type Kokkos::StaticCrsGraph containing CRS + // data + // @param[in] domainMap: non-overlapping (domain) map for graph. Usually + // provided by AmalgamationFactory stored in UnAmalgamationInfo container + // @param[in] importMap: overlapping map for graph. Usually provided by + // AmalgamationFactory stored in UnAmalgamationInfo container + // @param[in] objectLabel: label string + LWGraph_kokkos(const local_graph_type &graph, + const RCP &domainMap, + const RCP &importMap, + const std::string &objectLabel = "") + : lclLWGraph_(graph, domainMap), domainMap_(domainMap), + importMap_(importMap), objectLabel_(objectLabel) {} + + ~LWGraph_kokkos() = default; + //@} + + const RCP> GetComm() const { + return domainMap_->getComm(); + } + const RCP GetDomainMap() const { return domainMap_; } + //! Return overlapping import map (nodes). + const RCP GetImportMap() const { return importMap_; } + + //! Return number of graph vertices + KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const { + return lclLWGraph_.GetNodeNumVertices(); + } + //! Return number of graph edges + KOKKOS_INLINE_FUNCTION size_type GetNodeNumEdges() const { + return lclLWGraph_.GetNodeNumEdges(); + } + + //! Returns the maximum number of entries across all rows/columns on this node + KOKKOS_INLINE_FUNCTION size_type getLocalMaxNumRowEntries() const { + return lclLWGraph_.getLocalMaxNumRowEntries(); + } + + /// Return a simple one-line description of the Graph. + std::string description() const { return "LWGraph (" + objectLabel_ + ")"; } + + //! Print the Graph with some verbosity level to an FancyOStream object. 
+ void print(Teuchos::FancyOStream &out, + const VerbLevel verbLevel = Default) const; + + local_lw_graph_type &getLocalLWGraph() const { return lclLWGraph_; } + +private: + //! Underlying graph (with label) + mutable local_lw_graph_type lclLWGraph_; + + //! Graph maps + const RCP domainMap_; + const RCP importMap_; + + //! Name of this graph. + const std::string objectLabel_; +}; + +} // namespace MueLu #define MUELU_LWGRAPH_KOKKOS_SHORT #endif // MUELU_LWGRAPH_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_LWGraph_kokkos_def.hpp b/packages/muelu/src/Graph/Containers/MueLu_LWGraph_kokkos_def.hpp index 4d164f1b8f50..c51ca312259c 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_LWGraph_kokkos_def.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_LWGraph_kokkos_def.hpp @@ -55,38 +55,40 @@ namespace MueLu { - template - void LWGraph_kokkos>:: - print(Teuchos::FancyOStream &out, const VerbLevel verbLevel) const { +template +void LWGraph_kokkos>:: + print(Teuchos::FancyOStream &out, const VerbLevel verbLevel) const { - if (verbLevel & Debug) { - auto graph = lclLWGraph_.getGraph(); - RCP col_map = importMap_.is_null() ? domainMap_ : importMap_; - int mypid = col_map->getComm()->getRank(); + if (verbLevel & Debug) { + auto graph = lclLWGraph_.getGraph(); + RCP col_map = importMap_.is_null() ? 
domainMap_ : importMap_; + int mypid = col_map->getComm()->getRank(); - { + { std::ostringstream ss; ss << "[pid " << mypid << "] num entries=" << graph.entries.size(); out << ss.str() << std::endl; - } + } - const size_t numRows = graph.numRows(); - auto rowPtrs = graph.row_map; - auto columns = graph.entries; - for (size_t i=0; i < numRows; ++i) { - std::ostringstream ss; - ss << "[pid " << mypid << "] row " << domainMap_->getGlobalElement(i) << ":"; - ss << " (numEntries=" << rowPtrs(i+1)-rowPtrs(i) << ")"; + const size_t numRows = graph.numRows(); + auto rowPtrs = graph.row_map; + auto columns = graph.entries; + for (size_t i = 0; i < numRows; ++i) { + std::ostringstream ss; + ss << "[pid " << mypid << "] row " << domainMap_->getGlobalElement(i) + << ":"; + ss << " (numEntries=" << rowPtrs(i + 1) - rowPtrs(i) << ")"; - auto rowView = graph.rowConst(i); - for (LO j = 0; j < rowView.length; j++) { - ss << " " << col_map->getGlobalElement(rowView.colidx(j)); - } - out << ss.str() << std::endl; + auto rowView = graph.rowConst(i); + for (LO j = 0; j < rowView.length; j++) { + ss << " " << col_map->getGlobalElement(rowView.colidx(j)); } + out << ss.str() << std::endl; } } +} -} //namespace MueLu +} // namespace MueLu #endif // MUELU_LWGRAPH_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_LinkedList.cpp b/packages/muelu/src/Graph/Containers/MueLu_LinkedList.cpp index 04192eee8d72..e8b594d5c824 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_LinkedList.cpp +++ b/packages/muelu/src/Graph/Containers/MueLu_LinkedList.cpp @@ -43,51 +43,51 @@ // *********************************************************************** // // @HEADER -#include // for NULL #include "MueLu_LinkedList.hpp" +#include // for NULL namespace MueLu { - LinkedList::LinkedList() : nodeHead(NULL), nodeTail(NULL) { } +LinkedList::LinkedList() : nodeHead(NULL), nodeTail(NULL) {} - LinkedList::~LinkedList() { - while (nodeHead != NULL) - DeleteHead(); - } 
+LinkedList::~LinkedList() { + while (nodeHead != NULL) + DeleteHead(); +} - bool LinkedList::IsEmpty() { - return nodeHead == NULL; - } +bool LinkedList::IsEmpty() { return nodeHead == NULL; } - void LinkedList::Add(int iNode) { - MueLu_Node *newNode = new MueLu_Node; - newNode->nodeId = iNode; - newNode->next = NULL; - if (nodeHead == NULL) { - nodeHead = newNode; - nodeTail = newNode; - } else { - nodeTail->next = newNode; - nodeTail = newNode; - } +void LinkedList::Add(int iNode) { + MueLu_Node *newNode = new MueLu_Node; + newNode->nodeId = iNode; + newNode->next = NULL; + if (nodeHead == NULL) { + nodeHead = newNode; + nodeTail = newNode; + } else { + nodeTail->next = newNode; + nodeTail = newNode; } +} - int LinkedList::Pop() { // get head and remove first node - if (IsEmpty()) return -1; +int LinkedList::Pop() { // get head and remove first node + if (IsEmpty()) + return -1; - int iNode = nodeHead->nodeId; - DeleteHead(); - return iNode; - } + int iNode = nodeHead->nodeId; + DeleteHead(); + return iNode; +} - void LinkedList::DeleteHead() { - if (IsEmpty()) return; - - MueLu_Node *newNode = nodeHead; - nodeHead = newNode->next; - delete newNode; - } +void LinkedList::DeleteHead() { + if (IsEmpty()) + return; + MueLu_Node *newNode = nodeHead; + nodeHead = newNode->next; + delete newNode; } -//TODO: nodeTail unused -> remove? +} // namespace MueLu + +// TODO: nodeTail unused -> remove? 
diff --git a/packages/muelu/src/Graph/Containers/MueLu_LinkedList.hpp b/packages/muelu/src/Graph/Containers/MueLu_LinkedList.hpp index ba8d95e30751..96366ee55c34 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_LinkedList.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_LinkedList.hpp @@ -52,33 +52,31 @@ namespace MueLu { - typedef struct MueLu_Node_Struct - { - int nodeId; - struct MueLu_Node_Struct *next; - } MueLu_Node; +typedef struct MueLu_Node_Struct { + int nodeId; + struct MueLu_Node_Struct *next; +} MueLu_Node; - class LinkedList { +class LinkedList { - public: - LinkedList(); +public: + LinkedList(); - ~LinkedList(); + ~LinkedList(); - bool IsEmpty(); + bool IsEmpty(); - void Add(int iNode); + void Add(int iNode); - int Pop(); + int Pop(); - private: - MueLu_Node *nodeHead; - MueLu_Node *nodeTail; +private: + MueLu_Node *nodeHead; + MueLu_Node *nodeTail; - void DeleteHead(); + void DeleteHead(); +}; - }; - -} +} // namespace MueLu #endif // MUELU_LINKEDLIST_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_LocalLWGraph_kokkos_decl.hpp b/packages/muelu/src/Graph/Containers/MueLu_LocalLWGraph_kokkos_decl.hpp index 294fe160e530..3ce790d2e52e 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_LocalLWGraph_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_LocalLWGraph_kokkos_decl.hpp @@ -51,135 +51,137 @@ #include #include -#include // global_size_t +#include // global_size_t #include -#include "MueLu_VerbosityLevel.hpp" #include "MueLu_LocalLWGraph_kokkos_fwd.hpp" +#include "MueLu_VerbosityLevel.hpp" #include "MueLu_Exceptions.hpp" namespace MueLu { - /*! - @class LocalLWGraph_kokkos - @brief Lightweight MueLu representation of a compressed row storage graph - - This class is lightweight in the sense that it holds to local graph - information. These were built without using fillComplete. 
- */ - template - class LocalLWGraph_kokkos; - - // Partial specialization for DeviceType - template - class LocalLWGraph_kokkos> { - public: - using local_ordinal_type = LocalOrdinal; - using global_ordinal_type = GlobalOrdinal; - using execution_space = typename DeviceType::execution_space; - using memory_space = typename DeviceType::memory_space; - using device_type = Kokkos::Device; - using range_type = Kokkos::RangePolicy; - using node_type = Tpetra::KokkosCompat::KokkosDeviceWrapperNode; - using size_type = size_t; - - using local_graph_type = Kokkos::StaticCrsGraph; - using boundary_nodes_type = Kokkos::View; - using row_type = Kokkos::View; - using map_type = Xpetra::Map; - - private: - // For compatibility - typedef node_type Node; +/*! + @class LocalLWGraph_kokkos + @brief Lightweight MueLu representation of a compressed row storage graph + + This class is lightweight in the sense that it holds to local graph + information. These were built without using fillComplete. + */ +template +class LocalLWGraph_kokkos; + +// Partial specialization for DeviceType +template +class LocalLWGraph_kokkos< + LocalOrdinal, GlobalOrdinal, + Tpetra::KokkosCompat::KokkosDeviceWrapperNode> { +public: + using local_ordinal_type = LocalOrdinal; + using global_ordinal_type = GlobalOrdinal; + using execution_space = typename DeviceType::execution_space; + using memory_space = typename DeviceType::memory_space; + using device_type = Kokkos::Device; + using range_type = Kokkos::RangePolicy; + using node_type = Tpetra::KokkosCompat::KokkosDeviceWrapperNode; + using size_type = size_t; + + using local_graph_type = + Kokkos::StaticCrsGraph; + using boundary_nodes_type = Kokkos::View; + using row_type = Kokkos::View; + using map_type = Xpetra::Map; + +private: + // For compatibility + typedef node_type Node; #undef MUELU_LOCALLWGRAPH_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - - //! @name Constructors/Destructors. - //@{ - - //! 
LocalLWGraph constructor - // - // @param[in] graph: local graph of type Kokkos::StaticCrsGraph containing CRS data - LocalLWGraph_kokkos(const local_graph_type& graph, - const RCP& domainMap); - - ~LocalLWGraph_kokkos() = default; - //@} - - //! Return number of graph vertices - KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const { - return graph_.numRows(); - } - //! Return number of graph edges - KOKKOS_INLINE_FUNCTION size_type GetNodeNumEdges() const { - return graph_.row_map(GetNodeNumVertices()); - } - - //! Returns the maximum number of entries across all rows/columns on this node - KOKKOS_INLINE_FUNCTION size_type getLocalMaxNumRowEntries () const { - return maxNumRowEntries_; - } - - //! Return the row pointers of the local graph - KOKKOS_INLINE_FUNCTION typename local_graph_type::row_map_type getRowPtrs() const { - return graph_.row_map; - } - - //! Return the list entries in the local graph - KOKKOS_INLINE_FUNCTION typename local_graph_type::entries_type getEntries() const { - return graph_.entries; - } - - //! Return the list of vertices adjacent to the vertex 'v'. - // Unfortunately, C++11 does not support the following: - // auto getNeighborVertices(LO i) const -> decltype(rowView) - // auto return with decltype was only introduced in C++14 - KOKKOS_INLINE_FUNCTION - Kokkos::GraphRowViewConst getNeighborVertices(LO i) const { - auto rowView = graph_.rowConst(i); - - return rowView; - } - - //! Return true if vertex with local id 'v' is on current process. - KOKKOS_INLINE_FUNCTION bool isLocalNeighborVertex(LO i) const { - return i >= minLocalIndex_ && i <= maxLocalIndex_; - } - - //! Set boolean array indicating which rows correspond to Dirichlet boundaries. - KOKKOS_INLINE_FUNCTION void SetBoundaryNodeMap(const boundary_nodes_type bndry) { - dirichletBoundaries_ = bndry; - } - - //! Returns map with global ids of boundary nodes. 
- KOKKOS_INLINE_FUNCTION const boundary_nodes_type GetBoundaryNodeMap() const { - return dirichletBoundaries_; - } - - const local_graph_type& getGraph() const { - return graph_; - } - - private: - - //! Underlying graph (with label) - const local_graph_type graph_; - - //! Boolean array marking Dirichlet rows. - boundary_nodes_type dirichletBoundaries_; - - //! Local index boundaries (cached from domain map) - LO minLocalIndex_, maxLocalIndex_; - size_type maxNumRowEntries_; - - }; - -} +public: + //! @name Constructors/Destructors. + //@{ + + //! LocalLWGraph constructor + // + // @param[in] graph: local graph of type Kokkos::StaticCrsGraph containing CRS + // data + LocalLWGraph_kokkos(const local_graph_type &graph, + const RCP &domainMap); + + ~LocalLWGraph_kokkos() = default; + //@} + + //! Return number of graph vertices + KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const { + return graph_.numRows(); + } + //! Return number of graph edges + KOKKOS_INLINE_FUNCTION size_type GetNodeNumEdges() const { + return graph_.row_map(GetNodeNumVertices()); + } + + //! Returns the maximum number of entries across all rows/columns on this node + KOKKOS_INLINE_FUNCTION size_type getLocalMaxNumRowEntries() const { + return maxNumRowEntries_; + } + + //! Return the row pointers of the local graph + KOKKOS_INLINE_FUNCTION typename local_graph_type::row_map_type + getRowPtrs() const { + return graph_.row_map; + } + + //! Return the list entries in the local graph + KOKKOS_INLINE_FUNCTION typename local_graph_type::entries_type + getEntries() const { + return graph_.entries; + } + + //! Return the list of vertices adjacent to the vertex 'v'. 
+ // Unfortunately, C++11 does not support the following: + // auto getNeighborVertices(LO i) const -> decltype(rowView) + // auto return with decltype was only introduced in C++14 + KOKKOS_INLINE_FUNCTION + Kokkos::GraphRowViewConst getNeighborVertices(LO i) const { + auto rowView = graph_.rowConst(i); + + return rowView; + } + + //! Return true if vertex with local id 'v' is on current process. + KOKKOS_INLINE_FUNCTION bool isLocalNeighborVertex(LO i) const { + return i >= minLocalIndex_ && i <= maxLocalIndex_; + } + + //! Set boolean array indicating which rows correspond to Dirichlet + //! boundaries. + KOKKOS_INLINE_FUNCTION void + SetBoundaryNodeMap(const boundary_nodes_type bndry) { + dirichletBoundaries_ = bndry; + } + + //! Returns map with global ids of boundary nodes. + KOKKOS_INLINE_FUNCTION const boundary_nodes_type GetBoundaryNodeMap() const { + return dirichletBoundaries_; + } + + const local_graph_type &getGraph() const { return graph_; } + +private: + //! Underlying graph (with label) + const local_graph_type graph_; + + //! Boolean array marking Dirichlet rows. + boundary_nodes_type dirichletBoundaries_; + + //! 
Local index boundaries (cached from domain map) + LO minLocalIndex_, maxLocalIndex_; + size_type maxNumRowEntries_; +}; + +} // namespace MueLu #define MUELU_LOCALLWGRAPH_KOKKOS_SHORT #endif // MUELU_LOCALLWGRAPH_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_LocalLWGraph_kokkos_def.hpp b/packages/muelu/src/Graph/Containers/MueLu_LocalLWGraph_kokkos_def.hpp index 785706b1a002..f191a810d891 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_LocalLWGraph_kokkos_def.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_LocalLWGraph_kokkos_def.hpp @@ -55,49 +55,49 @@ namespace MueLu { - namespace { // anonymous +namespace { // anonymous - template - class MaxNumRowEntriesFunctor { - public: - MaxNumRowEntriesFunctor(RowType rowPointers) : rowPointers_(rowPointers) { } +template class MaxNumRowEntriesFunctor { +public: + MaxNumRowEntriesFunctor(RowType rowPointers) : rowPointers_(rowPointers) {} - KOKKOS_INLINE_FUNCTION - void operator()(const LocalOrdinal i, size_t& maxLength) const { - size_t d = rowPointers_(i+1) - rowPointers_(i); + KOKKOS_INLINE_FUNCTION + void operator()(const LocalOrdinal i, size_t &maxLength) const { + size_t d = rowPointers_(i + 1) - rowPointers_(i); - maxLength = (d > maxLength ? d : maxLength); - } + maxLength = (d > maxLength ? d : maxLength); + } - KOKKOS_INLINE_FUNCTION - void join(volatile size_t& dest, const volatile size_t& src) { - dest = (dest > src ? dest : src); - } + KOKKOS_INLINE_FUNCTION + void join(volatile size_t &dest, const volatile size_t &src) { + dest = (dest > src ? 
dest : src); + } - KOKKOS_INLINE_FUNCTION - void init(size_t& initValue) { - initValue = 0; - } + KOKKOS_INLINE_FUNCTION + void init(size_t &initValue) { initValue = 0; } - private: - RowType rowPointers_; - }; +private: + RowType rowPointers_; +}; - } +} // namespace - template - LocalLWGraph_kokkos>:: - LocalLWGraph_kokkos(const local_graph_type& graph, - const RCP& domainMap) - : graph_(graph) - { - minLocalIndex_ = domainMap->getMinLocalIndex(); - maxLocalIndex_ = domainMap->getMaxLocalIndex(); - - MaxNumRowEntriesFunctor maxNumRowEntriesFunctor(graph_.row_map); - Kokkos::parallel_reduce("MueLu:LocalLWGraph:LWGraph:maxnonzeros", range_type(0,graph_.numRows()), maxNumRowEntriesFunctor, maxNumRowEntries_); - } +template +LocalLWGraph_kokkos>:: + LocalLWGraph_kokkos(const local_graph_type &graph, + const RCP &domainMap) + : graph_(graph) { + minLocalIndex_ = domainMap->getMinLocalIndex(); + maxLocalIndex_ = domainMap->getMaxLocalIndex(); + + MaxNumRowEntriesFunctor + maxNumRowEntriesFunctor(graph_.row_map); + Kokkos::parallel_reduce("MueLu:LocalLWGraph:LWGraph:maxnonzeros", + range_type(0, graph_.numRows()), + maxNumRowEntriesFunctor, maxNumRowEntries_); +} -} //namespace MueLu +} // namespace MueLu #endif // MUELU_LWGRAPH_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Graph/Containers/MueLu_Zoltan2GraphAdapter.hpp b/packages/muelu/src/Graph/Containers/MueLu_Zoltan2GraphAdapter.hpp index e227583d92f2..566a57339105 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_Zoltan2GraphAdapter.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_Zoltan2GraphAdapter.hpp @@ -51,80 +51,79 @@ #if defined(HAVE_MUELU_ZOLTAN2) -#include -#include +#include "MueLu_GraphBase.hpp" #include +#include +#include #include -#include #include -#include +#include #include -#include "MueLu_GraphBase.hpp" - - +#include // Zoltab2 InputTraits for MueLu Graph objects namespace Zoltan2 { -template -struct InputTraits > -{ +template +struct InputTraits> { typedef Zoltan2::default_scalar_t 
scalar_t; - typedef LocalOrdinal lno_t; + typedef LocalOrdinal lno_t; typedef GlobalOrdinal gno_t; typedef size_t offset_t; - typedef Zoltan2::default_part_t part_t; - typedef Node node_t; - static inline std::string name() {return "MueLu::Graph";} + typedef Zoltan2::default_part_t part_t; + typedef Node node_t; + static inline std::string name() { return "MueLu::Graph"; } Z2_STATIC_ASSERT_TYPES // validate the types }; -}//end namespace Zoltan2 - +} // end namespace Zoltan2 namespace MueLu { -template -class MueLuGraphBaseAdapter : public Zoltan2::GraphAdapter { +template +class MueLuGraphBaseAdapter : public Zoltan2::GraphAdapter { public: - #ifndef DOXYGEN_SHOULD_SKIP_THIS - typedef typename Zoltan2::InputTraits::scalar_t scalar_t; - typedef typename Zoltan2::InputTraits::offset_t offset_t; - typedef typename Zoltan2::InputTraits::lno_t lno_t; - typedef typename Zoltan2::InputTraits::gno_t gno_t; - typedef typename Zoltan2::InputTraits::part_t part_t; - typedef typename Zoltan2::InputTraits::node_t node_t; + typedef typename Zoltan2::InputTraits::scalar_t scalar_t; + typedef typename Zoltan2::InputTraits::offset_t offset_t; + typedef typename Zoltan2::InputTraits::lno_t lno_t; + typedef typename Zoltan2::InputTraits::gno_t gno_t; + typedef typename Zoltan2::InputTraits::part_t part_t; + typedef typename Zoltan2::InputTraits::node_t node_t; typedef User xgraph_t; typedef User user_t; typedef UserCoord userCoord_t; #endif //! MueLu::GraphBase Compatibility Layer - const Teuchos::RCP< const Teuchos::Comm< int > > getComm() const { return graph_->GetComm();} - const Teuchos::RCP< const Xpetra::Map > getRowMap() const { return graph_->GetDomainMap();} - const RCP< const Xpetra::Map > getColMap() const { - // For some GraphBases' this is a ColMap, in others it is a seperate map that is - // only non-null in parallel. 
- Teuchos::RCP > map = graph_->GetImportMap(); - if(map.is_null()) map = graph_->GetDomainMap(); + const Teuchos::RCP> getComm() const { + return graph_->GetComm(); + } + const Teuchos::RCP> + getRowMap() const { + return graph_->GetDomainMap(); + } + const RCP> getColMap() const { + // For some GraphBases' this is a ColMap, in others it is a seperate map + // that is only non-null in parallel. + Teuchos::RCP> map = + graph_->GetImportMap(); + if (map.is_null()) + map = graph_->GetDomainMap(); return map; } - size_t getLocalNumEntries() const { return graph_->GetNodeNumEdges();} - size_t getLocalNumRows() const { return getRowMap()->getLocalNumElements();} - size_t getLocalNumCols() const { return getColMap()->getLocalNumElements();} + size_t getLocalNumEntries() const { return graph_->GetNodeNumEdges(); } + size_t getLocalNumRows() const { return getRowMap()->getLocalNumElements(); } + size_t getLocalNumCols() const { return getColMap()->getLocalNumElements(); } - void getLocalRowView(lno_t LocalRow, Teuchos::ArrayView< const lno_t > &indices) const { - indices = graph_->getNeighborVertices(LocalRow); + void getLocalRowView(lno_t LocalRow, + Teuchos::ArrayView &indices) const { + indices = graph_->getNeighborVertices(LocalRow); } - - /*! \brief Destructor */ - ~MueLuGraphBaseAdapter() { } + ~MueLuGraphBaseAdapter() {} /*! \brief Constructor for graph with no weights or coordinates. * \param ingraph the Epetra_CrsGraph, Tpetra::CrsGraph or Xpetra::CrsGraph @@ -135,8 +134,8 @@ class MueLuGraphBaseAdapter : public Zoltan2::GraphAdapter { * one does because the user is obviously a Trilinos user. */ - MueLuGraphBaseAdapter(const RCP &ingraph, - int nVtxWeights=0, int nEdgeWeights=0); + MueLuGraphBaseAdapter(const RCP &ingraph, int nVtxWeights = 0, + int nEdgeWeights = 0); /*! \brief Provide a pointer to weights for the primary entity type. * \param val A pointer to the weights for index \c idx. 
@@ -227,8 +226,7 @@ class MueLuGraphBaseAdapter : public Zoltan2::GraphAdapter { // TODO: Need to add option for columns or nonzeros? size_t getLocalNumVertices() const { return getLocalNumRows(); } - void getVertexIDsView(const gno_t *&ids) const - { + void getVertexIDsView(const gno_t *&ids) const { ids = NULL; if (getLocalNumVertices()) ids = getRowMap()->getLocalElementList().getRawPtr(); @@ -236,98 +234,92 @@ class MueLuGraphBaseAdapter : public Zoltan2::GraphAdapter { size_t getLocalNumEdges() const { return getLocalNumEntries(); } - void getEdgesView(const offset_t *&offsets, const gno_t *&adjIds) const - { + void getEdgesView(const offset_t *&offsets, const gno_t *&adjIds) const { offsets = offs_.getRawPtr(); adjIds = (getLocalNumEdges() ? adjids_.getRawPtr() : NULL); } - int getNumWeightsPerVertex() const { return nWeightsPerVertex_;} + int getNumWeightsPerVertex() const { return nWeightsPerVertex_; } void getVertexWeightsView(const scalar_t *&weights, int &stride, - int idx) const - { - if(idx<0 || idx >= nWeightsPerVertex_) - { + int idx) const { + if (idx < 0 || idx >= nWeightsPerVertex_) { std::ostringstream emsg; - emsg << __FILE__ << ":" << __LINE__ - << " Invalid vertex weight index " << idx << std::endl; + emsg << __FILE__ << ":" << __LINE__ << " Invalid vertex weight index " + << idx << std::endl; throw std::runtime_error(emsg.str()); } - size_t length; vertexWeights_[idx].getStridedList(length, weights, stride); } - bool useDegreeAsVertexWeight(int idx) const {return vertexDegreeWeight_[idx];} + bool useDegreeAsVertexWeight(int idx) const { + return vertexDegreeWeight_[idx]; + } - int getNumWeightsPerEdge() const { return nWeightsPerEdge_;} + int getNumWeightsPerEdge() const { return nWeightsPerEdge_; } - void getEdgeWeightsView(const scalar_t *&weights, int &stride, int idx) const - { - if(idx<0 || idx >= nWeightsPerEdge_) - { + void getEdgeWeightsView(const scalar_t *&weights, int &stride, + int idx) const { + if (idx < 0 || idx >= 
nWeightsPerEdge_) { std::ostringstream emsg; - emsg << __FILE__ << ":" << __LINE__ - << " Invalid edge weight index " << idx << std::endl; + emsg << __FILE__ << ":" << __LINE__ << " Invalid edge weight index " + << idx << std::endl; throw std::runtime_error(emsg.str()); } - size_t length; edgeWeights_[idx].getStridedList(length, weights, stride); } - template - void applyPartitioningSolution(const User &in, User *&out, - const Zoltan2::PartitioningSolution &solution) const { - TEUCHOS_TEST_FOR_EXCEPTION(1, std::invalid_argument,"applyPartitionlingSolution not implemeneted"); -} + void applyPartitioningSolution( + const User &in, User *&out, + const Zoltan2::PartitioningSolution &solution) const { + TEUCHOS_TEST_FOR_EXCEPTION(1, std::invalid_argument, + "applyPartitionlingSolution not implemeneted"); + } template - void applyPartitioningSolution(const User &in, RCP &out, - const Zoltan2::PartitioningSolution &solution) const { - TEUCHOS_TEST_FOR_EXCEPTION(1, std::invalid_argument,"applyPartitionlingSolution not implemeneted"); + void applyPartitioningSolution( + const User &in, RCP &out, + const Zoltan2::PartitioningSolution &solution) const { + TEUCHOS_TEST_FOR_EXCEPTION(1, std::invalid_argument, + "applyPartitionlingSolution not implemeneted"); } - private: - - RCP ingraph_; - RCP graph_; - RCP > comm_; + RCP ingraph_; + RCP graph_; + RCP> comm_; ArrayRCP offs_; ArrayRCP adjids_; int nWeightsPerVertex_; - ArrayRCP > vertexWeights_; + ArrayRCP> vertexWeights_; ArrayRCP vertexDegreeWeight_; int nWeightsPerEdge_; - ArrayRCP > edgeWeights_; + ArrayRCP> edgeWeights_; int coordinateDim_; - ArrayRCP > coords_; - + ArrayRCP> coords_; }; - ///////////////////////////////////////////////////////////////// // Definitions ///////////////////////////////////////////////////////////////// template - MueLuGraphBaseAdapter::MueLuGraphBaseAdapter( - const RCP &ingraph, int nVtxWgts, int nEdgeWgts): - ingraph_(ingraph), graph_(), comm_() , offs_(), adjids_(), 
+MueLuGraphBaseAdapter::MueLuGraphBaseAdapter( + const RCP &ingraph, int nVtxWgts, int nEdgeWgts) + : ingraph_(ingraph), graph_(), comm_(), offs_(), adjids_(), nWeightsPerVertex_(nVtxWgts), vertexWeights_(), vertexDegreeWeight_(), - nWeightsPerEdge_(nEdgeWgts), edgeWeights_(), - coordinateDim_(0), coords_() -{ - typedef Zoltan2::StridedData input_t; + nWeightsPerEdge_(nEdgeWgts), edgeWeights_(), coordinateDim_(0), + coords_() { + typedef Zoltan2::StridedData input_t; graph_ = ingraph; comm_ = getRowMap()->getComm(); @@ -338,40 +330,37 @@ template // because edge Ids are not usually stored in vertex id order. size_t n = nvtx + 1; offs_.resize(n); - offset_t* offs = const_cast(offs_.getRawPtr()); - gno_t* adjids=0; - if(nedges > 0) { + offset_t *offs = const_cast(offs_.getRawPtr()); + gno_t *adjids = 0; + if (nedges > 0) { adjids_.resize(nedges); - adjids = const_cast(adjids_.getRawPtr()); + adjids = const_cast(adjids_.getRawPtr()); } offs[0] = 0; - for (size_t v=0; v < nvtx; v++){ + for (size_t v = 0; v < nvtx; v++) { ArrayView nbors; getLocalRowView(v, nbors); - offs[v+1] = offs[v] + nbors.size(); - for (offset_t e=offs[v], i=0; e < offs[v+1]; e++) { + offs[v + 1] = offs[v] + nbors.size(); + for (offset_t e = offs[v], i = 0; e < offs[v + 1]; e++) { adjids[e] = getColMap()->getGlobalElement(nbors[i++]); } } if (nWeightsPerVertex_ > 0) { vertexWeights_ = - arcp(new input_t[nWeightsPerVertex_], 0, nWeightsPerVertex_, true); + arcp(new input_t[nWeightsPerVertex_], 0, nWeightsPerVertex_, true); vertexDegreeWeight_ = - arcp(new bool[nWeightsPerVertex_], 0, nWeightsPerVertex_, true); - for (int i=0; i < nWeightsPerVertex_; i++) + arcp(new bool[nWeightsPerVertex_], 0, nWeightsPerVertex_, true); + for (int i = 0; i < nWeightsPerVertex_; i++) vertexDegreeWeight_[i] = false; } - - } //////////////////////////////////////////////////////////////////////////// template - void MueLuGraphBaseAdapter::setWeights( - const scalar_t *weightVal, int stride, int idx) -{ +void 
MueLuGraphBaseAdapter::setWeights( + const scalar_t *weightVal, int stride, int idx) { if (this->getPrimaryEntityType() == Zoltan2::GRAPH_VERTEX) setVertexWeights(weightVal, stride, idx); else @@ -380,29 +369,25 @@ template //////////////////////////////////////////////////////////////////////////// template - void MueLuGraphBaseAdapter::setVertexWeights( - const scalar_t *weightVal, int stride, int idx) -{ - typedef Zoltan2::StridedData input_t; +void MueLuGraphBaseAdapter::setVertexWeights( + const scalar_t *weightVal, int stride, int idx) { + typedef Zoltan2::StridedData input_t; - if(idx<0 || idx >= nWeightsPerVertex_) - { - std::ostringstream emsg; - emsg << __FILE__ << ":" << __LINE__ - << " Invalid vertex weight index " << idx << std::endl; - throw std::runtime_error(emsg.str()); + if (idx < 0 || idx >= nWeightsPerVertex_) { + std::ostringstream emsg; + emsg << __FILE__ << ":" << __LINE__ << " Invalid vertex weight index " + << idx << std::endl; + throw std::runtime_error(emsg.str()); } size_t nvtx = getLocalNumVertices(); - ArrayRCP weightV(weightVal, 0, nvtx*stride, false); + ArrayRCP weightV(weightVal, 0, nvtx * stride, false); vertexWeights_[idx] = input_t(weightV, stride); } //////////////////////////////////////////////////////////////////////////// template - void MueLuGraphBaseAdapter::setWeightIsDegree( - int idx) -{ +void MueLuGraphBaseAdapter::setWeightIsDegree(int idx) { if (this->getPrimaryEntityType() == Zoltan2::GRAPH_VERTEX) setVertexWeightIsDegree(idx); else { @@ -416,15 +401,12 @@ template //////////////////////////////////////////////////////////////////////////// template - void MueLuGraphBaseAdapter::setVertexWeightIsDegree( - int idx) -{ - if(idx<0 || idx >= nWeightsPerVertex_) - { - std::ostringstream emsg; - emsg << __FILE__ << ":" << __LINE__ - << " Invalid vertex weight index " << idx << std::endl; - throw std::runtime_error(emsg.str()); +void MueLuGraphBaseAdapter::setVertexWeightIsDegree(int idx) { + if (idx < 0 || idx >= 
nWeightsPerVertex_) { + std::ostringstream emsg; + emsg << __FILE__ << ":" << __LINE__ << " Invalid vertex weight index " + << idx << std::endl; + throw std::runtime_error(emsg.str()); } vertexDegreeWeight_[idx] = true; @@ -432,28 +414,24 @@ template //////////////////////////////////////////////////////////////////////////// template - void MueLuGraphBaseAdapter::setEdgeWeights( - const scalar_t *weightVal, int stride, int idx) -{ - typedef Zoltan2::StridedData input_t; +void MueLuGraphBaseAdapter::setEdgeWeights( + const scalar_t *weightVal, int stride, int idx) { + typedef Zoltan2::StridedData input_t; - if(idx<0 || idx >= nWeightsPerEdge_) - { - std::ostringstream emsg; - emsg << __FILE__ << ":" << __LINE__ - << " Invalid edge weight index " << idx << std::endl; - throw std::runtime_error(emsg.str()); + if (idx < 0 || idx >= nWeightsPerEdge_) { + std::ostringstream emsg; + emsg << __FILE__ << ":" << __LINE__ << " Invalid edge weight index " << idx + << std::endl; + throw std::runtime_error(emsg.str()); } size_t nedges = getLocalNumEdges(); - ArrayRCP weightV(weightVal, 0, nedges*stride, false); + ArrayRCP weightV(weightVal, 0, nedges * stride, false); edgeWeights_[idx] = input_t(weightV, stride); } +} // namespace MueLu -} //namespace MueLu - +#endif // MUELU_HAVE_ZOLTAN2 -#endif// MUELU_HAVE_ZOLTAN2 - #endif diff --git a/packages/muelu/src/Graph/HybridAggregation/MueLu_HybridAggregationFactory_decl.hpp b/packages/muelu/src/Graph/HybridAggregation/MueLu_HybridAggregationFactory_decl.hpp index 879332ca0d44..ba8d7e7af0a9 100644 --- a/packages/muelu/src/Graph/HybridAggregation/MueLu_HybridAggregationFactory_decl.hpp +++ b/packages/muelu/src/Graph/HybridAggregation/MueLu_HybridAggregationFactory_decl.hpp @@ -46,19 +46,18 @@ #ifndef MUELU_HYBRIDAGGREGATIONFACTORY_DECL_HPP_ #define MUELU_HYBRIDAGGREGATIONFACTORY_DECL_HPP_ - #include #include "MueLu_ConfigDefs.hpp" -#include "MueLu_SingleLevelFactoryBase.hpp" #include "MueLu_HybridAggregationFactory_fwd.hpp" +#include 
"MueLu_SingleLevelFactoryBase.hpp" #include "MueLu_AggregationAlgorithmBase.hpp" -#include "MueLu_Level_fwd.hpp" -#include "MueLu_GraphBase_fwd.hpp" #include "MueLu_Aggregates_fwd.hpp" #include "MueLu_Exceptions.hpp" +#include "MueLu_GraphBase_fwd.hpp" +#include "MueLu_Level_fwd.hpp" // Uncoupled Agg #include "MueLu_InterfaceAggregationAlgorithm_fwd.hpp" @@ -77,111 +76,135 @@ namespace MueLu { /*! @class HybridAggregationFactory class. - @brief Factory for building aggregates on meshes partly structured and partly unstructured. + @brief Factory for building aggregates on meshes partly structured and + partly unstructured. - Factory for creating aggregates from partly structured grids. The hybrid aggregation method - returns an aggregate structure used by prolongator factories. + Factory for creating aggregates from partly structured grids. The hybrid + aggregation method returns an aggregate structure used by prolongator + factories. Internally, each node has a status which can be one of the following: Node status | Meaning ------------|--------- - READY | Node is not aggregated and can be used for building a new aggregate or can be added to an existing aggregate. - AGGREGATED | Node is aggregated. - IGNORED | Node is not considered for aggregation (it may have been dropped or put into a singleton aggregate) - BOUNDARY | Node is a Dirichlet boundary node (with one or more Dirichlet boundary conditions). - ONEPT | The user forces the aggregation algorithm to treat the node as a singleton. Important: Do not forget to set aggregation: allow user-specified singletons to true! Otherwise Phase3 will just handle the ONEPT nodes and probably not build singletons + READY | Node is not aggregated and can be used for building a new + aggregate or can be added to an existing aggregate. AGGREGATED | Node is + aggregated. 
IGNORED | Node is not considered for aggregation (it may have + been dropped or put into a singleton aggregate) BOUNDARY | Node is a + Dirichlet boundary node (with one or more Dirichlet boundary conditions). + ONEPT | The user forces the aggregation algorithm to treat the node as + a singleton. Important: Do not forget to set aggregation: allow + user-specified singletons to true! Otherwise Phase3 will just handle the + ONEPT nodes and probably not build singletons @ingroup Aggregation ## Input/output of HybridAggregationFactory ## ### User parameters of HybridAggregationFactory ### - Parameter | type | default | master.xml | validated | requested | description + Parameter | type | default | master.xml | validated | requested | + description ----------|------|---------|:----------:|:---------:|:---------:|------------ - Graph | Factory | null | | * | * | Generating factory for variable 'Graph' - DofsPerNode | Factory | null | | * | * | Generating factory for variable 'DofsPerNode', usually the same as for 'Graph' - OnePt aggregate map name | string | | | * | * | Name of input map for single node aggregates (default=''). Makes only sense if the parameter 'aggregation: allow user-specified singletons' is set to true. - OnePt aggregate map factory | Factory | null | | * | * | Generating factory of (DOF) map for single node aggregates. Makes only sense if the parameter 'aggregation: allow user-specified singletons' is set to true. - aggregation: max agg size | int | see master.xml | * | * | | Maximum number of nodes per aggregate. - aggregation: min agg size | int | see master.xml | * | * | | Minimum number of nodes necessary to build a new aggregate. - aggregation: max selected neighbors | int | see master.xml | * | * | | Maximum number of neighbor nodes already in aggregate (needed in Phase1) - aggregation: ordering | string | "natural" | * | * | | Ordering of node aggregation (can be either "natural", "graph" or "random"). 
- aggregation: enable phase 1 | bool | true | * | * | |Turn on/off phase 1 aggregation - aggregation: enable phase 2a | bool | true | * | * | |Turn on/off phase 2a aggregation - aggregation: enable phase 2b | bool | true | * | * | |Turn on/off phase 2b aggregation - aggregation: enable phase 3 | bool | true | * | * | |Turn on/off phase 3 aggregation - aggregation: preserve Dirichlet points | bool | false | * | * | | preserve Dirichlet points as singleton nodes (default=false, i.e., drop Dirichlet nodes during aggregation) - aggregation: allow user-specified singletons | bool | false | * | * | | Turn on/off OnePtAggregationAlgorithm (default=false) - aggregationRegionType| Factory | null | | * | * | Factory generating the type of aggregation to use on the region - - - The * in the @c master.xml column denotes that the parameter is defined in the @c master.xml file.
- The * in the @c validated column means that the parameter is declared in the list of valid input parameters (see HybridAggregationFactory::GetValidParameters).
- The * in the @c requested column states that the data is requested as input with all dependencies (see HybridAggregationFactory::DeclareInput). + Graph | Factory | null | | * | * | Generating + factory for variable 'Graph' DofsPerNode | Factory | null | | * | * | + Generating factory for variable 'DofsPerNode', usually the same as for + 'Graph' OnePt aggregate map name | string | | | * | * | Name of input map + for single node aggregates (default=''). Makes only sense if the parameter + 'aggregation: allow user-specified singletons' is set to true. OnePt + aggregate map factory | Factory | null | | * | * | Generating factory of + (DOF) map for single node aggregates. Makes only sense if the parameter + 'aggregation: allow user-specified singletons' is set to true. aggregation: + max agg size | int | see master.xml | * | * | | Maximum number of nodes per + aggregate. aggregation: min agg size | int | see master.xml | * | * | | + Minimum number of nodes necessary to build a new aggregate. aggregation: max + selected neighbors | int | see master.xml | * | * | | Maximum number of + neighbor nodes already in aggregate (needed in Phase1) aggregation: ordering + | string | "natural" | * | * | | Ordering of node aggregation (can be either + "natural", "graph" or "random"). 
aggregation: enable phase 1 | bool | true | + * | * | |Turn on/off phase 1 aggregation aggregation: enable phase 2a | + bool | true | * | * | |Turn on/off phase 2a aggregation aggregation: enable + phase 2b | bool | true | * | * | |Turn on/off phase 2b aggregation + aggregation: enable phase 3 | bool | true | * | * | |Turn on/off phase 3 + aggregation aggregation: preserve Dirichlet points | bool | false | * | * | + | preserve Dirichlet points as singleton nodes (default=false, i.e., drop + Dirichlet nodes during aggregation) aggregation: allow user-specified + singletons | bool | false | * | * | | Turn on/off OnePtAggregationAlgorithm + (default=false) aggregationRegionType| Factory | null | | * | * | Factory + generating the type of aggregation to use on the region + + + The * in the @c master.xml column denotes that the parameter is defined in + the @c master.xml file.
The * in the @c validated column means that the + parameter is declared in the list of valid input parameters (see + HybridAggregationFactory::GetValidParameters).
The * in the @c requested + column states that the data is requested as input with all dependencies (see + HybridAggregationFactory::DeclareInput). ### Variables provided by HybridAggregationFactory ### - After HybridAggregationFactory::Build the following data is available (if requested) + After HybridAggregationFactory::Build the following data is available (if + requested) Parameter | generated by | description ----------|--------------|------------ - | Aggregates | HybridAggregationFactory | Container class with aggregation information. See also Aggregates. + | Aggregates | HybridAggregationFactory | Container class with + aggregation information. See also Aggregates. */ - template - class HybridAggregationFactory : public SingleLevelFactoryBase { +template +class HybridAggregationFactory : public SingleLevelFactoryBase { #undef MUELU_HYBRIDAGGREGATIONFACTORY_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ - - //! Constructor. - HybridAggregationFactory(); +public: + //! @name Constructors/Destructors. + //@{ - //! Destructor. - virtual ~HybridAggregationFactory() { } + //! Constructor. + HybridAggregationFactory(); - RCP GetValidParameterList() const; + //! Destructor. + virtual ~HybridAggregationFactory() {} - //@} + RCP GetValidParameterList() const; - //! Input - //@{ + //@} - void DeclareInput(Level ¤tLevel) const; + //! Input + //@{ - //@} + void DeclareInput(Level ¤tLevel) const; - //! @name Build methods. - //@{ + //@} - /*! @brief Build aggregates. */ - void Build(Level ¤tLevel) const; + //! @name Build methods. + //@{ - /*! @brief Specifically build aggregates along interfaces */ - void BuildInterfaceAggregates(Level& currentLevel, RCP aggregates, - std::vector& aggStat, LO& numNonAggregatedNodes, - Array coarseRate) const; + /*! @brief Build aggregates. */ + void Build(Level ¤tLevel) const; - //@} + /*! 
@brief Specifically build aggregates along interfaces */ + void BuildInterfaceAggregates(Level ¤tLevel, RCP aggregates, + std::vector &aggStat, + LO &numNonAggregatedNodes, + Array coarseRate) const; - private: + //@} - //! aggregation algorithms - // will be filled in Build routine - mutable std::vector > > algos_; +private: + //! aggregation algorithms + // will be filled in Build routine + mutable std::vector>> + algos_; - //! boolean flag: definition phase - //! if true, the aggregation algorithms still can be set and changed. - //! if false, no change in aggregation algorithms is possible any more - mutable bool bDefinitionPhase_; + //! boolean flag: definition phase + //! if true, the aggregation algorithms still can be set and changed. + //! if false, no change in aggregation algorithms is possible any more + mutable bool bDefinitionPhase_; - }; // class HybridAggregationFactory +}; // class HybridAggregationFactory -} +} // namespace MueLu #define MUELU_HYBRIDAGGREGATIONFACTORY_SHORT #endif /* MUELU_HYBRIDAGGREGATIONFACTORY_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/HybridAggregation/MueLu_HybridAggregationFactory_def.hpp b/packages/muelu/src/Graph/HybridAggregation/MueLu_HybridAggregationFactory_def.hpp index d0ac7bcc7496..4e9340afe8b8 100644 --- a/packages/muelu/src/Graph/HybridAggregation/MueLu_HybridAggregationFactory_def.hpp +++ b/packages/muelu/src/Graph/HybridAggregation/MueLu_HybridAggregationFactory_def.hpp @@ -46,10 +46,10 @@ #ifndef MUELU_HYBRIDAGGREGATIONFACTORY_DEF_HPP_ #define MUELU_HYBRIDAGGREGATIONFACTORY_DEF_HPP_ -#include #include -#include +#include #include +#include #include #include "MueLu_HybridAggregationFactory_decl.hpp" @@ -71,515 +71,576 @@ //#include "MueLu_GlobalLexicographicIndexManager.hpp" // Shared -#include "MueLu_Level.hpp" -#include "MueLu_GraphBase.hpp" #include "MueLu_Aggregates.hpp" +#include "MueLu_GraphBase.hpp" +#include "MueLu_Level.hpp" #include "MueLu_MasterList.hpp" #include "MueLu_Monitor.hpp" - 
namespace MueLu { - template - HybridAggregationFactory:: - HybridAggregationFactory() : bDefinitionPhase_(true) - { } - - template - RCP HybridAggregationFactory:: - GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; -#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - // From UncoupledAggregationFactory - SET_VALID_ENTRY("aggregation: max agg size"); - SET_VALID_ENTRY("aggregation: min agg size"); - SET_VALID_ENTRY("aggregation: max selected neighbors"); - SET_VALID_ENTRY("aggregation: ordering"); - validParamList->getEntry("aggregation: ordering").setValidator( - rcp(new validatorType(Teuchos::tuple("natural", "graph", "random"), "aggregation: ordering"))); - SET_VALID_ENTRY("aggregation: enable phase 1"); - SET_VALID_ENTRY("aggregation: enable phase 2a"); - SET_VALID_ENTRY("aggregation: enable phase 2b"); - SET_VALID_ENTRY("aggregation: enable phase 3"); - SET_VALID_ENTRY("aggregation: preserve Dirichlet points"); - SET_VALID_ENTRY("aggregation: allow user-specified singletons"); - SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors"); - SET_VALID_ENTRY("aggregation: match ML phase2a"); - SET_VALID_ENTRY("aggregation: phase2a agg factor"); - SET_VALID_ENTRY("aggregation: phase3 avoid singletons"); - - // From StructuredAggregationFactory - SET_VALID_ENTRY("aggregation: coarsening rate"); - SET_VALID_ENTRY("aggregation: coarsening order"); - SET_VALID_ENTRY("aggregation: number of spatial dimensions"); - - // From HybridAggregationFactory - SET_VALID_ENTRY("aggregation: use interface aggregation"); -#undef SET_VALID_ENTRY - - /* From UncoupledAggregation */ - // general variables needed in AggregationFactory - validParamList->set< RCP >("Graph", null, "Generating factory of the graph"); - validParamList->set< RCP >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same 
as for \'Graph\'"); - // special variables necessary for OnePtAggregationAlgorithm - validParamList->set ("OnePt aggregate map name", "", - "Name of input map for single node aggregates. (default='')"); - validParamList->set ("OnePt aggregate map factory", "", - "Generating factory of (DOF) map for single node aggregates."); - - // InterfaceAggregation parameters - validParamList->set ("Interface aggregate map name", "", - "Name of input map for interface aggregates. (default='')"); - validParamList->set ("Interface aggregate map factory", "", - "Generating factory of (DOF) map for interface aggregates."); - validParamList->set > ("interfacesDimensions", Teuchos::null, - "Describes the dimensions of all the interfaces on this rank."); - validParamList->set > ("nodeOnInterface", Teuchos::null, - "List the LIDs of the nodes on any interface."); - - /* From StructuredAggregation */ - // general variables needed in AggregationFactory - validParamList->set >("numDimensions", Teuchos::null, - "Number of spatial dimension provided by CoordinatesTransferFactory."); - validParamList->set >("lNodesPerDim", Teuchos::null, - "Number of nodes per spatial dimmension provided by CoordinatesTransferFactory."); - - - // Hybrid Aggregation Params - validParamList->set > ("aggregationRegionType", Teuchos::null, - "Type of aggregation to use on the region (\"structured\" or \"uncoupled\")"); - - return validParamList; - } - - template - void HybridAggregationFactory:: - DeclareInput(Level& currentLevel) const { - Input(currentLevel, "Graph"); - - ParameterList pL = GetParameterList(); - - - - /* StructuredAggregation */ - - // Request the local number of nodes per dimensions - if(currentLevel.GetLevelID() == 0) { - if(currentLevel.IsAvailable("aggregationRegionType", NoFactory::get())) { - currentLevel.DeclareInput("aggregationRegionType", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(!currentLevel.IsAvailable("aggregationRegionType",NoFactory::get()), - 
Exceptions::RuntimeError, - "Aggregation region type was not provided by the user!"); - } - if(currentLevel.IsAvailable("numDimensions", NoFactory::get())) { - currentLevel.DeclareInput("numDimensions", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("numDimensions", NoFactory::get()), - Exceptions::RuntimeError, - "numDimensions was not provided by the user on level0!"); - } - if(currentLevel.IsAvailable("lNodesPerDim", NoFactory::get())) { - currentLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("lNodesPerDim", NoFactory::get()), - Exceptions::RuntimeError, - "lNodesPerDim was not provided by the user on level0!"); - } +template +HybridAggregationFactory::HybridAggregationFactory() + : bDefinitionPhase_(true) {} + +template +RCP +HybridAggregationFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + + typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; +#define SET_VALID_ENTRY(name) \ + validParamList->setEntry(name, MasterList::getEntry(name)) + // From UncoupledAggregationFactory + SET_VALID_ENTRY("aggregation: max agg size"); + SET_VALID_ENTRY("aggregation: min agg size"); + SET_VALID_ENTRY("aggregation: max selected neighbors"); + SET_VALID_ENTRY("aggregation: ordering"); + validParamList->getEntry("aggregation: ordering") + .setValidator(rcp(new validatorType( + Teuchos::tuple("natural", "graph", "random"), + "aggregation: ordering"))); + SET_VALID_ENTRY("aggregation: enable phase 1"); + SET_VALID_ENTRY("aggregation: enable phase 2a"); + SET_VALID_ENTRY("aggregation: enable phase 2b"); + SET_VALID_ENTRY("aggregation: enable phase 3"); + SET_VALID_ENTRY("aggregation: preserve Dirichlet points"); + SET_VALID_ENTRY("aggregation: allow user-specified singletons"); + SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors"); + SET_VALID_ENTRY("aggregation: match ML 
phase2a"); + SET_VALID_ENTRY("aggregation: phase2a agg factor"); + SET_VALID_ENTRY("aggregation: phase3 avoid singletons"); + + // From StructuredAggregationFactory + SET_VALID_ENTRY("aggregation: coarsening rate"); + SET_VALID_ENTRY("aggregation: coarsening order"); + SET_VALID_ENTRY("aggregation: number of spatial dimensions"); + + // From HybridAggregationFactory + SET_VALID_ENTRY("aggregation: use interface aggregation"); +#undef SET_VALID_ENTRY + + /* From UncoupledAggregation */ + // general variables needed in AggregationFactory + validParamList->set>( + "Graph", null, "Generating factory of the graph"); + validParamList->set>( + "DofsPerNode", null, + "Generating factory for variable \'DofsPerNode\', usually the same as " + "for \'Graph\'"); + // special variables necessary for OnePtAggregationAlgorithm + validParamList->set( + "OnePt aggregate map name", "", + "Name of input map for single node aggregates. (default='')"); + validParamList->set( + "OnePt aggregate map factory", "", + "Generating factory of (DOF) map for single node aggregates."); + + // InterfaceAggregation parameters + validParamList->set( + "Interface aggregate map name", "", + "Name of input map for interface aggregates. 
(default='')"); + validParamList->set( + "Interface aggregate map factory", "", + "Generating factory of (DOF) map for interface aggregates."); + validParamList->set>( + "interfacesDimensions", Teuchos::null, + "Describes the dimensions of all the interfaces on this rank."); + validParamList->set>( + "nodeOnInterface", Teuchos::null, + "List the LIDs of the nodes on any interface."); + + /* From StructuredAggregation */ + // general variables needed in AggregationFactory + validParamList->set>( + "numDimensions", Teuchos::null, + "Number of spatial dimension provided by CoordinatesTransferFactory."); + validParamList->set>( + "lNodesPerDim", Teuchos::null, + "Number of nodes per spatial dimmension provided by " + "CoordinatesTransferFactory."); + + // Hybrid Aggregation Params + validParamList->set>( + "aggregationRegionType", Teuchos::null, + "Type of aggregation to use on the region (\"structured\" or " + "\"uncoupled\")"); + + return validParamList; +} + +template +void HybridAggregationFactory::DeclareInput( + Level ¤tLevel) const { + Input(currentLevel, "Graph"); + + ParameterList pL = GetParameterList(); + + /* StructuredAggregation */ + + // Request the local number of nodes per dimensions + if (currentLevel.GetLevelID() == 0) { + if (currentLevel.IsAvailable("aggregationRegionType", NoFactory::get())) { + currentLevel.DeclareInput("aggregationRegionType", NoFactory::get(), + this); } else { - Input(currentLevel, "aggregationRegionType"); - Input(currentLevel, "numDimensions"); - Input(currentLevel, "lNodesPerDim"); + TEUCHOS_TEST_FOR_EXCEPTION( + !currentLevel.IsAvailable("aggregationRegionType", NoFactory::get()), + Exceptions::RuntimeError, + "Aggregation region type was not provided by the user!"); } + if (currentLevel.IsAvailable("numDimensions", NoFactory::get())) { + currentLevel.DeclareInput("numDimensions", NoFactory::get(), this); + } else { + TEUCHOS_TEST_FOR_EXCEPTION( + currentLevel.IsAvailable("numDimensions", NoFactory::get()), + 
Exceptions::RuntimeError, + "numDimensions was not provided by the user on level0!"); + } + if (currentLevel.IsAvailable("lNodesPerDim", NoFactory::get())) { + currentLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this); + } else { + TEUCHOS_TEST_FOR_EXCEPTION( + currentLevel.IsAvailable("lNodesPerDim", NoFactory::get()), + Exceptions::RuntimeError, + "lNodesPerDim was not provided by the user on level0!"); + } + } else { + Input(currentLevel, "aggregationRegionType"); + Input(currentLevel, "numDimensions"); + Input(currentLevel, "lNodesPerDim"); + } + /* UncoupledAggregation */ + Input(currentLevel, "DofsPerNode"); - - /* UncoupledAggregation */ - Input(currentLevel, "DofsPerNode"); - - // request special data necessary for InterfaceAggregation - if (pL.get("aggregation: use interface aggregation") == true){ - if(currentLevel.GetLevelID() == 0) { - if(currentLevel.IsAvailable("interfacesDimensions", NoFactory::get())) { - currentLevel.DeclareInput("interfacesDimensions", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(!currentLevel.IsAvailable("interfacesDimensions", NoFactory::get()), - Exceptions::RuntimeError, - "interfacesDimensions was not provided by the user on level0!"); - } - if(currentLevel.IsAvailable("nodeOnInterface", NoFactory::get())) { - currentLevel.DeclareInput("nodeOnInterface", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(!currentLevel.IsAvailable("nodeOnInterface", NoFactory::get()), - Exceptions::RuntimeError, - "nodeOnInterface was not provided by the user on level0!"); - } + // request special data necessary for InterfaceAggregation + if (pL.get("aggregation: use interface aggregation") == true) { + if (currentLevel.GetLevelID() == 0) { + if (currentLevel.IsAvailable("interfacesDimensions", NoFactory::get())) { + currentLevel.DeclareInput("interfacesDimensions", NoFactory::get(), + this); } else { - Input(currentLevel, "interfacesDimensions"); - Input(currentLevel, "nodeOnInterface"); + 
TEUCHOS_TEST_FOR_EXCEPTION( + !currentLevel.IsAvailable("interfacesDimensions", NoFactory::get()), + Exceptions::RuntimeError, + "interfacesDimensions was not provided by the user on level0!"); } - } - - // request special data necessary for OnePtAggregationAlgorithm - std::string mapOnePtName = pL.get("OnePt aggregate map name"); - if (mapOnePtName.length() > 0) { - std::string mapOnePtFactName = pL.get("OnePt aggregate map factory"); - if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { - currentLevel.DeclareInput(mapOnePtName, NoFactory::get()); + if (currentLevel.IsAvailable("nodeOnInterface", NoFactory::get())) { + currentLevel.DeclareInput("nodeOnInterface", NoFactory::get(), this); } else { - RCP mapOnePtFact = GetFactory(mapOnePtFactName); - currentLevel.DeclareInput(mapOnePtName, mapOnePtFact.get()); + TEUCHOS_TEST_FOR_EXCEPTION( + !currentLevel.IsAvailable("nodeOnInterface", NoFactory::get()), + Exceptions::RuntimeError, + "nodeOnInterface was not provided by the user on level0!"); } + } else { + Input(currentLevel, "interfacesDimensions"); + Input(currentLevel, "nodeOnInterface"); } - } // DeclareInput() - - template - void HybridAggregationFactory:: - Build(Level ¤tLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); + } - RCP out; - if(const char* dbg = std::getenv("MUELU_HYBRIDAGGREGATION_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); + // request special data necessary for OnePtAggregationAlgorithm + std::string mapOnePtName = pL.get("OnePt aggregate map name"); + if (mapOnePtName.length() > 0) { + std::string mapOnePtFactName = + pL.get("OnePt aggregate map factory"); + if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { + currentLevel.DeclareInput(mapOnePtName, NoFactory::get()); } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + RCP mapOnePtFact = GetFactory(mapOnePtFactName); + 
currentLevel.DeclareInput(mapOnePtName, mapOnePtFact.get()); } + } +} // DeclareInput() + +template +void HybridAggregationFactory::Build( + Level ¤tLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + + RCP out; + if (const char *dbg = std::getenv("MUELU_HYBRIDAGGREGATION_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } - *out << "Entering hybrid aggregation" << std::endl; - - ParameterList pL = GetParameterList(); - bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed - - if (pL.get("aggregation: max agg size") == -1) - pL.set("aggregation: max agg size", INT_MAX); - - // define aggregation algorithms - RCP graphFact = GetFactory("Graph"); - - // General problem informations are gathered from data stored in the problem matix. - RCP graph = Get< RCP >(currentLevel, "Graph"); - RCP fineMap = graph->GetDomainMap(); - const int myRank = fineMap->getComm()->getRank(); - const int numRanks = fineMap->getComm()->getSize(); - - out->setProcRankAndSize(graph->GetImportMap()->getComm()->getRank(), - graph->GetImportMap()->getComm()->getSize()); - - // Build aggregates - RCP aggregates = rcp(new Aggregates(*graph)); - aggregates->setObjectLabel("HB"); + *out << "Entering hybrid aggregation" << std::endl; + + ParameterList pL = GetParameterList(); + bDefinitionPhase_ = false; // definition phase is finished, now all + // aggregation algorithm information is fixed + + if (pL.get("aggregation: max agg size") == -1) + pL.set("aggregation: max agg size", INT_MAX); + + // define aggregation algorithms + RCP graphFact = GetFactory("Graph"); + + // General problem informations are gathered from data stored in the problem + // matix. 
+ RCP graph = Get>(currentLevel, "Graph"); + RCP fineMap = graph->GetDomainMap(); + const int myRank = fineMap->getComm()->getRank(); + const int numRanks = fineMap->getComm()->getSize(); + + out->setProcRankAndSize(graph->GetImportMap()->getComm()->getRank(), + graph->GetImportMap()->getComm()->getSize()); + + // Build aggregates + RCP aggregates = rcp(new Aggregates(*graph)); + aggregates->setObjectLabel("HB"); + + // construct aggStat information + const LO numRows = graph->GetNodeNumVertices(); + std::vector aggStat(numRows, READY); + + // Get aggregation type for region + std::string regionType; + if (currentLevel.GetLevelID() == 0) { + // On level 0, data is provided by applications and has no associated + // factory. + regionType = currentLevel.Get("aggregationRegionType", + NoFactory::get()); + } else { + // On level > 0, data is provided directly by generating factories. + regionType = Get(currentLevel, "aggregationRegionType"); + } - // construct aggStat information - const LO numRows = graph->GetNodeNumVertices(); - std::vector aggStat(numRows, READY); + int numDimensions = 0; + if (currentLevel.GetLevelID() == 0) { + // On level 0, data is provided by applications and has no associated + // factory. + numDimensions = currentLevel.Get("numDimensions", NoFactory::get()); + } else { + // On level > 0, data is provided directly by generating factories. + numDimensions = Get(currentLevel, "numDimensions"); + } - // Get aggregation type for region - std::string regionType; - if(currentLevel.GetLevelID() == 0) { - // On level 0, data is provided by applications and has no associated factory. 
- regionType = currentLevel.Get("aggregationRegionType", NoFactory::get()); + // Get the coarsening rate (potentially used for both structured and uncoupled + // aggregation if interface) + std::string coarseningRate = + pL.get("aggregation: coarsening rate"); + Teuchos::Array coarseRate; + try { + coarseRate = Teuchos::fromStringToArray(coarseningRate); + } catch (const Teuchos::InvalidArrayStringRepresentation &e) { + GetOStream(Errors, -1) << " *** \"aggregation: coarsening rate\" must be a " + "string convertible into an array! *** " + << std::endl; + throw e; + } + TEUCHOS_TEST_FOR_EXCEPTION( + (coarseRate.size() > 1) && (coarseRate.size() < numDimensions), + Exceptions::RuntimeError, + "\"aggregation: coarsening rate\" must have at least as many" + " components as the number of spatial dimensions in the problem."); + + algos_.clear(); + LO numNonAggregatedNodes = numRows; + if (regionType == "structured") { + // Add AggregationStructuredAlgorithm + algos_.push_back(rcp(new AggregationStructuredAlgorithm(graphFact))); + + // Since we want to operate on nodes and not dof, we need to modify the + // rowMap in order to obtain a nodeMap. + const int interpolationOrder = pL.get("aggregation: coarsening order"); + Array lFineNodesPerDir(3); + if (currentLevel.GetLevelID() == 0) { + // On level 0, data is provided by applications and has no associated + // factory. + lFineNodesPerDir = + currentLevel.Get>("lNodesPerDim", NoFactory::get()); } else { // On level > 0, data is provided directly by generating factories. - regionType = Get< std::string >(currentLevel, "aggregationRegionType"); + lFineNodesPerDir = Get>(currentLevel, "lNodesPerDim"); } - int numDimensions = 0; - if(currentLevel.GetLevelID() == 0) { - // On level 0, data is provided by applications and has no associated factory. - numDimensions = currentLevel.Get("numDimensions", NoFactory::get()); - } else { - // On level > 0, data is provided directly by generating factories. 
- numDimensions = Get(currentLevel, "numDimensions"); + // Set lFineNodesPerDir to 1 for directions beyond numDimensions + for (int dim = numDimensions; dim < 3; ++dim) { + lFineNodesPerDir[dim] = 1; } - // Get the coarsening rate (potentially used for both structured and uncoupled aggregation if interface) - std::string coarseningRate = pL.get("aggregation: coarsening rate"); - Teuchos::Array coarseRate; - try { - coarseRate = Teuchos::fromStringToArray(coarseningRate); - } catch(const Teuchos::InvalidArrayStringRepresentation& e) { - GetOStream(Errors,-1) << " *** \"aggregation: coarsening rate\" must be a string convertible into an array! *** " - << std::endl; - throw e; + // Now that we have extracted info from the level, create the IndexManager + RCP> geoData; + geoData = rcp(new MueLu::UncoupledIndexManager( + fineMap->getComm(), false, numDimensions, interpolationOrder, myRank, + numRanks, Array(3, -1), lFineNodesPerDir, coarseRate, false)); + + TEUCHOS_TEST_FOR_EXCEPTION( + fineMap->getLocalNumElements() != + static_cast(geoData->getNumLocalFineNodes()), + Exceptions::RuntimeError, + "The local number of elements in the graph's map is not equal to " + "the number of nodes given by: lNodesPerDim!"); + + aggregates->SetIndexManager(geoData); + aggregates->SetNumAggregates(geoData->getNumLocalCoarseNodes()); + + Set(currentLevel, "lCoarseNodesPerDim", + geoData->getLocalCoarseNodesPerDir()); + + } // end structured aggregation setup + + if (regionType == "uncoupled") { + // Add unstructred aggregation phases + algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm(graphFact))); + if (pL.get("aggregation: use interface aggregation") == true) + algos_.push_back(rcp(new InterfaceAggregationAlgorithm(graphFact))); + if (pL.get("aggregation: allow user-specified singletons") == true) + algos_.push_back(rcp(new OnePtAggregationAlgorithm(graphFact))); + if (pL.get("aggregation: enable phase 1") == true) + algos_.push_back(rcp(new 
AggregationPhase1Algorithm(graphFact))); + if (pL.get("aggregation: enable phase 2a") == true) + algos_.push_back(rcp(new AggregationPhase2aAlgorithm(graphFact))); + if (pL.get("aggregation: enable phase 2b") == true) + algos_.push_back(rcp(new AggregationPhase2bAlgorithm(graphFact))); + if (pL.get("aggregation: enable phase 3") == true) + algos_.push_back(rcp(new AggregationPhase3Algorithm(graphFact))); + + *out << " Build interface aggregates" << std::endl; + // interface + if (pL.get("aggregation: use interface aggregation") == true) { + BuildInterfaceAggregates(currentLevel, aggregates, aggStat, + numNonAggregatedNodes, coarseRate); } - TEUCHOS_TEST_FOR_EXCEPTION((coarseRate.size() > 1) && (coarseRate.size() < numDimensions), - Exceptions::RuntimeError, - "\"aggregation: coarsening rate\" must have at least as many" - " components as the number of spatial dimensions in the problem."); - - algos_.clear(); - LO numNonAggregatedNodes = numRows; - if (regionType == "structured") { - // Add AggregationStructuredAlgorithm - algos_.push_back(rcp(new AggregationStructuredAlgorithm(graphFact))); - - // Since we want to operate on nodes and not dof, we need to modify the rowMap in order to - // obtain a nodeMap. - const int interpolationOrder = pL.get("aggregation: coarsening order"); - Array lFineNodesPerDir(3); - if(currentLevel.GetLevelID() == 0) { - // On level 0, data is provided by applications and has no associated factory. - lFineNodesPerDir = currentLevel.Get >("lNodesPerDim", NoFactory::get()); - } else { - // On level > 0, data is provided directly by generating factories. 
- lFineNodesPerDir = Get >(currentLevel, "lNodesPerDim"); - } - - // Set lFineNodesPerDir to 1 for directions beyond numDimensions - for(int dim = numDimensions; dim < 3; ++dim) { - lFineNodesPerDir[dim] = 1; - } - // Now that we have extracted info from the level, create the IndexManager - RCP > geoData; - geoData = rcp(new MueLu::UncoupledIndexManager(fineMap->getComm(), - false, - numDimensions, - interpolationOrder, - myRank, - numRanks, - Array(3, -1), - lFineNodesPerDir, - coarseRate, false)); - - TEUCHOS_TEST_FOR_EXCEPTION(fineMap->getLocalNumElements() - != static_cast(geoData->getNumLocalFineNodes()), - Exceptions::RuntimeError, - "The local number of elements in the graph's map is not equal to " - "the number of nodes given by: lNodesPerDim!"); - - aggregates->SetIndexManager(geoData); - aggregates->SetNumAggregates(geoData->getNumLocalCoarseNodes()); - - Set(currentLevel, "lCoarseNodesPerDim", geoData->getLocalCoarseNodesPerDir()); - - } // end structured aggregation setup - - if (regionType == "uncoupled"){ - // Add unstructred aggregation phases - algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm(graphFact))); - if (pL.get("aggregation: use interface aggregation") == true) algos_.push_back(rcp(new InterfaceAggregationAlgorithm (graphFact))); - if (pL.get("aggregation: allow user-specified singletons") == true) algos_.push_back(rcp(new OnePtAggregationAlgorithm (graphFact))); - if (pL.get("aggregation: enable phase 1" ) == true) algos_.push_back(rcp(new AggregationPhase1Algorithm (graphFact))); - if (pL.get("aggregation: enable phase 2a") == true) algos_.push_back(rcp(new AggregationPhase2aAlgorithm (graphFact))); - if (pL.get("aggregation: enable phase 2b") == true) algos_.push_back(rcp(new AggregationPhase2bAlgorithm (graphFact))); - if (pL.get("aggregation: enable phase 3" ) == true) algos_.push_back(rcp(new AggregationPhase3Algorithm (graphFact))); - - *out << " Build interface aggregates" << std::endl; - // interface - if 
(pL.get("aggregation: use interface aggregation") == true) { - BuildInterfaceAggregates(currentLevel, aggregates, aggStat, numNonAggregatedNodes, - coarseRate); - } + *out << "Treat Dirichlet BC" << std::endl; + // Dirichlet boundary + ArrayRCP dirichletBoundaryMap = graph->GetBoundaryNodeMap(); + if (dirichletBoundaryMap != Teuchos::null) + for (LO i = 0; i < numRows; i++) + if (dirichletBoundaryMap[i] == true) + aggStat[i] = BOUNDARY; - *out << "Treat Dirichlet BC" << std::endl; - // Dirichlet boundary - ArrayRCP dirichletBoundaryMap = graph->GetBoundaryNodeMap(); - if (dirichletBoundaryMap != Teuchos::null) - for (LO i = 0; i < numRows; i++) - if (dirichletBoundaryMap[i] == true) - aggStat[i] = BOUNDARY; - - // OnePt aggregation - std::string mapOnePtName = pL.get("OnePt aggregate map name"); - RCP OnePtMap = Teuchos::null; - if (mapOnePtName.length()) { - std::string mapOnePtFactName = pL.get("OnePt aggregate map factory"); - if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { - OnePtMap = currentLevel.Get >(mapOnePtName, NoFactory::get()); - } else { - RCP mapOnePtFact = GetFactory(mapOnePtFactName); - OnePtMap = currentLevel.Get >(mapOnePtName, mapOnePtFact.get()); - } + // OnePt aggregation + std::string mapOnePtName = pL.get("OnePt aggregate map name"); + RCP OnePtMap = Teuchos::null; + if (mapOnePtName.length()) { + std::string mapOnePtFactName = + pL.get("OnePt aggregate map factory"); + if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { + OnePtMap = currentLevel.Get>(mapOnePtName, NoFactory::get()); + } else { + RCP mapOnePtFact = GetFactory(mapOnePtFactName); + OnePtMap = currentLevel.Get>(mapOnePtName, mapOnePtFact.get()); } + } - LO nDofsPerNode = Get(currentLevel, "DofsPerNode"); - GO indexBase = graph->GetDomainMap()->getIndexBase(); - if (OnePtMap != Teuchos::null) { - for (LO i = 0; i < numRows; i++) { - // reconstruct global row id (FIXME only works for contiguous maps) - GO grid = 
(graph->GetDomainMap()->getGlobalElement(i)-indexBase) * nDofsPerNode + indexBase; - for (LO kr = 0; kr < nDofsPerNode; kr++) - if (OnePtMap->isNodeGlobalElement(grid + kr)) - aggStat[i] = ONEPT; - } + LO nDofsPerNode = Get(currentLevel, "DofsPerNode"); + GO indexBase = graph->GetDomainMap()->getIndexBase(); + if (OnePtMap != Teuchos::null) { + for (LO i = 0; i < numRows; i++) { + // reconstruct global row id (FIXME only works for contiguous maps) + GO grid = (graph->GetDomainMap()->getGlobalElement(i) - indexBase) * + nDofsPerNode + + indexBase; + for (LO kr = 0; kr < nDofsPerNode; kr++) + if (OnePtMap->isNodeGlobalElement(grid + kr)) + aggStat[i] = ONEPT; } + } - // Create a fake lCoarseNodesPerDir for CoordinatesTranferFactory - Array lCoarseNodesPerDir(3,-1); - Set(currentLevel, "lCoarseNodesPerDim", lCoarseNodesPerDir); - } // end uncoupled aggregation setup - - aggregates->AggregatesCrossProcessors(false); // No coupled aggregation - - *out << "Run all the algorithms on the local rank" << std::endl; - for (size_t a = 0; a < algos_.size(); a++) { - std::string phase = algos_[a]->description(); - SubFactoryMonitor sfm(*this, "Algo \"" + phase + "\"", currentLevel); - *out << regionType <<" | Executing phase " << a << std::endl; + // Create a fake lCoarseNodesPerDir for CoordinatesTranferFactory + Array lCoarseNodesPerDir(3, -1); + Set(currentLevel, "lCoarseNodesPerDim", lCoarseNodesPerDir); + } // end uncoupled aggregation setup - int oldRank = algos_[a]->SetProcRankVerbose(this->GetProcRankVerbose()); - algos_[a]->BuildAggregates(pL, *graph, *aggregates, aggStat, numNonAggregatedNodes); - algos_[a]->SetProcRankVerbose(oldRank); - *out << regionType <<" | Done Executing phase " << a << std::endl; - } + aggregates->AggregatesCrossProcessors(false); // No coupled aggregation - *out << "Compute statistics on aggregates" << std::endl; - aggregates->ComputeAggregateSizes(true/*forceRecompute*/); + *out << "Run all the algorithms on the local rank" << std::endl; + 
for (size_t a = 0; a < algos_.size(); a++) { + std::string phase = algos_[a]->description(); + SubFactoryMonitor sfm(*this, "Algo \"" + phase + "\"", currentLevel); + *out << regionType << " | Executing phase " << a << std::endl; - Set(currentLevel, "Aggregates", aggregates); - Set(currentLevel, "numDimensions", numDimensions); - Set(currentLevel, "aggregationRegionTypeCoarse", regionType); + int oldRank = algos_[a]->SetProcRankVerbose(this->GetProcRankVerbose()); + algos_[a]->BuildAggregates(pL, *graph, *aggregates, aggStat, + numNonAggregatedNodes); + algos_[a]->SetProcRankVerbose(oldRank); + *out << regionType << " | Done Executing phase " << a << std::endl; + } - GetOStream(Statistics1) << aggregates->description() << std::endl; - *out << "HybridAggregation done!" << std::endl; + *out << "Compute statistics on aggregates" << std::endl; + aggregates->ComputeAggregateSizes(true /*forceRecompute*/); + + Set(currentLevel, "Aggregates", aggregates); + Set(currentLevel, "numDimensions", numDimensions); + Set(currentLevel, "aggregationRegionTypeCoarse", regionType); + + GetOStream(Statistics1) << aggregates->description() << std::endl; + *out << "HybridAggregation done!" 
<< std::endl; +} + +template +void HybridAggregationFactory:: + BuildInterfaceAggregates(Level ¤tLevel, RCP aggregates, + std::vector &aggStat, + LO &numNonAggregatedNodes, + Array coarseRate) const { + FactoryMonitor m(*this, "BuildInterfaceAggregates", currentLevel); + + RCP out; + if (const char *dbg = std::getenv("MUELU_HYBRIDAGGREGATION_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); } - template - void HybridAggregationFactory:: - BuildInterfaceAggregates(Level& currentLevel, RCP aggregates, - std::vector& aggStat, LO& numNonAggregatedNodes, - Array coarseRate) const { - FactoryMonitor m(*this, "BuildInterfaceAggregates", currentLevel); - - RCP out; - if(const char* dbg = std::getenv("MUELU_HYBRIDAGGREGATION_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + // Extract and format input data for algo + if (coarseRate.size() == 1) { + coarseRate.resize(3, coarseRate[0]); + } + ArrayRCP vertex2AggId = aggregates->GetVertex2AggId()->getDataNonConst(0); + ArrayRCP procWinner = aggregates->GetProcWinner()->getDataNonConst(0); + Array interfacesDimensions = + Get>(currentLevel, "interfacesDimensions"); + Array nodesOnInterfaces = Get>(currentLevel, "nodeOnInterface"); + const int numInterfaces = interfacesDimensions.size() / 3; + const int myRank = aggregates->GetMap()->getComm()->getRank(); + + // Create coarse level container to gather data on the fly + Array coarseInterfacesDimensions(interfacesDimensions.size()); + Array nodesOnCoarseInterfaces; + { // Scoping the temporary variables... 
+ LO endRate, totalNumCoarseNodes = 0, numCoarseNodes; + for (int interfaceIdx = 0; interfaceIdx < numInterfaces; ++interfaceIdx) { + numCoarseNodes = 1; + for (int dim = 0; dim < 3; ++dim) { + endRate = (interfacesDimensions[3 * interfaceIdx + dim] - 1) % + coarseRate[dim]; + if (interfacesDimensions[3 * interfaceIdx + dim] == 1) { + coarseInterfacesDimensions[3 * interfaceIdx + dim] = 1; + } else { + coarseInterfacesDimensions[3 * interfaceIdx + dim] = + (interfacesDimensions[3 * interfaceIdx + dim] - 1) / + coarseRate[dim] + + 2; + if (endRate == 0) { + coarseInterfacesDimensions[3 * interfaceIdx + dim]--; + } + } + numCoarseNodes *= coarseInterfacesDimensions[3 * interfaceIdx + dim]; + } + totalNumCoarseNodes += numCoarseNodes; } + nodesOnCoarseInterfaces.resize(totalNumCoarseNodes, -1); + } - // Extract and format input data for algo - if(coarseRate.size() == 1) {coarseRate.resize(3, coarseRate[0]);} - ArrayRCP vertex2AggId = aggregates->GetVertex2AggId()->getDataNonConst(0); - ArrayRCP procWinner = aggregates->GetProcWinner() ->getDataNonConst(0); - Array interfacesDimensions = Get >(currentLevel, "interfacesDimensions"); - Array nodesOnInterfaces = Get >(currentLevel, "nodeOnInterface"); - const int numInterfaces = interfacesDimensions.size() / 3; - const int myRank = aggregates->GetMap()->getComm()->getRank(); - - // Create coarse level container to gather data on the fly - Array coarseInterfacesDimensions(interfacesDimensions.size()); - Array nodesOnCoarseInterfaces; - { // Scoping the temporary variables... 
- LO endRate, totalNumCoarseNodes = 0, numCoarseNodes; - for(int interfaceIdx = 0; interfaceIdx < numInterfaces; ++interfaceIdx) { - numCoarseNodes = 1; - for(int dim = 0; dim < 3; ++dim) { - endRate = (interfacesDimensions[3*interfaceIdx + dim] - 1) % coarseRate[dim]; - if(interfacesDimensions[3*interfaceIdx + dim] == 1) { - coarseInterfacesDimensions[3*interfaceIdx + dim] = 1; - } else { - coarseInterfacesDimensions[3*interfaceIdx + dim] - = (interfacesDimensions[3*interfaceIdx+dim]-1) / coarseRate[dim] + 2; - if(endRate==0){ coarseInterfacesDimensions[3*interfaceIdx + dim]--;} - } - numCoarseNodes *= coarseInterfacesDimensions[3*interfaceIdx + dim]; + Array endRate(3); + LO interfaceOffset = 0, aggregateCount = 0, coarseNodeCount = 0; + for (int interfaceIdx = 0; interfaceIdx < numInterfaces; ++interfaceIdx) { + ArrayView fineNodesPerDim = interfacesDimensions(3 * interfaceIdx, 3); + ArrayView coarseNodesPerDim = + coarseInterfacesDimensions(3 * interfaceIdx, 3); + LO numInterfaceNodes = 1, numCoarseNodes = 1; + for (int dim = 0; dim < 3; ++dim) { + numInterfaceNodes *= fineNodesPerDim[dim]; + numCoarseNodes *= coarseNodesPerDim[dim]; + endRate[dim] = (fineNodesPerDim[dim] - 1) % coarseRate[dim]; + } + ArrayView interfaceNodes = + nodesOnInterfaces(interfaceOffset, numInterfaceNodes); + + interfaceOffset += numInterfaceNodes; + + LO rem, rate, fineNodeIdx; + Array nodeIJK(3), coarseIJK(3), rootIJK(3); + // First find treat coarse nodes as they generate the aggregate IDs + // and they might be repeated on multiple interfaces (think corners and + // edges). 
+ for (LO coarseNodeIdx = 0; coarseNodeIdx < numCoarseNodes; + ++coarseNodeIdx) { + coarseIJK[2] = + coarseNodeIdx / (coarseNodesPerDim[0] * coarseNodesPerDim[1]); + rem = coarseNodeIdx % (coarseNodesPerDim[0] * coarseNodesPerDim[1]); + coarseIJK[1] = rem / coarseNodesPerDim[0]; + coarseIJK[0] = rem % coarseNodesPerDim[0]; + + for (LO dim = 0; dim < 3; ++dim) { + if (coarseIJK[dim] == coarseNodesPerDim[dim] - 1) { + nodeIJK[dim] = fineNodesPerDim[dim] - 1; + } else { + nodeIJK[dim] = coarseIJK[dim] * coarseRate[dim]; } - totalNumCoarseNodes += numCoarseNodes; } - nodesOnCoarseInterfaces.resize(totalNumCoarseNodes, -1); + fineNodeIdx = + (nodeIJK[2] * fineNodesPerDim[1] + nodeIJK[1]) * fineNodesPerDim[0] + + nodeIJK[0]; + + if (aggStat[interfaceNodes[fineNodeIdx]] == READY) { + vertex2AggId[interfaceNodes[fineNodeIdx]] = aggregateCount; + procWinner[interfaceNodes[fineNodeIdx]] = myRank; + aggStat[interfaceNodes[fineNodeIdx]] = AGGREGATED; + ++aggregateCount; + --numNonAggregatedNodes; + } + nodesOnCoarseInterfaces[coarseNodeCount] = + vertex2AggId[interfaceNodes[fineNodeIdx]]; + ++coarseNodeCount; } - Array endRate(3); - LO interfaceOffset = 0, aggregateCount = 0, coarseNodeCount = 0; - for(int interfaceIdx = 0; interfaceIdx < numInterfaces; ++interfaceIdx) { - ArrayView fineNodesPerDim = interfacesDimensions(3*interfaceIdx, 3); - ArrayView coarseNodesPerDim = coarseInterfacesDimensions(3*interfaceIdx, 3); - LO numInterfaceNodes = 1, numCoarseNodes = 1; - for(int dim = 0; dim < 3; ++dim) { - numInterfaceNodes *= fineNodesPerDim[dim]; - numCoarseNodes *= coarseNodesPerDim[dim]; - endRate[dim] = (fineNodesPerDim[dim]-1) % coarseRate[dim]; + // Now loop over all the node on the interface + // skip the coarse nodes as they are already aggregated + // and find the appropriate aggregate ID for the fine nodes. + for (LO nodeIdx = 0; nodeIdx < numInterfaceNodes; ++nodeIdx) { + + // If the node is already aggregated skip it! 
+ if (aggStat[interfaceNodes[nodeIdx]] == AGGREGATED) { + continue; } - ArrayView interfaceNodes = nodesOnInterfaces(interfaceOffset, numInterfaceNodes); - - interfaceOffset += numInterfaceNodes; - - LO rem, rate, fineNodeIdx; - Array nodeIJK(3), coarseIJK(3), rootIJK(3); - // First find treat coarse nodes as they generate the aggregate IDs - // and they might be repeated on multiple interfaces (think corners and edges). - for(LO coarseNodeIdx = 0; coarseNodeIdx < numCoarseNodes; ++coarseNodeIdx) { - coarseIJK[2] = coarseNodeIdx / (coarseNodesPerDim[0]*coarseNodesPerDim[1]); - rem = coarseNodeIdx % (coarseNodesPerDim[0]*coarseNodesPerDim[1]); - coarseIJK[1] = rem / coarseNodesPerDim[0]; - coarseIJK[0] = rem % coarseNodesPerDim[0]; - - for(LO dim = 0; dim < 3; ++dim) { - if(coarseIJK[dim] == coarseNodesPerDim[dim] - 1) { - nodeIJK[dim] = fineNodesPerDim[dim] - 1; - } else { - nodeIJK[dim] = coarseIJK[dim]*coarseRate[dim]; - } + + nodeIJK[2] = nodeIdx / (fineNodesPerDim[0] * fineNodesPerDim[1]); + rem = nodeIdx % (fineNodesPerDim[0] * fineNodesPerDim[1]); + nodeIJK[1] = rem / fineNodesPerDim[0]; + nodeIJK[0] = rem % fineNodesPerDim[0]; + + for (int dim = 0; dim < 3; ++dim) { + coarseIJK[dim] = nodeIJK[dim] / coarseRate[dim]; + rem = nodeIJK[dim] % coarseRate[dim]; + if (nodeIJK[dim] < fineNodesPerDim[dim] - endRate[dim]) { + rate = coarseRate[dim]; + } else { + rate = endRate[dim]; } - fineNodeIdx = (nodeIJK[2]*fineNodesPerDim[1] + nodeIJK[1])*fineNodesPerDim[0] + nodeIJK[0]; - - if(aggStat[interfaceNodes[fineNodeIdx]] == READY) { - vertex2AggId[interfaceNodes[fineNodeIdx]] = aggregateCount; - procWinner[interfaceNodes[fineNodeIdx]] = myRank; - aggStat[interfaceNodes[fineNodeIdx]] = AGGREGATED; - ++aggregateCount; - --numNonAggregatedNodes; + if (rem > (rate / 2)) { + ++coarseIJK[dim]; } - nodesOnCoarseInterfaces[coarseNodeCount] = vertex2AggId[interfaceNodes[fineNodeIdx]]; - ++coarseNodeCount; } - // Now loop over all the node on the interface - // skip the coarse 
nodes as they are already aggregated - // and find the appropriate aggregate ID for the fine nodes. - for(LO nodeIdx = 0; nodeIdx < numInterfaceNodes; ++nodeIdx) { - - // If the node is already aggregated skip it! - if(aggStat[interfaceNodes[nodeIdx]] == AGGREGATED) {continue;} - - nodeIJK[2] = nodeIdx / (fineNodesPerDim[0]*fineNodesPerDim[1]); - rem = nodeIdx % (fineNodesPerDim[0]*fineNodesPerDim[1]); - nodeIJK[1] = rem / fineNodesPerDim[0]; - nodeIJK[0] = rem % fineNodesPerDim[0]; - - for(int dim = 0; dim < 3; ++dim) { - coarseIJK[dim] = nodeIJK[dim] / coarseRate[dim]; - rem = nodeIJK[dim] % coarseRate[dim]; - if(nodeIJK[dim] < fineNodesPerDim[dim] - endRate[dim]) { - rate = coarseRate[dim]; - } else { - rate = endRate[dim]; - } - if(rem > (rate / 2)) {++coarseIJK[dim];} - } - - for(LO dim = 0; dim < 3; ++dim) { - if(coarseIJK[dim] == coarseNodesPerDim[dim] - 1) { - nodeIJK[dim] = fineNodesPerDim[dim] - 1; - } else { - nodeIJK[dim] = coarseIJK[dim]*coarseRate[dim]; - } + for (LO dim = 0; dim < 3; ++dim) { + if (coarseIJK[dim] == coarseNodesPerDim[dim] - 1) { + nodeIJK[dim] = fineNodesPerDim[dim] - 1; + } else { + nodeIJK[dim] = coarseIJK[dim] * coarseRate[dim]; } - fineNodeIdx = (nodeIJK[2]*fineNodesPerDim[1] + nodeIJK[1])*fineNodesPerDim[0] + nodeIJK[0]; - - vertex2AggId[interfaceNodes[nodeIdx]] = vertex2AggId[interfaceNodes[fineNodeIdx]]; - procWinner[interfaceNodes[nodeIdx]] = myRank; - aggStat[interfaceNodes[nodeIdx]] = AGGREGATED; - --numNonAggregatedNodes; - } // Loop over interface nodes - } // Loop over the interfaces + } + fineNodeIdx = + (nodeIJK[2] * fineNodesPerDim[1] + nodeIJK[1]) * fineNodesPerDim[0] + + nodeIJK[0]; - // Update aggregates information before subsequent aggregation algorithms - aggregates->SetNumAggregates(aggregateCount); + vertex2AggId[interfaceNodes[nodeIdx]] = + vertex2AggId[interfaceNodes[fineNodeIdx]]; + procWinner[interfaceNodes[nodeIdx]] = myRank; + aggStat[interfaceNodes[nodeIdx]] = AGGREGATED; + --numNonAggregatedNodes; + } 
// Loop over interface nodes + } // Loop over the interfaces - // Set coarse data for next level - Set(currentLevel, "coarseInterfacesDimensions", coarseInterfacesDimensions); - Set(currentLevel, "nodeOnCoarseInterface", nodesOnCoarseInterfaces); + // Update aggregates information before subsequent aggregation algorithms + aggregates->SetNumAggregates(aggregateCount); - } // BuildInterfaceAggregates() + // Set coarse data for next level + Set(currentLevel, "coarseInterfacesDimensions", coarseInterfacesDimensions); + Set(currentLevel, "nodeOnCoarseInterface", nodesOnCoarseInterfaces); -} //namespace MueLu +} // BuildInterfaceAggregates() +} // namespace MueLu #endif /* MUELU_HYBRIDAGGREGATIONFACTORY_DEF_HPP */ diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationFactory_decl.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationFactory_decl.hpp index 7cca1e45f86e..c2afcbab2c94 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationFactory_decl.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationFactory_decl.hpp @@ -46,88 +46,98 @@ #ifndef MUELU_AMALGAMATIONFACTORY_DECL_HPP #define MUELU_AMALGAMATIONFACTORY_DECL_HPP -#include #include +#include +#include "MueLu_AmalgamationInfo_fwd.hpp" #include "MueLu_ConfigDefs.hpp" #include "MueLu_SingleLevelFactoryBase.hpp" -#include "MueLu_AmalgamationInfo_fwd.hpp" -#include "MueLu_Level_fwd.hpp" #include "MueLu_Exceptions.hpp" +#include "MueLu_Level_fwd.hpp" namespace MueLu { - /*! - @class AmalgamationFactory - @brief AmalgamationFactory for subblocks of strided map based amalgamation data +/*! + @class AmalgamationFactory + @brief AmalgamationFactory for subblocks of strided map based amalgamation + data - Class generates unamalgamation information using matrix A with strided maps. - It stores the output information within an AmalgamationInfo object as "UnAmalgamationInfo". 
- This object contains + Class generates unamalgamation information using matrix A with strided maps. + It stores the output information within an AmalgamationInfo object as + "UnAmalgamationInfo". This object contains - \li \c nodegid2dofgids_ a map of all node ids of which the current proc has corresponding DOF gids (used by \c TentativePFactory). - \li \c gNodeIds vector of all node ids on the current proc (may be less than nodegid2dofgids_.size()). These nodes are stored on the current proc. + \li \c nodegid2dofgids_ a map of all node ids of which the current proc has + corresponding DOF gids (used by \c TentativePFactory). \li \c gNodeIds vector + of all node ids on the current proc (may be less than + nodegid2dofgids_.size()). These nodes are stored on the current proc. - */ +*/ - template - class AmalgamationFactory : public SingleLevelFactoryBase { +template +class AmalgamationFactory : public SingleLevelFactoryBase { #undef MUELU_AMALGAMATIONFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! @name Constructors/Destructors. - //@{ - - //! Constructor - AmalgamationFactory() = default; - - //! Destructor - virtual ~AmalgamationFactory() = default; - - RCP GetValidParameterList() const override; - - //@} - - //! Input - //@{ - - void DeclareInput(Level ¤tLevel) const override; - - //@} - - void Build(Level ¤tLevel) const override; - - /*! 
@brief Translate global (row/column) id to global amalgamation block id - * - * @note Assume that the node map has the same \c indexBase as the dof map - * - * @param gid (GlobalOrdinal): input global id (row gid or column gid) - * @param blockSize (LocalOrdinal): block size (needed for constant block size) - * @param offset (GlobalOrdinal): global offset for dofs (stored in strided map, default = 0) - * @param indexBase (GlobalOrdinal): indexBase for DOF map (and node map, default = 0) - */ - static const GlobalOrdinal DOFGid2NodeId(GlobalOrdinal gid, LocalOrdinal blockSize, const GlobalOrdinal offset /*= 0*/, - const GlobalOrdinal indexBase/* = 0*/); - - /*! @brief Method to create merged map for systems of PDEs. - * - * @param sourceMap (const Map&): source map with dofs which shall be amalgamated to a node map - * @param A (const Matrix&): operator A (matrix) with striding information (if available) - * @param amalgamatedMap (const Map&): amalgamated node based map - * @param translation (Array&): array storing local node ids given local dof ids (needed in CoalesceDropFactory) - */ - static void AmalgamateMap(const Map& sourceMap, const Matrix& A, RCP& amalgamatedMap, Array& translation); - - - }; //class AmalgamationFactory - -} //namespace MueLu +public: + //! @name Constructors/Destructors. + //@{ + + //! Constructor + AmalgamationFactory() = default; + + //! Destructor + virtual ~AmalgamationFactory() = default; + + RCP GetValidParameterList() const override; + + //@} + + //! Input + //@{ + + void DeclareInput(Level ¤tLevel) const override; + + //@} + + void Build(Level ¤tLevel) const override; + + /*! 
@brief Translate global (row/column) id to global amalgamation block id + * + * @note Assume that the node map has the same \c indexBase as the dof map + * + * @param gid (GlobalOrdinal): input global id (row gid or column gid) + * @param blockSize (LocalOrdinal): block size (needed for constant block + * size) + * @param offset (GlobalOrdinal): global offset for dofs (stored in strided + * map, default = 0) + * @param indexBase (GlobalOrdinal): indexBase for DOF map (and node map, + * default = 0) + */ + static const GlobalOrdinal + DOFGid2NodeId(GlobalOrdinal gid, LocalOrdinal blockSize, + const GlobalOrdinal offset /*= 0*/, + const GlobalOrdinal indexBase /* = 0*/); + + /*! @brief Method to create merged map for systems of PDEs. + * + * @param sourceMap (const Map&): source map with dofs which shall be + * amalgamated to a node map + * @param A (const Matrix&): operator A (matrix) with striding information (if + * available) + * @param amalgamatedMap (const Map&): amalgamated node based map + * @param translation (Array&): array storing local node ids given local + * dof ids (needed in CoalesceDropFactory) + */ + static void AmalgamateMap(const Map &sourceMap, const Matrix &A, + RCP &amalgamatedMap, + Array &translation); + +}; // class AmalgamationFactory + +} // namespace MueLu #define MUELU_AMALGAMATIONFACTORY_SHORT #endif // MUELU_AMALGAMATIONFACTORY_DECL_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationFactory_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationFactory_def.hpp index 60baf9efe0fe..7efaddda985a 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationFactory_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationFactory_def.hpp @@ -50,192 +50,220 @@ #include "MueLu_AmalgamationFactory_decl.hpp" -#include "MueLu_Level.hpp" #include "MueLu_AmalgamationInfo.hpp" +#include "MueLu_Level.hpp" #include "MueLu_Monitor.hpp" namespace MueLu { - template - 
RCP AmalgamationFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A"); - return validParamList; - } - - template - void AmalgamationFactory::DeclareInput(Level ¤tLevel) const { - Input(currentLevel, "A"); // sub-block from blocked A - } - - template - void AmalgamationFactory::Build(Level ¤tLevel) const - { - FactoryMonitor m(*this, "Build", currentLevel); - - RCP A = Get< RCP >(currentLevel, "A"); - - /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme. - fullblocksize is the number of storage blocks that must kept together during the amalgamation process. - - Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold: - - numPDEs = fullblocksize * storageblocksize. - - If numPDEs==1 - Matrix is point storage (classical CRS storage). storageblocksize=1 and fullblocksize=1 - No other values makes sense. - - If numPDEs>1 - If matrix uses point storage, then storageblocksize=1 and fullblockssize=numPDEs. - If matrix uses block storage, with block size of n, then storageblocksize=n, and fullblocksize=numPDEs/n. - Thus far, only storageblocksize=numPDEs and fullblocksize=1 has been tested. 
- */ - - - LO fullblocksize = 1; // block dim for fixed size blocks - GO offset = 0; // global offset of dof gids - LO blockid = -1; // block id in strided map - LO nStridedOffset = 0; // DOF offset for strided block id "blockid" (default = 0) - LO stridedblocksize = fullblocksize; // size of strided block id "blockid" (default = fullblocksize, only if blockid!=-1 stridedblocksize <= fullblocksize) - LO storageblocksize = A->GetStorageBlockSize(); - // GO indexBase = A->getRowMap()->getIndexBase(); // index base for maps (unused) - - // 1) check for blocking/striding information - - if (A->IsView("stridedMaps") && Teuchos::rcp_dynamic_cast(A->getRowMap("stridedMaps")) != Teuchos::null) { - Xpetra::viewLabel_t oldView = A->SwitchToView("stridedMaps"); // NOTE: "stridedMaps are always non-overlapping (correspond to range and domain maps!) - RCP stridedRowMap = Teuchos::rcp_dynamic_cast(A->getRowMap()); - TEUCHOS_TEST_FOR_EXCEPTION(stridedRowMap == Teuchos::null,Exceptions::BadCast,"MueLu::CoalesceFactory::Build: cast to strided row map failed."); - fullblocksize = stridedRowMap->getFixedBlockSize(); - offset = stridedRowMap->getOffset(); - blockid = stridedRowMap->getStridedBlockId(); - - if (blockid > -1) { - std::vector stridingInfo = stridedRowMap->getStridingData(); - for (size_t j = 0; j < Teuchos::as(blockid); j++) - nStridedOffset += stridingInfo[j]; - stridedblocksize = Teuchos::as(stridingInfo[blockid]); - - } else { - stridedblocksize = fullblocksize; - } - // Correct for the storageblocksize - // NOTE: Before this point fullblocksize is actually numPDEs - TEUCHOS_TEST_FOR_EXCEPTION(fullblocksize % storageblocksize != 0,Exceptions::RuntimeError,"AmalgamationFactory: fullblocksize needs to be a multiple of A->GetStorageBlockSize()"); - fullblocksize /= storageblocksize; - stridedblocksize /= storageblocksize; - - oldView = A->SwitchToView(oldView); - GetOStream(Runtime1) << "AmalagamationFactory::Build():" << " found fullblocksize=" << fullblocksize << " and 
stridedblocksize=" << stridedblocksize << " from strided maps. offset=" << offset << std::endl; - - } else { - GetOStream(Warnings0) << "AmalagamationFactory::Build(): no striding information available. Use blockdim=1 with offset=0" << std::endl; - } - +template +RCP +AmalgamationFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + validParamList->set>( + "A", Teuchos::null, "Generating factory of the matrix A"); + return validParamList; +} + +template +void AmalgamationFactory::DeclareInput(Level ¤tLevel) const { + Input(currentLevel, "A"); // sub-block from blocked A +} + +template +void AmalgamationFactory::Build( + Level ¤tLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + + RCP A = Get>(currentLevel, "A"); + + /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block + in the chosen storage scheme. fullblocksize is the number of storage blocks + that must kept together during the amalgamation process. + + Both of these quantities may be different than numPDEs (from + GetFixedBlockSize()), but the following must always hold: + + numPDEs = fullblocksize * storageblocksize. + + If numPDEs==1 + Matrix is point storage (classical CRS storage). storageblocksize=1 and + fullblocksize=1 No other values makes sense. + + If numPDEs>1 + If matrix uses point storage, then storageblocksize=1 and + fullblockssize=numPDEs. If matrix uses block storage, with block size of n, + then storageblocksize=n, and fullblocksize=numPDEs/n. Thus far, only + storageblocksize=numPDEs and fullblocksize=1 has been tested. 
+ */ + + LO fullblocksize = 1; // block dim for fixed size blocks + GO offset = 0; // global offset of dof gids + LO blockid = -1; // block id in strided map + LO nStridedOffset = + 0; // DOF offset for strided block id "blockid" (default = 0) + LO stridedblocksize = + fullblocksize; // size of strided block id "blockid" (default = + // fullblocksize, only if blockid!=-1 stridedblocksize <= + // fullblocksize) + LO storageblocksize = A->GetStorageBlockSize(); + // GO indexBase = A->getRowMap()->getIndexBase(); // index base for + // maps (unused) + + // 1) check for blocking/striding information + + if (A->IsView("stridedMaps") && + Teuchos::rcp_dynamic_cast( + A->getRowMap("stridedMaps")) != Teuchos::null) { + Xpetra::viewLabel_t oldView = A->SwitchToView( + "stridedMaps"); // NOTE: "stridedMaps are always non-overlapping + // (correspond to range and domain maps!) + RCP stridedRowMap = + Teuchos::rcp_dynamic_cast(A->getRowMap()); + TEUCHOS_TEST_FOR_EXCEPTION( + stridedRowMap == Teuchos::null, Exceptions::BadCast, + "MueLu::CoalesceFactory::Build: cast to strided row map failed."); + fullblocksize = stridedRowMap->getFixedBlockSize(); + offset = stridedRowMap->getOffset(); + blockid = stridedRowMap->getStridedBlockId(); + + if (blockid > -1) { + std::vector stridingInfo = stridedRowMap->getStridingData(); + for (size_t j = 0; j < Teuchos::as(blockid); j++) + nStridedOffset += stridingInfo[j]; + stridedblocksize = Teuchos::as(stridingInfo[blockid]); - // build node row map (uniqueMap) and node column map (nonUniqueMap) - // the arrays rowTranslation and colTranslation contain the local node id - // given a local dof id. 
They are only necessary for the CoalesceDropFactory if - // fullblocksize > 1 - RCP uniqueMap, nonUniqueMap; - RCP amalgamationData; - RCP > rowTranslation = Teuchos::null; - RCP > colTranslation = Teuchos::null; - - if (fullblocksize > 1) { - // mfh 14 Apr 2015: These need to have different names than - // rowTranslation and colTranslation, in order to avoid - // shadowing warnings (-Wshadow with GCC). Alternately, it - // looks like you could just assign to the existing variables in - // this scope, rather than creating new ones. - RCP > theRowTranslation = rcp(new Array); - RCP > theColTranslation = rcp(new Array); - AmalgamateMap(*(A->getRowMap()), *A, uniqueMap, *theRowTranslation); - AmalgamateMap(*(A->getColMap()), *A, nonUniqueMap, *theColTranslation); - - amalgamationData = rcp(new AmalgamationInfo(theRowTranslation, - theColTranslation, - uniqueMap, - nonUniqueMap, - A->getColMap(), - fullblocksize, - offset, - blockid, - nStridedOffset, - stridedblocksize) ); } else { - amalgamationData = rcp(new AmalgamationInfo(rowTranslation, // Teuchos::null - colTranslation, // Teuchos::null - A->getRowMap(), // unique map of graph - A->getColMap(), // non-unique map of graph - A->getColMap(), // column map of A - fullblocksize, - offset, - blockid, - nStridedOffset, - stridedblocksize) ); + stridedblocksize = fullblocksize; } + // Correct for the storageblocksize + // NOTE: Before this point fullblocksize is actually numPDEs + TEUCHOS_TEST_FOR_EXCEPTION(fullblocksize % storageblocksize != 0, + Exceptions::RuntimeError, + "AmalgamationFactory: fullblocksize needs to be " + "a multiple of A->GetStorageBlockSize()"); + fullblocksize /= storageblocksize; + stridedblocksize /= storageblocksize; + + oldView = A->SwitchToView(oldView); + GetOStream(Runtime1) << "AmalagamationFactory::Build():" + << " found fullblocksize=" << fullblocksize + << " and stridedblocksize=" << stridedblocksize + << " from strided maps. 
offset=" << offset + << std::endl; + + } else { + GetOStream(Warnings0) + << "AmalagamationFactory::Build(): no striding information available. " + "Use blockdim=1 with offset=0" + << std::endl; + } - // store (un)amalgamation information on current level - Set(currentLevel, "UnAmalgamationInfo", amalgamationData); + // build node row map (uniqueMap) and node column map (nonUniqueMap) + // the arrays rowTranslation and colTranslation contain the local node id + // given a local dof id. They are only necessary for the CoalesceDropFactory + // if fullblocksize > 1 + RCP uniqueMap, nonUniqueMap; + RCP amalgamationData; + RCP> rowTranslation = Teuchos::null; + RCP> colTranslation = Teuchos::null; + + if (fullblocksize > 1) { + // mfh 14 Apr 2015: These need to have different names than + // rowTranslation and colTranslation, in order to avoid + // shadowing warnings (-Wshadow with GCC). Alternately, it + // looks like you could just assign to the existing variables in + // this scope, rather than creating new ones. 
+ RCP> theRowTranslation = rcp(new Array); + RCP> theColTranslation = rcp(new Array); + AmalgamateMap(*(A->getRowMap()), *A, uniqueMap, *theRowTranslation); + AmalgamateMap(*(A->getColMap()), *A, nonUniqueMap, *theColTranslation); + + amalgamationData = rcp(new AmalgamationInfo( + theRowTranslation, theColTranslation, uniqueMap, nonUniqueMap, + A->getColMap(), fullblocksize, offset, blockid, nStridedOffset, + stridedblocksize)); + } else { + amalgamationData = rcp(new AmalgamationInfo( + rowTranslation, // Teuchos::null + colTranslation, // Teuchos::null + A->getRowMap(), // unique map of graph + A->getColMap(), // non-unique map of graph + A->getColMap(), // column map of A + fullblocksize, offset, blockid, nStridedOffset, stridedblocksize)); } - template - void AmalgamationFactory::AmalgamateMap(const Map& sourceMap, const Matrix& A, RCP& amalgamatedMap, Array& translation) { - typedef typename ArrayView::size_type size_type; - typedef std::unordered_map container; - - GO indexBase = sourceMap.getIndexBase(); - ArrayView elementAList = sourceMap.getLocalElementList(); - size_type numElements = elementAList.size(); - container filter; - - GO offset = 0; - LO blkSize = A.GetFixedBlockSize() / A.GetStorageBlockSize(); - if (A.IsView("stridedMaps") == true) { - Teuchos::RCP myMap = A.getRowMap("stridedMaps"); - Teuchos::RCP strMap = Teuchos::rcp_dynamic_cast(myMap); - TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap"); - offset = strMap->getOffset(); - blkSize = Teuchos::as(strMap->getFixedBlockSize()); - } + // store (un)amalgamation information on current level + Set(currentLevel, "UnAmalgamationInfo", amalgamationData); +} + +template +void AmalgamationFactory::AmalgamateMap(const Map &sourceMap, + const Matrix &A, + RCP &amalgamatedMap, + Array &translation) { + typedef typename ArrayView::size_type size_type; + typedef std::unordered_map container; + + GO indexBase = sourceMap.getIndexBase(); + ArrayView 
elementAList = sourceMap.getLocalElementList(); + size_type numElements = elementAList.size(); + container filter; + + GO offset = 0; + LO blkSize = A.GetFixedBlockSize() / A.GetStorageBlockSize(); + if (A.IsView("stridedMaps") == true) { + Teuchos::RCP myMap = A.getRowMap("stridedMaps"); + Teuchos::RCP strMap = + Teuchos::rcp_dynamic_cast(myMap); + TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, + "Map is not of type StridedMap"); + offset = strMap->getOffset(); + blkSize = Teuchos::as(strMap->getFixedBlockSize()); + } - Array elementList(numElements); - translation.resize(numElements); + Array elementList(numElements); + translation.resize(numElements); - size_type numRows = 0; - for (size_type id = 0; id < numElements; id++) { - GO dofID = elementAList[id]; - GO nodeID = AmalgamationFactory::DOFGid2NodeId(dofID, blkSize, offset, indexBase); + size_type numRows = 0; + for (size_type id = 0; id < numElements; id++) { + GO dofID = elementAList[id]; + GO nodeID = + AmalgamationFactory::DOFGid2NodeId(dofID, blkSize, offset, indexBase); - typename container::iterator it = filter.find(nodeID); - if (it == filter.end()) { - filter[nodeID] = numRows; + typename container::iterator it = filter.find(nodeID); + if (it == filter.end()) { + filter[nodeID] = numRows; - translation[id] = numRows; - elementList[numRows] = nodeID; + translation[id] = numRows; + elementList[numRows] = nodeID; - numRows++; + numRows++; - } else { - translation[id] = it->second; - } + } else { + translation[id] = it->second; } - elementList.resize(numRows); - - amalgamatedMap = MapFactory::Build(sourceMap.lib(), Teuchos::OrdinalTraits::invalid(), elementList, indexBase, sourceMap.getComm()); - } + elementList.resize(numRows); - template - const GlobalOrdinal AmalgamationFactory::DOFGid2NodeId(GlobalOrdinal gid, LocalOrdinal blockSize, - const GlobalOrdinal offset, const GlobalOrdinal indexBase) { - GlobalOrdinal globalblockid = ((GlobalOrdinal) gid - offset - indexBase) / 
blockSize + indexBase; - return globalblockid; - } + amalgamatedMap = MapFactory::Build( + sourceMap.lib(), Teuchos::OrdinalTraits::invalid(), + elementList, indexBase, sourceMap.getComm()); +} -} //namespace MueLu +template +const GlobalOrdinal +AmalgamationFactory::DOFGid2NodeId( + GlobalOrdinal gid, LocalOrdinal blockSize, const GlobalOrdinal offset, + const GlobalOrdinal indexBase) { + GlobalOrdinal globalblockid = + ((GlobalOrdinal)gid - offset - indexBase) / blockSize + indexBase; + return globalblockid; +} -#endif /* MUELU_SUBBLOCKUNAMALGAMATIONFACTORY_DEF_HPP */ +} // namespace MueLu +#endif /* MUELU_SUBBLOCKUNAMALGAMATIONFACTORY_DEF_HPP */ diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationInfo_decl.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationInfo_decl.hpp index b14a14a1cc10..9c183bd76eea 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationInfo_decl.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationInfo_decl.hpp @@ -53,17 +53,17 @@ #ifndef MUELU_AMALGAMATIONINFO_DECL_HPP_ #define MUELU_AMALGAMATIONINFO_DECL_HPP_ -#include // global_size_t +#include // global_size_t +#include #include #include -#include #include "MueLu_ConfigDefs.hpp" #include "MueLu_BaseClass.hpp" -#include "MueLu_AmalgamationInfo_fwd.hpp" #include "MueLu_Aggregates_fwd.hpp" +#include "MueLu_AmalgamationInfo_fwd.hpp" namespace MueLu { @@ -71,166 +71,172 @@ namespace MueLu { @class AmalgamationInfo @brief minimal container class for storing amalgamation information - Helps create a mapping between local node id on current processor to local DOFs ids on - current processor. That mapping is used for unamalgamation. + Helps create a mapping between local node id on current processor to local + DOFs ids on current processor. That mapping is used for unamalgamation. 
*/ - template - class AmalgamationInfo - : public BaseClass { +template +class AmalgamationInfo : public BaseClass { #undef MUELU_AMALGAMATIONINFO_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - - /// Constructor - AmalgamationInfo(RCP > rowTranslation, - RCP > colTranslation, - RCP nodeRowMap, - RCP nodeColMap, - RCP const &columnMap, - LO fullblocksize, GO offset, LO blockid, LO nStridedOffset, LO stridedblocksize) : - rowTranslation_(rowTranslation), - colTranslation_(colTranslation), - nodeRowMap_(nodeRowMap), - nodeColMap_(nodeColMap), - columnMap_(columnMap), - fullblocksize_(fullblocksize), - offset_(offset), - blockid_(blockid), - nStridedOffset_(nStridedOffset), - stridedblocksize_(stridedblocksize), - indexBase_(columnMap->getIndexBase()) - {} - - /// Destructor - virtual ~AmalgamationInfo() {} - - /// Return a simple one-line description of this object. - std::string description() const { return "AmalgamationInfo"; } - - //! Print the object with some verbosity level to an FancyOStream object. - //using MueLu::Describable::describe; // overloading, not hiding - //void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const;; - void print(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const; - - RCP getNodeRowMap() const { return nodeRowMap_; } //! < returns the node row map for the graph - RCP getNodeColMap() const { return nodeColMap_; } //! < returns the node column map for the graph - - /* @brief Translation arrays - * - * Returns translation arrays providing local node ids given local dof ids built from either - * the non-overlapping (unique) row map or the overlapping (non-unique) column map. - * The getColTranslation routine, e.g., is used for the MergeRows routine in CoalesceDropFactory. - */ - //@{ - RCP > getRowTranslation() const { return rowTranslation_; } - RCP > getColTranslation() const { return colTranslation_; } - //@} - - /*! 
@brief UnamalgamateAggregates - - Puts all dofs for aggregate \c i in aggToRowMap[\c i]. Also calculate aggregate sizes. - */ - void UnamalgamateAggregates(const Aggregates& aggregates, Teuchos::ArrayRCP& aggStart, Teuchos::ArrayRCP& aggToRowMap) const; - void UnamalgamateAggregatesLO(const Aggregates& aggregates, Teuchos::ArrayRCP& aggStart, Teuchos::ArrayRCP& aggToRowMap) const; - - /*! @brief ComputeUnamalgamatedImportDofMap - * build overlapping dof row map from aggregates needed for overlapping null space - */ - Teuchos::RCP< Xpetra::Map > ComputeUnamalgamatedImportDofMap(const Aggregates& aggregates) const; - - private: - - void UnamalgamateAggregates(const Teuchos::RCP &nodeMap, +public: + /// Constructor + AmalgamationInfo(RCP> rowTranslation, RCP> colTranslation, + RCP nodeRowMap, RCP nodeColMap, + RCP const &columnMap, LO fullblocksize, GO offset, + LO blockid, LO nStridedOffset, LO stridedblocksize) + : rowTranslation_(rowTranslation), colTranslation_(colTranslation), + nodeRowMap_(nodeRowMap), nodeColMap_(nodeColMap), columnMap_(columnMap), + fullblocksize_(fullblocksize), offset_(offset), blockid_(blockid), + nStridedOffset_(nStridedOffset), stridedblocksize_(stridedblocksize), + indexBase_(columnMap->getIndexBase()) {} + + /// Destructor + virtual ~AmalgamationInfo() {} + + /// Return a simple one-line description of this object. + std::string description() const { return "AmalgamationInfo"; } + + //! Print the object with some verbosity level to an FancyOStream object. + // using MueLu::Describable::describe; // overloading, not hiding + // void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = + // Default) const;; + void print(Teuchos::FancyOStream &out, + const VerbLevel verbLevel = Default) const; + + RCP getNodeRowMap() const { + return nodeRowMap_; + } //! < returns the node row map for the graph + RCP getNodeColMap() const { + return nodeColMap_; + } //! 
< returns the node column map for the graph + + /* @brief Translation arrays + * + * Returns translation arrays providing local node ids given local dof ids + * built from either the non-overlapping (unique) row map or the overlapping + * (non-unique) column map. The getColTranslation routine, e.g., is used for + * the MergeRows routine in CoalesceDropFactory. + */ + //@{ + RCP> getRowTranslation() const { return rowTranslation_; } + RCP> getColTranslation() const { return colTranslation_; } + //@} + + /*! @brief UnamalgamateAggregates + + Puts all dofs for aggregate \c i in aggToRowMap[\c i]. Also calculate + aggregate sizes. + */ + void + UnamalgamateAggregates(const Aggregates &aggregates, + Teuchos::ArrayRCP &aggStart, + Teuchos::ArrayRCP &aggToRowMap) const; + void UnamalgamateAggregatesLO(const Aggregates &aggregates, + Teuchos::ArrayRCP &aggStart, + Teuchos::ArrayRCP &aggToRowMap) const; + + /*! @brief ComputeUnamalgamatedImportDofMap + * build overlapping dof row map from aggregates needed for overlapping null + * space + */ + Teuchos::RCP> + ComputeUnamalgamatedImportDofMap(const Aggregates &aggregates) const; + +private: + void + UnamalgamateAggregates(const Teuchos::RCP &nodeMap, + const RCP &procWinnerVec, + const RCP &vertex2AggIdVec, + const GO numAggregates, + Teuchos::ArrayRCP &aggStart, + Teuchos::ArrayRCP &aggToRowMap) const; + + void UnamalgamateAggregatesLO(const Teuchos::RCP &nodeMap, const RCP &procWinnerVec, const RCP &vertex2AggIdVec, const GO numAggregates, - Teuchos::ArrayRCP& aggStart, - Teuchos::ArrayRCP& aggToRowMap) const; - - void UnamalgamateAggregatesLO(const Teuchos::RCP &nodeMap, - const RCP &procWinnerVec, - const RCP &vertex2AggIdVec, - const GO numAggregates, - Teuchos::ArrayRCP& aggStart, - Teuchos::ArrayRCP& aggToRowMap) const; - - Teuchos::RCP< Xpetra::Map > ComputeUnamalgamatedImportDofMap(const Teuchos::RCP &nodeMap) const; - - public: - - /*! 
@brief ComputeGlobalDOF - * - * Return global dof id associated with global node id gNodeID and dof index k - * - * \note We assume that \c indexBase_ is valid for both the node and the dof map. - * - * @param (GO): global node id - * @param (LO): local dof index within node - * @return (GO): global dof id - */ - GO ComputeGlobalDOF(GO const &gNodeID, LO const &k=0) const; - - /*! @brief ComputeLocalDOF - * return locbal dof id associated with local node id lNodeID and dof index k - * - * @param (LO): local node id - * @param (LO): local dof index within node - * @return (LO): local dof id - */ - LO ComputeLocalDOF(LocalOrdinal const &lNodeID, LocalOrdinal const &k) const; - - LO ComputeLocalNode(LocalOrdinal const &ldofID) const; - - /*! Access routines */ - - /// returns offset of global dof ids - GO GlobalOffset() { return offset_; } - - /// returns striding information - void GetStridingInformation(LO& fullBlockSize, LO& blockID, LO& stridingOffset, LO& stridedBlockSize, GO& indexBase) { - fullBlockSize = fullblocksize_; - blockID = blockid_; - stridingOffset = nStridedOffset_; - stridedBlockSize = stridedblocksize_; - indexBase = indexBase_; - } - - private: - - //! @name amalgamation information variables - //@{ - - //! Arrays containing local node ids given local dof ids - RCP > rowTranslation_; - RCP > colTranslation_; - - //! node row and column map of graph (built from row and column map of A) - RCP nodeRowMap_; - RCP nodeColMap_; - - /*! @brief DOF map (really column map of A) - - We keep a RCP on the column map to make sure that the map is still valid when it is used. - */ - RCP columnMap_; - - //@} - - //! @name Strided map information. - //@{ - LO fullblocksize_; - GO offset_; - LO blockid_; - LO nStridedOffset_; - LO stridedblocksize_; - GO indexBase_; - //@} - - }; + Teuchos::ArrayRCP &aggStart, + Teuchos::ArrayRCP &aggToRowMap) const; + + Teuchos::RCP> + ComputeUnamalgamatedImportDofMap( + const Teuchos::RCP &nodeMap) const; + +public: + /*! 
@brief ComputeGlobalDOF + * + * Return global dof id associated with global node id gNodeID and dof index k + * + * \note We assume that \c indexBase_ is valid for both the node and the dof + * map. + * + * @param (GO): global node id + * @param (LO): local dof index within node + * @return (GO): global dof id + */ + GO ComputeGlobalDOF(GO const &gNodeID, LO const &k = 0) const; + + /*! @brief ComputeLocalDOF + * return locbal dof id associated with local node id lNodeID and dof index k + * + * @param (LO): local node id + * @param (LO): local dof index within node + * @return (LO): local dof id + */ + LO ComputeLocalDOF(LocalOrdinal const &lNodeID, LocalOrdinal const &k) const; + + LO ComputeLocalNode(LocalOrdinal const &ldofID) const; + + /*! Access routines */ + + /// returns offset of global dof ids + GO GlobalOffset() { return offset_; } + + /// returns striding information + void GetStridingInformation(LO &fullBlockSize, LO &blockID, + LO &stridingOffset, LO &stridedBlockSize, + GO &indexBase) { + fullBlockSize = fullblocksize_; + blockID = blockid_; + stridingOffset = nStridedOffset_; + stridedBlockSize = stridedblocksize_; + indexBase = indexBase_; + } + +private: + //! @name amalgamation information variables + //@{ + + //! Arrays containing local node ids given local dof ids + RCP> rowTranslation_; + RCP> colTranslation_; + + //! node row and column map of graph (built from row and column map of A) + RCP nodeRowMap_; + RCP nodeColMap_; + + /*! @brief DOF map (really column map of A) + + We keep a RCP on the column map to make sure that the map is still valid when + it is used. + */ + RCP columnMap_; + + //@} + + //! @name Strided map information. 
+ //@{ + LO fullblocksize_; + GO offset_; + LO blockid_; + LO nStridedOffset_; + LO stridedblocksize_; + GO indexBase_; + //@} +}; } // namespace MueLu diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationInfo_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationInfo_def.hpp index f46746657142..b5842f002643 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationInfo_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_AmalgamationInfo_def.hpp @@ -63,265 +63,273 @@ namespace MueLu { - template - void AmalgamationInfo:: - UnamalgamateAggregates(const Aggregates& aggregates, - Teuchos::ArrayRCP& aggStart, - Teuchos::ArrayRCP& aggToRowMap) const { - - UnamalgamateAggregates(aggregates.GetMap(), - aggregates.GetProcWinner(), - aggregates.GetVertex2AggId(), - aggregates.GetNumAggregates(), - aggStart, - aggToRowMap); - - } //UnamalgamateAggregates - - template - void AmalgamationInfo:: - UnamalgamateAggregates(const Teuchos::RCP &nodeMap, - const RCP &procWinnerVec, - const RCP &vertex2AggIdVec, - const GO numAggregates, - Teuchos::ArrayRCP& aggStart, - Teuchos::ArrayRCP& aggToRowMap) const { - - int myPid = nodeMap->getComm()->getRank(); - Teuchos::ArrayView nodeGlobalElts = nodeMap->getLocalElementList(); - Teuchos::ArrayRCP procWinner = procWinnerVec->getDataNonConst(0); - Teuchos::ArrayRCP vertex2AggId = vertex2AggIdVec->getDataNonConst(0); - const LO size = procWinner.size(); - - std::vector sizes(numAggregates); - if (stridedblocksize_ == 1) { - for (LO lnode = 0; lnode < size; ++lnode) { - LO myAgg = vertex2AggId[lnode]; - if (procWinner[lnode] == myPid) - sizes[myAgg] += 1; - } - } else { - for (LO lnode = 0; lnode < size; ++lnode) { - LO myAgg = vertex2AggId[lnode]; - if (procWinner[lnode] == myPid) { - GO gnodeid = nodeGlobalElts[lnode]; - for (LocalOrdinal k = 0; k < stridedblocksize_; k++) { - GlobalOrdinal gDofIndex = ComputeGlobalDOF(gnodeid,k); - if 
(columnMap_->isNodeGlobalElement(gDofIndex)) - sizes[myAgg] += 1; - } +template +void AmalgamationInfo:: + UnamalgamateAggregates( + const Aggregates &aggregates, Teuchos::ArrayRCP &aggStart, + Teuchos::ArrayRCP &aggToRowMap) const { + + UnamalgamateAggregates(aggregates.GetMap(), aggregates.GetProcWinner(), + aggregates.GetVertex2AggId(), + aggregates.GetNumAggregates(), aggStart, aggToRowMap); + +} // UnamalgamateAggregates + +template +void AmalgamationInfo:: + UnamalgamateAggregates( + const Teuchos::RCP &nodeMap, + const RCP &procWinnerVec, + const RCP &vertex2AggIdVec, const GO numAggregates, + Teuchos::ArrayRCP &aggStart, + Teuchos::ArrayRCP &aggToRowMap) const { + + int myPid = nodeMap->getComm()->getRank(); + Teuchos::ArrayView nodeGlobalElts = nodeMap->getLocalElementList(); + Teuchos::ArrayRCP procWinner = procWinnerVec->getDataNonConst(0); + Teuchos::ArrayRCP vertex2AggId = vertex2AggIdVec->getDataNonConst(0); + const LO size = procWinner.size(); + + std::vector sizes(numAggregates); + if (stridedblocksize_ == 1) { + for (LO lnode = 0; lnode < size; ++lnode) { + LO myAgg = vertex2AggId[lnode]; + if (procWinner[lnode] == myPid) + sizes[myAgg] += 1; + } + } else { + for (LO lnode = 0; lnode < size; ++lnode) { + LO myAgg = vertex2AggId[lnode]; + if (procWinner[lnode] == myPid) { + GO gnodeid = nodeGlobalElts[lnode]; + for (LocalOrdinal k = 0; k < stridedblocksize_; k++) { + GlobalOrdinal gDofIndex = ComputeGlobalDOF(gnodeid, k); + if (columnMap_->isNodeGlobalElement(gDofIndex)) + sizes[myAgg] += 1; } } } - aggStart = ArrayRCP(numAggregates+1,0); - aggStart[0] = Teuchos::ScalarTraits::zero(); - for (GO i=0; i(aggStart[numAggregates],0); - - // count, how many dofs have been recorded for each aggregate so far - Array numDofs(numAggregates, 0); // empty array with number of Dofs for each aggregate - - if (stridedblocksize_ == 1) { - for (LO lnode = 0; lnode < size; ++lnode) { - LO myAgg = vertex2AggId[lnode]; - if (procWinner[lnode] == myPid) { - aggToRowMap[ 
aggStart[myAgg] + numDofs[myAgg] ] = ComputeGlobalDOF(nodeGlobalElts[lnode]); - ++(numDofs[myAgg]); - } + } + aggStart = ArrayRCP(numAggregates + 1, 0); + aggStart[0] = Teuchos::ScalarTraits::zero(); + for (GO i = 0; i < numAggregates; ++i) { + aggStart[i + 1] = aggStart[i] + sizes[i]; + } + aggToRowMap = ArrayRCP(aggStart[numAggregates], 0); + + // count, how many dofs have been recorded for each aggregate so far + Array numDofs(numAggregates, + 0); // empty array with number of Dofs for each aggregate + + if (stridedblocksize_ == 1) { + for (LO lnode = 0; lnode < size; ++lnode) { + LO myAgg = vertex2AggId[lnode]; + if (procWinner[lnode] == myPid) { + aggToRowMap[aggStart[myAgg] + numDofs[myAgg]] = + ComputeGlobalDOF(nodeGlobalElts[lnode]); + ++(numDofs[myAgg]); } - } else { - for (LO lnode = 0; lnode < size; ++lnode) { - LO myAgg = vertex2AggId[lnode]; + } + } else { + for (LO lnode = 0; lnode < size; ++lnode) { + LO myAgg = vertex2AggId[lnode]; - if (procWinner[lnode] == myPid) { - GO gnodeid = nodeGlobalElts[lnode]; - for (LocalOrdinal k = 0; k < stridedblocksize_; k++) { - GlobalOrdinal gDofIndex = ComputeGlobalDOF(gnodeid,k); - if (columnMap_->isNodeGlobalElement(gDofIndex)) { - aggToRowMap[ aggStart[myAgg] + numDofs[myAgg] ] = gDofIndex; - ++(numDofs[myAgg]); - } + if (procWinner[lnode] == myPid) { + GO gnodeid = nodeGlobalElts[lnode]; + for (LocalOrdinal k = 0; k < stridedblocksize_; k++) { + GlobalOrdinal gDofIndex = ComputeGlobalDOF(gnodeid, k); + if (columnMap_->isNodeGlobalElement(gDofIndex)) { + aggToRowMap[aggStart[myAgg] + numDofs[myAgg]] = gDofIndex; + ++(numDofs[myAgg]); } } } } - // todo plausibility check: entry numDofs[k] == aggToRowMap[k].size() - - } //UnamalgamateAggregates - - template - void AmalgamationInfo:: - UnamalgamateAggregatesLO(const Aggregates& aggregates, - Teuchos::ArrayRCP& aggStart, - Teuchos::ArrayRCP& aggToRowMap) const { - UnamalgamateAggregatesLO(aggregates.GetMap(), - aggregates.GetProcWinner(), - 
aggregates.GetVertex2AggId(), - aggregates.GetNumAggregates(), - aggStart, - aggToRowMap); } + // todo plausibility check: entry numDofs[k] == aggToRowMap[k].size() - template - void AmalgamationInfo:: - UnamalgamateAggregatesLO(const Teuchos::RCP &nodeMap, - const RCP &procWinnerVec, - const RCP &vertex2AggIdVec, - const GO numAggregates, - Teuchos::ArrayRCP& aggStart, - Teuchos::ArrayRCP& aggToRowMap) const { - - int myPid = nodeMap->getComm()->getRank(); - Teuchos::ArrayView nodeGlobalElts = nodeMap->getLocalElementList(); - - Teuchos::ArrayRCP procWinner = procWinnerVec ->getDataNonConst(0); - Teuchos::ArrayRCP vertex2AggId = vertex2AggIdVec->getDataNonConst(0); - - - // FIXME: Do we need to compute size here? Or can we use existing? - const LO size = procWinner.size(); - - std::vector sizes(numAggregates); - if (stridedblocksize_ == 1) { - for (LO lnode = 0; lnode < size; lnode++) - if (procWinner[lnode] == myPid) - sizes[vertex2AggId[lnode]]++; - } else { - for (LO lnode = 0; lnode < size; lnode++) - if (procWinner[lnode] == myPid) { - GO nodeGID = nodeGlobalElts[lnode]; - - for (LO k = 0; k < stridedblocksize_; k++) { - GO GID = ComputeGlobalDOF(nodeGID, k); - if (columnMap_->isNodeGlobalElement(GID)) - sizes[vertex2AggId[lnode]]++; - } - } - } +} // UnamalgamateAggregates - aggStart = ArrayRCP(numAggregates+1); // FIXME: useless initialization with zeros - aggStart[0] = 0; - for (GO i = 0; i < numAggregates; i++) - aggStart[i+1] = aggStart[i] + sizes[i]; - - aggToRowMap = ArrayRCP(aggStart[numAggregates], 0); - - // count, how many dofs have been recorded for each aggregate so far - Array numDofs(numAggregates, 0); // empty array with number of DOFs for each aggregate - if (stridedblocksize_ == 1) { - for (LO lnode = 0; lnode < size; ++lnode) - if (procWinner[lnode] == myPid) { - LO myAgg = vertex2AggId[lnode]; - aggToRowMap[aggStart[myAgg] + numDofs[myAgg]] = lnode; - numDofs[myAgg]++; - } - } else { - for (LO lnode = 0; lnode < size; ++lnode) - if 
(procWinner[lnode] == myPid) { - LO myAgg = vertex2AggId[lnode]; - GO nodeGID = nodeGlobalElts[lnode]; - - for (LO k = 0; k < stridedblocksize_; k++) { - GO GID = ComputeGlobalDOF(nodeGID, k); - if (columnMap_->isNodeGlobalElement(GID)) { - aggToRowMap[aggStart[myAgg] + numDofs[myAgg]] = lnode*stridedblocksize_ + k; - numDofs[myAgg]++; - } - } +template +void AmalgamationInfo:: + UnamalgamateAggregatesLO(const Aggregates &aggregates, + Teuchos::ArrayRCP &aggStart, + Teuchos::ArrayRCP &aggToRowMap) const { + UnamalgamateAggregatesLO(aggregates.GetMap(), aggregates.GetProcWinner(), + aggregates.GetVertex2AggId(), + aggregates.GetNumAggregates(), aggStart, + aggToRowMap); +} + +template +void AmalgamationInfo:: + UnamalgamateAggregatesLO(const Teuchos::RCP &nodeMap, + const RCP &procWinnerVec, + const RCP &vertex2AggIdVec, + const GO numAggregates, + Teuchos::ArrayRCP &aggStart, + Teuchos::ArrayRCP &aggToRowMap) const { + + int myPid = nodeMap->getComm()->getRank(); + Teuchos::ArrayView nodeGlobalElts = nodeMap->getLocalElementList(); + + Teuchos::ArrayRCP procWinner = procWinnerVec->getDataNonConst(0); + Teuchos::ArrayRCP vertex2AggId = vertex2AggIdVec->getDataNonConst(0); + + // FIXME: Do we need to compute size here? Or can we use existing? 
+ const LO size = procWinner.size(); + + std::vector sizes(numAggregates); + if (stridedblocksize_ == 1) { + for (LO lnode = 0; lnode < size; lnode++) + if (procWinner[lnode] == myPid) + sizes[vertex2AggId[lnode]]++; + } else { + for (LO lnode = 0; lnode < size; lnode++) + if (procWinner[lnode] == myPid) { + GO nodeGID = nodeGlobalElts[lnode]; + + for (LO k = 0; k < stridedblocksize_; k++) { + GO GID = ComputeGlobalDOF(nodeGID, k); + if (columnMap_->isNodeGlobalElement(GID)) + sizes[vertex2AggId[lnode]]++; } - } - // todo plausibility check: entry numDofs[k] == aggToRowMap[k].size() - - } //UnamalgamateAggregatesLO - - template - void AmalgamationInfo::print(Teuchos::FancyOStream &out, - const VerbLevel verbLevel) const - { - if (!(verbLevel & Debug)) - return; - - out << "AmalgamationInfo -- Striding information:" - << "\n fullBlockSize = " << fullblocksize_ - << "\n blockID = " << blockid_ - << "\n stridingOffset = " << nStridedOffset_ - << "\n stridedBlockSize = " << stridedblocksize_ - << "\n indexBase = " << indexBase_ - << std::endl; - - out << "AmalgamationInfo -- DOFs to nodes mapping:\n" - << " Mapping of row DOFs to row nodes:" << *rowTranslation_() - << "\n\n Mapping of column DOFs to column nodes:" << *colTranslation_() - << std::endl; - - out << "AmalgamationInfo -- row node map:" << std::endl; - nodeRowMap_->describe(out, Teuchos::VERB_EXTREME); - - out << "AmalgamationInfo -- column node map:" << std::endl; - nodeColMap_->describe(out, Teuchos::VERB_EXTREME); + } } - ///////////////////////////////////////////////////////////////////////////// + aggStart = ArrayRCP(numAggregates + + 1); // FIXME: useless initialization with zeros + aggStart[0] = 0; + for (GO i = 0; i < numAggregates; i++) + aggStart[i + 1] = aggStart[i] + sizes[i]; - template - RCP > AmalgamationInfo:: - ComputeUnamalgamatedImportDofMap(const Aggregates& aggregates) const { - return ComputeUnamalgamatedImportDofMap(aggregates.GetMap()); - } + aggToRowMap = 
ArrayRCP(aggStart[numAggregates], 0); - template - RCP > AmalgamationInfo:: - ComputeUnamalgamatedImportDofMap(const Teuchos::RCP &nodeMap) const { - - Teuchos::RCP > myDofGids = Teuchos::rcp(new std::vector); - Teuchos::ArrayView gEltList = nodeMap->getLocalElementList(); - LO nodeElements = Teuchos::as(nodeMap->getLocalNumElements()); - if (stridedblocksize_ == 1) { - for (LO n = 0; npush_back(gDofIndex); + // count, how many dofs have been recorded for each aggregate so far + Array numDofs(numAggregates, + 0); // empty array with number of DOFs for each aggregate + if (stridedblocksize_ == 1) { + for (LO lnode = 0; lnode < size; ++lnode) + if (procWinner[lnode] == myPid) { + LO myAgg = vertex2AggId[lnode]; + aggToRowMap[aggStart[myAgg] + numDofs[myAgg]] = lnode; + numDofs[myAgg]++; } - } else { - for (LO n = 0; nisNodeGlobalElement(gDofIndex)) - myDofGids->push_back(gDofIndex); + } else { + for (LO lnode = 0; lnode < size; ++lnode) + if (procWinner[lnode] == myPid) { + LO myAgg = vertex2AggId[lnode]; + GO nodeGID = nodeGlobalElts[lnode]; + + for (LO k = 0; k < stridedblocksize_; k++) { + GO GID = ComputeGlobalDOF(nodeGID, k); + if (columnMap_->isNodeGlobalElement(GID)) { + aggToRowMap[aggStart[myAgg] + numDofs[myAgg]] = + lnode * stridedblocksize_ + k; + numDofs[myAgg]++; + } } } - } - - Teuchos::ArrayRCP arr_myDofGids = Teuchos::arcp( myDofGids ); - Teuchos::RCP importDofMap = MapFactory::Build(nodeMap->lib(), Teuchos::OrdinalTraits::invalid(), arr_myDofGids(), nodeMap->getIndexBase(), nodeMap->getComm()); - return importDofMap; - } - - ///////////////////////////////////////////////////////////////////////////// - - template - GlobalOrdinal AmalgamationInfo:: - ComputeGlobalDOF(GlobalOrdinal const &gNodeID, LocalOrdinal const &k) const { - // here, the assumption is, that the node map has the same indexBase as the dof map - // this is the node map index base this is the dof map index base - GlobalOrdinal gDofIndex = offset_ + 
(gNodeID-indexBase_)*fullblocksize_ + nStridedOffset_ + k + indexBase_; - return gDofIndex; - } - - template - LocalOrdinal AmalgamationInfo::ComputeLocalDOF(LocalOrdinal const &lNodeID, LocalOrdinal const &k) const { - LocalOrdinal lDofIndex = lNodeID*fullblocksize_ + k; - return lDofIndex; } - - - template - LocalOrdinal AmalgamationInfo::ComputeLocalNode(LocalOrdinal const &ldofID) const { - return (ldofID - ldofID%fullblocksize_) / fullblocksize_; + // todo plausibility check: entry numDofs[k] == aggToRowMap[k].size() + +} // UnamalgamateAggregatesLO + +template +void AmalgamationInfo::print( + Teuchos::FancyOStream &out, const VerbLevel verbLevel) const { + if (!(verbLevel & Debug)) + return; + + out << "AmalgamationInfo -- Striding information:" + << "\n fullBlockSize = " << fullblocksize_ + << "\n blockID = " << blockid_ + << "\n stridingOffset = " << nStridedOffset_ + << "\n stridedBlockSize = " << stridedblocksize_ + << "\n indexBase = " << indexBase_ << std::endl; + + out << "AmalgamationInfo -- DOFs to nodes mapping:\n" + << " Mapping of row DOFs to row nodes:" << *rowTranslation_() + << "\n\n Mapping of column DOFs to column nodes:" << *colTranslation_() + << std::endl; + + out << "AmalgamationInfo -- row node map:" << std::endl; + nodeRowMap_->describe(out, Teuchos::VERB_EXTREME); + + out << "AmalgamationInfo -- column node map:" << std::endl; + nodeColMap_->describe(out, Teuchos::VERB_EXTREME); +} + +///////////////////////////////////////////////////////////////////////////// + +template +RCP> +AmalgamationInfo:: + ComputeUnamalgamatedImportDofMap(const Aggregates &aggregates) const { + return ComputeUnamalgamatedImportDofMap(aggregates.GetMap()); +} + +template +RCP> +AmalgamationInfo:: + ComputeUnamalgamatedImportDofMap( + const Teuchos::RCP &nodeMap) const { + + Teuchos::RCP> myDofGids = Teuchos::rcp(new std::vector); + Teuchos::ArrayView gEltList = nodeMap->getLocalElementList(); + LO nodeElements = Teuchos::as(nodeMap->getLocalNumElements()); + 
if (stridedblocksize_ == 1) { + for (LO n = 0; n < nodeElements; n++) { + GlobalOrdinal gDofIndex = ComputeGlobalDOF(gEltList[n]); + myDofGids->push_back(gDofIndex); + } + } else { + for (LO n = 0; n < nodeElements; n++) { + for (LocalOrdinal k = 0; k < stridedblocksize_; k++) { + GlobalOrdinal gDofIndex = ComputeGlobalDOF(gEltList[n], k); + if (columnMap_->isNodeGlobalElement(gDofIndex)) + myDofGids->push_back(gDofIndex); + } + } } -} //namespace - + Teuchos::ArrayRCP arr_myDofGids = Teuchos::arcp(myDofGids); + Teuchos::RCP importDofMap = MapFactory::Build( + nodeMap->lib(), Teuchos::OrdinalTraits::invalid(), + arr_myDofGids(), nodeMap->getIndexBase(), nodeMap->getComm()); + return importDofMap; +} + +///////////////////////////////////////////////////////////////////////////// + +template +GlobalOrdinal +AmalgamationInfo::ComputeGlobalDOF( + GlobalOrdinal const &gNodeID, LocalOrdinal const &k) const { + // here, the assumption is, that the node map has the same indexBase as the + // dof map + // this is the node map index base this is the dof + // map index base + GlobalOrdinal gDofIndex = offset_ + (gNodeID - indexBase_) * fullblocksize_ + + nStridedOffset_ + k + indexBase_; + return gDofIndex; +} + +template +LocalOrdinal +AmalgamationInfo::ComputeLocalDOF( + LocalOrdinal const &lNodeID, LocalOrdinal const &k) const { + LocalOrdinal lDofIndex = lNodeID * fullblocksize_ + k; + return lDofIndex; +} + +template +LocalOrdinal +AmalgamationInfo::ComputeLocalNode( + LocalOrdinal const &ldofID) const { + return (ldofID - ldofID % fullblocksize_) / fullblocksize_; +} + +} // namespace MueLu #endif /* MUELU_AMALGAMATIONINFO_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_decl.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_decl.hpp index 5e03e2fae3ef..2099b1c9968d 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_decl.hpp +++ 
b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_decl.hpp @@ -46,142 +46,173 @@ #ifndef MUELU_COALESCEDROPFACTORY_DECL_HPP #define MUELU_COALESCEDROPFACTORY_DECL_HPP +#include //TODO +#include +#include +#include #include #include +#include #include #include -#include -#include -#include //TODO -#include -#include +#include "MueLu_CoalesceDropFactory_fwd.hpp" #include "MueLu_ConfigDefs.hpp" #include "MueLu_SingleLevelFactoryBase.hpp" -#include "MueLu_CoalesceDropFactory_fwd.hpp" #include "MueLu_Utilities_fwd.hpp" -#include "MueLu_Level_fwd.hpp" +#include "MueLu_AmalgamationFactory_fwd.hpp" +#include "MueLu_AmalgamationInfo_fwd.hpp" #include "MueLu_GraphBase.hpp" #include "MueLu_Graph_fwd.hpp" #include "MueLu_LWGraph_fwd.hpp" -#include "MueLu_AmalgamationInfo_fwd.hpp" -#include "MueLu_AmalgamationFactory_fwd.hpp" +#include "MueLu_Level_fwd.hpp" #include "MueLu_PreDropFunctionBaseClass_fwd.hpp" #include "MueLu_PreDropFunctionConstVal_fwd.hpp" namespace MueLu { - /*! - @class CoalesceDropFactory - @brief Factory for creating a graph based on a given matrix. - - Factory for creating graphs from matrices with entries selectively dropped. - - ## Code paths ## - - Both the classic dropping strategy as well as a coordinate-based distance laplacian method - is implemented. For performance reasons there are four distinctive code paths for the - classical method: - - - one DOF per node without dropping (i.e. "aggregation: drop tol" = 0.0) - - one DOF per node with dropping (i.e. "aggregation: drop tol" > 0.0) - - number of DOFs per node > 1 withouth dropping - - number of DOFs per node > 1 with dropping - - Additionally, there is a code path for the distance-laplacian mode. 
- - ## Input/output of CoalesceDropFactory ## - - ### User parameters of CoalesceDropFactory ### - Parameter | type | default | master.xml | validated | requested | description - ----------|------|---------|:----------:|:---------:|:---------:|------------ - A | Factory | null | | * | * | Generating factory of the operator A - UnAmalgamationInfo | Factory | null | | * | * | Generating factory of type AmalgamationFactory which generates the variable 'UnAmalgamationInfo'. Do not change the default unless you know what you are doing. - Coordinates | Factory | null | | * | (*) | Generating factory for variable 'Coordinates'. The coordinates are only needed if "distance laplacian" is chosen for the parameter "aggregation: drop scheme" - "aggregation: drop scheme" | std::string | "classical" | * | * | | Coalescing algorithm. You can choose either "classical" (=default) or "distance laplacian" - "aggregation: drop tol" | double | 0.0 | * | * | | Threshold parameter for dropping small entries - "aggregation: Dirichlet threshold" | double | 0.0 | * | * | | Threshold for determining whether entries are zero during Dirichlet row detection - "lightweight wrap" | bool | true | | * | | hidden switch between fast implementation based on MueLu::LWGraph and a failsafe slower implementation based on Xpetra::Graph (for comparison). The user should not change the default value (=true) - - The * in the @c master.xml column denotes that the parameter is defined in the @c master.xml file.
- The * in the @c validated column means that the parameter is declared in the list of valid input parameters (see CoalesceDropFactory::GetValidParameters).
- The * in the @c requested column states that the data is requested as input with all dependencies (see CoalesceDropFactory::DeclareInput). - - ### Variables provided by UncoupledAggregationFactory ### - - After CoalesceDropFactory::Build the following data is available (if requested) - - Parameter | generated by | description - ----------|--------------|------------ - Graph | CoalesceDropFactory | Graph of matrix A - DofsPerNode | CoalesceDropFactory | number of DOFs per node. Note, that we assume a constant number of DOFs per node for all nodes associated with the operator A. - - ## Amalgamation process ## - - The CoalesceDropFactory is internally using the AmalgamationFactory for amalgamating the dof-based maps to node-based maps. The AmalgamationFactory creates the "UnAmalgamationInfo" container - which basically stores all the necessary information for translating dof based data to node based data and vice versa. The container is used, since this way the amalgamation is only done once - and later reused by other factories. - - Of course, often one does not need the information from the "UnAmalgamationInfo" container since the same information could be extracted of the "Graph" or the map from the "Coordinates" vector. - However, there are also some situations (e.g. when doing rebalancing based on HyperGraph partitioning without coordinate information) where one has not access to a "Graph" or "Coordinates" variable. - */ - - template - class CoalesceDropFactory : public SingleLevelFactoryBase { +/*! + @class CoalesceDropFactory + @brief Factory for creating a graph based on a given matrix. + + Factory for creating graphs from matrices with entries selectively dropped. + + ## Code paths ## + + Both the classic dropping strategy as well as a coordinate-based distance + laplacian method is implemented. For performance reasons there are four + distinctive code paths for the classical method: + + - one DOF per node without dropping (i.e. 
"aggregation: drop tol" = 0.0) + - one DOF per node with dropping (i.e. "aggregation: drop tol" > 0.0) + - number of DOFs per node > 1 withouth dropping + - number of DOFs per node > 1 with dropping + + Additionally, there is a code path for the distance-laplacian mode. + + ## Input/output of CoalesceDropFactory ## + + ### User parameters of CoalesceDropFactory ### + Parameter | type | default | master.xml | validated | requested | description + ----------|------|---------|:----------:|:---------:|:---------:|------------ + A | Factory | null | | * | * | Generating factory of the + operator A UnAmalgamationInfo | Factory | null | | * | * | Generating + factory of type AmalgamationFactory which generates the variable + 'UnAmalgamationInfo'. Do not change the default unless you know what you are + doing. Coordinates | Factory | null | | * | (*) | Generating + factory for variable 'Coordinates'. The coordinates are only needed if + "distance laplacian" is chosen for the parameter "aggregation: drop scheme" + "aggregation: drop scheme" | std::string | "classical" | * | * | | + Coalescing algorithm. You can choose either "classical" (=default) or + "distance laplacian" "aggregation: drop tol" | double | 0.0 | * | * | | + Threshold parameter for dropping small entries "aggregation: Dirichlet + threshold" | double | 0.0 | * | * | | Threshold for determining whether + entries are zero during Dirichlet row detection "lightweight wrap" | bool | + true | | * | | hidden switch between fast implementation based on + MueLu::LWGraph and a failsafe slower implementation based on Xpetra::Graph + (for comparison). The user should not change the default value (=true) + + The * in the @c master.xml column denotes that the parameter is defined in the + @c master.xml file.
The * in the @c validated column means that the + parameter is declared in the list of valid input parameters (see + CoalesceDropFactory::GetValidParameters).
The * in the @c requested column + states that the data is requested as input with all dependencies (see + CoalesceDropFactory::DeclareInput). + + ### Variables provided by UncoupledAggregationFactory ### + + After CoalesceDropFactory::Build the following data is available (if + requested) + + Parameter | generated by | description + ----------|--------------|------------ + Graph | CoalesceDropFactory | Graph of matrix A + DofsPerNode | CoalesceDropFactory | number of DOFs per node. Note, that we + assume a constant number of DOFs per node for all nodes associated with the + operator A. + + ## Amalgamation process ## + + The CoalesceDropFactory is internally using the AmalgamationFactory for + amalgamating the dof-based maps to node-based maps. The AmalgamationFactory + creates the "UnAmalgamationInfo" container which basically stores all the + necessary information for translating dof based data to node based data and + vice versa. The container is used, since this way the amalgamation is only + done once and later reused by other factories. + + Of course, often one does not need the information from the + "UnAmalgamationInfo" container since the same information could be extracted + of the "Graph" or the map from the "Coordinates" vector. However, there are + also some situations (e.g. when doing rebalancing based on HyperGraph + partitioning without coordinate information) where one has not access to a + "Graph" or "Coordinates" variable. +*/ + +template +class CoalesceDropFactory : public SingleLevelFactoryBase { #undef MUELU_COALESCEDROPFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! @name Constructors/Destructors. - //@{ - - //! Constructor - CoalesceDropFactory(); - - //! Destructor - virtual ~CoalesceDropFactory() { } +public: + //! @name Constructors/Destructors. + //@{ - RCP GetValidParameterList() const; + //! Constructor + CoalesceDropFactory(); - //@} + //! Destructor + virtual ~CoalesceDropFactory() {} - //! 
Input - //@{ + RCP GetValidParameterList() const; - void DeclareInput(Level ¤tLevel) const; + //@} - /// set predrop function - void SetPreDropFunction(const RCP > &predrop) { predrop_ = predrop; } + //! Input + //@{ - //@} + void DeclareInput(Level ¤tLevel) const; - void Build(Level ¤tLevel) const; // Build + /// set predrop function + void + SetPreDropFunction(const RCP> &predrop) { + predrop_ = predrop; + } - private: + //@} - // pre-drop function - mutable - RCP predrop_; + void Build(Level ¤tLevel) const; // Build - //! Method to merge rows of matrix for systems of PDEs. - void MergeRows(const Matrix& A, const LO row, Array& cols, const Array& translation) const; - void MergeRowsWithDropping(const Matrix& A, const LO row, const ArrayRCP& ghostedDiagVals, SC threshold, Array& cols, const Array& translation) const; +private: + // pre-drop function + mutable RCP predrop_; + //! Method to merge rows of matrix for systems of PDEs. + void MergeRows(const Matrix &A, const LO row, Array &cols, + const Array &translation) const; + void MergeRowsWithDropping(const Matrix &A, const LO row, + const ArrayRCP &ghostedDiagVals, + SC threshold, Array &cols, + const Array &translation) const; - // When we want to decouple a block diagonal system (returns Teuchos::null if generate_matrix is false) - Teuchos::RCP > BlockDiagonalize(Level & currentLevel,const RCP & A, bool generate_matrix) const; + // When we want to decouple a block diagonal system (returns Teuchos::null if + // generate_matrix is false) + Teuchos::RCP> + BlockDiagonalize(Level ¤tLevel, const RCP &A, + bool generate_matrix) const; - // When we want to decouple a block diagonal system via a *graph* - void BlockDiagonalizeGraph(const RCP & inputGraph, const RCP & ghostedBlockNumber, RCP & outputGraph, RCP & importer) const; + // When we want to decouple a block diagonal system via a *graph* + void BlockDiagonalizeGraph(const RCP &inputGraph, + const RCP &ghostedBlockNumber, + RCP &outputGraph, + RCP &importer) 
const; - }; //class CoalesceDropFactory +}; // class CoalesceDropFactory -} //namespace MueLu +} // namespace MueLu #define MUELU_COALESCEDROPFACTORY_SHORT #endif // MUELU_COALESCEDROPFACTORY_DECL_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp index a26a47b7e971..b81a4c32d336 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp @@ -46,18 +46,18 @@ #ifndef MUELU_COALESCEDROPFACTORY_DEF_HPP #define MUELU_COALESCEDROPFACTORY_DEF_HPP -#include #include -#include +#include #include -#include +#include #include +#include #include -#include #include +#include #include -#include #include +#include #include @@ -66,10 +66,10 @@ #include "MueLu_AmalgamationFactory.hpp" #include "MueLu_AmalgamationInfo.hpp" #include "MueLu_Exceptions.hpp" -#include "MueLu_GraphBase.hpp" #include "MueLu_Graph.hpp" -#include "MueLu_Level.hpp" +#include "MueLu_GraphBase.hpp" #include "MueLu_LWGraph.hpp" +#include "MueLu_Level.hpp" #include "MueLu_MasterList.hpp" #include "MueLu_Monitor.hpp" #include "MueLu_PreDropFunctionConstVal.hpp" @@ -87,1048 +87,1260 @@ // Should be removed once we are confident that this works. 
//#define DJS_READ_ENV_VARIABLES - namespace MueLu { - namespace Details { - template - struct DropTol { - - DropTol() = default; - DropTol(DropTol const&) = default; - DropTol(DropTol &&) = default; - - DropTol& operator=(DropTol const&) = default; - DropTol& operator=(DropTol &&) = default; - - DropTol(real_type val_, real_type diag_, LO col_, bool drop_) - : val{val_}, diag{diag_}, col{col_}, drop{drop_} - {} - - real_type val {Teuchos::ScalarTraits::zero()}; - real_type diag {Teuchos::ScalarTraits::zero()}; - LO col {Teuchos::OrdinalTraits::invalid()}; - bool drop {true}; - - // CMS: Auxillary information for debugging info - // real_type aux_val {Teuchos::ScalarTraits::nan()}; - }; - } - - - template - RCP CoalesceDropFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - -#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("aggregation: drop tol"); - SET_VALID_ENTRY("aggregation: use ml scaling of drop tol"); - SET_VALID_ENTRY("aggregation: Dirichlet threshold"); - SET_VALID_ENTRY("aggregation: greedy Dirichlet"); - SET_VALID_ENTRY("aggregation: row sum drop tol"); - SET_VALID_ENTRY("aggregation: drop scheme"); - SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize"); - SET_VALID_ENTRY("aggregation: distance laplacian directional weights"); - SET_VALID_ENTRY("aggregation: dropping may create Dirichlet"); - - { - typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; - // "signed classical" is the Ruge-Stuben style (relative to max off-diagonal), "sign classical sa" is the signed version of the sa criterion (relative to the diagonal values) - validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new validatorType(Teuchos::tuple("signed classical sa","classical", "distance laplacian","signed classical","block diagonal","block diagonal classical","block diagonal distance laplacian","block diagonal signed classical","block 
diagonal colored signed classical"), "aggregation: drop scheme"))); - - } - SET_VALID_ENTRY("aggregation: distance laplacian algo"); - SET_VALID_ENTRY("aggregation: classical algo"); - SET_VALID_ENTRY("aggregation: coloring: localize color graph"); -#undef SET_VALID_ENTRY - validParamList->set< bool > ("lightweight wrap", true, "Experimental option for lightweight graph access"); - - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A"); - validParamList->set< RCP >("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo"); - validParamList->set< RCP >("Coordinates", Teuchos::null, "Generating factory for Coordinates"); - validParamList->set< RCP >("BlockNumber", Teuchos::null, "Generating factory for BlockNUmber"); - - return validParamList; +namespace Details { +template struct DropTol { + + DropTol() = default; + DropTol(DropTol const &) = default; + DropTol(DropTol &&) = default; + + DropTol &operator=(DropTol const &) = default; + DropTol &operator=(DropTol &&) = default; + + DropTol(real_type val_, real_type diag_, LO col_, bool drop_) + : val{val_}, diag{diag_}, col{col_}, drop{drop_} {} + + real_type val{Teuchos::ScalarTraits::zero()}; + real_type diag{Teuchos::ScalarTraits::zero()}; + LO col{Teuchos::OrdinalTraits::invalid()}; + bool drop{true}; + + // CMS: Auxillary information for debugging info + // real_type aux_val {Teuchos::ScalarTraits::nan()}; +}; +} // namespace Details + +template +RCP +CoalesceDropFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + +#define SET_VALID_ENTRY(name) \ + validParamList->setEntry(name, MasterList::getEntry(name)) + SET_VALID_ENTRY("aggregation: drop tol"); + SET_VALID_ENTRY("aggregation: use ml scaling of drop tol"); + SET_VALID_ENTRY("aggregation: Dirichlet threshold"); + SET_VALID_ENTRY("aggregation: greedy Dirichlet"); + SET_VALID_ENTRY("aggregation: row sum drop tol"); + SET_VALID_ENTRY("aggregation: drop scheme"); + 
SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize"); + SET_VALID_ENTRY("aggregation: distance laplacian directional weights"); + SET_VALID_ENTRY("aggregation: dropping may create Dirichlet"); + + { + typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; + // "signed classical" is the Ruge-Stuben style (relative to max + // off-diagonal), "sign classical sa" is the signed version of the sa + // criterion (relative to the diagonal values) + validParamList->getEntry("aggregation: drop scheme") + .setValidator(rcp(new validatorType( + Teuchos::tuple( + "signed classical sa", "classical", "distance laplacian", + "signed classical", "block diagonal", + "block diagonal classical", "block diagonal distance laplacian", + "block diagonal signed classical", + "block diagonal colored signed classical"), + "aggregation: drop scheme"))); } - - template - CoalesceDropFactory::CoalesceDropFactory() : predrop_(Teuchos::null) { } - - template - void CoalesceDropFactory::DeclareInput(Level ¤tLevel) const { - Input(currentLevel, "A"); - Input(currentLevel, "UnAmalgamationInfo"); - - const ParameterList& pL = GetParameterList(); - if (pL.get("lightweight wrap") == true) { - std::string algo = pL.get("aggregation: drop scheme"); - if (algo == "distance laplacian" || algo == "block diagonal distance laplacian") { - Input(currentLevel, "Coordinates"); - } - if(algo == "signed classical sa") - ; - else if (algo.find("block diagonal") != std::string::npos || algo.find("signed classical") != std::string::npos) { - Input(currentLevel, "BlockNumber"); - } - } - - } - - template - void CoalesceDropFactory::Build(Level ¤tLevel) const { - - FactoryMonitor m(*this, "Build", currentLevel); - - typedef Teuchos::ScalarTraits STS; - typedef typename STS::magnitudeType real_type; - typedef Xpetra::MultiVector RealValuedMultiVector; - typedef Xpetra::MultiVectorFactory RealValuedMultiVectorFactory; - - if (predrop_ != Teuchos::null) - GetOStream(Parameters0) << 
predrop_->description(); - - RCP realA = Get< RCP >(currentLevel, "A"); - RCP amalInfo = Get< RCP >(currentLevel, "UnAmalgamationInfo"); - const ParameterList & pL = GetParameterList(); - bool doExperimentalWrap = pL.get("lightweight wrap"); - - GetOStream(Parameters0) << "lightweight wrap = " << doExperimentalWrap << std::endl; + SET_VALID_ENTRY("aggregation: distance laplacian algo"); + SET_VALID_ENTRY("aggregation: classical algo"); + SET_VALID_ENTRY("aggregation: coloring: localize color graph"); +#undef SET_VALID_ENTRY + validParamList->set("lightweight wrap", true, + "Experimental option for lightweight graph access"); + + validParamList->set>( + "A", Teuchos::null, "Generating factory of the matrix A"); + validParamList->set>( + "UnAmalgamationInfo", Teuchos::null, + "Generating factory for UnAmalgamationInfo"); + validParamList->set>( + "Coordinates", Teuchos::null, "Generating factory for Coordinates"); + validParamList->set>( + "BlockNumber", Teuchos::null, "Generating factory for BlockNUmber"); + + return validParamList; +} + +template +CoalesceDropFactory::CoalesceDropFactory() + : predrop_(Teuchos::null) {} + +template +void CoalesceDropFactory::DeclareInput(Level ¤tLevel) const { + Input(currentLevel, "A"); + Input(currentLevel, "UnAmalgamationInfo"); + + const ParameterList &pL = GetParameterList(); + if (pL.get("lightweight wrap") == true) { std::string algo = pL.get("aggregation: drop scheme"); - const bool aggregationMayCreateDirichlet = pL.get("aggregation: dropping may create Dirichlet"); - - RCP Coords; - RCP A; - - bool use_block_algorithm=false; - LO interleaved_blocksize = as(pL.get("aggregation: block diagonal: interleaved blocksize")); - bool useSignedClassicalRS = false; - bool useSignedClassicalSA = false; - bool generateColoringGraph = false; - - // NOTE: If we're doing blockDiagonal, we'll not want to do rowSum twice (we'll do it - // in the block diagonalization). 
So we'll clobber the rowSumTol with -1.0 in this case - typename STS::magnitudeType rowSumTol = as(pL.get("aggregation: row sum drop tol")); - - - RCP ghostedBlockNumber; - ArrayRCP g_block_id; - - if(algo == "distance laplacian" ) { - // Grab the coordinates for distance laplacian - Coords = Get< RCP >(currentLevel, "Coordinates"); - A = realA; + if (algo == "distance laplacian" || + algo == "block diagonal distance laplacian") { + Input(currentLevel, "Coordinates"); } - else if(algo == "signed classical sa") { - useSignedClassicalSA = true; - algo = "classical"; - A = realA; + if (algo == "signed classical sa") + ; + else if (algo.find("block diagonal") != std::string::npos || + algo.find("signed classical") != std::string::npos) { + Input(currentLevel, "BlockNumber"); } - else if(algo == "signed classical" || algo == "block diagonal colored signed classical" || algo == "block diagonal signed classical") { - useSignedClassicalRS = true; - // if(realA->GetFixedBlockSize() > 1) { - RCP BlockNumber = Get >(currentLevel, "BlockNumber"); - // Ghost the column block numbers if we need to - RCP importer = realA->getCrsGraph()->getImporter(); - if(!importer.is_null()) { - SubFactoryMonitor m1(*this, "Block Number import", currentLevel); - ghostedBlockNumber= Xpetra::VectorFactory::Build(importer->getTargetMap()); - ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT); - } - else { - ghostedBlockNumber = BlockNumber; - } - g_block_id = ghostedBlockNumber->getData(0); - // } - if(algo == "block diagonal colored signed classical") - generateColoringGraph=true; - algo = "classical"; - A = realA; - - } - else if(algo == "block diagonal") { - // Handle the "block diagonal" filtering and then leave - BlockDiagonalize(currentLevel,realA,false); - return; + } +} + +template +void CoalesceDropFactory::Build( + Level ¤tLevel) const { + + FactoryMonitor m(*this, "Build", currentLevel); + + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType 
real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + typedef Xpetra::MultiVectorFactory + RealValuedMultiVectorFactory; + + if (predrop_ != Teuchos::null) + GetOStream(Parameters0) << predrop_->description(); + + RCP realA = Get>(currentLevel, "A"); + RCP amalInfo = + Get>(currentLevel, "UnAmalgamationInfo"); + const ParameterList &pL = GetParameterList(); + bool doExperimentalWrap = pL.get("lightweight wrap"); + + GetOStream(Parameters0) << "lightweight wrap = " << doExperimentalWrap + << std::endl; + std::string algo = pL.get("aggregation: drop scheme"); + const bool aggregationMayCreateDirichlet = + pL.get("aggregation: dropping may create Dirichlet"); + + RCP Coords; + RCP A; + + bool use_block_algorithm = false; + LO interleaved_blocksize = + as(pL.get("aggregation: block diagonal: interleaved blocksize")); + bool useSignedClassicalRS = false; + bool useSignedClassicalSA = false; + bool generateColoringGraph = false; + + // NOTE: If we're doing blockDiagonal, we'll not want to do rowSum twice + // (we'll do it in the block diagonalization). 
So we'll clobber the rowSumTol + // with -1.0 in this case + typename STS::magnitudeType rowSumTol = as( + pL.get("aggregation: row sum drop tol")); + + RCP ghostedBlockNumber; + ArrayRCP g_block_id; + + if (algo == "distance laplacian") { + // Grab the coordinates for distance laplacian + Coords = Get>(currentLevel, "Coordinates"); + A = realA; + } else if (algo == "signed classical sa") { + useSignedClassicalSA = true; + algo = "classical"; + A = realA; + } else if (algo == "signed classical" || + algo == "block diagonal colored signed classical" || + algo == "block diagonal signed classical") { + useSignedClassicalRS = true; + // if(realA->GetFixedBlockSize() > 1) { + RCP BlockNumber = + Get>(currentLevel, "BlockNumber"); + // Ghost the column block numbers if we need to + RCP importer = realA->getCrsGraph()->getImporter(); + if (!importer.is_null()) { + SubFactoryMonitor m1(*this, "Block Number import", currentLevel); + ghostedBlockNumber = Xpetra::VectorFactory::Build( + importer->getTargetMap()); + ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT); + } else { + ghostedBlockNumber = BlockNumber; } - else if (algo == "block diagonal classical" || algo == "block diagonal distance laplacian") { - // Handle the "block diagonal" filtering, and then continue onward - use_block_algorithm = true; - RCP filteredMatrix = BlockDiagonalize(currentLevel,realA,true); - if(algo == "block diagonal distance laplacian") { - // We now need to expand the coordinates by the interleaved blocksize - RCP OldCoords = Get< RCP >(currentLevel, "Coordinates"); - if (OldCoords->getLocalLength() != realA->getLocalNumRows()) { - LO dim = (LO) OldCoords->getNumVectors(); - Coords = RealValuedMultiVectorFactory::Build(realA->getRowMap(),dim); - for(LO k=0; k old_vec = OldCoords->getData(k); - ArrayRCP new_vec = Coords->getDataNonConst(k); - for(LO i=0; i <(LO)OldCoords->getLocalLength(); i++) { - LO new_base = i*dim; - for(LO j=0; jgetData(0); + // } + if (algo == "block 
diagonal colored signed classical") + generateColoringGraph = true; + algo = "classical"; + A = realA; + + } else if (algo == "block diagonal") { + // Handle the "block diagonal" filtering and then leave + BlockDiagonalize(currentLevel, realA, false); + return; + } else if (algo == "block diagonal classical" || + algo == "block diagonal distance laplacian") { + // Handle the "block diagonal" filtering, and then continue onward + use_block_algorithm = true; + RCP filteredMatrix = BlockDiagonalize(currentLevel, realA, true); + if (algo == "block diagonal distance laplacian") { + // We now need to expand the coordinates by the interleaved blocksize + RCP OldCoords = + Get>(currentLevel, "Coordinates"); + if (OldCoords->getLocalLength() != realA->getLocalNumRows()) { + LO dim = (LO)OldCoords->getNumVectors(); + Coords = RealValuedMultiVectorFactory::Build(realA->getRowMap(), dim); + for (LO k = 0; k < dim; k++) { + ArrayRCP old_vec = OldCoords->getData(k); + ArrayRCP new_vec = Coords->getDataNonConst(k); + for (LO i = 0; i < (LO)OldCoords->getLocalLength(); i++) { + LO new_base = i * dim; + for (LO j = 0; j < interleaved_blocksize; j++) + new_vec[new_base + j] = old_vec[i]; } } - else { - Coords = OldCoords; - } - algo = "distance laplacian"; + } else { + Coords = OldCoords; } - else if(algo == "block diagonal classical") { - algo = "classical"; - } - // All cases - A = filteredMatrix; - rowSumTol = -1.0; - } - else { - A = realA; + algo = "distance laplacian"; + } else if (algo == "block diagonal classical") { + algo = "classical"; } + // All cases + A = filteredMatrix; + rowSumTol = -1.0; + } else { + A = realA; + } - // Distance Laplacian weights - Array dlap_weights = pL.get >("aggregation: distance laplacian directional weights"); - enum {NO_WEIGHTS=0, SINGLE_WEIGHTS, BLOCK_WEIGHTS}; - int use_dlap_weights = NO_WEIGHTS; - if(algo == "distance laplacian") { - LO dim = (LO) Coords->getNumVectors(); - // If anything isn't 1.0 we need to turn on the weighting - bool 
non_unity = false; - for (LO i=0; !non_unity && i<(LO)dlap_weights.size(); i++) { - if(dlap_weights[i] != 1.0) { - non_unity = true; - } + // Distance Laplacian weights + Array dlap_weights = pL.get>( + "aggregation: distance laplacian directional weights"); + enum { NO_WEIGHTS = 0, SINGLE_WEIGHTS, BLOCK_WEIGHTS }; + int use_dlap_weights = NO_WEIGHTS; + if (algo == "distance laplacian") { + LO dim = (LO)Coords->getNumVectors(); + // If anything isn't 1.0 we need to turn on the weighting + bool non_unity = false; + for (LO i = 0; !non_unity && i < (LO)dlap_weights.size(); i++) { + if (dlap_weights[i] != 1.0) { + non_unity = true; } - if(non_unity) { - LO blocksize = use_block_algorithm ? as(pL.get("aggregation: block diagonal: interleaved blocksize")) : 1; - if((LO)dlap_weights.size() == dim) - use_dlap_weights = SINGLE_WEIGHTS; - else if((LO)dlap_weights.size() == blocksize * dim) - use_dlap_weights = BLOCK_WEIGHTS; - else { - TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError, - "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize"); - } - if (GetVerbLevel() & Statistics1) - GetOStream(Statistics1) << "Using distance laplacian weights: "<(pL.get( + "aggregation: block diagonal: interleaved blocksize")) + : 1; + if ((LO)dlap_weights.size() == dim) + use_dlap_weights = SINGLE_WEIGHTS; + else if ((LO)dlap_weights.size() == blocksize * dim) + use_dlap_weights = BLOCK_WEIGHTS; + else { + TEUCHOS_TEST_FOR_EXCEPTION( + 1, Exceptions::RuntimeError, + "length of 'aggregation: distance laplacian directional weights' " + "must equal the coordinate dimension OR the coordinate dimension " + "times the blocksize"); } + if (GetVerbLevel() & Statistics1) + GetOStream(Statistics1) + << "Using distance laplacian weights: " << dlap_weights + << std::endl; } + } - // decide wether to use the fast-track code path for standard maps or the somewhat slower - // code path for non-standard 
maps - /*bool bNonStandardMaps = false; - if (A->IsView("stridedMaps") == true) { - Teuchos::RCP myMap = A->getRowMap("stridedMaps"); - Teuchos::RCP strMap = Teuchos::rcp_dynamic_cast(myMap); - TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap"); - if (strMap->getStridedBlockId() != -1 || strMap->getOffset() > 0) - bNonStandardMaps = true; - }*/ - - if (doExperimentalWrap) { - TEUCHOS_TEST_FOR_EXCEPTION(predrop_ != null && algo != "classical", Exceptions::RuntimeError, "Dropping function must not be provided for \"" << algo << "\" algorithm"); - TEUCHOS_TEST_FOR_EXCEPTION(algo != "classical" && algo != "distance laplacian" && algo != "signed classical", Exceptions::RuntimeError, "\"algorithm\" must be one of (classical|distance laplacian|signed classical)"); - - SC threshold; - // If we're doing the ML-style halving of the drop tol at each level, we do that here. + // decide wether to use the fast-track code path for standard maps or the + // somewhat slower code path for non-standard maps + /*bool bNonStandardMaps = false; + if (A->IsView("stridedMaps") == true) { + Teuchos::RCP myMap = A->getRowMap("stridedMaps"); + Teuchos::RCP strMap = Teuchos::rcp_dynamic_cast(myMap); TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, + Exceptions::RuntimeError, "Map is not of type StridedMap"); if + (strMap->getStridedBlockId() != -1 || strMap->getOffset() > 0) + bNonStandardMaps = true; + }*/ + + if (doExperimentalWrap) { + TEUCHOS_TEST_FOR_EXCEPTION(predrop_ != null && algo != "classical", + Exceptions::RuntimeError, + "Dropping function must not be provided for \"" + << algo << "\" algorithm"); + TEUCHOS_TEST_FOR_EXCEPTION( + algo != "classical" && algo != "distance laplacian" && + algo != "signed classical", + Exceptions::RuntimeError, + "\"algorithm\" must be one of (classical|distance laplacian|signed " + "classical)"); + + SC threshold; + // If we're doing the ML-style halving of the drop tol at each level, we do + // that 
here. if (pL.get("aggregation: use ml scaling of drop tol")) - threshold = pL.get("aggregation: drop tol") / pow(2.0,currentLevel.GetLevelID()); + threshold = pL.get("aggregation: drop tol") / + pow(2.0, currentLevel.GetLevelID()); else - threshold = as(pL.get("aggregation: drop tol")); - + threshold = as(pL.get("aggregation: drop tol")); - std::string distanceLaplacianAlgoStr = pL.get("aggregation: distance laplacian algo"); - std::string classicalAlgoStr = pL.get("aggregation: classical algo"); - real_type realThreshold = STS::magnitude(threshold);// CMS: Rename this to "magnitude threshold" sometime + std::string distanceLaplacianAlgoStr = + pL.get("aggregation: distance laplacian algo"); + std::string classicalAlgoStr = + pL.get("aggregation: classical algo"); + real_type realThreshold = STS::magnitude( + threshold); // CMS: Rename this to "magnitude threshold" sometime - //////////////////////////////////////////////////// - // Remove this bit once we are confident that cut-based dropping works. + //////////////////////////////////////////////////// + // Remove this bit once we are confident that cut-based dropping works. 
#ifdef HAVE_MUELU_DEBUG - int distanceLaplacianCutVerbose = 0; + int distanceLaplacianCutVerbose = 0; #endif #ifdef DJS_READ_ENV_VARIABLES - if (getenv("MUELU_DROP_TOLERANCE_MODE")) { - distanceLaplacianAlgoStr = std::string(getenv("MUELU_DROP_TOLERANCE_MODE")); - } + if (getenv("MUELU_DROP_TOLERANCE_MODE")) { + distanceLaplacianAlgoStr = + std::string(getenv("MUELU_DROP_TOLERANCE_MODE")); + } - if (getenv("MUELU_DROP_TOLERANCE_THRESHOLD")) { - auto tmp = atoi(getenv("MUELU_DROP_TOLERANCE_THRESHOLD")); - realThreshold = 1e-4*tmp; - } + if (getenv("MUELU_DROP_TOLERANCE_THRESHOLD")) { + auto tmp = atoi(getenv("MUELU_DROP_TOLERANCE_THRESHOLD")); + realThreshold = 1e-4 * tmp; + } -# ifdef HAVE_MUELU_DEBUG - if (getenv("MUELU_DROP_TOLERANCE_VERBOSE")) { - distanceLaplacianCutVerbose = atoi(getenv("MUELU_DROP_TOLERANCE_VERBOSE")); - } -# endif +#ifdef HAVE_MUELU_DEBUG + if (getenv("MUELU_DROP_TOLERANCE_VERBOSE")) { + distanceLaplacianCutVerbose = + atoi(getenv("MUELU_DROP_TOLERANCE_VERBOSE")); + } #endif - //////////////////////////////////////////////////// - - enum decisionAlgoType {defaultAlgo, unscaled_cut, scaled_cut, scaled_cut_symmetric}; - - decisionAlgoType distanceLaplacianAlgo = defaultAlgo; - decisionAlgoType classicalAlgo = defaultAlgo; - if (algo == "distance laplacian") { - if (distanceLaplacianAlgoStr == "default") - distanceLaplacianAlgo = defaultAlgo; - else if (distanceLaplacianAlgoStr == "unscaled cut") - distanceLaplacianAlgo = unscaled_cut; - else if (distanceLaplacianAlgoStr == "scaled cut") - distanceLaplacianAlgo = scaled_cut; - else if (distanceLaplacianAlgoStr == "scaled cut symmetric") - distanceLaplacianAlgo = scaled_cut_symmetric; - else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut), not \"" << distanceLaplacianAlgoStr << "\""); - GetOStream(Runtime0) << "algorithm = \"" << algo << "\" distance laplacian algorithm = \"" << 
distanceLaplacianAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize()<< std::endl; - } else if (algo == "classical") { - if (classicalAlgoStr == "default") - classicalAlgo = defaultAlgo; - else if (classicalAlgoStr == "unscaled cut") - classicalAlgo = unscaled_cut; - else if (classicalAlgoStr == "scaled cut") - classicalAlgo = scaled_cut; - else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut), not \"" << classicalAlgoStr << "\""); - GetOStream(Runtime0) << "algorithm = \"" << algo << "\" classical algorithm = \"" << classicalAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; - - } else - GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; - Set(currentLevel, "Filtering", (threshold != STS::zero())); - - const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as(pL.get("aggregation: Dirichlet threshold"))); - - - // NOTE: We don't support signed classical RS or SA with cut drop at present - TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassicalRS && classicalAlgo != defaultAlgo, Exceptions::RuntimeError, "\"aggregation: classical algo\" != default is not supported for scalled classical aggregation"); - TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassicalSA && classicalAlgo != defaultAlgo, Exceptions::RuntimeError, "\"aggregation: classical algo\" != default is not supported for scalled classical sa aggregation"); - - GO numDropped = 0, numTotal = 0; - std::string graphType = "unamalgamated"; //for description purposes only - - - /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme. - BlockSize is the number of storage blocks that must kept together during the amalgamation process. 
- - Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold: +#endif + //////////////////////////////////////////////////// - numPDEs = BlockSize * storageblocksize. - - If numPDEs==1 - Matrix is point storage (classical CRS storage). storageblocksize=1 and BlockSize=1 - No other values makes sense. + enum decisionAlgoType { + defaultAlgo, + unscaled_cut, + scaled_cut, + scaled_cut_symmetric + }; - If numPDEs>1 - If matrix uses point storage, then storageblocksize=1 and BlockSize=numPDEs. - If matrix uses block storage, with block size of n, then storageblocksize=n, and BlockSize=numPDEs/n. - Thus far, only storageblocksize=numPDEs and BlockSize=1 has been tested. - */ - TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0,Exceptions::RuntimeError,"A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()"); - const LO BlockSize = A->GetFixedBlockSize() / A->GetStorageBlockSize(); + decisionAlgoType distanceLaplacianAlgo = defaultAlgo; + decisionAlgoType classicalAlgo = defaultAlgo; + if (algo == "distance laplacian") { + if (distanceLaplacianAlgoStr == "default") + distanceLaplacianAlgo = defaultAlgo; + else if (distanceLaplacianAlgoStr == "unscaled cut") + distanceLaplacianAlgo = unscaled_cut; + else if (distanceLaplacianAlgoStr == "scaled cut") + distanceLaplacianAlgo = scaled_cut; + else if (distanceLaplacianAlgoStr == "scaled cut symmetric") + distanceLaplacianAlgo = scaled_cut_symmetric; + else + TEUCHOS_TEST_FOR_EXCEPTION( + true, Exceptions::RuntimeError, + "\"aggregation: distance laplacian algo\" must be one of " + "(default|unscaled cut|scaled cut), not \"" + << distanceLaplacianAlgoStr << "\""); + GetOStream(Runtime0) << "algorithm = \"" << algo + << "\" distance laplacian algorithm = \"" + << distanceLaplacianAlgoStr + << "\": threshold = " << threshold + << ", blocksize = " << A->GetFixedBlockSize() + << std::endl; + } else if (algo == 
"classical") { + if (classicalAlgoStr == "default") + classicalAlgo = defaultAlgo; + else if (classicalAlgoStr == "unscaled cut") + classicalAlgo = unscaled_cut; + else if (classicalAlgoStr == "scaled cut") + classicalAlgo = scaled_cut; + else + TEUCHOS_TEST_FOR_EXCEPTION( + true, Exceptions::RuntimeError, + "\"aggregation: classical algo\" must be one of (default|unscaled " + "cut|scaled cut), not \"" + << classicalAlgoStr << "\""); + GetOStream(Runtime0) << "algorithm = \"" << algo + << "\" classical algorithm = \"" << classicalAlgoStr + << "\": threshold = " << threshold + << ", blocksize = " << A->GetFixedBlockSize() + << std::endl; + + } else + GetOStream(Runtime0) << "algorithm = \"" << algo + << "\": threshold = " << threshold + << ", blocksize = " << A->GetFixedBlockSize() + << std::endl; + Set(currentLevel, "Filtering", (threshold != STS::zero())); + + const typename STS::magnitudeType dirichletThreshold = STS::magnitude( + as(pL.get("aggregation: Dirichlet threshold"))); + + // NOTE: We don't support signed classical RS or SA with cut drop at present + TEUCHOS_TEST_FOR_EXCEPTION( + useSignedClassicalRS && classicalAlgo != defaultAlgo, + Exceptions::RuntimeError, + "\"aggregation: classical algo\" != default is not supported for " + "scalled classical aggregation"); + TEUCHOS_TEST_FOR_EXCEPTION( + useSignedClassicalSA && classicalAlgo != defaultAlgo, + Exceptions::RuntimeError, + "\"aggregation: classical algo\" != default is not supported for " + "scalled classical sa aggregation"); + GO numDropped = 0, numTotal = 0; + std::string graphType = "unamalgamated"; // for description purposes only + + /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a + block in the chosen storage scheme. BlockSize is the number of storage + blocks that must kept together during the amalgamation process. 
+ + Both of these quantities may be different than numPDEs (from + GetFixedBlockSize()), but the following must always hold: + + numPDEs = BlockSize * storageblocksize. + + If numPDEs==1 + Matrix is point storage (classical CRS storage). storageblocksize=1 and + BlockSize=1 No other values makes sense. + + If numPDEs>1 + If matrix uses point storage, then storageblocksize=1 and + BlockSize=numPDEs. If matrix uses block storage, with block size of n, then + storageblocksize=n, and BlockSize=numPDEs/n. Thus far, only + storageblocksize=numPDEs and BlockSize=1 has been tested. + */ + TEUCHOS_TEST_FOR_EXCEPTION( + A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, + Exceptions::RuntimeError, + "A->GetFixedBlockSize() needs to be a multiple of " + "A->GetStorageBlockSize()"); + const LO BlockSize = A->GetFixedBlockSize() / A->GetStorageBlockSize(); + + /************************** RS or SA-style Classical Dropping (and variants) + * **************************/ + if (algo == "classical") { + if (predrop_ == null) { + // ap: this is a hack: had to declare predrop_ as mutable + predrop_ = rcp(new PreDropFunctionConstVal(threshold)); + } - /************************** RS or SA-style Classical Dropping (and variants) **************************/ - if (algo == "classical") { - if (predrop_ == null) { - // ap: this is a hack: had to declare predrop_ as mutable - predrop_ = rcp(new PreDropFunctionConstVal(threshold)); + if (predrop_ != null) { + RCP predropConstVal = + rcp_dynamic_cast(predrop_); + TEUCHOS_TEST_FOR_EXCEPTION(predropConstVal == Teuchos::null, + Exceptions::BadCast, + "MueLu::CoalesceFactory::Build: cast to " + "PreDropFunctionConstVal failed."); + // If a user provided a predrop function, it overwrites the XML + // threshold parameter + SC newt = predropConstVal->GetThreshold(); + if (newt != threshold) { + GetOStream(Warnings0) + << "switching threshold parameter from " << threshold + << " (list) to " << newt << " (user function" << std::endl; + threshold = 
newt; + } + } + // At this points we either have + // (predrop_ != null) + // Therefore, it is sufficient to check only threshold + if (BlockSize == 1 && threshold == STS::zero() && !useSignedClassicalRS && + !useSignedClassicalSA && A->hasCrsGraph()) { + // Case 1: scalar problem, no dropping => just use matrix graph + RCP graph = rcp(new Graph(A->getCrsGraph(), "graph of A")); + // Detect and record rows that correspond to Dirichlet boundary + // conditions + ArrayRCP boundaryNodes = Teuchos::arcp_const_cast( + MueLu::Utilities::DetectDirichletRows( + *A, dirichletThreshold)); + if (rowSumTol > 0.) + Utilities::ApplyRowSumCriterion(*A, rowSumTol, boundaryNodes); + + graph->SetBoundaryNodeMap(boundaryNodes); + numTotal = A->getLocalNumEntries(); + + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; + for (LO i = 0; i < boundaryNodes.size(); ++i) + if (boundaryNodes[i]) + numLocalBoundaryNodes++; + RCP> comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes + << " Dirichlet nodes" << std::endl; } - if (predrop_ != null) { - RCP predropConstVal = rcp_dynamic_cast(predrop_); - TEUCHOS_TEST_FOR_EXCEPTION(predropConstVal == Teuchos::null, Exceptions::BadCast, - "MueLu::CoalesceFactory::Build: cast to PreDropFunctionConstVal failed."); - // If a user provided a predrop function, it overwrites the XML threshold parameter - SC newt = predropConstVal->GetThreshold(); - if (newt != threshold) { - GetOStream(Warnings0) << "switching threshold parameter from " << threshold << " (list) to " << newt << " (user function" << std::endl; - threshold = newt; + Set(currentLevel, "DofsPerNode", 1); + Set(currentLevel, "Graph", graph); + + } else if ((BlockSize == 1 && threshold != STS::zero()) || + (BlockSize == 1 && threshold == STS::zero() && + !A->hasCrsGraph()) || + (BlockSize == 1 && useSignedClassicalRS) || + 
(BlockSize == 1 && useSignedClassicalSA)) { + // Case 2: scalar problem with dropping => record the column indices of + // undropped entries, but still use original + // graph's map information, + // e.g., whether index is local + // OR a matrix without a CrsGraph + + // allocate space for the local graph + ArrayRCP rows(A->getLocalNumRows() + 1); + ArrayRCP columns(A->getLocalNumEntries()); + + using MT = typename STS::magnitudeType; + RCP ghostedDiag; + ArrayRCP ghostedDiagVals; + ArrayRCP negMaxOffDiagonal; + // RS style needs the max negative off-diagonal, SA style needs the + // diagonal + if (useSignedClassicalRS) { + if (ghostedBlockNumber.is_null()) { + negMaxOffDiagonal = + MueLu::Utilities::GetMatrixMaxMinusOffDiagonal( + *A); + if (GetVerbLevel() & Statistics1) + GetOStream(Statistics1) + << "Calculated max point off-diagonal" << std::endl; + } else { + negMaxOffDiagonal = + MueLu::Utilities::GetMatrixMaxMinusOffDiagonal( + *A, *ghostedBlockNumber); + if (GetVerbLevel() & Statistics1) + GetOStream(Statistics1) + << "Calculating max block off-diagonal" << std::endl; } + } else { + ghostedDiag = + MueLu::Utilities::GetMatrixOverlappedDiagonal(*A); + ghostedDiagVals = ghostedDiag->getData(0); } - // At this points we either have - // (predrop_ != null) - // Therefore, it is sufficient to check only threshold - if ( BlockSize==1 && threshold == STS::zero() && !useSignedClassicalRS && !useSignedClassicalSA && A->hasCrsGraph()) { - // Case 1: scalar problem, no dropping => just use matrix graph - RCP graph = rcp(new Graph(A->getCrsGraph(), "graph of A")); - // Detect and record rows that correspond to Dirichlet boundary conditions - ArrayRCP boundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); - if (rowSumTol > 0.) + ArrayRCP boundaryNodes = Teuchos::arcp_const_cast( + MueLu::Utilities::DetectDirichletRows( + *A, dirichletThreshold)); + if (rowSumTol > 0.) 
{ + if (ghostedBlockNumber.is_null()) { + if (GetVerbLevel() & Statistics1) + GetOStream(Statistics1) + << "Applying point row sum criterion." << std::endl; Utilities::ApplyRowSumCriterion(*A, rowSumTol, boundaryNodes); - - graph->SetBoundaryNodeMap(boundaryNodes); - numTotal = A->getLocalNumEntries(); - - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; - for (LO i = 0; i < boundaryNodes.size(); ++i) - if (boundaryNodes[i]) - numLocalBoundaryNodes++; - RCP > comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; - } - - Set(currentLevel, "DofsPerNode", 1); - Set(currentLevel, "Graph", graph); - - } else if ( (BlockSize == 1 && threshold != STS::zero()) || - (BlockSize == 1 && threshold == STS::zero() && !A->hasCrsGraph()) || - (BlockSize == 1 && useSignedClassicalRS) || - (BlockSize == 1 && useSignedClassicalSA) ) { - // Case 2: scalar problem with dropping => record the column indices of undropped entries, but still use original - // graph's map information, e.g., whether index is local - // OR a matrix without a CrsGraph - - // allocate space for the local graph - ArrayRCP rows (A->getLocalNumRows()+1); - ArrayRCP columns(A->getLocalNumEntries()); - - using MT = typename STS::magnitudeType; - RCP ghostedDiag; - ArrayRCP ghostedDiagVals; - ArrayRCP negMaxOffDiagonal; - // RS style needs the max negative off-diagonal, SA style needs the diagonal - if(useSignedClassicalRS) { - if(ghostedBlockNumber.is_null()) { - negMaxOffDiagonal = MueLu::Utilities::GetMatrixMaxMinusOffDiagonal(*A); - if (GetVerbLevel() & Statistics1) - GetOStream(Statistics1) << "Calculated max point off-diagonal" << std::endl; - } - else { - negMaxOffDiagonal = MueLu::Utilities::GetMatrixMaxMinusOffDiagonal(*A,*ghostedBlockNumber); - if (GetVerbLevel() & Statistics1) - GetOStream(Statistics1) << 
"Calculating max block off-diagonal" << std::endl; - } - } - else { - ghostedDiag = MueLu::Utilities::GetMatrixOverlappedDiagonal(*A); - ghostedDiagVals = ghostedDiag->getData(0); - } - ArrayRCP boundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); - if (rowSumTol > 0.) { - if(ghostedBlockNumber.is_null()) { - if (GetVerbLevel() & Statistics1) - GetOStream(Statistics1) << "Applying point row sum criterion." << std::endl; - Utilities::ApplyRowSumCriterion(*A, rowSumTol, boundaryNodes); - } else { - if (GetVerbLevel() & Statistics1) - GetOStream(Statistics1) << "Applying block row sum criterion." << std::endl; - Utilities::ApplyRowSumCriterion(*A, *ghostedBlockNumber, rowSumTol, boundaryNodes); - } + } else { + if (GetVerbLevel() & Statistics1) + GetOStream(Statistics1) + << "Applying block row sum criterion." << std::endl; + Utilities::ApplyRowSumCriterion(*A, *ghostedBlockNumber, rowSumTol, + boundaryNodes); } + } - LO realnnz = 0; - rows[0] = 0; - for (LO row = 0; row < Teuchos::as(A->getRowMap()->getLocalNumElements()); ++row) { - size_t nnz = A->getNumEntriesInLocalRow(row); - bool rowIsDirichlet = boundaryNodes[row]; - ArrayView indices; - ArrayView vals; - A->getLocalRowView(row, indices, vals); - - if(classicalAlgo == defaultAlgo) { - //FIXME the current predrop function uses the following - //FIXME if(std::abs(vals[k]) > std::abs(threshold_) || grow == gcid ) - //FIXME but the threshold doesn't take into account the rows' diagonal entries - //FIXME For now, hardwiring the dropping in here - - LO rownnz = 0; - if(useSignedClassicalRS) { - // Signed classical RS style - for (LO colID = 0; colID < Teuchos::as(nnz); colID++) { - LO col = indices[colID]; - MT max_neg_aik = realThreshold * STS::real(negMaxOffDiagonal[row]); - MT neg_aij = - STS::real(vals[colID]); - /* if(row==1326) printf("A(%d,%d) = %6.4e, block = (%d,%d) neg_aij = %6.4e max_neg_aik = %6.4e\n",row,col,vals[colID], - g_block_id.is_null() ? 
-1 : g_block_id[row], - g_block_id.is_null() ? -1 : g_block_id[col], - neg_aij, max_neg_aik);*/ - if ((!rowIsDirichlet && (g_block_id.is_null() || g_block_id[row] == g_block_id[col]) && neg_aij > max_neg_aik) || row == col) { - columns[realnnz++] = col; - rownnz++; - } else - numDropped++; - } - rows[row+1] = realnnz; + LO realnnz = 0; + rows[0] = 0; + for (LO row = 0; + row < Teuchos::as(A->getRowMap()->getLocalNumElements()); + ++row) { + size_t nnz = A->getNumEntriesInLocalRow(row); + bool rowIsDirichlet = boundaryNodes[row]; + ArrayView indices; + ArrayView vals; + A->getLocalRowView(row, indices, vals); + + if (classicalAlgo == defaultAlgo) { + // FIXME the current predrop function uses the following + // FIXME if(std::abs(vals[k]) > std::abs(threshold_) || grow == + // gcid ) + // FIXME but the threshold doesn't take into account the rows' + // diagonal entries + // FIXME For now, hardwiring the dropping in here + + LO rownnz = 0; + if (useSignedClassicalRS) { + // Signed classical RS style + for (LO colID = 0; colID < Teuchos::as(nnz); colID++) { + LO col = indices[colID]; + MT max_neg_aik = + realThreshold * STS::real(negMaxOffDiagonal[row]); + MT neg_aij = -STS::real(vals[colID]); + /* if(row==1326) printf("A(%d,%d) = %6.4e, + block = (%d,%d) neg_aij = %6.4e max_neg_aik = + %6.4e\n",row,col,vals[colID], g_block_id.is_null() ? -1 : + g_block_id[row], g_block_id.is_null() ? 
-1 : g_block_id[col], + neg_aij, max_neg_aik);*/ + if ((!rowIsDirichlet && + (g_block_id.is_null() || + g_block_id[row] == g_block_id[col]) && + neg_aij > max_neg_aik) || + row == col) { + columns[realnnz++] = col; + rownnz++; + } else + numDropped++; } - else if(useSignedClassicalSA) { - // Signed classical SA style - for (LO colID = 0; colID < Teuchos::as(nnz); colID++) { - LO col = indices[colID]; + rows[row + 1] = realnnz; + } else if (useSignedClassicalSA) { + // Signed classical SA style + for (LO colID = 0; colID < Teuchos::as(nnz); colID++) { + LO col = indices[colID]; - bool is_nonpositive = STS::real(vals[colID]) <= 0; - MT aiiajj = STS::magnitude(threshold*threshold * ghostedDiagVals[col]*ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj| - MT aij = is_nonpositive ? STS::magnitude(vals[colID]*vals[colID]) : (-STS::magnitude(vals[colID]*vals[colID])); // + |a_ij|^2, if a_ij < 0, - |a_ij|^2 if a_ij >=0 - /* - if(row==1326) printf("A(%d,%d) = %6.4e, raw_aij = %6.4e aij = %6.4e aiiajj = %6.4e\n",row,col,vals[colID], - vals[colID],aij, aiiajj); - */ - - if ((!rowIsDirichlet && aij > aiiajj) || row == col) { - columns[realnnz++] = col; - rownnz++; - } else - numDropped++; - } - rows[row+1] = realnnz; + bool is_nonpositive = STS::real(vals[colID]) <= 0; + MT aiiajj = STS::magnitude( + threshold * threshold * ghostedDiagVals[col] * + ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj| + MT aij = is_nonpositive + ? 
STS::magnitude(vals[colID] * vals[colID]) + : (-STS::magnitude( + vals[colID] * + vals[colID])); // + |a_ij|^2, if a_ij < 0, - + // |a_ij|^2 if a_ij >=0 + /* + if(row==1326) printf("A(%d,%d) = %6.4e, raw_aij = %6.4e aij = + %6.4e aiiajj = %6.4e\n",row,col,vals[colID], vals[colID],aij, + aiiajj); + */ + + if ((!rowIsDirichlet && aij > aiiajj) || row == col) { + columns[realnnz++] = col; + rownnz++; + } else + numDropped++; } - else { - // Standard abs classical - for (LO colID = 0; colID < Teuchos::as(nnz); colID++) { - LO col = indices[colID]; - MT aiiajj = STS::magnitude(threshold*threshold * ghostedDiagVals[col]*ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj| - MT aij = STS::magnitude(vals[colID]*vals[colID]); // |a_ij|^2 - - if ((!rowIsDirichlet && aij > aiiajj) || row == col) { + rows[row + 1] = realnnz; + } else { + // Standard abs classical + for (LO colID = 0; colID < Teuchos::as(nnz); colID++) { + LO col = indices[colID]; + MT aiiajj = STS::magnitude( + threshold * threshold * ghostedDiagVals[col] * + ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj| + MT aij = STS::magnitude(vals[colID] * vals[colID]); // |a_ij|^2 + + if ((!rowIsDirichlet && aij > aiiajj) || row == col) { columns[realnnz++] = col; rownnz++; - } else - numDropped++; - } - rows[row+1] = realnnz; + } else + numDropped++; } + rows[row + 1] = realnnz; } - else { - /* Cut Algorithm */ - //CMS - using DropTol = Details::DropTol; - std::vector drop_vec; - drop_vec.reserve(nnz); - const real_type zero = Teuchos::ScalarTraits::zero(); - const real_type one = Teuchos::ScalarTraits::one(); - LO rownnz = 0; - // NOTE: This probably needs to be fixed for rowsum - - // find magnitudes - for (LO colID = 0; colID < (LO)nnz; colID++) { - LO col = indices[colID]; - if (row == col) { - drop_vec.emplace_back( zero, one, colID, false); - continue; - } - - // Don't aggregate boundaries - if(boundaryNodes[colID]) continue; - typename STS::magnitudeType aiiajj = STS::magnitude(threshold*threshold * 
ghostedDiagVals[col]*ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj| - typename STS::magnitudeType aij = STS::magnitude(vals[colID]*vals[colID]); // |a_ij|^2 - drop_vec.emplace_back(aij, aiiajj, colID, false); + } else { + /* Cut Algorithm */ + // CMS + using DropTol = Details::DropTol; + std::vector drop_vec; + drop_vec.reserve(nnz); + const real_type zero = Teuchos::ScalarTraits::zero(); + const real_type one = Teuchos::ScalarTraits::one(); + LO rownnz = 0; + // NOTE: This probably needs to be fixed for rowsum + + // find magnitudes + for (LO colID = 0; colID < (LO)nnz; colID++) { + LO col = indices[colID]; + if (row == col) { + drop_vec.emplace_back(zero, one, colID, false); + continue; } - const size_t n = drop_vec.size(); - - if (classicalAlgo == unscaled_cut) { - std::sort( drop_vec.begin(), drop_vec.end() - , [](DropTol const& a, DropTol const& b) { - return a.val > b.val; - }); - - bool drop = false; - for (size_t i=1; i realThreshold*b) { - drop = true; + // Don't aggregate boundaries + if (boundaryNodes[colID]) + continue; + typename STS::magnitudeType aiiajj = + STS::magnitude(threshold * threshold * ghostedDiagVals[col] * + ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj| + typename STS::magnitudeType aij = + STS::magnitude(vals[colID] * vals[colID]); // |a_ij|^2 + drop_vec.emplace_back(aij, aiiajj, colID, false); + } + + const size_t n = drop_vec.size(); + + if (classicalAlgo == unscaled_cut) { + std::sort(drop_vec.begin(), drop_vec.end(), + [](DropTol const &a, DropTol const &b) { + return a.val > b.val; + }); + + bool drop = false; + for (size_t i = 1; i < n; ++i) { + if (!drop) { + auto const &x = drop_vec[i - 1]; + auto const &y = drop_vec[i]; + auto a = x.val; + auto b = y.val; + if (a > realThreshold * b) { + drop = true; #ifdef HAVE_MUELU_DEBUG - if (distanceLaplacianCutVerbose) { - std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl; - } -#endif + if (distanceLaplacianCutVerbose) { + std::cout << "DJS: KEEP, N, ROW: " 
<< i + 1 << ", " << n + << ", " << row << std::endl; } +#endif } - drop_vec[i].drop = drop; } - } else if (classicalAlgo == scaled_cut) { - std::sort( drop_vec.begin(), drop_vec.end() - , [](DropTol const& a, DropTol const& b) { - return a.val/a.diag > b.val/b.diag; - }); - bool drop = false; - // printf("[%d] Scaled Cut: ",(int)row); - // printf("%3d(%4s) ",indices[drop_vec[0].col],"keep"); - for (size_t i=1; i realThreshold*b) { - drop = true; + drop_vec[i].drop = drop; + } + } else if (classicalAlgo == scaled_cut) { + std::sort(drop_vec.begin(), drop_vec.end(), + [](DropTol const &a, DropTol const &b) { + return a.val / a.diag > b.val / b.diag; + }); + bool drop = false; + // printf("[%d] Scaled Cut: ",(int)row); + // printf("%3d(%4s) + // ",indices[drop_vec[0].col],"keep"); + for (size_t i = 1; i < n; ++i) { + if (!drop) { + auto const &x = drop_vec[i - 1]; + auto const &y = drop_vec[i]; + auto a = x.val / x.diag; + auto b = y.val / y.diag; + if (a > realThreshold * b) { + drop = true; #ifdef HAVE_MUELU_DEBUG - if (distanceLaplacianCutVerbose) { - std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl; - } -#endif - } - // printf("%3d(%4s) ",indices[drop_vec[i].col],drop?"drop":"keep"); - + if (distanceLaplacianCutVerbose) { + std::cout << "DJS: KEEP, N, ROW: " << i + 1 << ", " << n + << ", " << row << std::endl; } - drop_vec[i].drop = drop; +#endif } - // printf("\n"); - } - std::sort( drop_vec.begin(), drop_vec.end() - , [](DropTol const& a, DropTol const& b) { - return a.col < b.col; - } - ); - - for (LO idxID =0; idxID<(LO)drop_vec.size(); idxID++) { - LO col = indices[drop_vec[idxID].col]; - // don't drop diagonal - if (row == col) { - columns[realnnz++] = col; - rownnz++; - continue; - } - - if (!drop_vec[idxID].drop) { - columns[realnnz++] = col; - rownnz++; - } else { - numDropped++; + // printf("%3d(%4s) + // ",indices[drop_vec[i].col],drop?"drop":"keep"); } + drop_vec[i].drop = drop; } - // CMS - rows[row+1] = realnnz; - + 
// printf("\n"); } - }//end for row - - columns.resize(realnnz); - numTotal = A->getLocalNumEntries(); + std::sort(drop_vec.begin(), drop_vec.end(), + [](DropTol const &a, DropTol const &b) { + return a.col < b.col; + }); + + for (LO idxID = 0; idxID < (LO)drop_vec.size(); idxID++) { + LO col = indices[drop_vec[idxID].col]; + // don't drop diagonal + if (row == col) { + columns[realnnz++] = col; + rownnz++; + continue; + } - if (aggregationMayCreateDirichlet) { - // If the only element remaining after filtering is diagonal, mark node as boundary - for (LO row = 0; row < Teuchos::as(A->getRowMap()->getLocalNumElements()); ++row) { - if (rows[row+1]- rows[row] <= 1) - boundaryNodes[row] = true; + if (!drop_vec[idxID].drop) { + columns[realnnz++] = col; + rownnz++; + } else { + numDropped++; + } } + // CMS + rows[row + 1] = realnnz; } - - RCP graph = rcp(new LWGraph(rows, columns, A->getRowMap(), A->getColMap(), "thresholded graph of A")); - graph->SetBoundaryNodeMap(boundaryNodes); - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; - for (LO i = 0; i < boundaryNodes.size(); ++i) - if (boundaryNodes[i]) - numLocalBoundaryNodes++; - RCP > comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; + } // end for row + + columns.resize(realnnz); + numTotal = A->getLocalNumEntries(); + + if (aggregationMayCreateDirichlet) { + // If the only element remaining after filtering is diagonal, mark + // node as boundary + for (LO row = 0; + row < Teuchos::as(A->getRowMap()->getLocalNumElements()); + ++row) { + if (rows[row + 1] - rows[row] <= 1) + boundaryNodes[row] = true; } - Set(currentLevel, "Graph", graph); - Set(currentLevel, "DofsPerNode", 1); - - // If we're doing signed classical, we might want to block-diagonalize *after* the dropping - if(generateColoringGraph) { - RCP 
colorGraph; - RCP importer = A->getCrsGraph()->getImporter(); - BlockDiagonalizeGraph(graph,ghostedBlockNumber,colorGraph,importer); - Set(currentLevel, "Coloring Graph",colorGraph); - // #define CMS_DUMP -#ifdef CMS_DUMP - { - Xpetra::IO::Write("m_regular_graph."+std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast(graph)->GetCrsGraph()); - Xpetra::IO::Write("m_color_graph."+std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast(colorGraph)->GetCrsGraph()); - // int rank = graph->GetDomainMap()->getComm()->getRank(); - // { - // std::ofstream ofs(std::string("m_color_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out); - // RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs)); - // colorGraph->print(*fancy,Debug); - // } - // { - // std::ofstream ofs(std::string("m_regular_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out); - // RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs)); - // graph->print(*fancy,Debug); - // } - - } -#endif - }//end generateColoringGraph - } else if (BlockSize > 1 && threshold == STS::zero()) { - // Case 3: Multiple DOF/node problem without dropping - const RCP rowMap = A->getRowMap(); - const RCP colMap = A->getColMap(); - - graphType = "amalgamated"; + } - // build node row map (uniqueMap) and node column map (nonUniqueMap) - // the arrays rowTranslation and colTranslation contain the local node id - // given a local dof id. 
The data is calculated by the AmalgamationFactory and - // stored in the variable container "UnAmalgamationInfo" - RCP uniqueMap = amalInfo->getNodeRowMap(); - RCP nonUniqueMap = amalInfo->getNodeColMap(); - Array rowTranslation = *(amalInfo->getRowTranslation()); - Array colTranslation = *(amalInfo->getColTranslation()); - - // get number of local nodes - LO numRows = Teuchos::as(uniqueMap->getLocalNumElements()); - - // Allocate space for the local graph - ArrayRCP rows = ArrayRCP(numRows+1); - ArrayRCP columns = ArrayRCP(A->getLocalNumEntries()); - - const ArrayRCP amalgBoundaryNodes(numRows, false); - - // Detect and record rows that correspond to Dirichlet boundary conditions - // TODO If we use ArrayRCP, then we can record boundary nodes as usual. Size - // TODO the array one bigger than the number of local rows, and the last entry can - // TODO hold the actual number of boundary nodes. Clever, huh? - ArrayRCP pointBoundaryNodes; - pointBoundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); - if (rowSumTol > 0.) 
- Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes); - - - // extract striding information - LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map) - LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map) - LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map - if (A->IsView("stridedMaps") == true) { - Teuchos::RCP myMap = A->getRowMap("stridedMaps"); - Teuchos::RCP strMap = Teuchos::rcp_dynamic_cast(myMap); - TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap"); - blkSize = Teuchos::as(strMap->getFixedBlockSize()); - blkId = strMap->getStridedBlockId(); - if (blkId > -1) - blkPartSize = Teuchos::as(strMap->getStridingData()[blkId]); + RCP graph = + rcp(new LWGraph(rows, columns, A->getRowMap(), A->getColMap(), + "thresholded graph of A")); + graph->SetBoundaryNodeMap(boundaryNodes); + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; + for (LO i = 0; i < boundaryNodes.size(); ++i) + if (boundaryNodes[i]) + numLocalBoundaryNodes++; + RCP> comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes + << " Dirichlet nodes" << std::endl; + } + Set(currentLevel, "Graph", graph); + Set(currentLevel, "DofsPerNode", 1); + + // If we're doing signed classical, we might want to block-diagonalize + // *after* the dropping + if (generateColoringGraph) { + RCP colorGraph; + RCP importer = A->getCrsGraph()->getImporter(); + BlockDiagonalizeGraph(graph, ghostedBlockNumber, colorGraph, + importer); + Set(currentLevel, "Coloring Graph", colorGraph); + // #define CMS_DUMP +#ifdef CMS_DUMP + { + Xpetra::IO::Write( + "m_regular_graph." 
+ std::to_string(currentLevel.GetLevelID()), + *rcp_dynamic_cast(graph)->GetCrsGraph()); + Xpetra::IO::Write( + "m_color_graph." + std::to_string(currentLevel.GetLevelID()), + *rcp_dynamic_cast(colorGraph)->GetCrsGraph()); + // int rank = graph->GetDomainMap()->getComm()->getRank(); + // { + // std::ofstream ofs(std::string("m_color_graph_") + + // std::to_string(currentLevel.GetLevelID())+std::string("_") + + // std::to_string(rank) + std::string(".dat"),std::ofstream::out); + // RCP fancy = + // Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs)); + // colorGraph->print(*fancy,Debug); + // } + // { + // std::ofstream ofs(std::string("m_regular_graph_") + + // std::to_string(currentLevel.GetLevelID())+std::string("_") + + // std::to_string(rank) + std::string(".dat"),std::ofstream::out); + // RCP fancy = + // Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs)); + // graph->print(*fancy,Debug); + // } } +#endif + } // end generateColoringGraph + } else if (BlockSize > 1 && threshold == STS::zero()) { + // Case 3: Multiple DOF/node problem without dropping + const RCP rowMap = A->getRowMap(); + const RCP colMap = A->getColMap(); + + graphType = "amalgamated"; + + // build node row map (uniqueMap) and node column map (nonUniqueMap) + // the arrays rowTranslation and colTranslation contain the local node + // id given a local dof id. 
The data is calculated by the + // AmalgamationFactory and stored in the variable container + // "UnAmalgamationInfo" + RCP uniqueMap = amalInfo->getNodeRowMap(); + RCP nonUniqueMap = amalInfo->getNodeColMap(); + Array rowTranslation = *(amalInfo->getRowTranslation()); + Array colTranslation = *(amalInfo->getColTranslation()); + + // get number of local nodes + LO numRows = + Teuchos::as(uniqueMap->getLocalNumElements()); + + // Allocate space for the local graph + ArrayRCP rows = ArrayRCP(numRows + 1); + ArrayRCP columns = ArrayRCP(A->getLocalNumEntries()); + + const ArrayRCP amalgBoundaryNodes(numRows, false); + + // Detect and record rows that correspond to Dirichlet boundary + // conditions + // TODO If we use ArrayRCP, then we can record boundary nodes as + // usual. Size + // TODO the array one bigger than the number of local rows, and the last + // entry can + // TODO hold the actual number of boundary nodes. Clever, huh? + ArrayRCP pointBoundaryNodes; + pointBoundaryNodes = Teuchos::arcp_const_cast( + MueLu::Utilities::DetectDirichletRows( + *A, dirichletThreshold)); + if (rowSumTol > 0.) 
+ Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes); - // loop over all local nodes - LO realnnz = 0; - rows[0] = 0; - Array indicesExtra; - for (LO row = 0; row < numRows; row++) { - ArrayView indices; - indicesExtra.resize(0); + // extract striding information + LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of + // dofs per node in strided map) + LO blkId = -1; //< the block id within the strided map (or -1 if it is a + // full block map) + LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the + // block within the strided map + if (A->IsView("stridedMaps") == true) { + Teuchos::RCP myMap = A->getRowMap("stridedMaps"); + Teuchos::RCP strMap = + Teuchos::rcp_dynamic_cast(myMap); + TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, + "Map is not of type StridedMap"); + blkSize = Teuchos::as(strMap->getFixedBlockSize()); + blkId = strMap->getStridedBlockId(); + if (blkId > -1) + blkPartSize = Teuchos::as(strMap->getStridingData()[blkId]); + } - // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet - // Note, that pointBoundaryNodes lives on the dofmap (and not the node map). - // Therefore, looping over all dofs is fine here. We use blkPartSize as we work - // with local ids. - // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet) - // node. 
- bool isBoundary = false; - if (pL.get("aggregation: greedy Dirichlet") == true) { - for (LO j = 0; j < blkPartSize; j++) { - if (pointBoundaryNodes[row*blkPartSize+j]) { - isBoundary = true; - break; - } - } - } else { - isBoundary = true; - for (LO j = 0; j < blkPartSize; j++) { - if (!pointBoundaryNodes[row*blkPartSize+j]) { - isBoundary = false; - break; - } + // loop over all local nodes + LO realnnz = 0; + rows[0] = 0; + Array indicesExtra; + for (LO row = 0; row < numRows; row++) { + ArrayView indices; + indicesExtra.resize(0); + + // The amalgamated row is marked as Dirichlet iff all point rows are + // Dirichlet Note, that pointBoundaryNodes lives on the dofmap (and + // not the node map). Therefore, looping over all dofs is fine here. + // We use blkPartSize as we work with local ids. + // TODO: Here we have different options of how to define a node to be + // a boundary (or Dirichlet) node. + bool isBoundary = false; + if (pL.get("aggregation: greedy Dirichlet") == true) { + for (LO j = 0; j < blkPartSize; j++) { + if (pointBoundaryNodes[row * blkPartSize + j]) { + isBoundary = true; + break; } } - - // Merge rows of A - // The array indicesExtra contains local column node ids for the current local node "row" - if (!isBoundary) - MergeRows(*A, row, indicesExtra, colTranslation); - else - indicesExtra.push_back(row); - indices = indicesExtra; - numTotal += indices.size(); - - // add the local column node ids to the full columns array which - // contains the local column node ids for all local node rows - LO nnz = indices.size(), rownnz = 0; - for (LO colID = 0; colID < nnz; colID++) { - LO col = indices[colID]; - columns[realnnz++] = col; - rownnz++; - } - - if (rownnz == 1) { - // If the only element remaining after filtering is diagonal, mark node as boundary - // FIXME: this should really be replaced by the following - // if (indices.size() == 1 && indices[0] == row) - // boundaryNodes[row] = true; - // We do not do it this way now because there is no 
framework for distinguishing isolated - // and boundary nodes in the aggregation algorithms - amalgBoundaryNodes[row] = true; + } else { + isBoundary = true; + for (LO j = 0; j < blkPartSize; j++) { + if (!pointBoundaryNodes[row * blkPartSize + j]) { + isBoundary = false; + break; + } } - rows[row+1] = realnnz; - } //for (LO row = 0; row < numRows; row++) - columns.resize(realnnz); - - RCP graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A")); - graph->SetBoundaryNodeMap(amalgBoundaryNodes); - - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; - - for (LO i = 0; i < amalgBoundaryNodes.size(); ++i) - if (amalgBoundaryNodes[i]) - numLocalBoundaryNodes++; - - RCP > comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes - << " agglomerated Dirichlet nodes" << std::endl; } - Set(currentLevel, "Graph", graph); - Set(currentLevel, "DofsPerNode", blkSize); // full block size - - } else if (BlockSize > 1 && threshold != STS::zero()) { - // Case 4: Multiple DOF/node problem with dropping - const RCP rowMap = A->getRowMap(); - const RCP colMap = A->getColMap(); - graphType = "amalgamated"; - - // build node row map (uniqueMap) and node column map (nonUniqueMap) - // the arrays rowTranslation and colTranslation contain the local node id - // given a local dof id. 
The data is calculated by the AmalgamationFactory and - // stored in the variable container "UnAmalgamationInfo" - RCP uniqueMap = amalInfo->getNodeRowMap(); - RCP nonUniqueMap = amalInfo->getNodeColMap(); - Array rowTranslation = *(amalInfo->getRowTranslation()); - Array colTranslation = *(amalInfo->getColTranslation()); - - // get number of local nodes - LO numRows = Teuchos::as(uniqueMap->getLocalNumElements()); - - // Allocate space for the local graph - ArrayRCP rows = ArrayRCP(numRows+1); - ArrayRCP columns = ArrayRCP(A->getLocalNumEntries()); - - const ArrayRCP amalgBoundaryNodes(numRows, false); - - // Detect and record rows that correspond to Dirichlet boundary conditions - // TODO If we use ArrayRCP, then we can record boundary nodes as usual. Size - // TODO the array one bigger than the number of local rows, and the last entry can - // TODO hold the actual number of boundary nodes. Clever, huh? - ArrayRCP pointBoundaryNodes; - pointBoundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); - if (rowSumTol > 0.) 
- Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes); - - - // extract striding information - LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map) - LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map) - LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map - if (A->IsView("stridedMaps") == true) { - Teuchos::RCP myMap = A->getRowMap("stridedMaps"); - Teuchos::RCP strMap = Teuchos::rcp_dynamic_cast(myMap); - TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap"); - blkSize = Teuchos::as(strMap->getFixedBlockSize()); - blkId = strMap->getStridedBlockId(); - if (blkId > -1) - blkPartSize = Teuchos::as(strMap->getStridingData()[blkId]); + // Merge rows of A + // The array indicesExtra contains local column node ids for the + // current local node "row" + if (!isBoundary) + MergeRows(*A, row, indicesExtra, colTranslation); + else + indicesExtra.push_back(row); + indices = indicesExtra; + numTotal += indices.size(); + + // add the local column node ids to the full columns array which + // contains the local column node ids for all local node rows + LO nnz = indices.size(), rownnz = 0; + for (LO colID = 0; colID < nnz; colID++) { + LO col = indices[colID]; + columns[realnnz++] = col; + rownnz++; } - // extract diagonal data for dropping strategy - RCP ghostedDiag = MueLu::Utilities::GetMatrixOverlappedDiagonal(*A); - const ArrayRCP ghostedDiagVals = ghostedDiag->getData(0); + if (rownnz == 1) { + // If the only element remaining after filtering is diagonal, mark + // node as boundary + // FIXME: this should really be replaced by the following + // if (indices.size() == 1 && indices[0] == row) + // boundaryNodes[row] = true; + // We do not do it this way now because there is no framework for + // distinguishing isolated and boundary nodes in the aggregation + // algorithms + 
amalgBoundaryNodes[row] = true; + } + rows[row + 1] = realnnz; + } // for (LO row = 0; row < numRows; row++) + columns.resize(realnnz); + + RCP graph = rcp(new LWGraph( + rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A")); + graph->SetBoundaryNodeMap(amalgBoundaryNodes); + + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; + + for (LO i = 0; i < amalgBoundaryNodes.size(); ++i) + if (amalgBoundaryNodes[i]) + numLocalBoundaryNodes++; + + RCP> comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) + << "Detected " << numGlobalBoundaryNodes + << " agglomerated Dirichlet nodes" << std::endl; + } - // loop over all local nodes - LO realnnz = 0; - rows[0] = 0; - Array indicesExtra; - for (LO row = 0; row < numRows; row++) { - ArrayView indices; - indicesExtra.resize(0); + Set(currentLevel, "Graph", graph); + Set(currentLevel, "DofsPerNode", blkSize); // full block size + + } else if (BlockSize > 1 && threshold != STS::zero()) { + // Case 4: Multiple DOF/node problem with dropping + const RCP rowMap = A->getRowMap(); + const RCP colMap = A->getColMap(); + graphType = "amalgamated"; + + // build node row map (uniqueMap) and node column map (nonUniqueMap) + // the arrays rowTranslation and colTranslation contain the local node + // id given a local dof id. 
The data is calculated by the + // AmalgamationFactory and stored in the variable container + // "UnAmalgamationInfo" + RCP uniqueMap = amalInfo->getNodeRowMap(); + RCP nonUniqueMap = amalInfo->getNodeColMap(); + Array rowTranslation = *(amalInfo->getRowTranslation()); + Array colTranslation = *(amalInfo->getColTranslation()); + + // get number of local nodes + LO numRows = + Teuchos::as(uniqueMap->getLocalNumElements()); + + // Allocate space for the local graph + ArrayRCP rows = ArrayRCP(numRows + 1); + ArrayRCP columns = ArrayRCP(A->getLocalNumEntries()); + + const ArrayRCP amalgBoundaryNodes(numRows, false); + + // Detect and record rows that correspond to Dirichlet boundary + // conditions + // TODO If we use ArrayRCP, then we can record boundary nodes as + // usual. Size + // TODO the array one bigger than the number of local rows, and the last + // entry can + // TODO hold the actual number of boundary nodes. Clever, huh? + ArrayRCP pointBoundaryNodes; + pointBoundaryNodes = Teuchos::arcp_const_cast( + MueLu::Utilities::DetectDirichletRows( + *A, dirichletThreshold)); + if (rowSumTol > 0.) + Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes); - // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet - // Note, that pointBoundaryNodes lives on the dofmap (and not the node map). - // Therefore, looping over all dofs is fine here. We use blkPartSize as we work - // with local ids. - // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet) - // node. 
- bool isBoundary = false; - if (pL.get("aggregation: greedy Dirichlet") == true) { - for (LO j = 0; j < blkPartSize; j++) { - if (pointBoundaryNodes[row*blkPartSize+j]) { - isBoundary = true; - break; - } - } - } else { - isBoundary = true; - for (LO j = 0; j < blkPartSize; j++) { - if (!pointBoundaryNodes[row*blkPartSize+j]) { - isBoundary = false; - break; - } - } - } + // extract striding information + LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of + // dofs per node in strided map) + LO blkId = -1; //< the block id within the strided map (or -1 if it is a + // full block map) + LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the + // block within the strided map + if (A->IsView("stridedMaps") == true) { + Teuchos::RCP myMap = A->getRowMap("stridedMaps"); + Teuchos::RCP strMap = + Teuchos::rcp_dynamic_cast(myMap); + TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, + "Map is not of type StridedMap"); + blkSize = Teuchos::as(strMap->getFixedBlockSize()); + blkId = strMap->getStridedBlockId(); + if (blkId > -1) + blkPartSize = Teuchos::as(strMap->getStridingData()[blkId]); + } - // Merge rows of A - // The array indicesExtra contains local column node ids for the current local node "row" - if (!isBoundary) - MergeRowsWithDropping(*A, row, ghostedDiagVals, threshold, indicesExtra, colTranslation); - else - indicesExtra.push_back(row); - indices = indicesExtra; - numTotal += indices.size(); + // extract diagonal data for dropping strategy + RCP ghostedDiag = + MueLu::Utilities::GetMatrixOverlappedDiagonal(*A); + const ArrayRCP ghostedDiagVals = ghostedDiag->getData(0); - // add the local column node ids to the full columns array which - // contains the local column node ids for all local node rows - LO nnz = indices.size(), rownnz = 0; - for (LO colID = 0; colID < nnz; colID++) { - LO col = indices[colID]; - columns[realnnz++] = col; - rownnz++; + // loop over all local nodes + LO realnnz = 0; + rows[0] = 
0; + Array indicesExtra; + for (LO row = 0; row < numRows; row++) { + ArrayView indices; + indicesExtra.resize(0); + + // The amalgamated row is marked as Dirichlet iff all point rows are + // Dirichlet Note, that pointBoundaryNodes lives on the dofmap (and + // not the node map). Therefore, looping over all dofs is fine here. + // We use blkPartSize as we work with local ids. + // TODO: Here we have different options of how to define a node to be + // a boundary (or Dirichlet) node. + bool isBoundary = false; + if (pL.get("aggregation: greedy Dirichlet") == true) { + for (LO j = 0; j < blkPartSize; j++) { + if (pointBoundaryNodes[row * blkPartSize + j]) { + isBoundary = true; + break; + } } - - if (rownnz == 1) { - // If the only element remaining after filtering is diagonal, mark node as boundary - // FIXME: this should really be replaced by the following - // if (indices.size() == 1 && indices[0] == row) - // boundaryNodes[row] = true; - // We do not do it this way now because there is no framework for distinguishing isolated - // and boundary nodes in the aggregation algorithms - amalgBoundaryNodes[row] = true; + } else { + isBoundary = true; + for (LO j = 0; j < blkPartSize; j++) { + if (!pointBoundaryNodes[row * blkPartSize + j]) { + isBoundary = false; + break; + } } - rows[row+1] = realnnz; - } //for (LO row = 0; row < numRows; row++) - columns.resize(realnnz); - - RCP graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A")); - graph->SetBoundaryNodeMap(amalgBoundaryNodes); - - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; - - for (LO i = 0; i < amalgBoundaryNodes.size(); ++i) - if (amalgBoundaryNodes[i]) - numLocalBoundaryNodes++; + } - RCP > comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes - << " agglomerated Dirichlet nodes" << std::endl; + // 
Merge rows of A + // The array indicesExtra contains local column node ids for the + // current local node "row" + if (!isBoundary) + MergeRowsWithDropping(*A, row, ghostedDiagVals, threshold, + indicesExtra, colTranslation); + else + indicesExtra.push_back(row); + indices = indicesExtra; + numTotal += indices.size(); + + // add the local column node ids to the full columns array which + // contains the local column node ids for all local node rows + LO nnz = indices.size(), rownnz = 0; + for (LO colID = 0; colID < nnz; colID++) { + LO col = indices[colID]; + columns[realnnz++] = col; + rownnz++; } - Set(currentLevel, "Graph", graph); - Set(currentLevel, "DofsPerNode", blkSize); // full block size + if (rownnz == 1) { + // If the only element remaining after filtering is diagonal, mark + // node as boundary + // FIXME: this should really be replaced by the following + // if (indices.size() == 1 && indices[0] == row) + // boundaryNodes[row] = true; + // We do not do it this way now because there is no framework for + // distinguishing isolated and boundary nodes in the aggregation + // algorithms + amalgBoundaryNodes[row] = true; + } + rows[row + 1] = realnnz; + } // for (LO row = 0; row < numRows; row++) + columns.resize(realnnz); + + RCP graph = rcp(new LWGraph( + rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A")); + graph->SetBoundaryNodeMap(amalgBoundaryNodes); + + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; + + for (LO i = 0; i < amalgBoundaryNodes.size(); ++i) + if (amalgBoundaryNodes[i]) + numLocalBoundaryNodes++; + + RCP> comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) + << "Detected " << numGlobalBoundaryNodes + << " agglomerated Dirichlet nodes" << std::endl; } - } else if (algo == "distance laplacian") { - LO blkSize = A->GetFixedBlockSize(); - GO indexBase = A->getRowMap()->getIndexBase(); - // [*0*] : 
FIXME - // ap: somehow, if I move this line to [*1*], Belos throws an error - // I'm not sure what's going on. Do we always have to Get data, if we did - // DeclareInput for it? - // RCP Coords = Get< RCP >(currentLevel, "Coordinates"); - - // Detect and record rows that correspond to Dirichlet boundary conditions - // TODO If we use ArrayRCP, then we can record boundary nodes as usual. Size - // TODO the array one bigger than the number of local rows, and the last entry can - // TODO hold the actual number of boundary nodes. Clever, huh? - ArrayRCP pointBoundaryNodes; - pointBoundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); - if (rowSumTol > 0.) - Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes); - - if ( (blkSize == 1) && (threshold == STS::zero()) ) { - // Trivial case: scalar problem, no dropping. Can return original graph - RCP graph = rcp(new Graph(A->getCrsGraph(), "graph of A")); - graph->SetBoundaryNodeMap(pointBoundaryNodes); - graphType="unamalgamated"; - numTotal = A->getLocalNumEntries(); - - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; - for (LO i = 0; i < pointBoundaryNodes.size(); ++i) - if (pointBoundaryNodes[i]) - numLocalBoundaryNodes++; - RCP > comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; - } + Set(currentLevel, "Graph", graph); + Set(currentLevel, "DofsPerNode", blkSize); // full block size + } - Set(currentLevel, "DofsPerNode", blkSize); - Set(currentLevel, "Graph", graph); + } else if (algo == "distance laplacian") { + LO blkSize = A->GetFixedBlockSize(); + GO indexBase = A->getRowMap()->getIndexBase(); + // [*0*] : FIXME + // ap: somehow, if I move this line to [*1*], Belos throws an error + // I'm not sure what's going on. 
Do we always have to Get data, if we did + // DeclareInput for it? + // RCP Coords = Get< + // RCP >(currentLevel, "Coordinates"); - } else { - // ap: We make quite a few assumptions here; general case may be a lot different, - // but much much harder to implement. We assume that: - // 1) all maps are standard maps, not strided maps - // 2) global indices of dofs in A are related to dofs in coordinates in a simple arithmetic - // way: rows i*blkSize, i*blkSize+1, ..., i*blkSize + (blkSize-1) correspond to node i - // - // NOTE: Potentially, some of the code below could be simplified with UnAmalgamationInfo, - // but as I totally don't understand that code, here is my solution - - // [*1*]: see [*0*] - - // Check that the number of local coordinates is consistent with the #rows in A - TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements()/blkSize != Coords->getLocalLength(), Exceptions::Incompatible, - "Coordinate vector length (" << Coords->getLocalLength() << ") is incompatible with number of rows in A (" << A->getRowMap()->getLocalNumElements() << ") by modulo block size ("<< blkSize <<")."); - - const RCP colMap = A->getColMap(); - RCP uniqueMap, nonUniqueMap; - Array colTranslation; - if (blkSize == 1) { - uniqueMap = A->getRowMap(); - nonUniqueMap = A->getColMap(); - graphType="unamalgamated"; + // Detect and record rows that correspond to Dirichlet boundary conditions + // TODO If we use ArrayRCP, then we can record boundary nodes as + // usual. Size + // TODO the array one bigger than the number of local rows, and the last + // entry can + // TODO hold the actual number of boundary nodes. Clever, huh? + ArrayRCP pointBoundaryNodes; + pointBoundaryNodes = Teuchos::arcp_const_cast( + MueLu::Utilities::DetectDirichletRows( + *A, dirichletThreshold)); + if (rowSumTol > 0.) + Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes); + + if ((blkSize == 1) && (threshold == STS::zero())) { + // Trivial case: scalar problem, no dropping. 
Can return original graph + RCP graph = rcp(new Graph(A->getCrsGraph(), "graph of A")); + graph->SetBoundaryNodeMap(pointBoundaryNodes); + graphType = "unamalgamated"; + numTotal = A->getLocalNumEntries(); + + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; + for (LO i = 0; i < pointBoundaryNodes.size(); ++i) + if (pointBoundaryNodes[i]) + numLocalBoundaryNodes++; + RCP> comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes + << " Dirichlet nodes" << std::endl; + } - } else { - uniqueMap = Coords->getMap(); - TEUCHOS_TEST_FOR_EXCEPTION(uniqueMap->getIndexBase() != indexBase, Exceptions::Incompatible, - "Different index bases for matrix and coordinates"); + Set(currentLevel, "DofsPerNode", blkSize); + Set(currentLevel, "Graph", graph); + + } else { + // ap: We make quite a few assumptions here; general case may be a lot + // different, but much much harder to implement. 
We assume that: + // 1) all maps are standard maps, not strided maps + // 2) global indices of dofs in A are related to dofs in coordinates in + // a simple arithmetic + // way: rows i*blkSize, i*blkSize+1, ..., i*blkSize + (blkSize-1) + // correspond to node i + // + // NOTE: Potentially, some of the code below could be simplified with + // UnAmalgamationInfo, but as I totally don't understand that code, here + // is my solution + + // [*1*]: see [*0*] + + // Check that the number of local coordinates is consistent with the + // #rows in A + TEUCHOS_TEST_FOR_EXCEPTION( + A->getRowMap()->getLocalNumElements() / blkSize != + Coords->getLocalLength(), + Exceptions::Incompatible, + "Coordinate vector length (" + << Coords->getLocalLength() + << ") is incompatible with number of rows in A (" + << A->getRowMap()->getLocalNumElements() + << ") by modulo block size (" << blkSize << ")."); + + const RCP colMap = A->getColMap(); + RCP uniqueMap, nonUniqueMap; + Array colTranslation; + if (blkSize == 1) { + uniqueMap = A->getRowMap(); + nonUniqueMap = A->getColMap(); + graphType = "unamalgamated"; - AmalgamationFactory::AmalgamateMap(*(A->getColMap()), *A, nonUniqueMap, colTranslation); + } else { + uniqueMap = Coords->getMap(); + TEUCHOS_TEST_FOR_EXCEPTION( + uniqueMap->getIndexBase() != indexBase, Exceptions::Incompatible, + "Different index bases for matrix and coordinates"); - graphType = "amalgamated"; - } - LO numRows = Teuchos::as(uniqueMap->getLocalNumElements()); + AmalgamationFactory::AmalgamateMap(*(A->getColMap()), *A, + nonUniqueMap, colTranslation); - RCP ghostedCoords; - RCP ghostedLaplDiag; - Teuchos::ArrayRCP ghostedLaplDiagData; - if (threshold != STS::zero()) { - // Get ghost coordinates - RCP importer; - { - SubFactoryMonitor m1(*this, "Import construction", currentLevel); - if (blkSize == 1 && realA->getCrsGraph()->getImporter() != Teuchos::null) { - GetOStream(Warnings1) << "Using existing importer from matrix graph" << std::endl; - importer = 
realA->getCrsGraph()->getImporter(); - } else { - GetOStream(Warnings0) << "Constructing new importer instance" << std::endl; - importer = ImportFactory::Build(uniqueMap, nonUniqueMap); - } - } //subtimer - ghostedCoords = Xpetra::MultiVectorFactory::Build(nonUniqueMap, Coords->getNumVectors()); - { + graphType = "amalgamated"; + } + LO numRows = + Teuchos::as(uniqueMap->getLocalNumElements()); + + RCP ghostedCoords; + RCP ghostedLaplDiag; + Teuchos::ArrayRCP ghostedLaplDiagData; + if (threshold != STS::zero()) { + // Get ghost coordinates + RCP importer; + { + SubFactoryMonitor m1(*this, "Import construction", currentLevel); + if (blkSize == 1 && + realA->getCrsGraph()->getImporter() != Teuchos::null) { + GetOStream(Warnings1) + << "Using existing importer from matrix graph" << std::endl; + importer = realA->getCrsGraph()->getImporter(); + } else { + GetOStream(Warnings0) + << "Constructing new importer instance" << std::endl; + importer = ImportFactory::Build(uniqueMap, nonUniqueMap); + } + } // subtimer + ghostedCoords = + Xpetra::MultiVectorFactory::Build( + nonUniqueMap, Coords->getNumVectors()); + { SubFactoryMonitor m1(*this, "Coordinate import", currentLevel); ghostedCoords->doImport(*Coords, *importer, Xpetra::INSERT); - } //subtimer + } // subtimer - // Construct Distance Laplacian diagonal - RCP localLaplDiag = VectorFactory::Build(uniqueMap); - Array indicesExtra; - Teuchos::Array> coordData; - if (threshold != STS::zero()) { - const size_t numVectors = ghostedCoords->getNumVectors(); - coordData.reserve(numVectors); - for (size_t j = 0; j < numVectors; j++) { - Teuchos::ArrayRCP tmpData=ghostedCoords->getData(j); - coordData.push_back(tmpData); - } + // Construct Distance Laplacian diagonal + RCP localLaplDiag = VectorFactory::Build(uniqueMap); + Array indicesExtra; + Teuchos::Array> coordData; + if (threshold != STS::zero()) { + const size_t numVectors = ghostedCoords->getNumVectors(); + coordData.reserve(numVectors); + for (size_t j = 0; j < 
numVectors; j++) { + Teuchos::ArrayRCP tmpData = + ghostedCoords->getData(j); + coordData.push_back(tmpData); } - { - SubFactoryMonitor m1(*this, "Laplacian local diagonal", currentLevel); + } + { + SubFactoryMonitor m1(*this, "Laplacian local diagonal", + currentLevel); ArrayRCP localLaplDiagData = localLaplDiag->getDataNonConst(0); for (LO row = 0; row < numRows; row++) { ArrayView indices; @@ -1150,90 +1362,114 @@ namespace MueLu { const LO col = indices[colID]; if (row != col) { - if(use_dlap_weights == SINGLE_WEIGHTS) { - /*printf("[%d,%d] Unweighted Distance = %6.4e Weighted Distance = %6.4e\n",row,col, - MueLu::Utilities::Distance2(coordData, row, col), - MueLu::Utilities::Distance2(dlap_weights(),coordData, row, col));*/ - localLaplDiagData[row] += STS::one() / MueLu::Utilities::Distance2(dlap_weights(),coordData, row, col); - } - else if(use_dlap_weights == BLOCK_WEIGHTS) { + if (use_dlap_weights == SINGLE_WEIGHTS) { + /*printf("[%d,%d] Unweighted Distance = %6.4e Weighted + Distance = %6.4e\n",row,col, + MueLu::Utilities::Distance2(coordData, + row, col), + MueLu::Utilities::Distance2(dlap_weights(),coordData, + row, col));*/ + localLaplDiagData[row] += + STS::one() / + MueLu::Utilities::Distance2( + dlap_weights(), coordData, row, col); + } else if (use_dlap_weights == BLOCK_WEIGHTS) { int block_id = row % interleaved_blocksize; int block_start = block_id * interleaved_blocksize; - localLaplDiagData[row] += STS::one() / MueLu::Utilities::Distance2(dlap_weights(block_start,interleaved_blocksize),coordData, row, col); - } - else { - // printf("[%d,%d] Unweighted Distance = %6.4e\n",row,col,MueLu::Utilities::Distance2(coordData, row, col)); - localLaplDiagData[row] += STS::one() / MueLu::Utilities::Distance2(coordData, row, col); + localLaplDiagData[row] += + STS::one() / + MueLu::Utilities::Distance2( + dlap_weights(block_start, interleaved_blocksize), + coordData, row, col); + } else { + // printf("[%d,%d] Unweighted Distance = + // 
%6.4e\n",row,col,MueLu::Utilities::Distance2(coordData, + // row, col)); + localLaplDiagData[row] += + STS::one() / + MueLu::Utilities::Distance2( + coordData, row, col); } haveAddedToDiag = true; } } - // Deal with the situation where boundary conditions have only been enforced on rows, but not on columns. - // We enforce dropping of these entries by assigning a very large number to the diagonal entries corresponding to BCs. + // Deal with the situation where boundary conditions have only + // been enforced on rows, but not on columns. We enforce dropping + // of these entries by assigning a very large number to the + // diagonal entries corresponding to BCs. if (!haveAddedToDiag) localLaplDiagData[row] = STS::rmax(); } - } //subtimer - { - SubFactoryMonitor m1(*this, "Laplacian distributed diagonal", currentLevel); + } // subtimer + { + SubFactoryMonitor m1(*this, "Laplacian distributed diagonal", + currentLevel); ghostedLaplDiag = VectorFactory::Build(nonUniqueMap); - ghostedLaplDiag->doImport(*localLaplDiag, *importer, Xpetra::INSERT); + ghostedLaplDiag->doImport(*localLaplDiag, *importer, + Xpetra::INSERT); ghostedLaplDiagData = ghostedLaplDiag->getDataNonConst(0); - } //subtimer + } // subtimer - } else { - GetOStream(Runtime0) << "Skipping distance laplacian construction due to 0 threshold" << std::endl; - } + } else { + GetOStream(Runtime0) + << "Skipping distance laplacian construction due to 0 threshold" + << std::endl; + } - // NOTE: ghostedLaplDiagData might be zero if we don't actually calculate the laplacian + // NOTE: ghostedLaplDiagData might be zero if we don't actually + // calculate the laplacian - // allocate space for the local graph - ArrayRCP rows = ArrayRCP(numRows+1); - ArrayRCP columns = ArrayRCP(A->getLocalNumEntries()); + // allocate space for the local graph + ArrayRCP rows = ArrayRCP(numRows + 1); + ArrayRCP columns = ArrayRCP(A->getLocalNumEntries()); #ifdef HAVE_MUELU_DEBUG - // DEBUGGING - for(LO i=0; i<(LO)columns.size(); i++) 
columns[i]=-666; + // DEBUGGING + for (LO i = 0; i < (LO)columns.size(); i++) + columns[i] = -666; #endif - // Extra array for if we're allowing symmetrization with cutting - ArrayRCP rows_stop; - bool use_stop_array = threshold != STS::zero() && distanceLaplacianAlgo == scaled_cut_symmetric; - if(use_stop_array) - rows_stop.resize(numRows); - - const ArrayRCP amalgBoundaryNodes(numRows, false); + // Extra array for if we're allowing symmetrization with cutting + ArrayRCP rows_stop; + bool use_stop_array = threshold != STS::zero() && + distanceLaplacianAlgo == scaled_cut_symmetric; + if (use_stop_array) + rows_stop.resize(numRows); - LO realnnz = 0; - rows[0] = 0; + const ArrayRCP amalgBoundaryNodes(numRows, false); - Array indicesExtra; - { + LO realnnz = 0; + rows[0] = 0; + + Array indicesExtra; + { SubFactoryMonitor m1(*this, "Laplacian dropping", currentLevel); Teuchos::Array> coordData; if (threshold != STS::zero()) { const size_t numVectors = ghostedCoords->getNumVectors(); coordData.reserve(numVectors); for (size_t j = 0; j < numVectors; j++) { - Teuchos::ArrayRCP tmpData=ghostedCoords->getData(j); + Teuchos::ArrayRCP tmpData = + ghostedCoords->getData(j); coordData.push_back(tmpData); } } - ArrayView vals;//CMS hackery + ArrayView vals; // CMS hackery for (LO row = 0; row < numRows; row++) { ArrayView indices; indicesExtra.resize(0); - bool isBoundary = false; + bool isBoundary = false; if (blkSize == 1) { - // ArrayView vals;//CMS uncomment + // ArrayView vals;//CMS uncomment A->getLocalRowView(row, indices, vals); - isBoundary = pointBoundaryNodes[row]; + isBoundary = pointBoundaryNodes[row]; } else { - // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet + // The amalgamated row is marked as Dirichlet iff all point rows + // are Dirichlet for (LO j = 0; j < blkSize; j++) { - if (!pointBoundaryNodes[row*blkSize+j]) { + if (!pointBoundaryNodes[row * blkSize + j]) { isBoundary = false; break; } @@ -1250,15 +1486,15 @@ namespace 
MueLu { LO nnz = indices.size(), rownnz = 0; - if(use_stop_array) { - rows[row+1] = rows[row]+nnz; - realnnz = rows[row]; - } + if (use_stop_array) { + rows[row + 1] = rows[row] + nnz; + realnnz = rows[row]; + } if (threshold != STS::zero()) { // default if (distanceLaplacianAlgo == defaultAlgo) { - /* Standard Distance Laplacian */ + /* Standard Distance Laplacian */ for (LO colID = 0; colID < nnz; colID++) { LO col = indices[colID]; @@ -1269,23 +1505,35 @@ namespace MueLu { continue; } - // We do not want the distance Laplacian aggregating boundary nodes - if(isBoundary) continue; + // We do not want the distance Laplacian aggregating boundary + // nodes + if (isBoundary) + continue; SC laplVal; - if(use_dlap_weights == SINGLE_WEIGHTS) { - laplVal = STS::one() / MueLu::Utilities::Distance2(dlap_weights(),coordData, row, col); - } - else if(use_dlap_weights == BLOCK_WEIGHTS) { + if (use_dlap_weights == SINGLE_WEIGHTS) { + laplVal = + STS::one() / + MueLu::Utilities::Distance2( + dlap_weights(), coordData, row, col); + } else if (use_dlap_weights == BLOCK_WEIGHTS) { int block_id = row % interleaved_blocksize; int block_start = block_id * interleaved_blocksize; - laplVal = STS::one() / MueLu::Utilities::Distance2(dlap_weights(block_start,interleaved_blocksize),coordData, row, col); - } - else { - laplVal = STS::one() / MueLu::Utilities::Distance2(coordData, row, col); + laplVal = + STS::one() / + MueLu::Utilities::Distance2( + dlap_weights(block_start, interleaved_blocksize), + coordData, row, col); + } else { + laplVal = + STS::one() / + MueLu::Utilities::Distance2( + coordData, row, col); } - real_type aiiajj = STS::magnitude(realThreshold*realThreshold * ghostedLaplDiagData[row]*ghostedLaplDiagData[col]); - real_type aij = STS::magnitude(laplVal*laplVal); + real_type aiiajj = STS::magnitude( + realThreshold * realThreshold * ghostedLaplDiagData[row] * + ghostedLaplDiagData[col]); + real_type aij = STS::magnitude(laplVal * laplVal); if (aij > aiiajj) { 
columns[realnnz++] = col; @@ -1295,12 +1543,12 @@ namespace MueLu { } } } else { - /* Cut Algorithm */ - using DropTol = Details::DropTol; + /* Cut Algorithm */ + using DropTol = Details::DropTol; std::vector drop_vec; drop_vec.reserve(nnz); const real_type zero = Teuchos::ScalarTraits::zero(); - const real_type one = Teuchos::ScalarTraits::one(); + const real_type one = Teuchos::ScalarTraits::one(); // find magnitudes for (LO colID = 0; colID < nnz; colID++) { @@ -1308,27 +1556,38 @@ namespace MueLu { LO col = indices[colID]; if (row == col) { - drop_vec.emplace_back( zero, one, colID, false); + drop_vec.emplace_back(zero, one, colID, false); continue; } - // We do not want the distance Laplacian aggregating boundary nodes - if(isBoundary) continue; + // We do not want the distance Laplacian aggregating boundary + // nodes + if (isBoundary) + continue; SC laplVal; - if(use_dlap_weights == SINGLE_WEIGHTS) { - laplVal = STS::one() / MueLu::Utilities::Distance2(dlap_weights(),coordData, row, col); - } - else if(use_dlap_weights == BLOCK_WEIGHTS) { + if (use_dlap_weights == SINGLE_WEIGHTS) { + laplVal = + STS::one() / + MueLu::Utilities::Distance2( + dlap_weights(), coordData, row, col); + } else if (use_dlap_weights == BLOCK_WEIGHTS) { int block_id = row % interleaved_blocksize; int block_start = block_id * interleaved_blocksize; - laplVal = STS::one() / MueLu::Utilities::Distance2(dlap_weights(block_start,interleaved_blocksize),coordData, row, col); - } - else { - laplVal = STS::one() / MueLu::Utilities::Distance2(coordData, row, col); + laplVal = + STS::one() / + MueLu::Utilities::Distance2( + dlap_weights(block_start, interleaved_blocksize), + coordData, row, col); + } else { + laplVal = + STS::one() / + MueLu::Utilities::Distance2( + coordData, row, col); } - real_type aiiajj = STS::magnitude(ghostedLaplDiagData[row]*ghostedLaplDiagData[col]); - real_type aij = STS::magnitude(laplVal*laplVal); + real_type aiiajj = STS::magnitude(ghostedLaplDiagData[row] * + 
ghostedLaplDiagData[col]); + real_type aij = STS::magnitude(laplVal * laplVal); drop_vec.emplace_back(aij, aiiajj, colID, false); } @@ -1337,52 +1596,52 @@ namespace MueLu { if (distanceLaplacianAlgo == unscaled_cut) { - std::sort( drop_vec.begin(), drop_vec.end() - , [](DropTol const& a, DropTol const& b) { - return a.val > b.val; - } - ); + std::sort(drop_vec.begin(), drop_vec.end(), + [](DropTol const &a, DropTol const &b) { + return a.val > b.val; + }); bool drop = false; - for (size_t i=1; i realThreshold*b) { + if (a > realThreshold * b) { drop = true; #ifdef HAVE_MUELU_DEBUG if (distanceLaplacianCutVerbose) { - std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl; + std::cout << "DJS: KEEP, N, ROW: " << i + 1 << ", " + << n << ", " << row << std::endl; } #endif } } drop_vec[i].drop = drop; } - } - else if (distanceLaplacianAlgo == scaled_cut || distanceLaplacianAlgo == scaled_cut_symmetric) { + } else if (distanceLaplacianAlgo == scaled_cut || + distanceLaplacianAlgo == scaled_cut_symmetric) { - std::sort( drop_vec.begin(), drop_vec.end() - , [](DropTol const& a, DropTol const& b) { - return a.val/a.diag > b.val/b.diag; - } - ); + std::sort(drop_vec.begin(), drop_vec.end(), + [](DropTol const &a, DropTol const &b) { + return a.val / a.diag > b.val / b.diag; + }); bool drop = false; - for (size_t i=1; i realThreshold*b) { + auto const &x = drop_vec[i - 1]; + auto const &y = drop_vec[i]; + auto a = x.val / x.diag; + auto b = y.val / y.diag; + if (a > realThreshold * b) { drop = true; #ifdef HAVE_MUELU_DEBUG if (distanceLaplacianCutVerbose) { - std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl; - } + std::cout << "DJS: KEEP, N, ROW: " << i + 1 << ", " + << n << ", " << row << std::endl; + } #endif } } @@ -1390,36 +1649,38 @@ namespace MueLu { } } - std::sort( drop_vec.begin(), drop_vec.end() - , [](DropTol const& a, DropTol const& b) { - return a.col < b.col; - } - ); + std::sort(drop_vec.begin(), 
drop_vec.end(), + [](DropTol const &a, DropTol const &b) { + return a.col < b.col; + }); - for (LO idxID =0; idxID<(LO)drop_vec.size(); idxID++) { + for (LO idxID = 0; idxID < (LO)drop_vec.size(); idxID++) { LO col = indices[drop_vec[idxID].col]; - // don't drop diagonal if (row == col) { columns[realnnz++] = col; rownnz++; - // printf("(%d,%d) KEEP %13s matrix = %6.4e\n",row,row,"DIAGONAL",drop_vec[idxID].aux_val); + // printf("(%d,%d) KEEP %13s matrix = + //%6.4e\n",row,row,"DIAGONAL",drop_vec[idxID].aux_val); continue; } if (!drop_vec[idxID].drop) { columns[realnnz++] = col; - // printf("(%d,%d) KEEP dlap = %6.4e matrix = %6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val); + // printf("(%d,%d) KEEP dlap = %6.4e matrix = + //%6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val); rownnz++; } else { - // printf("(%d,%d) DROP dlap = %6.4e matrix = %6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val); + // printf("(%d,%d) DROP dlap = %6.4e matrix = + //%6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val); numDropped++; } } } } else { - // Skip laplace calculation and threshold comparison for zero threshold + // Skip laplace calculation and threshold comparison for zero + // threshold for (LO colID = 0; colID < nnz; colID++) { LO col = indices[colID]; columns[realnnz++] = col; @@ -1427,594 +1688,703 @@ namespace MueLu { } } - if ( rownnz == 1) { - // If the only element remaining after filtering is diagonal, mark node as boundary + if (rownnz == 1) { + // If the only element remaining after filtering is diagonal, mark + // node as boundary // FIXME: this should really be replaced by the following // if (indices.size() == 1 && indices[0] == row) // boundaryNodes[row] = true; - // We do not do it this way now because there is no framework for distinguishing isolated - // and boundary nodes in the aggregation algorithms + // We do not do it this way 
now because there is no framework for + // distinguishing isolated and boundary nodes in the aggregation + // algorithms amalgBoundaryNodes[row] = true; } - if(use_stop_array) - rows_stop[row] = rownnz + rows[row]; - else - rows[row+1] = realnnz; - } //for (LO row = 0; row < numRows; row++) - - } //subtimer - - if (use_stop_array) { - // Do symmetrization of the cut matrix - // NOTE: We assume nested row/column maps here - for (LO row = 0; row < numRows; row++) { - for (LO colidx = rows[row]; colidx < rows_stop[row]; colidx++) { - LO col = columns[colidx]; - if(col >= numRows) continue; - - bool found = false; - for(LO t_col = rows[col] ; !found && t_col < rows_stop[col]; t_col++) { - if (columns[t_col] == row) - found = true; - } - // We didn't find the transpose buddy, so let's symmetrize, unless we'd be symmetrizing - // into a Dirichlet unknown. In that case don't. - if(!found && !pointBoundaryNodes[col] && rows_stop[col] < rows[col+1]) { - LO new_idx = rows_stop[col]; - // printf("(%d,%d) SYMADD entry\n",col,row); - columns[new_idx] = row; - rows_stop[col]++; - numDropped--; - } - } - } - - // Condense everything down - LO current_start=0; - for (LO row = 0; row < numRows; row++) { - LO old_start = current_start; - for (LO col = rows[row]; col < rows_stop[row]; col++) { - if(current_start != col) { - columns[current_start] = columns[col]; - } - current_start++; - } - rows[row] = old_start; - } - rows[numRows] = realnnz = current_start; - - } - - columns.resize(realnnz); - - RCP graph; - { - SubFactoryMonitor m1(*this, "Build amalgamated graph", currentLevel); - graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A")); - graph->SetBoundaryNodeMap(amalgBoundaryNodes); - } //subtimer - - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; + if (use_stop_array) + rows_stop[row] = rownnz + rows[row]; + else + rows[row + 1] = realnnz; + } // for (LO row = 0; row < numRows; row++) - 
for (LO i = 0; i < amalgBoundaryNodes.size(); ++i) - if (amalgBoundaryNodes[i]) - numLocalBoundaryNodes++; + } // subtimer - RCP > comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " agglomerated Dirichlet nodes" - << " using threshold " << dirichletThreshold << std::endl; + if (use_stop_array) { + // Do symmetrization of the cut matrix + // NOTE: We assume nested row/column maps here + for (LO row = 0; row < numRows; row++) { + for (LO colidx = rows[row]; colidx < rows_stop[row]; colidx++) { + LO col = columns[colidx]; + if (col >= numRows) + continue; + + bool found = false; + for (LO t_col = rows[col]; !found && t_col < rows_stop[col]; + t_col++) { + if (columns[t_col] == row) + found = true; + } + // We didn't find the transpose buddy, so let's symmetrize, unless + // we'd be symmetrizing into a Dirichlet unknown. In that case + // don't. + if (!found && !pointBoundaryNodes[col] && + rows_stop[col] < rows[col + 1]) { + LO new_idx = rows_stop[col]; + // printf("(%d,%d) SYMADD entry\n",col,row); + columns[new_idx] = row; + rows_stop[col]++; + numDropped--; + } + } } - Set(currentLevel, "Graph", graph); - Set(currentLevel, "DofsPerNode", blkSize); + // Condense everything down + LO current_start = 0; + for (LO row = 0; row < numRows; row++) { + LO old_start = current_start; + for (LO col = rows[row]; col < rows_stop[row]; col++) { + if (current_start != col) { + columns[current_start] = columns[col]; + } + current_start++; + } + rows[row] = old_start; + } + rows[numRows] = realnnz = current_start; } - } - if ((GetVerbLevel() & Statistics1) && !(A->GetFixedBlockSize() > 1 && threshold != STS::zero())) { - RCP > comm = A->getRowMap()->getComm(); - GO numGlobalTotal, numGlobalDropped; - MueLu_sumAll(comm, numTotal, numGlobalTotal); - MueLu_sumAll(comm, numDropped, numGlobalDropped); - GetOStream(Statistics1) << "Number of dropped entries in " << 
graphType << " matrix graph: " << numGlobalDropped << "/" << numGlobalTotal; - if (numGlobalTotal != 0) - GetOStream(Statistics1) << " (" << 100*Teuchos::as(numGlobalDropped)/Teuchos::as(numGlobalTotal) << "%)"; - GetOStream(Statistics1) << std::endl; - } + columns.resize(realnnz); - } else { - //what Tobias has implemented - - SC threshold = as(pL.get("aggregation: drop tol")); - //GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; - GetOStream(Runtime0) << "algorithm = \"" << "failsafe" << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; - Set(currentLevel, "Filtering", (threshold != STS::zero())); - - RCP rowMap = A->getRowMap(); - RCP colMap = A->getColMap(); - - LO blockdim = 1; // block dim for fixed size blocks - GO indexBase = rowMap->getIndexBase(); // index base of maps - GO offset = 0; - - // 1) check for blocking/striding information - if(A->IsView("stridedMaps") && - Teuchos::rcp_dynamic_cast(A->getRowMap("stridedMaps")) != Teuchos::null) { - Xpetra::viewLabel_t oldView = A->SwitchToView("stridedMaps"); // note: "stridedMaps are always non-overlapping (correspond to range and domain maps!) - RCP strMap = Teuchos::rcp_dynamic_cast(A->getRowMap()); - TEUCHOS_TEST_FOR_EXCEPTION(strMap == Teuchos::null,Exceptions::BadCast,"MueLu::CoalesceFactory::Build: cast to strided row map failed."); - blockdim = strMap->getFixedBlockSize(); - offset = strMap->getOffset(); - oldView = A->SwitchToView(oldView); - GetOStream(Statistics1) << "CoalesceDropFactory::Build():" << " found blockdim=" << blockdim << " from strided maps. offset=" << offset << std::endl; - } else GetOStream(Statistics1) << "CoalesceDropFactory::Build(): no striding information available. 
Use blockdim=1 with offset=0" << std::endl; - - // 2) get row map for amalgamated matrix (graph of A) - // with same distribution over all procs as row map of A - RCP nodeMap = amalInfo->getNodeRowMap(); - GetOStream(Statistics1) << "CoalesceDropFactory: nodeMap " << nodeMap->getLocalNumElements() << "/" << nodeMap->getGlobalNumElements() << " elements" << std::endl; - - // 3) create graph of amalgamated matrix - RCP crsGraph = CrsGraphFactory::Build(nodeMap, A->getLocalMaxNumRowEntries()*blockdim); - - LO numRows = A->getRowMap()->getLocalNumElements(); - LO numNodes = nodeMap->getLocalNumElements(); - const ArrayRCP amalgBoundaryNodes(numNodes, false); - const ArrayRCP numberDirichletRowsPerNode(numNodes, 0); // helper array counting the number of Dirichlet nodes associated with node - bool bIsDiagonalEntry = false; // boolean flag stating that grid==gcid - - // 4) do amalgamation. generate graph of amalgamated matrix - // Note, this code is much more inefficient than the leightwight implementation - // Most of the work has already been done in the AmalgamationFactory - for(LO row=0; rowgetGlobalElement(row); - - // reinitialize boolean helper variable - bIsDiagonalEntry = false; - - // translate grid to nodeid - GO nodeId = AmalgamationFactory::DOFGid2NodeId(grid, blockdim, offset, indexBase); - - size_t nnz = A->getNumEntriesInLocalRow(row); - Teuchos::ArrayView indices; - Teuchos::ArrayView vals; - A->getLocalRowView(row, indices, vals); - - RCP > cnodeIds = Teuchos::rcp(new std::vector); // global column block ids - LO realnnz = 0; - for(LO col=0; col(nnz); col++) { - GO gcid = colMap->getGlobalElement(indices[col]); // global column id - - if(vals[col]!=STS::zero()) { - GO cnodeId = AmalgamationFactory::DOFGid2NodeId(gcid, blockdim, offset, indexBase); - cnodeIds->push_back(cnodeId); - realnnz++; // increment number of nnz in matrix row - if (grid == gcid) bIsDiagonalEntry = true; - } + RCP graph; + { + SubFactoryMonitor m1(*this, "Build amalgamated graph", 
currentLevel); + graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, + "amalgamated graph of A")); + graph->SetBoundaryNodeMap(amalgBoundaryNodes); + } // subtimer + + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; + + for (LO i = 0; i < amalgBoundaryNodes.size(); ++i) + if (amalgBoundaryNodes[i]) + numLocalBoundaryNodes++; + + RCP> comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) + << "Detected " << numGlobalBoundaryNodes + << " agglomerated Dirichlet nodes" + << " using threshold " << dirichletThreshold << std::endl; } - if(realnnz == 1 && bIsDiagonalEntry == true) { - LO lNodeId = nodeMap->getLocalElement(nodeId); - numberDirichletRowsPerNode[lNodeId] += 1; // increment Dirichlet row counter associated with lNodeId - if (numberDirichletRowsPerNode[lNodeId] == blockdim) // mark full Dirichlet nodes - amalgBoundaryNodes[lNodeId] = true; - } + Set(currentLevel, "Graph", graph); + Set(currentLevel, "DofsPerNode", blkSize); + } + } - Teuchos::ArrayRCP arr_cnodeIds = Teuchos::arcp( cnodeIds ); + if ((GetVerbLevel() & Statistics1) && + !(A->GetFixedBlockSize() > 1 && threshold != STS::zero())) { + RCP> comm = A->getRowMap()->getComm(); + GO numGlobalTotal, numGlobalDropped; + MueLu_sumAll(comm, numTotal, numGlobalTotal); + MueLu_sumAll(comm, numDropped, numGlobalDropped); + GetOStream(Statistics1) + << "Number of dropped entries in " << graphType + << " matrix graph: " << numGlobalDropped << "/" << numGlobalTotal; + if (numGlobalTotal != 0) + GetOStream(Statistics1) << " (" + << 100 * Teuchos::as(numGlobalDropped) / + Teuchos::as(numGlobalTotal) + << "%)"; + GetOStream(Statistics1) << std::endl; + } - if(arr_cnodeIds.size() > 0 ) - crsGraph->insertGlobalIndices(nodeId, arr_cnodeIds()); - } - // fill matrix graph - crsGraph->fillComplete(nodeMap,nodeMap); + } else { + // what Tobias has implemented + + SC threshold = 
as(pL.get("aggregation: drop tol")); + // GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << + // threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; + GetOStream(Runtime0) << "algorithm = \"" + << "failsafe" + << "\": threshold = " << threshold + << ", blocksize = " << A->GetFixedBlockSize() + << std::endl; + Set(currentLevel, "Filtering", (threshold != STS::zero())); + + RCP rowMap = A->getRowMap(); + RCP colMap = A->getColMap(); + + LO blockdim = 1; // block dim for fixed size blocks + GO indexBase = rowMap->getIndexBase(); // index base of maps + GO offset = 0; + + // 1) check for blocking/striding information + if (A->IsView("stridedMaps") && + Teuchos::rcp_dynamic_cast( + A->getRowMap("stridedMaps")) != Teuchos::null) { + Xpetra::viewLabel_t oldView = A->SwitchToView( + "stridedMaps"); // note: "stridedMaps are always non-overlapping + // (correspond to range and domain maps!) + RCP strMap = + Teuchos::rcp_dynamic_cast(A->getRowMap()); + TEUCHOS_TEST_FOR_EXCEPTION( + strMap == Teuchos::null, Exceptions::BadCast, + "MueLu::CoalesceFactory::Build: cast to strided row map failed."); + blockdim = strMap->getFixedBlockSize(); + offset = strMap->getOffset(); + oldView = A->SwitchToView(oldView); + GetOStream(Statistics1) + << "CoalesceDropFactory::Build():" + << " found blockdim=" << blockdim + << " from strided maps. offset=" << offset << std::endl; + } else + GetOStream(Statistics1) + << "CoalesceDropFactory::Build(): no striding information available. 
" + "Use blockdim=1 with offset=0" + << std::endl; + + // 2) get row map for amalgamated matrix (graph of A) + // with same distribution over all procs as row map of A + RCP nodeMap = amalInfo->getNodeRowMap(); + GetOStream(Statistics1) + << "CoalesceDropFactory: nodeMap " << nodeMap->getLocalNumElements() + << "/" << nodeMap->getGlobalNumElements() << " elements" << std::endl; + + // 3) create graph of amalgamated matrix + RCP crsGraph = CrsGraphFactory::Build( + nodeMap, A->getLocalMaxNumRowEntries() * blockdim); + + LO numRows = A->getRowMap()->getLocalNumElements(); + LO numNodes = nodeMap->getLocalNumElements(); + const ArrayRCP amalgBoundaryNodes(numNodes, false); + const ArrayRCP numberDirichletRowsPerNode( + numNodes, 0); // helper array counting the number of Dirichlet nodes + // associated with node + bool bIsDiagonalEntry = false; // boolean flag stating that grid==gcid + + // 4) do amalgamation. generate graph of amalgamated matrix + // Note, this code is much more inefficient than the leightwight + // implementation Most of the work has already been done in the + // AmalgamationFactory + for (LO row = 0; row < numRows; row++) { + // get global DOF id + GO grid = rowMap->getGlobalElement(row); + + // reinitialize boolean helper variable + bIsDiagonalEntry = false; + + // translate grid to nodeid + GO nodeId = + AmalgamationFactory::DOFGid2NodeId(grid, blockdim, offset, indexBase); - // 5) create MueLu Graph object - RCP graph = rcp(new Graph(crsGraph, "amalgamated graph of A")); + size_t nnz = A->getNumEntriesInLocalRow(row); + Teuchos::ArrayView indices; + Teuchos::ArrayView vals; + A->getLocalRowView(row, indices, vals); - // Detect and record rows that correspond to Dirichlet boundary conditions - graph->SetBoundaryNodeMap(amalgBoundaryNodes); - - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; - for (LO i = 0; i < amalgBoundaryNodes.size(); ++i) - if (amalgBoundaryNodes[i]) - 
numLocalBoundaryNodes++; - RCP > comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; + RCP> cnodeIds = + Teuchos::rcp(new std::vector); // global column block ids + LO realnnz = 0; + for (LO col = 0; col < Teuchos::as(nnz); col++) { + GO gcid = colMap->getGlobalElement(indices[col]); // global column id + + if (vals[col] != STS::zero()) { + GO cnodeId = AmalgamationFactory::DOFGid2NodeId(gcid, blockdim, + offset, indexBase); + cnodeIds->push_back(cnodeId); + realnnz++; // increment number of nnz in matrix row + if (grid == gcid) + bIsDiagonalEntry = true; + } } - // 6) store results in Level - //graph->SetBoundaryNodeMap(gBoundaryNodeMap); - Set(currentLevel, "DofsPerNode", blockdim); - Set(currentLevel, "Graph", graph); + if (realnnz == 1 && bIsDiagonalEntry == true) { + LO lNodeId = nodeMap->getLocalElement(nodeId); + numberDirichletRowsPerNode[lNodeId] += + 1; // increment Dirichlet row counter associated with lNodeId + if (numberDirichletRowsPerNode[lNodeId] == + blockdim) // mark full Dirichlet nodes + amalgBoundaryNodes[lNodeId] = true; + } - } //if (doExperimentalWrap) ... else ... 
+ Teuchos::ArrayRCP arr_cnodeIds = Teuchos::arcp(cnodeIds); + if (arr_cnodeIds.size() > 0) + crsGraph->insertGlobalIndices(nodeId, arr_cnodeIds()); + } + // fill matrix graph + crsGraph->fillComplete(nodeMap, nodeMap); - } //Build + // 5) create MueLu Graph object + RCP graph = rcp(new Graph(crsGraph, "amalgamated graph of A")); - template - void CoalesceDropFactory::MergeRows(const Matrix& A, const LO row, Array& cols, const Array& translation) const { - typedef typename ArrayView::size_type size_type; + // Detect and record rows that correspond to Dirichlet boundary conditions + graph->SetBoundaryNodeMap(amalgBoundaryNodes); - // extract striding information - LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map - if (A.IsView("stridedMaps") == true) { - Teuchos::RCP myMap = A.getRowMap("stridedMaps"); - Teuchos::RCP strMap = Teuchos::rcp_dynamic_cast(myMap); - TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap"); - if (strMap->getStridedBlockId() > -1) - blkSize = Teuchos::as(strMap->getStridingData()[strMap->getStridedBlockId()]); + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; + for (LO i = 0; i < amalgBoundaryNodes.size(); ++i) + if (amalgBoundaryNodes[i]) + numLocalBoundaryNodes++; + RCP> comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes + << " Dirichlet nodes" << std::endl; } - // count nonzero entries in all dof rows associated with node row - size_t nnz = 0, pos = 0; - for (LO j = 0; j < blkSize; j++) - nnz += A.getNumEntriesInLocalRow(row*blkSize+j); + // 6) store results in Level + // graph->SetBoundaryNodeMap(gBoundaryNodeMap); + Set(currentLevel, "DofsPerNode", blockdim); + Set(currentLevel, "Graph", graph); + + } // if (doExperimentalWrap) ... else ... 
+ +} // Build + +template +void CoalesceDropFactory::MergeRows( + const Matrix &A, const LO row, Array &cols, + const Array &translation) const { + typedef typename ArrayView::size_type size_type; + + // extract striding information + LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within + // the strided map + if (A.IsView("stridedMaps") == true) { + Teuchos::RCP myMap = A.getRowMap("stridedMaps"); + Teuchos::RCP strMap = + Teuchos::rcp_dynamic_cast(myMap); + TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, + "Map is not of type StridedMap"); + if (strMap->getStridedBlockId() > -1) + blkSize = Teuchos::as( + strMap->getStridingData()[strMap->getStridedBlockId()]); + } - if (nnz == 0) { - cols.resize(0); - return; + // count nonzero entries in all dof rows associated with node row + size_t nnz = 0, pos = 0; + for (LO j = 0; j < blkSize; j++) + nnz += A.getNumEntriesInLocalRow(row * blkSize + j); + + if (nnz == 0) { + cols.resize(0); + return; + } + + cols.resize(nnz); + + // loop over all local dof rows associated with local node "row" + ArrayView inds; + ArrayView vals; + for (LO j = 0; j < blkSize; j++) { + A.getLocalRowView(row * blkSize + j, inds, vals); + size_type numIndices = inds.size(); + + if (numIndices == 0) // skip empty dof rows + continue; + + // cols: stores all local node ids for current local node id "row" + cols[pos++] = translation[inds[0]]; + for (size_type k = 1; k < numIndices; k++) { + LO nodeID = translation[inds[k]]; + // Here we try to speed up the process by reducing the size of an array + // to sort. This works if the column nonzeros belonging to the same + // node are stored consequently. 
+ if (nodeID != cols[pos - 1]) + cols[pos++] = nodeID; } + } + cols.resize(pos); + nnz = pos; + + // Sort and remove duplicates + std::sort(cols.begin(), cols.end()); + pos = 0; + for (size_t j = 1; j < nnz; j++) + if (cols[j] != cols[pos]) + cols[++pos] = cols[j]; + cols.resize(pos + 1); +} + +template +void CoalesceDropFactory:: + MergeRowsWithDropping(const Matrix &A, const LO row, + const ArrayRCP &ghostedDiagVals, + SC threshold, Array &cols, + const Array &translation) const { + typedef typename ArrayView::size_type size_type; + typedef Teuchos::ScalarTraits STS; + + // extract striding information + LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within + // the strided map + if (A.IsView("stridedMaps") == true) { + Teuchos::RCP myMap = A.getRowMap("stridedMaps"); + Teuchos::RCP strMap = + Teuchos::rcp_dynamic_cast(myMap); + TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, + "Map is not of type StridedMap"); + if (strMap->getStridedBlockId() > -1) + blkSize = Teuchos::as( + strMap->getStridingData()[strMap->getStridedBlockId()]); + } - cols.resize(nnz); + // count nonzero entries in all dof rows associated with node row + size_t nnz = 0, pos = 0; + for (LO j = 0; j < blkSize; j++) + nnz += A.getNumEntriesInLocalRow(row * blkSize + j); - // loop over all local dof rows associated with local node "row" - ArrayView inds; - ArrayView vals; - for (LO j = 0; j < blkSize; j++) { - A.getLocalRowView(row*blkSize+j, inds, vals); - size_type numIndices = inds.size(); + if (nnz == 0) { + cols.resize(0); + return; + } + + cols.resize(nnz); + + // loop over all local dof rows associated with local node "row" + ArrayView inds; + ArrayView vals; + for (LO j = 0; j < blkSize; j++) { + A.getLocalRowView(row * blkSize + j, inds, vals); + size_type numIndices = inds.size(); + + if (numIndices == 0) // skip empty dof rows + continue; + + // cols: stores all local node ids for current local node id "row" + LO prevNodeID = -1; + for 
(size_type k = 0; k < numIndices; k++) { + LO dofID = inds[k]; + LO nodeID = translation[inds[k]]; - if (numIndices == 0) // skip empty dof rows - continue; + // we avoid a square root by using squared values + typename STS::magnitudeType aiiajj = STS::magnitude( + threshold * threshold * ghostedDiagVals[dofID] * + ghostedDiagVals[row * blkSize + j]); // eps^2 * |a_ii| * |a_jj| + typename STS::magnitudeType aij = STS::magnitude(vals[k] * vals[k]); + + // check dropping criterion + if (aij > aiiajj || (row * blkSize + j == dofID)) { + // accept entry in graph - // cols: stores all local node ids for current local node id "row" - cols[pos++] = translation[inds[0]]; - for (size_type k = 1; k < numIndices; k++) { - LO nodeID = translation[inds[k]]; // Here we try to speed up the process by reducing the size of an array // to sort. This works if the column nonzeros belonging to the same // node are stored consequently. - if (nodeID != cols[pos-1]) + if (nodeID != prevNodeID) { cols[pos++] = nodeID; + prevNodeID = nodeID; + } } } - cols.resize(pos); - nnz = pos; - - // Sort and remove duplicates - std::sort(cols.begin(), cols.end()); - pos = 0; - for (size_t j = 1; j < nnz; j++) - if (cols[j] != cols[pos]) - cols[++pos] = cols[j]; - cols.resize(pos+1); + } + cols.resize(pos); + nnz = pos; + + // Sort and remove duplicates + std::sort(cols.begin(), cols.end()); + pos = 0; + for (size_t j = 1; j < nnz; j++) + if (cols[j] != cols[pos]) + cols[++pos] = cols[j]; + cols.resize(pos + 1); + + return; +} + +template +Teuchos::RCP> +CoalesceDropFactory::BlockDiagonalize(Level ¤tLevel, + const RCP &A, + bool generate_matrix) const { + typedef Teuchos::ScalarTraits STS; + + const ParameterList &pL = GetParameterList(); + const typename STS::magnitudeType dirichletThreshold = STS::magnitude( + as(pL.get("aggregation: Dirichlet threshold"))); + const typename STS::magnitudeType rowSumTol = as( + pL.get("aggregation: row sum drop tol")); + + RCP BlockNumber = + Get>(currentLevel, 
"BlockNumber"); + RCP ghostedBlockNumber; + GetOStream(Statistics1) + << "Using BlockDiagonal Graph before dropping (with provided blocking)" + << std::endl; + + // Ghost the column block numbers if we need to + RCP importer = A->getCrsGraph()->getImporter(); + if (!importer.is_null()) { + SubFactoryMonitor m1(*this, "Block Number import", currentLevel); + ghostedBlockNumber = + Xpetra::VectorFactory::Build(importer->getTargetMap()); + ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT); + } else { + ghostedBlockNumber = BlockNumber; } - template - void CoalesceDropFactory::MergeRowsWithDropping(const Matrix& A, const LO row, const ArrayRCP& ghostedDiagVals, SC threshold, Array& cols, const Array& translation) const { - typedef typename ArrayView::size_type size_type; - typedef Teuchos::ScalarTraits STS; - - // extract striding information - LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map - if (A.IsView("stridedMaps") == true) { - Teuchos::RCP myMap = A.getRowMap("stridedMaps"); - Teuchos::RCP strMap = Teuchos::rcp_dynamic_cast(myMap); - TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap"); - if (strMap->getStridedBlockId() > -1) - blkSize = Teuchos::as(strMap->getStridingData()[strMap->getStridedBlockId()]); - } - - // count nonzero entries in all dof rows associated with node row - size_t nnz = 0, pos = 0; - for (LO j = 0; j < blkSize; j++) - nnz += A.getNumEntriesInLocalRow(row*blkSize+j); - - if (nnz == 0) { - cols.resize(0); - return; - } - - cols.resize(nnz); + // Accessors for block numbers + Teuchos::ArrayRCP row_block_number = BlockNumber->getData(0); + Teuchos::ArrayRCP col_block_number = ghostedBlockNumber->getData(0); + + // allocate space for the local graph + ArrayRCP rows_mat; + ArrayRCP rows_graph, columns; + ArrayRCP values; + RCP crs_matrix_wrap; + + if (generate_matrix) { + crs_matrix_wrap = rcp(new CrsMatrixWrap(A->getRowMap(), 
A->getColMap(), 0)); + crs_matrix_wrap->getCrsMatrix()->allocateAllValues( + A->getLocalNumEntries(), rows_mat, columns, values); + } else { + rows_graph.resize(A->getLocalNumRows() + 1); + columns.resize(A->getLocalNumEntries()); + values.resize(A->getLocalNumEntries()); + } - // loop over all local dof rows associated with local node "row" - ArrayView inds; + LO realnnz = 0; + GO numDropped = 0, numTotal = 0; + for (LO row = 0; row < Teuchos::as(A->getRowMap()->getLocalNumElements()); + ++row) { + LO row_block = row_block_number[row]; + size_t nnz = A->getNumEntriesInLocalRow(row); + ArrayView indices; ArrayView vals; - for (LO j = 0; j < blkSize; j++) { - A.getLocalRowView(row*blkSize+j, inds, vals); - size_type numIndices = inds.size(); - - if (numIndices == 0) // skip empty dof rows - continue; - - // cols: stores all local node ids for current local node id "row" - LO prevNodeID = -1; - for (size_type k = 0; k < numIndices; k++) { - LO dofID = inds[k]; - LO nodeID = translation[inds[k]]; - - // we avoid a square root by using squared values - typename STS::magnitudeType aiiajj = STS::magnitude(threshold*threshold*ghostedDiagVals[dofID]*ghostedDiagVals[row*blkSize+j]); // eps^2 * |a_ii| * |a_jj| - typename STS::magnitudeType aij = STS::magnitude(vals[k]*vals[k]); - - // check dropping criterion - if (aij > aiiajj || (row*blkSize+j == dofID)) { - // accept entry in graph - - // Here we try to speed up the process by reducing the size of an array - // to sort. This works if the column nonzeros belonging to the same - // node are stored consequently. 
- if (nodeID != prevNodeID) { - cols[pos++] = nodeID; - prevNodeID = nodeID; - } - } - } + A->getLocalRowView(row, indices, vals); + + LO rownnz = 0; + for (LO colID = 0; colID < Teuchos::as(nnz); colID++) { + LO col = indices[colID]; + LO col_block = col_block_number[col]; + + if (row_block == col_block) { + if (generate_matrix) + values[realnnz] = vals[colID]; + columns[realnnz++] = col; + rownnz++; + } else + numDropped++; } - cols.resize(pos); - nnz = pos; - - // Sort and remove duplicates - std::sort(cols.begin(), cols.end()); - pos = 0; - for (size_t j = 1; j < nnz; j++) - if (cols[j] != cols[pos]) - cols[++pos] = cols[j]; - cols.resize(pos+1); - - return; + if (generate_matrix) + rows_mat[row + 1] = realnnz; + else + rows_graph[row + 1] = realnnz; } + ArrayRCP boundaryNodes = Teuchos::arcp_const_cast( + MueLu::Utilities::DetectDirichletRows( + *A, dirichletThreshold)); + if (rowSumTol > 0.) + Utilities::ApplyRowSumCriterion(*A, rowSumTol, boundaryNodes); + if (!generate_matrix) { + // We can't resize an Arrayrcp and pass the checks for setAllValues + values.resize(realnnz); + columns.resize(realnnz); + } + numTotal = A->getLocalNumEntries(); + + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; + for (LO i = 0; i < boundaryNodes.size(); ++i) + if (boundaryNodes[i]) + numLocalBoundaryNodes++; + RCP> comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes + << " Dirichlet nodes" << std::endl; + + GO numGlobalTotal, numGlobalDropped; + MueLu_sumAll(comm, numTotal, numGlobalTotal); + MueLu_sumAll(comm, numDropped, numGlobalDropped); + GetOStream(Statistics1) + << "Number of dropped entries in block-diagonalized matrix graph: " + << numGlobalDropped << "/" << numGlobalTotal; + if (numGlobalTotal != 0) + GetOStream(Statistics1) << " (" + << 100 * Teuchos::as(numGlobalDropped) / + 
Teuchos::as(numGlobalTotal) + << "%)"; + GetOStream(Statistics1) << std::endl; + } - template - Teuchos::RCP > CoalesceDropFactory::BlockDiagonalize(Level & currentLevel,const RCP& A,bool generate_matrix) const { - typedef Teuchos::ScalarTraits STS; - - const ParameterList & pL = GetParameterList(); - const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as(pL.get("aggregation: Dirichlet threshold"))); - const typename STS::magnitudeType rowSumTol = as(pL.get("aggregation: row sum drop tol")); - - RCP BlockNumber = Get >(currentLevel, "BlockNumber"); - RCP ghostedBlockNumber; - GetOStream(Statistics1) << "Using BlockDiagonal Graph before dropping (with provided blocking)"< importer = A->getCrsGraph()->getImporter(); - if(!importer.is_null()) { - SubFactoryMonitor m1(*this, "Block Number import", currentLevel); - ghostedBlockNumber= Xpetra::VectorFactory::Build(importer->getTargetMap()); - ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT); - } - else { - ghostedBlockNumber = BlockNumber; - } + Set(currentLevel, "Filtering", true); + + if (generate_matrix) { + // NOTE: Trying to use A's Import/Export objects will cause the code to + // segfault back in Build() with errors on the Import if you're using + // Epetra. I'm not really sure why. By using the Col==Domain and Row==Range + // maps, we get null Import/Export objects here, which is legit, because we + // never use them anyway. 
+ crs_matrix_wrap->getCrsMatrix()->setAllValues(rows_mat, columns, values); + crs_matrix_wrap->getCrsMatrix()->expertStaticFillComplete(A->getColMap(), + A->getRowMap()); + } else { + RCP graph = + rcp(new LWGraph(rows_graph, columns, A->getRowMap(), A->getColMap(), + "block-diagonalized graph of A")); + graph->SetBoundaryNodeMap(boundaryNodes); + Set(currentLevel, "Graph", graph); + } - // Accessors for block numbers - Teuchos::ArrayRCP row_block_number = BlockNumber->getData(0); - Teuchos::ArrayRCP col_block_number = ghostedBlockNumber->getData(0); - - // allocate space for the local graph - ArrayRCP rows_mat; - ArrayRCP rows_graph,columns; - ArrayRCP values; - RCP crs_matrix_wrap; - - if(generate_matrix) { - crs_matrix_wrap = rcp(new CrsMatrixWrap(A->getRowMap(), A->getColMap(), 0)); - crs_matrix_wrap->getCrsMatrix()->allocateAllValues(A->getLocalNumEntries(), rows_mat, columns, values); - } - else { - rows_graph.resize(A->getLocalNumRows()+1); - columns.resize(A->getLocalNumEntries()); - values.resize(A->getLocalNumEntries()); - } - - LO realnnz = 0; - GO numDropped = 0, numTotal = 0; - for (LO row = 0; row < Teuchos::as(A->getRowMap()->getLocalNumElements()); ++row) { + Set(currentLevel, "DofsPerNode", 1); + return crs_matrix_wrap; +} + +template +void CoalesceDropFactory:: + BlockDiagonalizeGraph(const RCP &inputGraph, + const RCP &ghostedBlockNumber, + RCP &outputGraph, + RCP &importer) const { + + TEUCHOS_TEST_FOR_EXCEPTION( + ghostedBlockNumber.is_null(), Exceptions::RuntimeError, + "BlockDiagonalizeGraph(): ghostedBlockNumber is null."); + const ParameterList &pL = GetParameterList(); + + const bool localizeColoringGraph = + pL.get("aggregation: coloring: localize color graph"); + + GetOStream(Statistics1) + << "Using BlockDiagonal Graph after Dropping (with provided blocking)"; + if (localizeColoringGraph) + GetOStream(Statistics1) << ", with localization" << std::endl; + else + GetOStream(Statistics1) << ", without localization" << std::endl; + + // 
Accessors for block numbers + Teuchos::ArrayRCP row_block_number = ghostedBlockNumber->getData(0); + Teuchos::ArrayRCP col_block_number = ghostedBlockNumber->getData(0); + + // allocate space for the local graph + ArrayRCP rows_mat; + ArrayRCP rows_graph, columns; + + rows_graph.resize(inputGraph->GetNodeNumVertices() + 1); + columns.resize(inputGraph->GetNodeNumEdges()); + + LO realnnz = 0; + GO numDropped = 0, numTotal = 0; + const LO numRows = + Teuchos::as(inputGraph->GetDomainMap()->getLocalNumElements()); + if (localizeColoringGraph) { + + for (LO row = 0; row < numRows; ++row) { LO row_block = row_block_number[row]; - size_t nnz = A->getNumEntriesInLocalRow(row); - ArrayView indices; - ArrayView vals; - A->getLocalRowView(row, indices, vals); + ArrayView indices = inputGraph->getNeighborVertices(row); LO rownnz = 0; - for (LO colID = 0; colID < Teuchos::as(nnz); colID++) { + for (LO colID = 0; colID < Teuchos::as(indices.size()); colID++) { LO col = indices[colID]; LO col_block = col_block_number[col]; - - if(row_block == col_block) { - if(generate_matrix) values[realnnz] = vals[colID]; + + if ((row_block == col_block) && (col < numRows)) { columns[realnnz++] = col; rownnz++; } else numDropped++; } - if(generate_matrix) rows_mat[row+1] = realnnz; - else rows_graph[row+1] = realnnz; - } - - ArrayRCP boundaryNodes = Teuchos::arcp_const_cast(MueLu::Utilities::DetectDirichletRows(*A, dirichletThreshold)); - if (rowSumTol > 0.) 
- Utilities::ApplyRowSumCriterion(*A, rowSumTol, boundaryNodes); - - - if(!generate_matrix) { - // We can't resize an Arrayrcp and pass the checks for setAllValues - values.resize(realnnz); - columns.resize(realnnz); - } - numTotal = A->getLocalNumEntries(); - - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; - for (LO i = 0; i < boundaryNodes.size(); ++i) - if (boundaryNodes[i]) - numLocalBoundaryNodes++; - RCP > comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; - - GO numGlobalTotal, numGlobalDropped; - MueLu_sumAll(comm, numTotal, numGlobalTotal); - MueLu_sumAll(comm, numDropped, numGlobalDropped); - GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal; - if (numGlobalTotal != 0) - GetOStream(Statistics1) << " (" << 100*Teuchos::as(numGlobalDropped)/Teuchos::as(numGlobalTotal) << "%)"; - GetOStream(Statistics1) << std::endl; + rows_graph[row + 1] = realnnz; } + } else { + // ghosting of boundary node map + Teuchos::ArrayRCP boundaryNodes = + inputGraph->GetBoundaryNodeMap(); + auto boundaryNodesVector = + Xpetra::VectorFactory::Build(inputGraph->GetDomainMap()); + for (size_t i = 0; i < inputGraph->GetNodeNumVertices(); i++) + boundaryNodesVector->getDataNonConst(0)[i] = boundaryNodes[i]; + // Xpetra::IO::Write("boundary",*boundaryNodesVector); + auto boundaryColumnVector = + Xpetra::VectorFactory::Build(inputGraph->GetImportMap()); + boundaryColumnVector->doImport(*boundaryNodesVector, *importer, + Xpetra::INSERT); + auto boundaryColumn = boundaryColumnVector->getData(0); + + for (LO row = 0; row < numRows; ++row) { + LO row_block = row_block_number[row]; + ArrayView indices = inputGraph->getNeighborVertices(row); - Set(currentLevel, "Filtering", true); + LO rownnz = 0; + 
for (LO colID = 0; colID < Teuchos::as(indices.size()); colID++) { + LO col = indices[colID]; + LO col_block = col_block_number[col]; - if(generate_matrix) { - // NOTE: Trying to use A's Import/Export objects will cause the code to segfault back in Build() with errors on the Import - // if you're using Epetra. I'm not really sure why. By using the Col==Domain and Row==Range maps, we get null Import/Export objects - // here, which is legit, because we never use them anyway. - crs_matrix_wrap->getCrsMatrix()->setAllValues(rows_mat,columns,values); - crs_matrix_wrap->getCrsMatrix()->expertStaticFillComplete(A->getColMap(), A->getRowMap()); - } - else { - RCP graph = rcp(new LWGraph(rows_graph, columns, A->getRowMap(), A->getColMap(), "block-diagonalized graph of A")); - graph->SetBoundaryNodeMap(boundaryNodes); - Set(currentLevel, "Graph", graph); + if ((row_block == col_block) && + ((row == col) || (boundaryColumn[col] == 0))) { + columns[realnnz++] = col; + rownnz++; + } else + numDropped++; + } + rows_graph[row + 1] = realnnz; } - - - Set(currentLevel, "DofsPerNode", 1); - return crs_matrix_wrap; } + columns.resize(realnnz); + numTotal = inputGraph->GetNodeNumEdges(); + + if (GetVerbLevel() & Statistics1) { + RCP> comm = inputGraph->GetDomainMap()->getComm(); + GO numGlobalTotal, numGlobalDropped; + MueLu_sumAll(comm, numTotal, numGlobalTotal); + MueLu_sumAll(comm, numDropped, numGlobalDropped); + GetOStream(Statistics1) + << "Number of dropped entries in block-diagonalized matrix graph: " + << numGlobalDropped << "/" << numGlobalTotal; + if (numGlobalTotal != 0) + GetOStream(Statistics1) << " (" + << 100 * Teuchos::as(numGlobalDropped) / + Teuchos::as(numGlobalTotal) + << "%)"; + GetOStream(Statistics1) << std::endl; + } - template - void CoalesceDropFactory::BlockDiagonalizeGraph(const RCP & inputGraph, const RCP & ghostedBlockNumber, RCP & outputGraph, RCP & importer) const { - - TEUCHOS_TEST_FOR_EXCEPTION(ghostedBlockNumber.is_null(), Exceptions::RuntimeError, 
"BlockDiagonalizeGraph(): ghostedBlockNumber is null."); - const ParameterList & pL = GetParameterList(); - - const bool localizeColoringGraph = pL.get("aggregation: coloring: localize color graph"); - - GetOStream(Statistics1) << "Using BlockDiagonal Graph after Dropping (with provided blocking)"; - if (localizeColoringGraph) - GetOStream(Statistics1) << ", with localization" < row_block_number = ghostedBlockNumber->getData(0); - Teuchos::ArrayRCP col_block_number = ghostedBlockNumber->getData(0); - - // allocate space for the local graph - ArrayRCP rows_mat; - ArrayRCP rows_graph,columns; - - rows_graph.resize(inputGraph->GetNodeNumVertices()+1); - columns.resize(inputGraph->GetNodeNumEdges()); - - LO realnnz = 0; - GO numDropped = 0, numTotal = 0; - const LO numRows = Teuchos::as(inputGraph->GetDomainMap()->getLocalNumElements()); - if (localizeColoringGraph) { - - for (LO row = 0; row < numRows; ++row) { - LO row_block = row_block_number[row]; - ArrayView indices = inputGraph->getNeighborVertices(row); - - LO rownnz = 0; - for (LO colID = 0; colID < Teuchos::as(indices.size()); colID++) { - LO col = indices[colID]; - LO col_block = col_block_number[col]; - - if((row_block == col_block) && (col < numRows)) { - columns[realnnz++] = col; - rownnz++; - } else - numDropped++; - } - rows_graph[row+1] = realnnz; - } - } else { - // ghosting of boundary node map - Teuchos::ArrayRCP boundaryNodes = inputGraph->GetBoundaryNodeMap(); - auto boundaryNodesVector = Xpetra::VectorFactory::Build(inputGraph->GetDomainMap()); - for (size_t i=0; iGetNodeNumVertices(); i++) - boundaryNodesVector->getDataNonConst(0)[i] = boundaryNodes[i]; - // Xpetra::IO::Write("boundary",*boundaryNodesVector); - auto boundaryColumnVector = Xpetra::VectorFactory::Build(inputGraph->GetImportMap()); - boundaryColumnVector->doImport(*boundaryNodesVector,*importer, Xpetra::INSERT); - auto boundaryColumn = boundaryColumnVector->getData(0); - - for (LO row = 0; row < numRows; ++row) { - LO row_block = 
row_block_number[row]; - ArrayView indices = inputGraph->getNeighborVertices(row); - - LO rownnz = 0; - for (LO colID = 0; colID < Teuchos::as(indices.size()); colID++) { - LO col = indices[colID]; - LO col_block = col_block_number[col]; - - if((row_block == col_block) && ((row == col) || (boundaryColumn[col] == 0))) { - columns[realnnz++] = col; - rownnz++; - } else - numDropped++; - } - rows_graph[row+1] = realnnz; - } - } - - columns.resize(realnnz); - numTotal = inputGraph->GetNodeNumEdges(); - - if (GetVerbLevel() & Statistics1) { - RCP > comm = inputGraph->GetDomainMap()->getComm(); - GO numGlobalTotal, numGlobalDropped; - MueLu_sumAll(comm, numTotal, numGlobalTotal); - MueLu_sumAll(comm, numDropped, numGlobalDropped); - GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal; - if (numGlobalTotal != 0) - GetOStream(Statistics1) << " (" << 100*Teuchos::as(numGlobalDropped)/Teuchos::as(numGlobalTotal) << "%)"; - GetOStream(Statistics1) << std::endl; - } - - if (localizeColoringGraph) { - outputGraph = rcp(new LWGraph(rows_graph, columns, inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A")); - outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap()); - } else { - TEUCHOS_ASSERT(inputGraph->GetDomainMap()->lib() == Xpetra::UseTpetra); + if (localizeColoringGraph) { + outputGraph = rcp(new LWGraph( + rows_graph, columns, inputGraph->GetDomainMap(), + inputGraph->GetImportMap(), "block-diagonalized graph of A")); + outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap()); + } else { + TEUCHOS_ASSERT(inputGraph->GetDomainMap()->lib() == Xpetra::UseTpetra); #ifdef HAVE_XPETRA_TPETRA - auto outputGraph2 = rcp(new LWGraph(rows_graph, columns, inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A")); - - auto tpGraph = Xpetra::toTpetra(rcp_const_cast(outputGraph2->GetCrsGraph())); - auto sym = rcp(new 
Tpetra::CrsGraphTransposer(tpGraph)); - auto tpGraphSym = sym->symmetrize(); - - auto colIndsSym = // FIXME persistingView is temporary; better fix would be change to LWGraph constructor - Kokkos::Compat::persistingView(tpGraphSym->getLocalIndicesHost()); - - auto rowsSym = tpGraphSym->getLocalRowPtrsHost(); - ArrayRCP rows_graphSym; - rows_graphSym.resize(rowsSym.size()); - for (size_t row = 0; row < rowsSym.size(); row++) - rows_graphSym[row] = rowsSym[row]; - outputGraph = rcp(new LWGraph(rows_graphSym, colIndsSym, inputGraph->GetDomainMap(), Xpetra::toXpetra(tpGraphSym->getColMap()), "block-diagonalized graph of A")); - outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap()); + auto outputGraph2 = rcp(new LWGraph( + rows_graph, columns, inputGraph->GetDomainMap(), + inputGraph->GetImportMap(), "block-diagonalized graph of A")); + + auto tpGraph = Xpetra::toTpetra( + rcp_const_cast(outputGraph2->GetCrsGraph())); + auto sym = + rcp(new Tpetra::CrsGraphTransposer( + tpGraph)); + auto tpGraphSym = sym->symmetrize(); + + auto colIndsSym = // FIXME persistingView is temporary; better fix would be + // change to LWGraph constructor + Kokkos::Compat::persistingView(tpGraphSym->getLocalIndicesHost()); + + auto rowsSym = tpGraphSym->getLocalRowPtrsHost(); + ArrayRCP rows_graphSym; + rows_graphSym.resize(rowsSym.size()); + for (size_t row = 0; row < rowsSym.size(); row++) + rows_graphSym[row] = rowsSym[row]; + outputGraph = + rcp(new LWGraph(rows_graphSym, colIndsSym, inputGraph->GetDomainMap(), + Xpetra::toXpetra(tpGraphSym->getColMap()), + "block-diagonalized graph of A")); + outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap()); #endif - } - - } - - + } +} -} //namespace MueLu +} // namespace MueLu #endif // MUELU_COALESCEDROPFACTORY_DEF_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_decl.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_decl.hpp index 
45d2601b230d..41ca7e4a3604 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_decl.hpp @@ -55,123 +55,141 @@ #include "MueLu_CoalesceDropFactory_kokkos_fwd.hpp" #include "MueLu_AmalgamationInfo_fwd.hpp" -#include "MueLu_Level_fwd.hpp" #include "MueLu_LWGraph_kokkos_fwd.hpp" +#include "MueLu_Level_fwd.hpp" #include "MueLu_SingleLevelFactoryBase.hpp" #include "MueLu_Utilities_fwd.hpp" namespace MueLu { - /*! - @class CoalesceDropFactory_kokkos - @brief Factory for creating a graph based on a given matrix. - - Factory for creating graphs from matrices with entries selectively dropped. - This factory combines the functionality of CoalesceDropFactory and FilteredAFactory from the non-Kokkos - code path. - - For an in-depth discussion, see https://github.com/trilinos/Trilinos/issues/1676. - - ## Code paths ## - - Both the classic dropping strategy as well as a coordinate-based distance - laplacian method is implemented. For performance reasons there are four - distinctive code paths for the classical method: - - - one DOF per node without dropping (i.e. "aggregation: drop tol" = 0.0) - - one DOF per node with dropping (i.e. "aggregation: drop tol" > 0.0) - - DOFs per node > 1 withouth dropping - - DOFs per node > 1 with dropping - - Additionally there is a code path for the distance-laplacian mode. - - ## Input/output of CoalesceDropFactory_kokkos ## - - ### User parameters of CoalesceDropFactory_kokkos ### - Parameter | type | default | master.xml | validated | requested | description - ----------|------|---------|:----------:|:---------:|:---------:|------------ - A | Factory | null | | * | * | Generating factory of the operator A - UnAmalgamationInfo | Factory | null | | * | * | Generating factory of type AmalgamationFactory which generates the variable 'UnAmalgamationInfo'. Do not change the default unless you know what you are doing. 
- Coordinates | Factory | null | | * | (*) | Generating factory for variable 'Coordinates'. The coordinates are only needed if "distance laplacian" is chosen for the parameter "aggregation: drop scheme" - "aggregation: drop scheme" | std::string | "classical" | * | * | | Coalescing algorithm. You can choose either "classical" (=default) or "distance laplacian" - "aggregation: drop tol" | double | 0.0 | * | * | | Threshold parameter for dropping small entries - "aggregation: Dirichlet threshold" | double | 0.0 | * | * | | Threshold for determining whether entries are zero during Dirichlet row detection - "lightweight wrap" | bool | true | | * | | hidden switch between fast implementation based on MueLu::LWGraph and a failsafe slower implementation based on Xpetra::Graph (for comparison). The user should not change the default value (=true) - - The * in the @c master.xml column denotes that the parameter is defined in the @c master.xml file.
- The * in the @c validated column means that the parameter is declared in the list of valid input parameters (see CoalesceDropFactory_kokkos::GetValidParameters).
- The * in the @c requested column states that the data is requested as input with all dependencies (see CoalesceDropFactory_kokkos::DeclareInput). - - ### Variables provided by UncoupledAggregationFactory ### - - After CoalesceDropFactory_kokkos::Build the following data is available (if requested) - - Parameter | generated by | description - ----------|--------------|------------ - Graph | CoalesceDropFactory_kokkos | Graph of matrix A - DofsPerNode | CoalesceDropFactory_kokkos | number of DOFs per node. Note, that we assume a constant number of DOFs per node for all nodes associated with the operator A. - - ## Amalgamation process ## - - The CoalesceDropFactory_kokkos is internally using the AmalgamationFactory - for amalgamating the dof-based maps to node-based maps. The - AmalgamationFactory creates the "UnAmalgamationInfo" container which - basically stores all the necessary information for translating dof based - data to node based data and vice versa. The container is used, since this - way the amalgamation is only done once and later reused by other factories. - - Of course, often one does not need the information from the - "UnAmalgamationInfo" container since the same information could be - extracted of the "Graph" or the map from the "Coordinates" vector. - However, there are also some situations (e.g. when doing rebalancing based - on HyperGraph partitioning without coordinate information) where one has - not access to a "Graph" or "Coordinates" variable. - */ - template - class CoalesceDropFactory_kokkos; - - template - class CoalesceDropFactory_kokkos > : public SingleLevelFactoryBase { - public: - using local_ordinal_type = LocalOrdinal; - using global_ordinal_type = GlobalOrdinal; - using execution_space = typename DeviceType::execution_space; - using range_type = Kokkos::RangePolicy; - using node_type = Tpetra::KokkosCompat::KokkosDeviceWrapperNode; - - private: - // For compatibility - using Node = node_type; +/*! 
+ @class CoalesceDropFactory_kokkos + @brief Factory for creating a graph based on a given matrix. + + Factory for creating graphs from matrices with entries selectively dropped. + This factory combines the functionality of CoalesceDropFactory and + FilteredAFactory from the non-Kokkos code path. + + For an in-depth discussion, see + https://github.com/trilinos/Trilinos/issues/1676. + + ## Code paths ## + + Both the classic dropping strategy as well as a coordinate-based distance + laplacian method is implemented. For performance reasons there are four + distinctive code paths for the classical method: + + - one DOF per node without dropping (i.e. "aggregation: drop tol" = 0.0) + - one DOF per node with dropping (i.e. "aggregation: drop tol" > 0.0) + - DOFs per node > 1 withouth dropping + - DOFs per node > 1 with dropping + + Additionally there is a code path for the distance-laplacian mode. + + ## Input/output of CoalesceDropFactory_kokkos ## + + ### User parameters of CoalesceDropFactory_kokkos ### + Parameter | type | default | master.xml | validated | requested | description + ----------|------|---------|:----------:|:---------:|:---------:|------------ + A | Factory | null | | * | * | Generating factory of the + operator A UnAmalgamationInfo | Factory | null | | * | * | Generating + factory of type AmalgamationFactory which generates the variable + 'UnAmalgamationInfo'. Do not change the default unless you know what you are + doing. Coordinates | Factory | null | | * | (*) | Generating + factory for variable 'Coordinates'. The coordinates are only needed if + "distance laplacian" is chosen for the parameter "aggregation: drop scheme" + "aggregation: drop scheme" | std::string | "classical" | * | * | | + Coalescing algorithm. 
You can choose either "classical" (=default) or + "distance laplacian" "aggregation: drop tol" | double | 0.0 | * | * | | + Threshold parameter for dropping small entries "aggregation: Dirichlet + threshold" | double | 0.0 | * | * | | Threshold for determining whether + entries are zero during Dirichlet row detection "lightweight wrap" | bool | + true | | * | | hidden switch between fast implementation based on + MueLu::LWGraph and a failsafe slower implementation based on Xpetra::Graph + (for comparison). The user should not change the default value (=true) + + The * in the @c master.xml column denotes that the parameter is defined in the + @c master.xml file.
The * in the @c validated column means that the + parameter is declared in the list of valid input parameters (see + CoalesceDropFactory_kokkos::GetValidParameters).
The * in the @c requested + column states that the data is requested as input with all dependencies (see + CoalesceDropFactory_kokkos::DeclareInput). + + ### Variables provided by UncoupledAggregationFactory ### + + After CoalesceDropFactory_kokkos::Build the following data is available (if + requested) + + Parameter | generated by | description + ----------|--------------|------------ + Graph | CoalesceDropFactory_kokkos | Graph of matrix A + DofsPerNode | CoalesceDropFactory_kokkos | number of DOFs per node. Note, that + we assume a constant number of DOFs per node for all nodes associated with the + operator A. + + ## Amalgamation process ## + + The CoalesceDropFactory_kokkos is internally using the AmalgamationFactory + for amalgamating the dof-based maps to node-based maps. The + AmalgamationFactory creates the "UnAmalgamationInfo" container which + basically stores all the necessary information for translating dof based + data to node based data and vice versa. The container is used, since this + way the amalgamation is only done once and later reused by other factories. + + Of course, often one does not need the information from the + "UnAmalgamationInfo" container since the same information could be + extracted of the "Graph" or the map from the "Coordinates" vector. + However, there are also some situations (e.g. when doing rebalancing based + on HyperGraph partitioning without coordinate information) where one has + not access to a "Graph" or "Coordinates" variable. 
+*/ +template +class CoalesceDropFactory_kokkos; + +template +class CoalesceDropFactory_kokkos< + Scalar, LocalOrdinal, GlobalOrdinal, + Tpetra::KokkosCompat::KokkosDeviceWrapperNode> + : public SingleLevelFactoryBase { +public: + using local_ordinal_type = LocalOrdinal; + using global_ordinal_type = GlobalOrdinal; + using execution_space = typename DeviceType::execution_space; + using range_type = Kokkos::RangePolicy; + using node_type = Tpetra::KokkosCompat::KokkosDeviceWrapperNode; + +private: + // For compatibility + using Node = node_type; #undef MUELU_COALESCEDROPFACTORY_KOKKOS_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! @name Constructors/Destructors. - //@{ - - //! Constructor - CoalesceDropFactory_kokkos() { } +public: + //! @name Constructors/Destructors. + //@{ - //! Destructor - virtual ~CoalesceDropFactory_kokkos() { } + //! Constructor + CoalesceDropFactory_kokkos() {} - RCP GetValidParameterList() const; + //! Destructor + virtual ~CoalesceDropFactory_kokkos() {} - //@} + RCP GetValidParameterList() const; - //! Input - //@{ + //@} - void DeclareInput(Level& currentLevel) const; + //! 
Input + //@{ - //@} + void DeclareInput(Level ¤tLevel) const; - void Build(Level& currentLevel) const; + //@} - }; + void Build(Level ¤tLevel) const; +}; -} //namespace MueLu +} // namespace MueLu #define MUELU_COALESCEDROPFACTORY_KOKKOS_SHORT #endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp index af490b6a5c24..2e26c6fdaf65 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp @@ -46,8 +46,8 @@ #ifndef MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP #define MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP -#include #include +#include #include "Xpetra_Matrix.hpp" @@ -55,888 +55,1011 @@ #include "MueLu_AmalgamationInfo.hpp" #include "MueLu_Exceptions.hpp" -#include "MueLu_Level.hpp" #include "MueLu_LWGraph_kokkos.hpp" +#include "MueLu_Level.hpp" #include "MueLu_MasterList.hpp" #include "MueLu_Monitor.hpp" #include "MueLu_Utilities.hpp" namespace MueLu { +namespace CoalesceDrop_Kokkos_Details { // anonymous - namespace CoalesceDrop_Kokkos_Details { // anonymous +template class ScanFunctor { +public: + ScanFunctor(RowType rows_) : rows(rows_) {} - template - class ScanFunctor { - public: - ScanFunctor(RowType rows_) : rows(rows_) { } + KOKKOS_INLINE_FUNCTION + void operator()(const LO i, LO &upd, const bool &final) const { + upd += rows(i); + if (final) + rows(i) = upd; + } - KOKKOS_INLINE_FUNCTION - void operator()(const LO i, LO& upd, const bool& final) const { - upd += rows(i); - if (final) - rows(i) = upd; - } +private: + RowType rows; +}; + +template class ClassicalDropFunctor { +private: + typedef typename GhostedViewType::value_type SC; + typedef Kokkos::ArithTraits ATS; + typedef typename ATS::magnitudeType magnitudeType; + + GhostedViewType + diag; 
// corresponds to overlapped diagonal multivector (2D View) + magnitudeType eps; + +public: + ClassicalDropFunctor(GhostedViewType ghostedDiag, magnitudeType threshold) + : diag(ghostedDiag), eps(threshold) {} + + // Return true if we drop, false if not + KOKKOS_FORCEINLINE_FUNCTION + bool operator()(LO row, LO col, SC val) const { + // We avoid square root by using squared values + auto aiiajj = ATS::magnitude(diag(row, 0)) * + ATS::magnitude(diag(col, 0)); // |a_ii|*|a_jj| + auto aij2 = ATS::magnitude(val) * ATS::magnitude(val); // |a_ij|^2 + + return (aij2 <= eps * eps * aiiajj); + } +}; + +template class DistanceFunctor { +private: + typedef typename CoordsType::value_type SC; + typedef Kokkos::ArithTraits ATS; + typedef typename ATS::magnitudeType magnitudeType; + +public: + typedef SC value_type; + +public: + DistanceFunctor(CoordsType coords_) : coords(coords_) {} + + KOKKOS_INLINE_FUNCTION + magnitudeType distance2(LO row, LO col) const { + SC d = ATS::zero(), s; + for (size_t j = 0; j < coords.extent(1); j++) { + s = coords(row, j) - coords(col, j); + d += s * s; + } + return ATS::magnitude(d); + } - private: - RowType rows; - }; - - template - class ClassicalDropFunctor { - private: - typedef typename GhostedViewType::value_type SC; - typedef Kokkos::ArithTraits ATS; - typedef typename ATS::magnitudeType magnitudeType; - - GhostedViewType diag; // corresponds to overlapped diagonal multivector (2D View) - magnitudeType eps; - - public: - ClassicalDropFunctor(GhostedViewType ghostedDiag, magnitudeType threshold) : - diag(ghostedDiag), - eps(threshold) - { } - - // Return true if we drop, false if not - KOKKOS_FORCEINLINE_FUNCTION - bool operator()(LO row, LO col, SC val) const { - // We avoid square root by using squared values - auto aiiajj = ATS::magnitude(diag(row, 0)) * ATS::magnitude(diag(col, 0)); // |a_ii|*|a_jj| - auto aij2 = ATS::magnitude(val) * ATS::magnitude(val); // |a_ij|^2 - - return (aij2 <= eps*eps * aiiajj); - } - }; - - template - class 
DistanceFunctor { - private: - typedef typename CoordsType::value_type SC; - typedef Kokkos::ArithTraits ATS; - typedef typename ATS::magnitudeType magnitudeType; - - public: - typedef SC value_type; - - public: - DistanceFunctor(CoordsType coords_) : coords(coords_) { } - - KOKKOS_INLINE_FUNCTION - magnitudeType distance2(LO row, LO col) const { - SC d = ATS::zero(), s; - for (size_t j = 0; j < coords.extent(1); j++) { - s = coords(row,j) - coords(col,j); - d += s*s; - } - return ATS::magnitude(d); - } - private: - CoordsType coords; - }; - - template - class DistanceLaplacianDropFunctor { - private: - typedef typename GhostedViewType::value_type SC; - typedef Kokkos::ArithTraits ATS; - typedef typename ATS::magnitudeType magnitudeType; - - public: - DistanceLaplacianDropFunctor(GhostedViewType ghostedLaplDiag, DistanceFunctor distFunctor_, magnitudeType threshold) : - diag(ghostedLaplDiag), - distFunctor(distFunctor_), - eps(threshold) - { } - - // Return true if we drop, false if not - KOKKOS_INLINE_FUNCTION - bool operator()(LO row, LO col, SC /* val */) const { - // We avoid square root by using squared values - - // We ignore incoming value of val as we operate on an auxiliary - // distance Laplacian matrix - typedef typename DistanceFunctor::value_type dSC; - typedef Kokkos::ArithTraits dATS; - auto fval = dATS::one() / distFunctor.distance2(row, col); - - auto aiiajj = ATS::magnitude(diag(row, 0)) * ATS::magnitude(diag(col, 0)); // |a_ii|*|a_jj| - auto aij2 = ATS::magnitude(fval) * ATS::magnitude(fval); // |a_ij|^2 - - return (aij2 <= eps*eps * aiiajj); - } +private: + CoordsType coords; +}; + +template +class DistanceLaplacianDropFunctor { +private: + typedef typename GhostedViewType::value_type SC; + typedef Kokkos::ArithTraits ATS; + typedef typename ATS::magnitudeType magnitudeType; + +public: + DistanceLaplacianDropFunctor(GhostedViewType ghostedLaplDiag, + DistanceFunctor distFunctor_, + magnitudeType threshold) + : diag(ghostedLaplDiag), 
distFunctor(distFunctor_), eps(threshold) {} + + // Return true if we drop, false if not + KOKKOS_INLINE_FUNCTION + bool operator()(LO row, LO col, SC /* val */) const { + // We avoid square root by using squared values + + // We ignore incoming value of val as we operate on an auxiliary + // distance Laplacian matrix + typedef typename DistanceFunctor::value_type dSC; + typedef Kokkos::ArithTraits dATS; + auto fval = dATS::one() / distFunctor.distance2(row, col); + + auto aiiajj = ATS::magnitude(diag(row, 0)) * + ATS::magnitude(diag(col, 0)); // |a_ii|*|a_jj| + auto aij2 = ATS::magnitude(fval) * ATS::magnitude(fval); // |a_ij|^2 + + return (aij2 <= eps * eps * aiiajj); + } - private: - GhostedViewType diag; // corresponds to overlapped diagonal multivector (2D View) - DistanceFunctor distFunctor; - magnitudeType eps; - }; - - template - class ScalarFunctor { - private: - typedef typename MatrixType::StaticCrsGraphType graph_type; - typedef typename graph_type::row_map_type rows_type; - typedef typename graph_type::entries_type cols_type; - typedef typename MatrixType::values_type vals_type; - typedef Kokkos::ArithTraits ATS; - typedef typename ATS::val_type impl_Scalar; - typedef Kokkos::ArithTraits impl_ATS; - typedef typename ATS::magnitudeType magnitudeType; - - public: - ScalarFunctor(MatrixType A_, BndViewType bndNodes_, DropFunctorType dropFunctor_, - typename rows_type::non_const_type rows_, - typename cols_type::non_const_type colsAux_, - typename vals_type::non_const_type valsAux_, - bool reuseGraph_, bool lumping_, SC /* threshold_ */, - bool aggregationMayCreateDirichlet_ ) : - A(A_), - bndNodes(bndNodes_), - dropFunctor(dropFunctor_), - rows(rows_), - colsAux(colsAux_), - valsAux(valsAux_), - reuseGraph(reuseGraph_), - lumping(lumping_), - aggregationMayCreateDirichlet(aggregationMayCreateDirichlet_) - { - rowsA = A.graph.row_map; - zero = impl_ATS::zero(); - } +private: + GhostedViewType + diag; // corresponds to overlapped diagonal multivector (2D 
View) + DistanceFunctor distFunctor; + magnitudeType eps; +}; + +template +class ScalarFunctor { +private: + typedef typename MatrixType::StaticCrsGraphType graph_type; + typedef typename graph_type::row_map_type rows_type; + typedef typename graph_type::entries_type cols_type; + typedef typename MatrixType::values_type vals_type; + typedef Kokkos::ArithTraits ATS; + typedef typename ATS::val_type impl_Scalar; + typedef Kokkos::ArithTraits impl_ATS; + typedef typename ATS::magnitudeType magnitudeType; + +public: + ScalarFunctor(MatrixType A_, BndViewType bndNodes_, + DropFunctorType dropFunctor_, + typename rows_type::non_const_type rows_, + typename cols_type::non_const_type colsAux_, + typename vals_type::non_const_type valsAux_, bool reuseGraph_, + bool lumping_, SC /* threshold_ */, + bool aggregationMayCreateDirichlet_) + : A(A_), bndNodes(bndNodes_), dropFunctor(dropFunctor_), rows(rows_), + colsAux(colsAux_), valsAux(valsAux_), reuseGraph(reuseGraph_), + lumping(lumping_), + aggregationMayCreateDirichlet(aggregationMayCreateDirichlet_) { + rowsA = A.graph.row_map; + zero = impl_ATS::zero(); + } - KOKKOS_INLINE_FUNCTION - void operator()(const LO row, LO& nnz) const { - auto rowView = A.rowConst(row); - auto length = rowView.length; - auto offset = rowsA(row); - - impl_Scalar diag = zero; - LO rownnz = 0; - LO diagID = -1; - for (decltype(length) colID = 0; colID < length; colID++) { - LO col = rowView.colidx(colID); - impl_Scalar val = rowView.value (colID); - - if (!dropFunctor(row, col, rowView.value(colID)) || row == col) { - colsAux(offset+rownnz) = col; - - LO valID = (reuseGraph ? colID : rownnz); - valsAux(offset+valID) = val; - if (row == col) - diagID = valID; - - rownnz++; - - } else { - // Rewrite with zeros (needed for reuseGraph) - valsAux(offset+colID) = zero; - diag += val; - } - } - // How to assert on the device? 
- // assert(diagIndex != -1); - rows(row+1) = rownnz; - // if (lumping && diagID != -1) { - if (lumping) { - // Add diag to the diagonal - - // NOTE_KOKKOS: valsAux was allocated with - // ViewAllocateWithoutInitializing. This is not a problem here - // because we explicitly set this value above. - valsAux(offset+diagID) += diag; - } + KOKKOS_INLINE_FUNCTION + void operator()(const LO row, LO &nnz) const { + auto rowView = A.rowConst(row); + auto length = rowView.length; + auto offset = rowsA(row); - // If the only element remaining after filtering is diagonal, mark node as boundary - // FIXME: this should really be replaced by the following - // if (indices.size() == 1 && indices[0] == row) - // boundaryNodes[row] = true; - // We do not do it this way now because there is no framework for distinguishing isolated - // and boundary nodes in the aggregation algorithms - bndNodes(row) = (rownnz == 1 && aggregationMayCreateDirichlet); + impl_Scalar diag = zero; + LO rownnz = 0; + LO diagID = -1; + for (decltype(length) colID = 0; colID < length; colID++) { + LO col = rowView.colidx(colID); + impl_Scalar val = rowView.value(colID); - nnz += rownnz; - } + if (!dropFunctor(row, col, rowView.value(colID)) || row == col) { + colsAux(offset + rownnz) = col; - private: - MatrixType A; - BndViewType bndNodes; - DropFunctorType dropFunctor; - - rows_type rowsA; - - typename rows_type::non_const_type rows; - typename cols_type::non_const_type colsAux; - typename vals_type::non_const_type valsAux; - - bool reuseGraph; - bool lumping; - bool aggregationMayCreateDirichlet; - impl_Scalar zero; - }; - - // collect number nonzeros of blkSize rows in nnz_(row+1) - template - class Stage1aVectorFunctor { - private: - typedef typename MatrixType::ordinal_type LO; - - public: - Stage1aVectorFunctor(MatrixType kokkosMatrix_, NnzType nnz_, blkSizeType blkSize_) : - kokkosMatrix(kokkosMatrix_), - nnz(nnz_), - blkSize(blkSize_) { } - - KOKKOS_INLINE_FUNCTION - void operator()(const LO row, 
LO& totalnnz) const { - - // the following code is more or less what MergeRows is doing - // count nonzero entries in all dof rows associated with node row - LO nodeRowMaxNonZeros = 0; - for (LO j = 0; j < blkSize; j++) { - auto rowView = kokkosMatrix.row(row * blkSize + j); - nodeRowMaxNonZeros += rowView.length; - } - nnz(row + 1) = nodeRowMaxNonZeros; - totalnnz += nodeRowMaxNonZeros; - } + LO valID = (reuseGraph ? colID : rownnz); + valsAux(offset + valID) = val; + if (row == col) + diagID = valID; + rownnz++; - private: - MatrixType kokkosMatrix; //< local matrix part - NnzType nnz; //< View containing number of nonzeros for current row - blkSizeType blkSize; //< block size (or partial block size in strided maps) - }; - - - // build the dof-based column map containing the local dof ids belonging to blkSize rows in matrix - // sort column ids - // translate them into (unique) node ids - // count the node column ids per node row - template - class Stage1bcVectorFunctor { - private: - typedef typename MatrixType::ordinal_type LO; - - private: - MatrixType kokkosMatrix; //< local matrix part - NnzType coldofnnz; //< view containing start and stop indices for subviews - blkSizeType blkSize; //< block size (or partial block size in strided maps) - ColDofType coldofs; //< view containing the local dof ids associated with columns for the blkSize rows (not sorted) - Dof2NodeTranslationType dof2node; //< view containing the local node id associated with the local dof id - NnzType colnodennz; //< view containing number of column nodes for each node row - BdryNodeTypeConst dirichletdof; //< view containing with num dofs booleans. True if dof (not necessarily entire node) is dirichlet boundardy dof. - BdryNodeType bdrynode; //< view containing with numNodes booleans. True if node is (full) dirichlet boundardy node. 
- boolType usegreedydirichlet; //< boolean for use of greedy Dirichlet (if any dof is Dirichlet, entire node is dirichlet) default false (need all dofs in node to be Dirichlet for node to be Dirichlet) - - public: - Stage1bcVectorFunctor(MatrixType kokkosMatrix_, - NnzType coldofnnz_, - blkSizeType blkSize_, - ColDofType coldofs_, - Dof2NodeTranslationType dof2node_, - NnzType colnodennz_, - BdryNodeTypeConst dirichletdof_, - BdryNodeType bdrynode_, - boolType usegreedydirichlet_) : - kokkosMatrix(kokkosMatrix_), - coldofnnz(coldofnnz_), - blkSize(blkSize_), - coldofs(coldofs_), - dof2node(dof2node_), - colnodennz(colnodennz_), - dirichletdof(dirichletdof_), - bdrynode(bdrynode_), - usegreedydirichlet(usegreedydirichlet_) { + } else { + // Rewrite with zeros (needed for reuseGraph) + valsAux(offset + colID) = zero; + diag += val; } + } + // How to assert on the device? + // assert(diagIndex != -1); + rows(row + 1) = rownnz; + // if (lumping && diagID != -1) { + if (lumping) { + // Add diag to the diagonal + + // NOTE_KOKKOS: valsAux was allocated with + // ViewAllocateWithoutInitializing. This is not a problem here + // because we explicitly set this value above. 
+ valsAux(offset + diagID) += diag; + } - KOKKOS_INLINE_FUNCTION - void operator()(const LO rowNode, LO& nnz) const { + // If the only element remaining after filtering is diagonal, mark node as + // boundary + // FIXME: this should really be replaced by the following + // if (indices.size() == 1 && indices[0] == row) + // boundaryNodes[row] = true; + // We do not do it this way now because there is no framework for + // distinguishing isolated and boundary nodes in the aggregation algorithms + bndNodes(row) = (rownnz == 1 && aggregationMayCreateDirichlet); - LO pos = coldofnnz(rowNode); - if( usegreedydirichlet ){ - bdrynode(rowNode) = false; - for (LO j = 0; j < blkSize; j++) { - auto rowView = kokkosMatrix.row(rowNode * blkSize + j); - auto numIndices = rowView.length; - - // if any dof in the node is Dirichlet - if( dirichletdof(rowNode * blkSize + j) ) - bdrynode(rowNode) = true; - - for (decltype(numIndices) k = 0; k < numIndices; k++) { - auto dofID = rowView.colidx(k); - coldofs(pos) = dofID; - pos ++; - } - } - }else{ + nnz += rownnz; + } + +private: + MatrixType A; + BndViewType bndNodes; + DropFunctorType dropFunctor; + + rows_type rowsA; + + typename rows_type::non_const_type rows; + typename cols_type::non_const_type colsAux; + typename vals_type::non_const_type valsAux; + + bool reuseGraph; + bool lumping; + bool aggregationMayCreateDirichlet; + impl_Scalar zero; +}; + +// collect number nonzeros of blkSize rows in nnz_(row+1) +template +class Stage1aVectorFunctor { +private: + typedef typename MatrixType::ordinal_type LO; + +public: + Stage1aVectorFunctor(MatrixType kokkosMatrix_, NnzType nnz_, + blkSizeType blkSize_) + : kokkosMatrix(kokkosMatrix_), nnz(nnz_), blkSize(blkSize_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const LO row, LO &totalnnz) const { + + // the following code is more or less what MergeRows is doing + // count nonzero entries in all dof rows associated with node row + LO nodeRowMaxNonZeros = 0; + for (LO j = 0; j < blkSize; 
j++) { + auto rowView = kokkosMatrix.row(row * blkSize + j); + nodeRowMaxNonZeros += rowView.length; + } + nnz(row + 1) = nodeRowMaxNonZeros; + totalnnz += nodeRowMaxNonZeros; + } + +private: + MatrixType kokkosMatrix; //< local matrix part + NnzType nnz; //< View containing number of nonzeros for current row + blkSizeType blkSize; //< block size (or partial block size in strided maps) +}; + +// build the dof-based column map containing the local dof ids belonging to +// blkSize rows in matrix sort column ids translate them into (unique) node ids +// count the node column ids per node row +template +class Stage1bcVectorFunctor { +private: + typedef typename MatrixType::ordinal_type LO; + +private: + MatrixType kokkosMatrix; //< local matrix part + NnzType coldofnnz; //< view containing start and stop indices for subviews + blkSizeType blkSize; //< block size (or partial block size in strided maps) + ColDofType coldofs; //< view containing the local dof ids associated with + // columns for the blkSize rows (not sorted) + Dof2NodeTranslationType dof2node; //< view containing the local node id + // associated with the local dof id + NnzType + colnodennz; //< view containing number of column nodes for each node row + BdryNodeTypeConst + dirichletdof; //< view containing with num dofs booleans. True if dof (not + // necessarily entire node) is dirichlet boundardy dof. + BdryNodeType bdrynode; //< view containing with numNodes booleans. True if + // node is (full) dirichlet boundardy node. 
+ boolType usegreedydirichlet; //< boolean for use of greedy Dirichlet (if any + // dof is Dirichlet, entire node is dirichlet) + // default false (need all dofs in node to be + // Dirichlet for node to be Dirichlet) + +public: + Stage1bcVectorFunctor(MatrixType kokkosMatrix_, NnzType coldofnnz_, + blkSizeType blkSize_, ColDofType coldofs_, + Dof2NodeTranslationType dof2node_, NnzType colnodennz_, + BdryNodeTypeConst dirichletdof_, BdryNodeType bdrynode_, + boolType usegreedydirichlet_) + : kokkosMatrix(kokkosMatrix_), coldofnnz(coldofnnz_), blkSize(blkSize_), + coldofs(coldofs_), dof2node(dof2node_), colnodennz(colnodennz_), + dirichletdof(dirichletdof_), bdrynode(bdrynode_), + usegreedydirichlet(usegreedydirichlet_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const LO rowNode, LO &nnz) const { + + LO pos = coldofnnz(rowNode); + if (usegreedydirichlet) { + bdrynode(rowNode) = false; + for (LO j = 0; j < blkSize; j++) { + auto rowView = kokkosMatrix.row(rowNode * blkSize + j); + auto numIndices = rowView.length; + + // if any dof in the node is Dirichlet + if (dirichletdof(rowNode * blkSize + j)) bdrynode(rowNode) = true; - for (LO j = 0; j < blkSize; j++) { - auto rowView = kokkosMatrix.row(rowNode * blkSize + j); - auto numIndices = rowView.length; - - // if any dof in the node is not Dirichlet - if( dirichletdof(rowNode * blkSize + j) == false ) - bdrynode(rowNode) = false; - - for (decltype(numIndices) k = 0; k < numIndices; k++) { - auto dofID = rowView.colidx(k); - coldofs(pos) = dofID; - pos ++; - } - } - } - // sort coldofs - LO begin = coldofnnz(rowNode); - LO end = coldofnnz(rowNode+1); - LO n = end - begin; - for (LO i = 0; i < (n-1); i++) { - for (LO j = 0; j < (n-i-1); j++) { - if (coldofs(j+begin) > coldofs(j+begin+1)) { - LO temp = coldofs(j+begin); - coldofs(j+begin) = coldofs(j+begin+1); - coldofs(j+begin+1) = temp; - } - } - } - size_t cnt = 0; - LO lastNodeID = -1; - for (LO i = 0; i < n; i++) { - LO dofID = coldofs(begin + i); - LO nodeID = 
dof2node(dofID); - if(nodeID != lastNodeID) { - lastNodeID = nodeID; - coldofs(begin+cnt) = nodeID; - cnt++; - } + for (decltype(numIndices) k = 0; k < numIndices; k++) { + auto dofID = rowView.colidx(k); + coldofs(pos) = dofID; + pos++; } - colnodennz(rowNode+1) = cnt; - nnz += cnt; } + } else { + bdrynode(rowNode) = true; + for (LO j = 0; j < blkSize; j++) { + auto rowView = kokkosMatrix.row(rowNode * blkSize + j); + auto numIndices = rowView.length; - }; - - // fill column node id view - template - class Stage1dVectorFunctor { - private: - typedef typename MatrixType::ordinal_type LO; - typedef typename MatrixType::value_type SC; - - private: - ColDofType coldofs; //< view containing mixed node and dof indices (only input) - ColDofNnzType coldofnnz; //< view containing the start and stop indices for subviews (dofs) - ColNodeType colnodes; //< view containing the local node ids associated with columns - ColNodeNnzType colnodennz; //< view containing start and stop indices for subviews - - public: - Stage1dVectorFunctor(ColDofType coldofs_, ColDofNnzType coldofnnz_, ColNodeType colnodes_, ColNodeNnzType colnodennz_) : - coldofs(coldofs_), - coldofnnz(coldofnnz_), - colnodes(colnodes_), - colnodennz(colnodennz_) { - } + // if any dof in the node is not Dirichlet + if (dirichletdof(rowNode * blkSize + j) == false) + bdrynode(rowNode) = false; - KOKKOS_INLINE_FUNCTION - void operator()(const LO rowNode) const { - auto dofbegin = coldofnnz(rowNode); - auto nodebegin = colnodennz(rowNode); - auto nodeend = colnodennz(rowNode+1); - auto n = nodeend - nodebegin; + for (decltype(numIndices) k = 0; k < numIndices; k++) { + auto dofID = rowView.colidx(k); + coldofs(pos) = dofID; + pos++; + } + } + } - for (decltype(nodebegin) i = 0; i < n; i++) { - colnodes(nodebegin + i) = coldofs(dofbegin + i); + // sort coldofs + LO begin = coldofnnz(rowNode); + LO end = coldofnnz(rowNode + 1); + LO n = end - begin; + for (LO i = 0; i < (n - 1); i++) { + for (LO j = 0; j < (n - i - 1); 
j++) { + if (coldofs(j + begin) > coldofs(j + begin + 1)) { + LO temp = coldofs(j + begin); + coldofs(j + begin) = coldofs(j + begin + 1); + coldofs(j + begin + 1) = temp; } } - }; + } + size_t cnt = 0; + LO lastNodeID = -1; + for (LO i = 0; i < n; i++) { + LO dofID = coldofs(begin + i); + LO nodeID = dof2node(dofID); + if (nodeID != lastNodeID) { + lastNodeID = nodeID; + coldofs(begin + cnt) = nodeID; + cnt++; + } + } + colnodennz(rowNode + 1) = cnt; + nnz += cnt; + } +}; + +// fill column node id view +template +class Stage1dVectorFunctor { +private: + typedef typename MatrixType::ordinal_type LO; + typedef typename MatrixType::value_type SC; + +private: + ColDofType + coldofs; //< view containing mixed node and dof indices (only input) + ColDofNnzType coldofnnz; //< view containing the start and stop indices for + // subviews (dofs) + ColNodeType + colnodes; //< view containing the local node ids associated with columns + ColNodeNnzType + colnodennz; //< view containing start and stop indices for subviews + +public: + Stage1dVectorFunctor(ColDofType coldofs_, ColDofNnzType coldofnnz_, + ColNodeType colnodes_, ColNodeNnzType colnodennz_) + : coldofs(coldofs_), coldofnnz(coldofnnz_), colnodes(colnodes_), + colnodennz(colnodennz_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const LO rowNode) const { + auto dofbegin = coldofnnz(rowNode); + auto nodebegin = colnodennz(rowNode); + auto nodeend = colnodennz(rowNode + 1); + auto n = nodeend - nodebegin; + + for (decltype(nodebegin) i = 0; i < n; i++) { + colnodes(nodebegin + i) = coldofs(dofbegin + i); + } + } +}; + +} // namespace CoalesceDrop_Kokkos_Details + +template +RCP +CoalesceDropFactory_kokkos>::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + +#define SET_VALID_ENTRY(name) \ + validParamList->setEntry(name, MasterList::getEntry(name)) + SET_VALID_ENTRY("aggregation: drop tol"); + SET_VALID_ENTRY("aggregation: Dirichlet threshold"); + SET_VALID_ENTRY("aggregation: drop 
scheme"); + SET_VALID_ENTRY("aggregation: dropping may create Dirichlet"); + SET_VALID_ENTRY("aggregation: greedy Dirichlet"); + SET_VALID_ENTRY("filtered matrix: use lumping"); + SET_VALID_ENTRY("filtered matrix: reuse graph"); + SET_VALID_ENTRY("filtered matrix: reuse eigenvalue"); + { + typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; + validParamList->getEntry("aggregation: drop scheme") + .setValidator(rcp(new validatorType( + Teuchos::tuple("classical", "distance laplacian"), + "aggregation: drop scheme"))); + } +#undef SET_VALID_ENTRY + validParamList->set>( + "A", Teuchos::null, "Generating factory of the matrix A"); + validParamList->set>( + "UnAmalgamationInfo", Teuchos::null, + "Generating factory for UnAmalgamationInfo"); + validParamList->set>( + "Coordinates", Teuchos::null, "Generating factory for Coordinates"); + + return validParamList; +} +template +void CoalesceDropFactory_kokkos< + Scalar, LocalOrdinal, GlobalOrdinal, + Tpetra::KokkosCompat::KokkosDeviceWrapperNode>:: + DeclareInput(Level ¤tLevel) const { + Input(currentLevel, "A"); + Input(currentLevel, "UnAmalgamationInfo"); + + const ParameterList &pL = GetParameterList(); + if (pL.get("aggregation: drop scheme") == "distance laplacian") + Input(currentLevel, "Coordinates"); +} - } // namespace +template +void CoalesceDropFactory_kokkos>::Build(Level ¤tLevel) + const { + FactoryMonitor m(*this, "Build", currentLevel); - template - RCP CoalesceDropFactory_kokkos>::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType MT; + const MT zero = Teuchos::ScalarTraits::zero(); -#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("aggregation: drop tol"); - SET_VALID_ENTRY("aggregation: Dirichlet threshold"); - SET_VALID_ENTRY("aggregation: drop scheme"); - SET_VALID_ENTRY("aggregation: dropping may create Dirichlet"); - 
SET_VALID_ENTRY("aggregation: greedy Dirichlet"); - SET_VALID_ENTRY("filtered matrix: use lumping"); - SET_VALID_ENTRY("filtered matrix: reuse graph"); - SET_VALID_ENTRY("filtered matrix: reuse eigenvalue"); - { - typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; - validParamList->getEntry("aggregation: drop scheme").setValidator( - rcp(new validatorType(Teuchos::tuple("classical", "distance laplacian"), "aggregation: drop scheme"))); - } -#undef SET_VALID_ENTRY - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A"); - validParamList->set< RCP >("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo"); - validParamList->set< RCP >("Coordinates", Teuchos::null, "Generating factory for Coordinates"); + auto A = Get>(currentLevel, "A"); - return validParamList; - } + /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block + in the chosen storage scheme. blkSize is the number of storage blocks that + must kept together during the amalgamation process. - template - void CoalesceDropFactory_kokkos>::DeclareInput(Level ¤tLevel) const { - Input(currentLevel, "A"); - Input(currentLevel, "UnAmalgamationInfo"); + Both of these quantities may be different than numPDEs (from + GetFixedBlockSize()), but the following must always hold: - const ParameterList& pL = GetParameterList(); - if (pL.get("aggregation: drop scheme") == "distance laplacian") - Input(currentLevel, "Coordinates"); - } + numPDEs = blkSize * storageblocksize. - template - void CoalesceDropFactory_kokkos>:: - Build(Level& currentLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); + If numPDEs==1 + Matrix is point storage (classical CRS storage). storageblocksize=1 and + blkSize=1 No other values makes sense. 
- typedef Teuchos::ScalarTraits STS; - typedef typename STS::magnitudeType MT; - const MT zero = Teuchos::ScalarTraits::zero(); + If numPDEs>1 + If matrix uses point storage, then storageblocksize=1 and + blkSize=numPDEs. If matrix uses block storage, with block size of n, then + storageblocksize=n, and blkSize=numPDEs/n. Thus far, only + storageblocksize=numPDEs and blkSize=1 has been tested. + */ - auto A = Get< RCP >(currentLevel, "A"); + TEUCHOS_TEST_FOR_EXCEPTION( + A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, + Exceptions::RuntimeError, + "A->GetFixedBlockSize() needs to be a multiple of " + "A->GetStorageBlockSize()"); + LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize(); + auto amalInfo = + Get>(currentLevel, "UnAmalgamationInfo"); - /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme. - blkSize is the number of storage blocks that must kept together during the amalgamation process. + const ParameterList &pL = GetParameterList(); - Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold: + std::string algo = pL.get("aggregation: drop scheme"); - numPDEs = blkSize * storageblocksize. - - If numPDEs==1 - Matrix is point storage (classical CRS storage). storageblocksize=1 and blkSize=1 - No other values makes sense. + double threshold = pL.get("aggregation: drop tol"); + GetOStream(Runtime0) << "algorithm = \"" << algo + << "\": threshold = " << threshold + << ", blocksize = " << A->GetFixedBlockSize() + << std::endl; - If numPDEs>1 - If matrix uses point storage, then storageblocksize=1 and blkSize=numPDEs. - If matrix uses block storage, with block size of n, then storageblocksize=n, and blkSize=numPDEs/n. - Thus far, only storageblocksize=numPDEs and blkSize=1 has been tested. 
- */ - - TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0,Exceptions::RuntimeError,"A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()"); - LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize(); + const typename STS::magnitudeType dirichletThreshold = STS::magnitude( + as(pL.get("aggregation: Dirichlet threshold"))); - auto amalInfo = Get< RCP >(currentLevel, "UnAmalgamationInfo"); + GO numDropped = 0, numTotal = 0; - const ParameterList& pL = GetParameterList(); + RCP graph; + LO dofsPerNode = -1; - std::string algo = pL.get("aggregation: drop scheme"); + typedef typename LWGraph_kokkos::boundary_nodes_type boundary_nodes_type; + boundary_nodes_type boundaryNodes; - double threshold = pL.get("aggregation: drop tol"); - GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold - << ", blocksize = " << A->GetFixedBlockSize() << std::endl; + RCP filteredA; + if (blkSize == 1 && threshold == zero) { + // Scalar problem without dropping - const typename STS::magnitudeType dirichletThreshold = - STS::magnitude(as(pL.get("aggregation: Dirichlet threshold"))); + // Detect and record rows that correspond to Dirichlet boundary conditions + boundaryNodes = + Utilities::DetectDirichletRows_kokkos(*A, dirichletThreshold); - GO numDropped = 0, numTotal = 0; + // Trivial LWGraph construction + graph = + rcp(new LWGraph_kokkos(A->getCrsGraph()->getLocalGraphDevice(), + A->getRowMap(), A->getColMap(), "graph of A")); + graph->getLocalLWGraph().SetBoundaryNodeMap(boundaryNodes); - RCP graph; - LO dofsPerNode = -1; + numTotal = A->getLocalNumEntries(); + dofsPerNode = 1; - typedef typename LWGraph_kokkos::boundary_nodes_type boundary_nodes_type; - boundary_nodes_type boundaryNodes; + filteredA = A; - RCP filteredA; - if (blkSize == 1 && threshold == zero) { - // Scalar problem without dropping + } else if (blkSize == 1 && threshold != zero) { + // Scalar problem with dropping - // Detect and 
record rows that correspond to Dirichlet boundary conditions - boundaryNodes = Utilities::DetectDirichletRows_kokkos(*A, dirichletThreshold); + typedef typename Matrix::local_matrix_type local_matrix_type; + typedef typename LWGraph_kokkos::local_graph_type kokkos_graph_type; + typedef typename kokkos_graph_type::row_map_type::non_const_type rows_type; + typedef typename kokkos_graph_type::entries_type::non_const_type cols_type; + typedef typename local_matrix_type::values_type::non_const_type vals_type; - // Trivial LWGraph construction - graph = rcp(new LWGraph_kokkos(A->getCrsGraph()->getLocalGraphDevice(), A->getRowMap(), A->getColMap(), "graph of A")); - graph->getLocalLWGraph().SetBoundaryNodeMap(boundaryNodes); + LO numRows = A->getLocalNumRows(); + local_matrix_type kokkosMatrix = A->getLocalMatrixDevice(); + auto nnzA = kokkosMatrix.nnz(); + auto rowsA = kokkosMatrix.graph.row_map; + + typedef Kokkos::ArithTraits ATS; + typedef typename ATS::val_type impl_Scalar; + typedef Kokkos::ArithTraits impl_ATS; - numTotal = A->getLocalNumEntries(); - dofsPerNode = 1; + bool reuseGraph = pL.get("filtered matrix: reuse graph"); + bool lumping = pL.get("filtered matrix: use lumping"); + if (lumping) + GetOStream(Runtime0) << "Lumping dropped entries" << std::endl; - filteredA = A; + const bool aggregationMayCreateDirichlet = + pL.get("aggregation: dropping may create Dirichlet"); - } else if (blkSize == 1 && threshold != zero) { - // Scalar problem with dropping + // FIXME_KOKKOS: replace by ViewAllocateWithoutInitializing + setting a + // single value + rows_type rows("FA_rows", numRows + 1); + cols_type colsAux(Kokkos::ViewAllocateWithoutInitializing("FA_aux_cols"), + nnzA); + vals_type valsAux; + if (reuseGraph) { + SubFactoryMonitor m2(*this, "CopyMatrix", currentLevel); - typedef typename Matrix::local_matrix_type local_matrix_type; - typedef typename LWGraph_kokkos::local_graph_type kokkos_graph_type; - typedef typename 
kokkos_graph_type::row_map_type::non_const_type rows_type; - typedef typename kokkos_graph_type::entries_type::non_const_type cols_type; - typedef typename local_matrix_type::values_type::non_const_type vals_type; + // Share graph with the original matrix + filteredA = MatrixFactory::Build(A->getCrsGraph()); - LO numRows = A->getLocalNumRows(); - local_matrix_type kokkosMatrix = A->getLocalMatrixDevice(); - auto nnzA = kokkosMatrix.nnz(); - auto rowsA = kokkosMatrix.graph.row_map; + // Do a no-op fill-complete + RCP fillCompleteParams(new ParameterList); + fillCompleteParams->set("No Nonlocal Changes", true); + filteredA->fillComplete(fillCompleteParams); + // No need to reuseFill, just modify in place + valsAux = filteredA->getLocalMatrixDevice().values; - typedef Kokkos::ArithTraits ATS; - typedef typename ATS::val_type impl_Scalar; - typedef Kokkos::ArithTraits impl_ATS; + } else { + // Need an extra array to compress + valsAux = vals_type( + Kokkos::ViewAllocateWithoutInitializing("FA_aux_vals"), nnzA); + } - bool reuseGraph = pL.get("filtered matrix: reuse graph"); - bool lumping = pL.get("filtered matrix: use lumping"); - if (lumping) - GetOStream(Runtime0) << "Lumping dropped entries" << std::endl; + typename boundary_nodes_type::non_const_type bndNodes( + Kokkos::ViewAllocateWithoutInitializing("boundaryNodes"), numRows); - const bool aggregationMayCreateDirichlet = pL.get("aggregation: dropping may create Dirichlet"); + LO nnzFA = 0; + { + if (algo == "classical") { + // Construct overlapped matrix diagonal + RCP ghostedDiag; + { + kokkosMatrix = local_matrix_type(); + SubFactoryMonitor m2(*this, "Ghosted diag construction", + currentLevel); + ghostedDiag = Utilities::GetMatrixOverlappedDiagonal(*A); + kokkosMatrix = A->getLocalMatrixDevice(); + } - // FIXME_KOKKOS: replace by ViewAllocateWithoutInitializing + setting a single value - rows_type rows ("FA_rows", numRows+1); - cols_type colsAux(Kokkos::ViewAllocateWithoutInitializing("FA_aux_cols"), nnzA); - 
vals_type valsAux; - if (reuseGraph) { - SubFactoryMonitor m2(*this, "CopyMatrix", currentLevel); + // Filter out entries + { + SubFactoryMonitor m2(*this, "MainLoop", currentLevel); + + auto ghostedDiagView = + ghostedDiag->getDeviceLocalView(Xpetra::Access::ReadWrite); + + CoalesceDrop_Kokkos_Details::ClassicalDropFunctor< + LO, decltype(ghostedDiagView)> + dropFunctor(ghostedDiagView, threshold); + CoalesceDrop_Kokkos_Details::ScalarFunctor< + typename ATS::val_type, LO, local_matrix_type, decltype(bndNodes), + decltype(dropFunctor)> + scalarFunctor(kokkosMatrix, bndNodes, dropFunctor, rows, colsAux, + valsAux, reuseGraph, lumping, threshold, + aggregationMayCreateDirichlet); + + Kokkos::parallel_reduce( + "MueLu:CoalesceDropF:Build:scalar_filter:main_loop", + range_type(0, numRows), scalarFunctor, nnzFA); + } + + } else if (algo == "distance laplacian") { + typedef Xpetra::MultiVector< + typename Teuchos::ScalarTraits::magnitudeType, LO, GO, NO> + doubleMultiVector; + auto coords = Get>(currentLevel, "Coordinates"); + + auto uniqueMap = A->getRowMap(); + auto nonUniqueMap = A->getColMap(); + + // Construct ghosted coordinates + RCP importer; + { + SubFactoryMonitor m2(*this, "Coords Import construction", + currentLevel); + importer = ImportFactory::Build(uniqueMap, nonUniqueMap); + } + RCP ghostedCoords; + { + SubFactoryMonitor m2(*this, "Ghosted coords construction", + currentLevel); + ghostedCoords = Xpetra::MultiVectorFactory< + typename Teuchos::ScalarTraits::magnitudeType, LO, GO, + NO>::Build(nonUniqueMap, coords->getNumVectors()); + ghostedCoords->doImport(*coords, *importer, Xpetra::INSERT); + } - // Share graph with the original matrix - filteredA = MatrixFactory::Build(A->getCrsGraph()); + auto ghostedCoordsView = + ghostedCoords->getDeviceLocalView(Xpetra::Access::ReadWrite); + CoalesceDrop_Kokkos_Details::DistanceFunctor + distFunctor(ghostedCoordsView); - // Do a no-op fill-complete - RCP fillCompleteParams(new ParameterList); - 
fillCompleteParams->set("No Nonlocal Changes", true); - filteredA->fillComplete(fillCompleteParams); + // Construct Laplacian diagonal + RCP localLaplDiag; + { + SubFactoryMonitor m2(*this, "Local Laplacian diag construction", + currentLevel); - // No need to reuseFill, just modify in place - valsAux = filteredA->getLocalMatrixDevice().values; + localLaplDiag = VectorFactory::Build(uniqueMap); - } else { - // Need an extra array to compress - valsAux = vals_type(Kokkos::ViewAllocateWithoutInitializing("FA_aux_vals"), nnzA); - } + auto localLaplDiagView = + localLaplDiag->getDeviceLocalView(Xpetra::Access::OverwriteAll); + auto kokkosGraph = kokkosMatrix.graph; - typename boundary_nodes_type::non_const_type bndNodes(Kokkos::ViewAllocateWithoutInitializing("boundaryNodes"), numRows); - - LO nnzFA = 0; - { - if (algo == "classical") { - // Construct overlapped matrix diagonal - RCP ghostedDiag; - { - kokkosMatrix = local_matrix_type(); - SubFactoryMonitor m2(*this, "Ghosted diag construction", currentLevel); - ghostedDiag = Utilities::GetMatrixOverlappedDiagonal(*A); - kokkosMatrix=A->getLocalMatrixDevice(); - } - - // Filter out entries - { - SubFactoryMonitor m2(*this, "MainLoop", currentLevel); - - auto ghostedDiagView = ghostedDiag->getDeviceLocalView(Xpetra::Access::ReadWrite); - - CoalesceDrop_Kokkos_Details::ClassicalDropFunctor dropFunctor(ghostedDiagView, threshold); - CoalesceDrop_Kokkos_Details::ScalarFunctor - scalarFunctor(kokkosMatrix, bndNodes, dropFunctor, rows, colsAux, valsAux, reuseGraph, lumping, threshold, aggregationMayCreateDirichlet); - - Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:main_loop", range_type(0,numRows), - scalarFunctor, nnzFA); - } - - } else if (algo == "distance laplacian") { - typedef Xpetra::MultiVector::magnitudeType,LO,GO,NO> doubleMultiVector; - auto coords = Get >(currentLevel, "Coordinates"); - - auto uniqueMap = A->getRowMap(); - auto nonUniqueMap = A->getColMap(); - - // Construct ghosted coordinates 
- RCP importer; - { - SubFactoryMonitor m2(*this, "Coords Import construction", currentLevel); - importer = ImportFactory::Build(uniqueMap, nonUniqueMap); - } - RCP ghostedCoords; - { - SubFactoryMonitor m2(*this, "Ghosted coords construction", currentLevel); - ghostedCoords = Xpetra::MultiVectorFactory::magnitudeType,LO,GO,NO>::Build(nonUniqueMap, coords->getNumVectors()); - ghostedCoords->doImport(*coords, *importer, Xpetra::INSERT); - } - - auto ghostedCoordsView = ghostedCoords->getDeviceLocalView(Xpetra::Access::ReadWrite); - CoalesceDrop_Kokkos_Details::DistanceFunctor distFunctor(ghostedCoordsView); - - // Construct Laplacian diagonal - RCP localLaplDiag; - { - SubFactoryMonitor m2(*this, "Local Laplacian diag construction", currentLevel); - - localLaplDiag = VectorFactory::Build(uniqueMap); - - auto localLaplDiagView = localLaplDiag->getDeviceLocalView(Xpetra::Access::OverwriteAll); - auto kokkosGraph = kokkosMatrix.graph; - - Kokkos::parallel_for("MueLu:CoalesceDropF:Build:scalar_filter:laplacian_diag", range_type(0,numRows), - KOKKOS_LAMBDA(const LO row) { + Kokkos::parallel_for( + "MueLu:CoalesceDropF:Build:scalar_filter:laplacian_diag", + range_type(0, numRows), KOKKOS_LAMBDA(const LO row) { auto rowView = kokkosGraph.rowConst(row); - auto length = rowView.length; + auto length = rowView.length; impl_Scalar d = impl_ATS::zero(); for (decltype(length) colID = 0; colID < length; colID++) { auto col = rowView(colID); if (row != col) - d += impl_ATS::one()/distFunctor.distance2(row, col); + d += impl_ATS::one() / distFunctor.distance2(row, col); } - localLaplDiagView(row,0) = d; + localLaplDiagView(row, 0) = d; }); - } - - // Construct ghosted Laplacian diagonal - RCP ghostedLaplDiag; - { - SubFactoryMonitor m2(*this, "Ghosted Laplacian diag construction", currentLevel); - ghostedLaplDiag = VectorFactory::Build(nonUniqueMap); - ghostedLaplDiag->doImport(*localLaplDiag, *importer, Xpetra::INSERT); - } - - // Filter out entries - { - SubFactoryMonitor 
m2(*this, "MainLoop", currentLevel); - - auto ghostedLaplDiagView = ghostedLaplDiag->getDeviceLocalView(Xpetra::Access::ReadWrite); - - CoalesceDrop_Kokkos_Details::DistanceLaplacianDropFunctor - dropFunctor(ghostedLaplDiagView, distFunctor, threshold); - CoalesceDrop_Kokkos_Details::ScalarFunctor - scalarFunctor(kokkosMatrix, bndNodes, dropFunctor, rows, colsAux, valsAux, reuseGraph, lumping, threshold, true); - - Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:main_loop", range_type(0,numRows), - scalarFunctor, nnzFA); - } } + // Construct ghosted Laplacian diagonal + RCP ghostedLaplDiag; + { + SubFactoryMonitor m2(*this, "Ghosted Laplacian diag construction", + currentLevel); + ghostedLaplDiag = VectorFactory::Build(nonUniqueMap); + ghostedLaplDiag->doImport(*localLaplDiag, *importer, Xpetra::INSERT); + } + + // Filter out entries + { + SubFactoryMonitor m2(*this, "MainLoop", currentLevel); + + auto ghostedLaplDiagView = + ghostedLaplDiag->getDeviceLocalView(Xpetra::Access::ReadWrite); + + CoalesceDrop_Kokkos_Details::DistanceLaplacianDropFunctor< + LO, decltype(ghostedLaplDiagView), decltype(distFunctor)> + dropFunctor(ghostedLaplDiagView, distFunctor, threshold); + CoalesceDrop_Kokkos_Details::ScalarFunctor + scalarFunctor(kokkosMatrix, bndNodes, dropFunctor, rows, colsAux, + valsAux, reuseGraph, lumping, threshold, true); + + Kokkos::parallel_reduce( + "MueLu:CoalesceDropF:Build:scalar_filter:main_loop", + range_type(0, numRows), scalarFunctor, nnzFA); + } } - numDropped = nnzA - nnzFA; + } + numDropped = nnzA - nnzFA; - boundaryNodes = bndNodes; + boundaryNodes = bndNodes; - { - SubFactoryMonitor m2(*this, "CompressRows", currentLevel); + { + SubFactoryMonitor m2(*this, "CompressRows", currentLevel); - // parallel_scan (exclusive) - Kokkos::parallel_scan("MueLu:CoalesceDropF:Build:scalar_filter:compress_rows", range_type(0,numRows+1), - KOKKOS_LAMBDA(const LO i, LO& update, const bool& final_pass) { + // parallel_scan (exclusive) + 
Kokkos::parallel_scan( + "MueLu:CoalesceDropF:Build:scalar_filter:compress_rows", + range_type(0, numRows + 1), + KOKKOS_LAMBDA(const LO i, LO &update, const bool &final_pass) { update += rows(i); if (final_pass) rows(i) = update; }); - } + } - // Compress cols (and optionally vals) - // We use a trick here: we moved all remaining elements to the beginning - // of the original row in the main loop, so we don't need to check for - // INVALID here, and just stop when achieving the new number of elements - // per row. - cols_type cols(Kokkos::ViewAllocateWithoutInitializing("FA_cols"), nnzFA); - vals_type vals; - if (reuseGraph) { - GetOStream(Runtime1) << "reuse matrix graph for filtering (compress matrix columns only)" << std::endl; - // Only compress cols - SubFactoryMonitor m2(*this, "CompressColsAndVals", currentLevel); - - Kokkos::parallel_for("MueLu:TentativePF:Build:compress_cols", range_type(0,numRows), + // Compress cols (and optionally vals) + // We use a trick here: we moved all remaining elements to the beginning + // of the original row in the main loop, so we don't need to check for + // INVALID here, and just stop when achieving the new number of elements + // per row. + cols_type cols(Kokkos::ViewAllocateWithoutInitializing("FA_cols"), nnzFA); + vals_type vals; + if (reuseGraph) { + GetOStream(Runtime1) + << "reuse matrix graph for filtering (compress matrix columns only)" + << std::endl; + // Only compress cols + SubFactoryMonitor m2(*this, "CompressColsAndVals", currentLevel); + + Kokkos::parallel_for( + "MueLu:TentativePF:Build:compress_cols", range_type(0, numRows), KOKKOS_LAMBDA(const LO i) { // Is there Kokkos memcpy? 
- LO rowStart = rows(i); + LO rowStart = rows(i); LO rowAStart = rowsA(i); - size_t rownnz = rows(i+1) - rows(i); + size_t rownnz = rows(i + 1) - rows(i); for (size_t j = 0; j < rownnz; j++) - cols(rowStart+j) = colsAux(rowAStart+j); + cols(rowStart + j) = colsAux(rowAStart + j); }); - } else { - // Compress cols and vals - GetOStream(Runtime1) << "new matrix graph for filtering (compress matrix columns and values)" << std::endl; - SubFactoryMonitor m2(*this, "CompressColsAndVals", currentLevel); + } else { + // Compress cols and vals + GetOStream(Runtime1) << "new matrix graph for filtering (compress matrix " + "columns and values)" + << std::endl; + SubFactoryMonitor m2(*this, "CompressColsAndVals", currentLevel); - vals = vals_type(Kokkos::ViewAllocateWithoutInitializing("FA_vals"), nnzFA); + vals = + vals_type(Kokkos::ViewAllocateWithoutInitializing("FA_vals"), nnzFA); - Kokkos::parallel_for("MueLu:TentativePF:Build:compress_cols", range_type(0,numRows), + Kokkos::parallel_for( + "MueLu:TentativePF:Build:compress_cols", range_type(0, numRows), KOKKOS_LAMBDA(const LO i) { - LO rowStart = rows(i); + LO rowStart = rows(i); LO rowAStart = rowsA(i); - size_t rownnz = rows(i+1) - rows(i); + size_t rownnz = rows(i + 1) - rows(i); for (size_t j = 0; j < rownnz; j++) { - cols(rowStart+j) = colsAux(rowAStart+j); - vals(rowStart+j) = valsAux(rowAStart+j); + cols(rowStart + j) = colsAux(rowAStart + j); + vals(rowStart + j) = valsAux(rowAStart + j); } }); - } - - kokkos_graph_type kokkosGraph(cols, rows); - - { - SubFactoryMonitor m2(*this, "LWGraph construction", currentLevel); - - graph = rcp(new LWGraph_kokkos(kokkosGraph, A->getRowMap(), A->getColMap(), "filtered graph of A")); - graph->getLocalLWGraph().SetBoundaryNodeMap(boundaryNodes); - } - - numTotal = A->getLocalNumEntries(); - - dofsPerNode = 1; - - if (!reuseGraph) { - SubFactoryMonitor m2(*this, "LocalMatrix+FillComplete", currentLevel); - - local_matrix_type localFA = local_matrix_type("A", numRows, 
A->getLocalMatrixDevice().numCols(), nnzFA, vals, rows, cols); - auto filteredACrs = CrsMatrixFactory::Build(localFA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap(), - A->getCrsGraph()->getImporter(), A->getCrsGraph()->getExporter()); - filteredA = rcp(new CrsMatrixWrap(filteredACrs)); - } - - filteredA->SetFixedBlockSize(A->GetFixedBlockSize()); - - if (pL.get("filtered matrix: reuse eigenvalue")) { - // Reuse max eigenvalue from A - // It is unclear what eigenvalue is the best for the smoothing, but we already may have - // the D^{-1}A estimate in A, may as well use it. - // NOTE: ML does that too - filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate()); - } else { - filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits::one()); - } - - } else if (blkSize > 1 && threshold == zero) { - // Case 3: block problem without filtering - // - // FIXME_KOKKOS: this code is completely unoptimized. It really should do - // a very simple thing: merge rows and produce nodal graph. But the code - // seems very complicated. Can we do better? - - TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements() % blkSize != 0, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: Number of local elements is " << A->getRowMap()->getLocalNumElements() << " but should be a multiply of " << blkSize); - - const RCP rowMap = A->getRowMap(); - const RCP colMap = A->getColMap(); - - // build a node row map (uniqueMap = non-overlapping) and a node column map - // (nonUniqueMap = overlapping). The arrays rowTranslation and colTranslation - // stored in the AmalgamationInfo class container contain the local node id - // given a local dof id. 
The data is calculated in the AmalgamationFactory and - // stored in the variable "UnAmalgamationInfo" (which is of type AmalagamationInfo) - const RCP uniqueMap = amalInfo->getNodeRowMap(); - const RCP nonUniqueMap = amalInfo->getNodeColMap(); - Array rowTranslationArray = *(amalInfo->getRowTranslation()); // TAW should be transform that into a View? - Array colTranslationArray = *(amalInfo->getColTranslation()); - - Kokkos::View - rowTranslationView(rowTranslationArray.getRawPtr(),rowTranslationArray.size() ); - Kokkos::View - colTranslationView(colTranslationArray.getRawPtr(),colTranslationArray.size() ); - - // get number of local nodes - LO numNodes = Teuchos::as(uniqueMap->getLocalNumElements()); - typedef typename Kokkos::View id_translation_type; - id_translation_type rowTranslation("dofId2nodeId",rowTranslationArray.size()); - id_translation_type colTranslation("ov_dofId2nodeId",colTranslationArray.size()); - Kokkos::deep_copy(rowTranslation, rowTranslationView); - Kokkos::deep_copy(colTranslation, colTranslationView); - - // extract striding information - blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map) - LocalOrdinal blkId = -1; //< the block id within a strided map or -1 if it is a full block map - LocalOrdinal blkPartSize = A->GetFixedBlockSize(); //< stores block size of part blkId (or the full block size) - if(A->IsView("stridedMaps") == true) { - const RCP myMap = A->getRowMap("stridedMaps"); - const RCP strMap = Teuchos::rcp_dynamic_cast(myMap); - TEUCHOS_TEST_FOR_EXCEPTION(strMap.is_null() == true, Exceptions::RuntimeError, "Map is not of type stridedMap"); - blkSize = Teuchos::as(strMap->getFixedBlockSize()); - blkId = strMap->getStridedBlockId(); - if (blkId > -1) - blkPartSize = Teuchos::as(strMap->getStridingData()[blkId]); - } - auto kokkosMatrix = A->getLocalMatrixDevice(); // access underlying kokkos data - - // - typedef typename LWGraph_kokkos::local_graph_type kokkos_graph_type; - typedef 
typename kokkos_graph_type::row_map_type row_map_type; - //typedef typename row_map_type::HostMirror row_map_type_h; - typedef typename kokkos_graph_type::entries_type entries_type; - - // Stage 1c: get number of dof-nonzeros per blkSize node rows - typename row_map_type::non_const_type dofNnz("nnz_map", numNodes + 1); - LO numDofCols = 0; - CoalesceDrop_Kokkos_Details::Stage1aVectorFunctor stage1aFunctor(kokkosMatrix, dofNnz, blkPartSize); - Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:stage1a", range_type(0,numNodes), stage1aFunctor, numDofCols); - // parallel_scan (exclusive) - CoalesceDrop_Kokkos_Details::ScanFunctor scanFunctor(dofNnz); - Kokkos::parallel_scan("MueLu:CoalesceDropF:Build:scalar_filter:stage1_scan", range_type(0,numNodes+1), scanFunctor); - - // Detect and record dof rows that correspond to Dirichlet boundary conditions - boundary_nodes_type singleEntryRows = Utilities::DetectDirichletRows_kokkos(*A, dirichletThreshold); - - typename entries_type::non_const_type dofcols("dofcols", numDofCols/*dofNnz(numNodes)*/); // why does dofNnz(numNodes) work? 
should be a parallel reduce, i guess - - // we have dofcols and dofids from Stage1dVectorFunctor - LO numNodeCols = 0; - typename row_map_type::non_const_type rows("nnz_nodemap", numNodes + 1); - typename boundary_nodes_type::non_const_type bndNodes("boundaryNodes", numNodes); - - CoalesceDrop_Kokkos_Details::Stage1bcVectorFunctor stage1bcFunctor(kokkosMatrix, dofNnz, blkPartSize, dofcols, colTranslation, rows, singleEntryRows, bndNodes, pL.get("aggregation: greedy Dirichlet")); - Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:stage1c", range_type(0,numNodes), stage1bcFunctor,numNodeCols); + } - // parallel_scan (exclusive) - CoalesceDrop_Kokkos_Details::ScanFunctor scanNodeFunctor(rows); - Kokkos::parallel_scan("MueLu:CoalesceDropF:Build:scalar_filter:stage1_scan", range_type(0,numNodes+1), scanNodeFunctor); + kokkos_graph_type kokkosGraph(cols, rows); - // create column node view - typename entries_type::non_const_type cols("nodecols", numNodeCols); + { + SubFactoryMonitor m2(*this, "LWGraph construction", currentLevel); + graph = rcp(new LWGraph_kokkos(kokkosGraph, A->getRowMap(), + A->getColMap(), "filtered graph of A")); + graph->getLocalLWGraph().SetBoundaryNodeMap(boundaryNodes); + } - CoalesceDrop_Kokkos_Details::Stage1dVectorFunctor stage1dFunctor(dofcols, dofNnz, cols, rows); - Kokkos::parallel_for("MueLu:CoalesceDropF:Build:scalar_filter:stage1c", range_type(0,numNodes), stage1dFunctor); - kokkos_graph_type kokkosGraph(cols, rows); + numTotal = A->getLocalNumEntries(); - // create LW graph - graph = rcp(new LWGraph_kokkos(kokkosGraph, uniqueMap, nonUniqueMap, "amalgamated graph of A")); + dofsPerNode = 1; - boundaryNodes = bndNodes; - graph->getLocalLWGraph().SetBoundaryNodeMap(boundaryNodes); - numTotal = A->getLocalNumEntries(); + if (!reuseGraph) { + SubFactoryMonitor m2(*this, "LocalMatrix+FillComplete", currentLevel); - dofsPerNode = blkSize; + local_matrix_type localFA = + local_matrix_type("A", numRows, 
A->getLocalMatrixDevice().numCols(), + nnzFA, vals, rows, cols); + auto filteredACrs = CrsMatrixFactory::Build( + localFA, A->getRowMap(), A->getColMap(), A->getDomainMap(), + A->getRangeMap(), A->getCrsGraph()->getImporter(), + A->getCrsGraph()->getExporter()); + filteredA = rcp(new CrsMatrixWrap(filteredACrs)); + } - filteredA = A; + filteredA->SetFixedBlockSize(A->GetFixedBlockSize()); + if (pL.get("filtered matrix: reuse eigenvalue")) { + // Reuse max eigenvalue from A + // It is unclear what eigenvalue is the best for the smoothing, but we + // already may have the D^{-1}A estimate in A, may as well use it. NOTE: + // ML does that too + filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate()); } else { - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu: CoalesceDropFactory_kokkos: Block filtering is not implemented"); + filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits::one()); + } + + } else if (blkSize > 1 && threshold == zero) { + // Case 3: block problem without filtering + // + // FIXME_KOKKOS: this code is completely unoptimized. It really should do + // a very simple thing: merge rows and produce nodal graph. But the code + // seems very complicated. Can we do better? + + TEUCHOS_TEST_FOR_EXCEPTION( + A->getRowMap()->getLocalNumElements() % blkSize != 0, + MueLu::Exceptions::RuntimeError, + "MueLu::CoalesceDropFactory: Number of local elements is " + << A->getRowMap()->getLocalNumElements() + << " but should be a multiply of " << blkSize); + + const RCP rowMap = A->getRowMap(); + const RCP colMap = A->getColMap(); + + // build a node row map (uniqueMap = non-overlapping) and a node column map + // (nonUniqueMap = overlapping). The arrays rowTranslation and + // colTranslation stored in the AmalgamationInfo class container contain the + // local node id given a local dof id. 
The data is calculated in the + // AmalgamationFactory and stored in the variable "UnAmalgamationInfo" + // (which is of type AmalagamationInfo) + const RCP uniqueMap = amalInfo->getNodeRowMap(); + const RCP nonUniqueMap = amalInfo->getNodeColMap(); + Array rowTranslationArray = *( + amalInfo + ->getRowTranslation()); // TAW should be transform that into a View? + Array colTranslationArray = *(amalInfo->getColTranslation()); + + Kokkos::View rowTranslationView( + rowTranslationArray.getRawPtr(), rowTranslationArray.size()); + Kokkos::View colTranslationView( + colTranslationArray.getRawPtr(), colTranslationArray.size()); + + // get number of local nodes + LO numNodes = Teuchos::as(uniqueMap->getLocalNumElements()); + typedef typename Kokkos::View + id_translation_type; + id_translation_type rowTranslation("dofId2nodeId", + rowTranslationArray.size()); + id_translation_type colTranslation("ov_dofId2nodeId", + colTranslationArray.size()); + Kokkos::deep_copy(rowTranslation, rowTranslationView); + Kokkos::deep_copy(colTranslation, colTranslationView); + + // extract striding information + blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs + // per node in strided map) + LocalOrdinal blkId = -1; //< the block id within a strided map or -1 if it + // is a full block map + LocalOrdinal blkPartSize = + A->GetFixedBlockSize(); //< stores block size of part blkId (or the full + // block size) + if (A->IsView("stridedMaps") == true) { + const RCP myMap = A->getRowMap("stridedMaps"); + const RCP strMap = + Teuchos::rcp_dynamic_cast(myMap); + TEUCHOS_TEST_FOR_EXCEPTION(strMap.is_null() == true, + Exceptions::RuntimeError, + "Map is not of type stridedMap"); + blkSize = Teuchos::as(strMap->getFixedBlockSize()); + blkId = strMap->getStridedBlockId(); + if (blkId > -1) + blkPartSize = + Teuchos::as(strMap->getStridingData()[blkId]); } + auto kokkosMatrix = + A->getLocalMatrixDevice(); // access underlying kokkos data + + // + typedef typename 
LWGraph_kokkos::local_graph_type kokkos_graph_type; + typedef typename kokkos_graph_type::row_map_type row_map_type; + // typedef typename row_map_type::HostMirror row_map_type_h; + typedef typename kokkos_graph_type::entries_type entries_type; + + // Stage 1c: get number of dof-nonzeros per blkSize node rows + typename row_map_type::non_const_type dofNnz("nnz_map", numNodes + 1); + LO numDofCols = 0; + CoalesceDrop_Kokkos_Details::Stage1aVectorFunctor< + decltype(kokkosMatrix), decltype(dofNnz), decltype(blkPartSize)> + stage1aFunctor(kokkosMatrix, dofNnz, blkPartSize); + Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:stage1a", + range_type(0, numNodes), stage1aFunctor, + numDofCols); + // parallel_scan (exclusive) + CoalesceDrop_Kokkos_Details::ScanFunctor scanFunctor( + dofNnz); + Kokkos::parallel_scan("MueLu:CoalesceDropF:Build:scalar_filter:stage1_scan", + range_type(0, numNodes + 1), scanFunctor); + + // Detect and record dof rows that correspond to Dirichlet boundary + // conditions + boundary_nodes_type singleEntryRows = + Utilities::DetectDirichletRows_kokkos(*A, dirichletThreshold); + + typename entries_type::non_const_type dofcols( + "dofcols", + numDofCols /*dofNnz(numNodes)*/); // why does dofNnz(numNodes) work? 
+ // should be a parallel reduce, i + // guess + + // we have dofcols and dofids from Stage1dVectorFunctor + LO numNodeCols = 0; + typename row_map_type::non_const_type rows("nnz_nodemap", numNodes + 1); + typename boundary_nodes_type::non_const_type bndNodes("boundaryNodes", + numNodes); + + CoalesceDrop_Kokkos_Details::Stage1bcVectorFunctor< + decltype(kokkosMatrix), decltype(dofNnz), decltype(blkPartSize), + decltype(dofcols), decltype(colTranslation), decltype(singleEntryRows), + decltype(bndNodes), bool> + stage1bcFunctor(kokkosMatrix, dofNnz, blkPartSize, dofcols, + colTranslation, rows, singleEntryRows, bndNodes, + pL.get("aggregation: greedy Dirichlet")); + Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:stage1c", + range_type(0, numNodes), stage1bcFunctor, + numNodeCols); + + // parallel_scan (exclusive) + CoalesceDrop_Kokkos_Details::ScanFunctor + scanNodeFunctor(rows); + Kokkos::parallel_scan("MueLu:CoalesceDropF:Build:scalar_filter:stage1_scan", + range_type(0, numNodes + 1), scanNodeFunctor); + + // create column node view + typename entries_type::non_const_type cols("nodecols", numNodeCols); + + CoalesceDrop_Kokkos_Details::Stage1dVectorFunctor< + decltype(kokkosMatrix), decltype(dofNnz), decltype(dofcols), + decltype(rows), decltype(cols)> + stage1dFunctor(dofcols, dofNnz, cols, rows); + Kokkos::parallel_for("MueLu:CoalesceDropF:Build:scalar_filter:stage1c", + range_type(0, numNodes), stage1dFunctor); + kokkos_graph_type kokkosGraph(cols, rows); + + // create LW graph + graph = rcp(new LWGraph_kokkos(kokkosGraph, uniqueMap, nonUniqueMap, + "amalgamated graph of A")); + + boundaryNodes = bndNodes; + graph->getLocalLWGraph().SetBoundaryNodeMap(boundaryNodes); + numTotal = A->getLocalNumEntries(); + + dofsPerNode = blkSize; + + filteredA = A; + + } else { + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, + "MueLu: CoalesceDropFactory_kokkos: Block " + "filtering is not implemented"); + } - if (GetVerbLevel() & Statistics1) { 
- GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; + if (GetVerbLevel() & Statistics1) { + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; - Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:bnd", range_type(0, boundaryNodes.extent(0)), - KOKKOS_LAMBDA(const LO i, GO& n) { + Kokkos::parallel_reduce( + "MueLu:CoalesceDropF:Build:bnd", range_type(0, boundaryNodes.extent(0)), + KOKKOS_LAMBDA(const LO i, GO &n) { if (boundaryNodes(i)) n++; - }, numLocalBoundaryNodes); + }, + numLocalBoundaryNodes); - auto comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; - } + auto comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); + GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes + << " Dirichlet nodes" << std::endl; + } - if ((GetVerbLevel() & Statistics1) && threshold != zero) { - auto comm = A->getRowMap()->getComm(); + if ((GetVerbLevel() & Statistics1) && threshold != zero) { + auto comm = A->getRowMap()->getComm(); - GO numGlobalTotal, numGlobalDropped; - MueLu_sumAll(comm, numTotal, numGlobalTotal); - MueLu_sumAll(comm, numDropped, numGlobalDropped); + GO numGlobalTotal, numGlobalDropped; + MueLu_sumAll(comm, numTotal, numGlobalTotal); + MueLu_sumAll(comm, numDropped, numGlobalDropped); - if (numGlobalTotal != 0) { - GetOStream(Statistics1) << "Number of dropped entries: " - << numGlobalDropped << "/" << numGlobalTotal - << " (" << 100*Teuchos::as(numGlobalDropped)/Teuchos::as(numGlobalTotal) << "%)" << std::endl; - } + if (numGlobalTotal != 0) { + GetOStream(Statistics1) + << "Number of dropped entries: " << numGlobalDropped << "/" + << numGlobalTotal << " (" + << 100 * Teuchos::as(numGlobalDropped) / + Teuchos::as(numGlobalTotal) + << "%)" << std::endl; } - - Set(currentLevel, "DofsPerNode", dofsPerNode); - Set(currentLevel, 
"Graph", graph); - Set(currentLevel, "A", filteredA); } + + Set(currentLevel, "DofsPerNode", dofsPerNode); + Set(currentLevel, "Graph", graph); + Set(currentLevel, "A", filteredA); } +} // namespace MueLu #endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionBaseClass_decl.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionBaseClass_decl.hpp index 7f94599c3ba1..9c24deee579e 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionBaseClass_decl.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionBaseClass_decl.hpp @@ -46,35 +46,34 @@ #ifndef MUELU_PREDROPFUNCTIONBASECLASS_DECL_HPP #define MUELU_PREDROPFUNCTIONBASECLASS_DECL_HPP - -#include "MueLu_ConfigDefs.hpp" #include "MueLu_BaseClass.hpp" +#include "MueLu_ConfigDefs.hpp" #include "MueLu_PreDropFunctionBaseClass_fwd.hpp" namespace MueLu { - /*! - * Base class you can derive from to allow user defined dropping - * - */ - template - class PreDropFunctionBaseClass : public BaseClass { +/*! + * Base class you can derive from to allow user defined dropping + * + */ +template +class PreDropFunctionBaseClass : public BaseClass { #undef MUELU_PREDROPFUNCTIONBASECLASS_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! Destructor - virtual ~PreDropFunctionBaseClass() { } - - //! Drop - virtual bool Drop(size_t lrow, GlobalOrdinal grow, size_t k, LocalOrdinal lcid, GlobalOrdinal gcid, const Teuchos::ArrayView & indices, const Teuchos::ArrayView & vals) = 0; +public: + //! Destructor + virtual ~PreDropFunctionBaseClass() {} - }; -} + //! 
Drop + virtual bool Drop(size_t lrow, GlobalOrdinal grow, size_t k, + LocalOrdinal lcid, GlobalOrdinal gcid, + const Teuchos::ArrayView &indices, + const Teuchos::ArrayView &vals) = 0; +}; +} // namespace MueLu #define MUELU_PREDROPFUNCTIONBASECLASS_SHORT #endif // MUELU_PREDROPFUNCTIONBASECLASS_DECL_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionConstVal_decl.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionConstVal_decl.hpp index a42b58f56b18..b02a0cc2278d 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionConstVal_decl.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionConstVal_decl.hpp @@ -50,64 +50,67 @@ #include "MueLu_PreDropFunctionBaseClass.hpp" #include "MueLu_PreDropFunctionConstVal_fwd.hpp" - namespace MueLu { - /*! - * Example implementation for dropping values smaller then a constant threshold - * - */ - template - class PreDropFunctionConstVal : - public MueLu::PreDropFunctionBaseClass { +/*! + * Example implementation for dropping values smaller then a constant threshold + * + */ +template +class PreDropFunctionConstVal + : public MueLu::PreDropFunctionBaseClass { #undef MUELU_PREDROPFUNCTIONCONSTVAL_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! Constructor - explicit PreDropFunctionConstVal(const Scalar threshold = 0.0); - - //! Destructor - virtual ~PreDropFunctionConstVal() { } - - /*! 
Drop - * @param lrow (size_t): local row index (=lrowid) - * @param grow (GlobalOrdinal: global row id - * @param k (size_t): local column iterator - * @param lcid (LocalOrdinal): local column id (=indices[k]) - * @param gcid (GlobalOrdinal): global column id - * @param indices (ArrrayView): array of local column ids in current row (lrow) - * @param vals (ArrayView): array of corresponding values in current row (lrow) - * @return bool: false, if value in (lrow, lcid) shall be kept, true if it should be dropped - */ - bool Drop(size_t lrow, GlobalOrdinal grow, size_t k, LocalOrdinal lcid, GlobalOrdinal gcid, const Teuchos::ArrayView & indices, const Teuchos::ArrayView & vals); - - //! Return threshold value. - Scalar GetThreshold() const; - - //! @name Overridden from Teuchos::Describable - //@{ +public: + //! Constructor + explicit PreDropFunctionConstVal(const Scalar threshold = 0.0); + + //! Destructor + virtual ~PreDropFunctionConstVal() {} + + /*! Drop + * @param lrow (size_t): local row index (=lrowid) + * @param grow (GlobalOrdinal: global row id + * @param k (size_t): local column iterator + * @param lcid (LocalOrdinal): local column id (=indices[k]) + * @param gcid (GlobalOrdinal): global column id + * @param indices (ArrrayView): array of local column ids in current row + * (lrow) + * @param vals (ArrayView): array of corresponding values in current row + * (lrow) + * @return bool: false, if value in (lrow, lcid) shall be kept, true if it + * should be dropped + */ + bool Drop(size_t lrow, GlobalOrdinal grow, size_t k, LocalOrdinal lcid, + GlobalOrdinal gcid, + const Teuchos::ArrayView &indices, + const Teuchos::ArrayView &vals); - //! Return a simple one-line description of this object. - std::string description() const; + //! Return threshold value. + Scalar GetThreshold() const; - //! Print the object with some verbosity level to an FancyOStream object. 
- //using MueLu::Describable::describe; // overloading, not hiding - //void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = Default) const; + //! @name Overridden from Teuchos::Describable + //@{ - //@} + //! Return a simple one-line description of this object. + std::string description() const; - private: + //! Print the object with some verbosity level to an FancyOStream object. + // using MueLu::Describable::describe; // overloading, not hiding + // void describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel = + // Default) const; - Scalar threshold_; + //@} - }; +private: + Scalar threshold_; +}; -} +} // namespace MueLu #define MUELU_PREDROPFUNCTIONCONSTVAL_SHORT #endif // MUELU_PREDROPFUNCTIONCONSTVAL_DECL_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionConstVal_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionConstVal_def.hpp index 4c1577d90146..67073d6e67d8 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionConstVal_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_PreDropFunctionConstVal_def.hpp @@ -53,40 +53,49 @@ namespace MueLu { - template - PreDropFunctionConstVal::PreDropFunctionConstVal(const Scalar threshold) - : threshold_(threshold) { } +template +PreDropFunctionConstVal::PreDropFunctionConstVal(const Scalar threshold) + : threshold_(threshold) {} - template - bool PreDropFunctionConstVal::Drop(size_t /* lrow */, GlobalOrdinal grow, size_t k, LocalOrdinal /* lcid */, GlobalOrdinal gcid, const Teuchos::ArrayView & /* indices */, const Teuchos::ArrayView & vals) { - if(Teuchos::ScalarTraits::magnitude(vals[k]) > Teuchos::ScalarTraits::magnitude(threshold_) || grow == gcid ) { - return false; // keep values - } - return true; // values too small -> drop them +template +bool PreDropFunctionConstVal::Drop( + size_t /* lrow */, GlobalOrdinal grow, size_t k, LocalOrdinal /* lcid */, + GlobalOrdinal gcid, + const 
Teuchos::ArrayView & /* indices */, + const Teuchos::ArrayView &vals) { + if (Teuchos::ScalarTraits::magnitude(vals[k]) > + Teuchos::ScalarTraits::magnitude(threshold_) || + grow == gcid) { + return false; // keep values } + return true; // values too small -> drop them +} - template - Scalar PreDropFunctionConstVal::GetThreshold() const { - return threshold_; - } +template +Scalar PreDropFunctionConstVal::GetThreshold() const { + return threshold_; +} +template +std::string PreDropFunctionConstVal::description() const { + std::ostringstream out; + out << "PreDropFunctionConstVal: threshold = " << threshold_ << std::endl; + return out.str(); +} - template - std::string PreDropFunctionConstVal::description() const { - std::ostringstream out; - out << "PreDropFunctionConstVal: threshold = " << threshold_ << std::endl; - return out.str(); +/*template +void PreDropFunctionConstVal::describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel) const { + MUELU_DESCRIBE; + if (verbLevel & Parameters0) { + out0 << "PreDropFunctionConstVal: threshold = " << threshold_ << std::endl; } +}*/ - /*template - void PreDropFunctionConstVal::describe(Teuchos::FancyOStream &out, const VerbLevel verbLevel) const { - MUELU_DESCRIBE; - if (verbLevel & Parameters0) { - out0 << "PreDropFunctionConstVal: threshold = " << threshold_ << std::endl; - } - }*/ - -} +} // namespace MueLu #define MUELU_PREDROPFUNCTIONCONSTVAL_SHORT #endif // MUELU_PREDROPFUNCTIONCONSTVAL_DEF_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_SmooVecCoalesceDropFactory_decl.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_SmooVecCoalesceDropFactory_decl.hpp index b763c93cd955..64292d201adb 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_SmooVecCoalesceDropFactory_decl.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_SmooVecCoalesceDropFactory_decl.hpp @@ -46,122 +46,145 @@ #ifndef MUELU_SMOOVECCOALESCEDROPFACTORY_DECL_HPP #define 
MUELU_SMOOVECCOALESCEDROPFACTORY_DECL_HPP +#include +#include #include #include #include #include -#include -#include #include "MueLu_ConfigDefs.hpp" #include "MueLu_SingleLevelFactoryBase.hpp" #include "MueLu_SmooVecCoalesceDropFactory_fwd.hpp" -#include "MueLu_Level_fwd.hpp" #include "MueLu_GraphBase.hpp" #include "MueLu_Graph_fwd.hpp" #include "MueLu_LWGraph_fwd.hpp" +#include "MueLu_Level_fwd.hpp" #include "MueLu_PreDropFunctionBaseClass_fwd.hpp" namespace MueLu { - /*! - @class SmooVecCoalesceDropFactory - @brief Factory for creating a graph base on a given matrix. - - Factory for creating graphs from matrices with entries selectively dropped. - - ## Code paths ## - - Experimental dropping function based on taking a set of random vectors u, running - a smoother on A u = 0, and then basing the drop decisions on "how smooth" the vectors - are local. Neighobring regions where the vectors are smooth can be aggregated - together and so these are kept in the associated drop matrix. Areas that are - not smooth should end up in different aggregates and so the A_ij representing - these should be dropped. This Factory can address both PDE systems and - scalar PDEs, always creating a matrix reprsenting nodal connections as opposed - to dof connections. - - To enter this factor as opposed to the more standard CoalesceDropFactory() one - must set "aggregation: drop scheme" to "unsupported vector smoothing". In this - case some of the parameter options associated with CoalesceDropFactory (e.g., - "aggregation: drop tol", "aggregation: Dirichlet threshold", "lightweight wrap") - will cause parameter validator errors. 
- - ## Input/output of SmooVecCoalesceDropFactory ## - - ### User parameters of SmooVecCoalesceDropFactory ### - Parameter | type | default | master.xml | validated | requested | description - ---------------------------|-----------|-----------|:----------:|:---------:|:---------:|------------ - A |Factory | null | | * | * | Generating factory of the operator A - "aggregation: drop scheme"|std::string|"classical"| * | * | * | Must choose "unsupported vector smoothing" - "aggregation: number of times to pre or post smooth"|int| 10|* | | * | Amount of pre or post smoothing invocations - "aggregation: number of random vectors"|int| 10 | * | * | * | Number of random vectors - "aggregation: penalty parameters"|Array(double)|{12.0,-.20}| * | * | * | Ultimately determines how much dropping is done - - The * in the @c master.xml column denotes that the parameter is defined in the @c master.xml file.
- The * in the @c validated column means that the parameter is declared in the list of valid input parameters (see SmooVecCoalesceDropFactory::GetValidParameters).
- The * in the @c requested column states that the data is requested as input with all dependencies (see SmooVecCoalesceDropFactory::DeclareInput). - - ### Variables provided by UncoupledAggregationFactory ### - - After SmooVecCoalesceDropFactory::Build the following data is available (if requested) - - Parameter | generated by | description - ----------|--------------|------------ - Graph | SmooVecCoalesceDropFactory | Graph of matrix A - DofsPerNode | SmooVecCoalesceDropFactory | number of DOFs per node. Note, that we assume a constant number of DOFs per node for all nodes associated with the operator A. - - */ - - template - class SmooVecCoalesceDropFactory : public SingleLevelFactoryBase { +/*! + @class SmooVecCoalesceDropFactory + @brief Factory for creating a graph base on a given matrix. + + Factory for creating graphs from matrices with entries selectively dropped. + + ## Code paths ## + + Experimental dropping function based on taking a set of random vectors u, + running a smoother on A u = 0, and then basing the drop decisions on "how + smooth" the vectors are local. Neighobring regions where the vectors are + smooth can be aggregated together and so these are kept in the associated drop + matrix. Areas that are not smooth should end up in different aggregates and so + the A_ij representing these should be dropped. This Factory can address both + PDE systems and scalar PDEs, always creating a matrix reprsenting nodal + connections as opposed to dof connections. + + To enter this factor as opposed to the more standard CoalesceDropFactory() + one must set "aggregation: drop scheme" to "unsupported vector smoothing". In + this case some of the parameter options associated with CoalesceDropFactory + (e.g., "aggregation: drop tol", "aggregation: Dirichlet threshold", + "lightweight wrap") will cause parameter validator errors. 
+ + ## Input/output of SmooVecCoalesceDropFactory ## + + ### User parameters of SmooVecCoalesceDropFactory ### + Parameter | type | default | master.xml | validated | + requested | description + ---------------------------|-----------|-----------|:----------:|:---------:|:---------:|------------ + A |Factory | null | | * | + * | Generating factory of the operator A "aggregation: drop + scheme"|std::string|"classical"| * | * | * | Must + choose "unsupported vector smoothing" "aggregation: number of times to pre or + post smooth"|int| 10|* | | * | Amount of pre or post + smoothing invocations "aggregation: number of random vectors"|int| 10 | * | + * | * | Number of random vectors "aggregation: penalty + parameters"|Array(double)|{12.0,-.20}| * | * | * | Ultimately + determines how much dropping is done + + The * in the @c master.xml column denotes that the parameter is defined in the + @c master.xml file.
The * in the @c validated column means that the + parameter is declared in the list of valid input parameters (see + SmooVecCoalesceDropFactory::GetValidParameters).
The * in the @c requested + column states that the data is requested as input with all dependencies (see + SmooVecCoalesceDropFactory::DeclareInput). + + ### Variables provided by UncoupledAggregationFactory ### + + After SmooVecCoalesceDropFactory::Build the following data is available (if + requested) + + Parameter | generated by | description + ----------|--------------|------------ + Graph | SmooVecCoalesceDropFactory | Graph of matrix A + DofsPerNode | SmooVecCoalesceDropFactory | number of DOFs per node. Note, that + we assume a constant number of DOFs per node for all nodes associated with the + operator A. + +*/ + +template +class SmooVecCoalesceDropFactory : public SingleLevelFactoryBase { #undef MUELU_SMOOVECCOALESCEDROPFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! @name Constructors/Destructors. - //@{ - - //! Constructor - SmooVecCoalesceDropFactory(); +public: + //! @name Constructors/Destructors. + //@{ - //! Destructor - virtual ~SmooVecCoalesceDropFactory() { } + //! Constructor + SmooVecCoalesceDropFactory(); - RCP GetValidParameterList() const; + //! Destructor + virtual ~SmooVecCoalesceDropFactory() {} - //@} + RCP GetValidParameterList() const; - //! Input - //@{ + //@} - void DeclareInput(Level ¤tLevel) const; + //! Input + //@{ - /// set predrop function - void SetPreDropFunction(const RCP > &predrop) { predrop_ = predrop; } + void DeclareInput(Level ¤tLevel) const; - //@} + /// set predrop function + void + SetPreDropFunction(const RCP> &predrop) { + predrop_ = predrop; + } - void Build(Level ¤tLevel) const; // Build + //@} - private: + void Build(Level ¤tLevel) const; // Build - // pre-drop function - mutable - RCP predrop_; +private: + // pre-drop function + mutable RCP predrop_; - //! 
Methods to support compatible-relaxation style dropping - void badGuysCoalesceDrop(const Matrix& Amat, Teuchos::ArrayRCP & dropParams, LO nPDEs, const MultiVector& smoothedTVecs, const MultiVector& smoothedNull, RCP& filteredGraph) const; - void badGuysDropfunc(LO row, const Teuchos::ArrayView& indices, const Teuchos::ArrayView& vals, const MultiVector& smoothedTVecs, LO nPDEs, Teuchos::ArrayRCP & penalties, const MultiVector& smoothedNull, Teuchos::ArrayRCP& Bcols, Teuchos::ArrayRCP& keepOrNot, LO &Nbcols, LO nLoc) const; + //! Methods to support compatible-relaxation style dropping + void badGuysCoalesceDrop(const Matrix &Amat, + Teuchos::ArrayRCP &dropParams, LO nPDEs, + const MultiVector &smoothedTVecs, + const MultiVector &smoothedNull, + RCP &filteredGraph) const; + void badGuysDropfunc(LO row, + const Teuchos::ArrayView &indices, + const Teuchos::ArrayView &vals, + const MultiVector &smoothedTVecs, LO nPDEs, + Teuchos::ArrayRCP &penalties, + const MultiVector &smoothedNull, + Teuchos::ArrayRCP &Bcols, + Teuchos::ArrayRCP &keepOrNot, LO &Nbcols, + LO nLoc) const; - }; //class SmooVecCoalesceDropFactory +}; // class SmooVecCoalesceDropFactory -} //namespace MueLu +} // namespace MueLu #define MUELU_SMOOVECCOALESCEDROPFACTORY_SHORT #endif // MUELU_SMOOVECCOALESCEDROPFACTORY_DECL_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_SmooVecCoalesceDropFactory_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_SmooVecCoalesceDropFactory_def.hpp index 24a9a8334275..abd3cdebf81f 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_SmooVecCoalesceDropFactory_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_SmooVecCoalesceDropFactory_def.hpp @@ -49,27 +49,26 @@ #include #include -#include #include +#include #include -#include #include +#include #include -#include #include +#include #include "MueLu_SmooVecCoalesceDropFactory_decl.hpp" #include "MueLu_Exceptions.hpp" -#include "MueLu_GraphBase.hpp" #include 
"MueLu_Graph.hpp" -#include "MueLu_Level.hpp" +#include "MueLu_GraphBase.hpp" #include "MueLu_LWGraph.hpp" +#include "MueLu_Level.hpp" #include "MueLu_MasterList.hpp" #include "MueLu_Monitor.hpp" #include "MueLu_PreDropFunctionBaseClass.hpp" - #include #include @@ -80,164 +79,205 @@ // Should be removed once we are confident that this works. // #define DJS_READ_ENV_VARIABLES +#include #include #include -#include - -#define poly0thOrderCoef 0 -#define poly1stOrderCoef 1 -#define poly2ndOrderCoef 2 -#define poly3rdOrderCoef 3 -#define poly4thOrderCoef 4 +#define poly0thOrderCoef 0 +#define poly1stOrderCoef 1 +#define poly2ndOrderCoef 2 +#define poly3rdOrderCoef 3 +#define poly4thOrderCoef 4 namespace MueLu { - template - RCP SmooVecCoalesceDropFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - -#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("aggregation: drop scheme"); - { - typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; - validParamList->getEntry("aggregation: drop scheme").setValidator( - rcp(new validatorType(Teuchos::tuple("unsupported vector smoothing"), "aggregation: drop scheme"))); - } - SET_VALID_ENTRY("aggregation: number of random vectors"); - SET_VALID_ENTRY("aggregation: number of times to pre or post smooth"); - SET_VALID_ENTRY("aggregation: penalty parameters"); -#undef SET_VALID_ENTRY - - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A"); - validParamList->set< RCP >("PreSmoother", Teuchos::null, "Generating factory of the PreSmoother"); - validParamList->set< RCP >("PostSmoother", Teuchos::null, "Generating factory of the PostSmoother"); - - return validParamList; +template +RCP +SmooVecCoalesceDropFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + +#define SET_VALID_ENTRY(name) \ + validParamList->setEntry(name, MasterList::getEntry(name)) + 
SET_VALID_ENTRY("aggregation: drop scheme"); + { + typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; + validParamList->getEntry("aggregation: drop scheme") + .setValidator(rcp(new validatorType( + Teuchos::tuple("unsupported vector smoothing"), + "aggregation: drop scheme"))); } - - template - SmooVecCoalesceDropFactory::SmooVecCoalesceDropFactory() : predrop_(Teuchos::null) { } - - template - void SmooVecCoalesceDropFactory::DeclareInput(Level ¤tLevel) const { - Input(currentLevel, "A"); - if (currentLevel.IsAvailable("PreSmoother")) { // rst: totally unsure that this is legal - Input(currentLevel, "PreSmoother"); // my guess is that this is not yet available - } // so this always comes out false. - else if (currentLevel.IsAvailable("PostSmoother")) { // perhaps we can look on the param list? - Input(currentLevel, "PostSmoother"); - } + SET_VALID_ENTRY("aggregation: number of random vectors"); + SET_VALID_ENTRY("aggregation: number of times to pre or post smooth"); + SET_VALID_ENTRY("aggregation: penalty parameters"); +#undef SET_VALID_ENTRY + + validParamList->set>( + "A", Teuchos::null, "Generating factory of the matrix A"); + validParamList->set>( + "PreSmoother", Teuchos::null, "Generating factory of the PreSmoother"); + validParamList->set>( + "PostSmoother", Teuchos::null, "Generating factory of the PostSmoother"); + + return validParamList; +} + +template +SmooVecCoalesceDropFactory::SmooVecCoalesceDropFactory() + : predrop_(Teuchos::null) {} + +template +void SmooVecCoalesceDropFactory::DeclareInput(Level ¤tLevel) const { + Input(currentLevel, "A"); + if (currentLevel.IsAvailable( + "PreSmoother")) { // rst: totally unsure that this is legal + Input(currentLevel, + "PreSmoother"); // my guess is that this is not yet available + } // so this always comes out false. + else if (currentLevel.IsAvailable( + "PostSmoother")) { // perhaps we can look on the param list? 
+ Input(currentLevel, "PostSmoother"); } +} - template - void SmooVecCoalesceDropFactory::Build(Level ¤tLevel) const { +template +void SmooVecCoalesceDropFactory::Build(Level ¤tLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); + FactoryMonitor m(*this, "Build", currentLevel); - typedef Teuchos::ScalarTraits STS; + typedef Teuchos::ScalarTraits STS; - if (predrop_ != Teuchos::null) - GetOStream(Parameters0) << predrop_->description(); + if (predrop_ != Teuchos::null) + GetOStream(Parameters0) << predrop_->description(); - RCP A = Get< RCP >(currentLevel, "A"); + RCP A = Get>(currentLevel, "A"); - const ParameterList & pL = GetParameterList(); + const ParameterList &pL = GetParameterList(); - LO nPDEs = A->GetFixedBlockSize(); + LO nPDEs = A->GetFixedBlockSize(); - RCP< MultiVector > testVecs; - RCP< MultiVector > nearNull; + RCP testVecs; + RCP nearNull; #ifdef takeOut - testVecs = Xpetra::IO::ReadMultiVector("TpetraTVecs.mm", A->getRowMap()); + testVecs = Xpetra::IO::ReadMultiVector("TpetraTVecs.mm", + A->getRowMap()); #endif - size_t numRandom= as(pL.get("aggregation: number of random vectors")); - testVecs = MultiVectorFactory::Build(A->getRowMap(), numRandom, true); - // use random test vectors but should be positive in order to not get - // crummy results ... so take abs() of randomize(). 
- testVecs->randomize(); - for (size_t kk = 0; kk < testVecs->getNumVectors(); kk++ ) { - Teuchos::ArrayRCP< Scalar > curVec = testVecs->getDataNonConst(kk); - for (size_t ii = kk; ii < as(A->getRowMap()->getLocalNumElements()); ii++ ) curVec[ii] = Teuchos::ScalarTraits::magnitude(curVec[ii]); - } - nearNull = MultiVectorFactory::Build(A->getRowMap(), nPDEs, true); - - // initialize null space to constants - for (size_t kk = 0; kk < nearNull->getNumVectors(); kk++ ) { - Teuchos::ArrayRCP< Scalar > curVec = nearNull->getDataNonConst(kk); - for (size_t ii = kk; ii < as(A->getRowMap()->getLocalNumElements()); ii += nearNull->getNumVectors() ) curVec[ii] = Teuchos::ScalarTraits::one(); - } - - RCP< MultiVector > zeroVec_TVecs; - RCP< MultiVector > zeroVec_Null; - - zeroVec_TVecs = MultiVectorFactory::Build(A->getRowMap(), testVecs->getNumVectors(), true); - zeroVec_Null = MultiVectorFactory::Build(A->getRowMap(), nPDEs, true); - zeroVec_TVecs->putScalar(Teuchos::ScalarTraits::zero()); - zeroVec_Null->putScalar( Teuchos::ScalarTraits::zero()); - - size_t nInvokeSmoother=as(pL.get("aggregation: number of times to pre or post smooth")); - if (currentLevel.IsAvailable("PreSmoother")) { - RCP preSmoo = currentLevel.Get< RCP >("PreSmoother"); - for (size_t ii = 0; ii < nInvokeSmoother; ii++) preSmoo->Apply(*testVecs,*zeroVec_TVecs,false); - for (size_t ii = 0; ii < nInvokeSmoother; ii++) preSmoo->Apply(*nearNull,*zeroVec_Null,false); - } - else if (currentLevel.IsAvailable("PostSmoother")) { - RCP postSmoo = currentLevel.Get< RCP >("PostSmoother"); - for (size_t ii = 0; ii < nInvokeSmoother; ii++) postSmoo->Apply(*testVecs,*zeroVec_TVecs,false); - for (size_t ii = 0; ii < nInvokeSmoother; ii++) postSmoo->Apply(*nearNull, *zeroVec_Null,false); - } - else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "Must set a smoother"); - - Teuchos::ArrayRCP penaltyPolyCoef(5); - Teuchos::ArrayView inputPolyCoef; - - penaltyPolyCoef[poly0thOrderCoef] = 12.; - 
penaltyPolyCoef[poly1stOrderCoef] = -.2; - penaltyPolyCoef[poly2ndOrderCoef] = 0.0; - penaltyPolyCoef[poly3rdOrderCoef] = 0.0; - penaltyPolyCoef[poly4thOrderCoef] = 0.0; - - if(pL.isParameter("aggregation: penalty parameters") && pL.get >("aggregation: penalty parameters").size() > 0) { - if (pL.get >("aggregation: penalty parameters").size() > penaltyPolyCoef.size()) - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "Number of penalty parameters must be " << penaltyPolyCoef.size() << " or less"); - inputPolyCoef = pL.get >("aggregation: penalty parameters")(); - - for (size_t i = 0; i < as(inputPolyCoef.size()) ; i++) penaltyPolyCoef[i] = as(inputPolyCoef[i]); - for (size_t i = as(inputPolyCoef.size()); i < as(penaltyPolyCoef.size()); i++) penaltyPolyCoef[i] = Teuchos::ScalarTraits::zero(); - } - - - RCP filteredGraph; - badGuysCoalesceDrop(*A, penaltyPolyCoef, nPDEs, *testVecs, *nearNull, filteredGraph); + size_t numRandom = + as(pL.get("aggregation: number of random vectors")); + testVecs = MultiVectorFactory::Build(A->getRowMap(), numRandom, true); + // use random test vectors but should be positive in order to not get + // crummy results ... so take abs() of randomize(). 
+ testVecs->randomize(); + for (size_t kk = 0; kk < testVecs->getNumVectors(); kk++) { + Teuchos::ArrayRCP curVec = testVecs->getDataNonConst(kk); + for (size_t ii = kk; ii < as(A->getRowMap()->getLocalNumElements()); + ii++) + curVec[ii] = Teuchos::ScalarTraits::magnitude(curVec[ii]); + } + nearNull = MultiVectorFactory::Build(A->getRowMap(), nPDEs, true); + + // initialize null space to constants + for (size_t kk = 0; kk < nearNull->getNumVectors(); kk++) { + Teuchos::ArrayRCP curVec = nearNull->getDataNonConst(kk); + for (size_t ii = kk; ii < as(A->getRowMap()->getLocalNumElements()); + ii += nearNull->getNumVectors()) + curVec[ii] = Teuchos::ScalarTraits::one(); + } + + RCP zeroVec_TVecs; + RCP zeroVec_Null; + + zeroVec_TVecs = MultiVectorFactory::Build(A->getRowMap(), + testVecs->getNumVectors(), true); + zeroVec_Null = MultiVectorFactory::Build(A->getRowMap(), nPDEs, true); + zeroVec_TVecs->putScalar(Teuchos::ScalarTraits::zero()); + zeroVec_Null->putScalar(Teuchos::ScalarTraits::zero()); + + size_t nInvokeSmoother = as( + pL.get("aggregation: number of times to pre or post smooth")); + if (currentLevel.IsAvailable("PreSmoother")) { + RCP preSmoo = + currentLevel.Get>("PreSmoother"); + for (size_t ii = 0; ii < nInvokeSmoother; ii++) + preSmoo->Apply(*testVecs, *zeroVec_TVecs, false); + for (size_t ii = 0; ii < nInvokeSmoother; ii++) + preSmoo->Apply(*nearNull, *zeroVec_Null, false); + } else if (currentLevel.IsAvailable("PostSmoother")) { + RCP postSmoo = + currentLevel.Get>("PostSmoother"); + for (size_t ii = 0; ii < nInvokeSmoother; ii++) + postSmoo->Apply(*testVecs, *zeroVec_TVecs, false); + for (size_t ii = 0; ii < nInvokeSmoother; ii++) + postSmoo->Apply(*nearNull, *zeroVec_Null, false); + } else + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, + "Must set a smoother"); + + Teuchos::ArrayRCP penaltyPolyCoef(5); + Teuchos::ArrayView inputPolyCoef; + + penaltyPolyCoef[poly0thOrderCoef] = 12.; + penaltyPolyCoef[poly1stOrderCoef] = -.2; + 
penaltyPolyCoef[poly2ndOrderCoef] = 0.0; + penaltyPolyCoef[poly3rdOrderCoef] = 0.0; + penaltyPolyCoef[poly4thOrderCoef] = 0.0; + + if (pL.isParameter("aggregation: penalty parameters") && + pL.get>("aggregation: penalty parameters").size() > + 0) { + if (pL.get>("aggregation: penalty parameters") + .size() > penaltyPolyCoef.size()) + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, + "Number of penalty parameters must be " + << penaltyPolyCoef.size() << " or less"); + inputPolyCoef = + pL.get>("aggregation: penalty parameters")(); + + for (size_t i = 0; i < as(inputPolyCoef.size()); i++) + penaltyPolyCoef[i] = as(inputPolyCoef[i]); + for (size_t i = as(inputPolyCoef.size()); + i < as(penaltyPolyCoef.size()); i++) + penaltyPolyCoef[i] = Teuchos::ScalarTraits::zero(); + } + RCP filteredGraph; + badGuysCoalesceDrop(*A, penaltyPolyCoef, nPDEs, *testVecs, *nearNull, + filteredGraph); #ifdef takeOut - /* write out graph for serial debugging purposes only. */ - - FILE* fp = fopen("codeOutput","w"); - fprintf(fp,"%d %d %d\n",(int) filteredGraph->GetNodeNumVertices(),(int) filteredGraph->GetNodeNumVertices(), - (int) filteredGraph->GetNodeNumEdges()); - for (size_t i = 0; i < filteredGraph->GetNodeNumVertices(); i++) { - ArrayView inds = filteredGraph->getNeighborVertices(as(i)); - for (size_t j = 0; j < as(inds.size()); j++) { - fprintf(fp,"%d %d 1.00e+00\n",(int) i+1,(int) inds[j]+1); - } - } - fclose(fp); + /* write out graph for serial debugging purposes only. 
*/ + + FILE *fp = fopen("codeOutput", "w"); + fprintf(fp, "%d %d %d\n", (int)filteredGraph->GetNodeNumVertices(), + (int)filteredGraph->GetNodeNumVertices(), + (int)filteredGraph->GetNodeNumEdges()); + for (size_t i = 0; i < filteredGraph->GetNodeNumVertices(); i++) { + ArrayView inds = filteredGraph->getNeighborVertices(as(i)); + for (size_t j = 0; j < as(inds.size()); j++) { + fprintf(fp, "%d %d 1.00e+00\n", (int)i + 1, (int)inds[j] + 1); + } + } + fclose(fp); #endif - SC threshold = .01; - Set(currentLevel, "Filtering", (threshold != STS::zero())); - Set(currentLevel, "Graph", filteredGraph); - Set(currentLevel, "DofsPerNode", 1); + SC threshold = .01; + Set(currentLevel, "Filtering", (threshold != STS::zero())); + Set(currentLevel, "Graph", filteredGraph); + Set(currentLevel, "DofsPerNode", 1); - } //Build +} // Build - template - void SmooVecCoalesceDropFactory::badGuysCoalesceDrop(const Matrix& Amat, Teuchos::ArrayRCP & penaltyPolyCoef , LO nPDEs, const MultiVector& testVecs, const MultiVector& nearNull, RCP& filteredGraph) const { +template +void SmooVecCoalesceDropFactory:: + badGuysCoalesceDrop(const Matrix &Amat, + Teuchos::ArrayRCP &penaltyPolyCoef, LO nPDEs, + const MultiVector &testVecs, + const MultiVector &nearNull, + RCP &filteredGraph) const { /* * Compute coalesce/drop graph (in filteredGraph) for A. The basic idea is to * balance trade-offs associated with @@ -259,8 +299,8 @@ namespace MueLu { * the vector portion corresponding to a possible aggregate defined by * all non-dropped connections in the ith row. A tentative prolognator is * used for P. This prolongator corresponds to a null space vector given - * by 'nearNull', which is provided to dropper(). In initial testing, nearNull is - * first set as a vector of all 1's and then smoothed with a relaxation + * by 'nearNull', which is provided to dropper(). 
In initial testing, nearNull + * is first set as a vector of all 1's and then smoothed with a relaxation * method applied to a nice matrix (with the same sparsity pattern as A). * Originally, nearNull was used to handle Dir bcs where relaxation of the * vector of 1's has a more pronounced effect. @@ -274,198 +314,229 @@ namespace MueLu { * Note: testVecs is supplied by the user, but normally is the result of * applying a relaxation scheme to Au = 0 where u is initial random. */ - - GO numMyNnz = Teuchos::as(Amat.getLocalNumEntries()); - size_t nLoc = Amat.getRowMap()->getLocalNumElements(); - size_t nBlks = nLoc/nPDEs; - if (nBlks*nPDEs != nLoc ) - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "Number of local dofs not divisible by BlkSize"); + GO numMyNnz = Teuchos::as(Amat.getLocalNumEntries()); + size_t nLoc = Amat.getRowMap()->getLocalNumElements(); - Teuchos::ArrayRCP newRowPtr(nBlks+1); /* coalesce & drop matrix */ - Teuchos::ArrayRCP newCols(numMyNnz); /* arrays */ + size_t nBlks = nLoc / nPDEs; + if (nBlks * nPDEs != nLoc) + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, + "Number of local dofs not divisible by BlkSize"); - Teuchos::ArrayRCP bcols(nBlks); /* returned by dropfun(j,...) */ - Teuchos::ArrayRCP keepOrNot(nBlks); /* gives cols for jth row and */ - /* whether or not entry is */ - /* kept or dropped. */ + Teuchos::ArrayRCP newRowPtr(nBlks + 1); /* coalesce & drop matrix */ + Teuchos::ArrayRCP newCols(numMyNnz); /* arrays */ - LO maxNzPerRow = 200; + Teuchos::ArrayRCP bcols(nBlks); /* returned by dropfun(j,...) */ + Teuchos::ArrayRCP keepOrNot(nBlks); /* gives cols for jth row and */ + /* whether or not entry is */ + /* kept or dropped. */ + + LO maxNzPerRow = 200; Teuchos::ArrayRCP penalties(maxNzPerRow); /* Penalty function */ /* described above. 
*/ - - Teuchos::ArrayRCP keepStatus(nBlks,true); /* accumulated keepOrNot info */ - Teuchos::ArrayRCP bColList(nBlks); /* accumulated bcols info */ - /* for an entire block as */ - /* opposed to a single row */ - /* Additionally, keepOrNot[j] */ - /* refers to status of jth */ - /* entry in a row while */ - /* keepStatus[j] refers to */ - /* whether the jth block is */ - /* kept within the block row. */ - - Teuchos::ArrayRCP alreadyOnBColList(nBlks,false); /* used to avoid recording the*/ - /* same block column when */ - /* processing different pt */ - /* rows within a block. */ - - Teuchos::ArrayRCP boundaryNodes(nBlks,false); - - - for (LO i = 0; i < maxNzPerRow; i++) + + Teuchos::ArrayRCP keepStatus(nBlks, + true); /* accumulated keepOrNot info */ + Teuchos::ArrayRCP bColList(nBlks); /* accumulated bcols info */ + /* for an entire block as */ + /* opposed to a single row */ + /* Additionally, keepOrNot[j] */ + /* refers to status of jth */ + /* entry in a row while */ + /* keepStatus[j] refers to */ + /* whether the jth block is */ + /* kept within the block row. */ + + Teuchos::ArrayRCP alreadyOnBColList( + nBlks, false); /* used to avoid recording the*/ + /* same block column when */ + /* processing different pt */ + /* rows within a block. */ + + Teuchos::ArrayRCP boundaryNodes(nBlks, false); + + for (LO i = 0; i < maxNzPerRow; i++) penalties[i] = penaltyPolyCoef[poly0thOrderCoef] + - penaltyPolyCoef[poly1stOrderCoef]*(as(i)) + - penaltyPolyCoef[poly2ndOrderCoef]*(as(i*i)) + - (penaltyPolyCoef[poly3rdOrderCoef]*(as(i*i))*(as(i))) + //perhaps avoids overflow? 
- (penaltyPolyCoef[poly4thOrderCoef]*(as(i*i))*(as(i*i))); - - LO nzTotal = 0, numBCols = 0, row = -1, Nbcols, bcol; - newRowPtr[0] = 0; - - /* proceed block by block */ - for (LO i = 0; i < as(nBlks); i++) { - newRowPtr[i+1] = newRowPtr[i]; - for (LO j = 0; j < nPDEs; j++) { - row = row + 1; - - Teuchos::ArrayView indices; - Teuchos::ArrayView vals; - - Amat.getLocalRowView(row, indices, vals); - - if (indices.size() > maxNzPerRow) { - LO oldSize = maxNzPerRow; - maxNzPerRow = indices.size() + 100; - penalties.resize(as(maxNzPerRow),0.0); - for (LO k = oldSize; k < maxNzPerRow; k++) - penalties[k] = penaltyPolyCoef[poly0thOrderCoef] + - penaltyPolyCoef[poly1stOrderCoef]*(as(i)) + - penaltyPolyCoef[poly2ndOrderCoef]*(as(i*i)) + - (penaltyPolyCoef[poly3rdOrderCoef]*(as(i*i))*(as(i))) + - (penaltyPolyCoef[poly4thOrderCoef]*(as(i*i))*(as(i*i))); - } - badGuysDropfunc(row, indices, vals, testVecs, nPDEs, penalties, nearNull, bcols,keepOrNot,Nbcols,nLoc); - for (LO k=0; k < Nbcols; k++) { - bcol = bcols[k]; - - /* add to bColList if not already on it */ - - if (alreadyOnBColList[bcol] == false) {/* for PDE systems only record */ - bColList[numBCols++] = bcol; /* neighboring block one time */ - alreadyOnBColList[bcol] = true; - } - /* drop if any pt row within block indicates entry should be dropped */ - - if (keepOrNot[k] == false) keepStatus[bcol] = false; - - } /* for (k=0; k < Nbcols; k++) */ - } /* for (j = 0; i < nPDEs; j++) */ - - /* finished with block row. Now record block entries that we keep */ - /* and reset keepStatus, bColList, and alreadyOnBColList. 
*/ - - if ( numBCols < 2) boundaryNodes[i] = true; - for (LO j=0; j < numBCols; j++) { - bcol = bColList[j]; - if (keepStatus[bcol] == true) { - newCols[nzTotal] = bColList[j]; - newRowPtr[i+1]++; - nzTotal = nzTotal + 1; - } - keepStatus[bcol] = true; - alreadyOnBColList[bcol] = false; - bColList[j] = 0; - } - numBCols = 0; - } /* for (i = 0; i < nBlks; i++) */ - - /* create array of the correct size and copy over newCols to it */ - - Teuchos::ArrayRCP finalCols(nzTotal); - for (LO i = 0; i < nzTotal; i++) finalCols[i] = newCols[i]; - - // Not using column map because we do not allow for any off-proc stuff. - // Not sure if this is okay. FIXME - - RCP rowMap = Amat.getRowMap(); // , colMap = Amat.getColMap(); - - LO nAmalgNodesOnProc = rowMap->getLocalNumElements()/nPDEs; - Teuchos::Array nodalGIDs(nAmalgNodesOnProc); - typename Teuchos::ScalarTraits::coordinateType temp; - for (size_t i = 0; i < as(nAmalgNodesOnProc); i++ ) { - GO gid = rowMap->getGlobalElement(i*nPDEs); - temp = ((typename Teuchos::ScalarTraits::coordinateType) (gid))/((typename Teuchos::ScalarTraits::coordinateType) (nPDEs)); - nodalGIDs[i] = as(floor(temp)); - } - GO nAmalgNodesGlobal = rowMap->getGlobalNumElements(); - GO nBlkGlobal = nAmalgNodesGlobal/nPDEs; - if (nBlkGlobal*nPDEs != nAmalgNodesGlobal) - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "Number of global dofs not divisible by BlkSize"); - - Teuchos::RCP AmalgRowMap = MapFactory::Build(rowMap->lib(), nBlkGlobal, - nodalGIDs(),0,rowMap->getComm()); - - filteredGraph = rcp(new LWGraph(newRowPtr, finalCols, AmalgRowMap, AmalgRowMap, "thresholded graph of A")); - filteredGraph->SetBoundaryNodeMap(boundaryNodes); + penaltyPolyCoef[poly1stOrderCoef] * (as(i)) + + penaltyPolyCoef[poly2ndOrderCoef] * (as(i * i)) + + (penaltyPolyCoef[poly3rdOrderCoef] * (as(i * i)) * + (as(i))) + // perhaps avoids overflow? 
+ (penaltyPolyCoef[poly4thOrderCoef] * (as(i * i)) * + (as(i * i))); + + LO nzTotal = 0, numBCols = 0, row = -1, Nbcols, bcol; + newRowPtr[0] = 0; + + /* proceed block by block */ + for (LO i = 0; i < as(nBlks); i++) { + newRowPtr[i + 1] = newRowPtr[i]; + for (LO j = 0; j < nPDEs; j++) { + row = row + 1; + + Teuchos::ArrayView indices; + Teuchos::ArrayView vals; + + Amat.getLocalRowView(row, indices, vals); + + if (indices.size() > maxNzPerRow) { + LO oldSize = maxNzPerRow; + maxNzPerRow = indices.size() + 100; + penalties.resize(as(maxNzPerRow), 0.0); + for (LO k = oldSize; k < maxNzPerRow; k++) + penalties[k] = + penaltyPolyCoef[poly0thOrderCoef] + + penaltyPolyCoef[poly1stOrderCoef] * (as(i)) + + penaltyPolyCoef[poly2ndOrderCoef] * (as(i * i)) + + (penaltyPolyCoef[poly3rdOrderCoef] * (as(i * i)) * + (as(i))) + + (penaltyPolyCoef[poly4thOrderCoef] * (as(i * i)) * + (as(i * i))); + } + badGuysDropfunc(row, indices, vals, testVecs, nPDEs, penalties, nearNull, + bcols, keepOrNot, Nbcols, nLoc); + for (LO k = 0; k < Nbcols; k++) { + bcol = bcols[k]; - } + /* add to bColList if not already on it */ + + if (alreadyOnBColList[bcol] == + false) { /* for PDE systems only record */ + bColList[numBCols++] = bcol; /* neighboring block one time */ + alreadyOnBColList[bcol] = true; + } + /* drop if any pt row within block indicates entry should be dropped */ + + if (keepOrNot[k] == false) + keepStatus[bcol] = false; + + } /* for (k=0; k < Nbcols; k++) */ + } /* for (j = 0; i < nPDEs; j++) */ + + /* finished with block row. Now record block entries that we keep */ + /* and reset keepStatus, bColList, and alreadyOnBColList. 
*/ + + if (numBCols < 2) + boundaryNodes[i] = true; + for (LO j = 0; j < numBCols; j++) { + bcol = bColList[j]; + if (keepStatus[bcol] == true) { + newCols[nzTotal] = bColList[j]; + newRowPtr[i + 1]++; + nzTotal = nzTotal + 1; + } + keepStatus[bcol] = true; + alreadyOnBColList[bcol] = false; + bColList[j] = 0; + } + numBCols = 0; + } /* for (i = 0; i < nBlks; i++) */ + + /* create array of the correct size and copy over newCols to it */ - template - void SmooVecCoalesceDropFactory::badGuysDropfunc(LO row, const Teuchos::ArrayView& cols, const Teuchos::ArrayView& vals, const MultiVector& testVecs, LO nPDEs, Teuchos::ArrayRCP & penalties, const MultiVector& nearNull, Teuchos::ArrayRCP& Bcols, Teuchos::ArrayRCP& keepOrNot, LO &Nbcols, LO nLoc) const { - using TST=Teuchos::ScalarTraits; + Teuchos::ArrayRCP finalCols(nzTotal); + for (LO i = 0; i < nzTotal; i++) + finalCols[i] = newCols[i]; - LO nLeng = cols.size(); + // Not using column map because we do not allow for any off-proc stuff. + // Not sure if this is okay. 
FIXME + + RCP rowMap = Amat.getRowMap(); // , colMap = Amat.getColMap(); + + LO nAmalgNodesOnProc = rowMap->getLocalNumElements() / nPDEs; + Teuchos::Array nodalGIDs(nAmalgNodesOnProc); + typename Teuchos::ScalarTraits::coordinateType temp; + for (size_t i = 0; i < as(nAmalgNodesOnProc); i++) { + GO gid = rowMap->getGlobalElement(i * nPDEs); + temp = ((typename Teuchos::ScalarTraits::coordinateType)(gid)) / + ((typename Teuchos::ScalarTraits::coordinateType)(nPDEs)); + nodalGIDs[i] = as(floor(temp)); + } + GO nAmalgNodesGlobal = rowMap->getGlobalNumElements(); + GO nBlkGlobal = nAmalgNodesGlobal / nPDEs; + if (nBlkGlobal * nPDEs != nAmalgNodesGlobal) + TEUCHOS_TEST_FOR_EXCEPTION( + true, Exceptions::RuntimeError, + "Number of global dofs not divisible by BlkSize"); + + Teuchos::RCP AmalgRowMap = MapFactory::Build( + rowMap->lib(), nBlkGlobal, nodalGIDs(), 0, rowMap->getComm()); + + filteredGraph = rcp(new LWGraph(newRowPtr, finalCols, AmalgRowMap, + AmalgRowMap, "thresholded graph of A")); + filteredGraph->SetBoundaryNodeMap(boundaryNodes); +} + +template +void SmooVecCoalesceDropFactory:: + badGuysDropfunc(LO row, const Teuchos::ArrayView &cols, + const Teuchos::ArrayView &vals, + const MultiVector &testVecs, LO nPDEs, + Teuchos::ArrayRCP &penalties, + const MultiVector &nearNull, Teuchos::ArrayRCP &Bcols, + Teuchos::ArrayRCP &keepOrNot, LO &Nbcols, + LO nLoc) const { + using TST = Teuchos::ScalarTraits; + + LO nLeng = cols.size(); typename TST::coordinateType temp; - temp = ((typename TST::coordinateType) (row))/((typename TST::coordinateType) (nPDEs)); + temp = ((typename TST::coordinateType)(row)) / + ((typename TST::coordinateType)(nPDEs)); LO blkRow = as(floor(temp)); - Teuchos::ArrayRCP badGuy( nLeng, 0.0); - Teuchos::ArrayRCP subNull(nLeng, 0.0); /* subset of nearNull */ - /* associated with current */ - /* dof within node. */ - - /* Only consider testVecs associated with same dof & on processor. 
Further */ - /* collapse testVecs to a single badGuy vector by basically taking the worst */ - /* (least smooth) values for each of the off diags. In particular, we look at*/ - /* the ratio of each off-diag test value / diag test value and compare this */ - /* with the nearNull vector ratio. The further the testVec ratio is from the */ - /* nearNull ratio, the harder is will be to accurately interpolate is these */ - /* two guys are aggregated. So, the biggest ratio mismatch is used to choose */ - /* the testVec entry associated with each off-diagonal entry. */ - - - for (LO i = 0; i < nLeng; i++) keepOrNot[i] = false; + Teuchos::ArrayRCP badGuy(nLeng, 0.0); + Teuchos::ArrayRCP subNull(nLeng, 0.0); /* subset of nearNull */ + /* associated with current */ + /* dof within node. */ + + /* Only consider testVecs associated with same dof & on processor. Further */ + /* collapse testVecs to a single badGuy vector by basically taking the worst + */ + /* (least smooth) values for each of the off diags. In particular, we look + * at*/ + /* the ratio of each off-diag test value / diag test value and compare this */ + /* with the nearNull vector ratio. The further the testVec ratio is from the + */ + /* nearNull ratio, the harder is will be to accurately interpolate is these */ + /* two guys are aggregated. So, the biggest ratio mismatch is used to choose + */ + /* the testVec entry associated with each off-diagonal entry. 
*/ + + for (LO i = 0; i < nLeng; i++) + keepOrNot[i] = false; LO diagInd = -1; - Nbcols = 0; - LO rowDof = row - blkRow*nPDEs; - Teuchos::ArrayRCP< const Scalar > oneNull = nearNull.getData( as(rowDof)); + Nbcols = 0; + LO rowDof = row - blkRow * nPDEs; + Teuchos::ArrayRCP oneNull = + nearNull.getData(as(rowDof)); for (LO i = 0; i < nLeng; i++) { - if ((cols[i] < nLoc ) && (TST::magnitude(vals[i]) != 0.0)) { /* on processor */ - temp = ((typename TST::coordinateType) (cols[i]))/((typename TST::coordinateType) (nPDEs)); - LO colDof = cols[i] - (as(floor( temp )))*nPDEs; + if ((cols[i] < nLoc) && + (TST::magnitude(vals[i]) != 0.0)) { /* on processor */ + temp = ((typename TST::coordinateType)(cols[i])) / + ((typename TST::coordinateType)(nPDEs)); + LO colDof = cols[i] - (as(floor(temp))) * nPDEs; if (colDof == rowDof) { /* same dof within node as row */ - Bcols[ Nbcols] = (cols[i] - colDof)/nPDEs; + Bcols[Nbcols] = (cols[i] - colDof) / nPDEs; subNull[Nbcols] = oneNull[cols[i]]; if (cols[i] != row) { /* not diagonal */ Scalar worstRatio = -TST::one(); - Scalar targetRatio = subNull[Nbcols]/oneNull[row]; + Scalar targetRatio = subNull[Nbcols] / oneNull[row]; Scalar actualRatio; - for (size_t kk = 0; kk < testVecs.getNumVectors(); kk++ ) { - Teuchos::ArrayRCP< const Scalar > curVec = testVecs.getData(kk); - actualRatio = curVec[cols[i]]/curVec[row]; - if (TST::magnitude(actualRatio - targetRatio) > TST::magnitude(worstRatio)) { - badGuy[Nbcols] = actualRatio; - worstRatio = Teuchos::ScalarTraits::magnitude(actualRatio - targetRatio); + for (size_t kk = 0; kk < testVecs.getNumVectors(); kk++) { + Teuchos::ArrayRCP curVec = testVecs.getData(kk); + actualRatio = curVec[cols[i]] / curVec[row]; + if (TST::magnitude(actualRatio - targetRatio) > + TST::magnitude(worstRatio)) { + badGuy[Nbcols] = actualRatio; + worstRatio = Teuchos::ScalarTraits::magnitude(actualRatio - + targetRatio); } } - } - else { - badGuy[ Nbcols] = 1.; + } else { + badGuy[Nbcols] = 1.; keepOrNot[Nbcols] 
= true; - diagInd = Nbcols; + diagInd = Nbcols; } (Nbcols)++; } @@ -475,28 +546,30 @@ namespace MueLu { /* Make sure that diagonal entry is in block col list */ if (diagInd == -1) { - Bcols[ Nbcols] = (row - rowDof)/nPDEs; - subNull[ Nbcols] = 1.; - badGuy[ Nbcols] = 1.; + Bcols[Nbcols] = (row - rowDof) / nPDEs; + subNull[Nbcols] = 1.; + badGuy[Nbcols] = 1.; keepOrNot[Nbcols] = true; - diagInd = Nbcols; + diagInd = Nbcols; (Nbcols)++; } - Scalar currentRP = oneNull[row]*oneNull[row]; - Scalar currentRTimesBadGuy= oneNull[row]*badGuy[diagInd]; - Scalar currentScore = penalties[0]; /* (I - P inv(R*P)*R )=0 for size */ - /* size 1 agg, so fit is perfect */ - - /* starting from a set that only includes the diagonal entry consider adding */ - /* one off-diagonal at a time until the fitValue exceeds the penalty term. */ - /* Here, the fit value is (I - P inv(R P) R ) and we always consider the */ - /* lowest fitValue that is not currently in the set. R and P correspond to */ - /* a simple tentaive grid transfer associated with an aggregate that */ - /* includes the diagonal, all already determined neighbors, and the potential*/ - /* new neighbor */ + Scalar currentRP = oneNull[row] * oneNull[row]; + Scalar currentRTimesBadGuy = oneNull[row] * badGuy[diagInd]; + Scalar currentScore = penalties[0]; /* (I - P inv(R*P)*R )=0 for size */ + /* size 1 agg, so fit is perfect */ - LO nKeep = 1, flag = 1, minId; + /* starting from a set that only includes the diagonal entry consider adding + */ + /* one off-diagonal at a time until the fitValue exceeds the penalty term. */ + /* Here, the fit value is (I - P inv(R P) R ) and we always consider the */ + /* lowest fitValue that is not currently in the set. 
R and P correspond to */ + /* a simple tentaive grid transfer associated with an aggregate that */ + /* includes the diagonal, all already determined neighbors, and the + * potential*/ + /* new neighbor */ + + LO nKeep = 1, flag = 1, minId; Scalar minFit, minFitRP = 0., minFitRTimesBadGuy = 0.; Scalar newRP, newRTimesBadGuy; @@ -505,47 +578,50 @@ namespace MueLu { /* that has not already been added as a neighbor */ minFit = 1000000.; - minId = -1; + minId = -1; - for (LO i=0; i < Nbcols; i++) { + for (LO i = 0; i < Nbcols; i++) { if (keepOrNot[i] == false) { - keepOrNot[i] = true; /* temporarily view i as non-dropped neighbor */ - newRP = currentRP + subNull[i]*subNull[i]; - newRTimesBadGuy= currentRTimesBadGuy + subNull[i]*badGuy[i]; - Scalar ratio = newRTimesBadGuy/newRP; + keepOrNot[i] = true; /* temporarily view i as non-dropped neighbor */ + newRP = currentRP + subNull[i] * subNull[i]; + newRTimesBadGuy = currentRTimesBadGuy + subNull[i] * badGuy[i]; + Scalar ratio = newRTimesBadGuy / newRP; Scalar newFit = 0.0; - for (LO k=0; k < Nbcols; k++) { + for (LO k = 0; k < Nbcols; k++) { if (keepOrNot[k] == true) { - Scalar diff = badGuy[k] - ratio*subNull[k]; - newFit = newFit + diff*diff; + Scalar diff = badGuy[k] - ratio * subNull[k]; + newFit = newFit + diff * diff; } } - if (Teuchos::ScalarTraits::magnitude(newFit) < Teuchos::ScalarTraits::magnitude(minFit)) { - minId = i; - minFit = newFit; - minFitRP = newRP; - minFitRTimesBadGuy= newRTimesBadGuy; + if (Teuchos::ScalarTraits::magnitude(newFit) < + Teuchos::ScalarTraits::magnitude(minFit)) { + minId = i; + minFit = newFit; + minFitRP = newRP; + minFitRTimesBadGuy = newRTimesBadGuy; } keepOrNot[i] = false; } } - if (minId == -1) flag = 0; + if (minId == -1) + flag = 0; else { minFit = sqrt(minFit); Scalar newScore = penalties[nKeep] + minFit; - if (Teuchos::ScalarTraits::magnitude(newScore) < Teuchos::ScalarTraits::magnitude(currentScore)) { - nKeep = nKeep + 1; - keepOrNot[minId]= true; - currentScore = 
newScore; - currentRP = minFitRP; - currentRTimesBadGuy= minFitRTimesBadGuy; - } - else flag = 0; + if (Teuchos::ScalarTraits::magnitude(newScore) < + Teuchos::ScalarTraits::magnitude(currentScore)) { + nKeep = nKeep + 1; + keepOrNot[minId] = true; + currentScore = newScore; + currentRP = minFitRP; + currentRTimesBadGuy = minFitRTimesBadGuy; + } else + flag = 0; } } - } +} -} //namespace MueLu +} // namespace MueLu #endif // MUELU_SMOOVECCOALESCEDROPFACTORY_DEF_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_UnsmooshFactory_decl.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_UnsmooshFactory_decl.hpp index 80b779e26a7d..da5b57380cae 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_UnsmooshFactory_decl.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_UnsmooshFactory_decl.hpp @@ -48,91 +48,108 @@ #define PACKAGES_MUELU_SRC_GRAPH_MUELU_UNSMOOSHFACTORY_DECL_HPP_ #include "MueLu_ConfigDefs.hpp" +#include "MueLu_Level_fwd.hpp" #include "MueLu_PFactory.hpp" #include "MueLu_UnsmooshFactory_fwd.hpp" -#include "MueLu_Level_fwd.hpp" namespace MueLu { - /*! - @class UnsmooshFactory class. - @brief Factory for building "unsmooshed" transfer operators from transfer operators associated with a scalar helper problem (built by the VariableDofLaplacianFactory) - - The output prolongation operator P is compatible to the input matrix A. The input prolongation matrix P is supposed to be built by the VariableDofLaplacianFactory with 1 DOF per node. - The DofStatus array declares for each row, whether it is a standard dof or a padded/pseudo Dirichlet dof. - The user has to provide the parameter "maxDofPerNode", since the coarse matrices are always padded by construction (i.e. constant number of DOFs per node). This parameter is needed for building the column map of the unsmooshed prolongation operator. - The parameter "fineIsPadded" allows to specify whether the input matrix A on the finest level is padded or not. 
In the padded case we have an artificially extended input matrix with pseudo Dirichlet rows on inactive rows. In the non-padded version - we have "variable" number of Dofs per node. - - @ingroup MueLuGraphClasses - - ## Input/output of UnsmooshFactory ## - - ### User parameters of UnsmooshFactory ### - Parameter | type | default | master.xml | validated | requested | description - ----------|------|---------|:----------:|:---------:|:---------:|------------ - A | Factory | null | | * | * | Generating factory of the input matrix A with potentially variable number of DOFs. Might be padded or non-padded. Padded means, that the matrix has additional artificial rows and columns to have a constant number of DOFs per node. Needed to match the row map for the unsmooshed version of P with the row map of the unamalgamated input matrix A. - P | Factory | null | | * | * | Generating factory of the (amalgamated) prolongator P generated from a (pseudo Laplacian) with 1 Dofs per node. Will be unsmooshed to be compatible with input matrix A in this factory. - DofStatus |Facotry | null | | * | * | Generating factory for dofStatus array (usually generated by the VariableDofLaplacdianFactory). It is a Teuchos::Array of size number of Rows of input matrix P multiplied by the maximum possible number of Dofs per node. - maxDofPerNode | int | 1 | | * | | Maximum number of DOFs per node. Needed for generating unsmooshed P. Note, that the coarse matrices are always padded, i.e. we have constant number of DOFs per node. - fineIsPadded | bool | false | * | | True if finest level input matrix is padded (default is false) - - - The * in the @c master.xml column denotes that the parameter is defined in the @c master.xml file.
- The * in the @c validated column means that the parameter is declared in the list of valid input parameters (see UnsmooshFactory::GetValidParameters).
- The * in the @c requested column states that the data is requested as input with all dependencies (see UnsmooshFactory::DeclareInput). - - ### Variables provided by UnsmooshFactory ### - - After UnsmooshFactory::Build the following data is available (if requested) - - Parameter | generated by | description - ----------|--------------|------------ - | P | UnsmooshFactory | Unsmooshed prolongation operator - */ - template - class UnsmooshFactory : public PFactory { +/*! + @class UnsmooshFactory class. + @brief Factory for building "unsmooshed" transfer operators from transfer + operators associated with a scalar helper problem (built by the + VariableDofLaplacianFactory) + + The output prolongation operator P is compatible to the input matrix A. The + input prolongation matrix P is supposed to be built by the + VariableDofLaplacianFactory with 1 DOF per node. The DofStatus array declares + for each row, whether it is a standard dof or a padded/pseudo Dirichlet dof. + The user has to provide the parameter "maxDofPerNode", since the coarse + matrices are always padded by construction (i.e. constant number of DOFs per + node). This parameter is needed for building the column map of the unsmooshed + prolongation operator. The parameter "fineIsPadded" allows to specify whether + the input matrix A on the finest level is padded or not. In the padded case we + have an artificially extended input matrix with pseudo Dirichlet rows on + inactive rows. In the non-padded version we have "variable" number of Dofs per + node. + + @ingroup MueLuGraphClasses + + ## Input/output of UnsmooshFactory ## + + ### User parameters of UnsmooshFactory ### + Parameter | type | default | master.xml | validated | requested | description + ----------|------|---------|:----------:|:---------:|:---------:|------------ + A | Factory | null | | * | * | Generating factory of the input + matrix A with potentially variable number of DOFs. Might be padded or + non-padded. 
Padded means, that the matrix has additional artificial rows and + columns to have a constant number of DOFs per node. Needed to match the row + map for the unsmooshed version of P with the row map of the unamalgamated + input matrix A. P | Factory | null | | * | * | Generating factory of + the (amalgamated) prolongator P generated from a (pseudo Laplacian) with 1 + Dofs per node. Will be unsmooshed to be compatible with input matrix A in this + factory. DofStatus |Facotry | null | | * | * | Generating factory for + dofStatus array (usually generated by the VariableDofLaplacdianFactory). It is + a Teuchos::Array of size number of Rows of input matrix P multiplied by + the maximum possible number of Dofs per node. maxDofPerNode | int | 1 | | + * | | Maximum number of DOFs per node. Needed for generating unsmooshed P. + Note, that the coarse matrices are always padded, i.e. we have constant number + of DOFs per node. fineIsPadded | bool | false | * | | True if finest + level input matrix is padded (default is false) + + + The * in the @c master.xml column denotes that the parameter is defined in the + @c master.xml file.
The * in the @c validated column means that the + parameter is declared in the list of valid input parameters (see + UnsmooshFactory::GetValidParameters).
The * in the @c requested column + states that the data is requested as input with all dependencies (see + UnsmooshFactory::DeclareInput). + + ### Variables provided by UnsmooshFactory ### + + After UnsmooshFactory::Build the following data is available (if requested) + + Parameter | generated by | description + ----------|--------------|------------ + | P | UnsmooshFactory | Unsmooshed prolongation operator +*/ +template +class UnsmooshFactory : public PFactory { #undef MUELU_UNSMOOSHFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! @name Constructors/Destructors. - //@{ - - //! Constructor - UnsmooshFactory(); +public: + //! @name Constructors/Destructors. + //@{ - //! Destructor - virtual ~UnsmooshFactory() { } + //! Constructor + UnsmooshFactory(); - RCP GetValidParameterList() const; + //! Destructor + virtual ~UnsmooshFactory() {} - //@} + RCP GetValidParameterList() const; - //! Input - //@{ + //@} - void DeclareInput(Level &fineLevel, Level &coarseLevel) const; + //! Input + //@{ - //@} + void DeclareInput(Level &fineLevel, Level &coarseLevel) const; - void Build (Level &fineLevel, Level &coarseLevel) const; // Build - void BuildP(Level &/* fineLevel */, Level &/* coarseLevel */) const {}; // TAW no real need for an extra BuildP routine. Just use Build + //@} - private: + void Build(Level &fineLevel, Level &coarseLevel) const; // Build + void BuildP(Level & /* fineLevel */, Level & /* coarseLevel */) + const {}; // TAW no real need for an extra BuildP routine. 
Just use Build +private: +}; // class UnsmooshFactory - - - }; //class UnsmooshFactory - -} //namespace MueLu +} // namespace MueLu #define MUELU_UNSMOOSHFACTORY_SHORT - #endif /* PACKAGES_MUELU_SRC_GRAPH_MUELU_UNSMOOSHFACTORY_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_UnsmooshFactory_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_UnsmooshFactory_def.hpp index f49f488463ea..b378a501ffef 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_UnsmooshFactory_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_UnsmooshFactory_def.hpp @@ -53,198 +53,234 @@ namespace MueLu { - template - UnsmooshFactory::UnsmooshFactory() { } - - template - RCP UnsmooshFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - validParamList->set< RCP >("A", Teuchos::null, "Generating factory for unamalgamated matrix. Row map of (unamalgamted) output prolongation operator should match row map of this A."); - validParamList->set< RCP >("P", Teuchos::null, "Generating factory of the (amalgamated) prolongator P"); - validParamList->set< RCP >("DofStatus", Teuchos::null, "Generating factory for dofStatus array (usually the VariableDofLaplacdianFactory)"); - - validParamList->set< int > ("maxDofPerNode", 1, "Maximum number of DOFs per node"); - validParamList->set< bool > ("fineIsPadded" , false, "true if finest level input matrix is padded"); - - return validParamList; - } - - template - void UnsmooshFactory::DeclareInput(Level &fineLevel, Level &coarseLevel) const { - //const ParameterList& pL = GetParameterList(); - Input(fineLevel, "A"); - Input(coarseLevel, "P"); - - // DofStatus only provided on the finest level (by user) - // On the coarser levels it is auto-generated using the DBC information from the unamalgamated matrix A - if(fineLevel.GetLevelID() == 0) - Input(fineLevel, "DofStatus"); - } - - template - void UnsmooshFactory::Build(Level &fineLevel, Level &coarseLevel) const 
{ - FactoryMonitor m(*this, "Build", coarseLevel); - typedef Teuchos::ScalarTraits STS; - - const ParameterList & pL = GetParameterList(); - - // extract matrices (unamalgamated A and amalgamated P) - RCP unamalgA = Get< RCP >(fineLevel, "A"); - RCP amalgP = Get< RCP >(coarseLevel, "P"); - - // extract user parameters - int maxDofPerNode = pL.get ("maxDofPerNode"); - bool fineIsPadded = pL.get("fineIsPadded"); - - // get dofStatus information - // On the finest level it is provided by the user. On the coarser levels it is constructed - // using the DBC information of the matrix A - Teuchos::Array dofStatus; - if(fineLevel.GetLevelID() == 0) { - dofStatus = Get >(fineLevel, "DofStatus"); - } else { - // dof status is the dirichlet information of unsmooshed/unamalgamated A (fine level) - dofStatus = Teuchos::Array(unamalgA->getRowMap()->getLocalNumElements() /*amalgP->getRowMap()->getLocalNumElements() * maxDofPerNode*/,'s'); - - bool bHasZeroDiagonal = false; - Teuchos::ArrayRCP dirOrNot = MueLu::Utilities::DetectDirichletRowsExt(*unamalgA,bHasZeroDiagonal,STS::magnitude(0.5)); - - TEUCHOS_TEST_FOR_EXCEPTION(dirOrNot.size() != dofStatus.size(), MueLu::Exceptions::RuntimeError,"MueLu::UnsmooshFactory::Build: inconsistent number of coarse DBC array and dofStatus array. dirOrNot.size() = " << dirOrNot.size() << " dofStatus.size() = " << dofStatus.size()); - for(decltype(dirOrNot.size()) i = 0; i < dirOrNot.size(); ++i) { - if(dirOrNot[i] == true) dofStatus[i] = 'p'; - } +template +UnsmooshFactory::UnsmooshFactory() {} + +template +RCP UnsmooshFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + validParamList->set>( + "A", Teuchos::null, + "Generating factory for unamalgamated matrix. 
Row map of (unamalgamted) " + "output prolongation operator should match row map of this A."); + validParamList->set>( + "P", Teuchos::null, + "Generating factory of the (amalgamated) prolongator P"); + validParamList->set>( + "DofStatus", Teuchos::null, + "Generating factory for dofStatus array (usually the " + "VariableDofLaplacdianFactory)"); + + validParamList->set("maxDofPerNode", 1, + "Maximum number of DOFs per node"); + validParamList->set("fineIsPadded", false, + "true if finest level input matrix is padded"); + + return validParamList; +} + +template +void UnsmooshFactory::DeclareInput( + Level &fineLevel, Level &coarseLevel) const { + // const ParameterList& pL = GetParameterList(); + Input(fineLevel, "A"); + Input(coarseLevel, "P"); + + // DofStatus only provided on the finest level (by user) + // On the coarser levels it is auto-generated using the DBC information from + // the unamalgamated matrix A + if (fineLevel.GetLevelID() == 0) + Input(fineLevel, "DofStatus"); +} + +template +void UnsmooshFactory::Build( + Level &fineLevel, Level &coarseLevel) const { + FactoryMonitor m(*this, "Build", coarseLevel); + typedef Teuchos::ScalarTraits STS; + + const ParameterList &pL = GetParameterList(); + + // extract matrices (unamalgamated A and amalgamated P) + RCP unamalgA = Get>(fineLevel, "A"); + RCP amalgP = Get>(coarseLevel, "P"); + + // extract user parameters + int maxDofPerNode = pL.get("maxDofPerNode"); + bool fineIsPadded = pL.get("fineIsPadded"); + + // get dofStatus information + // On the finest level it is provided by the user. 
On the coarser levels it is + // constructed using the DBC information of the matrix A + Teuchos::Array dofStatus; + if (fineLevel.GetLevelID() == 0) { + dofStatus = Get>(fineLevel, "DofStatus"); + } else { + // dof status is the dirichlet information of unsmooshed/unamalgamated A + // (fine level) + dofStatus = Teuchos::Array( + unamalgA->getRowMap() + ->getLocalNumElements() /*amalgP->getRowMap()->getLocalNumElements() + * maxDofPerNode*/ + , + 's'); + + bool bHasZeroDiagonal = false; + Teuchos::ArrayRCP dirOrNot = + MueLu::Utilities::DetectDirichletRowsExt(*unamalgA, + bHasZeroDiagonal, + STS::magnitude(0.5)); + + TEUCHOS_TEST_FOR_EXCEPTION( + dirOrNot.size() != dofStatus.size(), MueLu::Exceptions::RuntimeError, + "MueLu::UnsmooshFactory::Build: inconsistent number of coarse DBC " + "array and dofStatus array. dirOrNot.size() = " + << dirOrNot.size() << " dofStatus.size() = " << dofStatus.size()); + for (decltype(dirOrNot.size()) i = 0; i < dirOrNot.size(); ++i) { + if (dirOrNot[i] == true) + dofStatus[i] = 'p'; } + } - // TODO: TAW the following check is invalid for SA-AMG based input prolongators - //TEUCHOS_TEST_FOR_EXCEPTION(amalgP->getDomainMap()->isSameAs(*amalgP->getColMap()) == false, MueLu::Exceptions::RuntimeError,"MueLu::UnsmooshFactory::Build: only support for non-overlapping aggregates. 
(column map of Ptent must be the same as domain map of Ptent)"); - - // extract CRS information from amalgamated prolongation operator - Teuchos::ArrayRCP amalgRowPtr(amalgP->getLocalNumRows()); - Teuchos::ArrayRCP amalgCols(amalgP->getLocalNumEntries()); - Teuchos::ArrayRCP amalgVals(amalgP->getLocalNumEntries()); - Teuchos::RCP amalgPwrap = Teuchos::rcp_dynamic_cast(amalgP); - Teuchos::RCP amalgPcrs = amalgPwrap->getCrsMatrix(); - amalgPcrs->getAllValues(amalgRowPtr, amalgCols, amalgVals); - - // calculate number of dof rows for new prolongator - size_t paddedNrows = amalgP->getRowMap()->getLocalNumElements() * Teuchos::as(maxDofPerNode); - - // reserve CSR arrays for new prolongation operator - Teuchos::ArrayRCP newPRowPtr(paddedNrows+1); - Teuchos::ArrayRCP newPCols(amalgP->getLocalNumEntries() * maxDofPerNode); - Teuchos::ArrayRCP newPVals(amalgP->getLocalNumEntries() * maxDofPerNode); - - size_t rowCount = 0; // actual number of (local) in unamalgamated prolongator - if(fineIsPadded == true || fineLevel.GetLevelID() > 0) { - - // build prolongation operator for padded fine level matrices. - // Note: padded fine level dofs are transferred by injection. - // That is, these interpolation stencils do not take averages of - // coarse level variables. Further, fine level Dirichlet points - // also use injection. 
- - size_t cnt = 0; // local id counter - for (decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size() - 1; i++) { - // determine number of entries in amalgamated dof row i - size_t rowLength = amalgRowPtr[i+1] - amalgRowPtr[i]; - - // loop over dofs per node (unamalgamation) - for(int j = 0; j < maxDofPerNode; j++) { - newPRowPtr[i*maxDofPerNode+j] = cnt; - if (dofStatus[i*maxDofPerNode+j] == 's') { // add only "standard" dofs to unamalgamated prolongator - // loop over column entries in amalgamated P - for (size_t k = 0; k < rowLength; k++) { - newPCols[cnt ] = amalgCols[k+amalgRowPtr[i]] * maxDofPerNode + j; - newPVals[cnt++] = amalgVals[k+amalgRowPtr[i]]; - } - + // TODO: TAW the following check is invalid for SA-AMG based input + // prolongators + // TEUCHOS_TEST_FOR_EXCEPTION(amalgP->getDomainMap()->isSameAs(*amalgP->getColMap()) + // == false, MueLu::Exceptions::RuntimeError,"MueLu::UnsmooshFactory::Build: + // only support for non-overlapping aggregates. (column map of Ptent must be + // the same as domain map of Ptent)"); + + // extract CRS information from amalgamated prolongation operator + Teuchos::ArrayRCP amalgRowPtr(amalgP->getLocalNumRows()); + Teuchos::ArrayRCP amalgCols(amalgP->getLocalNumEntries()); + Teuchos::ArrayRCP amalgVals(amalgP->getLocalNumEntries()); + Teuchos::RCP amalgPwrap = + Teuchos::rcp_dynamic_cast(amalgP); + Teuchos::RCP amalgPcrs = amalgPwrap->getCrsMatrix(); + amalgPcrs->getAllValues(amalgRowPtr, amalgCols, amalgVals); + + // calculate number of dof rows for new prolongator + size_t paddedNrows = amalgP->getRowMap()->getLocalNumElements() * + Teuchos::as(maxDofPerNode); + + // reserve CSR arrays for new prolongation operator + Teuchos::ArrayRCP newPRowPtr(paddedNrows + 1); + Teuchos::ArrayRCP newPCols(amalgP->getLocalNumEntries() * + maxDofPerNode); + Teuchos::ArrayRCP newPVals(amalgP->getLocalNumEntries() * + maxDofPerNode); + + size_t rowCount = 0; // actual number of (local) in unamalgamated prolongator + if (fineIsPadded 
== true || fineLevel.GetLevelID() > 0) { + + // build prolongation operator for padded fine level matrices. + // Note: padded fine level dofs are transferred by injection. + // That is, these interpolation stencils do not take averages of + // coarse level variables. Further, fine level Dirichlet points + // also use injection. + + size_t cnt = 0; // local id counter + for (decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size() - 1; i++) { + // determine number of entries in amalgamated dof row i + size_t rowLength = amalgRowPtr[i + 1] - amalgRowPtr[i]; + + // loop over dofs per node (unamalgamation) + for (int j = 0; j < maxDofPerNode; j++) { + newPRowPtr[i * maxDofPerNode + j] = cnt; + if (dofStatus[i * maxDofPerNode + j] == + 's') { // add only "standard" dofs to unamalgamated prolongator + // loop over column entries in amalgamated P + for (size_t k = 0; k < rowLength; k++) { + newPCols[cnt] = amalgCols[k + amalgRowPtr[i]] * maxDofPerNode + j; + newPVals[cnt++] = amalgVals[k + amalgRowPtr[i]]; } } } + } - newPRowPtr[paddedNrows] = cnt; // close row CSR array - rowCount = paddedNrows; - } else { - // Build prolongation operator for non-padded fine level matrices. - // Need to map from non-padded dofs to padded dofs. For this, look - // at the status array and skip padded dofs. 
- - size_t cnt = 0; // local id counter - - for (decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size() - 1; i++) { - // determine number of entries in amalgamated dof row i - size_t rowLength = amalgRowPtr[i+1] - amalgRowPtr[i]; - - // loop over dofs per node (unamalgamation) - for(int j = 0; j < maxDofPerNode; j++) { - // no interpolation for padded fine dofs as they do not exist - - if (dofStatus[i*maxDofPerNode+j] == 's') { // add only "standard" dofs to unamalgamated prolongator - newPRowPtr[rowCount++] = cnt; - // loop over column entries in amalgamated P - for (size_t k = 0; k < rowLength; k++) { - newPCols[cnt ] = amalgCols[k+amalgRowPtr[i]] * maxDofPerNode + j; - newPVals[cnt++] = amalgVals[k+amalgRowPtr[i]]; - } - - } - if (dofStatus[i*maxDofPerNode+j] == 'd') { // Dirichlet handling - newPRowPtr[rowCount++] = cnt; + newPRowPtr[paddedNrows] = cnt; // close row CSR array + rowCount = paddedNrows; + } else { + // Build prolongation operator for non-padded fine level matrices. + // Need to map from non-padded dofs to padded dofs. For this, look + // at the status array and skip padded dofs. 
+ + size_t cnt = 0; // local id counter + + for (decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size() - 1; i++) { + // determine number of entries in amalgamated dof row i + size_t rowLength = amalgRowPtr[i + 1] - amalgRowPtr[i]; + + // loop over dofs per node (unamalgamation) + for (int j = 0; j < maxDofPerNode; j++) { + // no interpolation for padded fine dofs as they do not exist + + if (dofStatus[i * maxDofPerNode + j] == + 's') { // add only "standard" dofs to unamalgamated prolongator + newPRowPtr[rowCount++] = cnt; + // loop over column entries in amalgamated P + for (size_t k = 0; k < rowLength; k++) { + newPCols[cnt] = amalgCols[k + amalgRowPtr[i]] * maxDofPerNode + j; + newPVals[cnt++] = amalgVals[k + amalgRowPtr[i]]; } } - } - newPRowPtr[rowCount] = cnt; // close row CSR array - } // fineIsPadded == false - - // generate coarse domain map - // So far no support for gid offset or strided maps. This information - // could be gathered easily from the unamalgamated fine level operator A. 
- std::vector stridingInfo(1, maxDofPerNode); - - GlobalOrdinal nCoarseDofs = amalgP->getDomainMap()->getLocalNumElements() * maxDofPerNode; - GlobalOrdinal indexBase = amalgP->getDomainMap()->getIndexBase(); - RCP coarseDomainMap = StridedMapFactory::Build(amalgP->getDomainMap()->lib(), - Teuchos::OrdinalTraits::invalid(), - nCoarseDofs, - indexBase, - stridingInfo, - amalgP->getDomainMap()->getComm(), - -1 /* stridedBlockId */, - 0 /*domainGidOffset */); - - size_t nColCoarseDofs = Teuchos::as(amalgP->getColMap()->getLocalNumElements() * maxDofPerNode); - Teuchos::Array unsmooshColMapGIDs(nColCoarseDofs); - for(size_t c = 0; c < amalgP->getColMap()->getLocalNumElements(); ++c) { - GlobalOrdinal gid = (amalgP->getColMap()->getGlobalElement(c)-indexBase) * maxDofPerNode + indexBase; - - for(int i = 0; i < maxDofPerNode; ++i) { - unsmooshColMapGIDs[c * maxDofPerNode + i] = gid + i; + if (dofStatus[i * maxDofPerNode + j] == 'd') { // Dirichlet handling + newPRowPtr[rowCount++] = cnt; + } } } - Teuchos::RCP coarseColMap = MapFactory::Build(amalgP->getDomainMap()->lib(), - Teuchos::OrdinalTraits::invalid(), - unsmooshColMapGIDs(), //View, - indexBase, - amalgP->getDomainMap()->getComm()); - - // Assemble unamalgamated P - Teuchos::RCP unamalgPCrs = CrsMatrixFactory::Build(unamalgA->getRowMap(), - coarseColMap, - maxDofPerNode*amalgP->getLocalMaxNumRowEntries()); - for (size_t i = 0; i < rowCount; i++) { - unamalgPCrs->insertLocalValues(i, - newPCols.view(newPRowPtr[i], newPRowPtr[i+1] - newPRowPtr[i]), - newPVals.view(newPRowPtr[i], newPRowPtr[i+1] - newPRowPtr[i])); + newPRowPtr[rowCount] = cnt; // close row CSR array + } // fineIsPadded == false + + // generate coarse domain map + // So far no support for gid offset or strided maps. This information + // could be gathered easily from the unamalgamated fine level operator A. 
+ std::vector stridingInfo(1, maxDofPerNode); + + GlobalOrdinal nCoarseDofs = + amalgP->getDomainMap()->getLocalNumElements() * maxDofPerNode; + GlobalOrdinal indexBase = amalgP->getDomainMap()->getIndexBase(); + RCP coarseDomainMap = StridedMapFactory::Build( + amalgP->getDomainMap()->lib(), + Teuchos::OrdinalTraits::invalid(), nCoarseDofs, + indexBase, stridingInfo, amalgP->getDomainMap()->getComm(), + -1 /* stridedBlockId */, 0 /*domainGidOffset */); + + size_t nColCoarseDofs = Teuchos::as( + amalgP->getColMap()->getLocalNumElements() * maxDofPerNode); + Teuchos::Array unsmooshColMapGIDs(nColCoarseDofs); + for (size_t c = 0; c < amalgP->getColMap()->getLocalNumElements(); ++c) { + GlobalOrdinal gid = + (amalgP->getColMap()->getGlobalElement(c) - indexBase) * maxDofPerNode + + indexBase; + + for (int i = 0; i < maxDofPerNode; ++i) { + unsmooshColMapGIDs[c * maxDofPerNode + i] = gid + i; } - unamalgPCrs->fillComplete(coarseDomainMap, unamalgA->getRowMap()); - - Teuchos::RCP unamalgP = Teuchos::rcp(new CrsMatrixWrap(unamalgPCrs)); - - Set(coarseLevel,"P",unamalgP); } + Teuchos::RCP coarseColMap = MapFactory::Build( + amalgP->getDomainMap()->lib(), + Teuchos::OrdinalTraits::invalid(), + unsmooshColMapGIDs(), // View, + indexBase, amalgP->getDomainMap()->getComm()); + + // Assemble unamalgamated P + Teuchos::RCP unamalgPCrs = CrsMatrixFactory::Build( + unamalgA->getRowMap(), coarseColMap, + maxDofPerNode * amalgP->getLocalMaxNumRowEntries()); + for (size_t i = 0; i < rowCount; i++) { + unamalgPCrs->insertLocalValues( + i, newPCols.view(newPRowPtr[i], newPRowPtr[i + 1] - newPRowPtr[i]), + newPVals.view(newPRowPtr[i], newPRowPtr[i + 1] - newPRowPtr[i])); + } + unamalgPCrs->fillComplete(coarseDomainMap, unamalgA->getRowMap()); + Teuchos::RCP unamalgP = Teuchos::rcp(new CrsMatrixWrap(unamalgPCrs)); -} /* MueLu */ + Set(coarseLevel, "P", unamalgP); +} +} // namespace MueLu #endif /* PACKAGES_MUELU_SRC_GRAPH_MUELU_UNSMOOSHFACTORY_DEF_HPP_ */ diff --git 
a/packages/muelu/src/Graph/MatrixTransformation/MueLu_VariableDofLaplacianFactory_decl.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_VariableDofLaplacianFactory_decl.hpp index 51a333e52d81..82deb759d340 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_VariableDofLaplacianFactory_decl.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_VariableDofLaplacianFactory_decl.hpp @@ -47,319 +47,340 @@ #ifndef PACKAGES_MUELU_SRC_GRAPH_MUELU_VARIABLEDOFLAPLACIANFACTORY_DECL_HPP_ #define PACKAGES_MUELU_SRC_GRAPH_MUELU_VARIABLEDOFLAPLACIANFACTORY_DECL_HPP_ - #include "MueLu_ConfigDefs.hpp" +#include "MueLu_Level_fwd.hpp" #include "MueLu_SingleLevelFactoryBase.hpp" #include "MueLu_VariableDofLaplacianFactory_fwd.hpp" -#include "MueLu_Level_fwd.hpp" #include "MueLu_Utilities_fwd.hpp" namespace MueLu { - /*! - @class VariableDofLaplacianFactory class. - @brief Factory for building scalar Laplace operator (that is used as fake operator for variable dof size problems) - - Build distance Laplacian associated with input matrix A (which might have a variable number of DOFs per node). - Coordinates are needed to calculate the distance laplacian values. The user-provided array "DofPresent" stores whether - an array is present (=1) or not (=0) in the matrix. The length of the array is number of nodes * maxDofPerNode and - therefore it might be larger or equal than the number of rows in the input matrix. - - The factory produces the distance laplacian matrix A as output (with one dof per node) as well as the coarse version - of the DofStatus (needed for the next coarser level), containing information about (artificial) Dirichlet rows in the matrix. 
- - @ingroup MueLuGraphClasses - - ## Input/output of VariableDofLaplacianFactory ## - - ### User parameters of VariableDofLaplacianFactory ### - Parameter | type | default | master.xml | validated | requested | description - ----------|------|---------|:----------:|:---------:|:---------:|------------ - A | Factory | null | | * | * | Generating factory of the input matrix A with potentially variable number of DOFs. Might be padded or non-padded. Padded means, that the matrix has additional artificial rows and columns to have a constant number of DOFs per node. - Coordinates | Factory | null | | * | * | Generating factory for Coordinates needed for building distance laplacian. - DofPresent | Teuchos::ArrayRCP | NoFactory | | | (*) | Optional array containing information whether DOF is actually present in matrix or not. - Advanced Dirichlet: threshold | double | 1e-5 | | * | | Drop tolerance for Dirichlet detection - Variable DOF amalgamation: threshold | double | 1.8e-9 | | * | | Drop tolerance for amalgamation process - maxDofPerNode | int | 1 | | * | | Maximum number of DOFs per node - - - The * in the @c master.xml column denotes that the parameter is defined in the @c master.xml file.
- The * in the @c validated column means that the parameter is declared in the list of valid input parameters (see VariableDofLaplacianFactory::GetValidParameters).
- The * in the @c requested column states that the data is requested as input with all dependencies (see VariableDofLaplacianFactory::DeclareInput). - - ### Variables provided by VariableDofLaplacianFactory ### - - After TentativePFactory::Build the following data is available (if requested) - - Parameter | generated by | description - ----------|--------------|------------ - | A | VariableDofLaplacianFactory | Laplacian operator - | DofStatus | VariableDofLaplacianFactory | Status array for next coarse level - */ - template - class VariableDofLaplacianFactory : public SingleLevelFactoryBase { +/*! + @class VariableDofLaplacianFactory class. + @brief Factory for building scalar Laplace operator (that is used as fake + operator for variable dof size problems) + + Build distance Laplacian associated with input matrix A (which might have a + variable number of DOFs per node). Coordinates are needed to calculate the + distance laplacian values. The user-provided array "DofPresent" stores whether + an array is present (=1) or not (=0) in the matrix. The length of the array is + number of nodes * maxDofPerNode and therefore it might be larger or equal than + the number of rows in the input matrix. + + The factory produces the distance laplacian matrix A as output (with one dof + per node) as well as the coarse version of the DofStatus (needed for the next + coarser level), containing information about (artificial) Dirichlet rows in + the matrix. + + @ingroup MueLuGraphClasses + + ## Input/output of VariableDofLaplacianFactory ## + + ### User parameters of VariableDofLaplacianFactory ### + Parameter | type | default | master.xml | validated | requested | description + ----------|------|---------|:----------:|:---------:|:---------:|------------ + A | Factory | null | | * | * | Generating factory of the input + matrix A with potentially variable number of DOFs. Might be padded or + non-padded. 
Padded means, that the matrix has additional artificial rows and + columns to have a constant number of DOFs per node. Coordinates | Factory | + null | | * | * | Generating factory for Coordinates needed for building + distance laplacian. DofPresent | Teuchos::ArrayRCP | NoFactory | + | | (*) | Optional array containing information whether DOF is actually + present in matrix or not. Advanced Dirichlet: threshold | double | 1e-5 | | + * | | Drop tolerance for Dirichlet detection Variable DOF amalgamation: + threshold | double | 1.8e-9 | | * | | Drop tolerance for amalgamation + process maxDofPerNode | int | 1 | | * | | Maximum number of DOFs per node + + + The * in the @c master.xml column denotes that the parameter is defined in the + @c master.xml file.
The * in the @c validated column means that the + parameter is declared in the list of valid input parameters (see + VariableDofLaplacianFactory::GetValidParameters).
The * in the @c + requested column states that the data is requested as input with all + dependencies (see VariableDofLaplacianFactory::DeclareInput). + + ### Variables provided by VariableDofLaplacianFactory ### + + After TentativePFactory::Build the following data is available (if requested) + + Parameter | generated by | description + ----------|--------------|------------ + | A | VariableDofLaplacianFactory | Laplacian operator + | DofStatus | VariableDofLaplacianFactory | Status array for next coarse level +*/ +template +class VariableDofLaplacianFactory : public SingleLevelFactoryBase { #undef MUELU_VARIABLEDOFLAPLACIANFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! @name Constructors/Destructors. - //@{ - - //! Constructor - VariableDofLaplacianFactory(); - - //! Destructor - virtual ~VariableDofLaplacianFactory() { } - - RCP GetValidParameterList() const; - - //@} - - //! Input - //@{ - - void DeclareInput(Level ¤tLevel) const; - - //@} - - void Build(Level ¤tLevel) const; // Build - - private: - - void buildPaddedMap(const Teuchos::ArrayRCP & dofPresent, std::vector & map, size_t nDofs) const; - void assignGhostLocalNodeIds(const Teuchos::RCP & rowDofMap, const Teuchos::RCP & colDofMap, std::vector & myLocalNodeIds, const std::vector & dofMap, size_t maxDofPerNode, size_t& nLocalNodes, size_t& nLocalPlusGhostNodes, Teuchos::RCP< const Teuchos::Comm< int > > comm) const; - void squeezeOutNnzs(Teuchos::ArrayRCP & rowPtr, Teuchos::ArrayRCP & cols, Teuchos::ArrayRCP & vals, const std::vector& keep) const; - void buildLaplacian(const Teuchos::ArrayRCP& rowPtr, const Teuchos::ArrayRCP& cols, Teuchos::ArrayRCP& vals, const size_t& numdim, const RCP::magnitudeType,LocalOrdinal,GlobalOrdinal,Node> > & ghostedCoords) const; - - template - void MueLu_az_sort(listType list[], size_t N, size_t list2[], Scalar list3[]) const { - /* local variables */ - - listType RR, K; - size_t l, r, j, i; - int flag; - size_t RR2; - Scalar RR3; - - 
/*********************** execution begins ******************************/ - - if (N <= 1) return; - - l = N / 2 + 1; - r = N - 1; - l = l - 1; - RR = list[l - 1]; - K = list[l - 1]; - - if ((list2 != NULL) && (list3 != NULL)) { - RR2 = list2[l - 1]; - RR3 = list3[l - 1]; - while (r != 0) { - j = l; - flag = 1; - - while (flag == 1) { - i = j; - j = j + j; - - if (j > r + 1) +public: + //! @name Constructors/Destructors. + //@{ + + //! Constructor + VariableDofLaplacianFactory(); + + //! Destructor + virtual ~VariableDofLaplacianFactory() {} + + RCP GetValidParameterList() const; + + //@} + + //! Input + //@{ + + void DeclareInput(Level ¤tLevel) const; + + //@} + + void Build(Level ¤tLevel) const; // Build + +private: + void buildPaddedMap(const Teuchos::ArrayRCP &dofPresent, + std::vector &map, size_t nDofs) const; + void + assignGhostLocalNodeIds(const Teuchos::RCP &rowDofMap, + const Teuchos::RCP &colDofMap, + std::vector &myLocalNodeIds, + const std::vector &dofMap, + size_t maxDofPerNode, size_t &nLocalNodes, + size_t &nLocalPlusGhostNodes, + Teuchos::RCP> comm) const; + void squeezeOutNnzs(Teuchos::ArrayRCP &rowPtr, + Teuchos::ArrayRCP &cols, + Teuchos::ArrayRCP &vals, + const std::vector &keep) const; + void + buildLaplacian(const Teuchos::ArrayRCP &rowPtr, + const Teuchos::ArrayRCP &cols, + Teuchos::ArrayRCP &vals, const size_t &numdim, + const RCP::magnitudeType, + LocalOrdinal, GlobalOrdinal, Node>> &ghostedCoords) const; + + template + void MueLu_az_sort(listType list[], size_t N, size_t list2[], + Scalar list3[]) const { + /* local variables */ + + listType RR, K; + size_t l, r, j, i; + int flag; + size_t RR2; + Scalar RR3; + + /*********************** execution begins ******************************/ + + if (N <= 1) + return; + + l = N / 2 + 1; + r = N - 1; + l = l - 1; + RR = list[l - 1]; + K = list[l - 1]; + + if ((list2 != NULL) && (list3 != NULL)) { + RR2 = list2[l - 1]; + RR3 = list3[l - 1]; + while (r != 0) { + j = l; + flag = 1; + + while (flag == 
1) { + i = j; + j = j + j; + + if (j > r + 1) + flag = 0; + else { + if (j < r + 1) + if (list[j] > list[j - 1]) + j = j + 1; + + if (list[j - 1] > K) { + list[i - 1] = list[j - 1]; + list2[i - 1] = list2[j - 1]; + list3[i - 1] = list3[j - 1]; + } else { flag = 0; - else { - if (j < r + 1) - if (list[j] > list[j - 1]) j = j + 1; - - if (list[j - 1] > K) { - list[ i - 1] = list[ j - 1]; - list2[i - 1] = list2[j - 1]; - list3[i - 1] = list3[j - 1]; - } - else { - flag = 0; - } } } - - list[ i - 1] = RR; - list2[i - 1] = RR2; - list3[i - 1] = RR3; - - if (l == 1) { - RR = list [r]; - RR2 = list2[r]; - RR3 = list3[r]; - - K = list[r]; - list[r ] = list[0]; - list2[r] = list2[0]; - list3[r] = list3[0]; - r = r - 1; - } - else { - l = l - 1; - RR = list[ l - 1]; - RR2 = list2[l - 1]; - RR3 = list3[l - 1]; - K = list[l - 1]; - } } - list[ 0] = RR; - list2[0] = RR2; - list3[0] = RR3; + list[i - 1] = RR; + list2[i - 1] = RR2; + list3[i - 1] = RR3; + + if (l == 1) { + RR = list[r]; + RR2 = list2[r]; + RR3 = list3[r]; + + K = list[r]; + list[r] = list[0]; + list2[r] = list2[0]; + list3[r] = list3[0]; + r = r - 1; + } else { + l = l - 1; + RR = list[l - 1]; + RR2 = list2[l - 1]; + RR3 = list3[l - 1]; + K = list[l - 1]; + } } - else if (list2 != NULL) { - RR2 = list2[l - 1]; - while (r != 0) { - j = l; - flag = 1; - - while (flag == 1) { - i = j; - j = j + j; - if (j > r + 1) + list[0] = RR; + list2[0] = RR2; + list3[0] = RR3; + } else if (list2 != NULL) { + RR2 = list2[l - 1]; + while (r != 0) { + j = l; + flag = 1; + + while (flag == 1) { + i = j; + j = j + j; + + if (j > r + 1) + flag = 0; + else { + if (j < r + 1) + if (list[j] > list[j - 1]) + j = j + 1; + + if (list[j - 1] > K) { + list[i - 1] = list[j - 1]; + list2[i - 1] = list2[j - 1]; + } else { flag = 0; - else { - if (j < r + 1) - if (list[j] > list[j - 1]) j = j + 1; - - if (list[j - 1] > K) { - list[ i - 1] = list[ j - 1]; - list2[i - 1] = list2[j - 1]; - } - else { - flag = 0; - } } } - - list[ i - 1] = RR; - 
list2[i - 1] = RR2; - - if (l == 1) { - RR = list [r]; - RR2 = list2[r]; - - K = list[r]; - list[r ] = list[0]; - list2[r] = list2[0]; - r = r - 1; - } - else { - l = l - 1; - RR = list[ l - 1]; - RR2 = list2[l - 1]; - K = list[l - 1]; - } } - list[ 0] = RR; - list2[0] = RR2; + list[i - 1] = RR; + list2[i - 1] = RR2; + + if (l == 1) { + RR = list[r]; + RR2 = list2[r]; + + K = list[r]; + list[r] = list[0]; + list2[r] = list2[0]; + r = r - 1; + } else { + l = l - 1; + RR = list[l - 1]; + RR2 = list2[l - 1]; + K = list[l - 1]; + } } - else if (list3 != NULL) { - RR3 = list3[l - 1]; - while (r != 0) { - j = l; - flag = 1; - while (flag == 1) { - i = j; - j = j + j; + list[0] = RR; + list2[0] = RR2; + } else if (list3 != NULL) { + RR3 = list3[l - 1]; + while (r != 0) { + j = l; + flag = 1; - if (j > r + 1) + while (flag == 1) { + i = j; + j = j + j; + + if (j > r + 1) + flag = 0; + else { + if (j < r + 1) + if (list[j] > list[j - 1]) + j = j + 1; + + if (list[j - 1] > K) { + list[i - 1] = list[j - 1]; + list3[i - 1] = list3[j - 1]; + } else { flag = 0; - else { - if (j < r + 1) - if (list[j] > list[j - 1]) j = j + 1; - - if (list[j - 1] > K) { - list[ i - 1] = list[ j - 1]; - list3[i - 1] = list3[j - 1]; - } - else { - flag = 0; - } } } + } - list[ i - 1] = RR; - list3[i - 1] = RR3; - - if (l == 1) { - RR = list [r]; - RR3 = list3[r]; - - K = list[r]; - list[r ] = list[0]; - list3[r] = list3[0]; - r = r - 1; - } - else { - l = l - 1; - RR = list[ l - 1]; - RR3 = list3[l - 1]; - K = list[l - 1]; - } + list[i - 1] = RR; + list3[i - 1] = RR3; + + if (l == 1) { + RR = list[r]; + RR3 = list3[r]; + + K = list[r]; + list[r] = list[0]; + list3[r] = list3[0]; + r = r - 1; + } else { + l = l - 1; + RR = list[l - 1]; + RR3 = list3[l - 1]; + K = list[l - 1]; } + } - list[ 0] = RR; - list3[0] = RR3; + list[0] = RR; + list3[0] = RR3; - } - else { - while (r != 0) { - j = l; - flag = 1; + } else { + while (r != 0) { + j = l; + flag = 1; - while (flag == 1) { - i = j; - j = j + j; + 
while (flag == 1) { + i = j; + j = j + j; - if (j > r + 1) + if (j > r + 1) + flag = 0; + else { + if (j < r + 1) + if (list[j] > list[j - 1]) + j = j + 1; + + if (list[j - 1] > K) { + list[i - 1] = list[j - 1]; + } else { flag = 0; - else { - if (j < r + 1) - if (list[j] > list[j - 1]) j = j + 1; - - if (list[j - 1] > K) { - list[ i - 1] = list[ j - 1]; - } - else { - flag = 0; - } } } + } - list[ i - 1] = RR; + list[i - 1] = RR; - if (l == 1) { - RR = list [r]; + if (l == 1) { + RR = list[r]; - K = list[r]; - list[r ] = list[0]; - r = r - 1; - } - else { - l = l - 1; - RR = list[ l - 1]; - K = list[l - 1]; - } + K = list[r]; + list[r] = list[0]; + r = r - 1; + } else { + l = l - 1; + RR = list[l - 1]; + K = list[l - 1]; } - - list[ 0] = RR; } + + list[0] = RR; } + } - }; //class CoalesceDropFactory +}; // class CoalesceDropFactory -} //namespace MueLu +} // namespace MueLu #define MUELU_VARIABLEDOFLAPLACIANFACTORY_SHORT - -#endif /* PACKAGES_MUELU_SRC_GRAPH_MUELU_VARIABLEDOFLAPLACIANFACTORY_DECL_HPP_ */ +#endif /* PACKAGES_MUELU_SRC_GRAPH_MUELU_VARIABLEDOFLAPLACIANFACTORY_DECL_HPP_ \ + */ diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_VariableDofLaplacianFactory_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_VariableDofLaplacianFactory_def.hpp index 21aa1c77369d..936e5abc21b3 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_VariableDofLaplacianFactory_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_VariableDofLaplacianFactory_def.hpp @@ -47,559 +47,704 @@ #ifndef PACKAGES_MUELU_SRC_GRAPH_MUELU_VARIABLEDOFLAPLACIANFACTORY_DEF_HPP_ #define PACKAGES_MUELU_SRC_GRAPH_MUELU_VARIABLEDOFLAPLACIANFACTORY_DEF_HPP_ - #include "MueLu_Monitor.hpp" #include "MueLu_VariableDofLaplacianFactory_decl.hpp" namespace MueLu { - template - RCP VariableDofLaplacianFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - validParamList->set< double > ("Advanced Dirichlet: threshold", 
1e-5, "Drop tolerance for Dirichlet detection"); - validParamList->set< double > ("Variable DOF amalgamation: threshold", 1.8e-9, "Drop tolerance for amalgamation process"); - validParamList->set< int > ("maxDofPerNode", 1, "Maximum number of DOFs per node"); - - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A"); - validParamList->set< RCP >("Coordinates", Teuchos::null, "Generating factory for Coordinates"); - - return validParamList; +template +RCP +VariableDofLaplacianFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + + validParamList->set("Advanced Dirichlet: threshold", 1e-5, + "Drop tolerance for Dirichlet detection"); + validParamList->set("Variable DOF amalgamation: threshold", 1.8e-9, + "Drop tolerance for amalgamation process"); + validParamList->set("maxDofPerNode", 1, + "Maximum number of DOFs per node"); + + validParamList->set>( + "A", Teuchos::null, "Generating factory of the matrix A"); + validParamList->set>( + "Coordinates", Teuchos::null, "Generating factory for Coordinates"); + + return validParamList; +} + +template +VariableDofLaplacianFactory::VariableDofLaplacianFactory() {} + +template +void VariableDofLaplacianFactory::DeclareInput(Level ¤tLevel) + const { + Input(currentLevel, "A"); + Input(currentLevel, "Coordinates"); + + // if (currentLevel.GetLevelID() == 0) // TODO check for finest level (special + // treatment) + if (currentLevel.IsAvailable("DofPresent", NoFactory::get())) { + currentLevel.DeclareInput("DofPresent", NoFactory::get(), this); } - - template - VariableDofLaplacianFactory::VariableDofLaplacianFactory() { } - - template - void VariableDofLaplacianFactory::DeclareInput(Level ¤tLevel) const { - Input(currentLevel, "A"); - Input(currentLevel, "Coordinates"); - - //if (currentLevel.GetLevelID() == 0) // TODO check for finest level (special treatment) - if (currentLevel.IsAvailable("DofPresent", NoFactory::get())) { - 
currentLevel.DeclareInput("DofPresent", NoFactory::get(), this); - } +} + +template +void VariableDofLaplacianFactory::Build(Level ¤tLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + typedef Teuchos::ScalarTraits STS; + + const ParameterList &pL = GetParameterList(); + + RCP A = Get>(currentLevel, "A"); + + Teuchos::RCP> comm = A->getRowMap()->getComm(); + Xpetra::UnderlyingLib lib = A->getRowMap()->lib(); + + typedef Xpetra::MultiVector< + typename Teuchos::ScalarTraits::magnitudeType, LO, GO, NO> + dxMV; + RCP Coords = Get::magnitudeType, LO, GO, NO>>>( + currentLevel, "Coordinates"); + + int maxDofPerNode = pL.get("maxDofPerNode"); + Scalar dirDropTol = Teuchos::as(pL.get( + "Advanced Dirichlet: threshold")); // "ML advnaced Dirichlet: threshold" + Scalar amalgDropTol = Teuchos::as(pL.get( + "Variable DOF amalgamation: threshold")); //"variable DOF amalgamation: + // threshold") + + bool bHasZeroDiagonal = false; + Teuchos::ArrayRCP dirOrNot = + MueLu::Utilities::DetectDirichletRowsExt(*A, bHasZeroDiagonal, + STS::magnitude( + dirDropTol)); + + // check availability of DofPresent array + Teuchos::ArrayRCP dofPresent; + if (currentLevel.IsAvailable("DofPresent", NoFactory::get())) { + dofPresent = currentLevel.Get>( + "DofPresent", NoFactory::get()); + } else { + // TAW: not sure about size of array. We cannot determine the expected size + // in the non-padded case correctly... 
+ dofPresent = Teuchos::ArrayRCP( + A->getRowMap()->getLocalNumElements(), 1); } - template - void VariableDofLaplacianFactory::Build(Level ¤tLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); - typedef Teuchos::ScalarTraits STS; - - const ParameterList & pL = GetParameterList(); - - RCP A = Get< RCP >(currentLevel, "A"); - - Teuchos::RCP< const Teuchos::Comm< int > > comm = A->getRowMap()->getComm(); - Xpetra::UnderlyingLib lib = A->getRowMap()->lib(); - - typedef Xpetra::MultiVector::magnitudeType,LO,GO,NO> dxMV; - RCP Coords = Get< RCP::magnitudeType,LO,GO,NO> > >(currentLevel, "Coordinates"); - - int maxDofPerNode = pL.get("maxDofPerNode"); - Scalar dirDropTol = Teuchos::as(pL.get("Advanced Dirichlet: threshold")); // "ML advnaced Dirichlet: threshold" - Scalar amalgDropTol = Teuchos::as(pL.get("Variable DOF amalgamation: threshold")); //"variable DOF amalgamation: threshold") - - bool bHasZeroDiagonal = false; - Teuchos::ArrayRCP dirOrNot = MueLu::Utilities::DetectDirichletRowsExt(*A,bHasZeroDiagonal,STS::magnitude(dirDropTol)); - - // check availability of DofPresent array - Teuchos::ArrayRCP dofPresent; - if (currentLevel.IsAvailable("DofPresent", NoFactory::get())) { - dofPresent = currentLevel.Get< Teuchos::ArrayRCP >("DofPresent", NoFactory::get()); - } else { - // TAW: not sure about size of array. We cannot determine the expected size in the non-padded case correctly... - dofPresent = Teuchos::ArrayRCP(A->getRowMap()->getLocalNumElements(),1); - } - - // map[k] indicates that the kth dof in the variable dof matrix A would - // correspond to the map[k]th dof in the padded system. If, i.e., it is - // map[35] = 39 then dof no 35 in the variable dof matrix A corresponds to - // row map id 39 in an imaginary padded matrix Apadded. - // The padded system is never built but would be the associated matrix if - // every node had maxDofPerNode dofs. 
- std::vector map(A->getLocalNumRows()); - this->buildPaddedMap(dofPresent, map, A->getLocalNumRows()); - - // map of size of number of DOFs containing local node id (dof id -> node id, inclusive ghosted dofs/nodes) - std::vector myLocalNodeIds(A->getColMap()->getLocalNumElements()); // possible maximum (we need the ghost nodes, too) - - // assign the local node ids for the ghosted nodes - size_t nLocalNodes, nLocalPlusGhostNodes; - this->assignGhostLocalNodeIds(A->getRowMap(), A->getColMap(), myLocalNodeIds, map, maxDofPerNode, nLocalNodes, nLocalPlusGhostNodes, comm); - - //RCP fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)," ",0,false,10,false, true); - - TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as(dofPresent.size()) != Teuchos::as(nLocalNodes * maxDofPerNode),MueLu::Exceptions::RuntimeError,"VariableDofLaplacianFactory: size of provided DofPresent array is " << dofPresent.size() << " but should be " << nLocalNodes * maxDofPerNode << " on the current processor."); - - // put content of assignGhostLocalNodeIds here... - - // fill nodal maps - - Teuchos::ArrayView< const GlobalOrdinal > myGids = A->getColMap()->getLocalElementList(); - - // vector containing row/col gids of amalgamated matrix (with holes) - - size_t nLocalDofs = A->getRowMap()->getLocalNumElements(); - size_t nLocalPlusGhostDofs = A->getColMap()->getLocalNumElements(); - - // myLocalNodeIds (dof -> node) - - Teuchos::Array amalgRowMapGIDs(nLocalNodes); - Teuchos::Array amalgColMapGIDs(nLocalPlusGhostNodes); + // map[k] indicates that the kth dof in the variable dof matrix A would + // correspond to the map[k]th dof in the padded system. If, i.e., it is + // map[35] = 39 then dof no 35 in the variable dof matrix A corresponds to + // row map id 39 in an imaginary padded matrix Apadded. + // The padded system is never built but would be the associated matrix if + // every node had maxDofPerNode dofs. 
+ std::vector map(A->getLocalNumRows()); + this->buildPaddedMap(dofPresent, map, A->getLocalNumRows()); + + // map of size of number of DOFs containing local node id (dof id -> node id, + // inclusive ghosted dofs/nodes) + std::vector myLocalNodeIds( + A->getColMap()->getLocalNumElements()); // possible maximum (we need the + // ghost nodes, too) + + // assign the local node ids for the ghosted nodes + size_t nLocalNodes, nLocalPlusGhostNodes; + this->assignGhostLocalNodeIds(A->getRowMap(), A->getColMap(), myLocalNodeIds, + map, maxDofPerNode, nLocalNodes, + nLocalPlusGhostNodes, comm); + + // RCP fancy = + // Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)," ",0,false,10,false, + // true); + + TEUCHOS_TEST_FOR_EXCEPTION( + Teuchos::as(dofPresent.size()) != + Teuchos::as(nLocalNodes * maxDofPerNode), + MueLu::Exceptions::RuntimeError, + "VariableDofLaplacianFactory: size of provided DofPresent array is " + << dofPresent.size() << " but should be " + << nLocalNodes * maxDofPerNode << " on the current processor."); + + // put content of assignGhostLocalNodeIds here... 
+ + // fill nodal maps + + Teuchos::ArrayView myGids = + A->getColMap()->getLocalElementList(); + + // vector containing row/col gids of amalgamated matrix (with holes) + + size_t nLocalDofs = A->getRowMap()->getLocalNumElements(); + size_t nLocalPlusGhostDofs = A->getColMap()->getLocalNumElements(); + + // myLocalNodeIds (dof -> node) + + Teuchos::Array amalgRowMapGIDs(nLocalNodes); + Teuchos::Array amalgColMapGIDs(nLocalPlusGhostNodes); + + // initialize + size_t count = 0; + if (nLocalDofs > 0) { + amalgRowMapGIDs[count] = myGids[0]; + amalgColMapGIDs[count] = myGids[0]; + count++; + } - // initialize - size_t count = 0; - if (nLocalDofs > 0) { - amalgRowMapGIDs[count] = myGids[0]; - amalgColMapGIDs[count] = myGids[0]; + for (size_t i = 1; i < nLocalDofs; i++) { + if (myLocalNodeIds[i] != myLocalNodeIds[i - 1]) { + amalgRowMapGIDs[count] = myGids[i]; + amalgColMapGIDs[count] = myGids[i]; count++; } + } - for(size_t i = 1; i < nLocalDofs; i++) { - if (myLocalNodeIds[i] != myLocalNodeIds[i-1]) { - amalgRowMapGIDs[count] = myGids[i]; - amalgColMapGIDs[count] = myGids[i]; - count++; - } - } - - RCP tempAmalgColVec = GOVectorFactory::Build(A->getDomainMap()); - { - Teuchos::ArrayRCP tempAmalgColVecData = tempAmalgColVec->getDataNonConst(0); - for (size_t i = 0; i < A->getDomainMap()->getLocalNumElements(); i++) - tempAmalgColVecData[i] = amalgColMapGIDs[ myLocalNodeIds[i]]; - } + RCP tempAmalgColVec = GOVectorFactory::Build(A->getDomainMap()); + { + Teuchos::ArrayRCP tempAmalgColVecData = + tempAmalgColVec->getDataNonConst(0); + for (size_t i = 0; i < A->getDomainMap()->getLocalNumElements(); i++) + tempAmalgColVecData[i] = amalgColMapGIDs[myLocalNodeIds[i]]; + } - RCP tempAmalgColVecTarget = GOVectorFactory::Build(A->getColMap()); - Teuchos::RCP dofImporter = ImportFactory::Build(A->getDomainMap(), A->getColMap()); - tempAmalgColVecTarget->doImport(*tempAmalgColVec, *dofImporter, Xpetra::INSERT); + RCP tempAmalgColVecTarget = GOVectorFactory::Build(A->getColMap()); 
+ Teuchos::RCP dofImporter = + ImportFactory::Build(A->getDomainMap(), A->getColMap()); + tempAmalgColVecTarget->doImport(*tempAmalgColVec, *dofImporter, + Xpetra::INSERT); + + { + Teuchos::ArrayRCP tempAmalgColVecBData = + tempAmalgColVecTarget->getData(0); + // copy from dof vector to nodal vector + for (size_t i = 0; i < myLocalNodeIds.size(); i++) + amalgColMapGIDs[myLocalNodeIds[i]] = tempAmalgColVecBData[i]; + } - { - Teuchos::ArrayRCP tempAmalgColVecBData = tempAmalgColVecTarget->getData(0); - // copy from dof vector to nodal vector - for (size_t i = 0; i < myLocalNodeIds.size(); i++) - amalgColMapGIDs[ myLocalNodeIds[i]] = tempAmalgColVecBData[i]; + Teuchos::RCP amalgRowMap = + MapFactory::Build(lib, Teuchos::OrdinalTraits::invalid(), + amalgRowMapGIDs(), // View, + A->getRowMap()->getIndexBase(), comm); + + Teuchos::RCP amalgColMap = + MapFactory::Build(lib, Teuchos::OrdinalTraits::invalid(), + amalgColMapGIDs(), // View, + A->getRangeMap()->getIndexBase(), comm); + + // end fill nodal maps + + // start variable dof amalgamation + + Teuchos::RCP Awrap = + Teuchos::rcp_dynamic_cast(A); + Teuchos::RCP Acrs = Awrap->getCrsMatrix(); + // Acrs->describe(*fancy, Teuchos::VERB_EXTREME); + + size_t nNonZeros = 0; + std::vector isNonZero(nLocalPlusGhostDofs, false); + std::vector nonZeroList(nLocalPlusGhostDofs); // ??? 
+ + // also used in DetectDirichletExt + Teuchos::RCP diagVecUnique = VectorFactory::Build(A->getRowMap()); + Teuchos::RCP diagVec = VectorFactory::Build(A->getColMap()); + A->getLocalDiagCopy(*diagVecUnique); + diagVec->doImport(*diagVecUnique, *dofImporter, Xpetra::INSERT); + Teuchos::ArrayRCP diagVecData = diagVec->getData(0); + + Teuchos::ArrayRCP rowptr(Acrs->getLocalNumRows()); + Teuchos::ArrayRCP colind(Acrs->getLocalNumEntries()); + Teuchos::ArrayRCP values(Acrs->getLocalNumEntries()); + Acrs->getAllValues(rowptr, colind, values); + + // create arrays for amalgamated matrix + Teuchos::ArrayRCP amalgRowPtr(nLocalNodes + 1); + Teuchos::ArrayRCP amalgCols(rowptr[rowptr.size() - 1]); + + LocalOrdinal oldBlockRow = 0; + LocalOrdinal blockRow = 0; + LocalOrdinal blockColumn = 0; + + size_t newNzs = 0; + amalgRowPtr[0] = newNzs; + + bool doNotDrop = false; + if (amalgDropTol == Teuchos::ScalarTraits::zero()) + doNotDrop = true; + if (values.size() == 0) + doNotDrop = true; + + for (decltype(rowptr.size()) i = 0; i < rowptr.size() - 1; i++) { + blockRow = std::floor(map[i] / maxDofPerNode); + if (blockRow != oldBlockRow) { + // zero out info recording nonzeros in oldBlockRow + for (size_t j = 0; j < nNonZeros; j++) + isNonZero[nonZeroList[j]] = false; + nNonZeros = 0; + amalgRowPtr[blockRow] = newNzs; // record start of next row } - - Teuchos::RCP amalgRowMap = MapFactory::Build(lib, - Teuchos::OrdinalTraits::invalid(), - amalgRowMapGIDs(), //View, - A->getRowMap()->getIndexBase(), - comm); - - Teuchos::RCP amalgColMap = MapFactory::Build(lib, - Teuchos::OrdinalTraits::invalid(), - amalgColMapGIDs(), //View, - A->getRangeMap()->getIndexBase(), - comm); - - // end fill nodal maps - - - // start variable dof amalgamation - - Teuchos::RCP Awrap = Teuchos::rcp_dynamic_cast(A); - Teuchos::RCP Acrs = Awrap->getCrsMatrix(); - //Acrs->describe(*fancy, Teuchos::VERB_EXTREME); - - size_t nNonZeros = 0; - std::vector isNonZero(nLocalPlusGhostDofs,false); - std::vector 
nonZeroList(nLocalPlusGhostDofs); // ??? - - // also used in DetectDirichletExt - Teuchos::RCP diagVecUnique = VectorFactory::Build(A->getRowMap()); - Teuchos::RCP diagVec = VectorFactory::Build(A->getColMap()); - A->getLocalDiagCopy(*diagVecUnique); - diagVec->doImport(*diagVecUnique, *dofImporter, Xpetra::INSERT); - Teuchos::ArrayRCP< const Scalar > diagVecData = diagVec->getData(0); - - Teuchos::ArrayRCP rowptr(Acrs->getLocalNumRows()); - Teuchos::ArrayRCP colind(Acrs->getLocalNumEntries()); - Teuchos::ArrayRCP values(Acrs->getLocalNumEntries()); - Acrs->getAllValues(rowptr, colind, values); - - - // create arrays for amalgamated matrix - Teuchos::ArrayRCP amalgRowPtr(nLocalNodes+1); - Teuchos::ArrayRCP amalgCols(rowptr[rowptr.size()-1]); - - LocalOrdinal oldBlockRow = 0; - LocalOrdinal blockRow = 0; - LocalOrdinal blockColumn = 0; - - size_t newNzs = 0; - amalgRowPtr[0] = newNzs; - - bool doNotDrop = false; - if (amalgDropTol == Teuchos::ScalarTraits::zero()) doNotDrop = true; - if (values.size() == 0) doNotDrop = true; - - for(decltype(rowptr.size()) i = 0; i < rowptr.size()-1; i++) { - blockRow = std::floor( map[i] / maxDofPerNode); - if (blockRow != oldBlockRow) { - // zero out info recording nonzeros in oldBlockRow - for(size_t j = 0; j < nNonZeros; j++) isNonZero[nonZeroList[j]] = false; - nNonZeros = 0; - amalgRowPtr[blockRow] = newNzs; // record start of next row - } - for (size_t j = rowptr[i]; j < rowptr[i+1]; j++) { - if(doNotDrop == true || - ( STS::magnitude(values[j] / STS::magnitude(sqrt(STS::magnitude(diagVecData[i]) * STS::magnitude(diagVecData[colind[j]]))) ) >= STS::magnitude(amalgDropTol) )) { - blockColumn = myLocalNodeIds[colind[j]]; - if(isNonZero[blockColumn] == false) { - isNonZero[blockColumn] = true; - nonZeroList[nNonZeros++] = blockColumn; - amalgCols[newNzs++] = blockColumn; - } + for (size_t j = rowptr[i]; j < rowptr[i + 1]; j++) { + if (doNotDrop == true || + (STS::magnitude( + values[j] / + 
STS::magnitude(sqrt(STS::magnitude(diagVecData[i]) * + STS::magnitude(diagVecData[colind[j]])))) >= + STS::magnitude(amalgDropTol))) { + blockColumn = myLocalNodeIds[colind[j]]; + if (isNonZero[blockColumn] == false) { + isNonZero[blockColumn] = true; + nonZeroList[nNonZeros++] = blockColumn; + amalgCols[newNzs++] = blockColumn; } } - oldBlockRow = blockRow; } - amalgRowPtr[blockRow+1] = newNzs; - - TEUCHOS_TEST_FOR_EXCEPTION((blockRow+1 != Teuchos::as(nLocalNodes)) && (nLocalNodes !=0), MueLu::Exceptions::RuntimeError, "VariableDofsPerNodeAmalgamation: error, computed # block rows (" << blockRow+1 <<") != nLocalNodes (" << nLocalNodes <<")"); - - amalgCols.resize(amalgRowPtr[nLocalNodes]); - - // end variableDofAmalg - - // begin rm differentDofsCrossings - - // Remove matrix entries (i,j) where the ith node and the jth node have - // different dofs that are 'present' - // Specifically, on input: - // dofPresent[i*maxDofPerNode+k] indicates whether or not the kth - // dof at the ith node is present in the - // variable dof matrix (e.g., the ith node - // has an air pressure dof). true means - // the dof is present while false means it - // is not. - // We create a unique id for the ith node (i.e. 
uniqueId[i]) via - // sum_{k=0 to maxDofPerNode-1} dofPresent[i*maxDofPerNode+k]*2^k - // and use this unique idea to remove entries (i,j) when uniqueId[i]!=uniqueId[j] - - Teuchos::ArrayRCP uniqueId(nLocalPlusGhostNodes); // unique id associated with DOF - std::vector keep(amalgRowPtr[amalgRowPtr.size()-1],true); // keep connection associated with node - - size_t ii = 0; // iteration index for present dofs - for(decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size()-1; i++) { - LocalOrdinal temp = 1; // basis for dof-id - uniqueId[i] = 0; - for (decltype(maxDofPerNode) j = 0; j < maxDofPerNode; j++) { - if (dofPresent[ii++]) uniqueId[i] += temp; // encode dof to be present - temp = temp * 2; // check next dof - } + oldBlockRow = blockRow; + } + amalgRowPtr[blockRow + 1] = newNzs; + + TEUCHOS_TEST_FOR_EXCEPTION( + (blockRow + 1 != Teuchos::as(nLocalNodes)) && (nLocalNodes != 0), + MueLu::Exceptions::RuntimeError, + "VariableDofsPerNodeAmalgamation: error, computed # block rows (" + << blockRow + 1 << ") != nLocalNodes (" << nLocalNodes << ")"); + + amalgCols.resize(amalgRowPtr[nLocalNodes]); + + // end variableDofAmalg + + // begin rm differentDofsCrossings + + // Remove matrix entries (i,j) where the ith node and the jth node have + // different dofs that are 'present' + // Specifically, on input: + // dofPresent[i*maxDofPerNode+k] indicates whether or not the kth + // dof at the ith node is present in the + // variable dof matrix (e.g., the ith node + // has an air pressure dof). true means + // the dof is present while false means it + // is not. + // We create a unique id for the ith node (i.e. 
uniqueId[i]) via + // sum_{k=0 to maxDofPerNode-1} dofPresent[i*maxDofPerNode+k]*2^k + // and use this unique idea to remove entries (i,j) when + // uniqueId[i]!=uniqueId[j] + + Teuchos::ArrayRCP uniqueId( + nLocalPlusGhostNodes); // unique id associated with DOF + std::vector keep(amalgRowPtr[amalgRowPtr.size() - 1], + true); // keep connection associated with node + + size_t ii = 0; // iteration index for present dofs + for (decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size() - 1; i++) { + LocalOrdinal temp = 1; // basis for dof-id + uniqueId[i] = 0; + for (decltype(maxDofPerNode) j = 0; j < maxDofPerNode; j++) { + if (dofPresent[ii++]) + uniqueId[i] += temp; // encode dof to be present + temp = temp * 2; // check next dof } + } - Teuchos::RCP nodeImporter = ImportFactory::Build(amalgRowMap, amalgColMap); + Teuchos::RCP nodeImporter = + ImportFactory::Build(amalgRowMap, amalgColMap); - RCP nodeIdSrc = Xpetra::VectorFactory::Build(amalgRowMap,true); - RCP nodeIdTarget = Xpetra::VectorFactory::Build(amalgColMap,true); + RCP nodeIdSrc = + Xpetra::VectorFactory::Build(amalgRowMap, true); + RCP nodeIdTarget = + Xpetra::VectorFactory::Build(amalgColMap, true); - Teuchos::ArrayRCP< LocalOrdinal > nodeIdSrcData = nodeIdSrc->getDataNonConst(0); - for(decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size()-1; i++) { - nodeIdSrcData[i] = uniqueId[i]; - } + Teuchos::ArrayRCP nodeIdSrcData = nodeIdSrc->getDataNonConst(0); + for (decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size() - 1; i++) { + nodeIdSrcData[i] = uniqueId[i]; + } - nodeIdTarget->doImport(*nodeIdSrc, *nodeImporter, Xpetra::INSERT); + nodeIdTarget->doImport(*nodeIdSrc, *nodeImporter, Xpetra::INSERT); - Teuchos::ArrayRCP< const LocalOrdinal > nodeIdTargetData = nodeIdTarget->getData(0); - for(decltype(uniqueId.size()) i = 0; i < uniqueId.size(); i++) { - uniqueId[i] = nodeIdTargetData[i]; - } + Teuchos::ArrayRCP nodeIdTargetData = + nodeIdTarget->getData(0); + for (decltype(uniqueId.size()) i = 
0; i < uniqueId.size(); i++) { + uniqueId[i] = nodeIdTargetData[i]; + } - // nodal comm uniqueId, myLocalNodeIds + // nodal comm uniqueId, myLocalNodeIds - // uniqueId now should contain ghosted data + // uniqueId now should contain ghosted data - for(decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size()-1; i++) { - for(size_t j = amalgRowPtr[i]; j < amalgRowPtr[i+1]; j++) { - if (uniqueId[i] != uniqueId[amalgCols[j]]) keep [j] = false; - } + for (decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size() - 1; i++) { + for (size_t j = amalgRowPtr[i]; j < amalgRowPtr[i + 1]; j++) { + if (uniqueId[i] != uniqueId[amalgCols[j]]) + keep[j] = false; } + } - // squeeze out hard-coded zeros from CSR arrays - Teuchos::ArrayRCP amalgVals; - this->squeezeOutNnzs(amalgRowPtr,amalgCols,amalgVals,keep); - - typedef Xpetra::MultiVectorFactory::magnitudeType,LO,GO,NO> dxMVf; - RCP ghostedCoords = dxMVf::Build(amalgColMap,Coords->getNumVectors()); - - TEUCHOS_TEST_FOR_EXCEPTION(amalgRowMap->getLocalNumElements() != Coords->getMap()->getLocalNumElements(), MueLu::Exceptions::RuntimeError, "MueLu::VariableDofLaplacianFactory: the number of Coordinates and amalgamated nodes is inconsistent."); - - // Coords might live on a special nodeMap with consecutive ids (the natural numbering) - // The amalgRowMap might have the same number of entries, but with holes in the ids. - // e.g. 0,3,6,9,... as GIDs. - // We need the ghosted Coordinates in the buildLaplacian routine. 
But we access the data - // through getData only, i.e., the global ids are not interesting as long as we do not change - // the ordering of the entries - Coords->replaceMap(amalgRowMap); - ghostedCoords->doImport(*Coords, *nodeImporter, Xpetra::INSERT); - - Teuchos::ArrayRCP lapVals(amalgRowPtr[nLocalNodes]); - this->buildLaplacian(amalgRowPtr, amalgCols, lapVals, Coords->getNumVectors(), ghostedCoords); - - // sort column GIDs - for(decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size()-1; i++) { - size_t j = amalgRowPtr[i]; - this->MueLu_az_sort(&(amalgCols[j]), amalgRowPtr[i+1] - j, NULL, &(lapVals[j])); - } + // squeeze out hard-coded zeros from CSR arrays + Teuchos::ArrayRCP amalgVals; + this->squeezeOutNnzs(amalgRowPtr, amalgCols, amalgVals, keep); + + typedef Xpetra::MultiVectorFactory< + typename Teuchos::ScalarTraits::magnitudeType, LO, GO, NO> + dxMVf; + RCP ghostedCoords = dxMVf::Build(amalgColMap, Coords->getNumVectors()); + + TEUCHOS_TEST_FOR_EXCEPTION( + amalgRowMap->getLocalNumElements() != + Coords->getMap()->getLocalNumElements(), + MueLu::Exceptions::RuntimeError, + "MueLu::VariableDofLaplacianFactory: the number of Coordinates and " + "amalgamated nodes is inconsistent."); + + // Coords might live on a special nodeMap with consecutive ids (the natural + // numbering) The amalgRowMap might have the same number of entries, but with + // holes in the ids. e.g. 0,3,6,9,... as GIDs. We need the ghosted Coordinates + // in the buildLaplacian routine. 
But we access the data through getData only, + // i.e., the global ids are not interesting as long as we do not change the + // ordering of the entries + Coords->replaceMap(amalgRowMap); + ghostedCoords->doImport(*Coords, *nodeImporter, Xpetra::INSERT); + + Teuchos::ArrayRCP lapVals(amalgRowPtr[nLocalNodes]); + this->buildLaplacian(amalgRowPtr, amalgCols, lapVals, Coords->getNumVectors(), + ghostedCoords); + + // sort column GIDs + for (decltype(amalgRowPtr.size()) i = 0; i < amalgRowPtr.size() - 1; i++) { + size_t j = amalgRowPtr[i]; + this->MueLu_az_sort(&(amalgCols[j]), amalgRowPtr[i + 1] - j, + NULL, &(lapVals[j])); + } - // Caluclate status array for next level - Teuchos::Array status(nLocalNodes * maxDofPerNode); + // Caluclate status array for next level + Teuchos::Array status(nLocalNodes * maxDofPerNode); - // dir or not Teuchos::ArrayRCP dirOrNot - for(decltype(status.size()) i = 0; i < status.size(); i++) status[i] = 's'; - for(decltype(status.size()) i = 0; i < status.size(); i++) { - if(dofPresent[i] == false) status[i] = 'p'; - } - if(dirOrNot.size() > 0) { - for(decltype(map.size()) i = 0; i < map.size(); i++) { - if(dirOrNot[i] == true){ - status[map[i]] = 'd'; - } + // dir or not Teuchos::ArrayRCP dirOrNot + for (decltype(status.size()) i = 0; i < status.size(); i++) + status[i] = 's'; + for (decltype(status.size()) i = 0; i < status.size(); i++) { + if (dofPresent[i] == false) + status[i] = 'p'; + } + if (dirOrNot.size() > 0) { + for (decltype(map.size()) i = 0; i < map.size(); i++) { + if (dirOrNot[i] == true) { + status[map[i]] = 'd'; } } - Set(currentLevel,"DofStatus",status); - - // end status array - - Teuchos::RCP lapCrsMat = CrsMatrixFactory::Build(amalgRowMap, amalgColMap, 10); // TODO better approx for max nnz per row + } + Set(currentLevel, "DofStatus", status); - for (size_t i = 0; i < nLocalNodes; i++) { - lapCrsMat->insertLocalValues(i, amalgCols.view(amalgRowPtr[i],amalgRowPtr[i+1]-amalgRowPtr[i]), - 
lapVals.view(amalgRowPtr[i],amalgRowPtr[i+1]-amalgRowPtr[i])); - } - lapCrsMat->fillComplete(amalgRowMap,amalgRowMap); + // end status array - //lapCrsMat->describe(*fancy, Teuchos::VERB_EXTREME); + Teuchos::RCP lapCrsMat = CrsMatrixFactory::Build( + amalgRowMap, amalgColMap, 10); // TODO better approx for max nnz per row - Teuchos::RCP lapMat = Teuchos::rcp(new CrsMatrixWrap(lapCrsMat)); - Set(currentLevel,"A",lapMat); + for (size_t i = 0; i < nLocalNodes; i++) { + lapCrsMat->insertLocalValues( + i, amalgCols.view(amalgRowPtr[i], amalgRowPtr[i + 1] - amalgRowPtr[i]), + lapVals.view(amalgRowPtr[i], amalgRowPtr[i + 1] - amalgRowPtr[i])); } - - template - void VariableDofLaplacianFactory::buildLaplacian(const Teuchos::ArrayRCP& rowPtr, const Teuchos::ArrayRCP& cols, Teuchos::ArrayRCP& vals,const size_t& numdim, const RCP::magnitudeType,LocalOrdinal,GlobalOrdinal,Node> > & ghostedCoords) const { - TEUCHOS_TEST_FOR_EXCEPTION(numdim != 2 && numdim !=3, MueLu::Exceptions::RuntimeError,"buildLaplacian only works for 2d or 3d examples. 
numdim = " << numdim); - - if(numdim == 2) { // 2d - Teuchos::ArrayRCP< const typename Teuchos::ScalarTraits::magnitudeType > x = ghostedCoords->getData(0); - Teuchos::ArrayRCP< const typename Teuchos::ScalarTraits::magnitudeType > y = ghostedCoords->getData(1); - - for(decltype(rowPtr.size()) i = 0; i < rowPtr.size() - 1; i++) { - Scalar sum = Teuchos::ScalarTraits::zero(); - LocalOrdinal diag = -1; - for(size_t j = rowPtr[i]; j < rowPtr[i+1]; j++) { - if(cols[j] != Teuchos::as(i)){ - vals[j] = std::sqrt( (x[i]-x[cols[j]]) * (x[i]-x[cols[j]]) + - (y[i]-y[cols[j]]) * (y[i]-y[cols[j]]) ); - TEUCHOS_TEST_FOR_EXCEPTION(vals[j] == Teuchos::ScalarTraits::zero(), MueLu::Exceptions::RuntimeError, "buildLaplacian: error, " << i << " and " << cols[j] << " have same coordinates: " << x[i] << " and " << y[i]); - vals[j] = -Teuchos::ScalarTraits::one()/vals[j]; - sum = sum - vals[j]; - } - else diag = j; - } - if(sum == Teuchos::ScalarTraits::zero()) sum = Teuchos::ScalarTraits::one(); - TEUCHOS_TEST_FOR_EXCEPTION(diag == -1, MueLu::Exceptions::RuntimeError, "buildLaplacian: error, row " << i << " has zero diagonal!"); - - vals[diag] = sum; + lapCrsMat->fillComplete(amalgRowMap, amalgRowMap); + + // lapCrsMat->describe(*fancy, Teuchos::VERB_EXTREME); + + Teuchos::RCP lapMat = Teuchos::rcp(new CrsMatrixWrap(lapCrsMat)); + Set(currentLevel, "A", lapMat); +} + +template +void VariableDofLaplacianFactory:: + buildLaplacian( + const Teuchos::ArrayRCP &rowPtr, + const Teuchos::ArrayRCP &cols, + Teuchos::ArrayRCP &vals, const size_t &numdim, + const RCP::magnitudeType, LocalOrdinal, + GlobalOrdinal, Node>> &ghostedCoords) const { + TEUCHOS_TEST_FOR_EXCEPTION( + numdim != 2 && numdim != 3, MueLu::Exceptions::RuntimeError, + "buildLaplacian only works for 2d or 3d examples. 
numdim = " << numdim); + + if (numdim == 2) { // 2d + Teuchos::ArrayRCP< + const typename Teuchos::ScalarTraits::magnitudeType> + x = ghostedCoords->getData(0); + Teuchos::ArrayRCP< + const typename Teuchos::ScalarTraits::magnitudeType> + y = ghostedCoords->getData(1); + + for (decltype(rowPtr.size()) i = 0; i < rowPtr.size() - 1; i++) { + Scalar sum = Teuchos::ScalarTraits::zero(); + LocalOrdinal diag = -1; + for (size_t j = rowPtr[i]; j < rowPtr[i + 1]; j++) { + if (cols[j] != Teuchos::as(i)) { + vals[j] = std::sqrt((x[i] - x[cols[j]]) * (x[i] - x[cols[j]]) + + (y[i] - y[cols[j]]) * (y[i] - y[cols[j]])); + TEUCHOS_TEST_FOR_EXCEPTION( + vals[j] == Teuchos::ScalarTraits::zero(), + MueLu::Exceptions::RuntimeError, + "buildLaplacian: error, " << i << " and " << cols[j] + << " have same coordinates: " << x[i] + << " and " << y[i]); + vals[j] = -Teuchos::ScalarTraits::one() / vals[j]; + sum = sum - vals[j]; + } else + diag = j; } - } else { // 3d - Teuchos::ArrayRCP< const typename Teuchos::ScalarTraits::magnitudeType > x = ghostedCoords->getData(0); - Teuchos::ArrayRCP< const typename Teuchos::ScalarTraits::magnitudeType > y = ghostedCoords->getData(1); - Teuchos::ArrayRCP< const typename Teuchos::ScalarTraits::magnitudeType > z = ghostedCoords->getData(2); - - for(decltype(rowPtr.size()) i = 0; i < rowPtr.size() - 1; i++) { - Scalar sum = Teuchos::ScalarTraits::zero(); - LocalOrdinal diag = -1; - for(size_t j = rowPtr[i]; j < rowPtr[i+1]; j++) { - if(cols[j] != Teuchos::as(i)){ - vals[j] = std::sqrt( (x[i]-x[cols[j]]) * (x[i]-x[cols[j]]) + - (y[i]-y[cols[j]]) * (y[i]-y[cols[j]]) + - (z[i]-z[cols[j]]) * (z[i]-z[cols[j]]) ); - - TEUCHOS_TEST_FOR_EXCEPTION(vals[j] == Teuchos::ScalarTraits::zero(), MueLu::Exceptions::RuntimeError, "buildLaplacian: error, " << i << " and " << cols[j] << " have same coordinates: " << x[i] << " and " << y[i] << " and " << z[i]); - - vals[j] = -Teuchos::ScalarTraits::one()/vals[j]; - sum = sum - vals[j]; - } - else diag = j; - } - if(sum == 
Teuchos::ScalarTraits::zero()) sum = Teuchos::ScalarTraits::one(); - TEUCHOS_TEST_FOR_EXCEPTION(diag == -1, MueLu::Exceptions::RuntimeError, "buildLaplacian: error, row " << i << " has zero diagonal!"); + if (sum == Teuchos::ScalarTraits::zero()) + sum = Teuchos::ScalarTraits::one(); + TEUCHOS_TEST_FOR_EXCEPTION(diag == -1, MueLu::Exceptions::RuntimeError, + "buildLaplacian: error, row " + << i << " has zero diagonal!"); - vals[diag] = sum; + vals[diag] = sum; + } + } else { // 3d + Teuchos::ArrayRCP< + const typename Teuchos::ScalarTraits::magnitudeType> + x = ghostedCoords->getData(0); + Teuchos::ArrayRCP< + const typename Teuchos::ScalarTraits::magnitudeType> + y = ghostedCoords->getData(1); + Teuchos::ArrayRCP< + const typename Teuchos::ScalarTraits::magnitudeType> + z = ghostedCoords->getData(2); + + for (decltype(rowPtr.size()) i = 0; i < rowPtr.size() - 1; i++) { + Scalar sum = Teuchos::ScalarTraits::zero(); + LocalOrdinal diag = -1; + for (size_t j = rowPtr[i]; j < rowPtr[i + 1]; j++) { + if (cols[j] != Teuchos::as(i)) { + vals[j] = std::sqrt((x[i] - x[cols[j]]) * (x[i] - x[cols[j]]) + + (y[i] - y[cols[j]]) * (y[i] - y[cols[j]]) + + (z[i] - z[cols[j]]) * (z[i] - z[cols[j]])); + + TEUCHOS_TEST_FOR_EXCEPTION( + vals[j] == Teuchos::ScalarTraits::zero(), + MueLu::Exceptions::RuntimeError, + "buildLaplacian: error, " << i << " and " << cols[j] + << " have same coordinates: " << x[i] + << " and " << y[i] << " and " << z[i]); + + vals[j] = -Teuchos::ScalarTraits::one() / vals[j]; + sum = sum - vals[j]; + } else + diag = j; } + if (sum == Teuchos::ScalarTraits::zero()) + sum = Teuchos::ScalarTraits::one(); + TEUCHOS_TEST_FOR_EXCEPTION(diag == -1, MueLu::Exceptions::RuntimeError, + "buildLaplacian: error, row " + << i << " has zero diagonal!"); + + vals[diag] = sum; } } - - template - void VariableDofLaplacianFactory::squeezeOutNnzs(Teuchos::ArrayRCP & rowPtr, Teuchos::ArrayRCP & cols, Teuchos::ArrayRCP & vals, const std::vector& keep) const { - // get rid of 
nonzero entries that have 0's in them and properly change - // the row ptr array to reflect this removal (either vals == NULL or vals != NULL) - // Note, the arrays are squeezed. No memory is freed. - - size_t count = 0; - - size_t nRows = rowPtr.size()-1; - if(vals.size() > 0) { - for(size_t i = 0; i < nRows; i++) { - size_t newStart = count; - for(size_t j = rowPtr[i]; j < rowPtr[i+1]; j++) { - if(vals[j] != Teuchos::ScalarTraits::zero()) { - cols[count ] = cols[j]; - vals[count++] = vals[j]; - } +} + +template +void VariableDofLaplacianFactory:: + squeezeOutNnzs(Teuchos::ArrayRCP &rowPtr, + Teuchos::ArrayRCP &cols, + Teuchos::ArrayRCP &vals, + const std::vector &keep) const { + // get rid of nonzero entries that have 0's in them and properly change + // the row ptr array to reflect this removal (either vals == NULL or vals != + // NULL) Note, the arrays are squeezed. No memory is freed. + + size_t count = 0; + + size_t nRows = rowPtr.size() - 1; + if (vals.size() > 0) { + for (size_t i = 0; i < nRows; i++) { + size_t newStart = count; + for (size_t j = rowPtr[i]; j < rowPtr[i + 1]; j++) { + if (vals[j] != Teuchos::ScalarTraits::zero()) { + cols[count] = cols[j]; + vals[count++] = vals[j]; } - rowPtr[i] = newStart; } - } else { - for (size_t i = 0; i < nRows; i++) { - size_t newStart = count; - for(size_t j = rowPtr[i]; j < rowPtr[i+1]; j++) { - if (keep[j] == true) { - cols[count++] = cols[j]; - } + rowPtr[i] = newStart; + } + } else { + for (size_t i = 0; i < nRows; i++) { + size_t newStart = count; + for (size_t j = rowPtr[i]; j < rowPtr[i + 1]; j++) { + if (keep[j] == true) { + cols[count++] = cols[j]; } - rowPtr[i] = newStart; } + rowPtr[i] = newStart; } - rowPtr[nRows] = count; } - - template - void VariableDofLaplacianFactory::buildPaddedMap(const Teuchos::ArrayRCP & dofPresent, std::vector & map, size_t nDofs) const { - size_t count = 0; - for (decltype(dofPresent.size()) i = 0; i < dofPresent.size(); i++) - if(dofPresent[i] == 1) map[count++] = 
Teuchos::as(i); - TEUCHOS_TEST_FOR_EXCEPTION(nDofs != count, MueLu::Exceptions::RuntimeError, "VariableDofLaplacianFactory::buildPaddedMap: #dofs in dofPresent does not match the expected value (number of rows of A): " << nDofs << " vs. " << count); + rowPtr[nRows] = count; +} + +template +void VariableDofLaplacianFactory:: + buildPaddedMap(const Teuchos::ArrayRCP &dofPresent, + std::vector &map, size_t nDofs) const { + size_t count = 0; + for (decltype(dofPresent.size()) i = 0; i < dofPresent.size(); i++) + if (dofPresent[i] == 1) + map[count++] = Teuchos::as(i); + TEUCHOS_TEST_FOR_EXCEPTION( + nDofs != count, MueLu::Exceptions::RuntimeError, + "VariableDofLaplacianFactory::buildPaddedMap: #dofs in dofPresent does " + "not match the expected value (number of rows of A): " + << nDofs << " vs. " << count); +} + +template +void VariableDofLaplacianFactory:: + assignGhostLocalNodeIds(const Teuchos::RCP &rowDofMap, + const Teuchos::RCP &colDofMap, + std::vector &myLocalNodeIds, + const std::vector &dofMap, + size_t maxDofPerNode, size_t &nLocalNodes, + size_t &nLocalPlusGhostNodes, + Teuchos::RCP> comm) const { + + size_t nLocalDofs = rowDofMap->getLocalNumElements(); + size_t nLocalPlusGhostDofs = + colDofMap->getLocalNumElements(); // TODO remove parameters + + // create importer for dof-based information + Teuchos::RCP importer = ImportFactory::Build(rowDofMap, colDofMap); + + // create a vector living on column map of A (dof based) + Teuchos::RCP localNodeIdsTemp = + LOVectorFactory::Build(rowDofMap, true); + Teuchos::RCP localNodeIds = LOVectorFactory::Build(colDofMap, true); + + // fill local dofs (padded local ids) + { + Teuchos::ArrayRCP localNodeIdsTempData = + localNodeIdsTemp->getDataNonConst(0); + for (size_t i = 0; i < localNodeIdsTemp->getLocalLength(); i++) + localNodeIdsTempData[i] = + std::floor(dofMap[i] / maxDofPerNode); } - template - void VariableDofLaplacianFactory::assignGhostLocalNodeIds(const Teuchos::RCP & rowDofMap, const Teuchos::RCP & 
colDofMap, std::vector & myLocalNodeIds, const std::vector & dofMap, size_t maxDofPerNode, size_t& nLocalNodes, size_t& nLocalPlusGhostNodes, Teuchos::RCP< const Teuchos::Comm< int > > comm) const { - - size_t nLocalDofs = rowDofMap->getLocalNumElements(); - size_t nLocalPlusGhostDofs = colDofMap->getLocalNumElements(); // TODO remove parameters - - // create importer for dof-based information - Teuchos::RCP importer = ImportFactory::Build(rowDofMap, colDofMap); - - // create a vector living on column map of A (dof based) - Teuchos::RCP localNodeIdsTemp = LOVectorFactory::Build(rowDofMap,true); - Teuchos::RCP localNodeIds = LOVectorFactory::Build(colDofMap,true); - - // fill local dofs (padded local ids) - { - Teuchos::ArrayRCP< LocalOrdinal > localNodeIdsTempData = localNodeIdsTemp->getDataNonConst(0); - for(size_t i = 0; i < localNodeIdsTemp->getLocalLength(); i++) - localNodeIdsTempData[i] = std::floor( dofMap[i] / maxDofPerNode ); - } - - localNodeIds->doImport(*localNodeIdsTemp, *importer, Xpetra::INSERT); - Teuchos::ArrayRCP< const LocalOrdinal > localNodeIdsData = localNodeIds->getData(0); - - // Note: localNodeIds contains local ids for the padded version as vector values + localNodeIds->doImport(*localNodeIdsTemp, *importer, Xpetra::INSERT); + Teuchos::ArrayRCP localNodeIdsData = + localNodeIds->getData(0); + // Note: localNodeIds contains local ids for the padded version as vector + // values - // we use Scalar instead of int as type - Teuchos::RCP myProcTemp = LOVectorFactory::Build(rowDofMap,true); - Teuchos::RCP myProc = LOVectorFactory::Build(colDofMap,true); + // we use Scalar instead of int as type + Teuchos::RCP myProcTemp = LOVectorFactory::Build(rowDofMap, true); + Teuchos::RCP myProc = LOVectorFactory::Build(colDofMap, true); - // fill local dofs (padded local ids) - { - Teuchos::ArrayRCP< LocalOrdinal > myProcTempData = myProcTemp->getDataNonConst(0); - for(size_t i = 0; i < myProcTemp->getLocalLength(); i++) - myProcTempData[i] = 
Teuchos::as(comm->getRank()); - } - myProc->doImport(*myProcTemp, *importer, Xpetra::INSERT); - Teuchos::ArrayRCP myProcData = myProc->getDataNonConst(0); // we have to modify the data (therefore the non-const version) - - // At this point, the ghost part of localNodeIds corresponds to the local ids - // associated with the current owning processor. We want to convert these to - // local ids associated with the processor on which these are ghosts. - // Thus we have to re-number them. In doing this re-numbering we must make sure - // that we find all ghosts with the same id & proc and assign a unique local - // id to this group (id&proc). To do this find, we sort all ghost entries in - // localNodeIds that are owned by the same processor. Then we can look for - // duplicates (i.e., several ghost entries corresponding to dofs with the same - // node id) easily and make sure these are all assigned to the same local id. - // To do the sorting we'll make a temporary copy of the ghosts via tempId and - // tempProc and sort this multiple times for each group owned by the same proc. 
- - - std::vector location(nLocalPlusGhostDofs - nLocalDofs + 1); - std::vector tempId (nLocalPlusGhostDofs - nLocalDofs + 1); - std::vector tempProc(nLocalPlusGhostDofs - nLocalDofs + 1); - - size_t notProcessed = nLocalDofs; // iteration index over all ghosted dofs - size_t tempIndex = 0; - size_t first = tempIndex; - LocalOrdinal neighbor; - - while (notProcessed < nLocalPlusGhostDofs) { - neighbor = myProcData[notProcessed]; // get processor id of not-processed element - first = tempIndex; - location[tempIndex] = notProcessed; - tempId[tempIndex++] = localNodeIdsData[notProcessed]; - myProcData[notProcessed] = -1 - neighbor; - - for(size_t i = notProcessed + 1; i < nLocalPlusGhostDofs; i++) { - if(myProcData[i] == neighbor) { - location[tempIndex] = i; - tempId[tempIndex++] = localNodeIdsData[i]; - myProcData[i] = -1; // mark as visited - } + // fill local dofs (padded local ids) + { + Teuchos::ArrayRCP myProcTempData = + myProcTemp->getDataNonConst(0); + for (size_t i = 0; i < myProcTemp->getLocalLength(); i++) + myProcTempData[i] = Teuchos::as(comm->getRank()); + } + myProc->doImport(*myProcTemp, *importer, Xpetra::INSERT); + Teuchos::ArrayRCP myProcData = myProc->getDataNonConst( + 0); // we have to modify the data (therefore the non-const version) + + // At this point, the ghost part of localNodeIds corresponds to the local ids + // associated with the current owning processor. We want to convert these to + // local ids associated with the processor on which these are ghosts. + // Thus we have to re-number them. In doing this re-numbering we must make + // sure that we find all ghosts with the same id & proc and assign a unique + // local id to this group (id&proc). To do this find, we sort all ghost + // entries in localNodeIds that are owned by the same processor. Then we can + // look for duplicates (i.e., several ghost entries corresponding to dofs with + // the same node id) easily and make sure these are all assigned to the same + // local id. 
To do the sorting we'll make a temporary copy of the ghosts via + // tempId and tempProc and sort this multiple times for each group owned by + // the same proc. + + std::vector location(nLocalPlusGhostDofs - nLocalDofs + 1); + std::vector tempId(nLocalPlusGhostDofs - nLocalDofs + 1); + std::vector tempProc(nLocalPlusGhostDofs - nLocalDofs + 1); + + size_t notProcessed = nLocalDofs; // iteration index over all ghosted dofs + size_t tempIndex = 0; + size_t first = tempIndex; + LocalOrdinal neighbor; + + while (notProcessed < nLocalPlusGhostDofs) { + neighbor = + myProcData[notProcessed]; // get processor id of not-processed element + first = tempIndex; + location[tempIndex] = notProcessed; + tempId[tempIndex++] = localNodeIdsData[notProcessed]; + myProcData[notProcessed] = -1 - neighbor; + + for (size_t i = notProcessed + 1; i < nLocalPlusGhostDofs; i++) { + if (myProcData[i] == neighbor) { + location[tempIndex] = i; + tempId[tempIndex++] = localNodeIdsData[i]; + myProcData[i] = -1; // mark as visited } - this->MueLu_az_sort(&(tempId[first]), tempIndex - first, &(location[first]), NULL); - for(size_t i = first; i < tempIndex; i++) tempProc[i] = neighbor; - - // increment index. 
Find next notProcessed dof index corresponding to first non-visited element - notProcessed++; - while ( (notProcessed < nLocalPlusGhostDofs) && (myProcData[notProcessed] < 0)) - notProcessed++; } - TEUCHOS_TEST_FOR_EXCEPTION(tempIndex != nLocalPlusGhostDofs-nLocalDofs, MueLu::Exceptions::RuntimeError,"Number of nonzero ghosts is inconsistent."); - - // Now assign ids to all ghost nodes (giving the same id to those with the - // same myProc[] and the same local id on the proc that actually owns the - // variable associated with the ghost - - nLocalNodes = 0; // initialize return value - if(nLocalDofs > 0) nLocalNodes = localNodeIdsData[nLocalDofs-1] + 1; - - nLocalPlusGhostNodes = nLocalNodes; // initialize return value - if(nLocalDofs < nLocalPlusGhostDofs) nLocalPlusGhostNodes++; // 1st ghost node is unique (not accounted for). number will be increased later, if there are more ghost nodes - - // check if two adjacent ghost dofs correspond to different nodes. To do this, - // check if they are from different processors or whether they have different - // local node ids - - // loop over all (remaining) ghost dofs - for (size_t i = nLocalDofs+1; i < nLocalPlusGhostDofs; i++) { - size_t lagged = nLocalPlusGhostNodes-1; - - // i is a new unique ghost node (not already accounted for) - if ((tempId[i-nLocalDofs] != tempId[i-1-nLocalDofs]) || - (tempProc[i-nLocalDofs] != tempProc[i-1-nLocalDofs])) - nLocalPlusGhostNodes++; // update number of ghost nodes - tempId[i-1-nLocalDofs] = lagged; - } - if (nLocalPlusGhostDofs > nLocalDofs) - tempId[nLocalPlusGhostDofs-1-nLocalDofs] = nLocalPlusGhostNodes - 1; - - // fill myLocalNodeIds array. 
Start with local part (not ghosted) - for(size_t i = 0; i < nLocalDofs; i++) - myLocalNodeIds[i] = std::floor( dofMap[i] / maxDofPerNode ); - - // copy ghosted nodal ids into myLocalNodeIds - for(size_t i = nLocalDofs; i < nLocalPlusGhostDofs; i++) - myLocalNodeIds[location[i-nLocalDofs]] = tempId[i-nLocalDofs]; - + this->MueLu_az_sort(&(tempId[first]), tempIndex - first, + &(location[first]), NULL); + for (size_t i = first; i < tempIndex; i++) + tempProc[i] = neighbor; + + // increment index. Find next notProcessed dof index corresponding to first + // non-visited element + notProcessed++; + while ((notProcessed < nLocalPlusGhostDofs) && + (myProcData[notProcessed] < 0)) + notProcessed++; + } + TEUCHOS_TEST_FOR_EXCEPTION(tempIndex != nLocalPlusGhostDofs - nLocalDofs, + MueLu::Exceptions::RuntimeError, + "Number of nonzero ghosts is inconsistent."); + + // Now assign ids to all ghost nodes (giving the same id to those with the + // same myProc[] and the same local id on the proc that actually owns the + // variable associated with the ghost + + nLocalNodes = 0; // initialize return value + if (nLocalDofs > 0) + nLocalNodes = localNodeIdsData[nLocalDofs - 1] + 1; + + nLocalPlusGhostNodes = nLocalNodes; // initialize return value + if (nLocalDofs < nLocalPlusGhostDofs) + nLocalPlusGhostNodes++; // 1st ghost node is unique (not accounted for). + // number will be increased later, if there are more + // ghost nodes + + // check if two adjacent ghost dofs correspond to different nodes. 
To do this, + // check if they are from different processors or whether they have different + // local node ids + + // loop over all (remaining) ghost dofs + for (size_t i = nLocalDofs + 1; i < nLocalPlusGhostDofs; i++) { + size_t lagged = nLocalPlusGhostNodes - 1; + + // i is a new unique ghost node (not already accounted for) + if ((tempId[i - nLocalDofs] != tempId[i - 1 - nLocalDofs]) || + (tempProc[i - nLocalDofs] != tempProc[i - 1 - nLocalDofs])) + nLocalPlusGhostNodes++; // update number of ghost nodes + tempId[i - 1 - nLocalDofs] = lagged; } + if (nLocalPlusGhostDofs > nLocalDofs) + tempId[nLocalPlusGhostDofs - 1 - nLocalDofs] = nLocalPlusGhostNodes - 1; + + // fill myLocalNodeIds array. Start with local part (not ghosted) + for (size_t i = 0; i < nLocalDofs; i++) + myLocalNodeIds[i] = std::floor(dofMap[i] / maxDofPerNode); -} /* MueLu */ + // copy ghosted nodal ids into myLocalNodeIds + for (size_t i = nLocalDofs; i < nLocalPlusGhostDofs; i++) + myLocalNodeIds[location[i - nLocalDofs]] = tempId[i - nLocalDofs]; +} +} // namespace MueLu -#endif /* PACKAGES_MUELU_SRC_GRAPH_MUELU_VARIABLEDOFLAPLACIANFACTORY_DEF_HPP_ */ +#endif /* PACKAGES_MUELU_SRC_GRAPH_MUELU_VARIABLEDOFLAPLACIANFACTORY_DEF_HPP_ \ + */ diff --git a/packages/muelu/src/Graph/MueLu_AggregationAlgorithmBase.hpp b/packages/muelu/src/Graph/MueLu_AggregationAlgorithmBase.hpp index 8a78e56cd9ca..0a8761549831 100644 --- a/packages/muelu/src/Graph/MueLu_AggregationAlgorithmBase.hpp +++ b/packages/muelu/src/Graph/MueLu_AggregationAlgorithmBase.hpp @@ -46,8 +46,8 @@ #ifndef MUELU_AGGREGATIONALGORITHMBASE_HPP_ #define MUELU_AGGREGATIONALGORITHMBASE_HPP_ -#include "MueLu_ConfigDefs.hpp" #include "MueLu_BaseClass.hpp" +#include "MueLu_ConfigDefs.hpp" #include "MueLu_Aggregates_fwd.hpp" @@ -57,40 +57,36 @@ namespace MueLu { - /*! - @class AggregationAlgorithmBase - @brief Pure virtual base class for all MueLu aggregation algorithms +/*! 
+ @class AggregationAlgorithmBase + @brief Pure virtual base class for all MueLu aggregation algorithms - @ingroup MueLuBaseClasses - */ - template - class AggregationAlgorithmBase : public BaseClass { + @ingroup MueLuBaseClasses + */ +template +class AggregationAlgorithmBase : public BaseClass { #undef MUELU_AGGREGATIONALGORITHMBASE_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - - //! @name Constructors/Destructors - //@{ - - //! Destructor. - virtual ~AggregationAlgorithmBase() {} +public: + //! @name Constructors/Destructors + //@{ - //@} + //! Destructor. + virtual ~AggregationAlgorithmBase() {} - //! @name Build routines - //@{ + //@} - //! BuildAggregates routine. - virtual void BuildAggregates(const Teuchos::ParameterList& params, - const GraphBase& graph, - Aggregates& aggregates, - std::vector& aggStat, - LO& numNonAggregatedNodes) const = 0; - //@} + //! @name Build routines + //@{ - }; + //! BuildAggregates routine. + virtual void BuildAggregates(const Teuchos::ParameterList ¶ms, + const GraphBase &graph, Aggregates &aggregates, + std::vector &aggStat, + LO &numNonAggregatedNodes) const = 0; + //@} +}; } // namespace MueLu diff --git a/packages/muelu/src/Graph/MueLu_AggregationAlgorithmBase_kokkos.hpp b/packages/muelu/src/Graph/MueLu_AggregationAlgorithmBase_kokkos.hpp index e758bdf1c84c..230692115524 100644 --- a/packages/muelu/src/Graph/MueLu_AggregationAlgorithmBase_kokkos.hpp +++ b/packages/muelu/src/Graph/MueLu_AggregationAlgorithmBase_kokkos.hpp @@ -52,47 +52,46 @@ #include "MueLu_Aggregates_fwd.hpp" -#include "MueLu_BaseClass.hpp" #include "MueLu_Aggregates.hpp" +#include "MueLu_BaseClass.hpp" #include "MueLu_LWGraph_kokkos.hpp" #include "MueLu_Types.hpp" namespace MueLu { - /*! - @class AggregationAlgorithmBase - @brief Pure virtual base class for all MueLu aggregation algorithms +/*! 
+ @class AggregationAlgorithmBase + @brief Pure virtual base class for all MueLu aggregation algorithms - @ingroup MueLuBaseClasses - */ - template - class AggregationAlgorithmBase_kokkos : public BaseClass { + @ingroup MueLuBaseClasses + */ +template +class AggregationAlgorithmBase_kokkos : public BaseClass { #undef MUELU_AGGREGATIONALGORITHMBASE_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - using device_type = typename LWGraph_kokkos::device_type; +public: + using device_type = typename LWGraph_kokkos::device_type; - //! @name Constructors/Destructors - //@{ + //! @name Constructors/Destructors + //@{ - //! Destructor. - virtual ~AggregationAlgorithmBase_kokkos() {} + //! Destructor. + virtual ~AggregationAlgorithmBase_kokkos() {} - //@} + //@} - //! @name Build routines - //@{ + //! @name Build routines + //@{ - //! BuildAggregates routine. - virtual void BuildAggregates(const Teuchos::ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const = 0; - //@} - }; + //! BuildAggregates routine. 
+ virtual void BuildAggregates(const Teuchos::ParameterList ¶ms, + const LWGraph_kokkos &graph, + Aggregates &aggregates, + Kokkos::View &aggStat, + LO &numNonAggregatedNodes) const = 0; + //@} +}; } // namespace MueLu diff --git a/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_decl.hpp b/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_decl.hpp index ccdf3e3a7782..e55031dfc34c 100644 --- a/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_decl.hpp +++ b/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_decl.hpp @@ -48,29 +48,26 @@ #include "MueLu_ConfigDefs.hpp" - #include #include - #include -#include "MueLu_GraphBase_fwd.hpp" #include "MueLu_Exceptions.hpp" +#include "MueLu_GraphBase_fwd.hpp" #include "MueLu_SingleLevelFactoryBase.hpp" #include "MueLu_NotayAggregationFactory_fwd.hpp" -#include "MueLu_Level_fwd.hpp" #include "MueLu_Aggregates_fwd.hpp" +#include "MueLu_Level_fwd.hpp" #include "MueLu_Utilities_fwd.hpp" namespace MueLu { -template +template class NotayAggregationFactory : public SingleLevelFactoryBase { #undef MUELU_NOTAYAGGREGATIONFACTORY_SHORT #include "MueLu_UseShortNames.hpp" @@ -79,22 +76,22 @@ class NotayAggregationFactory : public SingleLevelFactoryBase { //! 
@name typedefs //@{ using local_matrix_type = typename Matrix::local_matrix_type; - using device_type = typename local_matrix_type::device_type; - using execution_space = typename device_type::execution_space; - using magnitude_type = typename Teuchos::ScalarTraits::magnitudeType; - using impl_scalar_type = typename Kokkos::ArithTraits::val_type; - using row_sum_type = typename Kokkos::View; + using device_type = typename local_matrix_type::device_type; + using execution_space = typename device_type::execution_space; + using magnitude_type = typename Teuchos::ScalarTraits::magnitudeType; + using impl_scalar_type = typename Kokkos::ArithTraits::val_type; + using row_sum_type = typename Kokkos::View; //@} - //! @name Constructors/Destructors. //@{ //! Constructor. - NotayAggregationFactory() { }; + NotayAggregationFactory(){}; //! Destructor. - virtual ~NotayAggregationFactory() { } + virtual ~NotayAggregationFactory() {} RCP GetValidParameterList() const; @@ -119,54 +116,47 @@ class NotayAggregationFactory : public SingleLevelFactoryBase { void Build(Level ¤tLevel) const; /*! @brief Initial aggregation phase. */ - void BuildInitialAggregates(const Teuchos::ParameterList& params, - const RCP& A, - const ArrayView& orderingVector, - const magnitude_type kappa, - Aggregates& aggregates, - std::vector& aggStat, - LO& numNonAggregatedNodes, - LO& numDirichletNodes) const; - - /*! @brief Further aggregation phase increases coarsening rate by a factor of ~2 per iteration. 
*/ - void BuildFurtherAggregates(const Teuchos::ParameterList& params, - const RCP& A, - const Teuchos::ArrayView & orderingVector, - const local_matrix_type& coarseA, - const magnitude_type kappa, - const row_sum_type& rowSum, - std::vector& localAggStat, - Array& localVertex2AggID, - LO& numLocalAggregates, - LO& numNonAggregatedNodes) const; - - void BuildOnRankLocalMatrix(const local_matrix_type& localA, - local_matrix_type& onRankA) const; + void BuildInitialAggregates( + const Teuchos::ParameterList ¶ms, const RCP &A, + const ArrayView &orderingVector, const magnitude_type kappa, + Aggregates &aggregates, std::vector &aggStat, + LO &numNonAggregatedNodes, LO &numDirichletNodes) const; + + /*! @brief Further aggregation phase increases coarsening rate by a factor of + * ~2 per iteration. */ + void BuildFurtherAggregates( + const Teuchos::ParameterList ¶ms, const RCP &A, + const Teuchos::ArrayView &orderingVector, + const local_matrix_type &coarseA, const magnitude_type kappa, + const row_sum_type &rowSum, std::vector &localAggStat, + Array &localVertex2AggID, LO &numLocalAggregates, + LO &numNonAggregatedNodes) const; + + void BuildOnRankLocalMatrix(const local_matrix_type &localA, + local_matrix_type &onRankA) const; /*! @brief Construction of a local prolongator with values equal to 1.0. */ - void BuildIntermediateProlongator(const LO numRows, - const LO numDirichletNodes, - const LO numLocalAggregates, - const ArrayView& localVertex2AggID, - local_matrix_type& intermediateP) const; + void + BuildIntermediateProlongator(const LO numRows, const LO numDirichletNodes, + const LO numLocalAggregates, + const ArrayView &localVertex2AggID, + local_matrix_type &intermediateP) const; - /*! @brief Implementation of a local Galerkin projection called inside BuildFurtherAggregates. */ - void BuildCoarseLocalMatrix(const local_matrix_type& intermediateP, - local_matrix_type& coarseA) const; + /*! 
@brief Implementation of a local Galerkin projection called inside + * BuildFurtherAggregates. */ + void BuildCoarseLocalMatrix(const local_matrix_type &intermediateP, + local_matrix_type &coarseA) const; /*! @brief Wrapper for kokkos-kernels' spgemm that takes in CrsMatrix. */ - void localSpGEMM(const local_matrix_type& A, - const local_matrix_type& B, - const std::string matrixLabel, - local_matrix_type& C) const; - + void localSpGEMM(const local_matrix_type &A, const local_matrix_type &B, + const std::string matrixLabel, local_matrix_type &C) const; //@} private: }; // class NotayAggregationFactory -} +} // namespace MueLu #define MUELU_NOTAYAGGREGATIONFACTORY_SHORT #endif /* MUELU_NOTAYAGGREGATIONFACTORY_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_def.hpp b/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_def.hpp index b84022432e41..e23babd2b39c 100644 --- a/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_def.hpp +++ b/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_def.hpp @@ -47,9 +47,9 @@ #define MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_ #include -#include -#include #include +#include +#include #include #include "KokkosKernels_Handle.hpp" @@ -66,880 +66,964 @@ #include "MueLu_Types.hpp" #include "MueLu_Utilities.hpp" - namespace MueLu { - namespace NotayUtils { - template - LocalOrdinal RandomOrdinal(LocalOrdinal min, LocalOrdinal max) { - return min + as((max-min+1) * (static_cast(std::rand()) / (RAND_MAX + 1.0))); - } - - template - void RandomReorder(Teuchos::Array & list) { - typedef LocalOrdinal LO; - LO n = Teuchos::as(list.size()); - for(LO i = 0; i < n-1; i++) - std::swap(list[i], list[RandomOrdinal(i,n-1)]); - } - } - - template - RCP NotayAggregationFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - -#define SET_VALID_ENTRY(name) validParamList->setEntry(name, 
MasterList::getEntry(name)) - SET_VALID_ENTRY("aggregation: pairwise: size"); - SET_VALID_ENTRY("aggregation: pairwise: tie threshold"); - SET_VALID_ENTRY("aggregation: compute aggregate qualities"); - SET_VALID_ENTRY("aggregation: Dirichlet threshold"); - SET_VALID_ENTRY("aggregation: ordering"); +namespace NotayUtils { +template +LocalOrdinal RandomOrdinal(LocalOrdinal min, LocalOrdinal max) { + return min + + as((max - min + 1) * (static_cast(std::rand()) / + (RAND_MAX + 1.0))); +} + +template +void RandomReorder(Teuchos::Array &list) { + typedef LocalOrdinal LO; + LO n = Teuchos::as(list.size()); + for (LO i = 0; i < n - 1; i++) + std::swap(list[i], list[RandomOrdinal(i, n - 1)]); +} +} // namespace NotayUtils + +template +RCP +NotayAggregationFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + +#define SET_VALID_ENTRY(name) \ + validParamList->setEntry(name, MasterList::getEntry(name)) + SET_VALID_ENTRY("aggregation: pairwise: size"); + SET_VALID_ENTRY("aggregation: pairwise: tie threshold"); + SET_VALID_ENTRY("aggregation: compute aggregate qualities"); + SET_VALID_ENTRY("aggregation: Dirichlet threshold"); + SET_VALID_ENTRY("aggregation: ordering"); #undef SET_VALID_ENTRY - // general variables needed in AggregationFactory - validParamList->set< RCP >("A", null, "Generating factory of the matrix"); - validParamList->set< RCP >("Graph", null, "Generating factory of the graph"); - validParamList->set< RCP >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'"); - validParamList->set< RCP >("AggregateQualities", null, "Generating factory for variable \'AggregateQualities\'"); - - - return validParamList; + // general variables needed in AggregationFactory + validParamList->set>( + "A", null, "Generating factory of the matrix"); + validParamList->set>( + "Graph", null, "Generating factory of the graph"); + validParamList->set>( + "DofsPerNode", null, + "Generating 
factory for variable \'DofsPerNode\', usually the same as " + "for \'Graph\'"); + validParamList->set>( + "AggregateQualities", null, + "Generating factory for variable \'AggregateQualities\'"); + + return validParamList; +} + +template +void NotayAggregationFactory::DeclareInput(Level ¤tLevel) const { + const ParameterList &pL = GetParameterList(); + + Input(currentLevel, "A"); + Input(currentLevel, "Graph"); + Input(currentLevel, "DofsPerNode"); + if (pL.get("aggregation: compute aggregate qualities")) { + Input(currentLevel, "AggregateQualities"); } - - template - void NotayAggregationFactory::DeclareInput(Level& currentLevel) const { - const ParameterList& pL = GetParameterList(); - - Input(currentLevel, "A"); - Input(currentLevel, "Graph"); - Input(currentLevel, "DofsPerNode"); - if (pL.get("aggregation: compute aggregate qualities")) { - Input(currentLevel, "AggregateQualities"); - } - - +} + +template +void NotayAggregationFactory::Build( + Level ¤tLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + using STS = Teuchos::ScalarTraits; + using MT = typename STS::magnitudeType; + + const MT MT_TWO = + Teuchos::ScalarTraits::one() + Teuchos::ScalarTraits::one(); + + RCP out; + if (const char *dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); } + const ParameterList &pL = GetParameterList(); + + const MT kappa = + static_cast(pL.get("aggregation: Dirichlet threshold")); + TEUCHOS_TEST_FOR_EXCEPTION( + kappa <= MT_TWO, Exceptions::RuntimeError, + "Pairwise requires kappa > 2" + " otherwise all rows are considered as Dirichlet rows."); + + // Parameters + int maxNumIter = 3; + if (pL.isParameter("aggregation: pairwise: size")) + maxNumIter = pL.get("aggregation: pairwise: size"); + TEUCHOS_TEST_FOR_EXCEPTION( + maxNumIter < 1, 
Exceptions::RuntimeError, + "NotayAggregationFactory::Build(): \"aggregation: pairwise: size\"" + " must be a strictly positive integer"); + + RCP graph = Get>(currentLevel, "Graph"); + RCP A = Get>(currentLevel, "A"); + + // Setup aggregates & aggStat objects + RCP aggregates = rcp(new Aggregates(*graph)); + aggregates->setObjectLabel("PW"); + + const LO numRows = graph->GetNodeNumVertices(); + + // construct aggStat information + std::vector aggStat(numRows, READY); + + const int DofsPerNode = Get(currentLevel, "DofsPerNode"); + TEUCHOS_TEST_FOR_EXCEPTION(DofsPerNode != 1, Exceptions::RuntimeError, + "Pairwise only supports one dof per node"); + + // This follows the paper: + // Notay, "Aggregation-based algebraic multigrid for convection-diffusion + // equations", SISC 34(3), pp. A2288-2316. + + // Handle Ordering + std::string orderingStr = pL.get("aggregation: ordering"); + enum { + O_NATURAL, + O_RANDOM, + O_CUTHILL_MCKEE, + } ordering; + + ordering = O_NATURAL; + if (orderingStr == "random") + ordering = O_RANDOM; + else if (orderingStr == "natural") { + } else if (orderingStr == "cuthill-mckee" || orderingStr == "cm") + ordering = O_CUTHILL_MCKEE; + else { + TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError, + "Invalid ordering type"); + } - template - void NotayAggregationFactory::Build(Level& currentLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); - using STS = Teuchos::ScalarTraits; - using MT = typename STS::magnitudeType; - - const MT MT_TWO = Teuchos::ScalarTraits::one() + Teuchos::ScalarTraits::one(); - - RCP out; - if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); - } - - const ParameterList& pL = GetParameterList(); - - const MT kappa = static_cast(pL.get("aggregation: Dirichlet threshold")); - 
TEUCHOS_TEST_FOR_EXCEPTION(kappa <= MT_TWO, - Exceptions::RuntimeError, - "Pairwise requires kappa > 2" - " otherwise all rows are considered as Dirichlet rows."); - - // Parameters - int maxNumIter = 3; - if (pL.isParameter("aggregation: pairwise: size")) - maxNumIter = pL.get("aggregation: pairwise: size"); - TEUCHOS_TEST_FOR_EXCEPTION(maxNumIter < 1, - Exceptions::RuntimeError, - "NotayAggregationFactory::Build(): \"aggregation: pairwise: size\"" - " must be a strictly positive integer"); - - - RCP graph = Get< RCP >(currentLevel, "Graph"); - RCP A = Get< RCP >(currentLevel, "A"); - - // Setup aggregates & aggStat objects - RCP aggregates = rcp(new Aggregates(*graph)); - aggregates->setObjectLabel("PW"); - - const LO numRows = graph->GetNodeNumVertices(); - - // construct aggStat information - std::vector aggStat(numRows, READY); - - - const int DofsPerNode = Get(currentLevel,"DofsPerNode"); - TEUCHOS_TEST_FOR_EXCEPTION(DofsPerNode != 1, Exceptions::RuntimeError, - "Pairwise only supports one dof per node"); + // Get an ordering vector + // NOTE: The orderingVector only orders *rows* of the matrix. Off-proc + // columns will get ignored in the aggregation phases, so we don't need to + // worry about running off the end. + Array orderingVector(numRows); + for (LO i = 0; i < numRows; i++) + orderingVector[i] = i; + if (ordering == O_RANDOM) + MueLu::NotayUtils::RandomReorder(orderingVector); + else if (ordering == O_CUTHILL_MCKEE) { + RCP> rcmVector = + MueLu::Utilities::CuthillMcKee(*A); + auto localVector = rcmVector->getData(0); + for (LO i = 0; i < numRows; i++) + orderingVector[i] = localVector[i]; + } - // This follows the paper: - // Notay, "Aggregation-based algebraic multigrid for convection-diffusion equations", - // SISC 34(3), pp. A2288-2316. 
+ // Get the party stated + LO numNonAggregatedNodes = numRows, numDirichletNodes = 0; + BuildInitialAggregates(pL, A, orderingVector(), kappa, *aggregates, aggStat, + numNonAggregatedNodes, numDirichletNodes); + TEUCHOS_TEST_FOR_EXCEPTION( + 0 < numNonAggregatedNodes, Exceptions::RuntimeError, + "Initial pairwise aggregation failed to aggregate all nodes"); + LO numLocalAggregates = aggregates->GetNumAggregates(); + GetOStream(Statistics0) << "Init : " << numLocalAggregates << " - " + << A->getLocalNumRows() / numLocalAggregates + << std::endl; + + // Temporary data storage for further aggregation steps + local_matrix_type intermediateP; + local_matrix_type coarseLocalA; + + // Compute the on rank part of the local matrix + // that the square submatrix that only contains + // columns corresponding to local rows. + LO numLocalDirichletNodes = numDirichletNodes; + Array localVertex2AggId( + aggregates->GetVertex2AggId()->getData(0).view(0, numRows)); + BuildOnRankLocalMatrix(A->getLocalMatrixDevice(), coarseLocalA); + for (LO aggregationIter = 1; aggregationIter < maxNumIter; + ++aggregationIter) { + // Compute the intermediate prolongator + BuildIntermediateProlongator(coarseLocalA.numRows(), numLocalDirichletNodes, + numLocalAggregates, localVertex2AggId(), + intermediateP); + + // Compute the coarse local matrix and coarse row sum + BuildCoarseLocalMatrix(intermediateP, coarseLocalA); + + // Directly compute rowsum from A, rather than coarseA + row_sum_type rowSum("rowSum", numLocalAggregates); + { + std::vector> agg2vertex(numLocalAggregates); + auto vertex2AggId = aggregates->GetVertex2AggId()->getData(0); + for (LO i = 0; i < (LO)numRows; i++) { + if (aggStat[i] != AGGREGATED) + continue; + LO agg = vertex2AggId[i]; + agg2vertex[agg].push_back(i); + } - // Handle Ordering - std::string orderingStr = pL.get("aggregation: ordering"); - enum { - O_NATURAL, - O_RANDOM, - O_CUTHILL_MCKEE, - } ordering; + typename row_sum_type::HostMirror rowSum_h = + 
Kokkos::create_mirror_view(rowSum); + for (LO i = 0; i < numRows; i++) { + // If not aggregated already, skip this guy + if (aggStat[i] != AGGREGATED) + continue; + int agg = vertex2AggId[i]; + std::vector &myagg = agg2vertex[agg]; + + size_t nnz = A->getNumEntriesInLocalRow(i); + ArrayView indices; + ArrayView vals; + A->getLocalRowView(i, indices, vals); + + SC mysum = Teuchos::ScalarTraits::zero(); + for (LO colidx = 0; colidx < static_cast(nnz); colidx++) { + bool found = false; + if (indices[colidx] < numRows) { + for (LO j = 0; j < (LO)myagg.size(); j++) + if (vertex2AggId[indices[colidx]] == agg) + found = true; + } + if (!found) { + *out << "- ADDING col " << indices[colidx] << " = " << vals[colidx] + << std::endl; + mysum += vals[colidx]; + } else { + *out << "- NOT ADDING col " << indices[colidx] << " = " + << vals[colidx] << std::endl; + } + } - ordering = O_NATURAL; - if (orderingStr == "random" ) ordering = O_RANDOM; - else if(orderingStr == "natural") {} - else if(orderingStr == "cuthill-mckee" || orderingStr == "cm") ordering = O_CUTHILL_MCKEE; - else { - TEUCHOS_TEST_FOR_EXCEPTION(1,Exceptions::RuntimeError,"Invalid ordering type"); + rowSum_h[agg] = mysum; + } + Kokkos::deep_copy(rowSum, rowSum_h); } - // Get an ordering vector - // NOTE: The orderingVector only orders *rows* of the matrix. Off-proc columns - // will get ignored in the aggregation phases, so we don't need to worry about - // running off the end. 
- Array orderingVector(numRows); + // Get local orderingVector + Array localOrderingVector(numRows); for (LO i = 0; i < numRows; i++) - orderingVector[i] = i; + localOrderingVector[i] = i; if (ordering == O_RANDOM) - MueLu::NotayUtils::RandomReorder(orderingVector); + MueLu::NotayUtils::RandomReorder(localOrderingVector); else if (ordering == O_CUTHILL_MCKEE) { - RCP > rcmVector = MueLu::Utilities::CuthillMcKee(*A); + RCP> rcmVector = + MueLu::Utilities::CuthillMcKee(*A); auto localVector = rcmVector->getData(0); for (LO i = 0; i < numRows; i++) - orderingVector[i] = localVector[i]; + localOrderingVector[i] = localVector[i]; } - // Get the party stated - LO numNonAggregatedNodes = numRows, numDirichletNodes = 0; - BuildInitialAggregates(pL, A, orderingVector(), kappa, - *aggregates, aggStat, numNonAggregatedNodes, numDirichletNodes); - TEUCHOS_TEST_FOR_EXCEPTION(0 < numNonAggregatedNodes, Exceptions::RuntimeError, - "Initial pairwise aggregation failed to aggregate all nodes"); - LO numLocalAggregates = aggregates->GetNumAggregates(); - GetOStream(Statistics0) << "Init : " << numLocalAggregates << " - " - << A->getLocalNumRows() / numLocalAggregates << std::endl; - - // Temporary data storage for further aggregation steps - local_matrix_type intermediateP; - local_matrix_type coarseLocalA; - - // Compute the on rank part of the local matrix - // that the square submatrix that only contains - // columns corresponding to local rows. 
- LO numLocalDirichletNodes = numDirichletNodes; - Array localVertex2AggId(aggregates->GetVertex2AggId()->getData(0).view(0, numRows)); - BuildOnRankLocalMatrix(A->getLocalMatrixDevice(), coarseLocalA); - for(LO aggregationIter = 1; aggregationIter < maxNumIter; ++aggregationIter) { - // Compute the intermediate prolongator - BuildIntermediateProlongator(coarseLocalA.numRows(), numLocalDirichletNodes, numLocalAggregates, - localVertex2AggId(), intermediateP); - - // Compute the coarse local matrix and coarse row sum - BuildCoarseLocalMatrix(intermediateP, coarseLocalA); - - // Directly compute rowsum from A, rather than coarseA - row_sum_type rowSum("rowSum", numLocalAggregates); - { - std::vector > agg2vertex(numLocalAggregates); - auto vertex2AggId = aggregates->GetVertex2AggId()->getData(0); - for(LO i=0; i<(LO)numRows; i++) { - if(aggStat[i] != AGGREGATED) - continue; - LO agg=vertex2AggId[i]; - agg2vertex[agg].push_back(i); - } - - typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum); - for(LO i = 0; i < numRows; i++) { - // If not aggregated already, skip this guy - if(aggStat[i] != AGGREGATED) - continue; - int agg = vertex2AggId[i]; - std::vector & myagg = agg2vertex[agg]; - - size_t nnz = A->getNumEntriesInLocalRow(i); - ArrayView indices; - ArrayView vals; - A->getLocalRowView(i, indices, vals); - - SC mysum = Teuchos::ScalarTraits::zero(); - for (LO colidx = 0; colidx < static_cast(nnz); colidx++) { - bool found = false; - if(indices[colidx] < numRows) { - for(LO j=0; j<(LO)myagg.size(); j++) - if (vertex2AggId[indices[colidx]] == agg) - found=true; - } - if(!found) { - *out << "- ADDING col "<getLocalNumRows() / numLocalAggregates << std::endl; } - aggregates->SetNumAggregates(numLocalAggregates); - aggregates->AggregatesCrossProcessors(false); - aggregates->ComputeAggregateSizes(true/*forceRecompute*/); - // DO stuff - Set(currentLevel, "Aggregates", aggregates); - GetOStream(Statistics0) << aggregates->description() << 
std::endl; + // We could probably print some better statistics at some point + GetOStream(Statistics0) + << "Iter " << aggregationIter << ": " << numLocalAggregates << " - " + << A->getLocalNumRows() / numLocalAggregates << std::endl; + } + aggregates->SetNumAggregates(numLocalAggregates); + aggregates->AggregatesCrossProcessors(false); + aggregates->ComputeAggregateSizes(true /*forceRecompute*/); + + // DO stuff + Set(currentLevel, "Aggregates", aggregates); + GetOStream(Statistics0) << aggregates->description() << std::endl; +} + +template +void NotayAggregationFactory:: + BuildInitialAggregates( + const Teuchos::ParameterList ¶ms, const RCP &A, + const Teuchos::ArrayView &orderingVector, + const typename Teuchos::ScalarTraits::magnitudeType kappa, + Aggregates &aggregates, std::vector &aggStat, + LO &numNonAggregatedNodes, LO &numDirichletNodes) const { + + Monitor m(*this, "BuildInitialAggregates"); + using STS = Teuchos::ScalarTraits; + using MT = typename STS::magnitudeType; + using RealValuedVector = + Xpetra::Vector; + + RCP out; + if (const char *dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); } - - template - void NotayAggregationFactory:: - BuildInitialAggregates(const Teuchos::ParameterList& params, - const RCP& A, - const Teuchos::ArrayView & orderingVector, - const typename Teuchos::ScalarTraits::magnitudeType kappa, - Aggregates& aggregates, - std::vector& aggStat, - LO& numNonAggregatedNodes, - LO& numDirichletNodes) const { - - Monitor m(*this, "BuildInitialAggregates"); - using STS = Teuchos::ScalarTraits; - using MT = typename STS::magnitudeType; - using RealValuedVector = Xpetra::Vector; - - RCP out; - if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - 
out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); - } - - - const SC SC_ZERO = Teuchos::ScalarTraits::zero(); - const MT MT_ZERO = Teuchos::ScalarTraits::zero(); - const MT MT_ONE = Teuchos::ScalarTraits::one(); - const MT MT_TWO = MT_ONE + MT_ONE; - const LO LO_INVALID = Teuchos::OrdinalTraits::invalid(); - const LO LO_ZERO = Teuchos::OrdinalTraits::zero(); - - const MT kappa_init = kappa / (kappa - MT_TWO); - const LO numRows = aggStat.size(); - const int myRank = A->getMap()->getComm()->getRank(); - - // For finding "ties" where we fall back to the ordering. Making this larger than - // hard zero substantially increases code robustness. - double tie_criterion = params.get("aggregation: pairwise: tie threshold"); - double tie_less = 1.0 - tie_criterion; - double tie_more = 1.0 + tie_criterion; - - // NOTE: Assumes 1 dof per node. This constraint is enforced in Build(), - // and so we're not doing again here. - // This should probably be fixed at some point. 
- - // Extract diagonal, rowsums, etc - // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S - RCP ghostedDiag = MueLu::Utilities::GetMatrixOverlappedDiagonal(*A); - RCP ghostedRowSum = MueLu::Utilities::GetMatrixOverlappedDeletedRowsum(*A); - RCP ghostedAbsRowSum = MueLu::Utilities::GetMatrixOverlappedAbsDeletedRowsum(*A); - const ArrayRCP D = ghostedDiag->getData(0); - const ArrayRCP S = ghostedRowSum->getData(0); - const ArrayRCP AbsRs = ghostedAbsRowSum->getData(0); - - // Aggregates stuff - ArrayRCP vertex2AggId_rcp = aggregates.GetVertex2AggId()->getDataNonConst(0); - ArrayRCP procWinner_rcp = aggregates.GetProcWinner() ->getDataNonConst(0); - ArrayView vertex2AggId = vertex2AggId_rcp(); - ArrayView procWinner = procWinner_rcp(); - - // Algorithm 4.2 - - // 0,1 : Initialize: Flag boundary conditions - // Modification: We assume symmetry here aij = aji - for (LO row = 0; row < Teuchos::as(A->getRowMap()->getLocalNumElements()); ++row) { - MT aii = STS::magnitude(D[row]); - MT rowsum = AbsRs[row]; - - if(aii >= kappa_init * rowsum) { - *out << "Flagging index " << row << " as dirichlet " - "aii >= kappa*rowsum = " << aii << " >= " << kappa_init << " " << rowsum << std::endl; - aggStat[row] = IGNORED; - --numNonAggregatedNodes; - ++numDirichletNodes; - } + const SC SC_ZERO = Teuchos::ScalarTraits::zero(); + const MT MT_ZERO = Teuchos::ScalarTraits::zero(); + const MT MT_ONE = Teuchos::ScalarTraits::one(); + const MT MT_TWO = MT_ONE + MT_ONE; + const LO LO_INVALID = Teuchos::OrdinalTraits::invalid(); + const LO LO_ZERO = Teuchos::OrdinalTraits::zero(); + + const MT kappa_init = kappa / (kappa - MT_TWO); + const LO numRows = aggStat.size(); + const int myRank = A->getMap()->getComm()->getRank(); + + // For finding "ties" where we fall back to the ordering. Making this larger + // than hard zero substantially increases code robustness. 
+ double tie_criterion = + params.get("aggregation: pairwise: tie threshold"); + double tie_less = 1.0 - tie_criterion; + double tie_more = 1.0 + tie_criterion; + + // NOTE: Assumes 1 dof per node. This constraint is enforced in Build(), + // and so we're not doing again here. + // This should probably be fixed at some point. + + // Extract diagonal, rowsums, etc + // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S + RCP ghostedDiag = + MueLu::Utilities::GetMatrixOverlappedDiagonal(*A); + RCP ghostedRowSum = + MueLu::Utilities::GetMatrixOverlappedDeletedRowsum(*A); + RCP ghostedAbsRowSum = + MueLu::Utilities::GetMatrixOverlappedAbsDeletedRowsum(*A); + const ArrayRCP D = ghostedDiag->getData(0); + const ArrayRCP S = ghostedRowSum->getData(0); + const ArrayRCP AbsRs = ghostedAbsRowSum->getData(0); + + // Aggregates stuff + ArrayRCP vertex2AggId_rcp = + aggregates.GetVertex2AggId()->getDataNonConst(0); + ArrayRCP procWinner_rcp = aggregates.GetProcWinner()->getDataNonConst(0); + ArrayView vertex2AggId = vertex2AggId_rcp(); + ArrayView procWinner = procWinner_rcp(); + + // Algorithm 4.2 + + // 0,1 : Initialize: Flag boundary conditions + // Modification: We assume symmetry here aij = aji + for (LO row = 0; row < Teuchos::as(A->getRowMap()->getLocalNumElements()); + ++row) { + MT aii = STS::magnitude(D[row]); + MT rowsum = AbsRs[row]; + + if (aii >= kappa_init * rowsum) { + *out << "Flagging index " << row + << " as dirichlet " + "aii >= kappa*rowsum = " + << aii << " >= " << kappa_init << " " << rowsum << std::endl; + aggStat[row] = IGNORED; + --numNonAggregatedNodes; + ++numDirichletNodes; } + } - - // 2 : Iteration - LO aggIndex = LO_ZERO; - for(LO i = 0; i < numRows; i++) { - LO current_idx = orderingVector[i]; - // If we're aggregated already, skip this guy - if(aggStat[current_idx] != READY) + // 2 : Iteration + LO aggIndex = LO_ZERO; + for (LO i = 0; i < numRows; i++) { + LO current_idx = orderingVector[i]; + // If we're aggregated 
already, skip this guy + if (aggStat[current_idx] != READY) + continue; + + MT best_mu = MT_ZERO; + LO best_idx = LO_INVALID; + + size_t nnz = A->getNumEntriesInLocalRow(current_idx); + ArrayView indices; + ArrayView vals; + A->getLocalRowView(current_idx, indices, vals); + + MT aii = STS::real(D[current_idx]); + MT si = STS::real(S[current_idx]); + for (LO colidx = 0; colidx < static_cast(nnz); colidx++) { + // Skip aggregated neighbors, off-rank neighbors, hard zeros and self + LO col = indices[colidx]; + SC val = vals[colidx]; + if (current_idx == col || col >= numRows || aggStat[col] != READY || + val == SC_ZERO) continue; - MT best_mu = MT_ZERO; - LO best_idx = LO_INVALID; - - size_t nnz = A->getNumEntriesInLocalRow(current_idx); - ArrayView indices; - ArrayView vals; - A->getLocalRowView(current_idx, indices, vals); - - MT aii = STS::real(D[current_idx]); - MT si = STS::real(S[current_idx]); - for (LO colidx = 0; colidx < static_cast(nnz); colidx++) { - // Skip aggregated neighbors, off-rank neighbors, hard zeros and self - LO col = indices[colidx]; - SC val = vals[colidx]; - if(current_idx == col || col >= numRows || aggStat[col] != READY || val == SC_ZERO) - continue; - - MT aij = STS::real(val); - MT ajj = STS::real(D[col]); - MT sj = - STS::real(S[col]); // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S - if(aii - si + ajj - sj >= MT_ZERO) { - // Modification: We assume symmetry here aij = aji - MT mu_top = MT_TWO / ( MT_ONE / aii + MT_ONE / ajj); - MT mu_bottom = - aij + MT_ONE / ( MT_ONE / (aii - si) + MT_ONE / (ajj - sj) ); - MT mu = mu_top / mu_bottom; - - // Modification: Explicitly check the tie criterion here - if (mu > MT_ZERO && (best_idx == LO_INVALID || mu < best_mu * tie_less || - (mu < best_mu*tie_more && orderingVector[col] < orderingVector[best_idx]))) { - best_mu = mu; - best_idx = col; - *out << "[" << current_idx << "] Column UPDATED " << col << ": " - << "aii - si + ajj - sj = " << aii << " - " << si << " + 
" << ajj << " - " << sj - << " = " << aii - si + ajj - sj<< ", aij = "<(vertex2AggId.size()); ++i) { + *out << i << "(" << vertex2AggId[i] << ")"; + } + *out << std::endl; + + // update aggregate object + aggregates.SetNumAggregates(aggIndex); +} // BuildInitialAggregates + +template +void NotayAggregationFactory:: + BuildFurtherAggregates( + const Teuchos::ParameterList ¶ms, const RCP &A, + const Teuchos::ArrayView &orderingVector, + const typename Matrix::local_matrix_type &coarseA, + const typename Teuchos::ScalarTraits::magnitudeType kappa, + const Kokkos::View::val_type *, + Kokkos::LayoutLeft, + typename Matrix::local_matrix_type::device_type> + &rowSum, + std::vector &localAggStat, + Teuchos::Array &localVertex2AggID, LO &numLocalAggregates, + LO &numNonAggregatedNodes) const { + Monitor m(*this, "BuildFurtherAggregates"); + + // Set debug outputs based on environment variable + RCP out; + if (const char *dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } - } else { - *out << "No buddy found for index " << current_idx << "," - " but aggregating as singleton [agg " << aggIndex << "]" << std::endl; - - aggStat[current_idx] = ONEPT; - vertex2AggId[current_idx] = aggIndex; - procWinner[current_idx] = myRank; - numNonAggregatedNodes--; - aggIndex++; - } // best_mu - } // best_idx - }// end Algorithm 4.2 - - *out << "vertex2aggid :"; - for(int i = 0; i < static_cast(vertex2AggId.size()); ++i) { - *out << i << "(" << vertex2AggId[i] << ")"; - } - *out << std::endl; - - // update aggregate object - aggregates.SetNumAggregates(aggIndex); - } // BuildInitialAggregates - - template - void NotayAggregationFactory:: - BuildFurtherAggregates(const Teuchos::ParameterList& params, - const RCP& A, - const Teuchos::ArrayView & orderingVector, - const typename 
Matrix::local_matrix_type& coarseA, - const typename Teuchos::ScalarTraits::magnitudeType kappa, - const Kokkos::View::val_type*, - Kokkos::LayoutLeft, - typename Matrix::local_matrix_type::device_type>& rowSum, - std::vector& localAggStat, - Teuchos::Array& localVertex2AggID, - LO& numLocalAggregates, - LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildFurtherAggregates"); - - // Set debug outputs based on environment variable - RCP out; - if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + using value_type = typename local_matrix_type::value_type; + const value_type KAT_zero = Kokkos::ArithTraits::zero(); + const magnitude_type MT_zero = Teuchos::ScalarTraits::zero(); + const magnitude_type MT_one = Teuchos::ScalarTraits::one(); + const magnitude_type MT_two = MT_one + MT_one; + const LO LO_INVALID = Teuchos::OrdinalTraits::invalid(); + + // For finding "ties" where we fall back to the ordering. Making this larger + // than hard zero substantially increases code robustness. + double tie_criterion = + params.get("aggregation: pairwise: tie threshold"); + double tie_less = 1.0 - tie_criterion; + double tie_more = 1.0 + tie_criterion; + + typename row_sum_type::HostMirror rowSum_h = + Kokkos::create_mirror_view(rowSum); + Kokkos::deep_copy(rowSum_h, rowSum); + + // Extracting the diagonal of a KokkosSparse::CrsMatrix + // is not currently provided in kokkos-kernels so here + // is an ugly way to get that done... 
+ const LO numRows = static_cast(coarseA.numRows()); + typename local_matrix_type::values_type::HostMirror diagA_h("diagA host", + numRows); + typename local_matrix_type::row_map_type::HostMirror row_map_h = + Kokkos::create_mirror_view(coarseA.graph.row_map); + Kokkos::deep_copy(row_map_h, coarseA.graph.row_map); + typename local_matrix_type::index_type::HostMirror entries_h = + Kokkos::create_mirror_view(coarseA.graph.entries); + Kokkos::deep_copy(entries_h, coarseA.graph.entries); + typename local_matrix_type::values_type::HostMirror values_h = + Kokkos::create_mirror_view(coarseA.values); + Kokkos::deep_copy(values_h, coarseA.values); + for (LO rowIdx = 0; rowIdx < numRows; ++rowIdx) { + for (LO entryIdx = static_cast(row_map_h(rowIdx)); + entryIdx < static_cast(row_map_h(rowIdx + 1)); ++entryIdx) { + if (rowIdx == static_cast(entries_h(entryIdx))) { + diagA_h(rowIdx) = values_h(entryIdx); + } } + } - using value_type = typename local_matrix_type::value_type; - const value_type KAT_zero = Kokkos::ArithTraits::zero(); - const magnitude_type MT_zero = Teuchos::ScalarTraits::zero(); - const magnitude_type MT_one = Teuchos::ScalarTraits::one(); - const magnitude_type MT_two = MT_one + MT_one; - const LO LO_INVALID = Teuchos::OrdinalTraits::invalid() ; - - // For finding "ties" where we fall back to the ordering. Making this larger than - // hard zero substantially increases code robustness. - double tie_criterion = params.get("aggregation: pairwise: tie threshold"); - double tie_less = 1.0 - tie_criterion; - double tie_more = 1.0 + tie_criterion; - - typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum); - Kokkos::deep_copy(rowSum_h, rowSum); - - // Extracting the diagonal of a KokkosSparse::CrsMatrix - // is not currently provided in kokkos-kernels so here - // is an ugly way to get that done... 
- const LO numRows = static_cast(coarseA.numRows()); - typename local_matrix_type::values_type::HostMirror diagA_h("diagA host", numRows); - typename local_matrix_type::row_map_type::HostMirror row_map_h - = Kokkos::create_mirror_view(coarseA.graph.row_map); - Kokkos::deep_copy(row_map_h, coarseA.graph.row_map); - typename local_matrix_type::index_type::HostMirror entries_h - = Kokkos::create_mirror_view(coarseA.graph.entries); - Kokkos::deep_copy(entries_h, coarseA.graph.entries); - typename local_matrix_type::values_type::HostMirror values_h - = Kokkos::create_mirror_view(coarseA.values); - Kokkos::deep_copy(values_h, coarseA.values); - for(LO rowIdx = 0; rowIdx < numRows; ++rowIdx) { - for(LO entryIdx = static_cast(row_map_h(rowIdx)); - entryIdx < static_cast(row_map_h(rowIdx + 1)); - ++entryIdx) { - if(rowIdx == static_cast(entries_h(entryIdx))) { - diagA_h(rowIdx) = values_h(entryIdx); - } - } + for (LO currentIdx = 0; currentIdx < numRows; ++currentIdx) { + if (localAggStat[currentIdx] != READY) { + continue; } - for(LO currentIdx = 0; currentIdx < numRows; ++currentIdx) { - if(localAggStat[currentIdx] != READY) { + LO bestIdx = Teuchos::OrdinalTraits::invalid(); + magnitude_type best_mu = Teuchos::ScalarTraits::zero(); + const magnitude_type aii = + Teuchos::ScalarTraits::real(diagA_h(currentIdx)); + const magnitude_type si = + Teuchos::ScalarTraits::real(rowSum_h(currentIdx)); + for (auto entryIdx = row_map_h(currentIdx); + entryIdx < row_map_h(currentIdx + 1); ++entryIdx) { + const LO colIdx = static_cast(entries_h(entryIdx)); + if (currentIdx == colIdx || colIdx >= numRows || + localAggStat[colIdx] != READY || values_h(entryIdx) == KAT_zero) { continue; } - LO bestIdx = Teuchos::OrdinalTraits::invalid(); - magnitude_type best_mu = Teuchos::ScalarTraits::zero(); - const magnitude_type aii = Teuchos::ScalarTraits::real(diagA_h(currentIdx)); - const magnitude_type si = Teuchos::ScalarTraits::real(rowSum_h(currentIdx)); - for(auto entryIdx = 
row_map_h(currentIdx); entryIdx < row_map_h(currentIdx + 1); ++entryIdx) { - const LO colIdx = static_cast(entries_h(entryIdx)); - if(currentIdx == colIdx || colIdx >= numRows || localAggStat[colIdx] != READY || values_h(entryIdx) == KAT_zero) { - continue; - } - - const magnitude_type aij = Teuchos::ScalarTraits::real(values_h(entryIdx)); - const magnitude_type ajj = Teuchos::ScalarTraits::real(diagA_h(colIdx)); - const magnitude_type sj = - Teuchos::ScalarTraits::real(rowSum_h(colIdx)); // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S - if(aii - si + ajj - sj >= MT_zero) { - const magnitude_type mu_top = MT_two / ( MT_one/aii + MT_one/ajj ); - const magnitude_type mu_bottom = -aij + MT_one / (MT_one / (aii - si) + MT_one / (ajj - sj)); - const magnitude_type mu = mu_top / mu_bottom; - - // Modification: Explicitly check the tie criterion here - if (mu > MT_zero && (bestIdx == LO_INVALID || mu < best_mu * tie_less || - (mu < best_mu*tie_more && orderingVector[colIdx] < orderingVector[bestIdx]))) { - best_mu = mu; - bestIdx = colIdx; - *out << "[" << currentIdx << "] Column UPDATED " << colIdx << ": " - << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj - << " = " << aii - si + ajj - sj << ", aij = "< - void NotayAggregationFactory:: - BuildOnRankLocalMatrix(const typename Matrix::local_matrix_type& localA, - typename Matrix::local_matrix_type& onrankA) const { - Monitor m(*this, "BuildOnRankLocalMatrix"); - - // Set debug outputs based on environment variable - RCP out; - if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); } + } // end loop over matrix rows + +} // BuildFurtherAggregates + +template +void NotayAggregationFactory:: + BuildOnRankLocalMatrix(const typename 
Matrix::local_matrix_type &localA, + typename Matrix::local_matrix_type &onrankA) const { + Monitor m(*this, "BuildOnRankLocalMatrix"); + + // Set debug outputs based on environment variable + RCP out; + if (const char *dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } - using local_graph_type = typename local_matrix_type::staticcrsgraph_type; - using values_type = typename local_matrix_type::values_type; - using size_type = typename local_graph_type::size_type; - using col_index_type = typename local_graph_type::data_type; - using array_layout = typename local_graph_type::array_layout; - using memory_traits = typename local_graph_type::memory_traits; - using row_pointer_type = Kokkos::View; - using col_indices_type = Kokkos::View; - // Extract on rank part of A - // Simply check that the column index is less than the number of local rows - // otherwise remove it. 
- - const int numRows = static_cast(localA.numRows()); - row_pointer_type rowPtr("onrankA row pointer", numRows + 1); - typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr); - typename local_graph_type::row_map_type::HostMirror origRowPtr_h - = Kokkos::create_mirror_view(localA.graph.row_map); - typename local_graph_type::entries_type::HostMirror origColind_h - = Kokkos::create_mirror_view(localA.graph.entries); - typename values_type::HostMirror origValues_h - = Kokkos::create_mirror_view(localA.values); - Kokkos::deep_copy(origRowPtr_h, localA.graph.row_map); - Kokkos::deep_copy(origColind_h, localA.graph.entries); - Kokkos::deep_copy(origValues_h, localA.values); - - // Compute the number of nnz entries per row - rowPtr_h(0) = 0; - for(int rowIdx = 0; rowIdx < numRows; ++rowIdx) { - for(size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) { - if(origColind_h(entryIdx) < numRows) {rowPtr_h(rowIdx + 1) += 1;} + using local_graph_type = typename local_matrix_type::staticcrsgraph_type; + using values_type = typename local_matrix_type::values_type; + using size_type = typename local_graph_type::size_type; + using col_index_type = typename local_graph_type::data_type; + using array_layout = typename local_graph_type::array_layout; + using memory_traits = typename local_graph_type::memory_traits; + using row_pointer_type = + Kokkos::View; + using col_indices_type = + Kokkos::View; + // Extract on rank part of A + // Simply check that the column index is less than the number of local rows + // otherwise remove it. 
+ + const int numRows = static_cast(localA.numRows()); + row_pointer_type rowPtr("onrankA row pointer", numRows + 1); + typename row_pointer_type::HostMirror rowPtr_h = + Kokkos::create_mirror_view(rowPtr); + typename local_graph_type::row_map_type::HostMirror origRowPtr_h = + Kokkos::create_mirror_view(localA.graph.row_map); + typename local_graph_type::entries_type::HostMirror origColind_h = + Kokkos::create_mirror_view(localA.graph.entries); + typename values_type::HostMirror origValues_h = + Kokkos::create_mirror_view(localA.values); + Kokkos::deep_copy(origRowPtr_h, localA.graph.row_map); + Kokkos::deep_copy(origColind_h, localA.graph.entries); + Kokkos::deep_copy(origValues_h, localA.values); + + // Compute the number of nnz entries per row + rowPtr_h(0) = 0; + for (int rowIdx = 0; rowIdx < numRows; ++rowIdx) { + for (size_type entryIdx = origRowPtr_h(rowIdx); + entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) { + if (origColind_h(entryIdx) < numRows) { + rowPtr_h(rowIdx + 1) += 1; } - rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx + 1) + rowPtr_h(rowIdx); } - Kokkos::deep_copy(rowPtr, rowPtr_h); - - const LO nnzOnrankA = rowPtr_h(numRows); - - // Now use nnz per row to allocate matrix views and store column indices and values - col_indices_type colInd("onrankA column indices", rowPtr_h(numRows)); - values_type values("onrankA values", rowPtr_h(numRows)); - typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd); - typename values_type::HostMirror values_h = Kokkos::create_mirror_view(values); - int entriesInRow; - for(int rowIdx = 0; rowIdx < numRows; ++rowIdx) { - entriesInRow = 0; - for(size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) { - if(origColind_h(entryIdx) < numRows) { - colInd_h(rowPtr_h(rowIdx) + entriesInRow) = origColind_h(entryIdx); - values_h(rowPtr_h(rowIdx) + entriesInRow) = origValues_h(entryIdx); - ++entriesInRow; - } + rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx + 1) + 
rowPtr_h(rowIdx); + } + Kokkos::deep_copy(rowPtr, rowPtr_h); + + const LO nnzOnrankA = rowPtr_h(numRows); + + // Now use nnz per row to allocate matrix views and store column indices and + // values + col_indices_type colInd("onrankA column indices", rowPtr_h(numRows)); + values_type values("onrankA values", rowPtr_h(numRows)); + typename col_indices_type::HostMirror colInd_h = + Kokkos::create_mirror_view(colInd); + typename values_type::HostMirror values_h = + Kokkos::create_mirror_view(values); + int entriesInRow; + for (int rowIdx = 0; rowIdx < numRows; ++rowIdx) { + entriesInRow = 0; + for (size_type entryIdx = origRowPtr_h(rowIdx); + entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) { + if (origColind_h(entryIdx) < numRows) { + colInd_h(rowPtr_h(rowIdx) + entriesInRow) = origColind_h(entryIdx); + values_h(rowPtr_h(rowIdx) + entriesInRow) = origValues_h(entryIdx); + ++entriesInRow; } } - Kokkos::deep_copy(colInd, colInd_h); - Kokkos::deep_copy(values, values_h); - - onrankA = local_matrix_type("onrankA", numRows, numRows, - nnzOnrankA, values, rowPtr, colInd); + } + Kokkos::deep_copy(colInd, colInd_h); + Kokkos::deep_copy(values, values_h); + + onrankA = local_matrix_type("onrankA", numRows, numRows, nnzOnrankA, values, + rowPtr, colInd); +} + +template +void NotayAggregationFactory:: + BuildIntermediateProlongator( + const LocalOrdinal numRows, const LocalOrdinal numDirichletNodes, + const LocalOrdinal numLocalAggregates, + const Teuchos::ArrayView &localVertex2AggID, + typename Matrix::local_matrix_type &intermediateP) const { + Monitor m(*this, "BuildIntermediateProlongator"); + + // Set debug outputs based on environment variable + RCP out; + if (const char *dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); } - template - void 
NotayAggregationFactory:: - BuildIntermediateProlongator(const LocalOrdinal numRows, - const LocalOrdinal numDirichletNodes, - const LocalOrdinal numLocalAggregates, - const Teuchos::ArrayView& localVertex2AggID, - typename Matrix::local_matrix_type& intermediateP) const { - Monitor m(*this, "BuildIntermediateProlongator"); - - // Set debug outputs based on environment variable - RCP out; - if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); + using local_graph_type = typename local_matrix_type::staticcrsgraph_type; + using values_type = typename local_matrix_type::values_type; + using size_type = typename local_graph_type::size_type; + using col_index_type = typename local_graph_type::data_type; + using array_layout = typename local_graph_type::array_layout; + using memory_traits = typename local_graph_type::memory_traits; + using row_pointer_type = + Kokkos::View; + using col_indices_type = + Kokkos::View; + + const LO LO_INVALID = Teuchos::OrdinalTraits::invalid(); + + const int intermediatePnnz = numRows - numDirichletNodes; + row_pointer_type rowPtr("intermediateP row pointer", numRows + 1); + col_indices_type colInd("intermediateP column indices", intermediatePnnz); + values_type values("intermediateP values", intermediatePnnz); + typename row_pointer_type::HostMirror rowPtr_h = + Kokkos::create_mirror_view(rowPtr); + typename col_indices_type::HostMirror colInd_h = + Kokkos::create_mirror_view(colInd); + + rowPtr_h(0) = 0; + for (int rowIdx = 0; rowIdx < numRows; ++rowIdx) { + // Skip Dirichlet nodes + if (localVertex2AggID[rowIdx] == LO_INVALID) { + rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx); } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); - } - - using local_graph_type = typename local_matrix_type::staticcrsgraph_type; - using values_type = typename local_matrix_type::values_type; - using 
size_type = typename local_graph_type::size_type; - using col_index_type = typename local_graph_type::data_type; - using array_layout = typename local_graph_type::array_layout; - using memory_traits = typename local_graph_type::memory_traits; - using row_pointer_type = Kokkos::View; - using col_indices_type = Kokkos::View; - - const LO LO_INVALID = Teuchos::OrdinalTraits::invalid(); - - const int intermediatePnnz = numRows - numDirichletNodes; - row_pointer_type rowPtr("intermediateP row pointer", numRows + 1); - col_indices_type colInd("intermediateP column indices", intermediatePnnz); - values_type values("intermediateP values", intermediatePnnz); - typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr); - typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd); - - rowPtr_h(0) = 0; - for(int rowIdx = 0; rowIdx < numRows; ++rowIdx) { - // Skip Dirichlet nodes - if(localVertex2AggID[rowIdx] == LO_INVALID) { - rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx); - } else { - rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx) + 1; - colInd_h(rowPtr_h(rowIdx)) = localVertex2AggID[rowIdx]; - } + rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx) + 1; + colInd_h(rowPtr_h(rowIdx)) = localVertex2AggID[rowIdx]; } + } - Kokkos::deep_copy(rowPtr, rowPtr_h); - Kokkos::deep_copy(colInd, colInd_h); - Kokkos::deep_copy(values, Kokkos::ArithTraits::one()); - - intermediateP = local_matrix_type("intermediateP", - numRows, numLocalAggregates, intermediatePnnz, - values, rowPtr, colInd); - } // BuildIntermediateProlongator - - template - void NotayAggregationFactory:: - BuildCoarseLocalMatrix(const typename Matrix::local_matrix_type& intermediateP, - typename Matrix::local_matrix_type& coarseA) const { - Monitor m(*this, "BuildCoarseLocalMatrix"); - - using local_graph_type = typename local_matrix_type::staticcrsgraph_type; - using values_type = typename local_matrix_type::values_type; - using size_type = typename local_graph_type::size_type; - using 
col_index_type = typename local_graph_type::data_type; - using array_layout = typename local_graph_type::array_layout; - using memory_traits = typename local_graph_type::memory_traits; - using row_pointer_type = Kokkos::View; - using col_indices_type = Kokkos::View; - - local_matrix_type AP; - localSpGEMM(coarseA, intermediateP, "AP", AP); - - // Note 03/11/20, lbv: does kh need to destroy and recreate the spgemm handle - // I am not sure but doing it for safety in case it stashes data from the previous - // spgemm computation... - - // Compute Ac = Pt * AP - // Two steps needed: - // 1. compute Pt - // 2. perform multiplication - - // Step 1 compute Pt - // Obviously this requires the same amount of storage as P except for the rowPtr - row_pointer_type rowPtrPt(Kokkos::ViewAllocateWithoutInitializing("Pt row pointer"), - intermediateP.numCols() + 1); - col_indices_type colIndPt(Kokkos::ViewAllocateWithoutInitializing("Pt column indices"), - intermediateP.nnz()); - values_type valuesPt(Kokkos::ViewAllocateWithoutInitializing("Pt values"), - intermediateP.nnz()); - - typename row_pointer_type::HostMirror rowPtrPt_h = Kokkos::create_mirror_view(rowPtrPt); - typename col_indices_type::HostMirror entries_h = Kokkos::create_mirror_view(intermediateP.graph.entries); - Kokkos::deep_copy(entries_h, intermediateP.graph.entries); - Kokkos::deep_copy(rowPtrPt_h, 0); - for(size_type entryIdx = 0; entryIdx < intermediateP.nnz(); ++entryIdx) { - rowPtrPt_h(entries_h(entryIdx) + 1) += 1; - } - for(LO rowIdx = 0; rowIdx < intermediateP.numCols(); ++rowIdx) { - rowPtrPt_h(rowIdx + 1) += rowPtrPt_h(rowIdx); - } - Kokkos::deep_copy(rowPtrPt, rowPtrPt_h); - - typename row_pointer_type::HostMirror rowPtrP_h = Kokkos::create_mirror_view(intermediateP.graph.row_map); - Kokkos::deep_copy(rowPtrP_h, intermediateP.graph.row_map); - typename col_indices_type::HostMirror colIndP_h = Kokkos::create_mirror_view(intermediateP.graph.entries); - Kokkos::deep_copy(colIndP_h, 
intermediateP.graph.entries); - typename values_type::HostMirror valuesP_h = Kokkos::create_mirror_view(intermediateP.values); - Kokkos::deep_copy(valuesP_h, intermediateP.values); - typename col_indices_type::HostMirror colIndPt_h = Kokkos::create_mirror_view(colIndPt); - typename values_type::HostMirror valuesPt_h = Kokkos::create_mirror_view(valuesPt); - const col_index_type invalidColumnIndex = KokkosSparse::OrdinalTraits::invalid(); - Kokkos::deep_copy(colIndPt_h, invalidColumnIndex); - - col_index_type colIdx = 0; - for(LO rowIdx = 0; rowIdx < intermediateP.numRows(); ++rowIdx) { - for(size_type entryIdxP = rowPtrP_h(rowIdx); entryIdxP < rowPtrP_h(rowIdx + 1); ++entryIdxP) { - colIdx = entries_h(entryIdxP); - for(size_type entryIdxPt = rowPtrPt_h(colIdx); entryIdxPt < rowPtrPt_h(colIdx + 1); ++entryIdxPt) { - if(colIndPt_h(entryIdxPt) == invalidColumnIndex) { - colIndPt_h(entryIdxPt) = rowIdx; - valuesPt_h(entryIdxPt) = valuesP_h(entryIdxP); - break; - } - } // Loop over entries in row of Pt - } // Loop over entries in row of P - } // Loop over rows of P - - Kokkos::deep_copy(colIndPt, colIndPt_h); - Kokkos::deep_copy(valuesPt, valuesPt_h); - - - local_matrix_type intermediatePt("intermediatePt", - intermediateP.numCols(), - intermediateP.numRows(), - intermediateP.nnz(), - valuesPt, rowPtrPt, colIndPt); - - // Create views for coarseA matrix - localSpGEMM(intermediatePt, AP, "coarseA", coarseA); - } // BuildCoarseLocalMatrix - - template - void NotayAggregationFactory:: - localSpGEMM(const typename Matrix::local_matrix_type& A, - const typename Matrix::local_matrix_type& B, - const std::string matrixLabel, - typename Matrix::local_matrix_type& C) const { - - using local_graph_type = typename local_matrix_type::staticcrsgraph_type; - using values_type = typename local_matrix_type::values_type; - using size_type = typename local_graph_type::size_type; - using col_index_type = typename local_graph_type::data_type; - using array_layout = typename 
local_graph_type::array_layout; - using memory_space = typename device_type::memory_space; - using memory_traits = typename local_graph_type::memory_traits; - using row_pointer_type = Kokkos::View; - using col_indices_type = Kokkos::View; - - // Options - int team_work_size = 16; - std::string myalg("SPGEMM_KK_MEMORY"); - KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg); - KokkosKernels::Experimental::KokkosKernelsHandle kh; - kh.create_spgemm_handle(alg_enum); - kh.set_team_work_size(team_work_size); - - // Create views for AP matrix - row_pointer_type rowPtrC(Kokkos::ViewAllocateWithoutInitializing("C row pointer"), - A.numRows() + 1); - col_indices_type colIndC; - values_type valuesC; - - // Symbolic multiplication - KokkosSparse::Experimental::spgemm_symbolic(&kh, A.numRows(), - B.numRows(), B.numCols(), - A.graph.row_map, A.graph.entries, false, - B.graph.row_map, B.graph.entries, false, - rowPtrC); - - // allocate column indices and values of AP - size_t nnzC = kh.get_spgemm_handle()->get_c_nnz(); - if (nnzC) { - colIndC = col_indices_type(Kokkos::ViewAllocateWithoutInitializing("C column inds"), nnzC); - valuesC = values_type(Kokkos::ViewAllocateWithoutInitializing("C values"), nnzC); - } + Kokkos::deep_copy(rowPtr, rowPtr_h); + Kokkos::deep_copy(colInd, colInd_h); + Kokkos::deep_copy( + values, Kokkos::ArithTraits::one()); + + intermediateP = + local_matrix_type("intermediateP", numRows, numLocalAggregates, + intermediatePnnz, values, rowPtr, colInd); +} // BuildIntermediateProlongator + +template +void NotayAggregationFactory:: + BuildCoarseLocalMatrix( + const typename Matrix::local_matrix_type &intermediateP, + typename Matrix::local_matrix_type &coarseA) const { + Monitor m(*this, "BuildCoarseLocalMatrix"); + + using local_graph_type = typename local_matrix_type::staticcrsgraph_type; + using values_type = typename local_matrix_type::values_type; + using size_type = typename local_graph_type::size_type; + using 
col_index_type = typename local_graph_type::data_type; + using array_layout = typename local_graph_type::array_layout; + using memory_traits = typename local_graph_type::memory_traits; + using row_pointer_type = + Kokkos::View; + using col_indices_type = + Kokkos::View; + + local_matrix_type AP; + localSpGEMM(coarseA, intermediateP, "AP", AP); + + // Note 03/11/20, lbv: does kh need to destroy and recreate the spgemm handle + // I am not sure but doing it for safety in case it stashes data from the + // previous spgemm computation... + + // Compute Ac = Pt * AP + // Two steps needed: + // 1. compute Pt + // 2. perform multiplication + + // Step 1 compute Pt + // Obviously this requires the same amount of storage as P except for the + // rowPtr + row_pointer_type rowPtrPt( + Kokkos::ViewAllocateWithoutInitializing("Pt row pointer"), + intermediateP.numCols() + 1); + col_indices_type colIndPt( + Kokkos::ViewAllocateWithoutInitializing("Pt column indices"), + intermediateP.nnz()); + values_type valuesPt(Kokkos::ViewAllocateWithoutInitializing("Pt values"), + intermediateP.nnz()); + + typename row_pointer_type::HostMirror rowPtrPt_h = + Kokkos::create_mirror_view(rowPtrPt); + typename col_indices_type::HostMirror entries_h = + Kokkos::create_mirror_view(intermediateP.graph.entries); + Kokkos::deep_copy(entries_h, intermediateP.graph.entries); + Kokkos::deep_copy(rowPtrPt_h, 0); + for (size_type entryIdx = 0; entryIdx < intermediateP.nnz(); ++entryIdx) { + rowPtrPt_h(entries_h(entryIdx) + 1) += 1; + } + for (LO rowIdx = 0; rowIdx < intermediateP.numCols(); ++rowIdx) { + rowPtrPt_h(rowIdx + 1) += rowPtrPt_h(rowIdx); + } + Kokkos::deep_copy(rowPtrPt, rowPtrPt_h); + + typename row_pointer_type::HostMirror rowPtrP_h = + Kokkos::create_mirror_view(intermediateP.graph.row_map); + Kokkos::deep_copy(rowPtrP_h, intermediateP.graph.row_map); + typename col_indices_type::HostMirror colIndP_h = + Kokkos::create_mirror_view(intermediateP.graph.entries); + 
Kokkos::deep_copy(colIndP_h, intermediateP.graph.entries); + typename values_type::HostMirror valuesP_h = + Kokkos::create_mirror_view(intermediateP.values); + Kokkos::deep_copy(valuesP_h, intermediateP.values); + typename col_indices_type::HostMirror colIndPt_h = + Kokkos::create_mirror_view(colIndPt); + typename values_type::HostMirror valuesPt_h = + Kokkos::create_mirror_view(valuesPt); + const col_index_type invalidColumnIndex = + KokkosSparse::OrdinalTraits::invalid(); + Kokkos::deep_copy(colIndPt_h, invalidColumnIndex); + + col_index_type colIdx = 0; + for (LO rowIdx = 0; rowIdx < intermediateP.numRows(); ++rowIdx) { + for (size_type entryIdxP = rowPtrP_h(rowIdx); + entryIdxP < rowPtrP_h(rowIdx + 1); ++entryIdxP) { + colIdx = entries_h(entryIdxP); + for (size_type entryIdxPt = rowPtrPt_h(colIdx); + entryIdxPt < rowPtrPt_h(colIdx + 1); ++entryIdxPt) { + if (colIndPt_h(entryIdxPt) == invalidColumnIndex) { + colIndPt_h(entryIdxPt) = rowIdx; + valuesPt_h(entryIdxPt) = valuesP_h(entryIdxP); + break; + } + } // Loop over entries in row of Pt + } // Loop over entries in row of P + } // Loop over rows of P + + Kokkos::deep_copy(colIndPt, colIndPt_h); + Kokkos::deep_copy(valuesPt, valuesPt_h); + + local_matrix_type intermediatePt("intermediatePt", intermediateP.numCols(), + intermediateP.numRows(), intermediateP.nnz(), + valuesPt, rowPtrPt, colIndPt); + + // Create views for coarseA matrix + localSpGEMM(intermediatePt, AP, "coarseA", coarseA); +} // BuildCoarseLocalMatrix + +template +void NotayAggregationFactory:: + localSpGEMM(const typename Matrix::local_matrix_type &A, + const typename Matrix::local_matrix_type &B, + const std::string matrixLabel, + typename Matrix::local_matrix_type &C) const { + + using local_graph_type = typename local_matrix_type::staticcrsgraph_type; + using values_type = typename local_matrix_type::values_type; + using size_type = typename local_graph_type::size_type; + using col_index_type = typename local_graph_type::data_type; + using 
array_layout = typename local_graph_type::array_layout; + using memory_space = typename device_type::memory_space; + using memory_traits = typename local_graph_type::memory_traits; + using row_pointer_type = + Kokkos::View; + using col_indices_type = + Kokkos::View; + + // Options + int team_work_size = 16; + std::string myalg("SPGEMM_KK_MEMORY"); + KokkosSparse::SPGEMMAlgorithm alg_enum = + KokkosSparse::StringToSPGEMMAlgorithm(myalg); + KokkosKernels::Experimental::KokkosKernelsHandle< + typename row_pointer_type::const_value_type, + typename col_indices_type::const_value_type, + typename values_type::const_value_type, execution_space, memory_space, + memory_space> + kh; + kh.create_spgemm_handle(alg_enum); + kh.set_team_work_size(team_work_size); + + // Create views for AP matrix + row_pointer_type rowPtrC( + Kokkos::ViewAllocateWithoutInitializing("C row pointer"), + A.numRows() + 1); + col_indices_type colIndC; + values_type valuesC; + + // Symbolic multiplication + KokkosSparse::Experimental::spgemm_symbolic( + &kh, A.numRows(), B.numRows(), B.numCols(), A.graph.row_map, + A.graph.entries, false, B.graph.row_map, B.graph.entries, false, rowPtrC); + + // allocate column indices and values of AP + size_t nnzC = kh.get_spgemm_handle()->get_c_nnz(); + if (nnzC) { + colIndC = col_indices_type( + Kokkos::ViewAllocateWithoutInitializing("C column inds"), nnzC); + valuesC = + values_type(Kokkos::ViewAllocateWithoutInitializing("C values"), nnzC); + } - // Numeric multiplication - KokkosSparse::Experimental::spgemm_numeric(&kh, A.numRows(), - B.numRows(), B.numCols(), - A.graph.row_map, A.graph.entries, A.values, false, - B.graph.row_map, B.graph.entries, B.values, false, - rowPtrC, colIndC, valuesC); - kh.destroy_spgemm_handle(); + // Numeric multiplication + KokkosSparse::Experimental::spgemm_numeric( + &kh, A.numRows(), B.numRows(), B.numCols(), A.graph.row_map, + A.graph.entries, A.values, false, B.graph.row_map, B.graph.entries, + B.values, false, rowPtrC, 
colIndC, valuesC); + kh.destroy_spgemm_handle(); - C = local_matrix_type(matrixLabel, A.numRows(), B.numCols(), nnzC, valuesC, rowPtrC, colIndC); + C = local_matrix_type(matrixLabel, A.numRows(), B.numCols(), nnzC, valuesC, + rowPtrC, colIndC); - } // localSpGEMM +} // localSpGEMM -} //namespace MueLu +} // namespace MueLu #endif /* MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_decl.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_decl.hpp index 2b89f58299a9..26d4a8cc7cbc 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_decl.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_decl.hpp @@ -46,88 +46,92 @@ #ifndef MUELU_AGGREGATIONSTRUCTUREDALGORITHM_DECL_HPP_ #define MUELU_AGGREGATIONSTRUCTUREDALGORITHM_DECL_HPP_ -#include "MueLu_ConfigDefs.hpp" #include "MueLu_AggregationAlgorithmBase.hpp" #include "MueLu_AggregationStructuredAlgorithm_fwd.hpp" +#include "MueLu_ConfigDefs.hpp" -#include "MueLu_FactoryBase_fwd.hpp" #include "MueLu_Aggregates_fwd.hpp" -#include "MueLu_IndexManager_fwd.hpp" +#include "MueLu_FactoryBase_fwd.hpp" #include "MueLu_GraphBase.hpp" +#include "MueLu_IndexManager_fwd.hpp" namespace MueLu { - /*! - @class AggregationStructuredAlgorithm class. - @brief Algorithm for coarsening a graph with structured aggregation. - - @ingroup Aggregation - - ### Idea ### - Use the logical indexing of the mesh to obtain a very regular aggregation pattern and maintain - lines and planes of the problem as they might be useful to the smoother. - This algorithms is also very easy to parallelize on node due to its very regular and predictible - memory access patern. 
- - ### Parameters ### - Parameter | Meaning - ----------|-------- - aggregation: coarsen | describe the coarsening rate to be used in each direction - */ - - template - class AggregationStructuredAlgorithm : - public MueLu::AggregationAlgorithmBase { +/*! + @class AggregationStructuredAlgorithm class. + @brief Algorithm for coarsening a graph with structured aggregation. + + @ingroup Aggregation + + ### Idea ### + Use the logical indexing of the mesh to obtain a very regular aggregation + pattern and maintain lines and planes of the problem as they might be useful + to the smoother. This algorithms is also very easy to parallelize on node due + to its very regular and predictible memory access patern. + + ### Parameters ### + Parameter | Meaning + ----------|-------- + aggregation: coarsen | describe the coarsening rate to be used in each + direction +*/ + +template +class AggregationStructuredAlgorithm + : public MueLu::AggregationAlgorithmBase { #undef MUELU_AGGREGATIONSTRUCTUREDALGORITHM_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ - - //! Constructor. - AggregationStructuredAlgorithm(const RCP& /* graphFact */ = Teuchos::null) { } - - //! Destructor. - virtual ~AggregationStructuredAlgorithm() { } - - //@} +public: + //! @name Constructors/Destructors. + //@{ + //! Constructor. + AggregationStructuredAlgorithm( + const RCP & /* graphFact */ = Teuchos::null) {} - //! @name Aggregation methods. - //@{ + //! Destructor. + virtual ~AggregationStructuredAlgorithm() {} - /*! @brief Local aggregation. */ + //@} - void BuildAggregates(const Teuchos::ParameterList& params, const GraphBase& graph, - Aggregates& aggregates, std::vector& aggStat, - LO& numNonAggregatedNodes) const; + //! @name Aggregation methods. + //@{ - /*! @brief Local aggregation. */ + /*! @brief Local aggregation. 
*/ - void BuildGraph(const GraphBase& graph, RCP& geoData, const LO dofsPerNode, - RCP& myGraph, RCP& coarseCoordinatesFineMap, - RCP& coarseCoordinatesMap) const; - //@} + void BuildAggregates(const Teuchos::ParameterList ¶ms, + const GraphBase &graph, Aggregates &aggregates, + std::vector &aggStat, + LO &numNonAggregatedNodes) const; - std::string description() const { return "Aggretation: structured algorithm"; } + /*! @brief Local aggregation. */ - private: + void BuildGraph(const GraphBase &graph, RCP &geoData, + const LO dofsPerNode, RCP &myGraph, + RCP &coarseCoordinatesFineMap, + RCP &coarseCoordinatesMap) const; + //@} - void ComputeGraphDataConstant(const GraphBase& graph, RCP& geoData, - const LO dofsPerNode, const int numInterpolationPoints, - ArrayRCP& nnzOnRow, Array& rowPtr, - Array& colIndex) const; + std::string description() const { + return "Aggretation: structured algorithm"; + } - void ComputeGraphDataLinear(const GraphBase& graph, RCP& geoData, - const LO dofsPerNode, const int numInterpolationPoints, - ArrayRCP& nnzOnRow, Array& rowPtr, - Array& colIndex) const; +private: + void ComputeGraphDataConstant( + const GraphBase &graph, RCP &geoData, const LO dofsPerNode, + const int numInterpolationPoints, ArrayRCP &nnzOnRow, + Array &rowPtr, Array &colIndex) const; - }; + void ComputeGraphDataLinear(const GraphBase &graph, + RCP &geoData, const LO dofsPerNode, + const int numInterpolationPoints, + ArrayRCP &nnzOnRow, Array &rowPtr, + Array &colIndex) const; +}; -} //namespace MueLu +} // namespace MueLu #define MUELU_AGGREGATIONSTRUCTUREDALGORITHM_SHORT #endif /* MUELU_AGGREGATIONSTRUCTUREDALGORITHM_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_def.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_def.hpp index 325b932fb453..e056f9383535 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_def.hpp +++ 
b/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_def.hpp @@ -46,389 +46,422 @@ #ifndef MUELU_AGGREGATIONSTRUCTUREDALGORITHM_DEF_HPP_ #define MUELU_AGGREGATIONSTRUCTUREDALGORITHM_DEF_HPP_ - #include #include -#include -#include -#include #include +#include +#include +#include #include "MueLu_AggregationStructuredAlgorithm_decl.hpp" -#include "MueLu_GraphBase.hpp" #include "MueLu_Aggregates.hpp" -#include "MueLu_IndexManager.hpp" #include "MueLu_Exceptions.hpp" +#include "MueLu_GraphBase.hpp" +#include "MueLu_IndexManager.hpp" #include "MueLu_Monitor.hpp" namespace MueLu { - template - void AggregationStructuredAlgorithm:: - BuildAggregates(const Teuchos::ParameterList& /* params */, const GraphBase& graph, - Aggregates& aggregates, std::vector& aggStat, - LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildAggregates"); - - RCP out; - if(const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); - } - - RCP geoData = aggregates.GetIndexManager(); - const bool coupled = geoData->isAggregationCoupled(); - const bool singleCoarsePoint = geoData->isSingleCoarsePoint(); - ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); - ArrayRCP procWinner = aggregates.GetProcWinner() ->getDataNonConst(0); - Array ghostedCoarseNodeCoarseLIDs; - Array ghostedCoarseNodeCoarsePIDs; - Array ghostedCoarseNodeCoarseGIDs; - - *out << "Extract data for ghosted nodes" << std::endl; - geoData->getGhostedNodesData(graph.GetDomainMap(), ghostedCoarseNodeCoarseLIDs, - ghostedCoarseNodeCoarsePIDs, ghostedCoarseNodeCoarseGIDs); - - LO rem, rate; - Array ghostedIdx(3), coarseIdx(3); - LO ghostedCoarseNodeCoarseLID, aggId; - *out << "Loop over fine nodes and assign them to an aggregate and a rank" << std::endl; - for(LO 
nodeIdx = 0; nodeIdx < geoData->getNumLocalFineNodes(); ++nodeIdx) { - // Compute coarse ID associated with fine LID - geoData->getFineNodeGhostedTuple(nodeIdx, ghostedIdx[0], ghostedIdx[1], ghostedIdx[2]); - - for(int dim = 0; dim < 3; ++dim) { - if(singleCoarsePoint - && (geoData->getLocalFineNodesInDir(dim) - 1 < geoData->getCoarseningRate(dim))) { - coarseIdx[dim] = 0; +template +void AggregationStructuredAlgorithm:: + BuildAggregates(const Teuchos::ParameterList & /* params */, + const GraphBase &graph, Aggregates &aggregates, + std::vector &aggStat, + LO &numNonAggregatedNodes) const { + Monitor m(*this, "BuildAggregates"); + + RCP out; + if (const char *dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } + + RCP geoData = aggregates.GetIndexManager(); + const bool coupled = geoData->isAggregationCoupled(); + const bool singleCoarsePoint = geoData->isSingleCoarsePoint(); + ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); + ArrayRCP procWinner = aggregates.GetProcWinner()->getDataNonConst(0); + Array ghostedCoarseNodeCoarseLIDs; + Array ghostedCoarseNodeCoarsePIDs; + Array ghostedCoarseNodeCoarseGIDs; + + *out << "Extract data for ghosted nodes" << std::endl; + geoData->getGhostedNodesData( + graph.GetDomainMap(), ghostedCoarseNodeCoarseLIDs, + ghostedCoarseNodeCoarsePIDs, ghostedCoarseNodeCoarseGIDs); + + LO rem, rate; + Array ghostedIdx(3), coarseIdx(3); + LO ghostedCoarseNodeCoarseLID, aggId; + *out << "Loop over fine nodes and assign them to an aggregate and a rank" + << std::endl; + for (LO nodeIdx = 0; nodeIdx < geoData->getNumLocalFineNodes(); ++nodeIdx) { + // Compute coarse ID associated with fine LID + geoData->getFineNodeGhostedTuple(nodeIdx, ghostedIdx[0], ghostedIdx[1], + ghostedIdx[2]); + + for (int dim = 0; dim 
< 3; ++dim) { + if (singleCoarsePoint && (geoData->getLocalFineNodesInDir(dim) - 1 < + geoData->getCoarseningRate(dim))) { + coarseIdx[dim] = 0; + } else { + coarseIdx[dim] = ghostedIdx[dim] / geoData->getCoarseningRate(dim); + rem = ghostedIdx[dim] % geoData->getCoarseningRate(dim); + if (ghostedIdx[dim] - geoData->getOffset(dim) < + geoData->getLocalFineNodesInDir(dim) - + geoData->getCoarseningEndRate(dim)) { + rate = geoData->getCoarseningRate(dim); } else { - coarseIdx[dim] = ghostedIdx[dim] / geoData->getCoarseningRate(dim); - rem = ghostedIdx[dim] % geoData->getCoarseningRate(dim); - if(ghostedIdx[dim] - geoData->getOffset(dim) - < geoData->getLocalFineNodesInDir(dim) - geoData->getCoarseningEndRate(dim)) { - rate = geoData->getCoarseningRate(dim); - } else { - rate = geoData->getCoarseningEndRate(dim); - } - if(rem > (rate / 2)) {++coarseIdx[dim];} - if(coupled && (geoData->getStartGhostedCoarseNode(dim)*geoData->getCoarseningRate(dim) - > geoData->getStartIndex(dim))) {--coarseIdx[dim];} + rate = geoData->getCoarseningEndRate(dim); } - } - - geoData->getCoarseNodeGhostedLID(coarseIdx[0], coarseIdx[1], coarseIdx[2], - ghostedCoarseNodeCoarseLID); - - aggId = ghostedCoarseNodeCoarseLIDs[ghostedCoarseNodeCoarseLID]; - vertex2AggId[nodeIdx] = aggId; - procWinner[nodeIdx] = ghostedCoarseNodeCoarsePIDs[ghostedCoarseNodeCoarseLID]; - aggStat[nodeIdx] = AGGREGATED; - --numNonAggregatedNodes; - - } // Loop over fine points - } // BuildAggregates() - - - template - void AggregationStructuredAlgorithm:: - BuildGraph(const GraphBase& graph, RCP& geoData, const LO dofsPerNode, - RCP& myGraph, RCP& coarseCoordinatesFineMap, - RCP& coarseCoordinatesMap) const { - Monitor m(*this, "BuildGraphP"); - - RCP out; - if(const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new 
Teuchos::oblackholestream())); - } - - const bool coupled = geoData->isAggregationCoupled(); - - // Compute the number of coarse points needed to interpolate quantities to a fine point - int numInterpolationPoints = 0; - if(geoData->getInterpolationOrder() == 0) { - numInterpolationPoints = 1; - } else if(geoData->getInterpolationOrder() == 1) { - // Compute 2^numDimensions using bit logic to avoid round-off errors - numInterpolationPoints = 1 << geoData->getNumDimensions(); - } - *out << "numInterpolationPoints=" << numInterpolationPoints << std::endl; - - Array colIndex((geoData->getNumLocalCoarseNodes() + numInterpolationPoints* - (geoData->getNumLocalFineNodes() - geoData->getNumLocalCoarseNodes()))*dofsPerNode); - Array rowPtr(geoData->getNumLocalFineNodes()*dofsPerNode + 1); - rowPtr[0] = 0; - ArrayRCP nnzOnRow(geoData->getNumLocalFineNodes()*dofsPerNode); - - *out << "Compute prolongatorGraph data" << std::endl; - if(geoData->getInterpolationOrder() == 0) { - ComputeGraphDataConstant(graph, geoData, dofsPerNode, numInterpolationPoints, - nnzOnRow, rowPtr, colIndex); - } else if(geoData->getInterpolationOrder() == 1) { - ComputeGraphDataLinear(graph, geoData, dofsPerNode, numInterpolationPoints, - nnzOnRow, rowPtr, colIndex); - } - - // Compute graph's rowMap, colMap and domainMap - RCP rowMap = MapFactory::Build(graph.GetDomainMap(), dofsPerNode); - RCP colMap, domainMap; - *out << "Compute domain and column maps of the CrsGraph" << std::endl; - if(coupled){ - *out << "Extract data for ghosted nodes" << std::endl; - Array ghostedCoarseNodeCoarseLIDs; - Array ghostedCoarseNodeCoarsePIDs; - Array ghostedCoarseNodeCoarseGIDs; - geoData->getGhostedNodesData(graph.GetDomainMap(), ghostedCoarseNodeCoarseLIDs, - ghostedCoarseNodeCoarsePIDs, ghostedCoarseNodeCoarseGIDs); - - // In this case we specify the global number of nodes on the coarse mesh - // as well as the GIDs needed on rank. 
- colMap = MapFactory::Build(graph.GetDomainMap()->lib(), - geoData->getNumGlobalCoarseNodes(), - ghostedCoarseNodeCoarseGIDs(), - graph.GetDomainMap()->getIndexBase(), - graph.GetDomainMap()->getComm()); - - LO coarseNodeIdx = 0; - Array coarseNodeCoarseGIDs, coarseNodeFineGIDs; - geoData->getCoarseNodesData(graph.GetDomainMap(), coarseNodeCoarseGIDs, coarseNodeFineGIDs); - for(LO nodeIdx = 0; nodeIdx < ghostedCoarseNodeCoarseGIDs.size(); ++nodeIdx) { - if(ghostedCoarseNodeCoarsePIDs[nodeIdx] == colMap->getComm()->getRank()) { - coarseNodeCoarseGIDs[coarseNodeIdx] = ghostedCoarseNodeCoarseGIDs[nodeIdx]; - ++coarseNodeIdx; + if (rem > (rate / 2)) { + ++coarseIdx[dim]; + } + if (coupled && (geoData->getStartGhostedCoarseNode(dim) * + geoData->getCoarseningRate(dim) > + geoData->getStartIndex(dim))) { + --coarseIdx[dim]; } } - domainMap = MapFactory::Build(graph.GetDomainMap()->lib(), - geoData->getNumGlobalCoarseNodes(), - coarseNodeCoarseGIDs(), - graph.GetDomainMap()->getIndexBase(), - graph.GetDomainMap()->getComm()); - coarseCoordinatesMap = MapFactory::Build(graph.GetDomainMap()->lib(), - geoData->getNumGlobalCoarseNodes(), - coarseNodeCoarseGIDs(), - graph.GetDomainMap()->getIndexBase(), - graph.GetDomainMap()->getComm()); - coarseCoordinatesFineMap = MapFactory::Build(graph.GetDomainMap()->lib(), - geoData->getNumGlobalCoarseNodes(), - coarseNodeFineGIDs(), - graph.GetDomainMap()->getIndexBase(), - graph.GetDomainMap()->getComm()); - } else { - // In this case the map will compute the global number of nodes on the coarse mesh - // and it will assign GIDs to the local coarse nodes. 
- colMap = MapFactory::Build(graph.GetDomainMap()->lib(), - Teuchos::OrdinalTraits::invalid(), - geoData->getNumLocalCoarseNodes()*dofsPerNode, - graph.GetDomainMap()->getIndexBase(), - graph.GetDomainMap()->getComm()); - domainMap = colMap; - - Array coarseNodeCoarseGIDs(geoData->getNumLocalCoarseNodes()); - Array coarseNodeFineGIDs(geoData->getNumLocalCoarseNodes()); - geoData->getCoarseNodesData(graph.GetDomainMap(), coarseNodeCoarseGIDs, coarseNodeFineGIDs); - coarseCoordinatesMap = MapFactory::Build(graph.GetDomainMap()->lib(), - Teuchos::OrdinalTraits::invalid(), - geoData->getNumLocalCoarseNodes(), - graph.GetDomainMap()->getIndexBase(), - graph.GetDomainMap()->getComm()); - coarseCoordinatesFineMap = MapFactory::Build(graph.GetDomainMap()->lib(), - Teuchos::OrdinalTraits::invalid(), - coarseNodeFineGIDs(), - graph.GetDomainMap()->getIndexBase(), - graph.GetDomainMap()->getComm()); } - *out << "Call constructor of CrsGraph" << std::endl; - myGraph = CrsGraphFactory::Build(rowMap, - colMap, - nnzOnRow); - - *out << "Fill CrsGraph" << std::endl; - LO rowIdx = 0; - for(LO nodeIdx = 0; nodeIdx < geoData->getNumLocalFineNodes(); ++nodeIdx) { - for(LO dof = 0; dof < dofsPerNode; ++dof) { - rowIdx = nodeIdx*dofsPerNode + dof; - myGraph->insertLocalIndices(rowIdx, colIndex(rowPtr[rowIdx], nnzOnRow[rowIdx]) ); + geoData->getCoarseNodeGhostedLID(coarseIdx[0], coarseIdx[1], coarseIdx[2], + ghostedCoarseNodeCoarseLID); + + aggId = ghostedCoarseNodeCoarseLIDs[ghostedCoarseNodeCoarseLID]; + vertex2AggId[nodeIdx] = aggId; + procWinner[nodeIdx] = + ghostedCoarseNodeCoarsePIDs[ghostedCoarseNodeCoarseLID]; + aggStat[nodeIdx] = AGGREGATED; + --numNonAggregatedNodes; + + } // Loop over fine points +} // BuildAggregates() + +template +void AggregationStructuredAlgorithm:: + BuildGraph(const GraphBase &graph, RCP &geoData, + const LO dofsPerNode, RCP &myGraph, + RCP &coarseCoordinatesFineMap, + RCP &coarseCoordinatesMap) const { + Monitor m(*this, "BuildGraphP"); + + RCP out; + 
if (const char *dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } + + const bool coupled = geoData->isAggregationCoupled(); + + // Compute the number of coarse points needed to interpolate quantities to a + // fine point + int numInterpolationPoints = 0; + if (geoData->getInterpolationOrder() == 0) { + numInterpolationPoints = 1; + } else if (geoData->getInterpolationOrder() == 1) { + // Compute 2^numDimensions using bit logic to avoid round-off errors + numInterpolationPoints = 1 << geoData->getNumDimensions(); + } + *out << "numInterpolationPoints=" << numInterpolationPoints << std::endl; + + Array colIndex( + (geoData->getNumLocalCoarseNodes() + + numInterpolationPoints * (geoData->getNumLocalFineNodes() - + geoData->getNumLocalCoarseNodes())) * + dofsPerNode); + Array rowPtr(geoData->getNumLocalFineNodes() * dofsPerNode + 1); + rowPtr[0] = 0; + ArrayRCP nnzOnRow(geoData->getNumLocalFineNodes() * dofsPerNode); + + *out << "Compute prolongatorGraph data" << std::endl; + if (geoData->getInterpolationOrder() == 0) { + ComputeGraphDataConstant(graph, geoData, dofsPerNode, + numInterpolationPoints, nnzOnRow, rowPtr, + colIndex); + } else if (geoData->getInterpolationOrder() == 1) { + ComputeGraphDataLinear(graph, geoData, dofsPerNode, numInterpolationPoints, + nnzOnRow, rowPtr, colIndex); + } + + // Compute graph's rowMap, colMap and domainMap + RCP rowMap = MapFactory::Build(graph.GetDomainMap(), dofsPerNode); + RCP colMap, domainMap; + *out << "Compute domain and column maps of the CrsGraph" << std::endl; + if (coupled) { + *out << "Extract data for ghosted nodes" << std::endl; + Array ghostedCoarseNodeCoarseLIDs; + Array ghostedCoarseNodeCoarsePIDs; + Array ghostedCoarseNodeCoarseGIDs; + geoData->getGhostedNodesData( + graph.GetDomainMap(), 
ghostedCoarseNodeCoarseLIDs, + ghostedCoarseNodeCoarsePIDs, ghostedCoarseNodeCoarseGIDs); + + // In this case we specify the global number of nodes on the coarse mesh + // as well as the GIDs needed on rank. + colMap = MapFactory::Build( + graph.GetDomainMap()->lib(), geoData->getNumGlobalCoarseNodes(), + ghostedCoarseNodeCoarseGIDs(), graph.GetDomainMap()->getIndexBase(), + graph.GetDomainMap()->getComm()); + + LO coarseNodeIdx = 0; + Array coarseNodeCoarseGIDs, coarseNodeFineGIDs; + geoData->getCoarseNodesData(graph.GetDomainMap(), coarseNodeCoarseGIDs, + coarseNodeFineGIDs); + for (LO nodeIdx = 0; nodeIdx < ghostedCoarseNodeCoarseGIDs.size(); + ++nodeIdx) { + if (ghostedCoarseNodeCoarsePIDs[nodeIdx] == + colMap->getComm()->getRank()) { + coarseNodeCoarseGIDs[coarseNodeIdx] = + ghostedCoarseNodeCoarseGIDs[nodeIdx]; + ++coarseNodeIdx; } } - - *out << "Call fillComplete on CrsGraph" << std::endl; - myGraph->fillComplete(domainMap, rowMap); - *out << "Prolongator CrsGraph computed" << std::endl; - - } // BuildGraph() - - - template - void AggregationStructuredAlgorithm:: - ComputeGraphDataConstant(const GraphBase& graph, RCP& geoData, - const LO dofsPerNode, const int /* numInterpolationPoints */, - ArrayRCP& nnzOnRow, Array& rowPtr, - Array& colIndex) const { - - RCP out; - if(const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + domainMap = MapFactory::Build( + graph.GetDomainMap()->lib(), geoData->getNumGlobalCoarseNodes(), + coarseNodeCoarseGIDs(), graph.GetDomainMap()->getIndexBase(), + graph.GetDomainMap()->getComm()); + coarseCoordinatesMap = MapFactory::Build( + graph.GetDomainMap()->lib(), geoData->getNumGlobalCoarseNodes(), + coarseNodeCoarseGIDs(), graph.GetDomainMap()->getIndexBase(), + graph.GetDomainMap()->getComm()); + 
coarseCoordinatesFineMap = MapFactory::Build( + graph.GetDomainMap()->lib(), geoData->getNumGlobalCoarseNodes(), + coarseNodeFineGIDs(), graph.GetDomainMap()->getIndexBase(), + graph.GetDomainMap()->getComm()); + } else { + // In this case the map will compute the global number of nodes on the + // coarse mesh and it will assign GIDs to the local coarse nodes. + colMap = MapFactory::Build( + graph.GetDomainMap()->lib(), Teuchos::OrdinalTraits::invalid(), + geoData->getNumLocalCoarseNodes() * dofsPerNode, + graph.GetDomainMap()->getIndexBase(), graph.GetDomainMap()->getComm()); + domainMap = colMap; + + Array coarseNodeCoarseGIDs(geoData->getNumLocalCoarseNodes()); + Array coarseNodeFineGIDs(geoData->getNumLocalCoarseNodes()); + geoData->getCoarseNodesData(graph.GetDomainMap(), coarseNodeCoarseGIDs, + coarseNodeFineGIDs); + coarseCoordinatesMap = MapFactory::Build( + graph.GetDomainMap()->lib(), Teuchos::OrdinalTraits::invalid(), + geoData->getNumLocalCoarseNodes(), graph.GetDomainMap()->getIndexBase(), + graph.GetDomainMap()->getComm()); + coarseCoordinatesFineMap = MapFactory::Build( + graph.GetDomainMap()->lib(), Teuchos::OrdinalTraits::invalid(), + coarseNodeFineGIDs(), graph.GetDomainMap()->getIndexBase(), + graph.GetDomainMap()->getComm()); + } + + *out << "Call constructor of CrsGraph" << std::endl; + myGraph = CrsGraphFactory::Build(rowMap, colMap, nnzOnRow); + + *out << "Fill CrsGraph" << std::endl; + LO rowIdx = 0; + for (LO nodeIdx = 0; nodeIdx < geoData->getNumLocalFineNodes(); ++nodeIdx) { + for (LO dof = 0; dof < dofsPerNode; ++dof) { + rowIdx = nodeIdx * dofsPerNode + dof; + myGraph->insertLocalIndices(rowIdx, + colIndex(rowPtr[rowIdx], nnzOnRow[rowIdx])); } - - Array ghostedCoarseNodeCoarseLIDs; - Array ghostedCoarseNodeCoarsePIDs; - Array ghostedCoarseNodeCoarseGIDs; - geoData->getGhostedNodesData(graph.GetDomainMap(), ghostedCoarseNodeCoarseLIDs, - ghostedCoarseNodeCoarsePIDs, ghostedCoarseNodeCoarseGIDs); - - LO ghostedCoarseNodeCoarseLID, rem, 
rate; - Array ghostedIdx(3), coarseIdx(3); - for(LO nodeIdx = 0; nodeIdx < geoData->getNumLocalFineNodes(); ++nodeIdx) { - - // Compute coarse ID associated with fine LID - geoData->getFineNodeGhostedTuple(nodeIdx, ghostedIdx[0], ghostedIdx[1], ghostedIdx[2]); - - for(int dim = 0; dim < 3; ++dim) { - if(geoData->isSingleCoarsePoint() - && (geoData->getLocalFineNodesInDir(dim) - 1 < geoData->getCoarseningRate(dim))) { - coarseIdx[dim] = 0; + } + + *out << "Call fillComplete on CrsGraph" << std::endl; + myGraph->fillComplete(domainMap, rowMap); + *out << "Prolongator CrsGraph computed" << std::endl; + +} // BuildGraph() + +template +void AggregationStructuredAlgorithm:: + ComputeGraphDataConstant(const GraphBase &graph, RCP &geoData, + const LO dofsPerNode, + const int /* numInterpolationPoints */, + ArrayRCP &nnzOnRow, Array &rowPtr, + Array &colIndex) const { + + RCP out; + if (const char *dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } + + Array ghostedCoarseNodeCoarseLIDs; + Array ghostedCoarseNodeCoarsePIDs; + Array ghostedCoarseNodeCoarseGIDs; + geoData->getGhostedNodesData( + graph.GetDomainMap(), ghostedCoarseNodeCoarseLIDs, + ghostedCoarseNodeCoarsePIDs, ghostedCoarseNodeCoarseGIDs); + + LO ghostedCoarseNodeCoarseLID, rem, rate; + Array ghostedIdx(3), coarseIdx(3); + for (LO nodeIdx = 0; nodeIdx < geoData->getNumLocalFineNodes(); ++nodeIdx) { + + // Compute coarse ID associated with fine LID + geoData->getFineNodeGhostedTuple(nodeIdx, ghostedIdx[0], ghostedIdx[1], + ghostedIdx[2]); + + for (int dim = 0; dim < 3; ++dim) { + if (geoData->isSingleCoarsePoint() && + (geoData->getLocalFineNodesInDir(dim) - 1 < + geoData->getCoarseningRate(dim))) { + coarseIdx[dim] = 0; + } else { + coarseIdx[dim] = ghostedIdx[dim] / geoData->getCoarseningRate(dim); + 
rem = ghostedIdx[dim] % geoData->getCoarseningRate(dim); + if (ghostedIdx[dim] - geoData->getOffset(dim) < + geoData->getLocalFineNodesInDir(dim) - + geoData->getCoarseningEndRate(dim)) { + rate = geoData->getCoarseningRate(dim); } else { - coarseIdx[dim] = ghostedIdx[dim] / geoData->getCoarseningRate(dim); - rem = ghostedIdx[dim] % geoData->getCoarseningRate(dim); - if(ghostedIdx[dim] - geoData->getOffset(dim) - < geoData->getLocalFineNodesInDir(dim) - geoData->getCoarseningEndRate(dim)) { - rate = geoData->getCoarseningRate(dim); - } else { - rate = geoData->getCoarseningEndRate(dim); - } - if(rem > (rate / 2)) {++coarseIdx[dim];} - if( (geoData->getStartGhostedCoarseNode(dim)*geoData->getCoarseningRate(dim) - > geoData->getStartIndex(dim)) && geoData->isAggregationCoupled() ) { - --coarseIdx[dim]; - } + rate = geoData->getCoarseningEndRate(dim); + } + if (rem > (rate / 2)) { + ++coarseIdx[dim]; + } + if ((geoData->getStartGhostedCoarseNode(dim) * + geoData->getCoarseningRate(dim) > + geoData->getStartIndex(dim)) && + geoData->isAggregationCoupled()) { + --coarseIdx[dim]; } } - - geoData->getCoarseNodeGhostedLID(coarseIdx[0], coarseIdx[1], coarseIdx[2], - ghostedCoarseNodeCoarseLID); - - for(LO dof = 0; dof < dofsPerNode; ++dof) { - nnzOnRow[nodeIdx*dofsPerNode + dof] = 1; - rowPtr[nodeIdx*dofsPerNode + dof + 1] = rowPtr[nodeIdx*dofsPerNode + dof] + 1; - colIndex[rowPtr[nodeIdx*dofsPerNode + dof]] = - ghostedCoarseNodeCoarseLIDs[ghostedCoarseNodeCoarseLID]*dofsPerNode + dof; - } - } // Loop over fine points - - } // ComputeGraphDataConstant() - - - template - void AggregationStructuredAlgorithm:: - ComputeGraphDataLinear(const GraphBase& /* graph */, RCP& geoData, - const LO dofsPerNode, const int numInterpolationPoints, - ArrayRCP& nnzOnRow, Array& rowPtr, - Array& colIndex) const { - - RCP out; - if(const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - 
out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); } - const bool coupled = geoData->isAggregationCoupled(); - const int numDimensions = geoData->getNumDimensions(); - Array ghostedIdx(3,0); - Array coarseIdx(3,0); - Array ijkRem(3,0); - const LO coarsePointOffset[8][3] = {{0, 0, 0}, {1, 0, 0}, {0, 1, 0}, {1, 1, 0}, - {0, 0, 1}, {1, 0, 1}, {0, 1, 1}, {1, 1, 1}}; - - for(LO nodeIdx = 0; nodeIdx < geoData->getNumLocalFineNodes(); ++nodeIdx) { - - // Compute coarse ID associated with fine LID - geoData->getFineNodeGhostedTuple(nodeIdx, ghostedIdx[0], ghostedIdx[1], ghostedIdx[2]); - for(int dim=0; dim < numDimensions; dim++){ - coarseIdx[dim] = ghostedIdx[dim] / geoData->getCoarseningRate(dim); - ijkRem[dim] = ghostedIdx[dim] % geoData->getCoarseningRate(dim); - if(coupled) { - if (geoData->getStartGhostedCoarseNode(dim)*geoData->getCoarseningRate(dim) - > geoData->getStartIndex(dim)) { - --coarseIdx[dim]; - } - } else { - if(ghostedIdx[dim] == geoData->getLocalFineNodesInDir(dim) - 1) { - coarseIdx[dim] = geoData->getLocalCoarseNodesInDir(dim) - 1; - } + geoData->getCoarseNodeGhostedLID(coarseIdx[0], coarseIdx[1], coarseIdx[2], + ghostedCoarseNodeCoarseLID); + + for (LO dof = 0; dof < dofsPerNode; ++dof) { + nnzOnRow[nodeIdx * dofsPerNode + dof] = 1; + rowPtr[nodeIdx * dofsPerNode + dof + 1] = + rowPtr[nodeIdx * dofsPerNode + dof] + 1; + colIndex[rowPtr[nodeIdx * dofsPerNode + dof]] = + ghostedCoarseNodeCoarseLIDs[ghostedCoarseNodeCoarseLID] * + dofsPerNode + + dof; + } + } // Loop over fine points + +} // ComputeGraphDataConstant() + +template +void AggregationStructuredAlgorithm:: + ComputeGraphDataLinear(const GraphBase & /* graph */, + RCP &geoData, const LO dofsPerNode, + const int numInterpolationPoints, + ArrayRCP &nnzOnRow, Array &rowPtr, + Array &colIndex) const { + + RCP out; + if (const char *dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { + out = 
Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } + + const bool coupled = geoData->isAggregationCoupled(); + const int numDimensions = geoData->getNumDimensions(); + Array ghostedIdx(3, 0); + Array coarseIdx(3, 0); + Array ijkRem(3, 0); + const LO coarsePointOffset[8][3] = {{0, 0, 0}, {1, 0, 0}, {0, 1, 0}, + {1, 1, 0}, {0, 0, 1}, {1, 0, 1}, + {0, 1, 1}, {1, 1, 1}}; + + for (LO nodeIdx = 0; nodeIdx < geoData->getNumLocalFineNodes(); ++nodeIdx) { + + // Compute coarse ID associated with fine LID + geoData->getFineNodeGhostedTuple(nodeIdx, ghostedIdx[0], ghostedIdx[1], + ghostedIdx[2]); + for (int dim = 0; dim < numDimensions; dim++) { + coarseIdx[dim] = ghostedIdx[dim] / geoData->getCoarseningRate(dim); + ijkRem[dim] = ghostedIdx[dim] % geoData->getCoarseningRate(dim); + if (coupled) { + if (geoData->getStartGhostedCoarseNode(dim) * + geoData->getCoarseningRate(dim) > + geoData->getStartIndex(dim)) { + --coarseIdx[dim]; + } + } else { + if (ghostedIdx[dim] == geoData->getLocalFineNodesInDir(dim) - 1) { + coarseIdx[dim] = geoData->getLocalCoarseNodesInDir(dim) - 1; } } + } - // Fill Graph - // Check if Fine node lies on Coarse Node - bool allCoarse = true; - Array isCoarse(numDimensions); - for(int dim = 0; dim < numDimensions; ++dim) { - isCoarse[dim] = false; - if(ijkRem[dim] == 0) + // Fill Graph + // Check if Fine node lies on Coarse Node + bool allCoarse = true; + Array isCoarse(numDimensions); + for (int dim = 0; dim < numDimensions; ++dim) { + isCoarse[dim] = false; + if (ijkRem[dim] == 0) + isCoarse[dim] = true; + + if (coupled) { + if (ghostedIdx[dim] - geoData->getOffset(dim) == + geoData->getLocalFineNodesInDir(dim) - 1 && + geoData->getMeshEdge(dim * 2 + 1)) + isCoarse[dim] = true; + } else { + if (ghostedIdx[dim] - geoData->getOffset(dim) == + geoData->getLocalFineNodesInDir(dim) - 1) isCoarse[dim] = true; 
- - if(coupled){ - if( ghostedIdx[dim]-geoData->getOffset(dim) == geoData->getLocalFineNodesInDir(dim)-1 && - geoData->getMeshEdge(dim*2+1) ) - isCoarse[dim] = true; - } else { - if( ghostedIdx[dim]-geoData->getOffset(dim) == geoData->getLocalFineNodesInDir(dim)-1) - isCoarse[dim] = true; - } - - if(!isCoarse[dim]) - allCoarse = false; } - LO rowIdx = 0, colIdx = 0; - if(allCoarse) { - for(LO dof = 0; dof < dofsPerNode; ++dof) { - rowIdx = nodeIdx*dofsPerNode + dof; - nnzOnRow[rowIdx] = 1; - rowPtr[rowIdx + 1] = rowPtr[rowIdx] + 1; - - // Fine node lies on Coarse node, easy case, we only need the LID of the coarse node. - geoData->getCoarseNodeGhostedLID(coarseIdx[0], coarseIdx[1], coarseIdx[2], colIdx); - colIndex[rowPtr[rowIdx]] = colIdx*dofsPerNode + dof; - } - } else { - // Harder case, we need the LIDs of all the coarse nodes contributing to the interpolation - for(int dim = 0; dim < numDimensions; ++dim) { - if(coarseIdx[dim] == geoData->getGhostedNodesInDir(dim) - 1) - --coarseIdx[dim]; - } + if (!isCoarse[dim]) + allCoarse = false; + } - for(LO dof = 0; dof < dofsPerNode; ++dof) { - // at the current node. 
- rowIdx = nodeIdx*dofsPerNode + dof; - nnzOnRow[rowIdx] = Teuchos::as( numInterpolationPoints ); - rowPtr[rowIdx + 1] = rowPtr[rowIdx] + Teuchos::as(numInterpolationPoints); - // Compute Coarse Node LID - for(LO interpIdx = 0; interpIdx < numInterpolationPoints; ++interpIdx) { - geoData->getCoarseNodeGhostedLID(coarseIdx[0] + coarsePointOffset[interpIdx][0], - coarseIdx[1] + coarsePointOffset[interpIdx][1], - coarseIdx[2] + coarsePointOffset[interpIdx][2], - colIdx); - colIndex[rowPtr[rowIdx] + interpIdx] = colIdx*dofsPerNode + dof; - } // Loop over numInterpolationPoints - } // Loop over dofsPerNode + LO rowIdx = 0, colIdx = 0; + if (allCoarse) { + for (LO dof = 0; dof < dofsPerNode; ++dof) { + rowIdx = nodeIdx * dofsPerNode + dof; + nnzOnRow[rowIdx] = 1; + rowPtr[rowIdx + 1] = rowPtr[rowIdx] + 1; + + // Fine node lies on Coarse node, easy case, we only need the LID of the + // coarse node. + geoData->getCoarseNodeGhostedLID(coarseIdx[0], coarseIdx[1], + coarseIdx[2], colIdx); + colIndex[rowPtr[rowIdx]] = colIdx * dofsPerNode + dof; + } + } else { + // Harder case, we need the LIDs of all the coarse nodes contributing to + // the interpolation + for (int dim = 0; dim < numDimensions; ++dim) { + if (coarseIdx[dim] == geoData->getGhostedNodesInDir(dim) - 1) + --coarseIdx[dim]; } - } // Loop over fine points - } // ComputeGraphDataLinear() -} // end namespace + for (LO dof = 0; dof < dofsPerNode; ++dof) { + // at the current node. 
+ rowIdx = nodeIdx * dofsPerNode + dof; + nnzOnRow[rowIdx] = Teuchos::as(numInterpolationPoints); + rowPtr[rowIdx + 1] = + rowPtr[rowIdx] + Teuchos::as(numInterpolationPoints); + // Compute Coarse Node LID + for (LO interpIdx = 0; interpIdx < numInterpolationPoints; + ++interpIdx) { + geoData->getCoarseNodeGhostedLID( + coarseIdx[0] + coarsePointOffset[interpIdx][0], + coarseIdx[1] + coarsePointOffset[interpIdx][1], + coarseIdx[2] + coarsePointOffset[interpIdx][2], colIdx); + colIndex[rowPtr[rowIdx] + interpIdx] = colIdx * dofsPerNode + dof; + } // Loop over numInterpolationPoints + } // Loop over dofsPerNode + } + } // Loop over fine points +} // ComputeGraphDataLinear() +} // namespace MueLu #endif /* MUELU_AGGREGATIONSTRUCTUREDALGORITHM_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_kokkos_decl.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_kokkos_decl.hpp index 5d83bf9a5a42..f363b1d3857a 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_kokkos_decl.hpp @@ -46,178 +46,176 @@ #ifndef MUELU_AGGREGATIONSTRUCTUREDALGORITHM_KOKKOS_DECL_HPP #define MUELU_AGGREGATIONSTRUCTUREDALGORITHM_KOKKOS_DECL_HPP -#include "MueLu_ConfigDefs.hpp" #include "MueLu_AggregationAlgorithmBase_kokkos.hpp" #include "MueLu_AggregationStructuredAlgorithm_kokkos_fwd.hpp" +#include "MueLu_ConfigDefs.hpp" #include "MueLu_Aggregates_fwd.hpp" #include "MueLu_IndexManager_kokkos_fwd.hpp" #include "MueLu_LWGraph_kokkos.hpp" namespace MueLu { - /*! - @class AggregationStructuredAlgorithm class. - @brief Algorithm for coarsening a graph with structured aggregation. 
- - @ingroup Aggregation - - ### Idea ### - Use the logical indexing of the mesh to obtain a very regular aggregation pattern and maintain - lines and planes of the problem as they might be useful to the smoother. - This algorithms is also very easy to parallelize on node due to its very regular and predictible - memory access patern. - All the parameters needed are passed to this class by the StructuredAggregationFactory class. - */ - - template - class AggregationStructuredAlgorithm_kokkos : - public MueLu::AggregationAlgorithmBase_kokkos { +/*! + @class AggregationStructuredAlgorithm class. + @brief Algorithm for coarsening a graph with structured aggregation. + + @ingroup Aggregation + + ### Idea ### + Use the logical indexing of the mesh to obtain a very regular aggregation + pattern and maintain lines and planes of the problem as they might be useful + to the smoother. This algorithms is also very easy to parallelize on node due + to its very regular and predictible memory access patern. All the parameters + needed are passed to this class by the StructuredAggregationFactory class. 
+*/ + +template +class AggregationStructuredAlgorithm_kokkos + : public MueLu::AggregationAlgorithmBase_kokkos { #undef MUELU_AGGREGATIONSTRUCTUREDALGORITHM_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - - using local_graph_type = typename LWGraph_kokkos::local_graph_type; - using non_const_row_map_type = typename local_graph_type::row_map_type::non_const_type; - using size_type = typename local_graph_type::size_type; - using entries_type = typename local_graph_type::entries_type; - using device_type = typename local_graph_type::device_type; - using execution_space = typename local_graph_type::device_type::execution_space; - using memory_space = typename local_graph_type::device_type::memory_space; - - using LOVectorView = decltype(std::declval().getDeviceLocalView(Xpetra::Access::ReadWrite)); - using constIntTupleView = typename Kokkos::View; - using constLOTupleView = typename Kokkos::View; - - //! @name Constructors/Destructors. - //@{ - - //! Constructor. - AggregationStructuredAlgorithm_kokkos() { } - - //! Destructor. - virtual ~AggregationStructuredAlgorithm_kokkos() { } - - //@} - - - //! @name Aggregation methods. - //@{ - - /*! @brief Build aggregates object. */ - - void BuildAggregates(const Teuchos::ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; - - /*! @brief Build a CrsGraph instead of aggregates. 
*/ - - void BuildGraph(const LWGraph_kokkos& graph, - RCP& geoData, - const LO dofsPerNode, - RCP& myGraph) const; - //@} - - std::string description() const { return "Aggretation: structured algorithm"; } - - struct fillAggregatesFunctor{ - - IndexManager_kokkos geoData_; - const int myRank_; - Kokkos::View aggStat_; - LOVectorView vertex2AggID_; - LOVectorView procWinner_; - - fillAggregatesFunctor(RCP geoData, - const int myRank, - Kokkos::View aggStat, - LOVectorView vertex2AggID, - LOVectorView procWinner); - - KOKKOS_INLINE_FUNCTION - void operator() (const LO nodeIdx, LO& lNumAggregatedNodes) const; - - }; // struct fillAggregatesFunctor - - struct computeGraphDataConstantFunctor { - - IndexManager_kokkos geoData_; - const int numGhostedNodes_; - const LO dofsPerNode_; - constIntTupleView coarseRate_; - constIntTupleView endRate_; - constLOTupleView lFineNodesPerDir_; - non_const_row_map_type rowPtr_; - entries_type colIndex_; - - - computeGraphDataConstantFunctor(RCP geoData, - const LO numGhostedNodes, const LO dofsPerNode, - constIntTupleView coarseRate, constIntTupleView endRate, - constLOTupleView lFineNodesPerDir, - non_const_row_map_type rowPtr, entries_type colIndex); - - KOKKOS_INLINE_FUNCTION - void operator() (const LO nodeIdx) const; - - }; // struct computeGraphDataConstantFunctor - - struct computeGraphRowPtrFunctor { - - IndexManager_kokkos geoData_; - const LO dofsPerNode_; - const int numInterpolationPoints_; - const LO numLocalRows_; - constIntTupleView coarseRate_; - constLOTupleView lFineNodesPerDir_; - non_const_row_map_type rowPtr_; - - computeGraphRowPtrFunctor(RCP geoData, - const LO dofsPerNode, - const int numInterpolationPoints, const LO numLocalRows, - constIntTupleView coarseRate, constLOTupleView lFineNodesPerDir, - non_const_row_map_type rowPtr); - - KOKKOS_INLINE_FUNCTION - void operator() (const LO rowIdx, GO& update, const bool final) const; - }; // struct computeGraphRowPtrFunctor - - struct computeGraphDataLinearFunctor { 
- - IndexManager_kokkos geoData_; - const int numDimensions_; - const int numGhostedNodes_; - const LO dofsPerNode_; - const int numInterpolationPoints_; - constIntTupleView coarseRate_; - constIntTupleView endRate_; - constLOTupleView lFineNodesPerDir_; - constLOTupleView ghostedNodesPerDir_; - non_const_row_map_type rowPtr_; - entries_type colIndex_; +public: + using local_graph_type = typename LWGraph_kokkos::local_graph_type; + using non_const_row_map_type = + typename local_graph_type::row_map_type::non_const_type; + using size_type = typename local_graph_type::size_type; + using entries_type = typename local_graph_type::entries_type; + using device_type = typename local_graph_type::device_type; + using execution_space = + typename local_graph_type::device_type::execution_space; + using memory_space = typename local_graph_type::device_type::memory_space; + using LOVectorView = decltype( + std::declval().getDeviceLocalView(Xpetra::Access::ReadWrite)); + using constIntTupleView = typename Kokkos::View; + using constLOTupleView = typename Kokkos::View; + + //! @name Constructors/Destructors. + //@{ + + //! Constructor. + AggregationStructuredAlgorithm_kokkos() {} + + //! Destructor. + virtual ~AggregationStructuredAlgorithm_kokkos() {} + + //@} + + //! @name Aggregation methods. + //@{ + + /*! @brief Build aggregates object. */ + + void BuildAggregates(const Teuchos::ParameterList ¶ms, + const LWGraph_kokkos &graph, Aggregates &aggregates, + Kokkos::View &aggStat, + LO &numNonAggregatedNodes) const; + + /*! @brief Build a CrsGraph instead of aggregates. 
*/ + + void BuildGraph(const LWGraph_kokkos &graph, + RCP &geoData, const LO dofsPerNode, + RCP &myGraph) const; + //@} + + std::string description() const { + return "Aggretation: structured algorithm"; + } + + struct fillAggregatesFunctor { + + IndexManager_kokkos geoData_; + const int myRank_; + Kokkos::View aggStat_; + LOVectorView vertex2AggID_; + LOVectorView procWinner_; + + fillAggregatesFunctor(RCP geoData, const int myRank, + Kokkos::View aggStat, + LOVectorView vertex2AggID, LOVectorView procWinner); + + KOKKOS_INLINE_FUNCTION + void operator()(const LO nodeIdx, LO &lNumAggregatedNodes) const; + + }; // struct fillAggregatesFunctor + + struct computeGraphDataConstantFunctor { + + IndexManager_kokkos geoData_; + const int numGhostedNodes_; + const LO dofsPerNode_; + constIntTupleView coarseRate_; + constIntTupleView endRate_; + constLOTupleView lFineNodesPerDir_; + non_const_row_map_type rowPtr_; + entries_type colIndex_; + + computeGraphDataConstantFunctor( + RCP geoData, const LO numGhostedNodes, + const LO dofsPerNode, constIntTupleView coarseRate, + constIntTupleView endRate, constLOTupleView lFineNodesPerDir, + non_const_row_map_type rowPtr, entries_type colIndex); + + KOKKOS_INLINE_FUNCTION + void operator()(const LO nodeIdx) const; + + }; // struct computeGraphDataConstantFunctor + + struct computeGraphRowPtrFunctor { + + IndexManager_kokkos geoData_; + const LO dofsPerNode_; + const int numInterpolationPoints_; + const LO numLocalRows_; + constIntTupleView coarseRate_; + constLOTupleView lFineNodesPerDir_; + non_const_row_map_type rowPtr_; + + computeGraphRowPtrFunctor(RCP geoData, + const LO dofsPerNode, + const int numInterpolationPoints, + const LO numLocalRows, + constIntTupleView coarseRate, + constLOTupleView lFineNodesPerDir, + non_const_row_map_type rowPtr); + + KOKKOS_INLINE_FUNCTION + void operator()(const LO rowIdx, GO &update, const bool final) const; + }; // struct computeGraphRowPtrFunctor - computeGraphDataLinearFunctor(RCP geoData, 
- const int numDimensions, - const LO numGhostedNodes, const LO dofsPerNode, - const int numInterpolationPoints, - constIntTupleView coarseRate, constIntTupleView endRate, - constLOTupleView lFineNodesPerDir, - constLOTupleView ghostedNodesPerDir, - non_const_row_map_type rowPtr, entries_type colIndex); - - KOKKOS_INLINE_FUNCTION - void operator() (const LO nodeIdx) const; + struct computeGraphDataLinearFunctor { - }; // struct computeGraphDataLinearFunctor - - }; // class AggregationStructuredAlgorithm_kokkos - -} //namespace MueLu + IndexManager_kokkos geoData_; + const int numDimensions_; + const int numGhostedNodes_; + const LO dofsPerNode_; + const int numInterpolationPoints_; + constIntTupleView coarseRate_; + constIntTupleView endRate_; + constLOTupleView lFineNodesPerDir_; + constLOTupleView ghostedNodesPerDir_; + non_const_row_map_type rowPtr_; + entries_type colIndex_; + + computeGraphDataLinearFunctor( + RCP geoData, const int numDimensions, + const LO numGhostedNodes, const LO dofsPerNode, + const int numInterpolationPoints, constIntTupleView coarseRate, + constIntTupleView endRate, constLOTupleView lFineNodesPerDir, + constLOTupleView ghostedNodesPerDir, non_const_row_map_type rowPtr, + entries_type colIndex); + + KOKKOS_INLINE_FUNCTION + void operator()(const LO nodeIdx) const; + + }; // struct computeGraphDataLinearFunctor + +}; // class AggregationStructuredAlgorithm_kokkos + +} // namespace MueLu #define MUELU_AGGREGATIONSTRUCTUREDALGORITHM_KOKKOS_SHORT #endif /* MUELU_AGGREGATIONSTRUCTUREDALGORITHM_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_kokkos_def.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_kokkos_def.hpp index 09ab332b27f4..91b6a74417f4 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_kokkos_def.hpp +++ 
b/packages/muelu/src/Graph/StructuredAggregation/MueLu_AggregationStructuredAlgorithm_kokkos_def.hpp @@ -46,367 +46,383 @@ #ifndef MUELU_AGGREGATIONSTRUCTUREDALGORITHM_KOKKOS_DEF_HPP #define MUELU_AGGREGATIONSTRUCTUREDALGORITHM_KOKKOS_DEF_HPP - #include #include -#include -#include -#include #include +#include +#include +#include #include "MueLu_Exceptions.hpp" #include "MueLu_Monitor.hpp" -#include "MueLu_LWGraph_kokkos.hpp" #include "MueLu_Aggregates.hpp" -#include "MueLu_IndexManager_kokkos.hpp" #include "MueLu_AggregationStructuredAlgorithm_kokkos_decl.hpp" +#include "MueLu_IndexManager_kokkos.hpp" +#include "MueLu_LWGraph_kokkos.hpp" namespace MueLu { - template - void AggregationStructuredAlgorithm_kokkos:: - BuildAggregates(const Teuchos::ParameterList& /* params */, const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildAggregates"); - - RCP out; - if(const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); +template +void AggregationStructuredAlgorithm_kokkos:: + BuildAggregates(const Teuchos::ParameterList & /* params */, + const LWGraph_kokkos &graph, Aggregates &aggregates, + Kokkos::View &aggStat, + LO &numNonAggregatedNodes) const { + Monitor m(*this, "BuildAggregates"); + + RCP out; + if (const char *dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } + + RCP geoData = aggregates.GetIndexManagerKokkos(); + const LO numLocalFineNodes = geoData->getNumLocalFineNodes(); + const LO numCoarseNodes = geoData->getNumCoarseNodes(); + LOVectorView 
vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView( + Xpetra::Access::ReadWrite); + LOVectorView procWinner = + aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite); + + *out << "Loop over fine nodes and assign them to an aggregate and a rank" + << std::endl; + LO numAggregatedNodes; + fillAggregatesFunctor fillAggregates(geoData, graph.GetComm()->getRank(), + aggStat, vertex2AggId, procWinner); + Kokkos::parallel_reduce( + "StructuredAggregation: fill aggregates data", + Kokkos::RangePolicy(0, numLocalFineNodes), + fillAggregates, numAggregatedNodes); + + *out << "numCoarseNodes= " << numCoarseNodes + << ", numAggregatedNodes= " << numAggregatedNodes << std::endl; + numNonAggregatedNodes = numNonAggregatedNodes - numAggregatedNodes; + +} // BuildAggregates() + +template +void AggregationStructuredAlgorithm_kokkos:: + BuildGraph(const LWGraph_kokkos &graph, RCP &geoData, + const LO dofsPerNode, RCP &myGraph) const { + Monitor m(*this, "BuildGraphP"); + + RCP out; + if (const char *dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } + + // Compute the number of coarse points needed to interpolate quantities to a + // fine point + int numInterpolationPoints = 0; + if (geoData->getInterpolationOrder() == 0) { + numInterpolationPoints = 1; + } else if (geoData->getInterpolationOrder() == 1) { + // Compute 2^numDimensions using bit logic to avoid round-off errors from + // std::pow() + numInterpolationPoints = 1 << geoData->getNumDimensions(); + } + *out << "numInterpolationPoints=" << numInterpolationPoints << std::endl; + + const LO numLocalFineNodes = geoData->getNumLocalFineNodes(); + const LO numCoarseNodes = geoData->getNumCoarseNodes(); + const LO numNnzEntries = + dofsPerNode * (numCoarseNodes + numInterpolationPoints * 
+ (numLocalFineNodes - numCoarseNodes)); + + non_const_row_map_type rowPtr("Prolongator graph, rowPtr", + dofsPerNode * (numLocalFineNodes + 1)); + entries_type colIndex("Prolongator graph, colIndices", numNnzEntries); + + *out << "Compute prolongatorGraph data" << std::endl; + if (geoData->getInterpolationOrder() == 0) { + computeGraphDataConstantFunctor computeGraphData( + geoData, numCoarseNodes, dofsPerNode, geoData->getCoarseningRates(), + geoData->getCoarseningEndRates(), geoData->getLocalFineNodesPerDir(), + rowPtr, colIndex); + Kokkos::parallel_for( + "Structured Aggregation: compute loca graph data", + Kokkos::RangePolicy(0, numLocalFineNodes), + computeGraphData); + } else if (geoData->getInterpolationOrder() == 1) { + // Note, lbv 2018-11-08: in the piece-wise linear case I am computing the + // rowPtr using a parallel scan, it might be possible to do something faster + // than that by including this calculation in computeGraphDataLinearFunctor + // but at the moment all the ideas I have include a bunch of if statements + // which I would like to avoid. 
+ computeGraphRowPtrFunctor computeGraphRowPtr( + geoData, dofsPerNode, numInterpolationPoints, numLocalFineNodes, + geoData->getCoarseningRates(), geoData->getLocalFineNodesPerDir(), + rowPtr); + Kokkos::parallel_scan( + "Structured Aggregation: compute rowPtr for prolongator graph", + Kokkos::RangePolicy(0, numLocalFineNodes + 1), + computeGraphRowPtr); + + computeGraphDataLinearFunctor computeGraphData( + geoData, geoData->getNumDimensions(), numCoarseNodes, dofsPerNode, + numInterpolationPoints, geoData->getCoarseningRates(), + geoData->getCoarseningEndRates(), geoData->getLocalFineNodesPerDir(), + geoData->getCoarseNodesPerDir(), rowPtr, colIndex); + Kokkos::parallel_for( + "Structured Aggregation: compute loca graph data", + Kokkos::RangePolicy(0, numLocalFineNodes), + computeGraphData); + } + + local_graph_type myLocalGraph(colIndex, rowPtr); + + // Compute graph's colMap and domainMap + RCP colMap, domainMap; + *out << "Compute domain and column maps of the CrsGraph" << std::endl; + colMap = MapFactory::Build( + graph.GetDomainMap()->lib(), Teuchos::OrdinalTraits::invalid(), + numCoarseNodes, graph.GetDomainMap()->getIndexBase(), + graph.GetDomainMap()->getComm()); + domainMap = colMap; + + myGraph = CrsGraphFactory::Build(myLocalGraph, graph.GetDomainMap(), colMap, + colMap, graph.GetDomainMap()); + +} // BuildGraph() + +template +AggregationStructuredAlgorithm_kokkos:: + fillAggregatesFunctor::fillAggregatesFunctor( + RCP geoData, const int myRank, + Kokkos::View aggStat, + LOVectorView vertex2AggID, LOVectorView procWinner) + : geoData_(*geoData), myRank_(myRank), aggStat_(aggStat), + vertex2AggID_(vertex2AggID), procWinner_(procWinner) {} + +template +KOKKOS_INLINE_FUNCTION void +AggregationStructuredAlgorithm_kokkos:: + fillAggregatesFunctor::operator()(const LO nodeIdx, + LO &lNumAggregatedNodes) const { + // Compute coarse ID associated with fine LID + LO rem, rate; + LO coarseNodeCoarseLID; + LO nodeFineTuple[3], coarseIdx[3]; + auto coarseRate = 
geoData_.getCoarseningRates(); + auto endRate = geoData_.getCoarseningEndRates(); + auto lFineNodesPerDir = geoData_.getLocalFineNodesPerDir(); + // Compute coarse ID associated with fine LID + geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple); + + for (int dim = 0; dim < 3; ++dim) { + coarseIdx[dim] = nodeFineTuple[dim] / coarseRate(dim); + rem = nodeFineTuple[dim] % coarseRate(dim); + rate = (nodeFineTuple[dim] < lFineNodesPerDir(dim) - endRate(dim)) + ? coarseRate(dim) + : endRate(dim); + if (rem > (rate / 2)) { + ++coarseIdx[dim]; } - - RCP geoData = aggregates.GetIndexManagerKokkos(); - const LO numLocalFineNodes= geoData->getNumLocalFineNodes(); - const LO numCoarseNodes = geoData->getNumCoarseNodes(); - LOVectorView vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); - LOVectorView procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite); - - *out << "Loop over fine nodes and assign them to an aggregate and a rank" << std::endl; - LO numAggregatedNodes; - fillAggregatesFunctor fillAggregates(geoData, - graph.GetComm()->getRank(), - aggStat, - vertex2AggId, - procWinner); - Kokkos::parallel_reduce("StructuredAggregation: fill aggregates data", - Kokkos::RangePolicy(0, numLocalFineNodes), - fillAggregates, - numAggregatedNodes); - - *out << "numCoarseNodes= " << numCoarseNodes - << ", numAggregatedNodes= " << numAggregatedNodes << std::endl; - numNonAggregatedNodes = numNonAggregatedNodes - numAggregatedNodes; - - } // BuildAggregates() - - - template - void AggregationStructuredAlgorithm_kokkos:: - BuildGraph(const LWGraph_kokkos& graph, RCP& geoData, const LO dofsPerNode, - RCP& myGraph) const { - Monitor m(*this, "BuildGraphP"); - - RCP out; - if(const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); + } + + 
geoData_.getCoarseTuple2CoarseLID(coarseIdx[0], coarseIdx[1], coarseIdx[2], + coarseNodeCoarseLID); + + vertex2AggID_(nodeIdx, 0) = coarseNodeCoarseLID; + procWinner_(nodeIdx, 0) = myRank_; + aggStat_(nodeIdx) = AGGREGATED; + ++lNumAggregatedNodes; + +} // fillAggregatesFunctor::operator() + +template +AggregationStructuredAlgorithm_kokkos:: + computeGraphDataConstantFunctor::computeGraphDataConstantFunctor( + RCP geoData, const LO NumGhostedNodes, + const LO dofsPerNode, constIntTupleView coarseRate, + constIntTupleView endRate, constLOTupleView lFineNodesPerDir, + non_const_row_map_type rowPtr, entries_type colIndex) + : geoData_(*geoData), numGhostedNodes_(NumGhostedNodes), + dofsPerNode_(dofsPerNode), coarseRate_(coarseRate), endRate_(endRate), + lFineNodesPerDir_(lFineNodesPerDir), rowPtr_(rowPtr), + colIndex_(colIndex) {} // computeGraphDataConstantFunctor() + +template +KOKKOS_INLINE_FUNCTION void +AggregationStructuredAlgorithm_kokkos:: + computeGraphDataConstantFunctor::operator()(const LO nodeIdx) const { + LO nodeFineTuple[3] = {0, 0, 0}; + LO nodeCoarseTuple[3] = {0, 0, 0}; + + // Compute ghosted tuple associated with fine LID + geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple); + + // Compute coarse tuple associated with fine point + // then overwrite it with tuple associated with aggregate + LO rem, rate, coarseNodeCoarseLID; + for (int dim = 0; dim < 3; ++dim) { + nodeCoarseTuple[dim] = nodeFineTuple[dim] / coarseRate_(dim); + rem = nodeFineTuple[dim] % coarseRate_(dim); + if (nodeFineTuple[dim] < (lFineNodesPerDir_(dim) - endRate_(dim))) { + rate = coarseRate_(dim); } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + rate = endRate_(dim); } - - // Compute the number of coarse points needed to interpolate quantities to a fine point - int numInterpolationPoints = 0; - if(geoData->getInterpolationOrder() == 0) { - numInterpolationPoints = 1; - } else if(geoData->getInterpolationOrder() == 1) { - // Compute 
2^numDimensions using bit logic to avoid round-off errors from std::pow() - numInterpolationPoints = 1 << geoData->getNumDimensions(); - } - *out << "numInterpolationPoints=" << numInterpolationPoints << std::endl; - - const LO numLocalFineNodes = geoData->getNumLocalFineNodes(); - const LO numCoarseNodes = geoData->getNumCoarseNodes(); - const LO numNnzEntries = dofsPerNode*(numCoarseNodes + numInterpolationPoints - *(numLocalFineNodes - numCoarseNodes)); - - non_const_row_map_type rowPtr("Prolongator graph, rowPtr", dofsPerNode*(numLocalFineNodes + 1)); - entries_type colIndex("Prolongator graph, colIndices", numNnzEntries); - - *out << "Compute prolongatorGraph data" << std::endl; - if(geoData->getInterpolationOrder() == 0) { - computeGraphDataConstantFunctor computeGraphData(geoData, - numCoarseNodes, - dofsPerNode, - geoData->getCoarseningRates(), - geoData->getCoarseningEndRates(), - geoData->getLocalFineNodesPerDir(), - rowPtr, - colIndex); - Kokkos::parallel_for("Structured Aggregation: compute loca graph data", - Kokkos::RangePolicy(0, numLocalFineNodes), - computeGraphData); - } else if(geoData->getInterpolationOrder() == 1) { - // Note, lbv 2018-11-08: in the piece-wise linear case I am computing the rowPtr - // using a parallel scan, it might be possible to do something faster than that - // by including this calculation in computeGraphDataLinearFunctor but at the moment - // all the ideas I have include a bunch of if statements which I would like to avoid. 
- computeGraphRowPtrFunctor computeGraphRowPtr(geoData, - dofsPerNode, - numInterpolationPoints, - numLocalFineNodes, - geoData->getCoarseningRates(), - geoData->getLocalFineNodesPerDir(), - rowPtr); - Kokkos::parallel_scan("Structured Aggregation: compute rowPtr for prolongator graph", - Kokkos::RangePolicy(0, numLocalFineNodes + 1), - computeGraphRowPtr); - - computeGraphDataLinearFunctor computeGraphData(geoData, - geoData->getNumDimensions(), - numCoarseNodes, - dofsPerNode, - numInterpolationPoints, - geoData->getCoarseningRates(), - geoData->getCoarseningEndRates(), - geoData->getLocalFineNodesPerDir(), - geoData->getCoarseNodesPerDir(), - rowPtr, - colIndex); - Kokkos::parallel_for("Structured Aggregation: compute loca graph data", - Kokkos::RangePolicy(0, numLocalFineNodes), - computeGraphData); - } - - local_graph_type myLocalGraph(colIndex, rowPtr); - - // Compute graph's colMap and domainMap - RCP colMap, domainMap; - *out << "Compute domain and column maps of the CrsGraph" << std::endl; - colMap = MapFactory::Build(graph.GetDomainMap()->lib(), - Teuchos::OrdinalTraits::invalid(), - numCoarseNodes, - graph.GetDomainMap()->getIndexBase(), - graph.GetDomainMap()->getComm()); - domainMap = colMap; - - myGraph = CrsGraphFactory::Build(myLocalGraph, graph.GetDomainMap(), colMap, - colMap, graph.GetDomainMap()); - - } // BuildGraph() - - - template - AggregationStructuredAlgorithm_kokkos:: - fillAggregatesFunctor::fillAggregatesFunctor(RCP geoData, - const int myRank, - Kokkos::View aggStat, - LOVectorView vertex2AggID, - LOVectorView procWinner) : - geoData_(*geoData), myRank_(myRank), aggStat_(aggStat), - vertex2AggID_(vertex2AggID), procWinner_(procWinner) {} - - template - KOKKOS_INLINE_FUNCTION - void AggregationStructuredAlgorithm_kokkos:: - fillAggregatesFunctor::operator() (const LO nodeIdx, LO& lNumAggregatedNodes) const { - // Compute coarse ID associated with fine LID - LO rem, rate; - LO coarseNodeCoarseLID; - LO nodeFineTuple[3], coarseIdx[3]; - 
auto coarseRate = geoData_.getCoarseningRates(); - auto endRate = geoData_.getCoarseningEndRates(); - auto lFineNodesPerDir = geoData_.getLocalFineNodesPerDir(); - // Compute coarse ID associated with fine LID - geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple); - - for(int dim = 0; dim < 3; ++dim) { - coarseIdx[dim] = nodeFineTuple[dim] / coarseRate(dim); - rem = nodeFineTuple[dim] % coarseRate(dim); - rate = (nodeFineTuple[dim] < lFineNodesPerDir(dim) - endRate(dim)) ? coarseRate(dim) : endRate(dim); - if(rem > (rate / 2)) {++coarseIdx[dim];} + if (rem > (rate / 2)) { + ++nodeCoarseTuple[dim]; } - - geoData_.getCoarseTuple2CoarseLID(coarseIdx[0], coarseIdx[1], coarseIdx[2], - coarseNodeCoarseLID); - - vertex2AggID_(nodeIdx, 0) = coarseNodeCoarseLID; - procWinner_(nodeIdx, 0) = myRank_; - aggStat_(nodeIdx) = AGGREGATED; - ++lNumAggregatedNodes; - - } // fillAggregatesFunctor::operator() - - template - AggregationStructuredAlgorithm_kokkos:: - computeGraphDataConstantFunctor:: - computeGraphDataConstantFunctor(RCP geoData, - const LO NumGhostedNodes, - const LO dofsPerNode, - constIntTupleView coarseRate, - constIntTupleView endRate, - constLOTupleView lFineNodesPerDir, - non_const_row_map_type rowPtr, - entries_type colIndex) : geoData_(*geoData), - numGhostedNodes_(NumGhostedNodes), dofsPerNode_(dofsPerNode), - coarseRate_(coarseRate), endRate_(endRate), lFineNodesPerDir_(lFineNodesPerDir), - rowPtr_(rowPtr), colIndex_(colIndex) { - - } // computeGraphDataConstantFunctor() - - template - KOKKOS_INLINE_FUNCTION - void AggregationStructuredAlgorithm_kokkos:: - computeGraphDataConstantFunctor::operator() (const LO nodeIdx) const { - LO nodeFineTuple[3] = {0, 0, 0}; - LO nodeCoarseTuple[3] = {0, 0, 0}; - - // Compute ghosted tuple associated with fine LID + } + + // get LID associted with aggregate + geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], + nodeCoarseTuple[2], coarseNodeCoarseLID); + + // store data into CrsGraph taking care of 
multiple dofs case + for (LO dof = 0; dof < dofsPerNode_; ++dof) { + rowPtr_(nodeIdx * dofsPerNode_ + dof + 1) = + nodeIdx * dofsPerNode_ + dof + 1; + colIndex_(nodeIdx * dofsPerNode_ + dof) = + coarseNodeCoarseLID * dofsPerNode_ + dof; + } + +} // computeGraphDataConstantFunctor::operator() + +template +AggregationStructuredAlgorithm_kokkos:: + computeGraphRowPtrFunctor::computeGraphRowPtrFunctor( + RCP geoData, const LO dofsPerNode, + const int numInterpolationPoints, const LO numLocalRows, + constIntTupleView coarseRate, constLOTupleView lFineNodesPerDir, + non_const_row_map_type rowPtr) + : geoData_(*geoData), dofsPerNode_(dofsPerNode), + numInterpolationPoints_(numInterpolationPoints), + numLocalRows_(numLocalRows), coarseRate_(coarseRate), + lFineNodesPerDir_(lFineNodesPerDir), rowPtr_(rowPtr) {} + +template +KOKKOS_INLINE_FUNCTION void +AggregationStructuredAlgorithm_kokkos:: + computeGraphRowPtrFunctor::operator()(const LO rowIdx, GO &update, + const bool final) const { + if (final) { + // Kokkos uses a multipass algorithm to implement scan. + // Only update the array on the final pass. Updating the + // array before changing 'update' means that we do an + // exclusive scan. Update the array after for an inclusive + // scan. 
+ rowPtr_(rowIdx) = update; + } + if (rowIdx < numLocalRows_) { + LO nodeIdx = rowIdx / dofsPerNode_; + bool allCoarse = true; + LO nodeFineTuple[3] = {0, 0, 0}; geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple); + for (int dim = 0; dim < 3; ++dim) { + const LO rem = nodeFineTuple[dim] % coarseRate_(dim); - // Compute coarse tuple associated with fine point - // then overwrite it with tuple associated with aggregate - LO rem, rate, coarseNodeCoarseLID; - for(int dim = 0; dim < 3; ++dim) { - nodeCoarseTuple[dim] = nodeFineTuple[dim] / coarseRate_(dim); - rem = nodeFineTuple[dim] % coarseRate_(dim); - if( nodeFineTuple[dim] < (lFineNodesPerDir_(dim) - endRate_(dim)) ) { - rate = coarseRate_(dim); - } else { - rate = endRate_(dim); - } - if(rem > (rate / 2)) {++nodeCoarseTuple[dim];} + // Check if Fine node lies on Coarse Node + allCoarse = + (allCoarse && + ((rem == 0) || (nodeFineTuple[dim] == lFineNodesPerDir_(dim) - 1))); } - - // get LID associted with aggregate - geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2], - coarseNodeCoarseLID); - - // store data into CrsGraph taking care of multiple dofs case - for(LO dof = 0; dof < dofsPerNode_; ++dof) { - rowPtr_(nodeIdx*dofsPerNode_ + dof + 1) = nodeIdx*dofsPerNode_ + dof + 1; - colIndex_(nodeIdx*dofsPerNode_ + dof) = coarseNodeCoarseLID*dofsPerNode_ + dof; + update += (allCoarse ? 
1 : numInterpolationPoints_); + } +} // computeGraphRowPtrFunctor::operator() + +template +AggregationStructuredAlgorithm_kokkos:: + computeGraphDataLinearFunctor::computeGraphDataLinearFunctor( + RCP geoData, const int numDimensions, + const LO numGhostedNodes, const LO dofsPerNode, + const int numInterpolationPoints, constIntTupleView coarseRate, + constIntTupleView endRate, constLOTupleView lFineNodesPerDir, + constLOTupleView ghostedNodesPerDir, non_const_row_map_type rowPtr, + entries_type colIndex) + : geoData_(*geoData), numDimensions_(numDimensions), + numGhostedNodes_(numGhostedNodes), dofsPerNode_(dofsPerNode), + numInterpolationPoints_(numInterpolationPoints), coarseRate_(coarseRate), + endRate_(endRate), lFineNodesPerDir_(lFineNodesPerDir), + ghostedNodesPerDir_(ghostedNodesPerDir), rowPtr_(rowPtr), + colIndex_(colIndex) {} // computeGraphDataLinearFunctor() + +template +KOKKOS_INLINE_FUNCTION void +AggregationStructuredAlgorithm_kokkos:: + computeGraphDataLinearFunctor::operator()(const LO nodeIdx) const { + LO nodeFineTuple[3] = {0, 0, 0}; + LO nodeCoarseTuple[3] = {0, 0, 0}; + + // Compute coarse ID associated with fine LID + geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple); + + LO coarseNodeCoarseLID; + bool allCoarse = false; + for (int dim = 0; dim < 3; ++dim) { + nodeCoarseTuple[dim] = nodeFineTuple[dim] / coarseRate_(dim); + } + if (rowPtr_(nodeIdx + 1) == rowPtr_(nodeIdx) + 1) { + allCoarse = true; + } + + geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], + nodeCoarseTuple[2], coarseNodeCoarseLID); + + if (allCoarse) { + // Fine node lies on Coarse node, easy case, we only need the LID of the + // coarse node. 
+ for (LO dof = 0; dof < dofsPerNode_; ++dof) { + colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof)) = + coarseNodeCoarseLID * dofsPerNode_ + dof; } + } else { - } // computeGraphDataConstantFunctor::operator() - - template - AggregationStructuredAlgorithm_kokkos:: - computeGraphRowPtrFunctor::computeGraphRowPtrFunctor(RCP geoData, - const LO dofsPerNode, - const int numInterpolationPoints, - const LO numLocalRows, - constIntTupleView coarseRate, - constLOTupleView lFineNodesPerDir, - non_const_row_map_type rowPtr) : - geoData_(*geoData), dofsPerNode_(dofsPerNode), - numInterpolationPoints_(numInterpolationPoints), numLocalRows_(numLocalRows), - coarseRate_(coarseRate), lFineNodesPerDir_(lFineNodesPerDir), rowPtr_(rowPtr) {} - - template - KOKKOS_INLINE_FUNCTION - void AggregationStructuredAlgorithm_kokkos:: - computeGraphRowPtrFunctor::operator() (const LO rowIdx, GO& update, const bool final) const { - if (final) { - // Kokkos uses a multipass algorithm to implement scan. - // Only update the array on the final pass. Updating the - // array before changing 'update' means that we do an - // exclusive scan. Update the array after for an inclusive - // scan. - rowPtr_(rowIdx) = update; - } - if (rowIdx < numLocalRows_) { - LO nodeIdx = rowIdx / dofsPerNode_; - bool allCoarse = true; - LO nodeFineTuple[3] = {0, 0, 0}; - geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple); - for(int dim = 0; dim < 3; ++dim) { - const LO rem = nodeFineTuple[dim] % coarseRate_(dim); - - // Check if Fine node lies on Coarse Node - allCoarse = (allCoarse && ((rem == 0) || (nodeFineTuple[dim] == lFineNodesPerDir_(dim) - 1))); + for (int dim = 0; dim < numDimensions_; ++dim) { + if (nodeCoarseTuple[dim] == ghostedNodesPerDir_(dim) - 1) { + --nodeCoarseTuple[dim]; } - update += (allCoarse ? 
1 : numInterpolationPoints_); } - } // computeGraphRowPtrFunctor::operator() - - template - AggregationStructuredAlgorithm_kokkos:: - computeGraphDataLinearFunctor::computeGraphDataLinearFunctor(RCP geoData, - const int numDimensions, - const LO numGhostedNodes, - const LO dofsPerNode, - const int numInterpolationPoints, - constIntTupleView coarseRate, - constIntTupleView endRate, - constLOTupleView lFineNodesPerDir, - constLOTupleView ghostedNodesPerDir, - non_const_row_map_type rowPtr, - entries_type colIndex) : - geoData_(*geoData), numDimensions_(numDimensions), - numGhostedNodes_(numGhostedNodes), - dofsPerNode_(dofsPerNode), numInterpolationPoints_(numInterpolationPoints), - coarseRate_(coarseRate), endRate_(endRate), lFineNodesPerDir_(lFineNodesPerDir), - ghostedNodesPerDir_(ghostedNodesPerDir), rowPtr_(rowPtr), colIndex_(colIndex) { - - } // computeGraphDataLinearFunctor() - - template - KOKKOS_INLINE_FUNCTION - void AggregationStructuredAlgorithm_kokkos:: - computeGraphDataLinearFunctor::operator() (const LO nodeIdx) const { - LO nodeFineTuple[3] = {0, 0, 0}; - LO nodeCoarseTuple[3] = {0, 0, 0}; - - // Compute coarse ID associated with fine LID - geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple); - - LO coarseNodeCoarseLID; - bool allCoarse = false; - for(int dim = 0; dim < 3; ++dim) { - nodeCoarseTuple[dim] = nodeFineTuple[dim] / coarseRate_(dim); - } - if(rowPtr_(nodeIdx + 1) == rowPtr_(nodeIdx) + 1) {allCoarse = true;} - - geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2], - coarseNodeCoarseLID); - - if(allCoarse) { - // Fine node lies on Coarse node, easy case, we only need the LID of the coarse node. 
- for(LO dof = 0; dof < dofsPerNode_; ++dof) { - colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)) = coarseNodeCoarseLID*dofsPerNode_ + dof; - } - } else { - - for(int dim = 0; dim < numDimensions_; ++dim) { - if(nodeCoarseTuple[dim] == ghostedNodesPerDir_(dim) - 1) { --nodeCoarseTuple[dim]; } - } - // Compute Coarse Node LID - // Note lbv 10-06-2018: it is likely benefitial to remove the two if statments and somehow - // find out the number of dimensions before calling the opertor() of the functor. - for(LO dof = 0; dof < dofsPerNode_; ++dof) { - geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+0)); - geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0]+1, nodeCoarseTuple[1], nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+1)); - if(numDimensions_ > 1) { - geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0], nodeCoarseTuple[1]+1, nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+2)); - geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0]+1, nodeCoarseTuple[1]+1, nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+3)); - if(numDimensions_ > 2) { - geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+4)); - geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0]+1, nodeCoarseTuple[1], nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+5)); - geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1]+1, nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+6)); - geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0]+1, nodeCoarseTuple[1]+1, nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+7)); - } + // Compute Coarse Node LID + // Note lbv 10-06-2018: it is likely benefitial to remove the two if + // statments and somehow find out the number of dimensions before calling + // the 
opertor() of the functor. + for (LO dof = 0; dof < dofsPerNode_; ++dof) { + geoData_.getCoarseTuple2CoarseLID( + nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2], + colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof) + 0)); + geoData_.getCoarseTuple2CoarseLID( + nodeCoarseTuple[0] + 1, nodeCoarseTuple[1], nodeCoarseTuple[2], + colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof) + 1)); + if (numDimensions_ > 1) { + geoData_.getCoarseTuple2CoarseLID( + nodeCoarseTuple[0], nodeCoarseTuple[1] + 1, nodeCoarseTuple[2], + colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof) + 2)); + geoData_.getCoarseTuple2CoarseLID( + nodeCoarseTuple[0] + 1, nodeCoarseTuple[1] + 1, nodeCoarseTuple[2], + colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof) + 3)); + if (numDimensions_ > 2) { + geoData_.getCoarseTuple2CoarseLID( + nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2] + 1, + colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof) + 4)); + geoData_.getCoarseTuple2CoarseLID( + nodeCoarseTuple[0] + 1, nodeCoarseTuple[1], + nodeCoarseTuple[2] + 1, + colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof) + 5)); + geoData_.getCoarseTuple2CoarseLID( + nodeCoarseTuple[0], nodeCoarseTuple[1] + 1, + nodeCoarseTuple[2] + 1, + colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof) + 6)); + geoData_.getCoarseTuple2CoarseLID( + nodeCoarseTuple[0] + 1, nodeCoarseTuple[1] + 1, + nodeCoarseTuple[2] + 1, + colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof) + 7)); } } } - } // computeGraphDataLinearFunctor::operator() - -} // end namespace + } +} // computeGraphDataLinearFunctor::operator() +} // namespace MueLu #endif /* MUELU_AGGREGATIONSTRUCTUREDALGORITHM_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_decl.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_decl.hpp index 28d26813f5d6..ece66e115654 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_decl.hpp +++ 
b/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_decl.hpp @@ -52,8 +52,8 @@ #include -#include "MueLu_ConfigDefs.hpp" #include "MueLu_BaseClass.hpp" +#include "MueLu_ConfigDefs.hpp" #include "MueLu_IndexManager_fwd.hpp" /***************************************************************************** @@ -75,178 +75,226 @@ namespace MueLu { and local lexicographic mesh orderings are supported. */ - template - class IndexManager : public BaseClass { +template +class IndexManager : public BaseClass { #undef MUELU_INDEXMANAGER_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - private: - - protected: - - const RCP > comm_; ///< Communicator used by uncoupled aggregation - const bool coupled_; ///< Flag for coupled vs uncoupled aggregation mode, if true aggregation is coupled. - const bool singleCoarsePoint_; ///< Flag telling us if can reduce dimensions to a single layer. - const int numDimensions; ///< Number of spacial dimensions in the problem - const int interpolationOrder_; ///< Interpolation order used by grid transfer operators using these aggregates. - - Array coarseRate; ///< coarsening rate in each direction - Array endRate; ///< adapted coarsening rate at the edge of the mesh in each direction. - - GO gNumFineNodes; ///< global number of nodes. - GO gNumFineNodes10; ///< global number of nodes per 0-1 slice. - const Array gFineNodesPerDir; ///< global number of nodes per direction. - - LO lNumFineNodes; ///< local number of nodes. - LO lNumFineNodes10; ///< local number of nodes per 0-1 slice. - const Array lFineNodesPerDir; ///< local number of nodes per direction. +private: +protected: + const RCP> + comm_; ///< Communicator used by uncoupled aggregation + const bool coupled_; ///< Flag for coupled vs uncoupled aggregation mode, if + ///< true aggregation is coupled. + const bool singleCoarsePoint_; ///< Flag telling us if can reduce dimensions + ///< to a single layer. 
+ const int numDimensions; ///< Number of spacial dimensions in the problem + const int interpolationOrder_; ///< Interpolation order used by grid transfer + ///< operators using these aggregates. + + Array coarseRate; ///< coarsening rate in each direction + Array endRate; ///< adapted coarsening rate at the edge of the mesh in + ///< each direction. - GO gNumCoarseNodes; ///< global number of nodes remaining after coarsening. - GO gNumCoarseNodes10; ///< global number of nodes per 0-1 slice remaining after coarsening. - Array gCoarseNodesPerDir; ///< global number of nodes per direction remaining after coarsening. + GO gNumFineNodes; ///< global number of nodes. + GO gNumFineNodes10; ///< global number of nodes per 0-1 slice. + const Array gFineNodesPerDir; ///< global number of nodes per direction. - LO lNumCoarseNodes; ///< local number of nodes remaining after coarsening. - LO lNumCoarseNodes10; ///< local number of nodes per 0-1 slice remaining after coarsening. - Array lCoarseNodesPerDir; ///< local number of nodes per direction remaing after coarsening. + LO lNumFineNodes; ///< local number of nodes. + LO lNumFineNodes10; ///< local number of nodes per 0-1 slice. + const Array lFineNodesPerDir; ///< local number of nodes per direction. - LO numGhostNodes; ///< local number of ghost nodes - LO numGhostedNodes; ///< local number of ghosted nodes (i.e. ghost + coarse nodes). - LO numGhostedNodes10; ///< local number of ghosted nodes (i.e. ghost + coarse nodes) per 0-1 slice. - Array ghostedNodesPerDir; ///< local number of ghosted nodes (i.e. ghost + coarse nodes) per direction + GO gNumCoarseNodes; ///< global number of nodes remaining after coarsening. + GO gNumCoarseNodes10; ///< global number of nodes per 0-1 slice remaining + ///< after coarsening. + Array gCoarseNodesPerDir; ///< global number of nodes per direction + ///< remaining after coarsening. 
- GO minGlobalIndex; ///< lowest GID of any node in the local process - Array offsets; ///< distance between lowest (resp. highest) index to the lowest (resp. highest) ghostedNodeIndex in that direction. - Array coarseNodeOffsets; ///< distance between lowest (resp. highest) index to the lowest (resp. highest) coarseNodeIndex in that direction. - Array startIndices; ///< lowest global tuple (i,j,k) of a node on the local process - Array startGhostedCoarseNode; ///< lowest coarse global tuple (i,j,k) of a node remaing on the local process after coarsening. + LO lNumCoarseNodes; ///< local number of nodes remaining after coarsening. + LO lNumCoarseNodes10; ///< local number of nodes per 0-1 slice remaining after + ///< coarsening. + Array lCoarseNodesPerDir; ///< local number of nodes per direction remaing + ///< after coarsening. - bool meshEdge[6] = {false}; ///< flags indicating if we run into the edge of the mesh in ilo, ihi, jlo, jhi, klo or khi. - bool ghostInterface[6] = {false}; ///< flags indicating if ghost points are needed at ilo, ihi, jlo, jhi, klo and khi boundaries. - bool ghostedDir[6] = {false}; ///< flags indicating if ghost points are needed at ilo, ihi, jlo, jhi, klo and khi boundaries. + LO numGhostNodes; ///< local number of ghost nodes + LO numGhostedNodes; ///< local number of ghosted nodes (i.e. ghost + coarse + ///< nodes). + LO numGhostedNodes10; ///< local number of ghosted nodes (i.e. ghost + coarse + ///< nodes) per 0-1 slice. + Array ghostedNodesPerDir; ///< local number of ghosted nodes (i.e. ghost + + ///< coarse nodes) per direction - public: + GO minGlobalIndex; ///< lowest GID of any node in the local process + Array + offsets; ///< distance between lowest (resp. highest) index to the lowest + ///< (resp. highest) ghostedNodeIndex in that direction. + Array coarseNodeOffsets; ///< distance between lowest (resp. highest) + ///< index to the lowest (resp. highest) + ///< coarseNodeIndex in that direction. 
+ Array startIndices; ///< lowest global tuple (i,j,k) of a node on the + ///< local process + Array startGhostedCoarseNode; ///< lowest coarse global tuple (i,j,k) of a + ///< node remaing on the local process after + ///< coarsening. - IndexManager() = default; + bool meshEdge[6] = {false}; ///< flags indicating if we run into the edge of + ///< the mesh in ilo, ihi, jlo, jhi, klo or khi. + bool ghostInterface[6] = { + false}; ///< flags indicating if ghost points are needed at ilo, ihi, jlo, + ///< jhi, klo and khi boundaries. + bool ghostedDir[6] = { + false}; ///< flags indicating if ghost points are needed at ilo, ihi, jlo, + ///< jhi, klo and khi boundaries. - IndexManager(const RCP > comm, const bool coupled, - const bool singleCoarsePoint, const int NumDimensions, - const int interpolationOrder, const Array GFineNodesPerDir, - const Array LFineNodesPerDir); +public: + IndexManager() = default; - virtual ~IndexManager() {} + IndexManager(const RCP> comm, const bool coupled, + const bool singleCoarsePoint, const int NumDimensions, + const int interpolationOrder, const Array GFineNodesPerDir, + const Array LFineNodesPerDir); - //! Sets basic parameters used to compute indices on the mesh. - //! This method requires you to have set this->coarseRate and this->startIndices. - void computeMeshParameters(); + virtual ~IndexManager() {} - virtual void computeGlobalCoarseParameters() = 0; + //! Sets basic parameters used to compute indices on the mesh. + //! This method requires you to have set this->coarseRate and + //! this->startIndices. 
+ void computeMeshParameters(); - virtual void getGhostedNodesData(const RCP fineMap, - Array& ghostedNodeCoarseLIDs, - Array& ghostedNodeCoarsePIDs, - Array& ghostedNodeCoarseGIDs) const = 0; + virtual void computeGlobalCoarseParameters() = 0; - virtual void getCoarseNodesData(const RCP fineCoordinatesMap, - Array& coarseNodeCoarseGIDs, - Array& coarseNodeFineGIDs) const = 0; + virtual void getGhostedNodesData(const RCP fineMap, + Array &ghostedNodeCoarseLIDs, + Array &ghostedNodeCoarsePIDs, + Array &ghostedNodeCoarseGIDs) const = 0; - bool isAggregationCoupled() const {return coupled_;} + virtual void getCoarseNodesData(const RCP fineCoordinatesMap, + Array &coarseNodeCoarseGIDs, + Array &coarseNodeFineGIDs) const = 0; - bool isSingleCoarsePoint() const {return singleCoarsePoint_;} + bool isAggregationCoupled() const { return coupled_; } - int getNumDimensions() const {return numDimensions;} + bool isSingleCoarsePoint() const { return singleCoarsePoint_; } - int getInterpolationOrder() const {return interpolationOrder_;} + int getNumDimensions() const { return numDimensions; } - GO getNumGlobalFineNodes() const {return gNumFineNodes;} + int getInterpolationOrder() const { return interpolationOrder_; } - GO getNumGlobalCoarseNodes() const {return gNumCoarseNodes;} + GO getNumGlobalFineNodes() const { return gNumFineNodes; } - LO getNumLocalFineNodes() const {return lNumFineNodes;} + GO getNumGlobalCoarseNodes() const { return gNumCoarseNodes; } - LO getNumLocalCoarseNodes() const {return lNumCoarseNodes;} + LO getNumLocalFineNodes() const { return lNumFineNodes; } - LO getNumLocalGhostedNodes() const {return numGhostedNodes;} + LO getNumLocalCoarseNodes() const { return lNumCoarseNodes; } - Array getCoarseningRates() const {return coarseRate;} + LO getNumLocalGhostedNodes() const { return numGhostedNodes; } - int getCoarseningRate(const int dim) const {return coarseRate[dim];} + Array getCoarseningRates() const { return coarseRate; } - Array 
getCoarseningEndRates() const {return endRate;} + int getCoarseningRate(const int dim) const { return coarseRate[dim]; } - int getCoarseningEndRate(const int dim) const {return endRate[dim];} + Array getCoarseningEndRates() const { return endRate; } - bool getMeshEdge(const int dir) const {return meshEdge[dir];} + int getCoarseningEndRate(const int dim) const { return endRate[dim]; } - bool getGhostInterface(const int dir) const {return ghostInterface[dir];} + bool getMeshEdge(const int dir) const { return meshEdge[dir]; } - Array getOffsets() const {return offsets;} + bool getGhostInterface(const int dir) const { return ghostInterface[dir]; } - LO getOffset(int const dim) const {return offsets[dim];} + Array getOffsets() const { return offsets; } - Array getCoarseNodeOffsets() const {return coarseNodeOffsets;} + LO getOffset(int const dim) const { return offsets[dim]; } - LO getCoarseNodeOffset(int const dim) const {return coarseNodeOffsets[dim];} + Array getCoarseNodeOffsets() const { return coarseNodeOffsets; } - Array getStartIndices() const {return startIndices;} + LO getCoarseNodeOffset(int const dim) const { return coarseNodeOffsets[dim]; } - GO getStartIndex(int const dim) const {return startIndices[dim];} + Array getStartIndices() const { return startIndices; } - Array getStartGhostedCoarseNodes() const {return startGhostedCoarseNode;} + GO getStartIndex(int const dim) const { return startIndices[dim]; } - GO getStartGhostedCoarseNode(int const dim) const {return startGhostedCoarseNode[dim];} + Array getStartGhostedCoarseNodes() const { + return startGhostedCoarseNode; + } - Array getLocalFineNodesPerDir() const {return lFineNodesPerDir;} + GO getStartGhostedCoarseNode(int const dim) const { + return startGhostedCoarseNode[dim]; + } - LO getLocalFineNodesInDir(const int dim) const {return lFineNodesPerDir[dim];} + Array getLocalFineNodesPerDir() const { return lFineNodesPerDir; } - Array getGlobalFineNodesPerDir() const {return gFineNodesPerDir;} + LO 
getLocalFineNodesInDir(const int dim) const { + return lFineNodesPerDir[dim]; + } - GO getGlobalFineNodesInDir(const int dim) const {return gFineNodesPerDir[dim];} + Array getGlobalFineNodesPerDir() const { return gFineNodesPerDir; } - Array getLocalCoarseNodesPerDir() const {return lCoarseNodesPerDir;} + GO getGlobalFineNodesInDir(const int dim) const { + return gFineNodesPerDir[dim]; + } - LO getLocalCoarseNodesInDir(const int dim) const {return lCoarseNodesPerDir[dim];} + Array getLocalCoarseNodesPerDir() const { return lCoarseNodesPerDir; } - Array getGlobalCoarseNodesPerDir() const {return gCoarseNodesPerDir;} + LO getLocalCoarseNodesInDir(const int dim) const { + return lCoarseNodesPerDir[dim]; + } - GO getGlobalCoarseNodesInDir(const int dim) const {return gCoarseNodesPerDir[dim];} + Array getGlobalCoarseNodesPerDir() const { return gCoarseNodesPerDir; } - Array getGhostedNodesPerDir() const {return ghostedNodesPerDir;} + GO getGlobalCoarseNodesInDir(const int dim) const { + return gCoarseNodesPerDir[dim]; + } - LO getGhostedNodesInDir(const int dim) const {return ghostedNodesPerDir[dim];} + Array getGhostedNodesPerDir() const { return ghostedNodesPerDir; } - virtual std::vector > getCoarseMeshData() const = 0; + LO getGhostedNodesInDir(const int dim) const { + return ghostedNodesPerDir[dim]; + } - virtual void getFineNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const = 0; + virtual std::vector> getCoarseMeshData() const = 0; - virtual void getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const = 0; + virtual void getFineNodeGlobalTuple(const GO myGID, GO &i, GO &j, + GO &k) const = 0; - virtual void getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const = 0; + virtual void getFineNodeLocalTuple(const LO myLID, LO &i, LO &j, + LO &k) const = 0; - virtual void getFineNodeGID(const GO i, const GO j, const GO k, GO& myGID) const = 0; + virtual void getFineNodeGhostedTuple(const LO myLID, LO &i, LO &j, + LO &k) const = 0; - virtual 
void getFineNodeLID(const LO i, const LO j, const LO k, LO& myLID) const = 0; + virtual void getFineNodeGID(const GO i, const GO j, const GO k, + GO &myGID) const = 0; - virtual void getCoarseNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const = 0; + virtual void getFineNodeLID(const LO i, const LO j, const LO k, + LO &myLID) const = 0; - virtual void getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const = 0; + virtual void getCoarseNodeGlobalTuple(const GO myGID, GO &i, GO &j, + GO &k) const = 0; - virtual void getCoarseNodeGID(const GO i, const GO j, const GO k, GO& myGID) const = 0; + virtual void getCoarseNodeLocalTuple(const LO myLID, LO &i, LO &j, + LO &k) const = 0; - virtual void getCoarseNodeLID(const LO i, const LO j, const LO k, LO& myLID) const = 0; + virtual void getCoarseNodeGID(const GO i, const GO j, const GO k, + GO &myGID) const = 0; - virtual void getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const = 0; + virtual void getCoarseNodeLID(const LO i, const LO j, const LO k, + LO &myLID) const = 0; - virtual void getCoarseNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const = 0; + virtual void getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, + LO &myLID) const = 0; - virtual void getGhostedNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const = 0; + virtual void getCoarseNodeFineLID(const LO i, const LO j, const LO k, + LO &myLID) const = 0; - virtual void getGhostedNodeCoarseLID(const LO i, const LO j, const LO k, LO& myLID) const = 0; + virtual void getGhostedNodeFineLID(const LO i, const LO j, const LO k, + LO &myLID) const = 0; - }; + virtual void getGhostedNodeCoarseLID(const LO i, const LO j, const LO k, + LO &myLID) const = 0; +}; -} //namespace MueLu +} // namespace MueLu #define MUELU_INDEXMANAGER_SHORT #endif // MUELU_INDEXMANAGER_DECL_HPP diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_def.hpp 
b/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_def.hpp index 894e26393906..98b16d4b4dfd 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_def.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_def.hpp @@ -57,220 +57,239 @@ namespace MueLu { - template - IndexManager:: - IndexManager(const RCP > comm, - const bool coupled, - const bool singleCoarsePoint, - const int NumDimensions, - const int interpolationOrder, - const Array GFineNodesPerDir, - const Array LFineNodesPerDir) : - comm_(comm), coupled_(coupled), singleCoarsePoint_(singleCoarsePoint), - numDimensions(NumDimensions), interpolationOrder_(interpolationOrder), - gFineNodesPerDir(GFineNodesPerDir), lFineNodesPerDir(LFineNodesPerDir) { +template +IndexManager::IndexManager( + const RCP> comm, const bool coupled, + const bool singleCoarsePoint, const int NumDimensions, + const int interpolationOrder, const Array GFineNodesPerDir, + const Array LFineNodesPerDir) + : comm_(comm), coupled_(coupled), singleCoarsePoint_(singleCoarsePoint), + numDimensions(NumDimensions), interpolationOrder_(interpolationOrder), + gFineNodesPerDir(GFineNodesPerDir), lFineNodesPerDir(LFineNodesPerDir) { - coarseRate.resize(3); - endRate.resize(3); - gCoarseNodesPerDir.resize(3); - lCoarseNodesPerDir.resize(3); - ghostedNodesPerDir.resize(3); + coarseRate.resize(3); + endRate.resize(3); + gCoarseNodesPerDir.resize(3); + lCoarseNodesPerDir.resize(3); + ghostedNodesPerDir.resize(3); - offsets.resize(3); - coarseNodeOffsets.resize(3); - startIndices.resize(6); - startGhostedCoarseNode.resize(3); + offsets.resize(3); + coarseNodeOffsets.resize(3); + startIndices.resize(6); + startGhostedCoarseNode.resize(3); - } // Constructor +} // Constructor - template - void IndexManager:: - computeMeshParameters() { +template +void IndexManager::computeMeshParameters() { - RCP out; - if(const char* dbg = std::getenv("MUELU_INDEXMANAGER_DEBUG")) { - out = 
Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); - } + RCP out; + if (const char *dbg = std::getenv("MUELU_INDEXMANAGER_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } - if(coupled_) { - gNumFineNodes10 = gFineNodesPerDir[1]*gFineNodesPerDir[0]; - gNumFineNodes = gFineNodesPerDir[2]*gNumFineNodes10; - } else { - gNumFineNodes10 = Teuchos::OrdinalTraits::invalid(); - gNumFineNodes = Teuchos::OrdinalTraits::invalid(); - } - lNumFineNodes10 = lFineNodesPerDir[1]*lFineNodesPerDir[0]; - lNumFineNodes = lFineNodesPerDir[2]*lNumFineNodes10; - for(int dim = 0; dim < 3; ++dim) { - if(dim < numDimensions) { - if(coupled_) { - if(startIndices[dim] == 0) { - meshEdge[2*dim] = true; - } - if(startIndices[dim + 3] + 1 == gFineNodesPerDir[dim]) { - meshEdge[2*dim + 1] = true; - endRate[dim] = startIndices[dim + 3] % coarseRate[dim]; - } - } else { // With uncoupled problem each rank might require a different endRate - meshEdge[2*dim] = true; - meshEdge[2*dim + 1] = true; - endRate[dim] = (lFineNodesPerDir[dim] - 1) % coarseRate[dim]; + if (coupled_) { + gNumFineNodes10 = gFineNodesPerDir[1] * gFineNodesPerDir[0]; + gNumFineNodes = gFineNodesPerDir[2] * gNumFineNodes10; + } else { + gNumFineNodes10 = Teuchos::OrdinalTraits::invalid(); + gNumFineNodes = Teuchos::OrdinalTraits::invalid(); + } + lNumFineNodes10 = lFineNodesPerDir[1] * lFineNodesPerDir[0]; + lNumFineNodes = lFineNodesPerDir[2] * lNumFineNodes10; + for (int dim = 0; dim < 3; ++dim) { + if (dim < numDimensions) { + if (coupled_) { + if (startIndices[dim] == 0) { + meshEdge[2 * dim] = true; + } + if (startIndices[dim + 3] + 1 == gFineNodesPerDir[dim]) { + meshEdge[2 * dim + 1] = true; + endRate[dim] = 
startIndices[dim + 3] % coarseRate[dim]; } - if(endRate[dim] == 0) {endRate[dim] = coarseRate[dim];} + } else { // With uncoupled problem each rank might require a different + // endRate + meshEdge[2 * dim] = true; + meshEdge[2 * dim + 1] = true; + endRate[dim] = (lFineNodesPerDir[dim] - 1) % coarseRate[dim]; + } + if (endRate[dim] == 0) { + endRate[dim] = coarseRate[dim]; + } - // If uncoupled aggregation is used, offsets[dim] = 0, so nothing to do. - if(coupled_) { - offsets[dim] = Teuchos::as(startIndices[dim]) % coarseRate[dim]; - if(offsets[dim] == 0) { - coarseNodeOffsets[dim] = 0; - } else if(startIndices[dim] + endRate[dim] == lFineNodesPerDir[dim]) { - coarseNodeOffsets[dim] = endRate[dim] - offsets[dim]; - } else { - coarseNodeOffsets[dim] = coarseRate[dim] - offsets[dim]; - } + // If uncoupled aggregation is used, offsets[dim] = 0, so nothing to do. + if (coupled_) { + offsets[dim] = Teuchos::as(startIndices[dim]) % coarseRate[dim]; + if (offsets[dim] == 0) { + coarseNodeOffsets[dim] = 0; + } else if (startIndices[dim] + endRate[dim] == lFineNodesPerDir[dim]) { + coarseNodeOffsets[dim] = endRate[dim] - offsets[dim]; + } else { + coarseNodeOffsets[dim] = coarseRate[dim] - offsets[dim]; + } - if(interpolationOrder_ == 0) { - int rem = startIndices[dim] % coarseRate[dim]; - if( (rem != 0) && (rem <= Teuchos::as(coarseRate[dim]) / 2.0)) { - ghostInterface[2*dim] = true; - } - rem = startIndices[dim + 3] % coarseRate[dim]; - // uncoupled by nature does not require ghosts nodes - if(coupled_ && (startIndices[dim + 3] != gFineNodesPerDir[dim] - 1) && - (rem > Teuchos::as(coarseRate[dim]) / 2.0)) { - ghostInterface[2*dim + 1] = true; - } + if (interpolationOrder_ == 0) { + int rem = startIndices[dim] % coarseRate[dim]; + if ((rem != 0) && + (rem <= Teuchos::as(coarseRate[dim]) / 2.0)) { + ghostInterface[2 * dim] = true; + } + rem = startIndices[dim + 3] % coarseRate[dim]; + // uncoupled by nature does not require ghosts nodes + if (coupled_ && + 
(startIndices[dim + 3] != gFineNodesPerDir[dim] - 1) && + (rem > Teuchos::as(coarseRate[dim]) / 2.0)) { + ghostInterface[2 * dim + 1] = true; + } - } else if(interpolationOrder_ == 1) { - if(coupled_ && (startIndices[dim] % coarseRate[dim] != 0 || - startIndices[dim] == gFineNodesPerDir[dim]-1)) { - ghostInterface[2*dim] = true; - } - if(coupled_ && (startIndices[dim + 3] != gFineNodesPerDir[dim] - 1) && - ((lFineNodesPerDir[dim] == 1) || (startIndices[dim + 3] % coarseRate[dim] != 0))) { - ghostInterface[2*dim+1] = true; - } + } else if (interpolationOrder_ == 1) { + if (coupled_ && (startIndices[dim] % coarseRate[dim] != 0 || + startIndices[dim] == gFineNodesPerDir[dim] - 1)) { + ghostInterface[2 * dim] = true; + } + if (coupled_ && + (startIndices[dim + 3] != gFineNodesPerDir[dim] - 1) && + ((lFineNodesPerDir[dim] == 1) || + (startIndices[dim + 3] % coarseRate[dim] != 0))) { + ghostInterface[2 * dim + 1] = true; } } - } else { // Default value for dim >= numDimensions - endRate[dim] = 1; } + } else { // Default value for dim >= numDimensions + endRate[dim] = 1; } + } - *out << "singleCoarsePoint? " << singleCoarsePoint_ << std::endl; - *out << "gFineNodesPerDir: " << gFineNodesPerDir << std::endl; - *out << "lFineNodesPerDir: " << lFineNodesPerDir << std::endl; - *out << "endRate: " << endRate << std::endl; - *out << "ghostInterface: {" << ghostInterface[0] << ", " << ghostInterface[1] << ", " - << ghostInterface[2] << ", " << ghostInterface[3] << ", " << ghostInterface[4] << ", " - << ghostInterface[5] << "}" << std::endl; - *out << "meshEdge: {" << meshEdge[0] << ", " << meshEdge[1] << ", " - << meshEdge[2] << ", " << meshEdge[3] << ", " << meshEdge[4] << ", " - << meshEdge[5] << "}" << std::endl; - *out << "startIndices: " << startIndices << std::endl; - *out << "offsets: " << offsets << std::endl; - *out << "coarseNodeOffsets: " << coarseNodeOffsets << std::endl; + *out << "singleCoarsePoint? 
" << singleCoarsePoint_ << std::endl; + *out << "gFineNodesPerDir: " << gFineNodesPerDir << std::endl; + *out << "lFineNodesPerDir: " << lFineNodesPerDir << std::endl; + *out << "endRate: " << endRate << std::endl; + *out << "ghostInterface: {" << ghostInterface[0] << ", " << ghostInterface[1] + << ", " << ghostInterface[2] << ", " << ghostInterface[3] << ", " + << ghostInterface[4] << ", " << ghostInterface[5] << "}" << std::endl; + *out << "meshEdge: {" << meshEdge[0] << ", " << meshEdge[1] << ", " + << meshEdge[2] << ", " << meshEdge[3] << ", " << meshEdge[4] << ", " + << meshEdge[5] << "}" << std::endl; + *out << "startIndices: " << startIndices << std::endl; + *out << "offsets: " << offsets << std::endl; + *out << "coarseNodeOffsets: " << coarseNodeOffsets << std::endl; - // Here one element can represent either the degenerate case of one node or the more general - // case of two nodes, i.e. x---x is a 1D element with two nodes and x is a 1D element with - // one node. This helps generating a 3D space from tensorial products... - // A good way to handle this would be to generalize the algorithm to take into account the - // discretization order used in each direction, at least in the FEM sense, since a 0 degree - // discretization will have a unique node per element. This way 1D discretization can be - // viewed as a 3D problem with one 0 degree element in the y direction and one 0 degre - // element in the z direction. - // !!! Operations below are aftecting both local and global values that have two !!! - // different orientations. Orientations can be interchanged using mapDirG2L and mapDirL2G. - // coarseRate, endRate and offsets are in the global basis, as well as all the variables - // starting with a g. - // !!! while the variables starting with an l are in the local basis. !!! - for(int dim = 0; dim < 3; ++dim) { - if(dim < numDimensions) { - // Check whether the partition includes the "end" of the mesh which means that endRate - // will apply. 
Also make sure that endRate is not 0 which means that the mesh does not - // require a particular treatment at the boundaries. - if( meshEdge[2*dim + 1] ) { - lCoarseNodesPerDir[dim] = (lFineNodesPerDir[dim] - endRate[dim] + offsets[dim] - 1) - / coarseRate[dim] + 1; - if(offsets[dim] == 0) {++lCoarseNodesPerDir[dim];} - // We might want to coarsening the direction - // into a single layer if there are not enough - // points left to form two aggregates - if(singleCoarsePoint_ && lFineNodesPerDir[dim] - 1 < coarseRate[dim]) { - lCoarseNodesPerDir[dim] =1; - } - } else { - lCoarseNodesPerDir[dim] = (lFineNodesPerDir[dim] + offsets[dim] - 1) / coarseRate[dim]; - if(offsets[dim] == 0) {++lCoarseNodesPerDir[dim];} + // Here one element can represent either the degenerate case of one node or + // the more general case of two nodes, i.e. x---x is a 1D element with two + // nodes and x is a 1D element with one node. This helps generating a 3D space + // from tensorial products... A good way to handle this would be to generalize + // the algorithm to take into account the discretization order used in each + // direction, at least in the FEM sense, since a 0 degree discretization will + // have a unique node per element. This way 1D discretization can be viewed as + // a 3D problem with one 0 degree element in the y direction and one 0 degre + // element in the z direction. + // !!! Operations below are aftecting both local and global values that have + // two !!! different orientations. Orientations can be interchanged + // using mapDirG2L and mapDirL2G. coarseRate, endRate and offsets are in the + // global basis, as well as all the variables starting with a g. + // !!! while the variables starting with an l are in the local basis. !!! + for (int dim = 0; dim < 3; ++dim) { + if (dim < numDimensions) { + // Check whether the partition includes the "end" of the mesh which means + // that endRate will apply. 
Also make sure that endRate is not 0 which + // means that the mesh does not require a particular treatment at the + // boundaries. + if (meshEdge[2 * dim + 1]) { + lCoarseNodesPerDir[dim] = + (lFineNodesPerDir[dim] - endRate[dim] + offsets[dim] - 1) / + coarseRate[dim] + + 1; + if (offsets[dim] == 0) { + ++lCoarseNodesPerDir[dim]; } - - // The first branch of this if-statement will be used if the rank contains only one layer - // of nodes in direction i, that layer must also coincide with the boundary of the mesh - // and coarseRate[i] == endRate[i]... - if(interpolationOrder_ == 0) { - startGhostedCoarseNode[dim] = startIndices[dim] / coarseRate[dim]; - int rem = startIndices[dim] % coarseRate[dim]; - if(rem > (Teuchos::as(coarseRate[dim]) / 2.0) ) { - ++startGhostedCoarseNode[dim]; - } - } else { - if((startIndices[dim] == gFineNodesPerDir[dim] - 1) && - (startIndices[dim] % coarseRate[dim] == 0)) { - startGhostedCoarseNode[dim] = startIndices[dim] / coarseRate[dim] - 1; - } else { - startGhostedCoarseNode[dim] = startIndices[dim] / coarseRate[dim]; - } + // We might want to coarsening the direction + // into a single layer if there are not enough + // points left to form two aggregates + if (singleCoarsePoint_ && lFineNodesPerDir[dim] - 1 < coarseRate[dim]) { + lCoarseNodesPerDir[dim] = 1; + } + } else { + lCoarseNodesPerDir[dim] = + (lFineNodesPerDir[dim] + offsets[dim] - 1) / coarseRate[dim]; + if (offsets[dim] == 0) { + ++lCoarseNodesPerDir[dim]; } + } - // This array is passed to the RAPFactory and eventually becomes gFineNodePerDir on the next - // level. - gCoarseNodesPerDir[dim] = (gFineNodesPerDir[dim] - 1) / coarseRate[dim]; - if((gFineNodesPerDir[dim] - 1) % coarseRate[dim] == 0) { - ++gCoarseNodesPerDir[dim]; + // The first branch of this if-statement will be used if the rank contains + // only one layer of nodes in direction i, that layer must also coincide + // with the boundary of the mesh and coarseRate[i] == endRate[i]... 
+ if (interpolationOrder_ == 0) { + startGhostedCoarseNode[dim] = startIndices[dim] / coarseRate[dim]; + int rem = startIndices[dim] % coarseRate[dim]; + if (rem > (Teuchos::as(coarseRate[dim]) / 2.0)) { + ++startGhostedCoarseNode[dim]; + } + } else { + if ((startIndices[dim] == gFineNodesPerDir[dim] - 1) && + (startIndices[dim] % coarseRate[dim] == 0)) { + startGhostedCoarseNode[dim] = startIndices[dim] / coarseRate[dim] - 1; } else { - gCoarseNodesPerDir[dim] += 2; + startGhostedCoarseNode[dim] = startIndices[dim] / coarseRate[dim]; } - } else { // Default value for dim >= numDimensions - // endRate[dim] = 1; - gCoarseNodesPerDir[dim] = 1; - lCoarseNodesPerDir[dim] = 1; - } // if (dim < numDimensions) - - // This would happen if the rank does not own any nodes but in that case a subcommunicator - // should be used so this should really not be a concern. - if(lFineNodesPerDir[dim] < 1) {lCoarseNodesPerDir[dim] = 0;} - ghostedNodesPerDir[dim] = lCoarseNodesPerDir[dim]; - // Check whether face *low needs ghost nodes - if(ghostInterface[2*dim]) {ghostedNodesPerDir[dim] += 1;} - // Check whether face *hi needs ghost nodes - if(ghostInterface[2*dim + 1]) {ghostedNodesPerDir[dim] += 1;} - } // Loop for dim=0:3 + } - // With uncoupled aggregation we need to communicate to compute the global number of coarse points - if(!coupled_) { - for(int dim = 0; dim < 3; ++dim) { - gCoarseNodesPerDir[dim] = -1; + // This array is passed to the RAPFactory and eventually becomes + // gFineNodePerDir on the next level. 
+ gCoarseNodesPerDir[dim] = (gFineNodesPerDir[dim] - 1) / coarseRate[dim]; + if ((gFineNodesPerDir[dim] - 1) % coarseRate[dim] == 0) { + ++gCoarseNodesPerDir[dim]; + } else { + gCoarseNodesPerDir[dim] += 2; } - } + } else { // Default value for dim >= numDimensions + // endRate[dim] = 1; + gCoarseNodesPerDir[dim] = 1; + lCoarseNodesPerDir[dim] = 1; + } // if (dim < numDimensions) - // Compute cummulative values - lNumCoarseNodes10 = lCoarseNodesPerDir[0]*lCoarseNodesPerDir[1]; - lNumCoarseNodes = lNumCoarseNodes10*lCoarseNodesPerDir[2]; - numGhostedNodes10 = ghostedNodesPerDir[1]*ghostedNodesPerDir[0]; - numGhostedNodes = numGhostedNodes10*ghostedNodesPerDir[2]; - numGhostNodes = numGhostedNodes - lNumCoarseNodes; + // This would happen if the rank does not own any nodes but in that case a + // subcommunicator should be used so this should really not be a concern. + if (lFineNodesPerDir[dim] < 1) { + lCoarseNodesPerDir[dim] = 0; + } + ghostedNodesPerDir[dim] = lCoarseNodesPerDir[dim]; + // Check whether face *low needs ghost nodes + if (ghostInterface[2 * dim]) { + ghostedNodesPerDir[dim] += 1; + } + // Check whether face *hi needs ghost nodes + if (ghostInterface[2 * dim + 1]) { + ghostedNodesPerDir[dim] += 1; + } + } // Loop for dim=0:3 - *out << "lCoarseNodesPerDir: " << lCoarseNodesPerDir << std::endl; - *out << "gCoarseNodesPerDir: " << gCoarseNodesPerDir << std::endl; - *out << "ghostedNodesPerDir: " << ghostedNodesPerDir << std::endl; - *out << "lNumCoarseNodes=" << lNumCoarseNodes << std::endl; - *out << "numGhostedNodes=" << numGhostedNodes << std::endl; + // With uncoupled aggregation we need to communicate to compute the global + // number of coarse points + if (!coupled_) { + for (int dim = 0; dim < 3; ++dim) { + gCoarseNodesPerDir[dim] = -1; + } } -} //namespace MueLu + // Compute cummulative values + lNumCoarseNodes10 = lCoarseNodesPerDir[0] * lCoarseNodesPerDir[1]; + lNumCoarseNodes = lNumCoarseNodes10 * lCoarseNodesPerDir[2]; + numGhostedNodes10 = 
ghostedNodesPerDir[1] * ghostedNodesPerDir[0]; + numGhostedNodes = numGhostedNodes10 * ghostedNodesPerDir[2]; + numGhostNodes = numGhostedNodes - lNumCoarseNodes; + + *out << "lCoarseNodesPerDir: " << lCoarseNodesPerDir << std::endl; + *out << "gCoarseNodesPerDir: " << gCoarseNodesPerDir << std::endl; + *out << "ghostedNodesPerDir: " << ghostedNodesPerDir << std::endl; + *out << "lNumCoarseNodes=" << lNumCoarseNodes << std::endl; + *out << "numGhostedNodes=" << numGhostedNodes << std::endl; +} + +} // namespace MueLu #define MUELU_INDEXMANAGER_SHORT #endif // MUELU_INDEXMANAGER_DEF_HPP diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_kokkos_decl.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_kokkos_decl.hpp index 113368ad6f50..63dfdaface29 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_kokkos_decl.hpp @@ -53,7 +53,6 @@ #include "Teuchos_OrdinalTraits.hpp" - #include "MueLu_BaseClass.hpp" #include "MueLu_IndexManager_kokkos_fwd.hpp" @@ -74,112 +73,113 @@ namespace MueLu { spaces and it also provides utilites for coarsening. */ - template - class IndexManager_kokkos : public BaseClass { +template +class IndexManager_kokkos : public BaseClass { #undef MUELU_INDEXMANAGER_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - using execution_space = typename Node::execution_space; - using memory_space = typename Node::memory_space; - using device_type = Kokkos::Device; - using intTupleView = typename Kokkos::View; - using LOTupleView = typename Kokkos::View; - - private: - - const int meshLayout = UNCOUPLED; - int myRank = -1; - int numDimensions; ///< Number of spacial dimensions in the problem - int interpolationOrder_; ///< Interpolation order used by grid transfer operators using these aggregates. 
- intTupleView coarseRate; ///< coarsening rate in each direction - intTupleView endRate; ///< adapted coarsening rate at the edge of the mesh in each direction. - - LO lNumFineNodes; ///< local number of nodes. - LO lNumFineNodes10; ///< local number of nodes per 0-1 slice. - LOTupleView lFineNodesPerDir; ///< local number of nodes per direction. - - LO numCoarseNodes; ///< local number of nodes remaining after coarsening. - LO numCoarseNodes10; ///< local number of nodes per 0-1 slice remaining after coarsening. - LOTupleView coarseNodesPerDir; ///< local number of nodes per direction remaing after coarsening. - - public: - - //! Default constructor, return empty object - IndexManager_kokkos() = default; - - //! Constructs for uncoupled meshes - IndexManager_kokkos(const int NumDimensions, - const int interpolationOrder, - const int MyRank, - const ArrayView LFineNodesPerDir, - const ArrayView CoarseRate); - - virtual ~IndexManager_kokkos() {} - - //! Common setup pattern used for all the different types of undelying mesh - void setupIM(const int NumDimensions, - const int interpolationOrder, - const ArrayView coarseRate, - const ArrayView LFineNodesPerDir); - - //! Sets basic parameters used to compute indices on the mesh. - //! This method requires you to have set this->coarseRate. 
- void computeMeshParameters(); - - int getNumDimensions() const {return numDimensions;} - - int getInterpolationOrder() const {return interpolationOrder_;} - - LO getNumLocalFineNodes() const {return lNumFineNodes;} - - LO getNumCoarseNodes() const {return numCoarseNodes;} - - KOKKOS_INLINE_FUNCTION - intTupleView getCoarseningRates() const {return coarseRate;} - - KOKKOS_INLINE_FUNCTION - intTupleView getCoarseningEndRates() const {return endRate;} - - KOKKOS_INLINE_FUNCTION - LOTupleView getLocalFineNodesPerDir() const {return lFineNodesPerDir;} - - KOKKOS_INLINE_FUNCTION - LOTupleView getCoarseNodesPerDir() const {return coarseNodesPerDir;} - - Array getCoarseNodesPerDirArray() const; - - KOKKOS_INLINE_FUNCTION - void getFineLID2FineTuple(const LO myLID, LO (&tuple)[3]) const { - LO tmp; - tuple[2] = myLID / (lFineNodesPerDir(1)*lFineNodesPerDir(0)); - tmp = myLID % (lFineNodesPerDir(1)*lFineNodesPerDir(0)); - tuple[1] = tmp / lFineNodesPerDir(0); - tuple[0] = tmp % lFineNodesPerDir(0); - } // getFineNodeLocalTuple - - KOKKOS_INLINE_FUNCTION - void getFineTuple2FineLID(const LO tuple[3], LO& myLID) const { - myLID = tuple[2]*lNumFineNodes10 + tuple[1]*lFineNodesPerDir[0] + tuple[0]; - } // getFineNodeLID - - KOKKOS_INLINE_FUNCTION - void getCoarseLID2CoarseTuple(const LO myLID, LO (&tuple)[3]) const { - LO tmp; - tuple[2] = myLID / numCoarseNodes10; - tmp = myLID % numCoarseNodes10; - tuple[1] = tmp / coarseNodesPerDir[0]; - tuple[0] = tmp % coarseNodesPerDir[0]; - } // getCoarseNodeLocalTuple - - KOKKOS_INLINE_FUNCTION - void getCoarseTuple2CoarseLID(const LO i, const LO j, const LO k, LO& myLID) const { - myLID = k*numCoarseNodes10 + j*coarseNodesPerDir[0] + i; - } // getCoarseNodeLID - - }; - -} //namespace MueLu +public: + using execution_space = typename Node::execution_space; + using memory_space = typename Node::memory_space; + using device_type = Kokkos::Device; + using intTupleView = typename Kokkos::View; + using LOTupleView = typename Kokkos::View; + 
+private: + const int meshLayout = UNCOUPLED; + int myRank = -1; + int numDimensions; ///< Number of spacial dimensions in the problem + int interpolationOrder_; ///< Interpolation order used by grid transfer + ///< operators using these aggregates. + intTupleView coarseRate; ///< coarsening rate in each direction + intTupleView endRate; ///< adapted coarsening rate at the edge of the mesh in + ///< each direction. + + LO lNumFineNodes; ///< local number of nodes. + LO lNumFineNodes10; ///< local number of nodes per 0-1 slice. + LOTupleView lFineNodesPerDir; ///< local number of nodes per direction. + + LO numCoarseNodes; ///< local number of nodes remaining after coarsening. + LO numCoarseNodes10; ///< local number of nodes per 0-1 slice remaining after + ///< coarsening. + LOTupleView coarseNodesPerDir; ///< local number of nodes per direction + ///< remaing after coarsening. + +public: + //! Default constructor, return empty object + IndexManager_kokkos() = default; + + //! Constructs for uncoupled meshes + IndexManager_kokkos(const int NumDimensions, const int interpolationOrder, + const int MyRank, + const ArrayView LFineNodesPerDir, + const ArrayView CoarseRate); + + virtual ~IndexManager_kokkos() {} + + //! Common setup pattern used for all the different types of undelying mesh + void setupIM(const int NumDimensions, const int interpolationOrder, + const ArrayView coarseRate, + const ArrayView LFineNodesPerDir); + + //! Sets basic parameters used to compute indices on the mesh. + //! This method requires you to have set this->coarseRate. 
+ void computeMeshParameters(); + + int getNumDimensions() const { return numDimensions; } + + int getInterpolationOrder() const { return interpolationOrder_; } + + LO getNumLocalFineNodes() const { return lNumFineNodes; } + + LO getNumCoarseNodes() const { return numCoarseNodes; } + + KOKKOS_INLINE_FUNCTION + intTupleView getCoarseningRates() const { return coarseRate; } + + KOKKOS_INLINE_FUNCTION + intTupleView getCoarseningEndRates() const { return endRate; } + + KOKKOS_INLINE_FUNCTION + LOTupleView getLocalFineNodesPerDir() const { return lFineNodesPerDir; } + + KOKKOS_INLINE_FUNCTION + LOTupleView getCoarseNodesPerDir() const { return coarseNodesPerDir; } + + Array getCoarseNodesPerDirArray() const; + + KOKKOS_INLINE_FUNCTION + void getFineLID2FineTuple(const LO myLID, LO (&tuple)[3]) const { + LO tmp; + tuple[2] = myLID / (lFineNodesPerDir(1) * lFineNodesPerDir(0)); + tmp = myLID % (lFineNodesPerDir(1) * lFineNodesPerDir(0)); + tuple[1] = tmp / lFineNodesPerDir(0); + tuple[0] = tmp % lFineNodesPerDir(0); + } // getFineNodeLocalTuple + + KOKKOS_INLINE_FUNCTION + void getFineTuple2FineLID(const LO tuple[3], LO &myLID) const { + myLID = + tuple[2] * lNumFineNodes10 + tuple[1] * lFineNodesPerDir[0] + tuple[0]; + } // getFineNodeLID + + KOKKOS_INLINE_FUNCTION + void getCoarseLID2CoarseTuple(const LO myLID, LO (&tuple)[3]) const { + LO tmp; + tuple[2] = myLID / numCoarseNodes10; + tmp = myLID % numCoarseNodes10; + tuple[1] = tmp / coarseNodesPerDir[0]; + tuple[0] = tmp % coarseNodesPerDir[0]; + } // getCoarseNodeLocalTuple + + KOKKOS_INLINE_FUNCTION + void getCoarseTuple2CoarseLID(const LO i, const LO j, const LO k, + LO &myLID) const { + myLID = k * numCoarseNodes10 + j * coarseNodesPerDir[0] + i; + } // getCoarseNodeLID +}; + +} // namespace MueLu #define MUELU_INDEXMANAGER_KOKKOS_SHORT #endif // MUELU_INDEXMANAGER_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_kokkos_def.hpp 
b/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_kokkos_def.hpp index f31f94421d86..f22e41fc7b02 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_kokkos_def.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/MueLu_IndexManager_kokkos_def.hpp @@ -53,8 +53,8 @@ #include #include "MueLu_ConfigDefs.hpp" -#include "MueLu_Types.hpp" #include "MueLu_Exceptions.hpp" +#include "MueLu_Types.hpp" #include /***************************************************************************** @@ -63,171 +63,189 @@ namespace MueLu { - template - IndexManager_kokkos:: - IndexManager_kokkos(const int NumDimensions, - const int interpolationOrder, - const int MyRank, - const ArrayView LFineNodesPerDir, - const ArrayView CoarseRate) : - myRank(MyRank), coarseRate("coarsening rate"), endRate("endRate"), - lFineNodesPerDir("lFineNodesPerDir"), coarseNodesPerDir("lFineNodesPerDir") { - - RCP out; - if(const char* dbg = std::getenv("MUELU_INDEXMANAGER_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); - } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); - } +template +IndexManager_kokkos::IndexManager_kokkos( + const int NumDimensions, const int interpolationOrder, const int MyRank, + const ArrayView LFineNodesPerDir, + const ArrayView CoarseRate) + : myRank(MyRank), coarseRate("coarsening rate"), endRate("endRate"), + lFineNodesPerDir("lFineNodesPerDir"), + coarseNodesPerDir("lFineNodesPerDir") { + + RCP out; + if (const char *dbg = std::getenv("MUELU_INDEXMANAGER_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } - setupIM(NumDimensions, interpolationOrder, CoarseRate, LFineNodesPerDir); + setupIM(NumDimensions, interpolationOrder, CoarseRate, LFineNodesPerDir); 
- *out << "Done setting up the IndexManager" << std::endl; + *out << "Done setting up the IndexManager" << std::endl; - computeMeshParameters(); + computeMeshParameters(); - *out << "Computed Mesh Parameters" << std::endl; + *out << "Computed Mesh Parameters" << std::endl; - } // IndexManager_kokkos Constructor +} // IndexManager_kokkos Constructor - template - void IndexManager_kokkos:: - setupIM(const int NumDimensions, const int interpolationOrder, - const ArrayView CoarseRate, const ArrayView LFineNodesPerDir) { +template +void IndexManager_kokkos::setupIM( + const int NumDimensions, const int interpolationOrder, + const ArrayView CoarseRate, + const ArrayView LFineNodesPerDir) { - numDimensions = NumDimensions; - interpolationOrder_ = interpolationOrder; + numDimensions = NumDimensions; + interpolationOrder_ = interpolationOrder; - TEUCHOS_TEST_FOR_EXCEPTION((LFineNodesPerDir.size() != 3) - && (LFineNodesPerDir.size() != numDimensions), - Exceptions::RuntimeError, - "LFineNodesPerDir has to be of size 3 or of size numDimensions!"); + TEUCHOS_TEST_FOR_EXCEPTION( + (LFineNodesPerDir.size() != 3) && + (LFineNodesPerDir.size() != numDimensions), + Exceptions::RuntimeError, + "LFineNodesPerDir has to be of size 3 or of size numDimensions!"); - typename Kokkos::View::HostMirror lFineNodesPerDir_h = Kokkos::create_mirror_view(lFineNodesPerDir); - Kokkos::deep_copy(lFineNodesPerDir_h, lFineNodesPerDir); - typename Kokkos::View::HostMirror coarseRate_h = Kokkos::create_mirror_view(coarseRate); - Kokkos::deep_copy(coarseRate_h, coarseRate); + typename Kokkos::View::HostMirror lFineNodesPerDir_h = + Kokkos::create_mirror_view(lFineNodesPerDir); + Kokkos::deep_copy(lFineNodesPerDir_h, lFineNodesPerDir); + typename Kokkos::View::HostMirror coarseRate_h = + Kokkos::create_mirror_view(coarseRate); + Kokkos::deep_copy(coarseRate_h, coarseRate); - // Load coarse rate, being careful about formating - // Also load lFineNodesPerDir - for(int dim = 0; dim < 3; ++dim) { - if(dim < 
getNumDimensions()) { - lFineNodesPerDir_h(dim) = LFineNodesPerDir[dim]; - if(CoarseRate.size() == 1) { - coarseRate_h(dim) = CoarseRate[0]; - } else if(CoarseRate.size() == getNumDimensions()) { - coarseRate_h(dim) = CoarseRate[dim]; - } - } else { - lFineNodesPerDir_h(dim) = 1; - coarseRate_h(dim) = 1; + // Load coarse rate, being careful about formating + // Also load lFineNodesPerDir + for (int dim = 0; dim < 3; ++dim) { + if (dim < getNumDimensions()) { + lFineNodesPerDir_h(dim) = LFineNodesPerDir[dim]; + if (CoarseRate.size() == 1) { + coarseRate_h(dim) = CoarseRate[0]; + } else if (CoarseRate.size() == getNumDimensions()) { + coarseRate_h(dim) = CoarseRate[dim]; } - } - - Kokkos::deep_copy(lFineNodesPerDir, lFineNodesPerDir_h); - Kokkos::deep_copy(coarseRate, coarseRate_h); - - } // setupIM - - template - void IndexManager_kokkos::computeMeshParameters() { - - RCP out; - if(const char* dbg = std::getenv("MUELU_INDEXMANAGER_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + lFineNodesPerDir_h(dim) = 1; + coarseRate_h(dim) = 1; } + } - typename Kokkos::View::HostMirror coarseRate_h = Kokkos::create_mirror_view(coarseRate); - typename Kokkos::View::HostMirror endRate_h = Kokkos::create_mirror_view(endRate); + Kokkos::deep_copy(lFineNodesPerDir, lFineNodesPerDir_h); + Kokkos::deep_copy(coarseRate, coarseRate_h); +} // setupIM - typename Kokkos::View::HostMirror lFineNodesPerDir_h = Kokkos::create_mirror_view(lFineNodesPerDir); - typename Kokkos::View::HostMirror coarseNodesPerDir_h = Kokkos::create_mirror_view(coarseNodesPerDir); - Kokkos::deep_copy(lFineNodesPerDir_h, lFineNodesPerDir); - Kokkos::deep_copy(coarseRate_h, coarseRate); +template +void IndexManager_kokkos::computeMeshParameters() { - lNumFineNodes10 = lFineNodesPerDir_h(1)*lFineNodesPerDir_h(0); - lNumFineNodes = 
lFineNodesPerDir_h(2)*lNumFineNodes10; - for(int dim = 0; dim < 3; ++dim) { - if(dim < numDimensions) { - endRate_h(dim) = (lFineNodesPerDir_h(dim) - 1) % coarseRate_h(dim); - if(endRate_h(dim) == 0) {endRate_h(dim) = coarseRate_h(dim);} + RCP out; + if (const char *dbg = std::getenv("MUELU_INDEXMANAGER_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } - } else { // Default value for dim >= numDimensions - endRate_h(dim) = 1; + typename Kokkos::View::HostMirror coarseRate_h = + Kokkos::create_mirror_view(coarseRate); + typename Kokkos::View::HostMirror endRate_h = + Kokkos::create_mirror_view(endRate); + + typename Kokkos::View::HostMirror lFineNodesPerDir_h = + Kokkos::create_mirror_view(lFineNodesPerDir); + typename Kokkos::View::HostMirror coarseNodesPerDir_h = + Kokkos::create_mirror_view(coarseNodesPerDir); + Kokkos::deep_copy(lFineNodesPerDir_h, lFineNodesPerDir); + Kokkos::deep_copy(coarseRate_h, coarseRate); + + lNumFineNodes10 = lFineNodesPerDir_h(1) * lFineNodesPerDir_h(0); + lNumFineNodes = lFineNodesPerDir_h(2) * lNumFineNodes10; + for (int dim = 0; dim < 3; ++dim) { + if (dim < numDimensions) { + endRate_h(dim) = (lFineNodesPerDir_h(dim) - 1) % coarseRate_h(dim); + if (endRate_h(dim) == 0) { + endRate_h(dim) = coarseRate_h(dim); } - } - *out << "lFineNodesPerDir: {" << lFineNodesPerDir_h(0) << ", " << lFineNodesPerDir_h(1) << ", " - << lFineNodesPerDir_h(2) << "}" << std::endl; - *out << "endRate: {" << endRate_h(0) << ", " << endRate_h(1) << ", " - << endRate_h(2) << "}" << std::endl; - - // Here one element can represent either the degenerate case of one node or the more general - // case of two nodes, i.e. x---x is a 1D element with two nodes and x is a 1D element with - // one node. This helps generating a 3D space from tensorial products... 
- // A good way to handle this would be to generalize the algorithm to take into account the - // discretization order used in each direction, at least in the FEM sense, since a 0 degree - // discretization will have a unique node per element. This way 1D discretization can be - // viewed as a 3D problem with one 0 degree element in the y direction and one 0 degre - // element in the z direction. - // !!! Operations below are aftecting both local and global values that have two !!! - // different orientations. Orientations can be interchanged using mapDirG2L and mapDirL2G. - // coarseRate, endRate and offsets are in the global basis, as well as all the variables - // starting with a g. - // !!! while the variables starting with an l are in the local basis. !!! - for(int dim = 0; dim < 3; ++dim) { - if(dim < numDimensions) { - // Check whether the partition includes the "end" of the mesh which means that endRate - // will apply. Also make sure that endRate is not 0 which means that the mesh does not - // require a particular treatment at the boundaries. - coarseNodesPerDir_h(dim) = (lFineNodesPerDir_h(dim) - endRate_h(dim) - 1) - / coarseRate_h(dim) + 2; - - } else { // Default value for dim >= numDimensions - // endRate[dim] = 1; - coarseNodesPerDir_h(dim) = 1; - } // if (dim < numDimensions) - - // This would happen if the rank does not own any nodes but in that case a subcommunicator - // should be used so this should really not be a concern. - if(lFineNodesPerDir_h(dim) < 1) {coarseNodesPerDir_h(dim) = 0;} - } // Loop for dim=0:3 - - // Compute cummulative values - numCoarseNodes10 = coarseNodesPerDir_h(0)*coarseNodesPerDir_h(1); - numCoarseNodes = numCoarseNodes10*coarseNodesPerDir_h(2); - - *out << "coarseNodesPerDir: {" << coarseNodesPerDir_h(0) << ", " - << coarseNodesPerDir_h(1) << ", " << coarseNodesPerDir_h(2) << "}" << std::endl; - *out << "numCoarseNodes=" << numCoarseNodes << std::endl; - - // Copy Host data to Device. 
- Kokkos::deep_copy(coarseRate, coarseRate_h); - Kokkos::deep_copy(endRate, endRate_h); - Kokkos::deep_copy(lFineNodesPerDir, lFineNodesPerDir_h); - Kokkos::deep_copy(coarseNodesPerDir, coarseNodesPerDir_h); + } else { // Default value for dim >= numDimensions + endRate_h(dim) = 1; + } } - template - Array IndexManager_kokkos:: - getCoarseNodesPerDirArray() const { - typename LOTupleView::HostMirror coarseNodesPerDir_h = Kokkos::create_mirror_view(coarseNodesPerDir); - Kokkos::deep_copy(coarseNodesPerDir_h, coarseNodesPerDir); - Array coarseNodesPerDirArray(3); - - for(int dim = 0; dim < 3; ++dim) { - coarseNodesPerDirArray[dim] = coarseNodesPerDir_h(dim); + *out << "lFineNodesPerDir: {" << lFineNodesPerDir_h(0) << ", " + << lFineNodesPerDir_h(1) << ", " << lFineNodesPerDir_h(2) << "}" + << std::endl; + *out << "endRate: {" << endRate_h(0) << ", " << endRate_h(1) << ", " + << endRate_h(2) << "}" << std::endl; + + // Here one element can represent either the degenerate case of one node or + // the more general case of two nodes, i.e. x---x is a 1D element with two + // nodes and x is a 1D element with one node. This helps generating a 3D space + // from tensorial products... A good way to handle this would be to generalize + // the algorithm to take into account the discretization order used in each + // direction, at least in the FEM sense, since a 0 degree discretization will + // have a unique node per element. This way 1D discretization can be viewed as + // a 3D problem with one 0 degree element in the y direction and one 0 degre + // element in the z direction. + // !!! Operations below are aftecting both local and global values that have + // two !!! different orientations. Orientations can be interchanged + // using mapDirG2L and mapDirL2G. coarseRate, endRate and offsets are in the + // global basis, as well as all the variables starting with a g. + // !!! while the variables starting with an l are in the local basis. !!! 
+ for (int dim = 0; dim < 3; ++dim) { + if (dim < numDimensions) { + // Check whether the partition includes the "end" of the mesh which means + // that endRate will apply. Also make sure that endRate is not 0 which + // means that the mesh does not require a particular treatment at the + // boundaries. + coarseNodesPerDir_h(dim) = + (lFineNodesPerDir_h(dim) - endRate_h(dim) - 1) / coarseRate_h(dim) + + 2; + + } else { // Default value for dim >= numDimensions + // endRate[dim] = 1; + coarseNodesPerDir_h(dim) = 1; + } // if (dim < numDimensions) + + // This would happen if the rank does not own any nodes but in that case a + // subcommunicator should be used so this should really not be a concern. + if (lFineNodesPerDir_h(dim) < 1) { + coarseNodesPerDir_h(dim) = 0; } + } // Loop for dim=0:3 + + // Compute cummulative values + numCoarseNodes10 = coarseNodesPerDir_h(0) * coarseNodesPerDir_h(1); + numCoarseNodes = numCoarseNodes10 * coarseNodesPerDir_h(2); + + *out << "coarseNodesPerDir: {" << coarseNodesPerDir_h(0) << ", " + << coarseNodesPerDir_h(1) << ", " << coarseNodesPerDir_h(2) << "}" + << std::endl; + *out << "numCoarseNodes=" << numCoarseNodes << std::endl; + + // Copy Host data to Device. 
+ Kokkos::deep_copy(coarseRate, coarseRate_h); + Kokkos::deep_copy(endRate, endRate_h); + Kokkos::deep_copy(lFineNodesPerDir, lFineNodesPerDir_h); + Kokkos::deep_copy(coarseNodesPerDir, coarseNodesPerDir_h); +} + +template +Array +IndexManager_kokkos::getCoarseNodesPerDirArray() const { + typename LOTupleView::HostMirror coarseNodesPerDir_h = + Kokkos::create_mirror_view(coarseNodesPerDir); + Kokkos::deep_copy(coarseNodesPerDir_h, coarseNodesPerDir); + Array coarseNodesPerDirArray(3); + + for (int dim = 0; dim < 3; ++dim) { + coarseNodesPerDirArray[dim] = coarseNodesPerDir_h(dim); + } - return coarseNodesPerDirArray; - } // getCoarseNodesData + return coarseNodesPerDirArray; +} // getCoarseNodesData -} //namespace MueLu +} // namespace MueLu #define MUELU_INDEXMANAGER_KOKKOS_SHORT #endif // MUELU_INDEXMANAGER_DEF_KOKKOS_HPP diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_decl.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_decl.hpp index c43758ee8352..c2f46fedc6be 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_decl.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_decl.hpp @@ -46,17 +46,16 @@ #ifndef MUELU_STRUCTUREDAGGREGATIONFACTORY_DECL_HPP #define MUELU_STRUCTUREDAGGREGATIONFACTORY_DECL_HPP - // #include // #include // #include +#include "MueLu_AggregationStructuredAlgorithm_fwd.hpp" #include "MueLu_ConfigDefs.hpp" +#include "MueLu_Exceptions.hpp" +#include "MueLu_Level_fwd.hpp" #include "MueLu_SingleLevelFactoryBase.hpp" #include "MueLu_StructuredAggregationFactory_fwd.hpp" -#include "MueLu_Level_fwd.hpp" -#include "MueLu_Exceptions.hpp" -#include "MueLu_AggregationStructuredAlgorithm_fwd.hpp" namespace MueLu { @@ -64,98 +63,115 @@ namespace MueLu { @class StructuredAggregationFactory class. @brief Factory for building aggregates on structured grids. 
- Factory for creating aggregates from grid structure of the problem. The structured aggregation - method can return an aggregate structure or a geometric structure used by prolongator factories. + Factory for creating aggregates from grid structure of the problem. The + structured aggregation method can return an aggregate structure or a + geometric structure used by prolongator factories. Internally, each node has a status which can be one of the following: Node status | Meaning ------------|--------- - READY | Node is not aggregated and can be used for building a new aggregate or can be added to an existing aggregate. - AGGREGATED | Node is aggregated. - IGNORED | Node is not considered for aggregation (it may have been dropped or put into a singleton aggregate) - BOUNDARY | Node is a Dirichlet boundary node (with one or more Dirichlet boundary conditions). - ONEPT | The user forces the aggregation algorithm to treat the node as a singleton. Important: Do not forget to set aggregation: allow user-specified singletons to true! Otherwise Phase3 will just handle the ONEPT nodes and probably not build singletons + READY | Node is not aggregated and can be used for building a new + aggregate or can be added to an existing aggregate. AGGREGATED | Node is + aggregated. IGNORED | Node is not considered for aggregation (it may have + been dropped or put into a singleton aggregate) BOUNDARY | Node is a + Dirichlet boundary node (with one or more Dirichlet boundary conditions). + ONEPT | The user forces the aggregation algorithm to treat the node as + a singleton. Important: Do not forget to set aggregation: allow + user-specified singletons to true! 
Otherwise Phase3 will just handle the + ONEPT nodes and probably not build singletons @ingroup Aggregation ## Input/output of StructuredAggregationFactory ## ### User parameters of StructuredAggregationFactory ### - Parameter | type | default | master.xml | validated | requested | description + Parameter | type | default | master.xml | validated | requested | + description ----------|------|---------|:----------:|:---------:|:---------:|------------ - DofsPerNode | Factory | null | | * | * | Generating factory for variable 'DofsPerNode', usually the same as for 'Graph' - OnePt aggregate map name | string | | | * | * | Name of input map for single node aggregates (default=''). Makes only sense if the parameter 'aggregation: allow user-specified singletons' is set to true. - OnePt aggregate map factory | Factory | null | | * | * | Generating factory of (DOF) map for single node aggregates. Makes only sense if the parameter 'aggregation: allow user-specified singletons' is set to true. - - - The * in the @c master.xml column denotes that the parameter is defined in the @c master.xml file.
- The * in the @c validated column means that the parameter is declared in the list of valid input parameters (see StructuredAggregationFactory::GetValidParameters).
- The * in the @c requested column states that the data is requested as input with all dependencies (see StructuredAggregationFactory::DeclareInput). + DofsPerNode | Factory | null | | * | * | Generating factory for + variable 'DofsPerNode', usually the same as for 'Graph' OnePt aggregate map + name | string | | | * | * | Name of input map for single node aggregates + (default=''). Makes only sense if the parameter 'aggregation: allow + user-specified singletons' is set to true. OnePt aggregate map factory | + Factory | null | | * | * | Generating factory of (DOF) map for single node + aggregates. Makes only sense if the parameter 'aggregation: allow + user-specified singletons' is set to true. + + + The * in the @c master.xml column denotes that the parameter is defined in + the @c master.xml file.
The * in the @c validated column means that the + parameter is declared in the list of valid input parameters (see + StructuredAggregationFactory::GetValidParameters).
The * in the @c + requested column states that the data is requested as input with all + dependencies (see StructuredAggregationFactory::DeclareInput). ### Variables provided by StructuredAggregationFactory ### - After StructuredAggregationFactory::Build the following data is available (if requested) + After StructuredAggregationFactory::Build the following data is available + (if requested) Parameter | generated by | description ----------|--------------|------------ - | Aggregates | StructuredAggregationFactory | Container class with aggregation information. See also Aggregates. + | Aggregates | StructuredAggregationFactory | Container class with + aggregation information. See also Aggregates. */ - template - class StructuredAggregationFactory : public SingleLevelFactoryBase { +template +class StructuredAggregationFactory : public SingleLevelFactoryBase { #undef MUELU_STRUCTUREDAGGREGATIONFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ - - //! Constructor. - StructuredAggregationFactory(); +public: + //! @name Constructors/Destructors. + //@{ - //! Destructor. - virtual ~StructuredAggregationFactory() { } + //! Constructor. + StructuredAggregationFactory(); - RCP GetValidParameterList() const; + //! Destructor. + virtual ~StructuredAggregationFactory() {} - //@} + RCP GetValidParameterList() const; - //! @name Set/get methods. - //@{ - // set information about 1-node aggregates (map name and generating factory) - void SetOnePtMapName(const std::string name, Teuchos::RCP mapFact) { - SetParameter("OnePt aggregate map name", ParameterEntry(std::string(name))); // revalidate - SetFactory("OnePt aggregate map factory",mapFact); - } + //@} - //@} + //! @name Set/get methods. 
+ //@{ + // set information about 1-node aggregates (map name and generating factory) + void SetOnePtMapName(const std::string name, + Teuchos::RCP mapFact) { + SetParameter("OnePt aggregate map name", + ParameterEntry(std::string(name))); // revalidate + SetFactory("OnePt aggregate map factory", mapFact); + } - //! Input - //@{ + //@} - void DeclareInput(Level& currentLevel) const; + //! Input + //@{ - //@} + void DeclareInput(Level ¤tLevel) const; - //! @name Build methods. - //@{ + //@} - /*! @brief Build aggregates. */ - void Build(Level& currentLevel) const; + //! @name Build methods. + //@{ - //@} + /*! @brief Build aggregates. */ + void Build(Level ¤tLevel) const; - private: + //@} - //! boolean flag: definition phase - //! if true, the aggregation algorithms still can be set and changed. - //! if false, no change in aggregation algorithms is possible any more - mutable bool bDefinitionPhase_; +private: + //! boolean flag: definition phase + //! if true, the aggregation algorithms still can be set and changed. + //! 
if false, no change in aggregation algorithms is possible any more + mutable bool bDefinitionPhase_; - }; // class StructuredAggregationFactory +}; // class StructuredAggregationFactory -} +} // namespace MueLu #define MUELU_STRUCTUREDAGGREGATIONFACTORY_SHORT #endif /* MUELU_STRUCTUREDAGGREGATIONFACTORY_DECL_HPP */ diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_def.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_def.hpp index 94685aefcef6..7044cc6479dc 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_def.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_def.hpp @@ -46,307 +46,320 @@ #ifndef MUELU_STRUCTUREDAGGREGATIONFACTORY_DEF_HPP_ #define MUELU_STRUCTUREDAGGREGATIONFACTORY_DEF_HPP_ -#include #include +#include +#include "MueLu_Aggregates.hpp" #include "MueLu_AggregationStructuredAlgorithm.hpp" -#include "MueLu_Level.hpp" +#include "MueLu_GlobalLexicographicIndexManager.hpp" #include "MueLu_GraphBase.hpp" -#include "MueLu_Aggregates.hpp" +#include "MueLu_Level.hpp" +#include "MueLu_LocalLexicographicIndexManager.hpp" #include "MueLu_MasterList.hpp" #include "MueLu_Monitor.hpp" #include "MueLu_UncoupledIndexManager.hpp" -#include "MueLu_LocalLexicographicIndexManager.hpp" -#include "MueLu_GlobalLexicographicIndexManager.hpp" #include "MueLu_StructuredAggregationFactory_decl.hpp" namespace MueLu { - template - StructuredAggregationFactory:: - StructuredAggregationFactory() : bDefinitionPhase_(true) - { } - - template - RCP StructuredAggregationFactory:: - GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - -#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("aggregation: preserve Dirichlet points"); - SET_VALID_ENTRY("aggregation: allow user-specified singletons"); - SET_VALID_ENTRY("aggregation: error on 
nodes with no on-rank neighbors"); - SET_VALID_ENTRY("aggregation: phase3 avoid singletons"); - - // general variables needed in StructuredAggregationFactory - SET_VALID_ENTRY("aggregation: mesh layout"); - SET_VALID_ENTRY("aggregation: mode"); - SET_VALID_ENTRY("aggregation: output type"); - SET_VALID_ENTRY("aggregation: coarsening rate"); - SET_VALID_ENTRY("aggregation: coarsening order"); -#undef SET_VALID_ENTRY - validParamList->set >("Graph", Teuchos::null, - "Graph of the matrix after amalgamation but without dropping."); - validParamList->set >("numDimensions", Teuchos::null, - "Number of spatial dimension provided by CoordinatesTransferFactory."); - validParamList->set >("gNodesPerDim", Teuchos::null, - "Global number of nodes per spatial dimension provided by CoordinatesTransferFactory."); - validParamList->set >("lNodesPerDim", Teuchos::null, - "Local number of nodes per spatial dimension provided by CoordinatesTransferFactory."); - validParamList->set >("DofsPerNode", Teuchos::null, - "Generating factory for variable \'DofsPerNode\', usually the same as the \'Graph\' factory"); - validParamList->set("aggregation: single coarse point", false, - "Allows the aggreagtion process to reduce spacial dimensions to a single layer"); - - return validParamList; - } // GetValidParameterList() - - template - void StructuredAggregationFactory:: - DeclareInput(Level& currentLevel) const { - Input(currentLevel, "Graph"); - Input(currentLevel, "DofsPerNode"); - - ParameterList pL = GetParameterList(); - std::string coupling = pL.get("aggregation: mode"); - const bool coupled = (coupling == "coupled" ? 
true : false); - if(coupled) { - // Request the global number of nodes per dimensions - if(currentLevel.GetLevelID() == 0) { - if(currentLevel.IsAvailable("gNodesPerDim", NoFactory::get())) { - currentLevel.DeclareInput("gNodesPerDim", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("gNodesPerDim", NoFactory::get()), - Exceptions::RuntimeError, - "gNodesPerDim was not provided by the user on level0!"); - } - } else { - Input(currentLevel, "gNodesPerDim"); - } - } - - // Request the local number of nodes per dimensions - if(currentLevel.GetLevelID() == 0) { - if(currentLevel.IsAvailable("numDimensions", NoFactory::get())) { - currentLevel.DeclareInput("numDimensions", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("numDimensions", NoFactory::get()), - Exceptions::RuntimeError, - "numDimensions was not provided by the user on level0!"); - } - if(currentLevel.IsAvailable("lNodesPerDim", NoFactory::get())) { - currentLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this); +template +StructuredAggregationFactory::StructuredAggregationFactory() + : bDefinitionPhase_(true) {} + +template +RCP +StructuredAggregationFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + +#define SET_VALID_ENTRY(name) \ + validParamList->setEntry(name, MasterList::getEntry(name)) + SET_VALID_ENTRY("aggregation: preserve Dirichlet points"); + SET_VALID_ENTRY("aggregation: allow user-specified singletons"); + SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors"); + SET_VALID_ENTRY("aggregation: phase3 avoid singletons"); + + // general variables needed in StructuredAggregationFactory + SET_VALID_ENTRY("aggregation: mesh layout"); + SET_VALID_ENTRY("aggregation: mode"); + SET_VALID_ENTRY("aggregation: output type"); + SET_VALID_ENTRY("aggregation: coarsening rate"); + SET_VALID_ENTRY("aggregation: coarsening order"); +#undef SET_VALID_ENTRY + 
validParamList->set>( + "Graph", Teuchos::null, + "Graph of the matrix after amalgamation but without dropping."); + validParamList->set>( + "numDimensions", Teuchos::null, + "Number of spatial dimension provided by CoordinatesTransferFactory."); + validParamList->set>( + "gNodesPerDim", Teuchos::null, + "Global number of nodes per spatial dimension provided by " + "CoordinatesTransferFactory."); + validParamList->set>( + "lNodesPerDim", Teuchos::null, + "Local number of nodes per spatial dimension provided by " + "CoordinatesTransferFactory."); + validParamList->set>( + "DofsPerNode", Teuchos::null, + "Generating factory for variable \'DofsPerNode\', usually the same as " + "the \'Graph\' factory"); + validParamList->set("aggregation: single coarse point", false, + "Allows the aggreagtion process to reduce " + "spacial dimensions to a single layer"); + + return validParamList; +} // GetValidParameterList() + +template +void StructuredAggregationFactory::DeclareInput(Level ¤tLevel) + const { + Input(currentLevel, "Graph"); + Input(currentLevel, "DofsPerNode"); + + ParameterList pL = GetParameterList(); + std::string coupling = pL.get("aggregation: mode"); + const bool coupled = (coupling == "coupled" ? 
true : false); + if (coupled) { + // Request the global number of nodes per dimensions + if (currentLevel.GetLevelID() == 0) { + if (currentLevel.IsAvailable("gNodesPerDim", NoFactory::get())) { + currentLevel.DeclareInput("gNodesPerDim", NoFactory::get(), this); } else { - TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("lNodesPerDim", NoFactory::get()), - Exceptions::RuntimeError, - "lNodesPerDim was not provided by the user on level0!"); + TEUCHOS_TEST_FOR_EXCEPTION( + currentLevel.IsAvailable("gNodesPerDim", NoFactory::get()), + Exceptions::RuntimeError, + "gNodesPerDim was not provided by the user on level0!"); } } else { - Input(currentLevel, "numDimensions"); - Input(currentLevel, "lNodesPerDim"); + Input(currentLevel, "gNodesPerDim"); } - } // DeclareInput() + } - template - void StructuredAggregationFactory:: - Build(Level ¤tLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); - - RCP out; - if(const char* dbg = std::getenv("MUELU_STRUCTUREDAGGREGATION_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); + // Request the local number of nodes per dimensions + if (currentLevel.GetLevelID() == 0) { + if (currentLevel.IsAvailable("numDimensions", NoFactory::get())) { + currentLevel.DeclareInput("numDimensions", NoFactory::get(), this); } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + TEUCHOS_TEST_FOR_EXCEPTION( + currentLevel.IsAvailable("numDimensions", NoFactory::get()), + Exceptions::RuntimeError, + "numDimensions was not provided by the user on level0!"); } - - *out << "Entering structured aggregation" << std::endl; - - ParameterList pL = GetParameterList(); - bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed - - // General problem informations are gathered from data stored in the problem matix. 
- RCP graph = Get< RCP >(currentLevel, "Graph"); - RCP fineMap = graph->GetDomainMap(); - const int myRank = fineMap->getComm()->getRank(); - const int numRanks = fineMap->getComm()->getSize(); - const GO minGlobalIndex = fineMap->getMinGlobalIndex(); - const LO dofsPerNode = Get(currentLevel, "DofsPerNode"); - - // Since we want to operate on nodes and not dof, we need to modify the rowMap in order to - // obtain a nodeMap. - const int interpolationOrder = pL.get("aggregation: coarsening order"); - std::string meshLayout = pL.get("aggregation: mesh layout"); - std::string coupling = pL.get("aggregation: mode"); - const bool coupled = (coupling == "coupled" ? true : false); - std::string outputType = pL.get("aggregation: output type"); - const bool outputAggregates = (outputType == "Aggregates" ? true : false); - const bool singleCoarsePoint = pL.get("aggregation: single coarse point"); - int numDimensions; - Array gFineNodesPerDir(3); - Array lFineNodesPerDir(3); - if(currentLevel.GetLevelID() == 0) { - // On level 0, data is provided by applications and has no associated factory. - numDimensions = currentLevel.Get("numDimensions", NoFactory::get()); - lFineNodesPerDir = currentLevel.Get >("lNodesPerDim", NoFactory::get()); - if(coupled) { - gFineNodesPerDir = currentLevel.Get >("gNodesPerDim", NoFactory::get()); - } + if (currentLevel.IsAvailable("lNodesPerDim", NoFactory::get())) { + currentLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this); } else { - // On level > 0, data is provided directly by generating factories. 
- numDimensions = Get(currentLevel, "numDimensions"); - lFineNodesPerDir = Get >(currentLevel, "lNodesPerDim"); - if(coupled) { - gFineNodesPerDir = Get >(currentLevel, "gNodesPerDim"); - } + TEUCHOS_TEST_FOR_EXCEPTION( + currentLevel.IsAvailable("lNodesPerDim", NoFactory::get()), + Exceptions::RuntimeError, + "lNodesPerDim was not provided by the user on level0!"); } - - - // First make sure that input parameters are set logically based on dimension - for(int dim = 0; dim < 3; ++dim) { - if(dim >= numDimensions) { - gFineNodesPerDir[dim] = 1; - lFineNodesPerDir[dim] = 1; - } + } else { + Input(currentLevel, "numDimensions"); + Input(currentLevel, "lNodesPerDim"); + } +} // DeclareInput() + +template +void StructuredAggregationFactory::Build(Level ¤tLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + + RCP out; + if (const char *dbg = std::getenv("MUELU_STRUCTUREDAGGREGATION_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } + + *out << "Entering structured aggregation" << std::endl; + + ParameterList pL = GetParameterList(); + bDefinitionPhase_ = false; // definition phase is finished, now all + // aggregation algorithm information is fixed + + // General problem informations are gathered from data stored in the problem + // matix. + RCP graph = Get>(currentLevel, "Graph"); + RCP fineMap = graph->GetDomainMap(); + const int myRank = fineMap->getComm()->getRank(); + const int numRanks = fineMap->getComm()->getSize(); + const GO minGlobalIndex = fineMap->getMinGlobalIndex(); + const LO dofsPerNode = Get(currentLevel, "DofsPerNode"); + + // Since we want to operate on nodes and not dof, we need to modify the rowMap + // in order to obtain a nodeMap. 
+ const int interpolationOrder = pL.get("aggregation: coarsening order"); + std::string meshLayout = pL.get("aggregation: mesh layout"); + std::string coupling = pL.get("aggregation: mode"); + const bool coupled = (coupling == "coupled" ? true : false); + std::string outputType = pL.get("aggregation: output type"); + const bool outputAggregates = (outputType == "Aggregates" ? true : false); + const bool singleCoarsePoint = + pL.get("aggregation: single coarse point"); + int numDimensions; + Array gFineNodesPerDir(3); + Array lFineNodesPerDir(3); + if (currentLevel.GetLevelID() == 0) { + // On level 0, data is provided by applications and has no associated + // factory. + numDimensions = currentLevel.Get("numDimensions", NoFactory::get()); + lFineNodesPerDir = + currentLevel.Get>("lNodesPerDim", NoFactory::get()); + if (coupled) { + gFineNodesPerDir = + currentLevel.Get>("gNodesPerDim", NoFactory::get()); } - - // Get the coarsening rate - std::string coarseningRate = pL.get("aggregation: coarsening rate"); - Teuchos::Array coarseRate; - try { - coarseRate = Teuchos::fromStringToArray(coarseningRate); - } catch(const Teuchos::InvalidArrayStringRepresentation& e) { - GetOStream(Errors,-1) << " *** \"aggregation: coarsening rate\" must be a string convertible into an array! *** " - << std::endl; - throw e; + } else { + // On level > 0, data is provided directly by generating factories. 
+ numDimensions = Get(currentLevel, "numDimensions"); + lFineNodesPerDir = Get>(currentLevel, "lNodesPerDim"); + if (coupled) { + gFineNodesPerDir = Get>(currentLevel, "gNodesPerDim"); } - TEUCHOS_TEST_FOR_EXCEPTION((coarseRate.size() > 1) && (coarseRate.size() < numDimensions), - Exceptions::RuntimeError, - "\"aggregation: coarsening rate\" must have at least as many" - " components as the number of spatial dimensions in the problem."); + } - // Now that we have extracted info from the level, create the IndexManager - RCP geoData; - if(!coupled) { - geoData = rcp(new MueLu::UncoupledIndexManager(fineMap->getComm(), - coupled, - numDimensions, - interpolationOrder, - myRank, - numRanks, - gFineNodesPerDir, - lFineNodesPerDir, - coarseRate, - singleCoarsePoint)); - } else if(meshLayout == "Local Lexicographic") { - Array meshData; - if(currentLevel.GetLevelID() == 0) { - // On level 0, data is provided by applications and has no associated factory. - meshData = currentLevel.Get >("aggregation: mesh data", NoFactory::get()); - TEUCHOS_TEST_FOR_EXCEPTION(meshData.empty() == true, Exceptions::RuntimeError, - "The meshData array is empty, somehow the input for structured" - " aggregation are not captured correctly."); - } else { - // On level > 0, data is provided directly by generating factories. - meshData = Get >(currentLevel, "aggregation: mesh data"); - } - // Note, LBV Feb 5th 2018: - // I think that it might make sense to pass ghostInterface rather than interpolationOrder. - // For that I need to make sure that ghostInterface can be computed with minimal mesh - // knowledge outside of the IndexManager... 
- geoData = rcp(new MueLu::LocalLexicographicIndexManager(fineMap->getComm(), - coupled, - numDimensions, - interpolationOrder, - myRank, - numRanks, - gFineNodesPerDir, - lFineNodesPerDir, - coarseRate, - meshData)); - } else if(meshLayout == "Global Lexicographic") { - // Note, LBV Feb 5th 2018: - // I think that it might make sense to pass ghostInterface rather than interpolationOrder. - // For that I need to make sure that ghostInterface can be computed with minimal mesh - // knowledge outside of the IndexManager... - geoData = rcp(new MueLu::GlobalLexicographicIndexManager(fineMap->getComm(), - coupled, - numDimensions, - interpolationOrder, - gFineNodesPerDir, - lFineNodesPerDir, - coarseRate, - minGlobalIndex)); + // First make sure that input parameters are set logically based on dimension + for (int dim = 0; dim < 3; ++dim) { + if (dim >= numDimensions) { + gFineNodesPerDir[dim] = 1; + lFineNodesPerDir[dim] = 1; } - - - *out << "The index manager has now been built" << std::endl; - *out << "graph num nodes: " << fineMap->getLocalNumElements() - << ", structured aggregation num nodes: " << geoData->getNumLocalFineNodes() << std::endl; - TEUCHOS_TEST_FOR_EXCEPTION(fineMap->getLocalNumElements() - != static_cast(geoData->getNumLocalFineNodes()), - Exceptions::RuntimeError, - "The local number of elements in the graph's map is not equal to " - "the number of nodes given by: lNodesPerDim!"); - if(coupled) { - TEUCHOS_TEST_FOR_EXCEPTION(fineMap->getGlobalNumElements() - != static_cast(geoData->getNumGlobalFineNodes()), - Exceptions::RuntimeError, - "The global number of elements in the graph's map is not equal to " - "the number of nodes given by: gNodesPerDim!"); - } - - *out << "Compute coarse mesh data" << std::endl; - std::vector > coarseMeshData = geoData->getCoarseMeshData(); - - // Now we are ready for the big loop over the fine node that will assign each - // node on the fine grid to an aggregate and a processor. 
- RCP graphFact = GetFactory("Graph"); - RCP coarseCoordinatesFineMap, coarseCoordinatesMap; - RCP > - myStructuredAlgorithm = rcp(new AggregationStructuredAlgorithm(graphFact)); - - if(interpolationOrder == 0 && outputAggregates){ - // Create aggregates for prolongation - *out << "Compute Aggregates" << std::endl; - RCP aggregates = rcp(new Aggregates(graph->GetDomainMap())); - aggregates->setObjectLabel("ST"); - aggregates->SetIndexManager(geoData); - aggregates->AggregatesCrossProcessors(coupled); - aggregates->SetNumAggregates(geoData->getNumLocalCoarseNodes()); - std::vector aggStat(geoData->getNumLocalFineNodes(), READY); - LO numNonAggregatedNodes = geoData->getNumLocalFineNodes(); - - myStructuredAlgorithm->BuildAggregates(pL, *graph, *aggregates, aggStat, - numNonAggregatedNodes); - - TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, - "MueLu::StructuredAggregationFactory::Build: Leftover nodes found! Error!"); - aggregates->ComputeAggregateSizes(true/*forceRecompute*/); - GetOStream(Statistics1) << aggregates->description() << std::endl; - Set(currentLevel, "Aggregates", aggregates); - + } + + // Get the coarsening rate + std::string coarseningRate = + pL.get("aggregation: coarsening rate"); + Teuchos::Array coarseRate; + try { + coarseRate = Teuchos::fromStringToArray(coarseningRate); + } catch (const Teuchos::InvalidArrayStringRepresentation &e) { + GetOStream(Errors, -1) << " *** \"aggregation: coarsening rate\" must be a " + "string convertible into an array! 
*** " + << std::endl; + throw e; + } + TEUCHOS_TEST_FOR_EXCEPTION( + (coarseRate.size() > 1) && (coarseRate.size() < numDimensions), + Exceptions::RuntimeError, + "\"aggregation: coarsening rate\" must have at least as many" + " components as the number of spatial dimensions in the problem."); + + // Now that we have extracted info from the level, create the IndexManager + RCP geoData; + if (!coupled) { + geoData = rcp(new MueLu::UncoupledIndexManager( + fineMap->getComm(), coupled, numDimensions, interpolationOrder, myRank, + numRanks, gFineNodesPerDir, lFineNodesPerDir, coarseRate, + singleCoarsePoint)); + } else if (meshLayout == "Local Lexicographic") { + Array meshData; + if (currentLevel.GetLevelID() == 0) { + // On level 0, data is provided by applications and has no associated + // factory. + meshData = currentLevel.Get>("aggregation: mesh data", + NoFactory::get()); + TEUCHOS_TEST_FOR_EXCEPTION( + meshData.empty() == true, Exceptions::RuntimeError, + "The meshData array is empty, somehow the input for structured" + " aggregation are not captured correctly."); } else { - // Create the graph of the prolongator - *out << "Compute CrsGraph" << std::endl; - RCP myGraph; - myStructuredAlgorithm->BuildGraph(*graph, geoData, dofsPerNode, myGraph, - coarseCoordinatesFineMap, coarseCoordinatesMap); - Set(currentLevel, "prolongatorGraph", myGraph); - } - - if(coupled) { - Set(currentLevel, "gCoarseNodesPerDim", geoData->getGlobalCoarseNodesPerDir()); + // On level > 0, data is provided directly by generating factories. 
+ meshData = Get>(currentLevel, "aggregation: mesh data"); } - Set(currentLevel, "lCoarseNodesPerDim", geoData->getLocalCoarseNodesPerDir()); - Set(currentLevel, "coarseCoordinatesFineMap", coarseCoordinatesFineMap); - Set(currentLevel, "coarseCoordinatesMap", coarseCoordinatesMap); - Set(currentLevel, "structuredInterpolationOrder", interpolationOrder); - Set(currentLevel, "numDimensions", numDimensions); - - } // Build() -} //namespace MueLu - + // Note, LBV Feb 5th 2018: + // I think that it might make sense to pass ghostInterface rather than + // interpolationOrder. For that I need to make sure that ghostInterface can + // be computed with minimal mesh knowledge outside of the IndexManager... + geoData = rcp(new MueLu::LocalLexicographicIndexManager( + fineMap->getComm(), coupled, numDimensions, interpolationOrder, myRank, + numRanks, gFineNodesPerDir, lFineNodesPerDir, coarseRate, meshData)); + } else if (meshLayout == "Global Lexicographic") { + // Note, LBV Feb 5th 2018: + // I think that it might make sense to pass ghostInterface rather than + // interpolationOrder. For that I need to make sure that ghostInterface can + // be computed with minimal mesh knowledge outside of the IndexManager... 
+ geoData = rcp(new MueLu::GlobalLexicographicIndexManager( + fineMap->getComm(), coupled, numDimensions, interpolationOrder, + gFineNodesPerDir, lFineNodesPerDir, coarseRate, minGlobalIndex)); + } + + *out << "The index manager has now been built" << std::endl; + *out << "graph num nodes: " << fineMap->getLocalNumElements() + << ", structured aggregation num nodes: " + << geoData->getNumLocalFineNodes() << std::endl; + TEUCHOS_TEST_FOR_EXCEPTION( + fineMap->getLocalNumElements() != + static_cast(geoData->getNumLocalFineNodes()), + Exceptions::RuntimeError, + "The local number of elements in the graph's map is not equal to " + "the number of nodes given by: lNodesPerDim!"); + if (coupled) { + TEUCHOS_TEST_FOR_EXCEPTION( + fineMap->getGlobalNumElements() != + static_cast(geoData->getNumGlobalFineNodes()), + Exceptions::RuntimeError, + "The global number of elements in the graph's map is not equal to " + "the number of nodes given by: gNodesPerDim!"); + } + + *out << "Compute coarse mesh data" << std::endl; + std::vector> coarseMeshData = geoData->getCoarseMeshData(); + + // Now we are ready for the big loop over the fine node that will assign each + // node on the fine grid to an aggregate and a processor. 
+ RCP graphFact = GetFactory("Graph"); + RCP coarseCoordinatesFineMap, coarseCoordinatesMap; + RCP> + myStructuredAlgorithm = + rcp(new AggregationStructuredAlgorithm(graphFact)); + + if (interpolationOrder == 0 && outputAggregates) { + // Create aggregates for prolongation + *out << "Compute Aggregates" << std::endl; + RCP aggregates = rcp(new Aggregates(graph->GetDomainMap())); + aggregates->setObjectLabel("ST"); + aggregates->SetIndexManager(geoData); + aggregates->AggregatesCrossProcessors(coupled); + aggregates->SetNumAggregates(geoData->getNumLocalCoarseNodes()); + std::vector aggStat(geoData->getNumLocalFineNodes(), READY); + LO numNonAggregatedNodes = geoData->getNumLocalFineNodes(); + + myStructuredAlgorithm->BuildAggregates(pL, *graph, *aggregates, aggStat, + numNonAggregatedNodes); + + TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, + "MueLu::StructuredAggregationFactory::Build: " + "Leftover nodes found! Error!"); + aggregates->ComputeAggregateSizes(true /*forceRecompute*/); + GetOStream(Statistics1) << aggregates->description() << std::endl; + Set(currentLevel, "Aggregates", aggregates); + + } else { + // Create the graph of the prolongator + *out << "Compute CrsGraph" << std::endl; + RCP myGraph; + myStructuredAlgorithm->BuildGraph(*graph, geoData, dofsPerNode, myGraph, + coarseCoordinatesFineMap, + coarseCoordinatesMap); + Set(currentLevel, "prolongatorGraph", myGraph); + } + + if (coupled) { + Set(currentLevel, "gCoarseNodesPerDim", + geoData->getGlobalCoarseNodesPerDir()); + } + Set(currentLevel, "lCoarseNodesPerDim", geoData->getLocalCoarseNodesPerDir()); + Set(currentLevel, "coarseCoordinatesFineMap", coarseCoordinatesFineMap); + Set(currentLevel, "coarseCoordinatesMap", coarseCoordinatesMap); + Set(currentLevel, "structuredInterpolationOrder", interpolationOrder); + Set(currentLevel, "numDimensions", numDimensions); + +} // Build() +} // namespace MueLu #endif /* MUELU_STRUCTUREDAGGREGATIONFACTORY_DEF_HPP_ */ diff 
--git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_kokkos_decl.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_kokkos_decl.hpp index 88724397a340..7a774d43b360 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_kokkos_decl.hpp @@ -47,9 +47,9 @@ #define MUELU_STRUCTUREDAGGREGATIONFACTORY_KOKKOS_DECL_HPP #include "MueLu_ConfigDefs.hpp" -#include "MueLu_SingleLevelFactoryBase.hpp" -#include "MueLu_Level_fwd.hpp" #include "MueLu_Exceptions.hpp" +#include "MueLu_Level_fwd.hpp" +#include "MueLu_SingleLevelFactoryBase.hpp" #include "MueLu_AggregationStructuredAlgorithm_kokkos_fwd.hpp" @@ -59,106 +59,125 @@ namespace MueLu { /*! @class StructuredAggregationFactory_kokkos class. - @brief Factory for building structured aggregates or CrsGraph for interpolation base prolongator. + @brief Factory for building structured aggregates or CrsGraph for + interpolation base prolongator. - Factory for creating structured aggregates or CrsGraph of the prolongator from the amalgamated graph of A. + Factory for creating structured aggregates or CrsGraph of the prolongator + from the amalgamated graph of A. - When Aggregates are requested, each node has a status which can be one of the following: + When Aggregates are requested, each node has a status which can be one of + the following: Node status | Meaning ------------|--------- - READY | Node is not aggregated and can be used for building a new aggregate or can be added to an existing aggregate. - AGGREGATED | Node is aggregated. - IGNORED | Node is not considered for aggregation (it may have been dropped or put into a singleton aggregate) - BOUNDARY | Node is a Dirichlet boundary node (with one or more Dirichlet boundary conditions). 
- ONEPT | The user forces the aggregation algorithm to treat the node as a singleton. Important: Do not forget to set aggregation: allow user-specified singletons to true! Otherwise Phase3 will just handle the ONEPT nodes and probably not build singletons + READY | Node is not aggregated and can be used for building a new + aggregate or can be added to an existing aggregate. AGGREGATED | Node is + aggregated. IGNORED | Node is not considered for aggregation (it may have + been dropped or put into a singleton aggregate) BOUNDARY | Node is a + Dirichlet boundary node (with one or more Dirichlet boundary conditions). + ONEPT | The user forces the aggregation algorithm to treat the node as + a singleton. Important: Do not forget to set aggregation: allow + user-specified singletons to true! Otherwise Phase3 will just handle the + ONEPT nodes and probably not build singletons @ingroup Aggregation ## Input/output of StructuredAggregationFactory_kokkos ## ### User parameters of StructuredAggregationFactory_kokkos ### - Parameter | type | default | master.xml | validated | requested | description + Parameter | type | default | master.xml | validated | requested | + description ----------|------|---------|:----------:|:---------:|:---------:|------------ - Graph | Factory | null | | * | * | Generating factory of the graph of A - DofsPerNode | Factory | null | | * | * | Generating factory for variable 'DofsPerNode', usually the same as for 'Graph' - lNodesPerDim | Factory | null | | * | * | Generating factory for variable 'lNodesPerDim', usually *this - aggregation: output type | std::string | see master.xml | * | * | | Type of output this factory will generate: Aggregates or CrsGraph - aggregation: coarsening rate | std::string | see master.xml | * | * | | A string interpretable as an array used to set the corasening rate in each spatial direction. 
- aggregation: number of spatial dimensions | int | see master.xml | * | * | | Number of spatial dimensions in the problem - aggregation: coarsening order | int | 0 | * | * | | The interpolation order used to construct grid transfer operators based off these aggregates. - - - The * in the @c master.xml column denotes that the parameter is defined in the @c master.xml file.
- The * in the @c validated column means that the parameter is declared in the list of valid input parameters (see StructuredAggregationFactory_kokkos::GetValidParameters).
- The * in the @c requested column states that the data is requested as input with all dependencies (see StructuredAggregationFactory_kokkos::DeclareInput). + Graph | Factory | null | | * | * | Generating factory of the + graph of A DofsPerNode | Factory | null | | * | * | Generating + factory for variable 'DofsPerNode', usually the same as for 'Graph' + lNodesPerDim | Factory | null | | * | * | Generating factory for + variable 'lNodesPerDim', usually *this aggregation: output type | std::string + | see master.xml | * | * | | Type of output this factory will generate: + Aggregates or CrsGraph aggregation: coarsening rate | std::string | see + master.xml | * | * | | A string interpretable as an array used to set the + corasening rate in each spatial direction. aggregation: number of spatial + dimensions | int | see master.xml | * | * | | Number of spatial dimensions + in the problem aggregation: coarsening order | int | 0 | * | * | | The + interpolation order used to construct grid transfer operators based off these + aggregates. + + + The * in the @c master.xml column denotes that the parameter is defined in + the @c master.xml file.
The * in the @c validated column means that the + parameter is declared in the list of valid input parameters (see + StructuredAggregationFactory_kokkos::GetValidParameters).
The * in the @c + requested column states that the data is requested as input with all + dependencies (see StructuredAggregationFactory_kokkos::DeclareInput). ### Variables provided by StructuredAggregationFactory_kokkos ### - After StructuredAggregationFactory_kokkos::Build the following data is available (if requested) + After StructuredAggregationFactory_kokkos::Build the following data is + available (if requested) Parameter | generated by | description ----------|--------------|------------ - | Aggregates | StructuredAggregationFactory_kokkos | Container class with aggregation information. See also Aggregates. - | CrsGraph | StructuredAggregationFactory_kokkos | CrsGraph of the prolongator + | Aggregates | StructuredAggregationFactory_kokkos | Container class + with aggregation information. See also Aggregates. | CrsGraph | + StructuredAggregationFactory_kokkos | CrsGraph of the prolongator */ - template - class StructuredAggregationFactory_kokkos : public SingleLevelFactoryBase { +template +class StructuredAggregationFactory_kokkos : public SingleLevelFactoryBase { #undef MUELU_STRUCTUREDAGGREGATIONFACTORY_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ - - //! Constructor. - StructuredAggregationFactory_kokkos(); +public: + //! @name Constructors/Destructors. + //@{ - //! Destructor. - virtual ~StructuredAggregationFactory_kokkos() { } + //! Constructor. + StructuredAggregationFactory_kokkos(); - RCP GetValidParameterList() const; + //! Destructor. + virtual ~StructuredAggregationFactory_kokkos() {} - //@} + RCP GetValidParameterList() const; - //! @name Set/get methods. - //@{ - // set information about 1-node aggregates (map name and generating factory) - void SetOnePtMapName(const std::string name, Teuchos::RCP mapFact) { - SetParameter("OnePt aggregate map name", ParameterEntry(std::string(name))); // revalidate - SetFactory("OnePt aggregate map factory",mapFact); - } + //@} - //@} + //! 
@name Set/get methods. + //@{ + // set information about 1-node aggregates (map name and generating factory) + void SetOnePtMapName(const std::string name, + Teuchos::RCP mapFact) { + SetParameter("OnePt aggregate map name", + ParameterEntry(std::string(name))); // revalidate + SetFactory("OnePt aggregate map factory", mapFact); + } - //! Input - //@{ + //@} - void DeclareInput(Level& currentLevel) const; + //! Input + //@{ - //@} + void DeclareInput(Level ¤tLevel) const; - //! @name Build methods. - //@{ + //@} - /*! @brief Build aggregates. */ - void Build(Level& currentLevel) const; + //! @name Build methods. + //@{ - //@} + /*! @brief Build aggregates. */ + void Build(Level ¤tLevel) const; - private: + //@} - //! boolean flag: definition phase - //! if true, the aggregation algorithms still can be set and changed. - //! if false, no change in aggregation algorithms is possible any more - mutable bool bDefinitionPhase_; +private: + //! boolean flag: definition phase + //! if true, the aggregation algorithms still can be set and changed. + //! 
if false, no change in aggregation algorithms is possible any more + mutable bool bDefinitionPhase_; - }; // class StructuredAggregationFactory +}; // class StructuredAggregationFactory -} +} // namespace MueLu #define MUELU_STRUCTUREDAGGREGATIONFACTORY_KOKKOS_SHORT #endif // MUELU_UNCOUPLEDAGGREGATIONFACTORY_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_kokkos_def.hpp b/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_kokkos_def.hpp index 1067efc3e08d..9da698423599 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_kokkos_def.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/MueLu_StructuredAggregationFactory_kokkos_def.hpp @@ -47,8 +47,8 @@ #define MUELU_STRUCTUREDAGGREGATIONFACTORY_KOKKOS_DEF_HPP // Xpetra includes -#include #include +#include // MueLu generic includes #include "MueLu_Level.hpp" @@ -56,202 +56,226 @@ #include "MueLu_Monitor.hpp" // MueLu specific includes (kokkos version) -#include "MueLu_LWGraph_kokkos.hpp" #include "MueLu_Aggregates.hpp" -#include "MueLu_IndexManager_kokkos.hpp" #include "MueLu_AggregationStructuredAlgorithm_kokkos.hpp" +#include "MueLu_IndexManager_kokkos.hpp" +#include "MueLu_LWGraph_kokkos.hpp" #include "MueLu_StructuredAggregationFactory_kokkos_decl.hpp" namespace MueLu { - template - StructuredAggregationFactory_kokkos:: - StructuredAggregationFactory_kokkos() : bDefinitionPhase_(true) { } - - template - RCP StructuredAggregationFactory_kokkos:: - GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - -#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("aggregation: preserve Dirichlet points"); - SET_VALID_ENTRY("aggregation: allow user-specified singletons"); - SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors"); - SET_VALID_ENTRY("aggregation: phase3 avoid singletons"); 
-#undef SET_VALID_ENTRY - - // general variables needed in StructuredAggregationFactory - validParamList->set ("aggregation: output type", "Aggregates", - "Type of object holding the aggregation data: Aggregtes or CrsGraph"); - validParamList->set ("aggregation: coarsening rate", "{3}", - "Coarsening rate per spatial dimensions"); - validParamList->set ("aggregation: coarsening order", 0, - "The interpolation order used to construct grid transfer operators based off these aggregates."); - validParamList->set >("Graph", Teuchos::null, - "Graph of the matrix after amalgamation but without dropping."); - validParamList->set >("DofsPerNode", Teuchos::null, - "Number of degrees of freedom per mesh node, provided by the coalsce drop factory."); - validParamList->set >("numDimensions", Teuchos::null, - "Number of spatial dimension provided by CoordinatesTransferFactory."); - validParamList->set >("lNodesPerDim", Teuchos::null, - "Number of nodes per spatial dimmension provided by CoordinatesTransferFactory."); - - return validParamList; - } // GetValidParameterList() - - template - void StructuredAggregationFactory_kokkos:: - DeclareInput(Level& currentLevel) const { - Input(currentLevel, "Graph"); - Input(currentLevel, "DofsPerNode"); - - // Request the local number of nodes per dimensions - if(currentLevel.GetLevelID() == 0) { - if(currentLevel.IsAvailable("numDimensions", NoFactory::get())) { - currentLevel.DeclareInput("numDimensions", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("numDimensions", NoFactory::get()), - Exceptions::RuntimeError, - "numDimensions was not provided by the user on level0!"); - } - if(currentLevel.IsAvailable("lNodesPerDim", NoFactory::get())) { - currentLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("lNodesPerDim", NoFactory::get()), - Exceptions::RuntimeError, - "lNodesPerDim was not provided by the user on level0!"); - 
} +template +StructuredAggregationFactory_kokkos::StructuredAggregationFactory_kokkos() + : bDefinitionPhase_(true) {} + +template +RCP +StructuredAggregationFactory_kokkos::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + +#define SET_VALID_ENTRY(name) \ + validParamList->setEntry(name, MasterList::getEntry(name)) + SET_VALID_ENTRY("aggregation: preserve Dirichlet points"); + SET_VALID_ENTRY("aggregation: allow user-specified singletons"); + SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors"); + SET_VALID_ENTRY("aggregation: phase3 avoid singletons"); +#undef SET_VALID_ENTRY + + // general variables needed in StructuredAggregationFactory + validParamList->set( + "aggregation: output type", "Aggregates", + "Type of object holding the aggregation data: Aggregtes or CrsGraph"); + validParamList->set("aggregation: coarsening rate", "{3}", + "Coarsening rate per spatial dimensions"); + validParamList->set("aggregation: coarsening order", 0, + "The interpolation order used to construct grid " + "transfer operators based off these aggregates."); + validParamList->set>( + "Graph", Teuchos::null, + "Graph of the matrix after amalgamation but without dropping."); + validParamList->set>( + "DofsPerNode", Teuchos::null, + "Number of degrees of freedom per mesh node, provided by the coalsce " + "drop factory."); + validParamList->set>( + "numDimensions", Teuchos::null, + "Number of spatial dimension provided by CoordinatesTransferFactory."); + validParamList->set>( + "lNodesPerDim", Teuchos::null, + "Number of nodes per spatial dimmension provided by " + "CoordinatesTransferFactory."); + + return validParamList; +} // GetValidParameterList() + +template +void StructuredAggregationFactory_kokkos:: + DeclareInput(Level ¤tLevel) const { + Input(currentLevel, "Graph"); + Input(currentLevel, "DofsPerNode"); + + // Request the local number of nodes per dimensions + if (currentLevel.GetLevelID() == 0) { + if 
(currentLevel.IsAvailable("numDimensions", NoFactory::get())) { + currentLevel.DeclareInput("numDimensions", NoFactory::get(), this); } else { - Input(currentLevel, "lNodesPerDim"); - Input(currentLevel, "numDimensions"); + TEUCHOS_TEST_FOR_EXCEPTION( + currentLevel.IsAvailable("numDimensions", NoFactory::get()), + Exceptions::RuntimeError, + "numDimensions was not provided by the user on level0!"); } - } // DeclareInput() - - template - void StructuredAggregationFactory_kokkos:: - Build(Level ¤tLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); - - RCP out; - if(const char* dbg = std::getenv("MUELU_STRUCTUREDAGGREGATION_DEBUG")) { - out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - out->setShowAllFrontMatter(false).setShowProcRank(true); + if (currentLevel.IsAvailable("lNodesPerDim", NoFactory::get())) { + currentLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this); } else { - out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + TEUCHOS_TEST_FOR_EXCEPTION( + currentLevel.IsAvailable("lNodesPerDim", NoFactory::get()), + Exceptions::RuntimeError, + "lNodesPerDim was not provided by the user on level0!"); } - - using device_type = typename LWGraph_kokkos::local_graph_type::device_type; - using execution_space = typename LWGraph_kokkos::local_graph_type::device_type::execution_space; - using memory_space = typename LWGraph_kokkos::local_graph_type::device_type::memory_space; - - *out << "Entering structured aggregation" << std::endl; - - ParameterList pL = GetParameterList(); - bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed - - // General problem informations are gathered from data stored in the problem matix. 
- RCP graph = Get >(currentLevel, "Graph"); - RCP fineMap = graph->GetDomainMap(); - const int myRank = fineMap->getComm()->getRank(); - const LO dofsPerNode = Get(currentLevel, "DofsPerNode"); - - // Since we want to operate on nodes and not dof, we need to modify the rowMap in order to - // obtain a nodeMap. - const int interpolationOrder = pL.get("aggregation: coarsening order"); - std::string outputType = pL.get("aggregation: output type"); - const bool outputAggregates = (outputType == "Aggregates" ? true : false); - Array lFineNodesPerDir(3); - int numDimensions; - if(currentLevel.GetLevelID() == 0) { - // On level 0, data is provided by applications and has no associated factory. - lFineNodesPerDir = currentLevel.Get >("lNodesPerDim", NoFactory::get()); - numDimensions = currentLevel.Get("numDimensions", NoFactory::get()); - } else { - // On level > 0, data is provided directly by generating factories. - lFineNodesPerDir = Get >(currentLevel, "lNodesPerDim"); - numDimensions = Get(currentLevel, "numDimensions"); + } else { + Input(currentLevel, "lNodesPerDim"); + Input(currentLevel, "numDimensions"); + } +} // DeclareInput() + +template +void StructuredAggregationFactory_kokkos< + LocalOrdinal, GlobalOrdinal, Node>::Build(Level ¤tLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + + RCP out; + if (const char *dbg = std::getenv("MUELU_STRUCTUREDAGGREGATION_DEBUG")) { + out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + out->setShowAllFrontMatter(false).setShowProcRank(true); + } else { + out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream())); + } + + using device_type = typename LWGraph_kokkos::local_graph_type::device_type; + using execution_space = + typename LWGraph_kokkos::local_graph_type::device_type::execution_space; + using memory_space = + typename LWGraph_kokkos::local_graph_type::device_type::memory_space; + + *out << "Entering structured aggregation" << std::endl; + + ParameterList pL = GetParameterList(); 
+ bDefinitionPhase_ = false; // definition phase is finished, now all + // aggregation algorithm information is fixed + + // General problem informations are gathered from data stored in the problem + // matix. + RCP graph = + Get>(currentLevel, "Graph"); + RCP fineMap = graph->GetDomainMap(); + const int myRank = fineMap->getComm()->getRank(); + const LO dofsPerNode = Get(currentLevel, "DofsPerNode"); + + // Since we want to operate on nodes and not dof, we need to modify the rowMap + // in order to obtain a nodeMap. + const int interpolationOrder = pL.get("aggregation: coarsening order"); + std::string outputType = pL.get("aggregation: output type"); + const bool outputAggregates = (outputType == "Aggregates" ? true : false); + Array lFineNodesPerDir(3); + int numDimensions; + if (currentLevel.GetLevelID() == 0) { + // On level 0, data is provided by applications and has no associated + // factory. + lFineNodesPerDir = + currentLevel.Get>("lNodesPerDim", NoFactory::get()); + numDimensions = currentLevel.Get("numDimensions", NoFactory::get()); + } else { + // On level > 0, data is provided directly by generating factories. + lFineNodesPerDir = Get>(currentLevel, "lNodesPerDim"); + numDimensions = Get(currentLevel, "numDimensions"); + } + + // First make sure that input parameters are set logically based on dimension + for (int dim = 0; dim < 3; ++dim) { + if (dim >= numDimensions) { + lFineNodesPerDir[dim] = 1; } - - - // First make sure that input parameters are set logically based on dimension - for(int dim = 0; dim < 3; ++dim) { - if(dim >= numDimensions) { - lFineNodesPerDir[dim] = 1; - } - } - - // Get the coarsening rate - std::string coarseningRate = pL.get("aggregation: coarsening rate"); - Teuchos::Array coarseRate; - try { - coarseRate = Teuchos::fromStringToArray(coarseningRate); - } catch(const Teuchos::InvalidArrayStringRepresentation& e) { - GetOStream(Errors,-1) << " *** \"aggregation: coarsening rate\" must be a string convertible into an array! 
*** " - << std::endl; - throw e; - } - TEUCHOS_TEST_FOR_EXCEPTION((coarseRate.size() > 1) && (coarseRate.size() < numDimensions), - Exceptions::RuntimeError, - "\"aggregation: coarsening rate\" must have at least as many" - " components as the number of spatial dimensions in the problem."); - - // Now that we have extracted info from the level, create the IndexManager - RCP geoData = rcp(new IndexManager_kokkos(numDimensions, - interpolationOrder, myRank, - lFineNodesPerDir, - coarseRate)); - - *out << "The index manager has now been built" << std::endl; - TEUCHOS_TEST_FOR_EXCEPTION(fineMap->getLocalNumElements() - != static_cast(geoData->getNumLocalFineNodes()), - Exceptions::RuntimeError, - "The local number of elements in the graph's map is not equal to " - "the number of nodes given by: lNodesPerDim!"); - - // Now we are ready for the big loop over the fine node that will assign each - // node on the fine grid to an aggregate and a processor. - RCP myStructuredAlgorithm - = rcp(new AggregationStructuredAlgorithm_kokkos()); - - if(interpolationOrder == 0 && outputAggregates){ - RCP aggregates = rcp(new Aggregates(graph->GetDomainMap())); - aggregates->setObjectLabel("ST"); - aggregates->SetIndexManagerKokkos(geoData); - aggregates->AggregatesCrossProcessors(false); - aggregates->SetNumAggregates(geoData->getNumCoarseNodes()); - - LO numNonAggregatedNodes = geoData->getNumLocalFineNodes(); - Kokkos::View aggStat("aggStat", numNonAggregatedNodes); - Kokkos::parallel_for("StructuredAggregation: initialize aggStat", - Kokkos::RangePolicy(0, numNonAggregatedNodes), - KOKKOS_LAMBDA(const LO nodeIdx) {aggStat(nodeIdx) = READY;}); - - myStructuredAlgorithm->BuildAggregates(pL, *graph, *aggregates, aggStat, - numNonAggregatedNodes); - - *out << "numNonAggregatedNodes: " << numNonAggregatedNodes << std::endl; - - TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, - "MueLu::StructuredAggregationFactory::Build: Leftover nodes found! 
Error!"); - aggregates->ComputeAggregateSizes(true/*forceRecompute*/); - GetOStream(Statistics1) << aggregates->description() << std::endl; - Set(currentLevel, "Aggregates", aggregates); - - } else { - // Create Coarse Data - RCP myGraph; - myStructuredAlgorithm->BuildGraph(*graph, geoData, dofsPerNode, myGraph); - Set(currentLevel, "prolongatorGraph", myGraph); - } - - Set(currentLevel, "lCoarseNodesPerDim", geoData->getCoarseNodesPerDirArray()); - Set(currentLevel, "indexManager", geoData); - Set(currentLevel, "structuredInterpolationOrder", interpolationOrder); - Set(currentLevel, "numDimensions", numDimensions); - - } // Build() - -} //namespace MueLu + } + + // Get the coarsening rate + std::string coarseningRate = + pL.get("aggregation: coarsening rate"); + Teuchos::Array coarseRate; + try { + coarseRate = Teuchos::fromStringToArray(coarseningRate); + } catch (const Teuchos::InvalidArrayStringRepresentation &e) { + GetOStream(Errors, -1) << " *** \"aggregation: coarsening rate\" must be a " + "string convertible into an array! 
*** " + << std::endl; + throw e; + } + TEUCHOS_TEST_FOR_EXCEPTION( + (coarseRate.size() > 1) && (coarseRate.size() < numDimensions), + Exceptions::RuntimeError, + "\"aggregation: coarsening rate\" must have at least as many" + " components as the number of spatial dimensions in the problem."); + + // Now that we have extracted info from the level, create the IndexManager + RCP geoData = rcp(new IndexManager_kokkos( + numDimensions, interpolationOrder, myRank, lFineNodesPerDir, coarseRate)); + + *out << "The index manager has now been built" << std::endl; + TEUCHOS_TEST_FOR_EXCEPTION( + fineMap->getLocalNumElements() != + static_cast(geoData->getNumLocalFineNodes()), + Exceptions::RuntimeError, + "The local number of elements in the graph's map is not equal to " + "the number of nodes given by: lNodesPerDim!"); + + // Now we are ready for the big loop over the fine node that will assign each + // node on the fine grid to an aggregate and a processor. + RCP myStructuredAlgorithm = + rcp(new AggregationStructuredAlgorithm_kokkos()); + + if (interpolationOrder == 0 && outputAggregates) { + RCP aggregates = rcp(new Aggregates(graph->GetDomainMap())); + aggregates->setObjectLabel("ST"); + aggregates->SetIndexManagerKokkos(geoData); + aggregates->AggregatesCrossProcessors(false); + aggregates->SetNumAggregates(geoData->getNumCoarseNodes()); + + LO numNonAggregatedNodes = geoData->getNumLocalFineNodes(); + Kokkos::View aggStat("aggStat", + numNonAggregatedNodes); + Kokkos::parallel_for( + "StructuredAggregation: initialize aggStat", + Kokkos::RangePolicy(0, numNonAggregatedNodes), + KOKKOS_LAMBDA(const LO nodeIdx) { aggStat(nodeIdx) = READY; }); + + myStructuredAlgorithm->BuildAggregates(pL, *graph, *aggregates, aggStat, + numNonAggregatedNodes); + + *out << "numNonAggregatedNodes: " << numNonAggregatedNodes << std::endl; + + TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, + "MueLu::StructuredAggregationFactory::Build: " + "Leftover nodes found! 
Error!"); + aggregates->ComputeAggregateSizes(true /*forceRecompute*/); + GetOStream(Statistics1) << aggregates->description() << std::endl; + Set(currentLevel, "Aggregates", aggregates); + + } else { + // Create Coarse Data + RCP myGraph; + myStructuredAlgorithm->BuildGraph(*graph, geoData, dofsPerNode, myGraph); + Set(currentLevel, "prolongatorGraph", myGraph); + } + + Set(currentLevel, "lCoarseNodesPerDim", geoData->getCoarseNodesPerDirArray()); + Set(currentLevel, "indexManager", geoData); + Set(currentLevel, "structuredInterpolationOrder", interpolationOrder); + Set(currentLevel, "numDimensions", numDimensions); + +} // Build() + +} // namespace MueLu #endif /* MUELU_STRUCTUREDAGGREGATIONFACTORY_KOKKOS_DEF_HPP */ diff --git a/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_GlobalLexicographicIndexManager_decl.hpp b/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_GlobalLexicographicIndexManager_decl.hpp index 9488bfe3dd72..9c367561f50e 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_GlobalLexicographicIndexManager_decl.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_GlobalLexicographicIndexManager_decl.hpp @@ -52,8 +52,8 @@ #include #include -#include #include +#include /***************************************************************************** @@ -77,69 +77,73 @@ namespace MueLu { correspond to nodes. While not strictly necessary, it might be convenient. 
*/ - template - class GlobalLexicographicIndexManager : public IndexManager { +template +class GlobalLexicographicIndexManager + : public IndexManager { #undef MUELU_GLOBALLEXICOGRAPHICINDEXMANAGER_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - - GlobalLexicographicIndexManager(); - - GlobalLexicographicIndexManager(const RCP< const Teuchos::Comm > comm, const bool coupled, - const int NumDimensions, const int interpolationOrder, - const Array GFineNodesPerDir, - const Array LFineNodesPerDir, const Array CoarseRate, - const GO MinGlobalIndex); +public: + GlobalLexicographicIndexManager(); - virtual ~GlobalLexicographicIndexManager() {} + GlobalLexicographicIndexManager(const RCP> comm, + const bool coupled, const int NumDimensions, + const int interpolationOrder, + const Array GFineNodesPerDir, + const Array LFineNodesPerDir, + const Array CoarseRate, + const GO MinGlobalIndex); - void computeGlobalCoarseParameters(); + virtual ~GlobalLexicographicIndexManager() {} - void getGhostedNodesData(const RCP fineMap, - Array& ghostedNodeCoarseLIDs, - Array& ghostedNodeCoarsePIDs, - Array& ghostedNodeCoarseGIDs) const; + void computeGlobalCoarseParameters(); - void getCoarseNodesData(const RCP fineCoordinatesMap, - Array& coarseNodeCoarseGIDs, - Array& coarseNodeFineGIDs) const; + void getGhostedNodesData(const RCP fineMap, + Array &ghostedNodeCoarseLIDs, + Array &ghostedNodeCoarsePIDs, + Array &ghostedNodeCoarseGIDs) const; - std::vector > getCoarseMeshData() const; + void getCoarseNodesData(const RCP fineCoordinatesMap, + Array &coarseNodeCoarseGIDs, + Array &coarseNodeFineGIDs) const; - void getFineNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const; + std::vector> getCoarseMeshData() const; - void getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const; + void getFineNodeGlobalTuple(const GO myGID, GO &i, GO &j, GO &k) const; - void getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const; + void getFineNodeLocalTuple(const LO 
myLID, LO &i, LO &j, LO &k) const; - void getFineNodeGID(const GO i, const GO j, const GO k, GO& myGID) const; + void getFineNodeGhostedTuple(const LO myLID, LO &i, LO &j, LO &k) const; - void getFineNodeLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getFineNodeGID(const GO i, const GO j, const GO k, GO &myGID) const; - void getCoarseNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const; + void getFineNodeLID(const LO i, const LO j, const LO k, LO &myLID) const; - void getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const; + void getCoarseNodeGlobalTuple(const GO myGID, GO &i, GO &j, GO &k) const; - void getCoarseNodeGID(const GO i, const GO j, const GO k, GO& myGID) const; + void getCoarseNodeLocalTuple(const LO myLID, LO &i, LO &j, LO &k) const; - void getCoarseNodeLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getCoarseNodeGID(const GO i, const GO j, const GO k, GO &myGID) const; - void getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getCoarseNodeLID(const LO i, const LO j, const LO k, LO &myLID) const; - void getCoarseNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, + LO &myLID) const; - void getGhostedNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getCoarseNodeFineLID(const LO i, const LO j, const LO k, + LO &myLID) const; - void getGhostedNodeCoarseLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getGhostedNodeFineLID(const LO i, const LO j, const LO k, + LO &myLID) const; - private: + void getGhostedNodeCoarseLID(const LO i, const LO j, const LO k, + LO &myLID) const; - }; +private: +}; -} //namespace MueLu +} // namespace MueLu #define MUELU_GLOBALLEXICOGRPHICINDEXMANAGER_SHORT #endif // MUELU_GLOBALLEXICOGRPHICINDEXMANAGER_DECL_HPP diff --git a/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_GlobalLexicographicIndexManager_def.hpp 
b/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_GlobalLexicographicIndexManager_def.hpp index 8211e1a461bd..284fbd80b9ea 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_GlobalLexicographicIndexManager_def.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_GlobalLexicographicIndexManager_def.hpp @@ -51,341 +51,398 @@ namespace MueLu { - template - GlobalLexicographicIndexManager:: - GlobalLexicographicIndexManager(const RCP > comm, const bool coupled, - const int NumDimensions, const int interpolationOrder, - const Array GFineNodesPerDir, - const Array LFineNodesPerDir, const Array CoarseRate, - const GO MinGlobalIndex) : - IndexManager(comm, coupled, false, NumDimensions, interpolationOrder, GFineNodesPerDir, LFineNodesPerDir) { - - // Load coarse rate, being careful about formating. - for(int dim = 0; dim < 3; ++dim) { - if(dim < this->numDimensions) { - if(CoarseRate.size() == 1) { - this->coarseRate[dim] = CoarseRate[0]; - } else if(CoarseRate.size() == this->numDimensions) { - this->coarseRate[dim] = CoarseRate[dim]; - } - } else { - this->coarseRate[dim] = 1; - } - } - - { - GO tmp = 0; - this->startIndices[2]= MinGlobalIndex / (this->gFineNodesPerDir[1]*this->gFineNodesPerDir[0]); - tmp = MinGlobalIndex % (this->gFineNodesPerDir[1]*this->gFineNodesPerDir[0]); - this->startIndices[1]= tmp / this->gFineNodesPerDir[0]; - this->startIndices[0]= tmp % this->gFineNodesPerDir[0]; - - for(int dim = 0; dim < 3; ++dim) { - this->startIndices[dim + 3] = this->startIndices[dim] + this->lFineNodesPerDir[dim] - 1; +template +GlobalLexicographicIndexManager:: + GlobalLexicographicIndexManager(const RCP> comm, + const bool coupled, const int NumDimensions, + const int interpolationOrder, + const Array GFineNodesPerDir, + const Array LFineNodesPerDir, + const Array CoarseRate, + const GO MinGlobalIndex) + : IndexManager(comm, coupled, false, NumDimensions, interpolationOrder, + GFineNodesPerDir, LFineNodesPerDir) { 
+ + // Load coarse rate, being careful about formating. + for (int dim = 0; dim < 3; ++dim) { + if (dim < this->numDimensions) { + if (CoarseRate.size() == 1) { + this->coarseRate[dim] = CoarseRate[0]; + } else if (CoarseRate.size() == this->numDimensions) { + this->coarseRate[dim] = CoarseRate[dim]; } + } else { + this->coarseRate[dim] = 1; } - - this->computeMeshParameters(); - computeGlobalCoarseParameters(); - } - template - void GlobalLexicographicIndexManager:: - computeGlobalCoarseParameters() { - this->gNumCoarseNodes10 = this->gCoarseNodesPerDir[0]*this->gCoarseNodesPerDir[1]; - this->gNumCoarseNodes = this->gNumCoarseNodes10*this->gCoarseNodesPerDir[2]; + { + GO tmp = 0; + this->startIndices[2] = MinGlobalIndex / (this->gFineNodesPerDir[1] * + this->gFineNodesPerDir[0]); + tmp = MinGlobalIndex % + (this->gFineNodesPerDir[1] * this->gFineNodesPerDir[0]); + this->startIndices[1] = tmp / this->gFineNodesPerDir[0]; + this->startIndices[0] = tmp % this->gFineNodesPerDir[0]; + + for (int dim = 0; dim < 3; ++dim) { + this->startIndices[dim + 3] = + this->startIndices[dim] + this->lFineNodesPerDir[dim] - 1; + } } - template - void GlobalLexicographicIndexManager:: - getGhostedNodesData(const RCP fineMap, - Array& ghostedNodeCoarseLIDs, Array& ghostedNodeCoarsePIDs, Array&ghostedNodeCoarseGIDs) const { - - ghostedNodeCoarseLIDs.resize(this->getNumLocalGhostedNodes()); - ghostedNodeCoarsePIDs.resize(this->getNumLocalGhostedNodes()); - ghostedNodeCoarseGIDs.resize(this->numGhostedNodes); - - // Find the GIDs, LIDs and PIDs of the coarse points on the fine mesh and coarse - // mesh as this data will be used to fill vertex2AggId and procWinner vectors. 
- Array lCoarseNodeCoarseGIDs(this->lNumCoarseNodes), + this->computeMeshParameters(); + computeGlobalCoarseParameters(); +} + +template +void GlobalLexicographicIndexManager::computeGlobalCoarseParameters() { + this->gNumCoarseNodes10 = + this->gCoarseNodesPerDir[0] * this->gCoarseNodesPerDir[1]; + this->gNumCoarseNodes = this->gNumCoarseNodes10 * this->gCoarseNodesPerDir[2]; +} + +template +void GlobalLexicographicIndexManager:: + getGhostedNodesData(const RCP fineMap, + Array &ghostedNodeCoarseLIDs, + Array &ghostedNodeCoarsePIDs, + Array &ghostedNodeCoarseGIDs) const { + + ghostedNodeCoarseLIDs.resize(this->getNumLocalGhostedNodes()); + ghostedNodeCoarsePIDs.resize(this->getNumLocalGhostedNodes()); + ghostedNodeCoarseGIDs.resize(this->numGhostedNodes); + + // Find the GIDs, LIDs and PIDs of the coarse points on the fine mesh and + // coarse mesh as this data will be used to fill vertex2AggId and procWinner + // vectors. + Array lCoarseNodeCoarseGIDs(this->lNumCoarseNodes), lCoarseNodeFineGIDs(this->lNumCoarseNodes); - Array ghostedCoarseNodeFineGIDs(this->numGhostedNodes); - Array ghostedCoarseNodeCoarseIndices(3), ghostedCoarseNodeFineIndices(3), ijk(3); - LO currentIndex = -1, currentCoarseIndex = -1; - for(ijk[2] = 0; ijk[2] < this->ghostedNodesPerDir[2]; ++ijk[2]) { - for(ijk[1] = 0; ijk[1] < this->ghostedNodesPerDir[1]; ++ijk[1]) { - for(ijk[0] = 0; ijk[0] < this->ghostedNodesPerDir[0]; ++ijk[0]) { - currentIndex = ijk[2]*this->numGhostedNodes10 + ijk[1]*this->ghostedNodesPerDir[0] + ijk[0]; - ghostedCoarseNodeCoarseIndices[0] = this->startGhostedCoarseNode[0] + ijk[0]; - ghostedCoarseNodeCoarseIndices[1] = this->startGhostedCoarseNode[1] + ijk[1]; - ghostedCoarseNodeCoarseIndices[2] = this->startGhostedCoarseNode[2] + ijk[2]; - GO myCoarseGID = ghostedCoarseNodeCoarseIndices[0] - + ghostedCoarseNodeCoarseIndices[1]*this->gCoarseNodesPerDir[0] - + ghostedCoarseNodeCoarseIndices[2]*this->gNumCoarseNodes10; - ghostedNodeCoarseGIDs[currentIndex] = 
myCoarseGID; - GO myGID = 0, factor[3] = {}; - factor[2] = this->gNumFineNodes10; - factor[1] = this->gFineNodesPerDir[0]; - factor[0] = 1; - for(int dim = 0; dim < 3; ++dim) { - if(dim < this->numDimensions) { - if(this->startIndices[dim] - this->offsets[dim] + ijk[dim]*this->coarseRate[dim] - < this->gFineNodesPerDir[dim] - 1) { - myGID += (this->startIndices[dim] - this->offsets[dim] - + ijk[dim]*this->coarseRate[dim])*factor[dim]; - } else { - myGID += (this->startIndices[dim] - this->offsets[dim] + (ijk[dim] - 1) - *this->coarseRate[dim] + this->endRate[dim])*factor[dim]; - } + Array ghostedCoarseNodeFineGIDs(this->numGhostedNodes); + Array ghostedCoarseNodeCoarseIndices(3), ghostedCoarseNodeFineIndices(3), + ijk(3); + LO currentIndex = -1, currentCoarseIndex = -1; + for (ijk[2] = 0; ijk[2] < this->ghostedNodesPerDir[2]; ++ijk[2]) { + for (ijk[1] = 0; ijk[1] < this->ghostedNodesPerDir[1]; ++ijk[1]) { + for (ijk[0] = 0; ijk[0] < this->ghostedNodesPerDir[0]; ++ijk[0]) { + currentIndex = ijk[2] * this->numGhostedNodes10 + + ijk[1] * this->ghostedNodesPerDir[0] + ijk[0]; + ghostedCoarseNodeCoarseIndices[0] = + this->startGhostedCoarseNode[0] + ijk[0]; + ghostedCoarseNodeCoarseIndices[1] = + this->startGhostedCoarseNode[1] + ijk[1]; + ghostedCoarseNodeCoarseIndices[2] = + this->startGhostedCoarseNode[2] + ijk[2]; + GO myCoarseGID = + ghostedCoarseNodeCoarseIndices[0] + + ghostedCoarseNodeCoarseIndices[1] * this->gCoarseNodesPerDir[0] + + ghostedCoarseNodeCoarseIndices[2] * this->gNumCoarseNodes10; + ghostedNodeCoarseGIDs[currentIndex] = myCoarseGID; + GO myGID = 0, factor[3] = {}; + factor[2] = this->gNumFineNodes10; + factor[1] = this->gFineNodesPerDir[0]; + factor[0] = 1; + for (int dim = 0; dim < 3; ++dim) { + if (dim < this->numDimensions) { + if (this->startIndices[dim] - this->offsets[dim] + + ijk[dim] * this->coarseRate[dim] < + this->gFineNodesPerDir[dim] - 1) { + myGID += (this->startIndices[dim] - this->offsets[dim] + + ijk[dim] * this->coarseRate[dim]) * 
+ factor[dim]; + } else { + myGID += (this->startIndices[dim] - this->offsets[dim] + + (ijk[dim] - 1) * this->coarseRate[dim] + + this->endRate[dim]) * + factor[dim]; } } - // lbv 02-08-2018: - // This check is simplistic and should be replaced by a condition that checks - // if the local tuple of the current index is wihin the range of local nodes - // or not in the range of ghosted nodes. - if((!this->ghostInterface[0] || ijk[0] != 0) && - (!this->ghostInterface[2] || ijk[1] != 0) && - (!this->ghostInterface[4] || ijk[2] != 0) && - (!this->ghostInterface[1] || ijk[0] != this->ghostedNodesPerDir[0] - 1) && - (!this->ghostInterface[3] || ijk[1] != this->ghostedNodesPerDir[1] - 1) && - (!this->ghostInterface[5] || ijk[2] != this->ghostedNodesPerDir[2] - 1)) { - - // this->getGhostedNodeFineLID(ijk[0], ijk[1], ijk[2], coarseNodeFineLID); - if(this->interpolationOrder_ == 0) { - currentCoarseIndex = 0; - if(this->ghostInterface[4]) { - currentCoarseIndex += (ijk[2] - 1)*this->lNumCoarseNodes10; - } else { - currentCoarseIndex += ijk[2]*this->lNumCoarseNodes10; - } - if(this->ghostInterface[2]) { - currentCoarseIndex += (ijk[1] - 1)*this->getLocalCoarseNodesInDir(0); - } else { - currentCoarseIndex += ijk[1]*this->getLocalCoarseNodesInDir(0); - } - if(this->ghostInterface[0]) { - currentCoarseIndex += ijk[0] - 1; - } else { - currentCoarseIndex += ijk[0]; - } + } + // lbv 02-08-2018: + // This check is simplistic and should be replaced by a condition that + // checks if the local tuple of the current index is wihin the range of + // local nodes or not in the range of ghosted nodes. 
+ if ((!this->ghostInterface[0] || ijk[0] != 0) && + (!this->ghostInterface[2] || ijk[1] != 0) && + (!this->ghostInterface[4] || ijk[2] != 0) && + (!this->ghostInterface[1] || + ijk[0] != this->ghostedNodesPerDir[0] - 1) && + (!this->ghostInterface[3] || + ijk[1] != this->ghostedNodesPerDir[1] - 1) && + (!this->ghostInterface[5] || + ijk[2] != this->ghostedNodesPerDir[2] - 1)) { + + // this->getGhostedNodeFineLID(ijk[0], ijk[1], ijk[2], + // coarseNodeFineLID); + if (this->interpolationOrder_ == 0) { + currentCoarseIndex = 0; + if (this->ghostInterface[4]) { + currentCoarseIndex += (ijk[2] - 1) * this->lNumCoarseNodes10; } else { - this->getGhostedNodeCoarseLID(ijk[0], ijk[1], ijk[2], currentCoarseIndex); + currentCoarseIndex += ijk[2] * this->lNumCoarseNodes10; } - - lCoarseNodeCoarseGIDs[currentCoarseIndex] = myCoarseGID; - lCoarseNodeFineGIDs[currentCoarseIndex] = myGID; + if (this->ghostInterface[2]) { + currentCoarseIndex += + (ijk[1] - 1) * this->getLocalCoarseNodesInDir(0); + } else { + currentCoarseIndex += ijk[1] * this->getLocalCoarseNodesInDir(0); + } + if (this->ghostInterface[0]) { + currentCoarseIndex += ijk[0] - 1; + } else { + currentCoarseIndex += ijk[0]; + } + } else { + this->getGhostedNodeCoarseLID(ijk[0], ijk[1], ijk[2], + currentCoarseIndex); } - ghostedCoarseNodeFineGIDs[currentIndex] = myGID; - } - } - } - - RCP coarseMap = Xpetra::MapFactory::Build (fineMap->lib(), - this->gNumCoarseNodes, - lCoarseNodeCoarseGIDs(), - fineMap->getIndexBase(), - fineMap->getComm()); - - coarseMap->getRemoteIndexList(ghostedNodeCoarseGIDs(), - ghostedNodeCoarsePIDs(), - ghostedNodeCoarseLIDs()); - - } // End getGhostedMeshData - - template - void GlobalLexicographicIndexManager:: - getCoarseNodesData(const RCP fineCoordinatesMap, - Array& coarseNodeCoarseGIDs, - Array& coarseNodeFineGIDs) const { - - // Allocate sufficient storage space for outputs - coarseNodeCoarseGIDs.resize(this->getNumLocalCoarseNodes()); - 
coarseNodeFineGIDs.resize(this->getNumLocalCoarseNodes()); - - // Load all the GIDs on the fine mesh - ArrayView fineNodeGIDs = fineCoordinatesMap->getLocalElementList(); - - Array coarseStartIndices(3); - GO tmp; - for(int dim = 0; dim < 3; ++dim) { - coarseStartIndices[dim] = this->startIndices[dim] / this->coarseRate[dim]; - tmp = this->startIndices[dim] % this->coarseRate[dim]; - if(tmp > 0) {++coarseStartIndices[dim];} - } - // Extract the fine LIDs of the coarse nodes and store the corresponding GIDs - LO fineLID; - Array lCoarseIndices(3); - Array gCoarseIndices(3); - for(LO coarseLID = 0; coarseLID < this->getNumLocalCoarseNodes(); ++coarseLID) { - this->getCoarseNodeLocalTuple(coarseLID, - lCoarseIndices[0], - lCoarseIndices[1], - lCoarseIndices[2]); - getCoarseNodeFineLID(lCoarseIndices[0], lCoarseIndices[1], lCoarseIndices[2], fineLID); - coarseNodeFineGIDs[coarseLID] = fineNodeGIDs[fineLID]; - - // Get Coarse Global IJK - for(int dim=0; dim<3; dim++) { - gCoarseIndices[dim] = coarseStartIndices[dim] + lCoarseIndices[dim]; + lCoarseNodeCoarseGIDs[currentCoarseIndex] = myCoarseGID; + lCoarseNodeFineGIDs[currentCoarseIndex] = myGID; + } + ghostedCoarseNodeFineGIDs[currentIndex] = myGID; } - getCoarseNodeGID(gCoarseIndices[0], - gCoarseIndices[1], - gCoarseIndices[2], - coarseNodeCoarseGIDs[coarseLID] ); - } - } - template - std::vector > GlobalLexicographicIndexManager:: - getCoarseMeshData() const { - std::vector > coarseMeshData; - return coarseMeshData; - } + RCP coarseMap = Xpetra::MapFactory::Build( + fineMap->lib(), this->gNumCoarseNodes, lCoarseNodeCoarseGIDs(), + fineMap->getIndexBase(), fineMap->getComm()); - template - void GlobalLexicographicIndexManager:: - getFineNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const { - GO tmp; - k = myGID / this->gNumFineNodes10; - tmp = myGID % this->gNumFineNodes10; - j = tmp / this->gFineNodesPerDir[0]; - i = tmp % this->gFineNodesPerDir[0]; - } + coarseMap->getRemoteIndexList(ghostedNodeCoarseGIDs(), 
+ ghostedNodeCoarsePIDs(), + ghostedNodeCoarseLIDs()); - template - void GlobalLexicographicIndexManager:: - getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const { - LO tmp; - k = myLID / this->lNumFineNodes10; - tmp = myLID % this->lNumFineNodes10; - j = tmp / this->lFineNodesPerDir[0]; - i = tmp % this->lFineNodesPerDir[0]; - } +} // End getGhostedMeshData - template - void GlobalLexicographicIndexManager:: - getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const { - LO tmp; - k = myLID / this->lNumFineNodes10; - tmp = myLID % this->lNumFineNodes10; - j = tmp / this->lFineNodesPerDir[0]; - i = tmp % this->lFineNodesPerDir[0]; - - k += this->offsets[2]; - j += this->offsets[1]; - i += this->offsets[0]; - } +template +void GlobalLexicographicIndexManager:: + getCoarseNodesData(const RCP fineCoordinatesMap, + Array &coarseNodeCoarseGIDs, + Array &coarseNodeFineGIDs) const { - template - void GlobalLexicographicIndexManager:: - getFineNodeGID(const GO i, const GO j, const GO k, GO& myGID) const { - myGID = k*this->gNumFineNodes10 + j*this->gFineNodesPerDir[0] + i; - } + // Allocate sufficient storage space for outputs + coarseNodeCoarseGIDs.resize(this->getNumLocalCoarseNodes()); + coarseNodeFineGIDs.resize(this->getNumLocalCoarseNodes()); - template - void GlobalLexicographicIndexManager:: - getFineNodeLID(const LO i, const LO j, const LO k, LO& myLID) const { - myLID = k*this->lNumFineNodes10 + j*this->lFineNodesPerDir[0] + i; - } + // Load all the GIDs on the fine mesh + ArrayView fineNodeGIDs = fineCoordinatesMap->getLocalElementList(); - template - void GlobalLexicographicIndexManager:: - getCoarseNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const { - GO tmp; - k = myGID / this->gNumCoarseNodes10; - tmp = myGID % this->gNumCoarseNodes10; - j = tmp / this->gCoarseNodesPerDir[0]; - i = tmp % this->gCoarseNodesPerDir[0]; - } - - template - void GlobalLexicographicIndexManager:: - getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, 
LO& k) const { - LO tmp; - k = myLID / this->lNumCoarseNodes10; - tmp = myLID % this->lNumCoarseNodes10; - j = tmp / this->lCoarseNodesPerDir[0]; - i = tmp % this->lCoarseNodesPerDir[0]; - } - - template - void GlobalLexicographicIndexManager:: - getCoarseNodeGID(const GO i, const GO j, const GO k, GO& myGID) const { - myGID = k*this->gNumCoarseNodes10 + j*this->gCoarseNodesPerDir[0] + i; - } - - template - void GlobalLexicographicIndexManager:: - getCoarseNodeLID(const LO i, const LO j, const LO k, LO& myLID) const { - myLID = k*this->lNumCoarseNodes10 + j*this->lCoarseNodesPerDir[0] + i; - } - - template - void GlobalLexicographicIndexManager:: - getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const { - myLID = k*this->numGhostedNodes10 + j*this->ghostedNodesPerDir[0] + i; - } - - template - void GlobalLexicographicIndexManager:: - getCoarseNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const { - // Assumptions: (i,j,k) is a tuple on the coarse mesh - // myLID is the corresponding local ID on the fine mesh - const LO multiplier[3] = {1, this->lFineNodesPerDir[0], this->lNumFineNodes10}; - const LO indices[3] = {i, j, k}; - - myLID = 0; - for(int dim = 0; dim < 3; ++dim) { - if((indices[dim] == this->getLocalCoarseNodesInDir(dim) - 1) && this->meshEdge[2*dim + 1]) { - // We are dealing with the last node on the mesh in direction dim - // so we can simply use the number of nodes on the fine mesh in that direction - myLID += (this->getLocalFineNodesInDir(dim) - 1)*multiplier[dim]; - } else { - myLID += (indices[dim]*this->getCoarseningRate(dim) + this->getCoarseNodeOffset(dim)) - *multiplier[dim]; - } + Array coarseStartIndices(3); + GO tmp; + for (int dim = 0; dim < 3; ++dim) { + coarseStartIndices[dim] = this->startIndices[dim] / this->coarseRate[dim]; + tmp = this->startIndices[dim] % this->coarseRate[dim]; + if (tmp > 0) { + ++coarseStartIndices[dim]; } } - template - void GlobalLexicographicIndexManager:: - 
getGhostedNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const { - LO itmp = i - (this->offsets[0] > 0 ? 1 : 0); - LO jtmp = j - (this->offsets[1] > 0 ? 1 : 0); - LO ktmp = k - (this->offsets[2] > 0 ? 1 : 0); - myLID = 0; - if(ktmp*this->coarseRate[2] < this->lFineNodesPerDir[2]) { - myLID += ktmp*this->coarseRate[2]*this->lNumCoarseNodes10; - } else { - myLID += (this->lFineNodesPerDir[2] - 1)*this->lNumCoarseNodes10; - } - - if(jtmp*this->coarseRate[1] < this->lFineNodesPerDir[1]) { - myLID += jtmp*this->coarseRate[1]*this->lFineNodesPerDir[0]; - } else { - myLID += (this->lFineNodesPerDir[1] - 1)*this->lFineNodesPerDir[1]; + // Extract the fine LIDs of the coarse nodes and store the corresponding GIDs + LO fineLID; + Array lCoarseIndices(3); + Array gCoarseIndices(3); + for (LO coarseLID = 0; coarseLID < this->getNumLocalCoarseNodes(); + ++coarseLID) { + this->getCoarseNodeLocalTuple(coarseLID, lCoarseIndices[0], + lCoarseIndices[1], lCoarseIndices[2]); + getCoarseNodeFineLID(lCoarseIndices[0], lCoarseIndices[1], + lCoarseIndices[2], fineLID); + coarseNodeFineGIDs[coarseLID] = fineNodeGIDs[fineLID]; + + // Get Coarse Global IJK + for (int dim = 0; dim < 3; dim++) { + gCoarseIndices[dim] = coarseStartIndices[dim] + lCoarseIndices[dim]; } - - if(itmp*this->coarseRate[0] < this->lFineNodesPerDir[0]) { - myLID += itmp*this->coarseRate[0]; + getCoarseNodeGID(gCoarseIndices[0], gCoarseIndices[1], gCoarseIndices[2], + coarseNodeCoarseGIDs[coarseLID]); + } +} + +template +std::vector> +GlobalLexicographicIndexManager::getCoarseMeshData() const { + std::vector> coarseMeshData; + return coarseMeshData; +} + +template +void GlobalLexicographicIndexManager< + LocalOrdinal, GlobalOrdinal, Node>::getFineNodeGlobalTuple(const GO myGID, + GO &i, GO &j, + GO &k) const { + GO tmp; + k = myGID / this->gNumFineNodes10; + tmp = myGID % this->gNumFineNodes10; + j = tmp / this->gFineNodesPerDir[0]; + i = tmp % this->gFineNodesPerDir[0]; +} + +template +void 
GlobalLexicographicIndexManager< + LocalOrdinal, GlobalOrdinal, Node>::getFineNodeLocalTuple(const LO myLID, + LO &i, LO &j, + LO &k) const { + LO tmp; + k = myLID / this->lNumFineNodes10; + tmp = myLID % this->lNumFineNodes10; + j = tmp / this->lFineNodesPerDir[0]; + i = tmp % this->lFineNodesPerDir[0]; +} + +template +void GlobalLexicographicIndexManager< + LocalOrdinal, GlobalOrdinal, Node>::getFineNodeGhostedTuple(const LO myLID, + LO &i, LO &j, + LO &k) const { + LO tmp; + k = myLID / this->lNumFineNodes10; + tmp = myLID % this->lNumFineNodes10; + j = tmp / this->lFineNodesPerDir[0]; + i = tmp % this->lFineNodesPerDir[0]; + + k += this->offsets[2]; + j += this->offsets[1]; + i += this->offsets[0]; +} + +template +void GlobalLexicographicIndexManager::getFineNodeGID(const GO i, + const GO j, + const GO k, + GO &myGID) const { + myGID = k * this->gNumFineNodes10 + j * this->gFineNodesPerDir[0] + i; +} + +template +void GlobalLexicographicIndexManager::getFineNodeLID(const LO i, + const LO j, + const LO k, + LO &myLID) const { + myLID = k * this->lNumFineNodes10 + j * this->lFineNodesPerDir[0] + i; +} + +template +void GlobalLexicographicIndexManager< + LocalOrdinal, GlobalOrdinal, Node>::getCoarseNodeGlobalTuple(const GO myGID, + GO &i, GO &j, + GO &k) const { + GO tmp; + k = myGID / this->gNumCoarseNodes10; + tmp = myGID % this->gNumCoarseNodes10; + j = tmp / this->gCoarseNodesPerDir[0]; + i = tmp % this->gCoarseNodesPerDir[0]; +} + +template +void GlobalLexicographicIndexManager< + LocalOrdinal, GlobalOrdinal, Node>::getCoarseNodeLocalTuple(const LO myLID, + LO &i, LO &j, + LO &k) const { + LO tmp; + k = myLID / this->lNumCoarseNodes10; + tmp = myLID % this->lNumCoarseNodes10; + j = tmp / this->lCoarseNodesPerDir[0]; + i = tmp % this->lCoarseNodesPerDir[0]; +} + +template +void GlobalLexicographicIndexManager::getCoarseNodeGID(const GO i, + const GO j, + const GO k, + GO &myGID) const { + myGID = k * this->gNumCoarseNodes10 + j * this->gCoarseNodesPerDir[0] + 
i; +} + +template +void GlobalLexicographicIndexManager::getCoarseNodeLID(const LO i, + const LO j, + const LO k, + LO &myLID) const { + myLID = k * this->lNumCoarseNodes10 + j * this->lCoarseNodesPerDir[0] + i; +} + +template +void GlobalLexicographicIndexManager:: + getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, + LO &myLID) const { + myLID = k * this->numGhostedNodes10 + j * this->ghostedNodesPerDir[0] + i; +} + +template +void GlobalLexicographicIndexManager< + LocalOrdinal, GlobalOrdinal, Node>::getCoarseNodeFineLID(const LO i, + const LO j, + const LO k, + LO &myLID) const { + // Assumptions: (i,j,k) is a tuple on the coarse mesh + // myLID is the corresponding local ID on the fine mesh + const LO multiplier[3] = {1, this->lFineNodesPerDir[0], + this->lNumFineNodes10}; + const LO indices[3] = {i, j, k}; + + myLID = 0; + for (int dim = 0; dim < 3; ++dim) { + if ((indices[dim] == this->getLocalCoarseNodesInDir(dim) - 1) && + this->meshEdge[2 * dim + 1]) { + // We are dealing with the last node on the mesh in direction dim + // so we can simply use the number of nodes on the fine mesh in that + // direction + myLID += (this->getLocalFineNodesInDir(dim) - 1) * multiplier[dim]; } else { - myLID += this->lFineNodesPerDir[0] - 1; + myLID += (indices[dim] * this->getCoarseningRate(dim) + + this->getCoarseNodeOffset(dim)) * + multiplier[dim]; } } +} + +template +void GlobalLexicographicIndexManager< + LocalOrdinal, GlobalOrdinal, Node>::getGhostedNodeFineLID(const LO i, + const LO j, + const LO k, + LO &myLID) const { + LO itmp = i - (this->offsets[0] > 0 ? 1 : 0); + LO jtmp = j - (this->offsets[1] > 0 ? 1 : 0); + LO ktmp = k - (this->offsets[2] > 0 ? 
1 : 0); + myLID = 0; + if (ktmp * this->coarseRate[2] < this->lFineNodesPerDir[2]) { + myLID += ktmp * this->coarseRate[2] * this->lNumCoarseNodes10; + } else { + myLID += (this->lFineNodesPerDir[2] - 1) * this->lNumCoarseNodes10; + } - template - void GlobalLexicographicIndexManager:: - getGhostedNodeCoarseLID(const LO i, const LO j, const LO k, LO& myLID) const { - LO itmp = i - (this->offsets[0] > 0 ? 1 : 0); - LO jtmp = j - (this->offsets[1] > 0 ? 1 : 0); - LO ktmp = k - (this->offsets[2] > 0 ? 1 : 0); - myLID = ktmp*this->lNumCoarseNodes10 + jtmp*this->lCoarseNodesPerDir[0] + itmp; + if (jtmp * this->coarseRate[1] < this->lFineNodesPerDir[1]) { + myLID += jtmp * this->coarseRate[1] * this->lFineNodesPerDir[0]; + } else { + myLID += (this->lFineNodesPerDir[1] - 1) * this->lFineNodesPerDir[1]; } -} //namespace MueLu + if (itmp * this->coarseRate[0] < this->lFineNodesPerDir[0]) { + myLID += itmp * this->coarseRate[0]; + } else { + myLID += this->lFineNodesPerDir[0] - 1; + } +} + +template +void GlobalLexicographicIndexManager:: + getGhostedNodeCoarseLID(const LO i, const LO j, const LO k, + LO &myLID) const { + LO itmp = i - (this->offsets[0] > 0 ? 1 : 0); + LO jtmp = j - (this->offsets[1] > 0 ? 1 : 0); + LO ktmp = k - (this->offsets[2] > 0 ? 1 : 0); + myLID = ktmp * this->lNumCoarseNodes10 + jtmp * this->lCoarseNodesPerDir[0] + + itmp; +} + +} // namespace MueLu #endif /* MUELU_GLOBALLEXICOGRAPHICINDEXMANAGER_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_LocalLexicographicIndexManager_decl.hpp b/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_LocalLexicographicIndexManager_decl.hpp index a0e809aaba20..94c62af688bc 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_LocalLexicographicIndexManager_decl.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_LocalLexicographicIndexManager_decl.hpp @@ -77,93 +77,102 @@ namespace MueLu { correspond to nodes. 
While not strictly necessary, it might be convenient. */ - template - class LocalLexicographicIndexManager : public IndexManager { +template +class LocalLexicographicIndexManager + : public IndexManager { #undef MUELU_LOCALLEXICOGRAPHICINDEXMANAGER_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: +public: + LocalLexicographicIndexManager() = default; - LocalLexicographicIndexManager() = default; + LocalLexicographicIndexManager(const RCP> comm, + const bool coupled, const int NumDimensions, + const int interpolationOrder, const int MyRank, + const int NumRanks, + const Array GFineNodesPerDir, + const Array LFineNodesPerDir, + const Array CoarseRate, + const Array MeshData); - LocalLexicographicIndexManager(const RCP > comm, const bool coupled, - const int NumDimensions, const int interpolationOrder, - const int MyRank, const int NumRanks, - const Array GFineNodesPerDir, - const Array LFineNodesPerDir, - const Array CoarseRate, const Array MeshData); + virtual ~LocalLexicographicIndexManager() {} - virtual ~LocalLexicographicIndexManager() {} + void computeGlobalCoarseParameters(); - void computeGlobalCoarseParameters(); + void getGhostedNodesData(const RCP fineMap, + Array &ghostedNodeCoarseLIDs, + Array &ghostedNodeCoarsePIDs, + Array &ghostedNodeCoarseGIDs) const; - void getGhostedNodesData(const RCP fineMap, - Array& ghostedNodeCoarseLIDs, - Array& ghostedNodeCoarsePIDs, - Array& ghostedNodeCoarseGIDs) const; + void getCoarseNodesData(const RCP fineCoordinatesMap, + Array &coarseNodeCoarseGIDs, + Array &coarseNodeFineGIDs) const; - void getCoarseNodesData(const RCP fineCoordinatesMap, - Array& coarseNodeCoarseGIDs, - Array& coarseNodeFineGIDs) const; + std::vector> getCoarseMeshData() const; - std::vector > getCoarseMeshData() const; + void getFineNodeGlobalTuple(const GO myGID, GO &i, GO &j, GO &k) const; - void getFineNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const; + void getFineNodeLocalTuple(const LO myLID, LO &i, LO &j, LO &k) const; - 
void getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const; + void getFineNodeGhostedTuple(const LO myLID, LO &i, LO &j, LO &k) const; - void getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const; + void getFineNodeGID(const GO i, const GO j, const GO k, GO &myGID) const; - void getFineNodeGID(const GO i, const GO j, const GO k, GO& myGID) const; + void getFineNodeLID(const LO i, const LO j, const LO k, LO &myLID) const; - void getFineNodeLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getCoarseNodeGlobalTuple(const GO myGID, GO &i, GO &j, GO &k) const; - void getCoarseNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const; + void getCoarseNodeLocalTuple(const LO myLID, LO &i, LO &j, LO &k) const; - void getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const; + void getCoarseNodeGID(const GO i, const GO j, const GO k, GO &myGID) const; - void getCoarseNodeGID(const GO i, const GO j, const GO k, GO& myGID) const; + void getCoarseNodeLID(const LO i, const LO j, const LO k, LO &myLID) const; - void getCoarseNodeLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, + LO &myLID) const; - void getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getCoarseNodeFineLID(const LO i, const LO j, const LO k, + LO &myLID) const; - void getCoarseNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getGhostedNodeFineLID(const LO i, const LO j, const LO k, + LO &myLID) const; - void getGhostedNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getGhostedNodeCoarseLID(const LO i, const LO j, const LO k, + LO &myLID) const; - void getGhostedNodeCoarseLID(const LO i, const LO j, const LO k, LO& myLID) const; +private: + const int myRank; ///< Local rank ID. + const int numRanks; ///< Number of ranks used to decompose the problem. 
- private: + // Iterator delimiting the entries in meshData that correspond to the block + // that owns the local part of the mesh. + typename std::vector>::iterator myBlockStart, myBlockEnd; - const int myRank; ///< Local rank ID. - const int numRanks; ///< Number of ranks used to decompose the problem. + int pi, pj, pk; ///< Number of processors in each diretcion. - // Iterator delimiting the entries in meshData that correspond to the block that owns the local - // part of the mesh. - typename std::vector >::iterator myBlockStart, myBlockEnd; + int numBlocks; ///< Number of mesh block. + int myBlock; ///< local mesh block ID. - int pi, pj, pk; ///< Number of processors in each diretcion. + int myRankIndex; ///< local process index for record in meshData after + ///< sorting. + Array rankIndices; ///< mapping between rank ID and reordered rank ID. + std::vector> + meshData; ///< layout of indices accross all processes. + std::vector> + coarseMeshData; ///< layout of indices accross all processes after + ///< coarsening. - int numBlocks; ///< Number of mesh block. - int myBlock; ///< local mesh block ID. + void sortLocalLexicographicData(); - int myRankIndex; ///< local process index for record in meshData after sorting. - Array rankIndices; ///< mapping between rank ID and reordered rank ID. - std::vector > meshData; ///< layout of indices accross all processes. - std::vector > coarseMeshData; ///< layout of indices accross all processes after coarsening. 
+ void computeCoarseLocalLexicographicData(); - void sortLocalLexicographicData(); + void getGIDLocalLexicographic(const LO iGhosted, const LO jGhosted, + const LO kGhosted, + const Array coarseNodeFineIndices, + GO &myGID, LO &myPID, LO &myLID) const; +}; - void computeCoarseLocalLexicographicData(); - - void getGIDLocalLexicographic(const LO iGhosted, const LO jGhosted, const LO kGhosted, - const Array coarseNodeFineIndices, GO& myGID, LO& myPID, - LO& myLID) const; - - }; - -} //namespace MueLu +} // namespace MueLu #define MUELU_LOCALLEXICOGRAPHICINDEXMANAGER_SHORT #endif // MUELU_LOCALLEXICOGRAPHICINDEXMANAGER_DECL_HPP diff --git a/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_LocalLexicographicIndexManager_def.hpp b/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_LocalLexicographicIndexManager_def.hpp index d3c3d8448630..41ec22aa6dd7 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_LocalLexicographicIndexManager_def.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/coupled/MueLu_LocalLexicographicIndexManager_def.hpp @@ -51,448 +51,498 @@ namespace MueLu { - template - LocalLexicographicIndexManager:: - LocalLexicographicIndexManager(const RCP > comm, const bool coupled, - const int NumDimensions, const int interpolationOrder, - const int MyRank, const int NumRanks, - const Array GFineNodesPerDir, const Array LFineNodesPerDir, - const Array CoarseRate, const Array MeshData) : - IndexManager(comm, coupled, false, NumDimensions, interpolationOrder, GFineNodesPerDir, LFineNodesPerDir), - myRank(MyRank), numRanks(NumRanks) { - - // Allocate data based on user input - meshData.resize(numRanks); - rankIndices.resize(numRanks); - coarseMeshData.resize(numRanks); - - // Load coarse rate, being careful about formating - for(int dim = 0; dim < 3; ++dim) { - if(dim < this->numDimensions) { - if(CoarseRate.size() == 1) { - this->coarseRate[dim] = CoarseRate[0]; - } else if(CoarseRate.size() == 
this->numDimensions) { - this->coarseRate[dim] = CoarseRate[dim]; - } - } else { - this->coarseRate[dim] = 1; - } - } - - // Load meshData for local lexicographic case - for(int rank = 0; rank < numRanks; ++rank) { - meshData[rank].resize(10); - for(int entry = 0; entry < 10; ++entry) { - meshData[rank][entry] = MeshData[10*rank + entry]; +template +LocalLexicographicIndexManager:: + LocalLexicographicIndexManager(const RCP> comm, + const bool coupled, const int NumDimensions, + const int interpolationOrder, + const int MyRank, const int NumRanks, + const Array GFineNodesPerDir, + const Array LFineNodesPerDir, + const Array CoarseRate, + const Array MeshData) + : IndexManager(comm, coupled, false, NumDimensions, interpolationOrder, + GFineNodesPerDir, LFineNodesPerDir), + myRank(MyRank), numRanks(NumRanks) { + + // Allocate data based on user input + meshData.resize(numRanks); + rankIndices.resize(numRanks); + coarseMeshData.resize(numRanks); + + // Load coarse rate, being careful about formating + for (int dim = 0; dim < 3; ++dim) { + if (dim < this->numDimensions) { + if (CoarseRate.size() == 1) { + this->coarseRate[dim] = CoarseRate[0]; + } else if (CoarseRate.size() == this->numDimensions) { + this->coarseRate[dim] = CoarseRate[dim]; } + } else { + this->coarseRate[dim] = 1; } + } - if(this->coupled_) { - myBlock = meshData[myRank][2]; - sortLocalLexicographicData(); - } - - // Start simple parameter calculation - myRankIndex = rankIndices[myRank]; - for(int dim = 0; dim < 3; ++dim) { - this->startIndices[dim] = meshData[myRankIndex][2*dim + 3]; - this->startIndices[dim + 3] = meshData[myRankIndex][2*dim + 4]; + // Load meshData for local lexicographic case + for (int rank = 0; rank < numRanks; ++rank) { + meshData[rank].resize(10); + for (int entry = 0; entry < 10; ++entry) { + meshData[rank][entry] = MeshData[10 * rank + entry]; } + } - this->computeMeshParameters(); - computeGlobalCoarseParameters(); - computeCoarseLocalLexicographicData(); - } // 
Constructor - - template - void LocalLexicographicIndexManager:: - computeGlobalCoarseParameters() { - this->gNumCoarseNodes10 = this->gCoarseNodesPerDir[0]*this->gCoarseNodesPerDir[1]; - this->gNumCoarseNodes = this->gNumCoarseNodes10*this->gCoarseNodesPerDir[2]; + if (this->coupled_) { + myBlock = meshData[myRank][2]; + sortLocalLexicographicData(); } - template - void LocalLexicographicIndexManager:: - getGhostedNodesData(const RCP/* fineMap */, - Array& ghostedNodeCoarseLIDs, - Array& ghostedNodeCoarsePIDs, - Array& ghostedNodeCoarseGIDs) const { - - // First we allocated memory for the outputs - ghostedNodeCoarseLIDs.resize(this->getNumLocalGhostedNodes()); - ghostedNodeCoarsePIDs.resize(this->getNumLocalGhostedNodes()); - ghostedNodeCoarseGIDs.resize(this->numGhostedNodes); - - // Now the tricky part starts, the coarse nodes / ghosted coarse nodes need to be imported. - // This requires finding what their GID on the fine mesh is. They need to be ordered - // lexicographically to allow for fast sweeps through the mesh. 
- - // We loop over all ghosted coarse nodes by increasing global lexicographic order - Array ghostedCoarseNodeCoarseIndices(3), ghostedCoarseNodeFineIndices(3); - Array lCoarseNodeCoarseIndices(3); - Array lCoarseNodeCoarseGIDs(this->lNumCoarseNodes); - LO currentIndex = -1, countCoarseNodes = 0; - for(int k = 0; k < this->ghostedNodesPerDir[2]; ++k) { - for(int j = 0; j < this->ghostedNodesPerDir[1]; ++j) { - for(int i = 0; i < this->ghostedNodesPerDir[0]; ++i) { - currentIndex = k*this->numGhostedNodes10 + j*this->ghostedNodesPerDir[0] + i; - ghostedCoarseNodeCoarseIndices[0] = this->startGhostedCoarseNode[0] + i; - ghostedCoarseNodeFineIndices[0] = ghostedCoarseNodeCoarseIndices[0]*this->coarseRate[0]; - if(ghostedCoarseNodeFineIndices[0] > this->gFineNodesPerDir[0] - 1) { - ghostedCoarseNodeFineIndices[0] = this->gFineNodesPerDir[0] - 1; - } - ghostedCoarseNodeCoarseIndices[1] = this->startGhostedCoarseNode[1] + j; - ghostedCoarseNodeFineIndices[1] = ghostedCoarseNodeCoarseIndices[1]*this->coarseRate[1]; - if(ghostedCoarseNodeFineIndices[1] > this->gFineNodesPerDir[1] - 1) { - ghostedCoarseNodeFineIndices[1] = this->gFineNodesPerDir[1] - 1; - } - ghostedCoarseNodeCoarseIndices[2] = this->startGhostedCoarseNode[2] + k; - ghostedCoarseNodeFineIndices[2] = ghostedCoarseNodeCoarseIndices[2]*this->coarseRate[2]; - if(ghostedCoarseNodeFineIndices[2] > this->gFineNodesPerDir[2] - 1) { - ghostedCoarseNodeFineIndices[2] = this->gFineNodesPerDir[2] - 1; - } + // Start simple parameter calculation + myRankIndex = rankIndices[myRank]; + for (int dim = 0; dim < 3; ++dim) { + this->startIndices[dim] = meshData[myRankIndex][2 * dim + 3]; + this->startIndices[dim + 3] = meshData[myRankIndex][2 * dim + 4]; + } - GO myGID = -1, myCoarseGID = -1; - LO myLID = -1, myPID = -1, myCoarseLID = -1; - getGIDLocalLexicographic(i, j, k, ghostedCoarseNodeFineIndices, myGID, myPID, myLID); + this->computeMeshParameters(); + computeGlobalCoarseParameters(); + 
computeCoarseLocalLexicographicData(); +} // Constructor + +template +void LocalLexicographicIndexManager::computeGlobalCoarseParameters() { + this->gNumCoarseNodes10 = + this->gCoarseNodesPerDir[0] * this->gCoarseNodesPerDir[1]; + this->gNumCoarseNodes = this->gNumCoarseNodes10 * this->gCoarseNodesPerDir[2]; +} + +template +void LocalLexicographicIndexManager:: + getGhostedNodesData(const RCP /* fineMap */, + Array &ghostedNodeCoarseLIDs, + Array &ghostedNodeCoarsePIDs, + Array &ghostedNodeCoarseGIDs) const { + + // First we allocated memory for the outputs + ghostedNodeCoarseLIDs.resize(this->getNumLocalGhostedNodes()); + ghostedNodeCoarsePIDs.resize(this->getNumLocalGhostedNodes()); + ghostedNodeCoarseGIDs.resize(this->numGhostedNodes); + + // Now the tricky part starts, the coarse nodes / ghosted coarse nodes need to + // be imported. This requires finding what their GID on the fine mesh is. They + // need to be ordered lexicographically to allow for fast sweeps through the + // mesh. 
+ + // We loop over all ghosted coarse nodes by increasing global lexicographic + // order + Array ghostedCoarseNodeCoarseIndices(3), ghostedCoarseNodeFineIndices(3); + Array lCoarseNodeCoarseIndices(3); + Array lCoarseNodeCoarseGIDs(this->lNumCoarseNodes); + LO currentIndex = -1, countCoarseNodes = 0; + for (int k = 0; k < this->ghostedNodesPerDir[2]; ++k) { + for (int j = 0; j < this->ghostedNodesPerDir[1]; ++j) { + for (int i = 0; i < this->ghostedNodesPerDir[0]; ++i) { + currentIndex = + k * this->numGhostedNodes10 + j * this->ghostedNodesPerDir[0] + i; + ghostedCoarseNodeCoarseIndices[0] = this->startGhostedCoarseNode[0] + i; + ghostedCoarseNodeFineIndices[0] = + ghostedCoarseNodeCoarseIndices[0] * this->coarseRate[0]; + if (ghostedCoarseNodeFineIndices[0] > this->gFineNodesPerDir[0] - 1) { + ghostedCoarseNodeFineIndices[0] = this->gFineNodesPerDir[0] - 1; + } + ghostedCoarseNodeCoarseIndices[1] = this->startGhostedCoarseNode[1] + j; + ghostedCoarseNodeFineIndices[1] = + ghostedCoarseNodeCoarseIndices[1] * this->coarseRate[1]; + if (ghostedCoarseNodeFineIndices[1] > this->gFineNodesPerDir[1] - 1) { + ghostedCoarseNodeFineIndices[1] = this->gFineNodesPerDir[1] - 1; + } + ghostedCoarseNodeCoarseIndices[2] = this->startGhostedCoarseNode[2] + k; + ghostedCoarseNodeFineIndices[2] = + ghostedCoarseNodeCoarseIndices[2] * this->coarseRate[2]; + if (ghostedCoarseNodeFineIndices[2] > this->gFineNodesPerDir[2] - 1) { + ghostedCoarseNodeFineIndices[2] = this->gFineNodesPerDir[2] - 1; + } - int rankIndex = rankIndices[myPID]; - for(int dim = 0; dim < 3; ++dim) { - if(dim < this->numDimensions) { - lCoarseNodeCoarseIndices[dim] = ghostedCoarseNodeCoarseIndices[dim] - - coarseMeshData[rankIndex][3 + 2*dim]; - } - } - LO myRankIndexCoarseNodesInDir0 = coarseMeshData[rankIndex][4] - - coarseMeshData[rankIndex][3] + 1; - LO myRankIndexCoarseNodes10 = (coarseMeshData[rankIndex][6] - - coarseMeshData[rankIndex][5] + 1) - *myRankIndexCoarseNodesInDir0; - myCoarseLID = 
lCoarseNodeCoarseIndices[2]*myRankIndexCoarseNodes10 - + lCoarseNodeCoarseIndices[1]*myRankIndexCoarseNodesInDir0 - + lCoarseNodeCoarseIndices[0]; - myCoarseGID = myCoarseLID + coarseMeshData[rankIndex][9]; - - ghostedNodeCoarseLIDs[currentIndex] = myCoarseLID; - ghostedNodeCoarsePIDs[currentIndex] = myPID; - ghostedNodeCoarseGIDs[currentIndex] = myCoarseGID; - - if(myPID == myRank) { - lCoarseNodeCoarseGIDs[countCoarseNodes] = myCoarseGID; - ++countCoarseNodes; + GO myGID = -1, myCoarseGID = -1; + LO myLID = -1, myPID = -1, myCoarseLID = -1; + getGIDLocalLexicographic(i, j, k, ghostedCoarseNodeFineIndices, myGID, + myPID, myLID); + + int rankIndex = rankIndices[myPID]; + for (int dim = 0; dim < 3; ++dim) { + if (dim < this->numDimensions) { + lCoarseNodeCoarseIndices[dim] = + ghostedCoarseNodeCoarseIndices[dim] - + coarseMeshData[rankIndex][3 + 2 * dim]; } } + LO myRankIndexCoarseNodesInDir0 = + coarseMeshData[rankIndex][4] - coarseMeshData[rankIndex][3] + 1; + LO myRankIndexCoarseNodes10 = + (coarseMeshData[rankIndex][6] - coarseMeshData[rankIndex][5] + 1) * + myRankIndexCoarseNodesInDir0; + myCoarseLID = + lCoarseNodeCoarseIndices[2] * myRankIndexCoarseNodes10 + + lCoarseNodeCoarseIndices[1] * myRankIndexCoarseNodesInDir0 + + lCoarseNodeCoarseIndices[0]; + myCoarseGID = myCoarseLID + coarseMeshData[rankIndex][9]; + + ghostedNodeCoarseLIDs[currentIndex] = myCoarseLID; + ghostedNodeCoarsePIDs[currentIndex] = myPID; + ghostedNodeCoarseGIDs[currentIndex] = myCoarseGID; + + if (myPID == myRank) { + lCoarseNodeCoarseGIDs[countCoarseNodes] = myCoarseGID; + ++countCoarseNodes; + } } } } +} - template - void LocalLexicographicIndexManager:: - getCoarseNodesData(const RCP fineCoordinatesMap, - Array& coarseNodeCoarseGIDs, - Array& coarseNodeFineGIDs) const { - - // Allocate sufficient storage space for outputs - coarseNodeCoarseGIDs.resize(this->getNumLocalCoarseNodes()); - coarseNodeFineGIDs.resize(this->getNumLocalCoarseNodes()); - - // Load all the GIDs on the fine 
mesh - ArrayView fineNodeGIDs = fineCoordinatesMap->getLocalElementList(); +template +void LocalLexicographicIndexManager:: + getCoarseNodesData(const RCP fineCoordinatesMap, + Array &coarseNodeCoarseGIDs, + Array &coarseNodeFineGIDs) const { - Array coarseStartIndices(3); - for(int dim = 0; dim < 3; ++dim) { - coarseStartIndices[dim] = this->coarseMeshData[myRankIndex][2*dim + 3]; - } + // Allocate sufficient storage space for outputs + coarseNodeCoarseGIDs.resize(this->getNumLocalCoarseNodes()); + coarseNodeFineGIDs.resize(this->getNumLocalCoarseNodes()); - // Extract the fine LIDs of the coarse nodes and store the corresponding GIDs - LO fineLID; - for(LO coarseLID = 0; coarseLID < this->getNumLocalCoarseNodes(); ++coarseLID) { - Array coarseIndices(3), fineIndices(3), gCoarseIndices(3); - this->getCoarseNodeLocalTuple(coarseLID, - coarseIndices[0], - coarseIndices[1], - coarseIndices[2]); - getCoarseNodeFineLID(coarseIndices[0],coarseIndices[1],coarseIndices[2],fineLID); - coarseNodeFineGIDs[coarseLID] = fineNodeGIDs[fineLID]; - - LO myRankIndexCoarseNodesInDir0 = coarseMeshData[myRankIndex][4] - - coarseMeshData[myRankIndex][3] + 1; - LO myRankIndexCoarseNodes10 = (coarseMeshData[myRankIndex][6] - - coarseMeshData[myRankIndex][5] + 1) - *myRankIndexCoarseNodesInDir0; - LO myCoarseLID = coarseIndices[2]*myRankIndexCoarseNodes10 - + coarseIndices[1]*myRankIndexCoarseNodesInDir0 - + coarseIndices[0]; - GO myCoarseGID = myCoarseLID + coarseMeshData[myRankIndex][9]; - coarseNodeCoarseGIDs[coarseLID] = myCoarseGID; - } + // Load all the GIDs on the fine mesh + ArrayView fineNodeGIDs = fineCoordinatesMap->getLocalElementList(); + Array coarseStartIndices(3); + for (int dim = 0; dim < 3; ++dim) { + coarseStartIndices[dim] = this->coarseMeshData[myRankIndex][2 * dim + 3]; } - template - void LocalLexicographicIndexManager:: - getGIDLocalLexicographic(const LO iGhosted, const LO jGhosted, const LO kGhosted, - const Array coarseNodeFineIndices, - GO& myGID, LO& myPID, 
LO& myLID) const { - - LO ni = -1, nj = -1, li = -1, lj = -1, lk = -1; - LO myRankGuess = myRankIndex; - // We try to make a logical guess as to which PID owns the current coarse node - if(iGhosted == 0 && this->ghostInterface[0]) { - --myRankGuess; - } else if((iGhosted == this->ghostedNodesPerDir[0] - 1) && this->ghostInterface[1]) { - ++myRankGuess; - } - if(jGhosted == 0 && this->ghostInterface[2]) { - myRankGuess -= pi; - } else if((jGhosted == this->ghostedNodesPerDir[1] - 1) && this->ghostInterface[3]) { - myRankGuess += pi; - } - if(kGhosted == 0 && this->ghostInterface[4]) { - myRankGuess -= pj*pi; - } else if((kGhosted == this->ghostedNodesPerDir[2] - 1) && this->ghostInterface[5]) { - myRankGuess += pj*pi; - } - if(coarseNodeFineIndices[0] >= meshData[myRankGuess][3] - && coarseNodeFineIndices[0] <= meshData[myRankGuess][4] - && coarseNodeFineIndices[1] >= meshData[myRankGuess][5] - && coarseNodeFineIndices[1] <= meshData[myRankGuess][6] - && coarseNodeFineIndices[2] >= meshData[myRankGuess][7] - && coarseNodeFineIndices[2] <= meshData[myRankGuess][8] - && myRankGuess < numRanks - 1) { - myPID = meshData[myRankGuess][0]; - ni = meshData[myRankGuess][4] - meshData[myRankGuess][3] + 1; - nj = meshData[myRankGuess][6] - meshData[myRankGuess][5] + 1; - li = coarseNodeFineIndices[0] - meshData[myRankGuess][3]; - lj = coarseNodeFineIndices[1] - meshData[myRankGuess][5]; - lk = coarseNodeFineIndices[2] - meshData[myRankGuess][7]; - myLID = lk*nj*ni + lj*ni + li; - myGID = meshData[myRankGuess][9] + myLID; - } else { // The guess failed, let us use the heavy artilery: std::find_if() - // It could be interesting to monitor how many times this branch of the code gets - // used as it is far more expensive than the above one... 
- auto nodeRank = std::find_if(myBlockStart, myBlockEnd, - [coarseNodeFineIndices](const std::vector& vec){ - if(coarseNodeFineIndices[0] >= vec[3] - && coarseNodeFineIndices[0] <= vec[4] - && coarseNodeFineIndices[1] >= vec[5] - && coarseNodeFineIndices[1] <= vec[6] - && coarseNodeFineIndices[2] >= vec[7] - && coarseNodeFineIndices[2] <= vec[8]) { - return true; - } else { - return false; - } - }); - myPID = (*nodeRank)[0]; - ni = (*nodeRank)[4] - (*nodeRank)[3] + 1; - nj = (*nodeRank)[6] - (*nodeRank)[5] + 1; - li = coarseNodeFineIndices[0] - (*nodeRank)[3]; - lj = coarseNodeFineIndices[1] - (*nodeRank)[5]; - lk = coarseNodeFineIndices[2] - (*nodeRank)[7]; - myLID = lk*nj*ni + lj*ni + li; - myGID = (*nodeRank)[9] + myLID; - } + // Extract the fine LIDs of the coarse nodes and store the corresponding GIDs + LO fineLID; + for (LO coarseLID = 0; coarseLID < this->getNumLocalCoarseNodes(); + ++coarseLID) { + Array coarseIndices(3), fineIndices(3), gCoarseIndices(3); + this->getCoarseNodeLocalTuple(coarseLID, coarseIndices[0], coarseIndices[1], + coarseIndices[2]); + getCoarseNodeFineLID(coarseIndices[0], coarseIndices[1], coarseIndices[2], + fineLID); + coarseNodeFineGIDs[coarseLID] = fineNodeGIDs[fineLID]; + + LO myRankIndexCoarseNodesInDir0 = + coarseMeshData[myRankIndex][4] - coarseMeshData[myRankIndex][3] + 1; + LO myRankIndexCoarseNodes10 = + (coarseMeshData[myRankIndex][6] - coarseMeshData[myRankIndex][5] + 1) * + myRankIndexCoarseNodesInDir0; + LO myCoarseLID = coarseIndices[2] * myRankIndexCoarseNodes10 + + coarseIndices[1] * myRankIndexCoarseNodesInDir0 + + coarseIndices[0]; + GO myCoarseGID = myCoarseLID + coarseMeshData[myRankIndex][9]; + coarseNodeCoarseGIDs[coarseLID] = myCoarseGID; } - - template - void LocalLexicographicIndexManager:: - sortLocalLexicographicData() { - - std::sort(meshData.begin(), meshData.end(), - [](const std::vector& a, const std::vector& b)->bool { - // The below function sorts ranks by blockID, kmin, jmin and imin - if(a[2] < 
b[2]) { +} + +template +void LocalLexicographicIndexManager:: + getGIDLocalLexicographic(const LO iGhosted, const LO jGhosted, + const LO kGhosted, + const Array coarseNodeFineIndices, GO &myGID, + LO &myPID, LO &myLID) const { + + LO ni = -1, nj = -1, li = -1, lj = -1, lk = -1; + LO myRankGuess = myRankIndex; + // We try to make a logical guess as to which PID owns the current coarse node + if (iGhosted == 0 && this->ghostInterface[0]) { + --myRankGuess; + } else if ((iGhosted == this->ghostedNodesPerDir[0] - 1) && + this->ghostInterface[1]) { + ++myRankGuess; + } + if (jGhosted == 0 && this->ghostInterface[2]) { + myRankGuess -= pi; + } else if ((jGhosted == this->ghostedNodesPerDir[1] - 1) && + this->ghostInterface[3]) { + myRankGuess += pi; + } + if (kGhosted == 0 && this->ghostInterface[4]) { + myRankGuess -= pj * pi; + } else if ((kGhosted == this->ghostedNodesPerDir[2] - 1) && + this->ghostInterface[5]) { + myRankGuess += pj * pi; + } + if (coarseNodeFineIndices[0] >= meshData[myRankGuess][3] && + coarseNodeFineIndices[0] <= meshData[myRankGuess][4] && + coarseNodeFineIndices[1] >= meshData[myRankGuess][5] && + coarseNodeFineIndices[1] <= meshData[myRankGuess][6] && + coarseNodeFineIndices[2] >= meshData[myRankGuess][7] && + coarseNodeFineIndices[2] <= meshData[myRankGuess][8] && + myRankGuess < numRanks - 1) { + myPID = meshData[myRankGuess][0]; + ni = meshData[myRankGuess][4] - meshData[myRankGuess][3] + 1; + nj = meshData[myRankGuess][6] - meshData[myRankGuess][5] + 1; + li = coarseNodeFineIndices[0] - meshData[myRankGuess][3]; + lj = coarseNodeFineIndices[1] - meshData[myRankGuess][5]; + lk = coarseNodeFineIndices[2] - meshData[myRankGuess][7]; + myLID = lk * nj * ni + lj * ni + li; + myGID = meshData[myRankGuess][9] + myLID; + } else { // The guess failed, let us use the heavy artilery: std::find_if() + // It could be interesting to monitor how many times this branch of the code + // gets used as it is far more expensive than the above one... 
+ auto nodeRank = + std::find_if(myBlockStart, myBlockEnd, + [coarseNodeFineIndices](const std::vector &vec) { + if (coarseNodeFineIndices[0] >= vec[3] && + coarseNodeFineIndices[0] <= vec[4] && + coarseNodeFineIndices[1] >= vec[5] && + coarseNodeFineIndices[1] <= vec[6] && + coarseNodeFineIndices[2] >= vec[7] && + coarseNodeFineIndices[2] <= vec[8]) { + return true; + } else { + return false; + } + }); + myPID = (*nodeRank)[0]; + ni = (*nodeRank)[4] - (*nodeRank)[3] + 1; + nj = (*nodeRank)[6] - (*nodeRank)[5] + 1; + li = coarseNodeFineIndices[0] - (*nodeRank)[3]; + lj = coarseNodeFineIndices[1] - (*nodeRank)[5]; + lk = coarseNodeFineIndices[2] - (*nodeRank)[7]; + myLID = lk * nj * ni + lj * ni + li; + myGID = (*nodeRank)[9] + myLID; + } +} + +template +void LocalLexicographicIndexManager::sortLocalLexicographicData() { + + std::sort(meshData.begin(), meshData.end(), + [](const std::vector &a, const std::vector &b) -> bool { + // The below function sorts ranks by blockID, kmin, jmin and imin + if (a[2] < b[2]) { + return true; + } else if (a[2] == b[2]) { + if (a[7] < b[7]) { return true; - } else if(a[2] == b[2]) { - if(a[7] < b[7]) { + } else if (a[7] == b[7]) { + if (a[5] < b[5]) { return true; - } else if(a[7] == b[7]) { - if(a[5] < b[5]) { + } else if (a[5] == b[5]) { + if (a[3] < b[3]) { return true; - } else if(a[5] == b[5]) { - if(a[3] < b[3]) {return true;} } } } - return false; - }); - - numBlocks = meshData[numRanks - 1][2] + 1; - // Find the range of the current block - myBlockStart = std::lower_bound(meshData.begin(), meshData.end(), myBlock - 1, - [] (const std::vector& vec, const GO val)->bool { - return (vec[2] < val) ? true : false; - }); - myBlockEnd = std::upper_bound(meshData.begin(), meshData.end(), myBlock, - [] (const GO val, const std::vector& vec)->bool { - return (val < vec[2]) ? 
true : false; - }); - // Assuming that i,j,k and ranges are split in pi, pj and pk processors - // we search for these numbers as they will allow us to find quickly the PID of processors - // owning ghost nodes. - auto myKEnd = std::upper_bound(myBlockStart, myBlockEnd, (*myBlockStart)[3], - [] (const GO val, const std::vector& vec)->bool { - return (val < vec[7]) ? true : false; - }); - auto myJEnd = std::upper_bound(myBlockStart, myKEnd, (*myBlockStart)[3], - [] (const GO val, const std::vector& vec)->bool { - return (val < vec[5]) ? true : false; - }); - pi = std::distance(myBlockStart, myJEnd); - pj = std::distance(myBlockStart, myKEnd) / pi; - pk = std::distance(myBlockStart, myBlockEnd) / (pj*pi); - - // We also look for the index of the local rank in the current block. - const int MyRank = myRank; - myRankIndex = std::distance(meshData.begin(), - std::find_if(myBlockStart, myBlockEnd, - [MyRank] (const std::vector& vec)->bool { - return (vec[0] == MyRank) ? true : false; - }) - ); - // We also construct a mapping of rank to rankIndex in the meshData vector, - // this will allow us to access data quickly later on. - for(int rankIndex = 0; rankIndex < numRanks; ++rankIndex) { - rankIndices[meshData[rankIndex][0]] = rankIndex; - } + } + return false; + }); + + numBlocks = meshData[numRanks - 1][2] + 1; + // Find the range of the current block + myBlockStart = + std::lower_bound(meshData.begin(), meshData.end(), myBlock - 1, + [](const std::vector &vec, const GO val) -> bool { + return (vec[2] < val) ? true : false; + }); + myBlockEnd = + std::upper_bound(meshData.begin(), meshData.end(), myBlock, + [](const GO val, const std::vector &vec) -> bool { + return (val < vec[2]) ? true : false; + }); + // Assuming that i,j,k and ranges are split in pi, pj and pk processors + // we search for these numbers as they will allow us to find quickly the PID + // of processors owning ghost nodes. 
+ auto myKEnd = + std::upper_bound(myBlockStart, myBlockEnd, (*myBlockStart)[3], + [](const GO val, const std::vector &vec) -> bool { + return (val < vec[7]) ? true : false; + }); + auto myJEnd = + std::upper_bound(myBlockStart, myKEnd, (*myBlockStart)[3], + [](const GO val, const std::vector &vec) -> bool { + return (val < vec[5]) ? true : false; + }); + pi = std::distance(myBlockStart, myJEnd); + pj = std::distance(myBlockStart, myKEnd) / pi; + pk = std::distance(myBlockStart, myBlockEnd) / (pj * pi); + + // We also look for the index of the local rank in the current block. + const int MyRank = myRank; + myRankIndex = + std::distance(meshData.begin(), + std::find_if(myBlockStart, myBlockEnd, + [MyRank](const std::vector &vec) -> bool { + return (vec[0] == MyRank) ? true : false; + })); + // We also construct a mapping of rank to rankIndex in the meshData vector, + // this will allow us to access data quickly later on. + for (int rankIndex = 0; rankIndex < numRanks; ++rankIndex) { + rankIndices[meshData[rankIndex][0]] = rankIndex; } - - template - void LocalLexicographicIndexManager:: - computeCoarseLocalLexicographicData() { - Array rankOffset(3); - for(int rank = 0; rank < numRanks; ++rank) { - coarseMeshData[rank].resize(10); - coarseMeshData[rank][0] = meshData[rank][0]; - coarseMeshData[rank][1] = meshData[rank][1]; - coarseMeshData[rank][2] = meshData[rank][2]; - for(int dim = 0; dim < 3; ++dim) { - coarseMeshData[rank][3 + 2*dim] = meshData[rank][3 + 2*dim] / this->coarseRate[dim]; - if(meshData[rank][3 + 2*dim] % this->coarseRate[dim] > 0) { - ++coarseMeshData[rank][3 + 2*dim]; - } - coarseMeshData[rank][3 + 2*dim + 1] = meshData[rank][3 + 2*dim + 1] / this->coarseRate[dim]; - if(meshData[rank][3 + 2*dim + 1] == this->gFineNodesPerDir[dim] - 1 && - meshData[rank][3 + 2*dim + 1] % this->coarseRate[dim] > 0) { - //this->endRate[dim] < this->coarseRate[dim]) { - ++coarseMeshData[rank][3 + 2*dim + 1]; - } +} + +template +void LocalLexicographicIndexManager< + 
LocalOrdinal, GlobalOrdinal, Node>::computeCoarseLocalLexicographicData() { + Array rankOffset(3); + for (int rank = 0; rank < numRanks; ++rank) { + coarseMeshData[rank].resize(10); + coarseMeshData[rank][0] = meshData[rank][0]; + coarseMeshData[rank][1] = meshData[rank][1]; + coarseMeshData[rank][2] = meshData[rank][2]; + for (int dim = 0; dim < 3; ++dim) { + coarseMeshData[rank][3 + 2 * dim] = + meshData[rank][3 + 2 * dim] / this->coarseRate[dim]; + if (meshData[rank][3 + 2 * dim] % this->coarseRate[dim] > 0) { + ++coarseMeshData[rank][3 + 2 * dim]; } - if(rank > 0) { - coarseMeshData[rank][9] = coarseMeshData[rank - 1][9] - + (coarseMeshData[rank - 1][8] - coarseMeshData[rank - 1][7] + 1) - * (coarseMeshData[rank - 1][6] - coarseMeshData[rank - 1][5] + 1) - * (coarseMeshData[rank - 1][4] - coarseMeshData[rank - 1][3] + 1); + coarseMeshData[rank][3 + 2 * dim + 1] = + meshData[rank][3 + 2 * dim + 1] / this->coarseRate[dim]; + if (meshData[rank][3 + 2 * dim + 1] == this->gFineNodesPerDir[dim] - 1 && + meshData[rank][3 + 2 * dim + 1] % this->coarseRate[dim] > 0) { + // this->endRate[dim] < this->coarseRate[dim]) { + ++coarseMeshData[rank][3 + 2 * dim + 1]; } } + if (rank > 0) { + coarseMeshData[rank][9] = + coarseMeshData[rank - 1][9] + + (coarseMeshData[rank - 1][8] - coarseMeshData[rank - 1][7] + 1) * + (coarseMeshData[rank - 1][6] - coarseMeshData[rank - 1][5] + 1) * + (coarseMeshData[rank - 1][4] - coarseMeshData[rank - 1][3] + 1); + } } - - template - std::vector > LocalLexicographicIndexManager:: - getCoarseMeshData() const {return coarseMeshData;} - - template - void LocalLexicographicIndexManager:: - getFineNodeGlobalTuple(const GO /* myGID */, GO& /* i */, GO& /* j */, GO& /* k */) const { - } - - template - void LocalLexicographicIndexManager:: - getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const { - LO tmp; - k = myLID / this->lNumFineNodes10; - tmp = myLID % this->lNumFineNodes10; - j = tmp / this->lFineNodesPerDir[0]; - i = tmp % 
this->lFineNodesPerDir[0]; - } - - template - void LocalLexicographicIndexManager:: - getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const { - LO tmp; - k = myLID / this->lNumFineNodes10; - tmp = myLID % this->lNumFineNodes10; - j = tmp / this->lFineNodesPerDir[0]; - i = tmp % this->lFineNodesPerDir[0]; - - k += this->offsets[2]; - j += this->offsets[1]; - i += this->offsets[0]; - } - - template - void LocalLexicographicIndexManager:: - getFineNodeGID(const GO /* i */, const GO /* j */, const GO /* k */, GO& /* myGID */) const { - } - - template - void LocalLexicographicIndexManager:: - getFineNodeLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { - } - - template - void LocalLexicographicIndexManager:: - getCoarseNodeGlobalTuple(const GO /* myGID */, GO& /* i */, GO& /* j */, GO& /* k */) const { - } - - template - void LocalLexicographicIndexManager:: - getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const { - LO tmp; - k = myLID / this->lNumCoarseNodes10; - tmp = myLID % this->lNumCoarseNodes10; - j = tmp / this->lCoarseNodesPerDir[0]; - i = tmp % this->lCoarseNodesPerDir[0]; - } - - template - void LocalLexicographicIndexManager:: - getCoarseNodeGID(const GO /* i */, const GO /* j */, const GO /* k */, GO& /* myGID */) const { - } - - template - void LocalLexicographicIndexManager:: - getCoarseNodeLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { - } - - template - void LocalLexicographicIndexManager:: - getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const { - myLID = k*this->numGhostedNodes10 + j*this->ghostedNodesPerDir[0] + i; - } - - template - void LocalLexicographicIndexManager:: - getCoarseNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const { - // Assumptions: (i,j,k) is a tuple on the coarse mesh - // myLID is the corresponding local ID on the fine mesh - const LO multiplier[3] = {1, this->lFineNodesPerDir[0], 
this->lNumFineNodes10}; - const LO indices[3] = {i, j, k}; - - myLID = 0; - for(int dim = 0; dim < 3; ++dim) { - if((indices[dim] == this->getLocalCoarseNodesInDir(dim) - 1) && this->meshEdge[2*dim + 1]) { - // We are dealing with the last node on the mesh in direction dim - // so we can simply use the number of nodes on the fine mesh in that direction - myLID += (this->getLocalFineNodesInDir(dim) - 1)*multiplier[dim]; - } else { - myLID += (indices[dim]*this->getCoarseningRate(dim) + this->getCoarseNodeOffset(dim)) - *multiplier[dim]; - } +} + +template +std::vector> +LocalLexicographicIndexManager::getCoarseMeshData() const { + return coarseMeshData; +} + +template +void LocalLexicographicIndexManager:: + getFineNodeGlobalTuple(const GO /* myGID */, GO & /* i */, GO & /* j */, + GO & /* k */) const {} + +template +void LocalLexicographicIndexManager::getFineNodeLocalTuple(const LO myLID, + LO &i, LO &j, + LO &k) const { + LO tmp; + k = myLID / this->lNumFineNodes10; + tmp = myLID % this->lNumFineNodes10; + j = tmp / this->lFineNodesPerDir[0]; + i = tmp % this->lFineNodesPerDir[0]; +} + +template +void LocalLexicographicIndexManager< + LocalOrdinal, GlobalOrdinal, Node>::getFineNodeGhostedTuple(const LO myLID, + LO &i, LO &j, + LO &k) const { + LO tmp; + k = myLID / this->lNumFineNodes10; + tmp = myLID % this->lNumFineNodes10; + j = tmp / this->lFineNodesPerDir[0]; + i = tmp % this->lFineNodesPerDir[0]; + + k += this->offsets[2]; + j += this->offsets[1]; + i += this->offsets[0]; +} + +template +void LocalLexicographicIndexManager< + LocalOrdinal, GlobalOrdinal, Node>::getFineNodeGID(const GO /* i */, + const GO /* j */, + const GO /* k */, + GO & /* myGID */) const { +} + +template +void LocalLexicographicIndexManager< + LocalOrdinal, GlobalOrdinal, Node>::getFineNodeLID(const LO /* i */, + const LO /* j */, + const LO /* k */, + LO & /* myLID */) const { +} + +template +void LocalLexicographicIndexManager:: + getCoarseNodeGlobalTuple(const GO /* myGID */, GO & /* 
i */, GO & /* j */, + GO & /* k */) const {} + +template +void LocalLexicographicIndexManager< + LocalOrdinal, GlobalOrdinal, Node>::getCoarseNodeLocalTuple(const LO myLID, + LO &i, LO &j, + LO &k) const { + LO tmp; + k = myLID / this->lNumCoarseNodes10; + tmp = myLID % this->lNumCoarseNodes10; + j = tmp / this->lCoarseNodesPerDir[0]; + i = tmp % this->lCoarseNodesPerDir[0]; +} + +template +void LocalLexicographicIndexManager:: + getCoarseNodeGID(const GO /* i */, const GO /* j */, const GO /* k */, + GO & /* myGID */) const {} + +template +void LocalLexicographicIndexManager:: + getCoarseNodeLID(const LO /* i */, const LO /* j */, const LO /* k */, + LO & /* myLID */) const {} + +template +void LocalLexicographicIndexManager:: + getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, + LO &myLID) const { + myLID = k * this->numGhostedNodes10 + j * this->ghostedNodesPerDir[0] + i; +} + +template +void LocalLexicographicIndexManager< + LocalOrdinal, GlobalOrdinal, Node>::getCoarseNodeFineLID(const LO i, + const LO j, + const LO k, + LO &myLID) const { + // Assumptions: (i,j,k) is a tuple on the coarse mesh + // myLID is the corresponding local ID on the fine mesh + const LO multiplier[3] = {1, this->lFineNodesPerDir[0], + this->lNumFineNodes10}; + const LO indices[3] = {i, j, k}; + + myLID = 0; + for (int dim = 0; dim < 3; ++dim) { + if ((indices[dim] == this->getLocalCoarseNodesInDir(dim) - 1) && + this->meshEdge[2 * dim + 1]) { + // We are dealing with the last node on the mesh in direction dim + // so we can simply use the number of nodes on the fine mesh in that + // direction + myLID += (this->getLocalFineNodesInDir(dim) - 1) * multiplier[dim]; + } else { + myLID += (indices[dim] * this->getCoarseningRate(dim) + + this->getCoarseNodeOffset(dim)) * + multiplier[dim]; } } +} - template - void LocalLexicographicIndexManager:: - getGhostedNodeFineLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { - } +template +void 
LocalLexicographicIndexManager:: + getGhostedNodeFineLID(const LO /* i */, const LO /* j */, const LO /* k */, + LO & /* myLID */) const {} - template - void LocalLexicographicIndexManager:: - getGhostedNodeCoarseLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { - } +template +void LocalLexicographicIndexManager:: + getGhostedNodeCoarseLID(const LO /* i */, const LO /* j */, + const LO /* k */, LO & /* myLID */) const {} -} //namespace MueLu +} // namespace MueLu #endif /* MUELU_LOCALLEXICOGRAPHICINDEXMANAGER_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/StructuredAggregation/uncoupled/MueLu_UncoupledIndexManager_decl.hpp b/packages/muelu/src/Graph/StructuredAggregation/uncoupled/MueLu_UncoupledIndexManager_decl.hpp index 4a8d987b423b..6f24406be644 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/uncoupled/MueLu_UncoupledIndexManager_decl.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/uncoupled/MueLu_UncoupledIndexManager_decl.hpp @@ -77,76 +77,78 @@ namespace MueLu { correspond to nodes. While not strictly necessary, it might be convenient. */ - template - class UncoupledIndexManager : public IndexManager { +template +class UncoupledIndexManager + : public IndexManager { #undef MUELU_UNCOUPLEDINDEXMANAGER_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: +public: + // LBV: I doubt that it makes sense to have + // this particular constructor since it is + // not used anywhere and parameters cannot + // all accessible after construction. + UncoupledIndexManager() = default; - //LBV: I doubt that it makes sense to have - // this particular constructor since it is - // not used anywhere and parameters cannot - // all accessible after construction. 
- UncoupledIndexManager() = default; + UncoupledIndexManager(const RCP> comm, + const bool coupled, const int NumDimensions, + const int interpolationOrder, const int MyRank, + const int NumRanks, const Array GFineNodesPerDir, + const Array LFineNodesPerDir, + const Array CoarseRate, + const bool singleCoarsePoint); - UncoupledIndexManager(const RCP > comm, const bool coupled, - const int NumDimensions, const int interpolationOrder, - const int MyRank, const int NumRanks, - const Array GFineNodesPerDir, - const Array LFineNodesPerDir, - const Array CoarseRate, - const bool singleCoarsePoint); + virtual ~UncoupledIndexManager() {} - virtual ~UncoupledIndexManager() {} + void computeGlobalCoarseParameters(); - void computeGlobalCoarseParameters(); + std::vector> getCoarseMeshData() const; - std::vector > getCoarseMeshData() const; + void getGhostedNodesData(const RCP fineMap, + Array &ghostedNodeCoarseLIDs, + Array &ghostedNodeCoarsePIDs, + Array &ghostedNodeCoarseGIDs) const; - void getGhostedNodesData(const RCP fineMap, - Array& ghostedNodeCoarseLIDs, - Array& ghostedNodeCoarsePIDs, - Array& ghostedNodeCoarseGIDs) const; + void getCoarseNodesData(const RCP fineCoordinatesMap, + Array &coarseNodeCoarseGIDs, + Array &coarseNodeFineGIDs) const; - void getCoarseNodesData(const RCP fineCoordinatesMap, - Array& coarseNodeCoarseGIDs, - Array& coarseNodeFineGIDs) const; + void getFineNodeGlobalTuple(const GO myGID, GO &i, GO &j, GO &k) const; - void getFineNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const; + void getFineNodeLocalTuple(const LO myLID, LO &i, LO &j, LO &k) const; - void getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const; + void getFineNodeGhostedTuple(const LO myLID, LO &i, LO &j, LO &k) const; - void getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const; + void getFineNodeGID(const GO i, const GO j, const GO k, GO &myGID) const; - void getFineNodeGID(const GO i, const GO j, const GO k, GO& myGID) const; + void 
getFineNodeLID(const LO i, const LO j, const LO k, LO &myLID) const; - void getFineNodeLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getCoarseNodeGlobalTuple(const GO myGID, GO &i, GO &j, GO &k) const; - void getCoarseNodeGlobalTuple(const GO myGID, GO& i, GO& j, GO& k) const; + void getCoarseNodeLocalTuple(const LO myLID, LO &i, LO &j, LO &k) const; - void getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const; + void getCoarseNodeGID(const GO i, const GO j, const GO k, GO &myGID) const; - void getCoarseNodeGID(const GO i, const GO j, const GO k, GO& myGID) const; + void getCoarseNodeLID(const LO i, const LO j, const LO k, LO &myLID) const; - void getCoarseNodeLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, + LO &myLID) const; - void getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getCoarseNodeFineLID(const LO i, const LO j, const LO k, + LO &myLID) const; - void getCoarseNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getGhostedNodeFineLID(const LO i, const LO j, const LO k, + LO &myLID) const; - void getGhostedNodeFineLID(const LO i, const LO j, const LO k, LO& myLID) const; + void getGhostedNodeCoarseLID(const LO i, const LO j, const LO k, + LO &myLID) const; - void getGhostedNodeCoarseLID(const LO i, const LO j, const LO k, LO& myLID) const; +private: + const int myRank; ///< Local rank ID. + const int numRanks; ///< Number of ranks used to decompose the problem. +}; - private: - - const int myRank; ///< Local rank ID. - const int numRanks; ///< Number of ranks used to decompose the problem. 
- - }; - -} //namespace MueLu +} // namespace MueLu #define MUELU_UNCOUPLEDINDEXMANAGER_SHORT #endif // MUELU_UNCOUPLEDINDEXMANAGER_DECL_HPP diff --git a/packages/muelu/src/Graph/StructuredAggregation/uncoupled/MueLu_UncoupledIndexManager_def.hpp b/packages/muelu/src/Graph/StructuredAggregation/uncoupled/MueLu_UncoupledIndexManager_def.hpp index efd12ebd68c9..54c0cbb47d1e 100644 --- a/packages/muelu/src/Graph/StructuredAggregation/uncoupled/MueLu_UncoupledIndexManager_def.hpp +++ b/packages/muelu/src/Graph/StructuredAggregation/uncoupled/MueLu_UncoupledIndexManager_def.hpp @@ -46,200 +46,211 @@ #ifndef MUELU_UNCOUPLEDINDEXMANAGER_DEF_HPP_ #define MUELU_UNCOUPLEDINDEXMANAGER_DEF_HPP_ -#include -#include #include +#include +#include namespace MueLu { - template - UncoupledIndexManager:: - UncoupledIndexManager(const RCP > comm, const bool coupled, - const int NumDimensions, const int interpolationOrder, - const int MyRank, const int NumRanks, - const Array GFineNodesPerDir, const Array LFineNodesPerDir, - const Array CoarseRate, const bool singleCoarsePoint) : - IndexManager(comm, coupled, singleCoarsePoint, NumDimensions, interpolationOrder, - Array(3, -1), LFineNodesPerDir), - myRank(MyRank), numRanks(NumRanks) - { - - // Load coarse rate, being careful about formating - for(int dim = 0; dim < 3; ++dim) { - if(dim < this->numDimensions) { - if(CoarseRate.size() == 1) { - this->coarseRate[dim] = CoarseRate[0]; - } else if(CoarseRate.size() == this->numDimensions) { - this->coarseRate[dim] = CoarseRate[dim]; - } - } else { - this->coarseRate[dim] = 1; +template +UncoupledIndexManager::UncoupledIndexManager( + const RCP> comm, const bool coupled, + const int NumDimensions, const int interpolationOrder, const int MyRank, + const int NumRanks, const Array GFineNodesPerDir, + const Array LFineNodesPerDir, const Array CoarseRate, + const bool singleCoarsePoint) + : IndexManager(comm, coupled, singleCoarsePoint, NumDimensions, + interpolationOrder, Array(3, -1), 
LFineNodesPerDir), + myRank(MyRank), numRanks(NumRanks) { + + // Load coarse rate, being careful about formating + for (int dim = 0; dim < 3; ++dim) { + if (dim < this->numDimensions) { + if (CoarseRate.size() == 1) { + this->coarseRate[dim] = CoarseRate[0]; + } else if (CoarseRate.size() == this->numDimensions) { + this->coarseRate[dim] = CoarseRate[dim]; } + } else { + this->coarseRate[dim] = 1; } + } - this->computeMeshParameters(); - this->gNumCoarseNodes10 = Teuchos::OrdinalTraits::invalid(); - this->gNumCoarseNodes = Teuchos::OrdinalTraits::invalid(); - } // Constructor - - template - void UncoupledIndexManager:: - computeGlobalCoarseParameters() { - GO input[1] = {as(this->lNumCoarseNodes)}, output[1] = {0}; - Teuchos::reduceAll(*(this->comm_), Teuchos::REDUCE_SUM, 1, input, output); - this->gNumCoarseNodes = output[0]; - } // computeGlobalCoarseParameters - - template - void UncoupledIndexManager:: - getGhostedNodesData(const RCP/* fineMap */, - Array& ghostedNodeCoarseLIDs, - Array& ghostedNodeCoarsePIDs, - Array& /* ghostedNodeCoarseGIDs */) const { - - // First we allocate memory for the outputs - ghostedNodeCoarseLIDs.resize(this->getNumLocalGhostedNodes()); - ghostedNodeCoarsePIDs.resize(this->getNumLocalGhostedNodes()); - // In the uncoupled case the data required is trivial to provide! 
- for(LO idx = 0; idx < this->getNumLocalGhostedNodes(); ++idx) { - ghostedNodeCoarseLIDs[idx] = idx; - ghostedNodeCoarsePIDs[idx] = myRank; - } - } // getGhostedNodesData - - template - void UncoupledIndexManager:: - getCoarseNodesData(const RCP fineCoordinatesMap, - Array& coarseNodeCoarseGIDs, - Array& coarseNodeFineGIDs) const { - - // Allocate sufficient amount of storage in output arrays - coarseNodeCoarseGIDs.resize(this->getNumLocalCoarseNodes()); - coarseNodeFineGIDs.resize(this->getNumLocalCoarseNodes()); - - // Load all the GIDs on the fine mesh - ArrayView fineNodeGIDs = fineCoordinatesMap->getLocalElementList(); - - // Extract the fine LIDs of the coarse nodes and store the corresponding GIDs - LO fineLID; - for(LO coarseLID = 0; coarseLID < this->getNumLocalCoarseNodes(); ++coarseLID) { - Array coarseIndices(3), fineIndices(3); - this->getCoarseNodeLocalTuple(coarseLID, - coarseIndices[0], - coarseIndices[1], - coarseIndices[2]); - for(int dim = 0; dim < 3; ++dim) { - if(coarseIndices[dim] == this->lCoarseNodesPerDir[dim] - 1) { - if(this->lCoarseNodesPerDir[dim] == 1) { - fineIndices[dim] = 0; - } else { - fineIndices[dim] = this->lFineNodesPerDir[dim] - 1; - } + this->computeMeshParameters(); + this->gNumCoarseNodes10 = Teuchos::OrdinalTraits::invalid(); + this->gNumCoarseNodes = Teuchos::OrdinalTraits::invalid(); +} // Constructor + +template +void UncoupledIndexManager::computeGlobalCoarseParameters() { + GO input[1] = {as(this->lNumCoarseNodes)}, output[1] = {0}; + Teuchos::reduceAll(*(this->comm_), Teuchos::REDUCE_SUM, 1, input, output); + this->gNumCoarseNodes = output[0]; +} // computeGlobalCoarseParameters + +template +void UncoupledIndexManager:: + getGhostedNodesData(const RCP /* fineMap */, + Array &ghostedNodeCoarseLIDs, + Array &ghostedNodeCoarsePIDs, + Array & /* ghostedNodeCoarseGIDs */) const { + + // First we allocate memory for the outputs + ghostedNodeCoarseLIDs.resize(this->getNumLocalGhostedNodes()); + 
ghostedNodeCoarsePIDs.resize(this->getNumLocalGhostedNodes()); + // In the uncoupled case the data required is trivial to provide! + for (LO idx = 0; idx < this->getNumLocalGhostedNodes(); ++idx) { + ghostedNodeCoarseLIDs[idx] = idx; + ghostedNodeCoarsePIDs[idx] = myRank; + } +} // getGhostedNodesData + +template +void UncoupledIndexManager:: + getCoarseNodesData(const RCP fineCoordinatesMap, + Array &coarseNodeCoarseGIDs, + Array &coarseNodeFineGIDs) const { + + // Allocate sufficient amount of storage in output arrays + coarseNodeCoarseGIDs.resize(this->getNumLocalCoarseNodes()); + coarseNodeFineGIDs.resize(this->getNumLocalCoarseNodes()); + + // Load all the GIDs on the fine mesh + ArrayView fineNodeGIDs = fineCoordinatesMap->getLocalElementList(); + + // Extract the fine LIDs of the coarse nodes and store the corresponding GIDs + LO fineLID; + for (LO coarseLID = 0; coarseLID < this->getNumLocalCoarseNodes(); + ++coarseLID) { + Array coarseIndices(3), fineIndices(3); + this->getCoarseNodeLocalTuple(coarseLID, coarseIndices[0], coarseIndices[1], + coarseIndices[2]); + for (int dim = 0; dim < 3; ++dim) { + if (coarseIndices[dim] == this->lCoarseNodesPerDir[dim] - 1) { + if (this->lCoarseNodesPerDir[dim] == 1) { + fineIndices[dim] = 0; } else { - fineIndices[dim] = coarseIndices[dim]*this->coarseRate[dim]; + fineIndices[dim] = this->lFineNodesPerDir[dim] - 1; } + } else { + fineIndices[dim] = coarseIndices[dim] * this->coarseRate[dim]; } - - fineLID = fineIndices[2]*this->lNumFineNodes10 - + fineIndices[1]*this->lFineNodesPerDir[0] - + fineIndices[0]; - coarseNodeFineGIDs[coarseLID] = fineNodeGIDs[fineLID]; - } - } // getCoarseNodesData - template - std::vector > UncoupledIndexManager:: - getCoarseMeshData() const { - std::vector > coarseMeshData; - return coarseMeshData; + fineLID = fineIndices[2] * this->lNumFineNodes10 + + fineIndices[1] * this->lFineNodesPerDir[0] + fineIndices[0]; + coarseNodeFineGIDs[coarseLID] = fineNodeGIDs[fineLID]; } +} // 
getCoarseNodesData - template - void UncoupledIndexManager:: - getFineNodeGlobalTuple(const GO /* myGID */, GO& /* i */, GO& /* j */, GO& /* k */) const { - } +template +std::vector> +UncoupledIndexManager::getCoarseMeshData() + const { + std::vector> coarseMeshData; + return coarseMeshData; +} - template - void UncoupledIndexManager:: - getFineNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const { - LO tmp; - k = myLID / this->lNumFineNodes10; - tmp = myLID % this->lNumFineNodes10; - j = tmp / this->lFineNodesPerDir[0]; - i = tmp % this->lFineNodesPerDir[0]; - } // getFineNodeLocalTuple - - template - void UncoupledIndexManager:: - getFineNodeGhostedTuple(const LO myLID, LO& i, LO& j, LO& k) const { - LO tmp; - k = myLID / this->lNumFineNodes10; - tmp = myLID % this->lNumFineNodes10; - j = tmp / this->lFineNodesPerDir[0]; - i = tmp % this->lFineNodesPerDir[0]; - - k += this->offsets[2]; - j += this->offsets[1]; - i += this->offsets[0]; - } // getFineNodeGhostedTuple - - template - void UncoupledIndexManager:: - getFineNodeGID(const GO /* i */, const GO /* j */, const GO /* k */, GO& /* myGID */) const { - } +template +void UncoupledIndexManager::getFineNodeGlobalTuple(const GO /* myGID */, + GO & /* i */, + GO & /* j */, + GO & /* k */) const {} - template - void UncoupledIndexManager:: - getFineNodeLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { - } +template +void UncoupledIndexManager::getFineNodeLocalTuple(const LO myLID, LO &i, + LO &j, LO &k) const { + LO tmp; + k = myLID / this->lNumFineNodes10; + tmp = myLID % this->lNumFineNodes10; + j = tmp / this->lFineNodesPerDir[0]; + i = tmp % this->lFineNodesPerDir[0]; +} // getFineNodeLocalTuple - template - void UncoupledIndexManager:: - getCoarseNodeGlobalTuple(const GO /* myGID */, GO& /* i */, GO& /* j */, GO& /* k */) const { - } +template +void UncoupledIndexManager::getFineNodeGhostedTuple(const LO myLID, LO &i, + LO &j, LO &k) const { + LO tmp; + k = myLID / 
this->lNumFineNodes10; + tmp = myLID % this->lNumFineNodes10; + j = tmp / this->lFineNodesPerDir[0]; + i = tmp % this->lFineNodesPerDir[0]; - template - void UncoupledIndexManager:: - getCoarseNodeLocalTuple(const LO myLID, LO& i, LO& j, LO& k) const { - LO tmp; - k = myLID / this->lNumCoarseNodes10; - tmp = myLID % this->lNumCoarseNodes10; - j = tmp / this->lCoarseNodesPerDir[0]; - i = tmp % this->lCoarseNodesPerDir[0]; - } // getCoarseNodeLocalTuple - - template - void UncoupledIndexManager:: - getCoarseNodeGID(const GO /* i */, const GO /* j */, const GO /* k */, GO& /* myGID */) const { - } + k += this->offsets[2]; + j += this->offsets[1]; + i += this->offsets[0]; +} // getFineNodeGhostedTuple - template - void UncoupledIndexManager:: - getCoarseNodeLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { - } +template +void UncoupledIndexManager::getFineNodeGID( + const GO /* i */, const GO /* j */, const GO /* k */, + GO & /* myGID */) const {} - template - void UncoupledIndexManager:: - getCoarseNodeGhostedLID(const LO i, const LO j, const LO k, LO& myLID) const { - myLID = k*this->numGhostedNodes10 + j*this->ghostedNodesPerDir[0] + i; - } // getCoarseNodeGhostedLID +template +void UncoupledIndexManager::getFineNodeLID( + const LO /* i */, const LO /* j */, const LO /* k */, + LO & /* myLID */) const {} - template - void UncoupledIndexManager:: - getCoarseNodeFineLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { - } +template +void UncoupledIndexManager::getCoarseNodeGlobalTuple(const GO /* myGID */, + GO & /* i */, + GO & /* j */, + GO & /* k */) const { +} - template - void UncoupledIndexManager:: - getGhostedNodeFineLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { - } +template +void UncoupledIndexManager::getCoarseNodeLocalTuple(const LO myLID, LO &i, + LO &j, LO &k) const { + LO tmp; + k = myLID / this->lNumCoarseNodes10; + tmp = myLID % this->lNumCoarseNodes10; 
+ j = tmp / this->lCoarseNodesPerDir[0]; + i = tmp % this->lCoarseNodesPerDir[0]; +} // getCoarseNodeLocalTuple - template - void UncoupledIndexManager:: - getGhostedNodeCoarseLID(const LO /* i */, const LO /* j */, const LO /* k */, LO& /* myLID */) const { - } +template +void UncoupledIndexManager::getCoarseNodeGID( + const GO /* i */, const GO /* j */, const GO /* k */, + GO & /* myGID */) const {} + +template +void UncoupledIndexManager::getCoarseNodeLID( + const LO /* i */, const LO /* j */, const LO /* k */, + LO & /* myLID */) const {} + +template +void UncoupledIndexManager::getCoarseNodeGhostedLID(const LO i, + const LO j, + const LO k, + LO &myLID) const { + myLID = k * this->numGhostedNodes10 + j * this->ghostedNodesPerDir[0] + i; +} // getCoarseNodeGhostedLID + +template +void UncoupledIndexManager::getCoarseNodeFineLID(const LO /* i */, + const LO /* j */, + const LO /* k */, + LO & /* myLID */) const { +} + +template +void UncoupledIndexManager:: + getGhostedNodeFineLID(const LO /* i */, const LO /* j */, const LO /* k */, + LO & /* myLID */) const {} + +template +void UncoupledIndexManager:: + getGhostedNodeCoarseLID(const LO /* i */, const LO /* j */, + const LO /* k */, LO & /* myLID */) const {} -} //namespace MueLu +} // namespace MueLu #endif /* MUELU_UNCOUPLEDINDEXMANAGER_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_decl.hpp index aacc182dc4b3..62801bf2f66e 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_decl.hpp @@ -46,84 +46,89 @@ #ifndef MUELU_AGGREGATIONPHASE1ALGORITHM_DECL_HPP_ #define MUELU_AGGREGATIONPHASE1ALGORITHM_DECL_HPP_ -#include "MueLu_ConfigDefs.hpp" #include "MueLu_AggregationAlgorithmBase.hpp" #include "MueLu_AggregationPhase1Algorithm_fwd.hpp" 
+#include "MueLu_ConfigDefs.hpp" -#include "MueLu_FactoryBase_fwd.hpp" #include "MueLu_Aggregates_fwd.hpp" +#include "MueLu_FactoryBase_fwd.hpp" #include "MueLu_GraphBase.hpp" namespace MueLu { - /*! - @class AggregationPhase1Algorithm class. - @brief Algorithm for coarsening a graph with uncoupled aggregation. - - @ingroup Aggregation - - ### Idea ### - Phase 1 tries to build new aggregates which fulfill the user chosen aggregation - criteria (i.e. minimum and maximum size of aggregates). Especially the chosen - ordering for the input nodes may have some influence on the final aggregates. - Phase 1 is the most important aggregation routine for building new aggregates. - - ### Parameters ### - Parameter | Meaning - ----------|-------- - aggregation: ordering | Ordering of graph nodes in which the nodes are processed for aggregation. The options are natural, random and graph. - aggregation: max selected neighbors | Maximum number of neighbor nodes which have already been added to aggregates. - aggregation: min agg size | minimum number of nodes which have to be in an aggregate. - aggregation: max agg size | maximum allowed number of nodes in an aggregate - - ### Comments ### - Only nodes with state READY are changed to AGGREGATED. Nodes with other states are not touched. - */ - - template - class AggregationPhase1Algorithm : - public MueLu::AggregationAlgorithmBase { +/*! + @class AggregationPhase1Algorithm class. + @brief Algorithm for coarsening a graph with uncoupled aggregation. + + @ingroup Aggregation + + ### Idea ### + Phase 1 tries to build new aggregates which fulfill the user chosen + aggregation criteria (i.e. minimum and maximum size of aggregates). Especially + the chosen ordering for the input nodes may have some influence on the final + aggregates. Phase 1 is the most important aggregation routine for building new + aggregates. 
+ + ### Parameters ### + Parameter | Meaning + ----------|-------- + aggregation: ordering | Ordering of graph nodes in which the nodes are + processed for aggregation. The options are natural, random and graph. + aggregation: max selected neighbors | Maximum number of neighbor nodes which + have already been added to aggregates. aggregation: min agg size | minimum + number of nodes which have to be in an aggregate. aggregation: max agg size | + maximum allowed number of nodes in an aggregate + + ### Comments ### + Only nodes with state READY are changed to AGGREGATED. Nodes with other states + are not touched. +*/ + +template +class AggregationPhase1Algorithm + : public MueLu::AggregationAlgorithmBase { #undef MUELU_AGGREGATIONPHASE1ALGORITHM_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ - - //! Constructor. - AggregationPhase1Algorithm(const RCP& /* graphFact */ = Teuchos::null) { } - - //! Destructor. - virtual ~AggregationPhase1Algorithm() { } +public: + //! @name Constructors/Destructors. + //@{ - //@} + //! Constructor. + AggregationPhase1Algorithm( + const RCP & /* graphFact */ = Teuchos::null) {} + //! Destructor. + virtual ~AggregationPhase1Algorithm() {} - //! @name Aggregation methods. - //@{ + //@} - /*! @brief Local aggregation. */ + //! @name Aggregation methods. + //@{ - void BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; - //@} + /*! @brief Local aggregation. */ - std::string description() const { return "Phase 1 (main)"; } + void BuildAggregates(const ParameterList ¶ms, const GraphBase &graph, + Aggregates &aggregates, std::vector &aggStat, + LO &numNonAggregatedNodes) const; + //@} - private: + std::string description() const { return "Phase 1 (main)"; } - /*! @brief Utility to take a list of integers and reorder them randomly (by using a local permutation). 
- @param list On input, a bunch of integers. On output, the same integers in a different order - that is determined randomly. - */ - void RandomReorder(ArrayRCP list) const; - - /*! @brief Generate a random number in the range [min, max] */ - int RandomOrdinal(int min, int max) const; +private: + /*! @brief Utility to take a list of integers and reorder them randomly (by + using a local permutation). + @param list On input, a bunch of integers. On output, the same integers in a + different order that is determined randomly. + */ + void RandomReorder(ArrayRCP list) const; - }; + /*! @brief Generate a random number in the range [min, max] */ + int RandomOrdinal(int min, int max) const; +}; -} //namespace MueLu +} // namespace MueLu #define MUELU_AGGREGATIONPHASE1ALGORITHM_SHORT #endif /* MUELU_AGGREGATIONPHASE1ALGORITHM_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_def.hpp index 866b540bf7d9..e53936561c68 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_def.hpp @@ -55,193 +55,207 @@ #include "MueLu_AggregationPhase1Algorithm_decl.hpp" -#include "MueLu_GraphBase.hpp" #include "MueLu_Aggregates.hpp" #include "MueLu_Exceptions.hpp" +#include "MueLu_GraphBase.hpp" #include "MueLu_Monitor.hpp" namespace MueLu { - template - void AggregationPhase1Algorithm:: - BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, - LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildAggregates"); - - std::string orderingStr = params.get("aggregation: ordering"); - int maxNeighAlreadySelected = params.get ("aggregation: max selected neighbors"); - int minNodesPerAggregate = params.get ("aggregation: min agg size"); - int maxNodesPerAggregate = 
params.get ("aggregation: max agg size"); - - TEUCHOS_TEST_FOR_EXCEPTION(maxNodesPerAggregate < minNodesPerAggregate, Exceptions::RuntimeError, - "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!"); - - enum { - O_NATURAL, - O_RANDOM, - O_GRAPH - } ordering; - ordering = O_NATURAL; // initialize variable (fix CID 143665) - if (orderingStr == "natural") ordering = O_NATURAL; - if (orderingStr == "random" ) ordering = O_RANDOM; - if (orderingStr == "graph" ) ordering = O_GRAPH; - - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); - - ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); - ArrayRCP procWinner = aggregates.GetProcWinner() ->getDataNonConst(0); - - LO numLocalAggregates = aggregates.GetNumAggregates(); - - ArrayRCP randomVector; - if (ordering == O_RANDOM) { - randomVector = arcp(numRows); - for (LO i = 0; i < numRows; i++) - randomVector[i] = i; - RandomReorder(randomVector); - } +template +void AggregationPhase1Algorithm:: + BuildAggregates(const ParameterList ¶ms, const GraphBase &graph, + Aggregates &aggregates, std::vector &aggStat, + LO &numNonAggregatedNodes) const { + Monitor m(*this, "BuildAggregates"); + + std::string orderingStr = params.get("aggregation: ordering"); + int maxNeighAlreadySelected = + params.get("aggregation: max selected neighbors"); + int minNodesPerAggregate = params.get("aggregation: min agg size"); + int maxNodesPerAggregate = params.get("aggregation: max agg size"); + + TEUCHOS_TEST_FOR_EXCEPTION( + maxNodesPerAggregate < minNodesPerAggregate, Exceptions::RuntimeError, + "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: " + "minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!"); + + enum { O_NATURAL, O_RANDOM, O_GRAPH } ordering; + ordering = O_NATURAL; // initialize variable (fix CID 143665) + if (orderingStr == "natural") + ordering = O_NATURAL; + if 
(orderingStr == "random") + ordering = O_RANDOM; + if (orderingStr == "graph") + ordering = O_GRAPH; + + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); + + ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); + ArrayRCP procWinner = aggregates.GetProcWinner()->getDataNonConst(0); + + LO numLocalAggregates = aggregates.GetNumAggregates(); + + ArrayRCP randomVector; + if (ordering == O_RANDOM) { + randomVector = arcp(numRows); + for (LO i = 0; i < numRows; i++) + randomVector[i] = i; + RandomReorder(randomVector); + } - int aggIndex = -1; - size_t aggSize = 0; - std::vector aggList(graph.getLocalMaxNumRowEntries()); - - std::queue graphOrderQueue; - - // Main loop over all local rows of graph(A) - for (LO i = 0; i < numRows; i++) { - // Step 1: pick the next node to aggregate - LO rootCandidate = 0; - if (ordering == O_NATURAL) rootCandidate = i; - else if (ordering == O_RANDOM) rootCandidate = randomVector[i]; - else if (ordering == O_GRAPH) { - - if (graphOrderQueue.size() == 0) { - // Current queue is empty for "graph" ordering, populate with one READY node - for (LO jnode = 0; jnode < numRows; jnode++) - if (aggStat[jnode] == READY) { - graphOrderQueue.push(jnode); - break; - } - } - if (graphOrderQueue.size() == 0) { - // There are no more ready nodes, end the phase - break; - } - rootCandidate = graphOrderQueue.front(); // take next node from graph ordering queue - graphOrderQueue.pop(); // delete this node in list + int aggIndex = -1; + size_t aggSize = 0; + std::vector aggList(graph.getLocalMaxNumRowEntries()); + + std::queue graphOrderQueue; + + // Main loop over all local rows of graph(A) + for (LO i = 0; i < numRows; i++) { + // Step 1: pick the next node to aggregate + LO rootCandidate = 0; + if (ordering == O_NATURAL) + rootCandidate = i; + else if (ordering == O_RANDOM) + rootCandidate = randomVector[i]; + else if (ordering == O_GRAPH) { + + if (graphOrderQueue.size() == 0) { + // 
Current queue is empty for "graph" ordering, populate with one READY + // node + for (LO jnode = 0; jnode < numRows; jnode++) + if (aggStat[jnode] == READY) { + graphOrderQueue.push(jnode); + break; + } } + if (graphOrderQueue.size() == 0) { + // There are no more ready nodes, end the phase + break; + } + rootCandidate = + graphOrderQueue.front(); // take next node from graph ordering queue + graphOrderQueue.pop(); // delete this node in list + } - if (aggStat[rootCandidate] != READY) - continue; + if (aggStat[rootCandidate] != READY) + continue; - // Step 2: build tentative aggregate - aggSize = 0; - aggList[aggSize++] = rootCandidate; + // Step 2: build tentative aggregate + aggSize = 0; + aggList[aggSize++] = rootCandidate; - ArrayView neighOfINode = graph.getNeighborVertices(rootCandidate); + ArrayView neighOfINode = graph.getNeighborVertices(rootCandidate); - // If the number of neighbors is less than the minimum number of nodes - // per aggregate, we know this is not going to be a valid root, and we - // may skip it, but only for "natural" and "random" (for "graph" we still - // need to fetch the list of local neighbors to continue) - if ((ordering == O_NATURAL || ordering == O_RANDOM) && - neighOfINode.size() < minNodesPerAggregate) { - continue; - } + // If the number of neighbors is less than the minimum number of nodes + // per aggregate, we know this is not going to be a valid root, and we + // may skip it, but only for "natural" and "random" (for "graph" we still + // need to fetch the list of local neighbors to continue) + if ((ordering == O_NATURAL || ordering == O_RANDOM) && + neighOfINode.size() < minNodesPerAggregate) { + continue; + } - LO numAggregatedNeighbours = 0; + LO numAggregatedNeighbours = 0; - for (int j = 0; j < neighOfINode.size(); j++) { - LO neigh = neighOfINode[j]; + for (int j = 0; j < neighOfINode.size(); j++) { + LO neigh = neighOfINode[j]; - if (neigh != rootCandidate && graph.isLocalNeighborVertex(neigh)) { + if (neigh != 
rootCandidate && graph.isLocalNeighborVertex(neigh)) { - if (aggStat[neigh] == READY || aggStat[neigh] == NOTSEL) { - // If aggregate size does not exceed max size, add node to the - // tentative aggregate - // NOTE: We do not exit the loop over all neighbours since we have - // still to count all aggregated neighbour nodes for the - // aggregation criteria - // NOTE: We check here for the maximum aggregation size. If we - // would do it below with all the other check too big aggregates - // would not be accepted at all. - if (aggSize < as(maxNodesPerAggregate)) - aggList[aggSize++] = neigh; + if (aggStat[neigh] == READY || aggStat[neigh] == NOTSEL) { + // If aggregate size does not exceed max size, add node to the + // tentative aggregate + // NOTE: We do not exit the loop over all neighbours since we have + // still to count all aggregated neighbour nodes for the + // aggregation criteria + // NOTE: We check here for the maximum aggregation size. If we + // would do it below with all the other check too big aggregates + // would not be accepted at all. 
+ if (aggSize < as(maxNodesPerAggregate)) + aggList[aggSize++] = neigh; - } else { - numAggregatedNeighbours++; - } + } else { + numAggregatedNeighbours++; } } + } - // Step 3: check if tentative aggregate is acceptable - if ((numAggregatedNeighbours <= maxNeighAlreadySelected) && // too many connections to other aggregates - (aggSize >= as(minNodesPerAggregate))) { // too few nodes in the tentative aggregate - // Accept new aggregate - // rootCandidate becomes the root of the newly formed aggregate - aggregates.SetIsRoot(rootCandidate); - aggIndex = numLocalAggregates++; - - for (size_t k = 0; k < aggSize; k++) { - aggStat [aggList[k]] = AGGREGATED; - vertex2AggId[aggList[k]] = aggIndex; - procWinner [aggList[k]] = myRank; - } - - numNonAggregatedNodes -= aggSize; - - } else { - // Aggregate is not accepted - aggStat[rootCandidate] = NOTSEL; - - // Need this for the "graph" ordering below - // The original candidate is always aggList[0] - aggSize = 1; + // Step 3: check if tentative aggregate is acceptable + if ((numAggregatedNeighbours <= + maxNeighAlreadySelected) && // too many connections to other aggregates + (aggSize >= as(minNodesPerAggregate))) { // too few nodes in the + // tentative aggregate + // Accept new aggregate + // rootCandidate becomes the root of the newly formed aggregate + aggregates.SetIsRoot(rootCandidate); + aggIndex = numLocalAggregates++; + + for (size_t k = 0; k < aggSize; k++) { + aggStat[aggList[k]] = AGGREGATED; + vertex2AggId[aggList[k]] = aggIndex; + procWinner[aggList[k]] = myRank; } - if (ordering == O_GRAPH) { - // Add candidates to the list of nodes - // NOTE: the code have slightly different meanings depending on context: - // - if aggregate was accepted, we add neighbors of neighbors of the original candidate - // - if aggregate was not accepted, we add neighbors of the original candidate - for (size_t k = 0; k < aggSize; k++) { - ArrayView neighOfJNode = graph.getNeighborVertices(aggList[k]); + numNonAggregatedNodes -= 
aggSize; - for (int j = 0; j < neighOfJNode.size(); j++) { - LO neigh = neighOfJNode[j]; + } else { + // Aggregate is not accepted + aggStat[rootCandidate] = NOTSEL; - if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == READY) - graphOrderQueue.push(neigh); - } + // Need this for the "graph" ordering below + // The original candidate is always aggList[0] + aggSize = 1; + } + + if (ordering == O_GRAPH) { + // Add candidates to the list of nodes + // NOTE: the code have slightly different meanings depending on context: + // - if aggregate was accepted, we add neighbors of neighbors of the + // original candidate + // - if aggregate was not accepted, we add neighbors of the original + // candidate + for (size_t k = 0; k < aggSize; k++) { + ArrayView neighOfJNode = + graph.getNeighborVertices(aggList[k]); + + for (int j = 0; j < neighOfJNode.size(); j++) { + LO neigh = neighOfJNode[j]; + + if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == READY) + graphOrderQueue.push(neigh); } } } - - // Reset all NOTSEL vertices to READY - // This simplifies other algorithms - for (LO i = 0; i < numRows; i++) - if (aggStat[i] == NOTSEL) - aggStat[i] = READY; - - // update aggregate object - aggregates.SetNumAggregates(numLocalAggregates); - } - - template - void AggregationPhase1Algorithm::RandomReorder(ArrayRCP list) const { - //TODO: replace int - int n = list.size(); - for(int i = 0; i < n-1; i++) - std::swap(list[i], list[RandomOrdinal(i,n-1)]); } - template - int AggregationPhase1Algorithm::RandomOrdinal(int min, int max) const { - return min + as((max-min+1) * (static_cast(std::rand()) / (RAND_MAX + 1.0))); - } - -} // end namespace - + // Reset all NOTSEL vertices to READY + // This simplifies other algorithms + for (LO i = 0; i < numRows; i++) + if (aggStat[i] == NOTSEL) + aggStat[i] = READY; + + // update aggregate object + aggregates.SetNumAggregates(numLocalAggregates); +} + +template +void AggregationPhase1Algorithm::RandomReorder(ArrayRCP list) const { 
+ // TODO: replace int + int n = list.size(); + for (int i = 0; i < n - 1; i++) + std::swap(list[i], list[RandomOrdinal(i, n - 1)]); +} + +template +int AggregationPhase1Algorithm::RandomOrdinal(int min, int max) const { + return min + as((max - min + 1) * + (static_cast(std::rand()) / (RAND_MAX + 1.0))); +} + +} // namespace MueLu #endif /* MUELU_AGGREGATIONPHASE1ALGORITHM_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_kokkos_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_kokkos_decl.hpp index 295f9d927e43..ee69e3523238 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_kokkos_decl.hpp @@ -58,84 +58,85 @@ #include "MueLu_LWGraph_kokkos.hpp" namespace MueLu { - /*! - @class AggregationPhase1Algorithm class. - @brief Algorithm for coarsening a graph with uncoupled aggregation. - - @ingroup Aggregation - - ### Idea ### - Phase 1 tries to build new aggregates which fulfill the user chosen aggregation - criteria (i.e. minimum and maximum size of aggregates). Especially the chosen - ordering for the input nodes may have some influence on the final aggregates. - Phase 1 is the most important aggregation routine for building new aggregates. - - ### Parameters ### - Parameter | Meaning - ----------|-------- - aggregation: ordering | Ordering of graph nodes in which the nodes are processed for aggregation. The options are natural, random and graph. - aggregation: max selected neighbors | Maximum number of neighbor nodes which have already been added to aggregates. - aggregation: min agg size | minimum number of nodes which have to be in an aggregate. - aggregation: max agg size | maximum allowed number of nodes in an aggregate - - ### Comments ### - Only nodes with state READY are changed to AGGREGATED. 
Nodes with other states are not touched. - */ - - template - class AggregationPhase1Algorithm_kokkos : - public MueLu::AggregationAlgorithmBase_kokkos { +/*! + @class AggregationPhase1Algorithm class. + @brief Algorithm for coarsening a graph with uncoupled aggregation. + + @ingroup Aggregation + + ### Idea ### + Phase 1 tries to build new aggregates which fulfill the user chosen + aggregation criteria (i.e. minimum and maximum size of aggregates). Especially + the chosen ordering for the input nodes may have some influence on the final + aggregates. Phase 1 is the most important aggregation routine for building new + aggregates. + + ### Parameters ### + Parameter | Meaning + ----------|-------- + aggregation: ordering | Ordering of graph nodes in which the nodes are + processed for aggregation. The options are natural, random and graph. + aggregation: max selected neighbors | Maximum number of neighbor nodes which + have already been added to aggregates. aggregation: min agg size | minimum + number of nodes which have to be in an aggregate. aggregation: max agg size | + maximum allowed number of nodes in an aggregate + + ### Comments ### + Only nodes with state READY are changed to AGGREGATED. Nodes with other states + are not touched. +*/ + +template +class AggregationPhase1Algorithm_kokkos + : public MueLu::AggregationAlgorithmBase_kokkos { #undef MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - using device_type = typename LWGraph_kokkos::device_type; - using execution_space = typename LWGraph_kokkos::execution_space; - using memory_space = typename LWGraph_kokkos::memory_space; +public: + using device_type = typename LWGraph_kokkos::device_type; + using execution_space = typename LWGraph_kokkos::execution_space; + using memory_space = typename LWGraph_kokkos::memory_space; - //! @name Constructors/Destructors. - //@{ + //! @name Constructors/Destructors. + //@{ - //! Constructor. 
- AggregationPhase1Algorithm_kokkos(const RCP& /* graphFact */ = Teuchos::null) { } + //! Constructor. + AggregationPhase1Algorithm_kokkos( + const RCP & /* graphFact */ = Teuchos::null) {} - //! Destructor. - virtual ~AggregationPhase1Algorithm_kokkos() { } + //! Destructor. + virtual ~AggregationPhase1Algorithm_kokkos() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. */ + void BuildAggregates(const Teuchos::ParameterList ¶ms, + const LWGraph_kokkos &graph, Aggregates &aggregates, + Kokkos::View &aggStat, + LO &numNonAggregatedNodes) const; - void BuildAggregates(const Teuchos::ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; + void BuildAggregatesRandom(const LO maxAggSize, const LWGraph_kokkos &graph, + Aggregates &aggregates, + Kokkos::View &aggStat, + LO &numNonAggregatedNodes) const; - void BuildAggregatesRandom(const LO maxAggSize, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; + void + BuildAggregatesDeterministic(const LO maxAggSize, const LWGraph_kokkos &graph, + Aggregates &aggregates, + Kokkos::View &aggStat, + LO &numNonAggregatedNodes) const; + //@} - void BuildAggregatesDeterministic(const LO maxAggSize, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; - //@} + std::string description() const { return "Phase 1 (main)"; } +}; - std::string description() const { return "Phase 1 (main)"; } - - }; - -} //namespace MueLu +} // namespace MueLu #define MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_SHORT #endif // MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_kokkos_def.hpp 
b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_kokkos_def.hpp index 82e837c0ef41..527b26a02703 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_kokkos_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase1Algorithm_kokkos_def.hpp @@ -66,221 +66,233 @@ namespace MueLu { - template - void AggregationPhase1Algorithm_kokkos:: - BuildAggregates(const Teuchos::ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const { +template +void AggregationPhase1Algorithm_kokkos:: + BuildAggregates( + const Teuchos::ParameterList ¶ms, const LWGraph_kokkos &graph, + Aggregates &aggregates, + Kokkos::View &aggStat, + LO &numNonAggregatedNodes) const { - int minNodesPerAggregate = params.get ("aggregation: min agg size"); - int maxNodesPerAggregate = params.get ("aggregation: max agg size"); + int minNodesPerAggregate = params.get("aggregation: min agg size"); + int maxNodesPerAggregate = params.get("aggregation: max agg size"); - TEUCHOS_TEST_FOR_EXCEPTION(maxNodesPerAggregate < minNodesPerAggregate, - Exceptions::RuntimeError, - "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!"); + TEUCHOS_TEST_FOR_EXCEPTION( + maxNodesPerAggregate < minNodesPerAggregate, Exceptions::RuntimeError, + "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: " + "minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!"); - // Distance-2 gives less control than serial uncoupled phase 1 - // no custom row reordering because would require making deep copy - // of local matrix entries and permuting it can only enforce - // max aggregate size - { - if(params.get("aggregation: deterministic")) - { - Monitor m(*this, "BuildAggregatesDeterministic"); - BuildAggregatesDeterministic(maxNodesPerAggregate, graph, - aggregates, aggStat, 
numNonAggregatedNodes); - } else { - Monitor m(*this, "BuildAggregatesRandom"); - BuildAggregatesRandom(maxNodesPerAggregate, graph, - aggregates, aggStat, numNonAggregatedNodes); - } + // Distance-2 gives less control than serial uncoupled phase 1 + // no custom row reordering because would require making deep copy + // of local matrix entries and permuting it can only enforce + // max aggregate size + { + if (params.get("aggregation: deterministic")) { + Monitor m(*this, "BuildAggregatesDeterministic"); + BuildAggregatesDeterministic(maxNodesPerAggregate, graph, aggregates, + aggStat, numNonAggregatedNodes); + } else { + Monitor m(*this, "BuildAggregatesRandom"); + BuildAggregatesRandom(maxNodesPerAggregate, graph, aggregates, aggStat, + numNonAggregatedNodes); } } +} - template - void AggregationPhase1Algorithm_kokkos:: - BuildAggregatesRandom(const LO maxAggSize, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const - { - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); +template +void AggregationPhase1Algorithm_kokkos:: + BuildAggregatesRandom( + const LO maxAggSize, const LWGraph_kokkos &graph, + Aggregates &aggregates, + Kokkos::View &aggStat, + LO &numNonAggregatedNodes) const { + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); - // Extract data from aggregates - auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto colors = aggregates.GetGraphColors(); + // Extract data from aggregates + auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView( + Xpetra::Access::ReadWrite); + auto procWinner = + aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto colors = aggregates.GetGraphColors(); - auto lclLWGraph = 
graph.getLocalLWGraph(); + auto lclLWGraph = graph.getLocalLWGraph(); - LO numAggregatedNodes = 0; - LO numLocalAggregates = aggregates.GetNumAggregates(); - Kokkos::View aggCount("aggCount"); - Kokkos::deep_copy(aggCount, numLocalAggregates); - Kokkos::parallel_for("Aggregation Phase 1: initial reduction over color == 1", - Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA (const LO nodeIdx) { - if(colors(nodeIdx) == 1 && aggStat(nodeIdx) == READY) { - const LO aggIdx = Kokkos::atomic_fetch_add (&aggCount(), 1); - vertex2AggId(nodeIdx, 0) = aggIdx; - aggStat(nodeIdx) = AGGREGATED; - procWinner(nodeIdx, 0) = myRank; - } - }); - // Truely we wish to compute: numAggregatedNodes = aggCount - numLocalAggregates - // before updating the value of numLocalAggregates. - // But since we also do not want to create a host mirror of aggCount we do some trickery... - numAggregatedNodes -= numLocalAggregates; - Kokkos::deep_copy(numLocalAggregates, aggCount); - numAggregatedNodes += numLocalAggregates; + LO numAggregatedNodes = 0; + LO numLocalAggregates = aggregates.GetNumAggregates(); + Kokkos::View aggCount("aggCount"); + Kokkos::deep_copy(aggCount, numLocalAggregates); + Kokkos::parallel_for( + "Aggregation Phase 1: initial reduction over color == 1", + Kokkos::RangePolicy(0, numRows), + KOKKOS_LAMBDA(const LO nodeIdx) { + if (colors(nodeIdx) == 1 && aggStat(nodeIdx) == READY) { + const LO aggIdx = Kokkos::atomic_fetch_add(&aggCount(), 1); + vertex2AggId(nodeIdx, 0) = aggIdx; + aggStat(nodeIdx) = AGGREGATED; + procWinner(nodeIdx, 0) = myRank; + } + }); + // Truely we wish to compute: numAggregatedNodes = aggCount - + // numLocalAggregates before updating the value of numLocalAggregates. But + // since we also do not want to create a host mirror of aggCount we do some + // trickery... + numAggregatedNodes -= numLocalAggregates; + Kokkos::deep_copy(numLocalAggregates, aggCount); + numAggregatedNodes += numLocalAggregates; - // Compute the initial size of the aggregates. 
- // Note lbv 12-21-17: I am pretty sure that the aggregates will always be of size 1 - // at this point so we could simplify the code below a lot if this - // assumption is correct... - Kokkos::View aggSizesView("aggSizes", numLocalAggregates); - { - // Here there is a possibility that two vertices assigned to two different threads contribute - // to the same aggregate if somethings happened before phase 1? - auto aggSizesScatterView = Kokkos::Experimental::create_scatter_view(aggSizesView); - Kokkos::parallel_for("Aggregation Phase 1: compute initial aggregates size", - Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA (const LO nodeIdx) { - auto aggSizesScatterViewAccess = aggSizesScatterView.access(); - if(vertex2AggId(nodeIdx, 0) >= 0) - aggSizesScatterViewAccess(vertex2AggId(nodeIdx, 0)) += 1; - }); - Kokkos::Experimental::contribute(aggSizesView, aggSizesScatterView); - } + // Compute the initial size of the aggregates. + // Note lbv 12-21-17: I am pretty sure that the aggregates will always be of + // size 1 + // at this point so we could simplify the code below a lot + // if this assumption is correct... + Kokkos::View aggSizesView("aggSizes", numLocalAggregates); + { + // Here there is a possibility that two vertices assigned to two different + // threads contribute to the same aggregate if somethings happened before + // phase 1? 
+ auto aggSizesScatterView = + Kokkos::Experimental::create_scatter_view(aggSizesView); + Kokkos::parallel_for( + "Aggregation Phase 1: compute initial aggregates size", + Kokkos::RangePolicy(0, numRows), + KOKKOS_LAMBDA(const LO nodeIdx) { + auto aggSizesScatterViewAccess = aggSizesScatterView.access(); + if (vertex2AggId(nodeIdx, 0) >= 0) + aggSizesScatterViewAccess(vertex2AggId(nodeIdx, 0)) += 1; + }); + Kokkos::Experimental::contribute(aggSizesView, aggSizesScatterView); + } - LO tmpNumAggregatedNodes = 0; - Kokkos::parallel_reduce("Aggregation Phase 1: main parallel_reduce over aggSizes", - Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA (const size_t nodeIdx, LO & lNumAggregatedNodes) { - if(colors(nodeIdx) != 1 - && (aggStat(nodeIdx) == READY || aggStat(nodeIdx) == NOTSEL)) { - // Get neighbors of vertex i and look for local, aggregated, - // color 1 neighbor (valid root). - auto neighbors = lclLWGraph.getNeighborVertices(nodeIdx); - for(LO j = 0; j < neighbors.length; ++j) { - auto nei = neighbors.colidx(j); - if(lclLWGraph.isLocalNeighborVertex(nei) && colors(nei) == 1 - && aggStat(nei) == AGGREGATED) { + LO tmpNumAggregatedNodes = 0; + Kokkos::parallel_reduce( + "Aggregation Phase 1: main parallel_reduce over aggSizes", + Kokkos::RangePolicy(0, numRows), + KOKKOS_LAMBDA(const size_t nodeIdx, LO &lNumAggregatedNodes) { + if (colors(nodeIdx) != 1 && + (aggStat(nodeIdx) == READY || aggStat(nodeIdx) == NOTSEL)) { + // Get neighbors of vertex i and look for local, aggregated, + // color 1 neighbor (valid root). + auto neighbors = lclLWGraph.getNeighborVertices(nodeIdx); + for (LO j = 0; j < neighbors.length; ++j) { + auto nei = neighbors.colidx(j); + if (lclLWGraph.isLocalNeighborVertex(nei) && colors(nei) == 1 && + aggStat(nei) == AGGREGATED) { - // This atomic guarentees that any other node trying to - // join aggregate agg has the correct size. 
- LO agg = vertex2AggId(nei, 0); - const LO aggSize = Kokkos::atomic_fetch_add (&aggSizesView(agg), - 1); - if(aggSize < maxAggSize) { - //assign vertex i to aggregate with root j - vertex2AggId(nodeIdx, 0) = agg; - procWinner(nodeIdx, 0) = myRank; - aggStat(nodeIdx) = AGGREGATED; - ++lNumAggregatedNodes; - break; - } else { - // Decrement back the value of aggSizesView(agg) - Kokkos::atomic_decrement(&aggSizesView(agg)); - } - } - } - } - // if(aggStat(nodeIdx) != AGGREGATED) { - // lNumNonAggregatedNodes++; - if(aggStat(nodeIdx) == NOTSEL) { aggStat(nodeIdx) = READY; } - // } - }, tmpNumAggregatedNodes); - numAggregatedNodes += tmpNumAggregatedNodes; - numNonAggregatedNodes -= numAggregatedNodes; + // This atomic guarentees that any other node trying to + // join aggregate agg has the correct size. + LO agg = vertex2AggId(nei, 0); + const LO aggSize = + Kokkos::atomic_fetch_add(&aggSizesView(agg), 1); + if (aggSize < maxAggSize) { + // assign vertex i to aggregate with root j + vertex2AggId(nodeIdx, 0) = agg; + procWinner(nodeIdx, 0) = myRank; + aggStat(nodeIdx) = AGGREGATED; + ++lNumAggregatedNodes; + break; + } else { + // Decrement back the value of aggSizesView(agg) + Kokkos::atomic_decrement(&aggSizesView(agg)); + } + } + } + } + // if(aggStat(nodeIdx) != AGGREGATED) { + // lNumNonAggregatedNodes++; + if (aggStat(nodeIdx) == NOTSEL) { + aggStat(nodeIdx) = READY; + } + // } + }, + tmpNumAggregatedNodes); + numAggregatedNodes += tmpNumAggregatedNodes; + numNonAggregatedNodes -= numAggregatedNodes; - // update aggregate object - aggregates.SetNumAggregates(numLocalAggregates); - } + // update aggregate object + aggregates.SetNumAggregates(numLocalAggregates); +} - template - void AggregationPhase1Algorithm_kokkos:: - BuildAggregatesDeterministic(const LO maxAggSize, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const - { - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = 
graph.GetComm()->getRank(); +template +void AggregationPhase1Algorithm_kokkos:: + BuildAggregatesDeterministic( + const LO maxAggSize, const LWGraph_kokkos &graph, + Aggregates &aggregates, + Kokkos::View &aggStat, + LO &numNonAggregatedNodes) const { + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); - auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto colors = aggregates.GetGraphColors(); + auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView( + Xpetra::Access::ReadWrite); + auto procWinner = + aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto colors = aggregates.GetGraphColors(); - auto lclLWGraph = graph.getLocalLWGraph(); + auto lclLWGraph = graph.getLocalLWGraph(); - LO numLocalAggregates = aggregates.GetNumAggregates(); - Kokkos::View numLocalAggregatesView("Num aggregates"); - { - auto h_nla = Kokkos::create_mirror_view(numLocalAggregatesView); - h_nla() = numLocalAggregates; - Kokkos::deep_copy(numLocalAggregatesView, h_nla); - } + LO numLocalAggregates = aggregates.GetNumAggregates(); + Kokkos::View numLocalAggregatesView("Num aggregates"); + { + auto h_nla = Kokkos::create_mirror_view(numLocalAggregatesView); + h_nla() = numLocalAggregates; + Kokkos::deep_copy(numLocalAggregatesView, h_nla); + } - Kokkos::View newRoots("New root LIDs", numNonAggregatedNodes); - Kokkos::View numNewRoots("Number of new aggregates of current color"); - auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots); + Kokkos::View newRoots("New root LIDs", + numNonAggregatedNodes); + Kokkos::View numNewRoots( + "Number of new aggregates of current color"); + auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots); - //first loop build the set of new roots - Kokkos::parallel_for("Aggregation Phase 1: building list of new roots", - 
Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA(const LO i) - { - if(colors(i) == 1 && aggStat(i) == READY) - { - //i will become a root - newRoots(Kokkos::atomic_fetch_add(&numNewRoots(), 1)) = i; - } - }); - Kokkos::deep_copy(h_numNewRoots, numNewRoots); - //sort new roots by LID to guarantee determinism in agg IDs - Kokkos::sort(newRoots, 0, h_numNewRoots()); - LO numAggregated = 0; - Kokkos::parallel_reduce("Aggregation Phase 1: aggregating nodes", - Kokkos::RangePolicy(0, h_numNewRoots()), - KOKKOS_LAMBDA(const LO rootIndex, LO& lnumAggregated) - { - LO root = newRoots(rootIndex); - LO aggID = numLocalAggregatesView() + rootIndex; - LO aggSize = 1; - vertex2AggId(root, 0) = aggID; - procWinner(root, 0) = myRank; - aggStat(root) = AGGREGATED; - auto neighOfRoot = lclLWGraph.getNeighborVertices(root); - for(LO n = 0; n < neighOfRoot.length; n++) - { - LO neigh = neighOfRoot(n); - if (lclLWGraph.isLocalNeighborVertex(neigh) && aggStat(neigh) == READY) - { - //add neigh to aggregate - vertex2AggId(neigh, 0) = aggID; - procWinner(neigh, 0) = myRank; - aggStat(neigh) = AGGREGATED; - aggSize++; - if(aggSize == maxAggSize) - { - //can't add any more nodes - break; - } - } - } - lnumAggregated += aggSize; - }, numAggregated); - numNonAggregatedNodes -= numAggregated; - // update aggregate object - aggregates.SetNumAggregates(numLocalAggregates + h_numNewRoots()); - } + // first loop build the set of new roots + Kokkos::parallel_for( + "Aggregation Phase 1: building list of new roots", + Kokkos::RangePolicy(0, numRows), + KOKKOS_LAMBDA(const LO i) { + if (colors(i) == 1 && aggStat(i) == READY) { + // i will become a root + newRoots(Kokkos::atomic_fetch_add(&numNewRoots(), 1)) = i; + } + }); + Kokkos::deep_copy(h_numNewRoots, numNewRoots); + // sort new roots by LID to guarantee determinism in agg IDs + Kokkos::sort(newRoots, 0, h_numNewRoots()); + LO numAggregated = 0; + Kokkos::parallel_reduce( + "Aggregation Phase 1: aggregating nodes", + Kokkos::RangePolicy(0, 
h_numNewRoots()), + KOKKOS_LAMBDA(const LO rootIndex, LO &lnumAggregated) { + LO root = newRoots(rootIndex); + LO aggID = numLocalAggregatesView() + rootIndex; + LO aggSize = 1; + vertex2AggId(root, 0) = aggID; + procWinner(root, 0) = myRank; + aggStat(root) = AGGREGATED; + auto neighOfRoot = lclLWGraph.getNeighborVertices(root); + for (LO n = 0; n < neighOfRoot.length; n++) { + LO neigh = neighOfRoot(n); + if (lclLWGraph.isLocalNeighborVertex(neigh) && + aggStat(neigh) == READY) { + // add neigh to aggregate + vertex2AggId(neigh, 0) = aggID; + procWinner(neigh, 0) = myRank; + aggStat(neigh) = AGGREGATED; + aggSize++; + if (aggSize == maxAggSize) { + // can't add any more nodes + break; + } + } + } + lnumAggregated += aggSize; + }, + numAggregated); + numNonAggregatedNodes -= numAggregated; + // update aggregate object + aggregates.SetNumAggregates(numLocalAggregates + h_numNewRoots()); +} -} // end namespace +} // namespace MueLu #endif // MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_decl.hpp index 0535904d2311..c83fb96e3f74 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_decl.hpp @@ -46,73 +46,76 @@ #ifndef MUELU_AGGREGATIONPHASE2AALGORITHM_DECL_HPP_ #define MUELU_AGGREGATIONPHASE2AALGORITHM_DECL_HPP_ -#include "MueLu_ConfigDefs.hpp" #include "MueLu_AggregationAlgorithmBase.hpp" +#include "MueLu_ConfigDefs.hpp" #include "MueLu_AggregationPhase2aAlgorithm_fwd.hpp" -#include "MueLu_FactoryBase_fwd.hpp" #include "MueLu_Aggregates_fwd.hpp" +#include "MueLu_FactoryBase_fwd.hpp" #include "MueLu_GraphBase.hpp" namespace MueLu { - /*! - @class AggregationPhase2aAlgorithm class. 
- @brief Among unaggregated points, see if we can make a reasonable size aggregate out of it. - @ingroup Aggregation - - ### Idea ### - Among unaggregated points, see if we can make a reasonable size - aggregate out of it. We do this by looking at neighbors and seeing - how many are unaggregated and on my processor. Loosely, base the - number of new aggregates created on the percentage of unaggregated nodes. - - ### Parameters ### - Parameter | Meaning - ----------|-------- - aggregation: min agg size | minimum number of nodes which have to be in an aggregate. - aggregation: max agg size | maximum allowed number of nodes in an aggregate - - ### Comments ### - Only nodes with state READY are changed to AGGREGATED. - - */ - - template - class AggregationPhase2aAlgorithm : - public MueLu::AggregationAlgorithmBase { +/*! + @class AggregationPhase2aAlgorithm class. + @brief Among unaggregated points, see if we can make a reasonable size + aggregate out of it. + @ingroup Aggregation + + ### Idea ### + Among unaggregated points, see if we can make a reasonable size + aggregate out of it. We do this by looking at neighbors and seeing + how many are unaggregated and on my processor. Loosely, base the + number of new aggregates created on the percentage of unaggregated nodes. + + ### Parameters ### + Parameter | Meaning + ----------|-------- + aggregation: min agg size | minimum number of nodes which have to be in an + aggregate. aggregation: max agg size | maximum allowed number of nodes in an + aggregate + + ### Comments ### + Only nodes with state READY are changed to AGGREGATED. + +*/ + +template +class AggregationPhase2aAlgorithm + : public MueLu::AggregationAlgorithmBase { #undef MUELU_AGGREGATIONPHASE2AALGORITHM_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ - - //! Constructor. - AggregationPhase2aAlgorithm(const RCP& /* graphFact */ = Teuchos::null) { } +public: + //! @name Constructors/Destructors. 
+ //@{ - //! Destructor. - virtual ~AggregationPhase2aAlgorithm() { } + //! Constructor. + AggregationPhase2aAlgorithm( + const RCP & /* graphFact */ = Teuchos::null) {} - //@} + //! Destructor. + virtual ~AggregationPhase2aAlgorithm() {} + //@} - //! @name Aggregation methods. - //@{ + //! @name Aggregation methods. + //@{ - /*! @brief Local aggregation. */ + /*! @brief Local aggregation. */ - void BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; - //@} + void BuildAggregates(const ParameterList ¶ms, const GraphBase &graph, + Aggregates &aggregates, std::vector &aggStat, + LO &numNonAggregatedNodes) const; + //@} - std::string description() const { return "Phase 2a (secondary)"; } - }; + std::string description() const { return "Phase 2a (secondary)"; } +}; -} //namespace MueLu +} // namespace MueLu #define MUELU_AGGREGATIONPHASE2AALGORITHM_SHORT - #endif /* MUELU_AGGREGATIONPHASE2AALGORITHM_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_def.hpp index dc74c802d0a7..284cfaa8b183 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_def.hpp @@ -46,7 +46,6 @@ #ifndef MUELU_AGGREGATIONPHASE2AALGORITHM_DEF_HPP_ #define MUELU_AGGREGATIONPHASE2AALGORITHM_DEF_HPP_ - #include #include @@ -54,94 +53,102 @@ #include "MueLu_AggregationPhase2aAlgorithm_decl.hpp" -#include "MueLu_GraphBase.hpp" #include "MueLu_Aggregates.hpp" #include "MueLu_Exceptions.hpp" +#include "MueLu_GraphBase.hpp" #include "MueLu_Monitor.hpp" namespace MueLu { - template - void AggregationPhase2aAlgorithm::BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& 
numNonAggregatedNodes) const { - Monitor m(*this, "BuildAggregates"); - - int minNodesPerAggregate = params.get("aggregation: min agg size"); - int maxNodesPerAggregate = params.get("aggregation: max agg size"); - bool matchMLbehavior = params.get("aggregation: match ML phase2a"); - - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); +template +void AggregationPhase2aAlgorithm:: + BuildAggregates(const ParameterList ¶ms, const GraphBase &graph, + Aggregates &aggregates, std::vector &aggStat, + LO &numNonAggregatedNodes) const { + Monitor m(*this, "BuildAggregates"); - ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); - ArrayRCP procWinner = aggregates.GetProcWinner() ->getDataNonConst(0); + int minNodesPerAggregate = params.get("aggregation: min agg size"); + int maxNodesPerAggregate = params.get("aggregation: max agg size"); + bool matchMLbehavior = params.get("aggregation: match ML phase2a"); - LO numLocalAggregates = aggregates.GetNumAggregates(); + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); - LO numLocalNodes = procWinner.size(); - LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes; + ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); + ArrayRCP procWinner = aggregates.GetProcWinner()->getDataNonConst(0); - const double aggFactor = params.get("aggregation: phase2a agg factor"); - double factor = as(numLocalAggregated)/(numLocalNodes+1); - factor = pow(factor, aggFactor); - - int aggIndex = -1; - size_t aggSize = 0; - std::vector aggList(graph.getLocalMaxNumRowEntries()); - - for (LO rootCandidate = 0; rootCandidate < numRows; rootCandidate++) { - if (aggStat[rootCandidate] != READY) - continue; - - LO numNeighbors = 0; - aggSize = 0; - if (matchMLbehavior) { - aggList[aggSize++] = rootCandidate; - numNeighbors++; - } + LO numLocalAggregates = aggregates.GetNumAggregates(); - ArrayView neighOfINode = 
graph.getNeighborVertices(rootCandidate); + LO numLocalNodes = procWinner.size(); + LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes; + const double aggFactor = + params.get("aggregation: phase2a agg factor"); + double factor = as(numLocalAggregated) / (numLocalNodes + 1); + factor = pow(factor, aggFactor); - for (int j = 0; j < neighOfINode.size(); j++) { - LO neigh = neighOfINode[j]; + int aggIndex = -1; + size_t aggSize = 0; + std::vector aggList(graph.getLocalMaxNumRowEntries()); - if (neigh != rootCandidate) { - if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == READY) { - // If aggregate size does not exceed max size, add node to the tentative aggregate - // NOTE: We do not exit the loop over all neighbours since we have still - // to count all aggregated neighbour nodes for the aggregation criteria - // NOTE: We check here for the maximum aggregation size. If we would do it below - // with all the other check too big aggregates would not be accepted at all. - if (aggSize < as(maxNodesPerAggregate)) - aggList[aggSize++] = neigh; - } + for (LO rootCandidate = 0; rootCandidate < numRows; rootCandidate++) { + if (aggStat[rootCandidate] != READY) + continue; - numNeighbors++; - } - } + LO numNeighbors = 0; + aggSize = 0; + if (matchMLbehavior) { + aggList[aggSize++] = rootCandidate; + numNeighbors++; + } - // NOTE: ML uses a hardcoded value 3 instead of MinNodesPerAggregate - if (aggSize > as(minNodesPerAggregate) && - (aggSize > factor*numNeighbors)) { - // Accept new aggregate - // rootCandidate becomes the root of the newly formed aggregate - aggregates.SetIsRoot(rootCandidate); - aggIndex = numLocalAggregates++; - - for (size_t k = 0; k < aggSize; k++) { - aggStat [aggList[k]] = AGGREGATED; - vertex2AggId[aggList[k]] = aggIndex; - procWinner [aggList[k]] = myRank; + ArrayView neighOfINode = + graph.getNeighborVertices(rootCandidate); + + for (int j = 0; j < neighOfINode.size(); j++) { + LO neigh = neighOfINode[j]; + + if (neigh != 
rootCandidate) { + if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == READY) { + // If aggregate size does not exceed max size, add node to the + // tentative aggregate NOTE: We do not exit the loop over all + // neighbours since we have still + // to count all aggregated neighbour nodes for the aggregation + // criteria + // NOTE: We check here for the maximum aggregation size. If we would + // do it below + // with all the other check too big aggregates would not be + // accepted at all. + if (aggSize < as(maxNodesPerAggregate)) + aggList[aggSize++] = neigh; } - numNonAggregatedNodes -= aggSize; + numNeighbors++; } } - // update aggregate object - aggregates.SetNumAggregates(numLocalAggregates); + // NOTE: ML uses a hardcoded value 3 instead of MinNodesPerAggregate + if (aggSize > as(minNodesPerAggregate) && + (aggSize > factor * numNeighbors)) { + // Accept new aggregate + // rootCandidate becomes the root of the newly formed aggregate + aggregates.SetIsRoot(rootCandidate); + aggIndex = numLocalAggregates++; + + for (size_t k = 0; k < aggSize; k++) { + aggStat[aggList[k]] = AGGREGATED; + vertex2AggId[aggList[k]] = aggIndex; + procWinner[aggList[k]] = myRank; + } + + numNonAggregatedNodes -= aggSize; + } } -} // end namespace + // update aggregate object + aggregates.SetNumAggregates(numLocalAggregates); +} + +} // namespace MueLu #endif /* MUELU_AGGREGATIONPHASE2AALGORITHM_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_kokkos_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_kokkos_decl.hpp index d253e891cd71..7a2ef6b2d792 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_kokkos_decl.hpp @@ -57,81 +57,81 @@ #include "MueLu_FactoryBase_fwd.hpp" namespace MueLu { - /*! - @class AggregationPhase2aAlgorithm class. 
- @brief Among unaggregated points, see if we can make a reasonable size aggregate out of it. - @ingroup Aggregation - - ### Idea ### - Among unaggregated points, see if we can make a reasonable size - aggregate out of it. We do this by looking at neighbors and seeing - how many are unaggregated and on my processor. Loosely, base the - number of new aggregates created on the percentage of unaggregated nodes. - - ### Parameters ### - Parameter | Meaning - ----------|-------- - aggregation: min agg size | minimum number of nodes which have to be in an aggregate. - aggregation: max agg size | maximum allowed number of nodes in an aggregate - - ### Comments ### - Only nodes with state READY are changed to AGGREGATED. - - */ - - template - class AggregationPhase2aAlgorithm_kokkos : - public MueLu::AggregationAlgorithmBase_kokkos { +/*! + @class AggregationPhase2aAlgorithm class. + @brief Among unaggregated points, see if we can make a reasonable size + aggregate out of it. + @ingroup Aggregation + + ### Idea ### + Among unaggregated points, see if we can make a reasonable size + aggregate out of it. We do this by looking at neighbors and seeing + how many are unaggregated and on my processor. Loosely, base the + number of new aggregates created on the percentage of unaggregated nodes. + + ### Parameters ### + Parameter | Meaning + ----------|-------- + aggregation: min agg size | minimum number of nodes which have to be in an + aggregate. aggregation: max agg size | maximum allowed number of nodes in an + aggregate + + ### Comments ### + Only nodes with state READY are changed to AGGREGATED. 
+ +*/ + +template +class AggregationPhase2aAlgorithm_kokkos + : public MueLu::AggregationAlgorithmBase_kokkos { #undef MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - using device_type = typename LWGraph_kokkos::device_type; - using execution_space = typename LWGraph_kokkos::execution_space; - using memory_space = typename LWGraph_kokkos::memory_space; +public: + using device_type = typename LWGraph_kokkos::device_type; + using execution_space = typename LWGraph_kokkos::execution_space; + using memory_space = typename LWGraph_kokkos::memory_space; - //! @name Constructors/Destructors. - //@{ + //! @name Constructors/Destructors. + //@{ - //! Constructor. - AggregationPhase2aAlgorithm_kokkos(const RCP& /* graphFact */ = Teuchos::null) { } + //! Constructor. + AggregationPhase2aAlgorithm_kokkos( + const RCP & /* graphFact */ = Teuchos::null) {} - //! Destructor. - virtual ~AggregationPhase2aAlgorithm_kokkos() { } + //! Destructor. + virtual ~AggregationPhase2aAlgorithm_kokkos() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. 
*/ + void BuildAggregates(const Teuchos::ParameterList ¶ms, + const LWGraph_kokkos &graph, Aggregates &aggregates, + Kokkos::View &aggStat, + LO &numNonAggregatedNodes) const; - void BuildAggregates(const Teuchos::ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; + void BuildAggregatesRandom(const Teuchos::ParameterList ¶ms, + const LWGraph_kokkos &graph, + Aggregates &aggregates, + Kokkos::View &aggStat, + LO &numNonAggregatedNodes) const; - void BuildAggregatesRandom(const Teuchos::ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; + void BuildAggregatesDeterministic( + const Teuchos::ParameterList ¶ms, const LWGraph_kokkos &graph, + Aggregates &aggregates, Kokkos::View &aggStat, + LO &numNonAggregatedNodes) const; + //@} - void BuildAggregatesDeterministic(const Teuchos::ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; - //@} + std::string description() const { return "Phase 2a (secondary)"; } +}; - std::string description() const { return "Phase 2a (secondary)"; } - }; - -} //namespace MueLu +} // namespace MueLu #define MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_SHORT #endif // MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_kokkos_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_kokkos_def.hpp index 01fcb20a21b9..89a86e372ff6 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_kokkos_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2aAlgorithm_kokkos_def.hpp @@ -62,268 +62,275 @@ namespace MueLu { - template - void AggregationPhase2aAlgorithm_kokkos:: - BuildAggregates(const 
ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const { - - if(params.get("aggregation: deterministic")) { - Monitor m(*this, "BuildAggregatesDeterministic"); - BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes); - } else { - Monitor m(*this, "BuildAggregatesRandom"); - BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes); - } +template +void AggregationPhase2aAlgorithm_kokkos:: + BuildAggregates( + const ParameterList ¶ms, const LWGraph_kokkos &graph, + Aggregates &aggregates, + Kokkos::View &aggStat, + LO &numNonAggregatedNodes) const { + + if (params.get("aggregation: deterministic")) { + Monitor m(*this, "BuildAggregatesDeterministic"); + BuildAggregatesDeterministic(params, graph, aggregates, aggStat, + numNonAggregatedNodes); + } else { + Monitor m(*this, "BuildAggregatesRandom"); + BuildAggregatesRandom(params, graph, aggregates, aggStat, + numNonAggregatedNodes); + } - } // BuildAggregates - - template - void AggregationPhase2aAlgorithm_kokkos:: - BuildAggregatesRandom(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const - { - const int minNodesPerAggregate = params.get("aggregation: min agg size"); - const int maxNodesPerAggregate = params.get("aggregation: max agg size"); - bool matchMLbehavior = params.get("aggregation: match ML phase2a"); - - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); - - auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto colors = aggregates.GetGraphColors(); - const LO numColors = aggregates.GetGraphNumColors(); - - auto lclLWGraph = graph.getLocalLWGraph(); - - LO numLocalNodes = numRows; - LO 
numLocalAggregated = numLocalNodes - numNonAggregatedNodes; - - const double aggFactor = 0.5; - double factor = static_cast(numLocalAggregated)/(numLocalNodes+1); - factor = pow(factor, aggFactor); - - // LBV on Sept 12, 2019: this looks a little heavy handed, - // I'm not sure a view is needed to perform atomic updates. - // If we can avoid this and use a simple LO that would be - // simpler for later maintenance. - Kokkos::View numLocalAggregates("numLocalAggregates"); - typename Kokkos::View::HostMirror h_numLocalAggregates = +} // BuildAggregates + +template +void AggregationPhase2aAlgorithm_kokkos::BuildAggregatesRandom( + const ParameterList ¶ms, const LWGraph_kokkos &graph, + Aggregates &aggregates, + Kokkos::View &aggStat, + LO &numNonAggregatedNodes) const { + const int minNodesPerAggregate = params.get("aggregation: min agg size"); + const int maxNodesPerAggregate = params.get("aggregation: max agg size"); + bool matchMLbehavior = params.get("aggregation: match ML phase2a"); + + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); + + auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView( + Xpetra::Access::ReadWrite); + auto procWinner = + aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto colors = aggregates.GetGraphColors(); + const LO numColors = aggregates.GetGraphNumColors(); + + auto lclLWGraph = graph.getLocalLWGraph(); + + LO numLocalNodes = numRows; + LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes; + + const double aggFactor = 0.5; + double factor = static_cast(numLocalAggregated) / (numLocalNodes + 1); + factor = pow(factor, aggFactor); + + // LBV on Sept 12, 2019: this looks a little heavy handed, + // I'm not sure a view is needed to perform atomic updates. + // If we can avoid this and use a simple LO that would be + // simpler for later maintenance. 
+ Kokkos::View numLocalAggregates("numLocalAggregates"); + typename Kokkos::View::HostMirror h_numLocalAggregates = Kokkos::create_mirror_view(numLocalAggregates); - h_numLocalAggregates() = aggregates.GetNumAggregates(); - Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates); + h_numLocalAggregates() = aggregates.GetNumAggregates(); + Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates); + + // Now we create new aggregates using root nodes in all colors other than the + // first color, as the first color was already exhausted in Phase 1. + for (int color = 2; color < numColors + 1; ++color) { + LO tmpNumNonAggregatedNodes = 0; + Kokkos::parallel_reduce( + "Aggregation Phase 2a: loop over each individual color", + Kokkos::RangePolicy(0, numRows), + KOKKOS_LAMBDA(const LO rootCandidate, LO &lNumNonAggregatedNodes) { + if (aggStat(rootCandidate) == READY && + colors(rootCandidate) == color) { + + LO numNeighbors = 0; + LO aggSize = 0; + if (matchMLbehavior) { + aggSize += 1; + numNeighbors += 1; + } + + auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate); + + // Loop over neighbors to count how many nodes could join + // the new aggregate + + for (int j = 0; j < neighbors.length; ++j) { + LO neigh = neighbors(j); + if (neigh != rootCandidate) { + if (lclLWGraph.isLocalNeighborVertex(neigh) && + (aggStat(neigh) == READY) && + (aggSize < maxNodesPerAggregate)) { + ++aggSize; + } + ++numNeighbors; + } + } + + // If a sufficient number of nodes can join the new aggregate + // then we actually create the aggregate. + if (aggSize > minNodesPerAggregate && + (aggSize > factor * numNeighbors)) { + + // aggregates.SetIsRoot(rootCandidate); + LO aggIndex = Kokkos::atomic_fetch_add(&numLocalAggregates(), 1); + + LO numAggregated = 0; + + if (matchMLbehavior) { + // Add the root. 
+ aggStat(rootCandidate) = AGGREGATED; + vertex2AggId(rootCandidate, 0) = aggIndex; + procWinner(rootCandidate, 0) = myRank; + ++numAggregated; + --lNumNonAggregatedNodes; + } + + for (int neighIdx = 0; neighIdx < neighbors.length; ++neighIdx) { + LO neigh = neighbors(neighIdx); + if (neigh != rootCandidate) { + if (lclLWGraph.isLocalNeighborVertex(neigh) && + (aggStat(neigh) == READY) && (numAggregated < aggSize)) { + aggStat(neigh) = AGGREGATED; + vertex2AggId(neigh, 0) = aggIndex; + procWinner(neigh, 0) = myRank; + + ++numAggregated; + --lNumNonAggregatedNodes; + } + } + } + } + } + }, + tmpNumNonAggregatedNodes); + numNonAggregatedNodes += tmpNumNonAggregatedNodes; + } - // Now we create new aggregates using root nodes in all colors other than the first color, - // as the first color was already exhausted in Phase 1. - for(int color = 2; color < numColors + 1; ++color) { + // update aggregate object + Kokkos::deep_copy(h_numLocalAggregates, numLocalAggregates); + aggregates.SetNumAggregates(h_numLocalAggregates()); +} // BuildAggregatesRandom + +template +void AggregationPhase2aAlgorithm_kokkos:: + BuildAggregatesDeterministic( + const ParameterList ¶ms, const LWGraph_kokkos &graph, + Aggregates &aggregates, + Kokkos::View &aggStat, + LO &numNonAggregatedNodes) const { + const int minNodesPerAggregate = params.get("aggregation: min agg size"); + const int maxNodesPerAggregate = params.get("aggregation: max agg size"); + + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); + + auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView( + Xpetra::Access::ReadWrite); + auto procWinner = + aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto colors = aggregates.GetGraphColors(); + const LO numColors = aggregates.GetGraphNumColors(); + + auto lclLWGraph = graph.getLocalLWGraph(); + + LO numLocalNodes = procWinner.size(); + LO numLocalAggregated = numLocalNodes - 
numNonAggregatedNodes; + + const double aggFactor = 0.5; + double factor = as(numLocalAggregated) / (numLocalNodes + 1); + factor = pow(factor, aggFactor); + + Kokkos::View numLocalAggregates("numLocalAggregates"); + typename Kokkos::View::HostMirror h_numLocalAggregates = + Kokkos::create_mirror_view(numLocalAggregates); + h_numLocalAggregates() = aggregates.GetNumAggregates(); + Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates); + + // Now we create new aggregates using root nodes in all colors other than the + // first color, as the first color was already exhausted in Phase 1. + // + // In the deterministic version, exactly the same set of aggregates will be + // created (as the nondeterministic version) because no vertex V can be a + // neighbor of two vertices of the same color, so two root candidates can't + // fight over V + // + // But, the precise values in vertex2AggId need to match exactly, so just sort + // the new roots of each color before assigning aggregate IDs + + // numNonAggregatedNodes is the best available upper bound for the number of + // aggregates which may be created in this phase, so use it for the size of + // newRoots + Kokkos::View newRoots("New root LIDs", + numNonAggregatedNodes); + Kokkos::View numNewRoots( + "Number of new aggregates of current color"); + auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots); + for (int color = 1; color < numColors + 1; ++color) { + h_numNewRoots() = 0; + Kokkos::deep_copy(numNewRoots, h_numNewRoots); + Kokkos::parallel_for( + "Aggregation Phase 2a: determining new roots of current color", + Kokkos::RangePolicy(0, numRows), + KOKKOS_LAMBDA(const LO rootCandidate) { + if (aggStat(rootCandidate) == READY && + colors(rootCandidate) == color) { + LO aggSize = 0; + auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate); + // Loop over neighbors to count how many nodes could join + // the new aggregate + LO numNeighbors = 0; + for (int j = 0; j < neighbors.length; ++j) { + LO 
neigh = neighbors(j); + if (neigh != rootCandidate) { + if (lclLWGraph.isLocalNeighborVertex(neigh) && + aggStat(neigh) == READY && aggSize < maxNodesPerAggregate) { + ++aggSize; + } + ++numNeighbors; + } + } + // If a sufficient number of nodes can join the new aggregate + // then we mark rootCandidate as a future root. + if (aggSize > minNodesPerAggregate && + aggSize > factor * numNeighbors) { + LO newRootIndex = Kokkos::atomic_fetch_add(&numNewRoots(), 1); + newRoots(newRootIndex) = rootCandidate; + } + } + }); + Kokkos::deep_copy(h_numNewRoots, numNewRoots); + + if (h_numNewRoots() > 0) { + // sort the new root indices + Kokkos::sort(newRoots, 0, h_numNewRoots()); + // now, loop over all new roots again and actually create the aggregates LO tmpNumNonAggregatedNodes = 0; - Kokkos::parallel_reduce("Aggregation Phase 2a: loop over each individual color", - Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA (const LO rootCandidate, LO& lNumNonAggregatedNodes) { - if(aggStat(rootCandidate) == READY && - colors(rootCandidate) == color) { - - LO numNeighbors = 0; - LO aggSize = 0; - if (matchMLbehavior) { - aggSize += 1; - numNeighbors +=1; - } - - auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate); - - // Loop over neighbors to count how many nodes could join - // the new aggregate - - for(int j = 0; j < neighbors.length; ++j) { - LO neigh = neighbors(j); - if(neigh != rootCandidate) { - if(lclLWGraph.isLocalNeighborVertex(neigh) && - (aggStat(neigh) == READY) && - (aggSize < maxNodesPerAggregate)) { - ++aggSize; - } - ++numNeighbors; - } - } - - // If a sufficient number of nodes can join the new aggregate - // then we actually create the aggregate. - if(aggSize > minNodesPerAggregate && - (aggSize > factor*numNeighbors)) { - - // aggregates.SetIsRoot(rootCandidate); - LO aggIndex = Kokkos:: - atomic_fetch_add(&numLocalAggregates(), 1); - - LO numAggregated = 0; - - if (matchMLbehavior) { - // Add the root. 
- aggStat(rootCandidate) = AGGREGATED; - vertex2AggId(rootCandidate, 0) = aggIndex; - procWinner(rootCandidate, 0) = myRank; - ++numAggregated; - --lNumNonAggregatedNodes; - } - - for(int neighIdx = 0; neighIdx < neighbors.length; ++neighIdx) { - LO neigh = neighbors(neighIdx); - if(neigh != rootCandidate) { - if(lclLWGraph.isLocalNeighborVertex(neigh) && - (aggStat(neigh) == READY) && - (numAggregated < aggSize)) { - aggStat(neigh) = AGGREGATED; - vertex2AggId(neigh, 0) = aggIndex; - procWinner(neigh, 0) = myRank; - - ++numAggregated; - --lNumNonAggregatedNodes; - } - } - } - } - } - }, tmpNumNonAggregatedNodes); + // First, just find the set of color vertices which will become aggregate + // roots + Kokkos::parallel_reduce( + "Aggregation Phase 2a: create new aggregates", + Kokkos::RangePolicy(0, h_numNewRoots()), + KOKKOS_LAMBDA(const LO newRootIndex, LO &lNumNonAggregatedNodes) { + LO root = newRoots(newRootIndex); + LO newAggID = numLocalAggregates() + newRootIndex; + auto neighbors = lclLWGraph.getNeighborVertices(root); + // Loop over neighbors and add them to new aggregate + aggStat(root) = AGGREGATED; + vertex2AggId(root, 0) = newAggID; + LO aggSize = 1; + for (int j = 0; j < neighbors.length; ++j) { + LO neigh = neighbors(j); + if (neigh != root) { + if (lclLWGraph.isLocalNeighborVertex(neigh) && + aggStat(neigh) == READY && aggSize < maxNodesPerAggregate) { + aggStat(neigh) = AGGREGATED; + vertex2AggId(neigh, 0) = newAggID; + procWinner(neigh, 0) = myRank; + aggSize++; + } + } + } + lNumNonAggregatedNodes -= aggSize; + }, + tmpNumNonAggregatedNodes); numNonAggregatedNodes += tmpNumNonAggregatedNodes; + h_numLocalAggregates() += h_numNewRoots(); + Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates); } - - // update aggregate object - Kokkos::deep_copy(h_numLocalAggregates, numLocalAggregates); - aggregates.SetNumAggregates(h_numLocalAggregates()); - } // BuildAggregatesRandom - - template - void AggregationPhase2aAlgorithm_kokkos:: - 
BuildAggregatesDeterministic(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const - { - const int minNodesPerAggregate = params.get("aggregation: min agg size"); - const int maxNodesPerAggregate = params.get("aggregation: max agg size"); - - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); - - auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto colors = aggregates.GetGraphColors(); - const LO numColors = aggregates.GetGraphNumColors(); - - auto lclLWGraph = graph.getLocalLWGraph(); - - LO numLocalNodes = procWinner.size(); - LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes; - - const double aggFactor = 0.5; - double factor = as(numLocalAggregated)/(numLocalNodes+1); - factor = pow(factor, aggFactor); - - Kokkos::View numLocalAggregates("numLocalAggregates"); - typename Kokkos::View::HostMirror h_numLocalAggregates = - Kokkos::create_mirror_view(numLocalAggregates); - h_numLocalAggregates() = aggregates.GetNumAggregates(); - Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates); - - // Now we create new aggregates using root nodes in all colors other than the first color, - // as the first color was already exhausted in Phase 1. 
- // - // In the deterministic version, exactly the same set of aggregates will be created - // (as the nondeterministic version) - // because no vertex V can be a neighbor of two vertices of the same color, so two root - // candidates can't fight over V - // - // But, the precise values in vertex2AggId need to match exactly, so just sort the new - // roots of each color before assigning aggregate IDs - - //numNonAggregatedNodes is the best available upper bound for the number of aggregates - //which may be created in this phase, so use it for the size of newRoots - Kokkos::View newRoots("New root LIDs", numNonAggregatedNodes); - Kokkos::View numNewRoots("Number of new aggregates of current color"); - auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots); - for(int color = 1; color < numColors + 1; ++color) { - h_numNewRoots() = 0; - Kokkos::deep_copy(numNewRoots, h_numNewRoots); - Kokkos::parallel_for("Aggregation Phase 2a: determining new roots of current color", - Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA(const LO rootCandidate) { - if(aggStat(rootCandidate) == READY && - colors(rootCandidate) == color) { - LO aggSize = 0; - auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate); - // Loop over neighbors to count how many nodes could join - // the new aggregate - LO numNeighbors = 0; - for(int j = 0; j < neighbors.length; ++j) { - LO neigh = neighbors(j); - if(neigh != rootCandidate) - { - if(lclLWGraph.isLocalNeighborVertex(neigh) && - aggStat(neigh) == READY && - aggSize < maxNodesPerAggregate) - { - ++aggSize; - } - ++numNeighbors; - } - } - // If a sufficient number of nodes can join the new aggregate - // then we mark rootCandidate as a future root. 
- if(aggSize > minNodesPerAggregate && aggSize > factor*numNeighbors) { - LO newRootIndex = Kokkos::atomic_fetch_add(&numNewRoots(), 1); - newRoots(newRootIndex) = rootCandidate; - } - } - }); - Kokkos::deep_copy(h_numNewRoots, numNewRoots); - - if(h_numNewRoots() > 0) { - //sort the new root indices - Kokkos::sort(newRoots, 0, h_numNewRoots()); - //now, loop over all new roots again and actually create the aggregates - LO tmpNumNonAggregatedNodes = 0; - //First, just find the set of color vertices which will become aggregate roots - Kokkos::parallel_reduce("Aggregation Phase 2a: create new aggregates", - Kokkos::RangePolicy(0, h_numNewRoots()), - KOKKOS_LAMBDA (const LO newRootIndex, LO& lNumNonAggregatedNodes) { - LO root = newRoots(newRootIndex); - LO newAggID = numLocalAggregates() + newRootIndex; - auto neighbors = lclLWGraph.getNeighborVertices(root); - // Loop over neighbors and add them to new aggregate - aggStat(root) = AGGREGATED; - vertex2AggId(root, 0) = newAggID; - LO aggSize = 1; - for(int j = 0; j < neighbors.length; ++j) { - LO neigh = neighbors(j); - if(neigh != root) { - if(lclLWGraph.isLocalNeighborVertex(neigh) && - aggStat(neigh) == READY && - aggSize < maxNodesPerAggregate) { - aggStat(neigh) = AGGREGATED; - vertex2AggId(neigh, 0) = newAggID; - procWinner(neigh, 0) = myRank; - aggSize++; - } - } - } - lNumNonAggregatedNodes -= aggSize; - }, tmpNumNonAggregatedNodes); - numNonAggregatedNodes += tmpNumNonAggregatedNodes; - h_numLocalAggregates() += h_numNewRoots(); - Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates); - } - } - aggregates.SetNumAggregates(h_numLocalAggregates()); } + aggregates.SetNumAggregates(h_numLocalAggregates()); +} -} // end namespace +} // namespace MueLu #endif // MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_decl.hpp index 
88b156d23c14..03549f0d1910 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_decl.hpp @@ -56,62 +56,65 @@ #include "MueLu_AggregationPhase2bAlgorithm_fwd.hpp" namespace MueLu { - /*! - @class AggregationPhase2bAlgorithm class. - @brief Add leftovers to existing aggregates - @ingroup Aggregation - - ### Idea ### - In phase 2b non-aggregated nodes are added to existing aggregates. - All neighbors of the unaggregated node are checked and the corresponding - aggregate weight is increased. The unaggregated node is added to the aggregate - with the best weight. A simple penalty strategy makes sure that the non-aggregated - nodes are added to different aggregates. - The routine runs twice to cover non-aggregate nodes which have a node distance - of two to existing aggregates. Assuming that the node distance is not greater - than 3 (the aggregate diameter size), running the algorithm only twice should - be sufficient. - - ### Comments ### - Only nodes with state READY are changed to AGGREGATED. There are no aggregation criteria considered. Especially the aggregation: max agg size criterion is ignored. - This is not a problem, since after the previous aggregation phases one should not be able to build too large aggregates. - */ - - template - class AggregationPhase2bAlgorithm : - public MueLu::AggregationAlgorithmBase { +/*! + @class AggregationPhase2bAlgorithm class. + @brief Add leftovers to existing aggregates + @ingroup Aggregation + + ### Idea ### + In phase 2b non-aggregated nodes are added to existing aggregates. + All neighbors of the unaggregated node are checked and the corresponding + aggregate weight is increased. The unaggregated node is added to the aggregate + with the best weight. A simple penalty strategy makes sure that the + non-aggregated nodes are added to different aggregates. 
The routine runs twice + to cover non-aggregate nodes which have a node distance of two to existing + aggregates. Assuming that the node distance is not greater than 3 (the + aggregate diameter size), running the algorithm only twice should be + sufficient. + + ### Comments ### + Only nodes with state READY are changed to AGGREGATED. There are no + aggregation criteria considered. Especially the aggregation: max agg size + criterion is ignored. This is not a problem, since after the previous + aggregation phases one should not be able to build too large aggregates. +*/ + +template +class AggregationPhase2bAlgorithm + : public MueLu::AggregationAlgorithmBase { #undef MUELU_AGGREGATIONPHASE2BALGORITHM_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ +public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - AggregationPhase2bAlgorithm(const RCP& /* graphFact */ = Teuchos::null) { } + //! Constructor. + AggregationPhase2bAlgorithm( + const RCP & /* graphFact */ = Teuchos::null) {} - //! Destructor. - virtual ~AggregationPhase2bAlgorithm() { } + //! Destructor. + virtual ~AggregationPhase2bAlgorithm() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. 
*/ + void BuildAggregates(const ParameterList ¶ms, const GraphBase &graph, + Aggregates &aggregates, std::vector &aggStat, + LO &numNonAggregatedNodes) const; + //@} - void BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; - //@} + std::string description() const { return "Phase 2b (expansion)"; } +}; - std::string description() const { return "Phase 2b (expansion)"; } - }; - -} //namespace MueLu +} // namespace MueLu #define MUELU_AGGREGATIONPHASE2BALGORITHM_SHORT - #endif /* MUELU_AGGREGATIONPHASE2BALGORITHM_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_def.hpp index 59b7a902621d..2ddb68492975 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_def.hpp @@ -60,87 +60,92 @@ namespace MueLu { - // Try to stick unaggregated nodes into a neighboring aggregate if they are - // not already too big - template - void AggregationPhase2bAlgorithm::BuildAggregates(const ParameterList& /* params */, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildAggregates"); - - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); - - ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); - ArrayRCP procWinner = aggregates.GetProcWinner() ->getDataNonConst(0); - - LO numLocalAggregates = aggregates.GetNumAggregates(); - - const int defaultConnectWeight = 100; - const int penaltyConnectWeight = 10; - - std::vector aggWeight (numLocalAggregates, 0); - std::vector connectWeight(numRows, defaultConnectWeight); - std::vector aggPenalties (numRows, 0); - - // We do this cycle 
twice. - // I don't know why, but ML does it too - // taw: by running the aggregation routine more than once there is a chance that also - // non-aggregated nodes with a node distance of two are added to existing aggregates. - // Assuming that the aggregate size is 3 in each direction running the algorithm only twice - // should be sufficient. - for (int k = 0; k < 2; k++) { - for (LO i = 0; i < numRows; i++) { - if (aggStat[i] != READY) - continue; - - ArrayView neighOfINode = graph.getNeighborVertices(i); - - for (int j = 0; j < neighOfINode.size(); j++) { - LO neigh = neighOfINode[j]; - - // We don't check (neigh != i), as it is covered by checking (aggStat[neigh] == AGGREGATED) - if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == AGGREGATED) - aggWeight[vertex2AggId[neigh]] += connectWeight[neigh]; - } - - int bestScore = -100000; - int bestAggId = -1; - int bestConnect = -1; +// Try to stick unaggregated nodes into a neighboring aggregate if they are +// not already too big +template +void AggregationPhase2bAlgorithm:: + BuildAggregates(const ParameterList & /* params */, const GraphBase &graph, + Aggregates &aggregates, std::vector &aggStat, + LO &numNonAggregatedNodes) const { + Monitor m(*this, "BuildAggregates"); + + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); + + ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); + ArrayRCP procWinner = aggregates.GetProcWinner()->getDataNonConst(0); + + LO numLocalAggregates = aggregates.GetNumAggregates(); + + const int defaultConnectWeight = 100; + const int penaltyConnectWeight = 10; + + std::vector aggWeight(numLocalAggregates, 0); + std::vector connectWeight(numRows, defaultConnectWeight); + std::vector aggPenalties(numRows, 0); + + // We do this cycle twice. 
+ // I don't know why, but ML does it too + // taw: by running the aggregation routine more than once there is a chance + // that also non-aggregated nodes with a node distance of two are added to + // existing aggregates. Assuming that the aggregate size is 3 in each + // direction running the algorithm only twice should be sufficient. + for (int k = 0; k < 2; k++) { + for (LO i = 0; i < numRows; i++) { + if (aggStat[i] != READY) + continue; + + ArrayView neighOfINode = graph.getNeighborVertices(i); + + for (int j = 0; j < neighOfINode.size(); j++) { + LO neigh = neighOfINode[j]; + + // We don't check (neigh != i), as it is covered by checking + // (aggStat[neigh] == AGGREGATED) + if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == AGGREGATED) + aggWeight[vertex2AggId[neigh]] += connectWeight[neigh]; + } - for (int j = 0; j < neighOfINode.size(); j++) { - LO neigh = neighOfINode[j]; + int bestScore = -100000; + int bestAggId = -1; + int bestConnect = -1; - if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == AGGREGATED) { - int aggId = vertex2AggId[neigh]; - int score = aggWeight[aggId] - aggPenalties[aggId]; + for (int j = 0; j < neighOfINode.size(); j++) { + LO neigh = neighOfINode[j]; - if (score > bestScore) { - bestAggId = aggId; - bestScore = score; - bestConnect = connectWeight[neigh]; + if (graph.isLocalNeighborVertex(neigh) && + aggStat[neigh] == AGGREGATED) { + int aggId = vertex2AggId[neigh]; + int score = aggWeight[aggId] - aggPenalties[aggId]; - } else if (aggId == bestAggId && connectWeight[neigh] > bestConnect) { - bestConnect = connectWeight[neigh]; - } + if (score > bestScore) { + bestAggId = aggId; + bestScore = score; + bestConnect = connectWeight[neigh]; - // Reset the weights for the next loop - aggWeight[aggId] = 0; + } else if (aggId == bestAggId && connectWeight[neigh] > bestConnect) { + bestConnect = connectWeight[neigh]; } + + // Reset the weights for the next loop + aggWeight[aggId] = 0; } + } - if (bestScore >= 0) { - 
aggStat [i] = AGGREGATED; - vertex2AggId[i] = bestAggId; - procWinner [i] = myRank; + if (bestScore >= 0) { + aggStat[i] = AGGREGATED; + vertex2AggId[i] = bestAggId; + procWinner[i] = myRank; - numNonAggregatedNodes--; + numNonAggregatedNodes--; - aggPenalties[bestAggId]++; - connectWeight[i] = bestConnect - penaltyConnectWeight; - } + aggPenalties[bestAggId]++; + connectWeight[i] = bestConnect - penaltyConnectWeight; } } } +} -} // end namespace +} // namespace MueLu #endif /* MUELU_AGGREGATIONPHASE2BALGORITHM_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_kokkos_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_kokkos_decl.hpp index 941f732e64d0..776019131d57 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_kokkos_decl.hpp @@ -58,80 +58,80 @@ #include "MueLu_LWGraph_kokkos.hpp" namespace MueLu { - /*! - @class AggregationPhase2bAlgorithm class. - @brief Add leftovers to existing aggregates - @ingroup Aggregation - - ### Idea ### - In phase 2b non-aggregated nodes are added to existing aggregates. - All neighbors of the unaggregated node are checked and the corresponding - aggregate weight is increased. The unaggregated node is added to the aggregate - with the best weight. A simple penalty strategy makes sure that the non-aggregated - nodes are added to different aggregates. - The routine runs twice to cover non-aggregate nodes which have a node distance - of two to existing aggregates. Assuming that the node distance is not greater - than 3 (the aggregate diameter size), running the algorithm only twice should - be sufficient. - - ### Comments ### - Only nodes with state READY are changed to AGGREGATED. There are no aggregation criteria considered. Especially the aggregation: max agg size criterion is ignored. 
- This is not a problem, since after the previous aggregation phases one should not be able to build too large aggregates. - */ - - template - class AggregationPhase2bAlgorithm_kokkos : - public MueLu::AggregationAlgorithmBase_kokkos { +/*! + @class AggregationPhase2bAlgorithm class. + @brief Add leftovers to existing aggregates + @ingroup Aggregation + + ### Idea ### + In phase 2b non-aggregated nodes are added to existing aggregates. + All neighbors of the unaggregated node are checked and the corresponding + aggregate weight is increased. The unaggregated node is added to the aggregate + with the best weight. A simple penalty strategy makes sure that the + non-aggregated nodes are added to different aggregates. The routine runs twice + to cover non-aggregate nodes which have a node distance of two to existing + aggregates. Assuming that the node distance is not greater than 3 (the + aggregate diameter size), running the algorithm only twice should be + sufficient. + + ### Comments ### + Only nodes with state READY are changed to AGGREGATED. There are no + aggregation criteria considered. Especially the aggregation: max agg size + criterion is ignored. This is not a problem, since after the previous + aggregation phases one should not be able to build too large aggregates. +*/ + +template +class AggregationPhase2bAlgorithm_kokkos + : public MueLu::AggregationAlgorithmBase_kokkos { #undef MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - using device_type = typename LWGraph_kokkos::device_type; - using execution_space = typename LWGraph_kokkos::execution_space; - using memory_space = typename LWGraph_kokkos::memory_space; +public: + using device_type = typename LWGraph_kokkos::device_type; + using execution_space = typename LWGraph_kokkos::execution_space; + using memory_space = typename LWGraph_kokkos::memory_space; - //! @name Constructors/Destructors. - //@{ + //! @name Constructors/Destructors. + //@{ - //! 
Constructor. - AggregationPhase2bAlgorithm_kokkos(const RCP& /* graphFact */ = Teuchos::null) { } + //! Constructor. + AggregationPhase2bAlgorithm_kokkos( + const RCP & /* graphFact */ = Teuchos::null) {} - //! Destructor. - virtual ~AggregationPhase2bAlgorithm_kokkos() { } + //! Destructor. + virtual ~AggregationPhase2bAlgorithm_kokkos() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. */ + void BuildAggregates(const ParameterList ¶ms, const LWGraph_kokkos &graph, + Aggregates &aggregates, + Kokkos::View &aggStat, + LO &numNonAggregatedNodes) const; - void BuildAggregates(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; + void BuildAggregatesRandom(const ParameterList ¶ms, + const LWGraph_kokkos &graph, + Aggregates &aggregates, + Kokkos::View &aggStat, + LO &numNonAggregatedNodes) const; - void BuildAggregatesRandom(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; + void BuildAggregatesDeterministic( + const ParameterList ¶ms, const LWGraph_kokkos &graph, + Aggregates &aggregates, Kokkos::View &aggStat, + LO &numNonAggregatedNodes) const; + //@} - void BuildAggregatesDeterministic(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; - //@} + std::string description() const { return "Phase 2b (expansion)"; } +}; - std::string description() const { return "Phase 2b (expansion)"; } - }; - -} //namespace MueLu +} // namespace MueLu #define MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_SHORT #endif // MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_kokkos_def.hpp 
b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_kokkos_def.hpp index b6225f814c28..3c9c39dfb693 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_kokkos_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase2bAlgorithm_kokkos_def.hpp @@ -60,179 +60,191 @@ namespace MueLu { - // Try to stick unaggregated nodes into a neighboring aggregate if they are - // not already too big - template - void AggregationPhase2bAlgorithm_kokkos:: - BuildAggregates(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const { - - if(params.get("aggregation: deterministic")) { - Monitor m(*this, "BuildAggregatesDeterministic"); - BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes); - } else { - Monitor m(*this, "BuildAggregatesRandom"); - BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes); - } +// Try to stick unaggregated nodes into a neighboring aggregate if they are +// not already too big +template +void AggregationPhase2bAlgorithm_kokkos:: + BuildAggregates( + const ParameterList ¶ms, const LWGraph_kokkos &graph, + Aggregates &aggregates, + Kokkos::View &aggStat, + LO &numNonAggregatedNodes) const { + + if (params.get("aggregation: deterministic")) { + Monitor m(*this, "BuildAggregatesDeterministic"); + BuildAggregatesDeterministic(params, graph, aggregates, aggStat, + numNonAggregatedNodes); + } else { + Monitor m(*this, "BuildAggregatesRandom"); + BuildAggregatesRandom(params, graph, aggregates, aggStat, + numNonAggregatedNodes); + } + +} // BuildAggregates + +template +void AggregationPhase2bAlgorithm_kokkos::BuildAggregatesRandom( + const ParameterList ¶ms, const LWGraph_kokkos &graph, + Aggregates &aggregates, + Kokkos::View &aggStat, + LO &numNonAggregatedNodes) const { + + const LO numRows = graph.GetNodeNumVertices(); + 
const int myRank = graph.GetComm()->getRank(); + + auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView( + Xpetra::Access::ReadWrite); + auto procWinner = + aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto colors = aggregates.GetGraphColors(); + const LO numColors = aggregates.GetGraphNumColors(); + const LO numLocalAggregates = aggregates.GetNumAggregates(); + + auto lclLWGraph = graph.getLocalLWGraph(); + + const LO defaultConnectWeight = 100; + const LO penaltyConnectWeight = 10; + + Kokkos::View aggWeight("aggWeight", numLocalAggregates); + Kokkos::View connectWeight("connectWeight", numRows); + Kokkos::View aggPenalties("aggPenalties", + numLocalAggregates); + + Kokkos::deep_copy(connectWeight, defaultConnectWeight); + + // taw: by running the aggregation routine more than once there is a chance + // that also non-aggregated nodes with a node distance of two are added to + // existing aggregates. Assuming that the aggregate size is 3 in each + // direction running the algorithm only twice should be sufficient. lbv: If + // the prior phase of aggregation where run without specifying an aggregate + // size, the distance 2 coloring and phase 1 aggregation actually guarantee + // that only one iteration is needed to reach distance 2 neighbors. 
+ int maxIters = 2; + int maxNodesPerAggregate = params.get("aggregation: max agg size"); + if (maxNodesPerAggregate == std::numeric_limits::max()) { + maxIters = 1; + } + for (int iter = 0; iter < maxIters; ++iter) { + for (LO color = 1; color <= numColors; ++color) { + Kokkos::deep_copy(aggWeight, 0); + + // the reduce counts how many nodes are aggregated by this phase, + // which will then be subtracted from numNonAggregatedNodes + LO numAggregated = 0; + Kokkos::parallel_reduce( + "Aggregation Phase 2b: aggregates expansion", + Kokkos::RangePolicy(0, numRows), + KOKKOS_LAMBDA(const LO i, LO &tmpNumAggregated) { + if (aggStat(i) != READY || colors(i) != color) + return; + + auto neighOfINode = lclLWGraph.getNeighborVertices(i); + for (int j = 0; j < neighOfINode.length; j++) { + LO neigh = neighOfINode(j); + + // We don't check (neigh != i), as it is covered by checking + // (aggStat[neigh] == AGGREGATED) + if (lclLWGraph.isLocalNeighborVertex(neigh) && + aggStat(neigh) == AGGREGATED) + Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)), + connectWeight(neigh)); + } + + int bestScore = -100000; + int bestAggId = -1; + int bestConnect = -1; + + for (int j = 0; j < neighOfINode.length; j++) { + LO neigh = neighOfINode(j); + + if (lclLWGraph.isLocalNeighborVertex(neigh) && + aggStat(neigh) == AGGREGATED) { + auto aggId = vertex2AggId(neigh, 0); + int score = aggWeight(aggId) - aggPenalties(aggId); + + if (score > bestScore) { + bestAggId = aggId; + bestScore = score; + bestConnect = connectWeight(neigh); + + } else if (aggId == bestAggId && + connectWeight(neigh) > bestConnect) { + bestConnect = connectWeight(neigh); + } + } + } + if (bestScore >= 0) { + aggStat(i) = AGGREGATED; + vertex2AggId(i, 0) = bestAggId; + procWinner(i, 0) = myRank; - } // BuildAggregates - - template - void AggregationPhase2bAlgorithm_kokkos:: - BuildAggregatesRandom(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& 
numNonAggregatedNodes) const { - - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); - - auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto colors = aggregates.GetGraphColors(); - const LO numColors = aggregates.GetGraphNumColors(); - const LO numLocalAggregates = aggregates.GetNumAggregates(); - - auto lclLWGraph = graph.getLocalLWGraph(); - - const LO defaultConnectWeight = 100; - const LO penaltyConnectWeight = 10; - - Kokkos::View aggWeight ("aggWeight", numLocalAggregates); - Kokkos::View connectWeight("connectWeight", numRows); - Kokkos::View aggPenalties ("aggPenalties", numLocalAggregates); - - Kokkos::deep_copy(connectWeight, defaultConnectWeight); - - // taw: by running the aggregation routine more than once there is a chance that also - // non-aggregated nodes with a node distance of two are added to existing aggregates. - // Assuming that the aggregate size is 3 in each direction running the algorithm only twice - // should be sufficient. - // lbv: If the prior phase of aggregation where run without specifying an aggregate size, - // the distance 2 coloring and phase 1 aggregation actually guarantee that only one iteration - // is needed to reach distance 2 neighbors. 
- int maxIters = 2; - int maxNodesPerAggregate = params.get("aggregation: max agg size"); - if(maxNodesPerAggregate == std::numeric_limits::max()) {maxIters = 1;} - for (int iter = 0; iter < maxIters; ++iter) { - for(LO color = 1; color <= numColors; ++color) { - Kokkos::deep_copy(aggWeight, 0); - - //the reduce counts how many nodes are aggregated by this phase, - //which will then be subtracted from numNonAggregatedNodes - LO numAggregated = 0; - Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion", - Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA (const LO i, LO& tmpNumAggregated) { - if (aggStat(i) != READY || colors(i) != color) - return; - - auto neighOfINode = lclLWGraph.getNeighborVertices(i); - for (int j = 0; j < neighOfINode.length; j++) { - LO neigh = neighOfINode(j); - - // We don't check (neigh != i), as it is covered by checking - // (aggStat[neigh] == AGGREGATED) - if (lclLWGraph.isLocalNeighborVertex(neigh) && - aggStat(neigh) == AGGREGATED) - Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)), - connectWeight(neigh)); - } - - int bestScore = -100000; - int bestAggId = -1; - int bestConnect = -1; - - for (int j = 0; j < neighOfINode.length; j++) { - LO neigh = neighOfINode(j); - - if (lclLWGraph.isLocalNeighborVertex(neigh) && - aggStat(neigh) == AGGREGATED) { - auto aggId = vertex2AggId(neigh, 0); - int score = aggWeight(aggId) - aggPenalties(aggId); - - if (score > bestScore) { - bestAggId = aggId; - bestScore = score; - bestConnect = connectWeight(neigh); - - } else if (aggId == bestAggId && - connectWeight(neigh) > bestConnect) { - bestConnect = connectWeight(neigh); - } - } - } - if (bestScore >= 0) { - aggStat(i) = AGGREGATED; - vertex2AggId(i, 0) = bestAggId; - procWinner(i, 0) = myRank; - - Kokkos::atomic_add(&aggPenalties(bestAggId), 1); - connectWeight(i) = bestConnect - penaltyConnectWeight; - tmpNumAggregated++; - } - }, numAggregated); //parallel_for - numNonAggregatedNodes -= numAggregated; - } - } // loop over 
maxIters - - } // BuildAggregatesRandom - - - - template - void AggregationPhase2bAlgorithm_kokkos:: - BuildAggregatesDeterministic(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const { - - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); - - auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto colors = aggregates.GetGraphColors(); - const LO numColors = aggregates.GetGraphNumColors(); - LO numLocalAggregates = aggregates.GetNumAggregates(); - - auto lclLWGraph = graph.getLocalLWGraph(); - - const int defaultConnectWeight = 100; - const int penaltyConnectWeight = 10; - - Kokkos::View connectWeight ("connectWeight", numRows); - Kokkos::View aggWeight ("aggWeight", numLocalAggregates); - Kokkos::View aggPenaltyUpdates("aggPenaltyUpdates", numLocalAggregates); - Kokkos::View aggPenalties ("aggPenalties", numLocalAggregates); - - Kokkos::deep_copy(connectWeight, defaultConnectWeight); - - // We do this cycle twice. - // I don't know why, but ML does it too - // taw: by running the aggregation routine more than once there is a chance that also - // non-aggregated nodes with a node distance of two are added to existing aggregates. - // Assuming that the aggregate size is 3 in each direction running the algorithm only twice - // should be sufficient. 
- int maxIters = 2; - int maxNodesPerAggregate = params.get("aggregation: max agg size"); - if(maxNodesPerAggregate == std::numeric_limits::max()) {maxIters = 1;} - for (int iter = 0; iter < maxIters; ++iter) { - for(LO color = 1; color <= numColors; color++) { - Kokkos::deep_copy(aggWeight, 0); - - //the reduce counts how many nodes are aggregated by this phase, - //which will then be subtracted from numNonAggregatedNodes - LO numAggregated = 0; - Kokkos::parallel_for("Aggregation Phase 2b: updating agg weights", + Kokkos::atomic_add(&aggPenalties(bestAggId), 1); + connectWeight(i) = bestConnect - penaltyConnectWeight; + tmpNumAggregated++; + } + }, + numAggregated); // parallel_for + numNonAggregatedNodes -= numAggregated; + } + } // loop over maxIters + +} // BuildAggregatesRandom + +template +void AggregationPhase2bAlgorithm_kokkos:: + BuildAggregatesDeterministic( + const ParameterList ¶ms, const LWGraph_kokkos &graph, + Aggregates &aggregates, + Kokkos::View &aggStat, + LO &numNonAggregatedNodes) const { + + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); + + auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView( + Xpetra::Access::ReadWrite); + auto procWinner = + aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto colors = aggregates.GetGraphColors(); + const LO numColors = aggregates.GetGraphNumColors(); + LO numLocalAggregates = aggregates.GetNumAggregates(); + + auto lclLWGraph = graph.getLocalLWGraph(); + + const int defaultConnectWeight = 100; + const int penaltyConnectWeight = 10; + + Kokkos::View connectWeight("connectWeight", numRows); + Kokkos::View aggWeight("aggWeight", numLocalAggregates); + Kokkos::View aggPenaltyUpdates("aggPenaltyUpdates", + numLocalAggregates); + Kokkos::View aggPenalties("aggPenalties", + numLocalAggregates); + + Kokkos::deep_copy(connectWeight, defaultConnectWeight); + + // We do this cycle twice. 
+ // I don't know why, but ML does it too + // taw: by running the aggregation routine more than once there is a chance + // that also non-aggregated nodes with a node distance of two are added to + // existing aggregates. Assuming that the aggregate size is 3 in each + // direction running the algorithm only twice should be sufficient. + int maxIters = 2; + int maxNodesPerAggregate = params.get("aggregation: max agg size"); + if (maxNodesPerAggregate == std::numeric_limits::max()) { + maxIters = 1; + } + for (int iter = 0; iter < maxIters; ++iter) { + for (LO color = 1; color <= numColors; color++) { + Kokkos::deep_copy(aggWeight, 0); + + // the reduce counts how many nodes are aggregated by this phase, + // which will then be subtracted from numNonAggregatedNodes + LO numAggregated = 0; + Kokkos::parallel_for( + "Aggregation Phase 2b: updating agg weights", Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA (const LO i) - { + KOKKOS_LAMBDA(const LO i) { if (aggStat(i) != READY || colors(i) != color) return; auto neighOfINode = lclLWGraph.getNeighborVertices(i); @@ -242,19 +254,19 @@ namespace MueLu { // (aggStat[neigh] == AGGREGATED) if (lclLWGraph.isLocalNeighborVertex(neigh) && aggStat(neigh) == AGGREGATED) - Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)), - connectWeight(neigh)); + Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)), + connectWeight(neigh)); } }); - Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion", + Kokkos::parallel_reduce( + "Aggregation Phase 2b: aggregates expansion", Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA (const LO i, LO& tmpNumAggregated) - { + KOKKOS_LAMBDA(const LO i, LO &tmpNumAggregated) { if (aggStat(i) != READY || colors(i) != color) return; - int bestScore = -100000; - int bestAggId = -1; + int bestScore = -100000; + int bestAggId = -1; int bestConnect = -1; auto neighOfINode = lclLWGraph.getNeighborVertices(i); @@ -267,38 +279,39 @@ namespace MueLu { int score = aggWeight(aggId) - 
aggPenalties(aggId); if (score > bestScore) { - bestAggId = aggId; - bestScore = score; + bestAggId = aggId; + bestScore = score; bestConnect = connectWeight(neigh); } else if (aggId == bestAggId && - connectWeight(neigh) > bestConnect) { + connectWeight(neigh) > bestConnect) { bestConnect = connectWeight(neigh); } } } if (bestScore >= 0) { - aggStat(i) = AGGREGATED; + aggStat(i) = AGGREGATED; vertex2AggId(i, 0) = bestAggId; - procWinner(i, 0) = myRank; + procWinner(i, 0) = myRank; Kokkos::atomic_add(&aggPenaltyUpdates(bestAggId), 1); connectWeight(i) = bestConnect - penaltyConnectWeight; tmpNumAggregated++; } - }, numAggregated); //parallel_reduce + }, + numAggregated); // parallel_reduce - Kokkos::parallel_for("Aggregation Phase 2b: updating agg penalties", + Kokkos::parallel_for( + "Aggregation Phase 2b: updating agg penalties", Kokkos::RangePolicy(0, numLocalAggregates), - KOKKOS_LAMBDA (const LO agg) - { + KOKKOS_LAMBDA(const LO agg) { aggPenalties(agg) += aggPenaltyUpdates(agg); aggPenaltyUpdates(agg) = 0; }); - numNonAggregatedNodes -= numAggregated; - } - } // loop over k - } // BuildAggregatesDeterministic -} // end namespace + numNonAggregatedNodes -= numAggregated; + } + } // loop over k +} // BuildAggregatesDeterministic +} // namespace MueLu #endif // MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_decl.hpp index 20e5fc8a7222..b093a1f8b380 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_decl.hpp @@ -46,67 +46,68 @@ #ifndef MUELU_AGGREGATIONPHASE3ALGORITHM_DECL_HPP_ #define MUELU_AGGREGATIONPHASE3ALGORITHM_DECL_HPP_ -#include "MueLu_ConfigDefs.hpp" #include "MueLu_AggregationAlgorithmBase.hpp" #include 
"MueLu_AggregationPhase3Algorithm_fwd.hpp" +#include "MueLu_ConfigDefs.hpp" -#include "MueLu_FactoryBase_fwd.hpp" #include "MueLu_Aggregates_fwd.hpp" +#include "MueLu_FactoryBase_fwd.hpp" #include "MueLu_GraphBase.hpp" namespace MueLu { - /*! - @class AggregationPhase3Algorithm class. - @brief Handle leftover nodes. Try to avoid singleton nodes - @ingroup Aggregation - - ### Idea ### - In phase 3 we try to stick unaggregated nodes into a neighboring aggregate. - We try to avoid singletons: we first try to build a new aggregate containing - all neighboring non-aggregated nodes. If we cannot build a new aggregate, - we add the non-aggregated node to the first adjacent aggregate. - Only if there is no adjacent aggregate, we create a singleton node aggregate. - - ### Comments ### - Only nodes with state READY are changed to AGGREGATED. - - */ - - template - class AggregationPhase3Algorithm : - public MueLu::AggregationAlgorithmBase { +/*! + @class AggregationPhase3Algorithm class. + @brief Handle leftover nodes. Try to avoid singleton nodes + @ingroup Aggregation + + ### Idea ### + In phase 3 we try to stick unaggregated nodes into a neighboring aggregate. + We try to avoid singletons: we first try to build a new aggregate containing + all neighboring non-aggregated nodes. If we cannot build a new aggregate, + we add the non-aggregated node to the first adjacent aggregate. + Only if there is no adjacent aggregate, we create a singleton node aggregate. + + ### Comments ### + Only nodes with state READY are changed to AGGREGATED. + +*/ + +template +class AggregationPhase3Algorithm + : public MueLu::AggregationAlgorithmBase { #undef MUELU_AGGREGATIONPHASE3ALGORITHM_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ - - //! Constructor. - AggregationPhase3Algorithm(const RCP& /* graphFact */ = Teuchos::null) { } +public: + //! @name Constructors/Destructors. + //@{ - //! Destructor. 
- virtual ~AggregationPhase3Algorithm() { } + //! Constructor. + AggregationPhase3Algorithm( + const RCP & /* graphFact */ = Teuchos::null) {} - //@} + //! Destructor. + virtual ~AggregationPhase3Algorithm() {} + //@} - //! @name Aggregation methods. - //@{ + //! @name Aggregation methods. + //@{ - /*! @brief Local aggregation. */ + /*! @brief Local aggregation. */ - void BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; - //@} + void BuildAggregates(const ParameterList ¶ms, const GraphBase &graph, + Aggregates &aggregates, std::vector &aggStat, + LO &numNonAggregatedNodes) const; + //@} - std::string description() const { return "Phase 3 (cleanup)"; } - }; + std::string description() const { return "Phase 3 (cleanup)"; } +}; -} //namespace MueLu +} // namespace MueLu #define MUELU_AGGREGATIONPHASE3ALGORITHM_SHORT - #endif /* MUELU_AGGREGATIONPHASE3ALGORITHM_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_def.hpp index ac462bcccf4b..b9d98f95fd2c 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_def.hpp @@ -60,150 +60,164 @@ namespace MueLu { - // Try to stick unaggregated nodes into a neighboring aggregate if they are - // not already too big. Otherwise, make a new aggregate - template - void AggregationPhase3Algorithm::BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildAggregates"); +// Try to stick unaggregated nodes into a neighboring aggregate if they are +// not already too big. 
Otherwise, make a new aggregate +template +void AggregationPhase3Algorithm:: + BuildAggregates(const ParameterList ¶ms, const GraphBase &graph, + Aggregates &aggregates, std::vector &aggStat, + LO &numNonAggregatedNodes) const { + Monitor m(*this, "BuildAggregates"); + + bool makeNonAdjAggs = false; + bool error_on_isolated = false; + if (params.isParameter( + "aggregation: error on nodes with no on-rank neighbors")) + error_on_isolated = params.get( + "aggregation: error on nodes with no on-rank neighbors"); + if (params.isParameter("aggregation: phase3 avoid singletons")) + makeNonAdjAggs = params.get("aggregation: phase3 avoid singletons"); + + size_t numSingletons = 0; + + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); + + ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); + ArrayRCP procWinner = aggregates.GetProcWinner()->getDataNonConst(0); + + LO numLocalAggregates = aggregates.GetNumAggregates(); + + for (LO i = 0; i < numRows; i++) { + if (aggStat[i] == AGGREGATED || aggStat[i] == IGNORED) + continue; + + ArrayView neighOfINode = graph.getNeighborVertices(i); + + // We don't want a singleton. So lets see if there is an unaggregated + // neighbor that we can also put with this point. 
+ bool isNewAggregate = false; + bool failedToAggregate = true; + for (int j = 0; j < neighOfINode.size(); j++) { + LO neigh = neighOfINode[j]; + + if (neigh != i && graph.isLocalNeighborVertex(neigh) && + aggStat[neigh] == READY) { + isNewAggregate = true; + + aggStat[neigh] = AGGREGATED; + vertex2AggId[neigh] = numLocalAggregates; + procWinner[neigh] = myRank; - bool makeNonAdjAggs = false; - bool error_on_isolated = false; - if(params.isParameter("aggregation: error on nodes with no on-rank neighbors")) - error_on_isolated = params.get("aggregation: error on nodes with no on-rank neighbors"); - if(params.isParameter("aggregation: phase3 avoid singletons")) - makeNonAdjAggs = params.get("aggregation: phase3 avoid singletons"); - - size_t numSingletons=0; - - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); - - ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); - ArrayRCP procWinner = aggregates.GetProcWinner() ->getDataNonConst(0); - - LO numLocalAggregates = aggregates.GetNumAggregates(); - - for (LO i = 0; i < numRows; i++) { - if (aggStat[i] == AGGREGATED || aggStat[i] == IGNORED) - continue; - - ArrayView neighOfINode = graph.getNeighborVertices(i); + numNonAggregatedNodes--; + } + } - // We don't want a singleton. So lets see if there is an unaggregated - // neighbor that we can also put with this point. - bool isNewAggregate = false; - bool failedToAggregate = true; - for (int j = 0; j < neighOfINode.size(); j++) { + if (isNewAggregate) { + // Create new aggregate (not singleton) + aggStat[i] = AGGREGATED; + procWinner[i] = myRank; + numNonAggregatedNodes--; + aggregates.SetIsRoot(i); + vertex2AggId[i] = numLocalAggregates++; + + failedToAggregate = false; + } else { + // We do not want a singleton, but there are no non-aggregated + // neighbors. 
Lets see if we can connect to any other aggregates + // NOTE: This is very similar to phase 2b, but simplier: we stop with + // the first found aggregate + int j = 0; + for (; j < neighOfINode.size(); j++) { LO neigh = neighOfINode[j]; - if (neigh != i && graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == READY) { - isNewAggregate = true; - - aggStat [neigh] = AGGREGATED; - vertex2AggId[neigh] = numLocalAggregates; - procWinner [neigh] = myRank; - - numNonAggregatedNodes--; - } + // We don't check (neigh != rootCandidate), as it is covered by checking + // (aggStat[neigh] == AGGREGATED) + if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == AGGREGATED) + break; } - if (isNewAggregate) { - // Create new aggregate (not singleton) - aggStat [i] = AGGREGATED; - procWinner [i] = myRank; + if (j < neighOfINode.size()) { + // Assign to an adjacent aggregate + vertex2AggId[i] = vertex2AggId[neighOfINode[j]]; numNonAggregatedNodes--; - aggregates.SetIsRoot(i); - vertex2AggId[i] = numLocalAggregates++; - failedToAggregate = false; - } else { - // We do not want a singleton, but there are no non-aggregated - // neighbors. Lets see if we can connect to any other aggregates - // NOTE: This is very similar to phase 2b, but simplier: we stop with - // the first found aggregate - int j = 0; - for (; j < neighOfINode.size(); j++) { - LO neigh = neighOfINode[j]; - - // We don't check (neigh != rootCandidate), as it is covered by checking (aggStat[neigh] == AGGREGATED) - if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == AGGREGATED) - break; - } - - if (j < neighOfINode.size()) { - // Assign to an adjacent aggregate - vertex2AggId[i] = vertex2AggId[neighOfINode[j]]; - numNonAggregatedNodes--; - failedToAggregate = false; - } } + } + + if (failedToAggregate && makeNonAdjAggs) { + // it we are still didn't find an aggregate home for i (i.e., we have + // a potential singleton), we are desperate. 
Basically, we seek to + // group i with any other local point to form an aggregate (even if + // it is not a neighbor of i. Either we find a vertex that is already + // aggregated or not aggregated. + // 1) if found vertex is aggregated, then assign i to this aggregate + // 2) if found vertex is not aggregated, create new aggregate + + for (LO ii = 0; ii < numRows; ii++) { // look for anyone else + if ((ii != i) && (aggStat[ii] != IGNORED)) { + failedToAggregate = false; // found someone so start + aggStat[i] = AGGREGATED; // marking i as aggregated + procWinner[i] = myRank; + + if (aggStat[ii] == AGGREGATED) + vertex2AggId[i] = vertex2AggId[ii]; + else { + vertex2AggId[i] = numLocalAggregates; + vertex2AggId[ii] = numLocalAggregates; + aggStat[ii] = AGGREGATED; + procWinner[ii] = myRank; + numNonAggregatedNodes--; // acounts for ii now being aggregated + aggregates.SetIsRoot(i); + numLocalAggregates++; + } + numNonAggregatedNodes--; // accounts for i now being aggregated + break; + } // if ( (ii != i) && (aggStat[ii] != IGNORED ... + } // for (LO ii = 0; ... + } + if (failedToAggregate) { + if (error_on_isolated) { + // Error on this isolated node, as the user has requested + std::ostringstream oss; + oss << "MueLu::AggregationPhase3Algorithm::BuildAggregates: MueLu has " + "detected a non-Dirichlet node that has no on-rank neighbors " + "and is terminating (by user request). " + << std::endl; + oss << "If this error is being generated at level 0, this is due to an " + "initial partitioning problem in your matrix." + << std::endl; + oss << "If this error is being generated at any other level, try " + "turning on repartitioning, which may fix this problem." 
+ << std::endl; + throw Exceptions::RuntimeError(oss.str()); + } else { + // Create new aggregate (singleton) + // this->GetOStream(Warnings1) << "Found singleton: " << i << + // std::endl; + numSingletons++; - if (failedToAggregate && makeNonAdjAggs) { - // it we are still didn't find an aggregate home for i (i.e., we have - // a potential singleton), we are desperate. Basically, we seek to - // group i with any other local point to form an aggregate (even if - // it is not a neighbor of i. Either we find a vertex that is already - // aggregated or not aggregated. - // 1) if found vertex is aggregated, then assign i to this aggregate - // 2) if found vertex is not aggregated, create new aggregate - - - for (LO ii = 0; ii < numRows; ii++) { // look for anyone else - if ( (ii != i) && (aggStat[ii] != IGNORED) ) { - failedToAggregate = false; // found someone so start - aggStat[i] = AGGREGATED; // marking i as aggregated - procWinner[i]= myRank; - - if (aggStat[ii] == AGGREGATED) - vertex2AggId[i] = vertex2AggId[ii]; - else { - vertex2AggId[i] = numLocalAggregates; - vertex2AggId[ii] = numLocalAggregates; - aggStat [ii] = AGGREGATED; - procWinner [ii] = myRank; - numNonAggregatedNodes--; // acounts for ii now being aggregated - aggregates.SetIsRoot(i); - numLocalAggregates++; - } - numNonAggregatedNodes--; // accounts for i now being aggregated - break; - } //if ( (ii != i) && (aggStat[ii] != IGNORED ... - } //for (LO ii = 0; ... - } - if (failedToAggregate) { - if (error_on_isolated) { - // Error on this isolated node, as the user has requested - std::ostringstream oss; - oss<<"MueLu::AggregationPhase3Algorithm::BuildAggregates: MueLu has detected a non-Dirichlet node that has no on-rank neighbors and is terminating (by user request). 
"<GetOStream(Warnings1) << "Found singleton: " << i << std::endl; - numSingletons++; - - aggregates.SetIsRoot(i); - vertex2AggId[i] = numLocalAggregates++; - numNonAggregatedNodes--; - } + aggregates.SetIsRoot(i); + vertex2AggId[i] = numLocalAggregates++; + numNonAggregatedNodes--; } + } - // One way or another, the node is aggregated (possibly into a singleton) - aggStat [i] = AGGREGATED; - procWinner[i] = myRank; - - } // loop over numRows - + // One way or another, the node is aggregated (possibly into a singleton) + aggStat[i] = AGGREGATED; + procWinner[i] = myRank; - if(numSingletons > 0) - this->GetOStream(Runtime0)<<" WARNING Rank "< 0) + this->GetOStream(Runtime0) + << " WARNING Rank " << myRank << " singletons :" << numSingletons + << " (phase)" << std::endl; - // update aggregate object - aggregates.SetNumAggregates(numLocalAggregates); - } + // update aggregate object + aggregates.SetNumAggregates(numLocalAggregates); +} -} // end namespace +} // namespace MueLu #endif /* MUELU_AGGREGATIONPHASE3ALGORITHM_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_kokkos_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_kokkos_decl.hpp index 9911ac016c5e..9b504cca449a 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_kokkos_decl.hpp @@ -58,70 +58,69 @@ #include "MueLu_LWGraph_kokkos_fwd.hpp" namespace MueLu { - /*! - @class AggregationPhase3Algorithm class. - @brief Handle leftover nodes. Try to avoid singleton nodes - @ingroup Aggregation - - ### Idea ### - In phase 3 we try to stick unaggregated nodes into a neighboring aggregate. - We try to avoid singletons: we first try to build a new aggregate containing - all neighboring non-aggregated nodes. 
If we cannot build a new aggregate, - we add the non-aggregated node to the first adjacent aggregate. - Only if there is no adjacent aggregate, we create a singleton node aggregate. - - ### Comments ### - Only nodes with state READY are changed to AGGREGATED. - - */ - - template - class AggregationPhase3Algorithm_kokkos : - public MueLu::AggregationAlgorithmBase_kokkos { +/*! + @class AggregationPhase3Algorithm class. + @brief Handle leftover nodes. Try to avoid singleton nodes + @ingroup Aggregation + + ### Idea ### + In phase 3 we try to stick unaggregated nodes into a neighboring aggregate. + We try to avoid singletons: we first try to build a new aggregate containing + all neighboring non-aggregated nodes. If we cannot build a new aggregate, + we add the non-aggregated node to the first adjacent aggregate. + Only if there is no adjacent aggregate, we create a singleton node aggregate. + + ### Comments ### + Only nodes with state READY are changed to AGGREGATED. + +*/ + +template +class AggregationPhase3Algorithm_kokkos + : public MueLu::AggregationAlgorithmBase_kokkos { #undef MUELU_AGGREGATIONPHASE3ALGORITHM_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - using device_type = typename LWGraph_kokkos::device_type; - using execution_space = typename LWGraph_kokkos::execution_space; - using memory_space = typename LWGraph_kokkos::memory_space; +public: + using device_type = typename LWGraph_kokkos::device_type; + using execution_space = typename LWGraph_kokkos::execution_space; + using memory_space = typename LWGraph_kokkos::memory_space; - //! @name Constructors/Destructors. - //@{ + //! @name Constructors/Destructors. + //@{ - //! Constructor. - AggregationPhase3Algorithm_kokkos(const RCP& /* graphFact */ = Teuchos::null) { } + //! Constructor. + AggregationPhase3Algorithm_kokkos( + const RCP & /* graphFact */ = Teuchos::null) {} - //! Destructor. - virtual ~AggregationPhase3Algorithm_kokkos() { } + //! Destructor. 
+ virtual ~AggregationPhase3Algorithm_kokkos() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. */ + void BuildAggregates(const ParameterList ¶ms, const LWGraph_kokkos &graph, + Aggregates &aggregates, + Kokkos::View &aggStat, + LO &numNonAggregatedNodes) const; - void BuildAggregates(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; + void BuildAggregatesRandom(const ParameterList ¶ms, + const LWGraph_kokkos &graph, + Aggregates &aggregates, + Kokkos::View &aggStat, + LO &numNonAggregatedNodes) const; + //@} - void BuildAggregatesRandom(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; - //@} + std::string description() const { return "Phase 3 (cleanup)"; } +}; - std::string description() const { return "Phase 3 (cleanup)"; } - }; - -} //namespace MueLu +} // namespace MueLu #define MUELU_AGGREGATIONPHASE3ALGORITHM_KOKKOS_SHORT #endif // MUELU_AGGREGATIONPHASE3ALGORITHM_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_kokkos_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_kokkos_def.hpp index 4c32dcfefc37..7eef791dfb79 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_kokkos_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_AggregationPhase3Algorithm_kokkos_def.hpp @@ -62,171 +62,187 @@ namespace MueLu { - // Try to stick unaggregated nodes into a neighboring aggregate if they are - // not already too big. 
Otherwise, make a new aggregate - template - void AggregationPhase3Algorithm_kokkos:: - BuildAggregates(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const { - - // So far we only have the non-deterministic version of the algorithm... - if(params.get("aggregation: deterministic")) { - Monitor m(*this, "BuildAggregatesDeterministic"); - BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes); - } else { - Monitor m(*this, "BuildAggregatesRandom"); - BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes); - } - +// Try to stick unaggregated nodes into a neighboring aggregate if they are +// not already too big. Otherwise, make a new aggregate +template +void AggregationPhase3Algorithm_kokkos:: + BuildAggregates( + const ParameterList ¶ms, const LWGraph_kokkos &graph, + Aggregates &aggregates, + Kokkos::View &aggStat, + LO &numNonAggregatedNodes) const { + + // So far we only have the non-deterministic version of the algorithm... + if (params.get("aggregation: deterministic")) { + Monitor m(*this, "BuildAggregatesDeterministic"); + BuildAggregatesRandom(params, graph, aggregates, aggStat, + numNonAggregatedNodes); + } else { + Monitor m(*this, "BuildAggregatesRandom"); + BuildAggregatesRandom(params, graph, aggregates, aggStat, + numNonAggregatedNodes); } - - // Try to stick unaggregated nodes into a neighboring aggregate if they are - // not already too big. 
Otherwise, make a new aggregate - template - void AggregationPhase3Algorithm_kokkos:: - BuildAggregatesRandom(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const { - - bool error_on_isolated = params.get("aggregation: error on nodes with no on-rank neighbors"); - bool makeNonAdjAggs = params.get("aggregation: phase3 avoid singletons"); - - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); - - auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto colors = aggregates.GetGraphColors(); - const LO numColors = aggregates.GetGraphNumColors(); - - auto lclLWGraph = graph.getLocalLWGraph(); - - Kokkos::View numAggregates("numAggregates"); - Kokkos::deep_copy(numAggregates, aggregates.GetNumAggregates()); - - Kokkos::View aggStatOld("Initial aggregation status", aggStat.extent(0)); +} + +// Try to stick unaggregated nodes into a neighboring aggregate if they are +// not already too big. 
Otherwise, make a new aggregate +template +void AggregationPhase3Algorithm_kokkos:: + BuildAggregatesRandom( + const ParameterList ¶ms, const LWGraph_kokkos &graph, + Aggregates &aggregates, + Kokkos::View &aggStat, + LO &numNonAggregatedNodes) const { + + bool error_on_isolated = + params.get("aggregation: error on nodes with no on-rank neighbors"); + bool makeNonAdjAggs = + params.get("aggregation: phase3 avoid singletons"); + + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); + + auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView( + Xpetra::Access::ReadWrite); + auto procWinner = + aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite); + auto colors = aggregates.GetGraphColors(); + const LO numColors = aggregates.GetGraphNumColors(); + + auto lclLWGraph = graph.getLocalLWGraph(); + + Kokkos::View numAggregates("numAggregates"); + Kokkos::deep_copy(numAggregates, aggregates.GetNumAggregates()); + + Kokkos::View aggStatOld("Initial aggregation status", + aggStat.extent(0)); + Kokkos::deep_copy(aggStatOld, aggStat); + Kokkos::View numNonAggregated("numNonAggregated"); + Kokkos::deep_copy(numNonAggregated, numNonAggregatedNodes); + for (int color = 1; color < numColors + 1; ++color) { + Kokkos::parallel_for( + "Aggregation Phase 3: aggregates clean-up", + Kokkos::RangePolicy(0, numRows), + KOKKOS_LAMBDA(const LO nodeIdx) { + // Check if node has already been treated? + if ((colors(nodeIdx) != color) || + (aggStatOld(nodeIdx) == AGGREGATED) || + (aggStatOld(nodeIdx) == IGNORED)) { + return; + } + + // Grab node neighbors + auto neighbors = lclLWGraph.getNeighborVertices(nodeIdx); + LO neighIdx; + + // We don't want a singleton. + // So lets see if any neighbors can be used to form a new aggregate? 
+ bool isNewAggregate = false; + for (int neigh = 0; neigh < neighbors.length; ++neigh) { + neighIdx = neighbors(neigh); + + if ((neighIdx != nodeIdx) && + lclLWGraph.isLocalNeighborVertex(neighIdx) && + (aggStatOld(neighIdx) == READY)) { + isNewAggregate = true; + break; + } + } + + // We can form a new non singleton aggregate! + if (isNewAggregate) { + // If this is the aggregate root + // we need to process the nodes in the aggregate + const LO aggId = Kokkos::atomic_fetch_add(&numAggregates(), 1); + aggStat(nodeIdx) = AGGREGATED; + procWinner(nodeIdx, 0) = myRank; + vertex2AggId(nodeIdx, 0) = aggId; + // aggregates.SetIsRoot(nodeIdx); + Kokkos::atomic_decrement(&numNonAggregated()); + for (int neigh = 0; neigh < neighbors.length; ++neigh) { + neighIdx = neighbors(neigh); + if ((neighIdx != nodeIdx) && + lclLWGraph.isLocalNeighborVertex(neighIdx) && + (aggStatOld(neighIdx) == READY)) { + aggStat(neighIdx) = AGGREGATED; + procWinner(neighIdx, 0) = myRank; + vertex2AggId(neighIdx, 0) = aggId; + Kokkos::atomic_decrement(&numNonAggregated()); + } + } + return; + } + + // Getting a little desperate! + // Let us try to aggregate into a neighboring aggregate + for (int neigh = 0; neigh < neighbors.length; ++neigh) { + neighIdx = neighbors(neigh); + if (lclLWGraph.isLocalNeighborVertex(neighIdx) && + (aggStatOld(neighIdx) == AGGREGATED)) { + aggStat(nodeIdx) = AGGREGATED; + procWinner(nodeIdx, 0) = myRank; + vertex2AggId(nodeIdx, 0) = vertex2AggId(neighIdx, 0); + Kokkos::atomic_decrement(&numNonAggregated()); + return; + } + } + + // Getting quite desperate! 
+ // Let us try to make a non contiguous aggregate + if (makeNonAdjAggs) { + for (LO otherNodeIdx = 0; otherNodeIdx < numRows; ++otherNodeIdx) { + if ((otherNodeIdx != nodeIdx) && + (aggStatOld(otherNodeIdx) == AGGREGATED)) { + aggStat(nodeIdx) = AGGREGATED; + procWinner(nodeIdx, 0) = myRank; + vertex2AggId(nodeIdx, 0) = vertex2AggId(otherNodeIdx, 0); + Kokkos::atomic_decrement(&numNonAggregated()); + return; + } + } + } + + // Total deperation! + // Let us make a singleton + if (!error_on_isolated) { + const LO aggId = Kokkos::atomic_fetch_add(&numAggregates(), 1); + aggStat(nodeIdx) = AGGREGATED; + procWinner(nodeIdx, 0) = myRank; + vertex2AggId(nodeIdx, 0) = aggId; + Kokkos::atomic_decrement(&numNonAggregated()); + } + }); + // LBV on 09/27/19: here we could copy numNonAggregated to host + // and check for it to be equal to 0 in which case we can stop + // looping over the different colors... Kokkos::deep_copy(aggStatOld, aggStat); - Kokkos::View numNonAggregated("numNonAggregated"); - Kokkos::deep_copy(numNonAggregated, numNonAggregatedNodes); - for(int color = 1; color < numColors + 1; ++color) { - Kokkos::parallel_for("Aggregation Phase 3: aggregates clean-up", - Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA(const LO nodeIdx) { - // Check if node has already been treated? - if( (colors(nodeIdx) != color) || - (aggStatOld(nodeIdx) == AGGREGATED) || - (aggStatOld(nodeIdx) == IGNORED) ){ return; } - - // Grab node neighbors - auto neighbors = lclLWGraph.getNeighborVertices(nodeIdx); - LO neighIdx; - - // We don't want a singleton. - // So lets see if any neighbors can be used to form a new aggregate? - bool isNewAggregate = false; - for(int neigh = 0; neigh < neighbors.length; ++neigh) { - neighIdx = neighbors(neigh); - - if((neighIdx != nodeIdx) && - lclLWGraph.isLocalNeighborVertex(neighIdx) && - (aggStatOld(neighIdx) == READY)) { - isNewAggregate = true; - break; - } - } - - // We can form a new non singleton aggregate! 
- if(isNewAggregate) { - // If this is the aggregate root - // we need to process the nodes in the aggregate - const LO aggId = Kokkos::atomic_fetch_add(&numAggregates(), 1); - aggStat(nodeIdx) = AGGREGATED; - procWinner(nodeIdx, 0) = myRank; - vertex2AggId(nodeIdx, 0) = aggId; - // aggregates.SetIsRoot(nodeIdx); - Kokkos::atomic_decrement(&numNonAggregated()); - for(int neigh = 0; neigh < neighbors.length; ++neigh) { - neighIdx = neighbors(neigh); - if((neighIdx != nodeIdx) && - lclLWGraph.isLocalNeighborVertex(neighIdx) && - (aggStatOld(neighIdx) == READY)) { - aggStat(neighIdx) = AGGREGATED; - procWinner(neighIdx, 0) = myRank; - vertex2AggId(neighIdx, 0) = aggId; - Kokkos::atomic_decrement(&numNonAggregated()); - } - } - return; - } - - // Getting a little desperate! - // Let us try to aggregate into a neighboring aggregate - for(int neigh = 0; neigh < neighbors.length; ++neigh) { - neighIdx = neighbors(neigh); - if (lclLWGraph.isLocalNeighborVertex(neighIdx) && - (aggStatOld(neighIdx) == AGGREGATED)) { - aggStat(nodeIdx) = AGGREGATED; - procWinner(nodeIdx, 0) = myRank; - vertex2AggId(nodeIdx, 0) = vertex2AggId(neighIdx, 0); - Kokkos::atomic_decrement(&numNonAggregated()); - return; - } - } - - // Getting quite desperate! - // Let us try to make a non contiguous aggregate - if(makeNonAdjAggs) { - for(LO otherNodeIdx = 0; otherNodeIdx < numRows; ++otherNodeIdx) { - if((otherNodeIdx != nodeIdx) && - (aggStatOld(otherNodeIdx) == AGGREGATED)) { - aggStat(nodeIdx) = AGGREGATED; - procWinner(nodeIdx, 0) = myRank; - vertex2AggId(nodeIdx, 0) = vertex2AggId(otherNodeIdx, 0); - Kokkos::atomic_decrement(&numNonAggregated()); - return; - } - } - } - - // Total deperation! 
- // Let us make a singleton - if(!error_on_isolated) { - const LO aggId = Kokkos::atomic_fetch_add(&numAggregates(), 1); - aggStat(nodeIdx) = AGGREGATED; - procWinner(nodeIdx, 0) = myRank; - vertex2AggId(nodeIdx, 0) = aggId; - Kokkos::atomic_decrement(&numNonAggregated()); - } - }); - // LBV on 09/27/19: here we could copy numNonAggregated to host - // and check for it to be equal to 0 in which case we can stop - // looping over the different colors... - Kokkos::deep_copy(aggStatOld, aggStat); - } // loop over colors - - auto numNonAggregated_h = Kokkos::create_mirror_view(numNonAggregated); - Kokkos::deep_copy(numNonAggregated_h, numNonAggregated); - numNonAggregatedNodes = numNonAggregated_h(); - if( (error_on_isolated) && (numNonAggregatedNodes > 0) ) { - // Error on this isolated node, as the user has requested - std::ostringstream oss; - oss<<"MueLu::AggregationPhase3Algorithm::BuildAggregates: MueLu has detected a non-Dirichlet node that has no on-rank neighbors and is terminating (by user request). "< 0)) { + // Error on this isolated node, as the user has requested + std::ostringstream oss; + oss << "MueLu::AggregationPhase3Algorithm::BuildAggregates: MueLu has " + "detected a non-Dirichlet node that has no on-rank neighbors and is " + "terminating (by user request). " + << std::endl; + oss << "If this error is being generated at level 0, this is due to an " + "initial partitioning problem in your matrix." + << std::endl; + oss << "If this error is being generated at any other level, try turning " + "on repartitioning, which may fix this problem." 
+ << std::endl; + throw Exceptions::RuntimeError(oss.str()); } -} // end namespace + // update aggregate object + auto numAggregates_h = Kokkos::create_mirror_view(numAggregates); + Kokkos::deep_copy(numAggregates_h, numAggregates); + aggregates.SetNumAggregates(numAggregates_h()); +} + +} // namespace MueLu #endif // MUELU_AGGREGATIONPHASE3ALGORITHM_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_InterfaceAggregationAlgorithm_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_InterfaceAggregationAlgorithm_decl.hpp index fb3425d5058c..affad495896d 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_InterfaceAggregationAlgorithm_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_InterfaceAggregationAlgorithm_decl.hpp @@ -53,64 +53,67 @@ #ifndef MUELU_INTERFACEAGGREGATIONALGORITHM_DECL_HPP_ #define MUELU_INTERFACEAGGREGATIONALGORITHM_DECL_HPP_ -#include "MueLu_ConfigDefs.hpp" #include "MueLu_AggregationAlgorithmBase.hpp" +#include "MueLu_ConfigDefs.hpp" #include "MueLu_InterfaceAggregationAlgorithm_fwd.hpp" -#include "MueLu_FactoryBase_fwd.hpp" #include "MueLu_Aggregates_fwd.hpp" +#include "MueLu_FactoryBase_fwd.hpp" //#include "MueLu_Graph_fwd.hpp" #include "MueLu_GraphBase.hpp" namespace MueLu { - /*! - @class InterfaceAggregationAlgorithm class. - @brief Algorithm for coarsening a graph with uncoupled aggregation. - creates aggregates along an interface using specified root nodes. - - @ingroup Aggregation - - ### Idea ### - The user can mark some nodes as INTERFACE to build aggregates across an interface. - This can be very useful for certain applications. We build aggregates for nodes with - the state INTERFACE. Then, the state is changed to AGGREGATED. - The InterfaceAggregationAlgorithm should run before the Phase1AggregationAlgorithm. - - */ - - template - class InterfaceAggregationAlgorithm : - public MueLu::AggregationAlgorithmBase { +/*! + @class InterfaceAggregationAlgorithm class. 
+ @brief Algorithm for coarsening a graph with uncoupled aggregation. + creates aggregates along an interface using specified root nodes. + + @ingroup Aggregation + + ### Idea ### + The user can mark some nodes as INTERFACE to build aggregates across an + interface. This can be very useful for certain applications. We build + aggregates for nodes with the state INTERFACE. Then, the state is changed to + AGGREGATED. The InterfaceAggregationAlgorithm should run before the + Phase1AggregationAlgorithm. + +*/ + +template +class InterfaceAggregationAlgorithm + : public MueLu::AggregationAlgorithmBase { #undef MUELU_INTERFACEAGGREGATIONALGORITHM_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ - - //! Constructor. - InterfaceAggregationAlgorithm(RCP const &graphFact = Teuchos::null); - - //! Destructor. - virtual ~InterfaceAggregationAlgorithm() { } +public: + //! @name Constructors/Destructors. + //@{ - //@} + //! Constructor. + InterfaceAggregationAlgorithm( + RCP const &graphFact = Teuchos::null); + //! Destructor. + virtual ~InterfaceAggregationAlgorithm() {} - //! @name Aggregation methods. - //@{ + //@} - /*! @brief Local aggregation. */ + //! @name Aggregation methods. + //@{ - void BuildAggregates(Teuchos::ParameterList const & params, GraphBase const & graph, Aggregates & aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; - //@} + /*! @brief Local aggregation. 
*/ + void BuildAggregates(Teuchos::ParameterList const ¶ms, + GraphBase const &graph, Aggregates &aggregates, + std::vector &aggStat, + LO &numNonAggregatedNodes) const; + //@} - }; //class InterfaceAggregationAlgorithm +}; // class InterfaceAggregationAlgorithm -} //namespace MueLu +} // namespace MueLu #define MUELU_INTERFACEAGGREGATIONALGORITHM_SHORT #endif /* MUELU_INTERFACEAGGREGATIONALGORITHM_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_InterfaceAggregationAlgorithm_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_InterfaceAggregationAlgorithm_def.hpp index 22dd58e56fad..f613192d4706 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_InterfaceAggregationAlgorithm_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_InterfaceAggregationAlgorithm_def.hpp @@ -61,57 +61,66 @@ #include "MueLu_InterfaceAggregationAlgorithm_decl.hpp" //#include "MueLu_Graph.hpp" -#include "MueLu_GraphBase.hpp" #include "MueLu_Aggregates.hpp" #include "MueLu_Exceptions.hpp" +#include "MueLu_GraphBase.hpp" #include "MueLu_Monitor.hpp" namespace MueLu { template -InterfaceAggregationAlgorithm::InterfaceAggregationAlgorithm(RCP const &/* graphFact */) -{ -} +InterfaceAggregationAlgorithm:: + InterfaceAggregationAlgorithm( + RCP const & /* graphFact */) {} template -void InterfaceAggregationAlgorithm::BuildAggregates(Teuchos::ParameterList const & /* params */, GraphBase const & graph, Aggregates & aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const { +void InterfaceAggregationAlgorithm:: + BuildAggregates(Teuchos::ParameterList const & /* params */, + GraphBase const &graph, Aggregates &aggregates, + std::vector &aggStat, + LO &numNonAggregatedNodes) const { Monitor m(*this, "BuildAggregates"); const LocalOrdinal nRows = graph.GetNodeNumVertices(); const int myRank = graph.GetComm()->getRank(); // vertex ids for output - Teuchos::ArrayRCP vertex2AggId = 
aggregates.GetVertex2AggId()->getDataNonConst(0); - Teuchos::ArrayRCP procWinner = aggregates.GetProcWinner()->getDataNonConst(0); + Teuchos::ArrayRCP vertex2AggId = + aggregates.GetVertex2AggId()->getDataNonConst(0); + Teuchos::ArrayRCP procWinner = + aggregates.GetProcWinner()->getDataNonConst(0); // some internal variables - LocalOrdinal numLocalAggregates = aggregates.GetNumAggregates(); // number of local aggregates on current proc + LocalOrdinal numLocalAggregates = + aggregates + .GetNumAggregates(); // number of local aggregates on current proc // main loop over all local rows of graph(A) - for(int iNode1 = 0; iNode1 < nRows; ++iNode1) { + for (int iNode1 = 0; iNode1 < nRows; ++iNode1) { if (aggStat[iNode1] == INTERFACE) { - aggregates.SetIsRoot(iNode1); // mark iNode1 as root node for new aggregate 'agg' + aggregates.SetIsRoot( + iNode1); // mark iNode1 as root node for new aggregate 'agg' int aggIndex = numLocalAggregates; std::vector aggList; aggList.push_back(iNode1); ArrayView neighOfINode = graph.getNeighborVertices(iNode1); - for(int j = 0; j < neighOfINode.size(); ++j) { + for (int j = 0; j < neighOfINode.size(); ++j) { LO neigh = neighOfINode[j]; - if(neigh != iNode1 && graph.isLocalNeighborVertex(neigh)) { - if(aggStat[neigh] != AGGREGATED && aggStat[neigh] != INTERFACE && - aggStat[neigh] != IGNORED) { + if (neigh != iNode1 && graph.isLocalNeighborVertex(neigh)) { + if (aggStat[neigh] != AGGREGATED && aggStat[neigh] != INTERFACE && + aggStat[neigh] != IGNORED) { aggList.push_back(neigh); } } } for (size_t k = 0; k < aggList.size(); k++) { - aggStat[aggList[k]] = AGGREGATED; + aggStat[aggList[k]] = AGGREGATED; vertex2AggId[aggList[k]] = aggIndex; - procWinner[aggList[k]] = myRank; + procWinner[aggList[k]] = myRank; } ++numLocalAggregates; numNonAggregatedNodes -= aggList.size(); @@ -123,7 +132,6 @@ void InterfaceAggregationAlgorithm::BuildAggr aggregates.SetNumAggregates(numLocalAggregates); } -} // end namespace - +} // namespace MueLu #endif /* 
MUELU_INTERFACEAGGREGATIONALGORITHM_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_decl.hpp index 47ebb8038952..071f87898e36 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_decl.hpp @@ -47,71 +47,74 @@ #ifndef MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_DECL_HPP_ #define MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_DECL_HPP_ -#include "MueLu_ConfigDefs.hpp" #include "MueLu_AggregationAlgorithmBase.hpp" +#include "MueLu_ConfigDefs.hpp" #include "MueLu_IsolatedNodeAggregationAlgorithm_fwd.hpp" -#include "MueLu_FactoryBase_fwd.hpp" #include "MueLu_Aggregates_fwd.hpp" +#include "MueLu_FactoryBase_fwd.hpp" //#include "MueLu_Graph_fwd.hpp" #include "MueLu_GraphBase.hpp" namespace MueLu { - /*! - @class IsolatedNodeAggregationAlgorithm class. - @brief Ignores isolated nodes during aggregation. Marks the node to be "aggregated" without adding real aggregates for them. - - @ingroup Aggregation - - ### Idea ### - The isolated node aggregation algorithm loops over all non-aggregated nodes - (with a state different than aggregated or ignored) which have only themselves - as neighbor node. The state of these "isolated" nodes is then set to ignored such - that they are not considered in the aggregation. This aggregation algorithm should - run as one of the last aggregation algorithms in the aggregation method. - - ### Comments ### - Only nodes with state different than READY or AGGREGATED are changed to IGNORED. - After that, all nodes should have the state AGGREGATED or IGNORED. - - */ - - template - class IsolatedNodeAggregationAlgorithm : - public MueLu::AggregationAlgorithmBase { +/*! + @class IsolatedNodeAggregationAlgorithm class. 
+ @brief Ignores isolated nodes during aggregation. Marks the node to be + "aggregated" without adding real aggregates for them. + + @ingroup Aggregation + + ### Idea ### + The isolated node aggregation algorithm loops over all non-aggregated nodes + (with a state different than aggregated or ignored) which have only themselves + as neighbor node. The state of these "isolated" nodes is then set to ignored + such that they are not considered in the aggregation. This aggregation + algorithm should run as one of the last aggregation algorithms in the + aggregation method. + + ### Comments ### + Only nodes with state different than READY or AGGREGATED are changed to + IGNORED. After that, all nodes should have the state AGGREGATED or IGNORED. + +*/ + +template +class IsolatedNodeAggregationAlgorithm + : public MueLu::AggregationAlgorithmBase { #undef MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ - - //! Constructor. - IsolatedNodeAggregationAlgorithm(const RCP& /* graphFact */ = Teuchos::null) { } +public: + //! @name Constructors/Destructors. + //@{ - //! Destructor. - virtual ~IsolatedNodeAggregationAlgorithm() { } + //! Constructor. + IsolatedNodeAggregationAlgorithm( + const RCP & /* graphFact */ = Teuchos::null) {} - //@} + //! Destructor. + virtual ~IsolatedNodeAggregationAlgorithm() {} + //@} - //! @name Aggregation methods. - //@{ + //! @name Aggregation methods. + //@{ - /*! @brief Local aggregation. */ + /*! @brief Local aggregation. 
*/ - void BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; - //@} + void BuildAggregates(const ParameterList ¶ms, const GraphBase &graph, + Aggregates &aggregates, std::vector &aggStat, + LO &numNonAggregatedNodes) const; + //@} - std::string description() const { return "Phase - (isolated)"; } + std::string description() const { return "Phase - (isolated)"; } - }; //class MaxLinkAggregationAlgorithm +}; // class MaxLinkAggregationAlgorithm -} //namespace MueLu +} // namespace MueLu #define MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_SHORT - #endif /* MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_def.hpp index 77147d7990a8..7f71f42819c3 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_def.hpp @@ -53,7 +53,6 @@ #ifndef MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_DEF_HPP_ #define MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_DEF_HPP_ - #include #include @@ -61,27 +60,32 @@ #include "MueLu_IsolatedNodeAggregationAlgorithm_decl.hpp" -#include "MueLu_GraphBase.hpp" #include "MueLu_Aggregates.hpp" #include "MueLu_Exceptions.hpp" +#include "MueLu_GraphBase.hpp" #include "MueLu_Monitor.hpp" namespace MueLu { - template - void IsolatedNodeAggregationAlgorithm::BuildAggregates(const ParameterList& /* params */, const GraphBase& graph, Aggregates& /* aggregates */, std::vector& aggStat, LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildAggregates"); +template +void IsolatedNodeAggregationAlgorithm:: + BuildAggregates(const ParameterList & /* params */, const GraphBase &graph, + Aggregates & /* aggregates */, + std::vector &aggStat, + LO 
&numNonAggregatedNodes) const { + Monitor m(*this, "BuildAggregates"); - const LO numRows = graph.GetNodeNumVertices(); + const LO numRows = graph.GetNodeNumVertices(); - // Remove all isolated nodes - for (LO i = 0; i < numRows; i++) - if (aggStat[i] != AGGREGATED && aggStat[i] != IGNORED && graph.getNeighborVertices(i).size() == 1) { - aggStat[i] = IGNORED; - numNonAggregatedNodes--; - } - } + // Remove all isolated nodes + for (LO i = 0; i < numRows; i++) + if (aggStat[i] != AGGREGATED && aggStat[i] != IGNORED && + graph.getNeighborVertices(i).size() == 1) { + aggStat[i] = IGNORED; + numNonAggregatedNodes--; + } +} -} // end namespace +} // namespace MueLu #endif /* MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_kokkos_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_kokkos_decl.hpp index c5bbb6e9c4b9..b339b6500a51 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_kokkos_decl.hpp @@ -58,65 +58,66 @@ #include "MueLu_LWGraph_kokkos.hpp" namespace MueLu { - /*! - @class IsolatedNodeAggregationAlgorithm class. - @brief Ignores isolated nodes during aggregation. Marks the node to be "aggregated" without adding real aggregates for them. - - @ingroup Aggregation - - ### Idea ### - The isolated node aggregation algorithm loops over all non-aggregated nodes - (with a state different than aggregated or ignored) which have only themselves - as neighbor node. The state of these "isolated" nodes is then set to ignored such - that they are not considered in the aggregation. This aggregation algorithm should - run as one of the last aggregation algorithms in the aggregation method. 
- - ### Comments ### - Only nodes with state different than READY or AGGREGATED are changed to IGNORED. - After that, all nodes should have the state AGGREGATED or IGNORED. - - */ - - template - class IsolatedNodeAggregationAlgorithm_kokkos : - public MueLu::AggregationAlgorithmBase_kokkos { +/*! + @class IsolatedNodeAggregationAlgorithm class. + @brief Ignores isolated nodes during aggregation. Marks the node to be + "aggregated" without adding real aggregates for them. + + @ingroup Aggregation + + ### Idea ### + The isolated node aggregation algorithm loops over all non-aggregated nodes + (with a state different than aggregated or ignored) which have only themselves + as neighbor node. The state of these "isolated" nodes is then set to ignored + such that they are not considered in the aggregation. This aggregation + algorithm should run as one of the last aggregation algorithms in the + aggregation method. + + ### Comments ### + Only nodes with state different than READY or AGGREGATED are changed to + IGNORED. After that, all nodes should have the state AGGREGATED or IGNORED. + +*/ + +template +class IsolatedNodeAggregationAlgorithm_kokkos + : public MueLu::AggregationAlgorithmBase_kokkos { #undef MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - using device_type = typename LWGraph_kokkos::device_type; - using memory_space = typename LWGraph_kokkos::memory_space; - //! @name Constructors/Destructors. - //@{ +public: + using device_type = typename LWGraph_kokkos::device_type; + using memory_space = typename LWGraph_kokkos::memory_space; + //! @name Constructors/Destructors. + //@{ - //! Constructor. - IsolatedNodeAggregationAlgorithm_kokkos(const RCP& /* graphFact */ = Teuchos::null) { } + //! Constructor. + IsolatedNodeAggregationAlgorithm_kokkos( + const RCP & /* graphFact */ = Teuchos::null) {} - //! Destructor. - virtual ~IsolatedNodeAggregationAlgorithm_kokkos() { } + //! Destructor. 
+ virtual ~IsolatedNodeAggregationAlgorithm_kokkos() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. */ + void BuildAggregates(const ParameterList ¶ms, const LWGraph_kokkos &graph, + Aggregates &aggregates, + Kokkos::View &aggStat, + LO &numNonAggregatedNodes) const; + //@} - void BuildAggregates(const ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; - //@} + std::string description() const { return "Phase - (isolated)"; } - std::string description() const { return "Phase - (isolated)"; } +}; // class MaxLinkAggregationAlgorithm - }; //class MaxLinkAggregationAlgorithm - -} //namespace MueLu +} // namespace MueLu #define MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_KOKKOS_SHORT #endif // MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_DECL_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_kokkos_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_kokkos_def.hpp index 96ff102a447a..ec82ed0ec684 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_kokkos_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_IsolatedNodeAggregationAlgorithm_kokkos_def.hpp @@ -53,48 +53,50 @@ #include "MueLu_IsolatedNodeAggregationAlgorithm_kokkos_decl.hpp" -#include "MueLu_LWGraph_kokkos.hpp" #include "MueLu_Aggregates.hpp" #include "MueLu_Exceptions.hpp" +#include "MueLu_LWGraph_kokkos.hpp" #include "MueLu_Monitor.hpp" namespace MueLu { - template - void IsolatedNodeAggregationAlgorithm_kokkos:: - BuildAggregates(const ParameterList& /* params */, - const LWGraph_kokkos& graph, - Aggregates& /* aggregates */, - Kokkos::View& aggstat, - LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildAggregates"); - - typename Kokkos::View::HostMirror 
aggstatHost - = Kokkos::create_mirror(aggstat); - Kokkos::deep_copy(aggstatHost, aggstat); - std::vector aggStat; - aggStat.resize(aggstatHost.extent(0)); - for(size_t idx = 0; idx < aggstatHost.extent(0); ++idx) { - aggStat[idx] = aggstatHost(idx); - } +template +void IsolatedNodeAggregationAlgorithm_kokkos:: + BuildAggregates( + const ParameterList & /* params */, const LWGraph_kokkos &graph, + Aggregates & /* aggregates */, + Kokkos::View &aggstat, + LO &numNonAggregatedNodes) const { + Monitor m(*this, "BuildAggregates"); - auto lclLWGraph = graph.getLocalLWGraph(); + typename Kokkos::View::HostMirror aggstatHost = + Kokkos::create_mirror(aggstat); + Kokkos::deep_copy(aggstatHost, aggstat); + std::vector aggStat; + aggStat.resize(aggstatHost.extent(0)); + for (size_t idx = 0; idx < aggstatHost.extent(0); ++idx) { + aggStat[idx] = aggstatHost(idx); + } - const LO numRows = graph.GetNodeNumVertices(); + auto lclLWGraph = graph.getLocalLWGraph(); - // Remove all isolated nodes - for (LO i = 0; i < numRows; i++) - if (aggStat[i] != AGGREGATED && aggStat[i] != IGNORED && lclLWGraph.getNeighborVertices(i).length == 1) { - aggStat[i] = IGNORED; - numNonAggregatedNodes--; - } + const LO numRows = graph.GetNodeNumVertices(); - for(size_t idx = 0; idx < aggstatHost.extent(0); ++idx) { - aggstatHost(idx) = aggStat[idx]; + // Remove all isolated nodes + for (LO i = 0; i < numRows; i++) + if (aggStat[i] != AGGREGATED && aggStat[i] != IGNORED && + lclLWGraph.getNeighborVertices(i).length == 1) { + aggStat[i] = IGNORED; + numNonAggregatedNodes--; } - Kokkos::deep_copy(aggstat, aggstatHost); + + for (size_t idx = 0; idx < aggstatHost.extent(0); ++idx) { + aggstatHost(idx) = aggStat[idx]; } + Kokkos::deep_copy(aggstat, aggstatHost); +} -} // end namespace +} // namespace MueLu #endif // MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_decl.hpp 
b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_decl.hpp index 6a77eb0d4a29..1dc2bd7856ab 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_decl.hpp @@ -53,67 +53,71 @@ #ifndef MUELU_ONEPTAGGREGATIONALGORITHM_DECL_HPP_ #define MUELU_ONEPTAGGREGATIONALGORITHM_DECL_HPP_ -#include "MueLu_ConfigDefs.hpp" #include "MueLu_AggregationAlgorithmBase.hpp" +#include "MueLu_ConfigDefs.hpp" #include "MueLu_OnePtAggregationAlgorithm_fwd.hpp" -#include "MueLu_FactoryBase_fwd.hpp" #include "MueLu_Aggregates_fwd.hpp" +#include "MueLu_FactoryBase_fwd.hpp" //#include "MueLu_Graph_fwd.hpp" #include "MueLu_GraphBase.hpp" namespace MueLu { - /*! - @class OnePtAggregationAlgorithm class. - @brief Algorithm for coarsening a graph with uncoupled aggregation. - keep special marked nodes as singleton node aggregates over all multigrid levels - - @ingroup Aggregation - - ### Idea ### - The user can mark some nodes as ONEPT to build some single node aggregates. - This can be very useful for certain applications. We build single node aggregates - for nodes with the state ONEPT. Then, the state is changed to ignored. - The OnePtAggregationAlgorithm should run before the Phase1AggregationAlgorithm. - - ### Comments ### - Only nodes with state ONEPT are changed to IGNORED. - - */ - - template - class OnePtAggregationAlgorithm : - public MueLu::AggregationAlgorithmBase { +/*! + @class OnePtAggregationAlgorithm class. + @brief Algorithm for coarsening a graph with uncoupled aggregation. + keep special marked nodes as singleton node aggregates over all multigrid + levels + + @ingroup Aggregation + + ### Idea ### + The user can mark some nodes as ONEPT to build some single node aggregates. + This can be very useful for certain applications. We build single node + aggregates for nodes with the state ONEPT. 
Then, the state is changed to + ignored. The OnePtAggregationAlgorithm should run before the + Phase1AggregationAlgorithm. + + ### Comments ### + Only nodes with state ONEPT are changed to IGNORED. + +*/ + +template +class OnePtAggregationAlgorithm + : public MueLu::AggregationAlgorithmBase { #undef MUELU_ONEPTAGGREGATIONALGORITHM_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ - - //! Constructor. - OnePtAggregationAlgorithm(RCP const &graphFact = Teuchos::null); - - //! Destructor. - virtual ~OnePtAggregationAlgorithm() { } +public: + //! @name Constructors/Destructors. + //@{ - //@} + //! Constructor. + OnePtAggregationAlgorithm( + RCP const &graphFact = Teuchos::null); + //! Destructor. + virtual ~OnePtAggregationAlgorithm() {} - //! @name Aggregation methods. - //@{ + //@} - /*! @brief Local aggregation. */ + //! @name Aggregation methods. + //@{ - void BuildAggregates(Teuchos::ParameterList const & params, GraphBase const & graph, Aggregates & aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; - //@} + /*! @brief Local aggregation. 
*/ + void BuildAggregates(Teuchos::ParameterList const &params, + GraphBase const &graph, Aggregates &aggregates, + std::vector &aggStat, + LO &numNonAggregatedNodes) const; + //@} - }; //class OnePtAggregationAlgorithm +}; // class OnePtAggregationAlgorithm -} //namespace MueLu +} // namespace MueLu #define MUELU_ONEPTAGGREGATIONALGORITHM_SHORT #endif /* MUELU_ONEPTAGGREGATIONALGORITHM_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_def.hpp index 687778c05654..b107a970e458 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_def.hpp @@ -61,39 +61,47 @@ #include "MueLu_OnePtAggregationAlgorithm_decl.hpp" //#include "MueLu_Graph.hpp" -#include "MueLu_GraphBase.hpp" #include "MueLu_Aggregates.hpp" #include "MueLu_Exceptions.hpp" +#include "MueLu_GraphBase.hpp" #include "MueLu_Monitor.hpp" namespace MueLu { template -OnePtAggregationAlgorithm::OnePtAggregationAlgorithm(RCP const &/* graphFact */) -{ -} +OnePtAggregationAlgorithm:: + OnePtAggregationAlgorithm(RCP const & /* graphFact */) {} template -void OnePtAggregationAlgorithm::BuildAggregates(Teuchos::ParameterList const & /* params */, GraphBase const & graph, Aggregates & aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const { +void OnePtAggregationAlgorithm:: + BuildAggregates(Teuchos::ParameterList const & /* params */, + GraphBase const &graph, Aggregates &aggregates, + std::vector &aggStat, + LO &numNonAggregatedNodes) const { Monitor m(*this, "BuildAggregates"); const LocalOrdinal nRows = graph.GetNodeNumVertices(); const int myRank = graph.GetComm()->getRank(); // vertex ids for output - Teuchos::ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); - Teuchos::ArrayRCP procWinner =
aggregates.GetProcWinner()->getDataNonConst(0); + Teuchos::ArrayRCP vertex2AggId = + aggregates.GetVertex2AggId()->getDataNonConst(0); + Teuchos::ArrayRCP procWinner = + aggregates.GetProcWinner()->getDataNonConst(0); // some internal variables - LocalOrdinal nLocalAggregates = aggregates.GetNumAggregates(); // number of local aggregates on current proc - LocalOrdinal iNode1 = 0; // current node + LocalOrdinal nLocalAggregates = + aggregates + .GetNumAggregates(); // number of local aggregates on current proc + LocalOrdinal iNode1 = 0; // current node // main loop over all local rows of graph(A) while (iNode1 < nRows) { if (aggStat[iNode1] == ONEPT) { - aggregates.SetIsRoot(iNode1); // mark iNode1 as root node for new aggregate 'ag' + aggregates.SetIsRoot( + iNode1); // mark iNode1 as root node for new aggregate 'ag' std::vector aggList; aggList.push_back(iNode1); int aggIndex = nLocalAggregates++; @@ -113,7 +121,6 @@ void OnePtAggregationAlgorithm::BuildAggregat aggregates.SetNumAggregates(nLocalAggregates); } -} // end namespace - +} // namespace MueLu #endif /* MUELU_ONEPTAGGREGATIONALGORITHM_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_kokkos_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_kokkos_decl.hpp index 1cae818205c2..ba2dbc8495b1 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_kokkos_decl.hpp @@ -58,63 +58,63 @@ #include "MueLu_LWGraph_kokkos.hpp" namespace MueLu { - /*! - @class OnePtAggregationAlgorithm class. - @brief Algorithm for coarsening a graph with uncoupled aggregation. - keep special marked nodes as singleton node aggregates over all multigrid levels - - @ingroup Aggregation - - ### Idea ### - The user can mark some nodes as ONEPT to build some single node aggregates. 
- This can be very useful for certain applications. We build single node aggregates - for nodes with the state ONEPT. Then, the state is changed to ignored. - The OnePtAggregationAlgorithm should run before the Phase1AggregationAlgorithm. - - ### Comments ### - Only nodes with state ONEPT are changed to IGNORED. - - */ - - template - class OnePtAggregationAlgorithm_kokkos : - public MueLu::AggregationAlgorithmBase_kokkos { +/*! + @class OnePtAggregationAlgorithm class. + @brief Algorithm for coarsening a graph with uncoupled aggregation. + keep special marked nodes as singleton node aggregates over all multigrid + levels + + @ingroup Aggregation + + ### Idea ### + The user can mark some nodes as ONEPT to build some single node aggregates. + This can be very useful for certain applications. We build single node + aggregates for nodes with the state ONEPT. Then, the state is changed to + ignored. The OnePtAggregationAlgorithm should run before the + Phase1AggregationAlgorithm. + + ### Comments ### + Only nodes with state ONEPT are changed to IGNORED. + +*/ + +template +class OnePtAggregationAlgorithm_kokkos + : public MueLu::AggregationAlgorithmBase_kokkos { #undef MUELU_ONEPTAGGREGATIONALGORITHM_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - using device_type = typename LWGraph_kokkos::device_type; - using memory_space = typename LWGraph_kokkos::memory_space; - //! @name Constructors/Destructors. - //@{ - - //! Constructor. - OnePtAggregationAlgorithm_kokkos(RCP const &graphFact = Teuchos::null); - - //! Destructor. - virtual ~OnePtAggregationAlgorithm_kokkos() { } +public: + using device_type = typename LWGraph_kokkos::device_type; + using memory_space = typename LWGraph_kokkos::memory_space; + //! @name Constructors/Destructors. + //@{ - //@} + //! Constructor. + OnePtAggregationAlgorithm_kokkos( + RCP const &graphFact = Teuchos::null); + //! Destructor. + virtual ~OnePtAggregationAlgorithm_kokkos() {} - //! @name Aggregation methods. 
- //@{ + //@} - /*! @brief Local aggregation. */ + //! @name Aggregation methods. + //@{ - void BuildAggregates(Teuchos::ParameterList const & params, - LWGraph_kokkos const & graph, - Aggregates & aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; - //@} + /*! @brief Local aggregation. */ + void BuildAggregates(Teuchos::ParameterList const &params, + LWGraph_kokkos const &graph, Aggregates &aggregates, + Kokkos::View &aggStat, + LO &numNonAggregatedNodes) const; + //@} - }; //class OnePtAggregationAlgorithm_kokkos +}; // class OnePtAggregationAlgorithm_kokkos -} //namespace MueLu +} // namespace MueLu #define MUELU_ONEPTAGGREGATIONALGORITHM_KOKKOS_SHORT #endif // MUELU_ONEPTAGGREGATIONALGORITHM_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_kokkos_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_kokkos_def.hpp index 82dd4881b96c..99f5d08bab66 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_kokkos_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_OnePtAggregationAlgorithm_kokkos_def.hpp @@ -53,78 +53,83 @@ #include "MueLu_OnePtAggregationAlgorithm_kokkos_decl.hpp" -#include "MueLu_LWGraph_kokkos.hpp" #include "MueLu_Aggregates.hpp" #include "MueLu_Exceptions.hpp" +#include "MueLu_LWGraph_kokkos.hpp" #include "MueLu_Monitor.hpp" namespace MueLu { - template - OnePtAggregationAlgorithm_kokkos::OnePtAggregationAlgorithm_kokkos(RCP const &/* graphFact */) - { +template +OnePtAggregationAlgorithm_kokkos:: + OnePtAggregationAlgorithm_kokkos( + RCP const & /* graphFact */) {} + +template +void OnePtAggregationAlgorithm_kokkos:: + BuildAggregates( + Teuchos::ParameterList const & /* params */, + LWGraph_kokkos const &graph, Aggregates &aggregates, + Kokkos::View &aggstat, + LO &numNonAggregatedNodes) const { + Monitor m(*this, "BuildAggregates"); + + typename Kokkos::View::HostMirror aggstatHost = +
Kokkos::create_mirror(aggstat); + Kokkos::deep_copy(aggstatHost, aggstat); + std::vector aggStat; + aggStat.resize(aggstatHost.extent(0)); + for (size_t idx = 0; idx < aggstatHost.extent(0); ++idx) { + aggStat[idx] = aggstatHost(idx); } - template - void OnePtAggregationAlgorithm_kokkos:: - BuildAggregates(Teuchos::ParameterList const & /* params */, - LWGraph_kokkos const & graph, - Aggregates & aggregates, - Kokkos::View& aggstat, - LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildAggregates"); - - typename Kokkos::View::HostMirror aggstatHost - = Kokkos::create_mirror(aggstat); - Kokkos::deep_copy(aggstatHost, aggstat); - std::vector aggStat; - aggStat.resize(aggstatHost.extent(0)); - for(size_t idx = 0; idx < aggstatHost.extent(0); ++idx) { - aggStat[idx] = aggstatHost(idx); - } - - const LocalOrdinal nRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); - - // vertex ids for output - Teuchos::ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); - Teuchos::ArrayRCP procWinner = aggregates.GetProcWinner()->getDataNonConst(0); - - // some internal variables - LocalOrdinal nLocalAggregates = aggregates.GetNumAggregates(); // number of local aggregates on current proc - LocalOrdinal iNode1 = 0; // current node - - // main loop over all local rows of graph(A) - while (iNode1 < nRows) { - - if (aggStat[iNode1] == ONEPT) { - - aggregates.SetIsRoot(iNode1); // mark iNode1 as root node for new aggregate 'ag' - std::vector aggList; - aggList.push_back(iNode1); - int aggIndex = nLocalAggregates++; - - // finalize aggregate - for (size_t k = 0; k < aggList.size(); k++) { - aggStat[aggList[k]] = IGNORED; - vertex2AggId[aggList[k]] = aggIndex; - procWinner[aggList[k]] = myRank; - } - numNonAggregatedNodes -= aggList.size(); + const LocalOrdinal nRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); + + // vertex ids for output + Teuchos::ArrayRCP vertex2AggId = + 
aggregates.GetVertex2AggId()->getDataNonConst(0); + Teuchos::ArrayRCP procWinner = + aggregates.GetProcWinner()->getDataNonConst(0); + + // some internal variables + LocalOrdinal nLocalAggregates = + aggregates + .GetNumAggregates(); // number of local aggregates on current proc + LocalOrdinal iNode1 = 0; // current node + + // main loop over all local rows of graph(A) + while (iNode1 < nRows) { + + if (aggStat[iNode1] == ONEPT) { + + aggregates.SetIsRoot( + iNode1); // mark iNode1 as root node for new aggregate 'ag' + std::vector aggList; + aggList.push_back(iNode1); + int aggIndex = nLocalAggregates++; + + // finalize aggregate + for (size_t k = 0; k < aggList.size(); k++) { + aggStat[aggList[k]] = IGNORED; + vertex2AggId[aggList[k]] = aggIndex; + procWinner[aggList[k]] = myRank; } - - iNode1++; - } // end while - - for(size_t idx = 0; idx < aggstatHost.extent(0); ++idx) { - aggstatHost(idx) = aggStat[idx]; + numNonAggregatedNodes -= aggList.size(); } - Kokkos::deep_copy(aggstat, aggstatHost); - // update aggregate object - aggregates.SetNumAggregates(nLocalAggregates); + iNode1++; + } // end while + + for (size_t idx = 0; idx < aggstatHost.extent(0); ++idx) { + aggstatHost(idx) = aggStat[idx]; } + Kokkos::deep_copy(aggstat, aggstatHost); + + // update aggregate object + aggregates.SetNumAggregates(nLocalAggregates); +} -} // end namespace +} // namespace MueLu #endif // MUELU_ONEPTAGGREGATIONALGORITHM_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_decl.hpp index 8d1af9929a88..17af5bbc51f0 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_decl.hpp @@ -46,76 +46,81 @@ #ifndef MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_DECL_HPP_ #define 
MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_DECL_HPP_ -#include "MueLu_ConfigDefs.hpp" #include "MueLu_AggregationAlgorithmBase.hpp" +#include "MueLu_ConfigDefs.hpp" #include "MueLu_PreserveDirichletAggregationAlgorithm_fwd.hpp" -#include "MueLu_FactoryBase_fwd.hpp" #include "MueLu_Aggregates_fwd.hpp" +#include "MueLu_FactoryBase_fwd.hpp" #include "MueLu_GraphBase.hpp" namespace MueLu { - /*! - @class PreserveDirichletAggregationAlgorithm class. - @brief Builds one-to-one aggregates for all Dirichlet boundary nodes. For some applications this might - be necessary. (default = off) - - @ingroup Aggregation - - ### Idea ### - Handles Dirichlet boundary nodes with the state Boundary. - Depending on the boolean parameter "aggregation: preserve Dirichlet points" one-to-one aggregates - with singleton nodes are built for all Dirichlet boundary nodes or the aggregates are just - ignored (default behavior). The state of all boundary nodes (state = Boundary) - is set to ignored. That means, that these nodes are not considered for further - aggregation in the later aggregation phases. - - ### Parameters ### - Parameter | Meaning - ----------|-------- - aggregation: preserve Dirichlet points | Boolean parameter stating whether Dirichlet boundary nodes shall be aggregated in singleton aggregates (default: false). - - ### Comments ### - Only nodes with state BOUNDARY are changed to IGNORED. No other nodes are touched. - */ - - template - class PreserveDirichletAggregationAlgorithm : - public MueLu::AggregationAlgorithmBase { +/*! + @class PreserveDirichletAggregationAlgorithm class. + @brief Builds one-to-one aggregates for all Dirichlet boundary nodes. For some + applications this might be necessary. (default = off) + + @ingroup Aggregation + + ### Idea ### + Handles Dirichlet boundary nodes with the state Boundary. 
+ Depending on the boolean parameter "aggregation: preserve Dirichlet points" + one-to-one aggregates with singleton nodes are built for all Dirichlet + boundary nodes or the aggregates are just ignored (default behavior). The + state of all boundary nodes (state = Boundary) is set to ignored. That means, + that these nodes are not considered for further aggregation in the later + aggregation phases. + + ### Parameters ### + Parameter | Meaning + ----------|-------- + aggregation: preserve Dirichlet points | Boolean parameter stating whether + Dirichlet boundary nodes shall be aggregated in singleton aggregates (default: + false). + + ### Comments ### + Only nodes with state BOUNDARY are changed to IGNORED. No other nodes are + touched. +*/ + +template +class PreserveDirichletAggregationAlgorithm + : public MueLu::AggregationAlgorithmBase { #undef MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ +public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - PreserveDirichletAggregationAlgorithm(const RCP& /* graphFact */ = Teuchos::null) { } + //! Constructor. + PreserveDirichletAggregationAlgorithm( + const RCP & /* graphFact */ = Teuchos::null) {} - //! Destructor. - virtual ~PreserveDirichletAggregationAlgorithm() { } + //! Destructor. + virtual ~PreserveDirichletAggregationAlgorithm() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. 
*/ + void BuildAggregates(const Teuchos::ParameterList &params, + const GraphBase &graph, Aggregates &aggregates, + std::vector &aggStat, + LO &numNonAggregatedNodes) const; + //@} - void BuildAggregates(const Teuchos::ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const; - //@} + std::string description() const { return "Phase - (Dirichlet)"; } - std::string description() const { return "Phase - (Dirichlet)"; } +}; // class PreserveDirichletAggregationAlgorithm - }; //class PreserveDirichletAggregationAlgorithm - -} //namespace MueLu +} // namespace MueLu #define MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_SHORT - - #endif /* MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_def.hpp index 5dbd6d0dbf7f..6bbc7c8a445a 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_def.hpp @@ -53,44 +53,48 @@ #include "MueLu_PreserveDirichletAggregationAlgorithm_decl.hpp" -#include "MueLu_GraphBase.hpp" #include "MueLu_Aggregates.hpp" #include "MueLu_Exceptions.hpp" +#include "MueLu_GraphBase.hpp" #include "MueLu_Monitor.hpp" namespace MueLu { - template - void PreserveDirichletAggregationAlgorithm::BuildAggregates(Teuchos::ParameterList const & params, GraphBase const & graph, Aggregates & aggregates, std::vector& aggStat, LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildAggregates"); +template +void PreserveDirichletAggregationAlgorithm:: + BuildAggregates(Teuchos::ParameterList const &params, + GraphBase const &graph, Aggregates &aggregates, + std::vector &aggStat, + LO &numNonAggregatedNodes) const { + Monitor m(*this, "BuildAggregates"); -
bool preserve = params.get("aggregation: preserve Dirichlet points"); + bool preserve = params.get("aggregation: preserve Dirichlet points"); - const LO numRows = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); + const LO numRows = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); - ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); - ArrayRCP procWinner = aggregates.GetProcWinner() ->getDataNonConst(0); + ArrayRCP vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); + ArrayRCP procWinner = aggregates.GetProcWinner()->getDataNonConst(0); - LO numLocalAggregates = aggregates.GetNumAggregates(); + LO numLocalAggregates = aggregates.GetNumAggregates(); - for (LO i = 0; i < numRows; i++) - if (aggStat[i] == BOUNDARY) { - aggStat[i] = IGNORED; - numNonAggregatedNodes--; + for (LO i = 0; i < numRows; i++) + if (aggStat[i] == BOUNDARY) { + aggStat[i] = IGNORED; + numNonAggregatedNodes--; - if (preserve) { - aggregates.SetIsRoot(i); + if (preserve) { + aggregates.SetIsRoot(i); - vertex2AggId[i] = numLocalAggregates++; - procWinner [i] = myRank; - } + vertex2AggId[i] = numLocalAggregates++; + procWinner[i] = myRank; } + } - // update aggregate object - aggregates.SetNumAggregates(numLocalAggregates); - } + // update aggregate object + aggregates.SetNumAggregates(numLocalAggregates); +} -} // end namespace +} // namespace MueLu #endif /* MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_kokkos_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_kokkos_decl.hpp index 568889a49cb2..fadbf562bdb2 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_kokkos_decl.hpp @@ -58,72 
+58,75 @@ #include "MueLu_LWGraph_kokkos.hpp" namespace MueLu { - /*! - @class PreserveDirichletAggregationAlgorithm class. - @brief Builds one-to-one aggregates for all Dirichlet boundary nodes. For some applications this might - be necessary. (default = off) - - @ingroup Aggregation - - ### Idea ### - Handles Dirichlet boundary nodes with the state Boundary. - Depending on the boolean parameter "aggregation: preserve Dirichlet points" one-to-one aggregates - with singleton nodes are built for all Dirichlet boundary nodes or the aggregates are just - ignored (default behavior). The state of all boundary nodes (state = Boundary) - is set to ignored. That means, that these nodes are not considered for further - aggregation in the later aggregation phases. - - ### Parameters ### - Parameter | Meaning - ----------|-------- - aggregation: preserve Dirichlet points | Boolean parameter stating whether Dirichlet boundary nodes shall be aggregated in singleton aggregates (default: false). - - ### Comments ### - Only nodes with state BOUNDARY are changed to IGNORED. No other nodes are touched. - */ - - template - class PreserveDirichletAggregationAlgorithm_kokkos : - public MueLu::AggregationAlgorithmBase_kokkos { +/*! + @class PreserveDirichletAggregationAlgorithm class. + @brief Builds one-to-one aggregates for all Dirichlet boundary nodes. For some + applications this might be necessary. (default = off) + + @ingroup Aggregation + + ### Idea ### + Handles Dirichlet boundary nodes with the state Boundary. + Depending on the boolean parameter "aggregation: preserve Dirichlet points" + one-to-one aggregates with singleton nodes are built for all Dirichlet + boundary nodes or the aggregates are just ignored (default behavior). The + state of all boundary nodes (state = Boundary) is set to ignored. That means, + that these nodes are not considered for further aggregation in the later + aggregation phases. 
+ + ### Parameters ### + Parameter | Meaning + ----------|-------- + aggregation: preserve Dirichlet points | Boolean parameter stating whether + Dirichlet boundary nodes shall be aggregated in singleton aggregates (default: + false). + + ### Comments ### + Only nodes with state BOUNDARY are changed to IGNORED. No other nodes are + touched. +*/ + +template +class PreserveDirichletAggregationAlgorithm_kokkos + : public MueLu::AggregationAlgorithmBase_kokkos { #undef MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - using device_type = typename LWGraph_kokkos::device_type; - using execution_space = typename LWGraph_kokkos::execution_space; - using memory_space = typename LWGraph_kokkos::memory_space; +public: + using device_type = typename LWGraph_kokkos::device_type; + using execution_space = typename LWGraph_kokkos::execution_space; + using memory_space = typename LWGraph_kokkos::memory_space; - //! @name Constructors/Destructors. - //@{ + //! @name Constructors/Destructors. + //@{ - //! Constructor. - PreserveDirichletAggregationAlgorithm_kokkos(const RCP& /* graphFact */ = Teuchos::null) { } + //! Constructor. + PreserveDirichletAggregationAlgorithm_kokkos( + const RCP & /* graphFact */ = Teuchos::null) {} - //! Destructor. - virtual ~PreserveDirichletAggregationAlgorithm_kokkos() { } + //! Destructor. + virtual ~PreserveDirichletAggregationAlgorithm_kokkos() {} - //@} + //@} + //! @name Aggregation methods. + //@{ - //! @name Aggregation methods. - //@{ + /*! @brief Local aggregation. */ - /*! @brief Local aggregation. 
*/ + void BuildAggregates(const Teuchos::ParameterList &params, + const LWGraph_kokkos &graph, Aggregates &aggregates, + Kokkos::View &aggStat, + LO &numNonAggregatedNodes) const; + //@} - void BuildAggregates(const Teuchos::ParameterList& params, - const LWGraph_kokkos& graph, - Aggregates& aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const; - //@} + std::string description() const { return "Phase - (Dirichlet)"; } - std::string description() const { return "Phase - (Dirichlet)"; } +}; // class PreserveDirichletAggregationAlgorithm - }; //class PreserveDirichletAggregationAlgorithm - -} //namespace MueLu +} // namespace MueLu #define MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_KOKKOS_SHORT #endif // MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_kokkos_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_kokkos_def.hpp index 498640df9c43..f2c664d0cb3c 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_kokkos_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_PreserveDirichletAggregationAlgorithm_kokkos_def.hpp @@ -53,68 +53,73 @@ #include "MueLu_PreserveDirichletAggregationAlgorithm_kokkos_decl.hpp" -#include "MueLu_LWGraph_kokkos.hpp" #include "MueLu_Aggregates.hpp" #include "MueLu_Exceptions.hpp" +#include "MueLu_LWGraph_kokkos.hpp" #include "MueLu_Monitor.hpp" namespace MueLu { - template - void PreserveDirichletAggregationAlgorithm_kokkos:: - BuildAggregates(Teuchos::ParameterList const & params, - LWGraph_kokkos const & graph, - Aggregates & aggregates, - Kokkos::View& aggStat, - LO& numNonAggregatedNodes) const { - Monitor m(*this, "BuildAggregates"); - using local_ordinal_type = typename LWGraph_kokkos::local_ordinal_type; +template +void PreserveDirichletAggregationAlgorithm_kokkos:: + BuildAggregates( +
Teuchos::ParameterList const &params, LWGraph_kokkos const &graph, + Aggregates &aggregates, + Kokkos::View &aggStat, + LO &numNonAggregatedNodes) const { + Monitor m(*this, "BuildAggregates"); + using local_ordinal_type = typename LWGraph_kokkos::local_ordinal_type; - // Extract parameters and data from: - // 1) the parameter list - const bool preserve = params.get("aggregation: preserve Dirichlet points"); + // Extract parameters and data from: + // 1) the parameter list + const bool preserve = + params.get("aggregation: preserve Dirichlet points"); - // 2) the amalgamated graph - const LO numNodes = graph.GetNodeNumVertices(); - const int myRank = graph.GetComm()->getRank(); + // 2) the amalgamated graph + const LO numNodes = graph.GetNodeNumVertices(); + const int myRank = graph.GetComm()->getRank(); - // 3) the aggregates - auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite); + // 3) the aggregates + auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView( + Xpetra::Access::ReadWrite); + auto procWinner = + aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite); - // A view is needed to count on the fly the current number of local aggregates - Kokkos::View aggCount("aggCount"); - if(preserve) { - Kokkos::deep_copy(aggCount, aggregates.GetNumAggregates()); - } - Kokkos::parallel_for("MueLu - PreserveDirichlet: tagging ignored nodes", - Kokkos::RangePolicy(0, numNodes), - KOKKOS_LAMBDA(const local_ordinal_type nodeIdx) { - if (aggStat(nodeIdx) == BOUNDARY) { - aggStat(nodeIdx) = IGNORED; - const LO aggIdx = Kokkos::atomic_fetch_add(&aggCount(), 1); + // A view is needed to count on the fly the current number of local aggregates + Kokkos::View aggCount("aggCount"); + if (preserve) { + Kokkos::deep_copy(aggCount, aggregates.GetNumAggregates()); + } + Kokkos::parallel_for( + "MueLu -
PreserveDirichlet: tagging ignored nodes", + Kokkos::RangePolicy(0, numNodes), + KOKKOS_LAMBDA(const local_ordinal_type nodeIdx) { + if (aggStat(nodeIdx) == BOUNDARY) { + aggStat(nodeIdx) = IGNORED; + const LO aggIdx = Kokkos::atomic_fetch_add(&aggCount(), 1); - if (preserve) { - // aggregates.SetIsRoot(nodeIdx); + if (preserve) { + // aggregates.SetIsRoot(nodeIdx); - vertex2AggId(nodeIdx, 0) = aggIdx; - procWinner(nodeIdx, 0) = myRank; - } - } - }); - typename Kokkos::View::HostMirror aggCount_h - = Kokkos::create_mirror_view(aggCount); - Kokkos::deep_copy(aggCount_h, aggCount); - // In this phase the number of new aggregates is the same - // as the number of newly aggregated nodes. - numNonAggregatedNodes -= (aggCount_h() - aggregates.GetNumAggregates()); + vertex2AggId(nodeIdx, 0) = aggIdx; + procWinner(nodeIdx, 0) = myRank; + } + } + }); + typename Kokkos::View::HostMirror aggCount_h = + Kokkos::create_mirror_view(aggCount); + Kokkos::deep_copy(aggCount_h, aggCount); + // In this phase the number of new aggregates is the same + // as the number of newly aggregated nodes. 
+ numNonAggregatedNodes -= (aggCount_h() - aggregates.GetNumAggregates()); - // update aggregate object - if(preserve) { - aggregates.SetNumAggregates(aggCount_h()); - } + // update aggregate object + if (preserve) { + aggregates.SetNumAggregates(aggCount_h()); } +} -} // end namespace +} // namespace MueLu #endif // MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_decl.hpp index 21dd4ab2a9bb..8099082c0854 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_decl.hpp @@ -46,7 +46,6 @@ #ifndef MUELU_UNCOUPLEDAGGREGATIONFACTORY_DECL_HPP_ #define MUELU_UNCOUPLEDAGGREGATIONFACTORY_DECL_HPP_ - #include #include @@ -56,9 +55,9 @@ #include "MueLu_AggregationAlgorithmBase.hpp" #include "MueLu_InterfaceAggregationAlgorithm_fwd.hpp" +#include "MueLu_IsolatedNodeAggregationAlgorithm_fwd.hpp" #include "MueLu_OnePtAggregationAlgorithm_fwd.hpp" #include "MueLu_PreserveDirichletAggregationAlgorithm_fwd.hpp" -#include "MueLu_IsolatedNodeAggregationAlgorithm_fwd.hpp" #include "MueLu_AggregationPhase1Algorithm_fwd.hpp" #include "MueLu_AggregationPhase2aAlgorithm_fwd.hpp" @@ -67,9 +66,9 @@ #include "MueLu_Level_fwd.hpp" //#include "MueLu_Graph_fwd.hpp" -#include "MueLu_GraphBase_fwd.hpp" #include "MueLu_Aggregates_fwd.hpp" #include "MueLu_Exceptions.hpp" +#include "MueLu_GraphBase_fwd.hpp" namespace MueLu { @@ -77,69 +76,94 @@ namespace MueLu { @class UncoupledAggregationFactory class. @brief Factory for building uncoupled aggregates. - Factory for creating uncoupled aggregates from the amalgamated graph of A. The uncoupled aggregation method - uses several aggregation phases which put together all nodes into aggregates. 
+ Factory for creating uncoupled aggregates from the amalgamated graph of A. + The uncoupled aggregation method uses several aggregation phases which put + together all nodes into aggregates. ## Aggregation phases ## AggregationAlgorithm | Short description ---------------------|------------------ - PreserveDirichletAggregationAlgorithm | Handle Dirichlet nodes. Decide whether to drop/ignore them in the aggregation or keep them as singleton nodes. - OnePtAggregationAlgorithm | Special handling for nodes with status ONEPT. A user can mark special nodes for singleton aggregates or a user-specified handling. This aggregation phase has to be switched on by the user if necessary (default = off). - AggregationPhase1Algorithm | Build new aggregates - AggregationPhase2aAlgorithm | Build aggregates of reasonable size from leftover nodes - AggregationPhase2bAlgorithm | Add leftover nodes to existing aggregates - AggregationPhase3Algorithm | Handle leftover nodes. Try to avoid singletons - IsolatedNodeAggregationAlgorithm | Drop/ignore leftover nodes + PreserveDirichletAggregationAlgorithm | Handle Dirichlet nodes. Decide + whether to drop/ignore them in the aggregation or keep them as singleton + nodes. OnePtAggregationAlgorithm | Special handling for nodes with status + ONEPT. A user can mark special nodes for singleton aggregates or a + user-specified handling. This aggregation phase has to be switched on by the + user if necessary (default = off). AggregationPhase1Algorithm | Build new + aggregates AggregationPhase2aAlgorithm | Build aggregates of reasonable size + from leftover nodes AggregationPhase2bAlgorithm | Add leftover nodes to + existing aggregates AggregationPhase3Algorithm | Handle leftover nodes. 
Try + to avoid singletons IsolatedNodeAggregationAlgorithm | Drop/ignore leftover + nodes Internally, each node has a status which can be one of the following: Node status | Meaning ------------|--------- - READY | Node is not aggregated and can be used for building a new aggregate or can be added to an existing aggregate. - AGGREGATED | Node is aggregated. - IGNORED | Node is not considered for aggregation (it may have been dropped or put into a singleton aggregate) - BOUNDARY | Node is a Dirichlet boundary node (with one or more Dirichlet boundary conditions). - ONEPT | The user forces the aggregation algorithm to treat the node as a singleton. Important: Do not forget to set aggregation: allow user-specified singletons to true! Otherwise Phase3 will just handle the ONEPT nodes and probably not build singletons + READY | Node is not aggregated and can be used for building a new + aggregate or can be added to an existing aggregate. AGGREGATED | Node is + aggregated. IGNORED | Node is not considered for aggregation (it may have + been dropped or put into a singleton aggregate) BOUNDARY | Node is a + Dirichlet boundary node (with one or more Dirichlet boundary conditions). + ONEPT | The user forces the aggregation algorithm to treat the node as + a singleton. Important: Do not forget to set aggregation: allow + user-specified singletons to true! 
Otherwise Phase3 will just handle the + ONEPT nodes and probably not build singletons @ingroup Aggregation ## Input/output of UncoupledAggregationFactory ## ### User parameters of UncoupledAggregationFactory ### - Parameter | type | default | master.xml | validated | requested | description + Parameter | type | default | master.xml | validated | requested | + description ----------|------|---------|:----------:|:---------:|:---------:|------------ - Graph | Factory | null | | * | * | Generating factory of the graph of A - DofsPerNode | Factory | null | | * | * | Generating factory for variable 'DofsPerNode', usually the same as for 'Graph' - OnePt aggregate map name | string | | | * | * | Name of input map for single node aggregates (default=''). Makes only sense if the parameter 'aggregation: allow user-specified singletons' is set to true. - OnePt aggregate map factory | Factory | null | | * | * | Generating factory of (DOF) map for single node aggregates. Makes only sense if the parameter 'aggregation: allow user-specified singletons' is set to true. - aggregation: max agg size | int | see master.xml | * | * | | Maximum number of nodes per aggregate. - aggregation: min agg size | int | see master.xml | * | * | | Minimum number of nodes necessary to build a new aggregate. - aggregation: max selected neighbors | int | see master.xml | * | * | | Maximum number of neighbor nodes already in aggregate (needed in Phase1) - aggregation: ordering | string | "natural" | * | * | | Ordering of node aggregation (can be either "natural", "graph" or "random"). 
- aggregation: enable phase 1 | bool | true | * | * | |Turn on/off phase 1 aggregation - aggregation: enable phase 2a | bool | true | * | * | |Turn on/off phase 2a aggregation - aggregation: enable phase 2b | bool | true | * | * | |Turn on/off phase 2b aggregation - aggregation: enable phase 3 | bool | true | * | * | |Turn on/off phase 3 aggregation - aggregation: preserve Dirichlet points | bool | false | * | * | | preserve Dirichlet points as singleton nodes (default=false, i.e., drop Dirichlet nodes during aggregation) - aggregation: allow user-specified singletons | bool | false | * | * | | Turn on/off OnePtAggregationAlgorithm (default=false) - - - The * in the @c master.xml column denotes that the parameter is defined in the @c master.xml file.
- The * in the @c validated column means that the parameter is declared in the list of valid input parameters (see UncoupledAggregationFactory::GetValidParameters).
- The * in the @c requested column states that the data is requested as input with all dependencies (see UncoupledAggregationFactory::DeclareInput). + Graph | Factory | null | | * | * | Generating factory of the + graph of A DofsPerNode | Factory | null | | * | * | Generating + factory for variable 'DofsPerNode', usually the same as for 'Graph' OnePt + aggregate map name | string | | | * | * | Name of input map for single node + aggregates (default=''). Makes only sense if the parameter 'aggregation: + allow user-specified singletons' is set to true. OnePt aggregate map factory + | Factory | null | | * | * | Generating factory of (DOF) map for single + node aggregates. Makes only sense if the parameter 'aggregation: allow + user-specified singletons' is set to true. aggregation: max agg size | int | + see master.xml | * | * | | Maximum number of nodes per aggregate. + aggregation: min agg size | int | see master.xml | * | * | | Minimum + number of nodes necessary to build a new aggregate. aggregation: max selected + neighbors | int | see master.xml | * | * | | Maximum number of neighbor + nodes already in aggregate (needed in Phase1) aggregation: ordering | string + | "natural" | * | * | | Ordering of node aggregation (can be either + "natural", "graph" or "random"). 
aggregation: enable phase 1 | bool | true | + * | * | |Turn on/off phase 1 aggregation aggregation: enable phase 2a | + bool | true | * | * | |Turn on/off phase 2a aggregation aggregation: enable + phase 2b | bool | true | * | * | |Turn on/off phase 2b aggregation + aggregation: enable phase 3 | bool | true | * | * | |Turn on/off phase 3 + aggregation aggregation: preserve Dirichlet points | bool | false | * | * | + | preserve Dirichlet points as singleton nodes (default=false, i.e., drop + Dirichlet nodes during aggregation) aggregation: allow user-specified + singletons | bool | false | * | * | | Turn on/off OnePtAggregationAlgorithm + (default=false) + + + The * in the @c master.xml column denotes that the parameter is defined in + the @c master.xml file.
The * in the @c validated column means that the + parameter is declared in the list of valid input parameters (see + UncoupledAggregationFactory::GetValidParameters).
The * in the @c + requested column states that the data is requested as input with all + dependencies (see UncoupledAggregationFactory::DeclareInput). ### Variables provided by UncoupledAggregationFactory ### - After UncoupledAggregationFactory::Build the following data is available (if requested) + After UncoupledAggregationFactory::Build the following data is available (if + requested) Parameter | generated by | description ----------|--------------|------------ - | Aggregates | UncoupledAggregationFactory | Container class with aggregation information. See also Aggregates. + | Aggregates | UncoupledAggregationFactory | Container class with + aggregation information. See also Aggregates. */ -template +template class UncoupledAggregationFactory : public SingleLevelFactoryBase { #undef MUELU_UNCOUPLEDAGGREGATIONFACTORY_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" @@ -152,7 +176,7 @@ class UncoupledAggregationFactory : public SingleLevelFactoryBase { UncoupledAggregationFactory(); //! Destructor. 
- virtual ~UncoupledAggregationFactory() { } + virtual ~UncoupledAggregationFactory() {} RCP GetValidParameterList() const; @@ -164,36 +188,43 @@ class UncoupledAggregationFactory : public SingleLevelFactoryBase { // Options shared by all aggregation algorithms // deprecated - void SetOrdering(const std::string& ordering) { + void SetOrdering(const std::string &ordering) { SetParameter("aggregation: ordering", ParameterEntry(ordering)); } // deprecated void SetMaxNeighAlreadySelected(int maxNeighAlreadySelected) { - SetParameter("aggregation: max selected neighbors", ParameterEntry(Teuchos::as(maxNeighAlreadySelected))); // revalidate + SetParameter("aggregation: max selected neighbors", + ParameterEntry(Teuchos::as( + maxNeighAlreadySelected))); // revalidate } // deprecated void SetMinNodesPerAggregate(int minNodesPerAggregate) { - SetParameter("aggregation: min agg size", ParameterEntry(Teuchos::as(minNodesPerAggregate))); // revalidate + SetParameter("aggregation: min agg size", + ParameterEntry(Teuchos::as( + minNodesPerAggregate))); // revalidate } // set information about 1-node aggregates (map name and generating factory) - void SetOnePtMapName(const std::string name, Teuchos::RCP mapFact) { - SetParameter("OnePt aggregate map name", ParameterEntry(std::string(name))); // revalidate - SetFactory("OnePt aggregate map factory",mapFact); + void SetOnePtMapName(const std::string name, + Teuchos::RCP mapFact) { + SetParameter("OnePt aggregate map name", + ParameterEntry(std::string(name))); // revalidate + SetFactory("OnePt aggregate map factory", mapFact); } // deprecated - const std::string& GetOrdering() const { - const ParameterList& pL = GetParameterList(); + const std::string &GetOrdering() const { + const ParameterList &pL = GetParameterList(); return pL.get("aggregation: ordering"); } // deprecated int GetMaxNeighAlreadySelected() const { - const ParameterList& pL = GetParameterList(); - return Teuchos::as(pL.get("aggregation: max selected neighbors")); 
+ const ParameterList &pL = GetParameterList(); + return Teuchos::as( + pL.get("aggregation: max selected neighbors")); } // deprecated int GetMinNodesPerAggregate() const { - const ParameterList& pL = GetParameterList(); + const ParameterList &pL = GetParameterList(); return Teuchos::as(pL.get("aggregation: min agg size")); } @@ -217,18 +248,21 @@ class UncoupledAggregationFactory : public SingleLevelFactoryBase { //! @name Definition methods //@{ - /*! @brief Append a new aggregation algorithm to list of aggregation algorithms */ - //void Append(const RCP > & alg); + /*! @brief Append a new aggregation algorithm to list of aggregation + * algorithms */ + // void Append(const RCP > & alg); /*! @brief Remove all aggregation algorithms from list */ - //void ClearAggregationAlgorithms() { algos_.clear(); } + // void ClearAggregationAlgorithms() { algos_.clear(); } //@} private: - //! aggregation algorithms // will be filled in Build routine - mutable std::vector > > algos_; + mutable std::vector< + RCP>> + algos_; //! boolean flag: definition phase //! if true, the aggregation algorithms still can be set and changed. 
@@ -237,7 +271,7 @@ class UncoupledAggregationFactory : public SingleLevelFactoryBase { }; // class UncoupledAggregationFactory -} +} // namespace MueLu #define MUELU_UNCOUPLEDAGGREGATIONFACTORY_SHORT #endif /* MUELU_UNCOUPLEDAGGREGATIONFACTORY_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_def.hpp index dfaae83ba7f2..66f328ca13c1 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_def.hpp @@ -49,8 +49,8 @@ #include #include -#include #include +#include #include #include "MueLu_UncoupledAggregationFactory_decl.hpp" @@ -64,245 +64,299 @@ #include "MueLu_AggregationPhase2bAlgorithm.hpp" #include "MueLu_AggregationPhase3Algorithm.hpp" -#include "MueLu_Level.hpp" -#include "MueLu_GraphBase.hpp" #include "MueLu_Aggregates.hpp" +#include "MueLu_GraphBase.hpp" +#include "MueLu_Level.hpp" #include "MueLu_MasterList.hpp" #include "MueLu_Monitor.hpp" namespace MueLu { - template - UncoupledAggregationFactory::UncoupledAggregationFactory() - : bDefinitionPhase_(true) - { } - - template - RCP UncoupledAggregationFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - // Aggregation parameters (used in aggregation algorithms) - // TODO introduce local member function for each aggregation algorithm such that each aggregation algorithm can define its own parameters - - typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; -#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("aggregation: max agg size"); - SET_VALID_ENTRY("aggregation: min agg size"); - SET_VALID_ENTRY("aggregation: max selected neighbors"); - SET_VALID_ENTRY("aggregation: ordering"); - validParamList->getEntry("aggregation: 
ordering").setValidator( - rcp(new validatorType(Teuchos::tuple("natural", "graph", "random"), "aggregation: ordering"))); - SET_VALID_ENTRY("aggregation: enable phase 1"); - SET_VALID_ENTRY("aggregation: enable phase 2a"); - SET_VALID_ENTRY("aggregation: enable phase 2b"); - SET_VALID_ENTRY("aggregation: enable phase 3"); - SET_VALID_ENTRY("aggregation: match ML phase2a"); - SET_VALID_ENTRY("aggregation: phase2a agg factor"); - SET_VALID_ENTRY("aggregation: preserve Dirichlet points"); - SET_VALID_ENTRY("aggregation: allow user-specified singletons"); - SET_VALID_ENTRY("aggregation: use interface aggregation"); - SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors"); - SET_VALID_ENTRY("aggregation: phase3 avoid singletons"); - SET_VALID_ENTRY("aggregation: compute aggregate qualities"); - SET_VALID_ENTRY("aggregation: phase 1 algorithm"); -#undef SET_VALID_ENTRY - - // general variables needed in AggregationFactory - validParamList->set< RCP >("Graph", null, "Generating factory of the graph"); - validParamList->set< RCP >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'"); - validParamList->set< RCP >("AggregateQualities", null, "Generating factory for variable \'AggregateQualities\'"); - - // special variables necessary for OnePtAggregationAlgorithm - validParamList->set< std::string > ("OnePt aggregate map name", "", "Name of input map for single node aggregates. 
(default='')"); - validParamList->set< std::string > ("OnePt aggregate map factory", "", "Generating factory of (DOF) map for single node aggregates."); - //validParamList->set< RCP >("OnePt aggregate map factory", NoFactory::getRCP(), "Generating factory of (DOF) map for single node aggregates."); - - // InterfaceAggregation parameters - //validParamList->set< bool > ("aggregation: use interface aggregation", "false", "Flag to trigger aggregation along an interface using specified aggregate seeds."); - validParamList->set< std::string > ("Interface aggregate map name", "", "Name of input map for interface aggregates. (default='')"); - validParamList->set< std::string > ("Interface aggregate map factory", "", "Generating factory of (DOF) map for interface aggregates."); - validParamList->set > ("nodeOnInterface", Teuchos::null, "Array specifying whether or not a node is on the interface (1 or 0)."); - - return validParamList; - } - - template - void UncoupledAggregationFactory::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "Graph"); - Input(currentLevel, "DofsPerNode"); - - const ParameterList& pL = GetParameterList(); - - // request special data necessary for OnePtAggregationAlgorithm - std::string mapOnePtName = pL.get("OnePt aggregate map name"); - if (mapOnePtName.length() > 0) { - std::string mapOnePtFactName = pL.get("OnePt aggregate map factory"); - if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { - currentLevel.DeclareInput(mapOnePtName, NoFactory::get()); - } else { - RCP mapOnePtFact = GetFactory(mapOnePtFactName); - currentLevel.DeclareInput(mapOnePtName, mapOnePtFact.get()); - } +template +UncoupledAggregationFactory::UncoupledAggregationFactory() + : bDefinitionPhase_(true) {} + +template +RCP +UncoupledAggregationFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + + // Aggregation parameters (used in aggregation algorithms) + // TODO introduce local member function for each 
aggregation algorithm such + // that each aggregation algorithm can define its own parameters + + typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; +#define SET_VALID_ENTRY(name) \ + validParamList->setEntry(name, MasterList::getEntry(name)) + SET_VALID_ENTRY("aggregation: max agg size"); + SET_VALID_ENTRY("aggregation: min agg size"); + SET_VALID_ENTRY("aggregation: max selected neighbors"); + SET_VALID_ENTRY("aggregation: ordering"); + validParamList->getEntry("aggregation: ordering") + .setValidator(rcp(new validatorType( + Teuchos::tuple("natural", "graph", "random"), + "aggregation: ordering"))); + SET_VALID_ENTRY("aggregation: enable phase 1"); + SET_VALID_ENTRY("aggregation: enable phase 2a"); + SET_VALID_ENTRY("aggregation: enable phase 2b"); + SET_VALID_ENTRY("aggregation: enable phase 3"); + SET_VALID_ENTRY("aggregation: match ML phase2a"); + SET_VALID_ENTRY("aggregation: phase2a agg factor"); + SET_VALID_ENTRY("aggregation: preserve Dirichlet points"); + SET_VALID_ENTRY("aggregation: allow user-specified singletons"); + SET_VALID_ENTRY("aggregation: use interface aggregation"); + SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors"); + SET_VALID_ENTRY("aggregation: phase3 avoid singletons"); + SET_VALID_ENTRY("aggregation: compute aggregate qualities"); + SET_VALID_ENTRY("aggregation: phase 1 algorithm"); +#undef SET_VALID_ENTRY + + // general variables needed in AggregationFactory + validParamList->set>( + "Graph", null, "Generating factory of the graph"); + validParamList->set>( + "DofsPerNode", null, + "Generating factory for variable \'DofsPerNode\', usually the same as " + "for \'Graph\'"); + validParamList->set>( + "AggregateQualities", null, + "Generating factory for variable \'AggregateQualities\'"); + + // special variables necessary for OnePtAggregationAlgorithm + validParamList->set( + "OnePt aggregate map name", "", + "Name of input map for single node aggregates. 
(default='')"); + validParamList->set( + "OnePt aggregate map factory", "", + "Generating factory of (DOF) map for single node aggregates."); + // validParamList->set< RCP >("OnePt aggregate map + // factory", NoFactory::getRCP(), "Generating factory of (DOF) map for + // single node aggregates."); + + // InterfaceAggregation parameters + // validParamList->set< bool > ("aggregation: use interface + // aggregation", "false", "Flag to trigger aggregation along an interface + // using specified aggregate seeds."); + validParamList->set( + "Interface aggregate map name", "", + "Name of input map for interface aggregates. (default='')"); + validParamList->set( + "Interface aggregate map factory", "", + "Generating factory of (DOF) map for interface aggregates."); + validParamList->set>( + "nodeOnInterface", Teuchos::null, + "Array specifying whether or not a node is on the interface (1 or 0)."); + + return validParamList; +} + +template +void UncoupledAggregationFactory::DeclareInput(Level ¤tLevel) + const { + Input(currentLevel, "Graph"); + Input(currentLevel, "DofsPerNode"); + + const ParameterList &pL = GetParameterList(); + + // request special data necessary for OnePtAggregationAlgorithm + std::string mapOnePtName = pL.get("OnePt aggregate map name"); + if (mapOnePtName.length() > 0) { + std::string mapOnePtFactName = + pL.get("OnePt aggregate map factory"); + if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { + currentLevel.DeclareInput(mapOnePtName, NoFactory::get()); + } else { + RCP mapOnePtFact = GetFactory(mapOnePtFactName); + currentLevel.DeclareInput(mapOnePtName, mapOnePtFact.get()); } + } - // request special data necessary for InterfaceAggregation - if (pL.get("aggregation: use interface aggregation") == true){ - if(currentLevel.GetLevelID() == 0) { - if(currentLevel.IsAvailable("nodeOnInterface", NoFactory::get())) { - currentLevel.DeclareInput("nodeOnInterface", NoFactory::get(), this); - } else { - 
TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("nodeOnInterface", NoFactory::get()), - Exceptions::RuntimeError, - "nodeOnInterface was not provided by the user on level0!"); - } + // request special data necessary for InterfaceAggregation + if (pL.get("aggregation: use interface aggregation") == true) { + if (currentLevel.GetLevelID() == 0) { + if (currentLevel.IsAvailable("nodeOnInterface", NoFactory::get())) { + currentLevel.DeclareInput("nodeOnInterface", NoFactory::get(), this); } else { - Input(currentLevel, "nodeOnInterface"); + TEUCHOS_TEST_FOR_EXCEPTION( + currentLevel.IsAvailable("nodeOnInterface", NoFactory::get()), + Exceptions::RuntimeError, + "nodeOnInterface was not provided by the user on level0!"); } - } - - if (pL.get("aggregation: compute aggregate qualities")) { - Input(currentLevel, "AggregateQualities"); + } else { + Input(currentLevel, "nodeOnInterface"); } } - template - void UncoupledAggregationFactory::Build(Level ¤tLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); - - ParameterList pL = GetParameterList(); - bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed - - if (pL.get("aggregation: max agg size") == -1) - pL.set("aggregation: max agg size", INT_MAX); - - // define aggregation algorithms - RCP graphFact = GetFactory("Graph"); - - // TODO Can we keep different aggregation algorithms over more Build calls? 
- algos_.clear(); - algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm(graphFact))); - if (pL.get("aggregation: use interface aggregation") == true) algos_.push_back(rcp(new InterfaceAggregationAlgorithm (graphFact))); - if (pL.get("aggregation: allow user-specified singletons") == true) algos_.push_back(rcp(new OnePtAggregationAlgorithm (graphFact))); - if (pL.get("aggregation: enable phase 1" ) == true) algos_.push_back(rcp(new AggregationPhase1Algorithm (graphFact))); - if (pL.get("aggregation: enable phase 2a") == true) algos_.push_back(rcp(new AggregationPhase2aAlgorithm (graphFact))); - if (pL.get("aggregation: enable phase 2b") == true) algos_.push_back(rcp(new AggregationPhase2bAlgorithm (graphFact))); - if (pL.get("aggregation: enable phase 3" ) == true) algos_.push_back(rcp(new AggregationPhase3Algorithm (graphFact))); - - // TODO: remove old aggregation mode - //if (pL.get("UseOnePtAggregationAlgorithm") == true) algos_.push_back(rcp(new OnePtAggregationAlgorithm (graphFact))); - //if (pL.get("UseUncoupledAggregationAlgorithm") == true) algos_.push_back(rcp(new AggregationPhase1Algorithm (graphFact))); - //if (pL.get("UseMaxLinkAggregationAlgorithm") == true) algos_.push_back(rcp(new MaxLinkAggregationAlgorithm (graphFact))); - //if (pL.get("UseEmergencyAggregationAlgorithm") == true) algos_.push_back(rcp(new EmergencyAggregationAlgorithm (graphFact))); - - std::string mapOnePtName = pL.get("OnePt aggregate map name"); - RCP OnePtMap = Teuchos::null; - if (mapOnePtName.length()) { - std::string mapOnePtFactName = pL.get("OnePt aggregate map factory"); - if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { - OnePtMap = currentLevel.Get >(mapOnePtName, NoFactory::get()); - } else { - RCP mapOnePtFact = GetFactory(mapOnePtFactName); - OnePtMap = currentLevel.Get >(mapOnePtName, mapOnePtFact.get()); - } + if (pL.get("aggregation: compute aggregate qualities")) { + Input(currentLevel, "AggregateQualities"); + } +} + +template +void 
UncoupledAggregationFactory::Build( + Level ¤tLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + + ParameterList pL = GetParameterList(); + bDefinitionPhase_ = false; // definition phase is finished, now all + // aggregation algorithm information is fixed + + if (pL.get("aggregation: max agg size") == -1) + pL.set("aggregation: max agg size", INT_MAX); + + // define aggregation algorithms + RCP graphFact = GetFactory("Graph"); + + // TODO Can we keep different aggregation algorithms over more Build calls? + algos_.clear(); + algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm(graphFact))); + if (pL.get("aggregation: use interface aggregation") == true) + algos_.push_back(rcp(new InterfaceAggregationAlgorithm(graphFact))); + if (pL.get("aggregation: allow user-specified singletons") == true) + algos_.push_back(rcp(new OnePtAggregationAlgorithm(graphFact))); + if (pL.get("aggregation: enable phase 1") == true) + algos_.push_back(rcp(new AggregationPhase1Algorithm(graphFact))); + if (pL.get("aggregation: enable phase 2a") == true) + algos_.push_back(rcp(new AggregationPhase2aAlgorithm(graphFact))); + if (pL.get("aggregation: enable phase 2b") == true) + algos_.push_back(rcp(new AggregationPhase2bAlgorithm(graphFact))); + if (pL.get("aggregation: enable phase 3") == true) + algos_.push_back(rcp(new AggregationPhase3Algorithm(graphFact))); + + // TODO: remove old aggregation mode + // if (pL.get("UseOnePtAggregationAlgorithm") == true) + // algos_.push_back(rcp(new OnePtAggregationAlgorithm (graphFact))); if + // (pL.get("UseUncoupledAggregationAlgorithm") == true) + // algos_.push_back(rcp(new AggregationPhase1Algorithm (graphFact))); if + // (pL.get("UseMaxLinkAggregationAlgorithm") == true) + // algos_.push_back(rcp(new MaxLinkAggregationAlgorithm (graphFact))); if + // (pL.get("UseEmergencyAggregationAlgorithm") == true) + // algos_.push_back(rcp(new EmergencyAggregationAlgorithm (graphFact))); + + std::string mapOnePtName = pL.get("OnePt 
aggregate map name"); + RCP OnePtMap = Teuchos::null; + if (mapOnePtName.length()) { + std::string mapOnePtFactName = + pL.get("OnePt aggregate map factory"); + if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { + OnePtMap = currentLevel.Get>(mapOnePtName, NoFactory::get()); + } else { + RCP mapOnePtFact = GetFactory(mapOnePtFactName); + OnePtMap = currentLevel.Get>(mapOnePtName, mapOnePtFact.get()); } + } - // Set map for interface aggregates - std::string mapInterfaceName = pL.get("Interface aggregate map name"); - RCP InterfaceMap = Teuchos::null; + // Set map for interface aggregates + std::string mapInterfaceName = + pL.get("Interface aggregate map name"); + RCP InterfaceMap = Teuchos::null; - RCP graph = Get< RCP >(currentLevel, "Graph"); + RCP graph = Get>(currentLevel, "Graph"); - // Build - RCP aggregates = rcp(new Aggregates(*graph)); - aggregates->setObjectLabel("UC"); + // Build + RCP aggregates = rcp(new Aggregates(*graph)); + aggregates->setObjectLabel("UC"); - const LO numRows = graph->GetNodeNumVertices(); + const LO numRows = graph->GetNodeNumVertices(); - // construct aggStat information - std::vector aggStat(numRows, READY); + // construct aggStat information + std::vector aggStat(numRows, READY); - // interface - if (pL.get("aggregation: use interface aggregation") == true){ - Teuchos::Array nodeOnInterface = Get>(currentLevel,"nodeOnInterface"); - for (LO i = 0; i < numRows; i++) { - if (nodeOnInterface[i]) - aggStat[i] = INTERFACE; - } + // interface + if (pL.get("aggregation: use interface aggregation") == true) { + Teuchos::Array nodeOnInterface = + Get>(currentLevel, "nodeOnInterface"); + for (LO i = 0; i < numRows; i++) { + if (nodeOnInterface[i]) + aggStat[i] = INTERFACE; } + } - ArrayRCP dirichletBoundaryMap = graph->GetBoundaryNodeMap(); - if (dirichletBoundaryMap != Teuchos::null) - for (LO i = 0; i < numRows; i++) - if (dirichletBoundaryMap[i] == true) - aggStat[i] = BOUNDARY; - - LO nDofsPerNode = Get(currentLevel, 
"DofsPerNode"); - GO indexBase = graph->GetDomainMap()->getIndexBase(); - if (OnePtMap != Teuchos::null) { - for (LO i = 0; i < numRows; i++) { - // reconstruct global row id (FIXME only works for contiguous maps) - GO grid = (graph->GetDomainMap()->getGlobalElement(i)-indexBase) * nDofsPerNode + indexBase; - - for (LO kr = 0; kr < nDofsPerNode; kr++) - if (OnePtMap->isNodeGlobalElement(grid + kr)) - aggStat[i] = ONEPT; - } + ArrayRCP dirichletBoundaryMap = graph->GetBoundaryNodeMap(); + if (dirichletBoundaryMap != Teuchos::null) + for (LO i = 0; i < numRows; i++) + if (dirichletBoundaryMap[i] == true) + aggStat[i] = BOUNDARY; + + LO nDofsPerNode = Get(currentLevel, "DofsPerNode"); + GO indexBase = graph->GetDomainMap()->getIndexBase(); + if (OnePtMap != Teuchos::null) { + for (LO i = 0; i < numRows; i++) { + // reconstruct global row id (FIXME only works for contiguous maps) + GO grid = (graph->GetDomainMap()->getGlobalElement(i) - indexBase) * + nDofsPerNode + + indexBase; + + for (LO kr = 0; kr < nDofsPerNode; kr++) + if (OnePtMap->isNodeGlobalElement(grid + kr)) + aggStat[i] = ONEPT; } + } - - - const RCP > comm = graph->GetComm(); - GO numGlobalRows = 0; - if (IsPrint(Statistics1)) - MueLu_sumAll(comm, as(numRows), numGlobalRows); - - LO numNonAggregatedNodes = numRows; - GO numGlobalAggregatedPrev = 0, numGlobalAggsPrev = 0; - for (size_t a = 0; a < algos_.size(); a++) { - std::string phase = algos_[a]->description(); - SubFactoryMonitor sfm(*this, "Algo " + phase, currentLevel); - - int oldRank = algos_[a]->SetProcRankVerbose(this->GetProcRankVerbose()); - algos_[a]->BuildAggregates(pL, *graph, *aggregates, aggStat, numNonAggregatedNodes); - algos_[a]->SetProcRankVerbose(oldRank); - - if (IsPrint(Statistics1)) { - GO numLocalAggregated = numRows - numNonAggregatedNodes, numGlobalAggregated = 0; - GO numLocalAggs = aggregates->GetNumAggregates(), numGlobalAggs = 0; - MueLu_sumAll(comm, numLocalAggregated, numGlobalAggregated); - MueLu_sumAll(comm, 
numLocalAggs, numGlobalAggs); - - double aggPercent = 100*as(numGlobalAggregated)/as(numGlobalRows); - if (aggPercent > 99.99 && aggPercent < 100.00) { - // Due to round off (for instance, for 140465733/140466897), we could - // get 100.00% display even if there are some remaining nodes. This - // is bad from the users point of view. It is much better to change - // it to display 99.99%. - aggPercent = 99.99; - } - GetOStream(Statistics1) << " aggregated : " << (numGlobalAggregated - numGlobalAggregatedPrev) << " (phase), " << std::fixed - << std::setprecision(2) << numGlobalAggregated << "/" << numGlobalRows << " [" << aggPercent << "%] (total)\n" - << " remaining : " << numGlobalRows - numGlobalAggregated << "\n" - << " aggregates : " << numGlobalAggs-numGlobalAggsPrev << " (phase), " << numGlobalAggs << " (total)" << std::endl; - numGlobalAggregatedPrev = numGlobalAggregated; - numGlobalAggsPrev = numGlobalAggs; + const RCP> comm = graph->GetComm(); + GO numGlobalRows = 0; + if (IsPrint(Statistics1)) + MueLu_sumAll(comm, as(numRows), numGlobalRows); + + LO numNonAggregatedNodes = numRows; + GO numGlobalAggregatedPrev = 0, numGlobalAggsPrev = 0; + for (size_t a = 0; a < algos_.size(); a++) { + std::string phase = algos_[a]->description(); + SubFactoryMonitor sfm(*this, "Algo " + phase, currentLevel); + + int oldRank = algos_[a]->SetProcRankVerbose(this->GetProcRankVerbose()); + algos_[a]->BuildAggregates(pL, *graph, *aggregates, aggStat, + numNonAggregatedNodes); + algos_[a]->SetProcRankVerbose(oldRank); + + if (IsPrint(Statistics1)) { + GO numLocalAggregated = numRows - numNonAggregatedNodes, + numGlobalAggregated = 0; + GO numLocalAggs = aggregates->GetNumAggregates(), numGlobalAggs = 0; + MueLu_sumAll(comm, numLocalAggregated, numGlobalAggregated); + MueLu_sumAll(comm, numLocalAggs, numGlobalAggs); + + double aggPercent = + 100 * as(numGlobalAggregated) / as(numGlobalRows); + if (aggPercent > 99.99 && aggPercent < 100.00) { + // Due to round off (for instance, 
for 140465733/140466897), we could + // get 100.00% display even if there are some remaining nodes. This + // is bad from the users point of view. It is much better to change + // it to display 99.99%. + aggPercent = 99.99; } + GetOStream(Statistics1) + << " aggregated : " + << (numGlobalAggregated - numGlobalAggregatedPrev) << " (phase), " + << std::fixed << std::setprecision(2) << numGlobalAggregated << "/" + << numGlobalRows << " [" << aggPercent << "%] (total)\n" + << " remaining : " << numGlobalRows - numGlobalAggregated << "\n" + << " aggregates : " << numGlobalAggs - numGlobalAggsPrev + << " (phase), " << numGlobalAggs << " (total)" << std::endl; + numGlobalAggregatedPrev = numGlobalAggregated; + numGlobalAggsPrev = numGlobalAggs; } + } - TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, "MueLu::UncoupledAggregationFactory::Build: Leftover nodes found! Error!"); - - aggregates->AggregatesCrossProcessors(false); - aggregates->ComputeAggregateSizes(true/*forceRecompute*/); + TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, + "MueLu::UncoupledAggregationFactory::Build: " + "Leftover nodes found! 
Error!"); - Set(currentLevel, "Aggregates", aggregates); + aggregates->AggregatesCrossProcessors(false); + aggregates->ComputeAggregateSizes(true /*forceRecompute*/); - if (pL.get("aggregation: compute aggregate qualities")) { - RCP> aggQualities = Get>>(currentLevel, "AggregateQualities"); - } + Set(currentLevel, "Aggregates", aggregates); + if (pL.get("aggregation: compute aggregate qualities")) { + RCP> aggQualities = + Get>>( + currentLevel, "AggregateQualities"); } +} -} //namespace MueLu - +} // namespace MueLu #endif /* MUELU_UNCOUPLEDAGGREGATIONFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_kokkos_decl.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_kokkos_decl.hpp index 5bf4100d95bb..304ed01c14d7 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_kokkos_decl.hpp @@ -61,8 +61,8 @@ #include "MueLu_AggregationPhase2bAlgorithm_kokkos_fwd.hpp" #include "MueLu_AggregationPhase3Algorithm_kokkos_fwd.hpp" #include "MueLu_Exceptions.hpp" -#include "MueLu_Level_fwd.hpp" #include "MueLu_LWGraph_kokkos_fwd.hpp" +#include "MueLu_Level_fwd.hpp" #include "MueLu_OnePtAggregationAlgorithm_kokkos_fwd.hpp" #include "MueLu_PreserveDirichletAggregationAlgorithm_kokkos_fwd.hpp" #include "MueLu_SingleLevelFactoryBase.hpp" @@ -73,167 +73,202 @@ namespace MueLu { @class UncoupledAggregationFactory class. @brief Factory for building uncoupled aggregates. - Factory for creating uncoupled aggregates from the amalgamated graph of A. The uncoupled aggregation method - uses several aggregation phases which put together all nodes into aggregates. + Factory for creating uncoupled aggregates from the amalgamated graph of A. + The uncoupled aggregation method uses several aggregation phases which put + together all nodes into aggregates. 
## Aggregation phases ## AggregationAlgorithm | Short description ---------------------|------------------ - PreserveDirichletAggregationAlgorithm | Handle Dirichlet nodes. Decide whether to drop/ignore them in the aggregation or keep them as singleton nodes. - OnePtAggregationAlgorithm | Special handling for nodes with status ONEPT. A user can mark special nodes for singleton aggregates or a user-specified handling. This aggregation phase has to be switched on by the user if necessary (default = off). - AggregationPhase1Algorithm | Build new aggregates - AggregationPhase2aAlgorithm | Build aggregates of reasonable size from leftover nodes - AggregationPhase2bAlgorithm | Add leftover nodes to existing aggregates - AggregationPhase3Algorithm | Handle leftover nodes. Try to avoid singletons - IsolatedNodeAggregationAlgorithm | Drop/ignore leftover nodes + PreserveDirichletAggregationAlgorithm | Handle Dirichlet nodes. Decide + whether to drop/ignore them in the aggregation or keep them as singleton + nodes. OnePtAggregationAlgorithm | Special handling for nodes with status + ONEPT. A user can mark special nodes for singleton aggregates or a + user-specified handling. This aggregation phase has to be switched on by the + user if necessary (default = off). AggregationPhase1Algorithm | Build new + aggregates AggregationPhase2aAlgorithm | Build aggregates of reasonable size + from leftover nodes AggregationPhase2bAlgorithm | Add leftover nodes to + existing aggregates AggregationPhase3Algorithm | Handle leftover nodes. Try + to avoid singletons IsolatedNodeAggregationAlgorithm | Drop/ignore leftover + nodes Internally, each node has a status which can be one of the following: Node status | Meaning ------------|--------- - READY | Node is not aggregated and can be used for building a new aggregate or can be added to an existing aggregate. - AGGREGATED | Node is aggregated. 
- IGNORED | Node is not considered for aggregation (it may have been dropped or put into a singleton aggregate) - BOUNDARY | Node is a Dirichlet boundary node (with one or more Dirichlet boundary conditions). - ONEPT | The user forces the aggregation algorithm to treat the node as a singleton. Important: Do not forget to set aggregation: allow user-specified singletons to true! Otherwise Phase3 will just handle the ONEPT nodes and probably not build singletons + READY | Node is not aggregated and can be used for building a new + aggregate or can be added to an existing aggregate. AGGREGATED | Node is + aggregated. IGNORED | Node is not considered for aggregation (it may have + been dropped or put into a singleton aggregate) BOUNDARY | Node is a + Dirichlet boundary node (with one or more Dirichlet boundary conditions). + ONEPT | The user forces the aggregation algorithm to treat the node as + a singleton. Important: Do not forget to set aggregation: allow + user-specified singletons to true! Otherwise Phase3 will just handle the + ONEPT nodes and probably not build singletons @ingroup Aggregation ## Input/output of UncoupledAggregationFactory ## ### User parameters of UncoupledAggregationFactory ### - Parameter | type | default | master.xml | validated | requested | description + Parameter | type | default | master.xml | validated | requested | + description ----------|------|---------|:----------:|:---------:|:---------:|------------ - Graph | Factory | null | | * | * | Generating factory of the graph of A - DofsPerNode | Factory | null | | * | * | Generating factory for variable 'DofsPerNode', usually the same as for 'Graph' - OnePt aggregate map name | string | | | * | * | Name of input map for single node aggregates (default=''). Makes only sense if the parameter 'aggregation: allow user-specified singletons' is set to true. - OnePt aggregate map factory | Factory | null | | * | * | Generating factory of (DOF) map for single node aggregates. 
Makes only sense if the parameter 'aggregation: allow user-specified singletons' is set to true. - aggregation: max agg size | int | see master.xml | * | * | | Maximum number of nodes per aggregate. - aggregation: min agg size | int | see master.xml | * | * | | Minimum number of nodes necessary to build a new aggregate. - aggregation: max selected neighbors | int | see master.xml | * | * | | Maximum number of neighbor nodes already in aggregate (needed in Phase1) - aggregation: ordering | string | "natural" | * | * | | Ordering of node aggregation (can be either "natural", "graph" or "random"). - aggregation: enable phase 1 | bool | true | * | * | |Turn on/off phase 1 aggregation - aggregation: enable phase 2a | bool | true | * | * | |Turn on/off phase 2a aggregation - aggregation: enable phase 2b | bool | true | * | * | |Turn on/off phase 2b aggregation - aggregation: enable phase 3 | bool | true | * | * | |Turn on/off phase 3 aggregation - aggregation: preserve Dirichlet points | bool | false | * | * | | preserve Dirichlet points as singleton nodes (default=false, i.e., drop Dirichlet nodes during aggregation) - aggregation: allow user-specified singletons | bool | false | * | * | | Turn on/off OnePtAggregationAlgorithm (default=false) - - - The * in the @c master.xml column denotes that the parameter is defined in the @c master.xml file.
- The * in the @c validated column means that the parameter is declared in the list of valid input parameters (see UncoupledAggregationFactory::GetValidParameters).
- The * in the @c requested column states that the data is requested as input with all dependencies (see UncoupledAggregationFactory::DeclareInput). + Graph | Factory | null | | * | * | Generating factory of the + graph of A DofsPerNode | Factory | null | | * | * | Generating + factory for variable 'DofsPerNode', usually the same as for 'Graph' OnePt + aggregate map name | string | | | * | * | Name of input map for single node + aggregates (default=''). Makes only sense if the parameter 'aggregation: + allow user-specified singletons' is set to true. OnePt aggregate map factory + | Factory | null | | * | * | Generating factory of (DOF) map for single + node aggregates. Makes only sense if the parameter 'aggregation: allow + user-specified singletons' is set to true. aggregation: max agg size | int | + see master.xml | * | * | | Maximum number of nodes per aggregate. + aggregation: min agg size | int | see master.xml | * | * | | Minimum + number of nodes necessary to build a new aggregate. aggregation: max selected + neighbors | int | see master.xml | * | * | | Maximum number of neighbor + nodes already in aggregate (needed in Phase1) aggregation: ordering | string + | "natural" | * | * | | Ordering of node aggregation (can be either + "natural", "graph" or "random"). 
aggregation: enable phase 1 | bool | true | + * | * | |Turn on/off phase 1 aggregation aggregation: enable phase 2a | + bool | true | * | * | |Turn on/off phase 2a aggregation aggregation: enable + phase 2b | bool | true | * | * | |Turn on/off phase 2b aggregation + aggregation: enable phase 3 | bool | true | * | * | |Turn on/off phase 3 + aggregation aggregation: preserve Dirichlet points | bool | false | * | * | + | preserve Dirichlet points as singleton nodes (default=false, i.e., drop + Dirichlet nodes during aggregation) aggregation: allow user-specified + singletons | bool | false | * | * | | Turn on/off OnePtAggregationAlgorithm + (default=false) + + + The * in the @c master.xml column denotes that the parameter is defined in + the @c master.xml file.
The * in the @c validated column means that the + parameter is declared in the list of valid input parameters (see + UncoupledAggregationFactory::GetValidParameters).
The * in the @c + requested column states that the data is requested as input with all + dependencies (see UncoupledAggregationFactory::DeclareInput). ### Variables provided by UncoupledAggregationFactory ### - After UncoupledAggregationFactory::Build the following data is available (if requested) + After UncoupledAggregationFactory::Build the following data is available (if + requested) Parameter | generated by | description ----------|--------------|------------ - | Aggregates | UncoupledAggregationFactory | Container class with aggregation information. See also Aggregates. + | Aggregates | UncoupledAggregationFactory | Container class with + aggregation information. See also Aggregates. */ - template - class UncoupledAggregationFactory_kokkos : public SingleLevelFactoryBase { +template +class UncoupledAggregationFactory_kokkos : public SingleLevelFactoryBase { #undef MUELU_UNCOUPLEDAGGREGATIONFACTORY_KOKKOS_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ - - //! Constructor. - UncoupledAggregationFactory_kokkos(); - - //! Destructor. - virtual ~UncoupledAggregationFactory_kokkos() { } - - RCP GetValidParameterList() const; - - //@} - - //! @name Set/get methods. 
- //@{ - - // Options shared by all aggregation algorithms - - // deprecated - void SetOrdering(const std::string& ordering) { - SetParameter("aggregation: ordering", ParameterEntry(ordering)); - } - // deprecated - void SetMaxNeighAlreadySelected(int maxNeighAlreadySelected) { - SetParameter("aggregation: max selected neighbors", ParameterEntry(Teuchos::as(maxNeighAlreadySelected))); // revalidate - } - // deprecated - void SetMinNodesPerAggregate(int minNodesPerAggregate) { - SetParameter("aggregation: min agg size", ParameterEntry(Teuchos::as(minNodesPerAggregate))); // revalidate - } - // set information about 1-node aggregates (map name and generating factory) - void SetOnePtMapName(const std::string name, Teuchos::RCP mapFact) { - SetParameter("OnePt aggregate map name", ParameterEntry(std::string(name))); // revalidate - SetFactory("OnePt aggregate map factory",mapFact); - } - - // deprecated - const std::string& GetOrdering() const { - const ParameterList& pL = GetParameterList(); - return pL.get("aggregation: ordering"); - } - // deprecated - int GetMaxNeighAlreadySelected() const { - const ParameterList& pL = GetParameterList(); - return Teuchos::as(pL.get("aggregation: max selected neighbors")); - } - // deprecated - int GetMinNodesPerAggregate() const { - const ParameterList& pL = GetParameterList(); - return Teuchos::as(pL.get("aggregation: min agg size")); - } - - //@} - - //! Input - //@{ - - void DeclareInput(Level ¤tLevel) const; - - //@} - - //! @name Build methods. - //@{ - - /*! @brief Build aggregates. */ - void Build(Level ¤tLevel) const; - - //@} - - //! @name Definition methods - //@{ - - /*! @brief Append a new aggregation algorithm to list of aggregation algorithms */ - //void Append(const RCP > & alg); - - /*! @brief Remove all aggregation algorithms from list */ - //void ClearAggregationAlgorithms() { algos_.clear(); } - //@} - - private: - - //! 
aggregation algorithms - // will be filled in Build routine - mutable std::vector > > algos_; - - //! boolean flag: definition phase - //! if true, the aggregation algorithms still can be set and changed. - //! if false, no change in aggregation algorithms is possible any more - mutable bool bDefinitionPhase_; - - }; // class UncoupledAggregationFactory_kokkos - -} +public: + //! @name Constructors/Destructors. + //@{ + + //! Constructor. + UncoupledAggregationFactory_kokkos(); + + //! Destructor. + virtual ~UncoupledAggregationFactory_kokkos() {} + + RCP GetValidParameterList() const; + + //@} + + //! @name Set/get methods. + //@{ + + // Options shared by all aggregation algorithms + + // deprecated + void SetOrdering(const std::string &ordering) { + SetParameter("aggregation: ordering", ParameterEntry(ordering)); + } + // deprecated + void SetMaxNeighAlreadySelected(int maxNeighAlreadySelected) { + SetParameter("aggregation: max selected neighbors", + ParameterEntry(Teuchos::as( + maxNeighAlreadySelected))); // revalidate + } + // deprecated + void SetMinNodesPerAggregate(int minNodesPerAggregate) { + SetParameter("aggregation: min agg size", + ParameterEntry(Teuchos::as( + minNodesPerAggregate))); // revalidate + } + // set information about 1-node aggregates (map name and generating factory) + void SetOnePtMapName(const std::string name, + Teuchos::RCP mapFact) { + SetParameter("OnePt aggregate map name", + ParameterEntry(std::string(name))); // revalidate + SetFactory("OnePt aggregate map factory", mapFact); + } + + // deprecated + const std::string &GetOrdering() const { + const ParameterList &pL = GetParameterList(); + return pL.get("aggregation: ordering"); + } + // deprecated + int GetMaxNeighAlreadySelected() const { + const ParameterList &pL = GetParameterList(); + return Teuchos::as( + pL.get("aggregation: max selected neighbors")); + } + // deprecated + int GetMinNodesPerAggregate() const { + const ParameterList &pL = GetParameterList(); + return 
Teuchos::as(pL.get("aggregation: min agg size")); + } + + //@} + + //! Input + //@{ + + void DeclareInput(Level ¤tLevel) const; + + //@} + + //! @name Build methods. + //@{ + + /*! @brief Build aggregates. */ + void Build(Level ¤tLevel) const; + + //@} + + //! @name Definition methods + //@{ + + /*! @brief Append a new aggregation algorithm to list of aggregation + * algorithms */ + // void Append(const RCP > & alg); + + /*! @brief Remove all aggregation algorithms from list */ + // void ClearAggregationAlgorithms() { algos_.clear(); } + //@} + +private: + //! aggregation algorithms + // will be filled in Build routine + mutable std::vector>> + algos_; + + //! boolean flag: definition phase + //! if true, the aggregation algorithms still can be set and changed. + //! if false, no change in aggregation algorithms is possible any more + mutable bool bDefinitionPhase_; + +}; // class UncoupledAggregationFactory_kokkos + +} // namespace MueLu #define MUELU_UNCOUPLEDAGGREGATIONFACTORY_KOKKOS_SHORT #endif // MUELU_UNCOUPLEDAGGREGATIONFACTORY_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_kokkos_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_kokkos_def.hpp index 4f2480077529..a8ad646b45b3 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_kokkos_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_kokkos_def.hpp @@ -49,8 +49,8 @@ #include #include -#include #include +#include #include #include "MueLu_UncoupledAggregationFactory_kokkos_decl.hpp" @@ -63,340 +63,398 @@ #include "MueLu_AggregationPhase2bAlgorithm_kokkos.hpp" #include "MueLu_AggregationPhase3Algorithm_kokkos.hpp" -#include "MueLu_Level.hpp" -#include "MueLu_LWGraph_kokkos.hpp" #include "MueLu_Aggregates.hpp" +#include "MueLu_LWGraph_kokkos.hpp" +#include "MueLu_Level.hpp" #include "MueLu_MasterList.hpp" #include 
"MueLu_Monitor.hpp" -#include "KokkosGraph_Distance2ColorHandle.hpp" #include "KokkosGraph_Distance2Color.hpp" +#include "KokkosGraph_Distance2ColorHandle.hpp" #include "KokkosGraph_MIS2.hpp" namespace MueLu { - template - UncoupledAggregationFactory_kokkos::UncoupledAggregationFactory_kokkos() - : bDefinitionPhase_(true) - { } - - template - RCP UncoupledAggregationFactory_kokkos::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - // Aggregation parameters (used in aggregation algorithms) - // TODO introduce local member function for each aggregation algorithm such that each aggregation algorithm can define its own parameters - - typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; -#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("aggregation: max agg size"); - SET_VALID_ENTRY("aggregation: min agg size"); - SET_VALID_ENTRY("aggregation: max selected neighbors"); - SET_VALID_ENTRY("aggregation: ordering"); - validParamList->getEntry("aggregation: ordering").setValidator( - rcp(new validatorType(Teuchos::tuple("natural", "graph", "random"), "aggregation: ordering"))); - SET_VALID_ENTRY("aggregation: deterministic"); - SET_VALID_ENTRY("aggregation: coloring algorithm"); - SET_VALID_ENTRY("aggregation: enable phase 1"); - SET_VALID_ENTRY("aggregation: enable phase 2a"); - SET_VALID_ENTRY("aggregation: enable phase 2b"); - SET_VALID_ENTRY("aggregation: enable phase 3"); - SET_VALID_ENTRY("aggregation: match ML phase2a"); - SET_VALID_ENTRY("aggregation: phase3 avoid singletons"); - SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors"); - SET_VALID_ENTRY("aggregation: preserve Dirichlet points"); - SET_VALID_ENTRY("aggregation: allow user-specified singletons"); - SET_VALID_ENTRY("aggregation: phase 1 algorithm"); -#undef SET_VALID_ENTRY - - // general variables needed in AggregationFactory - validParamList->set< RCP >("Graph", null, 
"Generating factory of the graph"); - validParamList->set< RCP >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'"); - - // special variables necessary for OnePtAggregationAlgorithm - validParamList->set< std::string > ("OnePt aggregate map name", "", "Name of input map for single node aggregates. (default='')"); - validParamList->set< std::string > ("OnePt aggregate map factory", "", "Generating factory of (DOF) map for single node aggregates."); - //validParamList->set< RCP >("OnePt aggregate map factory", NoFactory::getRCP(), "Generating factory of (DOF) map for single node aggregates."); - - return validParamList; - } - - template - void UncoupledAggregationFactory_kokkos::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "Graph"); - Input(currentLevel, "DofsPerNode"); - - const ParameterList& pL = GetParameterList(); - - // request special data necessary for OnePtAggregationAlgorithm - std::string mapOnePtName = pL.get("OnePt aggregate map name"); - if (mapOnePtName.length() > 0) { - std::string mapOnePtFactName = pL.get("OnePt aggregate map factory"); - if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { - currentLevel.DeclareInput(mapOnePtName, NoFactory::get()); - } else { - RCP mapOnePtFact = GetFactory(mapOnePtFactName); - currentLevel.DeclareInput(mapOnePtName, mapOnePtFact.get()); - } +template +UncoupledAggregationFactory_kokkos::UncoupledAggregationFactory_kokkos() + : bDefinitionPhase_(true) {} + +template +RCP +UncoupledAggregationFactory_kokkos::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + + // Aggregation parameters (used in aggregation algorithms) + // TODO introduce local member function for each aggregation algorithm such + // that each aggregation algorithm can define its own parameters + + typedef Teuchos::StringToIntegralParameterEntryValidator validatorType; +#define SET_VALID_ENTRY(name) \ + 
validParamList->setEntry(name, MasterList::getEntry(name)) + SET_VALID_ENTRY("aggregation: max agg size"); + SET_VALID_ENTRY("aggregation: min agg size"); + SET_VALID_ENTRY("aggregation: max selected neighbors"); + SET_VALID_ENTRY("aggregation: ordering"); + validParamList->getEntry("aggregation: ordering") + .setValidator(rcp(new validatorType( + Teuchos::tuple("natural", "graph", "random"), + "aggregation: ordering"))); + SET_VALID_ENTRY("aggregation: deterministic"); + SET_VALID_ENTRY("aggregation: coloring algorithm"); + SET_VALID_ENTRY("aggregation: enable phase 1"); + SET_VALID_ENTRY("aggregation: enable phase 2a"); + SET_VALID_ENTRY("aggregation: enable phase 2b"); + SET_VALID_ENTRY("aggregation: enable phase 3"); + SET_VALID_ENTRY("aggregation: match ML phase2a"); + SET_VALID_ENTRY("aggregation: phase3 avoid singletons"); + SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors"); + SET_VALID_ENTRY("aggregation: preserve Dirichlet points"); + SET_VALID_ENTRY("aggregation: allow user-specified singletons"); + SET_VALID_ENTRY("aggregation: phase 1 algorithm"); +#undef SET_VALID_ENTRY + + // general variables needed in AggregationFactory + validParamList->set>( + "Graph", null, "Generating factory of the graph"); + validParamList->set>( + "DofsPerNode", null, + "Generating factory for variable \'DofsPerNode\', usually the same as " + "for \'Graph\'"); + + // special variables necessary for OnePtAggregationAlgorithm + validParamList->set( + "OnePt aggregate map name", "", + "Name of input map for single node aggregates. 
(default='')"); + validParamList->set( + "OnePt aggregate map factory", "", + "Generating factory of (DOF) map for single node aggregates."); + // validParamList->set< RCP >("OnePt aggregate map + // factory", NoFactory::getRCP(), "Generating factory of (DOF) map for + // single node aggregates."); + + return validParamList; +} + +template +void UncoupledAggregationFactory_kokkos::DeclareInput(Level ¤tLevel) + const { + Input(currentLevel, "Graph"); + Input(currentLevel, "DofsPerNode"); + + const ParameterList &pL = GetParameterList(); + + // request special data necessary for OnePtAggregationAlgorithm + std::string mapOnePtName = pL.get("OnePt aggregate map name"); + if (mapOnePtName.length() > 0) { + std::string mapOnePtFactName = + pL.get("OnePt aggregate map factory"); + if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { + currentLevel.DeclareInput(mapOnePtName, NoFactory::get()); + } else { + RCP mapOnePtFact = GetFactory(mapOnePtFactName); + currentLevel.DeclareInput(mapOnePtName, mapOnePtFact.get()); } } - - template - void UncoupledAggregationFactory_kokkos:: - Build(Level ¤tLevel) const { - using execution_space = typename LWGraph_kokkos::execution_space; - using memory_space = typename LWGraph_kokkos::memory_space; - using local_ordinal_type = typename LWGraph_kokkos::local_ordinal_type; - FactoryMonitor m(*this, "Build", currentLevel); - - ParameterList pL = GetParameterList(); - bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed - - if (pL.get("aggregation: max agg size") == -1) - pL.set("aggregation: max agg size", INT_MAX); - - // define aggregation algorithms - RCP graphFact = GetFactory("Graph"); - - // TODO Can we keep different aggregation algorithms over more Build calls? 
- algos_.clear(); - algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm_kokkos(graphFact))); - if (pL.get("aggregation: allow user-specified singletons") == true) algos_.push_back(rcp(new OnePtAggregationAlgorithm_kokkos (graphFact))); - if (pL.get("aggregation: enable phase 1" ) == true) algos_.push_back(rcp(new AggregationPhase1Algorithm_kokkos (graphFact))); - if (pL.get("aggregation: enable phase 2a") == true) algos_.push_back(rcp(new AggregationPhase2aAlgorithm_kokkos (graphFact))); - if (pL.get("aggregation: enable phase 2b") == true) algos_.push_back(rcp(new AggregationPhase2bAlgorithm_kokkos (graphFact))); - if (pL.get("aggregation: enable phase 3" ) == true) algos_.push_back(rcp(new AggregationPhase3Algorithm_kokkos (graphFact))); - - std::string mapOnePtName = pL.get("OnePt aggregate map name"); - RCP OnePtMap = Teuchos::null; - if (mapOnePtName.length()) { - std::string mapOnePtFactName = pL.get("OnePt aggregate map factory"); - if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { - OnePtMap = currentLevel.Get >(mapOnePtName, NoFactory::get()); - } else { - RCP mapOnePtFact = GetFactory(mapOnePtFactName); - OnePtMap = currentLevel.Get >(mapOnePtName, mapOnePtFact.get()); - } +} + +template +void UncoupledAggregationFactory_kokkos< + LocalOrdinal, GlobalOrdinal, Node>::Build(Level ¤tLevel) const { + using execution_space = typename LWGraph_kokkos::execution_space; + using memory_space = typename LWGraph_kokkos::memory_space; + using local_ordinal_type = typename LWGraph_kokkos::local_ordinal_type; + FactoryMonitor m(*this, "Build", currentLevel); + + ParameterList pL = GetParameterList(); + bDefinitionPhase_ = false; // definition phase is finished, now all + // aggregation algorithm information is fixed + + if (pL.get("aggregation: max agg size") == -1) + pL.set("aggregation: max agg size", INT_MAX); + + // define aggregation algorithms + RCP graphFact = GetFactory("Graph"); + + // TODO Can we keep different aggregation algorithms 
over more Build calls? + algos_.clear(); + algos_.push_back( + rcp(new PreserveDirichletAggregationAlgorithm_kokkos(graphFact))); + if (pL.get("aggregation: allow user-specified singletons") == true) + algos_.push_back(rcp(new OnePtAggregationAlgorithm_kokkos(graphFact))); + if (pL.get("aggregation: enable phase 1") == true) + algos_.push_back(rcp(new AggregationPhase1Algorithm_kokkos(graphFact))); + if (pL.get("aggregation: enable phase 2a") == true) + algos_.push_back(rcp(new AggregationPhase2aAlgorithm_kokkos(graphFact))); + if (pL.get("aggregation: enable phase 2b") == true) + algos_.push_back(rcp(new AggregationPhase2bAlgorithm_kokkos(graphFact))); + if (pL.get("aggregation: enable phase 3") == true) + algos_.push_back(rcp(new AggregationPhase3Algorithm_kokkos(graphFact))); + + std::string mapOnePtName = pL.get("OnePt aggregate map name"); + RCP OnePtMap = Teuchos::null; + if (mapOnePtName.length()) { + std::string mapOnePtFactName = + pL.get("OnePt aggregate map factory"); + if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") { + OnePtMap = currentLevel.Get>(mapOnePtName, NoFactory::get()); + } else { + RCP mapOnePtFact = GetFactory(mapOnePtFactName); + OnePtMap = currentLevel.Get>(mapOnePtName, mapOnePtFact.get()); } + } - RCP graph = Get< RCP >(currentLevel, "Graph"); - - // Build - RCP aggregates = rcp(new Aggregates(*graph)); - aggregates->setObjectLabel("UC"); - - const LO numRows = graph->GetNodeNumVertices(); - - // construct aggStat information - Kokkos::View aggStat(Kokkos::ViewAllocateWithoutInitializing("aggregation status"), - numRows); - Kokkos::deep_copy(aggStat, READY); + RCP graph = + Get>(currentLevel, "Graph"); + + // Build + RCP aggregates = rcp(new Aggregates(*graph)); + aggregates->setObjectLabel("UC"); + + const LO numRows = graph->GetNodeNumVertices(); + + // construct aggStat information + Kokkos::View aggStat( + Kokkos::ViewAllocateWithoutInitializing("aggregation status"), numRows); + Kokkos::deep_copy(aggStat, READY); + + 
// LBV on Sept 06 2019: re-commenting out the dirichlet boundary map + // even if the map is correctly extracted from the graph, aggStat is + // now a Kokkos::View and filling it will + // require a parallel_for or to copy it to the Host which is not really + // good from a performance point of view. + // If dirichletBoundaryMap was an actual Xpetra::Map, one could call + // getLocalMap to have a Kokkos::View on the appropriate memory_space + // instead of an ArrayRCP. + { + typename LWGraph_kokkos::boundary_nodes_type dirichletBoundaryMap = + graph->getLocalLWGraph().GetBoundaryNodeMap(); + Kokkos::parallel_for( + "MueLu - UncoupledAggregation: tagging boundary nodes in aggStat", + Kokkos::RangePolicy(0, numRows), + KOKKOS_LAMBDA(const local_ordinal_type nodeIdx) { + if (dirichletBoundaryMap(nodeIdx) == true) { + aggStat(nodeIdx) = BOUNDARY; + } + }); + } - // LBV on Sept 06 2019: re-commenting out the dirichlet boundary map - // even if the map is correctly extracted from the graph, aggStat is - // now a Kokkos::View and filling it will - // require a parallel_for or to copy it to the Host which is not really - // good from a performance point of view. - // If dirichletBoundaryMap was an actual Xpetra::Map, one could call - // getLocalMap to have a Kokkos::View on the appropriate memory_space - // instead of an ArrayRCP. 
- { - typename LWGraph_kokkos::boundary_nodes_type dirichletBoundaryMap = graph->getLocalLWGraph().GetBoundaryNodeMap(); - Kokkos::parallel_for("MueLu - UncoupledAggregation: tagging boundary nodes in aggStat", - Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA(const local_ordinal_type nodeIdx) { - if (dirichletBoundaryMap(nodeIdx) == true) { - aggStat(nodeIdx) = BOUNDARY; - } - }); + LO nDofsPerNode = Get(currentLevel, "DofsPerNode"); + GO indexBase = graph->GetDomainMap()->getIndexBase(); + if (OnePtMap != Teuchos::null) { + typename Kokkos::View::HostMirror + aggStatHost = Kokkos::create_mirror_view(aggStat); + Kokkos::deep_copy(aggStatHost, aggStat); + + for (LO i = 0; i < numRows; i++) { + // reconstruct global row id (FIXME only works for contiguous maps) + GO grid = (graph->GetDomainMap()->getGlobalElement(i) - indexBase) * + nDofsPerNode + + indexBase; + + for (LO kr = 0; kr < nDofsPerNode; kr++) + if (OnePtMap->isNodeGlobalElement(grid + kr)) + aggStatHost(i) = ONEPT; } - LO nDofsPerNode = Get(currentLevel, "DofsPerNode"); - GO indexBase = graph->GetDomainMap()->getIndexBase(); - if (OnePtMap != Teuchos::null) { - typename Kokkos::View::HostMirror aggStatHost - = Kokkos::create_mirror_view(aggStat); - Kokkos::deep_copy(aggStatHost, aggStat); - - for (LO i = 0; i < numRows; i++) { - // reconstruct global row id (FIXME only works for contiguous maps) - GO grid = (graph->GetDomainMap()->getGlobalElement(i)-indexBase) * nDofsPerNode + indexBase; - - for (LO kr = 0; kr < nDofsPerNode; kr++) - if (OnePtMap->isNodeGlobalElement(grid + kr)) - aggStatHost(i) = ONEPT; - } + Kokkos::deep_copy(aggStat, aggStatHost); + } - Kokkos::deep_copy(aggStat, aggStatHost); + const RCP> comm = graph->GetComm(); + GO numGlobalRows = 0; + if (IsPrint(Statistics1)) + MueLu_sumAll(comm, as(numRows), numGlobalRows); + + LO numNonAggregatedNodes = numRows; + std::string aggAlgo = pL.get("aggregation: coloring algorithm"); + if (aggAlgo == "mis2 coarsening" || aggAlgo == "mis2 
aggregation") { + SubFactoryMonitor sfm(*this, "Algo \"MIS2\"", currentLevel); + using graph_t = typename LWGraph_kokkos::local_graph_type; + using device_t = typename graph_t::device_type; + using exec_space = typename device_t::execution_space; + using rowmap_t = typename graph_t::row_map_type; + using colinds_t = typename graph_t::entries_type; + using lno_t = typename colinds_t::non_const_value_type; + rowmap_t aRowptrs = graph->getLocalLWGraph().getRowPtrs(); + colinds_t aColinds = graph->getLocalLWGraph().getEntries(); + lno_t numAggs = 0; + typename colinds_t::non_const_type labels; + + if (aggAlgo == "mis2 coarsening") { + if (IsPrint(Statistics1)) + GetOStream(Statistics1) << " algorithm: MIS-2 coarsening" << std::endl; + labels = KokkosGraph::graph_mis2_coarsen( + aRowptrs, aColinds, numAggs); + } else if (aggAlgo == "mis2 aggregation") { + if (IsPrint(Statistics1)) + GetOStream(Statistics1) + << " algorithm: MIS-2 aggregation" << std::endl; + labels = KokkosGraph::graph_mis2_aggregate( + aRowptrs, aColinds, numAggs); } - - const RCP > comm = graph->GetComm(); - GO numGlobalRows = 0; - if (IsPrint(Statistics1)) - MueLu_sumAll(comm, as(numRows), numGlobalRows); - - LO numNonAggregatedNodes = numRows; - std::string aggAlgo = pL.get("aggregation: coloring algorithm"); - if(aggAlgo == "mis2 coarsening" || aggAlgo == "mis2 aggregation") - { - SubFactoryMonitor sfm(*this, "Algo \"MIS2\"", currentLevel); - using graph_t = typename LWGraph_kokkos::local_graph_type; - using device_t = typename graph_t::device_type; - using exec_space = typename device_t::execution_space; - using rowmap_t = typename graph_t::row_map_type; - using colinds_t = typename graph_t::entries_type; - using lno_t = typename colinds_t::non_const_value_type; - rowmap_t aRowptrs = graph->getLocalLWGraph().getRowPtrs(); - colinds_t aColinds = graph->getLocalLWGraph().getEntries(); - lno_t numAggs = 0; - typename colinds_t::non_const_type labels; - - if(aggAlgo == "mis2 coarsening") - { - 
if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: MIS-2 coarsening" << std::endl; - labels = KokkosGraph::graph_mis2_coarsen(aRowptrs, aColinds, numAggs); - } - else if(aggAlgo == "mis2 aggregation") - { - if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: MIS-2 aggregation" << std::endl; - labels = KokkosGraph::graph_mis2_aggregate(aRowptrs, aColinds, numAggs); - } - auto vertex2AggId = aggregates->GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite); - auto procWinner = aggregates->GetProcWinner() ->getDeviceLocalView(Xpetra::Access::OverwriteAll); - int rank = comm->getRank(); - Kokkos::parallel_for(Kokkos::RangePolicy(0, numRows), - KOKKOS_LAMBDA(lno_t i) - { + auto vertex2AggId = aggregates->GetVertex2AggId()->getDeviceLocalView( + Xpetra::Access::ReadWrite); + auto procWinner = aggregates->GetProcWinner()->getDeviceLocalView( + Xpetra::Access::OverwriteAll); + int rank = comm->getRank(); + Kokkos::parallel_for( + Kokkos::RangePolicy(0, numRows), KOKKOS_LAMBDA(lno_t i) { procWinner(i, 0) = rank; - if(aggStat(i) == READY) - { + if (aggStat(i) == READY) { aggStat(i) = AGGREGATED; vertex2AggId(i, 0) = labels(i); } }); - numNonAggregatedNodes = 0; - aggregates->SetNumAggregates(numAggs); - } - else + numNonAggregatedNodes = 0; + aggregates->SetNumAggregates(numAggs); + } else { { - { - SubFactoryMonitor sfm(*this, "Algo \"Graph Coloring\"", currentLevel); - - // LBV on Sept 06 2019: the note below is a little worrisome, - // can we guarantee that MueLu is never used on a non-symmetric - // graph? 
- // note: just using colinds_view in place of scalar_view_t type - // (it won't be used at all by symbolic SPGEMM) - using graph_t = typename LWGraph_kokkos::local_graph_type; - using KernelHandle = KokkosKernels::Experimental:: - KokkosKernelsHandle; - KernelHandle kh; - //leave gc algorithm choice as the default - kh.create_distance2_graph_coloring_handle(); - - // get the distance-2 graph coloring handle - auto coloringHandle = kh.get_distance2_graph_coloring_handle(); - - // Set the distance-2 graph coloring algorithm to use. - // Options: - // COLORING_D2_DEFAULT - Let the kernel handle pick the variation - // COLORING_D2_SERIAL - Use the legacy serial-only implementation - // COLORING_D2_VB - Use the parallel vertex based direct method - // COLORING_D2_VB_BIT - Same as VB but using the bitvector forbidden array - // COLORING_D2_VB_BIT_EF - Add experimental edge-filtering to VB_BIT - // COLORING_D2_NB_BIT - Net-based coloring (generally the fastest) - if(pL.get("aggregation: deterministic") == true) { - coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_SERIAL ); - if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: serial" << std::endl; - } else if(aggAlgo == "serial") { - coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_SERIAL ); - if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: serial" << std::endl; - } else if(aggAlgo == "default") { - coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_DEFAULT ); - if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: default" << std::endl; - } else if(aggAlgo == "vertex based") { - coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_VB ); - if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: vertex based" << std::endl; - } else if(aggAlgo == "vertex based bit set") { - coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_VB_BIT ); - if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: vertex based bit set" << std::endl; - } else 
if(aggAlgo == "edge filtering") { - coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_VB_BIT_EF ); - if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: edge filtering" << std::endl; - } else if(aggAlgo == "net based bit set") { - coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_NB_BIT ); - if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: net based bit set" << std::endl; - } else { - TEUCHOS_TEST_FOR_EXCEPTION(true,std::invalid_argument,"Unrecognized distance 2 coloring algorithm, valid options are: serial, default, matrix squared, vertex based, vertex based bit set, edge filtering") - } - - //Create device views for graph rowptrs/colinds - typename graph_t::row_map_type aRowptrs = graph->getLocalLWGraph().getRowPtrs(); - typename graph_t::entries_type aColinds = graph->getLocalLWGraph().getEntries(); - - //run d2 graph coloring - //graph is symmetric so row map/entries and col map/entries are the same - KokkosGraph::Experimental::graph_color_distance2(&kh, numRows, aRowptrs, aColinds); - - // extract the colors and store them in the aggregates - aggregates->SetGraphColors(coloringHandle->get_vertex_colors()); - aggregates->SetGraphNumColors(static_cast(coloringHandle->get_num_colors())); - - - //clean up coloring handle - kh.destroy_distance2_graph_coloring_handle(); + SubFactoryMonitor sfm(*this, "Algo \"Graph Coloring\"", currentLevel); + + // LBV on Sept 06 2019: the note below is a little worrisome, + // can we guarantee that MueLu is never used on a non-symmetric + // graph? 
+ // note: just using colinds_view in place of scalar_view_t type + // (it won't be used at all by symbolic SPGEMM) + using graph_t = typename LWGraph_kokkos::local_graph_type; + using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle< + typename graph_t::row_map_type::value_type, + typename graph_t::entries_type::value_type, + typename graph_t::entries_type::value_type, + typename graph_t::device_type::execution_space, + typename graph_t::device_type::memory_space, + typename graph_t::device_type::memory_space>; + KernelHandle kh; + // leave gc algorithm choice as the default + kh.create_distance2_graph_coloring_handle(); + + // get the distance-2 graph coloring handle + auto coloringHandle = kh.get_distance2_graph_coloring_handle(); + + // Set the distance-2 graph coloring algorithm to use. + // Options: + // COLORING_D2_DEFAULT - Let the kernel handle pick the + // variation COLORING_D2_SERIAL - Use the legacy serial-only + // implementation COLORING_D2_VB - Use the parallel vertex + // based direct method COLORING_D2_VB_BIT - Same as VB but + // using the bitvector forbidden array COLORING_D2_VB_BIT_EF - + // Add experimental edge-filtering to VB_BIT COLORING_D2_NB_BIT - + // Net-based coloring (generally the fastest) + if (pL.get("aggregation: deterministic") == true) { + coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_SERIAL); + if (IsPrint(Statistics1)) + GetOStream(Statistics1) << " algorithm: serial" << std::endl; + } else if (aggAlgo == "serial") { + coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_SERIAL); + if (IsPrint(Statistics1)) + GetOStream(Statistics1) << " algorithm: serial" << std::endl; + } else if (aggAlgo == "default") { + coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_DEFAULT); + if (IsPrint(Statistics1)) + GetOStream(Statistics1) << " algorithm: default" << std::endl; + } else if (aggAlgo == "vertex based") { + coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_VB); + if (IsPrint(Statistics1)) + 
GetOStream(Statistics1) << " algorithm: vertex based" << std::endl; + } else if (aggAlgo == "vertex based bit set") { + coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_VB_BIT); + if (IsPrint(Statistics1)) + GetOStream(Statistics1) + << " algorithm: vertex based bit set" << std::endl; + } else if (aggAlgo == "edge filtering") { + coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_VB_BIT_EF); + if (IsPrint(Statistics1)) + GetOStream(Statistics1) << " algorithm: edge filtering" << std::endl; + } else if (aggAlgo == "net based bit set") { + coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_NB_BIT); + if (IsPrint(Statistics1)) + GetOStream(Statistics1) + << " algorithm: net based bit set" << std::endl; + } else { + TEUCHOS_TEST_FOR_EXCEPTION( + true, std::invalid_argument, + "Unrecognized distance 2 coloring algorithm, valid options are: " + "serial, default, matrix squared, vertex based, vertex based bit " + "set, edge filtering") } + // Create device views for graph rowptrs/colinds + typename graph_t::row_map_type aRowptrs = + graph->getLocalLWGraph().getRowPtrs(); + typename graph_t::entries_type aColinds = + graph->getLocalLWGraph().getEntries(); + + // run d2 graph coloring + // graph is symmetric so row map/entries and col map/entries are the same + KokkosGraph::Experimental::graph_color_distance2(&kh, numRows, aRowptrs, + aColinds); + + // extract the colors and store them in the aggregates + aggregates->SetGraphColors(coloringHandle->get_vertex_colors()); + aggregates->SetGraphNumColors( + static_cast(coloringHandle->get_num_colors())); + + // clean up coloring handle + kh.destroy_distance2_graph_coloring_handle(); + } + + if (IsPrint(Statistics1)) { + GetOStream(Statistics1) + << " num colors: " << aggregates->GetGraphNumColors() << std::endl; + } + GO numGlobalAggregatedPrev = 0, numGlobalAggsPrev = 0; + for (size_t a = 0; a < algos_.size(); a++) { + std::string phase = algos_[a]->description(); + SubFactoryMonitor sfm2(*this, "Algo \"" + phase 
+ "\"", currentLevel); + + int oldRank = algos_[a]->SetProcRankVerbose(this->GetProcRankVerbose()); + algos_[a]->BuildAggregates(pL, *graph, *aggregates, aggStat, + numNonAggregatedNodes); + algos_[a]->SetProcRankVerbose(oldRank); + if (IsPrint(Statistics1)) { - GetOStream(Statistics1) << " num colors: " << aggregates->GetGraphNumColors() << std::endl; - } - GO numGlobalAggregatedPrev = 0, numGlobalAggsPrev = 0; - for (size_t a = 0; a < algos_.size(); a++) { - std::string phase = algos_[a]->description(); - SubFactoryMonitor sfm2(*this, "Algo \"" + phase + "\"", currentLevel); - - int oldRank = algos_[a]->SetProcRankVerbose(this->GetProcRankVerbose()); - algos_[a]->BuildAggregates(pL, *graph, *aggregates, aggStat, numNonAggregatedNodes); - algos_[a]->SetProcRankVerbose(oldRank); - - if (IsPrint(Statistics1)) { - GO numLocalAggregated = numRows - numNonAggregatedNodes, numGlobalAggregated = 0; - GO numLocalAggs = aggregates->GetNumAggregates(), numGlobalAggs = 0; - MueLu_sumAll(comm, numLocalAggregated, numGlobalAggregated); - MueLu_sumAll(comm, numLocalAggs, numGlobalAggs); - - double aggPercent = 100*as(numGlobalAggregated)/as(numGlobalRows); - if (aggPercent > 99.99 && aggPercent < 100.00) { - // Due to round off (for instance, for 140465733/140466897), we could - // get 100.00% display even if there are some remaining nodes. This - // is bad from the users point of view. It is much better to change - // it to display 99.99%. 
- aggPercent = 99.99; - } - GetOStream(Statistics1) << " aggregated : " << (numGlobalAggregated - numGlobalAggregatedPrev) << " (phase), " << std::fixed - << std::setprecision(2) << numGlobalAggregated << "/" << numGlobalRows << " [" << aggPercent << "%] (total)\n" - << " remaining : " << numGlobalRows - numGlobalAggregated << "\n" - << " aggregates : " << numGlobalAggs-numGlobalAggsPrev << " (phase), " << numGlobalAggs << " (total)" << std::endl; - numGlobalAggregatedPrev = numGlobalAggregated; - numGlobalAggsPrev = numGlobalAggs; + GO numLocalAggregated = numRows - numNonAggregatedNodes, + numGlobalAggregated = 0; + GO numLocalAggs = aggregates->GetNumAggregates(), numGlobalAggs = 0; + MueLu_sumAll(comm, numLocalAggregated, numGlobalAggregated); + MueLu_sumAll(comm, numLocalAggs, numGlobalAggs); + + double aggPercent = + 100 * as(numGlobalAggregated) / as(numGlobalRows); + if (aggPercent > 99.99 && aggPercent < 100.00) { + // Due to round off (for instance, for 140465733/140466897), we could + // get 100.00% display even if there are some remaining nodes. This + // is bad from the users point of view. It is much better to change + // it to display 99.99%. + aggPercent = 99.99; } + GetOStream(Statistics1) + << " aggregated : " + << (numGlobalAggregated - numGlobalAggregatedPrev) << " (phase), " + << std::fixed << std::setprecision(2) << numGlobalAggregated << "/" + << numGlobalRows << " [" << aggPercent << "%] (total)\n" + << " remaining : " << numGlobalRows - numGlobalAggregated << "\n" + << " aggregates : " << numGlobalAggs - numGlobalAggsPrev + << " (phase), " << numGlobalAggs << " (total)" << std::endl; + numGlobalAggregatedPrev = numGlobalAggregated; + numGlobalAggsPrev = numGlobalAggs; } } + } - TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, "MueLu::UncoupledAggregationFactory::Build: Leftover nodes found! 
Error!"); - - aggregates->AggregatesCrossProcessors(false); - aggregates->ComputeAggregateSizes(true/*forceRecompute*/); + TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, + "MueLu::UncoupledAggregationFactory::Build: " + "Leftover nodes found! Error!"); - Set(currentLevel, "Aggregates", aggregates); + aggregates->AggregatesCrossProcessors(false); + aggregates->ComputeAggregateSizes(true /*forceRecompute*/); - } + Set(currentLevel, "Aggregates", aggregates); +} -} //namespace MueLu +} // namespace MueLu #endif /* MUELU_UNCOUPLEDAGGREGATIONFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Graph/UserAggregation/MueLu_UserAggregationFactory_decl.hpp b/packages/muelu/src/Graph/UserAggregation/MueLu_UserAggregationFactory_decl.hpp index 17c7a3297fd6..e823489a5a99 100644 --- a/packages/muelu/src/Graph/UserAggregation/MueLu_UserAggregationFactory_decl.hpp +++ b/packages/muelu/src/Graph/UserAggregation/MueLu_UserAggregationFactory_decl.hpp @@ -46,23 +46,21 @@ #ifndef MUELU_USERAGGREGATIONFACTORY_DECL_HPP_ #define MUELU_USERAGGREGATIONFACTORY_DECL_HPP_ - -#include #include +#include #include "MueLu_ConfigDefs.hpp" #include "MueLu_SingleLevelFactoryBase.hpp" #include "MueLu_UserAggregationFactory_fwd.hpp" -#include "MueLu_Level_fwd.hpp" #include "MueLu_Aggregates_fwd.hpp" #include "MueLu_Exceptions.hpp" +#include "MueLu_Level_fwd.hpp" namespace MueLu { -template +template class UserAggregationFactory : public SingleLevelFactoryBase { #undef MUELU_USERAGGREGATIONFACTORY_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" @@ -72,10 +70,10 @@ class UserAggregationFactory : public SingleLevelFactoryBase { //@{ //! Constructor. - UserAggregationFactory() { }; + UserAggregationFactory(){}; //! Destructor. 
- virtual ~UserAggregationFactory() { } + virtual ~UserAggregationFactory() {} RCP GetValidParameterList() const; @@ -104,7 +102,7 @@ class UserAggregationFactory : public SingleLevelFactoryBase { private: }; // class UserAggregationFactory -} +} // namespace MueLu #define MUELU_USERAGGREGATIONFACTORY_SHORT #endif /* MUELU_USERAGGREGATIONFACTORY_DECL_HPP_ */ diff --git a/packages/muelu/src/Graph/UserAggregation/MueLu_UserAggregationFactory_def.hpp b/packages/muelu/src/Graph/UserAggregation/MueLu_UserAggregationFactory_def.hpp index bc0a418a46c7..20f5913207b7 100644 --- a/packages/muelu/src/Graph/UserAggregation/MueLu_UserAggregationFactory_def.hpp +++ b/packages/muelu/src/Graph/UserAggregation/MueLu_UserAggregationFactory_def.hpp @@ -47,100 +47,117 @@ #define MUELU_USERAGGREGATIONFACTORY_DEF_HPP_ #include -#include +#include #include +#include #include -#include #include "MueLu_UserAggregationFactory_decl.hpp" -#include "MueLu_Level.hpp" #include "MueLu_Aggregates.hpp" +#include "MueLu_Level.hpp" #include "MueLu_Monitor.hpp" namespace MueLu { - template - RCP UserAggregationFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - // input parameters - validParamList->set("filePrefix", "", "The data is read from files of this name: _."); - validParamList->set("fileExt", "", "The data is read from files of this name: _."); +template +RCP +UserAggregationFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + + // input parameters + validParamList->set("filePrefix", "", + "The data is read from files of this name: " + "_."); + validParamList->set("fileExt", "", + "The data is read from files of this name: " + "_."); + + return validParamList; +} + +template +void UserAggregationFactory::DeclareInput( + Level & /* currentLevel */) const {} + +/** + * The function reads aggregate information from a file. + * The file structure is the following: + * * line 1 : + * * line 2+: ... 
+ */ +template +void UserAggregationFactory::Build( + Level ¤tLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + + const ParameterList &pL = GetParameterList(); + + RCP> comm = Teuchos::DefaultComm::getComm(); + const int myRank = comm->getRank(); + + std::string fileName = + pL.get("filePrefix") + toString(currentLevel.GetLevelID()) + + "_" + toString(myRank) + "." + pL.get("fileExt"); + std::ifstream ifs(fileName.c_str()); + TEUCHOS_TEST_FOR_EXCEPTION(!ifs.good(), Exceptions::RuntimeError, + "Cannot read data from \"" << fileName << "\""); + + LO numVertices, numAggregates; + ifs >> numVertices; + TEUCHOS_TEST_FOR_EXCEPTION(!ifs.good(), Exceptions::RuntimeError, + "Cannot read data from \"" << fileName << "\""); + ifs >> numAggregates; + TEUCHOS_TEST_FOR_EXCEPTION(numVertices <= 0, Exceptions::InvalidArgument, + "Number of vertices must be > 0"); + TEUCHOS_TEST_FOR_EXCEPTION(numAggregates <= 0, Exceptions::InvalidArgument, + "Number of aggregates must be > 0"); + + Xpetra::UnderlyingLib lib = currentLevel.lib(); + const int indexBase = 0; + RCP map = MapFactory::Build(lib, numVertices, indexBase, comm); + + RCP aggregates = rcp(new Aggregates(map)); + aggregates->setObjectLabel("User"); + + aggregates->SetNumAggregates(numAggregates); + + Teuchos::ArrayRCP vertex2AggId = + aggregates->GetVertex2AggId()->getDataNonConst(0); + Teuchos::ArrayRCP procWinner = + aggregates->GetProcWinner()->getDataNonConst(0); + + for (LO i = 0; i < numAggregates; i++) { + int aggSize = 0; + ifs >> aggSize; + + std::vector list(aggSize); + for (int k = 0; k < aggSize; k++) { + // FIXME: File contains GIDs, we need LIDs + // for now, works on a single processor + ifs >> list[k]; + } - return validParamList; - } + // Mark first node as root node for the aggregate + aggregates->SetIsRoot(list[0]); - template - void UserAggregationFactory::DeclareInput(Level& /* currentLevel */) const { } - - /** - * The function reads aggregate information from a file. 
- * The file structure is the following: - * * line 1 : - * * line 2+: ... - */ - template - void UserAggregationFactory::Build(Level& currentLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); - - const ParameterList& pL = GetParameterList(); - - RCP< const Teuchos::Comm > comm = Teuchos::DefaultComm::getComm(); - const int myRank = comm->getRank(); - - std::string fileName = pL.get("filePrefix") + toString(currentLevel.GetLevelID()) + "_" + toString(myRank) + "." + pL.get("fileExt"); - std::ifstream ifs(fileName.c_str()); - TEUCHOS_TEST_FOR_EXCEPTION(!ifs.good(), Exceptions::RuntimeError, "Cannot read data from \"" << fileName << "\""); - - LO numVertices, numAggregates; - ifs >> numVertices; - TEUCHOS_TEST_FOR_EXCEPTION(!ifs.good(), Exceptions::RuntimeError, "Cannot read data from \"" << fileName << "\""); - ifs >> numAggregates; - TEUCHOS_TEST_FOR_EXCEPTION(numVertices <= 0, Exceptions::InvalidArgument, "Number of vertices must be > 0"); - TEUCHOS_TEST_FOR_EXCEPTION(numAggregates <= 0, Exceptions::InvalidArgument, "Number of aggregates must be > 0"); - - Xpetra::UnderlyingLib lib = currentLevel.lib(); - const int indexBase = 0; - RCP map = MapFactory::Build(lib, numVertices, indexBase, comm); - - RCP aggregates = rcp(new Aggregates(map)); - aggregates->setObjectLabel("User"); - - aggregates->SetNumAggregates(numAggregates); - - Teuchos::ArrayRCP vertex2AggId = aggregates->GetVertex2AggId()->getDataNonConst(0); - Teuchos::ArrayRCP procWinner = aggregates->GetProcWinner() ->getDataNonConst(0); - - for (LO i = 0; i < numAggregates; i++) { - int aggSize = 0; - ifs >> aggSize; - - std::vector list(aggSize); - for (int k = 0; k < aggSize; k++) { - // FIXME: File contains GIDs, we need LIDs - // for now, works on a single processor - ifs >> list[k]; - } - - // Mark first node as root node for the aggregate - aggregates->SetIsRoot(list[0]); - - // Fill vertex2AggId and procWinner structure with information - for (int k = 0; k < aggSize; k++) { - 
vertex2AggId[list[k]] = i; - procWinner [list[k]] = myRank; - } + // Fill vertex2AggId and procWinner structure with information + for (int k = 0; k < aggSize; k++) { + vertex2AggId[list[k]] = i; + procWinner[list[k]] = myRank; } + } - // FIXME: do the proper check whether aggregates cross interprocessor boundary - aggregates->AggregatesCrossProcessors(false); + // FIXME: do the proper check whether aggregates cross interprocessor boundary + aggregates->AggregatesCrossProcessors(false); - Set(currentLevel, "Aggregates", aggregates); + Set(currentLevel, "Aggregates", aggregates); - GetOStream(Statistics0) << aggregates->description() << std::endl; - } + GetOStream(Statistics0) << aggregates->description() << std::endl; +} -} //namespace MueLu +} // namespace MueLu #endif /* MUELU_USERAGGREGATIONFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Headers/MueLu.hpp b/packages/muelu/src/Headers/MueLu.hpp index d7af318165c4..c890d787db85 100644 --- a/packages/muelu/src/Headers/MueLu.hpp +++ b/packages/muelu/src/Headers/MueLu.hpp @@ -51,17 +51,17 @@ #include "MueLu_ConfigDefs.hpp" // Teuchos -#include -#include #include -#include #include +#include +#include +#include // Xpetra -#include -#include #include +#include #include +#include #include // MueLu diff --git a/packages/muelu/src/Headers/MueLu_ConfigDefs.hpp b/packages/muelu/src/Headers/MueLu_ConfigDefs.hpp index 6d65bf28102c..fec324e07cec 100644 --- a/packages/muelu/src/Headers/MueLu_ConfigDefs.hpp +++ b/packages/muelu/src/Headers/MueLu_ConfigDefs.hpp @@ -55,8 +55,8 @@ // Memory management #include -#include #include +#include #include // Verbose levels @@ -70,11 +70,12 @@ #include // Special macro for exception testing -// MUELU_TEST_FOR_EXCEPTION is only active if MueLu is configured with MueLu_ENABLE_DEBUG:BOOL=ON -// If you want an exception test both in the release and debug version of MueLu you still can use directly +// MUELU_TEST_FOR_EXCEPTION is only active if MueLu is configured with +// 
MueLu_ENABLE_DEBUG:BOOL=ON If you want an exception test both in the release +// and debug version of MueLu you still can use directly // TEUCHOS_TEST_FOR_EXCEPTION #ifdef HAVE_MUELU_DEBUG -#define MUELU_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg) \ +#define MUELU_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg) \ TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg); #else #define MUELU_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg) @@ -83,35 +84,36 @@ //! Namespace for MueLu classes and methods namespace MueLu { - // import Teuchos memory management classes into MueLu - using Teuchos::arcp; - using Teuchos::arcpFromArrayView; - using Teuchos::arcp_reinterpret_cast; - using Teuchos::Array; - using Teuchos::ArrayRCP; - using Teuchos::ArrayView; - using Teuchos::as; - using Teuchos::null; - using Teuchos::ParameterList; - using Teuchos::rcp; - using Teuchos::RCP; - using Teuchos::rcp_const_cast; - using Teuchos::rcp_dynamic_cast; - using Teuchos::rcpFromRef; - using Teuchos::rcp_implicit_cast; - using Teuchos::rcp_static_cast; +// import Teuchos memory management classes into MueLu +using Teuchos::arcp; +using Teuchos::arcp_reinterpret_cast; +using Teuchos::arcpFromArrayView; +using Teuchos::Array; +using Teuchos::ArrayRCP; +using Teuchos::ArrayView; +using Teuchos::as; +using Teuchos::null; +using Teuchos::ParameterList; +using Teuchos::rcp; +using Teuchos::RCP; +using Teuchos::rcp_const_cast; +using Teuchos::rcp_dynamic_cast; +using Teuchos::rcp_implicit_cast; +using Teuchos::rcp_static_cast; +using Teuchos::rcpFromRef; - // verbose levels - using Teuchos::VERB_DEFAULT; - using Teuchos::VERB_NONE; - using Teuchos::VERB_LOW; - using Teuchos::VERB_MEDIUM; - using Teuchos::VERB_HIGH; - using Teuchos::VERB_EXTREME; +// verbose levels +using Teuchos::VERB_DEFAULT; +using Teuchos::VERB_EXTREME; +using Teuchos::VERB_HIGH; +using Teuchos::VERB_LOW; +using Teuchos::VERB_MEDIUM; +using Teuchos::VERB_NONE; -} +} // namespace MueLu 
-// This include file defines macros to avoid warnings under CUDA. See github issue #1133. +// This include file defines macros to avoid warnings under CUDA. See github +// issue #1133. #include "Teuchos_CompilerCodeTweakMacros.hpp" #endif /* MUELU_CONFIGDEFS_H */ diff --git a/packages/muelu/src/Headers/MueLu_Details_DefaultTypes.hpp b/packages/muelu/src/Headers/MueLu_Details_DefaultTypes.hpp index fa9745afbe37..f23e676b38ae 100644 --- a/packages/muelu/src/Headers/MueLu_Details_DefaultTypes.hpp +++ b/packages/muelu/src/Headers/MueLu_Details_DefaultTypes.hpp @@ -47,27 +47,26 @@ #ifndef MUELU_USEDEFAULTTYPES_HPP #define MUELU_USEDEFAULTTYPES_HPP -#include #include "MueLu_config.hpp" +#include #include -namespace MueLu -{ +namespace MueLu { - typedef Tpetra::Details::DefaultTypes::scalar_type DefaultScalar; +typedef Tpetra::Details::DefaultTypes::scalar_type DefaultScalar; - typedef int DefaultLocalOrdinal; +typedef int DefaultLocalOrdinal; - #if defined HAVE_MUELU_DEFAULT_GO_LONG - typedef long DefaultGlobalOrdinal; - #elif defined HAVE_MUELU_DEFAULT_GO_LONGLONG - typedef long long DefaultGlobalOrdinal; - #else - typedef int DefaultGlobalOrdinal; - #endif +#if defined HAVE_MUELU_DEFAULT_GO_LONG +typedef long DefaultGlobalOrdinal; +#elif defined HAVE_MUELU_DEFAULT_GO_LONGLONG +typedef long long DefaultGlobalOrdinal; +#else +typedef int DefaultGlobalOrdinal; +#endif - typedef Tpetra::KokkosClassic::DefaultNode::DefaultNodeType DefaultNode; -} +typedef Tpetra::KokkosClassic::DefaultNode::DefaultNodeType DefaultNode; +} // namespace MueLu #endif diff --git a/packages/muelu/src/Headers/MueLu_Types.hpp b/packages/muelu/src/Headers/MueLu_Types.hpp index 6ea5d31711d7..190af8898830 100644 --- a/packages/muelu/src/Headers/MueLu_Types.hpp +++ b/packages/muelu/src/Headers/MueLu_Types.hpp @@ -49,55 +49,52 @@ #include "MueLu_ConfigDefs.hpp" namespace MueLu { - enum CycleType { - VCYCLE, - WCYCLE - }; +enum CycleType { VCYCLE, WCYCLE }; - enum PreOrPost { - PRE = 0x1, - POST = 0x2, 
- BOTH = 0x3 - }; +enum PreOrPost { PRE = 0x1, POST = 0x2, BOTH = 0x3 }; - // In the algorithm, aggStat[] = READY/NOTSEL/SELECTED indicates whether a node has been aggregated - enum NodeState { - READY = 1, // indicates that a node is available to be - // selected as a root node of an aggregate +// In the algorithm, aggStat[] = READY/NOTSEL/SELECTED indicates whether a node +// has been aggregated +enum NodeState { + READY = 1, // indicates that a node is available to be + // selected as a root node of an aggregate - NOTSEL = 2, // indicates that a node has been rejected as a root node. - // This could perhaps be because if this node had been - // selected a small aggregate would have resulted - // This is Phase 1 specific + NOTSEL = 2, // indicates that a node has been rejected as a root node. + // This could perhaps be because if this node had been + // selected a small aggregate would have resulted + // This is Phase 1 specific - AGGREGATED = 3, // indicates that a node has been assigned - // to an aggregate + AGGREGATED = 3, // indicates that a node has been assigned + // to an aggregate - ONEPT = 4, // indicates that a node shall be preserved over - // all multigrid levels as 1 point aggregate + ONEPT = 4, // indicates that a node shall be preserved over + // all multigrid levels as 1 point aggregate - IGNORED = 5, // indicates that the node is removed from consideration, - // and is not aggregated + IGNORED = 5, // indicates that the node is removed from consideration, + // and is not aggregated - BOUNDARY = 6, // node is a Dirichlet node - // During aggregation, it is transformed either to AGGREGATED - // or to IGNORED - INTERFACE = 7 // node is chosen as root node on an interface where coordinated - // coarsening across the interface is required. 
- }; + BOUNDARY = 6, // node is a Dirichlet node + // During aggregation, it is transformed either to AGGREGATED + // or to IGNORED + INTERFACE = 7 // node is chosen as root node on an interface where coordinated + // coarsening across the interface is required. +}; - // This is use by the structured aggregation index manager to keep track of the underlying mesh - // layout. - enum IndexingType { - UNCOUPLED = 1, // indicates that the underlying mesh is treated independently from rank to rank +// This is use by the structured aggregation index manager to keep track of the +// underlying mesh layout. +enum IndexingType { + UNCOUPLED = 1, // indicates that the underlying mesh is treated independently + // from rank to rank - LOCALLEXI = 2, // local lexicographic indexing of the mesh, this is similar to uncoupled but - // extra data is used to compute indices accross ranks + LOCALLEXI = + 2, // local lexicographic indexing of the mesh, this is similar to + // uncoupled but extra data is used to compute indices accross ranks - GLOBALLEXI = 3 // global lexicographic indexing of the mesh means that the mesh is ordered - // lexicographically accorss and subsequently split among ranks. - }; + GLOBALLEXI = + 3 // global lexicographic indexing of the mesh means that the mesh is + // ordered lexicographically accorss and subsequently split among ranks. +}; -} +} // namespace MueLu -#endif //ifndef MUELU_TYPES_HPP +#endif // ifndef MUELU_TYPES_HPP diff --git a/packages/muelu/src/Headers/MueLu_UseShortNames.hpp b/packages/muelu/src/Headers/MueLu_UseShortNames.hpp index 7a25b031132e..ec23f598d1cd 100644 --- a/packages/muelu/src/Headers/MueLu_UseShortNames.hpp +++ b/packages/muelu/src/Headers/MueLu_UseShortNames.hpp @@ -49,8 +49,9 @@ // 1) As an header of a user program. // In this case, this file must be included *after* other headers // and the types Scalar, LocalOrdinal, GlobalOrdinal, Node must be defined. 
-// Note also that there is no #ifndef/#endif to protect again the multiple inclusion of this file. -// User should create is own header file including this one: +// Note also that there is no #ifndef/#endif to protect again the multiple +// inclusion of this file. User should create is own header file including +// this one: // // Example: // #ifndef MY_HEADER @@ -62,14 +63,19 @@ // 2) Inside of MueLu to enhance the readability. // // template ::scalar_type, -// class LocalOrdinal = typename Xpetra::MultiVector::local_ordinal_type, -// class GlobalOrdinal = typename Xpetra::MultiVector::global_ordinal_type, -// class Node = typename Xpetra::MultiVector::node_type> -// class TpetraMultiVector : public virtual Xpetra::MultiVector { +// class LocalOrdinal = typename +// Xpetra::MultiVector::local_ordinal_type, class +// GlobalOrdinal = typename Xpetra::MultiVector::global_ordinal_type, class Node = typename +// Xpetra::MultiVector::node_type> +// class TpetraMultiVector : public virtual +// Xpetra::MultiVector { // // #include // -// myMethod(RCP & map) { [...] } // instead of myMethod(RCP > &map) +// myMethod(RCP & map) { [...] } // instead of myMethod(RCP > &map) // // [...] // @@ -81,4 +87,5 @@ //! @file MueLu_UseShortNamesOrdinal.hpp -//TODO / NOTE: This file should not be included at the global scope (to avoid name collision) +// TODO / NOTE: This file should not be included at the global scope (to avoid +// name collision) diff --git a/packages/muelu/src/Headers/MueLu_UseShortNamesOrdinal.hpp b/packages/muelu/src/Headers/MueLu_UseShortNamesOrdinal.hpp index 9bc47bfe9b36..3000c3f53df2 100644 --- a/packages/muelu/src/Headers/MueLu_UseShortNamesOrdinal.hpp +++ b/packages/muelu/src/Headers/MueLu_UseShortNamesOrdinal.hpp @@ -1,126 +1,174 @@ -// Type definitions for templated classes (generally graph-related) that do not require a scalar. +// Type definitions for templated classes (generally graph-related) that do not +// require a scalar. 
#include #ifdef MUELU_AGGREGATES_SHORT -using Aggregates [[maybe_unused]] = MueLu::Aggregates; +using Aggregates [[maybe_unused]] = + MueLu::Aggregates; #endif #ifdef MUELU_AGGREGATIONPHASE1ALGORITHM_SHORT -using AggregationPhase1Algorithm [[maybe_unused]] = MueLu::AggregationPhase1Algorithm; +using AggregationPhase1Algorithm [[maybe_unused]] = + MueLu::AggregationPhase1Algorithm; #endif #ifdef MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_SHORT -using AggregationPhase1Algorithm_kokkos [[maybe_unused]] = MueLu::AggregationPhase1Algorithm_kokkos; +using AggregationPhase1Algorithm_kokkos [[maybe_unused]] = + MueLu::AggregationPhase1Algorithm_kokkos; #endif #ifdef MUELU_AGGREGATIONPHASE2AALGORITHM_SHORT -using AggregationPhase2aAlgorithm [[maybe_unused]] = MueLu::AggregationPhase2aAlgorithm; +using AggregationPhase2aAlgorithm [[maybe_unused]] = + MueLu::AggregationPhase2aAlgorithm; #endif #ifdef MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_SHORT -using AggregationPhase2aAlgorithm_kokkos [[maybe_unused]] = MueLu::AggregationPhase2aAlgorithm_kokkos; +using AggregationPhase2aAlgorithm_kokkos [[maybe_unused]] = + MueLu::AggregationPhase2aAlgorithm_kokkos; #endif #ifdef MUELU_AGGREGATIONPHASE2BALGORITHM_SHORT -using AggregationPhase2bAlgorithm [[maybe_unused]] = MueLu::AggregationPhase2bAlgorithm; +using AggregationPhase2bAlgorithm [[maybe_unused]] = + MueLu::AggregationPhase2bAlgorithm; #endif #ifdef MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_SHORT -using AggregationPhase2bAlgorithm_kokkos [[maybe_unused]] = MueLu::AggregationPhase2bAlgorithm_kokkos; +using AggregationPhase2bAlgorithm_kokkos [[maybe_unused]] = + MueLu::AggregationPhase2bAlgorithm_kokkos; #endif #ifdef MUELU_AGGREGATIONPHASE3ALGORITHM_SHORT -using AggregationPhase3Algorithm [[maybe_unused]] = MueLu::AggregationPhase3Algorithm; +using AggregationPhase3Algorithm [[maybe_unused]] = + MueLu::AggregationPhase3Algorithm; #endif #ifdef MUELU_AGGREGATIONPHASE3ALGORITHM_KOKKOS_SHORT -using AggregationPhase3Algorithm_kokkos 
[[maybe_unused]] = MueLu::AggregationPhase3Algorithm_kokkos; +using AggregationPhase3Algorithm_kokkos [[maybe_unused]] = + MueLu::AggregationPhase3Algorithm_kokkos; #endif #ifdef MUELU_AGGREGATIONSTRUCTUREDALGORITHM_SHORT -using AggregationStructuredAlgorithm [[maybe_unused]] = MueLu::AggregationStructuredAlgorithm; +using AggregationStructuredAlgorithm [[maybe_unused]] = + MueLu::AggregationStructuredAlgorithm; #endif #ifdef MUELU_AGGREGATIONSTRUCTUREDALGORITHM_KOKKOS_SHORT -using AggregationStructuredAlgorithm_kokkos [[maybe_unused]] = MueLu::AggregationStructuredAlgorithm_kokkos; +using AggregationStructuredAlgorithm_kokkos [[maybe_unused]] = + MueLu::AggregationStructuredAlgorithm_kokkos; #endif #ifdef MUELU_AMALGAMATIONINFO_SHORT -using AmalgamationInfo [[maybe_unused]] = MueLu::AmalgamationInfo; +using AmalgamationInfo [[maybe_unused]] = + MueLu::AmalgamationInfo; #endif #ifdef MUELU_GLOBALLEXICOGRAPHICINDEXMANAGER_SHORT -using GlobalLexicographicIndexManager [[maybe_unused]] = MueLu::GlobalLexicographicIndexManager; +using GlobalLexicographicIndexManager [[maybe_unused]] = + MueLu::GlobalLexicographicIndexManager; #endif #ifdef MUELU_GRAPH_SHORT -using Graph [[maybe_unused]] = MueLu::Graph; +using Graph [[maybe_unused]] = MueLu::Graph; #endif #ifdef MUELU_GRAPHBASE_SHORT -using GraphBase [[maybe_unused]] = MueLu::GraphBase; +using GraphBase [[maybe_unused]] = + MueLu::GraphBase; #endif #ifdef MUELU_HYBRIDAGGREGATIONFACTORY_SHORT -using HybridAggregationFactory [[maybe_unused]] = MueLu::HybridAggregationFactory; +using HybridAggregationFactory [[maybe_unused]] = + MueLu::HybridAggregationFactory; #endif #ifdef MUELU_INDEXMANAGER_SHORT -using IndexManager [[maybe_unused]] = MueLu::IndexManager; +using IndexManager [[maybe_unused]] = + MueLu::IndexManager; #endif #ifdef MUELU_INDEXMANAGER_KOKKOS_SHORT -using IndexManager_kokkos [[maybe_unused]] = MueLu::IndexManager_kokkos; +using IndexManager_kokkos [[maybe_unused]] = + MueLu::IndexManager_kokkos; #endif 
#ifdef MUELU_INTERFACEAGGREGATIONALGORITHM_SHORT -using InterfaceAggregationAlgorithm [[maybe_unused]] = MueLu::InterfaceAggregationAlgorithm; +using InterfaceAggregationAlgorithm [[maybe_unused]] = + MueLu::InterfaceAggregationAlgorithm; #endif #ifdef MUELU_INTERFACEMAPPINGTRANSFERFACTORY_SHORT -using InterfaceMappingTransferFactory [[maybe_unused]] = MueLu::InterfaceMappingTransferFactory; +using InterfaceMappingTransferFactory [[maybe_unused]] = + MueLu::InterfaceMappingTransferFactory; #endif #ifdef MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_SHORT -using IsolatedNodeAggregationAlgorithm [[maybe_unused]] = MueLu::IsolatedNodeAggregationAlgorithm; +using IsolatedNodeAggregationAlgorithm [[maybe_unused]] = + MueLu::IsolatedNodeAggregationAlgorithm; #endif #ifdef MUELU_ISOLATEDNODEAGGREGATIONALGORITHM_KOKKOS_SHORT -using IsolatedNodeAggregationAlgorithm_kokkos [[maybe_unused]] = MueLu::IsolatedNodeAggregationAlgorithm_kokkos; +using IsolatedNodeAggregationAlgorithm_kokkos [[maybe_unused]] = + MueLu::IsolatedNodeAggregationAlgorithm_kokkos; #endif #ifdef MUELU_ISORROPIAINTERFACE_SHORT -using IsorropiaInterface [[maybe_unused]] = MueLu::IsorropiaInterface; +using IsorropiaInterface [[maybe_unused]] = + MueLu::IsorropiaInterface; #endif #ifdef MUELU_LWGRAPH_SHORT -using LWGraph [[maybe_unused]] = MueLu::LWGraph; +using LWGraph [[maybe_unused]] = + MueLu::LWGraph; #endif #ifdef MUELU_LWGRAPH_KOKKOS_SHORT -using LWGraph_kokkos [[maybe_unused]] = MueLu::LWGraph_kokkos; +using LWGraph_kokkos [[maybe_unused]] = + MueLu::LWGraph_kokkos; #endif #ifdef MUELU_LOCALLWGRAPH_KOKKOS_SHORT -using LocalLWGraph_kokkos [[maybe_unused]] = MueLu::LocalLWGraph_kokkos; +using LocalLWGraph_kokkos [[maybe_unused]] = + MueLu::LocalLWGraph_kokkos; #endif #ifdef MUELU_LOCALLEXICOGRAPHICINDEXMANAGER_SHORT -using LocalLexicographicIndexManager [[maybe_unused]] = MueLu::LocalLexicographicIndexManager; +using LocalLexicographicIndexManager [[maybe_unused]] = + MueLu::LocalLexicographicIndexManager; 
#endif #ifdef MUELU_LOCALORDINALTRANSFERFACTORY_SHORT -using LocalOrdinalTransferFactory [[maybe_unused]] = MueLu::LocalOrdinalTransferFactory; +using LocalOrdinalTransferFactory [[maybe_unused]] = + MueLu::LocalOrdinalTransferFactory; #endif #ifdef MUELU_ONEPTAGGREGATIONALGORITHM_SHORT -using OnePtAggregationAlgorithm [[maybe_unused]] = MueLu::OnePtAggregationAlgorithm; +using OnePtAggregationAlgorithm [[maybe_unused]] = + MueLu::OnePtAggregationAlgorithm; #endif #ifdef MUELU_ONEPTAGGREGATIONALGORITHM_KOKKOS_SHORT -using OnePtAggregationAlgorithm_kokkos [[maybe_unused]] = MueLu::OnePtAggregationAlgorithm_kokkos; +using OnePtAggregationAlgorithm_kokkos [[maybe_unused]] = + MueLu::OnePtAggregationAlgorithm_kokkos; #endif #ifdef MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_SHORT -using PreserveDirichletAggregationAlgorithm [[maybe_unused]] = MueLu::PreserveDirichletAggregationAlgorithm; +using PreserveDirichletAggregationAlgorithm [[maybe_unused]] = + MueLu::PreserveDirichletAggregationAlgorithm; #endif #ifdef MUELU_PRESERVEDIRICHLETAGGREGATIONALGORITHM_KOKKOS_SHORT -using PreserveDirichletAggregationAlgorithm_kokkos [[maybe_unused]] = MueLu::PreserveDirichletAggregationAlgorithm_kokkos; +using PreserveDirichletAggregationAlgorithm_kokkos [[maybe_unused]] = + MueLu::PreserveDirichletAggregationAlgorithm_kokkos; #endif #ifdef MUELU_PRFACTORY_SHORT -using PRFactory [[maybe_unused]] = MueLu::PRFactory; +using PRFactory [[maybe_unused]] = + MueLu::PRFactory; #endif #ifdef MUELU_REBALANCEMAPFACTORY_SHORT -using RebalanceMapFactory [[maybe_unused]] = MueLu::RebalanceMapFactory; +using RebalanceMapFactory [[maybe_unused]] = + MueLu::RebalanceMapFactory; #endif #ifdef MUELU_REPARTITIONINTERFACE_SHORT -using RepartitionInterface [[maybe_unused]] = MueLu::RepartitionInterface; +using RepartitionInterface [[maybe_unused]] = + MueLu::RepartitionInterface; #endif #ifdef MUELU_STRUCTUREDAGGREGATIONFACTORY_KOKKOS_SHORT -using StructuredAggregationFactory_kokkos [[maybe_unused]] = 
MueLu::StructuredAggregationFactory_kokkos; +using StructuredAggregationFactory_kokkos [[maybe_unused]] = + MueLu::StructuredAggregationFactory_kokkos; #endif #ifdef MUELU_UNCOUPLEDAGGREGATIONFACTORY_SHORT -using UncoupledAggregationFactory [[maybe_unused]] = MueLu::UncoupledAggregationFactory; +using UncoupledAggregationFactory [[maybe_unused]] = + MueLu::UncoupledAggregationFactory; #endif #ifdef MUELU_UNCOUPLEDAGGREGATIONFACTORY_KOKKOS_SHORT -using UncoupledAggregationFactory_kokkos [[maybe_unused]] = MueLu::UncoupledAggregationFactory_kokkos; +using UncoupledAggregationFactory_kokkos [[maybe_unused]] = + MueLu::UncoupledAggregationFactory_kokkos; #endif #ifdef MUELU_UNCOUPLEDINDEXMANAGER_SHORT -using UncoupledIndexManager [[maybe_unused]] = MueLu::UncoupledIndexManager; +using UncoupledIndexManager [[maybe_unused]] = + MueLu::UncoupledIndexManager; #endif #ifdef MUELU_USERAGGREGATIONFACTORY_SHORT -using UserAggregationFactory [[maybe_unused]] = MueLu::UserAggregationFactory; +using UserAggregationFactory [[maybe_unused]] = + MueLu::UserAggregationFactory; #endif #ifdef MUELU_FACTORY_SHORT using Factory [[maybe_unused]] = MueLu::Factory; diff --git a/packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp b/packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp index faeb451b7026..3b9b13ba25df 100644 --- a/packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp +++ b/packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp @@ -1,451 +1,631 @@ -// New definition of types using the types Scalar, LocalOrdinal, GlobalOrdinal, Node of the current context. +// New definition of types using the types Scalar, LocalOrdinal, GlobalOrdinal, +// Node of the current context. 
#include #ifdef MUELU_AGGREGATIONEXPORTFACTORY_SHORT -using AggregationExportFactory [[maybe_unused]] = MueLu::AggregationExportFactory; +using AggregationExportFactory [[maybe_unused]] = + MueLu::AggregationExportFactory; #endif #ifdef MUELU_AGGREGATEQUALITYESTIMATEFACTORY_SHORT -using AggregateQualityEstimateFactory [[maybe_unused]] = MueLu::AggregateQualityEstimateFactory; +using AggregateQualityEstimateFactory [[maybe_unused]] = + MueLu::AggregateQualityEstimateFactory; #endif #ifdef MUELU_AMALGAMATIONFACTORY_SHORT -using AmalgamationFactory [[maybe_unused]] = MueLu::AmalgamationFactory; +using AmalgamationFactory [[maybe_unused]] = + MueLu::AmalgamationFactory; #endif #ifdef MUELU_AMESOS2SMOOTHER_SHORT -using Amesos2Smoother [[maybe_unused]] = MueLu::Amesos2Smoother; +using Amesos2Smoother [[maybe_unused]] = + MueLu::Amesos2Smoother; #endif #ifdef MUELU_AMGXOPERATOR_SHORT -using AMGXOperator [[maybe_unused]] = MueLu::AMGXOperator; +using AMGXOperator [[maybe_unused]] = + MueLu::AMGXOperator; #endif #ifdef MUELU_ALGEBRAICPERMUTATIONSTRATEGY_SHORT -using AlgebraicPermutationStrategy [[maybe_unused]] = MueLu::AlgebraicPermutationStrategy; +using AlgebraicPermutationStrategy [[maybe_unused]] = + MueLu::AlgebraicPermutationStrategy; #endif #ifdef MUELU_BELOSSMOOTHER_SHORT -using BelosSmoother [[maybe_unused]] = MueLu::BelosSmoother; +using BelosSmoother [[maybe_unused]] = + MueLu::BelosSmoother; #endif #ifdef MUELU_BLACKBOXPFACTORY_SHORT -using BlackBoxPFactory [[maybe_unused]] = MueLu::BlackBoxPFactory; +using BlackBoxPFactory [[maybe_unused]] = + MueLu::BlackBoxPFactory; #endif #ifdef MUELU_BLOCKEDCOARSEMAPFACTORY_SHORT -using BlockedCoarseMapFactory [[maybe_unused]] = MueLu::BlockedCoarseMapFactory; +using BlockedCoarseMapFactory [[maybe_unused]] = + MueLu::BlockedCoarseMapFactory; #endif #ifdef MUELU_BLOCKEDCOORDINATESTRANSFERFACTORY_SHORT -using BlockedCoordinatesTransferFactory [[maybe_unused]] = MueLu::BlockedCoordinatesTransferFactory; +using 
BlockedCoordinatesTransferFactory [[maybe_unused]] = + MueLu::BlockedCoordinatesTransferFactory; #endif #ifdef MUELU_BLOCKEDDIRECTSOLVER_SHORT -using BlockedDirectSolver [[maybe_unused]] = MueLu::BlockedDirectSolver; +using BlockedDirectSolver [[maybe_unused]] = + MueLu::BlockedDirectSolver; #endif #ifdef MUELU_BLOCKEDGAUSSSEIDELSMOOTHER_SHORT -using BlockedGaussSeidelSmoother [[maybe_unused]] = MueLu::BlockedGaussSeidelSmoother; +using BlockedGaussSeidelSmoother [[maybe_unused]] = + MueLu::BlockedGaussSeidelSmoother; #endif #ifdef MUELU_BLOCKEDJACOBISMOOTHER_SHORT -using BlockedJacobiSmoother [[maybe_unused]] = MueLu::BlockedJacobiSmoother; +using BlockedJacobiSmoother [[maybe_unused]] = + MueLu::BlockedJacobiSmoother; #endif #ifdef MUELU_BLOCKEDPFACTORY_SHORT -using BlockedPFactory [[maybe_unused]] = MueLu::BlockedPFactory; +using BlockedPFactory [[maybe_unused]] = + MueLu::BlockedPFactory; #endif #ifdef MUELU_BLOCKEDRAPFACTORY_SHORT -using BlockedRAPFactory [[maybe_unused]] = MueLu::BlockedRAPFactory; +using BlockedRAPFactory [[maybe_unused]] = + MueLu::BlockedRAPFactory; #endif #ifdef MUELU_BRICKAGGREGATIONFACTORY_SHORT -using BrickAggregationFactory [[maybe_unused]] = MueLu::BrickAggregationFactory; +using BrickAggregationFactory [[maybe_unused]] = + MueLu::BrickAggregationFactory; #endif #ifdef MUELU_BRAESSSARAZINSMOOTHER_SHORT -using BraessSarazinSmoother [[maybe_unused]] = MueLu::BraessSarazinSmoother; +using BraessSarazinSmoother [[maybe_unused]] = + MueLu::BraessSarazinSmoother; #endif #ifdef MUELU_CGSOLVER_SHORT -using CGSolver [[maybe_unused]] = MueLu::CGSolver; +using CGSolver [[maybe_unused]] = + MueLu::CGSolver; #endif #ifdef MUELU_CLASSICALMAPFACTORY_SHORT -using ClassicalMapFactory [[maybe_unused]] = MueLu::ClassicalMapFactory; +using ClassicalMapFactory [[maybe_unused]] = + MueLu::ClassicalMapFactory; #endif #ifdef MUELU_CLASSICALPFACTORY_SHORT -using ClassicalPFactory [[maybe_unused]] = MueLu::ClassicalPFactory; +using ClassicalPFactory 
[[maybe_unused]] = + MueLu::ClassicalPFactory; #endif #ifdef MUELU_CLONEREPARTITIONINTERFACE_SHORT -using CloneRepartitionInterface [[maybe_unused]] = MueLu::CloneRepartitionInterface; +using CloneRepartitionInterface [[maybe_unused]] = + MueLu::CloneRepartitionInterface; #endif #ifdef MUELU_COALESCEDROPFACTORY_SHORT -using CoalesceDropFactory [[maybe_unused]] = MueLu::CoalesceDropFactory; +using CoalesceDropFactory [[maybe_unused]] = + MueLu::CoalesceDropFactory; #endif #ifdef MUELU_COALESCEDROPFACTORY_KOKKOS_SHORT -using CoalesceDropFactory_kokkos [[maybe_unused]] = MueLu::CoalesceDropFactory_kokkos; +using CoalesceDropFactory_kokkos [[maybe_unused]] = + MueLu::CoalesceDropFactory_kokkos; #endif #ifdef MUELU_COARSEMAPFACTORY_SHORT -using CoarseMapFactory [[maybe_unused]] = MueLu::CoarseMapFactory; +using CoarseMapFactory [[maybe_unused]] = + MueLu::CoarseMapFactory; #endif #ifdef MUELU_COARSENINGVISUALIZATIONFACTORY_SHORT -using CoarseningVisualizationFactory [[maybe_unused]] = MueLu::CoarseningVisualizationFactory; +using CoarseningVisualizationFactory [[maybe_unused]] = + MueLu::CoarseningVisualizationFactory; #endif #ifdef MUELU_CONSTRAINT_SHORT -using Constraint [[maybe_unused]] = MueLu::Constraint; +using Constraint [[maybe_unused]] = + MueLu::Constraint; #endif #ifdef MUELU_CONSTRAINTFACTORY_SHORT -using ConstraintFactory [[maybe_unused]] = MueLu::ConstraintFactory; +using ConstraintFactory [[maybe_unused]] = + MueLu::ConstraintFactory; #endif #ifdef MUELU_COORDINATESTRANSFERFACTORY_SHORT -using CoordinatesTransferFactory [[maybe_unused]] = MueLu::CoordinatesTransferFactory; +using CoordinatesTransferFactory [[maybe_unused]] = + MueLu::CoordinatesTransferFactory; #endif #ifdef MUELU_COUPLEDRBMFACTORY_SHORT -using CoupledRBMFactory [[maybe_unused]] = MueLu::CoupledRBMFactory; +using CoupledRBMFactory [[maybe_unused]] = + MueLu::CoupledRBMFactory; #endif #ifdef MUELU_DEMOFACTORY_SHORT -using DemoFactory [[maybe_unused]] = MueLu::DemoFactory; +using 
DemoFactory [[maybe_unused]] = + MueLu::DemoFactory; #endif #ifdef MUELU_DIRECTSOLVER_SHORT -using DirectSolver [[maybe_unused]] = MueLu::DirectSolver; +using DirectSolver [[maybe_unused]] = + MueLu::DirectSolver; #endif #ifdef MUELU_DROPNEGATIVEENTRIESFACTORY_SHORT -using DropNegativeEntriesFactory [[maybe_unused]] = MueLu::DropNegativeEntriesFactory; +using DropNegativeEntriesFactory [[maybe_unused]] = + MueLu::DropNegativeEntriesFactory; #endif #ifdef MUELU_EMINPFACTORY_SHORT -using EminPFactory [[maybe_unused]] = MueLu::EminPFactory; +using EminPFactory [[maybe_unused]] = + MueLu::EminPFactory; #endif #ifdef MUELU_FACADECLASSFACTORY_SHORT -using FacadeClassFactory [[maybe_unused]] = MueLu::FacadeClassFactory; +using FacadeClassFactory [[maybe_unused]] = + MueLu::FacadeClassFactory; #endif #ifdef MUELU_FACTORYMANAGER_SHORT -using FactoryManager [[maybe_unused]] = MueLu::FactoryManager; +using FactoryManager [[maybe_unused]] = + MueLu::FactoryManager; #endif #ifdef MUELU_FAKESMOOTHERPROTOTYPE_SHORT -using FakeSmootherPrototype [[maybe_unused]] = MueLu::FakeSmootherPrototype; +using FakeSmootherPrototype [[maybe_unused]] = + MueLu::FakeSmootherPrototype; #endif #ifdef MUELU_FILTEREDAFACTORY_SHORT -using FilteredAFactory [[maybe_unused]] = MueLu::FilteredAFactory; +using FilteredAFactory [[maybe_unused]] = + MueLu::FilteredAFactory; #endif #ifdef MUELU_FINELEVELINPUTDATAFACTORY_SHORT -using FineLevelInputDataFactory [[maybe_unused]] = MueLu::FineLevelInputDataFactory; +using FineLevelInputDataFactory [[maybe_unused]] = + MueLu::FineLevelInputDataFactory; #endif #ifdef MUELU_GENERALGEOMETRICPFACTORY_SHORT -using GeneralGeometricPFactory [[maybe_unused]] = MueLu::GeneralGeometricPFactory; +using GeneralGeometricPFactory [[maybe_unused]] = + MueLu::GeneralGeometricPFactory; #endif #ifdef MUELU_GENERICRFACTORY_SHORT -using GenericRFactory [[maybe_unused]] = MueLu::GenericRFactory; +using GenericRFactory [[maybe_unused]] = + MueLu::GenericRFactory; #endif #ifdef 
MUELU_GEOMETRICINTERPOLATIONPFACTORY_SHORT -using GeometricInterpolationPFactory [[maybe_unused]] = MueLu::GeometricInterpolationPFactory; +using GeometricInterpolationPFactory [[maybe_unused]] = + MueLu::GeometricInterpolationPFactory; #endif #ifdef MUELU_GEOMETRICINTERPOLATIONPFACTORY_KOKKOS_SHORT -using GeometricInterpolationPFactory_kokkos [[maybe_unused]] = MueLu::GeometricInterpolationPFactory_kokkos; +using GeometricInterpolationPFactory_kokkos [[maybe_unused]] = + MueLu::GeometricInterpolationPFactory_kokkos; #endif #ifdef MUELU_GMRESSOLVER_SHORT -using GMRESSolver [[maybe_unused]] = MueLu::GMRESSolver; +using GMRESSolver [[maybe_unused]] = + MueLu::GMRESSolver; #endif #ifdef MUELU_HIERARCHY_SHORT -using Hierarchy [[maybe_unused]] = MueLu::Hierarchy; +using Hierarchy [[maybe_unused]] = + MueLu::Hierarchy; #endif #ifdef MUELU_HIERARCHYMANAGER_SHORT -using HierarchyManager [[maybe_unused]] = MueLu::HierarchyManager; +using HierarchyManager [[maybe_unused]] = + MueLu::HierarchyManager; #endif #ifdef MUELU_HIERARCHYFACTORY_SHORT -using HierarchyFactory [[maybe_unused]] = MueLu::HierarchyFactory; +using HierarchyFactory [[maybe_unused]] = + MueLu::HierarchyFactory; #endif #ifdef MUELU_HIERARCHYUTILS_SHORT -using HierarchyUtils [[maybe_unused]] = MueLu::HierarchyUtils; +using HierarchyUtils [[maybe_unused]] = + MueLu::HierarchyUtils; #endif #ifdef MUELU_INTERFACEAGGREGATIONFACTORY_SHORT -using InterfaceAggregationFactory [[maybe_unused]] = MueLu::InterfaceAggregationFactory; +using InterfaceAggregationFactory [[maybe_unused]] = + MueLu::InterfaceAggregationFactory; #endif #ifdef MUELU_IFPACK2SMOOTHER_SHORT -using Ifpack2Smoother [[maybe_unused]] = MueLu::Ifpack2Smoother; +using Ifpack2Smoother [[maybe_unused]] = + MueLu::Ifpack2Smoother; #endif #ifdef MUELU_INDEFBLOCKEDDIAGONALSMOOTHER_SHORT -using IndefBlockedDiagonalSmoother [[maybe_unused]] = MueLu::IndefBlockedDiagonalSmoother; +using IndefBlockedDiagonalSmoother [[maybe_unused]] = + 
MueLu::IndefBlockedDiagonalSmoother; #endif #ifdef MUELU_INITIALBLOCKNUMBERFACTORY_SHORT -using InitialBlockNumberFactory [[maybe_unused]] = MueLu::InitialBlockNumberFactory; +using InitialBlockNumberFactory [[maybe_unused]] = + MueLu::InitialBlockNumberFactory; #endif #ifdef MUELU_INTREPIDPCOARSENFACTORY_SHORT -using IntrepidPCoarsenFactory [[maybe_unused]] = MueLu::IntrepidPCoarsenFactory; +using IntrepidPCoarsenFactory [[maybe_unused]] = + MueLu::IntrepidPCoarsenFactory; #endif #ifdef MUELU_INVERSEAPPROXIMATIONFACTORY_SHORT -using InverseApproximationFactory [[maybe_unused]] = MueLu::InverseApproximationFactory; +using InverseApproximationFactory [[maybe_unused]] = + MueLu::InverseApproximationFactory; #endif #ifdef MUELU_LINEDETECTIONFACTORY_SHORT -using LineDetectionFactory [[maybe_unused]] = MueLu::LineDetectionFactory; +using LineDetectionFactory [[maybe_unused]] = + MueLu::LineDetectionFactory; #endif #ifdef MUELU_LOCALPERMUTATIONSTRATEGY_SHORT -using LocalPermutationStrategy [[maybe_unused]] = MueLu::LocalPermutationStrategy; +using LocalPermutationStrategy [[maybe_unused]] = + MueLu::LocalPermutationStrategy; #endif #ifdef MUELU_LOWPRECISIONFACTORY_SHORT -using LowPrecisionFactory [[maybe_unused]] = MueLu::LowPrecisionFactory; +using LowPrecisionFactory [[maybe_unused]] = + MueLu::LowPrecisionFactory; #endif #ifdef MUELU_MAPTRANSFERFACTORY_SHORT -using MapTransferFactory [[maybe_unused]] = MueLu::MapTransferFactory; +using MapTransferFactory [[maybe_unused]] = + MueLu::MapTransferFactory; #endif #ifdef MUELU_MATRIXANALYSISFACTORY_SHORT -using MatrixAnalysisFactory [[maybe_unused]] = MueLu::MatrixAnalysisFactory; +using MatrixAnalysisFactory [[maybe_unused]] = + MueLu::MatrixAnalysisFactory; #endif #ifdef MUELU_MERGEDBLOCKEDMATRIXFACTORY_SHORT -using MergedBlockedMatrixFactory [[maybe_unused]] = MueLu::MergedBlockedMatrixFactory; +using MergedBlockedMatrixFactory [[maybe_unused]] = + MueLu::MergedBlockedMatrixFactory; #endif #ifdef 
MUELU_MERGEDSMOOTHER_SHORT -using MergedSmoother [[maybe_unused]] = MueLu::MergedSmoother; +using MergedSmoother [[maybe_unused]] = + MueLu::MergedSmoother; #endif #ifdef MUELU_MULTIVECTORTRANSFERFACTORY_SHORT -using MultiVectorTransferFactory [[maybe_unused]] = MueLu::MultiVectorTransferFactory; +using MultiVectorTransferFactory [[maybe_unused]] = + MueLu::MultiVectorTransferFactory; #endif #ifdef MUELU_NOTAYAGGREGATIONFACTORY_SHORT -using NotayAggregationFactory [[maybe_unused]] = MueLu::NotayAggregationFactory; +using NotayAggregationFactory [[maybe_unused]] = + MueLu::NotayAggregationFactory; #endif #ifdef MUELU_NULLSPACEFACTORY_SHORT -using NullspaceFactory [[maybe_unused]] = MueLu::NullspaceFactory; +using NullspaceFactory [[maybe_unused]] = + MueLu::NullspaceFactory; #endif #ifdef MUELU_NULLSPACEFACTORY_KOKKOS_SHORT -using NullspaceFactory_kokkos [[maybe_unused]] = MueLu::NullspaceFactory_kokkos; +using NullspaceFactory_kokkos [[maybe_unused]] = + MueLu::NullspaceFactory_kokkos; #endif #ifdef MUELU_NULLSPACEPRESMOOTHFACTORY_SHORT -using NullspacePresmoothFactory [[maybe_unused]] = MueLu::NullspacePresmoothFactory; +using NullspacePresmoothFactory [[maybe_unused]] = + MueLu::NullspacePresmoothFactory; #endif #ifdef MUELU_PATTERNFACTORY_SHORT -using PatternFactory [[maybe_unused]] = MueLu::PatternFactory; +using PatternFactory [[maybe_unused]] = + MueLu::PatternFactory; #endif #ifdef MUELU_PERFUTILS_SHORT -using PerfUtils [[maybe_unused]] = MueLu::PerfUtils; +using PerfUtils [[maybe_unused]] = + MueLu::PerfUtils; #endif #ifdef MUELU_PERFMODELS_SHORT -using PerfModels [[maybe_unused]] = MueLu::PerfModels; +using PerfModels [[maybe_unused]] = + MueLu::PerfModels; #endif #ifdef MUELU_PERMUTATIONFACTORY_SHORT -using PermutationFactory [[maybe_unused]] = MueLu::PermutationFactory; +using PermutationFactory [[maybe_unused]] = + MueLu::PermutationFactory; #endif #ifdef MUELU_PERMUTINGSMOOTHER_SHORT -using PermutingSmoother [[maybe_unused]] = MueLu::PermutingSmoother; 
+using PermutingSmoother [[maybe_unused]] = + MueLu::PermutingSmoother; #endif #ifdef MUELU_PGPFACTORY_SHORT -using PgPFactory [[maybe_unused]] = MueLu::PgPFactory; +using PgPFactory [[maybe_unused]] = + MueLu::PgPFactory; #endif #ifdef MUELU_PREDROPFUNCTIONBASECLASS_SHORT -using PreDropFunctionBaseClass [[maybe_unused]] = MueLu::PreDropFunctionBaseClass; +using PreDropFunctionBaseClass [[maybe_unused]] = + MueLu::PreDropFunctionBaseClass; #endif #ifdef MUELU_PREDROPFUNCTIONCONSTVAL_SHORT -using PreDropFunctionConstVal [[maybe_unused]] = MueLu::PreDropFunctionConstVal; +using PreDropFunctionConstVal [[maybe_unused]] = + MueLu::PreDropFunctionConstVal; #endif #ifdef MUELU_PROJECTORSMOOTHER_SHORT -using ProjectorSmoother [[maybe_unused]] = MueLu::ProjectorSmoother; +using ProjectorSmoother [[maybe_unused]] = + MueLu::ProjectorSmoother; #endif #ifdef MUELU_RAPFACTORY_SHORT -using RAPFactory [[maybe_unused]] = MueLu::RAPFactory; +using RAPFactory [[maybe_unused]] = + MueLu::RAPFactory; #endif #ifdef MUELU_RAPSHIFTFACTORY_SHORT -using RAPShiftFactory [[maybe_unused]] = MueLu::RAPShiftFactory; +using RAPShiftFactory [[maybe_unused]] = + MueLu::RAPShiftFactory; #endif #ifdef MUELU_REBALANCEACFACTORY_SHORT -using RebalanceAcFactory [[maybe_unused]] = MueLu::RebalanceAcFactory; +using RebalanceAcFactory [[maybe_unused]] = + MueLu::RebalanceAcFactory; #endif #ifdef MUELU_REBALANCEBLOCKACFACTORY_SHORT -using RebalanceBlockAcFactory [[maybe_unused]] = MueLu::RebalanceBlockAcFactory; +using RebalanceBlockAcFactory [[maybe_unused]] = + MueLu::RebalanceBlockAcFactory; #endif #ifdef MUELU_REBALANCEBLOCKINTERPOLATIONFACTORY_SHORT -using RebalanceBlockInterpolationFactory [[maybe_unused]] = MueLu::RebalanceBlockInterpolationFactory; +using RebalanceBlockInterpolationFactory [[maybe_unused]] = + MueLu::RebalanceBlockInterpolationFactory; #endif #ifdef MUELU_REBALANCEBLOCKRESTRICTIONFACTORY_SHORT -using RebalanceBlockRestrictionFactory [[maybe_unused]] = 
MueLu::RebalanceBlockRestrictionFactory; +using RebalanceBlockRestrictionFactory [[maybe_unused]] = + MueLu::RebalanceBlockRestrictionFactory; #endif #ifdef MUELU_REBALANCETRANSFERFACTORY_SHORT -using RebalanceTransferFactory [[maybe_unused]] = MueLu::RebalanceTransferFactory; +using RebalanceTransferFactory [[maybe_unused]] = + MueLu::RebalanceTransferFactory; #endif #ifdef MUELU_REFMAXWELLSMOOTHER_SHORT -using RefMaxwellSmoother [[maybe_unused]] = MueLu::RefMaxwellSmoother; +using RefMaxwellSmoother [[maybe_unused]] = + MueLu::RefMaxwellSmoother; #endif #ifdef MUELU_REGIONRFACTORY_SHORT -using RegionRFactory [[maybe_unused]] = MueLu::RegionRFactory; +using RegionRFactory [[maybe_unused]] = + MueLu::RegionRFactory; #endif #ifdef MUELU_REGIONRFACTORY_KOKKOS_SHORT -using RegionRFactory_kokkos [[maybe_unused]] = MueLu::RegionRFactory_kokkos; +using RegionRFactory_kokkos [[maybe_unused]] = + MueLu::RegionRFactory_kokkos; #endif #ifdef MUELU_REITZINGERPFACTORY_SHORT -using ReitzingerPFactory [[maybe_unused]] = MueLu::ReitzingerPFactory; +using ReitzingerPFactory [[maybe_unused]] = + MueLu::ReitzingerPFactory; #endif #ifdef MUELU_REORDERBLOCKAFACTORY_SHORT -using ReorderBlockAFactory [[maybe_unused]] = MueLu::ReorderBlockAFactory; +using ReorderBlockAFactory [[maybe_unused]] = + MueLu::ReorderBlockAFactory; #endif #ifdef MUELU_REPARTITIONFACTORY_SHORT -using RepartitionFactory [[maybe_unused]] = MueLu::RepartitionFactory; +using RepartitionFactory [[maybe_unused]] = + MueLu::RepartitionFactory; #endif #ifdef MUELU_REPARTITIONBLOCKDIAGONALFACTORY_SHORT -using RepartitionBlockDiagonalFactory [[maybe_unused]] = MueLu::RepartitionBlockDiagonalFactory; +using RepartitionBlockDiagonalFactory [[maybe_unused]] = + MueLu::RepartitionBlockDiagonalFactory; #endif #ifdef MUELU_REPARTITIONHEURISTICFACTORY_SHORT -using RepartitionHeuristicFactory [[maybe_unused]] = MueLu::RepartitionHeuristicFactory; +using RepartitionHeuristicFactory [[maybe_unused]] = + 
MueLu::RepartitionHeuristicFactory; #endif #ifdef MUELU_COMBINEPFACTORY_SHORT -using CombinePFactory [[maybe_unused]] = MueLu::CombinePFactory; +using CombinePFactory [[maybe_unused]] = + MueLu::CombinePFactory; #endif #ifdef MUELU_REPLICATEPFACTORY_SHORT -using ReplicatePFactory [[maybe_unused]] = MueLu::ReplicatePFactory; +using ReplicatePFactory [[maybe_unused]] = + MueLu::ReplicatePFactory; #endif #ifdef MUELU_RIGIDBODYMODEFACTORY_SHORT -using RigidBodyModeFactory [[maybe_unused]] = MueLu::RigidBodyModeFactory; +using RigidBodyModeFactory [[maybe_unused]] = + MueLu::RigidBodyModeFactory; #endif #ifdef MUELU_SAPFACTORY_SHORT -using SaPFactory [[maybe_unused]] = MueLu::SaPFactory; +using SaPFactory [[maybe_unused]] = + MueLu::SaPFactory; #endif #ifdef MUELU_SAPFACTORY_KOKKOS_SHORT -using SaPFactory_kokkos [[maybe_unused]] = MueLu::SaPFactory_kokkos; +using SaPFactory_kokkos [[maybe_unused]] = + MueLu::SaPFactory_kokkos; #endif #ifdef MUELU_SCALEDNULLSPACEFACTORY_SHORT -using ScaledNullspaceFactory [[maybe_unused]] = MueLu::ScaledNullspaceFactory; +using ScaledNullspaceFactory [[maybe_unused]] = + MueLu::ScaledNullspaceFactory; #endif #ifdef MUELU_SCHURCOMPLEMENTFACTORY_SHORT -using SchurComplementFactory [[maybe_unused]] = MueLu::SchurComplementFactory; +using SchurComplementFactory [[maybe_unused]] = + MueLu::SchurComplementFactory; #endif #ifdef MUELU_SEGREGATEDAFACTORY_SHORT -using SegregatedAFactory [[maybe_unused]] = MueLu::SegregatedAFactory; +using SegregatedAFactory [[maybe_unused]] = + MueLu::SegregatedAFactory; #endif #ifdef MUELU_SHIFTEDLAPLACIAN_SHORT -using ShiftedLaplacian [[maybe_unused]] = MueLu::ShiftedLaplacian; +using ShiftedLaplacian [[maybe_unused]] = + MueLu::ShiftedLaplacian; #endif #ifdef MUELU_SHIFTEDLAPLACIANOPERATOR_SHORT -using ShiftedLaplacianOperator [[maybe_unused]] = MueLu::ShiftedLaplacianOperator; +using ShiftedLaplacianOperator [[maybe_unused]] = + MueLu::ShiftedLaplacianOperator; #endif #ifdef MUELU_SIMPLESMOOTHER_SHORT -using 
SimpleSmoother [[maybe_unused]] = MueLu::SimpleSmoother; +using SimpleSmoother [[maybe_unused]] = + MueLu::SimpleSmoother; #endif #ifdef MUELU_SMOOTHER_SHORT -using Smoother [[maybe_unused]] = MueLu::Smoother; +using Smoother [[maybe_unused]] = + MueLu::Smoother; #endif #ifdef MUELU_SMOOTHERBASE_SHORT -using SmootherBase [[maybe_unused]] = MueLu::SmootherBase; +using SmootherBase [[maybe_unused]] = + MueLu::SmootherBase; #endif #ifdef MUELU_SMOOTHERFACTORY_SHORT -using SmootherFactory [[maybe_unused]] = MueLu::SmootherFactory; +using SmootherFactory [[maybe_unused]] = + MueLu::SmootherFactory; #endif #ifdef MUELU_SMOOTHERPROTOTYPE_SHORT -using SmootherPrototype [[maybe_unused]] = MueLu::SmootherPrototype; +using SmootherPrototype [[maybe_unused]] = + MueLu::SmootherPrototype; #endif #ifdef MUELU_SMOOVECCOALESCEDROPFACTORY_SHORT -using SmooVecCoalesceDropFactory [[maybe_unused]] = MueLu::SmooVecCoalesceDropFactory; +using SmooVecCoalesceDropFactory [[maybe_unused]] = + MueLu::SmooVecCoalesceDropFactory; #endif #ifdef MUELU_SOLVERBASE_SHORT -using SolverBase [[maybe_unused]] = MueLu::SolverBase; +using SolverBase [[maybe_unused]] = + MueLu::SolverBase; #endif #ifdef MUELU_STEEPESTDESCENTSOLVER_SHORT -using SteepestDescentSolver [[maybe_unused]] = MueLu::SteepestDescentSolver; +using SteepestDescentSolver [[maybe_unused]] = + MueLu::SteepestDescentSolver; #endif #ifdef MUELU_STRATIMIKOSSMOOTHER_SHORT -using StratimikosSmoother [[maybe_unused]] = MueLu::StratimikosSmoother; +using StratimikosSmoother [[maybe_unused]] = + MueLu::StratimikosSmoother; #endif #ifdef MUELU_STRUCTUREDAGGREGATIONFACTORY_SHORT -using StructuredAggregationFactory [[maybe_unused]] = MueLu::StructuredAggregationFactory; +using StructuredAggregationFactory [[maybe_unused]] = + MueLu::StructuredAggregationFactory; #endif #ifdef MUELU_STRUCTUREDLINEDETECTIONFACTORY_SHORT -using StructuredLineDetectionFactory [[maybe_unused]] = MueLu::StructuredLineDetectionFactory; +using 
StructuredLineDetectionFactory [[maybe_unused]] = + MueLu::StructuredLineDetectionFactory; #endif #ifdef MUELU_SUBBLOCKAFACTORY_SHORT -using SubBlockAFactory [[maybe_unused]] = MueLu::SubBlockAFactory; +using SubBlockAFactory [[maybe_unused]] = + MueLu::SubBlockAFactory; #endif #ifdef MUELU_TEKOSMOOTHER_SHORT -using TekoSmoother [[maybe_unused]] = MueLu::TekoSmoother; +using TekoSmoother [[maybe_unused]] = + MueLu::TekoSmoother; #endif #ifdef MUELU_TENTATIVEPFACTORY_SHORT -using TentativePFactory [[maybe_unused]] = MueLu::TentativePFactory; +using TentativePFactory [[maybe_unused]] = + MueLu::TentativePFactory; #endif #ifdef MUELU_TENTATIVEPFACTORY_KOKKOS_SHORT -using TentativePFactory_kokkos [[maybe_unused]] = MueLu::TentativePFactory_kokkos; +using TentativePFactory_kokkos [[maybe_unused]] = + MueLu::TentativePFactory_kokkos; #endif #ifdef MUELU_MATRIXFREETENTATIVEP_SHORT -using MatrixFreeTentativeP [[maybe_unused]] = MueLu::MatrixFreeTentativeP; +using MatrixFreeTentativeP [[maybe_unused]] = + MueLu::MatrixFreeTentativeP; #endif #ifdef MUELU_MATRIXFREETENTATIVEPFACTORY_SHORT -using MatrixFreeTentativePFactory [[maybe_unused]] = MueLu::MatrixFreeTentativePFactory; +using MatrixFreeTentativePFactory [[maybe_unused]] = + MueLu::MatrixFreeTentativePFactory; #endif #ifdef MUELU_THRESHOLDAFILTERFACTORY_SHORT -using ThresholdAFilterFactory [[maybe_unused]] = MueLu::ThresholdAFilterFactory; +using ThresholdAFilterFactory [[maybe_unused]] = + MueLu::ThresholdAFilterFactory; #endif #ifdef MUELU_TOGGLECOORDINATESTRANSFERFACTORY_SHORT -using ToggleCoordinatesTransferFactory [[maybe_unused]] = MueLu::ToggleCoordinatesTransferFactory; +using ToggleCoordinatesTransferFactory [[maybe_unused]] = + MueLu::ToggleCoordinatesTransferFactory; #endif #ifdef MUELU_TOGGLEPFACTORY_SHORT -using TogglePFactory [[maybe_unused]] = MueLu::TogglePFactory; +using TogglePFactory [[maybe_unused]] = + MueLu::TogglePFactory; #endif #ifdef MUELU_TOPRAPFACTORY_SHORT -using TopRAPFactory 
[[maybe_unused]] = MueLu::TopRAPFactory; +using TopRAPFactory [[maybe_unused]] = + MueLu::TopRAPFactory; #endif #ifdef MUELU_TOPSMOOTHERFACTORY_SHORT -using TopSmootherFactory [[maybe_unused]] = MueLu::TopSmootherFactory; +using TopSmootherFactory [[maybe_unused]] = + MueLu::TopSmootherFactory; #endif #ifdef MUELU_TPETRAOPERATOR_SHORT -using TpetraOperator [[maybe_unused]] = MueLu::TpetraOperator; +using TpetraOperator [[maybe_unused]] = + MueLu::TpetraOperator; #endif #ifdef MUELU_TRANSPFACTORY_SHORT -using TransPFactory [[maybe_unused]] = MueLu::TransPFactory; +using TransPFactory [[maybe_unused]] = + MueLu::TransPFactory; #endif #ifdef MUELU_RFROMP_OR_TRANSP_SHORT -using RfromP_Or_TransP [[maybe_unused]] = MueLu::RfromP_Or_TransP; +using RfromP_Or_TransP [[maybe_unused]] = + MueLu::RfromP_Or_TransP; #endif #ifdef MUELU_TRILINOSSMOOTHER_SHORT -using TrilinosSmoother [[maybe_unused]] = MueLu::TrilinosSmoother; +using TrilinosSmoother [[maybe_unused]] = + MueLu::TrilinosSmoother; #endif #ifdef MUELU_UNSMOOSHFACTORY_SHORT -using UnsmooshFactory [[maybe_unused]] = MueLu::UnsmooshFactory; +using UnsmooshFactory [[maybe_unused]] = + MueLu::UnsmooshFactory; #endif #ifdef MUELU_USERPFACTORY_SHORT -using UserPFactory [[maybe_unused]] = MueLu::UserPFactory; +using UserPFactory [[maybe_unused]] = + MueLu::UserPFactory; #endif #ifdef MUELU_UTILITIES_SHORT -using Utilities [[maybe_unused]] = MueLu::Utilities; +using Utilities [[maybe_unused]] = + MueLu::Utilities; #endif #ifdef MUELU_UTILITIESBASE_SHORT -using UtilitiesBase [[maybe_unused]] = MueLu::UtilitiesBase; +using UtilitiesBase [[maybe_unused]] = + MueLu::UtilitiesBase; #endif #ifdef MUELU_VARIABLEDOFLAPLACIANFACTORY_SHORT -using VariableDofLaplacianFactory [[maybe_unused]] = MueLu::VariableDofLaplacianFactory; +using VariableDofLaplacianFactory [[maybe_unused]] = + MueLu::VariableDofLaplacianFactory; #endif #ifdef MUELU_SEMICOARSENPFACTORY_SHORT -using SemiCoarsenPFactory [[maybe_unused]] = MueLu::SemiCoarsenPFactory; 
+using SemiCoarsenPFactory [[maybe_unused]] = + MueLu::SemiCoarsenPFactory; #endif #ifdef MUELU_SEMICOARSENPFACTORY_KOKKOS_SHORT -using SemiCoarsenPFactory_kokkos [[maybe_unused]] = MueLu::SemiCoarsenPFactory_kokkos; +using SemiCoarsenPFactory_kokkos [[maybe_unused]] = + MueLu::SemiCoarsenPFactory_kokkos; #endif #ifdef MUELU_UZAWASMOOTHER_SHORT -using UzawaSmoother [[maybe_unused]] = MueLu::UzawaSmoother; +using UzawaSmoother [[maybe_unused]] = + MueLu::UzawaSmoother; #endif #ifdef MUELU_VISUALIZATIONHELPERS_SHORT -using VisualizationHelpers [[maybe_unused]] = MueLu::VisualizationHelpers; +using VisualizationHelpers [[maybe_unused]] = + MueLu::VisualizationHelpers; #endif #ifdef MUELU_ZEROSUBBLOCKAFACTORY_SHORT -using ZeroSubBlockAFactory [[maybe_unused]] = MueLu::ZeroSubBlockAFactory; +using ZeroSubBlockAFactory [[maybe_unused]] = + MueLu::ZeroSubBlockAFactory; #endif #ifdef MUELU_ZOLTANINTERFACE_SHORT -using ZoltanInterface [[maybe_unused]] = MueLu::ZoltanInterface; +using ZoltanInterface [[maybe_unused]] = + MueLu::ZoltanInterface; #endif #ifdef MUELU_ZOLTAN2INTERFACE_SHORT -using Zoltan2Interface [[maybe_unused]] = MueLu::Zoltan2Interface; +using Zoltan2Interface [[maybe_unused]] = + MueLu::Zoltan2Interface; #endif #ifdef MUELU_NODEPARTITIONINTERFACE_SHORT -using NodePartitionInterface [[maybe_unused]] = MueLu::NodePartitionInterface; +using NodePartitionInterface [[maybe_unused]] = + MueLu::NodePartitionInterface; #endif #ifdef MUELU_XPETRAOPERATOR_SHORT -using XpetraOperator [[maybe_unused]] = MueLu::XpetraOperator; +using XpetraOperator [[maybe_unused]] = + MueLu::XpetraOperator; #endif #ifdef MUELU_REFMAXWELL_SHORT -using RefMaxwell [[maybe_unused]] = MueLu::RefMaxwell; +using RefMaxwell [[maybe_unused]] = + MueLu::RefMaxwell; #endif #ifdef MUELU_MAXWELL1_SHORT -using Maxwell1 [[maybe_unused]] = MueLu::Maxwell1; +using Maxwell1 [[maybe_unused]] = + MueLu::Maxwell1; #endif #ifdef MUELU_MAXWELL_UTILS_SHORT -using Maxwell_Utils [[maybe_unused]] = 
MueLu::Maxwell_Utils; +using Maxwell_Utils [[maybe_unused]] = + MueLu::Maxwell_Utils; #endif #ifdef MUELU_ADAPTIVESAMLPARAMETERLISTINTERPRETER_SHORT -typedef MueLu::AdaptiveSaMLParameterListInterpreter AdaptiveSaMLParameterListInterpreter; +typedef MueLu::AdaptiveSaMLParameterListInterpreter + AdaptiveSaMLParameterListInterpreter; #endif #ifdef MUELU_FACTORYFACTORY_SHORT -typedef MueLu::FactoryFactory FactoryFactory; +typedef MueLu::FactoryFactory + FactoryFactory; #endif #ifdef MUELU_MLPARAMETERLISTINTERPRETER_SHORT -typedef MueLu::MLParameterListInterpreter MLParameterListInterpreter; +typedef MueLu::MLParameterListInterpreter + MLParameterListInterpreter; #endif #ifdef MUELU_PARAMETERLISTINTERPRETER_SHORT -typedef MueLu::ParameterListInterpreter ParameterListInterpreter; +typedef MueLu::ParameterListInterpreter + ParameterListInterpreter; #endif #ifdef MUELU_TWOLEVELMATLABFACTORY_SHORT -typedef MueLu::TwoLevelMatlabFactory TwoLevelMatlabFactory; +typedef MueLu::TwoLevelMatlabFactory + TwoLevelMatlabFactory; #endif #ifdef MUELU_SINGLELEVELMATLABFACTORY_SHORT -typedef MueLu::SingleLevelMatlabFactory SingleLevelMatlabFactory; +typedef MueLu::SingleLevelMatlabFactory + SingleLevelMatlabFactory; #endif #ifdef MUELU_MATLABSMOOTHER_SHORT -typedef MueLu::MatlabSmoother MatlabSmoother; +typedef MueLu::MatlabSmoother + MatlabSmoother; #endif diff --git a/packages/muelu/src/Headers/MueLu_Version.hpp b/packages/muelu/src/Headers/MueLu_Version.hpp index 0fbda392c252..d65a24e7a264 100644 --- a/packages/muelu/src/Headers/MueLu_Version.hpp +++ b/packages/muelu/src/Headers/MueLu_Version.hpp @@ -55,10 +55,8 @@ namespace MueLu { - inline std::string const Version() { - return("MueLu development"); - } +inline std::string const Version() { return ("MueLu development"); } } // namespace MueLu -#endif //ifndef MUELU_VERSION_HPP +#endif // ifndef MUELU_VERSION_HPP diff --git a/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassBase_decl.hpp 
b/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassBase_decl.hpp index f9d9d8b9ff5b..291e93a37c31 100644 --- a/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassBase_decl.hpp +++ b/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassBase_decl.hpp @@ -51,52 +51,50 @@ namespace MueLu { - template - class FacadeClassBase - : public virtual BaseClass{ +template +class FacadeClassBase : public virtual BaseClass { #undef MUELU_FACADECLASSBASE_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors - //@{ +public: + //! @name Constructors/Destructors + //@{ - //! Constructor. - FacadeClassBase(); + //! Constructor. + FacadeClassBase(); - //! Destructor. - virtual ~FacadeClassBase() { } + //! Destructor. + virtual ~FacadeClassBase() {} - //@} + //@} - /*! @brief Set parameter list for FacadeClass (abstract member). + /*! @brief Set parameter list for FacadeClass (abstract member). - @param[in] paramList: ParameterList containing the MueLu parameters. - */ - virtual Teuchos::RCP SetParameterList(const Teuchos::ParameterList& paramList) = 0; + @param[in] paramList: ParameterList containing the MueLu parameters. + */ + virtual Teuchos::RCP + SetParameterList(const Teuchos::ParameterList ¶mList) = 0; - protected: - - /*! @brief Replace all occurrences of search string "search" by the string in "replace" given the string "subject" - */ - std::string ReplaceString(std::string& subject, const std::string& search, const std::string& replace) { - size_t pos = 0; - while ((pos = subject.find(search, pos)) != std::string::npos) { - subject.replace(pos, search.length(), replace); - pos += replace.length(); - } - return subject; +protected: + /*! 
@brief Replace all occurrences of search string "search" by the string in + * "replace" given the string "subject" + */ + std::string ReplaceString(std::string &subject, const std::string &search, + const std::string &replace) { + size_t pos = 0; + while ((pos = subject.find(search, pos)) != std::string::npos) { + subject.replace(pos, search.length(), replace); + pos += replace.length(); } - - }; + return subject; + } +}; } // namespace MueLu #define MUELU_FACADECLASSBASE_SHORT - - -#endif /* PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_MUELU_FACADECLASSBASE_DECL_HPP_ */ +#endif /* PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_MUELU_FACADECLASSBASE_DECL_HPP_ \ + */ diff --git a/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassBase_def.hpp b/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassBase_def.hpp index fd2375c1ccc2..398a7fecdf51 100644 --- a/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassBase_def.hpp +++ b/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassBase_def.hpp @@ -47,18 +47,17 @@ #ifndef PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_MUELU_FACADECLASSBASE_DEF_HPP_ #define PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_MUELU_FACADECLASSBASE_DEF_HPP_ -#include #include - +#include #include "MueLu_Exceptions.hpp" #include "MueLu_FacadeClassBase_decl.hpp" namespace MueLu { - template - FacadeClassBase::FacadeClassBase() { - } -} +template +FacadeClassBase::FacadeClassBase() {} +} // namespace MueLu -#endif /* PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_MUELU_FACADECLASSBASE_DEF_HPP_ */ +#endif /* PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_MUELU_FACADECLASSBASE_DEF_HPP_ \ + */ diff --git a/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassFactory_decl.hpp b/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassFactory_decl.hpp index 96aa53ca3d7e..060e04e50da1 100644 --- a/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassFactory_decl.hpp +++ 
b/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassFactory_decl.hpp @@ -56,51 +56,54 @@ namespace MueLu { - template - class FacadeClassFactory - : public virtual BaseClass{ +template +class FacadeClassFactory : public virtual BaseClass { #undef MUELU_FACADECLASSFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors - //@{ - - //! Constructor. - FacadeClassFactory(); - - //! Destructor. - virtual ~FacadeClassFactory() { } - - //@} - - /*! @brief Set parameter list for FacadeClassFactory interpreter. - - @param[in] paramList: ParameterList containing the MueLu parameters. - */ - Teuchos::RCP SetParameterList(const Teuchos::ParameterList& paramList); - - /*! @brief Register new facade class - * - * Register new externally provided facade class in FacadeClassFactory - * - * @param[in] name: name that is used to access Facade class - * @param[in] facadeclass: RCP pointer to facade class instance - */ - void RegisterFacadeClass(std::string name, Teuchos::RCP > facadeclass) { - facadeClasses_[name] = facadeclass; - } - - private: - - std::map > > facadeClasses_; - - }; +public: + //! @name Constructors/Destructors + //@{ + + //! Constructor. + FacadeClassFactory(); + + //! Destructor. + virtual ~FacadeClassFactory() {} + + //@} + + /*! @brief Set parameter list for FacadeClassFactory interpreter. + + @param[in] paramList: ParameterList containing the MueLu parameters. + */ + Teuchos::RCP + SetParameterList(const Teuchos::ParameterList ¶mList); + + /*! 
@brief Register new facade class + * + * Register new externally provided facade class in FacadeClassFactory + * + * @param[in] name: name that is used to access Facade class + * @param[in] facadeclass: RCP pointer to facade class instance + */ + void RegisterFacadeClass( + std::string name, + Teuchos::RCP> + facadeclass) { + facadeClasses_[name] = facadeclass; + } + +private: + std::map< + std::string, + Teuchos::RCP>> + facadeClasses_; +}; } // namespace MueLu #define MUELU_FACADECLASSFACTORY_SHORT - - -#endif /* PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_MUELU_FACADECLASSFACTORY_DECL_HPP_ */ +#endif /* PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_MUELU_FACADECLASSFACTORY_DECL_HPP_ \ + */ diff --git a/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassFactory_def.hpp b/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassFactory_def.hpp index f093d7b7f11e..a4d33a3995ed 100644 --- a/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassFactory_def.hpp +++ b/packages/muelu/src/Interface/FacadeClasses/MueLu_FacadeClassFactory_def.hpp @@ -47,48 +47,69 @@ #ifndef PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_MUELU_FACADECLASSFACTORY_DEF_HPP_ #define PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_MUELU_FACADECLASSFACTORY_DEF_HPP_ -#include #include - +#include #include "MueLu_Exceptions.hpp" #include "MueLu_FacadeClassBase.hpp" -#include "MueLu_Facade_Simple_def.hpp" #include "MueLu_Facade_BGS2x2_def.hpp" +#include "MueLu_Facade_Simple_def.hpp" #include "MueLu_FacadeClassFactory_decl.hpp" namespace MueLu { - template - FacadeClassFactory::FacadeClassFactory() { - facadeClasses_["Simple"] = Teuchos::rcp(new FacadeSimple()); - facadeClasses_["BGS2x2"] = Teuchos::rcp(new FacadeBGS2x2()); - } - +template +FacadeClassFactory::FacadeClassFactory() { + facadeClasses_["Simple"] = Teuchos::rcp( + new FacadeSimple()); + facadeClasses_["BGS2x2"] = Teuchos::rcp( + new FacadeBGS2x2()); +} - template - Teuchos::RCP FacadeClassFactory::SetParameterList(const 
ParameterList& paramList) { +template +Teuchos::RCP +FacadeClassFactory::SetParameterList( + const ParameterList ¶mList) { - TEUCHOS_TEST_FOR_EXCEPTION(paramList.isParameter("MueLu preconditioner") == false, MueLu::Exceptions::RuntimeError, "FacadeClassFactory: undefined MueLu preconditioner. Set the \"MueLu preconditioner\" parameter correctly in your input file."); - TEUCHOS_TEST_FOR_EXCEPTION(paramList.get("MueLu preconditioner") == "undefined", MueLu::Exceptions::RuntimeError, "FacadeClassFactory: undefined MueLu preconditioner. Set the \"MueLu preconditioner\" parameter correctly in your input file."); + TEUCHOS_TEST_FOR_EXCEPTION( + paramList.isParameter("MueLu preconditioner") == false, + MueLu::Exceptions::RuntimeError, + "FacadeClassFactory: undefined MueLu preconditioner. Set the \"MueLu " + "preconditioner\" parameter correctly in your input file."); + TEUCHOS_TEST_FOR_EXCEPTION( + paramList.get("MueLu preconditioner") == "undefined", + MueLu::Exceptions::RuntimeError, + "FacadeClassFactory: undefined MueLu preconditioner. Set the \"MueLu " + "preconditioner\" parameter correctly in your input file."); - std::string precMueLu = paramList.get("MueLu preconditioner"); + std::string precMueLu = paramList.get("MueLu preconditioner"); - // could not find requested facade class - if(facadeClasses_.find(precMueLu) == facadeClasses_.end()) { - GetOStream(Errors) << "FacadeClassFactory: Could not find facade class \"" << precMueLu << "\"!" 
<< std::endl; - GetOStream(Errors) << "The available facade classes are:" << std::endl; - for(typename std::map > >::const_iterator it =facadeClasses_.begin(); it != facadeClasses_.end(); it++){ - GetOStream(Errors) << " " << it->first << std::endl; - } - TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "FacadeClassFactory: Could not find facade class \"" << precMueLu << "\"."); + // could not find requested facade class + if (facadeClasses_.find(precMueLu) == facadeClasses_.end()) { + GetOStream(Errors) << "FacadeClassFactory: Could not find facade class \"" + << precMueLu << "\"!" << std::endl; + GetOStream(Errors) << "The available facade classes are:" << std::endl; + for (typename std::map< + std::string, + Teuchos::RCP>>::const_iterator it = + facadeClasses_.begin(); + it != facadeClasses_.end(); it++) { + GetOStream(Errors) << " " << it->first << std::endl; } - - return facadeClasses_[precMueLu]->SetParameterList(paramList); + TEUCHOS_TEST_FOR_EXCEPTION( + true, MueLu::Exceptions::RuntimeError, + "FacadeClassFactory: Could not find facade class \"" << precMueLu + << "\"."); } + return facadeClasses_[precMueLu]->SetParameterList(paramList); +} + } // end namespace MueLu -#endif /* PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_MUELU_FACADECLASSFACTORY_DEF_HPP_ */ +#endif /* PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_MUELU_FACADECLASSFACTORY_DEF_HPP_ \ + */ diff --git a/packages/muelu/src/Interface/FacadeClasses/MueLu_Facade_BGS2x2_decl.hpp b/packages/muelu/src/Interface/FacadeClasses/MueLu_Facade_BGS2x2_decl.hpp index 8885d500ed3c..aaad7ddc4101 100644 --- a/packages/muelu/src/Interface/FacadeClasses/MueLu_Facade_BGS2x2_decl.hpp +++ b/packages/muelu/src/Interface/FacadeClasses/MueLu_Facade_BGS2x2_decl.hpp @@ -55,37 +55,36 @@ namespace MueLu { - template - class FacadeBGS2x2 : public FacadeClassBase { +template +class FacadeBGS2x2 + : public FacadeClassBase { #include "MueLu_UseShortNames.hpp" - public: - //! 
@name Constructors/Destructors - //@{ +public: + //! @name Constructors/Destructors + //@{ - //! Constructor. - FacadeBGS2x2(); + //! Constructor. + FacadeBGS2x2(); - //! Destructor. - virtual ~FacadeBGS2x2() { } + //! Destructor. + virtual ~FacadeBGS2x2() {} - //@} + //@} - /*! @brief Set parameter list for FacadeClass interpreter. + /*! @brief Set parameter list for FacadeClass interpreter. - @param[in] paramList: ParameterList containing the MueLu parameters for chosen facade class. - */ - Teuchos::RCP SetParameterList(const Teuchos::ParameterList& paramList); + @param[in] paramList: ParameterList containing the MueLu parameters for + chosen facade class. + */ + Teuchos::RCP + SetParameterList(const Teuchos::ParameterList ¶mList); - private: - - }; +private: +}; } // namespace MueLu - - #endif /* PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_BGS2x2_DECL_HPP_ */ diff --git a/packages/muelu/src/Interface/FacadeClasses/MueLu_Facade_BGS2x2_def.hpp b/packages/muelu/src/Interface/FacadeClasses/MueLu_Facade_BGS2x2_def.hpp index 3d7beb7d2909..f78b8be85c90 100644 --- a/packages/muelu/src/Interface/FacadeClasses/MueLu_Facade_BGS2x2_def.hpp +++ b/packages/muelu/src/Interface/FacadeClasses/MueLu_Facade_BGS2x2_def.hpp @@ -47,9 +47,8 @@ #ifndef PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_BGS2x2_DEF_HPP_ #define PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_BGS2x2_DEF_HPP_ -#include #include - +#include #include "MueLu_Exceptions.hpp" @@ -57,342 +56,458 @@ namespace MueLu { - template - FacadeBGS2x2::FacadeBGS2x2() { - } - - - template - Teuchos::RCP FacadeBGS2x2::SetParameterList(const ParameterList& paramList) { +template +FacadeBGS2x2::FacadeBGS2x2() {} - // obtain ParameterList with default input parameters for this facade class - // Note all parameters are of type string (we use it for string replacement) - std::string defaultString = -"" -"" -"" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" -; - Teuchos::RCP defaultList = 
Teuchos::getParametersFromXmlString(defaultString); - // validate user input parameters (and set defaults if necessary) - Teuchos::ParameterList inputParameters = paramList; - inputParameters.validateParametersAndSetDefaults(*defaultList); - TEUCHOS_TEST_FOR_EXCEPTION(inputParameters.get("MueLu preconditioner") == "undefined", MueLu::Exceptions::RuntimeError, "FacadeBGS2x2: undefined MueLu preconditioner. Set the \"MueLu preconditioner\" parameter correctly in your input file."); +template +Teuchos::RCP +FacadeBGS2x2::SetParameterList( + const ParameterList ¶mList) { - // create copy of template string which is updated with in-place string replacements - // template string for preconditioner layout (factory based parameters) - std::string finalString = + // obtain ParameterList with default input parameters for this facade class + // Note all parameters are of type string (we use it for string replacement) + std::string defaultString = + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + ""; + Teuchos::RCP defaultList = + Teuchos::getParametersFromXmlString(defaultString); + // validate user input parameters (and set defaults if necessary) + Teuchos::ParameterList inputParameters = paramList; + inputParameters.validateParametersAndSetDefaults(*defaultList); + TEUCHOS_TEST_FOR_EXCEPTION( + inputParameters.get("MueLu preconditioner") == "undefined", + MueLu::Exceptions::RuntimeError, + "FacadeBGS2x2: undefined MueLu preconditioner. 
Set the \"MueLu " + "preconditioner\" parameter correctly in your input file."); -"" -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -"" -" " -"" -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -"" -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -"" -" " -" " -"" -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -"" - ; + // create copy of template string which is updated with in-place string + // replacements template string for preconditioner layout (factory based + // parameters) + std::string finalString = - // logical code for more complicated distinctions + "" + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + "" + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " 
" + " " + " " + " " + " " + " " + " " + " " + "" + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + "" + " " + " " + "" + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + ""; + // logical code for more complicated distinctions - std::string smoother1 = inputParameters.get("Block 1: smoother"); - if(smoother1 == "ILU") { - this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooILUFact1"); - } else if (smoother1 == "Symmetric Gauss-Seidel" || smoother1 == "SGS") { - this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", "Symmetric Gauss-Seidel"); - this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); - } else if (smoother1 == "Symmetric Gauss-Seidel" || smoother1 == "GS") { - this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", "Gauss-Seidel"); - this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); - } else if (smoother1 == "Jacobi") { - this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", "Jacobi"); - this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); - } else if (smoother1 == "Direct") { - this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooDirectFact1"); - } else { - this->GetOStream(Errors) << "Invalid smoother type for block 1: " << smoother1 << ". Valid options are: \"SGS\", \"GS\", \"Jacobi\", \"ILU\" or \"Direct\"." 
<< std::endl; - } - - std::string smoother2 = inputParameters.get("Block 2: smoother"); - if(smoother2 == "ILU") { - this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooILUFact2"); - } else if (smoother2 == "Symmetric Gauss-Seidel" || smoother2 == "SGS") { - this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", "Symmetric Gauss-Seidel"); - this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); - } else if (smoother2 == "Symmetric Gauss-Seidel" || smoother2 == "GS") { - this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", "Gauss-Seidel"); - this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); - } else if (smoother2 == "Jacobi") { - this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", "Gauss-Seidel"); - this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); - } else if (smoother2 == "Direct") { - this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooDirectFact2"); - } else { - this->GetOStream(Errors) << "Invalid smoother type for block 2: " << smoother2 << ". Valid options are: \"SGS\", \"GS\", \"Jacobi\", \"ILU\" or \"Direct\"." 
<< std::endl; - } + std::string smoother1 = inputParameters.get("Block 1: smoother"); + if (smoother1 == "ILU") { + this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooILUFact1"); + } else if (smoother1 == "Symmetric Gauss-Seidel" || smoother1 == "SGS") { + this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", + "Symmetric Gauss-Seidel"); + this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); + } else if (smoother1 == "Symmetric Gauss-Seidel" || smoother1 == "GS") { + this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", + "Gauss-Seidel"); + this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); + } else if (smoother1 == "Jacobi") { + this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", + "Jacobi"); + this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); + } else if (smoother1 == "Direct") { + this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooDirectFact1"); + } else { + this->GetOStream(Errors) + << "Invalid smoother type for block 1: " << smoother1 + << ". Valid options are: \"SGS\", \"GS\", \"Jacobi\", \"ILU\" or " + "\"Direct\"." 
+ << std::endl; + } - if(inputParameters.get("Block 1: transfer smoothing") == true) { - this->ReplaceString(finalString, "XXXBlock 1: prolongatorYYY", "myPFact1"); - this->ReplaceString(finalString, "XXXBlock 1: restrictor YYY", "myRFact1"); - } else { - this->ReplaceString(finalString, "XXXBlock 1: prolongatorYYY", "myTentativePFact1"); - this->ReplaceString(finalString, "XXXBlock 1: restrictor YYY", "myTransPFact1"); - } - if(inputParameters.get("Block 2: transfer smoothing") == true) { - this->ReplaceString(finalString, "XXXBlock 2: prolongatorYYY", "myPFact2"); - this->ReplaceString(finalString, "XXXBlock 2: restrictor YYY", "myRFact2"); - } else { - this->ReplaceString(finalString, "XXXBlock 2: prolongatorYYY", "myTentativePFact2"); - this->ReplaceString(finalString, "XXXBlock 2: restrictor YYY", "myTransPFact2"); - } + std::string smoother2 = inputParameters.get("Block 2: smoother"); + if (smoother2 == "ILU") { + this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooILUFact2"); + } else if (smoother2 == "Symmetric Gauss-Seidel" || smoother2 == "SGS") { + this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", + "Symmetric Gauss-Seidel"); + this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); + } else if (smoother2 == "Symmetric Gauss-Seidel" || smoother2 == "GS") { + this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", + "Gauss-Seidel"); + this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); + } else if (smoother2 == "Jacobi") { + this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", + "Gauss-Seidel"); + this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); + } else if (smoother2 == "Direct") { + this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooDirectFact2"); + } else { + this->GetOStream(Errors) + << "Invalid smoother type for block 2: " << smoother2 + << ". Valid options are: \"SGS\", \"GS\", \"Jacobi\", \"ILU\" or " + "\"Direct\"." 
+ << std::endl; + } - // end logical code + if (inputParameters.get("Block 1: transfer smoothing") == true) { + this->ReplaceString(finalString, "XXXBlock 1: prolongatorYYY", "myPFact1"); + this->ReplaceString(finalString, "XXXBlock 1: restrictor YYY", "myRFact1"); + } else { + this->ReplaceString(finalString, "XXXBlock 1: prolongatorYYY", + "myTentativePFact1"); + this->ReplaceString(finalString, "XXXBlock 1: restrictor YYY", + "myTransPFact1"); + } + if (inputParameters.get("Block 2: transfer smoothing") == true) { + this->ReplaceString(finalString, "XXXBlock 2: prolongatorYYY", "myPFact2"); + this->ReplaceString(finalString, "XXXBlock 2: restrictor YYY", "myRFact2"); + } else { + this->ReplaceString(finalString, "XXXBlock 2: prolongatorYYY", + "myTentativePFact2"); + this->ReplaceString(finalString, "XXXBlock 2: restrictor YYY", + "myTransPFact2"); + } - // loop over all input parameters - for(Teuchos::ParameterList::ConstIterator it = inputParameters.begin(); it != inputParameters.end(); it++) { - // form replacement string - std::string par_name = inputParameters.name(it); - std::stringstream ss; - ss << "XXX" << par_name << "YYY"; + // end logical code - // update final string with parameters - Teuchos::ParameterEntry par_entry = inputParameters.entry(it); - this->ReplaceString(finalString, - ss.str(), Teuchos::toString(par_entry.getAny())); - } + // loop over all input parameters + for (Teuchos::ParameterList::ConstIterator it = inputParameters.begin(); + it != inputParameters.end(); it++) { + // form replacement string + std::string par_name = inputParameters.name(it); + std::stringstream ss; + ss << "XXX" << par_name << "YYY"; - Teuchos::RCP ret = Teuchos::getParametersFromXmlString(finalString); - return ret; + // update final string with parameters + Teuchos::ParameterEntry par_entry = inputParameters.entry(it); + this->ReplaceString(finalString, ss.str(), + Teuchos::toString(par_entry.getAny())); } + Teuchos::RCP ret = + 
Teuchos::getParametersFromXmlString(finalString); + return ret; +} + } // end namespace MueLu #endif // PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_BGS2x2_DEF_HPP_ diff --git a/packages/muelu/src/Interface/FacadeClasses/MueLu_Facade_Simple_decl.hpp b/packages/muelu/src/Interface/FacadeClasses/MueLu_Facade_Simple_decl.hpp index 7aa865679bf6..872c43d56800 100644 --- a/packages/muelu/src/Interface/FacadeClasses/MueLu_Facade_Simple_decl.hpp +++ b/packages/muelu/src/Interface/FacadeClasses/MueLu_Facade_Simple_decl.hpp @@ -55,37 +55,36 @@ namespace MueLu { - template - class FacadeSimple : public FacadeClassBase { +template +class FacadeSimple + : public FacadeClassBase { #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors - //@{ +public: + //! @name Constructors/Destructors + //@{ - //! Constructor. - FacadeSimple(); + //! Constructor. + FacadeSimple(); - //! Destructor. - virtual ~FacadeSimple() { } + //! Destructor. + virtual ~FacadeSimple() {} - //@} + //@} - /*! @brief Set parameter list for FacadeClass interpreter. + /*! @brief Set parameter list for FacadeClass interpreter. - @param[in] paramList: ParameterList containing the MueLu parameters for chosen facade class. - */ - Teuchos::RCP SetParameterList(const Teuchos::ParameterList& paramList); + @param[in] paramList: ParameterList containing the MueLu parameters for + chosen facade class. 
+ */ + Teuchos::RCP + SetParameterList(const Teuchos::ParameterList ¶mList); - private: - - }; +private: +}; } // namespace MueLu - - #endif /* PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_Simple_DECL_HPP_ */ diff --git a/packages/muelu/src/Interface/FacadeClasses/MueLu_Facade_Simple_def.hpp b/packages/muelu/src/Interface/FacadeClasses/MueLu_Facade_Simple_def.hpp index 7a69633e3341..abfaf79c09fc 100644 --- a/packages/muelu/src/Interface/FacadeClasses/MueLu_Facade_Simple_def.hpp +++ b/packages/muelu/src/Interface/FacadeClasses/MueLu_Facade_Simple_def.hpp @@ -47,9 +47,8 @@ #ifndef PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_Simple_DEF_HPP_ #define PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_Simple_DEF_HPP_ -#include #include - +#include #include "MueLu_Exceptions.hpp" @@ -57,348 +56,466 @@ namespace MueLu { - template - FacadeSimple::FacadeSimple() { - } - - - template - Teuchos::RCP FacadeSimple::SetParameterList(const ParameterList& paramList) { +template +FacadeSimple::FacadeSimple() {} - // obtain ParameterList with default input parameters for this facade class - // Note all parameters are of type string (we use it for string replacement) - std::string defaultString = -"" -"" -"" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" - "" -; - Teuchos::RCP defaultList = Teuchos::getParametersFromXmlString(defaultString); - // validate user input parameters (and set defaults if necessary) - Teuchos::ParameterList inputParameters = paramList; - inputParameters.validateParametersAndSetDefaults(*defaultList); - TEUCHOS_TEST_FOR_EXCEPTION(inputParameters.get("MueLu preconditioner") == "undefined", MueLu::Exceptions::RuntimeError, "FacadeSimple: undefined MueLu preconditioner. 
Set the \"MueLu preconditioner\" parameter correctly in your input file."); +template +Teuchos::RCP +FacadeSimple::SetParameterList( + const ParameterList ¶mList) { - // create copy of template string which is updated with in-place string replacements - // template string for preconditioner layout (factory based parameters) - std::string finalString = + // obtain ParameterList with default input parameters for this facade class + // Note all parameters are of type string (we use it for string replacement) + std::string defaultString = + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + ""; + Teuchos::RCP defaultList = + Teuchos::getParametersFromXmlString(defaultString); + // validate user input parameters (and set defaults if necessary) + Teuchos::ParameterList inputParameters = paramList; + inputParameters.validateParametersAndSetDefaults(*defaultList); + TEUCHOS_TEST_FOR_EXCEPTION( + inputParameters.get("MueLu preconditioner") == "undefined", + MueLu::Exceptions::RuntimeError, + "FacadeSimple: undefined MueLu preconditioner. 
Set the \"MueLu " + "preconditioner\" parameter correctly in your input file."); -"" -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -"" -" " -"" -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -"" -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -" " -"" -" " -" " -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -" " -"" -" " -" " -"" -" " -" " -" " -"" -" " -" " -" " -" " -" " -" " -" " -" " -"" -" " -"" - ; + // create copy of template string which is updated with in-place string + // replacements template string for preconditioner layout (factory based + // parameters) + std::string finalString = - // logical code for more complicated distinctions + "" + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + "" + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + " 
" + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + " " + "" + " " + " " + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + " " + "" + " " + " " + "" + " " + " " + " " + "" + " " + " " + " " + " " + " " + " " + " " + " " + "" + " " + ""; + // logical code for more complicated distinctions - std::string smoother1 = inputParameters.get("Block 1: smoother"); - if(smoother1 == "ILU") { - this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooILUFact1"); - } else if (smoother1 == "Symmetric Gauss-Seidel" || smoother1 == "SGS") { - this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", "Symmetric Gauss-Seidel"); - this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); - } else if (smoother1 == "Symmetric Gauss-Seidel" || smoother1 == "GS") { - this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", "Gauss-Seidel"); - this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); - } else if (smoother1 == "Jacobi") { - this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", "Jacobi"); - this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); - } else if (smoother1 == "Direct") { - this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooDirectFact1"); - } else { - this->GetOStream(Errors) << "Invalid smoother type for block 1: " << smoother1 << ". Valid options are: \"SGS\", \"GS\", \"Jacobi\", \"ILU\" or \"Direct\"." 
<< std::endl; - } - - std::string smoother2 = inputParameters.get("Block 2: smoother"); - if(smoother2 == "ILU") { - this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooILUFact2"); - } else if (smoother2 == "Symmetric Gauss-Seidel" || smoother2 == "SGS") { - this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", "Symmetric Gauss-Seidel"); - this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); - } else if (smoother2 == "Symmetric Gauss-Seidel" || smoother2 == "GS") { - this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", "Gauss-Seidel"); - this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); - } else if (smoother2 == "Jacobi") { - this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", "Jacobi"); - this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); - } else if (smoother2 == "Direct") { - this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooDirectFact2"); - } else { - this->GetOStream(Errors) << "Invalid smoother type for block 2: " << smoother2 << ". Valid options are: \"SGS\", \"GS\", \"Jacobi\", \"ILU\" or \"Direct\"." 
<< std::endl; - } + std::string smoother1 = inputParameters.get("Block 1: smoother"); + if (smoother1 == "ILU") { + this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooILUFact1"); + } else if (smoother1 == "Symmetric Gauss-Seidel" || smoother1 == "SGS") { + this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", + "Symmetric Gauss-Seidel"); + this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); + } else if (smoother1 == "Symmetric Gauss-Seidel" || smoother1 == "GS") { + this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", + "Gauss-Seidel"); + this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); + } else if (smoother1 == "Jacobi") { + this->ReplaceString(finalString, "XXXBlock 1: relaxation: typeYYY", + "Jacobi"); + this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooFact1"); + } else if (smoother1 == "Direct") { + this->ReplaceString(finalString, "XYZSmoother1XYZ", "mySmooDirectFact1"); + } else { + this->GetOStream(Errors) + << "Invalid smoother type for block 1: " << smoother1 + << ". Valid options are: \"SGS\", \"GS\", \"Jacobi\", \"ILU\" or " + "\"Direct\"." 
+ << std::endl; + } - if(inputParameters.get("Block 1: transfer smoothing") == true) { - this->ReplaceString(finalString, "XXXBlock 1: prolongatorYYY", "myPFact1"); - this->ReplaceString(finalString, "XXXBlock 1: restrictor YYY", "myRFact1"); - } else { - this->ReplaceString(finalString, "XXXBlock 1: prolongatorYYY", "myTentativePFact1"); - this->ReplaceString(finalString, "XXXBlock 1: restrictor YYY", "myTransPFact1"); - } - if(inputParameters.get("Block 2: transfer smoothing") == true) { - this->ReplaceString(finalString, "XXXBlock 2: prolongatorYYY", "myPFact2"); - this->ReplaceString(finalString, "XXXBlock 2: restrictor YYY", "myRFact2"); - } else { - this->ReplaceString(finalString, "XXXBlock 2: prolongatorYYY", "myTentativePFact2"); - this->ReplaceString(finalString, "XXXBlock 2: restrictor YYY", "myTransPFact2"); - } + std::string smoother2 = inputParameters.get("Block 2: smoother"); + if (smoother2 == "ILU") { + this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooILUFact2"); + } else if (smoother2 == "Symmetric Gauss-Seidel" || smoother2 == "SGS") { + this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", + "Symmetric Gauss-Seidel"); + this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); + } else if (smoother2 == "Symmetric Gauss-Seidel" || smoother2 == "GS") { + this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", + "Gauss-Seidel"); + this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); + } else if (smoother2 == "Jacobi") { + this->ReplaceString(finalString, "XXXBlock 2: relaxation: typeYYY", + "Jacobi"); + this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooFact2"); + } else if (smoother2 == "Direct") { + this->ReplaceString(finalString, "XYZSmoother2XYZ", "mySmooDirectFact2"); + } else { + this->GetOStream(Errors) + << "Invalid smoother type for block 2: " << smoother2 + << ". Valid options are: \"SGS\", \"GS\", \"Jacobi\", \"ILU\" or " + "\"Direct\"." 
+ << std::endl; + } - // end logical code + if (inputParameters.get("Block 1: transfer smoothing") == true) { + this->ReplaceString(finalString, "XXXBlock 1: prolongatorYYY", "myPFact1"); + this->ReplaceString(finalString, "XXXBlock 1: restrictor YYY", "myRFact1"); + } else { + this->ReplaceString(finalString, "XXXBlock 1: prolongatorYYY", + "myTentativePFact1"); + this->ReplaceString(finalString, "XXXBlock 1: restrictor YYY", + "myTransPFact1"); + } + if (inputParameters.get("Block 2: transfer smoothing") == true) { + this->ReplaceString(finalString, "XXXBlock 2: prolongatorYYY", "myPFact2"); + this->ReplaceString(finalString, "XXXBlock 2: restrictor YYY", "myRFact2"); + } else { + this->ReplaceString(finalString, "XXXBlock 2: prolongatorYYY", + "myTentativePFact2"); + this->ReplaceString(finalString, "XXXBlock 2: restrictor YYY", + "myTransPFact2"); + } - // loop over all input parameters - for(Teuchos::ParameterList::ConstIterator it = inputParameters.begin(); it != inputParameters.end(); it++) { - // form replacement string - std::string par_name = inputParameters.name(it); - std::stringstream ss; - ss << "XXX" << par_name << "YYY"; + // end logical code - // update final string with parameters - Teuchos::ParameterEntry par_entry = inputParameters.entry(it); - this->ReplaceString(finalString, - ss.str(), Teuchos::toString(par_entry.getAny())); - } + // loop over all input parameters + for (Teuchos::ParameterList::ConstIterator it = inputParameters.begin(); + it != inputParameters.end(); it++) { + // form replacement string + std::string par_name = inputParameters.name(it); + std::stringstream ss; + ss << "XXX" << par_name << "YYY"; - Teuchos::RCP ret = Teuchos::getParametersFromXmlString(finalString); - return ret; + // update final string with parameters + Teuchos::ParameterEntry par_entry = inputParameters.entry(it); + this->ReplaceString(finalString, ss.str(), + Teuchos::toString(par_entry.getAny())); } + Teuchos::RCP ret = + 
Teuchos::getParametersFromXmlString(finalString); + return ret; +} + } // end namespace MueLu #endif // PACKAGES_MUELU_SRC_INTERFACE_FACADECLASSES_Simple_DEF_HPP_ diff --git a/packages/muelu/src/Interface/MueLu_AdaptiveSaMLParameterListInterpreter_decl.hpp b/packages/muelu/src/Interface/MueLu_AdaptiveSaMLParameterListInterpreter_decl.hpp index e3dfc36150a6..4f617b6fe2f8 100644 --- a/packages/muelu/src/Interface/MueLu_AdaptiveSaMLParameterListInterpreter_decl.hpp +++ b/packages/muelu/src/Interface/MueLu_AdaptiveSaMLParameterListInterpreter_decl.hpp @@ -14,192 +14,220 @@ #include #include +#include "MueLu_AdaptiveSaMLParameterListInterpreter_fwd.hpp" #include "MueLu_ConfigDefs.hpp" #include "MueLu_HierarchyManager.hpp" -#include "MueLu_AdaptiveSaMLParameterListInterpreter_fwd.hpp" #include "MueLu_Hierarchy_fwd.hpp" #include "MueLu_SmootherFactory_fwd.hpp" -#include "MueLu_TentativePFactory_fwd.hpp" -#include "MueLu_SaPFactory_fwd.hpp" -#include "MueLu_PgPFactory_fwd.hpp" -#include "MueLu_TransPFactory_fwd.hpp" +#include "MueLu_CoalesceDropFactory_fwd.hpp" +#include "MueLu_FactoryBase_fwd.hpp" #include "MueLu_GenericRFactory_fwd.hpp" +#include "MueLu_MLParameterListInterpreter_fwd.hpp" +#include "MueLu_NullspaceFactory_fwd.hpp" +#include "MueLu_PgPFactory_fwd.hpp" +#include "MueLu_RAPFactory_fwd.hpp" +#include "MueLu_SaPFactory_fwd.hpp" #include "MueLu_SmootherPrototype_fwd.hpp" +#include "MueLu_TentativePFactory_fwd.hpp" +#include "MueLu_TransPFactory_fwd.hpp" #include "MueLu_TrilinosSmoother_fwd.hpp" -#include "MueLu_RAPFactory_fwd.hpp" -#include "MueLu_CoalesceDropFactory_fwd.hpp" #include "MueLu_UncoupledAggregationFactory_fwd.hpp" -#include "MueLu_NullspaceFactory_fwd.hpp" -#include "MueLu_FactoryBase_fwd.hpp" -#include "MueLu_MLParameterListInterpreter_fwd.hpp" namespace MueLu { - /* - Utility that from an existing Teuchos::ParameterList creates a new list, in - which level-specific parameters are replaced with sublists. 
- - Currently, level-specific parameters that begin with "smoother:" - or "aggregation:" are placed in sublists. Coarse options are also placed - in a coarse list. - - Example: - Input: - smoother: type (level 0) = symmetric Gauss-Seidel - smoother: sweeps (level 0) = 1 - Output: - smoother: list (level 0) -> - smoother: type = symmetric Gauss-Seidel - smoother: sweeps = 1 - */ - // This function is a copy of ML_CreateSublists to avoid dependency on ML - // Throw exception on error instead of exit() - //void CreateSublists(const ParameterList &List, ParameterList &newList); - - - /*! - @class AdaptiveSAMLParameterListInterpreter class. - @brief Class that accepts ML-style parameters and builds a MueLu preconditioner. - This interpreter uses the same default values as ML. This allows to compare ML/MueLu results - */ - - template - class AdaptiveSaMLParameterListInterpreter : - public HierarchyManager { +/* + Utility that from an existing Teuchos::ParameterList creates a new list, in + which level-specific parameters are replaced with sublists. + + Currently, level-specific parameters that begin with "smoother:" + or "aggregation:" are placed in sublists. Coarse options are also placed + in a coarse list. + + Example: + Input: + smoother: type (level 0) = symmetric Gauss-Seidel + smoother: sweeps (level 0) = 1 + Output: + smoother: list (level 0) -> + smoother: type = symmetric Gauss-Seidel + smoother: sweeps = 1 +*/ +// This function is a copy of ML_CreateSublists to avoid dependency on ML +// Throw exception on error instead of exit() +// void CreateSublists(const ParameterList &List, ParameterList &newList); + +/*! + @class AdaptiveSAMLParameterListInterpreter class. + @brief Class that accepts ML-style parameters and builds a MueLu + preconditioner. This interpreter uses the same default values as ML. 
This + allows to compare ML/MueLu results +*/ + +template +class AdaptiveSaMLParameterListInterpreter + : public HierarchyManager { #undef MUELU_ADAPTIVESAMLPARAMETERLISTINTERPRETER_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ - - //! Constructor. - AdaptiveSaMLParameterListInterpreter() : nullspace_(NULL), blksize_(1) { } - - //! Constructor. - //! @param paramList: parameter list with ML parameters - //! @param nspVector: MultiVector with fine-level nullspace approximation - //! @param factoryList: vector with RCP of FactoryBase objects - //! - //! The factories in factoryList allow the user to add user-specific factories to the MueLu Hierarchy. - //! The idea is to be able to add some factories that write out some debug information etc. which are not handled by the ML - //! Parameter List itself. See information about the RAPFactory::AddTransferFactory method, too! - AdaptiveSaMLParameterListInterpreter(Teuchos::ParameterList & paramList,std::vector > factoryList = std::vector >(0)); - - //! Constructor. - //! @param xmlFileName: file name for XML file with ML parameters - //! @param factoryList: vector with RCP of FactoryBase objects - //! - //! The factories in factoryList allow the user to add user-specific factories to the MueLu Hierarchy. - //! The idea is to be able to add some factories that write out some debug information etc. which are not handled by the ML - //! Parameter List itself. See information about the RAPFactory::AddTransferFactory method, too! - AdaptiveSaMLParameterListInterpreter(const std::string & xmlFileName,std::vector > factoryList = std::vector >(0)); - - //! Destructor. - virtual ~AdaptiveSaMLParameterListInterpreter() { } - - //@} - - //@{ - - void SetParameterList(const Teuchos::ParameterList & paramList); - - //@} - - //@{ - - //! Setup Hierarchy object - virtual void SetupHierarchy(Hierarchy & H) const; - - //@} - - //@{ - - //! 
@name Handling of additional user-specific transfer factories - //@{ - /*! @brief Add transfer factory in the end of list of transfer factories for RAPFactory. - - This allows the user to add user-specific factories to the MueLu Hierarchy. The idea is to be able - to add some factories that write out some debug information etc. which are not handled by the ML - Parameter List itself. See information about the RAPFactory::AddTransferFactory method, too! - */ - void AddTransferFactory(const RCP & factory); - - //! Returns number of transfer factories. - size_t NumTransferFactories() const; - //@} - - private: - - //! build multigrid hierarchy for improving nullspace - //! use ML settings that are also used for the final full multigrid - //! hierarchy. In contrary to the final multigrid hierarchy use - //! only nonsmoothed transfer operators (safe time of prolongator smoothing) - //! and cheap level smoothers (no direct solver on coarsest level). - void SetupInitHierarchy(Hierarchy & H) const; - - //! internal routine to add a new factory manager used for the initialization phase - void AddInitFactoryManager(int startLevel, int numDesiredLevel, RCP manager) { - const int lastLevel = startLevel + numDesiredLevel - 1; - if (init_levelManagers_.size() < lastLevel + 1) init_levelManagers_.resize(lastLevel + 1); - - for(int iLevel = startLevel; iLevel <= lastLevel; iLevel++) { - init_levelManagers_[iLevel] = manager; - } +public: + //! @name Constructors/Destructors. + //@{ + + //! Constructor. + AdaptiveSaMLParameterListInterpreter() : nullspace_(NULL), blksize_(1) {} + + //! Constructor. + //! @param paramList: parameter list with ML parameters + //! @param nspVector: MultiVector with fine-level nullspace approximation + //! @param factoryList: vector with RCP of FactoryBase objects + //! + //! The factories in factoryList allow the user to add user-specific factories + //! to the MueLu Hierarchy. The idea is to be able to add some factories that + //! 
write out some debug information etc. which are not handled by the ML + //! Parameter List itself. See information about the + //! RAPFactory::AddTransferFactory method, too! + AdaptiveSaMLParameterListInterpreter( + Teuchos::ParameterList ¶mList, + std::vector> factoryList = + std::vector>(0)); + + //! Constructor. + //! @param xmlFileName: file name for XML file with ML parameters + //! @param factoryList: vector with RCP of FactoryBase objects + //! + //! The factories in factoryList allow the user to add user-specific factories + //! to the MueLu Hierarchy. The idea is to be able to add some factories that + //! write out some debug information etc. which are not handled by the ML + //! Parameter List itself. See information about the + //! RAPFactory::AddTransferFactory method, too! + AdaptiveSaMLParameterListInterpreter( + const std::string &xmlFileName, + std::vector> factoryList = + std::vector>(0)); + + //! Destructor. + virtual ~AdaptiveSaMLParameterListInterpreter() {} + + //@} + + //@{ + + void SetParameterList(const Teuchos::ParameterList ¶mList); + + //@} + + //@{ + + //! Setup Hierarchy object + virtual void SetupHierarchy(Hierarchy &H) const; + + //@} + + //@{ + + //! @name Handling of additional user-specific transfer factories + //@{ + /*! @brief Add transfer factory in the end of list of transfer factories for + RAPFactory. + + This allows the user to add user-specific factories to the MueLu Hierarchy. + The idea is to be able to add some factories that write out some debug + information etc. which are not handled by the ML Parameter List itself. See + information about the RAPFactory::AddTransferFactory method, too! + */ + void AddTransferFactory(const RCP &factory); + + //! Returns number of transfer factories. + size_t NumTransferFactories() const; + //@} + +private: + //! build multigrid hierarchy for improving nullspace + //! use ML settings that are also used for the final full multigrid + //! hierarchy. 
In contrary to the final multigrid hierarchy use + //! only nonsmoothed transfer operators (safe time of prolongator smoothing) + //! and cheap level smoothers (no direct solver on coarsest level). + void SetupInitHierarchy(Hierarchy &H) const; + + //! internal routine to add a new factory manager used for the initialization + //! phase + void AddInitFactoryManager(int startLevel, int numDesiredLevel, + RCP manager) { + const int lastLevel = startLevel + numDesiredLevel - 1; + if (init_levelManagers_.size() < lastLevel + 1) + init_levelManagers_.resize(lastLevel + 1); + + for (int iLevel = startLevel; iLevel <= lastLevel; iLevel++) { + init_levelManagers_[iLevel] = manager; } - - //! Used in SetupInitHierarchy() to access levelManagers_ - //! Inputs i=-1 and i=size() are allowed to simplify calls to hierarchy->Setup() - Teuchos::RCP InitLvlMngr(int levelID, int lastLevelID) const { - - // Please not that the order of the 'if' statements is important. - - if (levelID == -1) return Teuchos::null; // when this routine is called with levelID == '-1', it means that we are processing the finest Level (there is no finer level) - if (levelID == lastLevelID+1) return Teuchos::null; // when this routine is called with levelID == 'lastLevelID+1', it means that we are processing the last level (ie: there is no nextLevel...) - - if (0 == init_levelManagers_.size()) { // default factory manager. - // the default manager is shared across levels, initialized only if needed and deleted with the HierarchyManager. - static RCP defaultMngr = rcp(new FactoryManager()); - return defaultMngr; - } - if (levelID >= init_levelManagers_.size()) return init_levelManagers_[init_levelManagers_.size()-1]; // last levelManager is used for all the remaining levels. - - return init_levelManagers_[levelID]; // throw exception if out of bound. + } + + //! Used in SetupInitHierarchy() to access levelManagers_ + //! Inputs i=-1 and i=size() are allowed to simplify calls to + //! 
hierarchy->Setup() + Teuchos::RCP InitLvlMngr(int levelID, + int lastLevelID) const { + + // Please not that the order of the 'if' statements is important. + + if (levelID == -1) + return Teuchos::null; // when this routine is called with levelID == '-1', + // it means that we are processing the finest Level + // (there is no finer level) + if (levelID == lastLevelID + 1) + return Teuchos::null; // when this routine is called with levelID == + // 'lastLevelID+1', it means that we are processing + // the last level (ie: there is no nextLevel...) + + if (0 == init_levelManagers_.size()) { // default factory manager. + // the default manager is shared across levels, initialized only if needed + // and deleted with the HierarchyManager. + static RCP defaultMngr = rcp(new FactoryManager()); + return defaultMngr; } + if (levelID >= init_levelManagers_.size()) + return init_levelManagers_[init_levelManagers_.size() - + 1]; // last levelManager is used for all the + // remaining levels. + + return init_levelManagers_[levelID]; // throw exception if out of bound. + } - //! nullspace can be embedded in the ML parameter list - int nullspaceDim_; - double* nullspace_; + //! nullspace can be embedded in the ML parameter list + int nullspaceDim_; + double *nullspace_; - //! export aggregates - bool bExportAggregates_; //!< if set to true an AggregationExportFactory is used to export aggregation information (default = false) + //! export aggregates + bool bExportAggregates_; //!< if set to true an AggregationExportFactory is + //!< used to export aggregation information (default + //!< = false) - //! list of user-defined transfer Factories - //! We use this vector to add some special user-given factories to the Hierarchy (RAPFactory) - //! This way the user can extend the standard functionality of the MLParameterListInterpreter beyond the - //! capabibilities of ML. - std::vector > TransferFacts_; + //! list of user-defined transfer Factories + //! 
We use this vector to add some special user-given factories to the + //! Hierarchy (RAPFactory) This way the user can extend the standard + //! functionality of the MLParameterListInterpreter beyond the capabibilities + //! of ML. + std::vector> TransferFacts_; - //! list of levelManagers for adaptive smoothed aggregation - //! initialization phase - Array > init_levelManagers_; + //! list of levelManagers for adaptive smoothed aggregation + //! initialization phase + Array> init_levelManagers_; - //@{ Operator configuration + //@{ Operator configuration - //! Setup Operator object - //! overloaded from HierarchyManager to set nDofsPerNode - virtual void SetupOperator(Operator & Op) const; + //! Setup Operator object + //! overloaded from HierarchyManager to set nDofsPerNode + virtual void SetupOperator(Operator &Op) const; - //! Matrix configuration storage - int blksize_; - //@} + //! Matrix configuration storage + int blksize_; + //@} - }; // class AdaptiveSaMLParameterListInterpreter +}; // class AdaptiveSaMLParameterListInterpreter } // namespace MueLu diff --git a/packages/muelu/src/Interface/MueLu_AdaptiveSaMLParameterListInterpreter_def.hpp b/packages/muelu/src/Interface/MueLu_AdaptiveSaMLParameterListInterpreter_def.hpp index c9444689155b..e66ab92baae4 100644 --- a/packages/muelu/src/Interface/MueLu_AdaptiveSaMLParameterListInterpreter_def.hpp +++ b/packages/muelu/src/Interface/MueLu_AdaptiveSaMLParameterListInterpreter_def.hpp @@ -15,33 +15,33 @@ #include #endif +#include #include #include #include #include -#include #include "MueLu_AdaptiveSaMLParameterListInterpreter_decl.hpp" -#include "MueLu_Level.hpp" -#include "MueLu_Hierarchy.hpp" #include "MueLu_FactoryManager.hpp" +#include "MueLu_Hierarchy.hpp" +#include "MueLu_Level.hpp" -#include "MueLu_TentativePFactory.hpp" -#include "MueLu_SaPFactory.hpp" -#include "MueLu_PgPFactory.hpp" -#include "MueLu_TransPFactory.hpp" +#include "MueLu_CoalesceDropFactory.hpp" #include "MueLu_GenericRFactory.hpp" 
-#include "MueLu_SmootherPrototype.hpp" -#include "MueLu_SmootherFactory.hpp" -#include "MueLu_TrilinosSmoother.hpp" #include "MueLu_HierarchyUtils.hpp" -#include "MueLu_RAPFactory.hpp" -#include "MueLu_CoalesceDropFactory.hpp" -#include "MueLu_UncoupledAggregationFactory.hpp" +#include "MueLu_MLParameterListInterpreter.hpp" #include "MueLu_NullspaceFactory.hpp" #include "MueLu_ParameterListUtils.hpp" -#include "MueLu_MLParameterListInterpreter.hpp" +#include "MueLu_PgPFactory.hpp" +#include "MueLu_RAPFactory.hpp" +#include "MueLu_SaPFactory.hpp" +#include "MueLu_SmootherFactory.hpp" +#include "MueLu_SmootherPrototype.hpp" +#include "MueLu_TentativePFactory.hpp" +#include "MueLu_TransPFactory.hpp" +#include "MueLu_TrilinosSmoother.hpp" +#include "MueLu_UncoupledAggregationFactory.hpp" //#include "MueLu_Utilities.hpp" @@ -49,401 +49,515 @@ // Note: do not add options that are only recognized by MueLu. -// TODO: this parameter list interpreter should force MueLu to use default ML parameters +// TODO: this parameter list interpreter should force MueLu to use default ML +// parameters // - Ex: smoother sweep=2 by default for ML -// Read a parameter value from a parameter list and store it into a variable named 'varName' -#define MUELU_READ_PARAM(paramList, paramStr, varType, defaultValue, varName) \ - varType varName = defaultValue; if (paramList.isParameter(paramStr)) varName = paramList.get(paramStr); - -// Read a parameter value from a paraeter list and copy it into a new parameter list (with another parameter name) -#define MUELU_COPY_PARAM(paramList, paramStr, varType, defaultValue, outParamList, outParamStr) \ - if (paramList.isParameter(paramStr)) \ - outParamList.set(outParamStr, paramList.get(paramStr)); \ - else outParamList.set(outParamStr, defaultValue); \ +// Read a parameter value from a parameter list and store it into a variable +// named 'varName' +#define MUELU_READ_PARAM(paramList, paramStr, varType, defaultValue, varName) \ + varType varName = 
defaultValue; \ + if (paramList.isParameter(paramStr)) \ + varName = paramList.get(paramStr); + +// Read a parameter value from a paraeter list and copy it into a new parameter +// list (with another parameter name) +#define MUELU_COPY_PARAM(paramList, paramStr, varType, defaultValue, \ + outParamList, outParamStr) \ + if (paramList.isParameter(paramStr)) \ + outParamList.set(outParamStr, paramList.get(paramStr)); \ + else \ + outParamList.set(outParamStr, defaultValue); namespace MueLu { - template - AdaptiveSaMLParameterListInterpreter::AdaptiveSaMLParameterListInterpreter(Teuchos::ParameterList & paramList, std::vector > factoryList) : TransferFacts_(factoryList), blksize_(1) { - SetParameterList(paramList); +template +AdaptiveSaMLParameterListInterpreter:: + AdaptiveSaMLParameterListInterpreter( + Teuchos::ParameterList ¶mList, + std::vector> factoryList) + : TransferFacts_(factoryList), blksize_(1) { + SetParameterList(paramList); +} + +template +AdaptiveSaMLParameterListInterpreter< + Scalar, LocalOrdinal, GlobalOrdinal, + Node>::AdaptiveSaMLParameterListInterpreter(const std::string &xmlFileName, + std::vector> + factoryList) + : nullspace_(NULL), TransferFacts_(factoryList), blksize_(1) { + Teuchos::RCP paramList = + Teuchos::getParametersFromXmlFile(xmlFileName); + SetParameterList(*paramList); +} + +template +void AdaptiveSaMLParameterListInterpreter< + Scalar, LocalOrdinal, GlobalOrdinal, + Node>::SetParameterList(const Teuchos::ParameterList ¶mList_in) { + Teuchos::ParameterList paramList = paramList_in; + + RCP out = Teuchos::fancyOStream( + Teuchos::rcpFromRef(std::cout)); // TODO: use internal out (GetOStream()) + + // + // Read top-level of the parameter list + // + + // hard-coded default values == ML defaults according to the manual + MUELU_READ_PARAM(paramList, "ML output", int, 0, verbosityLevel); + MUELU_READ_PARAM(paramList, "max levels", int, 10, maxLevels); + MUELU_READ_PARAM(paramList, "PDE equations", int, 1, nDofsPerNode); + + 
MUELU_READ_PARAM(paramList, "coarse: max size", int, 128, maxCoarseSize); + + MUELU_READ_PARAM(paramList, "aggregation: type", std::string, "Uncoupled", + agg_type); + // MUELU_READ_PARAM(paramList, "aggregation: threshold", double, 0.0, + // agg_threshold); + MUELU_READ_PARAM(paramList, "aggregation: damping factor", double, + (double)4 / (double)3, agg_damping); + // MUELU_READ_PARAM(paramList, "aggregation: smoothing sweeps", int, 1, + // agg_smoothingsweeps); + MUELU_READ_PARAM(paramList, "aggregation: nodes per aggregate", int, 1, + minPerAgg); + + MUELU_READ_PARAM(paramList, "null space: type", std::string, + "default vectors", nullspaceType); + MUELU_READ_PARAM(paramList, "null space: dimension", int, -1, + nullspaceDim); // TODO: ML default not in documentation + MUELU_READ_PARAM(paramList, "null space: vectors", double *, NULL, + nullspaceVec); // TODO: ML default not in documentation + + MUELU_READ_PARAM(paramList, "energy minimization: enable", bool, false, + bEnergyMinimization); + + // + // Move smoothers/aggregation/coarse parameters to sublists + // + + // ML allows to have level-specific smoothers/aggregation/coarse parameters at + // the top level of the list or/and defined in sublists: See also: ML Guide + // section 6.4.1, MueLu::CreateSublists, ML_CreateSublists + ParameterList paramListWithSubList; + MueLu::CreateSublists(paramList, paramListWithSubList); + paramList = paramListWithSubList; // swap + + // std::cout << std::endl << "Parameter list after CreateSublists" << + // std::endl; std::cout << paramListWithSubList << std::endl; + + int maxNbrAlreadySelected = 0; + + // Matrix option + this->blksize_ = nDofsPerNode; + + // Translate verbosity parameter + Teuchos::EVerbosityLevel eVerbLevel = Teuchos::VERB_NONE; + if (verbosityLevel == 0) + eVerbLevel = Teuchos::VERB_NONE; + if (verbosityLevel > 0) + eVerbLevel = Teuchos::VERB_LOW; + if (verbosityLevel > 4) + eVerbLevel = Teuchos::VERB_MEDIUM; + if (verbosityLevel > 7) + eVerbLevel = 
Teuchos::VERB_HIGH; + if (verbosityLevel > 9) + eVerbLevel = Teuchos::VERB_EXTREME; + + TEUCHOS_TEST_FOR_EXCEPTION( + agg_type != "Uncoupled", Exceptions::RuntimeError, + "MueLu::MLParameterListInterpreter::Setup(): parameter \"aggregation: " + "type\": only 'Uncoupled' aggregation is supported."); + + // Create MueLu factories + // RCP nspFact = rcp(new NullspaceFactory()); + RCP dropFact = rcp(new CoalesceDropFactory()); + // dropFact->SetVerbLevel(toMueLuVerbLevel(eVerbLevel)); + + // Uncoupled aggregation + RCP AggFact = + rcp(new UncoupledAggregationFactory()); + AggFact->SetMinNodesPerAggregate( + minPerAgg); // TODO should increase if run anything other than 1D + AggFact->SetMaxNeighAlreadySelected(maxNbrAlreadySelected); + AggFact->SetOrdering("natural"); + + if (verbosityLevel > 3) { // TODO fix me: Setup is a static function: we + // cannot use GetOStream without an object... + *out << "========================= Aggregate option summary " + "=========================" + << std::endl; + *out << "min Nodes per aggregate : " << minPerAgg + << std::endl; + *out << "min # of root nbrs already aggregated : " << maxNbrAlreadySelected + << std::endl; + *out << "aggregate ordering : natural" << std::endl; + *out << "==================================================================" + "===========" + << std::endl; } - template - AdaptiveSaMLParameterListInterpreter::AdaptiveSaMLParameterListInterpreter(const std::string & xmlFileName, std::vector > factoryList) : nullspace_(NULL), TransferFacts_(factoryList), blksize_(1) { - Teuchos::RCP paramList = Teuchos::getParametersFromXmlFile(xmlFileName); - SetParameterList(*paramList); + RCP PFact; + RCP RFact; + RCP PtentFact = rcp(new TentativePFactory()); + if (agg_damping == 0.0 && bEnergyMinimization == false) { + // tentative prolongation operator (PA-AMG) + PFact = PtentFact; + RFact = rcp(new TransPFactory()); + } else if (agg_damping != 0.0 && bEnergyMinimization == false) { + // smoothed aggregation (SA-AMG) + 
RCP SaPFact = rcp(new SaPFactory()); + SaPFact->SetParameter("sa: damping factor", ParameterEntry(agg_damping)); + PFact = SaPFact; + RFact = rcp(new TransPFactory()); + } else if (bEnergyMinimization == true) { + // Petrov Galerkin PG-AMG smoothed aggregation (energy minimization in ML) + PFact = rcp(new PgPFactory()); + RFact = rcp(new GenericRFactory()); } - template - void AdaptiveSaMLParameterListInterpreter::SetParameterList(const Teuchos::ParameterList & paramList_in) { - Teuchos::ParameterList paramList = paramList_in; - - RCP out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); // TODO: use internal out (GetOStream()) - - // - // Read top-level of the parameter list - // - - // hard-coded default values == ML defaults according to the manual - MUELU_READ_PARAM(paramList, "ML output", int, 0, verbosityLevel); - MUELU_READ_PARAM(paramList, "max levels", int, 10, maxLevels); - MUELU_READ_PARAM(paramList, "PDE equations", int, 1, nDofsPerNode); - - MUELU_READ_PARAM(paramList, "coarse: max size", int, 128, maxCoarseSize); + RCP AcFact = rcp(new RAPFactory()); + for (size_t i = 0; i < TransferFacts_.size(); i++) { + AcFact->AddTransferFactory( + TransferFacts_[i]); // THIS WILL BE REPLACED with a call to the + // MLParamterListInterpreter + } - MUELU_READ_PARAM(paramList, "aggregation: type", std::string, "Uncoupled", agg_type); - //MUELU_READ_PARAM(paramList, "aggregation: threshold", double, 0.0, agg_threshold); - MUELU_READ_PARAM(paramList, "aggregation: damping factor", double, (double)4/(double)3, agg_damping); - //MUELU_READ_PARAM(paramList, "aggregation: smoothing sweeps", int, 1, agg_smoothingsweeps); - MUELU_READ_PARAM(paramList, "aggregation: nodes per aggregate", int, 1, minPerAgg); + // + // Nullspace factory + // + + // Set fine level nullspace + // extract pre-computed nullspace from ML parameter list + // store it in nullspace_ and nullspaceDim_ + if (nullspaceType != "default vectors") { + TEUCHOS_TEST_FOR_EXCEPTION( + nullspaceType != 
"pre-computed", Exceptions::RuntimeError, + "MueLu::MLParameterListInterpreter: no valid nullspace (no " + "pre-computed null space). error."); + TEUCHOS_TEST_FOR_EXCEPTION(nullspaceDim == -1, Exceptions::RuntimeError, + "MueLu::MLParameterListInterpreter: no valid " + "nullspace (nullspace dim == -1). error."); + TEUCHOS_TEST_FOR_EXCEPTION( + nullspaceVec == NULL, Exceptions::RuntimeError, + "MueLu::MLParameterListInterpreter: no valid nullspace (nullspace == " + "NULL). You have to provide a valid fine-level nullspace in \'null " + "space: vectors\'"); + + nullspaceDim_ = nullspaceDim; + nullspace_ = nullspaceVec; + } - MUELU_READ_PARAM(paramList, "null space: type", std::string, "default vectors", nullspaceType); - MUELU_READ_PARAM(paramList, "null space: dimension", int, -1, nullspaceDim); // TODO: ML default not in documentation - MUELU_READ_PARAM(paramList, "null space: vectors", double*, NULL, nullspaceVec); // TODO: ML default not in documentation + Teuchos::RCP nspFact = Teuchos::rcp(new NullspaceFactory()); + nspFact->SetFactory("Nullspace", PtentFact); + + // + // Hierarchy + FactoryManager + // + + // Hierarchy options + this->SetVerbLevel(toMueLuVerbLevel(eVerbLevel)); + this->numDesiredLevel_ = maxLevels; + this->maxCoarseSize_ = maxCoarseSize; + + // init smoother + RCP initSmootherFact = Teuchos::null; + if (paramList.isSublist("init smoother")) { + ParameterList &initList = + paramList.sublist("init smoother"); // TODO move this before for loop + initSmootherFact = MLParameterListInterpreter::GetSmootherFactory( + initList); // TODO: missing AFact input arg. 
+ } else { + std::string ifpackType = "RELAXATION"; + Teuchos::ParameterList smootherParamList; + smootherParamList.set("relaxation: type", "symmetric Gauss-Seidel"); + smootherParamList.set("smoother: sweeps", 1); + smootherParamList.set("smoother: damping factor", 1.0); + RCP smooProto = + rcp(new TrilinosSmoother(ifpackType, smootherParamList, 0)); + + initSmootherFact = rcp(new SmootherFactory()); + initSmootherFact->SetSmootherPrototypes(smooProto, smooProto); + } - MUELU_READ_PARAM(paramList, "energy minimization: enable", bool, false, bEnergyMinimization); + // + // Coarse Smoother + // + ParameterList &coarseList = paramList.sublist("coarse: list"); + // coarseList.get("smoother: type", "Amesos-KLU"); // set default + // RCP coarseFact = this->GetSmootherFactory(coarseList); + RCP coarseFact = + MLParameterListInterpreter::GetSmootherFactory(coarseList); + // Smoothers Top Level Parameters - // - // Move smoothers/aggregation/coarse parameters to sublists - // + RCP topLevelSmootherParam = + ExtractSetOfParameters(paramList, "smoother"); + // std::cout << std::endl << "Top level smoother parameters:" << std::endl; + // std::cout << *topLevelSmootherParam << std::endl; - // ML allows to have level-specific smoothers/aggregation/coarse parameters at the top level of the list or/and defined in sublists: - // See also: ML Guide section 6.4.1, MueLu::CreateSublists, ML_CreateSublists - ParameterList paramListWithSubList; - MueLu::CreateSublists(paramList, paramListWithSubList); - paramList = paramListWithSubList; // swap - - // std::cout << std::endl << "Parameter list after CreateSublists" << std::endl; - // std::cout << paramListWithSubList << std::endl; - - int maxNbrAlreadySelected = 0; - - // Matrix option - this->blksize_ = nDofsPerNode; - - // Translate verbosity parameter - Teuchos::EVerbosityLevel eVerbLevel = Teuchos::VERB_NONE; - if (verbosityLevel == 0) eVerbLevel = Teuchos::VERB_NONE; - if (verbosityLevel > 0) eVerbLevel = Teuchos::VERB_LOW; - if 
(verbosityLevel > 4) eVerbLevel = Teuchos::VERB_MEDIUM; - if (verbosityLevel > 7) eVerbLevel = Teuchos::VERB_HIGH; - if (verbosityLevel > 9) eVerbLevel = Teuchos::VERB_EXTREME; - - TEUCHOS_TEST_FOR_EXCEPTION(agg_type != "Uncoupled", Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter::Setup(): parameter \"aggregation: type\": only 'Uncoupled' aggregation is supported."); - - // Create MueLu factories - // RCP nspFact = rcp(new NullspaceFactory()); - RCP dropFact = rcp(new CoalesceDropFactory()); - //dropFact->SetVerbLevel(toMueLuVerbLevel(eVerbLevel)); - - // Uncoupled aggregation - RCP AggFact = rcp(new UncoupledAggregationFactory()); - AggFact->SetMinNodesPerAggregate(minPerAgg); //TODO should increase if run anything other than 1D - AggFact->SetMaxNeighAlreadySelected(maxNbrAlreadySelected); - AggFact->SetOrdering("natural"); - - if (verbosityLevel > 3) { // TODO fix me: Setup is a static function: we cannot use GetOStream without an object... - *out << "========================= Aggregate option summary =========================" << std::endl; - *out << "min Nodes per aggregate : " << minPerAgg << std::endl; - *out << "min # of root nbrs already aggregated : " << maxNbrAlreadySelected << std::endl; - *out << "aggregate ordering : natural" << std::endl; - *out << "=============================================================================" << std::endl; - } + // - RCP PFact; - RCP RFact; - RCP PtentFact = rcp( new TentativePFactory() ); - if (agg_damping == 0.0 && bEnergyMinimization == false) { - // tentative prolongation operator (PA-AMG) - PFact = PtentFact; - RFact = rcp( new TransPFactory() ); - } else if (agg_damping != 0.0 && bEnergyMinimization == false) { - // smoothed aggregation (SA-AMG) - RCP SaPFact = rcp( new SaPFactory() ); - SaPFact->SetParameter("sa: damping factor", ParameterEntry(agg_damping)); - PFact = SaPFact; - RFact = rcp( new TransPFactory() ); - } else if (bEnergyMinimization == true) { - // Petrov Galerkin PG-AMG smoothed 
aggregation (energy minimization in ML) - PFact = rcp( new PgPFactory() ); - RFact = rcp( new GenericRFactory() ); - } + // Prepare factory managers + // TODO: smootherFact can be reuse accross level if same parameters/no + // specific parameterList - RCP AcFact = rcp( new RAPFactory() ); - for (size_t i = 0; iAddTransferFactory(TransferFacts_[i]); // THIS WILL BE REPLACED with a call to the MLParamterListInterpreter - } + for (int levelID = 0; levelID < maxLevels; levelID++) { // - // Nullspace factory + // Level FactoryManager // - // Set fine level nullspace - // extract pre-computed nullspace from ML parameter list - // store it in nullspace_ and nullspaceDim_ - if (nullspaceType != "default vectors") { - TEUCHOS_TEST_FOR_EXCEPTION(nullspaceType != "pre-computed", Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (no pre-computed null space). error."); - TEUCHOS_TEST_FOR_EXCEPTION(nullspaceDim == -1, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (nullspace dim == -1). error."); - TEUCHOS_TEST_FOR_EXCEPTION(nullspaceVec == NULL, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (nullspace == NULL). 
You have to provide a valid fine-level nullspace in \'null space: vectors\'"); - - nullspaceDim_ = nullspaceDim; - nullspace_ = nullspaceVec; - } - - Teuchos::RCP nspFact = Teuchos::rcp(new NullspaceFactory()); - nspFact->SetFactory("Nullspace", PtentFact); + RCP manager = rcp(new FactoryManager()); + RCP initmanager = rcp(new FactoryManager()); // - // Hierarchy + FactoryManager + // Smoothers // - // Hierarchy options - this->SetVerbLevel(toMueLuVerbLevel(eVerbLevel)); - this->numDesiredLevel_ = maxLevels; - this->maxCoarseSize_ = maxCoarseSize; - - // init smoother - RCP initSmootherFact = Teuchos::null; - if(paramList.isSublist("init smoother")) { - ParameterList& initList = paramList.sublist("init smoother"); // TODO move this before for loop - initSmootherFact = MLParameterListInterpreter::GetSmootherFactory(initList); // TODO: missing AFact input arg. - } else { - std::string ifpackType = "RELAXATION"; - Teuchos::ParameterList smootherParamList; - smootherParamList.set("relaxation: type", "symmetric Gauss-Seidel"); - smootherParamList.set("smoother: sweeps", 1); - smootherParamList.set("smoother: damping factor", 1.0); - RCP smooProto = rcp( new TrilinosSmoother(ifpackType, smootherParamList, 0) ); - - initSmootherFact = rcp( new SmootherFactory() ); - initSmootherFact->SetSmootherPrototypes(smooProto, smooProto); + { + // Merge level-specific parameters with global parameters. level-specific + // parameters takes precedence. 
+ // TODO: unit-test this part alone + + ParameterList levelSmootherParam = + GetMLSubList(paramList, "smoother", levelID); // copy + MergeParameterList( + *topLevelSmootherParam, levelSmootherParam, + false); /* false = do no overwrite levelSmootherParam parameters by + topLevelSmootherParam parameters */ + // std::cout << std::endl << "Merged List for level " << levelID << + // std::endl; std::cout << levelSmootherParam << std::endl; + + // RCP smootherFact = + // this->GetSmootherFactory(levelSmootherParam); // TODO: missing AFact + // input arg. + RCP smootherFact = + MLParameterListInterpreter::GetSmootherFactory( + levelSmootherParam); // TODO: missing AFact input arg. + manager->SetFactory("Smoother", smootherFact); + smootherFact->DisableMultipleCallCheck(); + + initmanager->SetFactory("Smoother", initSmootherFact); + initmanager->SetFactory("CoarseSolver", initSmootherFact); + initSmootherFact->DisableMultipleCallCheck(); } // - // Coarse Smoother - // - ParameterList& coarseList = paramList.sublist("coarse: list"); - // coarseList.get("smoother: type", "Amesos-KLU"); // set default - //RCP coarseFact = this->GetSmootherFactory(coarseList); - RCP coarseFact = MLParameterListInterpreter::GetSmootherFactory(coarseList); - - // Smoothers Top Level Parameters - - RCP topLevelSmootherParam = ExtractSetOfParameters(paramList, "smoother"); - // std::cout << std::endl << "Top level smoother parameters:" << std::endl; - // std::cout << *topLevelSmootherParam << std::endl; - + // Misc // - // Prepare factory managers - // TODO: smootherFact can be reuse accross level if same parameters/no specific parameterList - - for (int levelID=0; levelID < maxLevels; levelID++) { - - // - // Level FactoryManager - // - - RCP manager = rcp(new FactoryManager()); - RCP initmanager = rcp(new FactoryManager()); - - // - // Smoothers - // - - { - // Merge level-specific parameters with global parameters. level-specific parameters takes precedence. 
- // TODO: unit-test this part alone - - ParameterList levelSmootherParam = GetMLSubList(paramList, "smoother", levelID); // copy - MergeParameterList(*topLevelSmootherParam, levelSmootherParam, false); /* false = do no overwrite levelSmootherParam parameters by topLevelSmootherParam parameters */ - // std::cout << std::endl << "Merged List for level " << levelID << std::endl; - // std::cout << levelSmootherParam << std::endl; - - //RCP smootherFact = this->GetSmootherFactory(levelSmootherParam); // TODO: missing AFact input arg. - RCP smootherFact = MLParameterListInterpreter::GetSmootherFactory(levelSmootherParam); // TODO: missing AFact input arg. - manager->SetFactory("Smoother", smootherFact); - smootherFact->DisableMultipleCallCheck(); - - initmanager->SetFactory("Smoother", initSmootherFact); - initmanager->SetFactory("CoarseSolver", initSmootherFact); - initSmootherFact->DisableMultipleCallCheck(); - - } - - // - // Misc - // - - Teuchos::rcp_dynamic_cast(PFact)->DisableMultipleCallCheck(); - Teuchos::rcp_dynamic_cast(PtentFact)->DisableMultipleCallCheck(); - Teuchos::rcp_dynamic_cast(RFact)->DisableMultipleCallCheck(); - Teuchos::rcp_dynamic_cast(coarseFact)->DisableMultipleCallCheck(); - Teuchos::rcp_dynamic_cast(dropFact)->DisableMultipleCallCheck(); - Teuchos::rcp_dynamic_cast(AggFact)->DisableMultipleCallCheck(); - Teuchos::rcp_dynamic_cast(AcFact)->DisableMultipleCallCheck(); - Teuchos::rcp_dynamic_cast(nspFact)->DisableMultipleCallCheck(); - - manager->SetFactory("CoarseSolver", coarseFact); // TODO: should not be done in the loop - manager->SetFactory("Graph", dropFact); - manager->SetFactory("Aggregates", AggFact); - manager->SetFactory("DofsPerNode", dropFact); - manager->SetFactory("A", AcFact); - manager->SetFactory("P", PFact); - manager->SetFactory("Ptent", PtentFact); - manager->SetFactory("R", RFact); - manager->SetFactory("Nullspace", nspFact); - - //initmanager->SetFactory("CoarseSolver", coarseFact); - initmanager->SetFactory("Graph", 
dropFact); - initmanager->SetFactory("Aggregates", AggFact); - initmanager->SetFactory("DofsPerNode", dropFact); - initmanager->SetFactory("A", AcFact); - initmanager->SetFactory("P", PtentFact); // use nonsmoothed transfers - initmanager->SetFactory("Ptent", PtentFact); - initmanager->SetFactory("R", RFact); - initmanager->SetFactory("Nullspace", nspFact); - - this->AddFactoryManager(levelID, 1, manager); - this->AddInitFactoryManager(levelID, 1, initmanager); - } // for (level loop) + Teuchos::rcp_dynamic_cast(PFact)->DisableMultipleCallCheck(); + Teuchos::rcp_dynamic_cast(PtentFact)->DisableMultipleCallCheck(); + Teuchos::rcp_dynamic_cast(RFact) + ->DisableMultipleCallCheck(); + Teuchos::rcp_dynamic_cast(coarseFact) + ->DisableMultipleCallCheck(); + Teuchos::rcp_dynamic_cast(dropFact) + ->DisableMultipleCallCheck(); + Teuchos::rcp_dynamic_cast(AggFact) + ->DisableMultipleCallCheck(); + Teuchos::rcp_dynamic_cast(AcFact) + ->DisableMultipleCallCheck(); + Teuchos::rcp_dynamic_cast(nspFact) + ->DisableMultipleCallCheck(); + + manager->SetFactory("CoarseSolver", + coarseFact); // TODO: should not be done in the loop + manager->SetFactory("Graph", dropFact); + manager->SetFactory("Aggregates", AggFact); + manager->SetFactory("DofsPerNode", dropFact); + manager->SetFactory("A", AcFact); + manager->SetFactory("P", PFact); + manager->SetFactory("Ptent", PtentFact); + manager->SetFactory("R", RFact); + manager->SetFactory("Nullspace", nspFact); + + // initmanager->SetFactory("CoarseSolver", coarseFact); + initmanager->SetFactory("Graph", dropFact); + initmanager->SetFactory("Aggregates", AggFact); + initmanager->SetFactory("DofsPerNode", dropFact); + initmanager->SetFactory("A", AcFact); + initmanager->SetFactory("P", PtentFact); // use nonsmoothed transfers + initmanager->SetFactory("Ptent", PtentFact); + initmanager->SetFactory("R", RFact); + initmanager->SetFactory("Nullspace", nspFact); + + this->AddFactoryManager(levelID, 1, manager); + 
this->AddInitFactoryManager(levelID, 1, initmanager); + } // for (level loop) +} + +template +void AdaptiveSaMLParameterListInterpreter< + Scalar, LocalOrdinal, GlobalOrdinal, Node>::SetupInitHierarchy(Hierarchy &H) + const { + TEUCHOS_TEST_FOR_EXCEPTION(!H.GetLevel(0)->IsAvailable("A"), + Exceptions::RuntimeError, + "No fine level operator"); + + RCP l = H.GetLevel(0); + RCP Op = l->Get>("A"); + SetupOperator(*Op); // use overloaded SetupMatrix routine + this->SetupExtra(H); + + // Setup Hierarchy + H.SetMaxCoarseSize(this->maxCoarseSize_); // TODO + + int levelID = 0; + int lastLevelID = this->numDesiredLevel_ - 1; + bool isLastLevel = false; + + while (!isLastLevel) { + bool r = H.Setup(levelID, InitLvlMngr(levelID - 1, lastLevelID), + InitLvlMngr(levelID, lastLevelID), + InitLvlMngr(levelID + 1, lastLevelID)); + + isLastLevel = r || (levelID == lastLevelID); + levelID++; } - - template - void AdaptiveSaMLParameterListInterpreter::SetupInitHierarchy(Hierarchy & H) const { - TEUCHOS_TEST_FOR_EXCEPTION(!H.GetLevel(0)->IsAvailable("A"), Exceptions::RuntimeError, "No fine level operator"); - - RCP l = H.GetLevel(0); - RCP Op = l->Get >("A"); - SetupOperator(*Op); // use overloaded SetupMatrix routine - this->SetupExtra(H); - - // Setup Hierarchy - H.SetMaxCoarseSize(this->maxCoarseSize_); // TODO - - int levelID = 0; - int lastLevelID = this->numDesiredLevel_ - 1; - bool isLastLevel = false; - - while(!isLastLevel) { - bool r = H.Setup(levelID, - InitLvlMngr(levelID-1, lastLevelID), - InitLvlMngr(levelID, lastLevelID), - InitLvlMngr(levelID+1, lastLevelID)); - - isLastLevel = r || (levelID == lastLevelID); - levelID++; - } - } - - template - void AdaptiveSaMLParameterListInterpreter::SetupHierarchy(Hierarchy & H) const { - - // set fine level null space - // usually this null space is provided from outside (by the user) using - // the ML parameter lists. 
- if (this->nullspace_ != NULL) { - RCP fineLevel = H.GetLevel(0); - const RCP rowMap = fineLevel->Get< RCP >("A")->getRowMap(); - RCP nullspace = MultiVectorFactory::Build(rowMap, nullspaceDim_, true); - - for ( size_t i=0; i < Teuchos::as(nullspaceDim_); i++) { - Teuchos::ArrayRCP nullspacei = nullspace->getDataNonConst(i); - const size_t myLength = nullspace->getLocalLength(); - - for (size_t j = 0; j < myLength; j++) { - nullspacei[j] = nullspace_[i*myLength + j]; - } +} + +template +void AdaptiveSaMLParameterListInterpreter::SetupHierarchy(Hierarchy &H) + const { + + // set fine level null space + // usually this null space is provided from outside (by the user) using + // the ML parameter lists. + if (this->nullspace_ != NULL) { + RCP fineLevel = H.GetLevel(0); + const RCP rowMap = fineLevel->Get>("A")->getRowMap(); + RCP nullspace = + MultiVectorFactory::Build(rowMap, nullspaceDim_, true); + + for (size_t i = 0; i < Teuchos::as(nullspaceDim_); i++) { + Teuchos::ArrayRCP nullspacei = nullspace->getDataNonConst(i); + const size_t myLength = nullspace->getLocalLength(); + + for (size_t j = 0; j < myLength; j++) { + nullspacei[j] = nullspace_[i * myLength + j]; } - - fineLevel->Set("Nullspace", nullspace); } - // keep aggregates - H.Keep("Aggregates", HierarchyManager::GetFactoryManager(0)->GetFactory("Aggregates").get()); - - /////////////////////////////// - - // build hierarchy for initialization - SetupInitHierarchy(H); - - { - // do some iterations with the built hierarchy to improve the null space - Teuchos::RCP Finest = H.GetLevel(0); // get finest level,MueLu::NoFactory::get() - Teuchos::RCP nspVector2 = Finest->Get >("Nullspace"); - - Xpetra::IO::Write("orig_nsp.vec", *nspVector2); + fineLevel->Set("Nullspace", nullspace); + } - RCP Op = Finest->Get >("A"); - Xpetra::IO::Write("A.mat", *Op); + // keep aggregates + H.Keep( + "Aggregates", + HierarchyManager::GetFactoryManager(0)->GetFactory("Aggregates").get()); + /////////////////////////////// - 
Teuchos::RCP homogRhsVec = MultiVectorFactory::Build(nspVector2->getMap(),nspVector2->getNumVectors(),true); - homogRhsVec->putScalar(0.0); + // build hierarchy for initialization + SetupInitHierarchy(H); - // do 1 multigrid cycle for improving the null space by "solving" - // A B_f = 0 - // where A is the system matrix and B_f the fine level null space vectors - H.Iterate(*homogRhsVec, *nspVector2, 1, false); + { + // do some iterations with the built hierarchy to improve the null space + Teuchos::RCP Finest = + H.GetLevel(0); // get finest level,MueLu::NoFactory::get() + Teuchos::RCP nspVector2 = + Finest->Get>("Nullspace"); - // store improved fine level null space - Finest->Set("Nullspace",nspVector2); + Xpetra::IO::Write("orig_nsp.vec", + *nspVector2); - Xpetra::IO::Write("new_nsp.vec", *nspVector2); + RCP Op = Finest->Get>("A"); + Xpetra::IO::Write("A.mat", *Op); - //H.Delete("CoarseSolver", init_levelManagers_[0]->GetFactory("CoarseSolver").get()); - } + Teuchos::RCP homogRhsVec = MultiVectorFactory::Build( + nspVector2->getMap(), nspVector2->getNumVectors(), true); + homogRhsVec->putScalar(0.0); - { - // do some clean up. - // remove all old default factories. Build new ones for the second build. - // this is a little bit tricky to understand - for(size_t k=0; k < HierarchyManager::getNumFactoryManagers(); k++) { - HierarchyManager::GetFactoryManager(k)->Clean(); - //Teuchos::rcp_dynamic_cast(HierarchyManager::GetFactoryManager(k)->GetFactory("Smoother"))->DisableMultipleCallCheck(); // after changing to MLParamterListInterpreter functions - } - // not sure about this. 
i only need it if Smoother is defined explicitely (not using default smoother) - // need this: otherwise RAPFactory::Build is complaining on level 0 - // and TentativePFactory::Build is complaining on level 1 - Teuchos::rcp_dynamic_cast(HierarchyManager::GetFactoryManager(0)->GetFactory("A"))->DisableMultipleCallCheck(); - Teuchos::rcp_dynamic_cast(HierarchyManager::GetFactoryManager(1)->GetFactory("P"))->DisableMultipleCallCheck(); - Teuchos::rcp_dynamic_cast(HierarchyManager::GetFactoryManager(1)->GetFactory("Ptent"))->DisableMultipleCallCheck(); - - HierarchyManager::SetupHierarchy(H); - } + // do 1 multigrid cycle for improving the null space by "solving" + // A B_f = 0 + // where A is the system matrix and B_f the fine level null space vectors + H.Iterate(*homogRhsVec, *nspVector2, 1, false); - } + // store improved fine level null space + Finest->Set("Nullspace", nspVector2); - template - void AdaptiveSaMLParameterListInterpreter::AddTransferFactory(const RCP& factory) { - // check if it's a TwoLevelFactoryBase based transfer factory - TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::rcp_dynamic_cast(factory) == Teuchos::null, Exceptions::BadCast, "Transfer factory is not derived from TwoLevelFactoryBase. 
Since transfer factories will be handled by the RAPFactory they have to be derived from TwoLevelFactoryBase!"); - TransferFacts_.push_back(factory); - } + Xpetra::IO::Write("new_nsp.vec", + *nspVector2); - template - size_t AdaptiveSaMLParameterListInterpreter::NumTransferFactories() const { - return TransferFacts_.size(); + // H.Delete("CoarseSolver", + // init_levelManagers_[0]->GetFactory("CoarseSolver").get()); } - template - void AdaptiveSaMLParameterListInterpreter::SetupOperator(Operator & Op) const { - try { - Matrix& A = dynamic_cast(Op); - if (A.IsFixedBlockSizeSet() && (A.GetFixedBlockSize() != blksize_)) - this->GetOStream(Warnings0) << "Setting matrix block size to " << blksize_ << " (value of the parameter in the list) " - << "instead of " << A.GetFixedBlockSize() << " (provided matrix)." << std::endl; - - A.SetFixedBlockSize(blksize_); - - } catch (std::bad_cast& e) { - this->GetOStream(Warnings0) << "Skipping setting block size as the operator is not a matrix" << std::endl; + { + // do some clean up. + // remove all old default factories. Build new ones for the second build. + // this is a little bit tricky to understand + for (size_t k = 0; k < HierarchyManager::getNumFactoryManagers(); k++) { + HierarchyManager::GetFactoryManager(k)->Clean(); + // Teuchos::rcp_dynamic_cast(HierarchyManager::GetFactoryManager(k)->GetFactory("Smoother"))->DisableMultipleCallCheck(); + // // after changing to MLParamterListInterpreter functions } + // not sure about this. 
i only need it if Smoother is defined explicitely + // (not using default smoother) need this: otherwise RAPFactory::Build is + // complaining on level 0 + // and TentativePFactory::Build is complaining on level 1 + Teuchos::rcp_dynamic_cast( + HierarchyManager::GetFactoryManager(0)->GetFactory("A")) + ->DisableMultipleCallCheck(); + Teuchos::rcp_dynamic_cast( + HierarchyManager::GetFactoryManager(1)->GetFactory("P")) + ->DisableMultipleCallCheck(); + Teuchos::rcp_dynamic_cast( + HierarchyManager::GetFactoryManager(1)->GetFactory("Ptent")) + ->DisableMultipleCallCheck(); + + HierarchyManager::SetupHierarchy(H); } +} + +template +void AdaptiveSaMLParameterListInterpreter< + Scalar, LocalOrdinal, GlobalOrdinal, + Node>::AddTransferFactory(const RCP &factory) { + // check if it's a TwoLevelFactoryBase based transfer factory + TEUCHOS_TEST_FOR_EXCEPTION( + Teuchos::rcp_dynamic_cast(factory) == Teuchos::null, + Exceptions::BadCast, + "Transfer factory is not derived from TwoLevelFactoryBase. Since " + "transfer factories will be handled by the RAPFactory they have to be " + "derived from TwoLevelFactoryBase!"); + TransferFacts_.push_back(factory); +} + +template +size_t +AdaptiveSaMLParameterListInterpreter::NumTransferFactories() const { + return TransferFacts_.size(); +} + +template +void AdaptiveSaMLParameterListInterpreter::SetupOperator(Operator &Op) + const { + try { + Matrix &A = dynamic_cast(Op); + if (A.IsFixedBlockSizeSet() && (A.GetFixedBlockSize() != blksize_)) + this->GetOStream(Warnings0) << "Setting matrix block size to " << blksize_ + << " (value of the parameter in the list) " + << "instead of " << A.GetFixedBlockSize() + << " (provided matrix)." 
<< std::endl; + + A.SetFixedBlockSize(blksize_); + + } catch (std::bad_cast &e) { + this->GetOStream(Warnings0) + << "Skipping setting block size as the operator is not a matrix" + << std::endl; + } +} } // namespace MueLu - #endif /* MUELU_ADAPTIVESAMLPARAMETERLISTINTERPRETER_DEF_HPP_ */ diff --git a/packages/muelu/src/Interface/MueLu_FactoryFactory_decl.hpp b/packages/muelu/src/Interface/MueLu_FactoryFactory_decl.hpp index 1832f0704030..69a3e249817f 100644 --- a/packages/muelu/src/Interface/MueLu_FactoryFactory_decl.hpp +++ b/packages/muelu/src/Interface/MueLu_FactoryFactory_decl.hpp @@ -49,8 +49,8 @@ #include #include -#include #include +#include #include "MueLu_ConfigDefs.hpp" #include "MueLu_FactoryFactory_fwd.hpp" @@ -58,14 +58,14 @@ #include "MueLu_HierarchyFactory.hpp" #include "MueLu_FactoryBase.hpp" +#include "MueLu_FactoryBase_fwd.hpp" #include "MueLu_FactoryManager.hpp" #include "MueLu_FactoryManagerBase_fwd.hpp" #include "MueLu_FactoryManager_fwd.hpp" -#include "MueLu_FactoryBase_fwd.hpp" #include "MueLu_Hierarchy_fwd.hpp" -#include "MueLu_Monitor.hpp" #include "MueLu_Exceptions.hpp" +#include "MueLu_Monitor.hpp" #include "MueLu_AggregateQualityEstimateFactory.hpp" #include "MueLu_AggregationExportFactory.hpp" @@ -84,9 +84,9 @@ #include "MueLu_ClassicalPFactory.hpp" #include "MueLu_CloneRepartitionInterface.hpp" #include "MueLu_CoalesceDropFactory.hpp" -#include "MueLu_SmooVecCoalesceDropFactory.hpp" #include "MueLu_CoarseMapFactory.hpp" #include "MueLu_CoarseningVisualizationFactory.hpp" +#include "MueLu_CombinePFactory.hpp" #include "MueLu_ConstraintFactory.hpp" #include "MueLu_CoordinatesTransferFactory.hpp" #include "MueLu_DirectSolver.hpp" @@ -95,20 +95,16 @@ #include "MueLu_FilteredAFactory.hpp" #include "MueLu_FineLevelInputDataFactory.hpp" #include "MueLu_GeneralGeometricPFactory.hpp" -#include "MueLu_ReplicatePFactory.hpp" -#include "MueLu_CombinePFactory.hpp" #include "MueLu_GenericRFactory.hpp" #include 
"MueLu_GeometricInterpolationPFactory.hpp" +#include "MueLu_IndefBlockedDiagonalSmoother.hpp" +#include "MueLu_InitialBlockNumberFactory.hpp" #include "MueLu_InterfaceAggregationFactory.hpp" #include "MueLu_InterfaceMappingTransferFactory.hpp" -#include "MueLu_InitialBlockNumberFactory.hpp" -#include "MueLu_IndefBlockedDiagonalSmoother.hpp" #include "MueLu_InverseApproximationFactory.hpp" #include "MueLu_IsorropiaInterface.hpp" #include "MueLu_LineDetectionFactory.hpp" #include "MueLu_LocalOrdinalTransferFactory.hpp" -#include "MueLu_RepartitionInterface.hpp" -#include "MueLu_RepartitionBlockDiagonalFactory.hpp" #include "MueLu_MapTransferFactory.hpp" #include "MueLu_MatrixAnalysisFactory.hpp" #include "MueLu_MultiVectorTransferFactory.hpp" @@ -117,23 +113,27 @@ #include "MueLu_NullspacePresmoothFactory.hpp" #include "MueLu_PatternFactory.hpp" #include "MueLu_PgPFactory.hpp" +#include "MueLu_RAPFactory.hpp" +#include "MueLu_RAPShiftFactory.hpp" +#include "MueLu_RebalanceAcFactory.hpp" +#include "MueLu_RebalanceBlockAcFactory.hpp" #include "MueLu_RebalanceBlockInterpolationFactory.hpp" #include "MueLu_RebalanceBlockRestrictionFactory.hpp" -#include "MueLu_RebalanceBlockAcFactory.hpp" #include "MueLu_RebalanceTransferFactory.hpp" #include "MueLu_RegionRFactory.hpp" +#include "MueLu_ReorderBlockAFactory.hpp" +#include "MueLu_RepartitionBlockDiagonalFactory.hpp" #include "MueLu_RepartitionFactory.hpp" #include "MueLu_RepartitionHeuristicFactory.hpp" -#include "MueLu_RAPFactory.hpp" -#include "MueLu_RAPShiftFactory.hpp" -#include "MueLu_RebalanceAcFactory.hpp" -#include "MueLu_ReorderBlockAFactory.hpp" +#include "MueLu_RepartitionInterface.hpp" +#include "MueLu_ReplicatePFactory.hpp" #include "MueLu_SaPFactory.hpp" #include "MueLu_ScaledNullspaceFactory.hpp" +#include "MueLu_SchurComplementFactory.hpp" #include "MueLu_SegregatedAFactory.hpp" #include "MueLu_SemiCoarsenPFactory.hpp" -#include "MueLu_SchurComplementFactory.hpp" #include "MueLu_SimpleSmoother.hpp" 
+#include "MueLu_SmooVecCoalesceDropFactory.hpp" #include "MueLu_SmootherFactory.hpp" #include "MueLu_StructuredAggregationFactory.hpp" #include "MueLu_StructuredLineDetectionFactory.hpp" @@ -141,44 +141,44 @@ #ifdef HAVE_MUELU_TEKO #include "MueLu_TekoSmoother.hpp" #endif +#include "MueLu_HybridAggregationFactory.hpp" +#include "MueLu_NodePartitionInterface.hpp" +#include "MueLu_RfromP_Or_TransP.hpp" #include "MueLu_TentativePFactory.hpp" #include "MueLu_ToggleCoordinatesTransferFactory.hpp" #include "MueLu_TogglePFactory.hpp" -#include "MueLu_TrilinosSmoother.hpp" #include "MueLu_TransPFactory.hpp" -#include "MueLu_RfromP_Or_TransP.hpp" +#include "MueLu_TrilinosSmoother.hpp" #include "MueLu_UncoupledAggregationFactory.hpp" -#include "MueLu_HybridAggregationFactory.hpp" #include "MueLu_UnsmooshFactory.hpp" #include "MueLu_UserAggregationFactory.hpp" #include "MueLu_UserPFactory.hpp" #include "MueLu_UzawaSmoother.hpp" #include "MueLu_VariableDofLaplacianFactory.hpp" #include "MueLu_ZeroSubBlockAFactory.hpp" -#include "MueLu_ZoltanInterface.hpp" #include "MueLu_Zoltan2Interface.hpp" -#include "MueLu_NodePartitionInterface.hpp" - +#include "MueLu_ZoltanInterface.hpp" #include "MueLu_CoalesceDropFactory_kokkos.hpp" #include "MueLu_GeometricInterpolationPFactory_kokkos.hpp" +#include "MueLu_MatrixFreeTentativePFactory.hpp" #include "MueLu_NullspaceFactory_kokkos.hpp" +#include "MueLu_RegionRFactory_kokkos.hpp" #include "MueLu_SaPFactory_kokkos.hpp" #include "MueLu_SemiCoarsenPFactory_kokkos.hpp" #include "MueLu_StructuredAggregationFactory_kokkos.hpp" #include "MueLu_TentativePFactory_kokkos.hpp" -#include "MueLu_MatrixFreeTentativePFactory.hpp" #include "MueLu_UncoupledAggregationFactory_kokkos.hpp" -#include "MueLu_RegionRFactory_kokkos.hpp" #ifdef HAVE_MUELU_MATLAB -// This is distasteful, but (sadly) neccesary due to peculiarities in MueLu's build system. +// This is distasteful, but (sadly) neccesary due to peculiarities in MueLu's +// build system. 
+#include "../matlab/src/MueLu_MatlabSmoother_decl.hpp" +#include "../matlab/src/MueLu_MatlabSmoother_def.hpp" #include "../matlab/src/MueLu_SingleLevelMatlabFactory_decl.hpp" #include "../matlab/src/MueLu_SingleLevelMatlabFactory_def.hpp" #include "../matlab/src/MueLu_TwoLevelMatlabFactory_decl.hpp" #include "../matlab/src/MueLu_TwoLevelMatlabFactory_def.hpp" -#include "../matlab/src/MueLu_MatlabSmoother_decl.hpp" -#include "../matlab/src/MueLu_MatlabSmoother_def.hpp" #endif #ifdef HAVE_MUELU_INTREPID2 @@ -187,777 +187,1231 @@ namespace MueLu { - /*! class FactoryFactory +/*! class FactoryFactory - @brief Factory that can generate other factories from +@brief Factory that can generate other factories from - */ - template - class FactoryFactory : public BaseClass { +*/ +template +class FactoryFactory : public BaseClass { #undef MUELU_FACTORYFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - typedef std::map > FactoryMap; // TODO: remove - typedef std::map > FactoryManagerMap; - - public: - - /// \brief: Interpret Factory parameter list and build new factory - /// - /// \param param [in]: ParameterEntry being either the parameter list containing the "factory" parameter declaring the factory type (e.g., "TrilinosSmoother") or being a plain Parameter containing the factory type as value - /// \param factoryMapIn [in]: FactoryMap containing a map between factory name (e.g., "smootherFact1") and corresponding factory of all previously defined factories - /// \param factoryManagersIn [in]: FactoryManagerMap containing a map between group names and Factory manager objects. Needed for factories with sub-factory managers. - /// - /// Parameter List Parsing: - /// --------- - /// - /// - /// or: - /// - /// - /// - /// ... 
- /// - /// - virtual RCP BuildFactory(const Teuchos::ParameterEntry& param, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - // Find factory - std::string factoryName; - Teuchos::ParameterList paramList; - if (!param.isList()) { - factoryName = Teuchos::getValue(param); - } else { - paramList = Teuchos::getValue(param); - factoryName = paramList.get("factory"); - } + typedef std::map> + FactoryMap; // TODO: remove + typedef std::map> FactoryManagerMap; + +public: + /// \brief: Interpret Factory parameter list and build new factory + /// + /// \param param [in]: ParameterEntry being either the parameter list + /// containing the "factory" parameter declaring the factory type (e.g., + /// "TrilinosSmoother") or being a plain Parameter containing the factory type + /// as value \param factoryMapIn [in]: FactoryMap containing a map between + /// factory name (e.g., "smootherFact1") and corresponding factory of all + /// previously defined factories \param factoryManagersIn [in]: + /// FactoryManagerMap containing a map between group names and Factory manager + /// objects. Needed for factories with sub-factory managers. + /// + /// Parameter List Parsing: + /// --------- + /// + /// + /// or: + /// + /// + /// + /// ... + /// + /// + virtual RCP + BuildFactory(const Teuchos::ParameterEntry ¶m, + const FactoryMap &factoryMapIn, + const FactoryManagerMap &factoryManagersIn) const { + // Find factory + std::string factoryName; + Teuchos::ParameterList paramList; + if (!param.isList()) { + factoryName = Teuchos::getValue(param); + } else { + paramList = Teuchos::getValue(param); + factoryName = paramList.get("factory"); + } - // TODO: see how Teko handles this (=> register factories). 
- if (factoryName == "AggregateQualityEstimateFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "AggregationExportFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "AmalgamationFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "BlockedCoarseMapFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "BlockedRAPFactory") return BuildRAPFactory (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "BrickAggregationFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "ClassicalMapFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "ClassicalPFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "CloneRepartitionInterface") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "CoarseMapFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "CoarseningVisualizationFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "CoalesceDropFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "SmooVecCoalesceDropFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "ConstraintFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "CoordinatesTransferFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "DirectSolver") return BuildDirectSolver (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "DropNegativeEntriesFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "EminPFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "FilteredAFactory") return Build2 (paramList, 
factoryMapIn, factoryManagersIn); - if (factoryName == "FineLevelInputDataFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "GeneralGeometricPFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "ReplicatePFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "CombinePFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "GenericRFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "GeometricInterpolationPFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "HybridAggregationFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "InterfaceAggregationFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "InterfaceMappingTransferFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "InverseApproximationFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "InitialBlockNumberFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "LineDetectionFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - // LocalOrdinalTransferFactory is a utility factory that can be used for multiple things, so there is no default - // if (factoryName == "LocalOrdinalTransferFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "MapTransferFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "MatrixAnalysisFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "MultiVectorTransferFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "NoFactory") return MueLu::NoFactory::getRCP(); - if (factoryName == "NoSmoother") return 
rcp(new SmootherFactory(Teuchos::null)); - if (factoryName == "NotayAggregationFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "NullspaceFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "NullspacePresmoothFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "PatternFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "PgPFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "SaPFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RAPFactory") return BuildRAPFactory (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RAPShiftFactory") return BuildRAPFactory (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RebalanceAcFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RebalanceTransferFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RegionRFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RegionRFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "ReorderBlockAFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RepartitionInterface") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "ScaledNullspaceFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "SegregatedAFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "SemiCoarsenPFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "StructuredAggregationFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "StructuredLineDetectionFactory") return Build2 (paramList, 
factoryMapIn, factoryManagersIn); - if (factoryName == "SubBlockAFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "TentativePFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "ToggleCoordinatesTransferFactory") return BuildToggleCoordinatesTransferFactory (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "TogglePFactory") return BuildTogglePFactory (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "TransPFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RfromP_Or_TransP") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "TrilinosSmoother") return BuildTrilinosSmoother (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "UncoupledAggregationFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "UnsmooshFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "UserAggregationFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "UserPFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "VariableDofLaplacianFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "ZeroSubBlockAFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "CoalesceDropFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "GeometricInterpolationPFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "NullspaceFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "SaPFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "SemiCoarsenPFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); 
- if (factoryName == "StructuredAggregationFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "TentativePFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "MatrixFreeTentativePFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "UncoupledAggregationFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); - - // Handle removed Kokkos factories - if (factoryName == "CoarseMapFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "CoordinatesTransferFactory_kokkos") return Build2 (paramList, factoryMapIn, factoryManagersIn); - - if (factoryName == "ZoltanInterface") { + // TODO: see how Teko handles this (=> register factories). + if (factoryName == "AggregateQualityEstimateFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "AggregationExportFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "AmalgamationFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "BlockedCoarseMapFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "BlockedRAPFactory") + return BuildRAPFactory(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "BrickAggregationFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "ClassicalMapFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "ClassicalPFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "CloneRepartitionInterface") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "CoarseMapFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "CoarseningVisualizationFactory") + return 
Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "CoalesceDropFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "SmooVecCoalesceDropFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "ConstraintFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "CoordinatesTransferFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "DirectSolver") + return BuildDirectSolver(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "DropNegativeEntriesFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "EminPFactory") + return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "FilteredAFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "FineLevelInputDataFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "GeneralGeometricPFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "ReplicatePFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "CombinePFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "GenericRFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "GeometricInterpolationPFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "HybridAggregationFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "InterfaceAggregationFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "InterfaceMappingTransferFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "InverseApproximationFactory") + return Build2(paramList, 
factoryMapIn, + factoryManagersIn); + if (factoryName == "InitialBlockNumberFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "LineDetectionFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + // LocalOrdinalTransferFactory is a utility factory that can be used for + // multiple things, so there is no default + // if (factoryName == "LocalOrdinalTransferFactory") return + // Build2 (paramList, + // factoryMapIn, factoryManagersIn); + if (factoryName == "MapTransferFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "MatrixAnalysisFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "MultiVectorTransferFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "NoFactory") + return MueLu::NoFactory::getRCP(); + if (factoryName == "NoSmoother") + return rcp(new SmootherFactory(Teuchos::null)); + if (factoryName == "NotayAggregationFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "NullspaceFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "NullspacePresmoothFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "PatternFactory") + return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "PgPFactory") + return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "SaPFactory") + return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "RAPFactory") + return BuildRAPFactory(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "RAPShiftFactory") + return BuildRAPFactory(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "RebalanceAcFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "RebalanceTransferFactory") + return 
Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "RegionRFactory") + return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "RegionRFactory_kokkos") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "ReorderBlockAFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "RepartitionInterface") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "ScaledNullspaceFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "SegregatedAFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "SemiCoarsenPFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "StructuredAggregationFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "StructuredLineDetectionFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "SubBlockAFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "TentativePFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "ToggleCoordinatesTransferFactory") + return BuildToggleCoordinatesTransferFactory(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "TogglePFactory") + return BuildTogglePFactory(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "TransPFactory") + return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "RfromP_Or_TransP") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "TrilinosSmoother") + return BuildTrilinosSmoother(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "UncoupledAggregationFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "UnsmooshFactory") + return 
Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "UserAggregationFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "UserPFactory") + return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "VariableDofLaplacianFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "ZeroSubBlockAFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "CoalesceDropFactory_kokkos") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "GeometricInterpolationPFactory_kokkos") + return Build2( + paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "NullspaceFactory_kokkos") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "SaPFactory_kokkos") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "SemiCoarsenPFactory_kokkos") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "StructuredAggregationFactory_kokkos") + return Build2( + paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "TentativePFactory_kokkos") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "MatrixFreeTentativePFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "UncoupledAggregationFactory_kokkos") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + + // Handle removed Kokkos factories + if (factoryName == "CoarseMapFactory_kokkos") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "CoordinatesTransferFactory_kokkos") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + + if (factoryName == "ZoltanInterface") { #if defined(HAVE_MUELU_ZOLTAN) && defined(HAVE_MPI) - return Build2(paramList, factoryMapIn, factoryManagersIn); + return Build2(paramList, factoryMapIn, + 
factoryManagersIn); #else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory:BuildFactory(): Cannot create a ZoltanInterface object: Zoltan is disabled: HAVE_MUELU_ZOLTAN && HAVE_MPI == false."); + TEUCHOS_TEST_FOR_EXCEPTION( + true, Exceptions::RuntimeError, + "MueLu::FactoryFactory:BuildFactory(): Cannot create a " + "ZoltanInterface object: Zoltan is disabled: HAVE_MUELU_ZOLTAN && " + "HAVE_MPI == false."); #endif // HAVE_MUELU_ZOLTAN && HAVE_MPI - } - if (factoryName == "Zoltan2Interface") { + } + if (factoryName == "Zoltan2Interface") { #if defined(HAVE_MUELU_ZOLTAN2) && defined(HAVE_MPI) - return Build2(paramList, factoryMapIn, factoryManagersIn); + return Build2(paramList, factoryMapIn, + factoryManagersIn); #else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory:BuildFactory(): Cannot create a Zoltan2Interface object: Zoltan2 is disabled: HAVE_MUELU_ZOLTAN2 && HAVE_MPI == false."); + TEUCHOS_TEST_FOR_EXCEPTION( + true, Exceptions::RuntimeError, + "MueLu::FactoryFactory:BuildFactory(): Cannot create a " + "Zoltan2Interface object: Zoltan2 is disabled: HAVE_MUELU_ZOLTAN2 && " + "HAVE_MPI == false."); #endif // HAVE_MUELU_ZOLTAN2 && HAVE_MPI - } - if (factoryName == "IsorropiaInterface") { + } + if (factoryName == "IsorropiaInterface") { #if defined(HAVE_MUELU_ISORROPIA) && defined(HAVE_MPI) - return Build2(paramList, factoryMapIn, factoryManagersIn); + return Build2(paramList, factoryMapIn, + factoryManagersIn); #else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory:BuildFactory(): Cannot create a IsorropiaInterface object: Isorropia is disabled: HAVE_MUELU_ISORROPIA && HAVE_MPI == false."); + TEUCHOS_TEST_FOR_EXCEPTION( + true, Exceptions::RuntimeError, + "MueLu::FactoryFactory:BuildFactory(): Cannot create a " + "IsorropiaInterface object: Isorropia is disabled: " + "HAVE_MUELU_ISORROPIA && HAVE_MPI == false."); #endif // HAVE_MUELU_ZOLTAN2 && HAVE_MPI - } 
+ } - if (factoryName == "NodePartitionInterface") { + if (factoryName == "NodePartitionInterface") { #if defined(HAVE_MPI) - return Build2(paramList, factoryMapIn, factoryManagersIn); + return Build2(paramList, factoryMapIn, + factoryManagersIn); #else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory:BuildFactory(): Cannot create a NodePartitionInterface object: HAVE_MPI == false."); + TEUCHOS_TEST_FOR_EXCEPTION( + true, Exceptions::RuntimeError, + "MueLu::FactoryFactory:BuildFactory(): Cannot create a " + "NodePartitionInterface object: HAVE_MPI == false."); #endif // HAVE_MPI - } + } - if (factoryName == "RepartitionFactory") { + if (factoryName == "RepartitionFactory") { #ifdef HAVE_MPI - return Build2(paramList, factoryMapIn, factoryManagersIn); + return Build2(paramList, factoryMapIn, + factoryManagersIn); #else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory:BuildFactory(): Cannot create a RepartitionFactory object: HAVE_MPI == false."); + TEUCHOS_TEST_FOR_EXCEPTION( + true, Exceptions::RuntimeError, + "MueLu::FactoryFactory:BuildFactory(): Cannot create a " + "RepartitionFactory object: HAVE_MPI == false."); #endif // HAVE_MPI - } - if (factoryName == "RepartitionHeuristicFactory") { + } + if (factoryName == "RepartitionHeuristicFactory") { #ifdef HAVE_MPI - return Build2(paramList, factoryMapIn, factoryManagersIn); + return Build2(paramList, factoryMapIn, + factoryManagersIn); #else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory:BuildFactory(): Cannot create a RepartitionHeuristicFactory object: HAVE_MPI == false."); + TEUCHOS_TEST_FOR_EXCEPTION( + true, Exceptions::RuntimeError, + "MueLu::FactoryFactory:BuildFactory(): Cannot create a " + "RepartitionHeuristicFactory object: HAVE_MPI == false."); #endif // HAVE_MPI - } - // Blocked factories - if (factoryName == "BlockedCoordinatesTransferFactory") return BuildBlockedCoordFactory (paramList, 
factoryMapIn, factoryManagersIn); - if (factoryName == "BlockedDirectSolver") return BuildBlockedDirectSolver(paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "BlockedGaussSeidelSmoother") return BuildBlockedSmoother(paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "BlockedJacobiSmoother") return BuildBlockedSmoother(paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "BlockedPFactory") return BuildBlockedFactory(paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "BraessSarazinSmoother") return BuildBlockedSmoother(paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "IndefiniteBlockDiagonalSmoother") return BuildBlockedSmoother(paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "SimpleSmoother") return BuildBlockedSmoother(paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "SchurComplementFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RebalanceBlockRestrictionFactory")return BuildBlockedFactory(paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RebalanceBlockAcFactory") return BuildBlockedFactory(paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "RebalanceBlockInterpolationFactory") return BuildBlockedFactory(paramList, factoryMapIn, factoryManagersIn); + } + // Blocked factories + if (factoryName == "BlockedCoordinatesTransferFactory") + return BuildBlockedCoordFactory( + paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "BlockedDirectSolver") + return BuildBlockedDirectSolver(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "BlockedGaussSeidelSmoother") + return BuildBlockedSmoother( + paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "BlockedJacobiSmoother") + return BuildBlockedSmoother( + paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "BlockedPFactory") + return BuildBlockedFactory(paramList, factoryMapIn, + 
factoryManagersIn); + if (factoryName == "BraessSarazinSmoother") + return BuildBlockedSmoother( + paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "IndefiniteBlockDiagonalSmoother") + return BuildBlockedSmoother( + paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "SimpleSmoother") + return BuildBlockedSmoother(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "SchurComplementFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "RebalanceBlockRestrictionFactory") + return BuildBlockedFactory( + paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "RebalanceBlockAcFactory") + return BuildBlockedFactory( + paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "RebalanceBlockInterpolationFactory") + return BuildBlockedFactory( + paramList, factoryMapIn, factoryManagersIn); #ifdef HAVE_MPI - if (factoryName == "RepartitionBlockDiagonalFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "RepartitionBlockDiagonalFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); #endif #ifdef HAVE_MUELU_TEKO - if (factoryName == "TekoSmoother") return BuildTekoSmoother(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "TekoSmoother") + return BuildTekoSmoother(paramList, factoryMapIn, factoryManagersIn); #endif - if (factoryName == "UzawaSmoother") return BuildBlockedSmoother(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "UzawaSmoother") + return BuildBlockedSmoother(paramList, factoryMapIn, + factoryManagersIn); // Matlab factories #ifdef HAVE_MUELU_MATLAB - if (factoryName == "TwoLevelMatlabFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "SingleLevelMatlabFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); - if (factoryName == "MatlabSmoother") return BuildMatlabSmoother (paramList, factoryMapIn, 
factoryManagersIn); + if (factoryName == "TwoLevelMatlabFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "SingleLevelMatlabFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); + if (factoryName == "MatlabSmoother") + return BuildMatlabSmoother(paramList, factoryMapIn, factoryManagersIn); #endif #ifdef HAVE_MUELU_INTREPID2 - if (factoryName == "IntrepidPCoarsenFactory") return Build2 (paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "IntrepidPCoarsenFactory") + return Build2(paramList, factoryMapIn, + factoryManagersIn); #endif - // Use a user defined factories (in node) - if (factoryMapIn.find(factoryName) != factoryMapIn.end()) { - TEUCHOS_TEST_FOR_EXCEPTION((param.isList() && (++paramList.begin() != paramList.end())), Exceptions::RuntimeError, - "MueLu::FactoryFactory: Error during the parsing of: " << std::endl << paramList << std::endl - << "'" << factoryName << "' is not a factory name but an existing instance of a factory." << std::endl - << "Extra parameters cannot be specified after the creation of the object." << std::endl << std::endl - << "Correct syntaxes includes:" << std::endl - << " " << std::endl - << "or" << std::endl - << " " << std::endl - ); - - return factoryMapIn.find(factoryName)->second; - } + // Use a user defined factories (in node) + if (factoryMapIn.find(factoryName) != factoryMapIn.end()) { + TEUCHOS_TEST_FOR_EXCEPTION( + (param.isList() && (++paramList.begin() != paramList.end())), + Exceptions::RuntimeError, + "MueLu::FactoryFactory: Error during the parsing of: " + << std::endl + << paramList << std::endl + << "'" << factoryName + << "' is not a factory name but an existing instance of a " + "factory." + << std::endl + << "Extra parameters cannot be specified after the creation of " + "the object." 
+ << std::endl + << std::endl + << "Correct syntaxes includes:" << std::endl + << " " << std::endl + << "or" << std::endl + << " " << std::endl); + + return factoryMapIn.find(factoryName)->second; + } - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::FactoryFactory: unknown factory name : " << factoryName); + TEUCHOS_TEST_FOR_EXCEPTION( + true, Exceptions::RuntimeError, + "MueLu::FactoryFactory: unknown factory name : " << factoryName); + + TEUCHOS_UNREACHABLE_RETURN(Teuchos::null); + } + + // + // + // + + // FOLLOWING FUNCTIONS SHOULD LIVE WITH THE CORRESPONDING CLASS + + // + // + // + +#define arraysize(ar) (sizeof(ar) / sizeof(ar[0])) + + template // T must implement the Factory interface + RCP Build(const Teuchos::ParameterList ¶mList, + const FactoryMap &factoryMapIn, + const FactoryManagerMap &factoryManagersIn) const { + RCP factory = rcp(new T()); + + const char *strarray[] = {"A", + "P", + "R", + "Graph", + "UnAmalgamationInfo", + "Aggregates", + "Nullspace", + "TransferFactory", + "DofsPerNode"}; + std::vector v(strarray, strarray + arraysize(strarray)); + for (size_t i = 0; i < v.size(); ++i) + if (paramList.isParameter(v[i])) + factory->SetFactory(v[i], + BuildFactory(paramList.getEntry(v[i]), factoryMapIn, + factoryManagersIn)); + + return factory; + } + + template // T must implement the Factory interface + RCP Build2(const Teuchos::ParameterList ¶mList, + const FactoryMap &factoryMapIn, + const FactoryManagerMap &factoryManagersIn) const { + RCP factory = rcp(new T()); + + ParameterList paramListWithFactories; + + // Read the RCP parameters of the class T + RCP validParamList = + factory->GetValidParameterList(); // TODO check for Teuchos::null (no + // parameter list validation) + TEUCHOS_TEST_FOR_EXCEPTION(validParamList == Teuchos::null, + Exceptions::RuntimeError, + "FactoryFactory::Build2: default parameter list " + "is null. 
Please fix this."); + for (ParameterList::ConstIterator param = validParamList->begin(); + param != validParamList->end(); ++param) { + const std::string &pName = validParamList->name(param); + + if (!paramList.isParameter(pName)) { + // Ignore unknown parameters + continue; + } - TEUCHOS_UNREACHABLE_RETURN(Teuchos::null); + if (validParamList->isType>(pName)) { + // Generate or get factory described by param + RCP generatingFact = BuildFactory( + paramList.getEntry(pName), factoryMapIn, factoryManagersIn); + paramListWithFactories.set(pName, generatingFact); + } else if (validParamList->isType>(pName)) { + if (pName == "ParameterList") { + // NOTE: we cannot use + // subList = sublist(rcpFromRef(paramList), pName) + // here as that would result in sublist also being a reference to a + // temporary object. The resulting dereferencing in the corresponding + // factory would then segfault + RCP subList = + Teuchos::sublist(rcp(new ParameterList(paramList)), pName); + paramListWithFactories.set(pName, subList); + } + } else { + paramListWithFactories.setEntry(pName, paramList.getEntry(pName)); + } } - // - // - // + // Configure the factory + factory->SetParameterList(paramListWithFactories); - // FOLLOWING FUNCTIONS SHOULD LIVE WITH THE CORRESPONDING CLASS + return factory; + } - // - // - // + template // T must implement the Factory interface + RCP BuildRAPFactory(const Teuchos::ParameterList ¶mList, + const FactoryMap &factoryMapIn, + const FactoryManagerMap &factoryManagersIn) const { + RCP factory; + if (paramList.isSublist("TransferFactories") == false) { + factory = Build2(paramList, factoryMapIn, factoryManagersIn); -#define arraysize(ar) (sizeof(ar) / sizeof(ar[0])) + } else { + RCP paramListNonConst = + rcp(new Teuchos::ParameterList(paramList)); + RCP transferFactories = + rcp(new Teuchos::ParameterList( + *sublist(paramListNonConst, "TransferFactories"))); - template // T must implement the Factory interface - RCP Build(const Teuchos::ParameterList& 
paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - RCP factory = rcp(new T()); + paramListNonConst->remove("TransferFactories"); - const char* strarray[] = {"A", "P", "R", "Graph", "UnAmalgamationInfo", "Aggregates", "Nullspace", "TransferFactory", "DofsPerNode"}; - std::vector v(strarray, strarray + arraysize(strarray)); - for (size_t i = 0; i < v.size(); ++i) - if (paramList.isParameter(v[i])) - factory->SetFactory(v[i], BuildFactory(paramList.getEntry(v[i]), factoryMapIn, factoryManagersIn)); + factory = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); - return factory; + for (Teuchos::ParameterList::ConstIterator param = + transferFactories->begin(); + param != transferFactories->end(); ++param) { + RCP p = BuildFactory( + transferFactories->entry(param), factoryMapIn, factoryManagersIn); + factory->AddTransferFactory(p); + } } - template // T must implement the Factory interface - RCP Build2(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - RCP factory = rcp(new T()); + return factory; + } + + template // T must implement the Factory interface + RCP BuildTogglePFactory(const Teuchos::ParameterList ¶mList, + const FactoryMap &factoryMapIn, + const FactoryManagerMap &factoryManagersIn) const { + RCP factory; + if (paramList.isSublist("TransferFactories") == false) { + // TODO put in an error message: the TogglePFactory needs a + // TransferFactories sublist! 
+ factory = Build2(paramList, factoryMapIn, factoryManagersIn); + + } else { + RCP paramListNonConst = + rcp(new Teuchos::ParameterList(paramList)); + RCP transferFactories = + rcp(new Teuchos::ParameterList( + *sublist(paramListNonConst, "TransferFactories"))); - ParameterList paramListWithFactories; - - // Read the RCP parameters of the class T - RCP validParamList = factory->GetValidParameterList(); // TODO check for Teuchos::null (no parameter list validation) - TEUCHOS_TEST_FOR_EXCEPTION(validParamList == Teuchos::null, Exceptions::RuntimeError, "FactoryFactory::Build2: default parameter list is null. Please fix this."); - for (ParameterList::ConstIterator param = validParamList->begin(); param != validParamList->end(); ++param) { - const std::string& pName = validParamList->name(param); + paramListNonConst->remove("TransferFactories"); - if (!paramList.isParameter(pName)) { - // Ignore unknown parameters + // build TogglePFactory + factory = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); + + // count how many prolongation factories and how many coarse null space + // factories have been declared. the numbers must match! 
+ int numProlongatorFactories = 0; + int numPtentFactories = 0; + int numCoarseNspFactories = 0; + for (Teuchos::ParameterList::ConstIterator param = + transferFactories->begin(); + param != transferFactories->end(); ++param) { + size_t foundNsp = transferFactories->name(param).find("Nullspace"); + if (foundNsp != std::string::npos && foundNsp == 0 && + transferFactories->name(param).length() == 10) { + numCoarseNspFactories++; continue; } - - if (validParamList->isType< RCP >(pName)) { - // Generate or get factory described by param - RCP generatingFact = BuildFactory(paramList.getEntry(pName), factoryMapIn, factoryManagersIn); - paramListWithFactories.set(pName, generatingFact); - } else if (validParamList->isType >(pName)) { - if (pName == "ParameterList") { - // NOTE: we cannot use - // subList = sublist(rcpFromRef(paramList), pName) - // here as that would result in sublist also being a reference to a temporary object. - // The resulting dereferencing in the corresponding factory would then segfault - RCP subList = Teuchos::sublist(rcp(new ParameterList(paramList)), pName); - paramListWithFactories.set(pName, subList); - } - } else { - paramListWithFactories.setEntry(pName, paramList.getEntry(pName)); + size_t foundPtent = transferFactories->name(param).find("Ptent"); + if (foundPtent != std::string::npos && foundPtent == 0 && + transferFactories->name(param).length() == 6) { + numPtentFactories++; + continue; } - } - - // Configure the factory - factory->SetParameterList(paramListWithFactories); - - return factory; - } - - template // T must implement the Factory interface - RCP BuildRAPFactory(const Teuchos::ParameterList & paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - RCP factory; - if (paramList.isSublist("TransferFactories") == false) { - factory = Build2(paramList, factoryMapIn, factoryManagersIn); - - } else { - RCP paramListNonConst = rcp(new Teuchos::ParameterList(paramList)); - RCP transferFactories 
= rcp(new Teuchos::ParameterList(*sublist(paramListNonConst, "TransferFactories"))); - - paramListNonConst->remove("TransferFactories"); - - factory = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); - - for (Teuchos::ParameterList::ConstIterator param = transferFactories->begin(); param != transferFactories->end(); ++param) { - RCP p = BuildFactory(transferFactories->entry(param), factoryMapIn, factoryManagersIn); - factory->AddTransferFactory(p); + size_t foundP = transferFactories->name(param).find("P"); + if (foundP != std::string::npos && foundP == 0 && + transferFactories->name(param).length() == 2) { + numProlongatorFactories++; + continue; } } + TEUCHOS_TEST_FOR_EXCEPTION( + numProlongatorFactories != numCoarseNspFactories, + Exceptions::RuntimeError, + "FactoryFactory::BuildToggleP: The user has to provide the same " + "number of prolongator and coarse nullspace factories!"); + TEUCHOS_TEST_FOR_EXCEPTION( + numPtentFactories != numCoarseNspFactories, Exceptions::RuntimeError, + "FactoryFactory::BuildToggleP: The user has to provide the same " + "number of ptent and coarse nullspace factories!"); + TEUCHOS_TEST_FOR_EXCEPTION( + numProlongatorFactories < 2, Exceptions::RuntimeError, + "FactoryFactory::BuildToggleP: The TogglePFactory needs at least two " + "different prolongation operators. The factories have to be provided " + "using the names P%i and Nullspace %i, where %i denotes a number " + "between 1 and 9."); - return factory; - } - - template // T must implement the Factory interface - RCP BuildTogglePFactory(const Teuchos::ParameterList & paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - RCP factory; - if (paramList.isSublist("TransferFactories") == false) { - //TODO put in an error message: the TogglePFactory needs a TransferFactories sublist! 
- factory = Build2(paramList, factoryMapIn, factoryManagersIn); - - } else { - RCP paramListNonConst = rcp(new Teuchos::ParameterList(paramList)); - RCP transferFactories = rcp(new Teuchos::ParameterList(*sublist(paramListNonConst, "TransferFactories"))); - - paramListNonConst->remove("TransferFactories"); - - // build TogglePFactory - factory = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); - - // count how many prolongation factories and how many coarse null space factories have been declared. - // the numbers must match! - int numProlongatorFactories = 0; - int numPtentFactories = 0; - int numCoarseNspFactories = 0; - for (Teuchos::ParameterList::ConstIterator param = transferFactories->begin(); param != transferFactories->end(); ++param) { - size_t foundNsp = transferFactories->name(param).find("Nullspace"); - if (foundNsp != std::string::npos && foundNsp == 0 && transferFactories->name(param).length()==10) { - numCoarseNspFactories++; - continue; - } - size_t foundPtent = transferFactories->name(param).find("Ptent"); - if (foundPtent != std::string::npos && foundPtent == 0 && transferFactories->name(param).length()==6) { - numPtentFactories++; - continue; - } - size_t foundP = transferFactories->name(param).find("P"); - if (foundP != std::string::npos && foundP == 0 && transferFactories->name(param).length()==2) { - numProlongatorFactories++; - continue; - } - } - TEUCHOS_TEST_FOR_EXCEPTION(numProlongatorFactories!=numCoarseNspFactories, Exceptions::RuntimeError, "FactoryFactory::BuildToggleP: The user has to provide the same number of prolongator and coarse nullspace factories!"); - TEUCHOS_TEST_FOR_EXCEPTION(numPtentFactories!=numCoarseNspFactories, Exceptions::RuntimeError, "FactoryFactory::BuildToggleP: The user has to provide the same number of ptent and coarse nullspace factories!"); - TEUCHOS_TEST_FOR_EXCEPTION(numProlongatorFactories < 2, Exceptions::RuntimeError, "FactoryFactory::BuildToggleP: The TogglePFactory needs at least two 
different prolongation operators. The factories have to be provided using the names P%i and Nullspace %i, where %i denotes a number between 1 and 9."); - - // create empty vectors with data - std::vector prolongatorFactoryNames(numProlongatorFactories); - std::vector coarseNspFactoryNames(numProlongatorFactories); - std::vector ptentFactoryNames(numProlongatorFactories); - - for (Teuchos::ParameterList::ConstIterator param = transferFactories->begin(); param != transferFactories->end(); ++param) { - size_t foundNsp = transferFactories->name(param).find("Nullspace"); - if (foundNsp != std::string::npos && foundNsp == 0 && transferFactories->name(param).length()==10) { - int number = atoi(&(transferFactories->name(param).at(9))); - TEUCHOS_TEST_FOR_EXCEPTION(number < 1 || number > numProlongatorFactories, Exceptions::RuntimeError, "FactoryFactory::BuildToggleP: Please use the format Nullspace%i with %i an integer between 1 and the maximum number of prolongation operators in TogglePFactory!"); - coarseNspFactoryNames[number-1] = transferFactories->entry(param); - continue; - } - size_t foundPtent = transferFactories->name(param).find("Ptent"); - if (foundPtent != std::string::npos && foundPtent == 0 && transferFactories->name(param).length()==6) { - int number = atoi(&(transferFactories->name(param).at(5))); - TEUCHOS_TEST_FOR_EXCEPTION(number < 1 || number > numPtentFactories, Exceptions::RuntimeError, "FactoryFactory::BuildToggleP: Please use the format Ptent%i with %i an integer between 1 and the maximum number of prolongation operators in TogglePFactory!"); - ptentFactoryNames[number-1] = transferFactories->entry(param); - continue; - } - size_t foundP = transferFactories->name(param).find("P"); - if (foundP != std::string::npos && foundP == 0 && transferFactories->name(param).length()==2) { - int number = atoi(&(transferFactories->name(param).at(1))); - TEUCHOS_TEST_FOR_EXCEPTION(number < 1 || number > numProlongatorFactories, Exceptions::RuntimeError, 
"FactoryFactory::BuildToggleP: Please use the format P%i with %i an integer between 1 and the maximum number of prolongation operators in TogglePFactory!"); - prolongatorFactoryNames[number-1] = transferFactories->entry(param); - continue; - } - } - - // register all prolongation factories in TogglePFactory - for (std::vector::const_iterator it = prolongatorFactoryNames.begin(); it != prolongatorFactoryNames.end(); ++it) { - RCP p = BuildFactory(*it, factoryMapIn, factoryManagersIn); - factory->AddProlongatorFactory(p); + // create empty vectors with data + std::vector prolongatorFactoryNames( + numProlongatorFactories); + std::vector coarseNspFactoryNames( + numProlongatorFactories); + std::vector ptentFactoryNames( + numProlongatorFactories); + + for (Teuchos::ParameterList::ConstIterator param = + transferFactories->begin(); + param != transferFactories->end(); ++param) { + size_t foundNsp = transferFactories->name(param).find("Nullspace"); + if (foundNsp != std::string::npos && foundNsp == 0 && + transferFactories->name(param).length() == 10) { + int number = atoi(&(transferFactories->name(param).at(9))); + TEUCHOS_TEST_FOR_EXCEPTION( + number < 1 || number > numProlongatorFactories, + Exceptions::RuntimeError, + "FactoryFactory::BuildToggleP: Please use the format Nullspace%i " + "with %i an integer between 1 and the maximum number of " + "prolongation operators in TogglePFactory!"); + coarseNspFactoryNames[number - 1] = transferFactories->entry(param); + continue; } - - // register all tentative prolongation factories in TogglePFactory - for (std::vector::const_iterator it = ptentFactoryNames.begin(); it != ptentFactoryNames.end(); ++it) { - RCP p = BuildFactory(*it, factoryMapIn, factoryManagersIn); - factory->AddPtentFactory(p); + size_t foundPtent = transferFactories->name(param).find("Ptent"); + if (foundPtent != std::string::npos && foundPtent == 0 && + transferFactories->name(param).length() == 6) { + int number = 
atoi(&(transferFactories->name(param).at(5))); + TEUCHOS_TEST_FOR_EXCEPTION( + number < 1 || number > numPtentFactories, + Exceptions::RuntimeError, + "FactoryFactory::BuildToggleP: Please use the format Ptent%i " + "with %i an integer between 1 and the maximum number of " + "prolongation operators in TogglePFactory!"); + ptentFactoryNames[number - 1] = transferFactories->entry(param); + continue; } - - // register all coarse nullspace factories in TogglePFactory - for (std::vector::const_iterator it = coarseNspFactoryNames.begin(); it != coarseNspFactoryNames.end(); ++it) { - RCP p = BuildFactory(*it, factoryMapIn, factoryManagersIn); - factory->AddCoarseNullspaceFactory(p); + size_t foundP = transferFactories->name(param).find("P"); + if (foundP != std::string::npos && foundP == 0 && + transferFactories->name(param).length() == 2) { + int number = atoi(&(transferFactories->name(param).at(1))); + TEUCHOS_TEST_FOR_EXCEPTION( + number < 1 || number > numProlongatorFactories, + Exceptions::RuntimeError, + "FactoryFactory::BuildToggleP: Please use the format P%i with %i " + "an integer between 1 and the maximum number of prolongation " + "operators in TogglePFactory!"); + prolongatorFactoryNames[number - 1] = transferFactories->entry(param); + continue; } } - return factory; - } - - RCP BuildToggleCoordinatesTransferFactory(const Teuchos::ParameterList & paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - RCP factory; - TEUCHOS_TEST_FOR_EXCEPTION(paramList.isSublist("TransferFactories") == false, Exceptions::RuntimeError, "FactoryFactory::BuildToggleCoordinatesTransferFactory: the ToggleCoordinatesTransferFactory needs a sublist 'TransferFactories' containing information about the subfactories for coordinate transfer!"); - - RCP paramListNonConst = rcp(new Teuchos::ParameterList(paramList)); - RCP transferFactories = rcp(new Teuchos::ParameterList(*sublist(paramListNonConst, "TransferFactories"))); - 
paramListNonConst->remove("TransferFactories"); - // build CoordinatesTransferFactory - factory = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); - - // count how many coordinate transfer factories have been declared. - // the numbers must match! - int numCoordTransferFactories = 0; - for (Teuchos::ParameterList::ConstIterator param = transferFactories->begin(); param != transferFactories->end(); ++param) { - size_t foundCoordinates = transferFactories->name(param).find("Coordinates"); - if (foundCoordinates != std::string::npos && foundCoordinates == 0 && transferFactories->name(param).length()==12) { - numCoordTransferFactories++; - continue; - } + // register all prolongation factories in TogglePFactory + for (std::vector::const_iterator it = + prolongatorFactoryNames.begin(); + it != prolongatorFactoryNames.end(); ++it) { + RCP p = + BuildFactory(*it, factoryMapIn, factoryManagersIn); + factory->AddProlongatorFactory(p); } - TEUCHOS_TEST_FOR_EXCEPTION(numCoordTransferFactories != 2, Exceptions::RuntimeError, "FactoryFactory::BuildToggleCoordinatesTransfer: The ToggleCoordinatesTransferFactory needs two (different) coordinate transfer factories. 
The factories have to be provided using the names Coordinates%i, where %i denotes a number between 1 and 9."); - // create empty vectors with data - std::vector coarseCoordsFactoryNames(numCoordTransferFactories); - - for (Teuchos::ParameterList::ConstIterator param = transferFactories->begin(); param != transferFactories->end(); ++param) { - size_t foundCoords = transferFactories->name(param).find("Coordinates"); - if (foundCoords != std::string::npos && foundCoords == 0 && transferFactories->name(param).length()==12) { - int number = atoi(&(transferFactories->name(param).at(11))); - TEUCHOS_TEST_FOR_EXCEPTION(number < 1 || number > numCoordTransferFactories, Exceptions::RuntimeError, "FactoryFactory::BuildToggleCoordinatesTransfer: Please use the format Coordinates%i with %i an integer between 1 and the maximum number of coordinate transfer factories in ToggleCoordinatesTransferFactory!"); - coarseCoordsFactoryNames[number-1] = transferFactories->entry(param); - continue; - } + // register all tentative prolongation factories in TogglePFactory + for (std::vector::const_iterator it = + ptentFactoryNames.begin(); + it != ptentFactoryNames.end(); ++it) { + RCP p = + BuildFactory(*it, factoryMapIn, factoryManagersIn); + factory->AddPtentFactory(p); } // register all coarse nullspace factories in TogglePFactory - for (std::vector::const_iterator it = coarseCoordsFactoryNames.begin(); it != coarseCoordsFactoryNames.end(); ++it) { - RCP p = BuildFactory(*it, factoryMapIn, factoryManagersIn); - factory->AddCoordTransferFactory(p); + for (std::vector::const_iterator it = + coarseNspFactoryNames.begin(); + it != coarseNspFactoryNames.end(); ++it) { + RCP p = + BuildFactory(*it, factoryMapIn, factoryManagersIn); + factory->AddCoarseNullspaceFactory(p); } - - return factory; } - - //! TrilinosSmoother - // Parameter List Parsing: - // - // - // - // - // - // ... 
- // - // - RCP BuildTrilinosSmoother(const Teuchos::ParameterList & paramList, const FactoryMap & factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - if (paramList.begin() == paramList.end()) - return rcp(new SmootherFactory(rcp(new TrilinosSmoother()))); - - TEUCHOS_TEST_FOR_EXCEPTION(paramList.get("factory") != "TrilinosSmoother", Exceptions::RuntimeError, ""); - - // Is it true? TEUCHOS_TEST_FOR_EXCEPTION(!paramList.isParameter("type"), Exceptions::RuntimeError, "TrilinosSmoother: parameter 'type' is mandatory"); - // type="" is default in TrilinosSmoother, but what happen then? - - std::string type=""; if(paramList.isParameter("type")) type = paramList.get("type"); - int overlap=0; if(paramList.isParameter("overlap")) overlap = paramList.get ("overlap"); - // std::string verbose; if(paramList.isParameter("verbose")) verbose = paramList.get("verbose"); - Teuchos::ParameterList params; if(paramList.isParameter("ParameterList")) params = paramList.get("ParameterList"); - - // parameters from SmootherFactory - //bool bKeepSmootherData = false; if(paramList.isParameter("keep smoother data")) bKeepSmootherData = paramList.get("keep smoother data"); - - // Read in factory information for smoothers (if available...) 
- // NOTE: only a selected number of factories can be used with the Trilinos smoother - // smoothers usually work with the global data available (which is A and the transfers P and R) - - Teuchos::RCP trilSmoo = Teuchos::rcp(new TrilinosSmoother(type, params, overlap)); - - if (paramList.isParameter("LineDetection_Layers")) { - RCP generatingFact = BuildFactory(paramList.getEntry("LineDetection_Layers"), factoryMapIn, factoryManagersIn); - trilSmoo->SetFactory("LineDetection_Layers", generatingFact); - } - if (paramList.isParameter("LineDetection_VertLineIds")) { - RCP generatingFact = BuildFactory(paramList.getEntry("LineDetection_Layers"), factoryMapIn, factoryManagersIn); - trilSmoo->SetFactory("LineDetection_Layers", generatingFact); + return factory; + } + + RCP BuildToggleCoordinatesTransferFactory( + const Teuchos::ParameterList ¶mList, const FactoryMap &factoryMapIn, + const FactoryManagerMap &factoryManagersIn) const { + RCP factory; + TEUCHOS_TEST_FOR_EXCEPTION( + paramList.isSublist("TransferFactories") == false, + Exceptions::RuntimeError, + "FactoryFactory::BuildToggleCoordinatesTransferFactory: the " + "ToggleCoordinatesTransferFactory needs a sublist 'TransferFactories' " + "containing information about the subfactories for coordinate " + "transfer!"); + + RCP paramListNonConst = + rcp(new Teuchos::ParameterList(paramList)); + RCP transferFactories = + rcp(new Teuchos::ParameterList( + *sublist(paramListNonConst, "TransferFactories"))); + paramListNonConst->remove("TransferFactories"); + + // build CoordinatesTransferFactory + factory = Build2( + *paramListNonConst, factoryMapIn, factoryManagersIn); + + // count how many coordinate transfer factories have been declared. + // the numbers must match! 
+ int numCoordTransferFactories = 0; + for (Teuchos::ParameterList::ConstIterator param = + transferFactories->begin(); + param != transferFactories->end(); ++param) { + size_t foundCoordinates = + transferFactories->name(param).find("Coordinates"); + if (foundCoordinates != std::string::npos && foundCoordinates == 0 && + transferFactories->name(param).length() == 12) { + numCoordTransferFactories++; + continue; } - if (paramList.isParameter("CoarseNumZLayers")) { - RCP generatingFact = BuildFactory(paramList.getEntry("CoarseNumZLayers"), factoryMapIn, factoryManagersIn); - trilSmoo->SetFactory("CoarseNumZLayers", generatingFact); + } + TEUCHOS_TEST_FOR_EXCEPTION( + numCoordTransferFactories != 2, Exceptions::RuntimeError, + "FactoryFactory::BuildToggleCoordinatesTransfer: The " + "ToggleCoordinatesTransferFactory needs two (different) coordinate " + "transfer factories. The factories have to be provided using the names " + "Coordinates%i, where %i denotes a number between 1 and 9."); + + // create empty vectors with data + std::vector coarseCoordsFactoryNames( + numCoordTransferFactories); + + for (Teuchos::ParameterList::ConstIterator param = + transferFactories->begin(); + param != transferFactories->end(); ++param) { + size_t foundCoords = transferFactories->name(param).find("Coordinates"); + if (foundCoords != std::string::npos && foundCoords == 0 && + transferFactories->name(param).length() == 12) { + int number = atoi(&(transferFactories->name(param).at(11))); + TEUCHOS_TEST_FOR_EXCEPTION( + number < 1 || number > numCoordTransferFactories, + Exceptions::RuntimeError, + "FactoryFactory::BuildToggleCoordinatesTransfer: Please use the " + "format Coordinates%i with %i an integer between 1 and the maximum " + "number of coordinate transfer factories in " + "ToggleCoordinatesTransferFactory!"); + coarseCoordsFactoryNames[number - 1] = transferFactories->entry(param); + continue; } - - RCP smooFact = rcp(new SmootherFactory(Teuchos::null)); - 
Teuchos::ParameterList smooFactParams; - //smooFactParams.setEntry("keep smoother data", paramList.getEntry("keep smoother data")); - smooFact->SetParameterList(smooFactParams); - smooFact->SetSmootherPrototypes(trilSmoo); - return smooFact; } -#ifdef HAVE_MUELU_MATLAB - //! MatlabSmoother - // Parameter List Parsing: - // - // - // - // - // - // - // - // - // - RCP BuildMatlabSmoother(const Teuchos::ParameterList & paramList, const FactoryMap & factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - if (paramList.begin() == paramList.end()) - return rcp(new SmootherFactory(rcp(new MatlabSmoother()))); - - TEUCHOS_TEST_FOR_EXCEPTION(paramList.get("factory") != "MatlabSmoother", Exceptions::RuntimeError, ""); - - // Read in factory information for smoothers (if available...) - // NOTE: only a selected number of factories can be used with the Trilinos smoother - // smoothers usually work with the global data available (which is A and the transfers P and R) - - Teuchos::RCP matSmoo = Teuchos::rcp(new MatlabSmoother(paramList)); - - return rcp(new SmootherFactory(matSmoo)); + // register all coarse nullspace factories in TogglePFactory + for (std::vector::const_iterator it = + coarseCoordsFactoryNames.begin(); + it != coarseCoordsFactoryNames.end(); ++it) { + RCP p = + BuildFactory(*it, factoryMapIn, factoryManagersIn); + factory->AddCoordTransferFactory(p); } -#endif - - RCP BuildDirectSolver(const Teuchos::ParameterList& paramList, const FactoryMap& /* factoryMapIn */, const FactoryManagerMap& /* factoryManagersIn */) const { - if (paramList.begin() == paramList.end()) - return rcp(new SmootherFactory(rcp(new DirectSolver()), Teuchos::null)); - - TEUCHOS_TEST_FOR_EXCEPTION(paramList.get("factory") != "DirectSolver", Exceptions::RuntimeError, ""); - - std::string type; if(paramList.isParameter("type")) type = paramList.get("type"); - // std::string verbose; if(paramList.isParameter("verbose")) verbose = paramList.get("verbose"); - Teuchos::ParameterList 
params; if(paramList.isParameter("ParameterList")) params = paramList.get("ParameterList"); - return rcp(new SmootherFactory(rcp(new DirectSolver(type, params)), Teuchos::null)); + return factory; + } + + //! TrilinosSmoother + // Parameter List Parsing: + // + // + // + // + // + // ... + // + // + RCP + BuildTrilinosSmoother(const Teuchos::ParameterList ¶mList, + const FactoryMap &factoryMapIn, + const FactoryManagerMap &factoryManagersIn) const { + if (paramList.begin() == paramList.end()) + return rcp(new SmootherFactory(rcp(new TrilinosSmoother()))); + + TEUCHOS_TEST_FOR_EXCEPTION(paramList.get("factory") != + "TrilinosSmoother", + Exceptions::RuntimeError, ""); + + // Is it true? TEUCHOS_TEST_FOR_EXCEPTION(!paramList.isParameter("type"), + // Exceptions::RuntimeError, "TrilinosSmoother: parameter 'type' is + // mandatory"); type="" is default in TrilinosSmoother, but what happen + // then? + + std::string type = ""; + if (paramList.isParameter("type")) + type = paramList.get("type"); + int overlap = 0; + if (paramList.isParameter("overlap")) + overlap = paramList.get("overlap"); + // std::string verbose; if(paramList.isParameter("verbose")) verbose + // = paramList.get("verbose"); + Teuchos::ParameterList params; + if (paramList.isParameter("ParameterList")) + params = paramList.get("ParameterList"); + + // parameters from SmootherFactory + // bool bKeepSmootherData = false; if(paramList.isParameter("keep smoother + // data")) bKeepSmootherData = paramList.get("keep smoother data"); + + // Read in factory information for smoothers (if available...) 
+ // NOTE: only a selected number of factories can be used with the Trilinos + // smoother + // smoothers usually work with the global data available (which is A + // and the transfers P and R) + + Teuchos::RCP trilSmoo = + Teuchos::rcp(new TrilinosSmoother(type, params, overlap)); + + if (paramList.isParameter("LineDetection_Layers")) { + RCP generatingFact = + BuildFactory(paramList.getEntry("LineDetection_Layers"), factoryMapIn, + factoryManagersIn); + trilSmoo->SetFactory("LineDetection_Layers", generatingFact); + } + if (paramList.isParameter("LineDetection_VertLineIds")) { + RCP generatingFact = + BuildFactory(paramList.getEntry("LineDetection_Layers"), factoryMapIn, + factoryManagersIn); + trilSmoo->SetFactory("LineDetection_Layers", generatingFact); + } + if (paramList.isParameter("CoarseNumZLayers")) { + RCP generatingFact = + BuildFactory(paramList.getEntry("CoarseNumZLayers"), factoryMapIn, + factoryManagersIn); + trilSmoo->SetFactory("CoarseNumZLayers", generatingFact); } - template // T must implement the Factory interface - RCP BuildBlockedSmoother(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - // read in sub lists - RCP paramListNonConst = rcp(new ParameterList(paramList)); - - // internal vector of factory managers - std::vector > facManagers; - - // loop over all "block%i" sublists in parameter list - int blockid = 1; - bool blockExists = true; - while (blockExists == true) { - std::stringstream ss; - ss << "block" << blockid; - - if(paramList.isSublist(ss.str()) == true) { - // we either have a parameter group or we have a list of factories in here - RCP b = rcp(new ParameterList(*sublist(paramListNonConst, ss.str()))); - - RCP M = Teuchos::null; - - if (b->isParameter("group")) { - // use a factory manager - std::string facManagerName = b->get< std::string >("group"); - TEUCHOS_TEST_FOR_EXCEPTION(factoryManagersIn.count(facManagerName) != 1, Exceptions::RuntimeError, 
"Factory manager has not been found. Please check the spelling of the factory managers in your xml file."); - RCP Mb = factoryManagersIn.find(facManagerName)->second; - M = Teuchos::rcp_dynamic_cast(Mb); - TEUCHOS_TEST_FOR_EXCEPTION(M==Teuchos::null, Exceptions::RuntimeError, "Failed to cast FactoryManagerBase object to FactoryManager."); - } else { - // read in the list of factories - M = rcp(new FactoryManager()); - for (ParameterList::ConstIterator param = b->begin(); param != b->end(); ++param) { - RCP p = BuildFactory(b->entry(param), factoryMapIn, factoryManagersIn); - M->SetFactory(b->name(param),p); - } - } + RCP smooFact = rcp(new SmootherFactory(Teuchos::null)); + Teuchos::ParameterList smooFactParams; + // smooFactParams.setEntry("keep smoother data", paramList.getEntry("keep + // smoother data")); + smooFact->SetParameterList(smooFactParams); + smooFact->SetSmootherPrototypes(trilSmoo); + return smooFact; + } + +#ifdef HAVE_MUELU_MATLAB + //! MatlabSmoother + // Parameter List Parsing: + // + // + // + // + // + // + RCP + BuildMatlabSmoother(const Teuchos::ParameterList ¶mList, + const FactoryMap &factoryMapIn, + const FactoryManagerMap &factoryManagersIn) const { + if (paramList.begin() == paramList.end()) + return rcp(new SmootherFactory(rcp(new MatlabSmoother()))); + + TEUCHOS_TEST_FOR_EXCEPTION(paramList.get("factory") != + "MatlabSmoother", + Exceptions::RuntimeError, ""); + + // Read in factory information for smoothers (if available...) 
+ // NOTE: only a selected number of factories can be used with the Trilinos + // smoother + // smoothers usually work with the global data available (which is A + // and the transfers P and R) + + Teuchos::RCP matSmoo = + Teuchos::rcp(new MatlabSmoother(paramList)); + + return rcp(new SmootherFactory(matSmoo)); + } +#endif - // add factory manager to internal vector of factory managers - M->SetIgnoreUserData(true); - facManagers.push_back(M); - paramListNonConst->remove(ss.str()); - blockid++; + RCP + BuildDirectSolver(const Teuchos::ParameterList ¶mList, + const FactoryMap & /* factoryMapIn */, + const FactoryManagerMap & /* factoryManagersIn */) const { + if (paramList.begin() == paramList.end()) + return rcp(new SmootherFactory(rcp(new DirectSolver()), Teuchos::null)); + + TEUCHOS_TEST_FOR_EXCEPTION(paramList.get("factory") != + "DirectSolver", + Exceptions::RuntimeError, ""); + + std::string type; + if (paramList.isParameter("type")) + type = paramList.get("type"); + // std::string verbose; if(paramList.isParameter("verbose")) verbose + // = paramList.get("verbose"); + Teuchos::ParameterList params; + if (paramList.isParameter("ParameterList")) + params = paramList.get("ParameterList"); + + return rcp(new SmootherFactory(rcp(new DirectSolver(type, params)), + Teuchos::null)); + } + + template // T must implement the Factory interface + RCP + BuildBlockedSmoother(const Teuchos::ParameterList ¶mList, + const FactoryMap &factoryMapIn, + const FactoryManagerMap &factoryManagersIn) const { + // read in sub lists + RCP paramListNonConst = rcp(new ParameterList(paramList)); + + // internal vector of factory managers + std::vector> facManagers; + + // loop over all "block%i" sublists in parameter list + int blockid = 1; + bool blockExists = true; + while (blockExists == true) { + std::stringstream ss; + ss << "block" << blockid; + + if (paramList.isSublist(ss.str()) == true) { + // we either have a parameter group or we have a list of factories in + // here + RCP b = 
+ rcp(new ParameterList(*sublist(paramListNonConst, ss.str()))); + + RCP M = Teuchos::null; + + if (b->isParameter("group")) { + // use a factory manager + std::string facManagerName = b->get("group"); + TEUCHOS_TEST_FOR_EXCEPTION( + factoryManagersIn.count(facManagerName) != 1, + Exceptions::RuntimeError, + "Factory manager has not been found. Please check the spelling " + "of the factory managers in your xml file."); + RCP Mb = + factoryManagersIn.find(facManagerName)->second; + M = Teuchos::rcp_dynamic_cast(Mb); + TEUCHOS_TEST_FOR_EXCEPTION( + M == Teuchos::null, Exceptions::RuntimeError, + "Failed to cast FactoryManagerBase object to FactoryManager."); } else { - blockExists = false; - break; + // read in the list of factories + M = rcp(new FactoryManager()); + for (ParameterList::ConstIterator param = b->begin(); + param != b->end(); ++param) { + RCP p = + BuildFactory(b->entry(param), factoryMapIn, factoryManagersIn); + M->SetFactory(b->name(param), p); + } } + // add factory manager to internal vector of factory managers + M->SetIgnoreUserData(true); + facManagers.push_back(M); + paramListNonConst->remove(ss.str()); + blockid++; + } else { + blockExists = false; + break; } - - // create a new blocked smoother - RCP bs = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); - - // important: set block factory for A here! - // TAW: 7/6/2016: We should not need to set/hardcode the blocked operator here. 
- // The user might want to overwrite this in the xml file, so just - // use what is declared as "A" - //bs->SetFactory("A", MueLu::NoFactory::getRCP()); - - for (int i = 0; i(facManagers.size()); i++) { - bs->AddFactoryManager(facManagers[i],i); - } - - return rcp(new SmootherFactory(bs)); } -#ifdef HAVE_MUELU_TEKO - RCP BuildTekoSmoother(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - // read in sub lists - RCP paramListNonConst = rcp(new ParameterList(paramList)); - RCP tekoParams = rcp(new ParameterList(paramListNonConst->sublist("Inverse Factory Library"))); - paramListNonConst->remove("Inverse Factory Library"); - - // create a new blocked smoother - RCP bs = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); - - // important: set block factory for A here! - // TAW: 7/6/2016: We should not need to set/hardcode the blocked operator here. - // The user might want to overwrite this in the xml file, so just - // use what is declared as "A" - //bs->SetFactory("A", MueLu::NoFactory::getRCP()); + // create a new blocked smoother + RCP bs = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); - // Set Teko parameters ("Inverse Factory Library") - bs->SetTekoParameters(tekoParams); + // important: set block factory for A here! + // TAW: 7/6/2016: We should not need to set/hardcode the blocked operator + // here. 
+ // The user might want to overwrite this in the xml file, so + // just use what is declared as "A" + // bs->SetFactory("A", MueLu::NoFactory::getRCP()); - return rcp(new SmootherFactory(bs)); + for (int i = 0; i < Teuchos::as(facManagers.size()); i++) { + bs->AddFactoryManager(facManagers[i], i); } -#endif - - RCP BuildBlockedDirectSolver(const Teuchos::ParameterList& paramList, const FactoryMap& /* factoryMapIn */, const FactoryManagerMap& /* factoryManagersIn */) const { - if (paramList.numParams() == 0) - return rcp(new SmootherFactory(rcp(new BlockedDirectSolver()))); - - TEUCHOS_TEST_FOR_EXCEPTION(paramList.get("factory") != "BlockedDirectSolver", Exceptions::RuntimeError, "FactoryFactory::BuildBlockedDirectSolver: Generating factory needs to be a BlockedDirectSolver."); - std::string type; if(paramList.isParameter("type")) type = paramList.get("type"); - // std::string verbose; if(paramList.isParameter("verbose")) verbose = paramList.get("verbose"); - Teuchos::ParameterList params; if(paramList.isParameter("ParameterList")) params = paramList.get("ParameterList"); - - return rcp(new SmootherFactory(rcp(new BlockedDirectSolver(type, params)))); - } + return rcp(new SmootherFactory(bs)); + } - //RCP BuildBlockedPFactory(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - // RCP pfac = rcp(new BlockedPFactory()); - - template // T must implement the Factory interface - RCP BuildBlockedFactory(const Teuchos::ParameterList & paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - RCP pfac = Teuchos::null; - - // read in sub lists - RCP paramListNonConst = rcp(new ParameterList(paramList)); - - // internal vector of factory managers - std::vector > facManagers; - - // loop over all "block%i" sublists in parameter list - int blockid = 1; - bool blockExists = true; - while (blockExists == true) { - std::stringstream ss; - ss << "block" << blockid; - - 
if(paramList.isSublist(ss.str()) == true) { - // we either have a parameter group or we have a list of factories in here - RCP b = rcp(new ParameterList(*sublist(paramListNonConst, ss.str()))); - - RCP M = Teuchos::null; - - if (b->isParameter("group")) { - // use a factory manager - std::string facManagerName = b->get< std::string >("group"); - TEUCHOS_TEST_FOR_EXCEPTION(factoryManagersIn.count(facManagerName) != 1, Exceptions::RuntimeError, "Factory manager has not been found. Please check the spelling of the factory managers in your xml file."); - RCP Mb = factoryManagersIn.find(facManagerName)->second; - M = Teuchos::rcp_dynamic_cast(Mb); - TEUCHOS_TEST_FOR_EXCEPTION(M==Teuchos::null, Exceptions::RuntimeError, "Failed to cast FactoryManagerBase object to FactoryManager."); - } else { - // read in the list of factories - M = rcp(new FactoryManager()); - for (ParameterList::ConstIterator param = b->begin(); param != b->end(); ++param) { - RCP p = BuildFactory(b->entry(param), factoryMapIn, factoryManagersIn); - M->SetFactory(b->name(param),p); - } - } +#ifdef HAVE_MUELU_TEKO + RCP + BuildTekoSmoother(const Teuchos::ParameterList ¶mList, + const FactoryMap &factoryMapIn, + const FactoryManagerMap &factoryManagersIn) const { + // read in sub lists + RCP paramListNonConst = rcp(new ParameterList(paramList)); + RCP tekoParams = rcp(new ParameterList( + paramListNonConst->sublist("Inverse Factory Library"))); + paramListNonConst->remove("Inverse Factory Library"); + + // create a new blocked smoother + RCP bs = Build2( + *paramListNonConst, factoryMapIn, factoryManagersIn); + + // important: set block factory for A here! + // TAW: 7/6/2016: We should not need to set/hardcode the blocked operator + // here. 
+ // The user might want to overwrite this in the xml file, so + // just use what is declared as "A" + // bs->SetFactory("A", MueLu::NoFactory::getRCP()); + + // Set Teko parameters ("Inverse Factory Library") + bs->SetTekoParameters(tekoParams); + + return rcp(new SmootherFactory(bs)); + } +#endif - // add factory manager to internal vector of factory managers - M->SetIgnoreUserData(true); - facManagers.push_back(M); - paramListNonConst->remove(ss.str()); - blockid++; + RCP BuildBlockedDirectSolver( + const Teuchos::ParameterList ¶mList, + const FactoryMap & /* factoryMapIn */, + const FactoryManagerMap & /* factoryManagersIn */) const { + if (paramList.numParams() == 0) + return rcp(new SmootherFactory(rcp(new BlockedDirectSolver()))); + + TEUCHOS_TEST_FOR_EXCEPTION( + paramList.get("factory") != "BlockedDirectSolver", + Exceptions::RuntimeError, + "FactoryFactory::BuildBlockedDirectSolver: Generating factory needs to " + "be a BlockedDirectSolver."); + + std::string type; + if (paramList.isParameter("type")) + type = paramList.get("type"); + // std::string verbose; if(paramList.isParameter("verbose")) verbose + // = paramList.get("verbose"); + Teuchos::ParameterList params; + if (paramList.isParameter("ParameterList")) + params = paramList.get("ParameterList"); + + return rcp(new SmootherFactory(rcp(new BlockedDirectSolver(type, params)))); + } + + // RCP BuildBlockedPFactory(const Teuchos::ParameterList& + // paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& + // factoryManagersIn) const { + // RCP pfac = rcp(new BlockedPFactory()); + + template // T must implement the Factory interface + RCP BuildBlockedFactory(const Teuchos::ParameterList ¶mList, + const FactoryMap &factoryMapIn, + const FactoryManagerMap &factoryManagersIn) const { + RCP pfac = Teuchos::null; + + // read in sub lists + RCP paramListNonConst = rcp(new ParameterList(paramList)); + + // internal vector of factory managers + std::vector> facManagers; + + // loop over all 
"block%i" sublists in parameter list + int blockid = 1; + bool blockExists = true; + while (blockExists == true) { + std::stringstream ss; + ss << "block" << blockid; + + if (paramList.isSublist(ss.str()) == true) { + // we either have a parameter group or we have a list of factories in + // here + RCP b = + rcp(new ParameterList(*sublist(paramListNonConst, ss.str()))); + + RCP M = Teuchos::null; + + if (b->isParameter("group")) { + // use a factory manager + std::string facManagerName = b->get("group"); + TEUCHOS_TEST_FOR_EXCEPTION( + factoryManagersIn.count(facManagerName) != 1, + Exceptions::RuntimeError, + "Factory manager has not been found. Please check the spelling " + "of the factory managers in your xml file."); + RCP Mb = + factoryManagersIn.find(facManagerName)->second; + M = Teuchos::rcp_dynamic_cast(Mb); + TEUCHOS_TEST_FOR_EXCEPTION( + M == Teuchos::null, Exceptions::RuntimeError, + "Failed to cast FactoryManagerBase object to FactoryManager."); } else { - blockExists = false; - break; + // read in the list of factories + M = rcp(new FactoryManager()); + for (ParameterList::ConstIterator param = b->begin(); + param != b->end(); ++param) { + RCP p = + BuildFactory(b->entry(param), factoryMapIn, factoryManagersIn); + M->SetFactory(b->name(param), p); + } } + // add factory manager to internal vector of factory managers + M->SetIgnoreUserData(true); + facManagers.push_back(M); + paramListNonConst->remove(ss.str()); + blockid++; + } else { + blockExists = false; + break; } - - // build BlockedPFactory (without sub block information) - pfac = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); - - // add FactoryManager objects - for(size_t i = 0; iAddFactoryManager(facManagers[i]); // add factory manager - } - - return pfac; } + // build BlockedPFactory (without sub block information) + pfac = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); - template // T must implement the Factory interface - RCP BuildBlockedCoordFactory(const 
Teuchos::ParameterList & paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { - RCP pfac = Teuchos::null; - - // read in sub lists - RCP paramListNonConst = rcp(new ParameterList(paramList)); - - // internal vector of factory managers - std::vector > facBase; - - // loop over all "block%i" sublists in parameter list - int blockid = 1; - bool blockExists = true; - while (blockExists == true) { - std::stringstream ss; - ss << "block" << blockid; - - if(paramList.isSublist(ss.str()) == true) { - // we either have a parameter group or we have a list of factories in here - RCP b = rcp(new ParameterList(*sublist(paramListNonConst, ss.str()))); - - // read in the list of factories - for (ParameterList::ConstIterator param = b->begin(); param != b->end(); ++param) { - RCP p = BuildFactory(b->entry(param), factoryMapIn, factoryManagersIn); - facBase.push_back(p); - } + // add FactoryManager objects + for (size_t i = 0; i < facManagers.size(); i++) { + pfac->AddFactoryManager(facManagers[i]); // add factory manager + } - // add factory manager to internal vector of factory managers - paramListNonConst->remove(ss.str()); - blockid++; - } else { - blockExists = false; - break; + return pfac; + } + + template // T must implement the Factory interface + RCP + BuildBlockedCoordFactory(const Teuchos::ParameterList ¶mList, + const FactoryMap &factoryMapIn, + const FactoryManagerMap &factoryManagersIn) const { + RCP pfac = Teuchos::null; + + // read in sub lists + RCP paramListNonConst = rcp(new ParameterList(paramList)); + + // internal vector of factory managers + std::vector> facBase; + + // loop over all "block%i" sublists in parameter list + int blockid = 1; + bool blockExists = true; + while (blockExists == true) { + std::stringstream ss; + ss << "block" << blockid; + + if (paramList.isSublist(ss.str()) == true) { + // we either have a parameter group or we have a list of factories in + // here + RCP b = + rcp(new 
ParameterList(*sublist(paramListNonConst, ss.str()))); + + // read in the list of factories + for (ParameterList::ConstIterator param = b->begin(); param != b->end(); + ++param) { + RCP p = + BuildFactory(b->entry(param), factoryMapIn, factoryManagersIn); + facBase.push_back(p); } + // add factory manager to internal vector of factory managers + paramListNonConst->remove(ss.str()); + blockid++; + } else { + blockExists = false; + break; } + } - // build BlockedPFactory (without sub block information) - pfac = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); + // build BlockedPFactory (without sub block information) + pfac = Build2(*paramListNonConst, factoryMapIn, factoryManagersIn); - // add FactoryManager objects - for(size_t i = 0; iAddFactory(facBase[i]); // add factory manager - } - - return pfac; + // add FactoryManager objects + for (size_t i = 0; i < facBase.size(); i++) { + pfac->AddFactory(facBase[i]); // add factory manager } - }; // class + return pfac; + } + +}; // class } // namespace MueLu #define MUELU_FACTORYFACTORY_SHORT #endif // MUELU_FACTORYFACTORY_DECL_HPP - // TODO: handle factory parameters - // TODO: parameter validator - // TODO: static - // TODO: default parameters should not be duplicated here and on the Factory (ex: default for overlap (=0) is defined both here and on TrilinosSmoother constructors) +// TODO: handle factory parameters +// TODO: parameter validator +// TODO: static +// TODO: default parameters should not be duplicated here and on the Factory +// (ex: default for overlap (=0) is defined both here and on TrilinosSmoother +// constructors) diff --git a/packages/muelu/src/Interface/MueLu_HierarchyFactory.hpp b/packages/muelu/src/Interface/MueLu_HierarchyFactory.hpp index 03e2596daf12..37406522ae3d 100644 --- a/packages/muelu/src/Interface/MueLu_HierarchyFactory.hpp +++ b/packages/muelu/src/Interface/MueLu_HierarchyFactory.hpp @@ -48,49 +48,50 @@ #include "Teuchos_RCP.hpp" -#include "MueLu_ConfigDefs.hpp" #include 
"MueLu_BaseClass.hpp" +#include "MueLu_ConfigDefs.hpp" #include "MueLu_Hierarchy_fwd.hpp" namespace MueLu { - //! - template - class HierarchyFactory : public BaseClass { +//! +template +class HierarchyFactory : public BaseClass { #undef MUELU_HIERARCHYFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //@{ Constructors/Destructors. +public: + //@{ Constructors/Destructors. - //! Destructor. - virtual ~HierarchyFactory() { } + //! Destructor. + virtual ~HierarchyFactory() {} - //@} + //@} - //! Create an empty Hierarchy object - // Note: This function is not very useful at the moment as MueLu only have on Hierarchy class. - // In the future, we might have an abstract Hierarchy class and several derived Hierarchy classes. - // Using this function will then be the recommended way to generate a Hierarchy. - // - // This method is called Create() instead of Build(), because it return an non-initialized - // object (ie: MG setup is not done). - // Build() function in MueLu returns initialized objects. - virtual RCP CreateHierarchy() const = 0; + //! Create an empty Hierarchy object + // Note: This function is not very useful at the moment as MueLu only have on + // Hierarchy class. + // In the future, we might have an abstract Hierarchy class and several + // derived Hierarchy classes. Using this function will then be the + // recommended way to generate a Hierarchy. + // + // This method is called Create() instead of Build(), because it return an + // non-initialized object (ie: MG setup is not done). Build() function in + // MueLu returns initialized objects. + virtual RCP CreateHierarchy() const = 0; - //! Create a labeled empty Hierarchy object - virtual RCP CreateHierarchy(const std::string& label) const = 0; + //! Create a labeled empty Hierarchy object + virtual RCP CreateHierarchy(const std::string &label) const = 0; - //! Setup Hierarchy object - virtual void SetupHierarchy(Hierarchy & H) const = 0; + //! 
Setup Hierarchy object + virtual void SetupHierarchy(Hierarchy &H) const = 0; - }; // class HierarchyFactoryBase +}; // class HierarchyFactoryBase } // namespace MueLu #define MUELU_HIERARCHYFACTORY_SHORT -#endif //ifndef MUELU_HIERARCHYFACTORY_HPP +#endif // ifndef MUELU_HIERARCHYFACTORY_HPP diff --git a/packages/muelu/src/Interface/MueLu_HierarchyManager.hpp b/packages/muelu/src/Interface/MueLu_HierarchyManager.hpp index 04f89f47e6ae..b6c4992f4e65 100644 --- a/packages/muelu/src/Interface/MueLu_HierarchyManager.hpp +++ b/packages/muelu/src/Interface/MueLu_HierarchyManager.hpp @@ -46,18 +46,18 @@ #ifndef MUELU_HIERARCHYMANAGER_DECL_HPP #define MUELU_HIERARCHYMANAGER_DECL_HPP -#include #include +#include #include -#include #include +#include #include "MueLu_ConfigDefs.hpp" -#include "MueLu_Exceptions.hpp" #include "MueLu_Aggregates.hpp" +#include "MueLu_Exceptions.hpp" #include "MueLu_Hierarchy.hpp" #include "MueLu_HierarchyFactory.hpp" #include "MueLu_Level.hpp" @@ -70,471 +70,513 @@ namespace MueLu { - // This class stores the configuration of a Hierarchy. - // The class also provides an algorithm to build a Hierarchy from the configuration. - // - // See also: FactoryManager - // - template - class HierarchyManager : public HierarchyFactory { +// This class stores the configuration of a Hierarchy. +// The class also provides an algorithm to build a Hierarchy from the +// configuration. +// +// See also: FactoryManager +// +template +class HierarchyManager + : public HierarchyFactory { #undef MUELU_HIERARCHYMANAGER_SHORT #include "MueLu_UseShortNames.hpp" - typedef std::pair keep_pair; + typedef std::pair keep_pair; - public: - - //! Constructor - HierarchyManager(int numDesiredLevel = MasterList::getDefault("max levels")) : - numDesiredLevel_(numDesiredLevel), +public: + //! 
Constructor + HierarchyManager( + int numDesiredLevel = MasterList::getDefault("max levels")) + : numDesiredLevel_(numDesiredLevel), maxCoarseSize_(MasterList::getDefault("coarse: max size")), - verbosity_(Medium), - doPRrebalance_(MasterList::getDefault("repartition: rebalance P and R")), - doPRViaCopyrebalance_(MasterList::getDefault("repartition: explicit via new copy rebalance P and R")), - implicitTranspose_(MasterList::getDefault("transpose: use implicit")), - fuseProlongationAndUpdate_(MasterList::getDefault("fuse prolongation and update")), - suppressNullspaceDimensionCheck_(MasterList::getDefault("nullspace: suppress dimension check")), + verbosity_(Medium), doPRrebalance_(MasterList::getDefault( + "repartition: rebalance P and R")), + doPRViaCopyrebalance_(MasterList::getDefault( + "repartition: explicit via new copy rebalance P and R")), + implicitTranspose_( + MasterList::getDefault("transpose: use implicit")), + fuseProlongationAndUpdate_( + MasterList::getDefault("fuse prolongation and update")), + suppressNullspaceDimensionCheck_(MasterList::getDefault( + "nullspace: suppress dimension check")), sizeOfMultiVectors_(MasterList::getDefault("number of vectors")), - graphOutputLevel_(-2) { } - - //! Destructor - virtual ~HierarchyManager() = default; - - //! - void AddFactoryManager(int startLevel, int numDesiredLevel, RCP manager) { - const int lastLevel = startLevel + numDesiredLevel - 1; - if (levelManagers_.size() < lastLevel + 1) - levelManagers_.resize(lastLevel + 1); - - for (int iLevel = startLevel; iLevel <= lastLevel; iLevel++) - levelManagers_[iLevel] = manager; - } - - //! - RCP GetFactoryManager(int levelID) const { - // NOTE: last levelManager is used for all the remaining levels - return (levelID >= levelManagers_.size() ? levelManagers_[levelManagers_.size()-1] : levelManagers_[levelID]); - } - - //! returns number of factory managers stored in levelManagers_ vector. 
- size_t getNumFactoryManagers() const { - return levelManagers_.size(); - } - - //! - void CheckConfig() { - for (int i = 0; i < levelManagers_.size(); i++) - TEUCHOS_TEST_FOR_EXCEPTION(levelManagers_[i] == Teuchos::null, Exceptions::RuntimeError, "MueLu:HierarchyConfig::CheckConfig(): Undefined configuration for level:"); - } - - //@{ - - virtual RCP CreateHierarchy() const { - return rcp(new Hierarchy()); - } - - virtual RCP CreateHierarchy(const std::string& label) const { - return rcp(new Hierarchy(label)); - } - - //! Setup Hierarchy object - virtual void SetupHierarchy(Hierarchy& H) const { - TEUCHOS_TEST_FOR_EXCEPTION(!H.GetLevel(0)->IsAvailable("A"), Exceptions::RuntimeError, "No fine level operator"); - - RCP l0 = H.GetLevel(0); - RCP Op = l0->Get>("A"); - - // Compare nullspace dimension to NumPDEs and throw/warn based on user input - if (l0->IsAvailable("Nullspace")) { - RCP A = Teuchos::rcp_dynamic_cast(Op); - if (A != Teuchos::null) { - RCP nullspace = l0->Get>("Nullspace"); - - if (static_cast(A->GetFixedBlockSize()) > nullspace->getNumVectors()) - { - std::stringstream msg; - msg << "User-provided nullspace has fewer vectors (" - << nullspace->getNumVectors() << ") than number of PDE equations (" - << A->GetFixedBlockSize() << "). "; - - if (suppressNullspaceDimensionCheck_) - { - msg << "It depends on the PDE, if this is a problem or not."; - this->GetOStream(Warnings0) << msg.str() << std::endl; - } - else - { - msg << "Add the missing nullspace vectors! (You can suppress this check. See the MueLu user guide for details.)"; - TEUCHOS_TEST_FOR_EXCEPTION(static_cast(A->GetFixedBlockSize()) > nullspace->getNumVectors(), Exceptions::RuntimeError, msg.str()); - } + graphOutputLevel_(-2) {} + + //! Destructor + virtual ~HierarchyManager() = default; + + //! 
+ void AddFactoryManager(int startLevel, int numDesiredLevel, + RCP manager) { + const int lastLevel = startLevel + numDesiredLevel - 1; + if (levelManagers_.size() < lastLevel + 1) + levelManagers_.resize(lastLevel + 1); + + for (int iLevel = startLevel; iLevel <= lastLevel; iLevel++) + levelManagers_[iLevel] = manager; + } + + //! + RCP GetFactoryManager(int levelID) const { + // NOTE: last levelManager is used for all the remaining levels + return (levelID >= levelManagers_.size() + ? levelManagers_[levelManagers_.size() - 1] + : levelManagers_[levelID]); + } + + //! returns number of factory managers stored in levelManagers_ vector. + size_t getNumFactoryManagers() const { return levelManagers_.size(); } + + //! + void CheckConfig() { + for (int i = 0; i < levelManagers_.size(); i++) + TEUCHOS_TEST_FOR_EXCEPTION(levelManagers_[i] == Teuchos::null, + Exceptions::RuntimeError, + "MueLu:HierarchyConfig::CheckConfig(): " + "Undefined configuration for level:"); + } + + //@{ + + virtual RCP CreateHierarchy() const { + return rcp(new Hierarchy()); + } + + virtual RCP CreateHierarchy(const std::string &label) const { + return rcp(new Hierarchy(label)); + } + + //! Setup Hierarchy object + virtual void SetupHierarchy(Hierarchy &H) const { + TEUCHOS_TEST_FOR_EXCEPTION(!H.GetLevel(0)->IsAvailable("A"), + Exceptions::RuntimeError, + "No fine level operator"); + + RCP l0 = H.GetLevel(0); + RCP Op = l0->Get>("A"); + + // Compare nullspace dimension to NumPDEs and throw/warn based on user input + if (l0->IsAvailable("Nullspace")) { + RCP A = Teuchos::rcp_dynamic_cast(Op); + if (A != Teuchos::null) { + RCP nullspace = l0->Get>("Nullspace"); + + if (static_cast(A->GetFixedBlockSize()) > + nullspace->getNumVectors()) { + std::stringstream msg; + msg << "User-provided nullspace has fewer vectors (" + << nullspace->getNumVectors() + << ") than number of PDE equations (" << A->GetFixedBlockSize() + << "). 
"; + + if (suppressNullspaceDimensionCheck_) { + msg << "It depends on the PDE, if this is a problem or not."; + this->GetOStream(Warnings0) << msg.str() << std::endl; + } else { + msg << "Add the missing nullspace vectors! (You can suppress this " + "check. See the MueLu user guide for details.)"; + TEUCHOS_TEST_FOR_EXCEPTION( + static_cast(A->GetFixedBlockSize()) > + nullspace->getNumVectors(), + Exceptions::RuntimeError, msg.str()); } - } else { - this->GetOStream(Warnings0) << "Skipping dimension check of user-supplied nullspace because user-supplied operator is not a matrix" << std::endl; } + } else { + this->GetOStream(Warnings0) + << "Skipping dimension check of user-supplied nullspace because " + "user-supplied operator is not a matrix" + << std::endl; } + } #ifdef HAVE_MUELU_DEBUG - // Reset factories' data used for debugging - for (int i = 0; i < levelManagers_.size(); i++) - levelManagers_[i]->ResetDebugData(); + // Reset factories' data used for debugging + for (int i = 0; i < levelManagers_.size(); i++) + levelManagers_[i]->ResetDebugData(); #endif - // Setup Matrix - // TODO: I should certainly undo this somewhere... + // Setup Matrix + // TODO: I should certainly undo this somewhere... 
- Xpetra::UnderlyingLib lib = Op->getDomainMap()->lib(); - H.setlib(lib); + Xpetra::UnderlyingLib lib = Op->getDomainMap()->lib(); + H.setlib(lib); - SetupOperator(*Op); - SetupExtra(H); + SetupOperator(*Op); + SetupExtra(H); - // Setup Hierarchy - H.SetMaxCoarseSize(maxCoarseSize_); - VerboseObject::SetDefaultVerbLevel(verbosity_); - if (graphOutputLevel_ >= 0 || graphOutputLevel_ == -1) - H.EnableGraphDumping("dep_graph", graphOutputLevel_); + // Setup Hierarchy + H.SetMaxCoarseSize(maxCoarseSize_); + VerboseObject::SetDefaultVerbLevel(verbosity_); + if (graphOutputLevel_ >= 0 || graphOutputLevel_ == -1) + H.EnableGraphDumping("dep_graph", graphOutputLevel_); - if (VerboseObject::IsPrint(Statistics2)) { - RCP Amat = rcp_dynamic_cast(Op); + if (VerboseObject::IsPrint(Statistics2)) { + RCP Amat = rcp_dynamic_cast(Op); - if (!Amat.is_null()) { - RCP params = rcp(new ParameterList()); - params->set("printLoadBalancingInfo", true); - params->set("printCommInfo", true); + if (!Amat.is_null()) { + RCP params = rcp(new ParameterList()); + params->set("printLoadBalancingInfo", true); + params->set("printCommInfo", true); - VerboseObject::GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*Amat, "A0", params); - } else { - VerboseObject::GetOStream(Warnings1) << "Fine level operator is not a matrix, statistics are not available" << std::endl; - } + VerboseObject::GetOStream(Statistics2) + << PerfUtils::PrintMatrixInfo(*Amat, "A0", params); + } else { + VerboseObject::GetOStream(Warnings1) + << "Fine level operator is not a matrix, statistics are not " + "available" + << std::endl; } + } - H.SetPRrebalance(doPRrebalance_); - H.SetPRViaCopyrebalance(doPRViaCopyrebalance_); - H.SetImplicitTranspose(implicitTranspose_); - H.SetFuseProlongationAndUpdate(fuseProlongationAndUpdate_); - - H.Clear(); - - // There are few issues with using Keep in the interpreter: - // 1. Hierarchy::Keep interface takes a name and a factory. 
If - // factories are different on different levels, the AddNewLevel() call - // in Hierarchy does not work properly, as it assume that factories are - // the same. - // 2. FactoryManager does not have a Keep option, only Hierarchy and - // Level have it - // 3. Interpreter constructs factory managers, but not levels. So we - // cannot set up Keep flags there. - // - // The solution implemented here does the following: - // 1. Construct hierarchy with dummy levels. This avoids - // Hierarchy::AddNewLevel() calls which will propagate wrong - // inheritance. - // 2. Interpreter constructs keep_ array with names and factories for - // that level - // 3. For each level, we call Keep(name, factory) for each keep_ - for (int i = 0; i < numDesiredLevel_; i++) { - std::map >::const_iterator it = keep_.find(i); - if (it != keep_.end()) { - RCP l = H.GetLevel(i); - const std::vector& keeps = it->second; - for (size_t j = 0; j < keeps.size(); j++) - l->Keep(keeps[j].first, keeps[j].second); - } - if (i < numDesiredLevel_-1) { - RCP newLevel = rcp(new Level()); - H.AddLevel(newLevel); - } + H.SetPRrebalance(doPRrebalance_); + H.SetPRViaCopyrebalance(doPRViaCopyrebalance_); + H.SetImplicitTranspose(implicitTranspose_); + H.SetFuseProlongationAndUpdate(fuseProlongationAndUpdate_); + + H.Clear(); + + // There are few issues with using Keep in the interpreter: + // 1. Hierarchy::Keep interface takes a name and a factory. If + // factories are different on different levels, the AddNewLevel() call + // in Hierarchy does not work properly, as it assume that factories are + // the same. + // 2. FactoryManager does not have a Keep option, only Hierarchy and + // Level have it + // 3. Interpreter constructs factory managers, but not levels. So we + // cannot set up Keep flags there. + // + // The solution implemented here does the following: + // 1. Construct hierarchy with dummy levels. This avoids + // Hierarchy::AddNewLevel() calls which will propagate wrong + // inheritance. + // 2. 
Interpreter constructs keep_ array with names and factories for + // that level + // 3. For each level, we call Keep(name, factory) for each keep_ + for (int i = 0; i < numDesiredLevel_; i++) { + std::map>::const_iterator it = keep_.find(i); + if (it != keep_.end()) { + RCP l = H.GetLevel(i); + const std::vector &keeps = it->second; + for (size_t j = 0; j < keeps.size(); j++) + l->Keep(keeps[j].first, keeps[j].second); + } + if (i < numDesiredLevel_ - 1) { + RCP newLevel = rcp(new Level()); + H.AddLevel(newLevel); } + } - // Matrices to print - for(auto iter=matricesToPrint_.begin(); iter!=matricesToPrint_.end(); iter++) - ExportDataSetKeepFlags(H,iter->second,iter->first); + // Matrices to print + for (auto iter = matricesToPrint_.begin(); iter != matricesToPrint_.end(); + iter++) + ExportDataSetKeepFlags(H, iter->second, iter->first); - // Vectors, aggregates and other things that need special case handling - ExportDataSetKeepFlags(H, nullspaceToPrint_, "Nullspace"); - ExportDataSetKeepFlags(H, coordinatesToPrint_, "Coordinates"); - // NOTE: Aggregates use the next level's Factory - ExportDataSetKeepFlagsNextLevel(H, aggregatesToPrint_, "Aggregates"); + // Vectors, aggregates and other things that need special case handling + ExportDataSetKeepFlags(H, nullspaceToPrint_, "Nullspace"); + ExportDataSetKeepFlags(H, coordinatesToPrint_, "Coordinates"); + // NOTE: Aggregates use the next level's Factory + ExportDataSetKeepFlagsNextLevel(H, aggregatesToPrint_, "Aggregates"); #ifdef HAVE_MUELU_INTREPID2 - ExportDataSetKeepFlags(H,elementToNodeMapsToPrint_, "pcoarsen: element to node map"); + ExportDataSetKeepFlags(H, elementToNodeMapsToPrint_, + "pcoarsen: element to node map"); #endif - // Data to save only (these do not have a level, so we do all levels) - for(int i=0; iprint(H.GetOStream(Developer), verbosity_); - - isLastLevel = r || (levelID == lastLevelID); - levelID++; - } - if (!matvecParams_.is_null()) - H.SetMatvecParams(matvecParams_); - 
H.AllocateLevelMultiVectors(sizeOfMultiVectors_); - // Set hierarchy description. - // This is cached, but involves and MPI_Allreduce. - H.description(); - H.describe(H.GetOStream(Runtime0), verbosity_); - - // When we reuse hierarchy, it is necessary that we don't - // change the number of levels. We also cannot make requests - // for coarser levels, because we don't construct all the - // data on previous levels. For instance, let's say our first - // run constructed three levels. If we try to do requests during - // next setup for the fourth level, it would need Aggregates - // which we didn't construct for level 3 because we reused P. - // To fix this situation, we change the number of desired levels - // here. - numDesiredLevel_ = levelID; - - // Matrix prints - for(auto iter = matricesToPrint_.begin(); iter != matricesToPrint_.end(); iter++) { - WriteData(H,iter->second,iter->first); - } + int levelID = 0; + int lastLevelID = numDesiredLevel_ - 1; + bool isLastLevel = false; - // Vectors, aggregates and all things we need to print manually - WriteData(H, nullspaceToPrint_, "Nullspace"); - WriteData(H, coordinatesToPrint_, "Coordinates"); - WriteDataAggregates(H, aggregatesToPrint_, "Aggregates"); + while (!isLastLevel) { + bool r = H.Setup(levelID, LvlMngr(levelID - 1, lastLevelID), + LvlMngr(levelID, lastLevelID), + LvlMngr(levelID + 1, lastLevelID)); + if (levelID < H.GetNumLevels()) + H.GetLevel(levelID)->print(H.GetOStream(Developer), verbosity_); + isLastLevel = r || (levelID == lastLevelID); + levelID++; + } + if (!matvecParams_.is_null()) + H.SetMatvecParams(matvecParams_); + H.AllocateLevelMultiVectors(sizeOfMultiVectors_); + // Set hierarchy description. + // This is cached, but involves and MPI_Allreduce. + H.description(); + H.describe(H.GetOStream(Runtime0), verbosity_); + + // When we reuse hierarchy, it is necessary that we don't + // change the number of levels. 
We also cannot make requests + // for coarser levels, because we don't construct all the + // data on previous levels. For instance, let's say our first + // run constructed three levels. If we try to do requests during + // next setup for the fourth level, it would need Aggregates + // which we didn't construct for level 3 because we reused P. + // To fix this situation, we change the number of desired levels + // here. + numDesiredLevel_ = levelID; + + // Matrix prints + for (auto iter = matricesToPrint_.begin(); iter != matricesToPrint_.end(); + iter++) { + WriteData(H, iter->second, iter->first); + } + // Vectors, aggregates and all things we need to print manually + WriteData(H, nullspaceToPrint_, "Nullspace"); + WriteData(H, coordinatesToPrint_, "Coordinates"); + WriteDataAggregates(H, aggregatesToPrint_, "Aggregates"); #ifdef HAVE_MUELU_INTREPID2 - typedef Kokkos::DynRankView FCi; - WriteDataFC(H,elementToNodeMapsToPrint_, "pcoarsen: element to node map","el2node"); + typedef Kokkos::DynRankView FCi; + WriteDataFC(H, elementToNodeMapsToPrint_, + "pcoarsen: element to node map", "el2node"); #endif + } // SetupHierarchy - } //SetupHierarchy - - //@} + //@} - typedef std::map > FactoryMap; + typedef std::map> FactoryMap; - protected: //TODO: access function +protected: // TODO: access function + //! Setup Matrix object + virtual void SetupOperator(Operator & /* Op */) const {} - //! Setup Matrix object - virtual void SetupOperator(Operator& /* Op */) const { } + //! Setup extra data + // TODO: merge with SetupMatrix ? + virtual void SetupExtra(Hierarchy & /* H */) const {} - //! Setup extra data - // TODO: merge with SetupMatrix ? 
- virtual void SetupExtra(Hierarchy& /* H */) const { } + // TODO this was private + // Used in SetupHierarchy() to access levelManagers_ + // Inputs i=-1 and i=size() are allowed to simplify calls to + // hierarchy->Setup() + Teuchos::RCP LvlMngr(int levelID, int lastLevelID) const { + // NOTE: the order of 'if' statements is important + if (levelID == -1) // levelID = -1 corresponds to the finest level + return Teuchos::null; - // TODO this was private - // Used in SetupHierarchy() to access levelManagers_ - // Inputs i=-1 and i=size() are allowed to simplify calls to hierarchy->Setup() - Teuchos::RCP LvlMngr(int levelID, int lastLevelID) const { - // NOTE: the order of 'if' statements is important - if (levelID == -1) // levelID = -1 corresponds to the finest level - return Teuchos::null; + if (levelID == lastLevelID + 1) // levelID = 'lastLevelID+1' corresponds to + // the last level (i.e., no nextLevel) + return Teuchos::null; - if (levelID == lastLevelID+1) // levelID = 'lastLevelID+1' corresponds to the last level (i.e., no nextLevel) - return Teuchos::null; - - if (levelManagers_.size() == 0) { // default factory manager. - // The default manager is shared across levels, initialized only if needed and deleted with the HierarchyManager - static RCP defaultMngr = rcp(new FactoryManager()); - return defaultMngr; - } - - return GetFactoryManager(levelID); + if (levelManagers_.size() == 0) { // default factory manager. + // The default manager is shared across levels, initialized only if needed + // and deleted with the HierarchyManager + static RCP defaultMngr = rcp(new FactoryManager()); + return defaultMngr; } - //! @group Hierarchy parameters - //! @{ + return GetFactoryManager(levelID); + } - mutable int numDesiredLevel_; - Xpetra::global_size_t maxCoarseSize_; - MsgType verbosity_; + //! @group Hierarchy parameters + //! 
@{ - bool doPRrebalance_; - bool doPRViaCopyrebalance_; - bool implicitTranspose_; - bool fuseProlongationAndUpdate_; + mutable int numDesiredLevel_; + Xpetra::global_size_t maxCoarseSize_; + MsgType verbosity_; - /*! @brief Flag to indicate whether the check of the nullspace dimension is suppressed + bool doPRrebalance_; + bool doPRViaCopyrebalance_; + bool implicitTranspose_; + bool fuseProlongationAndUpdate_; - By default, we do not suppress such a check, as it acts as a safety mechanism. - Yet, certain scenarios deliberately use nullspaces with less nullspace vectors than NumPDEs. - Therefore, the user can suppress this check. Then, the error message is converted to a warning. - */ - bool suppressNullspaceDimensionCheck_; + /*! @brief Flag to indicate whether the check of the nullspace dimension is + suppressed - int sizeOfMultiVectors_; + By default, we do not suppress such a check, as it acts as a safety mechanism. + Yet, certain scenarios deliberately use nullspaces with less nullspace vectors + than NumPDEs. Therefore, the user can suppress this check. Then, the error + message is converted to a warning. + */ + bool suppressNullspaceDimensionCheck_; - //! -2 = no output, -1 = all levels - int graphOutputLevel_; + int sizeOfMultiVectors_; - //! Lists of entities to be exported (or saved) - // Items here get handled manually - Teuchos::Array nullspaceToPrint_; - Teuchos::Array coordinatesToPrint_; - Teuchos::Array aggregatesToPrint_; - Teuchos::Array elementToNodeMapsToPrint_; + //! -2 = no output, -1 = all levels + int graphOutputLevel_; - // Data we'll need to save, not necessarily print - Teuchos::Array dataToSave_; + //! 
Lists of entities to be exported (or saved) + // Items here get handled manually + Teuchos::Array nullspaceToPrint_; + Teuchos::Array coordinatesToPrint_; + Teuchos::Array aggregatesToPrint_; + Teuchos::Array elementToNodeMapsToPrint_; - // Matrices we'll need to print - std::map > matricesToPrint_; + // Data we'll need to save, not necessarily print + Teuchos::Array dataToSave_; - Teuchos::RCP matvecParams_; + // Matrices we'll need to print + std::map> matricesToPrint_; - std::map > keep_; - //! @} + Teuchos::RCP matvecParams_; - private: - // Set the keep flags for Export Data - void ExportDataSetKeepFlags(Hierarchy& H, const Teuchos::Array& data, const std::string& name) const { - for (int i = 0; i < data.size(); ++i) { - if (data[i] < H.GetNumLevels()) { - RCP L = H.GetLevel(data[i]); - if(!L.is_null() && data[i] < levelManagers_.size()) - L->AddKeepFlag(name, &*levelManagers_[data[i]]->GetFactory(name)); - } - } - } + std::map> keep_; + //! @} - void ExportDataSetKeepFlagsNextLevel(Hierarchy& H, const Teuchos::Array& data, const std::string& name) const { - for (int i = 0; i < data.size(); ++i) { - if (data[i] < H.GetNumLevels()) { - RCP L = H.GetLevel(data[i]); - if(!L.is_null() && data[i]+1 < levelManagers_.size()) - L->AddKeepFlag(name, &*levelManagers_[data[i]+1]->GetFactory(name)); - } +private: + // Set the keep flags for Export Data + void ExportDataSetKeepFlags(Hierarchy &H, const Teuchos::Array &data, + const std::string &name) const { + for (int i = 0; i < data.size(); ++i) { + if (data[i] < H.GetNumLevels()) { + RCP L = H.GetLevel(data[i]); + if (!L.is_null() && data[i] < levelManagers_.size()) + L->AddKeepFlag(name, &*levelManagers_[data[i]]->GetFactory(name)); } } - - // Set the keep flags for Export Data - void ExportDataSetKeepFlagsAll(Hierarchy& H, const std::string& name) const { - for (int i=0; i < H.GetNumLevels(); i++ ) { - RCP L = H.GetLevel(i); - if(!L.is_null() && i < levelManagers_.size()) - L->AddKeepFlag(name, 
&*levelManagers_[i]->GetFactory(name)); + } + + void ExportDataSetKeepFlagsNextLevel(Hierarchy &H, + const Teuchos::Array &data, + const std::string &name) const { + for (int i = 0; i < data.size(); ++i) { + if (data[i] < H.GetNumLevels()) { + RCP L = H.GetLevel(data[i]); + if (!L.is_null() && data[i] + 1 < levelManagers_.size()) + L->AddKeepFlag(name, &*levelManagers_[data[i] + 1]->GetFactory(name)); } } - - - template - void WriteData(Hierarchy& H, const Teuchos::Array& data, const std::string& name) const { - for (int i = 0; i < data.size(); ++i) { - std::string fileName; - if (H.getObjectLabel() != "") - fileName = H.getObjectLabel() + "_" + name + "_" + Teuchos::toString(data[i]) + ".m"; - else - fileName = name + "_" + Teuchos::toString(data[i]) + ".m"; - - if (data[i] < H.GetNumLevels()) { - RCP L = H.GetLevel(data[i]); - if (data[i] < levelManagers_.size() && L->IsAvailable(name,&*levelManagers_[data[i]]->GetFactory(name))) { - // Try generating factory - RCP M = L->template Get< RCP >(name,&*levelManagers_[data[i]]->GetFactory(name)); - if (!M.is_null()) { - Xpetra::IO::Write(fileName,* M); - } + } + + // Set the keep flags for Export Data + void ExportDataSetKeepFlagsAll(Hierarchy &H, const std::string &name) const { + for (int i = 0; i < H.GetNumLevels(); i++) { + RCP L = H.GetLevel(i); + if (!L.is_null() && i < levelManagers_.size()) + L->AddKeepFlag(name, &*levelManagers_[i]->GetFactory(name)); + } + } + + template + void WriteData(Hierarchy &H, const Teuchos::Array &data, + const std::string &name) const { + for (int i = 0; i < data.size(); ++i) { + std::string fileName; + if (H.getObjectLabel() != "") + fileName = H.getObjectLabel() + "_" + name + "_" + + Teuchos::toString(data[i]) + ".m"; + else + fileName = name + "_" + Teuchos::toString(data[i]) + ".m"; + + if (data[i] < H.GetNumLevels()) { + RCP L = H.GetLevel(data[i]); + if (data[i] < levelManagers_.size() && + L->IsAvailable(name, &*levelManagers_[data[i]]->GetFactory(name))) { + // Try 
generating factory + RCP M = L->template Get>( + name, &*levelManagers_[data[i]]->GetFactory(name)); + if (!M.is_null()) { + Xpetra::IO::Write( + fileName, *M); } - else if (L->IsAvailable(name)) { - // Try nofactory - RCP M = L->template Get< RCP >(name); - if (!M.is_null()) { - Xpetra::IO::Write(fileName,* M); - } + } else if (L->IsAvailable(name)) { + // Try nofactory + RCP M = L->template Get>(name); + if (!M.is_null()) { + Xpetra::IO::Write( + fileName, *M); } } } } - - void WriteDataAggregates(Hierarchy& H, const Teuchos::Array& data, const std::string& name) const { - for (int i = 0; i < data.size(); ++i) { - const std::string fileName = name + "_" + Teuchos::toString(data[i]) + ".m"; - - if (data[i] < H.GetNumLevels()) { - RCP L = H.GetLevel(data[i]); - - // NOTE: Aggregates use the next level's factory - RCP agg; - if(data[i]+1 < H.GetNumLevels() && L->IsAvailable(name,&*levelManagers_[data[i]+1]->GetFactory(name))) { - // Try generating factory - agg = L->template Get< RCP >(name,&*levelManagers_[data[i]+1]->GetFactory(name)); - } - else if (L->IsAvailable(name)) { - agg = L->template Get >("Aggregates"); - } - if(!agg.is_null()) { - std::ofstream ofs(fileName); - Teuchos::FancyOStream fofs(rcp(&ofs,false)); - agg->print(fofs,Teuchos::VERB_EXTREME); - } + } + + void WriteDataAggregates(Hierarchy &H, const Teuchos::Array &data, + const std::string &name) const { + for (int i = 0; i < data.size(); ++i) { + const std::string fileName = + name + "_" + Teuchos::toString(data[i]) + ".m"; + + if (data[i] < H.GetNumLevels()) { + RCP L = H.GetLevel(data[i]); + + // NOTE: Aggregates use the next level's factory + RCP agg; + if (data[i] + 1 < H.GetNumLevels() && + L->IsAvailable(name, + &*levelManagers_[data[i] + 1]->GetFactory(name))) { + // Try generating factory + agg = L->template Get>( + name, &*levelManagers_[data[i] + 1]->GetFactory(name)); + } else if (L->IsAvailable(name)) { + agg = L->template Get>("Aggregates"); + } + if (!agg.is_null()) { + std::ofstream 
ofs(fileName); + Teuchos::FancyOStream fofs(rcp(&ofs, false)); + agg->print(fofs, Teuchos::VERB_EXTREME); } } } - - template - void WriteDataFC(Hierarchy& H, const Teuchos::Array& data, const std::string& name, const std::string & ofname) const { - for (int i = 0; i < data.size(); ++i) { - const std::string fileName = ofname + "_" + Teuchos::toString(data[i]) + ".m"; - - if (data[i] < H.GetNumLevels()) { - RCP L = H.GetLevel(data[i]); - - if (L->IsAvailable(name)) { - RCP M = L->template Get< RCP >(name); - if (!M.is_null()) { - RCP A = L->template Get >("A"); - RCP AG = A->getCrsGraph(); - WriteFieldContainer(fileName,*M,*AG->getColMap()); - } + } + + template + void WriteDataFC(Hierarchy &H, const Teuchos::Array &data, + const std::string &name, const std::string &ofname) const { + for (int i = 0; i < data.size(); ++i) { + const std::string fileName = + ofname + "_" + Teuchos::toString(data[i]) + ".m"; + + if (data[i] < H.GetNumLevels()) { + RCP L = H.GetLevel(data[i]); + + if (L->IsAvailable(name)) { + RCP M = L->template Get>(name); + if (!M.is_null()) { + RCP A = L->template Get>("A"); + RCP AG = A->getCrsGraph(); + WriteFieldContainer(fileName, *M, *AG->getColMap()); } } } } + } - // For dumping an IntrepidPCoarsening element-to-node map to disk - template - void WriteFieldContainer(const std::string& fileName, T & fcont,const Map &colMap) const { + // For dumping an IntrepidPCoarsening element-to-node map to disk + template + void WriteFieldContainer(const std::string &fileName, T &fcont, + const Map &colMap) const { - size_t num_els = (size_t) fcont.extent(0); - size_t num_vecs =(size_t) fcont.extent(1); + size_t num_els = (size_t)fcont.extent(0); + size_t num_vecs = (size_t)fcont.extent(1); - // Generate rowMap - Teuchos::RCP rowMap = Xpetra::MapFactory::Build(colMap.lib(),Teuchos::OrdinalTraits::invalid(),fcont.extent(0),colMap.getIndexBase(),colMap.getComm()); + // Generate rowMap + Teuchos::RCP rowMap = Xpetra::MapFactory::Build( + colMap.lib(), 
Teuchos::OrdinalTraits::invalid(), + fcont.extent(0), colMap.getIndexBase(), colMap.getComm()); - // Fill multivector to use *petra dump routines - RCP vec = Xpetra::MultiVectorFactory::Build(rowMap,num_vecs); + // Fill multivector to use *petra dump routines + RCP vec = + Xpetra::MultiVectorFactory::Build(rowMap, num_vecs); - for(size_t j=0; j v = vec->getDataNonConst(j); - for(size_t i=0; i::Write(fileName,*vec); + for (size_t j = 0; j < num_vecs; j++) { + Teuchos::ArrayRCP v = vec->getDataNonConst(j); + for (size_t i = 0; i < num_els; i++) + v[i] = colMap.getGlobalElement(fcont(i, j)); } + Xpetra::IO::Write(fileName, *vec); + } + // Levels + Array> + levelManagers_; // one FactoryManager per level (the last levelManager is + // used for all the remaining levels) - // Levels - Array > levelManagers_; // one FactoryManager per level (the last levelManager is used for all the remaining levels) - - }; // class HierarchyManager +}; // class HierarchyManager } // namespace MueLu #define MUELU_HIERARCHYMANAGER_SHORT #endif // MUELU_HIERARCHYMANAGER_HPP -//TODO: split into _decl/_def -// TODO: default value for first param (FactoryManager()) should not be duplicated (code maintainability) +// TODO: split into _decl/_def +// TODO: default value for first param (FactoryManager()) should not be +// duplicated (code maintainability) diff --git a/packages/muelu/src/Interface/MueLu_ML2MueLuParameterTranslator.cpp b/packages/muelu/src/Interface/MueLu_ML2MueLuParameterTranslator.cpp index 6c84218ea930..25242c64e4bc 100644 --- a/packages/muelu/src/Interface/MueLu_ML2MueLuParameterTranslator.cpp +++ b/packages/muelu/src/Interface/MueLu_ML2MueLuParameterTranslator.cpp @@ -46,422 +46,674 @@ #include "MueLu_ConfigDefs.hpp" #if defined(HAVE_MUELU_ML) -# include -# if defined(HAVE_ML_EPETRA) && defined(HAVE_ML_TEUCHOS) -# include -# include // for default values -# include -# endif +#include +#if defined(HAVE_ML_EPETRA) && defined(HAVE_ML_TEUCHOS) +#include // for default values 
+#include +#include +#endif #endif #include namespace MueLu { +std::string ML2MueLuParameterTranslator::GetSmootherFactory( + const Teuchos::ParameterList ¶mList, + Teuchos::ParameterList &adaptingParamList, const std::string &pname, + const std::string &value) { - std::string ML2MueLuParameterTranslator::GetSmootherFactory(const Teuchos::ParameterList& paramList, Teuchos::ParameterList& adaptingParamList, const std::string& pname, const std::string& value) { - - TEUCHOS_TEST_FOR_EXCEPTION(pname != "coarse: type" && pname != "coarse: list" && pname != "smoother: type" && pname.find("smoother: list",0) != 0, + TEUCHOS_TEST_FOR_EXCEPTION( + pname != "coarse: type" && pname != "coarse: list" && + pname != "smoother: type" && pname.find("smoother: list", 0) != 0, Exceptions::RuntimeError, - "MueLu::MLParameterListInterpreter::Setup(): Only \"coarse: type\", \"smoother: type\" or \"smoother: list\" (\"coarse: list\") are " - "supported as ML parameters for transformation of smoother/solver parameters to MueLu"); - - // string stream containing the smoother/solver xml parameters - std::stringstream mueluss; - - // Check whether we are dealing with coarse level (solver) parameters or level smoother parameters - std::string mode = "smoother:"; - if (pname.find("coarse:", 0) == 0) - mode = "coarse:"; - - // check whether pre and/or post smoothing - std::string PreOrPost = "both"; - if (paramList.isParameter(mode + " pre or post")) - PreOrPost = paramList.get(mode + " pre or post"); - - TEUCHOS_TEST_FOR_EXCEPTION(mode == "coarse:" && PreOrPost != "both", Exceptions::RuntimeError, - "MueLu::MLParameterListInterpreter::Setup(): The parameter \"coarse: pre or post\" is not supported by MueLu. " - "It does not make sense for direct solvers. For iterative solvers you obtain the same effect by increasing, " - "e.g., the number of sweeps for the coarse grid smoother. 
Please remove it from your parameters."); - - // select smoother type - std::string valuestr = value; // temporary variable - std::transform(valuestr.begin(), valuestr.end(), valuestr.begin(), ::tolower); - if ( valuestr == "jacobi" || valuestr == "gauss-seidel" || valuestr == "symmetric gauss-seidel" ) { - std::string my_name; - if ( PreOrPost == "both" ) my_name = "\"" + pname + "\""; - else my_name = "\"smoother: " + PreOrPost + " type\""; - mueluss << "" << std::endl; - - } else if ( valuestr == "hiptmair" ) { - std::string my_name; - if ( PreOrPost == "both" ) my_name = "\"" + pname + "\""; - else my_name = "\"smoother: " + PreOrPost + " type\""; - mueluss << "" << std::endl; - - } else if ( valuestr == "ifpack" ) { - std::string my_name = "\"" + pname + "\""; - if ( paramList.isParameter("smoother: ifpack type") ) { - if ( paramList.get("smoother: ifpack type") == "ILU" ) { - mueluss << "" << std::endl; - adaptingParamList.remove("smoother: ifpack type",false); - } - if ( paramList.get("smoother: ifpack type") == "ILUT" ) { - mueluss << "" << std::endl; - adaptingParamList.remove("smoother: ifpack type",false); - } + "MueLu::MLParameterListInterpreter::Setup(): Only \"coarse: type\", " + "\"smoother: type\" or \"smoother: list\" (\"coarse: list\") are " + "supported as ML parameters for transformation of smoother/solver " + "parameters to MueLu"); + + // string stream containing the smoother/solver xml parameters + std::stringstream mueluss; + + // Check whether we are dealing with coarse level (solver) parameters or level + // smoother parameters + std::string mode = "smoother:"; + if (pname.find("coarse:", 0) == 0) + mode = "coarse:"; + + // check whether pre and/or post smoothing + std::string PreOrPost = "both"; + if (paramList.isParameter(mode + " pre or post")) + PreOrPost = paramList.get(mode + " pre or post"); + + TEUCHOS_TEST_FOR_EXCEPTION( + mode == "coarse:" && PreOrPost != "both", Exceptions::RuntimeError, + 
"MueLu::MLParameterListInterpreter::Setup(): The parameter \"coarse: pre " + "or post\" is not supported by MueLu. " + "It does not make sense for direct solvers. For iterative solvers you " + "obtain the same effect by increasing, " + "e.g., the number of sweeps for the coarse grid smoother. Please remove " + "it from your parameters."); + + // select smoother type + std::string valuestr = value; // temporary variable + std::transform(valuestr.begin(), valuestr.end(), valuestr.begin(), ::tolower); + if (valuestr == "jacobi" || valuestr == "gauss-seidel" || + valuestr == "symmetric gauss-seidel") { + std::string my_name; + if (PreOrPost == "both") + my_name = "\"" + pname + "\""; + else + my_name = "\"smoother: " + PreOrPost + " type\""; + mueluss << "" << std::endl; + + } else if (valuestr == "hiptmair") { + std::string my_name; + if (PreOrPost == "both") + my_name = "\"" + pname + "\""; + else + my_name = "\"smoother: " + PreOrPost + " type\""; + mueluss << "" << std::endl; + + } else if (valuestr == "ifpack") { + std::string my_name = "\"" + pname + "\""; + if (paramList.isParameter("smoother: ifpack type")) { + if (paramList.get("smoother: ifpack type") == "ILU") { + mueluss << "" << std::endl; + adaptingParamList.remove("smoother: ifpack type", false); + } + if (paramList.get("smoother: ifpack type") == "ILUT") { + mueluss << "" << std::endl; + adaptingParamList.remove("smoother: ifpack type", false); } + } - } else if (( valuestr == "chebyshev" ) || ( valuestr == "mls" )) { - std::string my_name = "\"" + pname + "\""; - mueluss << "" << std::endl; + } else if ((valuestr == "chebyshev") || (valuestr == "mls")) { + std::string my_name = "\"" + pname + "\""; + mueluss << "" << std::endl; + + } else if (valuestr.length() > strlen("amesos") && + valuestr.substr(0, strlen("amesos")) == + "amesos") { /* catch Amesos-* */ + std::string solverType = + valuestr.substr(strlen("amesos") + 1); /* ("amesos-klu" -> "klu") */ + + bool valid = false; + const int validatorSize 
= 5; + std::string validator[validatorSize] = {"superlu", "superludist", "klu", + "umfpack", "mumps"}; + for (int i = 0; i < validatorSize; i++) + if (validator[i] == solverType) + valid = true; + TEUCHOS_TEST_FOR_EXCEPTION( + !valid, Exceptions::RuntimeError, + "MueLu::MLParameterListInterpreter: unknown smoother type. '" + << solverType << "' not supported."); + + mueluss << "" << std::endl; + + } else { + // TODO error message + std::cout << "error in " << __FILE__ << ":" << __LINE__ + << " could not find valid smoother/solver" << std::endl; + } - } else if (valuestr.length() > strlen("amesos") && valuestr.substr(0, strlen("amesos")) == "amesos") { /* catch Amesos-* */ - std::string solverType = valuestr.substr(strlen("amesos")+1); /* ("amesos-klu" -> "klu") */ + // set smoother: pre or post parameter + // Note that there is no "coarse: pre or post" in MueLu! + if (paramList.isParameter("smoother: pre or post") && mode == "smoother:") { + // std::cout << "paramList" << paramList << std::endl; + // std::string smootherPreOrPost = paramList.get("smoother: pre + // or post"); std::cout << "Create pre or post parameter with " << + // smootherPreOrPost << std::endl; + mueluss + << "" << std::endl; + adaptingParamList.remove("smoother: pre or post", false); + } - bool valid = false; - const int validatorSize = 5; - std::string validator[validatorSize] = {"superlu", "superludist", "klu", "umfpack", "mumps"}; - for (int i=0; i < validatorSize; i++) - if (validator[i] == solverType) - valid = true; - TEUCHOS_TEST_FOR_EXCEPTION(!valid, Exceptions::RuntimeError, - "MueLu::MLParameterListInterpreter: unknown smoother type. 
'" << solverType << "' not supported."); + // create smoother parameter list + if (PreOrPost != "both") { + mueluss << "" + << std::endl; + } else { + mueluss << "" << std::endl; + } - mueluss << "" << std::endl; + // relaxation based smoothers: - } else { - // TODO error message - std::cout << "error in " << __FILE__ << ":" << __LINE__ << " could not find valid smoother/solver" << std::endl; + if (valuestr == "jacobi" || valuestr == "gauss-seidel" || + valuestr == "symmetric gauss-seidel") { + if (valuestr == "jacobi") { + mueluss << "" + << std::endl; + adaptingParamList.remove("relaxation: type", false); + } + if (valuestr == "gauss-seidel") { + mueluss << "" + << std::endl; + adaptingParamList.remove("relaxation: type", false); + } + if (valuestr == "symmetric gauss-seidel") { + mueluss << "" + << std::endl; + adaptingParamList.remove("relaxation: type", false); } - // set smoother: pre or post parameter - // Note that there is no "coarse: pre or post" in MueLu! - if ( paramList.isParameter("smoother: pre or post") && mode == "smoother:") { - //std::cout << "paramList" << paramList << std::endl; - //std::string smootherPreOrPost = paramList.get("smoother: pre or post"); - //std::cout << "Create pre or post parameter with " << smootherPreOrPost << std::endl; - mueluss << "" << std::endl; - adaptingParamList.remove("smoother: pre or post",false); + if (paramList.isParameter("smoother: sweeps")) { + mueluss << "("smoother: sweeps") << "\"/>" << std::endl; + adaptingParamList.remove("smoother: sweeps", false); } + if (paramList.isParameter("smoother: damping factor")) { + mueluss << "("smoother: damping factor") << "\"/>" + << std::endl; + adaptingParamList.remove("smoother: damping factor", false); + } + if (paramList.isParameter("smoother: use l1 Gauss-Seidel")) { + mueluss << "("smoother: use l1 Gauss-Seidel") << "\"/>" + << std::endl; + adaptingParamList.remove("smoother: use l1 Gauss-Seidel", false); + } + } - // create smoother parameter list - if (PreOrPost 
!= "both") { - mueluss << "" << std::endl; + // Chebyshev + if (valuestr == "chebyshev") { + if (paramList.isParameter("smoother: polynomial order")) { + mueluss << "("smoother: polynomial order") << "\"/>" + << std::endl; + adaptingParamList.remove("smoother: polynomial order", false); } else { - mueluss << "" << std::endl; + mueluss + << "" + << std::endl; } - - // relaxation based smoothers: - - if ( valuestr == "jacobi" || valuestr == "gauss-seidel" || valuestr == "symmetric gauss-seidel" ) { - if ( valuestr == "jacobi" ) { mueluss << "" << std::endl; adaptingParamList.remove("relaxation: type",false); } - if ( valuestr == "gauss-seidel" ) { mueluss << "" << std::endl; adaptingParamList.remove("relaxation: type",false); } - if ( valuestr == "symmetric gauss-seidel" ) { mueluss << "" << std::endl; adaptingParamList.remove("relaxation: type",false); } - - if ( paramList.isParameter("smoother: sweeps") ) { mueluss << "("smoother: sweeps") << "\"/>" << std::endl; adaptingParamList.remove("smoother: sweeps",false); } - if ( paramList.isParameter("smoother: damping factor") ) { mueluss << "("smoother: damping factor") << "\"/>" << std::endl; adaptingParamList.remove("smoother: damping factor",false); } - if ( paramList.isParameter("smoother: use l1 Gauss-Seidel") ) { mueluss << "("smoother: use l1 Gauss-Seidel") << "\"/>" << std::endl; adaptingParamList.remove("smoother: use l1 Gauss-Seidel",false); } + if (paramList.isParameter("smoother: Chebyshev alpha")) { + mueluss << "("smoother: Chebyshev alpha") << "\"/>" + << std::endl; + adaptingParamList.remove("smoother: Chebyshev alpha", false); + } else { + mueluss << "" + << std::endl; + adaptingParamList.remove("smoother: Chebyshev alpha", false); } - - // Chebyshev - if ( valuestr == "chebyshev") { - if ( paramList.isParameter("smoother: polynomial order") ) { mueluss << "("smoother: polynomial order") << "\"/>" << std::endl; adaptingParamList.remove("smoother: polynomial order",false); } - else { mueluss << "" << 
std::endl; } - if ( paramList.isParameter("smoother: Chebyshev alpha") ) { mueluss << "("smoother: Chebyshev alpha") << "\"/>" << std::endl; adaptingParamList.remove("smoother: Chebyshev alpha",false); } - else { mueluss << "" << std::endl; adaptingParamList.remove("smoother: Chebyshev alpha",false); } - if ( paramList.isParameter("eigen-analysis: type") ) { mueluss << "("eigen-analysis: type") << "\"/>" << std::endl; adaptingParamList.remove("eigen-analysis: type",false); } - else { mueluss << "" << std::endl; } + if (paramList.isParameter("eigen-analysis: type")) { + mueluss + << "("eigen-analysis: type") << "\"/>" + << std::endl; + adaptingParamList.remove("eigen-analysis: type", false); + } else { + mueluss << "" + << std::endl; } + } - // MLS - if ( valuestr == "mls") { - if ( paramList.isParameter("smoother: MLS polynomial order") ) { mueluss << "("smoother: MLS polynomial order") << "\"/>" << std::endl; adaptingParamList.remove("smoother: MLS polynomial order",false); } - else if ( paramList.isParameter("smoother: polynomial order") ) { mueluss << "("smoother: polynomial order") << "\"/>" << std::endl; adaptingParamList.remove("smoother: polynomial order",false); } - else { mueluss << "" << std::endl; } - if ( paramList.isParameter("smoother: MLS alpha") ) { mueluss << "("smoother: MLS alpha") << "\"/>" << std::endl; adaptingParamList.remove("smoother: MLS alpha",false); } - else if ( paramList.isParameter("smoother: Chebyshev alpha") ) { mueluss << "("smoother: Chebyshev alpha") << "\"/>" << std::endl; adaptingParamList.remove("smoother: Chebyshev alpha",false); } - else { mueluss << "" << std::endl; } - if ( paramList.isParameter("eigen-analysis: type") ) { mueluss << "("eigen-analysis: type") << "\"/>" << std::endl; adaptingParamList.remove("eigen-analysis: type",false); } - else { mueluss << "" << std::endl; } + // MLS + if (valuestr == "mls") { + if (paramList.isParameter("smoother: MLS polynomial order")) { + mueluss << "("smoother: MLS polynomial 
order") << "\"/>" + << std::endl; + adaptingParamList.remove("smoother: MLS polynomial order", false); + } else if (paramList.isParameter("smoother: polynomial order")) { + mueluss << "("smoother: polynomial order") << "\"/>" + << std::endl; + adaptingParamList.remove("smoother: polynomial order", false); + } else { + mueluss + << "" + << std::endl; } + if (paramList.isParameter("smoother: MLS alpha")) { + mueluss << "("smoother: MLS alpha") << "\"/>" + << std::endl; + adaptingParamList.remove("smoother: MLS alpha", false); + } else if (paramList.isParameter("smoother: Chebyshev alpha")) { + mueluss << "("smoother: Chebyshev alpha") << "\"/>" + << std::endl; + adaptingParamList.remove("smoother: Chebyshev alpha", false); + } else { + mueluss << "" + << std::endl; + } + if (paramList.isParameter("eigen-analysis: type")) { + mueluss + << "("eigen-analysis: type") << "\"/>" + << std::endl; + adaptingParamList.remove("eigen-analysis: type", false); + } else { + mueluss << "" + << std::endl; + } + } - if ( valuestr == "hiptmair" ) { - std::string subSmootherType = "Chebyshev"; - if (paramList.isParameter("subsmoother: type")) - subSmootherType = paramList.get("subsmoother: type"); - std::string subSmootherIfpackType; - if (subSmootherType == "Chebyshev") - subSmootherIfpackType = "CHEBYSHEV"; - else if (subSmootherType == "Jacobi" || subSmootherType == "Gauss-Seidel" || subSmootherType == "symmetric Gauss-Seidel") { - if (subSmootherType == "symmetric Gauss-Seidel") subSmootherType = "Symmetric Gauss-Seidel"; // FIXME - subSmootherIfpackType = "RELAXATION"; - } else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListTranslator: unknown smoother type. 
'" << subSmootherType << "' not supported by MueLu."); - - mueluss << "" << std::endl; - mueluss << "" << std::endl; - - mueluss << "" << std::endl; - if (subSmootherType == "Chebyshev") { - if (paramList.isParameter("subsmoother: edge sweeps")) { - mueluss << "("subsmoother: edge sweeps") << "\"/>" << std::endl; - adaptingParamList.remove("subsmoother: edge sweeps", false); - } - if (paramList.isParameter("subsmoother: Chebyshev alpha")) { - mueluss << "("subsmoother: Chebyshev alpha") << "\"/>" << std::endl; - } - } else { - if (paramList.isParameter("subsmoother: edge sweeps")) { - mueluss << "" << std::endl; - mueluss << "("subsmoother: edge sweeps") << "\"/>" << std::endl; - adaptingParamList.remove("subsmoother: edge sweeps", false); - } - if (paramList.isParameter("subsmoother: SGS damping factor")) { - mueluss << "("subsmoother: SGS damping factor") << "\"/>" << std::endl; - } + if (valuestr == "hiptmair") { + std::string subSmootherType = "Chebyshev"; + if (paramList.isParameter("subsmoother: type")) + subSmootherType = paramList.get("subsmoother: type"); + std::string subSmootherIfpackType; + if (subSmootherType == "Chebyshev") + subSmootherIfpackType = "CHEBYSHEV"; + else if (subSmootherType == "Jacobi" || subSmootherType == "Gauss-Seidel" || + subSmootherType == "symmetric Gauss-Seidel") { + if (subSmootherType == "symmetric Gauss-Seidel") + subSmootherType = "Symmetric Gauss-Seidel"; // FIXME + subSmootherIfpackType = "RELAXATION"; + } else + TEUCHOS_TEST_FOR_EXCEPTION( + true, Exceptions::RuntimeError, + "MueLu::MLParameterListTranslator: unknown smoother type. 
'" + << subSmootherType << "' not supported by MueLu."); + + mueluss << "" << std::endl; + mueluss << "" << std::endl; + + mueluss << "" + << std::endl; + if (subSmootherType == "Chebyshev") { + if (paramList.isParameter("subsmoother: edge sweeps")) { + mueluss << "("subsmoother: edge sweeps") << "\"/>" + << std::endl; + adaptingParamList.remove("subsmoother: edge sweeps", false); } - mueluss << "" << std::endl; - - mueluss << "" << std::endl; - if (subSmootherType == "Chebyshev") { - if (paramList.isParameter("subsmoother: node sweeps")) { - mueluss << "("subsmoother: node sweeps") << "\"/>" << std::endl; - adaptingParamList.remove("subsmoother: node sweeps", false); - } - if (paramList.isParameter("subsmoother: Chebyshev alpha")) { - mueluss << "("subsmoother: Chebyshev alpha") << "\"/>" << std::endl; - adaptingParamList.remove("subsmoother: Chebyshev alpha", false); - } - } else { - if (paramList.isParameter("subsmoother: node sweeps")) { - mueluss << "" << std::endl; - mueluss << "("subsmoother: node sweeps") << "\"/>" << std::endl; - adaptingParamList.remove("subsmoother: node sweeps", false); - } - if (paramList.isParameter("subsmoother: SGS damping factor")) { - mueluss << "("subsmoother: SGS damping factor") << "\"/>" << std::endl; - adaptingParamList.remove("subsmoother: SGS damping factor", false); - } + if (paramList.isParameter("subsmoother: Chebyshev alpha")) { + mueluss << "("subsmoother: Chebyshev alpha") + << "\"/>" << std::endl; + } + } else { + if (paramList.isParameter("subsmoother: edge sweeps")) { + mueluss + << "" << std::endl; + mueluss + << "("subsmoother: edge sweeps") << "\"/>" + << std::endl; + adaptingParamList.remove("subsmoother: edge sweeps", false); + } + if (paramList.isParameter("subsmoother: SGS damping factor")) { + mueluss << "("subsmoother: SGS damping factor") + << "\"/>" << std::endl; } - mueluss << "" << std::endl; - - } - - // parameters for ILU based preconditioners - if ( valuestr == "ifpack") { - - // add Ifpack 
parameters - if ( paramList.isParameter("smoother: ifpack overlap") ) { mueluss << "("smoother: ifpack overlap") << "\"/>" << std::endl; adaptingParamList.remove("smoother: ifpack overlap",false); } - if ( paramList.isParameter("smoother: ifpack level-of-fill") ) { mueluss << "("smoother: ifpack level-of-fill") << "\"/>" << std::endl; adaptingParamList.remove("smoother: ifpack level-of-fill",false); } - if ( paramList.isParameter("smoother: ifpack absolute threshold") ) { mueluss << "("smoother: ifpack absolute threshold") << "\"/>" << std::endl; adaptingParamList.remove("smoother: ifpack absolute threshold",false); } - if ( paramList.isParameter("smoother: ifpack relative threshold") ) { mueluss << "("smoother: ifpack relative threshold") << "\"/>" << std::endl; adaptingParamList.remove("smoother: ifpack relative threshold",false); } } - mueluss << "" << std::endl; - // max coarse level size parameter (outside of smoother parameter lists) - if ( paramList.isParameter("smoother: max size") ) { - mueluss << "("smoother: max size") << "\"/>" << std::endl; adaptingParamList.remove("smoother: max size",false); + mueluss << "" + << std::endl; + if (subSmootherType == "Chebyshev") { + if (paramList.isParameter("subsmoother: node sweeps")) { + mueluss << "("subsmoother: node sweeps") << "\"/>" + << std::endl; + adaptingParamList.remove("subsmoother: node sweeps", false); + } + if (paramList.isParameter("subsmoother: Chebyshev alpha")) { + mueluss << "("subsmoother: Chebyshev alpha") + << "\"/>" << std::endl; + adaptingParamList.remove("subsmoother: Chebyshev alpha", false); + } + } else { + if (paramList.isParameter("subsmoother: node sweeps")) { + mueluss + << "" << std::endl; + mueluss + << "("subsmoother: node sweeps") << "\"/>" + << std::endl; + adaptingParamList.remove("subsmoother: node sweeps", false); + } + if (paramList.isParameter("subsmoother: SGS damping factor")) { + mueluss << "("subsmoother: SGS damping factor") + << "\"/>" << std::endl; + 
adaptingParamList.remove("subsmoother: SGS damping factor", false); + } } - - return mueluss.str(); + mueluss << "" << std::endl; } - std::string ML2MueLuParameterTranslator::SetParameterList(const Teuchos::ParameterList & paramList_in, const std::string& defaultVals) { - Teuchos::ParameterList paramList = paramList_in; + // parameters for ILU based preconditioners + if (valuestr == "ifpack") { - RCP out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); // TODO: use internal out (GetOStream()) - -#if defined(HAVE_MUELU_ML) && defined(HAVE_ML_EPETRA) && defined(HAVE_ML_TEUCHOS) - - // TODO alternative with standard parameterlist from ML user guide? - - if (defaultVals != "") { - TEUCHOS_TEST_FOR_EXCEPTION(defaultVals!="SA" && defaultVals!="NSSA" && defaultVals!="refmaxwell" && defaultVals!="Maxwell", Exceptions::RuntimeError, - "MueLu::MLParameterListInterpreter: only \"SA\", \"NSSA\", \"refmaxwell\" and \"Maxwell\" allowed as options for ML default parameters."); - Teuchos::ParameterList ML_defaultlist; - if (defaultVals == "refmaxwell") - ML_Epetra::SetDefaultsRefMaxwell(ML_defaultlist); - else - ML_Epetra::SetDefaults(defaultVals,ML_defaultlist); - - // merge user parameters with default parameters - MueLu::MergeParameterList(paramList_in, ML_defaultlist, true); - paramList = ML_defaultlist; + // add Ifpack parameters + if (paramList.isParameter("smoother: ifpack overlap")) { + mueluss + << "("smoother: ifpack overlap") << "\"/>" + << std::endl; + adaptingParamList.remove("smoother: ifpack overlap", false); } -#else - if (defaultVals != "") { - // If no validator available: issue a warning and set parameter value to false in the output list - *out << "Warning: MueLu_ENABLE_ML=OFF, ML_ENABLE_Epetra=OFF or ML_ENABLE_TEUCHOS=OFF. No ML default values available." 
<< std::endl; + if (paramList.isParameter("smoother: ifpack level-of-fill")) { + mueluss << "("smoother: ifpack level-of-fill") << "\"/>" + << std::endl; + adaptingParamList.remove("smoother: ifpack level-of-fill", false); } -#endif // HAVE_MUELU_ML && HAVE_ML_EPETRA && HAVE_ML_TEUCHOS + if (paramList.isParameter("smoother: ifpack absolute threshold")) { + mueluss << "("smoother: ifpack absolute threshold") + << "\"/>" << std::endl; + adaptingParamList.remove("smoother: ifpack absolute threshold", false); + } + if (paramList.isParameter("smoother: ifpack relative threshold")) { + mueluss << "("smoother: ifpack relative threshold") + << "\"/>" << std::endl; + adaptingParamList.remove("smoother: ifpack relative threshold", false); + } + } - // - // Move smoothers/aggregation/coarse parameters to sublists - // + mueluss << "" << std::endl; - // ML allows to have level-specific smoothers/aggregation/coarse parameters at the top level of the list or/and defined in sublists: - // See also: ML Guide section 6.4.1, MueLu::CreateSublists, ML_CreateSublists - ParameterList paramListWithSubList; - MueLu::CreateSublists(paramList, paramListWithSubList); - paramList = paramListWithSubList; // swap - Teuchos::ParameterList adaptingParamList = paramList; // copy of paramList which is used to removed already interpreted parameters + // max coarse level size parameter (outside of smoother parameter lists) + if (paramList.isParameter("smoother: max size")) { + mueluss << "("smoother: max size") << "\"/>" << std::endl; + adaptingParamList.remove("smoother: max size", false); + } - // - // Validate parameter list - // - { - bool validate = paramList.get("ML validate parameter list", true); /* true = default in ML */ - if (validate && defaultVals!="refmaxwell") { - -#if defined(HAVE_MUELU_ML) && defined(HAVE_ML_EPETRA) && defined(HAVE_ML_TEUCHOS) - // Validate parameter list using ML validator - int depth = paramList.get("ML validate depth", 5); /* 5 = default in ML */ - 
TEUCHOS_TEST_FOR_EXCEPTION(! ML_Epetra::ValidateMLPParameters(paramList, depth), Exceptions::RuntimeError, - "ERROR: ML's Teuchos::ParameterList contains incorrect parameter!"); + return mueluss.str(); +} + +std::string ML2MueLuParameterTranslator::SetParameterList( + const Teuchos::ParameterList ¶mList_in, + const std::string &defaultVals) { + Teuchos::ParameterList paramList = paramList_in; + + RCP out = Teuchos::fancyOStream( + Teuchos::rcpFromRef(std::cout)); // TODO: use internal out (GetOStream()) + +#if defined(HAVE_MUELU_ML) && defined(HAVE_ML_EPETRA) && \ + defined(HAVE_ML_TEUCHOS) + + // TODO alternative with standard parameterlist from ML user guide? + + if (defaultVals != "") { + TEUCHOS_TEST_FOR_EXCEPTION( + defaultVals != "SA" && defaultVals != "NSSA" && + defaultVals != "refmaxwell" && defaultVals != "Maxwell", + Exceptions::RuntimeError, + "MueLu::MLParameterListInterpreter: only \"SA\", \"NSSA\", " + "\"refmaxwell\" and \"Maxwell\" allowed as options for ML default " + "parameters."); + Teuchos::ParameterList ML_defaultlist; + if (defaultVals == "refmaxwell") + ML_Epetra::SetDefaultsRefMaxwell(ML_defaultlist); + else + ML_Epetra::SetDefaults(defaultVals, ML_defaultlist); + + // merge user parameters with default parameters + MueLu::MergeParameterList(paramList_in, ML_defaultlist, true); + paramList = ML_defaultlist; + } #else - // If no validator available: issue a warning and set parameter value to false in the output list - *out << "Warning: MueLu_ENABLE_ML=OFF, ML_ENABLE_Epetra=OFF or ML_ENABLE_TEUCHOS=OFF. The parameter list cannot be validated." << std::endl; - paramList.set("ML validate parameter list", false); - + if (defaultVals != "") { + // If no validator available: issue a warning and set parameter value to + // false in the output list + *out << "Warning: MueLu_ENABLE_ML=OFF, ML_ENABLE_Epetra=OFF or " + "ML_ENABLE_TEUCHOS=OFF. No ML default values available." 
+ << std::endl; + } #endif // HAVE_MUELU_ML && HAVE_ML_EPETRA && HAVE_ML_TEUCHOS - } // if(validate) - } // scope - - - { - // Special handling of ML's aux aggregation - // - // In ML, when "aggregation: aux: enable" == true, the threshold - // is set via "aggregation: aux: threshold" instead of - // "aggregation: threshold". In MueLu, we use "aggregation: drop - // tol" regardless of "sa: use filtering". So depending on - // "aggregation: aux: enable" we use either one or the other - // threshold to set "aggregation: drop tol". - if (paramListWithSubList.isParameter("aggregation: aux: enable") && paramListWithSubList.get("aggregation: aux: enable")) { - if (paramListWithSubList.isParameter("aggregation: aux: threshold")) { - paramListWithSubList.set("aggregation: threshold", paramListWithSubList.get("aggregation: aux: threshold")); - paramListWithSubList.remove("aggregation: aux: threshold"); - } - } - } - // stringstream for concatenating xml parameter strings. - std::stringstream mueluss; - - // create surrounding MueLu parameter list - mueluss << "" << std::endl; - - // loop over all ML parameters in provided parameter list - for (ParameterList::ConstIterator param = paramListWithSubList.begin(); param != paramListWithSubList.end(); ++param) { - - // extract ML parameter name - const std::string & pname=paramListWithSubList.name(param); - - // extract corresponding (ML) value - // remove ParameterList specific information from result string - std::stringstream valuess; - valuess << paramList.entry(param); - std::string valuestr = valuess.str(); - replaceAll(valuestr, "[unused]", ""); - replaceAll(valuestr, "[default]", ""); - valuestr = trim(valuestr); - - // transform ML parameter to corresponding MueLu parameter and generate XML string - std::string valueInterpreterStr = "\"" + valuestr + "\""; - std::string ret = MasterList::interpretParameterName(MasterList::ML2MueLu(pname),valueInterpreterStr); - - // special handling for verbosity level - if (pname == "ML 
output") { - // Translate verbosity parameter - int verbosityLevel = std::stoi(valuestr); - std::string eVerbLevel = "none"; - if (verbosityLevel == 0) eVerbLevel = "none"; - if (verbosityLevel >= 1) eVerbLevel = "low"; - if (verbosityLevel >= 5) eVerbLevel = "medium"; - if (verbosityLevel >= 10) eVerbLevel = "high"; - if (verbosityLevel >= 11) eVerbLevel = "extreme"; - if (verbosityLevel >= 42) eVerbLevel = "test"; - if (verbosityLevel >= 666) eVerbLevel = "interfacetest"; - mueluss << "" << std::endl; - continue; - } + // + // Move smoothers/aggregation/coarse parameters to sublists + // + + // ML allows to have level-specific smoothers/aggregation/coarse parameters at + // the top level of the list or/and defined in sublists: See also: ML Guide + // section 6.4.1, MueLu::CreateSublists, ML_CreateSublists + ParameterList paramListWithSubList; + MueLu::CreateSublists(paramList, paramListWithSubList); + paramList = paramListWithSubList; // swap + Teuchos::ParameterList adaptingParamList = + paramList; // copy of paramList which is used to removed already + // interpreted parameters + + // + // Validate parameter list + // + { + bool validate = paramList.get("ML validate parameter list", + true); /* true = default in ML */ + if (validate && defaultVals != "refmaxwell") { + +#if defined(HAVE_MUELU_ML) && defined(HAVE_ML_EPETRA) && \ + defined(HAVE_ML_TEUCHOS) + // Validate parameter list using ML validator + int depth = paramList.get("ML validate depth", 5); /* 5 = default in ML */ + TEUCHOS_TEST_FOR_EXCEPTION( + !ML_Epetra::ValidateMLPParameters(paramList, depth), + Exceptions::RuntimeError, + "ERROR: ML's Teuchos::ParameterList contains incorrect parameter!"); +#else + // If no validator available: issue a warning and set parameter value to + // false in the output list + *out << "Warning: MueLu_ENABLE_ML=OFF, ML_ENABLE_Epetra=OFF or " + "ML_ENABLE_TEUCHOS=OFF. The parameter list cannot be validated." 
+ << std::endl; + paramList.set("ML validate parameter list", false); - // add XML string - if (ret != "") { - mueluss << ret << std::endl; +#endif // HAVE_MUELU_ML && HAVE_ML_EPETRA && HAVE_ML_TEUCHOS + } // if(validate) + } // scope - // remove parameter from ML parameter list - adaptingParamList.remove(pname,false); + { + // Special handling of ML's aux aggregation + // + // In ML, when "aggregation: aux: enable" == true, the threshold + // is set via "aggregation: aux: threshold" instead of + // "aggregation: threshold". In MueLu, we use "aggregation: drop + // tol" regardless of "sa: use filtering". So depending on + // "aggregation: aux: enable" we use either one or the other + // threshold to set "aggregation: drop tol". + if (paramListWithSubList.isParameter("aggregation: aux: enable") && + paramListWithSubList.get("aggregation: aux: enable")) { + if (paramListWithSubList.isParameter("aggregation: aux: threshold")) { + paramListWithSubList.set( + "aggregation: threshold", + paramListWithSubList.get("aggregation: aux: threshold")); + paramListWithSubList.remove("aggregation: aux: threshold"); } + } + } - // make sure that MueLu's phase2a matches ML's - mueluss << "" << std::endl; + // stringstream for concatenating xml parameter strings. 
+ std::stringstream mueluss; + + // create surrounding MueLu parameter list + mueluss << "" << std::endl; + + // loop over all ML parameters in provided parameter list + for (ParameterList::ConstIterator param = paramListWithSubList.begin(); + param != paramListWithSubList.end(); ++param) { + + // extract ML parameter name + const std::string &pname = paramListWithSubList.name(param); + + // extract corresponding (ML) value + // remove ParameterList specific information from result string + std::stringstream valuess; + valuess << paramList.entry(param); + std::string valuestr = valuess.str(); + replaceAll(valuestr, "[unused]", ""); + replaceAll(valuestr, "[default]", ""); + valuestr = trim(valuestr); + + // transform ML parameter to corresponding MueLu parameter and generate XML + // string + std::string valueInterpreterStr = "\"" + valuestr + "\""; + std::string ret = MasterList::interpretParameterName( + MasterList::ML2MueLu(pname), valueInterpreterStr); + + // special handling for verbosity level + if (pname == "ML output") { + // Translate verbosity parameter + int verbosityLevel = std::stoi(valuestr); + std::string eVerbLevel = "none"; + if (verbosityLevel == 0) + eVerbLevel = "none"; + if (verbosityLevel >= 1) + eVerbLevel = "low"; + if (verbosityLevel >= 5) + eVerbLevel = "medium"; + if (verbosityLevel >= 10) + eVerbLevel = "high"; + if (verbosityLevel >= 11) + eVerbLevel = "extreme"; + if (verbosityLevel >= 42) + eVerbLevel = "test"; + if (verbosityLevel >= 666) + eVerbLevel = "interfacetest"; + mueluss << "" << std::endl; + continue; + } - // make sure that MueLu's drop tol matches ML's - mueluss << "" << std::endl; + // add XML string + if (ret != "") { + mueluss << ret << std::endl; - // special handling for energy minimization - // TAW: this is not optimal for symmetric problems but at least works. - // for symmetric problems the "energy minimization" parameter should not exist anyway... 
- if (pname == "energy minimization: enable") { - mueluss << "" << std::endl; - mueluss << "" << std::endl; - } + // remove parameter from ML parameter list + adaptingParamList.remove(pname, false); + } - // special handling for smoothers - if (pname == "smoother: type") { + // make sure that MueLu's phase2a matches ML's + mueluss << "" + << std::endl; + + // make sure that MueLu's drop tol matches ML's + mueluss << "" + << std::endl; + + // special handling for energy minimization + // TAW: this is not optimal for symmetric problems but at least works. + // for symmetric problems the "energy minimization" parameter should + // not exist anyway... + if (pname == "energy minimization: enable") { + mueluss << "" + << std::endl; + mueluss << "" + << std::endl; + } - mueluss << GetSmootherFactory(paramList, adaptingParamList, pname, valuestr); + // special handling for smoothers + if (pname == "smoother: type") { - } + mueluss << GetSmootherFactory(paramList, adaptingParamList, pname, + valuestr); + } - // special handling for level-specific smoothers - if (pname.find("smoother: list (level",0) == 0) { - // Scan pname (ex: pname="smoother: type (level 2)") - std::string type, option; - int levelID=-1; - { - typedef Teuchos::ArrayRCP::size_type size_type; - Teuchos::Array ctype (size_type(pname.size()+1)); - Teuchos::Array coption(size_type(pname.size()+1)); - - int matched = sscanf(pname.c_str(),"%s %[^(](level %d)", ctype.getRawPtr(), coption.getRawPtr(), &levelID); // use [^(] instead of %s to allow for strings with white-spaces (ex: "ifpack list") - type = std::string(ctype.getRawPtr()); - option = std::string(coption.getRawPtr()); option.resize(option.size () - 1); // remove final white-space - - if (matched != 3 || (type != "smoother:")) { - TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "MueLu::CreateSublist(), Line " << __LINE__ << ". 
" - << "Error in creating level-specific sublists" << std::endl - << "Offending parameter: " << pname << std::endl); - } - - mueluss << "" << std::endl; - mueluss << GetSmootherFactory(paramList.sublist(pname),adaptingParamList.sublist(pname), "smoother: type", paramList.sublist(pname).get("smoother: type")); - mueluss << "" << std::endl; + // special handling for level-specific smoothers + if (pname.find("smoother: list (level", 0) == 0) { + // Scan pname (ex: pname="smoother: type (level 2)") + std::string type, option; + int levelID = -1; + { + typedef Teuchos::ArrayRCP::size_type size_type; + Teuchos::Array ctype(size_type(pname.size() + 1)); + Teuchos::Array coption(size_type(pname.size() + 1)); + + int matched = + sscanf(pname.c_str(), "%s %[^(](level %d)", ctype.getRawPtr(), + coption.getRawPtr(), + &levelID); // use [^(] instead of %s to allow for strings + // with white-spaces (ex: "ifpack list") + type = std::string(ctype.getRawPtr()); + option = std::string(coption.getRawPtr()); + option.resize(option.size() - 1); // remove final white-space + + if (matched != 3 || (type != "smoother:")) { + TEUCHOS_TEST_FOR_EXCEPTION( + true, MueLu::Exceptions::RuntimeError, + "MueLu::CreateSublist(), Line " + << __LINE__ << ". " + << "Error in creating level-specific sublists" << std::endl + << "Offending parameter: " << pname << std::endl); } - } - - // special handling for coarse level - TEUCHOS_TEST_FOR_EXCEPTION(paramList.isParameter("coarse: type"), Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter::Setup(): The parameter \"coarse: type\" should not exist but being stored in \"coarse: list\" instead."); - if ( pname == "coarse: list" ) { - - // interpret smoother/coarse solver data. - // Note, that we inspect the "coarse: list" sublist to define the "coarse" smoother/solver - // Be aware, that MueLu::CreateSublists renames the prefix of the parameters in the "coarse: list" from "coarse" to "smoother". 
- // Therefore, we have to check the values of the "smoother" parameters - TEUCHOS_TEST_FOR_EXCEPTION(!paramList.sublist("coarse: list").isParameter("smoother: type"), Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter::Setup(): no coarse grid solver defined."); - mueluss << GetSmootherFactory(paramList.sublist("coarse: list"), adaptingParamList.sublist("coarse: list"), "coarse: type", paramList.sublist("coarse: list").get("smoother: type")); - + mueluss << "" + << std::endl; + mueluss << GetSmootherFactory( + paramList.sublist(pname), adaptingParamList.sublist(pname), + "smoother: type", + paramList.sublist(pname).get("smoother: type")); + mueluss << "" << std::endl; } - } // for + } - mueluss << "" << std::endl; + // special handling for coarse level + TEUCHOS_TEST_FOR_EXCEPTION( + paramList.isParameter("coarse: type"), Exceptions::RuntimeError, + "MueLu::MLParameterListInterpreter::Setup(): The parameter \"coarse: " + "type\" should not exist but being stored in \"coarse: list\" " + "instead."); + if (pname == "coarse: list") { + + // interpret smoother/coarse solver data. + // Note, that we inspect the "coarse: list" sublist to define the "coarse" + // smoother/solver Be aware, that MueLu::CreateSublists renames the prefix + // of the parameters in the "coarse: list" from "coarse" to "smoother". 
+ // Therefore, we have to check the values of the "smoother" parameters + TEUCHOS_TEST_FOR_EXCEPTION( + !paramList.sublist("coarse: list").isParameter("smoother: type"), + Exceptions::RuntimeError, + "MueLu::MLParameterListInterpreter::Setup(): no coarse grid solver " + "defined."); + mueluss << GetSmootherFactory( + paramList.sublist("coarse: list"), + adaptingParamList.sublist("coarse: list"), "coarse: type", + paramList.sublist("coarse: list").get("smoother: type")); + } + } // for - return mueluss.str(); - } + mueluss << "" << std::endl; + return mueluss.str(); +} } // namespace MueLu diff --git a/packages/muelu/src/Interface/MueLu_ML2MueLuParameterTranslator.hpp b/packages/muelu/src/Interface/MueLu_ML2MueLuParameterTranslator.hpp index 34697f6fd4e9..34a6b5d0c964 100644 --- a/packages/muelu/src/Interface/MueLu_ML2MueLuParameterTranslator.hpp +++ b/packages/muelu/src/Interface/MueLu_ML2MueLuParameterTranslator.hpp @@ -47,8 +47,8 @@ #ifndef MUELU_ML2MUELUPARAMETERTRANSLATOR_HPP #define MUELU_ML2MUELUPARAMETERTRANSLATOR_HPP -#include #include +#include #include #include @@ -59,97 +59,112 @@ namespace MueLu { - /*! - @class ML2MueLuParameterTranslator class. - @brief Class that accepts ML-style parameters and builds a MueLu parameter list (easy input deck) - - This interpreter class is meant to make the transition from ML to MueLu easier. - */ - class ML2MueLuParameterTranslator { - public: - //! @name Constructors/Destructors. - //@{ - - //! Constructor. - ML2MueLuParameterTranslator() { } - - //! Destructor. 
- virtual ~ML2MueLuParameterTranslator() { } - - //@} - - //!@name Parameter translation from ML to MueLu - //@{ - - /// @brief: Translate ML parameters to MueLu parameter XML string - /// - /// @param [in] paramList_in: ML parameter list - /// @return std::string with MueLu XML parameters - static std::string translate(Teuchos::ParameterList & paramList, const std::string& defaultVals="") { - return SetParameterList(paramList, defaultVals); - } - - /// @brief: Translate ML parameters to MueLu parameter XML string - /// - /// @param [in] xmlFileName: file name with ML xml parameters - /// @return std::string with MueLu XML parameters - static std::string translate(const std::string & xmlFileName, const std::string& defaultVals="") { - Teuchos::RCP paramList = Teuchos::getParametersFromXmlFile(xmlFileName); - return SetParameterList(*paramList, defaultVals); - } - - //@} - - private: - - //! @name Parameter handling - //@{ - - /// @brief: Interpret parameter list - /// - /// @param [in] paramList_in: ML parameter list - /// @return std::string with MueLu XML parameters - static std::string SetParameterList(const Teuchos::ParameterList & paramList_in, const std::string& defaultVals); - - - /// @brief: Helper function which translates ML smoother/solver paramters to MueLu XML string - /// - /// @param [in] paramList: reference to Teuchos::ParameterList containing the ML smoother/solver parameters. - /// @param [in,out] adaptingParamList: reference to Teuchos::ParameterList containing the ML smoother/solver parameters. Note that the processed parameters are removed from the ParameterList. It can be used to detect non-interpreted ML parameters. 
- /// @param [in] pname: currently processed parameter TODO - /// @param [in] value: currently processed value TODO - static std::string GetSmootherFactory(const Teuchos::ParameterList& paramList, Teuchos::ParameterList& adaptingParamList, const std::string& pname, const std::string& value); - - //@} - - // - // helper routines - // - - // trim from start - static inline std::string <rim(std::string &s) { - s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int c){return !std::isspace(c);})); - return s; - } - - // trim from end - static inline std::string &rtrim(std::string &s) { - s.erase(std::find_if(s.rbegin(), s.rend(), [](int c){return !std::isspace(c);}).base(), s.end()); - return s; - } - - // trim from both ends - static inline std::string &trim(std::string &s) { - return ltrim(rtrim(s)); - } - - //! @name Member variables - //@{ - //std::string xmlString_; ///! string containing MueLu XML parameters corresponding to ML parameters - //@} - - }; // class MLParameterListInterpreter - +/*! + @class ML2MueLuParameterTranslator class. + @brief Class that accepts ML-style parameters and builds a MueLu parameter + list (easy input deck) + + This interpreter class is meant to make the transition from ML to MueLu + easier. +*/ +class ML2MueLuParameterTranslator { +public: + //! @name Constructors/Destructors. + //@{ + + //! Constructor. + ML2MueLuParameterTranslator() {} + + //! Destructor. 
+ virtual ~ML2MueLuParameterTranslator() {} + + //@} + + //!@name Parameter translation from ML to MueLu + //@{ + + /// @brief: Translate ML parameters to MueLu parameter XML string + /// + /// @param [in] paramList_in: ML parameter list + /// @return std::string with MueLu XML parameters + static std::string translate(Teuchos::ParameterList ¶mList, + const std::string &defaultVals = "") { + return SetParameterList(paramList, defaultVals); + } + + /// @brief: Translate ML parameters to MueLu parameter XML string + /// + /// @param [in] xmlFileName: file name with ML xml parameters + /// @return std::string with MueLu XML parameters + static std::string translate(const std::string &xmlFileName, + const std::string &defaultVals = "") { + Teuchos::RCP paramList = + Teuchos::getParametersFromXmlFile(xmlFileName); + return SetParameterList(*paramList, defaultVals); + } + + //@} + +private: + //! @name Parameter handling + //@{ + + /// @brief: Interpret parameter list + /// + /// @param [in] paramList_in: ML parameter list + /// @return std::string with MueLu XML parameters + static std::string + SetParameterList(const Teuchos::ParameterList ¶mList_in, + const std::string &defaultVals); + + /// @brief: Helper function which translates ML smoother/solver paramters to + /// MueLu XML string + /// + /// @param [in] paramList: reference to Teuchos::ParameterList containing the + /// ML smoother/solver parameters. + /// @param [in,out] adaptingParamList: reference to Teuchos::ParameterList + /// containing the ML smoother/solver parameters. Note that the processed + /// parameters are removed from the ParameterList. It can be used to detect + /// non-interpreted ML parameters. 
+ /// @param [in] pname: currently processed parameter TODO + /// @param [in] value: currently processed value TODO + static std::string + GetSmootherFactory(const Teuchos::ParameterList ¶mList, + Teuchos::ParameterList &adaptingParamList, + const std::string &pname, const std::string &value); + + //@} + + // + // helper routines + // + + // trim from start + static inline std::string <rim(std::string &s) { + s.erase(s.begin(), std::find_if(s.begin(), s.end(), + [](int c) { return !std::isspace(c); })); + return s; + } + + // trim from end + static inline std::string &rtrim(std::string &s) { + s.erase(std::find_if(s.rbegin(), s.rend(), + [](int c) { return !std::isspace(c); }) + .base(), + s.end()); + return s; + } + + // trim from both ends + static inline std::string &trim(std::string &s) { return ltrim(rtrim(s)); } + + //! @name Member variables + //@{ + // std::string xmlString_; ///! string containing MueLu XML parameters + // corresponding to ML parameters + //@} + +}; // class MLParameterListInterpreter } // end namespace MueLu diff --git a/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_decl.hpp b/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_decl.hpp index b03bb9e30e47..9e598b4d44d5 100644 --- a/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_decl.hpp +++ b/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_decl.hpp @@ -60,171 +60,187 @@ #include "MueLu_Hierarchy_fwd.hpp" #include "MueLu_SmootherFactory_fwd.hpp" -#include "MueLu_TentativePFactory_fwd.hpp" -#include "MueLu_SaPFactory_fwd.hpp" -#include "MueLu_PgPFactory_fwd.hpp" #include "MueLu_AmalgamationFactory_fwd.hpp" -#include "MueLu_TransPFactory_fwd.hpp" +#include "MueLu_CoalesceDropFactory_fwd.hpp" +#include "MueLu_DirectSolver_fwd.hpp" +#include "MueLu_FactoryBase_fwd.hpp" #include "MueLu_GenericRFactory_fwd.hpp" -#include "MueLu_SmootherPrototype_fwd.hpp" -#include "MueLu_TrilinosSmoother_fwd.hpp" #include "MueLu_IfpackSmoother_fwd.hpp" 
-#include "MueLu_DirectSolver_fwd.hpp" +#include "MueLu_NullspaceFactory_fwd.hpp" +#include "MueLu_PgPFactory_fwd.hpp" #include "MueLu_RAPFactory_fwd.hpp" -#include "MueLu_CoalesceDropFactory_fwd.hpp" +#include "MueLu_SaPFactory_fwd.hpp" +#include "MueLu_SmootherPrototype_fwd.hpp" +#include "MueLu_TentativePFactory_fwd.hpp" +#include "MueLu_TransPFactory_fwd.hpp" +#include "MueLu_TrilinosSmoother_fwd.hpp" #include "MueLu_UncoupledAggregationFactory_fwd.hpp" -#include "MueLu_NullspaceFactory_fwd.hpp" -#include "MueLu_FactoryBase_fwd.hpp" #if defined(HAVE_MUELU_ISORROPIA) && defined(HAVE_MPI) -#include "MueLu_RepartitionHeuristicFactory_fwd.hpp" -#include "MueLu_RepartitionFactory_fwd.hpp" -#include "MueLu_RebalanceTransferFactory_fwd.hpp" #include "MueLu_IsorropiaInterface_fwd.hpp" #include "MueLu_RebalanceAcFactory_fwd.hpp" #include "MueLu_RebalanceMapFactory_fwd.hpp" +#include "MueLu_RebalanceTransferFactory_fwd.hpp" +#include "MueLu_RepartitionFactory_fwd.hpp" +#include "MueLu_RepartitionHeuristicFactory_fwd.hpp" #endif namespace MueLu { - /* - Utility that from an existing Teuchos::ParameterList creates a new list, in - which level-specific parameters are replaced with sublists. - - Currently, level-specific parameters that begin with "smoother:" - or "aggregation:" are placed in sublists. Coarse options are also placed - in a coarse list. - - Example: - Input: - smoother: type (level 0) = symmetric Gauss-Seidel - smoother: sweeps (level 0) = 1 - Output: - smoother: list (level 0) -> - smoother: type = symmetric Gauss-Seidel - smoother: sweeps = 1 - */ - // This function is a copy of ML_CreateSublists to avoid dependency on ML - // Throw exception on error instead of exit() - void CreateSublists(const ParameterList &List, ParameterList &newList); - - - /*! - @class MLParameterListInterpreter class. - @brief Class that accepts ML-style parameters and builds a MueLu preconditioner. - This interpreter uses the same default values as ML. 
This allows to compare ML/MueLu results - - The parameter list is validated only if the package ML is available and parameter "ML validate parameter list" is true. - TODO: A warning is issued if ML is not available - */ - - template - class MLParameterListInterpreter : public HierarchyManager { +/* + Utility that from an existing Teuchos::ParameterList creates a new list, in + which level-specific parameters are replaced with sublists. + + Currently, level-specific parameters that begin with "smoother:" + or "aggregation:" are placed in sublists. Coarse options are also placed + in a coarse list. + + Example: + Input: + smoother: type (level 0) = symmetric Gauss-Seidel + smoother: sweeps (level 0) = 1 + Output: + smoother: list (level 0) -> + smoother: type = symmetric Gauss-Seidel + smoother: sweeps = 1 +*/ +// This function is a copy of ML_CreateSublists to avoid dependency on ML +// Throw exception on error instead of exit() +void CreateSublists(const ParameterList &List, ParameterList &newList); + +/*! + @class MLParameterListInterpreter class. + @brief Class that accepts ML-style parameters and builds a MueLu + preconditioner. This interpreter uses the same default values as ML. This + allows to compare ML/MueLu results + + The parameter list is validated only if the package ML is available and + parameter "ML validate parameter list" is true. + TODO: A warning is issued if ML is not available +*/ + +template +class MLParameterListInterpreter + : public HierarchyManager { #undef MUELU_MLPARAMETERLISTINTERPRETER_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ - - //! Constructor. - MLParameterListInterpreter() : nullspace_(NULL), blksize_(1) { } - - //! Constructor. - //! @param paramList: parameter list with ML parameters - //! @param[in] comm (RCP >): Optional RCP of a Teuchos communicator (default: Teuchos::null) - //! @param factoryList: vector with RCP of FactoryBase objects - //! - //! 
The factories in factoryList allow the user to add user-specific factories to the MueLu Hierarchy. - //! The idea is to be able to add some factories that write out some debug information etc. which are not handled by the ML - //! Parameter List itself. See information about the RAPFactory::AddTransferFactory method, too! - MLParameterListInterpreter(Teuchos::ParameterList & paramList, Teuchos::RCP > comm = Teuchos::null, std::vector > factoryList = std::vector >(0)); - - //! Constructor. - //! @param xmlFileName: file name for XML file with ML parameters - //! @param factoryList: vector with RCP of FactoryBase objects - //! - //! The factories in factoryList allow the user to add user-specific factories to the MueLu Hierarchy. - //! The idea is to be able to add some factories that write out some debug information etc. which are not handled by the ML - //! Parameter List itself. See information about the RAPFactory::AddTransferFactory method, too! - MLParameterListInterpreter(const std::string & xmlFileName,std::vector > factoryList = std::vector >(0)); - - //! Destructor. - virtual ~MLParameterListInterpreter() { } - - //@} - - //@{ - - void SetParameterList(const Teuchos::ParameterList & paramList); - - //@} - - //@{ - - //! Setup Hierarchy object - virtual void SetupHierarchy(Hierarchy & H) const; - - //@} - - //@{ - - //! @name static helper functions translating parameter list to factories - //! @brief static helper functions that also can be used from outside for translating ML parameters into MueLu objects - //@{ - - //! Read smoother options and build the corresponding smoother factory - // @param AFact: Factory used by smoother to find 'A' - static RCP GetSmootherFactory(const Teuchos::ParameterList & paramList, const RCP & AFact = Teuchos::null); - - //@} - - - //! @name Handling of additional user-specific transfer factories - //@{ - /*! @brief Add transfer factory in the end of list of transfer factories for RAPFactory. 
- - This allows the user to add user-specific factories to the MueLu Hierarchy. The idea is to be able - to add some factories that write out some debug information etc. which are not handled by the ML - Parameter List itself. See information about the RAPFactory::AddTransferFactory method, too! - */ - void AddTransferFactory(const RCP & factory); - - //! Returns number of transfer factories. - size_t NumTransferFactories() const; - //@} +public: + //! @name Constructors/Destructors. + //@{ + + //! Constructor. + MLParameterListInterpreter() : nullspace_(NULL), blksize_(1) {} + + //! Constructor. + //! @param paramList: parameter list with ML parameters + //! @param[in] comm (RCP >): Optional RCP of a Teuchos + //! communicator (default: Teuchos::null) + //! @param factoryList: vector with RCP of FactoryBase objects + //! + //! The factories in factoryList allow the user to add user-specific factories + //! to the MueLu Hierarchy. The idea is to be able to add some factories that + //! write out some debug information etc. which are not handled by the ML + //! Parameter List itself. See information about the + //! RAPFactory::AddTransferFactory method, too! + MLParameterListInterpreter( + Teuchos::ParameterList ¶mList, + Teuchos::RCP> comm = Teuchos::null, + std::vector> factoryList = + std::vector>(0)); + + //! Constructor. + //! @param xmlFileName: file name for XML file with ML parameters + //! @param factoryList: vector with RCP of FactoryBase objects + //! + //! The factories in factoryList allow the user to add user-specific factories + //! to the MueLu Hierarchy. The idea is to be able to add some factories that + //! write out some debug information etc. which are not handled by the ML + //! Parameter List itself. See information about the + //! RAPFactory::AddTransferFactory method, too! + MLParameterListInterpreter(const std::string &xmlFileName, + std::vector> factoryList = + std::vector>(0)); + + //! Destructor. 
+ virtual ~MLParameterListInterpreter() {} + + //@} + + //@{ + + void SetParameterList(const Teuchos::ParameterList ¶mList); + + //@} + + //@{ + + //! Setup Hierarchy object + virtual void SetupHierarchy(Hierarchy &H) const; + + //@} + + //@{ + + //! @name static helper functions translating parameter list to factories + //! @brief static helper functions that also can be used from outside for + //! translating ML parameters into MueLu objects + //@{ + + //! Read smoother options and build the corresponding smoother factory + // @param AFact: Factory used by smoother to find 'A' + static RCP + GetSmootherFactory(const Teuchos::ParameterList ¶mList, + const RCP &AFact = Teuchos::null); + + //@} + + //! @name Handling of additional user-specific transfer factories + //@{ + /*! @brief Add transfer factory in the end of list of transfer factories for + RAPFactory. + + This allows the user to add user-specific factories to the MueLu Hierarchy. + The idea is to be able to add some factories that write out some debug + information etc. which are not handled by the ML Parameter List itself. See + information about the RAPFactory::AddTransferFactory method, too! + */ + void AddTransferFactory(const RCP &factory); - private: + //! Returns number of transfer factories. + size_t NumTransferFactories() const; + //@} - //! nullspace can be embedded in the ML parameter list - int nullspaceDim_; - double* nullspace_; //TODO: replace by Teuchos::ArrayRCP<> +private: + //! nullspace can be embedded in the ML parameter list + int nullspaceDim_; + double *nullspace_; // TODO: replace by Teuchos::ArrayRCP<> - //! coordinates can be embedded in the ML parameter list - double* xcoord_; - double* ycoord_; - double* zcoord_; + //! coordinates can be embedded in the ML parameter list + double *xcoord_; + double *ycoord_; + double *zcoord_; - //! list of user-defined transfer Factories - //! We use this vector to add some special user-given factories to the Hierarchy (RAPFactory) - //! 
This way the user can extend the standard functionality of the MLParameterListInterpreter beyond the - //! capabibilities of ML. - std::vector > TransferFacts_; + //! list of user-defined transfer Factories + //! We use this vector to add some special user-given factories to the + //! Hierarchy (RAPFactory) This way the user can extend the standard + //! functionality of the MLParameterListInterpreter beyond the capabibilities + //! of ML. + std::vector> TransferFacts_; - //@{ Matrix configuration + //@{ Matrix configuration - //! Setup Operator object - virtual void SetupOperator(Operator & Op) const; + //! Setup Operator object + virtual void SetupOperator(Operator &Op) const; - //! Matrix configuration storage - int blksize_; + //! Matrix configuration storage + int blksize_; - //@} + //@} - }; // class MLParameterListInterpreter +}; // class MLParameterListInterpreter } // namespace MueLu diff --git a/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_def.hpp b/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_def.hpp index 63f6081c5719..8fa692c9c692 100644 --- a/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_def.hpp +++ b/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_def.hpp @@ -61,27 +61,27 @@ #include "MueLu_MLParameterListInterpreter_decl.hpp" -#include "MueLu_Level.hpp" -#include "MueLu_Hierarchy.hpp" #include "MueLu_FactoryManager.hpp" +#include "MueLu_Hierarchy.hpp" +#include "MueLu_Level.hpp" -#include "MueLu_TentativePFactory.hpp" -#include "MueLu_SaPFactory.hpp" -#include "MueLu_PgPFactory.hpp" #include "MueLu_AmalgamationFactory.hpp" -#include "MueLu_TransPFactory.hpp" -#include "MueLu_GenericRFactory.hpp" -#include "MueLu_SmootherPrototype.hpp" -#include "MueLu_SmootherFactory.hpp" -#include "MueLu_TrilinosSmoother.hpp" -#include "MueLu_IfpackSmoother.hpp" +#include "MueLu_CoalesceDropFactory.hpp" #include "MueLu_DirectSolver.hpp" +#include "MueLu_GenericRFactory.hpp" #include 
"MueLu_HierarchyUtils.hpp" -#include "MueLu_RAPFactory.hpp" -#include "MueLu_CoalesceDropFactory.hpp" -#include "MueLu_UncoupledAggregationFactory.hpp" +#include "MueLu_IfpackSmoother.hpp" #include "MueLu_NullspaceFactory.hpp" #include "MueLu_ParameterListUtils.hpp" +#include "MueLu_PgPFactory.hpp" +#include "MueLu_RAPFactory.hpp" +#include "MueLu_SaPFactory.hpp" +#include "MueLu_SmootherFactory.hpp" +#include "MueLu_SmootherPrototype.hpp" +#include "MueLu_TentativePFactory.hpp" +#include "MueLu_TransPFactory.hpp" +#include "MueLu_TrilinosSmoother.hpp" +#include "MueLu_UncoupledAggregationFactory.hpp" #include "MueLu_CoalesceDropFactory_kokkos.hpp" // #include "MueLu_CoordinatesTransferFactory_kokkos.hpp" @@ -92,709 +92,885 @@ #if defined(HAVE_MUELU_ISORROPIA) && defined(HAVE_MPI) #include "MueLu_IsorropiaInterface.hpp" -#include "MueLu_RepartitionHeuristicFactory.hpp" -#include "MueLu_RepartitionFactory.hpp" +#include "MueLu_RebalanceAcFactory.hpp" #include "MueLu_RebalanceTransferFactory.hpp" +#include "MueLu_RepartitionFactory.hpp" +#include "MueLu_RepartitionHeuristicFactory.hpp" #include "MueLu_RepartitionInterface.hpp" -#include "MueLu_RebalanceAcFactory.hpp" //#include "MueLu_RebalanceMapFactory.hpp" #endif // Note: do not add options that are only recognized by MueLu. 
-// TODO: this parameter list interpreter should force MueLu to use default ML parameters +// TODO: this parameter list interpreter should force MueLu to use default ML +// parameters // - Ex: smoother sweep=2 by default for ML -// Read a parameter value from a parameter list and store it into a variable named 'varName' -#define MUELU_READ_PARAM(paramList, paramStr, varType, defaultValue, varName) \ - varType varName = defaultValue; if (paramList.isParameter(paramStr)) varName = paramList.get(paramStr); - -// Read a parameter value from a paraeter list and copy it into a new parameter list (with another parameter name) -#define MUELU_COPY_PARAM(paramList, paramStr, varType, defaultValue, outParamList, outParamStr) \ - if (paramList.isParameter(paramStr)) \ - outParamList.set(outParamStr, paramList.get(paramStr)); \ - else outParamList.set(outParamStr, static_cast(defaultValue)); \ +// Read a parameter value from a parameter list and store it into a variable +// named 'varName' +#define MUELU_READ_PARAM(paramList, paramStr, varType, defaultValue, varName) \ + varType varName = defaultValue; \ + if (paramList.isParameter(paramStr)) \ + varName = paramList.get(paramStr); + +// Read a parameter value from a paraeter list and copy it into a new parameter +// list (with another parameter name) +#define MUELU_COPY_PARAM(paramList, paramStr, varType, defaultValue, \ + outParamList, outParamStr) \ + if (paramList.isParameter(paramStr)) \ + outParamList.set(outParamStr, paramList.get(paramStr)); \ + else \ + outParamList.set(outParamStr, static_cast(defaultValue)); namespace MueLu { - template - MLParameterListInterpreter::MLParameterListInterpreter(Teuchos::ParameterList & paramList, Teuchos::RCP > comm, std::vector > factoryList) : nullspace_(NULL), xcoord_(NULL), ycoord_(NULL), zcoord_(NULL),TransferFacts_(factoryList), blksize_(1) { - - if (paramList.isParameter("xml parameter file")){ - std::string filename = paramList.get("xml parameter file",""); - if 
(filename.length() != 0) { - TEUCHOS_TEST_FOR_EXCEPTION(comm.is_null(), Exceptions::RuntimeError, "xml parameter file requires a valid comm"); - Teuchos::ParameterList paramList2 = paramList; - Teuchos::updateParametersFromXmlFileAndBroadcast(filename, Teuchos::Ptr(¶mList2),*comm); - paramList2.remove("xml parameter file"); - SetParameterList(paramList2); - } - else - SetParameterList(paramList); - } - else +template +MLParameterListInterpreter:: + MLParameterListInterpreter(Teuchos::ParameterList ¶mList, + Teuchos::RCP> comm, + std::vector> factoryList) + : nullspace_(NULL), xcoord_(NULL), ycoord_(NULL), zcoord_(NULL), + TransferFacts_(factoryList), blksize_(1) { + + if (paramList.isParameter("xml parameter file")) { + std::string filename = paramList.get("xml parameter file", ""); + if (filename.length() != 0) { + TEUCHOS_TEST_FOR_EXCEPTION(comm.is_null(), Exceptions::RuntimeError, + "xml parameter file requires a valid comm"); + Teuchos::ParameterList paramList2 = paramList; + Teuchos::updateParametersFromXmlFileAndBroadcast( + filename, Teuchos::Ptr(¶mList2), *comm); + paramList2.remove("xml parameter file"); + SetParameterList(paramList2); + } else SetParameterList(paramList); - } - - template - MLParameterListInterpreter::MLParameterListInterpreter(const std::string & xmlFileName, std::vector > factoryList) : nullspace_(NULL), TransferFacts_(factoryList), blksize_(1) { - Teuchos::RCP paramList = Teuchos::getParametersFromXmlFile(xmlFileName); - SetParameterList(*paramList); - } - - template - void MLParameterListInterpreter::SetParameterList(const Teuchos::ParameterList & paramList_in) { - Teuchos::ParameterList paramList = paramList_in; - - // - // Read top-level of the parameter list - // - - // hard-coded default values == ML defaults according to the manual - MUELU_READ_PARAM(paramList, "ML output", int, 0, verbosityLevel); - MUELU_READ_PARAM(paramList, "max levels", int, 10, maxLevels); - MUELU_READ_PARAM(paramList, "PDE equations", int, 1, 
nDofsPerNode); - - MUELU_READ_PARAM(paramList, "coarse: max size", int, 128, maxCoarseSize); - - MUELU_READ_PARAM(paramList, "aggregation: type", std::string, "Uncoupled", agg_type); - //MUELU_READ_PARAM(paramList, "aggregation: threshold", double, 0.0, agg_threshold); - MUELU_READ_PARAM(paramList, "aggregation: damping factor", double, (double)4/(double)3, agg_damping); - //MUELU_READ_PARAM(paramList, "aggregation: smoothing sweeps", int, 1, agg_smoothingsweeps); - MUELU_READ_PARAM(paramList, "aggregation: nodes per aggregate", int, 1, minPerAgg); - MUELU_READ_PARAM(paramList, "aggregation: keep Dirichlet bcs", bool, false, bKeepDirichletBcs); // This is a MueLu specific extension that does not exist in ML - MUELU_READ_PARAM(paramList, "aggregation: max neighbours already aggregated", int, 0, maxNbrAlreadySelected); // This is a MueLu specific extension that does not exist in M - MUELU_READ_PARAM(paramList, "aggregation: aux: enable", bool, false, agg_use_aux); - MUELU_READ_PARAM(paramList, "aggregation: aux: threshold", double, false, agg_aux_thresh); - - MUELU_READ_PARAM(paramList, "null space: type", std::string, "default vectors", nullspaceType); - MUELU_READ_PARAM(paramList, "null space: dimension", int, -1, nullspaceDim); // TODO: ML default not in documentation - MUELU_READ_PARAM(paramList, "null space: vectors", double*, NULL, nullspaceVec); // TODO: ML default not in documentation - - MUELU_READ_PARAM(paramList, "energy minimization: enable", bool, false, bEnergyMinimization); - - MUELU_READ_PARAM(paramList, "RAP: fix diagonal", bool, false, bFixDiagonal); // This is a MueLu specific extension that does not exist in ML - - MUELU_READ_PARAM(paramList, "x-coordinates", double*, NULL, xcoord); - MUELU_READ_PARAM(paramList, "y-coordinates", double*, NULL, ycoord); - MUELU_READ_PARAM(paramList, "z-coordinates", double*, NULL, zcoord); - - - // - // Move smoothers/aggregation/coarse parameters to sublists - // - - // ML allows to have level-specific 
smoothers/aggregation/coarse parameters at the top level of the list or/and defined in sublists: - // See also: ML Guide section 6.4.1, MueLu::CreateSublists, ML_CreateSublists - ParameterList paramListWithSubList; - MueLu::CreateSublists(paramList, paramListWithSubList); - paramList = paramListWithSubList; // swap - - // pull out "use kokkos refactor" - bool setKokkosRefactor = false; - bool useKokkosRefactor; -# ifdef HAVE_MUELU_SERIAL - if (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosSerialWrapperNode).name()) - useKokkosRefactor = false; -# endif -# ifdef HAVE_MUELU_OPENMP - if (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosOpenMPWrapperNode).name()) - useKokkosRefactor = true; -# endif -# ifdef HAVE_MUELU_CUDA - if (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosCudaWrapperNode).name()) - useKokkosRefactor = true; -# endif -# ifdef HAVE_MUELU_HIP - if (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosHIPWrapperNode).name()) - useKokkosRefactor = true; -# endif -# ifdef HAVE_MUELU_SYCL - if (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosSYCLWrapperNode).name()) - useKokkosRefactor = true; + } else + SetParameterList(paramList); +} + +template +MLParameterListInterpreter:: + MLParameterListInterpreter(const std::string &xmlFileName, + std::vector> factoryList) + : nullspace_(NULL), TransferFacts_(factoryList), blksize_(1) { + Teuchos::RCP paramList = + Teuchos::getParametersFromXmlFile(xmlFileName); + SetParameterList(*paramList); +} + +template +void MLParameterListInterpreter:: + SetParameterList(const Teuchos::ParameterList ¶mList_in) { + Teuchos::ParameterList paramList = paramList_in; + + // + // Read top-level of the parameter list + // + + // hard-coded default values == ML defaults according to the manual + MUELU_READ_PARAM(paramList, "ML output", int, 0, verbosityLevel); + MUELU_READ_PARAM(paramList, "max levels", int, 10, maxLevels); + MUELU_READ_PARAM(paramList, "PDE equations", int, 1, 
nDofsPerNode); + + MUELU_READ_PARAM(paramList, "coarse: max size", int, 128, maxCoarseSize); + + MUELU_READ_PARAM(paramList, "aggregation: type", std::string, "Uncoupled", + agg_type); + // MUELU_READ_PARAM(paramList, "aggregation: threshold", double, 0.0, + // agg_threshold); + MUELU_READ_PARAM(paramList, "aggregation: damping factor", double, + (double)4 / (double)3, agg_damping); + // MUELU_READ_PARAM(paramList, "aggregation: smoothing sweeps", int, 1, + // agg_smoothingsweeps); + MUELU_READ_PARAM(paramList, "aggregation: nodes per aggregate", int, 1, + minPerAgg); + MUELU_READ_PARAM(paramList, "aggregation: keep Dirichlet bcs", bool, false, + bKeepDirichletBcs); // This is a MueLu specific extension + // that does not exist in ML + MUELU_READ_PARAM(paramList, "aggregation: max neighbours already aggregated", + int, 0, + maxNbrAlreadySelected); // This is a MueLu specific extension + // that does not exist in M + MUELU_READ_PARAM(paramList, "aggregation: aux: enable", bool, false, + agg_use_aux); + MUELU_READ_PARAM(paramList, "aggregation: aux: threshold", double, false, + agg_aux_thresh); + + MUELU_READ_PARAM(paramList, "null space: type", std::string, + "default vectors", nullspaceType); + MUELU_READ_PARAM(paramList, "null space: dimension", int, -1, + nullspaceDim); // TODO: ML default not in documentation + MUELU_READ_PARAM(paramList, "null space: vectors", double *, NULL, + nullspaceVec); // TODO: ML default not in documentation + + MUELU_READ_PARAM(paramList, "energy minimization: enable", bool, false, + bEnergyMinimization); + + MUELU_READ_PARAM(paramList, "RAP: fix diagonal", bool, false, + bFixDiagonal); // This is a MueLu specific extension that + // does not exist in ML + + MUELU_READ_PARAM(paramList, "x-coordinates", double *, NULL, xcoord); + MUELU_READ_PARAM(paramList, "y-coordinates", double *, NULL, ycoord); + MUELU_READ_PARAM(paramList, "z-coordinates", double *, NULL, zcoord); + + // + // Move smoothers/aggregation/coarse parameters to sublists 
+ // + + // ML allows to have level-specific smoothers/aggregation/coarse parameters at + // the top level of the list or/and defined in sublists: See also: ML Guide + // section 6.4.1, MueLu::CreateSublists, ML_CreateSublists + ParameterList paramListWithSubList; + MueLu::CreateSublists(paramList, paramListWithSubList); + paramList = paramListWithSubList; // swap + + // pull out "use kokkos refactor" + bool setKokkosRefactor = false; + bool useKokkosRefactor; +#ifdef HAVE_MUELU_SERIAL + if (typeid(Node).name() == + typeid(Tpetra::KokkosCompat::KokkosSerialWrapperNode).name()) + useKokkosRefactor = false; #endif - if (paramList.isType("use kokkos refactor")) { - useKokkosRefactor = paramList.get("use kokkos refactor"); - setKokkosRefactor = true; - paramList.remove("use kokkos refactor"); - } +#ifdef HAVE_MUELU_OPENMP + if (typeid(Node).name() == + typeid(Tpetra::KokkosCompat::KokkosOpenMPWrapperNode).name()) + useKokkosRefactor = true; +#endif +#ifdef HAVE_MUELU_CUDA + if (typeid(Node).name() == + typeid(Tpetra::KokkosCompat::KokkosCudaWrapperNode).name()) + useKokkosRefactor = true; +#endif +#ifdef HAVE_MUELU_HIP + if (typeid(Node).name() == + typeid(Tpetra::KokkosCompat::KokkosHIPWrapperNode).name()) + useKokkosRefactor = true; +#endif +#ifdef HAVE_MUELU_SYCL + if (typeid(Node).name() == + typeid(Tpetra::KokkosCompat::KokkosSYCLWrapperNode).name()) + useKokkosRefactor = true; +#endif + if (paramList.isType("use kokkos refactor")) { + useKokkosRefactor = paramList.get("use kokkos refactor"); + setKokkosRefactor = true; + paramList.remove("use kokkos refactor"); + } - // - // Validate parameter list - // + // + // Validate parameter list + // - { - bool validate = paramList.get("ML validate parameter list", true); /* true = default in ML */ - if (validate) { + { + bool validate = paramList.get("ML validate parameter list", + true); /* true = default in ML */ + if (validate) { #if defined(HAVE_MUELU_ML) && defined(HAVE_MUELU_EPETRA) - // Validate parameter list 
using ML validator - int depth = paramList.get("ML validate depth", 5); /* 5 = default in ML */ - TEUCHOS_TEST_FOR_EXCEPTION(! ML_Epetra::ValidateMLPParameters(paramList, depth), Exceptions::RuntimeError, - "ERROR: ML's Teuchos::ParameterList contains incorrect parameter!"); + // Validate parameter list using ML validator + int depth = paramList.get("ML validate depth", 5); /* 5 = default in ML */ + TEUCHOS_TEST_FOR_EXCEPTION( + !ML_Epetra::ValidateMLPParameters(paramList, depth), + Exceptions::RuntimeError, + "ERROR: ML's Teuchos::ParameterList contains incorrect parameter!"); #else - // If no validator available: issue a warning and set parameter value to false in the output list - this->GetOStream(Warnings0) << "Warning: MueLu_ENABLE_ML=OFF. The parameter list cannot be validated." << std::endl; - paramList.set("ML validate parameter list", false); + // If no validator available: issue a warning and set parameter value to + // false in the output list + this->GetOStream(Warnings0) << "Warning: MueLu_ENABLE_ML=OFF. The " + "parameter list cannot be validated." 
+ << std::endl; + paramList.set("ML validate parameter list", false); #endif // HAVE_MUELU_ML - } // if(validate) - } // scope - - - // Matrix option - blksize_ = nDofsPerNode; - - // Translate verbosity parameter - - // Translate verbosity parameter - MsgType eVerbLevel = None; - if (verbosityLevel == 0) eVerbLevel = None; - if (verbosityLevel >= 1) eVerbLevel = Low; - if (verbosityLevel >= 5) eVerbLevel = Medium; - if (verbosityLevel >= 10) eVerbLevel = High; - if (verbosityLevel >= 11) eVerbLevel = Extreme; - if (verbosityLevel >= 42) eVerbLevel = Test; - if (verbosityLevel >= 43) eVerbLevel = InterfaceTest; - this->verbosity_ = eVerbLevel; - - - TEUCHOS_TEST_FOR_EXCEPTION(agg_type != "Uncoupled", Exceptions::RuntimeError, - "MueLu::MLParameterListInterpreter::SetParameterList(): parameter \"aggregation: type\": only 'Uncoupled' aggregation is supported."); - - // Create MueLu factories - RCP dropFact; - if(useKokkosRefactor) - dropFact = rcp( new CoalesceDropFactory_kokkos() ); - else - dropFact = rcp( new CoalesceDropFactory() ); - - if (agg_use_aux) { - dropFact->SetParameter("aggregation: drop scheme",Teuchos::ParameterEntry(std::string("distance laplacian"))); - dropFact->SetParameter("aggregation: drop tol",Teuchos::ParameterEntry(agg_aux_thresh)); - } + } // if(validate) + } // scope + + // Matrix option + blksize_ = nDofsPerNode; + + // Translate verbosity parameter + + // Translate verbosity parameter + MsgType eVerbLevel = None; + if (verbosityLevel == 0) + eVerbLevel = None; + if (verbosityLevel >= 1) + eVerbLevel = Low; + if (verbosityLevel >= 5) + eVerbLevel = Medium; + if (verbosityLevel >= 10) + eVerbLevel = High; + if (verbosityLevel >= 11) + eVerbLevel = Extreme; + if (verbosityLevel >= 42) + eVerbLevel = Test; + if (verbosityLevel >= 43) + eVerbLevel = InterfaceTest; + this->verbosity_ = eVerbLevel; + + TEUCHOS_TEST_FOR_EXCEPTION( + agg_type != "Uncoupled", Exceptions::RuntimeError, + "MueLu::MLParameterListInterpreter::SetParameterList(): 
parameter " + "\"aggregation: type\": only 'Uncoupled' aggregation is supported."); + + // Create MueLu factories + RCP dropFact; + if (useKokkosRefactor) + dropFact = rcp(new CoalesceDropFactory_kokkos()); + else + dropFact = rcp(new CoalesceDropFactory()); + + if (agg_use_aux) { + dropFact->SetParameter( + "aggregation: drop scheme", + Teuchos::ParameterEntry(std::string("distance laplacian"))); + dropFact->SetParameter("aggregation: drop tol", + Teuchos::ParameterEntry(agg_aux_thresh)); + } - // Uncoupled aggregation - RCP AggFact = Teuchos::null; - if(useKokkosRefactor) { - AggFact = rcp( new UncoupledAggregationFactory_kokkos() ); - } - else - AggFact = rcp( new UncoupledAggregationFactory() ); - - AggFact->SetFactory("Graph", dropFact); - AggFact->SetFactory("DofsPerNode", dropFact); - AggFact->SetParameter("aggregation: preserve Dirichlet points", Teuchos::ParameterEntry(bKeepDirichletBcs)); - AggFact->SetParameter("aggregation: ordering", Teuchos::ParameterEntry(std::string("natural"))); - AggFact->SetParameter("aggregation: max selected neighbors", Teuchos::ParameterEntry(maxNbrAlreadySelected)); - AggFact->SetParameter("aggregation: min agg size", Teuchos::ParameterEntry(minPerAgg)); - - - if (verbosityLevel > 3) { - std::ostringstream oss; - oss << "========================= Aggregate option summary  =========================" << std::endl; - oss << "min Nodes per aggregate :              " << minPerAgg << std::endl; - oss << "min # of root nbrs already aggregated : " << maxNbrAlreadySelected << std::endl; - oss << "aggregate ordering :                    natural" << std::endl; - oss << "=============================================================================" << std::endl; - this->GetOStream(Runtime1) << oss.str(); - } + // Uncoupled aggregation + RCP AggFact = Teuchos::null; + if (useKokkosRefactor) { + AggFact = rcp(new UncoupledAggregationFactory_kokkos()); + } else + AggFact = rcp(new UncoupledAggregationFactory()); + + 
AggFact->SetFactory("Graph", dropFact); + AggFact->SetFactory("DofsPerNode", dropFact); + AggFact->SetParameter("aggregation: preserve Dirichlet points", + Teuchos::ParameterEntry(bKeepDirichletBcs)); + AggFact->SetParameter("aggregation: ordering", + Teuchos::ParameterEntry(std::string("natural"))); + AggFact->SetParameter("aggregation: max selected neighbors", + Teuchos::ParameterEntry(maxNbrAlreadySelected)); + AggFact->SetParameter("aggregation: min agg size", + Teuchos::ParameterEntry(minPerAgg)); + + if (verbosityLevel > 3) { + std::ostringstream oss; + oss << "========================= Aggregate option summary " + " =========================" + << std::endl; + oss << "min Nodes per aggregate :              " << minPerAgg << std::endl; + oss << "min # of root nbrs already aggregated : " << maxNbrAlreadySelected + << std::endl; + oss << "aggregate ordering :                    natural" << std::endl; + oss << "===================================================================" + "==========" + << std::endl; + this->GetOStream(Runtime1) << oss.str(); + } - RCP PFact; - RCP RFact; - RCP PtentFact; - if(useKokkosRefactor) - PtentFact = rcp( new TentativePFactory_kokkos() ); + RCP PFact; + RCP RFact; + RCP PtentFact; + if (useKokkosRefactor) + PtentFact = rcp(new TentativePFactory_kokkos()); + else + PtentFact = rcp(new TentativePFactory()); + if (agg_damping == 0.0 && bEnergyMinimization == false) { + // tentative prolongation operator (PA-AMG) + PFact = PtentFact; + RFact = rcp(new TransPFactory()); + } else if (agg_damping != 0.0 && bEnergyMinimization == false) { + // smoothed aggregation (SA-AMG) + RCP SaPFact; + if (useKokkosRefactor) + SaPFact = rcp(new SaPFactory_kokkos()); else - PtentFact = rcp( new TentativePFactory() ); - if (agg_damping == 0.0 && bEnergyMinimization == false) { - // tentative prolongation operator (PA-AMG) - PFact = PtentFact; - RFact = rcp( new TransPFactory() ); - } else if (agg_damping != 0.0 && bEnergyMinimization == false) { - // 
smoothed aggregation (SA-AMG) - RCP SaPFact; - if(useKokkosRefactor) - SaPFact = rcp( new SaPFactory_kokkos() ); - else - SaPFact = rcp( new SaPFactory() ); - SaPFact->SetParameter("sa: damping factor", ParameterEntry(agg_damping)); - PFact = SaPFact; - RFact = rcp( new TransPFactory() ); - } else if (bEnergyMinimization == true) { - // Petrov Galerkin PG-AMG smoothed aggregation (energy minimization in ML) - PFact = rcp( new PgPFactory() ); - RFact = rcp( new GenericRFactory() ); - } + SaPFact = rcp(new SaPFactory()); + SaPFact->SetParameter("sa: damping factor", ParameterEntry(agg_damping)); + PFact = SaPFact; + RFact = rcp(new TransPFactory()); + } else if (bEnergyMinimization == true) { + // Petrov Galerkin PG-AMG smoothed aggregation (energy minimization in ML) + PFact = rcp(new PgPFactory()); + RFact = rcp(new GenericRFactory()); + } - RCP AcFact = rcp( new RAPFactory() ); - AcFact->SetParameter("RepairMainDiagonal", Teuchos::ParameterEntry(bFixDiagonal)); - for (size_t i = 0; iAddTransferFactory(TransferFacts_[i]); - } + RCP AcFact = rcp(new RAPFactory()); + AcFact->SetParameter("RepairMainDiagonal", + Teuchos::ParameterEntry(bFixDiagonal)); + for (size_t i = 0; i < TransferFacts_.size(); i++) { + AcFact->AddTransferFactory(TransferFacts_[i]); + } - // - // introduce rebalancing - // + // + // introduce rebalancing + // #if defined(HAVE_MUELU_ISORROPIA) && defined(HAVE_MPI) - Teuchos::RCP RebalancedPFact = Teuchos::null; - Teuchos::RCP RebalancedRFact = Teuchos::null; - Teuchos::RCP RepartitionFact = Teuchos::null; - Teuchos::RCP RebalancedAFact = Teuchos::null; - - MUELU_READ_PARAM(paramList, "repartition: enable", int, 0, bDoRepartition); - if (bDoRepartition == 1) { - // The Factory Manager will be configured to return the rebalanced versions of P, R, A by default. - // Everytime we want to use the non-rebalanced versions, we need to explicitly define the generating factory. 
- RFact->SetFactory("P", PFact); - // - AcFact->SetFactory("P", PFact); - AcFact->SetFactory("R", RFact); - - // define rebalancing factory for coarse matrix - Teuchos::RCP > rebAmalgFact = Teuchos::rcp(new MueLu::AmalgamationFactory()); - rebAmalgFact->SetFactory("A", AcFact); - - MUELU_READ_PARAM(paramList, "repartition: max min ratio", double, 1.3, maxminratio); - MUELU_READ_PARAM(paramList, "repartition: min per proc", int, 512, minperproc); - - // Repartitioning heuristic - RCP RepartitionHeuristicFact = Teuchos::rcp(new RepartitionHeuristicFactory()); - { - Teuchos::ParameterList paramListRepFact; - paramListRepFact.set("repartition: min rows per proc", minperproc); - paramListRepFact.set("repartition: max imbalance", maxminratio); - RepartitionHeuristicFact->SetParameterList(paramListRepFact); - } - RepartitionHeuristicFact->SetFactory("A", AcFact); - - // create "Partition" - Teuchos::RCP > isoInterface = Teuchos::rcp(new MueLu::IsorropiaInterface()); - isoInterface->SetFactory("A", AcFact); - isoInterface->SetFactory("number of partitions", RepartitionHeuristicFact); - isoInterface->SetFactory("UnAmalgamationInfo", rebAmalgFact); - - // create "Partition" by unamalgamtion - Teuchos::RCP > repInterface = Teuchos::rcp(new MueLu::RepartitionInterface()); - repInterface->SetFactory("A", AcFact); - repInterface->SetFactory("number of partitions", RepartitionHeuristicFact); - repInterface->SetFactory("AmalgamatedPartition", isoInterface); - //repInterface->SetFactory("UnAmalgamationInfo", rebAmalgFact); // not necessary? 
- - // Repartitioning (creates "Importer" from "Partition") - RepartitionFact = Teuchos::rcp(new RepartitionFactory()); - RepartitionFact->SetFactory("A", AcFact); - RepartitionFact->SetFactory("number of partitions", RepartitionHeuristicFact); - RepartitionFact->SetFactory("Partition", repInterface); - - // Reordering of the transfer operators - RebalancedPFact = Teuchos::rcp(new RebalanceTransferFactory()); - RebalancedPFact->SetParameter("type", Teuchos::ParameterEntry(std::string("Interpolation"))); - RebalancedPFact->SetFactory("P", PFact); - RebalancedPFact->SetFactory("Nullspace", PtentFact); - RebalancedPFact->SetFactory("Importer", RepartitionFact); - - RebalancedRFact = Teuchos::rcp(new RebalanceTransferFactory()); - RebalancedRFact->SetParameter("type", Teuchos::ParameterEntry(std::string("Restriction"))); - RebalancedRFact->SetFactory("R", RFact); - RebalancedRFact->SetFactory("Importer", RepartitionFact); - - // Compute Ac from rebalanced P and R - RebalancedAFact = Teuchos::rcp(new RebalanceAcFactory()); - RebalancedAFact->SetFactory("A", AcFact); + Teuchos::RCP RebalancedPFact = Teuchos::null; + Teuchos::RCP RebalancedRFact = Teuchos::null; + Teuchos::RCP RepartitionFact = Teuchos::null; + Teuchos::RCP RebalancedAFact = Teuchos::null; + + MUELU_READ_PARAM(paramList, "repartition: enable", int, 0, bDoRepartition); + if (bDoRepartition == 1) { + // The Factory Manager will be configured to return the rebalanced versions + // of P, R, A by default. Everytime we want to use the non-rebalanced + // versions, we need to explicitly define the generating factory. 
+ RFact->SetFactory("P", PFact); + // + AcFact->SetFactory("P", PFact); + AcFact->SetFactory("R", RFact); + + // define rebalancing factory for coarse matrix + Teuchos::RCP> rebAmalgFact = + Teuchos::rcp(new MueLu::AmalgamationFactory()); + rebAmalgFact->SetFactory("A", AcFact); + + MUELU_READ_PARAM(paramList, "repartition: max min ratio", double, 1.3, + maxminratio); + MUELU_READ_PARAM(paramList, "repartition: min per proc", int, 512, + minperproc); + + // Repartitioning heuristic + RCP RepartitionHeuristicFact = + Teuchos::rcp(new RepartitionHeuristicFactory()); + { + Teuchos::ParameterList paramListRepFact; + paramListRepFact.set("repartition: min rows per proc", minperproc); + paramListRepFact.set("repartition: max imbalance", maxminratio); + RepartitionHeuristicFact->SetParameterList(paramListRepFact); } + RepartitionHeuristicFact->SetFactory("A", AcFact); + + // create "Partition" + Teuchos::RCP> isoInterface = + Teuchos::rcp(new MueLu::IsorropiaInterface()); + isoInterface->SetFactory("A", AcFact); + isoInterface->SetFactory("number of partitions", RepartitionHeuristicFact); + isoInterface->SetFactory("UnAmalgamationInfo", rebAmalgFact); + + // create "Partition" by unamalgamtion + Teuchos::RCP> repInterface = + Teuchos::rcp(new MueLu::RepartitionInterface()); + repInterface->SetFactory("A", AcFact); + repInterface->SetFactory("number of partitions", RepartitionHeuristicFact); + repInterface->SetFactory("AmalgamatedPartition", isoInterface); + // repInterface->SetFactory("UnAmalgamationInfo", rebAmalgFact); // not + // necessary? 
+ + // Repartitioning (creates "Importer" from "Partition") + RepartitionFact = Teuchos::rcp(new RepartitionFactory()); + RepartitionFact->SetFactory("A", AcFact); + RepartitionFact->SetFactory("number of partitions", + RepartitionHeuristicFact); + RepartitionFact->SetFactory("Partition", repInterface); + + // Reordering of the transfer operators + RebalancedPFact = Teuchos::rcp(new RebalanceTransferFactory()); + RebalancedPFact->SetParameter( + "type", Teuchos::ParameterEntry(std::string("Interpolation"))); + RebalancedPFact->SetFactory("P", PFact); + RebalancedPFact->SetFactory("Nullspace", PtentFact); + RebalancedPFact->SetFactory("Importer", RepartitionFact); + + RebalancedRFact = Teuchos::rcp(new RebalanceTransferFactory()); + RebalancedRFact->SetParameter( + "type", Teuchos::ParameterEntry(std::string("Restriction"))); + RebalancedRFact->SetFactory("R", RFact); + RebalancedRFact->SetFactory("Importer", RepartitionFact); + + // Compute Ac from rebalanced P and R + RebalancedAFact = Teuchos::rcp(new RebalanceAcFactory()); + RebalancedAFact->SetFactory("A", AcFact); + } #else // #ifdef HAVE_MUELU_ISORROPIA - // Get rid of [-Wunused] warnings - //(void) - // - // ^^^ FIXME (mfh 17 Nov 2013) That definitely doesn't compile. + // Get rid of [-Wunused] warnings + //(void) + // + // ^^^ FIXME (mfh 17 Nov 2013) That definitely doesn't compile. #endif - // - // Nullspace factory - // + // + // Nullspace factory + // + + // Set fine level nullspace + // extract pre-computed nullspace from ML parameter list + // store it in nullspace_ and nullspaceDim_ + if (nullspaceType != "default vectors") { + TEUCHOS_TEST_FOR_EXCEPTION( + nullspaceType != "pre-computed", Exceptions::RuntimeError, + "MueLu::MLParameterListInterpreter: no valid nullspace (no " + "pre-computed null space). error."); + TEUCHOS_TEST_FOR_EXCEPTION(nullspaceDim == -1, Exceptions::RuntimeError, + "MueLu::MLParameterListInterpreter: no valid " + "nullspace (nullspace dim == -1). 
error."); + TEUCHOS_TEST_FOR_EXCEPTION( + nullspaceVec == NULL, Exceptions::RuntimeError, + "MueLu::MLParameterListInterpreter: no valid nullspace (nullspace == " + "NULL). You have to provide a valid fine-level nullspace in \'null " + "space: vectors\'"); + + nullspaceDim_ = nullspaceDim; + nullspace_ = nullspaceVec; + } - // Set fine level nullspace - // extract pre-computed nullspace from ML parameter list - // store it in nullspace_ and nullspaceDim_ - if (nullspaceType != "default vectors") { - TEUCHOS_TEST_FOR_EXCEPTION(nullspaceType != "pre-computed", Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (no pre-computed null space). error."); - TEUCHOS_TEST_FOR_EXCEPTION(nullspaceDim == -1, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (nullspace dim == -1). error."); - TEUCHOS_TEST_FOR_EXCEPTION(nullspaceVec == NULL, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (nullspace == NULL). You have to provide a valid fine-level nullspace in \'null space: vectors\'"); + Teuchos::RCP nspFact = + Teuchos::rcp(new NullspaceFactory("Nullspace")); + nspFact->SetFactory("Nullspace", PtentFact); - nullspaceDim_ = nullspaceDim; - nullspace_ = nullspaceVec; - } + // Stash coordinates + xcoord_ = xcoord; + ycoord_ = ycoord; + zcoord_ = zcoord; - Teuchos::RCP nspFact = Teuchos::rcp(new NullspaceFactory("Nullspace")); - nspFact->SetFactory("Nullspace", PtentFact); + // + // Hierarchy + FactoryManager + // + // Hierarchy options + this->numDesiredLevel_ = maxLevels; + this->maxCoarseSize_ = maxCoarseSize; - // Stash coordinates - xcoord_ = xcoord; - ycoord_ = ycoord; - zcoord_ = zcoord; + // + // Coarse Smoother + // + ParameterList &coarseList = paramList.sublist("coarse: list"); + // check whether coarse solver is set properly. If not, set default coarse + // solver. 
+ if (!coarseList.isParameter("smoother: type")) + coarseList.set( + "smoother: type", + "Amesos-KLU"); // set default coarse solver according to ML 5.0 guide + RCP coarseFact = + GetSmootherFactory(coarseList, Teuchos::null); + // Smoothers Top Level Parameters + RCP topLevelSmootherParam = + ExtractSetOfParameters(paramList, "smoother"); - // - // Hierarchy + FactoryManager - // + // + + // Prepare factory managers + // TODO: smootherFact can be reuse accross level if same parameters/no + // specific parameterList - // Hierarchy options - this->numDesiredLevel_ = maxLevels; - this->maxCoarseSize_ = maxCoarseSize; + for (int levelID = 0; levelID < maxLevels; levelID++) { // - // Coarse Smoother + // Level FactoryManager // - ParameterList& coarseList = paramList.sublist("coarse: list"); - // check whether coarse solver is set properly. If not, set default coarse solver. - if (!coarseList.isParameter("smoother: type")) - coarseList.set("smoother: type", "Amesos-KLU"); // set default coarse solver according to ML 5.0 guide - RCP coarseFact = GetSmootherFactory(coarseList, Teuchos::null); - - // Smoothers Top Level Parameters - RCP topLevelSmootherParam = ExtractSetOfParameters(paramList, "smoother"); + RCP manager = rcp(new FactoryManager()); + if (setKokkosRefactor) + manager->SetKokkosRefactor(useKokkosRefactor); // + // Smoothers + // - // Prepare factory managers - // TODO: smootherFact can be reuse accross level if same parameters/no specific parameterList - - for (int levelID=0; levelID < maxLevels; levelID++) { - - // - // Level FactoryManager - // - - RCP manager = rcp(new FactoryManager()); - if (setKokkosRefactor) - manager->SetKokkosRefactor(useKokkosRefactor); - - // - // Smoothers - // - - { - // Merge level-specific parameters with global parameters. level-specific parameters takes precedence. 
- // TODO: unit-test this part alone - - ParameterList levelSmootherParam = GetMLSubList(paramList, "smoother", levelID); // copy - MergeParameterList(*topLevelSmootherParam, levelSmootherParam, false); /* false = do no overwrite levelSmootherParam parameters by topLevelSmootherParam parameters */ - // std::cout << std::endl << "Merged List for level " << levelID << std::endl; - // std::cout << levelSmootherParam << std::endl; - - RCP smootherFact = GetSmootherFactory(levelSmootherParam, Teuchos::null); // TODO: missing AFact input arg. - - manager->SetFactory("Smoother", smootherFact); - } + { + // Merge level-specific parameters with global parameters. level-specific + // parameters takes precedence. + // TODO: unit-test this part alone + + ParameterList levelSmootherParam = + GetMLSubList(paramList, "smoother", levelID); // copy + MergeParameterList( + *topLevelSmootherParam, levelSmootherParam, + false); /* false = do no overwrite levelSmootherParam parameters by + topLevelSmootherParam parameters */ + // std::cout << std::endl << "Merged List for level " << levelID << + // std::endl; std::cout << levelSmootherParam << std::endl; + + RCP smootherFact = GetSmootherFactory( + levelSmootherParam, Teuchos::null); // TODO: missing AFact input arg. 
+ + manager->SetFactory("Smoother", smootherFact); + } - // - // Misc - // + // + // Misc + // - manager->SetFactory("CoarseSolver", coarseFact); // TODO: should not be done in the loop - manager->SetFactory("Graph", dropFact); - manager->SetFactory("Aggregates", AggFact); - manager->SetFactory("DofsPerNode", dropFact); - manager->SetFactory("Ptent", PtentFact); + manager->SetFactory("CoarseSolver", + coarseFact); // TODO: should not be done in the loop + manager->SetFactory("Graph", dropFact); + manager->SetFactory("Aggregates", AggFact); + manager->SetFactory("DofsPerNode", dropFact); + manager->SetFactory("Ptent", PtentFact); #if defined(HAVE_MUELU_ISORROPIA) && defined(HAVE_MPI) if (bDoRepartition == 1) { manager->SetFactory("A", RebalancedAFact); manager->SetFactory("P", RebalancedPFact); manager->SetFactory("R", RebalancedRFact); - manager->SetFactory("Nullspace", RebalancedPFact); - manager->SetFactory("Importer", RepartitionFact); + manager->SetFactory("Nullspace", RebalancedPFact); + manager->SetFactory("Importer", RepartitionFact); } else { #endif // #ifdef HAVE_MUELU_ISORROPIA - manager->SetFactory("Nullspace", nspFact); // use same nullspace factory throughout all multigrid levels - manager->SetFactory("A", AcFact); // same RAP factory for all levels - manager->SetFactory("P", PFact); // same prolongator and restrictor factories for all levels - manager->SetFactory("R", RFact); // same prolongator and restrictor factories for all levels + manager->SetFactory("Nullspace", + nspFact); // use same nullspace factory throughout all + // multigrid levels + manager->SetFactory("A", AcFact); // same RAP factory for all levels + manager->SetFactory( + "P", + PFact); // same prolongator and restrictor factories for all levels + manager->SetFactory( + "R", + RFact); // same prolongator and restrictor factories for all levels #if defined(HAVE_MUELU_ISORROPIA) && defined(HAVE_MPI) } #endif - this->AddFactoryManager(levelID, 1, manager); - } // for (level loop) - - } 
- - template - void MLParameterListInterpreter::SetupHierarchy(Hierarchy & H) const { - // if nullspace_ has already been extracted from ML parameter list - // make nullspace available for MueLu - if (nullspace_ != NULL) { - RCP fineLevel = H.GetLevel(0); - RCP Op = fineLevel->Get >("A"); - RCP A = rcp_dynamic_cast(Op); - if (!A.is_null()) { - const RCP rowMap = fineLevel->Get< RCP >("A")->getRowMap(); - RCP nullspace = MultiVectorFactory::Build(rowMap, nullspaceDim_, true); - - for ( size_t i=0; i < Teuchos::as(nullspaceDim_); i++) { - Teuchos::ArrayRCP nullspacei = nullspace->getDataNonConst(i); - const size_t myLength = nullspace->getLocalLength(); - - for (size_t j = 0; j < myLength; j++) { - nullspacei[j] = nullspace_[i*myLength + j]; - } + this->AddFactoryManager(levelID, 1, manager); + } // for (level loop) +} + +template +void MLParameterListInterpreter::SetupHierarchy(Hierarchy &H) const { + // if nullspace_ has already been extracted from ML parameter list + // make nullspace available for MueLu + if (nullspace_ != NULL) { + RCP fineLevel = H.GetLevel(0); + RCP Op = fineLevel->Get>("A"); + RCP A = rcp_dynamic_cast(Op); + if (!A.is_null()) { + const RCP rowMap = + fineLevel->Get>("A")->getRowMap(); + RCP nullspace = + MultiVectorFactory::Build(rowMap, nullspaceDim_, true); + + for (size_t i = 0; i < Teuchos::as(nullspaceDim_); i++) { + Teuchos::ArrayRCP nullspacei = nullspace->getDataNonConst(i); + const size_t myLength = nullspace->getLocalLength(); + + for (size_t j = 0; j < myLength; j++) { + nullspacei[j] = nullspace_[i * myLength + j]; } - - fineLevel->Set("Nullspace", nullspace); } + + fineLevel->Set("Nullspace", nullspace); } + } - // Do the same for coordinates - size_t num_coords = 0; - double * coordPTR[3]; - if (xcoord_) { - coordPTR[0] = xcoord_; + // Do the same for coordinates + size_t num_coords = 0; + double *coordPTR[3]; + if (xcoord_) { + coordPTR[0] = xcoord_; + num_coords++; + if (ycoord_) { + coordPTR[1] = ycoord_; num_coords++; - if 
(ycoord_) { - coordPTR[1] = ycoord_; + if (zcoord_) { + coordPTR[2] = zcoord_; num_coords++; - if (zcoord_) { - coordPTR[2] = zcoord_; - num_coords++; - } } } - if (num_coords){ - Teuchos::RCP fineLevel = H.GetLevel(0); - Teuchos::RCP Op = fineLevel->Get >("A"); - Teuchos::RCP A = rcp_dynamic_cast(Op); - if (!A.is_null()) { - const Teuchos::RCP rowMap = fineLevel->Get< RCP >("A")->getRowMap(); - Teuchos::RCP coordinates = MultiVectorFactory::Build(rowMap, num_coords, true); - - for ( size_t i=0; i < num_coords; i++) { - Teuchos::ArrayRCP coordsi = coordinates->getDataNonConst(i); - const size_t myLength = coordinates->getLocalLength(); - for (size_t j = 0; j < myLength; j++) { - coordsi[j] = coordPTR[i][j]; - } + } + if (num_coords) { + Teuchos::RCP fineLevel = H.GetLevel(0); + Teuchos::RCP Op = fineLevel->Get>("A"); + Teuchos::RCP A = rcp_dynamic_cast(Op); + if (!A.is_null()) { + const Teuchos::RCP rowMap = + fineLevel->Get>("A")->getRowMap(); + Teuchos::RCP coordinates = + MultiVectorFactory::Build(rowMap, num_coords, true); + + for (size_t i = 0; i < num_coords; i++) { + Teuchos::ArrayRCP coordsi = coordinates->getDataNonConst(i); + const size_t myLength = coordinates->getLocalLength(); + for (size_t j = 0; j < myLength; j++) { + coordsi[j] = coordPTR[i][j]; } - fineLevel->Set("Coordinates",coordinates); } + fineLevel->Set("Coordinates", coordinates); } - - HierarchyManager::SetupHierarchy(H); } - // TODO: code factorization with MueLu_ParameterListInterpreter. - template - RCP > - MLParameterListInterpreter:: - GetSmootherFactory (const Teuchos::ParameterList & paramList, - const RCP & AFact) - { - typedef Teuchos::ScalarTraits STS; - SC one = STS::one(); - - std::string type = "symmetric Gauss-Seidel"; // default - - // - // Get 'type' - // - -// //TODO: fix defaults!! 
- -// // Default coarse grid smoother -// std::string type; -// if ("smoother" == "coarse") { -// #if (defined(HAVE_MUELU_EPETRA) && defined( HAVE_MUELU_AMESOS)) || (defined(HAVE_MUELU_AMESOS2)) // FIXME: test is wrong (ex: compiled with Epetra&&Tpetra&&Amesos2 but without Amesos => error running Epetra problem) -// type = ""; // use default defined by AmesosSmoother or Amesos2Smoother -// #else -// type = "symmetric Gauss-Seidel"; // use a sym Gauss-Seidel (with no damping) as fallback "coarse solver" (TODO: needs Ifpack(2)) -// #endif -// } else { -// // TODO: default smoother? -// type = ""; -// } - - - if (paramList.isParameter("smoother: type")) type = paramList.get("smoother: type"); - TEUCHOS_TEST_FOR_EXCEPTION(type.empty(), Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no \"smoother: type\" in the smoother parameter list" << std::endl << paramList); - - // - // Create the smoother prototype - // - - RCP smooProto; - std::string ifpackType; - Teuchos::ParameterList smootherParamList; - - if (type == "Jacobi" || type == "Gauss-Seidel" || type == "symmetric Gauss-Seidel") { - if (type == "symmetric Gauss-Seidel") type = "Symmetric Gauss-Seidel"; // FIXME - - ifpackType = "RELAXATION"; - smootherParamList.set("relaxation: type", type); - - MUELU_COPY_PARAM(paramList, "smoother: sweeps", int, 2, smootherParamList, "relaxation: sweeps"); - MUELU_COPY_PARAM(paramList, "smoother: damping factor", Scalar, one, smootherParamList, "relaxation: damping factor"); - - smooProto = rcp( new TrilinosSmoother(ifpackType, smootherParamList, 0) ); - smooProto->SetFactory("A", AFact); - - } else if (type == "Chebyshev" || type == "MLS") { - - ifpackType = "CHEBYSHEV"; - - MUELU_COPY_PARAM(paramList, "smoother: sweeps", int, 2, smootherParamList, "chebyshev: degree"); - if (paramList.isParameter("smoother: MLS alpha")) { - MUELU_COPY_PARAM(paramList, "smoother: MLS alpha", double, 20, smootherParamList, "chebyshev: ratio eigenvalue"); - } else { - 
MUELU_COPY_PARAM(paramList, "smoother: Chebyshev alpha", double, 20, smootherParamList, "chebyshev: ratio eigenvalue"); - } - + HierarchyManager::SetupHierarchy(H); +} + +// TODO: code factorization with MueLu_ParameterListInterpreter. +template +RCP> +MLParameterListInterpreter:: + GetSmootherFactory(const Teuchos::ParameterList ¶mList, + const RCP &AFact) { + typedef Teuchos::ScalarTraits STS; + SC one = STS::one(); + + std::string type = "symmetric Gauss-Seidel"; // default + + // + // Get 'type' + // + + // //TODO: fix defaults!! + + // // Default coarse grid smoother + // std::string type; + // if ("smoother" == "coarse") { + // #if (defined(HAVE_MUELU_EPETRA) && defined( HAVE_MUELU_AMESOS)) || + // (defined(HAVE_MUELU_AMESOS2)) // FIXME: test is wrong (ex: compiled with + // Epetra&&Tpetra&&Amesos2 but without Amesos => error running Epetra problem) + // type = ""; // use default defined by AmesosSmoother or + // Amesos2Smoother + // #else + // type = "symmetric Gauss-Seidel"; // use a sym Gauss-Seidel (with no + // damping) as fallback "coarse solver" (TODO: needs Ifpack(2)) + // #endif + // } else { + // // TODO: default smoother? 
+ // type = ""; + // } + + if (paramList.isParameter("smoother: type")) + type = paramList.get("smoother: type"); + TEUCHOS_TEST_FOR_EXCEPTION(type.empty(), Exceptions::RuntimeError, + "MueLu::MLParameterListInterpreter: no " + "\"smoother: type\" in the smoother parameter list" + << std::endl + << paramList); + + // + // Create the smoother prototype + // + + RCP smooProto; + std::string ifpackType; + Teuchos::ParameterList smootherParamList; + + if (type == "Jacobi" || type == "Gauss-Seidel" || + type == "symmetric Gauss-Seidel") { + if (type == "symmetric Gauss-Seidel") + type = "Symmetric Gauss-Seidel"; // FIXME + + ifpackType = "RELAXATION"; + smootherParamList.set("relaxation: type", type); + + MUELU_COPY_PARAM(paramList, "smoother: sweeps", int, 2, smootherParamList, + "relaxation: sweeps"); + MUELU_COPY_PARAM(paramList, "smoother: damping factor", Scalar, one, + smootherParamList, "relaxation: damping factor"); + + smooProto = rcp(new TrilinosSmoother(ifpackType, smootherParamList, 0)); + smooProto->SetFactory("A", AFact); + + } else if (type == "Chebyshev" || type == "MLS") { + + ifpackType = "CHEBYSHEV"; + + MUELU_COPY_PARAM(paramList, "smoother: sweeps", int, 2, smootherParamList, + "chebyshev: degree"); + if (paramList.isParameter("smoother: MLS alpha")) { + MUELU_COPY_PARAM(paramList, "smoother: MLS alpha", double, 20, + smootherParamList, "chebyshev: ratio eigenvalue"); + } else { + MUELU_COPY_PARAM(paramList, "smoother: Chebyshev alpha", double, 20, + smootherParamList, "chebyshev: ratio eigenvalue"); + } - smooProto = rcp( new TrilinosSmoother(ifpackType, smootherParamList, 0) ); - smooProto->SetFactory("A", AFact); + smooProto = rcp(new TrilinosSmoother(ifpackType, smootherParamList, 0)); + smooProto->SetFactory("A", AFact); + + } else if (type == "Hiptmair") { + ifpackType = "HIPTMAIR"; + std::string subSmootherType = "Chebyshev"; + if (paramList.isParameter("subsmoother: type")) + subSmootherType = paramList.get("subsmoother: type"); + 
std::string subSmootherIfpackType; + if (subSmootherType == "Chebyshev") + subSmootherIfpackType = "CHEBYSHEV"; + else if (subSmootherType == "Jacobi" || subSmootherType == "Gauss-Seidel" || + subSmootherType == "symmetric Gauss-Seidel") { + if (subSmootherType == "symmetric Gauss-Seidel") + subSmootherType = "Symmetric Gauss-Seidel"; // FIXME + subSmootherIfpackType = "RELAXATION"; + } else + TEUCHOS_TEST_FOR_EXCEPTION( + true, Exceptions::RuntimeError, + "MueLu::MLParameterListInterpreter: unknown smoother type. '" + << subSmootherType << "' not supported by MueLu."); + + smootherParamList.set("hiptmair: smoother type 1", subSmootherIfpackType); + smootherParamList.set("hiptmair: smoother type 2", subSmootherIfpackType); + + auto smoother1ParamList = + smootherParamList.sublist("hiptmair: smoother list 1"); + auto smoother2ParamList = + smootherParamList.sublist("hiptmair: smoother list 2"); + + if (subSmootherType == "Chebyshev") { + MUELU_COPY_PARAM(paramList, "subsmoother: edge sweeps", int, 2, + smoother1ParamList, "chebyshev: degree"); + MUELU_COPY_PARAM(paramList, "subsmoother: node sweeps", int, 2, + smoother2ParamList, "chebyshev: degree"); + + MUELU_COPY_PARAM(paramList, "subsmoother: Chebyshev", double, 20, + smoother1ParamList, "chebyshev: ratio eigenvalue"); + MUELU_COPY_PARAM(paramList, "subsmoother: Chebyshev", double, 20, + smoother2ParamList, "chebyshev: ratio eigenvalue"); + } else { + MUELU_COPY_PARAM(paramList, "subsmoother: edge sweeps", int, 2, + smoother1ParamList, "relaxation: sweeps"); + MUELU_COPY_PARAM(paramList, "subsmoother: node sweeps", int, 2, + smoother2ParamList, "relaxation: sweeps"); - } else if (type == "Hiptmair") { - ifpackType = "HIPTMAIR"; - std::string subSmootherType = "Chebyshev"; - if (paramList.isParameter("subsmoother: type")) - subSmootherType = paramList.get("subsmoother: type"); - std::string subSmootherIfpackType; - if (subSmootherType == "Chebyshev") - subSmootherIfpackType = "CHEBYSHEV"; - else if 
(subSmootherType == "Jacobi" || subSmootherType == "Gauss-Seidel" || subSmootherType == "symmetric Gauss-Seidel") { - if (subSmootherType == "symmetric Gauss-Seidel") subSmootherType = "Symmetric Gauss-Seidel"; // FIXME - subSmootherIfpackType = "RELAXATION"; - } else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: unknown smoother type. '" << subSmootherType << "' not supported by MueLu."); - - smootherParamList.set("hiptmair: smoother type 1", subSmootherIfpackType); - smootherParamList.set("hiptmair: smoother type 2", subSmootherIfpackType); - - auto smoother1ParamList = smootherParamList.sublist("hiptmair: smoother list 1"); - auto smoother2ParamList = smootherParamList.sublist("hiptmair: smoother list 2"); - - if (subSmootherType == "Chebyshev") { - MUELU_COPY_PARAM(paramList, "subsmoother: edge sweeps", int, 2, smoother1ParamList, "chebyshev: degree"); - MUELU_COPY_PARAM(paramList, "subsmoother: node sweeps", int, 2, smoother2ParamList, "chebyshev: degree"); - - MUELU_COPY_PARAM(paramList, "subsmoother: Chebyshev", double, 20, smoother1ParamList, "chebyshev: ratio eigenvalue"); - MUELU_COPY_PARAM(paramList, "subsmoother: Chebyshev", double, 20, smoother2ParamList, "chebyshev: ratio eigenvalue"); - } else { - MUELU_COPY_PARAM(paramList, "subsmoother: edge sweeps", int, 2, smoother1ParamList, "relaxation: sweeps"); - MUELU_COPY_PARAM(paramList, "subsmoother: node sweeps", int, 2, smoother2ParamList, "relaxation: sweeps"); - - MUELU_COPY_PARAM(paramList, "subsmoother: SGS damping factor", double, 0.8, smoother2ParamList, "relaxation: damping factor"); - } + MUELU_COPY_PARAM(paramList, "subsmoother: SGS damping factor", double, + 0.8, smoother2ParamList, "relaxation: damping factor"); + } + smooProto = rcp(new TrilinosSmoother(ifpackType, smootherParamList, 0)); + smooProto->SetFactory("A", AFact); - smooProto = rcp( new TrilinosSmoother(ifpackType, smootherParamList, 0) ); - smooProto->SetFactory("A", AFact); - - 
} else if (type == "IFPACK") { // TODO: this option is not described in the ML Guide v5.0 + } else if (type == "IFPACK") { // TODO: this option is not described in the ML + // Guide v5.0 #if defined(HAVE_MUELU_EPETRA) && defined(HAVE_MUELU_IFPACK) - ifpackType = paramList.get("smoother: ifpack type"); - - if (ifpackType == "ILU") { - // TODO fix this (type mismatch double vs. int) - //MUELU_COPY_PARAM(paramList, "smoother: ifpack level-of-fill", double /*int*/, 0.0 /*2*/, smootherParamList, "fact: level-of-fill"); - if (paramList.isParameter("smoother: ifpack level-of-fill")) - smootherParamList.set("fact: level-of-fill", Teuchos::as(paramList.get("smoother: ifpack level-of-fill"))); - else smootherParamList.set("fact: level-of-fill", as(0)); - - MUELU_COPY_PARAM(paramList, "smoother: ifpack overlap", int, 2, smootherParamList, "partitioner: overlap"); - - // TODO change to TrilinosSmoother as soon as Ifpack2 supports all preconditioners from Ifpack - smooProto = - MueLu::GetIfpackSmoother (ifpackType, - smootherParamList, - paramList.get ("smoother: ifpack overlap")); - smooProto->SetFactory("A", AFact); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: unknown ML smoother type " + type + " (IFPACK) not supported by MueLu. Only ILU is supported."); - } -#else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: MueLu compiled without Ifpack support"); -#endif - - } else if (type.length() > strlen("Amesos") && type.substr(0, strlen("Amesos")) == "Amesos") { /* catch Amesos-* */ - std::string solverType = type.substr(strlen("Amesos")+1); /* ("Amesos-KLU" -> "KLU") */ - - // Validator: following upper/lower case is what is allowed by ML - bool valid = false; - const int validatorSize = 5; - std::string validator[validatorSize] = {"Superlu", "Superludist", "KLU", "UMFPACK", "MUMPS"}; /* TODO: should "" be allowed? 
*/ - for (int i=0; i < validatorSize; i++) { if (validator[i] == solverType) valid = true; } - TEUCHOS_TEST_FOR_EXCEPTION(!valid, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: unknown smoother type. '" << type << "' not supported."); + ifpackType = paramList.get("smoother: ifpack type"); + + if (ifpackType == "ILU") { + // TODO fix this (type mismatch double vs. int) + // MUELU_COPY_PARAM(paramList, "smoother: ifpack level-of-fill", double + // /*int*/, 0.0 /*2*/, smootherParamList, "fact: level-of-fill"); + if (paramList.isParameter("smoother: ifpack level-of-fill")) + smootherParamList.set("fact: level-of-fill", + Teuchos::as(paramList.get( + "smoother: ifpack level-of-fill"))); + else + smootherParamList.set("fact: level-of-fill", as(0)); - // FIXME: MueLu should accept any Upper/Lower case. Not the case for the moment - std::transform(solverType.begin()+1, solverType.end(), solverType.begin()+1, ::tolower); + MUELU_COPY_PARAM(paramList, "smoother: ifpack overlap", int, 2, + smootherParamList, "partitioner: overlap"); - smooProto = Teuchos::rcp( new DirectSolver(solverType, Teuchos::ParameterList()) ); + // TODO change to TrilinosSmoother as soon as Ifpack2 supports all + // preconditioners from Ifpack + smooProto = + MueLu::GetIfpackSmoother( + ifpackType, smootherParamList, + paramList.get("smoother: ifpack overlap")); smooProto->SetFactory("A", AFact); - } else { - - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: unknown smoother type. '" << type << "' not supported by MueLu."); - + TEUCHOS_TEST_FOR_EXCEPTION( + true, Exceptions::RuntimeError, + "MueLu::MLParameterListInterpreter: unknown ML smoother type " + + type + + " (IFPACK) not supported by MueLu. Only ILU is supported."); } - TEUCHOS_TEST_FOR_EXCEPTION(smooProto == Teuchos::null, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no smoother prototype. 
fatal error."); - - // - // Create the smoother factory - // - - RCP SmooFact = rcp( new SmootherFactory() ); +#else + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, + "MueLu::MLParameterListInterpreter: MueLu " + "compiled without Ifpack support"); +#endif - // Set parameters of the smoother factory - MUELU_READ_PARAM(paramList, "smoother: pre or post", std::string, "both", preOrPost); - if (preOrPost == "both") { - SmooFact->SetSmootherPrototypes(smooProto, smooProto); - } else if (preOrPost == "pre") { - SmooFact->SetSmootherPrototypes(smooProto, Teuchos::null); - } else if (preOrPost == "post") { - SmooFact->SetSmootherPrototypes(Teuchos::null, smooProto); + } else if (type.length() > strlen("Amesos") && + type.substr(0, strlen("Amesos")) == + "Amesos") { /* catch Amesos-* */ + std::string solverType = + type.substr(strlen("Amesos") + 1); /* ("Amesos-KLU" -> "KLU") */ + + // Validator: following upper/lower case is what is allowed by ML + bool valid = false; + const int validatorSize = 5; + std::string validator[validatorSize] = { + "Superlu", "Superludist", "KLU", "UMFPACK", + "MUMPS"}; /* TODO: should "" be allowed? */ + for (int i = 0; i < validatorSize; i++) { + if (validator[i] == solverType) + valid = true; } - - return SmooFact; + TEUCHOS_TEST_FOR_EXCEPTION( + !valid, Exceptions::RuntimeError, + "MueLu::MLParameterListInterpreter: unknown smoother type. '" + << type << "' not supported."); + + // FIXME: MueLu should accept any Upper/Lower case. Not the case for the + // moment + std::transform(solverType.begin() + 1, solverType.end(), + solverType.begin() + 1, ::tolower); + + smooProto = + Teuchos::rcp(new DirectSolver(solverType, Teuchos::ParameterList())); + smooProto->SetFactory("A", AFact); + + } else { + + TEUCHOS_TEST_FOR_EXCEPTION( + true, Exceptions::RuntimeError, + "MueLu::MLParameterListInterpreter: unknown smoother type. 
'" + << type << "' not supported by MueLu."); } - - template - void MLParameterListInterpreter::AddTransferFactory(const RCP& factory) { - // check if it's a TwoLevelFactoryBase based transfer factory - TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::rcp_dynamic_cast(factory) == Teuchos::null, Exceptions::BadCast, "Transfer factory is not derived from TwoLevelFactoryBase. Since transfer factories will be handled by the RAPFactory they have to be derived from TwoLevelFactoryBase!"); - TransferFacts_.push_back(factory); - } - - template - size_t MLParameterListInterpreter::NumTransferFactories() const { - return TransferFacts_.size(); + TEUCHOS_TEST_FOR_EXCEPTION( + smooProto == Teuchos::null, Exceptions::RuntimeError, + "MueLu::MLParameterListInterpreter: no smoother prototype. fatal error."); + + // + // Create the smoother factory + // + + RCP SmooFact = rcp(new SmootherFactory()); + + // Set parameters of the smoother factory + MUELU_READ_PARAM(paramList, "smoother: pre or post", std::string, "both", + preOrPost); + if (preOrPost == "both") { + SmooFact->SetSmootherPrototypes(smooProto, smooProto); + } else if (preOrPost == "pre") { + SmooFact->SetSmootherPrototypes(smooProto, Teuchos::null); + } else if (preOrPost == "post") { + SmooFact->SetSmootherPrototypes(Teuchos::null, smooProto); } - template - void MLParameterListInterpreter::SetupOperator(Operator & Op) const { - try { - Matrix& A = dynamic_cast(Op); - if (A.IsFixedBlockSizeSet() && (A.GetFixedBlockSize() != blksize_)) - this->GetOStream(Warnings0) << "Setting matrix block size to " << blksize_ << " (value of the parameter in the list) " - << "instead of " << A.GetFixedBlockSize() << " (provided matrix)." 
<< std::endl; - - A.SetFixedBlockSize(blksize_); + return SmooFact; +} + +template +void MLParameterListInterpreter:: + AddTransferFactory(const RCP &factory) { + // check if it's a TwoLevelFactoryBase based transfer factory + TEUCHOS_TEST_FOR_EXCEPTION( + Teuchos::rcp_dynamic_cast(factory) == Teuchos::null, + Exceptions::BadCast, + "Transfer factory is not derived from TwoLevelFactoryBase. Since " + "transfer factories will be handled by the RAPFactory they have to be " + "derived from TwoLevelFactoryBase!"); + TransferFacts_.push_back(factory); +} + +template +size_t MLParameterListInterpreter::NumTransferFactories() const { + return TransferFacts_.size(); +} + +template +void MLParameterListInterpreter::SetupOperator(Operator &Op) const { + try { + Matrix &A = dynamic_cast(Op); + if (A.IsFixedBlockSizeSet() && (A.GetFixedBlockSize() != blksize_)) + this->GetOStream(Warnings0) << "Setting matrix block size to " << blksize_ + << " (value of the parameter in the list) " + << "instead of " << A.GetFixedBlockSize() + << " (provided matrix)." 
<< std::endl; + + A.SetFixedBlockSize(blksize_); #ifdef HAVE_MUELU_DEBUG - MatrixUtils::checkLocalRowMapMatchesColMap(A); + MatrixUtils::checkLocalRowMapMatchesColMap(A); #endif // HAVE_MUELU_DEBUG - } catch (std::bad_cast&) { - this->GetOStream(Warnings0) << "Skipping setting block size as the operator is not a matrix" << std::endl; - } + } catch (std::bad_cast &) { + this->GetOStream(Warnings0) + << "Skipping setting block size as the operator is not a matrix" + << std::endl; } +} } // namespace MueLu #define MUELU_MLPARAMETERLISTINTERPRETER_SHORT #endif /* MUELU_MLPARAMETERLISTINTERPRETER_DEF_HPP */ -//TODO: see if it can be factorized with ML interpreter (ex: generation of Ifpack param list) +// TODO: see if it can be factorized with ML interpreter (ex: generation of +// Ifpack param list) diff --git a/packages/muelu/src/Interface/MueLu_ParameterListInterpreter.cpp b/packages/muelu/src/Interface/MueLu_ParameterListInterpreter.cpp index fe96d582432b..6c9c3e575374 100644 --- a/packages/muelu/src/Interface/MueLu_ParameterListInterpreter.cpp +++ b/packages/muelu/src/Interface/MueLu_ParameterListInterpreter.cpp @@ -44,51 +44,53 @@ // // @HEADER +#include #include #include #include -#include namespace MueLu { - size_t LevenshteinDistance(const char* s, size_t len_s, const char* t, size_t len_t) { - // degenerate cases - if (len_s == 0) return len_t; - if (len_t == 0) return len_s; - if (!strncmp(s, t, std::min(len_s, len_t))) return 0; +size_t LevenshteinDistance(const char *s, size_t len_s, const char *t, + size_t len_t) { + // degenerate cases + if (len_s == 0) + return len_t; + if (len_t == 0) + return len_s; + if (!strncmp(s, t, std::min(len_s, len_t))) + return 0; - // create two work vectors of integer distances - size_t len = len_t + 1; - std::vector v0(len); - std::vector v1(len); + // create two work vectors of integer distances + size_t len = len_t + 1; + std::vector v0(len); + std::vector v1(len); - // initialize v0 (the previous row of distances) - // 
this row is A[0][i]: edit distance for an empty s - // the distance is just the number of characters to delete from t - for (size_t i = 0; i < len; i++) - v0[i] = i; + // initialize v0 (the previous row of distances) + // this row is A[0][i]: edit distance for an empty s + // the distance is just the number of characters to delete from t + for (size_t i = 0; i < len; i++) + v0[i] = i; - for (size_t i = 0; i < len_s; i++) { - // calculate v1 (current row distances) from the previous row v0 + for (size_t i = 0; i < len_s; i++) { + // calculate v1 (current row distances) from the previous row v0 - // first element of v1 is A[i+1][0] - // edit distance is delete (i+1) chars from s to match empty t - v1[0] = i + 1; + // first element of v1 is A[i+1][0] + // edit distance is delete (i+1) chars from s to match empty t + v1[0] = i + 1; - // use formula to fill in the rest of the row - for (size_t j = 0; j < len_t; j++) { - size_t cost = (s[i] == t[j]) ? 0 : 1; - v1[j+1] = std::min(v1[j] + 1, - std::min(v0[j + 1] + 1, - v0[j] + cost)); - } - - // copy v1 (current row) to v0 (previous row) for next iteration - for (size_t j = 0; j < len; j++) - v0[j] = v1[j]; + // use formula to fill in the rest of the row + for (size_t j = 0; j < len_t; j++) { + size_t cost = (s[i] == t[j]) ? 
0 : 1; + v1[j + 1] = std::min(v1[j] + 1, std::min(v0[j + 1] + 1, v0[j] + cost)); } - return v1[len_t]; + // copy v1 (current row) to v0 (previous row) for next iteration + for (size_t j = 0; j < len; j++) + v0[j] = v1[j]; } + return v1[len_t]; } + +} // namespace MueLu diff --git a/packages/muelu/src/Interface/MueLu_ParameterListInterpreter_decl.hpp b/packages/muelu/src/Interface/MueLu_ParameterListInterpreter_decl.hpp index 395dc4231ffc..fe4230183efa 100644 --- a/packages/muelu/src/Interface/MueLu_ParameterListInterpreter_decl.hpp +++ b/packages/muelu/src/Interface/MueLu_ParameterListInterpreter_decl.hpp @@ -81,25 +81,24 @@ #include "MueLu_RAPShiftFactory_fwd.hpp" #include "MueLu_RebalanceAcFactory_fwd.hpp" #include "MueLu_RebalanceTransferFactory_fwd.hpp" -#include "MueLu_RepartitionFactory_fwd.hpp" #include "MueLu_ReitzingerPFactory_fwd.hpp" +#include "MueLu_RepartitionFactory_fwd.hpp" #include "MueLu_SaPFactory_fwd.hpp" #include "MueLu_SemiCoarsenPFactory_fwd.hpp" #include "MueLu_SmootherFactory_fwd.hpp" #include "MueLu_TentativePFactory_fwd.hpp" -#include "MueLu_TogglePFactory_fwd.hpp" #include "MueLu_ToggleCoordinatesTransferFactory_fwd.hpp" +#include "MueLu_TogglePFactory_fwd.hpp" #include "MueLu_TransPFactory_fwd.hpp" #include "MueLu_UncoupledAggregationFactory_fwd.hpp" -#include "MueLu_ZoltanInterface_fwd.hpp" #include "MueLu_Zoltan2Interface_fwd.hpp" +#include "MueLu_ZoltanInterface_fwd.hpp" #ifdef HAVE_MUELU_MATLAB #include "MueLu_MatlabSmoother_fwd.hpp" -#include "MueLu_TwoLevelMatlabFactory_fwd.hpp" #include "MueLu_SingleLevelMatlabFactory_fwd.hpp" +#include "MueLu_TwoLevelMatlabFactory_fwd.hpp" #endif - #include "MueLu_CoalesceDropFactory_kokkos_fwd.hpp" #include "MueLu_NullspaceFactory_kokkos_fwd.hpp" #include "MueLu_SaPFactory_kokkos_fwd.hpp" @@ -113,182 +112,264 @@ namespace MueLu { - template - class ParameterListInterpreter : - public HierarchyManager { +template +class ParameterListInterpreter + : public HierarchyManager { #undef 
MUELU_PARAMETERLISTINTERPRETER_SHORT #include "MueLu_UseShortNames.hpp" - typedef std::pair keep_pair; - - public: - //! @name Constructors/Destructors - //@{ - - protected: - /*! @brief Empty constructor - * - * Constructor for derived classes - */ - ParameterListInterpreter() { - factFact_ = Teuchos::null; - facadeFact_ = Teuchos::rcp(new FacadeClassFactory()); - } - - public: - /*! @brief Constructor that accepts a user-provided ParameterList. - - Constructor for parameter list interpreter which directly interprets Teuchos::ParameterLists - - @details The parameter list can be either in the easy parameter list format or in the factory driven parameter list format. - - @param[in] paramList (Teuchos::ParameterList): ParameterList containing the MueLu parameters - @param[in] comm (RCP >): Optional RCP of a Teuchos communicator (default: Teuchos::null) - @param[in] factFact (RCP): Optional parameter allowing to define user-specific factory interpreters for user-specific extensions of the XML interface. (default: Teuchos::null) - @param[in] facadeFact (RCP): Optional parameter containing a FacadeFactory class. The user can register its own facade classes in the FacadeFactory and provide it to the ParameterListInterpreter. (default: Teuchos::null, means, only standard FacadeClass that come with MueLu are available) - - */ - ParameterListInterpreter(Teuchos::ParameterList& paramList, Teuchos::RCP > comm = Teuchos::null, Teuchos::RCP factFact = Teuchos::null, Teuchos::RCP facadeFact = Teuchos::null); - - /*! @brief Constructor that reads parameters from an XML file. - - XML options are converted to ParameterList entries by Teuchos. - - @param[in] xmlFileName (std::string): XML file to read - @param[in] comm (Teuchos::Comm): Teuchos communicator - @param[in] factFact (RCP): Optional parameter allowing to define user-specific factory interpreters for user-specific extensions of the XML interface. 
(default: Teuchos::null) - @param[in] facadeFact (RCP): Optional parameter containing a FacadeFactory class. The user can register its own facade classes in the FacadeFactory and provide it to the ParameterListInterpreter. (default: Teuchos::null, means, only standard FacadeClass that come with MueLu are available) - - */ - ParameterListInterpreter(const std::string& xmlFileName, const Teuchos::Comm& comm, Teuchos::RCP factFact = Teuchos::null, Teuchos::RCP facadeFact = Teuchos::null); - - //! Destructor. - virtual ~ParameterListInterpreter() { } - - //@} - - /*! @brief Set parameter list for Parameter list interpreter. - - The routine checks whether it is a parameter list in the easy parameter format or the more advanced factory-based parameter format and calls the corresponding interpreter routine. - - When finished, the parameter list is set that will used by the hierarchy build phase. - - This method includes validation and some pre-parsing of the list for: - - verbosity level - - data to export - - cycle type - - max coarse size - - max levels - - number of equations - - @param[in] paramList: ParameterList containing the MueLu parameters. - */ - void SetParameterList(const Teuchos::ParameterList& paramList); - - //! Call the SetupHierarchy routine from the HiearchyManager object. - void SetupHierarchy(Hierarchy& H) const; - - private: - //! Setup Operator object - virtual void SetupOperator(Operator& A) const; - - int blockSize_; ///< block size of matrix (fixed block size) - CycleType Cycle_; ///< multigrid cycle type (V-cycle or W-cycle) - int WCycleStartLevel_; ///< in case of W-cycle, level on which cycle should start - double scalingFactor_; ///< prolongator scaling factor - GlobalOrdinal dofOffset_; ///< global offset variable describing offset of DOFs in operator - - //! 
Easy interpreter stuff - //@{ - // These three variables are only needed to print out proper [default] - bool changedPRrebalance_; - bool changedPRViaCopyrebalance_; - bool changedImplicitTranspose_; - - void SetEasyParameterList(const Teuchos::ParameterList& paramList); - void Validate(const Teuchos::ParameterList& paramList) const; - - void UpdateFactoryManager(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - - // "Generic components" for UpdateFactoryManager - void UpdateFactoryManager_Smoothers(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_CoarseSolvers(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_Aggregation_TentativeP(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_Restriction(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_RAP(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_Coordinates(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_Repartition(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps, RCP & nullSpaceFactory) const; - void UpdateFactoryManager_LowPrecision(ParameterList& paramList, const ParameterList& defaultList, 
FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_Nullspace(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps, RCP & nullSpaceFactory) const; - void UpdateFactoryManager_BlockNumber(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, - FactoryManager& manager,int levelID, std::vector& keeps) const; - void UpdateFactoryManager_LocalOrdinalTransfer(const std::string& VarName, const std::string& multigridAlgo, Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, - FactoryManager& manager,int levelID, std::vector& keeps) const; - - // Algorithm-specific components for UpdateFactoryManager - void UpdateFactoryManager_SemiCoarsen(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_PCoarsen(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_SA(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_Reitzinger(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_Emin(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_PG(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_Replicate(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - 
int levelID, std::vector& keeps) const; - void UpdateFactoryManager_Combine(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - void UpdateFactoryManager_Matlab(Teuchos::ParameterList& paramList, const Teuchos::ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const; - - - - - bool useCoordinates_; - bool useBlockNumber_; - bool useKokkos_; - //@} - - //! Factory interpreter stuff - // TODO: - // - parameter list validator - // - SetParameterList - // - Set/Get directly Level manager - // - build per level - // - comments/docs - // - use FactoryManager instead of FactoryMap - //@{ - void SetFactoryParameterList(const Teuchos::ParameterList& paramList); - - typedef std::map > FactoryMap; //TODO: remove this line - typedef std::map > FactoryManagerMap; - - void BuildFactoryMap(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, FactoryMap& factoryMapOut, FactoryManagerMap& factoryManagers) const; - - //! Internal factory for factories - Teuchos::RCP factFact_; - - //! FacadeClass factory - Teuchos::RCP > facadeFact_; - - //@} - }; + typedef std::pair keep_pair; + +public: + //! @name Constructors/Destructors + //@{ + +protected: + /*! @brief Empty constructor + * + * Constructor for derived classes + */ + ParameterListInterpreter() { + factFact_ = Teuchos::null; + facadeFact_ = Teuchos::rcp(new FacadeClassFactory()); + } + +public: + /*! @brief Constructor that accepts a user-provided ParameterList. + + Constructor for parameter list interpreter which directly interprets + Teuchos::ParameterLists + + @details The parameter list can be either in the easy parameter list + format or in the factory driven parameter list format. 
+ + @param[in] paramList (Teuchos::ParameterList): ParameterList containing + the MueLu parameters + @param[in] comm (RCP >): Optional RCP of a Teuchos + communicator (default: Teuchos::null) + @param[in] factFact (RCP): Optional parameter allowing to + define user-specific factory interpreters for user-specific extensions of + the XML interface. (default: Teuchos::null) + @param[in] facadeFact (RCP): Optional parameter containing + a FacadeFactory class. The user can register its own facade classes in the + FacadeFactory and provide it to the ParameterListInterpreter. (default: + Teuchos::null, means, only standard FacadeClass that come with MueLu are + available) + + */ + ParameterListInterpreter( + Teuchos::ParameterList ¶mList, + Teuchos::RCP> comm = Teuchos::null, + Teuchos::RCP factFact = Teuchos::null, + Teuchos::RCP facadeFact = Teuchos::null); + + /*! @brief Constructor that reads parameters from an XML file. + + XML options are converted to ParameterList entries by Teuchos. + + @param[in] xmlFileName (std::string): XML file to read + @param[in] comm (Teuchos::Comm): Teuchos communicator + @param[in] factFact (RCP): Optional parameter allowing to + define user-specific factory interpreters for user-specific extensions of + the XML interface. (default: Teuchos::null) + @param[in] facadeFact (RCP): Optional parameter containing + a FacadeFactory class. The user can register its own facade classes in the + FacadeFactory and provide it to the ParameterListInterpreter. (default: + Teuchos::null, means, only standard FacadeClass that come with MueLu are + available) + + */ + ParameterListInterpreter( + const std::string &xmlFileName, const Teuchos::Comm &comm, + Teuchos::RCP factFact = Teuchos::null, + Teuchos::RCP facadeFact = Teuchos::null); + + //! Destructor. + virtual ~ParameterListInterpreter() {} + + //@} + + /*! @brief Set parameter list for Parameter list interpreter. 
+ + The routine checks whether it is a parameter list in the easy parameter + format or the more advanced factory-based parameter format and calls the + corresponding interpreter routine. + + When finished, the parameter list is set that will used by the hierarchy + build phase. + + This method includes validation and some pre-parsing of the list for: + - verbosity level + - data to export + - cycle type + - max coarse size + - max levels + - number of equations + + @param[in] paramList: ParameterList containing the MueLu parameters. + */ + void SetParameterList(const Teuchos::ParameterList ¶mList); + + //! Call the SetupHierarchy routine from the HiearchyManager object. + void SetupHierarchy(Hierarchy &H) const; + +private: + //! Setup Operator object + virtual void SetupOperator(Operator &A) const; + + int blockSize_; ///< block size of matrix (fixed block size) + CycleType Cycle_; ///< multigrid cycle type (V-cycle or W-cycle) + int WCycleStartLevel_; ///< in case of W-cycle, level on which cycle should + ///< start + double scalingFactor_; ///< prolongator scaling factor + GlobalOrdinal dofOffset_; ///< global offset variable describing offset of + ///< DOFs in operator + + //! 
Easy interpreter stuff + //@{ + // These three variables are only needed to print out proper [default] + bool changedPRrebalance_; + bool changedPRViaCopyrebalance_; + bool changedImplicitTranspose_; + + void SetEasyParameterList(const Teuchos::ParameterList ¶mList); + void Validate(const Teuchos::ParameterList ¶mList) const; + + void UpdateFactoryManager(Teuchos::ParameterList ¶mList, + const Teuchos::ParameterList &defaultList, + FactoryManager &manager, int levelID, + std::vector &keeps) const; + + // "Generic components" for UpdateFactoryManager + void UpdateFactoryManager_Smoothers(Teuchos::ParameterList ¶mList, + const Teuchos::ParameterList &defaultList, + FactoryManager &manager, int levelID, + std::vector &keeps) const; + void + UpdateFactoryManager_CoarseSolvers(Teuchos::ParameterList ¶mList, + const Teuchos::ParameterList &defaultList, + FactoryManager &manager, int levelID, + std::vector &keeps) const; + void UpdateFactoryManager_Aggregation_TentativeP( + Teuchos::ParameterList ¶mList, + const Teuchos::ParameterList &defaultList, FactoryManager &manager, + int levelID, std::vector &keeps) const; + void + UpdateFactoryManager_Restriction(Teuchos::ParameterList ¶mList, + const Teuchos::ParameterList &defaultList, + FactoryManager &manager, int levelID, + std::vector &keeps) const; + void UpdateFactoryManager_RAP(Teuchos::ParameterList ¶mList, + const Teuchos::ParameterList &defaultList, + FactoryManager &manager, int levelID, + std::vector &keeps) const; + void + UpdateFactoryManager_Coordinates(Teuchos::ParameterList ¶mList, + const Teuchos::ParameterList &defaultList, + FactoryManager &manager, int levelID, + std::vector &keeps) const; + void + UpdateFactoryManager_Repartition(Teuchos::ParameterList ¶mList, + const Teuchos::ParameterList &defaultList, + FactoryManager &manager, int levelID, + std::vector &keeps, + RCP &nullSpaceFactory) const; + void UpdateFactoryManager_LowPrecision(ParameterList ¶mList, + const ParameterList &defaultList, + 
FactoryManager &manager, int levelID, + std::vector &keeps) const; + void UpdateFactoryManager_Nullspace(Teuchos::ParameterList ¶mList, + const Teuchos::ParameterList &defaultList, + FactoryManager &manager, int levelID, + std::vector &keeps, + RCP &nullSpaceFactory) const; + void + UpdateFactoryManager_BlockNumber(Teuchos::ParameterList ¶mList, + const Teuchos::ParameterList &defaultList, + FactoryManager &manager, int levelID, + std::vector &keeps) const; + void UpdateFactoryManager_LocalOrdinalTransfer( + const std::string &VarName, const std::string &multigridAlgo, + Teuchos::ParameterList ¶mList, + const Teuchos::ParameterList &defaultList, FactoryManager &manager, + int levelID, std::vector &keeps) const; + + // Algorithm-specific components for UpdateFactoryManager + void + UpdateFactoryManager_SemiCoarsen(Teuchos::ParameterList ¶mList, + const Teuchos::ParameterList &defaultList, + FactoryManager &manager, int levelID, + std::vector &keeps) const; + void UpdateFactoryManager_PCoarsen(Teuchos::ParameterList ¶mList, + const Teuchos::ParameterList &defaultList, + FactoryManager &manager, int levelID, + std::vector &keeps) const; + void UpdateFactoryManager_SA(Teuchos::ParameterList ¶mList, + const Teuchos::ParameterList &defaultList, + FactoryManager &manager, int levelID, + std::vector &keeps) const; + void + UpdateFactoryManager_Reitzinger(Teuchos::ParameterList ¶mList, + const Teuchos::ParameterList &defaultList, + FactoryManager &manager, int levelID, + std::vector &keeps) const; + void UpdateFactoryManager_Emin(Teuchos::ParameterList ¶mList, + const Teuchos::ParameterList &defaultList, + FactoryManager &manager, int levelID, + std::vector &keeps) const; + void UpdateFactoryManager_PG(Teuchos::ParameterList ¶mList, + const Teuchos::ParameterList &defaultList, + FactoryManager &manager, int levelID, + std::vector &keeps) const; + void UpdateFactoryManager_Replicate(Teuchos::ParameterList ¶mList, + const Teuchos::ParameterList &defaultList, + FactoryManager 
&manager, int levelID, + std::vector &keeps) const; + void UpdateFactoryManager_Combine(Teuchos::ParameterList ¶mList, + const Teuchos::ParameterList &defaultList, + FactoryManager &manager, int levelID, + std::vector &keeps) const; + void UpdateFactoryManager_Matlab(Teuchos::ParameterList ¶mList, + const Teuchos::ParameterList &defaultList, + FactoryManager &manager, int levelID, + std::vector &keeps) const; + + bool useCoordinates_; + bool useBlockNumber_; + bool useKokkos_; + //@} + + //! Factory interpreter stuff + // TODO: + // - parameter list validator + // - SetParameterList + // - Set/Get directly Level manager + // - build per level + // - comments/docs + // - use FactoryManager instead of FactoryMap + //@{ + void SetFactoryParameterList(const Teuchos::ParameterList ¶mList); + + typedef std::map> + FactoryMap; // TODO: remove this line + typedef std::map> FactoryManagerMap; + + void BuildFactoryMap(const Teuchos::ParameterList ¶mList, + const FactoryMap &factoryMapIn, + FactoryMap &factoryMapOut, + FactoryManagerMap &factoryManagers) const; + + //! Internal factory for factories + Teuchos::RCP factFact_; + + //! 
FacadeClass factory + Teuchos::RCP< + MueLu::FacadeClassFactory> + facadeFact_; + + //@} +}; } // namespace MueLu diff --git a/packages/muelu/src/Interface/MueLu_ParameterListInterpreter_def.hpp b/packages/muelu/src/Interface/MueLu_ParameterListInterpreter_def.hpp index ccf340ca11ed..1cfc7e718c8a 100644 --- a/packages/muelu/src/Interface/MueLu_ParameterListInterpreter_def.hpp +++ b/packages/muelu/src/Interface/MueLu_ParameterListInterpreter_def.hpp @@ -54,18 +54,19 @@ #include "MueLu_ParameterListInterpreter_decl.hpp" -#include "MueLu_MasterList.hpp" -#include "MueLu_Level.hpp" -#include "MueLu_Hierarchy.hpp" #include "MueLu_FactoryManager.hpp" +#include "MueLu_Hierarchy.hpp" +#include "MueLu_Level.hpp" +#include "MueLu_MasterList.hpp" -#include "MueLu_AggregationExportFactory.hpp" #include "MueLu_AggregateQualityEstimateFactory.hpp" +#include "MueLu_AggregationExportFactory.hpp" #include "MueLu_BrickAggregationFactory.hpp" #include "MueLu_ClassicalMapFactory.hpp" #include "MueLu_ClassicalPFactory.hpp" #include "MueLu_CoalesceDropFactory.hpp" #include "MueLu_CoarseMapFactory.hpp" +#include "MueLu_CombinePFactory.hpp" #include "MueLu_ConstraintFactory.hpp" #include "MueLu_CoordinatesTransferFactory.hpp" #include "MueLu_DirectSolver.hpp" @@ -78,32 +79,31 @@ #include "MueLu_InitialBlockNumberFactory.hpp" #include "MueLu_LineDetectionFactory.hpp" #include "MueLu_LocalOrdinalTransferFactory.hpp" +#include "MueLu_LowPrecisionFactory.hpp" +#include "MueLu_NodePartitionInterface.hpp" #include "MueLu_NotayAggregationFactory.hpp" #include "MueLu_NullspaceFactory.hpp" #include "MueLu_PatternFactory.hpp" -#include "MueLu_ReplicatePFactory.hpp" -#include "MueLu_CombinePFactory.hpp" #include "MueLu_PgPFactory.hpp" #include "MueLu_RAPFactory.hpp" #include "MueLu_RAPShiftFactory.hpp" #include "MueLu_RebalanceAcFactory.hpp" #include "MueLu_RebalanceTransferFactory.hpp" -#include "MueLu_RepartitionFactory.hpp" #include "MueLu_ReitzingerPFactory.hpp" +#include 
"MueLu_RepartitionFactory.hpp" +#include "MueLu_ReplicatePFactory.hpp" #include "MueLu_SaPFactory.hpp" #include "MueLu_ScaledNullspaceFactory.hpp" #include "MueLu_SemiCoarsenPFactory.hpp" -#include "MueLu_SmootherFactory.hpp" #include "MueLu_SmooVecCoalesceDropFactory.hpp" +#include "MueLu_SmootherFactory.hpp" #include "MueLu_TentativePFactory.hpp" -#include "MueLu_TogglePFactory.hpp" #include "MueLu_ToggleCoordinatesTransferFactory.hpp" +#include "MueLu_TogglePFactory.hpp" #include "MueLu_TransPFactory.hpp" #include "MueLu_UncoupledAggregationFactory.hpp" -#include "MueLu_ZoltanInterface.hpp" #include "MueLu_Zoltan2Interface.hpp" -#include "MueLu_NodePartitionInterface.hpp" -#include "MueLu_LowPrecisionFactory.hpp" +#include "MueLu_ZoltanInterface.hpp" #include "MueLu_CoalesceDropFactory_kokkos.hpp" #include "MueLu_NullspaceFactory_kokkos.hpp" @@ -115,10 +115,10 @@ #ifdef HAVE_MUELU_MATLAB #include "../matlab/src/MueLu_MatlabSmoother_decl.hpp" #include "../matlab/src/MueLu_MatlabSmoother_def.hpp" -#include "../matlab/src/MueLu_TwoLevelMatlabFactory_decl.hpp" -#include "../matlab/src/MueLu_TwoLevelMatlabFactory_def.hpp" #include "../matlab/src/MueLu_SingleLevelMatlabFactory_decl.hpp" #include "../matlab/src/MueLu_SingleLevelMatlabFactory_def.hpp" +#include "../matlab/src/MueLu_TwoLevelMatlabFactory_decl.hpp" +#include "../matlab/src/MueLu_TwoLevelMatlabFactory_def.hpp" #endif #ifdef HAVE_MUELU_INTREPID2 @@ -129,2080 +129,2764 @@ namespace MueLu { - template - ParameterListInterpreter::ParameterListInterpreter(ParameterList& paramList, Teuchos::RCP > comm, Teuchos::RCP factFact, Teuchos::RCP facadeFact) : factFact_(factFact) { - RCP tM = rcp(new Teuchos::TimeMonitor(*Teuchos::TimeMonitor::getNewTimer(std::string("MueLu: ParameterListInterpreter (ParameterList)")))); - if(facadeFact == Teuchos::null) - facadeFact_ = Teuchos::rcp(new FacadeClassFactory()); - else - facadeFact_ = facadeFact; - - if (paramList.isParameter("xml parameter file")) { - std::string filename 
= paramList.get("xml parameter file", ""); - if (filename.length() != 0) { - TEUCHOS_TEST_FOR_EXCEPTION(comm.is_null(), Exceptions::RuntimeError, "xml parameter file requires a valid comm"); - - ParameterList paramList2 = paramList; - Teuchos::updateParametersFromXmlFileAndBroadcast(filename, Teuchos::Ptr(¶mList2), *comm); - SetParameterList(paramList2); - - } else { - SetParameterList(paramList); - } +template +ParameterListInterpreter:: + ParameterListInterpreter(ParameterList ¶mList, + Teuchos::RCP> comm, + Teuchos::RCP factFact, + Teuchos::RCP facadeFact) + : factFact_(factFact) { + RCP tM = + rcp(new Teuchos::TimeMonitor(*Teuchos::TimeMonitor::getNewTimer( + std::string("MueLu: ParameterListInterpreter (ParameterList)")))); + if (facadeFact == Teuchos::null) + facadeFact_ = Teuchos::rcp(new FacadeClassFactory()); + else + facadeFact_ = facadeFact; + + if (paramList.isParameter("xml parameter file")) { + std::string filename = paramList.get("xml parameter file", ""); + if (filename.length() != 0) { + TEUCHOS_TEST_FOR_EXCEPTION(comm.is_null(), Exceptions::RuntimeError, + "xml parameter file requires a valid comm"); + + ParameterList paramList2 = paramList; + Teuchos::updateParametersFromXmlFileAndBroadcast( + filename, Teuchos::Ptr(¶mList2), *comm); + SetParameterList(paramList2); } else { SetParameterList(paramList); } - } - - template - ParameterListInterpreter::ParameterListInterpreter(const std::string& xmlFileName, const Teuchos::Comm& comm, Teuchos::RCP factFact, Teuchos::RCP facadeFact) : factFact_(factFact) { - RCP tM = rcp(new Teuchos::TimeMonitor(*Teuchos::TimeMonitor::getNewTimer(std::string("MueLu: ParameterListInterpreter (XML)")))); - if(facadeFact == Teuchos::null) - facadeFact_ = Teuchos::rcp(new FacadeClassFactory()); - else - facadeFact_ = facadeFact; - ParameterList paramList; - Teuchos::updateParametersFromXmlFileAndBroadcast(xmlFileName, Teuchos::Ptr(¶mList), comm); + } else { SetParameterList(paramList); } - - template - void 
ParameterListInterpreter::SetParameterList(const ParameterList& paramList) { - Cycle_ = Hierarchy::GetDefaultCycle(); - WCycleStartLevel_ = Hierarchy::GetDefaultCycleStartLevel(); - scalingFactor_= Teuchos::ScalarTraits::one(); - blockSize_ = 1; - dofOffset_ = 0; - - if (paramList.isSublist("Hierarchy")) { - SetFactoryParameterList(paramList); - - } else if (paramList.isParameter("MueLu preconditioner") == true) { - this->GetOStream(Runtime0) << "Use facade class: " << paramList.get("MueLu preconditioner") << std::endl; - Teuchos::RCP pp = facadeFact_->SetParameterList(paramList); - SetFactoryParameterList(*pp); - - } else { - // The validator doesn't work correctly for non-serializable data (Hint: template parameters), so strip it out - ParameterList serialList, nonSerialList; - - ExtractNonSerializableData(paramList, serialList, nonSerialList); - Validate(serialList); - SetEasyParameterList(paramList); - } +} + +template +ParameterListInterpreter:: + ParameterListInterpreter(const std::string &xmlFileName, + const Teuchos::Comm &comm, + Teuchos::RCP factFact, + Teuchos::RCP facadeFact) + : factFact_(factFact) { + RCP tM = + rcp(new Teuchos::TimeMonitor(*Teuchos::TimeMonitor::getNewTimer( + std::string("MueLu: ParameterListInterpreter (XML)")))); + if (facadeFact == Teuchos::null) + facadeFact_ = Teuchos::rcp(new FacadeClassFactory()); + else + facadeFact_ = facadeFact; + + ParameterList paramList; + Teuchos::updateParametersFromXmlFileAndBroadcast( + xmlFileName, Teuchos::Ptr(¶mList), comm); + SetParameterList(paramList); +} + +template +void ParameterListInterpreter:: + SetParameterList(const ParameterList ¶mList) { + Cycle_ = Hierarchy::GetDefaultCycle(); + WCycleStartLevel_ = Hierarchy::GetDefaultCycleStartLevel(); + scalingFactor_ = Teuchos::ScalarTraits::one(); + blockSize_ = 1; + dofOffset_ = 0; + + if (paramList.isSublist("Hierarchy")) { + SetFactoryParameterList(paramList); + + } else if (paramList.isParameter("MueLu preconditioner") == true) { + 
this->GetOStream(Runtime0) + << "Use facade class: " + << paramList.get("MueLu preconditioner") << std::endl; + Teuchos::RCP pp = facadeFact_->SetParameterList(paramList); + SetFactoryParameterList(*pp); + + } else { + // The validator doesn't work correctly for non-serializable data (Hint: + // template parameters), so strip it out + ParameterList serialList, nonSerialList; + + ExtractNonSerializableData(paramList, serialList, nonSerialList); + Validate(serialList); + SetEasyParameterList(paramList); + } +} + +// ===================================================================================================== +// ====================================== EASY interpreter +// ============================================= +// ===================================================================================================== +//! Helper functions to compare two paramter lists +static inline bool areSame(const ParameterList &list1, + const ParameterList &list2); + +// Get value from one of the lists, or set it to default +// Use case: check for a parameter value in a level-specific sublist, then in a +// root level list; if it is absent from both, set it to default +#define MUELU_SET_VAR_2LIST(paramList, defaultList, paramName, paramType, \ + varName) \ + paramType varName; \ + if (paramList.isParameter(paramName)) \ + varName = paramList.get(paramName); \ + else if (defaultList.isParameter(paramName)) \ + varName = defaultList.get(paramName); \ + else \ + varName = MasterList::getDefault(paramName); + +#define MUELU_TEST_AND_SET_VAR(paramList, paramName, paramType, varName) \ + (paramList.isParameter(paramName) \ + ? 
varName = paramList.get(paramName), \ + true : false) + +// Set parameter in a list if it is present in any of two lists +// User case: set factory specific parameter, first checking for a +// level-specific value, then cheking root level value +#define MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, paramName, \ + paramType, listWrite) \ + try { \ + if (paramList.isParameter(paramName)) \ + listWrite.set(paramName, paramList.get(paramName)); \ + else if (defaultList.isParameter(paramName)) \ + listWrite.set(paramName, defaultList.get(paramName)); \ + } catch (Teuchos::Exceptions::InvalidParameterType &) { \ + TEUCHOS_TEST_FOR_EXCEPTION_PURE_MSG( \ + true, Teuchos::Exceptions::InvalidParameterType, \ + "Error: parameter \"" << paramName << "\" must be of type " \ + << Teuchos::TypeNameTraits::name()); \ } - // ===================================================================================================== - // ====================================== EASY interpreter ============================================= - // ===================================================================================================== - //! Helper functions to compare two paramter lists - static inline bool areSame(const ParameterList& list1, const ParameterList& list2); - - // Get value from one of the lists, or set it to default - // Use case: check for a parameter value in a level-specific sublist, then in a root level list; - // if it is absent from both, set it to default -#define MUELU_SET_VAR_2LIST(paramList, defaultList, paramName, paramType, varName) \ - paramType varName; \ - if (paramList.isParameter(paramName)) varName = paramList.get(paramName); \ - else if (defaultList.isParameter(paramName)) varName = defaultList.get(paramName); \ - else varName = MasterList::getDefault(paramName); - -#define MUELU_TEST_AND_SET_VAR(paramList, paramName, paramType, varName) \ - (paramList.isParameter(paramName) ? 
varName = paramList.get(paramName), true : false) - - // Set parameter in a list if it is present in any of two lists - // User case: set factory specific parameter, first checking for a level-specific value, then cheking root level value -#define MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, paramName, paramType, listWrite) \ - try { \ - if (paramList .isParameter(paramName)) listWrite.set(paramName, paramList .get(paramName)); \ - else if (defaultList.isParameter(paramName)) listWrite.set(paramName, defaultList.get(paramName)); \ - } \ - catch(Teuchos::Exceptions::InvalidParameterType&) { \ - TEUCHOS_TEST_FOR_EXCEPTION_PURE_MSG(true, Teuchos::Exceptions::InvalidParameterType, \ - "Error: parameter \"" << paramName << "\" must be of type " << Teuchos::TypeNameTraits::name()); \ - } \ - -#define MUELU_TEST_PARAM_2LIST(paramList, defaultList, paramName, paramType, cmpValue) \ - (cmpValue == ( \ - paramList.isParameter(paramName) ? paramList .get(paramName) : ( \ - defaultList.isParameter(paramName) ? 
defaultList.get(paramName) : \ - MasterList::getDefault(paramName) ) ) ) - -#define MUELU_KOKKOS_FACTORY(varName, oldFactory, newFactory) \ - RCP varName; \ - if (!useKokkos_) varName = rcp(new oldFactory()); \ - else varName = rcp(new newFactory()); -#define MUELU_KOKKOS_FACTORY_NO_DECL(varName, oldFactory, newFactory) \ - if (!useKokkos_) varName = rcp(new oldFactory()); \ - else varName = rcp(new newFactory()); - - template - void ParameterListInterpreter:: - SetEasyParameterList(const ParameterList& constParamList) { - ParameterList paramList; - - MUELU_SET_VAR_2LIST(constParamList, constParamList, "problem: type", std::string, problemType); - if (problemType != "unknown") { - paramList = *MasterList::GetProblemSpecificList(problemType); - paramList.setParameters(constParamList); - } else { - // Create a non const copy of the parameter list - // Working with a modifiable list is much much easier than with original one - paramList = constParamList; - } - - // Check for Kokkos -# ifdef HAVE_MUELU_SERIAL - if (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosSerialWrapperNode).name()) - useKokkos_ = false; -# endif -# ifdef HAVE_MUELU_OPENMP - if (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosOpenMPWrapperNode).name()) - useKokkos_ = true; -# endif -# ifdef HAVE_MUELU_CUDA - if (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosCudaWrapperNode).name()) - useKokkos_ = true; -# endif -# ifdef HAVE_MUELU_HIP - if (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosHIPWrapperNode).name()) - useKokkos_ = true; -# endif -# ifdef HAVE_MUELU_SYCL - if (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosSYCLWrapperNode).name()) - useKokkos_ = true; -# endif - (void)MUELU_TEST_AND_SET_VAR(paramList, "use kokkos refactor", bool, useKokkos_); - - // Check for timer synchronization - MUELU_SET_VAR_2LIST(paramList, paramList, "synchronize factory timers", bool, syncTimers); - if (syncTimers) - Factory::EnableTimerSync(); - - // 
Translate cycle type parameter - if (paramList.isParameter("cycle type")) { - std::map cycleMap; - cycleMap["V"] = VCYCLE; - cycleMap["W"] = WCYCLE; - - auto cycleType = paramList.get("cycle type"); - TEUCHOS_TEST_FOR_EXCEPTION(cycleMap.count(cycleType) == 0, Exceptions::RuntimeError, - "Invalid cycle type: \"" << cycleType << "\""); - Cycle_ = cycleMap[cycleType]; - } - - if (paramList.isParameter("W cycle start level")) { - WCycleStartLevel_ = paramList.get("W cycle start level"); - } - - if (paramList.isParameter("coarse grid correction scaling factor")) - scalingFactor_ = paramList.get("coarse grid correction scaling factor"); - - this->maxCoarseSize_ = paramList.get ("coarse: max size", MasterList::getDefault("coarse: max size")); - this->numDesiredLevel_ = paramList.get ("max levels", MasterList::getDefault("max levels")); - blockSize_ = paramList.get ("number of equations", MasterList::getDefault("number of equations")); - - - (void)MUELU_TEST_AND_SET_VAR(paramList, "debug: graph level", int, this->graphOutputLevel_); - - // Generic data saving (this saves the data on all levels) - if(paramList.isParameter("save data")) - this->dataToSave_ = Teuchos::getArrayFromStringParameter(paramList,"save data"); +#define MUELU_TEST_PARAM_2LIST(paramList, defaultList, paramName, paramType, \ + cmpValue) \ + (cmpValue == (paramList.isParameter(paramName) \ + ? paramList.get(paramName) \ + : (defaultList.isParameter(paramName) \ + ? 
defaultList.get(paramName) \ + : MasterList::getDefault(paramName)))) + +#define MUELU_KOKKOS_FACTORY(varName, oldFactory, newFactory) \ + RCP varName; \ + if (!useKokkos_) \ + varName = rcp(new oldFactory()); \ + else \ + varName = rcp(new newFactory()); +#define MUELU_KOKKOS_FACTORY_NO_DECL(varName, oldFactory, newFactory) \ + if (!useKokkos_) \ + varName = rcp(new oldFactory()); \ + else \ + varName = rcp(new newFactory()); + +template +void ParameterListInterpreter:: + SetEasyParameterList(const ParameterList &constParamList) { + ParameterList paramList; + + MUELU_SET_VAR_2LIST(constParamList, constParamList, "problem: type", + std::string, problemType); + if (problemType != "unknown") { + paramList = *MasterList::GetProblemSpecificList(problemType); + paramList.setParameters(constParamList); + } else { + // Create a non const copy of the parameter list + // Working with a modifiable list is much much easier than with original one + paramList = constParamList; + } - // Save level data - if (paramList.isSublist("export data")) { - ParameterList printList = paramList.sublist("export data"); + // Check for Kokkos +#ifdef HAVE_MUELU_SERIAL + if (typeid(Node).name() == + typeid(Tpetra::KokkosCompat::KokkosSerialWrapperNode).name()) + useKokkos_ = false; +#endif +#ifdef HAVE_MUELU_OPENMP + if (typeid(Node).name() == + typeid(Tpetra::KokkosCompat::KokkosOpenMPWrapperNode).name()) + useKokkos_ = true; +#endif +#ifdef HAVE_MUELU_CUDA + if (typeid(Node).name() == + typeid(Tpetra::KokkosCompat::KokkosCudaWrapperNode).name()) + useKokkos_ = true; +#endif +#ifdef HAVE_MUELU_HIP + if (typeid(Node).name() == + typeid(Tpetra::KokkosCompat::KokkosHIPWrapperNode).name()) + useKokkos_ = true; +#endif +#ifdef HAVE_MUELU_SYCL + if (typeid(Node).name() == + typeid(Tpetra::KokkosCompat::KokkosSYCLWrapperNode).name()) + useKokkos_ = true; +#endif + (void)MUELU_TEST_AND_SET_VAR(paramList, "use kokkos refactor", bool, + useKokkos_); + + // Check for timer synchronization + 
MUELU_SET_VAR_2LIST(paramList, paramList, "synchronize factory timers", bool, + syncTimers); + if (syncTimers) + Factory::EnableTimerSync(); + + // Translate cycle type parameter + if (paramList.isParameter("cycle type")) { + std::map cycleMap; + cycleMap["V"] = VCYCLE; + cycleMap["W"] = WCYCLE; + + auto cycleType = paramList.get("cycle type"); + TEUCHOS_TEST_FOR_EXCEPTION(cycleMap.count(cycleType) == 0, + Exceptions::RuntimeError, + "Invalid cycle type: \"" << cycleType << "\""); + Cycle_ = cycleMap[cycleType]; + } - // Vectors, aggregates and other things that need special handling - if (printList.isParameter("Nullspace")) - this->nullspaceToPrint_ = Teuchos::getArrayFromStringParameter(printList, "Nullspace"); - if (printList.isParameter("Coordinates")) - this->coordinatesToPrint_ = Teuchos::getArrayFromStringParameter(printList, "Coordinates"); - if (printList.isParameter("Aggregates")) - this->aggregatesToPrint_ = Teuchos::getArrayFromStringParameter(printList, "Aggregates"); - if (printList.isParameter("pcoarsen: element to node map")) - this->elementToNodeMapsToPrint_ = Teuchos::getArrayFromStringParameter(printList, "pcoarsen: element to node map"); + if (paramList.isParameter("W cycle start level")) { + WCycleStartLevel_ = paramList.get("W cycle start level"); + } - // If we asked for an arbitrary matrix to be printed, we do that here - for(auto iter = printList.begin(); iter != printList.end(); iter++) { - const std::string & name = printList.name(iter); - // Ignore the special cases - if(name == "Nullspace" || name == "Coordinates" || name == "Aggregates" || name == "pcoarsen: element to node map") - continue; + if (paramList.isParameter("coarse grid correction scaling factor")) + scalingFactor_ = + paramList.get("coarse grid correction scaling factor"); + + this->maxCoarseSize_ = paramList.get( + "coarse: max size", MasterList::getDefault("coarse: max size")); + this->numDesiredLevel_ = paramList.get( + "max levels", MasterList::getDefault("max 
levels")); + blockSize_ = + paramList.get("number of equations", + MasterList::getDefault("number of equations")); + + (void)MUELU_TEST_AND_SET_VAR(paramList, "debug: graph level", int, + this->graphOutputLevel_); + + // Generic data saving (this saves the data on all levels) + if (paramList.isParameter("save data")) + this->dataToSave_ = Teuchos::getArrayFromStringParameter( + paramList, "save data"); + + // Save level data + if (paramList.isSublist("export data")) { + ParameterList printList = paramList.sublist("export data"); + + // Vectors, aggregates and other things that need special handling + if (printList.isParameter("Nullspace")) + this->nullspaceToPrint_ = + Teuchos::getArrayFromStringParameter(printList, "Nullspace"); + if (printList.isParameter("Coordinates")) + this->coordinatesToPrint_ = + Teuchos::getArrayFromStringParameter(printList, "Coordinates"); + if (printList.isParameter("Aggregates")) + this->aggregatesToPrint_ = + Teuchos::getArrayFromStringParameter(printList, "Aggregates"); + if (printList.isParameter("pcoarsen: element to node map")) + this->elementToNodeMapsToPrint_ = + Teuchos::getArrayFromStringParameter( + printList, "pcoarsen: element to node map"); + + // If we asked for an arbitrary matrix to be printed, we do that here + for (auto iter = printList.begin(); iter != printList.end(); iter++) { + const std::string &name = printList.name(iter); + // Ignore the special cases + if (name == "Nullspace" || name == "Coordinates" || + name == "Aggregates" || name == "pcoarsen: element to node map") + continue; - this->matricesToPrint_[name] = Teuchos::getArrayFromStringParameter(printList, name); - } + this->matricesToPrint_[name] = + Teuchos::getArrayFromStringParameter(printList, name); } + } - // Set verbosity parameter - VerbLevel oldVerbLevel = VerboseObject::GetDefaultVerbLevel(); - { - MUELU_SET_VAR_2LIST(paramList, paramList, "verbosity", std::string, verbosityLevel); - this->verbosity_ = toVerbLevel(verbosityLevel); - 
VerboseObject::SetDefaultVerbLevel(this->verbosity_); - } + // Set verbosity parameter + VerbLevel oldVerbLevel = VerboseObject::GetDefaultVerbLevel(); + { + MUELU_SET_VAR_2LIST(paramList, paramList, "verbosity", std::string, + verbosityLevel); + this->verbosity_ = toVerbLevel(verbosityLevel); + VerboseObject::SetDefaultVerbLevel(this->verbosity_); + } - MUELU_SET_VAR_2LIST(paramList, paramList, "output filename", std::string, outputFilename); - if (outputFilename != "") - VerboseObject::SetMueLuOFileStream(outputFilename); - - // Detect if we need to transfer coordinates to coarse levels. We do that iff - // - we use "distance laplacian" dropping on some level, or - // - we use a repartitioner on some level that needs coordinates - // - we use brick aggregation - // - we use Ifpack2 line partitioner - // This is not ideal, as we may have "repartition: enable" turned on by default - // and not present in the list, but it is better than nothing. - useCoordinates_ = false; - useBlockNumber_ = false; - if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "distance laplacian") || - MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: type", std::string, "brick") || - MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: export visualization data", bool, true)) { + MUELU_SET_VAR_2LIST(paramList, paramList, "output filename", std::string, + outputFilename); + if (outputFilename != "") + VerboseObject::SetMueLuOFileStream(outputFilename); + + // Detect if we need to transfer coordinates to coarse levels. We do that iff + // - we use "distance laplacian" dropping on some level, or + // - we use a repartitioner on some level that needs coordinates + // - we use brick aggregation + // - we use Ifpack2 line partitioner + // This is not ideal, as we may have "repartition: enable" turned on by + // default and not present in the list, but it is better than nothing. 
+ useCoordinates_ = false; + useBlockNumber_ = false; + if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", + std::string, "distance laplacian") || + MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: type", + std::string, "brick") || + MUELU_TEST_PARAM_2LIST(paramList, paramList, + "aggregation: export visualization data", bool, + true)) { + useCoordinates_ = true; + } else if (MUELU_TEST_PARAM_2LIST(paramList, paramList, + "aggregation: drop scheme", std::string, + "block diagonal distance laplacian")) { + useCoordinates_ = true; + useBlockNumber_ = true; + } else if (MUELU_TEST_PARAM_2LIST(paramList, paramList, + "aggregation: drop scheme", std::string, + "block diagonal") || + MUELU_TEST_PARAM_2LIST(paramList, paramList, + "aggregation: drop scheme", std::string, + "block diagonal classical") || + MUELU_TEST_PARAM_2LIST(paramList, paramList, + "aggregation: drop scheme", std::string, + "block diagonal signed classical") || + MUELU_TEST_PARAM_2LIST( + paramList, paramList, "aggregation: drop scheme", std::string, + "block diagonal colored signed classical") || + MUELU_TEST_PARAM_2LIST(paramList, paramList, + "aggregation: drop scheme", std::string, + "signed classical")) { + useBlockNumber_ = true; + } else if (paramList.isSublist("smoother: params")) { + const auto smooParamList = paramList.sublist("smoother: params"); + if (smooParamList.isParameter("partitioner: type") && + (smooParamList.get("partitioner: type") == "line")) { useCoordinates_ = true; - } else if(MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "block diagonal distance laplacian")) { - useCoordinates_ = true; - useBlockNumber_ = true; - } else if(MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "block diagonal") || - MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "block diagonal classical") || - MUELU_TEST_PARAM_2LIST(paramList, paramList, 
"aggregation: drop scheme", std::string, "block diagonal signed classical") || - MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "block diagonal colored signed classical") || - MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "signed classical")) { - useBlockNumber_ = true; - } else if(paramList.isSublist("smoother: params")) { - const auto smooParamList = paramList.sublist("smoother: params"); - if(smooParamList.isParameter("partitioner: type") && - (smooParamList.get("partitioner: type") == "line")) { - useCoordinates_ = true; - } - } else { - for (int levelID = 0; levelID < this->numDesiredLevel_; levelID++) { - std::string levelStr = "level " + toString(levelID); - - if (paramList.isSublist(levelStr)) { - const ParameterList& levelList = paramList.sublist(levelStr); + } + } else { + for (int levelID = 0; levelID < this->numDesiredLevel_; levelID++) { + std::string levelStr = "level " + toString(levelID); - if (MUELU_TEST_PARAM_2LIST(levelList, paramList, "aggregation: drop scheme", std::string, "distance laplacian") || - MUELU_TEST_PARAM_2LIST(levelList, paramList, "aggregation: type", std::string, "brick") || - MUELU_TEST_PARAM_2LIST(levelList, paramList, "aggregation: export visualization data", bool, true)) { - useCoordinates_ = true; - } - else if (MUELU_TEST_PARAM_2LIST(levelList, paramList, "aggregation: drop scheme", std::string, "block diagonal distance laplacian")) { - useCoordinates_ = true; - useBlockNumber_ = true; - } - else if (MUELU_TEST_PARAM_2LIST(levelList, paramList, "aggregation: drop scheme", std::string, "block diagonal") || - MUELU_TEST_PARAM_2LIST(levelList, paramList, "aggregation: drop scheme", std::string, "block diagonal classical") || - MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "block diagonal signed classical") || - MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "block 
diagonal colored signed classical") || - MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "signed classical")) { - useBlockNumber_ = true; - } + if (paramList.isSublist(levelStr)) { + const ParameterList &levelList = paramList.sublist(levelStr); + + if (MUELU_TEST_PARAM_2LIST(levelList, paramList, + "aggregation: drop scheme", std::string, + "distance laplacian") || + MUELU_TEST_PARAM_2LIST(levelList, paramList, "aggregation: type", + std::string, "brick") || + MUELU_TEST_PARAM_2LIST(levelList, paramList, + "aggregation: export visualization data", + bool, true)) { + useCoordinates_ = true; + } else if (MUELU_TEST_PARAM_2LIST( + levelList, paramList, "aggregation: drop scheme", + std::string, "block diagonal distance laplacian")) { + useCoordinates_ = true; + useBlockNumber_ = true; + } else if (MUELU_TEST_PARAM_2LIST(levelList, paramList, + "aggregation: drop scheme", + std::string, "block diagonal") || + MUELU_TEST_PARAM_2LIST( + levelList, paramList, "aggregation: drop scheme", + std::string, "block diagonal classical") || + MUELU_TEST_PARAM_2LIST( + paramList, paramList, "aggregation: drop scheme", + std::string, "block diagonal signed classical") || + MUELU_TEST_PARAM_2LIST( + paramList, paramList, "aggregation: drop scheme", + std::string, + "block diagonal colored signed classical") || + MUELU_TEST_PARAM_2LIST(paramList, paramList, + "aggregation: drop scheme", + std::string, "signed classical")) { + useBlockNumber_ = true; } } } + } - if(MUELU_TEST_PARAM_2LIST(paramList, paramList, "repartition: enable", bool, true)) { - // We don't need coordinates if we're doing the in-place restriction - if(MUELU_TEST_PARAM_2LIST(paramList, paramList, "repartition: use subcommunicators", bool, true) && - MUELU_TEST_PARAM_2LIST(paramList, paramList, "repartition: use subcommunicators in place", bool, true)) { - // do nothing --- these don't need coordinates - } else if (!paramList.isSublist("repartition: params")) { - useCoordinates_ = 
true; - } else { - const ParameterList& repParams = paramList.sublist("repartition: params"); - if (repParams.isType("algorithm")) { - const std::string algo = repParams.get("algorithm"); - if (algo == "multijagged" || algo == "rcb") { - useCoordinates_ = true; - } - } else { + if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "repartition: enable", bool, + true)) { + // We don't need coordinates if we're doing the in-place restriction + if (MUELU_TEST_PARAM_2LIST(paramList, paramList, + "repartition: use subcommunicators", bool, + true) && + MUELU_TEST_PARAM_2LIST(paramList, paramList, + "repartition: use subcommunicators in place", + bool, true)) { + // do nothing --- these don't need coordinates + } else if (!paramList.isSublist("repartition: params")) { + useCoordinates_ = true; + } else { + const ParameterList &repParams = paramList.sublist("repartition: params"); + if (repParams.isType("algorithm")) { + const std::string algo = repParams.get("algorithm"); + if (algo == "multijagged" || algo == "rcb") { useCoordinates_ = true; } + } else { + useCoordinates_ = true; } } - for (int levelID = 0; levelID < this->numDesiredLevel_; levelID++) { - std::string levelStr = "level " + toString(levelID); + } + for (int levelID = 0; levelID < this->numDesiredLevel_; levelID++) { + std::string levelStr = "level " + toString(levelID); - if (paramList.isSublist(levelStr)) { - const ParameterList& levelList = paramList.sublist(levelStr); + if (paramList.isSublist(levelStr)) { + const ParameterList &levelList = paramList.sublist(levelStr); - if (MUELU_TEST_PARAM_2LIST(levelList, paramList, "repartition: enable", bool, true)) { - if (!levelList.isSublist("repartition: params")) { - useCoordinates_ = true; - break; - } else { - const ParameterList& repParams = levelList.sublist("repartition: params"); - if (repParams.isType("algorithm")) { - const std::string algo = repParams.get("algorithm"); - if (algo == "multijagged" || algo == "rcb"){ - useCoordinates_ = true; - break; - } - 
} else { + if (MUELU_TEST_PARAM_2LIST(levelList, paramList, "repartition: enable", + bool, true)) { + if (!levelList.isSublist("repartition: params")) { + useCoordinates_ = true; + break; + } else { + const ParameterList &repParams = + levelList.sublist("repartition: params"); + if (repParams.isType("algorithm")) { + const std::string algo = repParams.get("algorithm"); + if (algo == "multijagged" || algo == "rcb") { useCoordinates_ = true; break; } + } else { + useCoordinates_ = true; + break; } } } } + } - // Detect if we do implicit P and R rebalance - changedPRrebalance_ = false; - changedPRViaCopyrebalance_ = false; - if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "repartition: enable", bool, true)) { - changedPRrebalance_ = MUELU_TEST_AND_SET_VAR(paramList, "repartition: rebalance P and R", bool, this->doPRrebalance_); - changedPRViaCopyrebalance_ = MUELU_TEST_AND_SET_VAR(paramList,"repartition: explicit via new copy rebalance P and R", bool, this->doPRViaCopyrebalance_); - } - - // Detect if we use implicit transpose - changedImplicitTranspose_ = MUELU_TEST_AND_SET_VAR(paramList, "transpose: use implicit", bool, this->implicitTranspose_); - - // Detect if we use fuse prolongation and update - (void)MUELU_TEST_AND_SET_VAR(paramList, "fuse prolongation and update", bool, this->fuseProlongationAndUpdate_); - - // Detect if we suppress the dimension check of the user-given nullspace - (void)MUELU_TEST_AND_SET_VAR(paramList, "nullspace: suppress dimension check", bool, this->suppressNullspaceDimensionCheck_); - - if (paramList.isSublist("matvec params")) - this->matvecParams_ = Teuchos::parameterList(paramList.sublist("matvec params")); - - // Create default manager - // FIXME: should it be here, or higher up - RCP defaultManager = rcp(new FactoryManager()); - defaultManager->SetVerbLevel(this->verbosity_); - defaultManager->SetKokkosRefactor(useKokkos_); - - // We will ignore keeps0 - std::vector keeps0; - UpdateFactoryManager(paramList, ParameterList(), 
*defaultManager, 0/*levelID*/, keeps0); - - // std::cout<<"*** Default Manager ***"<Print(); - - // Create level specific factory managers - for (int levelID = 0; levelID < this->numDesiredLevel_; levelID++) { - // Note, that originally if there were no level specific parameters, we - // simply copied the defaultManager However, with the introduction of - // levelID to UpdateFactoryManager (required for reuse), we can no longer - // guarantee that the kept variables are the same for each level even if - // dependency structure does not change. - RCP levelManager = rcp(new FactoryManager(*defaultManager)); - levelManager->SetVerbLevel(defaultManager->GetVerbLevel()); - - std::vector keeps; - if (paramList.isSublist("level " + toString(levelID))) { - // We do this so the parameters on the level get flagged correctly as "used" - ParameterList& levelList = paramList.sublist("level " + toString(levelID), true/*mustAlreadyExist*/); - UpdateFactoryManager(levelList, paramList, *levelManager, levelID, keeps); - - } else { - ParameterList levelList; - UpdateFactoryManager(levelList, paramList, *levelManager, levelID, keeps); - } - - this->keep_[levelID] = keeps; - this->AddFactoryManager(levelID, 1, levelManager); + // Detect if we do implicit P and R rebalance + changedPRrebalance_ = false; + changedPRViaCopyrebalance_ = false; + if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "repartition: enable", bool, + true)) { + changedPRrebalance_ = + MUELU_TEST_AND_SET_VAR(paramList, "repartition: rebalance P and R", + bool, this->doPRrebalance_); + changedPRViaCopyrebalance_ = MUELU_TEST_AND_SET_VAR( + paramList, "repartition: explicit via new copy rebalance P and R", bool, + this->doPRViaCopyrebalance_); + } - // std::cout<<"*** Level "<Print(); + // Detect if we use implicit transpose + changedImplicitTranspose_ = MUELU_TEST_AND_SET_VAR( + paramList, "transpose: use implicit", bool, this->implicitTranspose_); + + // Detect if we use fuse prolongation and update + 
(void)MUELU_TEST_AND_SET_VAR(paramList, "fuse prolongation and update", bool, + this->fuseProlongationAndUpdate_); + + // Detect if we suppress the dimension check of the user-given nullspace + (void)MUELU_TEST_AND_SET_VAR(paramList, "nullspace: suppress dimension check", + bool, this->suppressNullspaceDimensionCheck_); + + if (paramList.isSublist("matvec params")) + this->matvecParams_ = + Teuchos::parameterList(paramList.sublist("matvec params")); + + // Create default manager + // FIXME: should it be here, or higher up + RCP defaultManager = rcp(new FactoryManager()); + defaultManager->SetVerbLevel(this->verbosity_); + defaultManager->SetKokkosRefactor(useKokkos_); + + // We will ignore keeps0 + std::vector keeps0; + UpdateFactoryManager(paramList, ParameterList(), *defaultManager, + 0 /*levelID*/, keeps0); + + // std::cout<<"*** Default Manager ***"<Print(); + + // Create level specific factory managers + for (int levelID = 0; levelID < this->numDesiredLevel_; levelID++) { + // Note, that originally if there were no level specific parameters, we + // simply copied the defaultManager However, with the introduction of + // levelID to UpdateFactoryManager (required for reuse), we can no longer + // guarantee that the kept variables are the same for each level even if + // dependency structure does not change. 
+ RCP levelManager = rcp(new FactoryManager(*defaultManager)); + levelManager->SetVerbLevel(defaultManager->GetVerbLevel()); + + std::vector keeps; + if (paramList.isSublist("level " + toString(levelID))) { + // We do this so the parameters on the level get flagged correctly as + // "used" + ParameterList &levelList = paramList.sublist("level " + toString(levelID), + true /*mustAlreadyExist*/); + UpdateFactoryManager(levelList, paramList, *levelManager, levelID, keeps); + } else { + ParameterList levelList; + UpdateFactoryManager(levelList, paramList, *levelManager, levelID, keeps); } - // FIXME: parameters passed to packages, like Ifpack2, are not touched by us, resulting in "[unused]" flag - // being displayed. On the other hand, we don't want to simply iterate through them touching. I don't know - // what a good solution looks like - if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "print initial parameters", bool, true)) - this->GetOStream(static_cast(Runtime1), 0) << paramList << std::endl; - - if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "print unused parameters", bool, true)) { - // Check unused parameters - ParameterList unusedParamList; + this->keep_[levelID] = keeps; + this->AddFactoryManager(levelID, 1, levelManager); - // Check for unused parameters that aren't lists - for (ParameterList::ConstIterator it = paramList.begin(); it != paramList.end(); it++) { - const ParameterEntry& entry = paramList.entry(it); + // std::cout<<"*** Level "<Print(); + } - if (!entry.isList() && !entry.isUsed()) - unusedParamList.setEntry(paramList.name(it), entry); - } + // FIXME: parameters passed to packages, like Ifpack2, are not touched by us, + // resulting in "[unused]" flag being displayed. On the other hand, we don't + // want to simply iterate through them touching. 
I don't know what a good + // solution looks like + if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "print initial parameters", + bool, true)) + this->GetOStream(static_cast(Runtime1), 0) + << paramList << std::endl; + + if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "print unused parameters", + bool, true)) { + // Check unused parameters + ParameterList unusedParamList; + + // Check for unused parameters that aren't lists + for (ParameterList::ConstIterator it = paramList.begin(); + it != paramList.end(); it++) { + const ParameterEntry &entry = paramList.entry(it); + + if (!entry.isList() && !entry.isUsed()) + unusedParamList.setEntry(paramList.name(it), entry); + } - // Check for unused parameters in level-specific sublists - for (int levelID = 0; levelID < this->numDesiredLevel_; levelID++) { - std::string levelStr = "level " + toString(levelID); + // Check for unused parameters in level-specific sublists + for (int levelID = 0; levelID < this->numDesiredLevel_; levelID++) { + std::string levelStr = "level " + toString(levelID); - if (paramList.isSublist(levelStr)) { - const ParameterList& levelList = paramList.sublist(levelStr); + if (paramList.isSublist(levelStr)) { + const ParameterList &levelList = paramList.sublist(levelStr); - for (ParameterList::ConstIterator itr = levelList.begin(); itr != levelList.end(); ++itr) { - const ParameterEntry& entry = levelList.entry(itr); + for (ParameterList::ConstIterator itr = levelList.begin(); + itr != levelList.end(); ++itr) { + const ParameterEntry &entry = levelList.entry(itr); - if (!entry.isList() && !entry.isUsed()) - unusedParamList.sublist(levelStr).setEntry(levelList.name(itr), entry); - } + if (!entry.isList() && !entry.isUsed()) + unusedParamList.sublist(levelStr).setEntry(levelList.name(itr), + entry); } } - - if (unusedParamList.numParams() > 0) { - std::ostringstream unusedParamsStream; - int indent = 4; - unusedParamList.print(unusedParamsStream, indent); - - this->GetOStream(Warnings1) << "The 
following parameters were not used:\n" << unusedParamsStream.str() << std::endl; - } } - VerboseObject::SetDefaultVerbLevel(oldVerbLevel); + if (unusedParamList.numParams() > 0) { + std::ostringstream unusedParamsStream; + int indent = 4; + unusedParamList.print(unusedParamsStream, indent); + this->GetOStream(Warnings1) << "The following parameters were not used:\n" + << unusedParamsStream.str() << std::endl; + } } - - // ===================================================================================================== - // ==================================== UpdateFactoryManager =========================================== - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const - { - // NOTE: Factory::SetParameterList must be called prior to Factory::SetFactory, as - // SetParameterList sets default values for non mentioned parameters, including factories - - using strings = std::unordered_set; - - // shortcut - if (paramList.numParams() == 0 && defaultList.numParams() > 0) - paramList = ParameterList(defaultList); - - MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); - TEUCHOS_TEST_FOR_EXCEPTION(strings({"none", "tP", "RP", "emin", "RAP", "full", "S"}).count(reuseType) == 0, - Exceptions::RuntimeError, "Unknown \"reuse: type\" value: \"" << reuseType << "\". Please consult User's Guide."); - - MUELU_SET_VAR_2LIST(paramList, defaultList, "multigrid algorithm", std::string, multigridAlgo); - TEUCHOS_TEST_FOR_EXCEPTION(strings({"unsmoothed", "sa", "pg", "emin", "matlab", "pcoarsen","classical","smoothed reitzinger","unsmoothed reitzinger","replicate","combine"}).count(multigridAlgo) == 0, - Exceptions::RuntimeError, "Unknown \"multigrid algorithm\" value: \"" << multigridAlgo << "\". 
Please consult User's Guide."); + VerboseObject::SetDefaultVerbLevel(oldVerbLevel); +} + +// ===================================================================================================== +// ==================================== UpdateFactoryManager +// =========================================== +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager(ParameterList ¶mList, + const ParameterList &defaultList, + FactoryManager &manager, int levelID, + std::vector &keeps) const { + // NOTE: Factory::SetParameterList must be called prior to + // Factory::SetFactory, as SetParameterList sets default values for non + // mentioned parameters, including factories + + using strings = std::unordered_set; + + // shortcut + if (paramList.numParams() == 0 && defaultList.numParams() > 0) + paramList = ParameterList(defaultList); + + MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, + reuseType); + TEUCHOS_TEST_FOR_EXCEPTION( + strings({"none", "tP", "RP", "emin", "RAP", "full", "S"}) + .count(reuseType) == 0, + Exceptions::RuntimeError, + "Unknown \"reuse: type\" value: \"" + << reuseType << "\". Please consult User's Guide."); + + MUELU_SET_VAR_2LIST(paramList, defaultList, "multigrid algorithm", + std::string, multigridAlgo); + TEUCHOS_TEST_FOR_EXCEPTION( + strings({"unsmoothed", "sa", "pg", "emin", "matlab", "pcoarsen", + "classical", "smoothed reitzinger", "unsmoothed reitzinger", + "replicate", "combine"}) + .count(multigridAlgo) == 0, + Exceptions::RuntimeError, + "Unknown \"multigrid algorithm\" value: \"" + << multigridAlgo << "\". 
Please consult User's Guide."); #ifndef HAVE_MUELU_MATLAB - TEUCHOS_TEST_FOR_EXCEPTION(multigridAlgo == "matlab", Exceptions::RuntimeError, - "Cannot use matlab for multigrid algorithm - MueLu was not configured with MATLAB support."); + TEUCHOS_TEST_FOR_EXCEPTION(multigridAlgo == "matlab", + Exceptions::RuntimeError, + "Cannot use matlab for multigrid algorithm - " + "MueLu was not configured with MATLAB support."); #endif #ifndef HAVE_MUELU_INTREPID2 - TEUCHOS_TEST_FOR_EXCEPTION(multigridAlgo == "pcoarsen", Exceptions::RuntimeError, - "Cannot use IntrepidPCoarsen prolongator factory - MueLu was not configured with Intrepid support."); + TEUCHOS_TEST_FOR_EXCEPTION( + multigridAlgo == "pcoarsen", Exceptions::RuntimeError, + "Cannot use IntrepidPCoarsen prolongator factory - MueLu was not " + "configured with Intrepid support."); #endif - // Only some combinations of reuse and multigrid algorithms are tested, all - // other are considered invalid at the moment - if (reuseType == "none" || reuseType == "S" || reuseType == "RP" || reuseType == "RAP") { - // This works for all kinds of multigrid algorithms - - } else if (reuseType == "tP" && (multigridAlgo != "sa" && multigridAlgo != "unsmoothed")) { - reuseType = "none"; - this->GetOStream(Warnings0) << "Ignoring \"tP\" reuse option as it is only compatible with \"sa\", " - "or \"unsmoothed\" multigrid algorithms" << std::endl; - - } else if (reuseType == "emin" && multigridAlgo != "emin") { - reuseType = "none"; - this->GetOStream(Warnings0) << "Ignoring \"emin\" reuse option it is only compatible with " - "\"emin\" multigrid algorithm" << std::endl; - } - - // == Non-serializable data === - // Check both the parameter and the type - bool have_userP = false; - if (paramList.isParameter("P") && !paramList.get >("P").is_null()) - have_userP = true; - - // === Coarse solver === - UpdateFactoryManager_CoarseSolvers(paramList, defaultList, manager, levelID, keeps); - - // == Smoothers == - 
UpdateFactoryManager_Smoothers(paramList, defaultList, manager, levelID, keeps); - - // === BlockNumber === - if(levelID == 0) - UpdateFactoryManager_BlockNumber(paramList, defaultList, manager, levelID, keeps); - - // === Aggregation === - if(multigridAlgo == "unsmoothed reitzinger" || multigridAlgo == "smoothed reitzinger") - UpdateFactoryManager_Reitzinger(paramList, defaultList, manager, levelID, keeps); - else - UpdateFactoryManager_Aggregation_TentativeP(paramList, defaultList, manager, levelID, keeps); - - // === Nullspace === - RCP nullSpaceFactory; // Cache thcAN is guy for the combination of semi-coarsening & repartitioning - UpdateFactoryManager_Nullspace(paramList, defaultList, manager, levelID, keeps, nullSpaceFactory); - - // === Prolongation === - // NOTE: None of the UpdateFactoryManager routines called here check the - // multigridAlgo. This is intentional, to allow for reuse of components - // underneath. Thus, the multigridAlgo was checked in the beginning of the - // function. 
- if (have_userP) { - // User prolongator - manager.SetFactory("P", NoFactory::getRCP()); - - } else if (multigridAlgo == "unsmoothed" || multigridAlgo == "unsmoothed reitzinger") { - // Unsmoothed aggregation - manager.SetFactory("P", manager.GetFactory("Ptent")); - - } else if (multigridAlgo == "classical") { - // Classical AMG - manager.SetFactory("P", manager.GetFactory("Ptent")); - - } else if (multigridAlgo == "sa" || multigridAlgo == "smoothed reitzinger") { - // Smoothed aggregation - UpdateFactoryManager_SA(paramList, defaultList, manager, levelID, keeps); - - } else if (multigridAlgo == "emin") { - // Energy minimization - UpdateFactoryManager_Emin(paramList, defaultList, manager, levelID, keeps); - - } else if (multigridAlgo == "replicate") { - UpdateFactoryManager_Replicate(paramList, defaultList, manager, levelID, keeps); - - } else if (multigridAlgo == "combine") { - UpdateFactoryManager_Combine(paramList, defaultList, manager, levelID, keeps); + // Only some combinations of reuse and multigrid algorithms are tested, all + // other are considered invalid at the moment + if (reuseType == "none" || reuseType == "S" || reuseType == "RP" || + reuseType == "RAP") { + // This works for all kinds of multigrid algorithms + + } else if (reuseType == "tP" && + (multigridAlgo != "sa" && multigridAlgo != "unsmoothed")) { + reuseType = "none"; + this->GetOStream(Warnings0) + << "Ignoring \"tP\" reuse option as it is only compatible with \"sa\", " + "or \"unsmoothed\" multigrid algorithms" + << std::endl; + + } else if (reuseType == "emin" && multigridAlgo != "emin") { + reuseType = "none"; + this->GetOStream(Warnings0) + << "Ignoring \"emin\" reuse option it is only compatible with " + "\"emin\" multigrid algorithm" + << std::endl; + } - } else if (multigridAlgo == "pg") { - // Petrov-Galerkin - UpdateFactoryManager_PG(paramList, defaultList, manager, levelID, keeps); + // == Non-serializable data === + // Check both the parameter and the type + bool have_userP = 
false; + if (paramList.isParameter("P") && !paramList.get>("P").is_null()) + have_userP = true; + + // === Coarse solver === + UpdateFactoryManager_CoarseSolvers(paramList, defaultList, manager, levelID, + keeps); + + // == Smoothers == + UpdateFactoryManager_Smoothers(paramList, defaultList, manager, levelID, + keeps); + + // === BlockNumber === + if (levelID == 0) + UpdateFactoryManager_BlockNumber(paramList, defaultList, manager, levelID, + keeps); + + // === Aggregation === + if (multigridAlgo == "unsmoothed reitzinger" || + multigridAlgo == "smoothed reitzinger") + UpdateFactoryManager_Reitzinger(paramList, defaultList, manager, levelID, + keeps); + else + UpdateFactoryManager_Aggregation_TentativeP(paramList, defaultList, manager, + levelID, keeps); + + // === Nullspace === + RCP nullSpaceFactory; // Cache thcAN is guy for the combination of + // semi-coarsening & repartitioning + UpdateFactoryManager_Nullspace(paramList, defaultList, manager, levelID, + keeps, nullSpaceFactory); + + // === Prolongation === + // NOTE: None of the UpdateFactoryManager routines called here check the + // multigridAlgo. This is intentional, to allow for reuse of components + // underneath. Thus, the multigridAlgo was checked in the beginning of the + // function. 
+ if (have_userP) { + // User prolongator + manager.SetFactory("P", NoFactory::getRCP()); + + } else if (multigridAlgo == "unsmoothed" || + multigridAlgo == "unsmoothed reitzinger") { + // Unsmoothed aggregation + manager.SetFactory("P", manager.GetFactory("Ptent")); + + } else if (multigridAlgo == "classical") { + // Classical AMG + manager.SetFactory("P", manager.GetFactory("Ptent")); + + } else if (multigridAlgo == "sa" || multigridAlgo == "smoothed reitzinger") { + // Smoothed aggregation + UpdateFactoryManager_SA(paramList, defaultList, manager, levelID, keeps); - } else if (multigridAlgo == "matlab") { - // Matlab Coarsneing - UpdateFactoryManager_Matlab(paramList, defaultList, manager, levelID, keeps); + } else if (multigridAlgo == "emin") { + // Energy minimization + UpdateFactoryManager_Emin(paramList, defaultList, manager, levelID, keeps); - } else if (multigridAlgo == "pcoarsen") { - // P-Coarsening - UpdateFactoryManager_PCoarsen(paramList, defaultList, manager, levelID, keeps); - } + } else if (multigridAlgo == "replicate") { + UpdateFactoryManager_Replicate(paramList, defaultList, manager, levelID, + keeps); - // === Semi-coarsening === - UpdateFactoryManager_SemiCoarsen(paramList, defaultList, manager, levelID, keeps); + } else if (multigridAlgo == "combine") { + UpdateFactoryManager_Combine(paramList, defaultList, manager, levelID, + keeps); - // === Restriction === - UpdateFactoryManager_Restriction(paramList, defaultList, manager, levelID, keeps); + } else if (multigridAlgo == "pg") { + // Petrov-Galerkin + UpdateFactoryManager_PG(paramList, defaultList, manager, levelID, keeps); - // === RAP === - UpdateFactoryManager_RAP(paramList, defaultList, manager, levelID, keeps); + } else if (multigridAlgo == "matlab") { + // Matlab Coarsneing + UpdateFactoryManager_Matlab(paramList, defaultList, manager, levelID, + keeps); - // == BlockNumber Transfer == - 
UpdateFactoryManager_LocalOrdinalTransfer("BlockNumber",multigridAlgo,paramList,defaultList,manager,levelID,keeps); + } else if (multigridAlgo == "pcoarsen") { + // P-Coarsening + UpdateFactoryManager_PCoarsen(paramList, defaultList, manager, levelID, + keeps); + } + // === Semi-coarsening === + UpdateFactoryManager_SemiCoarsen(paramList, defaultList, manager, levelID, + keeps); - // === Coordinates === - UpdateFactoryManager_Coordinates(paramList, defaultList, manager, levelID, keeps); + // === Restriction === + UpdateFactoryManager_Restriction(paramList, defaultList, manager, levelID, + keeps); - // === Pre-Repartition Keeps for Reuse === - if ((reuseType == "RP" || reuseType == "RAP" || reuseType == "full") && levelID) - keeps.push_back(keep_pair("Nullspace", manager.GetFactory("Nullspace").get())); + // === RAP === + UpdateFactoryManager_RAP(paramList, defaultList, manager, levelID, keeps); - if (reuseType == "RP" && levelID) { - keeps.push_back(keep_pair("P", manager.GetFactory("P").get())); - if (!this->implicitTranspose_) - keeps.push_back(keep_pair("R", manager.GetFactory("R").get())); - } - if ((reuseType == "tP" || reuseType == "RP" || reuseType == "emin") && useCoordinates_ && levelID) - keeps.push_back(keep_pair("Coordinates", manager.GetFactory("Coordinates").get())); + // == BlockNumber Transfer == + UpdateFactoryManager_LocalOrdinalTransfer("BlockNumber", multigridAlgo, + paramList, defaultList, manager, + levelID, keeps); - // === Repartitioning === - UpdateFactoryManager_Repartition(paramList, defaultList, manager, levelID, keeps, nullSpaceFactory); + // === Coordinates === + UpdateFactoryManager_Coordinates(paramList, defaultList, manager, levelID, + keeps); - // === Lower precision transfers === - UpdateFactoryManager_LowPrecision(paramList, defaultList, manager, levelID, keeps); + // === Pre-Repartition Keeps for Reuse === + if ((reuseType == "RP" || reuseType == "RAP" || reuseType == "full") && + levelID) + keeps.push_back( + 
keep_pair("Nullspace", manager.GetFactory("Nullspace").get())); - // === Final Keeps for Reuse === - if ((reuseType == "RAP" || reuseType == "full") && levelID) { - keeps.push_back(keep_pair("P", manager.GetFactory("P").get())); - if (!this->implicitTranspose_) - keeps.push_back(keep_pair("R", manager.GetFactory("R").get())); - keeps.push_back(keep_pair("A", manager.GetFactory("A").get())); - } + if (reuseType == "RP" && levelID) { + keeps.push_back(keep_pair("P", manager.GetFactory("P").get())); + if (!this->implicitTranspose_) + keeps.push_back(keep_pair("R", manager.GetFactory("R").get())); + } + if ((reuseType == "tP" || reuseType == "RP" || reuseType == "emin") && + useCoordinates_ && levelID) + keeps.push_back( + keep_pair("Coordinates", manager.GetFactory("Coordinates").get())); + + // === Repartitioning === + UpdateFactoryManager_Repartition(paramList, defaultList, manager, levelID, + keeps, nullSpaceFactory); + + // === Lower precision transfers === + UpdateFactoryManager_LowPrecision(paramList, defaultList, manager, levelID, + keeps); + + // === Final Keeps for Reuse === + if ((reuseType == "RAP" || reuseType == "full") && levelID) { + keeps.push_back(keep_pair("P", manager.GetFactory("P").get())); + if (!this->implicitTranspose_) + keeps.push_back(keep_pair("R", manager.GetFactory("R").get())); + keeps.push_back(keep_pair("A", manager.GetFactory("A").get())); + } - // In case you ever want to inspect the FactoryManager as it is generated for each level - /*std::cout<<"*** Factory Manager on level "< - void ParameterListInterpreter:: - UpdateFactoryManager_Smoothers(ParameterList& paramList, const ParameterList& defaultList, - FactoryManager& manager, int levelID, std::vector& keeps) const - { - MUELU_SET_VAR_2LIST(paramList, defaultList, "multigrid algorithm", std::string, multigridAlgo); - MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); - bool useMaxAbsDiagonalScaling = false; - if (defaultList.isParameter("sa: use 
rowsumabs diagonal scaling")) - useMaxAbsDiagonalScaling = defaultList.get("sa: use rowsumabs diagonal scaling"); - - // === Smoothing === - // FIXME: should custom smoother check default list too? - bool isCustomSmoother = - paramList.isParameter("smoother: pre or post") || - paramList.isParameter("smoother: type") || paramList.isParameter("smoother: pre type") || paramList.isParameter("smoother: post type") || - paramList.isSublist ("smoother: params") || paramList.isSublist ("smoother: pre params") || paramList.isSublist ("smoother: post params") || - paramList.isParameter("smoother: sweeps") || paramList.isParameter("smoother: pre sweeps") || paramList.isParameter("smoother: post sweeps") || - paramList.isParameter("smoother: overlap") || paramList.isParameter("smoother: pre overlap") || paramList.isParameter("smoother: post overlap"); - - MUELU_SET_VAR_2LIST(paramList, defaultList, "smoother: pre or post", std::string, PreOrPost); - if (PreOrPost == "none") { - manager.SetFactory("Smoother", Teuchos::null); - - } else if (isCustomSmoother) { - // FIXME: get default values from the factory - // NOTE: none of the smoothers at the moment use parameter validation framework, so we - // cannot get the default values from it. 
- #define TEST_MUTUALLY_EXCLUSIVE(arg1,arg2) \ - TEUCHOS_TEST_FOR_EXCEPTION(paramList.isParameter(#arg1) && paramList.isParameter(#arg2), \ - Exceptions::InvalidArgument, "You cannot specify both \""#arg1"\" and \""#arg2"\""); - #define TEST_MUTUALLY_EXCLUSIVE_S(arg1,arg2) \ - TEUCHOS_TEST_FOR_EXCEPTION(paramList.isSublist(#arg1) && paramList.isSublist(#arg2), \ - Exceptions::InvalidArgument, "You cannot specify both \""#arg1"\" and \""#arg2"\""); - - TEST_MUTUALLY_EXCLUSIVE ("smoother: type", "smoother: pre type"); - TEST_MUTUALLY_EXCLUSIVE ("smoother: type", "smoother: post type"); - TEST_MUTUALLY_EXCLUSIVE ("smoother: sweeps", "smoother: pre sweeps"); - TEST_MUTUALLY_EXCLUSIVE ("smoother: sweeps", "smoother: post sweeps"); - TEST_MUTUALLY_EXCLUSIVE ("smoother: overlap", "smoother: pre overlap"); - TEST_MUTUALLY_EXCLUSIVE ("smoother: overlap", "smoother: post overlap"); - TEST_MUTUALLY_EXCLUSIVE_S("smoother: params", "smoother: pre params"); - TEST_MUTUALLY_EXCLUSIVE_S("smoother: params", "smoother: post params"); - TEUCHOS_TEST_FOR_EXCEPTION(PreOrPost == "both" && (paramList.isParameter("smoother: pre type") != paramList.isParameter("smoother: post type")), - Exceptions::InvalidArgument, "You must specify both \"smoother: pre type\" and \"smoother: post type\""); - - // Default values - int overlap = 0; - ParameterList defaultSmootherParams; - defaultSmootherParams.set("relaxation: type", "Symmetric Gauss-Seidel"); - defaultSmootherParams.set("relaxation: sweeps", Teuchos::OrdinalTraits::one()); - defaultSmootherParams.set("relaxation: damping factor", Teuchos::ScalarTraits::one()); - - RCP preSmoother = Teuchos::null, postSmoother = Teuchos::null; - std::string preSmootherType, postSmootherType; - ParameterList preSmootherParams, postSmootherParams; - - if (paramList.isParameter("smoother: overlap")) - overlap = paramList.get("smoother: overlap"); - - if (PreOrPost == "pre" || PreOrPost == "both") { - if (paramList.isParameter("smoother: pre type")) { - 
preSmootherType = paramList.get("smoother: pre type"); - } else { - MUELU_SET_VAR_2LIST(paramList, defaultList, "smoother: type", std::string, preSmootherTypeTmp); - preSmootherType = preSmootherTypeTmp; - } - if (paramList.isParameter("smoother: pre overlap")) - overlap = paramList.get("smoother: pre overlap"); - - if (paramList.isSublist("smoother: pre params")) - preSmootherParams = paramList.sublist("smoother: pre params"); - else if (paramList.isSublist("smoother: params")) - preSmootherParams = paramList.sublist("smoother: params"); - else if (defaultList.isSublist("smoother: params")) - preSmootherParams = defaultList.sublist("smoother: params"); - else if (preSmootherType == "RELAXATION") - preSmootherParams = defaultSmootherParams; - - if (preSmootherType == "CHEBYSHEV" && useMaxAbsDiagonalScaling) - preSmootherParams.set("chebyshev: use rowsumabs diagonal scaling",true); - - #ifdef HAVE_MUELU_INTREPID2 - // Propagate P-coarsening for Topo smoothing - if (multigridAlgo == "pcoarsen" && preSmootherType == "TOPOLOGICAL" && - defaultList.isParameter("pcoarsen: schedule") && defaultList.isParameter("pcoarsen: element")) { - // P-Coarsening by schedule (new interface) - // NOTE: levelID represents the *coarse* level in this case - auto pcoarsen_schedule = Teuchos::getArrayFromStringParameter(defaultList, "pcoarsen: schedule"); - auto pcoarsen_element = defaultList.get("pcoarsen: element"); - - if (levelID < (int)pcoarsen_schedule.size()) { - // Topo info for P-Coarsening - auto lo = pcoarsen_element + std::to_string(pcoarsen_schedule[levelID]); - preSmootherParams.set("pcoarsen: hi basis", lo); - } - } - #endif - - #ifdef HAVE_MUELU_MATLAB - if (preSmootherType == "matlab") - preSmoother = rcp(new SmootherFactory(rcp(new MatlabSmoother(preSmootherParams)))); - else - #endif - preSmoother = rcp(new SmootherFactory(rcp(new TrilinosSmoother(preSmootherType, preSmootherParams, overlap)))); - } - - if (PreOrPost == "post" || PreOrPost == "both") { - if 
(paramList.isParameter("smoother: post type")) - postSmootherType = paramList.get("smoother: post type"); - else { - MUELU_SET_VAR_2LIST(paramList, defaultList, "smoother: type", std::string, postSmootherTypeTmp); - postSmootherType = postSmootherTypeTmp; - } - - if (paramList.isSublist("smoother: post params")) - postSmootherParams = paramList.sublist("smoother: post params"); - else if (paramList.isSublist("smoother: params")) - postSmootherParams = paramList.sublist("smoother: params"); - else if (defaultList.isSublist("smoother: params")) - postSmootherParams = defaultList.sublist("smoother: params"); - else if (postSmootherType == "RELAXATION") - postSmootherParams = defaultSmootherParams; - if (paramList.isParameter("smoother: post overlap")) - overlap = paramList.get("smoother: post overlap"); - - if (postSmootherType == "CHEBYSHEV" && useMaxAbsDiagonalScaling) - postSmootherParams.set("chebyshev: use rowsumabs diagonal scaling",true); - - if (postSmootherType == preSmootherType && areSame(preSmootherParams, postSmootherParams)) - postSmoother = preSmoother; - else { - #ifdef HAVE_MUELU_INTREPID2 - // Propagate P-coarsening for Topo smoothing - if (multigridAlgo == "pcoarsen" && preSmootherType == "TOPOLOGICAL" && - defaultList.isParameter("pcoarsen: schedule") && defaultList.isParameter("pcoarsen: element")) { - // P-Coarsening by schedule (new interface) - // NOTE: levelID represents the *coarse* level in this case - auto pcoarsen_schedule = Teuchos::getArrayFromStringParameter(defaultList,"pcoarsen: schedule"); - auto pcoarsen_element = defaultList.get("pcoarsen: element"); - - if (levelID < (int)pcoarsen_schedule.size()) { - // Topo info for P-Coarsening - auto lo = pcoarsen_element + std::to_string(pcoarsen_schedule[levelID]); - postSmootherParams.set("pcoarsen: hi basis", lo); - } - } - #endif - - #ifdef HAVE_MUELU_MATLAB - if (postSmootherType == "matlab") - postSmoother = rcp(new SmootherFactory(rcp(new MatlabSmoother(postSmootherParams)))); - else - 
#endif - postSmoother = rcp(new SmootherFactory(rcp(new TrilinosSmoother(postSmootherType, postSmootherParams, overlap)))); - } - } - - if (preSmoother == postSmoother) - manager.SetFactory("Smoother", preSmoother); - else { - manager.SetFactory("PreSmoother", preSmoother); - manager.SetFactory("PostSmoother", postSmoother); - } - } - - // The first clause is not necessary, but it is here for clarity Smoothers - // are reused if smoother explicitly said to reuse them, or if any other - // reuse option is enabled - bool reuseSmoothers = (reuseType == "S" || reuseType != "none"); - if (reuseSmoothers) { - auto preSmootherFactory = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("PreSmoother"))); - - if (preSmootherFactory != Teuchos::null) { - ParameterList postSmootherFactoryParams; - postSmootherFactoryParams.set("keep smoother data", true); - preSmootherFactory->SetParameterList(postSmootherFactoryParams); - - keeps.push_back(keep_pair("PreSmoother data", preSmootherFactory.get())); - } - - auto postSmootherFactory = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("PostSmoother"))); - if (postSmootherFactory != Teuchos::null) { - ParameterList postSmootherFactoryParams; - postSmootherFactoryParams.set("keep smoother data", true); - postSmootherFactory->SetParameterList(postSmootherFactoryParams); - - keeps.push_back(keep_pair("PostSmoother data", postSmootherFactory.get())); - } - - auto coarseFactory = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("CoarseSolver"))); - if (coarseFactory != Teuchos::null) { - ParameterList coarseFactoryParams; - coarseFactoryParams.set("keep smoother data", true); - coarseFactory->SetParameterList(coarseFactoryParams); - - keeps.push_back(keep_pair("PreSmoother data", coarseFactory.get())); - } - } - - if ((reuseType == "RAP" && levelID) || (reuseType == "full")) { - // The difference between "RAP" and "full" is keeping smoothers. 
However, - // as in both cases we keep coarse matrices, we do not need to update - // coarse smoothers. On the other hand, if a user changes fine level - // matrix, "RAP" would update the fine level smoother, while "full" would - // not - keeps.push_back(keep_pair("PreSmoother", manager.GetFactory("PreSmoother") .get())); - keeps.push_back(keep_pair("PostSmoother", manager.GetFactory("PostSmoother").get())); - - // We do keep_pair("PreSmoother", manager.GetFactory("CoarseSolver").get()) - // as the coarse solver factory is in fact a smoothing factory, so the - // only pieces of data it generates are PreSmoother and PostSmoother - keeps.push_back(keep_pair("PreSmoother", manager.GetFactory("CoarseSolver").get())); - } - } - - // ===================================================================================================== - // ====================================== Coarse Solvers =============================================== - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_CoarseSolvers(ParameterList& paramList, const ParameterList& defaultList, - FactoryManager& manager, int /* levelID */, std::vector& /* keeps */) const - { - // FIXME: should custom coarse solver check default list too? - bool isCustomCoarseSolver = - paramList.isParameter("coarse: type") || - paramList.isParameter("coarse: params"); - if (MUELU_TEST_PARAM_2LIST(paramList, defaultList, "coarse: type", std::string, "none")) { - manager.SetFactory("CoarseSolver", Teuchos::null); - - } else if (isCustomCoarseSolver) { - // FIXME: get default values from the factory - // NOTE: none of the smoothers at the moment use parameter validation framework, so we - // cannot get the default values from it. 
- MUELU_SET_VAR_2LIST(paramList, defaultList, "coarse: type", std::string, coarseType); - - int overlap = 0; - if (paramList.isParameter("coarse: overlap")) - overlap = paramList.get("coarse: overlap"); - - ParameterList coarseParams; - if (paramList.isSublist("coarse: params")) - coarseParams = paramList.sublist("coarse: params"); - else if (defaultList.isSublist("coarse: params")) - coarseParams = defaultList.sublist("coarse: params"); - - using strings = std::unordered_set; - - RCP coarseSmoother; - // TODO: this is not a proper place to check. If we consider direct solver to be a special - // case of smoother, we would like to unify Amesos and Ifpack2 smoothers in src/Smoothers, and - // have a single factory responsible for those. Then, this check would belong there. - if (strings({"RELAXATION", "CHEBYSHEV", "ILUT", "ILU", "RILUK", "SCHWARZ", "Amesos", - "BLOCK RELAXATION", "BLOCK_RELAXATION", "BLOCKRELAXATION" , - "SPARSE BLOCK RELAXATION", "SPARSE_BLOCK_RELAXATION", "SPARSEBLOCKRELAXATION", - "LINESMOOTHING_BANDEDRELAXATION", "LINESMOOTHING_BANDED_RELAXATION", "LINESMOOTHING_BANDED RELAXATION", - "LINESMOOTHING_TRIDIRELAXATION", "LINESMOOTHING_TRIDI_RELAXATION", "LINESMOOTHING_TRIDI RELAXATION", - "LINESMOOTHING_TRIDIAGONALRELAXATION", "LINESMOOTHING_TRIDIAGONAL_RELAXATION", "LINESMOOTHING_TRIDIAGONAL RELAXATION", - "TOPOLOGICAL", "FAST_ILU", "FAST_IC", "FAST_ILDL"}).count(coarseType)) { - coarseSmoother = rcp(new TrilinosSmoother(coarseType, coarseParams, overlap)); - } else { - #ifdef HAVE_MUELU_MATLAB - if (coarseType == "matlab") - coarseSmoother = rcp(new MatlabSmoother(coarseParams)); - else - #endif - coarseSmoother = rcp(new DirectSolver(coarseType, coarseParams)); - } - - manager.SetFactory("CoarseSolver", rcp(new SmootherFactory(coarseSmoother))); - } - } - - - // ===================================================================================================== - // ========================================= 
TentativeP================================================= - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_Reitzinger(ParameterList& paramList, const ParameterList& defaultList, - FactoryManager& manager, int levelID, std::vector& keeps) const - { - ParameterList rParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: enable", bool, rParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: use subcommunicators", bool, rParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "tentative: constant column sums", bool, rParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "tentative: calculate qr", bool, rParams); - - RCP rFactory = rcp(new ReitzingerPFactory()); - rFactory->SetParameterList(rParams); - - // These are all going to be user provided, so NoFactory - rFactory->SetFactory("Pnodal", NoFactory::getRCP()); - rFactory->SetFactory("NodeAggMatrix", NoFactory::getRCP()); - //rFactory->SetFactory("NodeMatrix", NoFactory::getRCP()); - - if(levelID > 1) - rFactory->SetFactory("D0", this->GetFactoryManager(levelID-1)->GetFactory("D0")); - else - rFactory->SetFactory("D0", NoFactory::getRCP()); - - manager.SetFactory("Ptent", rFactory); - manager.SetFactory("D0", rFactory); - manager.SetFactory("InPlaceMap", rFactory); - - } - - // ===================================================================================================== - // ========================================= TentativeP================================================= - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_Aggregation_TentativeP(ParameterList& paramList, const ParameterList& defaultList, - FactoryManager& manager, int levelID, std::vector& keeps) const - { - using strings = 
std::unordered_set; - - MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); - - MUELU_SET_VAR_2LIST(paramList, defaultList, "aggregation: type", std::string, aggType); - TEUCHOS_TEST_FOR_EXCEPTION(!strings({"uncoupled", "coupled", "brick", "matlab","notay","classical"}).count(aggType), - Exceptions::RuntimeError, "Unknown aggregation algorithm: \"" << aggType << "\". Please consult User's Guide."); - - - // Only doing this for classical because otherwise, the gold tests get broken badly - RCP amalgFact; - if(aggType == "classical") { - amalgFact = rcp(new AmalgamationFactory()); - manager.SetFactory("UnAmalgamationInfo",amalgFact); - } - - // Aggregation graph - RCP dropFactory; - - if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "matlab")) { - #ifdef HAVE_MUELU_MATLAB - dropFactory = rcp(new SingleLevelMatlabFactory()); - ParameterList socParams = paramList.sublist("strength-of-connection: params"); - dropFactory->SetParameterList(socParams); - #else - throw std::runtime_error("Cannot use MATLAB evolutionary strength-of-connection - MueLu was not configured with MATLAB support."); - #endif - } else if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", std::string, "unsupported vector smoothing")) { - dropFactory = rcp(new MueLu::SmooVecCoalesceDropFactory()); - ParameterList dropParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: drop scheme", std::string, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: block diagonal: interleaved blocksize", int, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: number of random vectors", int, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: number of times to pre or post smooth", int, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: penalty parameters", Teuchos::Array, 
dropParams); - dropFactory->SetParameterList(dropParams); - } - else { - MUELU_KOKKOS_FACTORY_NO_DECL(dropFactory, CoalesceDropFactory, CoalesceDropFactory_kokkos); - ParameterList dropParams; - if (!rcp_dynamic_cast(dropFactory).is_null()) - dropParams.set("lightweight wrap", true); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: drop scheme", std::string, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: row sum drop tol", double, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: block diagonal: interleaved blocksize", int, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: drop tol", double, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: use ml scaling of drop tol", bool, dropParams); - - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: Dirichlet threshold", double, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: greedy Dirichlet", bool, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: distance laplacian algo", std::string, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: classical algo", std::string, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: distance laplacian directional weights",Teuchos::Array,dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: coloring: localize color graph", bool, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: dropping may create Dirichlet", bool, dropParams); - if (useKokkos_) { - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use lumping", bool, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: reuse graph", bool, dropParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, 
"filtered matrix: reuse eigenvalue", bool, dropParams); - } - - if(!amalgFact.is_null()) - dropFactory->SetFactory("UnAmalgamationInfo", manager.GetFactory("UnAmalgamationInfo")); - - if(dropParams.isParameter("aggregation: drop scheme")) { - std::string drop_scheme = dropParams.get("aggregation: drop scheme"); - if(drop_scheme == "block diagonal colored signed classical") - manager.SetFactory("Coloring Graph",dropFactory); - if (drop_scheme.find("block diagonal") != std::string::npos || drop_scheme == "signed classical") { - if(levelID > 0) - dropFactory->SetFactory("BlockNumber", this->GetFactoryManager(levelID-1)->GetFactory("BlockNumber")); - else - dropFactory->SetFactory("BlockNumber", manager.GetFactory("BlockNumber")); - } - } - - dropFactory->SetParameterList(dropParams); - } - manager.SetFactory("Graph", dropFactory); - - - // Aggregation scheme - #ifndef HAVE_MUELU_MATLAB - if (aggType == "matlab") - throw std::runtime_error("Cannot use MATLAB aggregation - MueLu was not configured with MATLAB support."); - #endif - RCP aggFactory; - if (aggType == "uncoupled") { - MUELU_KOKKOS_FACTORY_NO_DECL(aggFactory, UncoupledAggregationFactory, UncoupledAggregationFactory_kokkos); - ParameterList aggParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: mode", std::string, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: ordering", std::string, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: min agg size", int, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: max agg size", int, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: max selected neighbors", int, aggParams); - if(useKokkos_) { - //if not using kokkos refactor Uncoupled, there is no algorithm option (always Serial) - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: phase 1 algorithm", std::string, aggParams); - 
MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: deterministic", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: coloring algorithm", std::string, aggParams); + // In case you ever want to inspect the FactoryManager as it is generated for + // each level + /*std::cout<<"*** Factory Manager on level "< +void ParameterListInterpreter:: + UpdateFactoryManager_Smoothers(ParameterList ¶mList, + const ParameterList &defaultList, + FactoryManager &manager, int levelID, + std::vector &keeps) const { + MUELU_SET_VAR_2LIST(paramList, defaultList, "multigrid algorithm", + std::string, multigridAlgo); + MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, + reuseType); + bool useMaxAbsDiagonalScaling = false; + if (defaultList.isParameter("sa: use rowsumabs diagonal scaling")) + useMaxAbsDiagonalScaling = + defaultList.get("sa: use rowsumabs diagonal scaling"); + + // === Smoothing === + // FIXME: should custom smoother check default list too? 
+ bool isCustomSmoother = paramList.isParameter("smoother: pre or post") || + paramList.isParameter("smoother: type") || + paramList.isParameter("smoother: pre type") || + paramList.isParameter("smoother: post type") || + paramList.isSublist("smoother: params") || + paramList.isSublist("smoother: pre params") || + paramList.isSublist("smoother: post params") || + paramList.isParameter("smoother: sweeps") || + paramList.isParameter("smoother: pre sweeps") || + paramList.isParameter("smoother: post sweeps") || + paramList.isParameter("smoother: overlap") || + paramList.isParameter("smoother: pre overlap") || + paramList.isParameter("smoother: post overlap"); + + MUELU_SET_VAR_2LIST(paramList, defaultList, "smoother: pre or post", + std::string, PreOrPost); + if (PreOrPost == "none") { + manager.SetFactory("Smoother", Teuchos::null); + + } else if (isCustomSmoother) { + // FIXME: get default values from the factory + // NOTE: none of the smoothers at the moment use parameter validation + // framework, so we cannot get the default values from it. 
+#define TEST_MUTUALLY_EXCLUSIVE(arg1, arg2) \ + TEUCHOS_TEST_FOR_EXCEPTION( \ + paramList.isParameter(#arg1) && paramList.isParameter(#arg2), \ + Exceptions::InvalidArgument, \ + "You cannot specify both \"" #arg1 "\" and \"" #arg2 "\""); +#define TEST_MUTUALLY_EXCLUSIVE_S(arg1, arg2) \ + TEUCHOS_TEST_FOR_EXCEPTION( \ + paramList.isSublist(#arg1) && paramList.isSublist(#arg2), \ + Exceptions::InvalidArgument, \ + "You cannot specify both \"" #arg1 "\" and \"" #arg2 "\""); + + TEST_MUTUALLY_EXCLUSIVE("smoother: type", "smoother: pre type"); + TEST_MUTUALLY_EXCLUSIVE("smoother: type", "smoother: post type"); + TEST_MUTUALLY_EXCLUSIVE("smoother: sweeps", "smoother: pre sweeps"); + TEST_MUTUALLY_EXCLUSIVE("smoother: sweeps", "smoother: post sweeps"); + TEST_MUTUALLY_EXCLUSIVE("smoother: overlap", "smoother: pre overlap"); + TEST_MUTUALLY_EXCLUSIVE("smoother: overlap", "smoother: post overlap"); + TEST_MUTUALLY_EXCLUSIVE_S("smoother: params", "smoother: pre params"); + TEST_MUTUALLY_EXCLUSIVE_S("smoother: params", "smoother: post params"); + TEUCHOS_TEST_FOR_EXCEPTION( + PreOrPost == "both" && (paramList.isParameter("smoother: pre type") != + paramList.isParameter("smoother: post type")), + Exceptions::InvalidArgument, + "You must specify both \"smoother: pre type\" and \"smoother: post " + "type\""); + + // Default values + int overlap = 0; + ParameterList defaultSmootherParams; + defaultSmootherParams.set("relaxation: type", "Symmetric Gauss-Seidel"); + defaultSmootherParams.set("relaxation: sweeps", + Teuchos::OrdinalTraits::one()); + defaultSmootherParams.set("relaxation: damping factor", + Teuchos::ScalarTraits::one()); + + RCP preSmoother = Teuchos::null, + postSmoother = Teuchos::null; + std::string preSmootherType, postSmootherType; + ParameterList preSmootherParams, postSmootherParams; + + if (paramList.isParameter("smoother: overlap")) + overlap = paramList.get("smoother: overlap"); + + if (PreOrPost == "pre" || PreOrPost == "both") { + if 
(paramList.isParameter("smoother: pre type")) { + preSmootherType = paramList.get("smoother: pre type"); + } else { + MUELU_SET_VAR_2LIST(paramList, defaultList, "smoother: type", + std::string, preSmootherTypeTmp); + preSmootherType = preSmootherTypeTmp; } - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: enable phase 1", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: enable phase 2a", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: enable phase 2b", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: enable phase 3", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: match ML phase2a", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: phase2a agg factor", double, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: preserve Dirichlet points", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: error on nodes with no on-rank neighbors", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: phase3 avoid singletons", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: compute aggregate qualities", bool, aggParams); - aggFactory->SetParameterList(aggParams); - // make sure that the aggregation factory has all necessary data - aggFactory->SetFactory("DofsPerNode", manager.GetFactory("Graph")); - aggFactory->SetFactory("Graph", manager.GetFactory("Graph")); - // aggFactory->SetFactory("UnAmalgamationInfo", manager.GetFactory("UnAmalgamationInfo")); - - } else if (aggType == "brick") { - aggFactory = rcp(new BrickAggregationFactory()); - ParameterList aggParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: brick x size", int, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, 
"aggregation: brick y size", int, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: brick z size", int, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: brick x Dirichlet", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: brick y Dirichlet", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: brick z Dirichlet", bool, aggParams); - aggFactory->SetParameterList(aggParams); - - // Unlike other factories, BrickAggregationFactory makes the Graph/DofsPerNode itself - manager.SetFactory("Graph", aggFactory); - manager.SetFactory("DofsPerNode", aggFactory); - manager.SetFactory("Filtering", aggFactory); - if (levelID > 1) { - // We check for levelID > 0, as in the interpreter aggFactory for - // levelID really corresponds to level 0. Managers are clunky, as they - // contain factories for two different levels - aggFactory->SetFactory("Coordinates", this->GetFactoryManager(levelID-1)->GetFactory("Coordinates")); + if (paramList.isParameter("smoother: pre overlap")) + overlap = paramList.get("smoother: pre overlap"); + + if (paramList.isSublist("smoother: pre params")) + preSmootherParams = paramList.sublist("smoother: pre params"); + else if (paramList.isSublist("smoother: params")) + preSmootherParams = paramList.sublist("smoother: params"); + else if (defaultList.isSublist("smoother: params")) + preSmootherParams = defaultList.sublist("smoother: params"); + else if (preSmootherType == "RELAXATION") + preSmootherParams = defaultSmootherParams; + + if (preSmootherType == "CHEBYSHEV" && useMaxAbsDiagonalScaling) + preSmootherParams.set("chebyshev: use rowsumabs diagonal scaling", + true); + +#ifdef HAVE_MUELU_INTREPID2 + // Propagate P-coarsening for Topo smoothing + if (multigridAlgo == "pcoarsen" && preSmootherType == "TOPOLOGICAL" && + defaultList.isParameter("pcoarsen: schedule") && + defaultList.isParameter("pcoarsen: 
element")) { + // P-Coarsening by schedule (new interface) + // NOTE: levelID represents the *coarse* level in this case + auto pcoarsen_schedule = Teuchos::getArrayFromStringParameter( + defaultList, "pcoarsen: schedule"); + auto pcoarsen_element = + defaultList.get("pcoarsen: element"); + + if (levelID < (int)pcoarsen_schedule.size()) { + // Topo info for P-Coarsening + auto lo = + pcoarsen_element + std::to_string(pcoarsen_schedule[levelID]); + preSmootherParams.set("pcoarsen: hi basis", lo); + } } +#endif + +#ifdef HAVE_MUELU_MATLAB + if (preSmootherType == "matlab") + preSmoother = rcp( + new SmootherFactory(rcp(new MatlabSmoother(preSmootherParams)))); + else +#endif + preSmoother = rcp(new SmootherFactory(rcp(new TrilinosSmoother( + preSmootherType, preSmootherParams, overlap)))); } - else if (aggType == "classical") { - // Map and coloring - RCP mapFact = rcp(new ClassicalMapFactory()); - ParameterList mapParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: deterministic", bool, mapParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: coloring algorithm", std::string, mapParams); - - ParameterList tempParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: drop scheme", std::string, tempParams); - std::string drop_algo = tempParams.get("aggregation: drop scheme"); - if(drop_algo == "block diagonal colored signed classical") { - mapParams.set("aggregation: coloring: use color graph",true); - mapFact->SetFactory("Coloring Graph", manager.GetFactory("Coloring Graph")); - } - mapFact->SetParameterList(mapParams); - mapFact->SetFactory("Graph", manager.GetFactory("Graph")); - mapFact->SetFactory("UnAmalgamationInfo", manager.GetFactory("UnAmalgamationInfo")); - - manager.SetFactory("FC Splitting", mapFact); - manager.SetFactory("CoarseMap", mapFact); - - - aggFactory = rcp(new ClassicalPFactory()); - ParameterList aggParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, 
"aggregation: classical scheme", std::string, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: drop scheme", std::string, aggParams); - aggFactory->SetParameterList(aggParams); - aggFactory->SetFactory("FC Splitting",manager.GetFactory("FC Splitting")); - aggFactory->SetFactory("CoarseMap",manager.GetFactory("CoarseMap")); - aggFactory->SetFactory("DofsPerNode", manager.GetFactory("Graph")); - aggFactory->SetFactory("Graph", manager.GetFactory("Graph")); - - if (drop_algo.find("block diagonal") != std::string::npos || drop_algo == "signed classical") { - if(levelID > 0) - aggFactory->SetFactory("BlockNumber", this->GetFactoryManager(levelID-1)->GetFactory("BlockNumber")); - else - aggFactory->SetFactory("BlockNumber", manager.GetFactory("BlockNumber")); + if (PreOrPost == "post" || PreOrPost == "both") { + if (paramList.isParameter("smoother: post type")) + postSmootherType = paramList.get("smoother: post type"); + else { + MUELU_SET_VAR_2LIST(paramList, defaultList, "smoother: type", + std::string, postSmootherTypeTmp); + postSmootherType = postSmootherTypeTmp; } - // Now we short-circuit, because we neither need nor want TentativePFactory here - manager.SetFactory("Ptent", aggFactory); - manager.SetFactory("P Graph", aggFactory); - + if (paramList.isSublist("smoother: post params")) + postSmootherParams = paramList.sublist("smoother: post params"); + else if (paramList.isSublist("smoother: params")) + postSmootherParams = paramList.sublist("smoother: params"); + else if (defaultList.isSublist("smoother: params")) + postSmootherParams = defaultList.sublist("smoother: params"); + else if (postSmootherType == "RELAXATION") + postSmootherParams = defaultSmootherParams; + if (paramList.isParameter("smoother: post overlap")) + overlap = paramList.get("smoother: post overlap"); + + if (postSmootherType == "CHEBYSHEV" && useMaxAbsDiagonalScaling) + postSmootherParams.set("chebyshev: use rowsumabs diagonal scaling", + true); + + if 
(postSmootherType == preSmootherType && + areSame(preSmootherParams, postSmootherParams)) + postSmoother = preSmoother; + else { +#ifdef HAVE_MUELU_INTREPID2 + // Propagate P-coarsening for Topo smoothing + if (multigridAlgo == "pcoarsen" && preSmootherType == "TOPOLOGICAL" && + defaultList.isParameter("pcoarsen: schedule") && + defaultList.isParameter("pcoarsen: element")) { + // P-Coarsening by schedule (new interface) + // NOTE: levelID represents the *coarse* level in this case + auto pcoarsen_schedule = Teuchos::getArrayFromStringParameter( + defaultList, "pcoarsen: schedule"); + auto pcoarsen_element = + defaultList.get("pcoarsen: element"); + + if (levelID < (int)pcoarsen_schedule.size()) { + // Topo info for P-Coarsening + auto lo = + pcoarsen_element + std::to_string(pcoarsen_schedule[levelID]); + postSmootherParams.set("pcoarsen: hi basis", lo); + } + } +#endif - if (reuseType == "tP" && levelID) { - // keeps.push_back(keep_pair("Nullspace", Ptent.get())); - keeps.push_back(keep_pair("Ptent",aggFactory.get())); - } - return; - } - else if (aggType == "notay") { - aggFactory = rcp(new NotayAggregationFactory()); - ParameterList aggParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: pairwise: size", int, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: pairwise: tie threshold", double, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: Dirichlet threshold", double, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: ordering", std::string, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: compute aggregate qualities",bool, aggParams); - aggFactory->SetParameterList(aggParams); - aggFactory->SetFactory("DofsPerNode", manager.GetFactory("Graph")); - aggFactory->SetFactory("Graph", manager.GetFactory("Graph")); - } #ifdef HAVE_MUELU_MATLAB - else if(aggType == "matlab") { - ParameterList aggParams = 
paramList.sublist("aggregation: params"); - aggFactory = rcp(new SingleLevelMatlabFactory()); - aggFactory->SetParameterList(aggParams); - } + if (postSmootherType == "matlab") + postSmoother = rcp( + new SmootherFactory(rcp(new MatlabSmoother(postSmootherParams)))); + else #endif + postSmoother = rcp(new SmootherFactory(rcp(new TrilinosSmoother( + postSmootherType, postSmootherParams, overlap)))); + } + } + if (preSmoother == postSmoother) + manager.SetFactory("Smoother", preSmoother); + else { + manager.SetFactory("PreSmoother", preSmoother); + manager.SetFactory("PostSmoother", postSmoother); + } + } + // The first clause is not necessary, but it is here for clarity Smoothers + // are reused if smoother explicitly said to reuse them, or if any other + // reuse option is enabled + bool reuseSmoothers = (reuseType == "S" || reuseType != "none"); + if (reuseSmoothers) { + auto preSmootherFactory = rcp_const_cast( + rcp_dynamic_cast(manager.GetFactory("PreSmoother"))); - manager.SetFactory("Aggregates", aggFactory); + if (preSmootherFactory != Teuchos::null) { + ParameterList postSmootherFactoryParams; + postSmootherFactoryParams.set("keep smoother data", true); + preSmootherFactory->SetParameterList(postSmootherFactoryParams); - // Coarse map - RCP coarseMap = rcp(new CoarseMapFactory()); - coarseMap->SetFactory("Aggregates", manager.GetFactory("Aggregates")); - manager.SetFactory("CoarseMap", coarseMap); + keeps.push_back(keep_pair("PreSmoother data", preSmootherFactory.get())); + } - // Aggregate qualities - if (MUELU_TEST_PARAM_2LIST(paramList, defaultList, "aggregation: compute aggregate qualities", bool, true)) { - RCP aggQualityFact = rcp(new AggregateQualityEstimateFactory()); - ParameterList aggQualityParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: good aggregate threshold", double, aggQualityParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: file output", bool, aggQualityParams); - 
MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: file base", std::string, aggQualityParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: check symmetry", bool, aggQualityParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: algorithm", std::string, aggQualityParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: zero threshold", double, aggQualityParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: percentiles", Teuchos::Array,aggQualityParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: mode", std::string, aggQualityParams); - aggQualityFact->SetParameterList(aggQualityParams); - manager.SetFactory("AggregateQualities", aggQualityFact); + auto postSmootherFactory = rcp_const_cast( + rcp_dynamic_cast(manager.GetFactory("PostSmoother"))); + if (postSmootherFactory != Teuchos::null) { + ParameterList postSmootherFactoryParams; + postSmootherFactoryParams.set("keep smoother data", true); + postSmootherFactory->SetParameterList(postSmootherFactoryParams); - assert(aggType == "uncoupled"); - aggFactory->SetFactory("AggregateQualities", aggQualityFact); + keeps.push_back( + keep_pair("PostSmoother data", postSmootherFactory.get())); } + auto coarseFactory = rcp_const_cast( + rcp_dynamic_cast(manager.GetFactory("CoarseSolver"))); + if (coarseFactory != Teuchos::null) { + ParameterList coarseFactoryParams; + coarseFactoryParams.set("keep smoother data", true); + coarseFactory->SetParameterList(coarseFactoryParams); - // Tentative P - MUELU_KOKKOS_FACTORY(Ptent, TentativePFactory, TentativePFactory_kokkos); - ParameterList ptentParams; - if (paramList.isSublist("matrixmatrix: kernel params")) - ptentParams.sublist("matrixmatrix: kernel params", false) = paramList.sublist("matrixmatrix: kernel params"); - if (defaultList.isSublist("matrixmatrix: kernel params")) - 
ptentParams.sublist("matrixmatrix: kernel params", false) = defaultList.sublist("matrixmatrix: kernel params"); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "tentative: calculate qr", bool, ptentParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "tentative: build coarse coordinates", bool, ptentParams); - Ptent->SetParameterList(ptentParams); - Ptent->SetFactory("Aggregates", manager.GetFactory("Aggregates")); - Ptent->SetFactory("CoarseMap", manager.GetFactory("CoarseMap")); - manager.SetFactory("Ptent", Ptent); - - if (reuseType == "tP" && levelID) { - keeps.push_back(keep_pair("Nullspace", Ptent.get())); - keeps.push_back(keep_pair("P", Ptent.get())); + keeps.push_back(keep_pair("PreSmoother data", coarseFactory.get())); } } - // ===================================================================================================== - // ============================================ RAP ==================================================== - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_RAP(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const - { - if (paramList.isParameter("A") && !paramList.get >("A").is_null()) { - // We have user matrix A - manager.SetFactory("A", NoFactory::getRCP()); - return; - } + if ((reuseType == "RAP" && levelID) || (reuseType == "full")) { + // The difference between "RAP" and "full" is keeping smoothers. However, + // as in both cases we keep coarse matrices, we do not need to update + // coarse smoothers. 
On the other hand, if a user changes fine level + // matrix, "RAP" would update the fine level smoother, while "full" would + // not + keeps.push_back( + keep_pair("PreSmoother", manager.GetFactory("PreSmoother").get())); + keeps.push_back( + keep_pair("PostSmoother", manager.GetFactory("PostSmoother").get())); + + // We do keep_pair("PreSmoother", manager.GetFactory("CoarseSolver").get()) + // as the coarse solver factory is in fact a smoothing factory, so the + // only pieces of data it generates are PreSmoother and PostSmoother + keeps.push_back( + keep_pair("PreSmoother", manager.GetFactory("CoarseSolver").get())); + } +} + +// ===================================================================================================== +// ====================================== Coarse Solvers +// =============================================== +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_CoarseSolvers( + ParameterList ¶mList, const ParameterList &defaultList, + FactoryManager &manager, int /* levelID */, + std::vector & /* keeps */) const { + // FIXME: should custom coarse solver check default list too? + bool isCustomCoarseSolver = paramList.isParameter("coarse: type") || + paramList.isParameter("coarse: params"); + if (MUELU_TEST_PARAM_2LIST(paramList, defaultList, "coarse: type", + std::string, "none")) { + manager.SetFactory("CoarseSolver", Teuchos::null); + + } else if (isCustomCoarseSolver) { + // FIXME: get default values from the factory + // NOTE: none of the smoothers at the moment use parameter validation + // framework, so we cannot get the default values from it. 
+ MUELU_SET_VAR_2LIST(paramList, defaultList, "coarse: type", std::string, + coarseType); + + int overlap = 0; + if (paramList.isParameter("coarse: overlap")) + overlap = paramList.get("coarse: overlap"); + + ParameterList coarseParams; + if (paramList.isSublist("coarse: params")) + coarseParams = paramList.sublist("coarse: params"); + else if (defaultList.isSublist("coarse: params")) + coarseParams = defaultList.sublist("coarse: params"); - ParameterList RAPparams; - - RCP RAP; - RCP RAPs; - // Allow for Galerkin or shifted RAP - // FIXME: Should this not be some form of MUELU_SET_VAR_2LIST? - std::string alg = paramList.get("rap: algorithm", "galerkin"); - if (alg == "shift" || alg == "non-galerkin") { - RAPs = rcp(new RAPShiftFactory()); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: shift", double, RAPparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: shift diagonal M", bool, RAPparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: shift low storage", bool, RAPparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: shift array", Teuchos::Array, RAPparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: cfl array", Teuchos::Array, RAPparams); + using strings = std::unordered_set; + RCP coarseSmoother; + // TODO: this is not a proper place to check. If we consider direct solver + // to be a special case of smoother, we would like to unify Amesos and + // Ifpack2 smoothers in src/Smoothers, and have a single factory responsible + // for those. Then, this check would belong there. 
+ if (strings({"RELAXATION", + "CHEBYSHEV", + "ILUT", + "ILU", + "RILUK", + "SCHWARZ", + "Amesos", + "BLOCK RELAXATION", + "BLOCK_RELAXATION", + "BLOCKRELAXATION", + "SPARSE BLOCK RELAXATION", + "SPARSE_BLOCK_RELAXATION", + "SPARSEBLOCKRELAXATION", + "LINESMOOTHING_BANDEDRELAXATION", + "LINESMOOTHING_BANDED_RELAXATION", + "LINESMOOTHING_BANDED RELAXATION", + "LINESMOOTHING_TRIDIRELAXATION", + "LINESMOOTHING_TRIDI_RELAXATION", + "LINESMOOTHING_TRIDI RELAXATION", + "LINESMOOTHING_TRIDIAGONALRELAXATION", + "LINESMOOTHING_TRIDIAGONAL_RELAXATION", + "LINESMOOTHING_TRIDIAGONAL RELAXATION", + "TOPOLOGICAL", + "FAST_ILU", + "FAST_IC", + "FAST_ILDL"}) + .count(coarseType)) { + coarseSmoother = + rcp(new TrilinosSmoother(coarseType, coarseParams, overlap)); } else { - RAP = rcp(new RAPFactory()); +#ifdef HAVE_MUELU_MATLAB + if (coarseType == "matlab") + coarseSmoother = rcp(new MatlabSmoother(coarseParams)); + else +#endif + coarseSmoother = rcp(new DirectSolver(coarseType, coarseParams)); } - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: relative diagonal floor", Teuchos::Array, RAPparams); - - if (paramList.isSublist("matrixmatrix: kernel params")) - RAPparams.sublist("matrixmatrix: kernel params", false) = paramList.sublist("matrixmatrix: kernel params"); - if (defaultList.isSublist("matrixmatrix: kernel params")) - RAPparams.sublist("matrixmatrix: kernel params", false) = defaultList.sublist("matrixmatrix: kernel params"); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "transpose: use implicit", bool, RAPparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: fix zero diagonals", bool, RAPparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: fix zero diagonals threshold", double, RAPparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: fix zero diagonals replacement", Scalar, RAPparams); - - // if "rap: triple product" has not been set and algorithm is "unsmoothed" switch triple product on - if 
(!paramList.isParameter("rap: triple product") && - paramList.isType("multigrid algorithm") && - paramList.get("multigrid algorithm") == "unsmoothed") - paramList.set("rap: triple product", true); - else - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: triple product", bool, RAPparams); - - try { - if (paramList.isParameter("aggregation: allow empty prolongator columns")) { - RAPparams.set("CheckMainDiagonal", paramList.get("aggregation: allow empty prolongator columns")); - RAPparams.set("RepairMainDiagonal", paramList.get("aggregation: allow empty prolongator columns")); - } - else if (defaultList.isParameter("aggregation: allow empty prolongator columns")) { - RAPparams.set("CheckMainDiagonal", defaultList.get("aggregation: allow empty prolongator columns")); - RAPparams.set("RepairMainDiagonal", defaultList.get("aggregation: allow empty prolongator columns")); - } + manager.SetFactory("CoarseSolver", + rcp(new SmootherFactory(coarseSmoother))); + } +} + +// ===================================================================================================== +// ========================================= +// TentativeP================================================= +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_Reitzinger(ParameterList ¶mList, + const ParameterList &defaultList, + FactoryManager &manager, int levelID, + std::vector &keeps) const { + ParameterList rParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: enable", + bool, rParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "repartition: use subcommunicators", bool, + rParams); + MUELU_TEST_AND_SET_PARAM_2LIST( + paramList, defaultList, "tentative: constant column sums", bool, rParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "tentative: calculate qr", bool, rParams); + + RCP rFactory = rcp(new 
ReitzingerPFactory()); + rFactory->SetParameterList(rParams); + + // These are all going to be user provided, so NoFactory + rFactory->SetFactory("Pnodal", NoFactory::getRCP()); + rFactory->SetFactory("NodeAggMatrix", NoFactory::getRCP()); + // rFactory->SetFactory("NodeMatrix", NoFactory::getRCP()); + + if (levelID > 1) + rFactory->SetFactory( + "D0", this->GetFactoryManager(levelID - 1)->GetFactory("D0")); + else + rFactory->SetFactory("D0", NoFactory::getRCP()); + + manager.SetFactory("Ptent", rFactory); + manager.SetFactory("D0", rFactory); + manager.SetFactory("InPlaceMap", rFactory); +} + +// ===================================================================================================== +// ========================================= +// TentativeP================================================= +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_Aggregation_TentativeP( + ParameterList ¶mList, const ParameterList &defaultList, + FactoryManager &manager, int levelID, + std::vector &keeps) const { + using strings = std::unordered_set; + + MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, + reuseType); + + MUELU_SET_VAR_2LIST(paramList, defaultList, "aggregation: type", std::string, + aggType); + TEUCHOS_TEST_FOR_EXCEPTION(!strings({"uncoupled", "coupled", "brick", + "matlab", "notay", "classical"}) + .count(aggType), + Exceptions::RuntimeError, + "Unknown aggregation algorithm: \"" + << aggType + << "\". 
Please consult User's Guide."); + + // Only doing this for classical because otherwise, the gold tests get broken + // badly + RCP amalgFact; + if (aggType == "classical") { + amalgFact = rcp(new AmalgamationFactory()); + manager.SetFactory("UnAmalgamationInfo", amalgFact); + } - } catch (Teuchos::Exceptions::InvalidParameterType&) { - TEUCHOS_TEST_FOR_EXCEPTION_PURE_MSG(true, Teuchos::Exceptions::InvalidParameterType, - "Error: parameter \"aggregation: allow empty prolongator columns\" must be of type " << Teuchos::TypeNameTraits::name()); - } + // Aggregation graph + RCP dropFactory; - if (!RAP.is_null()) { - RAP->SetParameterList(RAPparams); - RAP->SetFactory("P", manager.GetFactory("P")); - } else { - RAPs->SetParameterList(RAPparams); - RAPs->SetFactory("P", manager.GetFactory("P")); + if (MUELU_TEST_PARAM_2LIST(paramList, paramList, "aggregation: drop scheme", + std::string, "matlab")) { +#ifdef HAVE_MUELU_MATLAB + dropFactory = rcp(new SingleLevelMatlabFactory()); + ParameterList socParams = + paramList.sublist("strength-of-connection: params"); + dropFactory->SetParameterList(socParams); +#else + throw std::runtime_error( + "Cannot use MATLAB evolutionary strength-of-connection - MueLu was not " + "configured with MATLAB support."); +#endif + } else if (MUELU_TEST_PARAM_2LIST(paramList, paramList, + "aggregation: drop scheme", std::string, + "unsupported vector smoothing")) { + dropFactory = rcp(new MueLu::SmooVecCoalesceDropFactory()); + ParameterList dropParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: drop scheme", std::string, + dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST( + paramList, defaultList, + "aggregation: block diagonal: interleaved blocksize", int, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: number of random vectors", int, + dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST( + paramList, defaultList, + "aggregation: number of times to pre or post smooth", int, dropParams); 
+ MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: penalty parameters", + Teuchos::Array, dropParams); + dropFactory->SetParameterList(dropParams); + } else { + MUELU_KOKKOS_FACTORY_NO_DECL(dropFactory, CoalesceDropFactory, + CoalesceDropFactory_kokkos); + ParameterList dropParams; + if (!rcp_dynamic_cast(dropFactory).is_null()) + dropParams.set("lightweight wrap", true); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: drop scheme", std::string, + dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: row sum drop tol", double, + dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST( + paramList, defaultList, + "aggregation: block diagonal: interleaved blocksize", int, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: drop tol", double, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: use ml scaling of drop tol", + bool, dropParams); + + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: Dirichlet threshold", double, + dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: greedy Dirichlet", bool, + dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: distance laplacian algo", + std::string, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: classical algo", std::string, + dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST( + paramList, defaultList, + "aggregation: distance laplacian directional weights", + Teuchos::Array, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST( + paramList, defaultList, "aggregation: coloring: localize color graph", + bool, dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: dropping may create Dirichlet", + bool, dropParams); + if (useKokkos_) { + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "filtered matrix: use lumping", bool, + 
dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "filtered matrix: reuse graph", bool, + dropParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "filtered matrix: reuse eigenvalue", bool, + dropParams); } - if (!this->implicitTranspose_) { - if (!RAP.is_null()) - RAP->SetFactory("R", manager.GetFactory("R")); - else - RAPs->SetFactory("R", manager.GetFactory("R")); + if (!amalgFact.is_null()) + dropFactory->SetFactory("UnAmalgamationInfo", + manager.GetFactory("UnAmalgamationInfo")); + + if (dropParams.isParameter("aggregation: drop scheme")) { + std::string drop_scheme = + dropParams.get("aggregation: drop scheme"); + if (drop_scheme == "block diagonal colored signed classical") + manager.SetFactory("Coloring Graph", dropFactory); + if (drop_scheme.find("block diagonal") != std::string::npos || + drop_scheme == "signed classical") { + if (levelID > 0) + dropFactory->SetFactory( + "BlockNumber", + this->GetFactoryManager(levelID - 1)->GetFactory("BlockNumber")); + else + dropFactory->SetFactory("BlockNumber", + manager.GetFactory("BlockNumber")); + } } - if (MUELU_TEST_PARAM_2LIST(paramList, defaultList, "aggregation: export visualization data", bool, true)) { - RCP aggExport = rcp(new AggregationExportFactory()); - ParameterList aggExportParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output filename", std::string, aggExportParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output file: agg style", std::string, aggExportParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output file: iter", int, aggExportParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output file: time step", int, aggExportParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output file: fine graph edges", bool, aggExportParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output file: 
coarse graph edges", bool, aggExportParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: output file: build colormap", bool, aggExportParams); - aggExport->SetParameterList(aggExportParams); - aggExport->SetFactory("DofsPerNode", manager.GetFactory("DofsPerNode")); + dropFactory->SetParameterList(dropParams); + } + manager.SetFactory("Graph", dropFactory); - if (!RAP.is_null()) - RAP->AddTransferFactory(aggExport); +// Aggregation scheme +#ifndef HAVE_MUELU_MATLAB + if (aggType == "matlab") + throw std::runtime_error("Cannot use MATLAB aggregation - MueLu was not " + "configured with MATLAB support."); +#endif + RCP aggFactory; + if (aggType == "uncoupled") { + MUELU_KOKKOS_FACTORY_NO_DECL(aggFactory, UncoupledAggregationFactory, + UncoupledAggregationFactory_kokkos); + ParameterList aggParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: mode", + std::string, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: ordering", std::string, + aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: min agg size", int, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: max agg size", int, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: max selected neighbors", int, + aggParams); + if (useKokkos_) { + // if not using kokkos refactor Uncoupled, there is no algorithm option + // (always Serial) + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: phase 1 algorithm", + std::string, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: deterministic", bool, + aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: coloring algorithm", + std::string, aggParams); + } + MUELU_TEST_AND_SET_PARAM_2LIST( + paramList, defaultList, "aggregation: enable phase 1", bool, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, 
defaultList, + "aggregation: enable phase 2a", bool, + aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: enable phase 2b", bool, + aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST( + paramList, defaultList, "aggregation: enable phase 3", bool, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: match ML phase2a", bool, + aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: phase2a agg factor", double, + aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: preserve Dirichlet points", + bool, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST( + paramList, defaultList, + "aggregation: error on nodes with no on-rank neighbors", bool, + aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: phase3 avoid singletons", bool, + aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: compute aggregate qualities", + bool, aggParams); + aggFactory->SetParameterList(aggParams); + // make sure that the aggregation factory has all necessary data + aggFactory->SetFactory("DofsPerNode", manager.GetFactory("Graph")); + aggFactory->SetFactory("Graph", manager.GetFactory("Graph")); + // aggFactory->SetFactory("UnAmalgamationInfo", + // manager.GetFactory("UnAmalgamationInfo")); + + } else if (aggType == "brick") { + aggFactory = rcp(new BrickAggregationFactory()); + ParameterList aggParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: brick x size", int, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: brick y size", int, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: brick z size", int, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: brick x Dirichlet", bool, + aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: brick y Dirichlet", 
bool, + aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: brick z Dirichlet", bool, + aggParams); + aggFactory->SetParameterList(aggParams); + + // Unlike other factories, BrickAggregationFactory makes the + // Graph/DofsPerNode itself + manager.SetFactory("Graph", aggFactory); + manager.SetFactory("DofsPerNode", aggFactory); + manager.SetFactory("Filtering", aggFactory); + if (levelID > 1) { + // We check for levelID > 0, as in the interpreter aggFactory for + // levelID really corresponds to level 0. Managers are clunky, as they + // contain factories for two different levels + aggFactory->SetFactory( + "Coordinates", + this->GetFactoryManager(levelID - 1)->GetFactory("Coordinates")); + } + } else if (aggType == "classical") { + // Map and coloring + RCP mapFact = rcp(new ClassicalMapFactory()); + ParameterList mapParams; + MUELU_TEST_AND_SET_PARAM_2LIST( + paramList, defaultList, "aggregation: deterministic", bool, mapParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: coloring algorithm", + std::string, mapParams); + + ParameterList tempParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: drop scheme", std::string, + tempParams); + std::string drop_algo = + tempParams.get("aggregation: drop scheme"); + if (drop_algo == "block diagonal colored signed classical") { + mapParams.set("aggregation: coloring: use color graph", true); + mapFact->SetFactory("Coloring Graph", + manager.GetFactory("Coloring Graph")); + } + mapFact->SetParameterList(mapParams); + mapFact->SetFactory("Graph", manager.GetFactory("Graph")); + mapFact->SetFactory("UnAmalgamationInfo", + manager.GetFactory("UnAmalgamationInfo")); + + manager.SetFactory("FC Splitting", mapFact); + manager.SetFactory("CoarseMap", mapFact); + + aggFactory = rcp(new ClassicalPFactory()); + ParameterList aggParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: classical scheme", std::string, + 
aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: drop scheme", std::string, + aggParams); + aggFactory->SetParameterList(aggParams); + aggFactory->SetFactory("FC Splitting", manager.GetFactory("FC Splitting")); + aggFactory->SetFactory("CoarseMap", manager.GetFactory("CoarseMap")); + aggFactory->SetFactory("DofsPerNode", manager.GetFactory("Graph")); + aggFactory->SetFactory("Graph", manager.GetFactory("Graph")); + + if (drop_algo.find("block diagonal") != std::string::npos || + drop_algo == "signed classical") { + if (levelID > 0) + aggFactory->SetFactory( + "BlockNumber", + this->GetFactoryManager(levelID - 1)->GetFactory("BlockNumber")); else - RAPs->AddTransferFactory(aggExport); + aggFactory->SetFactory("BlockNumber", + manager.GetFactory("BlockNumber")); } - if (!RAP.is_null()) - manager.SetFactory("A", RAP); - else - manager.SetFactory("A", RAPs); - MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); - MUELU_SET_VAR_2LIST(paramList, defaultList, "sa: use filtered matrix", bool, useFiltering); - bool filteringChangesMatrix = useFiltering && !MUELU_TEST_PARAM_2LIST(paramList, defaultList, "aggregation: drop tol", double, 0); + // Now we short-circuit, because we neither need nor want TentativePFactory + // here + manager.SetFactory("Ptent", aggFactory); + manager.SetFactory("P Graph", aggFactory); - if (reuseType == "RP" || (reuseType == "tP" && !filteringChangesMatrix)) { - if (!RAP.is_null()) { - keeps.push_back(keep_pair("AP reuse data", RAP.get())); - keeps.push_back(keep_pair("RAP reuse data", RAP.get())); - - } else { - keeps.push_back(keep_pair("AP reuse data", RAPs.get())); - keeps.push_back(keep_pair("RAP reuse data", RAPs.get())); - } + if (reuseType == "tP" && levelID) { + // keeps.push_back(keep_pair("Nullspace", Ptent.get())); + keeps.push_back(keep_pair("Ptent", aggFactory.get())); } + return; + } else if (aggType == "notay") { + aggFactory = rcp(new NotayAggregationFactory()); + 
ParameterList aggParams; + MUELU_TEST_AND_SET_PARAM_2LIST( + paramList, defaultList, "aggregation: pairwise: size", int, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: pairwise: tie threshold", + double, aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: Dirichlet threshold", double, + aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: ordering", std::string, + aggParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: compute aggregate qualities", + bool, aggParams); + aggFactory->SetParameterList(aggParams); + aggFactory->SetFactory("DofsPerNode", manager.GetFactory("Graph")); + aggFactory->SetFactory("Graph", manager.GetFactory("Graph")); } +#ifdef HAVE_MUELU_MATLAB + else if (aggType == "matlab") { + ParameterList aggParams = paramList.sublist("aggregation: params"); + aggFactory = rcp(new SingleLevelMatlabFactory()); + aggFactory->SetParameterList(aggParams); + } +#endif - // ===================================================================================================== - // ======================================= Coordinates ================================================= - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_Coordinates(ParameterList& paramList, const ParameterList& /* defaultList */, - FactoryManager& manager, int /* levelID */, std::vector& /* keeps */) const - { - bool have_userCO = false; - if (paramList.isParameter("Coordinates") && !paramList.get >("Coordinates").is_null()) - have_userCO = true; - - if (useCoordinates_) { - if (have_userCO) { - manager.SetFactory("Coordinates", NoFactory::getRCP()); + manager.SetFactory("Aggregates", aggFactory); + + // Coarse map + RCP coarseMap = rcp(new CoarseMapFactory()); + coarseMap->SetFactory("Aggregates", 
manager.GetFactory("Aggregates")); + manager.SetFactory("CoarseMap", coarseMap); + + // Aggregate qualities + if (MUELU_TEST_PARAM_2LIST(paramList, defaultList, + "aggregation: compute aggregate qualities", bool, + true)) { + RCP aggQualityFact = rcp(new AggregateQualityEstimateFactory()); + ParameterList aggQualityParams; + MUELU_TEST_AND_SET_PARAM_2LIST( + paramList, defaultList, "aggregate qualities: good aggregate threshold", + double, aggQualityParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregate qualities: file output", bool, + aggQualityParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregate qualities: file base", + std::string, aggQualityParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregate qualities: check symmetry", bool, + aggQualityParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregate qualities: algorithm", + std::string, aggQualityParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregate qualities: zero threshold", + double, aggQualityParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregate qualities: percentiles", + Teuchos::Array, aggQualityParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregate qualities: mode", std::string, + aggQualityParams); + aggQualityFact->SetParameterList(aggQualityParams); + manager.SetFactory("AggregateQualities", aggQualityFact); + + assert(aggType == "uncoupled"); + aggFactory->SetFactory("AggregateQualities", aggQualityFact); + } - } else { - RCP coords = rcp(new CoordinatesTransferFactory()); - coords->SetFactory("Aggregates", manager.GetFactory("Aggregates")); - coords->SetFactory("CoarseMap", manager.GetFactory("CoarseMap")); - manager.SetFactory("Coordinates", coords); - - auto RAP = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("A"))); - if (!RAP.is_null()) { - RAP->AddTransferFactory(manager.GetFactory("Coordinates")); - } else { - auto RAPs = 
rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("A"))); - RAPs->AddTransferFactory(manager.GetFactory("Coordinates")); - } - } - } + // Tentative P + MUELU_KOKKOS_FACTORY(Ptent, TentativePFactory, TentativePFactory_kokkos); + ParameterList ptentParams; + if (paramList.isSublist("matrixmatrix: kernel params")) + ptentParams.sublist("matrixmatrix: kernel params", false) = + paramList.sublist("matrixmatrix: kernel params"); + if (defaultList.isSublist("matrixmatrix: kernel params")) + ptentParams.sublist("matrixmatrix: kernel params", false) = + defaultList.sublist("matrixmatrix: kernel params"); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "tentative: calculate qr", bool, ptentParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "tentative: build coarse coordinates", bool, + ptentParams); + Ptent->SetParameterList(ptentParams); + Ptent->SetFactory("Aggregates", manager.GetFactory("Aggregates")); + Ptent->SetFactory("CoarseMap", manager.GetFactory("CoarseMap")); + manager.SetFactory("Ptent", Ptent); + + if (reuseType == "tP" && levelID) { + keeps.push_back(keep_pair("Nullspace", Ptent.get())); + keeps.push_back(keep_pair("P", Ptent.get())); + } +} + +// ===================================================================================================== +// ============================================ RAP +// ==================================================== +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_RAP(ParameterList ¶mList, + const ParameterList &defaultList, + FactoryManager &manager, int levelID, + std::vector &keeps) const { + if (paramList.isParameter("A") && + !paramList.get>("A").is_null()) { + // We have user matrix A + manager.SetFactory("A", NoFactory::getRCP()); + return; } - // ===================================================================================================== - // 
================================= LocalOrdinalTransfer ============================================= - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_LocalOrdinalTransfer(const std::string & VarName, const std::string &multigridAlgo,ParameterList& paramList, const ParameterList& /* defaultList */, - FactoryManager& manager, int levelID, std::vector& /* keeps */) const - { - // NOTE: You would think this would be levelID > 0, but you'd be wrong, since the FactoryManager is basically - // offset by a level from the things which actually do the work. - if (useBlockNumber_ && (levelID > 0)) { - auto RAP = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("A"))); - auto RAPs = rcp_const_cast(rcp_dynamic_cast(manager.GetFactory("A"))); - if (!RAP.is_null() || !RAPs.is_null()) { - RCP fact = rcp(new LocalOrdinalTransferFactory(VarName,multigridAlgo)); - if(multigridAlgo == "classical") - fact->SetFactory("P Graph", manager.GetFactory("P Graph")); - else - fact->SetFactory("Aggregates", manager.GetFactory("Aggregates")); - fact->SetFactory("CoarseMap", manager.GetFactory("CoarseMap")); + ParameterList RAPparams; + + RCP RAP; + RCP RAPs; + // Allow for Galerkin or shifted RAP + // FIXME: Should this not be some form of MUELU_SET_VAR_2LIST? 
+ std::string alg = paramList.get("rap: algorithm", "galerkin"); + if (alg == "shift" || alg == "non-galerkin") { + RAPs = rcp(new RAPShiftFactory()); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: shift", double, + RAPparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "rap: shift diagonal M", bool, RAPparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "rap: shift low storage", bool, RAPparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: shift array", + Teuchos::Array, RAPparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "rap: cfl array", + Teuchos::Array, RAPparams); + + } else { + RAP = rcp(new RAPFactory()); + } - fact->SetFactory(VarName, this->GetFactoryManager(levelID-1)->GetFactory(VarName)); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "rap: relative diagonal floor", + Teuchos::Array, RAPparams); + + if (paramList.isSublist("matrixmatrix: kernel params")) + RAPparams.sublist("matrixmatrix: kernel params", false) = + paramList.sublist("matrixmatrix: kernel params"); + if (defaultList.isSublist("matrixmatrix: kernel params")) + RAPparams.sublist("matrixmatrix: kernel params", false) = + defaultList.sublist("matrixmatrix: kernel params"); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "transpose: use implicit", bool, RAPparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "rap: fix zero diagonals", bool, RAPparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "rap: fix zero diagonals threshold", double, + RAPparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "rap: fix zero diagonals replacement", Scalar, + RAPparams); + + // if "rap: triple product" has not been set and algorithm is "unsmoothed" + // switch triple product on + if (!paramList.isParameter("rap: triple product") && + paramList.isType("multigrid algorithm") && + paramList.get("multigrid algorithm") == "unsmoothed") + paramList.set("rap: 
triple product", true); + else + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "rap: triple product", bool, RAPparams); + + try { + if (paramList.isParameter("aggregation: allow empty prolongator columns")) { + RAPparams.set( + "CheckMainDiagonal", + paramList.get("aggregation: allow empty prolongator columns")); + RAPparams.set( + "RepairMainDiagonal", + paramList.get("aggregation: allow empty prolongator columns")); + } else if (defaultList.isParameter( + "aggregation: allow empty prolongator columns")) { + RAPparams.set("CheckMainDiagonal", + defaultList.get( + "aggregation: allow empty prolongator columns")); + RAPparams.set("RepairMainDiagonal", + defaultList.get( + "aggregation: allow empty prolongator columns")); + } - manager.SetFactory(VarName, fact); + } catch (Teuchos::Exceptions::InvalidParameterType &) { + TEUCHOS_TEST_FOR_EXCEPTION_PURE_MSG( + true, Teuchos::Exceptions::InvalidParameterType, + "Error: parameter \"aggregation: allow empty prolongator columns\" " + "must be of type " + << Teuchos::TypeNameTraits::name()); + } - if (!RAP.is_null()) - RAP->AddTransferFactory(manager.GetFactory(VarName)); - else - RAPs->AddTransferFactory(manager.GetFactory(VarName)); - } - } + if (!RAP.is_null()) { + RAP->SetParameterList(RAPparams); + RAP->SetFactory("P", manager.GetFactory("P")); + } else { + RAPs->SetParameterList(RAPparams); + RAPs->SetFactory("P", manager.GetFactory("P")); } + if (!this->implicitTranspose_) { + if (!RAP.is_null()) + RAP->SetFactory("R", manager.GetFactory("R")); + else + RAPs->SetFactory("R", manager.GetFactory("R")); + } - // ====================================================================================================== - // ====================================== BlockNumber ================================================= - // ===================================================================================================== - template - void ParameterListInterpreter:: - 
UpdateFactoryManager_BlockNumber(ParameterList& paramList, const ParameterList& defaultList, - FactoryManager& manager, int levelID , std::vector& keeps) const - { - if(useBlockNumber_) { - ParameterList myParams; - RCP fact = rcp(new InitialBlockNumberFactory()); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: block diagonal: interleaved blocksize", int, myParams); - fact->SetParameterList(myParams); - manager.SetFactory("BlockNumber",fact); - } + if (MUELU_TEST_PARAM_2LIST(paramList, defaultList, + "aggregation: export visualization data", bool, + true)) { + RCP aggExport = + rcp(new AggregationExportFactory()); + ParameterList aggExportParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: output filename", std::string, + aggExportParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: output file: agg style", + std::string, aggExportParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: output file: iter", int, + aggExportParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: output file: time step", int, + aggExportParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: output file: fine graph edges", + bool, aggExportParams); + MUELU_TEST_AND_SET_PARAM_2LIST( + paramList, defaultList, "aggregation: output file: coarse graph edges", + bool, aggExportParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "aggregation: output file: build colormap", + bool, aggExportParams); + aggExport->SetParameterList(aggExportParams); + aggExport->SetFactory("DofsPerNode", manager.GetFactory("DofsPerNode")); + if (!RAP.is_null()) + RAP->AddTransferFactory(aggExport); + else + RAPs->AddTransferFactory(aggExport); } + if (!RAP.is_null()) + manager.SetFactory("A", RAP); + else + manager.SetFactory("A", RAPs); + + MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, + reuseType); + 
MUELU_SET_VAR_2LIST(paramList, defaultList, "sa: use filtered matrix", bool, + useFiltering); + bool filteringChangesMatrix = + useFiltering && + !MUELU_TEST_PARAM_2LIST(paramList, defaultList, "aggregation: drop tol", + double, 0); + + if (reuseType == "RP" || (reuseType == "tP" && !filteringChangesMatrix)) { + if (!RAP.is_null()) { + keeps.push_back(keep_pair("AP reuse data", RAP.get())); + keeps.push_back(keep_pair("RAP reuse data", RAP.get())); + } else { + keeps.push_back(keep_pair("AP reuse data", RAPs.get())); + keeps.push_back(keep_pair("RAP reuse data", RAPs.get())); + } + } +} + +// ===================================================================================================== +// ======================================= Coordinates +// ================================================= +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_Coordinates( + ParameterList ¶mList, const ParameterList & /* defaultList */, + FactoryManager &manager, int /* levelID */, + std::vector & /* keeps */) const { + bool have_userCO = false; + if (paramList.isParameter("Coordinates") && + !paramList.get>("Coordinates").is_null()) + have_userCO = true; + + if (useCoordinates_) { + if (have_userCO) { + manager.SetFactory("Coordinates", NoFactory::getRCP()); - // ===================================================================================================== - // =========================================== Restriction ============================================= - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_Restriction(ParameterList& paramList, const ParameterList& defaultList , FactoryManager& manager, - int levelID, std::vector& /* keeps */) const - { - MUELU_SET_VAR_2LIST(paramList, defaultList, "multigrid algorithm", 
std::string, multigridAlgo); - bool have_userR = false; - if (paramList.isParameter("R") && !paramList.get >("R").is_null()) - have_userR = true; + } else { + RCP coords = rcp(new CoordinatesTransferFactory()); + coords->SetFactory("Aggregates", manager.GetFactory("Aggregates")); + coords->SetFactory("CoarseMap", manager.GetFactory("CoarseMap")); + manager.SetFactory("Coordinates", coords); - // === Restriction === - RCP R; - if (!this->implicitTranspose_) { - MUELU_SET_VAR_2LIST(paramList, defaultList, "problem: symmetric", bool, isSymmetric); - - if (isSymmetric == false && (multigridAlgo == "unsmoothed" || multigridAlgo == "emin")) { - this->GetOStream(Warnings0) << - "Switching \"problem: symmetric\" parameter to symmetric as multigrid algorithm. " << - multigridAlgo << " is primarily supposed to be used for symmetric problems.\n\n" << - "Please note: if you are using \"unsmoothed\" transfer operators the \"problem: symmetric\" parameter " << - "has no real mathematical meaning, i.e. you can use it for non-symmetric\n" << - "problems, too. With \"problem: symmetric\"=\"symmetric\" you can use implicit transpose for building " << - "the restriction operators which may drastically reduce the amount of consumed memory." << std::endl; - isSymmetric = true; + auto RAP = rcp_const_cast( + rcp_dynamic_cast(manager.GetFactory("A"))); + if (!RAP.is_null()) { + RAP->AddTransferFactory(manager.GetFactory("Coordinates")); + } else { + auto RAPs = rcp_const_cast( + rcp_dynamic_cast(manager.GetFactory("A"))); + RAPs->AddTransferFactory(manager.GetFactory("Coordinates")); } - TEUCHOS_TEST_FOR_EXCEPTION(multigridAlgo == "pg" && isSymmetric == true, Exceptions::RuntimeError, - "Petrov-Galerkin smoothed transfer operators are only allowed for non-symmetric problems: Set \"problem: symmetric\" to false!\n" \ - "While PG smoothed transfer operators generally would also work for symmetric problems this is an unusual use case. 
" \ - "You can use the factory-based xml interface though if you need PG-AMG for symmetric problems."); + } + } +} + +// ===================================================================================================== +// ================================= LocalOrdinalTransfer +// ============================================= +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_LocalOrdinalTransfer( + const std::string &VarName, const std::string &multigridAlgo, + ParameterList ¶mList, const ParameterList & /* defaultList */, + FactoryManager &manager, int levelID, + std::vector & /* keeps */) const { + // NOTE: You would think this would be levelID > 0, but you'd be wrong, since + // the FactoryManager is basically offset by a level from the things which + // actually do the work. + if (useBlockNumber_ && (levelID > 0)) { + auto RAP = rcp_const_cast( + rcp_dynamic_cast(manager.GetFactory("A"))); + auto RAPs = rcp_const_cast( + rcp_dynamic_cast(manager.GetFactory("A"))); + if (!RAP.is_null() || !RAPs.is_null()) { + RCP fact = + rcp(new LocalOrdinalTransferFactory(VarName, multigridAlgo)); + if (multigridAlgo == "classical") + fact->SetFactory("P Graph", manager.GetFactory("P Graph")); + else + fact->SetFactory("Aggregates", manager.GetFactory("Aggregates")); + fact->SetFactory("CoarseMap", manager.GetFactory("CoarseMap")); - if (have_userR) { - manager.SetFactory("R", NoFactory::getRCP()); - } else { - if (isSymmetric) R = rcp(new TransPFactory()); - else R = rcp(new GenericRFactory()); + fact->SetFactory( + VarName, this->GetFactoryManager(levelID - 1)->GetFactory(VarName)); - R->SetFactory("P", manager.GetFactory("P")); - manager.SetFactory("R", R); - } + manager.SetFactory(VarName, fact); - } else { - manager.SetFactory("R", Teuchos::null); + if (!RAP.is_null()) + RAP->AddTransferFactory(manager.GetFactory(VarName)); + else + 
RAPs->AddTransferFactory(manager.GetFactory(VarName)); } - - // === Restriction: Nullspace Scaling === - if (paramList.isParameter("restriction: scale nullspace") && paramList.get("restriction: scale nullspace")) { - RCP tentPFactory = rcp(new TentativePFactory()); - Teuchos::ParameterList tentPlist; - tentPlist.set("Nullspace name","Scaled Nullspace"); - tentPFactory->SetParameterList(tentPlist); - tentPFactory->SetFactory("Aggregates",manager.GetFactory("Aggregates")); - tentPFactory->SetFactory("CoarseMap",manager.GetFactory("CoarseMap")); - - if(R.is_null()) R = rcp(new TransPFactory()); - R->SetFactory("P",tentPFactory); + } +} + +// ====================================================================================================== +// ====================================== BlockNumber +// ================================================= +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_BlockNumber(ParameterList ¶mList, + const ParameterList &defaultList, + FactoryManager &manager, int levelID, + std::vector &keeps) const { + if (useBlockNumber_) { + ParameterList myParams; + RCP fact = rcp(new InitialBlockNumberFactory()); + MUELU_TEST_AND_SET_PARAM_2LIST( + paramList, defaultList, + "aggregation: block diagonal: interleaved blocksize", int, myParams); + fact->SetParameterList(myParams); + manager.SetFactory("BlockNumber", fact); + } +} + +// ===================================================================================================== +// =========================================== Restriction +// ============================================= +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_Restriction( + ParameterList ¶mList, const ParameterList &defaultList, + FactoryManager &manager, int levelID, + 
std::vector & /* keeps */) const { + MUELU_SET_VAR_2LIST(paramList, defaultList, "multigrid algorithm", + std::string, multigridAlgo); + bool have_userR = false; + if (paramList.isParameter("R") && !paramList.get>("R").is_null()) + have_userR = true; + + // === Restriction === + RCP R; + if (!this->implicitTranspose_) { + MUELU_SET_VAR_2LIST(paramList, defaultList, "problem: symmetric", bool, + isSymmetric); + + if (isSymmetric == false && + (multigridAlgo == "unsmoothed" || multigridAlgo == "emin")) { + this->GetOStream(Warnings0) + << "Switching \"problem: symmetric\" parameter to symmetric as " + "multigrid algorithm. " + << multigridAlgo + << " is primarily supposed to be used for symmetric problems.\n\n" + << "Please note: if you are using \"unsmoothed\" transfer operators " + "the \"problem: symmetric\" parameter " + << "has no real mathematical meaning, i.e. you can use it for " + "non-symmetric\n" + << "problems, too. With \"problem: symmetric\"=\"symmetric\" you can " + "use implicit transpose for building " + << "the restriction operators which may drastically reduce the " + "amount of consumed memory." + << std::endl; + isSymmetric = true; } + TEUCHOS_TEST_FOR_EXCEPTION( + multigridAlgo == "pg" && isSymmetric == true, Exceptions::RuntimeError, + "Petrov-Galerkin smoothed transfer operators are only allowed for " + "non-symmetric problems: Set \"problem: symmetric\" to false!\n" + "While PG smoothed transfer operators generally would also work for " + "symmetric problems this is an unusual use case. 
" + "You can use the factory-based xml interface though if you need PG-AMG " + "for symmetric problems."); + + if (have_userR) { + manager.SetFactory("R", NoFactory::getRCP()); + } else { + if (isSymmetric) + R = rcp(new TransPFactory()); + else + R = rcp(new GenericRFactory()); + R->SetFactory("P", manager.GetFactory("P")); + manager.SetFactory("R", R); + } + } else { + manager.SetFactory("R", Teuchos::null); } - // ===================================================================================================== - // ========================================= Repartition =============================================== - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_Repartition(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps, RCP & nullSpaceFactory) const - { - // === Repartitioning === - MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); - MUELU_SET_VAR_2LIST(paramList, defaultList, "repartition: enable", bool, enableRepart); - if (enableRepart) { -#if defined(HAVE_MPI) && (defined(HAVE_MUELU_ZOLTAN) || defined(HAVE_MUELU_ZOLTAN2)) // skip to the end, print warning, and turn off repartitioning if we don't have MPI and Zoltan/Zoltan2 - MUELU_SET_VAR_2LIST(paramList, defaultList, "repartition: use subcommunicators in place", bool, enableInPlace); - // Short summary of the issue: RebalanceTransferFactory shares ownership - // of "P" with SaPFactory, and therefore, changes the stored version. - // That means that if SaPFactory generated P, and stored it on the level, - // then after rebalancing the value in that storage changed. It goes - // against the concept of factories (I think), that every factory is - // responsible for its own objects, and they are immutable outside. 
- // - // In reuse, this is what happens: as we reuse Importer across setups, - // the order of factories changes, and coupled with shared ownership - // leads to problems. - // *First setup* - // SaP builds P [and stores it] - // TransP builds R [and stores it] - // RAP builds A [and stores it] - // RebalanceTransfer rebalances P [and changes the P stored by SaP] (*) - // RebalanceTransfer rebalances R - // RebalanceAc rebalances A - // *Second setup* ("RP" reuse) - // RebalanceTransfer rebalances P [which is incorrect due to (*)] - // RebalanceTransfer rebalances R - // RAP builds A [which is incorrect due to (*)] - // RebalanceAc rebalances A [which throws due to map inconsistency] - // ... - // *Second setup* ("tP" reuse) - // SaP builds P [and stores it] - // RebalanceTransfer rebalances P [and changes the P stored by SaP] (**) - // TransP builds R [which is incorrect due to (**)] - // RebalanceTransfer rebalances R - // ... - // - // Couple solutions to this: - // 1. [implemented] Requre "tP" and "PR" reuse to only be used with - // implicit rebalancing. - // 2. Do deep copy of P, and changed domain map and importer there. - // Need to investigate how expensive this is. - TEUCHOS_TEST_FOR_EXCEPTION(this->doPRrebalance_ && (reuseType == "tP" || reuseType == "RP"), Exceptions::InvalidArgument, - "Reuse types \"tP\" and \"PR\" require \"repartition: rebalance P and R\" set to \"false\""); - - // TEUCHOS_TEST_FOR_EXCEPTION(aggType == "brick", Exceptions::InvalidArgument, - // "Aggregation type \"brick\" requires \"repartition: enable\" set to \"false\""); - - MUELU_SET_VAR_2LIST(paramList, defaultList, "repartition: partitioner", std::string, partName); - TEUCHOS_TEST_FOR_EXCEPTION(partName != "zoltan" && partName != "zoltan2", Exceptions::InvalidArgument, - "Invalid partitioner name: \"" << partName << "\". 
Valid options: \"zoltan\", \"zoltan2\""); - -# ifndef HAVE_MUELU_ZOLTAN - bool switched = false; - if (partName == "zoltan") { - this->GetOStream(Warnings0) << "Zoltan interface is not available, trying to switch to Zoltan2" << std::endl; - partName = "zoltan2"; - switched = true; - } -# else -# ifndef HAVE_MUELU_ZOLTAN2 - bool switched = false; -# endif // HAVE_MUELU_ZOLTAN2 -# endif // HAVE_MUELU_ZOLTAN - -# ifndef HAVE_MUELU_ZOLTAN2 - if (partName == "zoltan2" && !switched) { - this->GetOStream(Warnings0) << "Zoltan2 interface is not available, trying to switch to Zoltan" << std::endl; - partName = "zoltan"; - } -# endif // HAVE_MUELU_ZOLTAN2 - - MUELU_SET_VAR_2LIST(paramList, defaultList, "repartition: node repartition level",int,nodeRepartitionLevel); - - // RepartitionHeuristic - auto repartheurFactory = rcp(new RepartitionHeuristicFactory()); - ParameterList repartheurParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: node repartition level", int, repartheurParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: start level", int, repartheurParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: min rows per proc", int, repartheurParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: target rows per proc", int, repartheurParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: min rows per thread", int, repartheurParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: target rows per thread", int, repartheurParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: max imbalance", double, repartheurParams); - repartheurFactory->SetParameterList(repartheurParams); - repartheurFactory->SetFactory("A", manager.GetFactory("A")); - manager.SetFactory("number of partitions", repartheurFactory); - manager.SetFactory("repartition: heuristic target rows per process", repartheurFactory); - - // 
Partitioner - RCP partitioner; - if (levelID == nodeRepartitionLevel) { - // partitioner = rcp(new NodePartitionInterface()); - partitioner = rcp(new MueLu::NodePartitionInterface()); - ParameterList partParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: node id" ,int,repartheurParams); - partitioner->SetParameterList(partParams); - partitioner->SetFactory("Node Comm", manager.GetFactory("Node Comm")); - } - else if (partName == "zoltan") { -# ifdef HAVE_MUELU_ZOLTAN - partitioner = rcp(new ZoltanInterface()); - // NOTE: ZoltanInterface ("zoltan") does not support external parameters through ParameterList -# else - throw Exceptions::RuntimeError("Zoltan interface is not available"); -# endif // HAVE_MUELU_ZOLTAN - } else if (partName == "zoltan2") { -# ifdef HAVE_MUELU_ZOLTAN2 - partitioner = rcp(new Zoltan2Interface()); - ParameterList partParams; - RCP partpartParams = rcp(new ParameterList(paramList.sublist("repartition: params", false))); - partParams.set("ParameterList", partpartParams); - partitioner->SetParameterList(partParams); - partitioner->SetFactory("repartition: heuristic target rows per process", - manager.GetFactory("repartition: heuristic target rows per process")); -# else - throw Exceptions::RuntimeError("Zoltan2 interface is not available"); -# endif // HAVE_MUELU_ZOLTAN2 - } - - partitioner->SetFactory("A", manager.GetFactory("A")); - partitioner->SetFactory("number of partitions", manager.GetFactory("number of partitions")); - if (useCoordinates_) - partitioner->SetFactory("Coordinates", manager.GetFactory("Coordinates")); - manager.SetFactory("Partition", partitioner); - - // Repartitioner - auto repartFactory = rcp(new RepartitionFactory()); - ParameterList repartParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: print partition distribution", bool, repartParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: remap parts", bool, repartParams); - 
MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: remap num values", int, repartParams); - repartFactory->SetParameterList(repartParams); - repartFactory->SetFactory("A", manager.GetFactory("A")); - repartFactory->SetFactory("number of partitions", manager.GetFactory("number of partitions")); - repartFactory->SetFactory("Partition", manager.GetFactory("Partition")); - manager.SetFactory("Importer", repartFactory); - if (reuseType != "none" && reuseType != "S" && levelID) - keeps.push_back(keep_pair("Importer", manager.GetFactory("Importer").get())); - - - if(enableInPlace) { - // Rebalanced A (in place) - // NOTE: This is for when we want to constrain repartitioning to match some other idea of what's going on. - // The major application is the (1,1) hierarchy in the Maxwell1 preconditioner. - auto newA = rcp(new RebalanceAcFactory()); - ParameterList rebAcParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: use subcommunicators", bool, rebAcParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: use subcommunicators in place", bool, rebAcParams); - newA->SetParameterList(rebAcParams); - newA->SetFactory("A", manager.GetFactory("A")); - newA->SetFactory("InPlaceMap", manager.GetFactory("InPlaceMap")); - manager.SetFactory("A",newA); - } - else { - // Rebalanced A - auto newA = rcp(new RebalanceAcFactory()); - ParameterList rebAcParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: use subcommunicators", bool, rebAcParams); - newA->SetParameterList(rebAcParams); - newA->SetFactory("A", manager.GetFactory("A")); - newA->SetFactory("Importer", manager.GetFactory("Importer")); - manager.SetFactory("A", newA); - - // Rebalanced P - auto newP = rcp(new RebalanceTransferFactory()); - ParameterList newPparams; - newPparams.set("type", "Interpolation"); - if (changedPRrebalance_) - newPparams.set("repartition: rebalance P and R", this->doPRrebalance_); - if 
(changedPRViaCopyrebalance_) - newPparams.set("repartition: explicit via new copy rebalance P and R",true); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: use subcommunicators", bool, newPparams); - newP-> SetParameterList(newPparams); - newP-> SetFactory("Importer", manager.GetFactory("Importer")); - newP-> SetFactory("P", manager.GetFactory("P")); - if (!paramList.isParameter("semicoarsen: number of levels")) - newP->SetFactory("Nullspace", manager.GetFactory("Ptent")); - else - newP->SetFactory("Nullspace", manager.GetFactory("P")); // TogglePFactory - if (useCoordinates_) - newP-> SetFactory("Coordinates", manager.GetFactory("Coordinates")); - manager.SetFactory("P", newP); - if (useCoordinates_) - manager.SetFactory("Coordinates", newP); - if (useBlockNumber_ && (levelID > 0)) { - newP->SetFactory("BlockNumber", manager.GetFactory("BlockNumber")); - manager.SetFactory("BlockNumber", newP); - } - - // Rebalanced R - auto newR = rcp(new RebalanceTransferFactory()); - ParameterList newRparams; - newRparams.set("type", "Restriction"); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: use subcommunicators", bool, newRparams); - if (changedPRrebalance_) - newRparams.set("repartition: rebalance P and R", this->doPRrebalance_); - if (changedPRViaCopyrebalance_) - newPparams.set("repartition: explicit via new copy rebalance P and R",true); - if (changedImplicitTranspose_) - newRparams.set("transpose: use implicit", this->implicitTranspose_); - newR-> SetParameterList(newRparams); - newR-> SetFactory("Importer", manager.GetFactory("Importer")); - if (!this->implicitTranspose_) { - newR->SetFactory("R", manager.GetFactory("R")); - manager.SetFactory("R", newR); - } - - // NOTE: the role of NullspaceFactory is to provide nullspace on the finest - // level if a user does not do that. For all other levels it simply passes - // nullspace from a real factory to whoever needs it. 
If we don't use - // repartitioning, that factory is "TentativePFactory"; if we do, it is - // "RebalanceTransferFactory". But we still have to have NullspaceFactory as - // the "Nullspace" of the manager - // NOTE: This really needs to be set on the *NullSpaceFactory*, not manager.get("Nullspace"). - ParameterList newNullparams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "nullspace: calculate rotations", bool, newNullparams); - nullSpaceFactory->SetFactory("Nullspace", newP); - nullSpaceFactory->SetParameterList(newNullparams); - } + // === Restriction: Nullspace Scaling === + if (paramList.isParameter("restriction: scale nullspace") && + paramList.get("restriction: scale nullspace")) { + RCP tentPFactory = rcp(new TentativePFactory()); + Teuchos::ParameterList tentPlist; + tentPlist.set("Nullspace name", "Scaled Nullspace"); + tentPFactory->SetParameterList(tentPlist); + tentPFactory->SetFactory("Aggregates", manager.GetFactory("Aggregates")); + tentPFactory->SetFactory("CoarseMap", manager.GetFactory("CoarseMap")); + + if (R.is_null()) + R = rcp(new TransPFactory()); + R->SetFactory("P", tentPFactory); + } +} + +// ===================================================================================================== +// ========================================= Repartition +// =============================================== +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_Repartition(ParameterList ¶mList, + const ParameterList &defaultList, + FactoryManager &manager, int levelID, + std::vector &keeps, + RCP &nullSpaceFactory) const { + // === Repartitioning === + MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, + reuseType); + MUELU_SET_VAR_2LIST(paramList, defaultList, "repartition: enable", bool, + enableRepart); + if (enableRepart) { +#if defined(HAVE_MPI) && \ + (defined(HAVE_MUELU_ZOLTAN) || \ + 
defined(HAVE_MUELU_ZOLTAN2)) // skip to the end, print warning, and turn + // off repartitioning if we don't have MPI and + // Zoltan/Zoltan2 + MUELU_SET_VAR_2LIST(paramList, defaultList, + "repartition: use subcommunicators in place", bool, + enableInPlace); + // Short summary of the issue: RebalanceTransferFactory shares ownership + // of "P" with SaPFactory, and therefore, changes the stored version. + // That means that if SaPFactory generated P, and stored it on the level, + // then after rebalancing the value in that storage changed. It goes + // against the concept of factories (I think), that every factory is + // responsible for its own objects, and they are immutable outside. + // + // In reuse, this is what happens: as we reuse Importer across setups, + // the order of factories changes, and coupled with shared ownership + // leads to problems. + // *First setup* + // SaP builds P [and stores it] + // TransP builds R [and stores it] + // RAP builds A [and stores it] + // RebalanceTransfer rebalances P [and changes the P stored by SaP] (*) + // RebalanceTransfer rebalances R + // RebalanceAc rebalances A + // *Second setup* ("RP" reuse) + // RebalanceTransfer rebalances P [which is incorrect due to (*)] + // RebalanceTransfer rebalances R + // RAP builds A [which is incorrect due to (*)] + // RebalanceAc rebalances A [which throws due to map inconsistency] + // ... + // *Second setup* ("tP" reuse) + // SaP builds P [and stores it] + // RebalanceTransfer rebalances P [and changes the P stored by SaP] (**) + // TransP builds R [which is incorrect due to (**)] + // RebalanceTransfer rebalances R + // ... + // + // Couple solutions to this: + // 1. [implemented] Requre "tP" and "PR" reuse to only be used with + // implicit rebalancing. + // 2. Do deep copy of P, and changed domain map and importer there. + // Need to investigate how expensive this is. 
+ TEUCHOS_TEST_FOR_EXCEPTION( + this->doPRrebalance_ && (reuseType == "tP" || reuseType == "RP"), + Exceptions::InvalidArgument, + "Reuse types \"tP\" and \"PR\" require \"repartition: rebalance P and " + "R\" set to \"false\""); + + // TEUCHOS_TEST_FOR_EXCEPTION(aggType == "brick", + // Exceptions::InvalidArgument, + // "Aggregation type \"brick\" requires + // \"repartition: enable\" set to \"false\""); + + MUELU_SET_VAR_2LIST(paramList, defaultList, "repartition: partitioner", + std::string, partName); + TEUCHOS_TEST_FOR_EXCEPTION( + partName != "zoltan" && partName != "zoltan2", + Exceptions::InvalidArgument, + "Invalid partitioner name: \"" + << partName << "\". Valid options: \"zoltan\", \"zoltan2\""); + +#ifndef HAVE_MUELU_ZOLTAN + bool switched = false; + if (partName == "zoltan") { + this->GetOStream(Warnings0) + << "Zoltan interface is not available, trying to switch to Zoltan2" + << std::endl; + partName = "zoltan2"; + switched = true; + } #else - paramList.set("repartition: enable",false); -# ifndef HAVE_MPI - this->GetOStream(Warnings0) << "No repartitioning available for a serial run\n"; -# else - this->GetOStream(Warnings0) << "Zoltan/Zoltan2 are unavailable for repartitioning\n"; -# endif // HAVE_MPI -#endif // defined(HAVE_MPI) && (defined(HAVE_MUELU_ZOLTAN) || defined(HAVE_MUELU_ZOLTAN2)) +#ifndef HAVE_MUELU_ZOLTAN2 + bool switched = false; +#endif // HAVE_MUELU_ZOLTAN2 +#endif // HAVE_MUELU_ZOLTAN + +#ifndef HAVE_MUELU_ZOLTAN2 + if (partName == "zoltan2" && !switched) { + this->GetOStream(Warnings0) + << "Zoltan2 interface is not available, trying to switch to Zoltan" + << std::endl; + partName = "zoltan"; + } +#endif // HAVE_MUELU_ZOLTAN2 + + MUELU_SET_VAR_2LIST(paramList, defaultList, + "repartition: node repartition level", int, + nodeRepartitionLevel); + + // RepartitionHeuristic + auto repartheurFactory = rcp(new RepartitionHeuristicFactory()); + ParameterList repartheurParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + 
"repartition: node repartition level", int, + repartheurParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "repartition: start level", int, + repartheurParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "repartition: min rows per proc", int, + repartheurParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "repartition: target rows per proc", int, + repartheurParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "repartition: min rows per thread", int, + repartheurParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "repartition: target rows per thread", int, + repartheurParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "repartition: max imbalance", double, + repartheurParams); + repartheurFactory->SetParameterList(repartheurParams); + repartheurFactory->SetFactory("A", manager.GetFactory("A")); + manager.SetFactory("number of partitions", repartheurFactory); + manager.SetFactory("repartition: heuristic target rows per process", + repartheurFactory); + + // Partitioner + RCP partitioner; + if (levelID == nodeRepartitionLevel) { + // partitioner = rcp(new NodePartitionInterface()); + partitioner = rcp(new MueLu::NodePartitionInterface()); + ParameterList partParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "repartition: node id", int, + repartheurParams); + partitioner->SetParameterList(partParams); + partitioner->SetFactory("Node Comm", manager.GetFactory("Node Comm")); + } else if (partName == "zoltan") { +#ifdef HAVE_MUELU_ZOLTAN + partitioner = rcp(new ZoltanInterface()); + // NOTE: ZoltanInterface ("zoltan") does not support external parameters + // through ParameterList +#else + throw Exceptions::RuntimeError("Zoltan interface is not available"); +#endif // HAVE_MUELU_ZOLTAN + } else if (partName == "zoltan2") { +#ifdef HAVE_MUELU_ZOLTAN2 + partitioner = rcp(new Zoltan2Interface()); + ParameterList partParams; + RCP partpartParams = rcp( + new 
ParameterList(paramList.sublist("repartition: params", false))); + partParams.set("ParameterList", partpartParams); + partitioner->SetParameterList(partParams); + partitioner->SetFactory( + "repartition: heuristic target rows per process", + manager.GetFactory("repartition: heuristic target rows per process")); +#else + throw Exceptions::RuntimeError("Zoltan2 interface is not available"); +#endif // HAVE_MUELU_ZOLTAN2 } - } - - // ===================================================================================================== - // ========================================= Low precision transfers =================================== - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_LowPrecision(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const - { - MUELU_SET_VAR_2LIST(paramList, defaultList, "transfers: half precision", bool, enableLowPrecision); - if (enableLowPrecision) { - // Low precision P - auto newP = rcp(new LowPrecisionFactory()); + partitioner->SetFactory("A", manager.GetFactory("A")); + partitioner->SetFactory("number of partitions", + manager.GetFactory("number of partitions")); + if (useCoordinates_) + partitioner->SetFactory("Coordinates", manager.GetFactory("Coordinates")); + manager.SetFactory("Partition", partitioner); + + // Repartitioner + auto repartFactory = rcp(new RepartitionFactory()); + ParameterList repartParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "repartition: print partition distribution", + bool, repartParams); + MUELU_TEST_AND_SET_PARAM_2LIST( + paramList, defaultList, "repartition: remap parts", bool, repartParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "repartition: remap num values", int, + repartParams); + repartFactory->SetParameterList(repartParams); + repartFactory->SetFactory("A", 
manager.GetFactory("A")); + repartFactory->SetFactory("number of partitions", + manager.GetFactory("number of partitions")); + repartFactory->SetFactory("Partition", manager.GetFactory("Partition")); + manager.SetFactory("Importer", repartFactory); + if (reuseType != "none" && reuseType != "S" && levelID) + keeps.push_back( + keep_pair("Importer", manager.GetFactory("Importer").get())); + + if (enableInPlace) { + // Rebalanced A (in place) + // NOTE: This is for when we want to constrain repartitioning to match + // some other idea of what's going on. The major application is the (1,1) + // hierarchy in the Maxwell1 preconditioner. + auto newA = rcp(new RebalanceAcFactory()); + ParameterList rebAcParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "repartition: use subcommunicators", bool, + rebAcParams); + MUELU_TEST_AND_SET_PARAM_2LIST( + paramList, defaultList, "repartition: use subcommunicators in place", + bool, rebAcParams); + newA->SetParameterList(rebAcParams); + newA->SetFactory("A", manager.GetFactory("A")); + newA->SetFactory("InPlaceMap", manager.GetFactory("InPlaceMap")); + manager.SetFactory("A", newA); + } else { + // Rebalanced A + auto newA = rcp(new RebalanceAcFactory()); + ParameterList rebAcParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "repartition: use subcommunicators", bool, + rebAcParams); + newA->SetParameterList(rebAcParams); + newA->SetFactory("A", manager.GetFactory("A")); + newA->SetFactory("Importer", manager.GetFactory("Importer")); + manager.SetFactory("A", newA); + + // Rebalanced P + auto newP = rcp(new RebalanceTransferFactory()); ParameterList newPparams; - newPparams.set("matrix key", "P"); - newP-> SetParameterList(newPparams); - newP-> SetFactory("P", manager.GetFactory("P")); + newPparams.set("type", "Interpolation"); + if (changedPRrebalance_) + newPparams.set("repartition: rebalance P and R", this->doPRrebalance_); + if (changedPRViaCopyrebalance_) + newPparams.set("repartition: explicit 
via new copy rebalance P and R", + true); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "repartition: use subcommunicators", bool, + newPparams); + newP->SetParameterList(newPparams); + newP->SetFactory("Importer", manager.GetFactory("Importer")); + newP->SetFactory("P", manager.GetFactory("P")); + if (!paramList.isParameter("semicoarsen: number of levels")) + newP->SetFactory("Nullspace", manager.GetFactory("Ptent")); + else + newP->SetFactory("Nullspace", + manager.GetFactory("P")); // TogglePFactory + if (useCoordinates_) + newP->SetFactory("Coordinates", manager.GetFactory("Coordinates")); manager.SetFactory("P", newP); + if (useCoordinates_) + manager.SetFactory("Coordinates", newP); + if (useBlockNumber_ && (levelID > 0)) { + newP->SetFactory("BlockNumber", manager.GetFactory("BlockNumber")); + manager.SetFactory("BlockNumber", newP); + } + // Rebalanced R + auto newR = rcp(new RebalanceTransferFactory()); + ParameterList newRparams; + newRparams.set("type", "Restriction"); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "repartition: use subcommunicators", bool, + newRparams); + if (changedPRrebalance_) + newRparams.set("repartition: rebalance P and R", this->doPRrebalance_); + if (changedPRViaCopyrebalance_) + newPparams.set("repartition: explicit via new copy rebalance P and R", + true); + if (changedImplicitTranspose_) + newRparams.set("transpose: use implicit", this->implicitTranspose_); + newR->SetParameterList(newRparams); + newR->SetFactory("Importer", manager.GetFactory("Importer")); if (!this->implicitTranspose_) { - // Low precision R - auto newR = rcp(new LowPrecisionFactory()); - ParameterList newRparams; - newRparams.set("matrix key", "R"); - newR-> SetParameterList(newRparams); - newR-> SetFactory("R", manager.GetFactory("R")); + newR->SetFactory("R", manager.GetFactory("R")); manager.SetFactory("R", newR); } - } - } - - // 
===================================================================================================== - // =========================================== Nullspace =============================================== - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_Nullspace(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, - int /* levelID */, std::vector& /* keeps */, RCP & nullSpaceFactory) const - { - // Nullspace - MUELU_KOKKOS_FACTORY(nullSpace, NullspaceFactory, NullspaceFactory_kokkos); - bool have_userNS = false; - if (paramList.isParameter("Nullspace") && !paramList.get >("Nullspace").is_null()) - have_userNS = true; - - if (!have_userNS) { + // NOTE: the role of NullspaceFactory is to provide nullspace on the + // finest level if a user does not do that. For all other levels it simply + // passes nullspace from a real factory to whoever needs it. If we don't + // use repartitioning, that factory is "TentativePFactory"; if we do, it + // is "RebalanceTransferFactory". But we still have to have + // NullspaceFactory as the "Nullspace" of the manager NOTE: This really + // needs to be set on the *NullSpaceFactory*, not + // manager.get("Nullspace"). 
ParameterList newNullparams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "nullspace: calculate rotations", bool, newNullparams); - nullSpace->SetParameterList(newNullparams); - nullSpace->SetFactory("Nullspace", manager.GetFactory("Ptent")); - manager.SetFactory("Nullspace", nullSpace); - } - nullSpaceFactory = nullSpace; - - if (paramList.isParameter("restriction: scale nullspace") && paramList.get("restriction: scale nullspace")) { - RCP scaledNSfactory = rcp(new ScaledNullspaceFactory()); - scaledNSfactory->SetFactory("Nullspace",nullSpaceFactory); - manager.SetFactory("Scaled Nullspace",scaledNSfactory); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "nullspace: calculate rotations", bool, + newNullparams); + nullSpaceFactory->SetFactory("Nullspace", newP); + nullSpaceFactory->SetParameterList(newNullparams); } - +#else + paramList.set("repartition: enable", false); +#ifndef HAVE_MPI + this->GetOStream(Warnings0) + << "No repartitioning available for a serial run\n"; +#else + this->GetOStream(Warnings0) + << "Zoltan/Zoltan2 are unavailable for repartitioning\n"; +#endif // HAVE_MPI +#endif // defined(HAVE_MPI) && (defined(HAVE_MUELU_ZOLTAN) || + // defined(HAVE_MUELU_ZOLTAN2)) } +} + +// ===================================================================================================== +// ========================================= Low precision transfers +// =================================== +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_LowPrecision(ParameterList ¶mList, + const ParameterList &defaultList, + FactoryManager &manager, int levelID, + std::vector &keeps) const { + MUELU_SET_VAR_2LIST(paramList, defaultList, "transfers: half precision", bool, + enableLowPrecision); + + if (enableLowPrecision) { + // Low precision P + auto newP = rcp(new LowPrecisionFactory()); + ParameterList newPparams; + 
newPparams.set("matrix key", "P"); + newP->SetParameterList(newPparams); + newP->SetFactory("P", manager.GetFactory("P")); + manager.SetFactory("P", newP); - // ===================================================================================================== - // ================================= Algorithm: SemiCoarsening ========================================= - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_SemiCoarsen(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, - int /* levelID */, std::vector& /* keeps */) const - { - // === Semi-coarsening === - RCP semicoarsenFactory = Teuchos::null; - if (paramList.isParameter("semicoarsen: number of levels") && - paramList.get("semicoarsen: number of levels") > 0) { - - ParameterList togglePParams; - ParameterList semicoarsenPParams; - ParameterList linedetectionParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "semicoarsen: number of levels", int, togglePParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "semicoarsen: coarsen rate", int, semicoarsenPParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "semicoarsen: piecewise constant", bool, semicoarsenPParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "semicoarsen: piecewise linear", bool, semicoarsenPParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "semicoarsen: calculate nonsym restriction", bool, semicoarsenPParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "linedetection: orientation", std::string, linedetectionParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "linedetection: num layers", int, linedetectionParams); - - MUELU_KOKKOS_FACTORY_NO_DECL(semicoarsenFactory, SemiCoarsenPFactory, SemiCoarsenPFactory_kokkos); - RCP linedetectionFactory = rcp(new LineDetectionFactory()); - RCP 
togglePFactory = rcp(new TogglePFactory()); - - linedetectionFactory->SetParameterList(linedetectionParams); - semicoarsenFactory ->SetParameterList(semicoarsenPParams); - togglePFactory ->SetParameterList(togglePParams); - - togglePFactory->AddCoarseNullspaceFactory (semicoarsenFactory); - togglePFactory->AddProlongatorFactory (semicoarsenFactory); - togglePFactory->AddPtentFactory (semicoarsenFactory); - togglePFactory->AddCoarseNullspaceFactory (manager.GetFactory("Ptent")); - togglePFactory->AddProlongatorFactory (manager.GetFactory("P")); - togglePFactory->AddPtentFactory (manager.GetFactory("Ptent")); - - manager.SetFactory("CoarseNumZLayers", linedetectionFactory); - manager.SetFactory("LineDetection_Layers", linedetectionFactory); - manager.SetFactory("LineDetection_VertLineIds", linedetectionFactory); - - manager.SetFactory("P", togglePFactory); - manager.SetFactory("Ptent", togglePFactory); - manager.SetFactory("Nullspace", togglePFactory); - } - - if (paramList.isParameter("semicoarsen: number of levels")) { - auto tf = rcp(new ToggleCoordinatesTransferFactory()); - tf->SetFactory("Chosen P", manager.GetFactory("P")); - tf->AddCoordTransferFactory(semicoarsenFactory); - - RCP coords = rcp(new CoordinatesTransferFactory()); - coords->SetFactory("Aggregates", manager.GetFactory("Aggregates")); - coords->SetFactory("CoarseMap", manager.GetFactory("CoarseMap")); - tf->AddCoordTransferFactory(coords); - manager.SetFactory("Coordinates", tf); + if (!this->implicitTranspose_) { + // Low precision R + auto newR = rcp(new LowPrecisionFactory()); + ParameterList newRparams; + newRparams.set("matrix key", "R"); + newR->SetParameterList(newRparams); + newR->SetFactory("R", manager.GetFactory("R")); + manager.SetFactory("R", newR); } } +} + +// ===================================================================================================== +// =========================================== Nullspace +// =============================================== +// 
===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_Nullspace(ParameterList ¶mList, + const ParameterList &defaultList, + FactoryManager &manager, int /* levelID */, + std::vector & /* keeps */, + RCP &nullSpaceFactory) const { + // Nullspace + MUELU_KOKKOS_FACTORY(nullSpace, NullspaceFactory, NullspaceFactory_kokkos); + + bool have_userNS = false; + if (paramList.isParameter("Nullspace") && + !paramList.get>("Nullspace").is_null()) + have_userNS = true; + + if (!have_userNS) { + ParameterList newNullparams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "nullspace: calculate rotations", bool, + newNullparams); + nullSpace->SetParameterList(newNullparams); + nullSpace->SetFactory("Nullspace", manager.GetFactory("Ptent")); + manager.SetFactory("Nullspace", nullSpace); + } + nullSpaceFactory = nullSpace; + + if (paramList.isParameter("restriction: scale nullspace") && + paramList.get("restriction: scale nullspace")) { + RCP scaledNSfactory = + rcp(new ScaledNullspaceFactory()); + scaledNSfactory->SetFactory("Nullspace", nullSpaceFactory); + manager.SetFactory("Scaled Nullspace", scaledNSfactory); + } +} + +// ===================================================================================================== +// ================================= Algorithm: SemiCoarsening +// ========================================= +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_SemiCoarsen( + ParameterList ¶mList, const ParameterList &defaultList, + FactoryManager &manager, int /* levelID */, + std::vector & /* keeps */) const { + // === Semi-coarsening === + RCP semicoarsenFactory = Teuchos::null; + if (paramList.isParameter("semicoarsen: number of levels") && + paramList.get("semicoarsen: number of levels") > 0) { + + ParameterList 
togglePParams; + ParameterList semicoarsenPParams; + ParameterList linedetectionParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "semicoarsen: number of levels", int, + togglePParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "semicoarsen: coarsen rate", int, + semicoarsenPParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "semicoarsen: piecewise constant", bool, + semicoarsenPParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "semicoarsen: piecewise linear", bool, + semicoarsenPParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "semicoarsen: calculate nonsym restriction", + bool, semicoarsenPParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "linedetection: orientation", std::string, + linedetectionParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "linedetection: num layers", int, + linedetectionParams); + + MUELU_KOKKOS_FACTORY_NO_DECL(semicoarsenFactory, SemiCoarsenPFactory, + SemiCoarsenPFactory_kokkos); + RCP linedetectionFactory = + rcp(new LineDetectionFactory()); + RCP togglePFactory = rcp(new TogglePFactory()); + + linedetectionFactory->SetParameterList(linedetectionParams); + semicoarsenFactory->SetParameterList(semicoarsenPParams); + togglePFactory->SetParameterList(togglePParams); + + togglePFactory->AddCoarseNullspaceFactory(semicoarsenFactory); + togglePFactory->AddProlongatorFactory(semicoarsenFactory); + togglePFactory->AddPtentFactory(semicoarsenFactory); + togglePFactory->AddCoarseNullspaceFactory(manager.GetFactory("Ptent")); + togglePFactory->AddProlongatorFactory(manager.GetFactory("P")); + togglePFactory->AddPtentFactory(manager.GetFactory("Ptent")); + + manager.SetFactory("CoarseNumZLayers", linedetectionFactory); + manager.SetFactory("LineDetection_Layers", linedetectionFactory); + manager.SetFactory("LineDetection_VertLineIds", linedetectionFactory); + + manager.SetFactory("P", togglePFactory); + 
manager.SetFactory("Ptent", togglePFactory); + manager.SetFactory("Nullspace", togglePFactory); + } + if (paramList.isParameter("semicoarsen: number of levels")) { + auto tf = rcp(new ToggleCoordinatesTransferFactory()); + tf->SetFactory("Chosen P", manager.GetFactory("P")); + tf->AddCoordTransferFactory(semicoarsenFactory); - // ===================================================================================================== - // ================================== Algorithm: P-Coarsening ========================================== - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_PCoarsen(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, - int levelID, std::vector& keeps) const - { + RCP coords = rcp(new CoordinatesTransferFactory()); + coords->SetFactory("Aggregates", manager.GetFactory("Aggregates")); + coords->SetFactory("CoarseMap", manager.GetFactory("CoarseMap")); + tf->AddCoordTransferFactory(coords); + manager.SetFactory("Coordinates", tf); + } +} + +// ===================================================================================================== +// ================================== Algorithm: P-Coarsening +// ========================================== +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_PCoarsen(ParameterList ¶mList, + const ParameterList &defaultList, + FactoryManager &manager, int levelID, + std::vector &keeps) const { #ifdef HAVE_MUELU_INTREPID2 - // This only makes sense to invoke from the default list. 
- if (defaultList.isParameter("pcoarsen: schedule") && defaultList.isParameter("pcoarsen: element")) { - // P-Coarsening by schedule (new interface) - // NOTE: levelID represents the *coarse* level in this case - auto pcoarsen_schedule = Teuchos::getArrayFromStringParameter(defaultList,"pcoarsen: schedule"); - auto pcoarsen_element = defaultList.get("pcoarsen: element"); - - if (levelID >= (int)pcoarsen_schedule.size()) { - // Past the p-coarsening levels, we do Smoothed Aggregation - // NOTE: We should probably consider allowing other options past p-coarsening - UpdateFactoryManager_SA(paramList, defaultList, manager, levelID, keeps); - - } else { - // P-Coarsening - ParameterList Pparams; - auto P = rcp(new IntrepidPCoarsenFactory()); - std::string lo = pcoarsen_element + std::to_string(pcoarsen_schedule[levelID]); - std::string hi = (levelID ? pcoarsen_element + std::to_string(pcoarsen_schedule[levelID-1]) : lo); - Pparams.set("pcoarsen: hi basis", hi); - Pparams.set("pcoarsen: lo basis", lo); - P->SetParameterList(Pparams); - manager.SetFactory("P", P); - - // Add special nullspace handling - rcp_dynamic_cast(manager.GetFactoryNonConst("Nullspace"))->SetFactory("Nullspace", manager.GetFactory("P")); - } + // This only makes sense to invoke from the default list. 
+ if (defaultList.isParameter("pcoarsen: schedule") && + defaultList.isParameter("pcoarsen: element")) { + // P-Coarsening by schedule (new interface) + // NOTE: levelID represents the *coarse* level in this case + auto pcoarsen_schedule = Teuchos::getArrayFromStringParameter( + defaultList, "pcoarsen: schedule"); + auto pcoarsen_element = defaultList.get("pcoarsen: element"); + + if (levelID >= (int)pcoarsen_schedule.size()) { + // Past the p-coarsening levels, we do Smoothed Aggregation + // NOTE: We should probably consider allowing other options past + // p-coarsening + UpdateFactoryManager_SA(paramList, defaultList, manager, levelID, keeps); } else { - // P-Coarsening by manual specification (old interface) + // P-Coarsening ParameterList Pparams; auto P = rcp(new IntrepidPCoarsenFactory()); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "pcoarsen: hi basis", std::string, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "pcoarsen: lo basis", std::string, Pparams); + std::string lo = + pcoarsen_element + std::to_string(pcoarsen_schedule[levelID]); + std::string hi = + (levelID ? 
pcoarsen_element + + std::to_string(pcoarsen_schedule[levelID - 1]) + : lo); + Pparams.set("pcoarsen: hi basis", hi); + Pparams.set("pcoarsen: lo basis", lo); P->SetParameterList(Pparams); manager.SetFactory("P", P); // Add special nullspace handling - rcp_dynamic_cast(manager.GetFactoryNonConst("Nullspace"))->SetFactory("Nullspace", manager.GetFactory("P")); + rcp_dynamic_cast(manager.GetFactoryNonConst("Nullspace")) + ->SetFactory("Nullspace", manager.GetFactory("P")); } -#endif - } - - // ===================================================================================================== - // ============================== Algorithm: Smoothed Aggregation ====================================== - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_SA(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, int /* levelID */, std::vector& keeps) const { - // Smoothed aggregation - MUELU_KOKKOS_FACTORY(P, SaPFactory, SaPFactory_kokkos); + } else { + // P-Coarsening by manual specification (old interface) ParameterList Pparams; - if (paramList.isSublist("matrixmatrix: kernel params")) - Pparams.sublist("matrixmatrix: kernel params", false) = paramList.sublist("matrixmatrix: kernel params"); - if (defaultList.isSublist("matrixmatrix: kernel params")) - Pparams.sublist("matrixmatrix: kernel params", false) = defaultList.sublist("matrixmatrix: kernel params"); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: damping factor", double, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: calculate eigenvalue estimate", bool, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: max eigenvalue", double, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: eigenvalue estimate num iterations", int, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, 
defaultList, "sa: use rowsumabs diagonal scaling", bool, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: rowsumabs diagonal replacement tolerance", double, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: rowsumabs diagonal replacement value", double, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: rowsumabs use automatic diagonal tolerance", bool, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: enforce constraints", bool, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "tentative: calculate qr", bool, Pparams); - + auto P = rcp(new IntrepidPCoarsenFactory()); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "pcoarsen: hi basis", + std::string, Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "pcoarsen: lo basis", + std::string, Pparams); P->SetParameterList(Pparams); - - - // Filtering - MUELU_SET_VAR_2LIST(paramList, defaultList, "sa: use filtered matrix", bool, useFiltering); - if (useFiltering) { - // NOTE: Here, non-Kokkos and Kokkos versions diverge in the way the - // dependency tree is setup. The Kokkos version has merged the the - // FilteredAFactory into the CoalesceDropFactory. 
- if (!useKokkos_) { - RCP filterFactory = rcp(new FilteredAFactory()); - - ParameterList fParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use lumping", bool, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: reuse graph", bool, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: reuse eigenvalue", bool, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use root stencil", bool, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: Dirichlet threshold", double, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use spread lumping", bool, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: spread lumping diag dom growth factor", double, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: spread lumping diag dom cap", double, fParams); - filterFactory->SetParameterList(fParams); - filterFactory->SetFactory("Graph", manager.GetFactory("Graph")); - filterFactory->SetFactory("Aggregates", manager.GetFactory("Aggregates")); - filterFactory->SetFactory("UnAmalgamationInfo", manager.GetFactory("UnAmalgamationInfo")); - // I'm not sure why we need this line. 
See comments for DofsPerNode for UncoupledAggregation above - filterFactory->SetFactory("Filtering", manager.GetFactory("Graph")); - - P->SetFactory("A", filterFactory); - - } else { - P->SetFactory("A", manager.GetFactory("Graph")); - } - } - - P->SetFactory("P", manager.GetFactory("Ptent")); manager.SetFactory("P", P); - bool filteringChangesMatrix = useFiltering && !MUELU_TEST_PARAM_2LIST(paramList, defaultList, "aggregation: drop tol", double, 0); - MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); - if (reuseType == "tP" && !filteringChangesMatrix) - keeps.push_back(keep_pair("AP reuse data", P.get())); + // Add special nullspace handling + rcp_dynamic_cast(manager.GetFactoryNonConst("Nullspace")) + ->SetFactory("Nullspace", manager.GetFactory("P")); } - // ===================================================================================================== - // =============================== Algorithm: Energy Minimization ====================================== - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_Emin(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, - int /* levelID */, std::vector& /* keeps */) const - { - MUELU_SET_VAR_2LIST(paramList, defaultList, "emin: pattern", std::string, patternType); - MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, reuseType); - TEUCHOS_TEST_FOR_EXCEPTION(patternType != "AkPtent", Exceptions::InvalidArgument, - "Invalid pattern name: \"" << patternType << "\". 
Valid options: \"AkPtent\""); - // Pattern - auto patternFactory = rcp(new PatternFactory()); - ParameterList patternParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "emin: pattern order", int, patternParams); - patternFactory->SetParameterList(patternParams); - patternFactory->SetFactory("P", manager.GetFactory("Ptent")); - manager.SetFactory("Ppattern", patternFactory); - - // Constraint - auto constraintFactory = rcp(new ConstraintFactory()); - constraintFactory->SetFactory("Ppattern", manager.GetFactory("Ppattern")); - constraintFactory->SetFactory("CoarseNullspace", manager.GetFactory("Ptent")); - manager.SetFactory("Constraint", constraintFactory); - - // Emin Factory - auto P = rcp(new EminPFactory()); - // Filtering - MUELU_SET_VAR_2LIST(paramList, defaultList, "emin: use filtered matrix", bool, useFiltering); - if(useFiltering) { - // NOTE: Here, non-Kokkos and Kokkos versions diverge in the way the - // dependency tree is setup. The Kokkos version has merged the the - // FilteredAFactory into the CoalesceDropFactory. 
- if (!useKokkos_) { - RCP filterFactory = rcp(new FilteredAFactory()); - - ParameterList fParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use lumping", bool, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: reuse graph", bool, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: reuse eigenvalue", bool, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use root stencil", bool, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: Dirichlet threshold", double, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: use spread lumping", bool, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: spread lumping diag dom growth factor", double, fParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "filtered matrix: spread lumping diag dom cap", double, fParams); - filterFactory->SetParameterList(fParams); - filterFactory->SetFactory("Graph", manager.GetFactory("Graph")); - filterFactory->SetFactory("Aggregates", manager.GetFactory("Aggregates")); - filterFactory->SetFactory("UnAmalgamationInfo", manager.GetFactory("UnAmalgamationInfo")); - // I'm not sure why we need this line. 
See comments for DofsPerNode for UncoupledAggregation above - filterFactory->SetFactory("Filtering", manager.GetFactory("Graph")); - - P->SetFactory("A", filterFactory); - - } else { - P->SetFactory("A", manager.GetFactory("Graph")); - } - } +#endif +} + +// ===================================================================================================== +// ============================== Algorithm: Smoothed Aggregation +// ====================================== +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_SA(ParameterList ¶mList, + const ParameterList &defaultList, + FactoryManager &manager, int /* levelID */, + std::vector &keeps) const { + // Smoothed aggregation + MUELU_KOKKOS_FACTORY(P, SaPFactory, SaPFactory_kokkos); + ParameterList Pparams; + if (paramList.isSublist("matrixmatrix: kernel params")) + Pparams.sublist("matrixmatrix: kernel params", false) = + paramList.sublist("matrixmatrix: kernel params"); + if (defaultList.isSublist("matrixmatrix: kernel params")) + Pparams.sublist("matrixmatrix: kernel params", false) = + defaultList.sublist("matrixmatrix: kernel params"); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: damping factor", + double, Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "sa: calculate eigenvalue estimate", bool, + Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "sa: max eigenvalue", + double, Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "sa: eigenvalue estimate num iterations", int, + Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "sa: use rowsumabs diagonal scaling", bool, + Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "sa: rowsumabs diagonal replacement tolerance", + double, Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "sa: rowsumabs diagonal replacement 
value", + double, Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST( + paramList, defaultList, "sa: rowsumabs use automatic diagonal tolerance", + bool, Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "sa: enforce constraints", bool, Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "tentative: calculate qr", bool, Pparams); + + P->SetParameterList(Pparams); + + // Filtering + MUELU_SET_VAR_2LIST(paramList, defaultList, "sa: use filtered matrix", bool, + useFiltering); + if (useFiltering) { + // NOTE: Here, non-Kokkos and Kokkos versions diverge in the way the + // dependency tree is setup. The Kokkos version has merged the the + // FilteredAFactory into the CoalesceDropFactory. + if (!useKokkos_) { + RCP filterFactory = rcp(new FilteredAFactory()); + + ParameterList fParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "filtered matrix: use lumping", bool, + fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "filtered matrix: reuse graph", bool, + fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "filtered matrix: reuse eigenvalue", bool, + fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "filtered matrix: use root stencil", bool, + fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "filtered matrix: Dirichlet threshold", + double, fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "filtered matrix: use spread lumping", + bool, fParams); + MUELU_TEST_AND_SET_PARAM_2LIST( + paramList, defaultList, + "filtered matrix: spread lumping diag dom growth factor", double, + fParams); + MUELU_TEST_AND_SET_PARAM_2LIST( + paramList, defaultList, + "filtered matrix: spread lumping diag dom cap", double, fParams); + filterFactory->SetParameterList(fParams); + filterFactory->SetFactory("Graph", manager.GetFactory("Graph")); + filterFactory->SetFactory("Aggregates", manager.GetFactory("Aggregates")); + 
filterFactory->SetFactory("UnAmalgamationInfo", + manager.GetFactory("UnAmalgamationInfo")); + // I'm not sure why we need this line. See comments for DofsPerNode for + // UncoupledAggregation above + filterFactory->SetFactory("Filtering", manager.GetFactory("Graph")); + + P->SetFactory("A", filterFactory); - // Energy minimization - ParameterList Pparams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "emin: num iterations", int, Pparams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "emin: iterative method", std::string, Pparams); - if (reuseType == "emin") { - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "emin: num reuse iterations", int, Pparams); - Pparams.set("Keep P0", true); - Pparams.set("Keep Constraint0", true); + } else { + P->SetFactory("A", manager.GetFactory("Graph")); } - P->SetParameterList(Pparams); - P->SetFactory("P", manager.GetFactory("Ptent")); - P->SetFactory("Constraint", manager.GetFactory("Constraint")); - manager.SetFactory("P", P); } - // ===================================================================================================== - // ================================= Algorithm: Petrov-Galerkin ======================================== - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_PG(ParameterList& /* paramList */, const ParameterList& /* defaultList */, FactoryManager& manager, - int /* levelID */, std::vector& /* keeps */) const - { - TEUCHOS_TEST_FOR_EXCEPTION(this->implicitTranspose_, Exceptions::RuntimeError, - "Implicit transpose not supported with Petrov-Galerkin smoothed transfer operators: Set \"transpose: use implicit\" to false!\n" \ - "Petrov-Galerkin transfer operator smoothing for non-symmetric problems requires a separate handling of the restriction operator which " \ - "does not allow the usage of implicit transpose easily."); - - // Petrov-Galerkin - 
auto P = rcp(new PgPFactory()); - P->SetFactory("P", manager.GetFactory("Ptent")); - manager.SetFactory("P", P); - } - - // ===================================================================================================== - // ================================= Algorithm: Replicate ======================================== - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_Replicate(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, int /* levelID */, std::vector& keeps) const - { - auto P = rcp(new MueLu::ReplicatePFactory()); - - ParameterList Pparams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "replicate: npdes", int, Pparams); - - P->SetParameterList(Pparams); - manager.SetFactory("P", P); + P->SetFactory("P", manager.GetFactory("Ptent")); + manager.SetFactory("P", P); + + bool filteringChangesMatrix = + useFiltering && + !MUELU_TEST_PARAM_2LIST(paramList, defaultList, "aggregation: drop tol", + double, 0); + MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, + reuseType); + if (reuseType == "tP" && !filteringChangesMatrix) + keeps.push_back(keep_pair("AP reuse data", P.get())); +} + +// ===================================================================================================== +// =============================== Algorithm: Energy Minimization +// ====================================== +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_Emin(ParameterList ¶mList, + const ParameterList &defaultList, + FactoryManager &manager, int /* levelID */, + std::vector & /* keeps */) const { + MUELU_SET_VAR_2LIST(paramList, defaultList, "emin: pattern", std::string, + patternType); + MUELU_SET_VAR_2LIST(paramList, defaultList, "reuse: type", std::string, 
+ reuseType); + TEUCHOS_TEST_FOR_EXCEPTION( + patternType != "AkPtent", Exceptions::InvalidArgument, + "Invalid pattern name: \"" << patternType + << "\". Valid options: \"AkPtent\""); + // Pattern + auto patternFactory = rcp(new PatternFactory()); + ParameterList patternParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "emin: pattern order", + int, patternParams); + patternFactory->SetParameterList(patternParams); + patternFactory->SetFactory("P", manager.GetFactory("Ptent")); + manager.SetFactory("Ppattern", patternFactory); + + // Constraint + auto constraintFactory = rcp(new ConstraintFactory()); + constraintFactory->SetFactory("Ppattern", manager.GetFactory("Ppattern")); + constraintFactory->SetFactory("CoarseNullspace", manager.GetFactory("Ptent")); + manager.SetFactory("Constraint", constraintFactory); + + // Emin Factory + auto P = rcp(new EminPFactory()); + // Filtering + MUELU_SET_VAR_2LIST(paramList, defaultList, "emin: use filtered matrix", bool, + useFiltering); + if (useFiltering) { + // NOTE: Here, non-Kokkos and Kokkos versions diverge in the way the + // dependency tree is setup. The Kokkos version has merged the the + // FilteredAFactory into the CoalesceDropFactory. 
+ if (!useKokkos_) { + RCP filterFactory = rcp(new FilteredAFactory()); + + ParameterList fParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "filtered matrix: use lumping", bool, + fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "filtered matrix: reuse graph", bool, + fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "filtered matrix: reuse eigenvalue", bool, + fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "filtered matrix: use root stencil", bool, + fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "filtered matrix: Dirichlet threshold", + double, fParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "filtered matrix: use spread lumping", + bool, fParams); + MUELU_TEST_AND_SET_PARAM_2LIST( + paramList, defaultList, + "filtered matrix: spread lumping diag dom growth factor", double, + fParams); + MUELU_TEST_AND_SET_PARAM_2LIST( + paramList, defaultList, + "filtered matrix: spread lumping diag dom cap", double, fParams); + filterFactory->SetParameterList(fParams); + filterFactory->SetFactory("Graph", manager.GetFactory("Graph")); + filterFactory->SetFactory("Aggregates", manager.GetFactory("Aggregates")); + filterFactory->SetFactory("UnAmalgamationInfo", + manager.GetFactory("UnAmalgamationInfo")); + // I'm not sure why we need this line. 
See comments for DofsPerNode for + // UncoupledAggregation above + filterFactory->SetFactory("Filtering", manager.GetFactory("Graph")); + + P->SetFactory("A", filterFactory); + } else { + P->SetFactory("A", manager.GetFactory("Graph")); + } } - // ===================================================================================================== - // ====================================== Algorithm: Combine ============================================ - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_Combine(ParameterList& paramList, const ParameterList& defaultList, FactoryManager& manager, int /* levelID */, std::vector& keeps) const - { - auto P = rcp(new MueLu::CombinePFactory()); - - ParameterList Pparams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "combine: numBlks", int, Pparams); - - P->SetParameterList(Pparams); - manager.SetFactory("P", P); - + // Energy minimization + ParameterList Pparams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "emin: num iterations", + int, Pparams); + MUELU_TEST_AND_SET_PARAM_2LIST( + paramList, defaultList, "emin: iterative method", std::string, Pparams); + if (reuseType == "emin") { + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, + "emin: num reuse iterations", int, Pparams); + Pparams.set("Keep P0", true); + Pparams.set("Keep Constraint0", true); } - - - // ===================================================================================================== - // ====================================== Algorithm: Matlab ============================================ - // ===================================================================================================== - template - void ParameterListInterpreter:: - UpdateFactoryManager_Matlab(ParameterList& paramList, const ParameterList& /* defaultList */, FactoryManager& manager, - int /* levelID */, std::vector& /* 
keeps */) const { + P->SetParameterList(Pparams); + P->SetFactory("P", manager.GetFactory("Ptent")); + P->SetFactory("Constraint", manager.GetFactory("Constraint")); + manager.SetFactory("P", P); +} + +// ===================================================================================================== +// ================================= Algorithm: Petrov-Galerkin +// ======================================== +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_PG(ParameterList & /* paramList */, + const ParameterList & /* defaultList */, + FactoryManager &manager, int /* levelID */, + std::vector & /* keeps */) const { + TEUCHOS_TEST_FOR_EXCEPTION( + this->implicitTranspose_, Exceptions::RuntimeError, + "Implicit transpose not supported with Petrov-Galerkin smoothed transfer " + "operators: Set \"transpose: use implicit\" to false!\n" + "Petrov-Galerkin transfer operator smoothing for non-symmetric problems " + "requires a separate handling of the restriction operator which " + "does not allow the usage of implicit transpose easily."); + + // Petrov-Galerkin + auto P = rcp(new PgPFactory()); + P->SetFactory("P", manager.GetFactory("Ptent")); + manager.SetFactory("P", P); +} + +// ===================================================================================================== +// ================================= Algorithm: Replicate +// ======================================== +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_Replicate(ParameterList ¶mList, + const ParameterList &defaultList, + FactoryManager &manager, int /* levelID */, + std::vector &keeps) const { + auto P = rcp(new MueLu::ReplicatePFactory()); + + ParameterList Pparams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "replicate: npdes", + 
int, Pparams); + + P->SetParameterList(Pparams); + manager.SetFactory("P", P); +} + +// ===================================================================================================== +// ====================================== Algorithm: Combine +// ============================================ +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_Combine(ParameterList ¶mList, + const ParameterList &defaultList, + FactoryManager &manager, int /* levelID */, + std::vector &keeps) const { + auto P = rcp( + new MueLu::CombinePFactory()); + + ParameterList Pparams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "combine: numBlks", + int, Pparams); + + P->SetParameterList(Pparams); + manager.SetFactory("P", P); +} + +// ===================================================================================================== +// ====================================== Algorithm: Matlab +// ============================================ +// ===================================================================================================== +template +void ParameterListInterpreter:: + UpdateFactoryManager_Matlab(ParameterList ¶mList, + const ParameterList & /* defaultList */, + FactoryManager &manager, int /* levelID */, + std::vector & /* keeps */) const { #ifdef HAVE_MUELU_MATLAB - ParameterList Pparams = paramList.sublist("transfer: params"); - auto P = rcp(new TwoLevelMatlabFactory()); - P->SetParameterList(Pparams); - P->SetFactory("P", manager.GetFactory("Ptent")); - manager.SetFactory("P", P); + ParameterList Pparams = paramList.sublist("transfer: params"); + auto P = rcp(new TwoLevelMatlabFactory()); + P->SetParameterList(Pparams); + P->SetFactory("P", manager.GetFactory("Ptent")); + manager.SetFactory("P", P); #else - (void)paramList; - (void)manager; + (void)paramList; + (void)manager; #endif - } +} #undef MUELU_SET_VAR_2LIST #undef 
MUELU_TEST_AND_SET_VAR @@ -2210,544 +2894,664 @@ namespace MueLu { #undef MUELU_TEST_PARAM_2LIST #undef MUELU_KOKKOS_FACTORY - size_t LevenshteinDistance(const char* s, size_t len_s, const char* t, size_t len_t); - - template - void ParameterListInterpreter::Validate(const ParameterList& constParamList) const { - ParameterList paramList = constParamList; - const ParameterList& validList = *MasterList::List(); - // Validate up to maxLevels level specific parameter sublists - const int maxLevels = 100; - - // Extract level specific list - std::vector paramLists; - for (int levelID = 0; levelID < maxLevels; levelID++) { - std::string sublistName = "level " + toString(levelID); - if (paramList.isSublist(sublistName)) { - paramLists.push_back(paramList.sublist(sublistName)); - // paramLists.back().setName(sublistName); - paramList.remove(sublistName); - } +size_t LevenshteinDistance(const char *s, size_t len_s, const char *t, + size_t len_t); + +template +void ParameterListInterpreter:: + Validate(const ParameterList &constParamList) const { + ParameterList paramList = constParamList; + const ParameterList &validList = *MasterList::List(); + // Validate up to maxLevels level specific parameter sublists + const int maxLevels = 100; + + // Extract level specific list + std::vector paramLists; + for (int levelID = 0; levelID < maxLevels; levelID++) { + std::string sublistName = "level " + toString(levelID); + if (paramList.isSublist(sublistName)) { + paramLists.push_back(paramList.sublist(sublistName)); + // paramLists.back().setName(sublistName); + paramList.remove(sublistName); } - paramLists.push_back(paramList); - // paramLists.back().setName("main"); + } + paramLists.push_back(paramList); + // paramLists.back().setName("main"); #ifdef HAVE_MUELU_MATLAB - // If Muemex is supported, hide custom level variables from validator by removing them from paramList's sublists - for (size_t i = 0; i < paramLists.size(); i++) { - std::vector customVars; // list of names (keys) to 
be removed from list - - for(Teuchos::ParameterList::ConstIterator it = paramLists[i].begin(); it != paramLists[i].end(); it++) { - std::string paramName = paramLists[i].name(it); - - if (IsParamMuemexVariable(paramName)) - customVars.push_back(paramName); - } - - // Remove the keys - for (size_t j = 0; j < customVars.size(); j++) - paramLists[i].remove(customVars[j], false); + // If Muemex is supported, hide custom level variables from validator by + // removing them from paramList's sublists + for (size_t i = 0; i < paramLists.size(); i++) { + std::vector + customVars; // list of names (keys) to be removed from list + + for (Teuchos::ParameterList::ConstIterator it = paramLists[i].begin(); + it != paramLists[i].end(); it++) { + std::string paramName = paramLists[i].name(it); + + if (IsParamMuemexVariable(paramName)) + customVars.push_back(paramName); } + + // Remove the keys + for (size_t j = 0; j < customVars.size(); j++) + paramLists[i].remove(customVars[j], false); + } #endif - const int maxDepth = 0; - for (size_t i = 0; i < paramLists.size(); i++) { - // validate every sublist - try { - paramLists[i].validateParameters(validList, maxDepth); - - } catch (const Teuchos::Exceptions::InvalidParameterName& e) { - std::string eString = e.what(); - - // Parse name from: - size_t nameStart = eString.find_first_of('"') + 1; - size_t nameEnd = eString.find_first_of('"', nameStart); - std::string name = eString.substr(nameStart, nameEnd - nameStart); - - size_t bestScore = 100; - std::string bestName = ""; - for (ParameterList::ConstIterator it = validList.begin(); it != validList.end(); it++) { - const std::string& pName = validList.name(it); - this->GetOStream(Runtime1) << "| " << pName; - size_t score = LevenshteinDistance(name.c_str(), name.length(), pName.c_str(), pName.length()); - this->GetOStream(Runtime1) << " -> " << score << std::endl; - if (score < bestScore) { - bestScore = score; - bestName = pName; - } + const int maxDepth = 0; + for (size_t i = 0; i < 
paramLists.size(); i++) { + // validate every sublist + try { + paramLists[i].validateParameters(validList, maxDepth); + + } catch (const Teuchos::Exceptions::InvalidParameterName &e) { + std::string eString = e.what(); + + // Parse name from: + size_t nameStart = eString.find_first_of('"') + 1; + size_t nameEnd = eString.find_first_of('"', nameStart); + std::string name = eString.substr(nameStart, nameEnd - nameStart); + + size_t bestScore = 100; + std::string bestName = ""; + for (ParameterList::ConstIterator it = validList.begin(); + it != validList.end(); it++) { + const std::string &pName = validList.name(it); + this->GetOStream(Runtime1) << "| " << pName; + size_t score = LevenshteinDistance(name.c_str(), name.length(), + pName.c_str(), pName.length()); + this->GetOStream(Runtime1) << " -> " << score << std::endl; + if (score < bestScore) { + bestScore = score; + bestName = pName; } - if (bestScore < 10 && bestName != "") { - TEUCHOS_TEST_FOR_EXCEPTION(true, Teuchos::Exceptions::InvalidParameterName, - eString << "The parameter name \"" + name + "\" is not valid. Did you mean \"" + bestName << "\"?\n"); + } + if (bestScore < 10 && bestName != "") { + TEUCHOS_TEST_FOR_EXCEPTION( + true, Teuchos::Exceptions::InvalidParameterName, + eString << "The parameter name \"" + name + + "\" is not valid. 
Did you mean \"" + bestName + << "\"?\n"); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(true, Teuchos::Exceptions::InvalidParameterName, + } else { + TEUCHOS_TEST_FOR_EXCEPTION( + true, Teuchos::Exceptions::InvalidParameterName, eString << "The parameter name \"" + name + "\" is not valid.\n"); - } } } } +} + +// ===================================================================================================== +// ==================================== FACTORY interpreter +// ============================================ +// ===================================================================================================== +template +void ParameterListInterpreter:: + SetFactoryParameterList(const ParameterList &constParamList) { + // Create a non const copy of the parameter list + // Working with a modifiable list is much much easier than with original one + ParameterList paramList = constParamList; + + // Parameter List Parsing: + // --------- + // + // + // + if (paramList.isSublist("Matrix")) { + blockSize_ = paramList.sublist("Matrix").get( + "PDE equations", MasterList::getDefault("number of equations")); + dofOffset_ = paramList.sublist("Matrix").get( + "DOF offset", + 0); // undocumented parameter allowing to define a DOF offset of the + // global dofs of an operator (defaul = 0) + } - // ===================================================================================================== - // ==================================== FACTORY interpreter ============================================ - // ===================================================================================================== - template - void ParameterListInterpreter:: - SetFactoryParameterList(const ParameterList& constParamList) { - // Create a non const copy of the parameter list - // Working with a modifiable list is much much easier than with original one - ParameterList paramList = constParamList; - - // Parameter List Parsing: - // --------- - // - // - // - if 
(paramList.isSublist("Matrix")) { - blockSize_ = paramList.sublist("Matrix").get("PDE equations", MasterList::getDefault("number of equations")); - dofOffset_ = paramList.sublist("Matrix").get("DOF offset", 0); // undocumented parameter allowing to define a DOF offset of the global dofs of an operator (defaul = 0) + // create new FactoryFactory object if necessary + if (factFact_ == Teuchos::null) + factFact_ = Teuchos::rcp(new FactoryFactory()); + + // Parameter List Parsing: + // --------- + // + // <== call BuildFactoryMap() on this + // parameter list + // ... + // + // + FactoryMap factoryMap; + FactoryManagerMap factoryManagers; + if (paramList.isSublist("Factories")) + this->BuildFactoryMap(paramList.sublist("Factories"), factoryMap, + factoryMap, factoryManagers); + + // Parameter List Parsing: + // --------- + // + // + // <== get + // <== get + // + // <== parse + // first args and call BuildFactoryMap() on the rest of this parameter + // list + // ... + // + // + // + if (paramList.isSublist("Hierarchy")) { + ParameterList hieraList = paramList.sublist( + "Hierarchy"); // copy because list temporally modified (remove 'id') + + // Get hierarchy options + if (hieraList.isParameter("max levels")) { + this->numDesiredLevel_ = hieraList.get("max levels"); + hieraList.remove("max levels"); } - // create new FactoryFactory object if necessary - if (factFact_ == Teuchos::null) - factFact_ = Teuchos::rcp(new FactoryFactory()); - - // Parameter List Parsing: - // --------- - // - // <== call BuildFactoryMap() on this parameter list - // ... - // - // - FactoryMap factoryMap; - FactoryManagerMap factoryManagers; - if (paramList.isSublist("Factories")) - this->BuildFactoryMap(paramList.sublist("Factories"), factoryMap, factoryMap, factoryManagers); - - // Parameter List Parsing: - // --------- - // - // - // <== get - // <== get - // - // <== parse first args and call BuildFactoryMap() on the rest of this parameter list - // ... 
- // - // - // - if (paramList.isSublist("Hierarchy")) { - ParameterList hieraList = paramList.sublist("Hierarchy"); // copy because list temporally modified (remove 'id') - - // Get hierarchy options - if (hieraList.isParameter("max levels")) { - this->numDesiredLevel_ = hieraList.get("max levels"); - hieraList.remove("max levels"); - } + if (hieraList.isParameter("coarse: max size")) { + this->maxCoarseSize_ = hieraList.get("coarse: max size"); + hieraList.remove("coarse: max size"); + } - if (hieraList.isParameter("coarse: max size")) { - this->maxCoarseSize_ = hieraList.get("coarse: max size"); - hieraList.remove("coarse: max size"); - } + if (hieraList.isParameter("repartition: rebalance P and R")) { + this->doPRrebalance_ = + hieraList.get("repartition: rebalance P and R"); + hieraList.remove("repartition: rebalance P and R"); + } - if (hieraList.isParameter("repartition: rebalance P and R")) { - this->doPRrebalance_ = hieraList.get("repartition: rebalance P and R"); - hieraList.remove("repartition: rebalance P and R"); - } + if (hieraList.isParameter("transpose: use implicit")) { + this->implicitTranspose_ = hieraList.get("transpose: use implicit"); + hieraList.remove("transpose: use implicit"); + } - if (hieraList.isParameter("transpose: use implicit")) { - this->implicitTranspose_ = hieraList.get("transpose: use implicit"); - hieraList.remove("transpose: use implicit"); - } + if (hieraList.isParameter("fuse prolongation and update")) { + this->fuseProlongationAndUpdate_ = + hieraList.get("fuse prolongation and update"); + hieraList.remove("fuse prolongation and update"); + } - if (hieraList.isParameter("fuse prolongation and update")) { - this->fuseProlongationAndUpdate_ = hieraList.get("fuse prolongation and update"); - hieraList.remove("fuse prolongation and update"); - } + if (hieraList.isParameter("nullspace: suppress dimension check")) { + this->suppressNullspaceDimensionCheck_ = + hieraList.get("nullspace: suppress dimension check"); + 
hieraList.remove("nullspace: suppress dimension check"); + } - if (hieraList.isParameter("nullspace: suppress dimension check")) { - this->suppressNullspaceDimensionCheck_ = hieraList.get("nullspace: suppress dimension check"); - hieraList.remove("nullspace: suppress dimension check"); - } + if (hieraList.isParameter("number of vectors")) { + this->numDesiredLevel_ = hieraList.get("number of vectors"); + hieraList.remove("number of vectors"); + } - if (hieraList.isParameter("number of vectors")) { - this->numDesiredLevel_ = hieraList.get("number of vectors"); - hieraList.remove("number of vectors"); - } + if (hieraList.isSublist("matvec params")) + this->matvecParams_ = + Teuchos::parameterList(hieraList.sublist("matvec params")); - if (hieraList.isSublist("matvec params")) - this->matvecParams_ = Teuchos::parameterList(hieraList.sublist("matvec params")); + if (hieraList.isParameter("coarse grid correction scaling factor")) { + this->scalingFactor_ = + hieraList.get("coarse grid correction scaling factor"); + hieraList.remove("coarse grid correction scaling factor"); + } + // Translate cycle type parameter + if (hieraList.isParameter("cycle type")) { + std::map cycleMap; + cycleMap["V"] = VCYCLE; + cycleMap["W"] = WCYCLE; - if (hieraList.isParameter("coarse grid correction scaling factor")) { - this->scalingFactor_ = hieraList.get("coarse grid correction scaling factor"); - hieraList.remove("coarse grid correction scaling factor"); - } + std::string cycleType = hieraList.get("cycle type"); + TEUCHOS_TEST_FOR_EXCEPTION(cycleMap.count(cycleType) == 0, + Exceptions::RuntimeError, + "Invalid cycle type: \"" << cycleType << "\""); + this->Cycle_ = cycleMap[cycleType]; + } - // Translate cycle type parameter - if (hieraList.isParameter("cycle type")) { - std::map cycleMap; - cycleMap["V"] = VCYCLE; - cycleMap["W"] = WCYCLE; + if (hieraList.isParameter("W cycle start level")) { + this->WCycleStartLevel_ = hieraList.get("W cycle start level"); + } - std::string cycleType 
= hieraList.get("cycle type"); - TEUCHOS_TEST_FOR_EXCEPTION(cycleMap.count(cycleType) == 0, Exceptions::RuntimeError, "Invalid cycle type: \"" << cycleType << "\""); - this->Cycle_ = cycleMap[cycleType]; - } + if (hieraList.isParameter("verbosity")) { + std::string vl = hieraList.get("verbosity"); + hieraList.remove("verbosity"); + this->verbosity_ = toVerbLevel(vl); + } - if (hieraList.isParameter("W cycle start level")) { - this->WCycleStartLevel_ = hieraList.get("W cycle start level"); - } + if (hieraList.isParameter("output filename")) + VerboseObject::SetMueLuOFileStream( + hieraList.get("output filename")); + + if (hieraList.isParameter("dependencyOutputLevel")) + this->graphOutputLevel_ = hieraList.get("dependencyOutputLevel"); + + // Check for the reuse case + if (hieraList.isParameter("reuse")) + Factory::DisableMultipleCheckGlobally(); + + if (hieraList.isSublist("DataToWrite")) { + // TODO We should be able to specify any data. If it exists, write it. + // TODO This would requires something like std::set > + ParameterList foo = hieraList.sublist("DataToWrite"); + std::string dataName = "Matrices"; + if (foo.isParameter(dataName)) + this->matricesToPrint_["A"] = + Teuchos::getArrayFromStringParameter(foo, dataName); + dataName = "Prolongators"; + if (foo.isParameter(dataName)) + this->matricesToPrint_["P"] = + Teuchos::getArrayFromStringParameter(foo, dataName); + dataName = "Restrictors"; + if (foo.isParameter(dataName)) + this->matricesToPrint_["R"] = + Teuchos::getArrayFromStringParameter(foo, dataName); + dataName = "D0"; + if (foo.isParameter(dataName)) + this->matricesToPrint_["D0"] = + Teuchos::getArrayFromStringParameter(foo, dataName); + } - if (hieraList.isParameter("verbosity")) { - std::string vl = hieraList.get("verbosity"); - hieraList.remove("verbosity"); - this->verbosity_ = toVerbLevel(vl); - } + // Get level configuration + for (ParameterList::ConstIterator param = hieraList.begin(); + param != hieraList.end(); ++param) { + const 
std::string ¶mName = hieraList.name(param); - if (hieraList.isParameter("output filename")) - VerboseObject::SetMueLuOFileStream(hieraList.get("output filename")); - - if (hieraList.isParameter("dependencyOutputLevel")) - this->graphOutputLevel_ = hieraList.get("dependencyOutputLevel"); - - // Check for the reuse case - if (hieraList.isParameter("reuse")) - Factory::DisableMultipleCheckGlobally(); - - if (hieraList.isSublist("DataToWrite")) { - //TODO We should be able to specify any data. If it exists, write it. - //TODO This would requires something like std::set > - ParameterList foo = hieraList.sublist("DataToWrite"); - std::string dataName = "Matrices"; - if (foo.isParameter(dataName)) - this->matricesToPrint_["A"] = Teuchos::getArrayFromStringParameter(foo, dataName); - dataName = "Prolongators"; - if (foo.isParameter(dataName)) - this->matricesToPrint_["P"] = Teuchos::getArrayFromStringParameter(foo, dataName); - dataName = "Restrictors"; - if (foo.isParameter(dataName)) - this->matricesToPrint_["R"] = Teuchos::getArrayFromStringParameter(foo, dataName); - dataName = "D0"; - if (foo.isParameter(dataName)) - this->matricesToPrint_["D0"] = Teuchos::getArrayFromStringParameter(foo, dataName); - } + if (paramName != "DataToWrite" && hieraList.isSublist(paramName)) { + ParameterList levelList = hieraList.sublist( + paramName); // copy because list temporally modified (remove 'id') - // Get level configuration - for (ParameterList::ConstIterator param = hieraList.begin(); param != hieraList.end(); ++param) { - const std::string & paramName = hieraList.name(param); - - if (paramName != "DataToWrite" && hieraList.isSublist(paramName)) { - ParameterList levelList = hieraList.sublist(paramName); // copy because list temporally modified (remove 'id') - - int startLevel = 0; if(levelList.isParameter("startLevel")) { startLevel = levelList.get("startLevel"); levelList.remove("startLevel"); } - int numDesiredLevel = 1; if(levelList.isParameter("numDesiredLevel")) { 
numDesiredLevel = levelList.get("numDesiredLevel"); levelList.remove("numDesiredLevel"); } - - // Parameter List Parsing: - // --------- - // - // - // - // - // - // [] <== call BuildFactoryMap() on the rest of the parameter list - // - // - FactoryMap levelFactoryMap; - BuildFactoryMap(levelList, factoryMap, levelFactoryMap, factoryManagers); - - RCP m = rcp(new FactoryManager(levelFactoryMap)); - if (hieraList.isParameter("use kokkos refactor")) - m->SetKokkosRefactor(hieraList.get("use kokkos refactor")); - - if (startLevel >= 0) - this->AddFactoryManager(startLevel, numDesiredLevel, m); - else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::ParameterListInterpreter():: invalid level id"); - } /* TODO: else { } */ - } + int startLevel = 0; + if (levelList.isParameter("startLevel")) { + startLevel = levelList.get("startLevel"); + levelList.remove("startLevel"); + } + int numDesiredLevel = 1; + if (levelList.isParameter("numDesiredLevel")) { + numDesiredLevel = levelList.get("numDesiredLevel"); + levelList.remove("numDesiredLevel"); + } + + // Parameter List Parsing: + // --------- + // + // + // + // + // + // [] <== call BuildFactoryMap() on the rest of the parameter list + // + // + FactoryMap levelFactoryMap; + BuildFactoryMap(levelList, factoryMap, levelFactoryMap, + factoryManagers); + + RCP m = rcp(new FactoryManager(levelFactoryMap)); + if (hieraList.isParameter("use kokkos refactor")) + m->SetKokkosRefactor(hieraList.get("use kokkos refactor")); + + if (startLevel >= 0) + this->AddFactoryManager(startLevel, numDesiredLevel, m); + else + TEUCHOS_TEST_FOR_EXCEPTION( + true, Exceptions::RuntimeError, + "MueLu::ParameterListInterpreter():: invalid level id"); + } /* TODO: else { } */ } } +} + +// TODO: static? +/// \brief Interpret "Factories" sublist +/// +/// \param paramList [in]: "Factories" ParameterList +/// \param factoryMapIn [in]: FactoryMap maps variable names to factories. 
This +/// factory map is used to resolve data dependencies of previously defined +/// factories. \param factoryMapOut [out]: FactoryMap maps variable names to +/// factories. New factory entries are added to that FactoryMap. Usually, +/// factoryMapIn and factoryMapOut should use the same object, such that new +/// factories are added. We have to distinguish input and output if we build +/// sub-factory managers, though. \param factoryManagers [in/out]: +/// FacotryManagerMap maps group names to a FactoryManager object. +/// +/// Interpret "Factories" parameter list. For each "factory" entry, add a new +/// entry in the factoryMapOut map or create a new FacotryManager +/// +/// Parameter List Parsing: +/// Create an entry in factoryMapOut for each parameter of the list paramList +/// --------- +/// +/// +/// +/// +/// +/// ... +/// +/// +/// +/// --------- +/// Group factories +/// We can group factories using parameter sublists with the "group" parameter +/// +/// +/// +/// +/// +/// +/// +/// +/// + +/// +/// +/// +/// +/// +/// +/// +/// These factory groups can be used with factories for blocked operators (such +/// as the BlockedPFactory) to easily define the operations on the sub-blocks. +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// As an alternative one can also directly specify the factories in the +/// sublists "block1", "block2", etc..., of course. But using blocks has the +/// advantage that one can reuse them in all blocked factories. +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// As an alternative one can also directly specify the factories in the +/// sublists "block1", "block2", etc..., of course. +/// +/// + +/// --------- +/// add more dependencies (circular dependencies) +/// +/// The NullspaceFactory needs to know which factory generates the null space +/// on the coarse level (e.g., the TentativePFactory or the +/// RebalancedPFactory). 
However, we cannot set the information in this place +/// in the xml file, since the tentative prolongator facotry is typically +/// defined later. We have to add that dependency later to the +/// NullspaceFactory: +/// +/// +/// +/// +/// +/// +/// +/// +/// <...> +/// +/// +/// +/// +/// +/// +/// <...> +/// +/// +/// +/// After the definition of the generating factory for the nullspace (in this +/// case myRebalanceProlongatorFact) we add that dependency to the +/// NullspaceFactory instance myNspFact +/// +/// +/// +/// +/// +/// +/// We have to create a new block (with a different name than myNspFact). In +/// the example we use "myNspFactDeps". It should contain a parameter +/// "dependency for" with the name of the factory that we want the dependencies +/// to be addded to. With above block we do not need the entry for the +/// Nullspace in the global FactoryManager any more. +template +void ParameterListInterpreter:: + BuildFactoryMap(const ParameterList ¶mList, + const FactoryMap &factoryMapIn, FactoryMap &factoryMapOut, + FactoryManagerMap &factoryManagers) const { + for (ParameterList::ConstIterator param = paramList.begin(); + param != paramList.end(); ++param) { + const std::string ¶mName = + paramList.name(param); //< paramName contains the user chosen factory + // name (e.g., "smootherFact1") + const Teuchos::ParameterEntry ¶mValue = paramList.entry( + param); //< for factories, paramValue should be either a list or just a + // MueLu Factory (e.g., TrilinosSmoother) + + // TODO: do not allow name of existing MueLu classes (can be tested using + // FactoryFactory) + + if (paramValue.isList()) { + ParameterList paramList1 = Teuchos::getValue(paramValue); + if (paramList1.isParameter( + "factory")) { // default: just a factory definition + // New Factory is a sublist with internal parameters and/or data + // dependencies + TEUCHOS_TEST_FOR_EXCEPTION( + paramList1.isParameter("dependency for") == true, + Exceptions::RuntimeError, + 
"MueLu::ParameterListInterpreter(): It seems that in the parameter " + "lists for defining " + << paramName + << " there is both a 'factory' and 'dependency for' parameter. " + "This is not allowed. Please remove the 'dependency for' " + "parameter."); + + factoryMapOut[paramName] = + factFact_->BuildFactory(paramValue, factoryMapIn, factoryManagers); + + } else if (paramList1.isParameter( + "dependency for")) { // add more data dependencies to + // existing factory + TEUCHOS_TEST_FOR_EXCEPTION( + paramList1.isParameter("factory") == true, Exceptions::RuntimeError, + "MueLu::ParameterListInterpreter(): It seems that in the parameter " + "lists for defining " + << paramName + << " there is both a 'factory' and 'dependency for' parameter. " + "This is not allowed."); + + std::string factoryName = paramList1.get("dependency for"); + + RCP factbase = + factoryMapIn.find(factoryName /*paramName*/) + ->second; // access previously defined factory + TEUCHOS_TEST_FOR_EXCEPTION( + factbase.is_null() == true, Exceptions::RuntimeError, + "MueLu::ParameterListInterpreter(): could not find factory " + + factoryName + " in factory map. Did you define it before?"); + + RCP factoryconst = + Teuchos::rcp_dynamic_cast(factbase); + RCP factory = Teuchos::rcp_const_cast(factoryconst); + + // Read the RCP parameters of the class T + RCP validParamList = + factory->GetValidParameterList(); + for (ParameterList::ConstIterator vparam = validParamList->begin(); + vparam != validParamList->end(); ++vparam) { + const std::string &pName = validParamList->name(vparam); + + if (!paramList1.isParameter(pName)) { + // Ignore unknown parameters + continue; + } - - //TODO: static? - /// \brief Interpret "Factories" sublist - /// - /// \param paramList [in]: "Factories" ParameterList - /// \param factoryMapIn [in]: FactoryMap maps variable names to factories. This factory map is used to resolve data dependencies of previously defined factories. 
- /// \param factoryMapOut [out]: FactoryMap maps variable names to factories. New factory entries are added to that FactoryMap. Usually, factoryMapIn and factoryMapOut should use the same object, such that new factories are added. We have to distinguish input and output if we build sub-factory managers, though. - /// \param factoryManagers [in/out]: FacotryManagerMap maps group names to a FactoryManager object. - /// - /// Interpret "Factories" parameter list. For each "factory" entry, add a new entry in the factoryMapOut map or create a new FacotryManager - /// - /// Parameter List Parsing: - /// Create an entry in factoryMapOut for each parameter of the list paramList - /// --------- - /// - /// - /// - /// - /// - /// ... - /// - /// - /// - /// --------- - /// Group factories - /// We can group factories using parameter sublists with the "group" parameter - /// - /// - /// - /// - /// - /// - /// - /// - /// - - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// These factory groups can be used with factories for blocked operators (such as the BlockedPFactory) - /// to easily define the operations on the sub-blocks. - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// As an alternative one can also directly specify the factories in the sublists "block1", "block2", etc..., of course. - /// But using blocks has the advantage that one can reuse them in all blocked factories. - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// As an alternative one can also directly specify the factories in the sublists "block1", "block2", etc..., of course. - /// - /// - - /// --------- - /// add more dependencies (circular dependencies) - /// - /// The NullspaceFactory needs to know which factory generates the null space on the coarse level (e.g., the TentativePFactory or the RebalancedPFactory). 
- /// However, we cannot set the information in this place in the xml file, since the tentative prolongator facotry is typically defined later. - /// We have to add that dependency later to the NullspaceFactory: - /// - /// - /// - /// - /// - /// - /// - /// - /// <...> - /// - /// - /// - /// - /// - /// - /// <...> - /// - /// - /// - /// After the definition of the generating factory for the nullspace (in this case myRebalanceProlongatorFact) - /// we add that dependency to the NullspaceFactory instance myNspFact - /// - /// - /// - /// - /// - /// - /// We have to create a new block (with a different name than myNspFact). In the example we use "myNspFactDeps". - /// It should contain a parameter "dependency for" with the name of the factory that we want the dependencies to be addded to. - /// With above block we do not need the entry for the Nullspace in the global FactoryManager any more. - template - void ParameterListInterpreter:: - BuildFactoryMap(const ParameterList& paramList, const FactoryMap& factoryMapIn, FactoryMap& factoryMapOut, FactoryManagerMap& factoryManagers) const { - for (ParameterList::ConstIterator param = paramList.begin(); param != paramList.end(); ++param) { - const std::string & paramName = paramList.name(param); //< paramName contains the user chosen factory name (e.g., "smootherFact1") - const Teuchos::ParameterEntry & paramValue = paramList.entry(param); //< for factories, paramValue should be either a list or just a MueLu Factory (e.g., TrilinosSmoother) - - //TODO: do not allow name of existing MueLu classes (can be tested using FactoryFactory) - - if (paramValue.isList()) { - ParameterList paramList1 = Teuchos::getValue(paramValue); - if (paramList1.isParameter("factory")) { // default: just a factory definition - // New Factory is a sublist with internal parameters and/or data dependencies - TEUCHOS_TEST_FOR_EXCEPTION(paramList1.isParameter("dependency for") == true, Exceptions::RuntimeError, - 
"MueLu::ParameterListInterpreter(): It seems that in the parameter lists for defining " << paramName << - " there is both a 'factory' and 'dependency for' parameter. This is not allowed. Please remove the 'dependency for' parameter."); - - factoryMapOut[paramName] = factFact_->BuildFactory(paramValue, factoryMapIn, factoryManagers); - - } else if (paramList1.isParameter("dependency for")) { // add more data dependencies to existing factory - TEUCHOS_TEST_FOR_EXCEPTION(paramList1.isParameter("factory") == true, Exceptions::RuntimeError, - "MueLu::ParameterListInterpreter(): It seems that in the parameter lists for defining " << paramName << - " there is both a 'factory' and 'dependency for' parameter. This is not allowed."); - - std::string factoryName = paramList1.get("dependency for"); - - RCP factbase = factoryMapIn.find(factoryName /*paramName*/)->second; // access previously defined factory - TEUCHOS_TEST_FOR_EXCEPTION(factbase.is_null() == true, Exceptions::RuntimeError, - "MueLu::ParameterListInterpreter(): could not find factory " + factoryName + " in factory map. 
Did you define it before?"); - - RCP factoryconst = Teuchos::rcp_dynamic_cast(factbase); - RCP< Factory> factory = Teuchos::rcp_const_cast(factoryconst); - - // Read the RCP parameters of the class T - RCP validParamList = factory->GetValidParameterList(); - for (ParameterList::ConstIterator vparam = validParamList->begin(); vparam != validParamList->end(); ++vparam) { - const std::string& pName = validParamList->name(vparam); - - if (!paramList1.isParameter(pName)) { - // Ignore unknown parameters - continue; - } - - if (validParamList->isType< RCP >(pName)) { - // Generate or get factory described by pName and set dependency - RCP generatingFact = factFact_->BuildFactory(paramList1.getEntry(pName), factoryMapIn, factoryManagers); - factory->SetFactory(pName, generatingFact.create_weak()); - - } else if (validParamList->isType >(pName)) { - if (pName == "ParameterList") { - // NOTE: we cannot use - // subList = sublist(rcpFromRef(paramList), pName) - // here as that would result in sublist also being a reference to a temporary object. - // The resulting dereferencing in the corresponding factory would then segfault - RCP subList = Teuchos::sublist(rcp(new ParameterList(paramList1)), pName); - factory->SetParameter(pName, ParameterEntry(subList)); - } - } else { - factory->SetParameter(pName, paramList1.getEntry(pName)); + if (validParamList->isType>(pName)) { + // Generate or get factory described by pName and set dependency + RCP generatingFact = factFact_->BuildFactory( + paramList1.getEntry(pName), factoryMapIn, factoryManagers); + factory->SetFactory(pName, generatingFact.create_weak()); + + } else if (validParamList->isType>(pName)) { + if (pName == "ParameterList") { + // NOTE: we cannot use + // subList = sublist(rcpFromRef(paramList), pName) + // here as that would result in sublist also being a reference to + // a temporary object. 
The resulting dereferencing in the + // corresponding factory would then segfault + RCP subList = + Teuchos::sublist(rcp(new ParameterList(paramList1)), pName); + factory->SetParameter(pName, ParameterEntry(subList)); } + } else { + factory->SetParameter(pName, paramList1.getEntry(pName)); } + } - } else if (paramList1.isParameter("group")) { // definitiion of a factory group (for a factory manager) - // Define a new (sub) FactoryManager - std::string groupType = paramList1.get("group"); - TEUCHOS_TEST_FOR_EXCEPTION(groupType!="FactoryManager", Exceptions::RuntimeError, - "group must be of type \"FactoryManager\"."); - - ParameterList groupList = paramList1; // copy because list temporally modified (remove 'id') - groupList.remove("group"); - - bool setKokkosRefactor = false; - bool kokkosRefactor = useKokkos_; - if (groupList.isParameter("use kokkos refactor")) { - kokkosRefactor = groupList.get("use kokkos refactor"); - groupList.remove("use kokkos refactor"); - setKokkosRefactor = true; - } + } else if (paramList1.isParameter( + "group")) { // definitiion of a factory group (for a + // factory manager) + // Define a new (sub) FactoryManager + std::string groupType = paramList1.get("group"); + TEUCHOS_TEST_FOR_EXCEPTION(groupType != "FactoryManager", + Exceptions::RuntimeError, + "group must be of type \"FactoryManager\"."); + + ParameterList groupList = + paramList1; // copy because list temporally modified (remove 'id') + groupList.remove("group"); + + bool setKokkosRefactor = false; + bool kokkosRefactor = useKokkos_; + if (groupList.isParameter("use kokkos refactor")) { + kokkosRefactor = groupList.get("use kokkos refactor"); + groupList.remove("use kokkos refactor"); + setKokkosRefactor = true; + } - FactoryMap groupFactoryMap; - BuildFactoryMap(groupList, factoryMapIn, groupFactoryMap, factoryManagers); + FactoryMap groupFactoryMap; + BuildFactoryMap(groupList, factoryMapIn, groupFactoryMap, + factoryManagers); - // do not store groupFactoryMap in 
factoryMapOut - // Create a factory manager object from groupFactoryMap - RCP m = rcp(new FactoryManager(groupFactoryMap)); - if (setKokkosRefactor) - m->SetKokkosRefactor(kokkosRefactor); - factoryManagers[paramName] = m; + // do not store groupFactoryMap in factoryMapOut + // Create a factory manager object from groupFactoryMap + RCP m = rcp(new FactoryManager(groupFactoryMap)); + if (setKokkosRefactor) + m->SetKokkosRefactor(kokkosRefactor); + factoryManagers[paramName] = m; - } else { - this->GetOStream(Warnings0) << "Could not interpret parameter list " << paramList1 << std::endl; - TEUCHOS_TEST_FOR_EXCEPTION(false, Exceptions::RuntimeError, - "XML Parameter list must either be of type \"factory\" or of type \"group\"."); - } } else { - // default: just a factory (no parameter list) - factoryMapOut[paramName] = factFact_->BuildFactory(paramValue, factoryMapIn, factoryManagers); + this->GetOStream(Warnings0) + << "Could not interpret parameter list " << paramList1 << std::endl; + TEUCHOS_TEST_FOR_EXCEPTION(false, Exceptions::RuntimeError, + "XML Parameter list must either be of type " + "\"factory\" or of type \"group\"."); } + } else { + // default: just a factory (no parameter list) + factoryMapOut[paramName] = + factFact_->BuildFactory(paramValue, factoryMapIn, factoryManagers); } } - - // ===================================================================================================== - // ======================================= MISC functions ============================================== - // ===================================================================================================== - template - void ParameterListInterpreter::SetupOperator(Operator& Op) const { - try { - Matrix& A = dynamic_cast(Op); - if (A.IsFixedBlockSizeSet() && (A.GetFixedBlockSize() != blockSize_)) - this->GetOStream(Warnings0) << "Setting matrix block size to " << blockSize_ << " (value of the parameter in the list) " - << "instead of " << A.GetFixedBlockSize() << " 
(provided matrix)." << std::endl - << "You may want to check \"number of equations\" (or \"PDE equations\" for factory style list) parameter." << std::endl; - - A.SetFixedBlockSize(blockSize_, dofOffset_); +} + +// ===================================================================================================== +// ======================================= MISC functions +// ============================================== +// ===================================================================================================== +template +void ParameterListInterpreter::SetupOperator(Operator &Op) const { + try { + Matrix &A = dynamic_cast(Op); + if (A.IsFixedBlockSizeSet() && (A.GetFixedBlockSize() != blockSize_)) + this->GetOStream(Warnings0) + << "Setting matrix block size to " << blockSize_ + << " (value of the parameter in the list) " + << "instead of " << A.GetFixedBlockSize() << " (provided matrix)." + << std::endl + << "You may want to check \"number of equations\" (or \"PDE " + "equations\" for factory style list) parameter." + << std::endl; + + A.SetFixedBlockSize(blockSize_, dofOffset_); #ifdef HAVE_MUELU_DEBUG - MatrixUtils::checkLocalRowMapMatchesColMap(A); + MatrixUtils::checkLocalRowMapMatchesColMap(A); #endif // HAVE_MUELU_DEBUG - } catch (std::bad_cast&) { - this->GetOStream(Warnings0) << "Skipping setting block size as the operator is not a matrix" << std::endl; - } - } - - template - void ParameterListInterpreter::SetupHierarchy(Hierarchy& H) const { - H.SetCycle(Cycle_); - H.SetCycleStartLevel(WCycleStartLevel_); - H.SetProlongatorScalingFactor(scalingFactor_); - HierarchyManager::SetupHierarchy(H); + } catch (std::bad_cast &) { + this->GetOStream(Warnings0) + << "Skipping setting block size as the operator is not a matrix" + << std::endl; } - - static bool compare(const ParameterList& list1, const ParameterList& list2) { - // First loop through and validate the parameters at this level. 
- // In addition, we generate a list of sublists that we will search next - for (ParameterList::ConstIterator it = list1.begin(); it != list1.end(); it++) { - const std::string& name = it->first; - const Teuchos::ParameterEntry& entry1 = it->second; - - const Teuchos::ParameterEntry *entry2 = list2.getEntryPtr(name); - if (!entry2) // entry is not present in the second list - return false; - if (entry1.isList() && entry2->isList()) { // sublist check - compare(Teuchos::getValue(entry1), Teuchos::getValue(*entry2)); - continue; - } - if (entry1.getAny(false) != entry2->getAny(false)) // entries have different types or different values - return false; +} + +template +void ParameterListInterpreter::SetupHierarchy(Hierarchy &H) const { + H.SetCycle(Cycle_); + H.SetCycleStartLevel(WCycleStartLevel_); + H.SetProlongatorScalingFactor(scalingFactor_); + HierarchyManager::SetupHierarchy(H); +} + +static bool compare(const ParameterList &list1, const ParameterList &list2) { + // First loop through and validate the parameters at this level. 
+ // In addition, we generate a list of sublists that we will search next + for (ParameterList::ConstIterator it = list1.begin(); it != list1.end(); + it++) { + const std::string &name = it->first; + const Teuchos::ParameterEntry &entry1 = it->second; + + const Teuchos::ParameterEntry *entry2 = list2.getEntryPtr(name); + if (!entry2) // entry is not present in the second list + return false; + if (entry1.isList() && entry2->isList()) { // sublist check + compare(Teuchos::getValue(entry1), + Teuchos::getValue(*entry2)); + continue; } - - return true; + if (entry1.getAny(false) != + entry2->getAny( + false)) // entries have different types or different values + return false; } - static inline bool areSame(const ParameterList& list1, const ParameterList& list2) { - return compare(list1, list2) && compare(list2, list1); - } + return true; +} + +static inline bool areSame(const ParameterList &list1, + const ParameterList &list2) { + return compare(list1, list2) && compare(list2, list1); +} } // namespace MueLu diff --git a/packages/muelu/src/Interface/MueLu_ParameterListUtils.cpp b/packages/muelu/src/Interface/MueLu_ParameterListUtils.cpp index 051a2dfd622e..48803eea9720 100644 --- a/packages/muelu/src/Interface/MueLu_ParameterListUtils.cpp +++ b/packages/muelu/src/Interface/MueLu_ParameterListUtils.cpp @@ -49,137 +49,170 @@ namespace MueLu { - /* See also: ML_Epetra::UpdateList */ - //! @brief: merge two parameter lists - //! - //! @param source [in]: parameter lists with source parameters which are to be merged in into the dest parameter list - //! @param dest [in,out]: parameter list with, e.g., default parameters which is extended by parameters from source parameter list - //! 
@param overWrite (bool): if true, overwrite parameters in dest with entries from source - void MergeParameterList(const Teuchos::ParameterList &source, Teuchos::ParameterList &dest, bool overWrite){ - for(Teuchos::ParameterList::ConstIterator param=source.begin(); param!=source.end(); ++param) - if (dest.isParameter(source.name(param)) == false || overWrite) - dest.setEntry(source.name(param),source.entry(param)); - } - - void CreateSublists(const Teuchos::ParameterList &List, Teuchos::ParameterList &newList) - { - using Teuchos::ParameterList; - using std::string; - - newList.setName(List.name()); - - // Copy general (= not level-specific) options and sublists to the new list. - // - Coarse and level-specific parameters are not copied yet. They will be moved to sublists later. - // - Already existing level-specific lists are copied to the new list but the coarse list is not copied - // yet because it has to be modified before copy (s/coarse/smoother/) - for (ParameterList::ConstIterator param=List.begin(); param!=List.end(); ++param) +/* See also: ML_Epetra::UpdateList */ +//! @brief: merge two parameter lists +//! +//! @param source [in]: parameter lists with source parameters which are to be +//! merged in into the dest parameter list +//! @param dest [in,out]: parameter list with, e.g., default parameters which is +//! extended by parameters from source parameter list +//! @param overWrite (bool): if true, overwrite parameters in dest with entries +//! 
from source +void MergeParameterList(const Teuchos::ParameterList &source, + Teuchos::ParameterList &dest, bool overWrite) { + for (Teuchos::ParameterList::ConstIterator param = source.begin(); + param != source.end(); ++param) + if (dest.isParameter(source.name(param)) == false || overWrite) + dest.setEntry(source.name(param), source.entry(param)); +} + +void CreateSublists(const Teuchos::ParameterList &List, + Teuchos::ParameterList &newList) { + using std::string; + using Teuchos::ParameterList; + + newList.setName(List.name()); + + // Copy general (= not level-specific) options and sublists to the new list. + // - Coarse and level-specific parameters are not copied yet. They will be + // moved to sublists later. + // - Already existing level-specific lists are copied to the new list but the + // coarse list is not copied + // yet because it has to be modified before copy (s/coarse/smoother/) + for (ParameterList::ConstIterator param = List.begin(); param != List.end(); + ++param) { + const string &pname = List.name(param); + + if ((pname.find(" (level", 0) == string::npos || + pname.find("smoother: list (level", 0) == 0 || + pname.find("aggregation: list (level", 0) == 0) && + (pname.find("coarse: ", 0) == string::npos)) { + newList.setEntry(pname, List.entry(param)); + } + } // for + + // Copy of the sublist "coarse: list" to the new list. Change "coarse:" to + // "smoother:" along the way. 
+ if (List.isSublist("coarse: list")) { + const ParameterList &coarseList = List.sublist("coarse: list"); + ParameterList &newCoarseList = newList.sublist("coarse: list"); + for (ParameterList::ConstIterator param = coarseList.begin(); + param != coarseList.end(); ++param) { + const string &pname = coarseList.name(param); + + if (pname.find("coarse:", 0) == 0) { + // change "coarse: " to "smoother:" + newCoarseList.setEntry("smoother: " + pname.substr(8), + coarseList.entry(param)); + } else { + newCoarseList.setEntry(pname, coarseList.entry(param)); + } + } + } // if + + // Copy of level-specific parameters and coarse parameters to sublist + for (ParameterList::ConstIterator param = List.begin(); param != List.end(); + ++param) { + const string &pname = List.name(param); + if (pname.find(" (level", 0) != string::npos && + pname.find("smoother: list (level", 0) != 0 && + pname.find("aggregation: list (level", 0) != 0) { + // Copy level-specific parameters (smoother and aggregation) + + // Scan pname (ex: pname="smoother: type (level 2)") + string type, option; + int levelID = -1; { - const string & pname=List.name(param); - - if ((pname.find(" (level",0) == string::npos || pname.find("smoother: list (level",0) == 0 || pname.find("aggregation: list (level",0) == 0) && - (pname.find("coarse: ",0) == string::npos)) - { - newList.setEntry(pname,List.entry(param)); - } - } // for - - // Copy of the sublist "coarse: list" to the new list. Change "coarse:" to "smoother:" along the way. 
- if (List.isSublist("coarse: list")) { - const ParameterList &coarseList = List.sublist("coarse: list"); - ParameterList &newCoarseList = newList.sublist("coarse: list"); - for (ParameterList::ConstIterator param=coarseList.begin(); param!=coarseList.end() ; ++param) { - const string & pname=coarseList.name(param); - - if (pname.find("coarse:",0) == 0) { - // change "coarse: " to "smoother:" - newCoarseList.setEntry("smoother: "+pname.substr(8),coarseList.entry(param)); - } else { - newCoarseList.setEntry(pname,coarseList.entry(param)); + typedef Teuchos::ArrayRCP::size_type size_type; // (!) + Teuchos::Array ctype(size_type(pname.size() + 1)); + Teuchos::Array coption(size_type(pname.size() + 1)); + + int matched = + sscanf(pname.c_str(), "%s %[^(](level %d)", ctype.getRawPtr(), + coption.getRawPtr(), + &levelID); // use [^(] instead of %s to allow for strings + // with white-spaces (ex: "ifpack list") + type = string(ctype.getRawPtr()); + option = string(coption.getRawPtr()); + option.resize(option.size() - 1); // remove final white-space + + if (matched != 3 || (type != "smoother:" && type != "aggregation:")) { + TEUCHOS_TEST_FOR_EXCEPTION( + true, MueLu::Exceptions::RuntimeError, + "MueLu::CreateSublist(), Line " + << __LINE__ << ". " + << "Error in creating level-specific sublists" << std::endl + << "Offending parameter: " << pname << std::endl); } } - } // if - // Copy of level-specific parameters and coarse parameters to sublist - for (ParameterList::ConstIterator param=List.begin(); param!=List.end(); ++param) - { - const string & pname=List.name(param); - if (pname.find(" (level",0) != string::npos && pname.find("smoother: list (level",0) != 0 && pname.find("aggregation: list (level",0) != 0) - { - // Copy level-specific parameters (smoother and aggregation) - - // Scan pname (ex: pname="smoother: type (level 2)") - string type, option; - int levelID=-1; - { - typedef Teuchos::ArrayRCP::size_type size_type; // (!) 
- Teuchos::Array ctype (size_type(pname.size()+1)); - Teuchos::Array coption(size_type(pname.size()+1)); - - int matched = sscanf(pname.c_str(),"%s %[^(](level %d)", ctype.getRawPtr(), coption.getRawPtr(), &levelID); // use [^(] instead of %s to allow for strings with white-spaces (ex: "ifpack list") - type = string(ctype.getRawPtr()); - option = string(coption.getRawPtr()); option.resize(option.size () - 1); // remove final white-space - - if (matched != 3 || (type != "smoother:" && type != "aggregation:")) { - TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "MueLu::CreateSublist(), Line " << __LINE__ << ". " - << "Error in creating level-specific sublists" << std::endl - << "Offending parameter: " << pname << std::endl); - } - } - - // Create/grab the corresponding sublist of newList - ParameterList &newSubList = newList.sublist(type + " list (level " + Teuchos::toString(levelID) + ")"); - // Shove option w/o level number into sublist - newSubList.setEntry(type + " " + option,List.entry(param)); - - } else if (pname.find("coarse:",0) == 0 && pname != "coarse: list") { - // Copy coarse parameters - ParameterList &newCoarseList = newList.sublist("coarse: list"); // the coarse sublist is created only if there is at least one "coarse:" parameter - newCoarseList.setEntry("smoother: "+pname.substr(8),List.entry(param)); // change "coarse: " to "smoother:" - } // end if - - } // for - - } //MueLu::CreateSublist() - - // Usage: GetMLSubList(paramList, "smoother", 2); - const Teuchos::ParameterList & GetMLSubList(const Teuchos::ParameterList & paramList, const std::string & type, int levelID) { - static const Teuchos::ParameterList emptyParamList; - - char levelChar[11]; - sprintf(levelChar, "(level %d)", levelID); - std::string levelStr(levelChar); - - if (paramList.isSublist(type + ": list " + levelStr)) { - return paramList.sublist(type + ": list " + levelStr); - } else { - return emptyParamList; - } + // Create/grab the corresponding sublist of 
newList + ParameterList &newSubList = newList.sublist( + type + " list (level " + Teuchos::toString(levelID) + ")"); + // Shove option w/o level number into sublist + newSubList.setEntry(type + " " + option, List.entry(param)); + + } else if (pname.find("coarse:", 0) == 0 && pname != "coarse: list") { + // Copy coarse parameters + ParameterList &newCoarseList = newList.sublist( + "coarse: list"); // the coarse sublist is created only if there is at + // least one "coarse:" parameter + newCoarseList.setEntry( + "smoother: " + pname.substr(8), + List.entry(param)); // change "coarse: " to "smoother:" + } // end if + + } // for + +} // MueLu::CreateSublist() + +// Usage: GetMLSubList(paramList, "smoother", 2); +const Teuchos::ParameterList & +GetMLSubList(const Teuchos::ParameterList ¶mList, const std::string &type, + int levelID) { + static const Teuchos::ParameterList emptyParamList; + + char levelChar[11]; + sprintf(levelChar, "(level %d)", levelID); + std::string levelStr(levelChar); + + if (paramList.isSublist(type + ": list " + levelStr)) { + return paramList.sublist(type + ": list " + levelStr); + } else { + return emptyParamList; } - - // Extract all the parameters that begin with "str:" (but skip sublist) - Teuchos::RCP ExtractSetOfParameters(const Teuchos::ParameterList & paramList, const std::string & str) { - Teuchos::RCP subList = rcp(new Teuchos::ParameterList()); - - for (Teuchos::ParameterList::ConstIterator param = paramList.begin(); param != paramList.end(); ++param) { - const Teuchos::ParameterEntry & entry = paramList.entry(param); - const std::string & pname = paramList.name(param); - if (pname.find(str+":",0) == 0 && !entry.isList()) { - subList->setEntry(pname,entry); - } +} + +// Extract all the parameters that begin with "str:" (but skip sublist) +Teuchos::RCP +ExtractSetOfParameters(const Teuchos::ParameterList ¶mList, + const std::string &str) { + Teuchos::RCP subList = + rcp(new Teuchos::ParameterList()); + + for 
(Teuchos::ParameterList::ConstIterator param = paramList.begin(); + param != paramList.end(); ++param) { + const Teuchos::ParameterEntry &entry = paramList.entry(param); + const std::string &pname = paramList.name(param); + if (pname.find(str + ":", 0) == 0 && !entry.isList()) { + subList->setEntry(pname, entry); } - - return subList; } - // replace all string occurrences "from" with "to" in "str" - void replaceAll(std::string& str, const std::string& from, const std::string& to) { - if(from.empty()) - return; - size_t start_pos = 0; - while((start_pos = str.find(from, start_pos)) != std::string::npos) { - str.replace(start_pos, from.length(), to); - start_pos += to.length(); // In case 'to' contains 'from', like replacing 'x' with 'yx' - } + return subList; +} + +// replace all string occurrences "from" with "to" in "str" +void replaceAll(std::string &str, const std::string &from, + const std::string &to) { + if (from.empty()) + return; + size_t start_pos = 0; + while ((start_pos = str.find(from, start_pos)) != std::string::npos) { + str.replace(start_pos, from.length(), to); + start_pos += to.length(); // In case 'to' contains 'from', like replacing + // 'x' with 'yx' } +} } // namespace MueLu diff --git a/packages/muelu/src/Interface/MueLu_ParameterListUtils.hpp b/packages/muelu/src/Interface/MueLu_ParameterListUtils.hpp index 9d55b472ad02..82c46f22d075 100644 --- a/packages/muelu/src/Interface/MueLu_ParameterListUtils.hpp +++ b/packages/muelu/src/Interface/MueLu_ParameterListUtils.hpp @@ -47,55 +47,65 @@ #ifndef MUELU_PARAMETERLISTUTILS_HPP #define MUELU_PARAMETERLISTUTILS_HPP -#include #include +#include +#include "MueLu_ConfigDefs.hpp" #include #include -#include "MueLu_ConfigDefs.hpp" namespace MueLu { - /* See also: ML_Epetra::UpdateList */ - void MergeParameterList(const Teuchos::ParameterList &source, Teuchos::ParameterList &dest, bool overWrite); - - void CreateSublists(const Teuchos::ParameterList &List, Teuchos::ParameterList &newList); - - // Usage: 
GetMLSubList(paramList, "smoother", 2); - const Teuchos::ParameterList & GetMLSubList(const Teuchos::ParameterList & paramList, const std::string & type, int levelID); - - // Extract all the parameters that begin with "str:" (but skip sublist) - Teuchos::RCP ExtractSetOfParameters(const Teuchos::ParameterList & paramList, const std::string & str); - - //! replace all string occurrences "from" with "to" in "str" - //! - //! @param str: input and output string - //! @param from: search string - //! @param to: replace with "to" - void replaceAll(std::string& str, const std::string& from, const std::string& to); - - //! templated version to replace placeholder by data in "str" - template - bool replacePlaceholder(std::string& str, const std::string& placeholder, Type data) { - std::stringstream s; - s << data; - replaceAll(str, placeholder, s.str()); - return true; - } - - template - bool actionInterpretParameter(Teuchos::ParameterList& mlParams, const std::string& paramName, std::string& str) { - - //MUELU_READ_PARAM(mlParams, paramName, int, 0, data); - - Type varName; // = defaultValue; // extract from master list - if (mlParams.isParameter(paramName)) varName = mlParams.get(paramName); - - std::stringstream placeholder; - placeholder << "$" << paramName << "$"; - - return MueLu::replacePlaceholder(str, placeholder.str(), varName); - } +/* See also: ML_Epetra::UpdateList */ +void MergeParameterList(const Teuchos::ParameterList &source, + Teuchos::ParameterList &dest, bool overWrite); + +void CreateSublists(const Teuchos::ParameterList &List, + Teuchos::ParameterList &newList); + +// Usage: GetMLSubList(paramList, "smoother", 2); +const Teuchos::ParameterList & +GetMLSubList(const Teuchos::ParameterList ¶mList, const std::string &type, + int levelID); + +// Extract all the parameters that begin with "str:" (but skip sublist) +Teuchos::RCP +ExtractSetOfParameters(const Teuchos::ParameterList ¶mList, + const std::string &str); + +//! 
replace all string occurrences "from" with "to" in "str" +//! +//! @param str: input and output string +//! @param from: search string +//! @param to: replace with "to" +void replaceAll(std::string &str, const std::string &from, + const std::string &to); + +//! templated version to replace placeholder by data in "str" +template +bool replacePlaceholder(std::string &str, const std::string &placeholder, + Type data) { + std::stringstream s; + s << data; + replaceAll(str, placeholder, s.str()); + return true; +} + +template +bool actionInterpretParameter(Teuchos::ParameterList &mlParams, + const std::string ¶mName, std::string &str) { + + // MUELU_READ_PARAM(mlParams, paramName, int, 0, data); + + Type varName; // = defaultValue; // extract from master list + if (mlParams.isParameter(paramName)) + varName = mlParams.get(paramName); + + std::stringstream placeholder; + placeholder << "$" << paramName << "$"; + + return MueLu::replacePlaceholder(str, placeholder.str(), varName); +} } // namespace MueLu diff --git a/packages/muelu/src/Misc/MueLu_AggregateQualityEstimateFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_AggregateQualityEstimateFactory_decl.hpp index a41ee8b9ae26..64ae03c88cb6 100644 --- a/packages/muelu/src/Misc/MueLu_AggregateQualityEstimateFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_AggregateQualityEstimateFactory_decl.hpp @@ -46,112 +46,121 @@ #ifndef MUELU_AGGREGATEQUALITYESTIMATEFACTORY_DECL_HPP #define MUELU_AGGREGATEQUALITYESTIMATEFACTORY_DECL_HPP +#include "MueLu_AggregateQualityEstimateFactory_fwd.hpp" #include "MueLu_ConfigDefs.hpp" #include "MueLu_SingleLevelFactoryBase.hpp" -#include "MueLu_AggregateQualityEstimateFactory_fwd.hpp" -#include -#include #include +#include #include -#include +#include #include - +#include #include "MueLu_Aggregates_fwd.hpp" #include "MueLu_Level_fwd.hpp" namespace MueLu { - /*! - @class AggregateQualityEstimateFactory class. - @brief An factory which assigns each aggregate a quality - estimate. 
Originally developed by Napov and Notay in the - context of plain aggregation, while this quality estimate - does not correspond to a robust convergence guarentee (as - it does for plain aggregation), we find empirically that - it is a good way of discovering poorly constructed aggregates - even in the smoothed aggregation context. - - Napov, A., & Notay, Y. (2012). An algebraic multigrid method - with guaranteed convergence rate. SIAM journal on scientific - computing, 34(2), A1079-A1109. - */ - - template - class AggregateQualityEstimateFactory : public SingleLevelFactoryBase { +/*! + @class AggregateQualityEstimateFactory class. + @brief An factory which assigns each aggregate a quality + estimate. Originally developed by Napov and Notay in the + context of plain aggregation, while this quality estimate + does not correspond to a robust convergence guarentee (as + it does for plain aggregation), we find empirically that + it is a good way of discovering poorly constructed aggregates + even in the smoothed aggregation context. + + Napov, A., & Notay, Y. (2012). An algebraic multigrid method + with guaranteed convergence rate. SIAM journal on scientific + computing, 34(2), A1079-A1109. +*/ + +template +class AggregateQualityEstimateFactory : public SingleLevelFactoryBase { #undef MUELU_AGGREGATEQUALITYESTIMATEFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ - - typedef typename Teuchos::ScalarTraits::magnitudeType magnitudeType; +public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - AggregateQualityEstimateFactory(); + typedef typename Teuchos::ScalarTraits::magnitudeType magnitudeType; - //! Destructor. - virtual ~AggregateQualityEstimateFactory(); + //! Constructor. + AggregateQualityEstimateFactory(); - //@} + //! Destructor. + virtual ~AggregateQualityEstimateFactory(); - RCP GetValidParameterList() const; + //@} - //! @name Input - //@{ + RCP GetValidParameterList() const; - /*! 
@brief Specifies the data that this class needs, and the factories that generate that data. + //! @name Input + //@{ - If the Build method of this class requires some data, but the generating factory is not specified in DeclareInput, then this class - will fall back to the settings in FactoryManager. - */ - void DeclareInput(Level ¤tLevel) const; + /*! @brief Specifies the data that this class needs, and the factories that + generate that data. - //@} - - //! @name Build methods. - //@{ - - //! Build aggregate quality esimates with this factory. - void Build(Level & currentLevel) const; - - //@} + If the Build method of this class requires some data, but the generating + factory is not specified in DeclareInput, then this class will fall back to + the settings in FactoryManager. + */ + void DeclareInput(Level ¤tLevel) const; - //! @name Utility method to convert aggregate data to a convenient format. - //@{ + //@} - //! Build aggregate quality esimates with this factory. - static void ConvertAggregatesData(RCP aggs, ArrayRCP& aggSortedVertices, ArrayRCP& aggsToIndices, ArrayRCP& aggSizes); + //! @name Build methods. + //@{ - //@} + //! Build aggregate quality esimates with this factory. + void Build(Level ¤tLevel) const; - private: + //@} - //! @name Internal method for computing aggregate quality. - //@{ + //! @name Utility method to convert aggregate data to a convenient format. + //@{ - void ComputeAggregateQualities(RCP A, RCP aggs, RCP> agg_qualities) const; + //! Build aggregate quality esimates with this factory. + static void ConvertAggregatesData(RCP aggs, + ArrayRCP &aggSortedVertices, + ArrayRCP &aggsToIndices, + ArrayRCP &aggSizes); - void ComputeAggregateSizes(RCP A, RCP aggs, RCP agg_sizes) const; + //@} - //@} +private: + //! @name Internal method for computing aggregate quality. + //@{ - //! 
@name Internal method for outputting aggregate quality - //@{ + void ComputeAggregateQualities( + RCP A, RCP aggs, + RCP> agg_qualities) + const; - void OutputAggQualities(const Level& level, RCP> agg_qualities) const; + void ComputeAggregateSizes(RCP A, RCP aggs, + RCP agg_sizes) const; - - void OutputAggSizes(const Level& level, RCP agg_sizes) const; + //@} + //! @name Internal method for outputting aggregate quality + //@{ + void + OutputAggQualities(const Level &level, + RCP> + agg_qualities) const; - //@} + void OutputAggSizes(const Level &level, + RCP agg_sizes) const; + //@} - }; // class AggregateQualityEsimateFactory(); +}; // class AggregateQualityEsimateFactory(); } // namespace MueLu diff --git a/packages/muelu/src/Misc/MueLu_AggregateQualityEstimateFactory_def.hpp b/packages/muelu/src/Misc/MueLu_AggregateQualityEstimateFactory_def.hpp index 44b760ea9ee0..b9f72dd27635 100644 --- a/packages/muelu/src/Misc/MueLu_AggregateQualityEstimateFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_AggregateQualityEstimateFactory_def.hpp @@ -45,498 +45,558 @@ // @HEADER #ifndef MUELU_AGGREGATEQUALITYESTIMATEFACTORY_DEF_HPP #define MUELU_AGGREGATEQUALITYESTIMATEFACTORY_DEF_HPP -#include #include "MueLu_AggregateQualityEstimateFactory_decl.hpp" +#include #include "MueLu_Level.hpp" -#include #include +#include #include "MueLu_Aggregates_decl.hpp" -#include #include "MueLu_MasterList.hpp" #include "MueLu_Monitor.hpp" #include "MueLu_Utilities.hpp" +#include #include namespace MueLu { - template - AggregateQualityEstimateFactory::AggregateQualityEstimateFactory() - { } - - template - AggregateQualityEstimateFactory::~AggregateQualityEstimateFactory() {} - - template - void AggregateQualityEstimateFactory::DeclareInput(Level& currentLevel) const { - - Input(currentLevel, "A"); - Input(currentLevel, "Aggregates"); - Input(currentLevel, "CoarseMap"); - - } - - template - RCP AggregateQualityEstimateFactory::GetValidParameterList() const { - RCP validParamList = rcp(new 
ParameterList()); - -#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("aggregate qualities: good aggregate threshold"); - SET_VALID_ENTRY("aggregate qualities: file output"); - SET_VALID_ENTRY("aggregate qualities: file base"); - SET_VALID_ENTRY("aggregate qualities: check symmetry"); - SET_VALID_ENTRY("aggregate qualities: algorithm"); - SET_VALID_ENTRY("aggregate qualities: zero threshold"); - SET_VALID_ENTRY("aggregate qualities: percentiles"); - SET_VALID_ENTRY("aggregate qualities: mode"); - -#undef SET_VALID_ENTRY - - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A"); - validParamList->set< RCP >("Aggregates", Teuchos::null, "Generating factory of the aggregates"); - validParamList->set< RCP >("CoarseMap", Teuchos::null, "Generating factory of the coarse map"); - - return validParamList; - } +template +AggregateQualityEstimateFactory::AggregateQualityEstimateFactory() {} +template +AggregateQualityEstimateFactory::~AggregateQualityEstimateFactory() {} - template - void AggregateQualityEstimateFactory::Build(Level & currentLevel) const { +template +void AggregateQualityEstimateFactory::DeclareInput(Level ¤tLevel) + const { - FactoryMonitor m(*this, "Build", currentLevel); + Input(currentLevel, "A"); + Input(currentLevel, "Aggregates"); + Input(currentLevel, "CoarseMap"); +} - RCP A = Get>(currentLevel, "A"); - RCP aggregates = Get>(currentLevel, "Aggregates"); +template +RCP +AggregateQualityEstimateFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + +#define SET_VALID_ENTRY(name) \ + validParamList->setEntry(name, MasterList::getEntry(name)) + SET_VALID_ENTRY("aggregate qualities: good aggregate threshold"); + SET_VALID_ENTRY("aggregate qualities: file output"); + SET_VALID_ENTRY("aggregate qualities: file base"); + SET_VALID_ENTRY("aggregate qualities: check symmetry"); + SET_VALID_ENTRY("aggregate qualities: algorithm"); + 
SET_VALID_ENTRY("aggregate qualities: zero threshold"); + SET_VALID_ENTRY("aggregate qualities: percentiles"); + SET_VALID_ENTRY("aggregate qualities: mode"); + +#undef SET_VALID_ENTRY + + validParamList->set>( + "A", Teuchos::null, "Generating factory of the matrix A"); + validParamList->set>( + "Aggregates", Teuchos::null, "Generating factory of the aggregates"); + validParamList->set>( + "CoarseMap", Teuchos::null, "Generating factory of the coarse map"); + + return validParamList; +} - RCP map = Get< RCP >(currentLevel, "CoarseMap"); +template +void AggregateQualityEstimateFactory::Build(Level ¤tLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); - assert(!aggregates->AggregatesCrossProcessors()); - ParameterList pL = GetParameterList(); - std::string mode = pL.get("aggregate qualities: mode"); - GetOStream(Statistics1) << "AggregateQuality: mode "< A = Get>(currentLevel, "A"); + RCP aggregates = Get>(currentLevel, "Aggregates"); - RCP> aggregate_qualities; - if(mode == "eigenvalue" || mode == "both") { - aggregate_qualities = Xpetra::MultiVectorFactory::Build(map, 1); - ComputeAggregateQualities(A, aggregates, aggregate_qualities); - OutputAggQualities(currentLevel, aggregate_qualities); - } - if(mode == "size" || mode =="both") { - RCP aggregate_sizes = Xpetra::VectorFactory::Build(map); - ComputeAggregateSizes(A,aggregates,aggregate_sizes); - Set(currentLevel, "AggregateSizes",aggregate_sizes); - OutputAggSizes(currentLevel, aggregate_sizes); - } - Set(currentLevel, "AggregateQualities", aggregate_qualities); + RCP map = Get>(currentLevel, "CoarseMap"); + assert(!aggregates->AggregatesCrossProcessors()); + ParameterList pL = GetParameterList(); + std::string mode = pL.get("aggregate qualities: mode"); + GetOStream(Statistics1) << "AggregateQuality: mode " << mode << std::endl; + RCP> aggregate_qualities; + if (mode == "eigenvalue" || mode == "both") { + aggregate_qualities = + Xpetra::MultiVectorFactory::Build(map, 1); + 
ComputeAggregateQualities(A, aggregates, aggregate_qualities); + OutputAggQualities(currentLevel, aggregate_qualities); } - - template - void AggregateQualityEstimateFactory::ConvertAggregatesData(RCP aggs, ArrayRCP& aggSortedVertices, ArrayRCP& aggsToIndices, ArrayRCP& aggSizes) { - - // Reorder local aggregate information into a format amenable to computing - // per-aggregate quantities. Specifically, we compute a format - // similar to compressed sparse row format for sparse matrices in which - // we store all the local vertices in a single array in blocks corresponding - // to aggregates. (This array is aggSortedVertices.) We then store a second - // array (aggsToIndices) whose k-th element stores the index of the first - // vertex in aggregate k in the array aggSortedVertices. - - const LO LO_ZERO = Teuchos::OrdinalTraits::zero(); - const LO LO_ONE = Teuchos::OrdinalTraits::one(); - - LO numAggs = aggs->GetNumAggregates(); - aggSizes = aggs->ComputeAggregateSizesArrayRCP(); - - aggsToIndices = ArrayRCP(numAggs+LO_ONE,LO_ZERO); - - for (LO i=0;i vertex2AggId = aggs->GetVertex2AggId(); - const ArrayRCP vertex2AggIdData = vertex2AggId->getData(0); - - LO numNodes = vertex2AggId->getLocalLength(); - aggSortedVertices = ArrayRCP(numNodes,-LO_ONE); - std::vector vertexInsertionIndexByAgg(numNodes,LO_ZERO); - - for (LO i=0;i=numAggs) continue; - - aggSortedVertices[aggsToIndices[aggId]+vertexInsertionIndexByAgg[aggId]] = i; - vertexInsertionIndexByAgg[aggId]++; - - } - - + if (mode == "size" || mode == "both") { + RCP aggregate_sizes = + Xpetra::VectorFactory::Build(map); + ComputeAggregateSizes(A, aggregates, aggregate_sizes); + Set(currentLevel, "AggregateSizes", aggregate_sizes); + OutputAggSizes(currentLevel, aggregate_sizes); } + Set(currentLevel, "AggregateQualities", aggregate_qualities); +} - template - void AggregateQualityEstimateFactory::ComputeAggregateQualities(RCP A, RCP aggs, RCP> agg_qualities) const { +template +void AggregateQualityEstimateFactory< 
+ Scalar, LocalOrdinal, GlobalOrdinal, + Node>::ConvertAggregatesData(RCP aggs, + ArrayRCP &aggSortedVertices, + ArrayRCP &aggsToIndices, + ArrayRCP &aggSizes) { + + // Reorder local aggregate information into a format amenable to computing + // per-aggregate quantities. Specifically, we compute a format + // similar to compressed sparse row format for sparse matrices in which + // we store all the local vertices in a single array in blocks corresponding + // to aggregates. (This array is aggSortedVertices.) We then store a second + // array (aggsToIndices) whose k-th element stores the index of the first + // vertex in aggregate k in the array aggSortedVertices. + + const LO LO_ZERO = Teuchos::OrdinalTraits::zero(); + const LO LO_ONE = Teuchos::OrdinalTraits::one(); + + LO numAggs = aggs->GetNumAggregates(); + aggSizes = aggs->ComputeAggregateSizesArrayRCP(); + + aggsToIndices = ArrayRCP(numAggs + LO_ONE, LO_ZERO); + + for (LO i = 0; i < numAggs; ++i) { + aggsToIndices[i + LO_ONE] = aggsToIndices[i] + aggSizes[i]; + } - const SC SCALAR_ONE = Teuchos::ScalarTraits::one(); - const SC SCALAR_TWO = SCALAR_ONE + SCALAR_ONE; + const RCP vertex2AggId = aggs->GetVertex2AggId(); + const ArrayRCP vertex2AggIdData = vertex2AggId->getData(0); - const LO LO_ZERO = Teuchos::OrdinalTraits::zero(); - const LO LO_ONE = Teuchos::OrdinalTraits::one(); + LO numNodes = vertex2AggId->getLocalLength(); + aggSortedVertices = ArrayRCP(numNodes, -LO_ONE); + std::vector vertexInsertionIndexByAgg(numNodes, LO_ZERO); - using MT = magnitudeType; - const MT MT_ZERO = Teuchos::ScalarTraits::zero(); - const MT MT_ONE = Teuchos::ScalarTraits::one(); - ParameterList pL = GetParameterList(); + for (LO i = 0; i < numNodes; ++i) { - RCP AT = A; + LO aggId = vertex2AggIdData[i]; + if (aggId < 0 || aggId >= numAggs) + continue; - // Algorithm check - std::string algostr = pL.get("aggregate qualities: algorithm"); - MT zeroThreshold = Teuchos::as(pL.get("aggregate qualities: zero threshold")); - enum 
AggAlgo {ALG_FORWARD=0, ALG_REVERSE}; - AggAlgo algo; - if(algostr == "forward") {algo = ALG_FORWARD; GetOStream(Statistics1) << "AggregateQuality: Using 'forward' algorithm" << std::endl;} - else if(algostr == "reverse") {algo = ALG_REVERSE; GetOStream(Statistics1) << "AggregateQuality: Using 'reverse' algorithm" << std::endl;} - else { - TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError, "\"algorithm\" must be one of (forward|reverse)"); - } + aggSortedVertices[aggsToIndices[aggId] + vertexInsertionIndexByAgg[aggId]] = + i; + vertexInsertionIndexByAgg[aggId]++; + } +} - bool check_symmetry = pL.get("aggregate qualities: check symmetry"); - if (check_symmetry) { +template +void AggregateQualityEstimateFactory:: + ComputeAggregateQualities( + RCP A, RCP aggs, + RCP> agg_qualities) + const { + + const SC SCALAR_ONE = Teuchos::ScalarTraits::one(); + const SC SCALAR_TWO = SCALAR_ONE + SCALAR_ONE; + + const LO LO_ZERO = Teuchos::OrdinalTraits::zero(); + const LO LO_ONE = Teuchos::OrdinalTraits::one(); + + using MT = magnitudeType; + const MT MT_ZERO = Teuchos::ScalarTraits::zero(); + const MT MT_ONE = Teuchos::ScalarTraits::one(); + ParameterList pL = GetParameterList(); + + RCP AT = A; + + // Algorithm check + std::string algostr = pL.get("aggregate qualities: algorithm"); + MT zeroThreshold = + Teuchos::as(pL.get("aggregate qualities: zero threshold")); + enum AggAlgo { ALG_FORWARD = 0, ALG_REVERSE }; + AggAlgo algo; + if (algostr == "forward") { + algo = ALG_FORWARD; + GetOStream(Statistics1) + << "AggregateQuality: Using 'forward' algorithm" << std::endl; + } else if (algostr == "reverse") { + algo = ALG_REVERSE; + GetOStream(Statistics1) + << "AggregateQuality: Using 'reverse' algorithm" << std::endl; + } else { + TEUCHOS_TEST_FOR_EXCEPTION( + 1, Exceptions::RuntimeError, + "\"algorithm\" must be one of (forward|reverse)"); + } - RCP x = MultiVectorFactory::Build(A->getMap(), 1, false); - x->Xpetra_randomize(); + bool check_symmetry = pL.get("aggregate 
qualities: check symmetry"); + if (check_symmetry) { - RCP tmp = MultiVectorFactory::Build(A->getMap(), 1, false); + RCP x = MultiVectorFactory::Build(A->getMap(), 1, false); + x->Xpetra_randomize(); - A->apply(*x, *tmp, Teuchos::NO_TRANS); // tmp now stores A*x - A->apply(*x, *tmp, Teuchos::TRANS, -SCALAR_ONE, SCALAR_ONE); // tmp now stores A*x - A^T*x + RCP tmp = MultiVectorFactory::Build(A->getMap(), 1, false); - Array tmp_norm(1); - tmp->norm2(tmp_norm()); + A->apply(*x, *tmp, Teuchos::NO_TRANS); // tmp now stores A*x + A->apply(*x, *tmp, Teuchos::TRANS, -SCALAR_ONE, + SCALAR_ONE); // tmp now stores A*x - A^T*x - Array x_norm(1); - tmp->norm2(x_norm()); + Array tmp_norm(1); + tmp->norm2(tmp_norm()); - if (tmp_norm[0] > 1e-10*x_norm[0]) { - std::string transpose_string = "transpose"; - RCP whatever; - AT = Utilities::Transpose(*rcp_const_cast(A), true, transpose_string, whatever); + Array x_norm(1); + tmp->norm2(x_norm()); - assert(A->getMap()->isSameAs( *(AT->getMap()) )); - } + if (tmp_norm[0] > 1e-10 * x_norm[0]) { + std::string transpose_string = "transpose"; + RCP whatever; + AT = Utilities::Transpose(*rcp_const_cast(A), true, + transpose_string, whatever); + assert(A->getMap()->isSameAs(*(AT->getMap()))); } + } - // Reorder local aggregate information into a format amenable to computing - // per-aggregate quantities. Specifically, we compute a format - // similar to compressed sparse row format for sparse matrices in which - // we store all the local vertices in a single array in blocks corresponding - // to aggregates. (This array is aggSortedVertices.) We then store a second - // array (aggsToIndices) whose k-th element stores the index of the first - // vertex in aggregate k in the array aggSortedVertices. 
- - ArrayRCP aggSortedVertices, aggsToIndices, aggSizes; - ConvertAggregatesData(aggs, aggSortedVertices, aggsToIndices, aggSizes); - - LO numAggs = aggs->GetNumAggregates(); + // Reorder local aggregate information into a format amenable to computing + // per-aggregate quantities. Specifically, we compute a format + // similar to compressed sparse row format for sparse matrices in which + // we store all the local vertices in a single array in blocks corresponding + // to aggregates. (This array is aggSortedVertices.) We then store a second + // array (aggsToIndices) whose k-th element stores the index of the first + // vertex in aggregate k in the array aggSortedVertices. - // Compute the per-aggregate quality estimate + ArrayRCP aggSortedVertices, aggsToIndices, aggSizes; + ConvertAggregatesData(aggs, aggSortedVertices, aggsToIndices, aggSizes); - typedef Teuchos::SerialDenseMatrix DenseMatrix; - typedef Teuchos::SerialDenseVector DenseVector; + LO numAggs = aggs->GetNumAggregates(); - ArrayView rowIndices; - ArrayView rowValues; - ArrayView colValues; - Teuchos::LAPACK myLapack; + // Compute the per-aggregate quality estimate - // Iterate over each aggregate to compute the quality estimate - for (LO aggId=LO_ZERO; aggId DenseMatrix; + typedef Teuchos::SerialDenseVector DenseVector; - LO aggSize = aggSizes[aggId]; - DenseMatrix A_aggPart(aggSize, aggSize, true); - DenseVector offDiagonalAbsoluteSums(aggSize, true); + ArrayView rowIndices; + ArrayView rowValues; + ArrayView colValues; + Teuchos::LAPACK myLapack; - // Iterate over each node in the aggregate - for (LO idx=LO_ZERO; idxgetLocalRowView(nodeId, rowIndices, rowValues); - AT->getLocalRowView(nodeId, rowIndices, colValues); + LO aggSize = aggSizes[aggId]; + DenseMatrix A_aggPart(aggSize, aggSize, true); + DenseVector offDiagonalAbsoluteSums(aggSize, true); - // Iterate over each element in the row corresponding to the current node - for (LO elem=LO_ZERO; elemgetLocalRowView(nodeId, rowIndices, rowValues); 
+ AT->getLocalRowView(nodeId, rowIndices, colValues); - LO idxInAgg = -LO_ONE; // -1 if element is not in aggregate + // Iterate over each element in the row corresponding to the current node + for (LO elem = LO_ZERO; elem < rowIndices.size(); ++elem) { - // Check whether the element belongs in the aggregate. If it does - // find, its index. Otherwise, add it's value to the off diagonal - // sums - for (LO idx2=LO_ZERO; idx2::magnitude(val); - - } else { // Element does belong to aggregate + if (idxInAgg == -LO_ONE) { // Element does not belong to aggregate - A_aggPart(idx,idxInAgg) = Teuchos::ScalarTraits::real(val); + offDiagonalAbsoluteSums[idx] += + Teuchos::ScalarTraits::magnitude(val); - } + } else { // Element does belong to aggregate + A_aggPart(idx, idxInAgg) = Teuchos::ScalarTraits::real(val); } - } + } - // Construct a diagonal matrix consisting of the diagonal - // of A_aggPart - DenseMatrix A_aggPartDiagonal(aggSize, aggSize, true); - MT diag_sum = MT_ZERO; - for (int i=0;i::real(A_aggPart(i,i)); - diag_sum += Teuchos::ScalarTraits::real(A_aggPart(i,i)); - } - - DenseMatrix ones(aggSize, aggSize, false); - ones.putScalar(MT_ONE); - - // Compute matrix on top of generalized Rayleigh quotient - // topMatrix = A_aggPartDiagonal - A_aggPartDiagonal*ones*A_aggPartDiagonal/diag_sum; - DenseMatrix tmp(aggSize, aggSize, false); - DenseMatrix topMatrix(A_aggPartDiagonal); - - tmp.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, MT_ONE, ones, A_aggPartDiagonal, MT_ZERO); - topMatrix.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -MT_ONE/diag_sum, A_aggPartDiagonal, tmp, MT_ONE); - - // Compute matrix on bottom of generalized Rayleigh quotient - DenseMatrix bottomMatrix(A_aggPart); - MT matrixNorm = A_aggPart.normInf(); + // Construct a diagonal matrix consisting of the diagonal + // of A_aggPart + DenseMatrix A_aggPartDiagonal(aggSize, aggSize, true); + MT diag_sum = MT_ZERO; + for (int i = 0; i < aggSize; ++i) { + A_aggPartDiagonal(i, i) = + 
Teuchos::ScalarTraits::real(A_aggPart(i, i)); + diag_sum += Teuchos::ScalarTraits::real(A_aggPart(i, i)); + } - // Forward mode: Include a small perturbation to the bottom matrix to make it nonsingular - const MT boost = (algo == ALG_FORWARD) ? (-1e4*Teuchos::ScalarTraits::eps()*matrixNorm) : MT_ZERO; + DenseMatrix ones(aggSize, aggSize, false); + ones.putScalar(MT_ONE); + + // Compute matrix on top of generalized Rayleigh quotient + // topMatrix = A_aggPartDiagonal - + // A_aggPartDiagonal*ones*A_aggPartDiagonal/diag_sum; + DenseMatrix tmp(aggSize, aggSize, false); + DenseMatrix topMatrix(A_aggPartDiagonal); + + tmp.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, MT_ONE, ones, + A_aggPartDiagonal, MT_ZERO); + topMatrix.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -MT_ONE / diag_sum, + A_aggPartDiagonal, tmp, MT_ONE); + + // Compute matrix on bottom of generalized Rayleigh quotient + DenseMatrix bottomMatrix(A_aggPart); + MT matrixNorm = A_aggPart.normInf(); + + // Forward mode: Include a small perturbation to the bottom matrix to make + // it nonsingular + const MT boost = + (algo == ALG_FORWARD) + ? (-1e4 * Teuchos::ScalarTraits::eps() * matrixNorm) + : MT_ZERO; + + for (int i = 0; i < aggSize; ++i) { + bottomMatrix(i, i) -= offDiagonalAbsoluteSums(i) + boost; + } - for (int i=0;igetDataNonConst(0))[aggId] = (MT_ONE+MT_ONE)*maxEigenVal; - } - else { - // Reverse: Swap the top and bottom matrices for the generalized eigenvalue problem - // This is trickier, since we need to grab the smallest non-zero eigenvalue and invert it. 
- myLapack.GGES(compute_flag,compute_flag,compute_flag,ptr2func,aggSize, - bottomMatrix.values(),aggSize,topMatrix.values(),aggSize,&sdim, - alpha_real.values(),alpha_imag.values(),beta.values(),vl,aggSize, - vr,aggSize,workArray.values(),workArray.length(),bwork, - &info); - - TEUCHOS_ASSERT(info == LO_ZERO); - - MT minEigenVal = MT_ZERO; - - for (int i=LO_ZERO;i zeroThreshold) { - if (minEigenVal == MT_ZERO) minEigenVal = ev; - else minEigenVal = std::min(minEigenVal,ev); - } + (agg_qualities->getDataNonConst(0))[aggId] = + (MT_ONE + MT_ONE) * maxEigenVal; + } else { + // Reverse: Swap the top and bottom matrices for the generalized + // eigenvalue problem This is trickier, since we need to grab the smallest + // non-zero eigenvalue and invert it. + myLapack.GGES(compute_flag, compute_flag, compute_flag, ptr2func, aggSize, + bottomMatrix.values(), aggSize, topMatrix.values(), aggSize, + &sdim, alpha_real.values(), alpha_imag.values(), + beta.values(), vl, aggSize, vr, aggSize, workArray.values(), + workArray.length(), bwork, &info); + + TEUCHOS_ASSERT(info == LO_ZERO); + + MT minEigenVal = MT_ZERO; + + for (int i = LO_ZERO; i < aggSize; ++i) { + MT ev = alpha_real[i] / beta[i]; + if (ev > zeroThreshold) { + if (minEigenVal == MT_ZERO) + minEigenVal = ev; + else + minEigenVal = std::min(minEigenVal, ev); } - if(minEigenVal == MT_ZERO) (agg_qualities->getDataNonConst(0))[aggId] = Teuchos::ScalarTraits::rmax(); - else (agg_qualities->getDataNonConst(0))[aggId] = (MT_ONE+MT_ONE) / minEigenVal; } - }//end aggId loop - } - - template - void AggregateQualityEstimateFactory::OutputAggQualities(const Level& level, RCP> agg_qualities) const { - - ParameterList pL = GetParameterList(); - - magnitudeType good_agg_thresh = Teuchos::as(pL.get("aggregate qualities: good aggregate threshold")); - using MT = magnitudeType; - - ArrayRCP data = agg_qualities->getData(0); + if (minEigenVal == MT_ZERO) + (agg_qualities->getDataNonConst(0))[aggId] = + Teuchos::ScalarTraits::rmax(); + 
else + (agg_qualities->getDataNonConst(0))[aggId] = + (MT_ONE + MT_ONE) / minEigenVal; + } + } // end aggId loop +} - LO num_bad_aggs = 0; - MT worst_agg = 0.0; +template +void AggregateQualityEstimateFactory:: + OutputAggQualities( + const Level &level, + RCP> + agg_qualities) const { - MT mean_bad_agg = 0.0; - MT mean_good_agg = 0.0; + ParameterList pL = GetParameterList(); + magnitudeType good_agg_thresh = Teuchos::as( + pL.get("aggregate qualities: good aggregate threshold")); + using MT = magnitudeType; - for (size_t i=0;igetLocalLength();++i) { + ArrayRCP data = agg_qualities->getData(0); - if (data[i] > good_agg_thresh) { - num_bad_aggs++; - mean_bad_agg += data[i]; - } - else { - mean_good_agg += data[i]; - } - worst_agg = std::max(worst_agg, data[i]); - } + LO num_bad_aggs = 0; + MT worst_agg = 0.0; + MT mean_bad_agg = 0.0; + MT mean_good_agg = 0.0; - if (num_bad_aggs > 0) mean_bad_agg /= num_bad_aggs; - mean_good_agg /= agg_qualities->getLocalLength() - num_bad_aggs; + for (size_t i = 0; i < agg_qualities->getLocalLength(); ++i) { - if (num_bad_aggs == 0) { - GetOStream(Statistics1) << "All aggregates passed the quality measure. Worst aggregate had quality " << worst_agg << ". Mean aggregate quality " << mean_good_agg << "." << std::endl; + if (data[i] > good_agg_thresh) { + num_bad_aggs++; + mean_bad_agg += data[i]; } else { - GetOStream(Statistics1) << num_bad_aggs << " out of " << agg_qualities->getLocalLength() << " did not pass the quality measure. Worst aggregate had quality " << worst_agg << ". " - << "Mean bad aggregate quality " << mean_bad_agg << ". Mean good aggregate quality " << mean_good_agg << "." 
<< std::endl; + mean_good_agg += data[i]; } + worst_agg = std::max(worst_agg, data[i]); + } - if (pL.get("aggregate qualities: file output")) { - std::string filename = pL.get("aggregate qualities: file base")+"."+std::to_string(level.GetLevelID()); - Xpetra::IO::Write(filename, *agg_qualities); - } + if (num_bad_aggs > 0) + mean_bad_agg /= num_bad_aggs; + mean_good_agg /= agg_qualities->getLocalLength() - num_bad_aggs; + + if (num_bad_aggs == 0) { + GetOStream(Statistics1) << "All aggregates passed the quality measure. " + "Worst aggregate had quality " + << worst_agg << ". Mean aggregate quality " + << mean_good_agg << "." << std::endl; + } else { + GetOStream(Statistics1) + << num_bad_aggs << " out of " << agg_qualities->getLocalLength() + << " did not pass the quality measure. Worst aggregate had quality " + << worst_agg << ". " + << "Mean bad aggregate quality " << mean_bad_agg + << ". Mean good aggregate quality " << mean_good_agg << "." + << std::endl; + } - { - const auto n = size_t(agg_qualities->getLocalLength()); + if (pL.get("aggregate qualities: file output")) { + std::string filename = + pL.get("aggregate qualities: file base") + "." 
+ + std::to_string(level.GetLevelID()); + Xpetra::IO::Write(filename, *agg_qualities); + } - std::vector tmp; - tmp.reserve(n); + { + const auto n = size_t(agg_qualities->getLocalLength()); - for (size_t i=0; i tmp; + tmp.reserve(n); - std::sort(tmp.begin(), tmp.end()); + for (size_t i = 0; i < n; ++i) { + tmp.push_back(data[i]); + } - Teuchos::ArrayView percents = pL.get >("aggregate qualities: percentiles")(); + std::sort(tmp.begin(), tmp.end()); - GetOStream(Statistics1) << "AGG QUALITY HEADER : | LEVEL | TOTAL |"; - for (auto percent : percents) { - GetOStream(Statistics1) << std::fixed << std::setprecision(4) <<100.0*percent << "% |"; - } - GetOStream(Statistics1) << std::endl; - - GetOStream(Statistics1) << "AGG QUALITY PERCENTILES: | " << level.GetLevelID() << " | " << n << "|"; - for (auto percent : percents) { - size_t i = size_t(n*percent); - i = i < n ? i : n-1u; - i = i > 0u ? i : 0u; - GetOStream(Statistics1) << std::fixed < percents = + pL.get>("aggregate qualities: percentiles")(); + GetOStream(Statistics1) << "AGG QUALITY HEADER : | LEVEL | TOTAL |"; + for (auto percent : percents) { + GetOStream(Statistics1) + << std::fixed << std::setprecision(4) << 100.0 * percent << "% |"; } + GetOStream(Statistics1) << std::endl; + + GetOStream(Statistics1) << "AGG QUALITY PERCENTILES: | " + << level.GetLevelID() << " | " << n << "|"; + for (auto percent : percents) { + size_t i = size_t(n * percent); + i = i < n ? i : n - 1u; + i = i > 0u ? 
i : 0u; + GetOStream(Statistics1) + << std::fixed << std::setprecision(4) << tmp[i] << " |"; + } + GetOStream(Statistics1) << std::endl; } - - +} template - void AggregateQualityEstimateFactory::ComputeAggregateSizes(RCP A, RCP aggs, RCP agg_sizes) const { - - ArrayRCP aggSortedVertices, aggsToIndices, aggSizes; - ConvertAggregatesData(aggs, aggSortedVertices, aggsToIndices, aggSizes); - - // Iterate over each node in the aggregate - auto data = agg_sizes->getDataNonConst(0); - for (LO i=0; i<(LO)aggSizes.size(); i++) - data[i] = aggSizes[i]; +void AggregateQualityEstimateFactory< + Scalar, LocalOrdinal, GlobalOrdinal, + Node>::ComputeAggregateSizes(RCP A, + RCP aggs, + RCP agg_sizes) const { + + ArrayRCP aggSortedVertices, aggsToIndices, aggSizes; + ConvertAggregatesData(aggs, aggSortedVertices, aggsToIndices, aggSizes); + + // Iterate over each node in the aggregate + auto data = agg_sizes->getDataNonConst(0); + for (LO i = 0; i < (LO)aggSizes.size(); i++) + data[i] = aggSizes[i]; } - - template - void AggregateQualityEstimateFactory::OutputAggSizes(const Level& level, RCP agg_sizes) const { +void AggregateQualityEstimateFactory< + Scalar, LocalOrdinal, GlobalOrdinal, + Node>::OutputAggSizes(const Level &level, + RCP agg_sizes) const { - ParameterList pL = GetParameterList(); - using MT = magnitudeType; + ParameterList pL = GetParameterList(); + using MT = magnitudeType; - ArrayRCP data = agg_sizes->getData(0); + ArrayRCP data = agg_sizes->getData(0); + if (pL.get("aggregate qualities: file output")) { + std::string filename = + pL.get("aggregate qualities: file base") + ".sizes." 
+ + std::to_string(level.GetLevelID()); + Xpetra::IO::Write(filename, *agg_sizes); + } - if (pL.get("aggregate qualities: file output")) { - std::string filename = pL.get("aggregate qualities: file base")+".sizes."+std::to_string(level.GetLevelID()); - Xpetra::IO::Write(filename, *agg_sizes); - } - - { - size_t n = (size_t)agg_sizes->getLocalLength(); - - std::vector tmp; - tmp.reserve(n); + { + size_t n = (size_t)agg_sizes->getLocalLength(); - for (size_t i=0; i(data[i])); - } + std::vector tmp; + tmp.reserve(n); - std::sort(tmp.begin(), tmp.end()); + for (size_t i = 0; i < n; ++i) { + tmp.push_back(Teuchos::as(data[i])); + } - Teuchos::ArrayView percents = pL.get >("aggregate qualities: percentiles")(); + std::sort(tmp.begin(), tmp.end()); - GetOStream(Statistics1) << "AGG SIZE HEADER : | LEVEL | TOTAL |"; - for (auto percent : percents) { - GetOStream(Statistics1) << std::fixed << std::setprecision(4) <<100.0*percent << "% |"; - } - GetOStream(Statistics1) << std::endl; - - GetOStream(Statistics1) << "AGG SIZE PERCENTILES: | " << level.GetLevelID() << " | " << n << "|"; - for (auto percent : percents) { - size_t i = size_t(n*percent); - i = i < n ? i : n-1u; - i = i > 0u ? i : 0u; - GetOStream(Statistics1) << std::fixed < percents = + pL.get>("aggregate qualities: percentiles")(); + GetOStream(Statistics1) << "AGG SIZE HEADER : | LEVEL | TOTAL |"; + for (auto percent : percents) { + GetOStream(Statistics1) + << std::fixed << std::setprecision(4) << 100.0 * percent << "% |"; + } + GetOStream(Statistics1) << std::endl; + + GetOStream(Statistics1) << "AGG SIZE PERCENTILES: | " + << level.GetLevelID() << " | " << n << "|"; + for (auto percent : percents) { + size_t i = size_t(n * percent); + i = i < n ? i : n - 1u; + i = i > 0u ? 
i : 0u; + GetOStream(Statistics1) + << std::fixed << std::setprecision(4) << tmp[i] << " |"; } + GetOStream(Statistics1) << std::endl; } - - +} } // namespace MueLu diff --git a/packages/muelu/src/Misc/MueLu_BlockedCoordinatesTransferFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_BlockedCoordinatesTransferFactory_decl.hpp index 06c3a5ed6917..ea54de3a7f2c 100644 --- a/packages/muelu/src/Misc/MueLu_BlockedCoordinatesTransferFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_BlockedCoordinatesTransferFactory_decl.hpp @@ -48,8 +48,8 @@ #include "MueLu_ConfigDefs.hpp" #include "MueLu_TwoLevelFactoryBase.hpp" -#include "Xpetra_MultiVector_fwd.hpp" #include "Xpetra_Matrix_fwd.hpp" +#include "Xpetra_MultiVector_fwd.hpp" #include "MueLu_BlockedCoordinatesTransferFactory_fwd.hpp" @@ -57,100 +57,109 @@ namespace MueLu { /*! @class BlockedCoordinatesTransferFactory class. - @brief Class for transferring coordinates from a finer level to a coarser one for BlockedCrsMatrices. - This basically combines the Coordinates generated by each separate block + @brief Class for transferring coordinates from a finer level to a coarser one + for BlockedCrsMatrices. This basically combines the Coordinates generated by + each separate block ## Input/output of BlockedCoordinatesTransferFactory ## ### User parameters of BlockedCoordinatesTransferFactory ### Parameter | type | default | master.xml | validated | requested | description ----------|------|---------|:----------:|:---------:|:---------:|------------ - | BlockedCoordinates| Factory | null | | * | (*) | Factory providing coordinates - | Aggregates | Factory | null | | * | (*) | Factory providing aggregates - | CoarseMap | Factory | null | | * | (*) | Generating factory of the coarse map - - The * in the @c master.xml column denotes that the parameter is defined in the @c master.xml file.
- The * in the @c validated column means that the parameter is declared in the list of valid input parameters (see BlockedCoordinatesTransferFactory::GetValidParameters).
- The * in the @c requested column states that the data is requested as input with all dependencies (see BlockedCoordinatesTransferFactory::DeclareInput). - - The BlockedCoordinatesTransferFact first checks whether there is already valid coarse coordinates information - available on the coarse level. If that is the case, we can skip the coordinate transfer and just reuse - the available information. - Otherwise we try to build coarse grid coordinates by using the information from the sub-factories. + | BlockedCoordinates| Factory | null | | * | (*) | Factory providing + coordinates | Aggregates | Factory | null | | * | (*) | Factory providing + aggregates | CoarseMap | Factory | null | | * | (*) | Generating factory of + the coarse map + + The * in the @c master.xml column denotes that the parameter is defined in the + @c master.xml file.
The * in the @c validated column means that the + parameter is declared in the list of valid input parameters (see + BlockedCoordinatesTransferFactory::GetValidParameters).
The * in the @c + requested column states that the data is requested as input with all + dependencies (see BlockedCoordinatesTransferFactory::DeclareInput). + + The BlockedCoordinatesTransferFact first checks whether there is already valid + coarse coordinates information available on the coarse level. If that is the + case, we can skip the coordinate transfer and just reuse the available + information. Otherwise we try to build coarse grid coordinates by using the + information from the sub-factories. ### Variables provided by BlockedCoordinatesTransferFactory ### - After BlockedCoordinatesTransferFactory::Build the following data is available (if requested) + After BlockedCoordinatesTransferFactory::Build the following data is available + (if requested) Parameter | generated by | description ----------|--------------|------------ - | Coordinates | BlockedCoordinatesTransferFactory | coarse level coordinates (unified) + | Coordinates | BlockedCoordinatesTransferFactory | coarse level coordinates + (unified) */ - template - class BlockedCoordinatesTransferFactory : public TwoLevelFactoryBase { +template +class BlockedCoordinatesTransferFactory : public TwoLevelFactoryBase { #undef MUELU_BLOCKEDCOORDINATESTRANSFERFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ - - /*! @brief Constructor. +public: + //! @name Constructors/Destructors. + //@{ - @param vectorName The name of the quantity to be restricted. - @param restrictionName The name of the restriction Matrix. + /*! @brief Constructor. - The operator associated with projectionName will be applied to the MultiVector associated with - vectorName. - */ - BlockedCoordinatesTransferFactory() { } + @param vectorName The name of the quantity to be restricted. + @param restrictionName The name of the restriction Matrix. - //! Destructor. 
- virtual ~BlockedCoordinatesTransferFactory() { } + The operator associated with projectionName will be applied to the + MultiVector associated with vectorName. + */ + BlockedCoordinatesTransferFactory() {} - RCP GetValidParameterList() const; + //! Destructor. + virtual ~BlockedCoordinatesTransferFactory() {} - //@} + RCP GetValidParameterList() const; - //! @name Input - //@{ + //@} - /*! @brief Specifies the data that this class needs, and the factories that generate that data. + //! @name Input + //@{ - If the Build method of this class requires some data, but the generating factory is not specified in DeclareInput, then this class - will fall back to the settings in FactoryManager. - */ - void DeclareInput(Level &finelevel, Level &coarseLevel) const; + /*! @brief Specifies the data that this class needs, and the factories that + generate that data. - //@} + If the Build method of this class requires some data, but the generating + factory is not specified in DeclareInput, then this class will fall back to + the settings in FactoryManager. + */ + void DeclareInput(Level &finelevel, Level &coarseLevel) const; - //! @name Build methods. - //@{ + //@} - //! Build an object with this factory. - void Build(Level & fineLevel, Level &coarseLevel) const; + //! @name Build methods. + //@{ - //@} + //! Build an object with this factory. + void Build(Level &fineLevel, Level &coarseLevel) const; - //@{ - /*! @brief Add (sub) coords factory in the end of list of factories in BlockedCoordinatesTransferFactory. + //@} - */ - void AddFactory(const RCP& factory); + //@{ + /*! @brief Add (sub) coords factory in the end of list of factories in + BlockedCoordinatesTransferFactory. + */ + void AddFactory(const RCP &factory); - //! Returns number of sub factories. - size_t NumFactories() const { return subFactories_.size(); } + //! Returns number of sub factories. + size_t NumFactories() const { return subFactories_.size(); } - //@} - private: - //! 
list of user-defined sub Factories - std::vector > subFactories_; + //@} +private: + //! list of user-defined sub Factories + std::vector> subFactories_; - }; // class BlockedCoordinatesTransferFactory +}; // class BlockedCoordinatesTransferFactory } // namespace MueLu diff --git a/packages/muelu/src/Misc/MueLu_BlockedCoordinatesTransferFactory_def.hpp b/packages/muelu/src/Misc/MueLu_BlockedCoordinatesTransferFactory_def.hpp index e83e92696def..893bc9da51a7 100644 --- a/packages/muelu/src/Misc/MueLu_BlockedCoordinatesTransferFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_BlockedCoordinatesTransferFactory_def.hpp @@ -46,10 +46,10 @@ #ifndef MUELU_BLOCKEDCOORDINATESTRANSFER_FACTORY_DEF_HPP #define MUELU_BLOCKEDCOORDINATESTRANSFER_FACTORY_DEF_HPP +#include "Xpetra_IO.hpp" #include "Xpetra_ImportFactory.hpp" -#include "Xpetra_MultiVectorFactory.hpp" #include "Xpetra_MapFactory.hpp" -#include "Xpetra_IO.hpp" +#include "Xpetra_MultiVectorFactory.hpp" #include "MueLu_BlockedCoordinatesTransferFactory_decl.hpp" @@ -58,109 +58,141 @@ namespace MueLu { - template - RCP BlockedCoordinatesTransferFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - validParamList->set >("Coordinates", Teuchos::null, "Factory for coordinates generation"); - validParamList->set >("CoarseMap", Teuchos::null, "Generating factory of the coarse map"); - return validParamList; +template +RCP +BlockedCoordinatesTransferFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + + validParamList->set>( + "Coordinates", Teuchos::null, "Factory for coordinates generation"); + validParamList->set>( + "CoarseMap", Teuchos::null, "Generating factory of the coarse map"); + return validParamList; +} + +template +void BlockedCoordinatesTransferFactory< + Scalar, LocalOrdinal, GlobalOrdinal, + Node>::DeclareInput(Level & /* fineLevel */, Level &coarseLevel) const { + Input(coarseLevel, "CoarseMap"); + + // Make sure the Level knows I 
need these sub-Factories + const size_t numSubFactories = NumFactories(); + for (size_t i = 0; i < numSubFactories; i++) { + const RCP &myFactory = subFactories_[i]; + coarseLevel.DeclareInput("Coordinates", myFactory.getRawPtr(), this); } - template - void BlockedCoordinatesTransferFactory::DeclareInput(Level& /* fineLevel */, Level& coarseLevel) const { - Input(coarseLevel, "CoarseMap"); - - // Make sure the Level knows I need these sub-Factories - const size_t numSubFactories = NumFactories(); - for(size_t i=0; i& myFactory = subFactories_[i]; - coarseLevel.DeclareInput("Coordinates", myFactory.getRawPtr(), this); - } - - // call DeclareInput of all user-given transfer factories - for (std::vector >::const_iterator it = subFactories_.begin(); it != subFactories_.end(); ++it) - (*it)->CallDeclareInput(coarseLevel); + // call DeclareInput of all user-given transfer factories + for (std::vector>::const_iterator it = + subFactories_.begin(); + it != subFactories_.end(); ++it) + (*it)->CallDeclareInput(coarseLevel); +} + +template +void BlockedCoordinatesTransferFactory::Build(Level & /* fineLevel */, + Level &coarseLevel) const { + FactoryMonitor m(*this, "Build", coarseLevel); + + typedef Xpetra::MultiVector< + typename Teuchos::ScalarTraits::coordinateType, LO, GO, NO> + dMV; + typedef Xpetra::BlockedMultiVector< + typename Teuchos::ScalarTraits::coordinateType, LO, GO, NO> + dBV; + + GetOStream(Runtime0) << "Transferring (blocked) coordinates" << std::endl; + + const size_t numSubFactories = NumFactories(); + std::vector> subBlockMaps(numSubFactories); + std::vector> subBlockCoords(numSubFactories); + + if (coarseLevel.IsAvailable("Coordinates", this)) { + GetOStream(Runtime0) << "Reusing coordinates" << std::endl; + return; } - template - void BlockedCoordinatesTransferFactory::Build(Level & /* fineLevel */, Level &coarseLevel) const { - FactoryMonitor m(*this, "Build", coarseLevel); - - typedef Xpetra::MultiVector::coordinateType,LO,GO,NO> dMV; - typedef 
Xpetra::BlockedMultiVector::coordinateType,LO,GO,NO> dBV; - - GetOStream(Runtime0) << "Transferring (blocked) coordinates" << std::endl; - - const size_t numSubFactories = NumFactories(); - std::vector > subBlockMaps(numSubFactories); - std::vector > subBlockCoords(numSubFactories); - - if (coarseLevel.IsAvailable("Coordinates", this)) { - GetOStream(Runtime0) << "Reusing coordinates" << std::endl; - return; - } + // Get components + for (size_t i = 0; i < numSubFactories; i++) { + GetOStream(Runtime1) << "Generating Coordinates for block " << i << "/" + << numSubFactories << std::endl; + const RCP &myFactory = subFactories_[i]; + myFactory->CallBuild(coarseLevel); + subBlockCoords[i] = + coarseLevel.Get>("Coordinates", myFactory.get()); + subBlockMaps[i] = subBlockCoords[i]->getMap(); + } - // Get components - for(size_t i=0; i& myFactory = subFactories_[i]; - myFactory->CallBuild(coarseLevel); - subBlockCoords[i] = coarseLevel.Get >("Coordinates", myFactory.get()); - subBlockMaps[i] = subBlockCoords[i]->getMap(); + // Blocked Map + RCP coarseCoordMapBlocked; + + { + // coarseMap is being used to set up the domain map of tentative P, and + // therefore, the row map of Ac Therefore, if we amalgamate coarseMap, + // logical nodes in the coordinates vector would correspond to logical + // blocks in the matrix + RCP coarseMap = + Get>(coarseLevel, "CoarseMap"); + bool thyraMode = coarseMap->getThyraMode(); + + ArrayView elementAList = + coarseMap->getFullMap()->getLocalElementList(); + + LO blkSize = 1; + if (rcp_dynamic_cast(coarseMap->getMap(0, thyraMode)) != + Teuchos::null) + blkSize = + rcp_dynamic_cast(coarseMap->getMap(0, thyraMode)) + ->getFixedBlockSize(); + + for (size_t i = 1; i < numSubFactories; i++) { + LO otherBlkSize = 1; + if (rcp_dynamic_cast(coarseMap->getMap(i, thyraMode)) != + Teuchos::null) + otherBlkSize = + rcp_dynamic_cast(coarseMap->getMap(i, thyraMode)) + ->getFixedBlockSize(); + TEUCHOS_TEST_FOR_EXCEPTION( + otherBlkSize != blkSize, 
Exceptions::RuntimeError, + "BlockedCoordinatesTransferFactory: Subblocks have different Block " + "sizes. This is not yet supported."); } - // Blocked Map - RCP coarseCoordMapBlocked; - - { - // coarseMap is being used to set up the domain map of tentative P, and therefore, the row map of Ac - // Therefore, if we amalgamate coarseMap, logical nodes in the coordinates vector would correspond to - // logical blocks in the matrix - RCP coarseMap = Get< RCP >(coarseLevel, "CoarseMap"); - bool thyraMode = coarseMap->getThyraMode(); - - ArrayView elementAList = coarseMap->getFullMap()->getLocalElementList(); - - LO blkSize = 1; - if (rcp_dynamic_cast(coarseMap->getMap(0, thyraMode)) != Teuchos::null) - blkSize = rcp_dynamic_cast(coarseMap->getMap(0, thyraMode))->getFixedBlockSize(); - - for(size_t i=1; i(coarseMap->getMap(i, thyraMode)) != Teuchos::null) - otherBlkSize = rcp_dynamic_cast(coarseMap->getMap(i, thyraMode))->getFixedBlockSize(); - TEUCHOS_TEST_FOR_EXCEPTION(otherBlkSize != blkSize, Exceptions::RuntimeError, "BlockedCoordinatesTransferFactory: Subblocks have different Block sizes. 
This is not yet supported."); - } + GO indexBase = coarseMap->getFullMap()->getIndexBase(); + size_t numElements = elementAList.size() / blkSize; + Array elementList(numElements); - GO indexBase = coarseMap->getFullMap()->getIndexBase(); - size_t numElements = elementAList.size() / blkSize; - Array elementList(numElements); + // Amalgamate the map + for (LO i = 0; i < Teuchos::as(numElements); i++) + elementList[i] = + (elementAList[i * blkSize] - indexBase) / blkSize + indexBase; - // Amalgamate the map - for (LO i = 0; i < Teuchos::as(numElements); i++) - elementList[i] = (elementAList[i*blkSize]-indexBase)/blkSize + indexBase; + RCP coarseCoordMap = MapFactory::Build( + coarseMap->getFullMap()->lib(), + Teuchos::OrdinalTraits::invalid(), elementList, + indexBase, coarseMap->getFullMap()->getComm()); - RCP coarseCoordMap = MapFactory::Build(coarseMap->getFullMap()->lib(), - Teuchos::OrdinalTraits::invalid(), elementList, indexBase, coarseMap->getFullMap()->getComm()); - - coarseCoordMapBlocked = rcp(new BlockedMap(coarseCoordMap, subBlockMaps, thyraMode)); - } - - // Build blocked coordinates vector - RCP bcoarseCoords = rcp(new dBV(coarseCoordMapBlocked,subBlockCoords)); - - // Turn the blocked coordinates vector into an unblocked one - RCP coarseCoords = bcoarseCoords->Merge(); - Set >(coarseLevel, "Coordinates", coarseCoords); + coarseCoordMapBlocked = + rcp(new BlockedMap(coarseCoordMap, subBlockMaps, thyraMode)); } - template - void BlockedCoordinatesTransferFactory::AddFactory(const RCP& factory) { - subFactories_.push_back(factory); - } + // Build blocked coordinates vector + RCP bcoarseCoords = rcp(new dBV(coarseCoordMapBlocked, subBlockCoords)); + // Turn the blocked coordinates vector into an unblocked one + RCP coarseCoords = bcoarseCoords->Merge(); + Set>(coarseLevel, "Coordinates", coarseCoords); +} +template +void BlockedCoordinatesTransferFactory< + Scalar, LocalOrdinal, GlobalOrdinal, + Node>::AddFactory(const RCP &factory) { + 
subFactories_.push_back(factory); +} } // namespace MueLu diff --git a/packages/muelu/src/Misc/MueLu_BlockedRAPFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_BlockedRAPFactory_decl.hpp index 63449a12d82b..b4f1d5ae3b6a 100644 --- a/packages/muelu/src/Misc/MueLu_BlockedRAPFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_BlockedRAPFactory_decl.hpp @@ -46,109 +46,114 @@ #ifndef MUELU_BLOCKEDRAPFACTORY_DECL_HPP #define MUELU_BLOCKEDRAPFACTORY_DECL_HPP -#include -#include #include +#include #include +#include #include "MueLu_ConfigDefs.hpp" #include "MueLu_RAPFactory_fwd.hpp" -#include "MueLu_Level_fwd.hpp" #include "MueLu_FactoryBase_fwd.hpp" +#include "MueLu_Level_fwd.hpp" #include "MueLu_PerfUtils_fwd.hpp" #include "MueLu_TwoLevelFactoryBase.hpp" namespace MueLu { - /*! - @class BlockedRAPFactory - @brief Factory for building coarse matrices. - */ - template - class BlockedRAPFactory : public TwoLevelFactoryBase { +/*! + @class BlockedRAPFactory + @brief Factory for building coarse matrices. +*/ +template +class BlockedRAPFactory : public TwoLevelFactoryBase { #undef MUELU_BLOCKEDRAPFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ +public: + //! @name Constructors/Destructors. + //@{ - BlockedRAPFactory(); + BlockedRAPFactory(); - virtual ~BlockedRAPFactory() = default; - //@} + virtual ~BlockedRAPFactory() = default; + //@} - //! @name Input - //@{ + //! @name Input + //@{ - RCP GetValidParameterList() const override; + RCP GetValidParameterList() const override; - void DeclareInput(Level &fineLevel, Level &coarseLevel) const override; + void DeclareInput(Level &fineLevel, Level &coarseLevel) const override; - //@} + //@} - //! @name Build methods. - //@{ - void Build(Level &fineLevel, Level &coarseLevel) const override; - //@} + //! @name Build methods. + //@{ + void Build(Level &fineLevel, Level &coarseLevel) const override; + //@} - //! @name Handling of user-defined transfer factories - //@{ + //! 
@name Handling of user-defined transfer factories + //@{ - //! Indicate that zero entries on the diagonal of Ac shall be repaired (i.e. if A(i,i) == 0.0 set A(i,i) = 1.0) - void SetRepairZeroDiagonal(bool const &repair) { - repairZeroDiagonals_ = repair; - if(repair) checkAc_ = true; // make sure that plausibility check is performed. Otherwise SetRepairZeroDiagonal(true) has no effect. - } + //! Indicate that zero entries on the diagonal of Ac shall be repaired (i.e. + //! if A(i,i) == 0.0 set A(i,i) = 1.0) + void SetRepairZeroDiagonal(bool const &repair) { + repairZeroDiagonals_ = repair; + if (repair) + checkAc_ = true; // make sure that plausibility check is performed. + // Otherwise SetRepairZeroDiagonal(true) has no effect. + } - //! Indicate that a simple plausibility check shall be done for Ac after building RAP - void SetPlausibilityCheck(bool const &check) { - checkAc_ = check; - } + //! Indicate that a simple plausibility check shall be done for Ac after + //! building RAP + void SetPlausibilityCheck(bool const &check) { checkAc_ = check; } - //@{ - /*! @brief Add transfer factory in the end of list of transfer factories in RepartitionAcFactory. + //@{ + /*! @brief Add transfer factory in the end of list of transfer factories in + RepartitionAcFactory. - Transfer factories are derived from TwoLevelFactoryBase and project some data from the fine level to - the next coarser level. - */ - void AddTransferFactory(const RCP& factory); - - // TODO add a function to remove a specific transfer factory? + Transfer factories are derived from TwoLevelFactoryBase and project some data + from the fine level to the next coarser level. + */ + void AddTransferFactory(const RCP &factory); - //! Returns number of transfer factories. - size_t NumTransferFactories() const { return transferFacts_.size(); } + // TODO add a function to remove a specific transfer factory? - //@} + //! Returns number of transfer factories. 
+ size_t NumTransferFactories() const { return transferFacts_.size(); } - private: + //@} - //! @name internal plausibility check methods - //! checks main diagonal entries of (0,0) block. Does not affect entries in (1,1) block! - static void CheckMainDiagonal(RCP & bAc, bool repairZeroDiagonals = false); +private: + //! @name internal plausibility check methods + //! checks main diagonal entries of (0,0) block. Does not affect entries in + //! (1,1) block! + static void CheckMainDiagonal(RCP &bAc, + bool repairZeroDiagonals = false); - //! If true, perform a basic plausibility check on Ac (default = false) - //! note, that the repairZeroDiagonals_ flag only is valid for checkAc_ == true - bool checkAc_; + //! If true, perform a basic plausibility check on Ac (default = false) + //! note, that the repairZeroDiagonals_ flag only is valid for checkAc_ == + //! true + bool checkAc_; - //! If true, the CheckMainDiagonal routine automatically repairs zero entries on main diagonal (default = false) - //! i.e. if A(i,i) == 0.0 set A(i,i) = 1.0 - //! note, that the repairZeroDiagonals_ flag only is valid for checkAc_ == true - bool repairZeroDiagonals_; + //! If true, the CheckMainDiagonal routine automatically repairs zero entries + //! on main diagonal (default = false) i.e. if A(i,i) == 0.0 set A(i,i) = 1.0 + //! note, that the repairZeroDiagonals_ flag only is valid for checkAc_ == + //! true + bool repairZeroDiagonals_; - //@{ + //@{ - //! list of user-defined transfer Factories - std::vector > transferFacts_; + //! 
list of user-defined transfer Factories + std::vector> transferFacts_; - //@} + //@} - }; //class BlockedRAPFactory +}; // class BlockedRAPFactory -} //namespace MueLu +} // namespace MueLu #define MUELU_BLOCKEDRAPFACTORY_SHORT #endif // MUELU_BLOCKEDRAPFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_BlockedRAPFactory_def.hpp b/packages/muelu/src/Misc/MueLu_BlockedRAPFactory_def.hpp index 7817372296cb..f52d1b75a587 100644 --- a/packages/muelu/src/Misc/MueLu_BlockedRAPFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_BlockedRAPFactory_def.hpp @@ -47,8 +47,8 @@ #define MUELU_BLOCKEDRAPFACTORY_DEF_HPP #include -#include #include +#include #include #include "MueLu_BlockedRAPFactory_decl.hpp" @@ -60,171 +60,202 @@ namespace MueLu { - template - BlockedRAPFactory::BlockedRAPFactory() - : checkAc_(false), repairZeroDiagonals_(false) - { } - - template - RCP BlockedRAPFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - -#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("transpose: use implicit"); -#undef SET_VALID_ENTRY - validParamList->set< RCP >("A", null, "Generating factory of the matrix A used during the prolongator smoothing process"); - validParamList->set< RCP >("P", null, "Prolongator factory"); - validParamList->set< RCP >("R", null, "Restrictor factory"); +template +BlockedRAPFactory::BlockedRAPFactory() + : checkAc_(false), repairZeroDiagonals_(false) {} + +template +RCP +BlockedRAPFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + +#define SET_VALID_ENTRY(name) \ + validParamList->setEntry(name, MasterList::getEntry(name)) + SET_VALID_ENTRY("transpose: use implicit"); +#undef SET_VALID_ENTRY + validParamList->set>( + "A", null, + "Generating factory of the matrix A used during the prolongator " + "smoothing process"); + validParamList->set>("P", null, "Prolongator factory"); + validParamList->set>("R", null, 
"Restrictor factory"); + + return validParamList; +} + +template +void BlockedRAPFactory::DeclareInput( + Level &fineLevel, Level &coarseLevel) const { + const Teuchos::ParameterList &pL = GetParameterList(); + if (pL.get("transpose: use implicit") == false) + Input(coarseLevel, "R"); + + Input(fineLevel, "A"); + Input(coarseLevel, "P"); + + // call DeclareInput of all user-given transfer factories + for (std::vector>::const_iterator it = + transferFacts_.begin(); + it != transferFacts_.end(); ++it) + (*it)->CallDeclareInput(coarseLevel); +} + +template +void BlockedRAPFactory::Build( + Level &fineLevel, + Level &coarseLevel) const { // FIXME make fineLevel const!! + FactoryMonitor m(*this, "Computing Ac (block)", coarseLevel); + + const ParameterList &pL = GetParameterList(); + + RCP A = Get>(fineLevel, "A"); + RCP P = Get>(coarseLevel, "P"); + + RCP bA = rcp_dynamic_cast(A); + RCP bP = rcp_dynamic_cast(P); + TEUCHOS_TEST_FOR_EXCEPTION( + bA.is_null() || bP.is_null(), Exceptions::BadCast, + "Matrices A and P must be of type BlockedCrsMatrix."); + + RCP bAP; + RCP bAc; + { + SubFactoryMonitor subM(*this, "MxM: A x P", coarseLevel); + + // Triple matrix product for BlockedCrsMatrixClass + TEUCHOS_TEST_FOR_EXCEPTION((bA->Cols() != bP->Rows()), Exceptions::BadCast, + "Block matrix dimensions do not match: " + "A is " + << bA->Rows() << "x" << bA->Cols() << "P is " + << bP->Rows() << "x" << bP->Cols()); - return validParamList; + bAP = MatrixMatrix::TwoMatrixMultiplyBlock( + *bA, false, *bP, false, GetOStream(Statistics2), true, true); } - template - void BlockedRAPFactory::DeclareInput(Level &fineLevel, Level &coarseLevel) const { - const Teuchos::ParameterList& pL = GetParameterList(); - if (pL.get("transpose: use implicit") == false) - Input(coarseLevel, "R"); - - Input(fineLevel, "A"); - Input(coarseLevel, "P"); - - // call DeclareInput of all user-given transfer factories - for (std::vector >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); 
++it) - (*it)->CallDeclareInput(coarseLevel); + // If we do not modify matrix later, allow optimization of storage. + // This is necessary for new faster Epetra MM kernels. + bool doOptimizeStorage = !checkAc_; + + const bool doTranspose = true; + const bool doFillComplete = true; + if (pL.get("transpose: use implicit") == true) { + SubFactoryMonitor m2(*this, "MxM: P' x (AP) (implicit)", coarseLevel); + bAc = MatrixMatrix::TwoMatrixMultiplyBlock( + *bP, doTranspose, *bAP, !doTranspose, GetOStream(Statistics2), + doFillComplete, doOptimizeStorage); + + } else { + RCP R = Get>(coarseLevel, "R"); + RCP bR = rcp_dynamic_cast(R); + TEUCHOS_TEST_FOR_EXCEPTION(bR.is_null(), Exceptions::BadCast, + "Matrix R must be of type BlockedCrsMatrix."); + + TEUCHOS_TEST_FOR_EXCEPTION(bA->Rows() != bR->Cols(), Exceptions::BadCast, + "Block matrix dimensions do not match: " + "R is " + << bR->Rows() << "x" << bR->Cols() << "A is " + << bA->Rows() << "x" << bA->Cols()); + + SubFactoryMonitor m2(*this, "MxM: R x (AP) (explicit)", coarseLevel); + bAc = MatrixMatrix::TwoMatrixMultiplyBlock( + *bR, !doTranspose, *bAP, !doTranspose, GetOStream(Statistics2), + doFillComplete, doOptimizeStorage); } - template - void BlockedRAPFactory::Build(Level &fineLevel, Level &coarseLevel) const { //FIXME make fineLevel const!! 
- FactoryMonitor m(*this, "Computing Ac (block)", coarseLevel); + if (checkAc_) + CheckMainDiagonal(bAc); - const ParameterList& pL = GetParameterList(); + GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*bAc, "Ac (blocked)"); - RCP A = Get< RCP >(fineLevel, "A"); - RCP P = Get< RCP >(coarseLevel, "P"); + Set>(coarseLevel, "A", bAc); + if (transferFacts_.begin() != transferFacts_.end()) { + SubFactoryMonitor m1(*this, "Projections", coarseLevel); - RCP bA = rcp_dynamic_cast(A); - RCP bP = rcp_dynamic_cast(P); - TEUCHOS_TEST_FOR_EXCEPTION(bA.is_null() || bP.is_null(), Exceptions::BadCast, "Matrices A and P must be of type BlockedCrsMatrix."); + // call Build of all user-given transfer factories + for (std::vector>::const_iterator it = + transferFacts_.begin(); + it != transferFacts_.end(); ++it) { + RCP fac = *it; + GetOStream(Runtime0) << "BlockRAPFactory: call transfer factory: " + << fac->description() << std::endl; - RCP bAP; - RCP bAc; - { - SubFactoryMonitor subM(*this, "MxM: A x P", coarseLevel); + fac->CallBuild(coarseLevel); - // Triple matrix product for BlockedCrsMatrixClass - TEUCHOS_TEST_FOR_EXCEPTION((bA->Cols() != bP->Rows()), Exceptions::BadCast, - "Block matrix dimensions do not match: " - "A is " << bA->Rows() << "x" << bA->Cols() << - "P is " << bP->Rows() << "x" << bP->Cols()); - - bAP = MatrixMatrix::TwoMatrixMultiplyBlock(*bA, false, *bP, false, GetOStream(Statistics2), true, true); + // AP (11/11/13): I am not sure exactly why we need to call Release, but + // we do need it to get rid of dangling data for + // CoordinatesTransferFactory + coarseLevel.Release(*fac); } - - - // If we do not modify matrix later, allow optimization of storage. - // This is necessary for new faster Epetra MM kernels. 
- bool doOptimizeStorage = !checkAc_; - - const bool doTranspose = true; - const bool doFillComplete = true; - if (pL.get("transpose: use implicit") == true) { - SubFactoryMonitor m2(*this, "MxM: P' x (AP) (implicit)", coarseLevel); - bAc = MatrixMatrix::TwoMatrixMultiplyBlock(*bP, doTranspose, *bAP, !doTranspose, GetOStream(Statistics2), doFillComplete, doOptimizeStorage); - - } else { - RCP R = Get< RCP >(coarseLevel, "R"); - RCP bR = rcp_dynamic_cast(R); - TEUCHOS_TEST_FOR_EXCEPTION(bR.is_null(), Exceptions::BadCast, "Matrix R must be of type BlockedCrsMatrix."); - - TEUCHOS_TEST_FOR_EXCEPTION(bA->Rows() != bR->Cols(), Exceptions::BadCast, - "Block matrix dimensions do not match: " - "R is " << bR->Rows() << "x" << bR->Cols() << - "A is " << bA->Rows() << "x" << bA->Cols()); - - SubFactoryMonitor m2(*this, "MxM: R x (AP) (explicit)", coarseLevel); - bAc = MatrixMatrix::TwoMatrixMultiplyBlock(*bR, !doTranspose, *bAP, !doTranspose, GetOStream(Statistics2), doFillComplete, doOptimizeStorage); + } +} + +template +void BlockedRAPFactory::CheckMainDiagonal(RCP &bAc, + bool repairZeroDiagonals) { + RCP c00 = bAc->getMatrix(0, 0); + RCP Aout = + MatrixFactory::Build(c00->getRowMap(), c00->getGlobalMaxNumRowEntries()); + + RCP diagVec = VectorFactory::Build(c00->getRowMap()); + c00->getLocalDiagCopy(*diagVec); + ArrayRCP diagVal = diagVec->getDataNonConst(0); + + // loop over local rows + for (size_t row = 0; row < c00->getLocalNumRows(); row++) { + // get global row id + GO grid = c00->getRowMap()->getGlobalElement(row); // global row id + + ArrayView indices; + ArrayView vals; + c00->getLocalRowView(row, indices, vals); + + // just copy all values in output + ArrayRCP indout(indices.size(), Teuchos::OrdinalTraits::zero()); + ArrayRCP valout(indices.size(), Teuchos::ScalarTraits::zero()); + + // just copy values + for (size_t i = 0; i < as(indices.size()); i++) { + GO gcid = + c00->getColMap()->getGlobalElement(indices[i]); // LID -> GID (column) + indout[i] = gcid; + 
valout[i] = vals[i]; } - - if (checkAc_) - CheckMainDiagonal(bAc); - - GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*bAc, "Ac (blocked)"); - - Set >(coarseLevel, "A", bAc); - - if (transferFacts_.begin() != transferFacts_.end()) { - SubFactoryMonitor m1(*this, "Projections", coarseLevel); - - // call Build of all user-given transfer factories - for (std::vector >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) { - RCP fac = *it; - - GetOStream(Runtime0) << "BlockRAPFactory: call transfer factory: " << fac->description() << std::endl; - - fac->CallBuild(coarseLevel); - - // AP (11/11/13): I am not sure exactly why we need to call Release, but we do need it to get rid - // of dangling data for CoordinatesTransferFactory - coarseLevel.Release(*fac); - } + Aout->insertGlobalValues(grid, indout.view(0, indout.size()), + valout.view(0, valout.size())); + if (diagVal[row] == Teuchos::ScalarTraits::zero() && + repairZeroDiagonals) { + // always overwrite diagonal entry + Aout->insertGlobalValues(grid, Teuchos::tuple(grid), + Teuchos::tuple(1.0)); } } + Aout->fillComplete(c00->getDomainMap(), c00->getRangeMap()); - template - void BlockedRAPFactory::CheckMainDiagonal(RCP & bAc, bool repairZeroDiagonals) { - RCP c00 = bAc->getMatrix(0, 0); - RCP Aout = MatrixFactory::Build(c00->getRowMap(), c00->getGlobalMaxNumRowEntries()); - - RCP diagVec = VectorFactory::Build(c00->getRowMap()); - c00->getLocalDiagCopy(*diagVec); - ArrayRCP diagVal = diagVec->getDataNonConst(0); - - // loop over local rows - for (size_t row = 0; row < c00->getLocalNumRows(); row++) { - // get global row id - GO grid = c00->getRowMap()->getGlobalElement(row); // global row id - - ArrayView indices; - ArrayView vals; - c00->getLocalRowView(row, indices, vals); - - // just copy all values in output - ArrayRCP indout(indices.size(), Teuchos::OrdinalTraits::zero()); - ArrayRCP valout(indices.size(), Teuchos::ScalarTraits::zero()); - - // just copy values - for (size_t i 
= 0; i < as(indices.size()); i++) { - GO gcid = c00->getColMap()->getGlobalElement(indices[i]); // LID -> GID (column) - indout [i] = gcid; - valout [i] = vals[i]; - } - - Aout->insertGlobalValues(grid, indout.view(0, indout.size()), valout.view(0, valout.size())); - if (diagVal[row] == Teuchos::ScalarTraits::zero() && repairZeroDiagonals) { - // always overwrite diagonal entry - Aout->insertGlobalValues(grid, Teuchos::tuple(grid), Teuchos::tuple(1.0)); - } - } - - Aout->fillComplete(c00->getDomainMap(), c00->getRangeMap()); + bAc->setMatrix(0, 0, Aout); +} - bAc->setMatrix(0, 0, Aout); - } - - template - void BlockedRAPFactory::AddTransferFactory(const RCP& factory) { - // check if it's a TwoLevelFactoryBase based transfer factory - TEUCHOS_TEST_FOR_EXCEPTION(rcp_dynamic_cast(factory) == Teuchos::null, Exceptions::BadCast, - "Transfer factory is not derived from TwoLevelFactoryBase. This is very strange. " +template +void BlockedRAPFactory:: + AddTransferFactory(const RCP &factory) { + // check if it's a TwoLevelFactoryBase based transfer factory + TEUCHOS_TEST_FOR_EXCEPTION( + rcp_dynamic_cast(factory) == Teuchos::null, + Exceptions::BadCast, + "Transfer factory is not derived from TwoLevelFactoryBase. This is very " + "strange. 
" "(Note: you can remove this exception if there's a good reason for)"); - transferFacts_.push_back(factory); - } + transferFacts_.push_back(factory); +} -} //namespace MueLu +} // namespace MueLu #define MUELU_BLOCKEDRAPFACTORY_SHORT #endif // MUELU_BLOCKEDRAPFACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_CoordinatesTransferFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_CoordinatesTransferFactory_decl.hpp index 2ee51c482c54..3b0f4c07ceb2 100644 --- a/packages/muelu/src/Misc/MueLu_CoordinatesTransferFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_CoordinatesTransferFactory_decl.hpp @@ -48,8 +48,8 @@ #include "MueLu_ConfigDefs.hpp" #include "MueLu_TwoLevelFactoryBase.hpp" -#include "Xpetra_MultiVector_fwd.hpp" #include "Xpetra_MultiVectorFactory_fwd.hpp" +#include "Xpetra_MultiVector_fwd.hpp" #include "MueLu_Aggregates_fwd.hpp" @@ -61,9 +61,10 @@ namespace MueLu { @class CoordinatesTransferFactory class. @brief Class for transferring coordinates from a finer level to a coarser one - This is separate from MultiVectorTransferFactory which potentially can be used for scalar problems. - For non-scalar problems, however, we cannot use restriction operator as that essentially is matrix Q - from tentative prolongator initialization. + This is separate from MultiVectorTransferFactory which potentially can be + used for scalar problems. For non-scalar problems, however, we cannot use + restriction operator as that essentially is matrix Q from tentative + prolongator initialization. 
## Input/output of CoordinatesTransferFactory ## @@ -72,88 +73,94 @@ namespace MueLu { ----------|------|---------|:----------:|:---------:|:---------:|------------ | Coordinates| Factory | null | | * | (*) | Factory providing coordinates | Aggregates | Factory | null | | * | (*) | Factory providing aggregates - | CoarseMap | Factory | null | | * | (*) | Generating factory of the coarse map - | write start| int | -1 | | * | | first level at which coordinates should be written to file - | write end | int | -1 | | * | | last level at which coordinates should be written to file - - The * in the @c master.xml column denotes that the parameter is defined in the @c master.xml file.
- The * in the @c validated column means that the parameter is declared in the list of valid input parameters (see CoordinatesTransferFactory::GetValidParameters).
- The * in the @c requested column states that the data is requested as input with all dependencies (see CoordinatesTransferFactory::DeclareInput). - - The CoordinatesTransferFact first checks whether there is already valid coarse coordinates information - available on the coarse level. If that is the case, we can skip the coordinate transfer and just reuse - the available information. - Otherwise we try to build coarse grid coordinates by using the information about the - aggregates, the fine level coordinates and the coarse map information. + | CoarseMap | Factory | null | | * | (*) | Generating factory of the coarse + map | write start| int | -1 | | * | | first level at which + coordinates should be written to file | write end | int | -1 | | * | + | last level at which coordinates should be written to file + + The * in the @c master.xml column denotes that the parameter is defined in the + @c master.xml file.
The * in the @c validated column means that the + parameter is declared in the list of valid input parameters (see + CoordinatesTransferFactory::GetValidParameters).
The * in the @c requested + column states that the data is requested as input with all dependencies (see + CoordinatesTransferFactory::DeclareInput). + + The CoordinatesTransferFact first checks whether there is already valid coarse + coordinates information available on the coarse level. If that is the case, we + can skip the coordinate transfer and just reuse the available information. + Otherwise we try to build coarse grid coordinates by using the information + about the aggregates, the fine level coordinates and the coarse map + information. ### Variables provided by CoordinatesTransferFactory ### - After CoordinatesTransferFactory::Build the following data is available (if requested) + After CoordinatesTransferFactory::Build the following data is available (if + requested) Parameter | generated by | description ----------|--------------|------------ | Coordinates | CoordinatesTransferFactory | coarse level coordinates */ - template - class CoordinatesTransferFactory : public TwoLevelFactoryBase { - public: - typedef Scalar scalar_type; - typedef LocalOrdinal local_ordinal_type; - typedef GlobalOrdinal global_ordinal_type; - typedef typename Node::device_type DeviceType; - typedef typename DeviceType::execution_space execution_space; - - private: +template +class CoordinatesTransferFactory : public TwoLevelFactoryBase { +public: + typedef Scalar scalar_type; + typedef LocalOrdinal local_ordinal_type; + typedef GlobalOrdinal global_ordinal_type; + typedef typename Node::device_type DeviceType; + typedef typename DeviceType::execution_space execution_space; + +private: #undef MUELU_COORDINATESTRANSFERFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ - - /*! @brief Constructor. +public: + //! @name Constructors/Destructors. + //@{ - @param vectorName The name of the quantity to be restricted. - @param restrictionName The name of the restriction Matrix. + /*! @brief Constructor. 
- The operator associated with projectionName will be applied to the MultiVector associated with - vectorName. - */ - CoordinatesTransferFactory() { } + @param vectorName The name of the quantity to be restricted. + @param restrictionName The name of the restriction Matrix. - //! Destructor. - virtual ~CoordinatesTransferFactory() { } + The operator associated with projectionName will be applied to the + MultiVector associated with vectorName. + */ + CoordinatesTransferFactory() {} - RCP GetValidParameterList() const; + //! Destructor. + virtual ~CoordinatesTransferFactory() {} - //@} + RCP GetValidParameterList() const; - //! @name Input - //@{ + //@} - /*! @brief Specifies the data that this class needs, and the factories that generate that data. + //! @name Input + //@{ - If the Build method of this class requires some data, but the generating factory is not specified in DeclareInput, then this class - will fall back to the settings in FactoryManager. - */ - void DeclareInput(Level &finelevel, Level &coarseLevel) const; + /*! @brief Specifies the data that this class needs, and the factories that + generate that data. - //@} + If the Build method of this class requires some data, but the generating + factory is not specified in DeclareInput, then this class will fall back to + the settings in FactoryManager. + */ + void DeclareInput(Level &finelevel, Level &coarseLevel) const; - //! @name Build methods. - //@{ + //@} - //! Build an object with this factory. - void Build(Level & fineLevel, Level &coarseLevel) const; + //! @name Build methods. + //@{ - //@} + //! Build an object with this factory. 
+ void Build(Level &fineLevel, Level &coarseLevel) const; - private: + //@} - }; // class CoordinatesTransferFactory +private: +}; // class CoordinatesTransferFactory } // namespace MueLu diff --git a/packages/muelu/src/Misc/MueLu_CoordinatesTransferFactory_def.hpp b/packages/muelu/src/Misc/MueLu_CoordinatesTransferFactory_def.hpp index 574e7ace4950..fd5423050091 100644 --- a/packages/muelu/src/Misc/MueLu_CoordinatesTransferFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_CoordinatesTransferFactory_def.hpp @@ -46,10 +46,10 @@ #ifndef MUELU_COORDINATESTRANSFER_FACTORY_DEF_HPP #define MUELU_COORDINATESTRANSFER_FACTORY_DEF_HPP +#include "Xpetra_IO.hpp" #include "Xpetra_ImportFactory.hpp" -#include "Xpetra_MultiVectorFactory.hpp" #include "Xpetra_MapFactory.hpp" -#include "Xpetra_IO.hpp" +#include "Xpetra_MultiVectorFactory.hpp" #include "MueLu_Aggregates.hpp" #include "MueLu_CoordinatesTransferFactory_decl.hpp" @@ -60,214 +60,284 @@ namespace MueLu { - template - RCP CoordinatesTransferFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - validParamList->set >("Coordinates", Teuchos::null, "Factory for coordinates generation"); - validParamList->set >("Aggregates", Teuchos::null, "Factory for coordinates generation"); - validParamList->set >("CoarseMap", Teuchos::null, "Generating factory of the coarse map"); - validParamList->set ("structured aggregation", false, "Flag specifying that the geometric data is transferred for StructuredAggregationFactory"); - validParamList->set ("aggregation coupled", false, "Flag specifying if the aggregation algorithm was used in coupled mode."); - validParamList->set ("Geometric", false, "Flag specifying that the coordinates are transferred for GeneralGeometricPFactory"); - validParamList->set >("coarseCoordinates", Teuchos::null, "Factory for coarse coordinates generation"); - validParamList->set >("gCoarseNodesPerDim", Teuchos::null, "Factory providing the global number of nodes per 
spatial dimensions of the mesh"); - validParamList->set >("lCoarseNodesPerDim", Teuchos::null, "Factory providing the local number of nodes per spatial dimensions of the mesh"); - validParamList->set >("numDimensions" , Teuchos::null, "Factory providing the number of spatial dimensions of the mesh"); - validParamList->set ("write start", -1, "first level at which coordinates should be written to file"); - validParamList->set ("write end", -1, "last level at which coordinates should be written to file"); - validParamList->set ("hybrid aggregation", false, "Flag specifying that hybrid aggregation data is transfered for HybridAggregationFactory"); - validParamList->set >("aggregationRegionTypeCoarse", Teuchos::null, "Factory indicating what aggregation type is to be used on the coarse level of the region"); - validParamList->set ("interface aggregation", false, "Flag specifying that interface aggregation data is transfered for HybridAggregationFactory"); - validParamList->set >("coarseInterfacesDimensions", Teuchos::null, "Factory providing coarseInterfacesDimensions"); - validParamList->set >("nodeOnCoarseInterface", Teuchos::null, "Factory providing nodeOnCoarseInterface"); - - - return validParamList; - } - - template - void CoordinatesTransferFactory::DeclareInput(Level& fineLevel, Level& coarseLevel) const { - static bool isAvailableCoords = false; - - const ParameterList& pL = GetParameterList(); - if(pL.get("structured aggregation") == true) { - if(pL.get("aggregation coupled") == true) { - Input(fineLevel, "gCoarseNodesPerDim"); - } - Input(fineLevel, "lCoarseNodesPerDim"); - Input(fineLevel, "numDimensions"); - } else if(pL.get("Geometric") == true) { - Input(coarseLevel, "coarseCoordinates"); - Input(coarseLevel, "gCoarseNodesPerDim"); - Input(coarseLevel, "lCoarseNodesPerDim"); - } else if(pL.get("hybrid aggregation") == true) { - Input(fineLevel, "aggregationRegionTypeCoarse"); - Input(fineLevel, "lCoarseNodesPerDim"); - Input(fineLevel, "numDimensions"); 
- if(pL.get("interface aggregation") == true) { - Input(fineLevel, "coarseInterfacesDimensions"); - Input(fineLevel, "nodeOnCoarseInterface"); - } - } else { - if (coarseLevel.GetRequestMode() == Level::REQUEST) - isAvailableCoords = coarseLevel.IsAvailable("Coordinates", this); - - if (isAvailableCoords == false) { - Input(fineLevel, "Coordinates"); - Input(fineLevel, "Aggregates"); - Input(fineLevel, "CoarseMap"); - } +template +RCP +CoordinatesTransferFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + + validParamList->set>( + "Coordinates", Teuchos::null, "Factory for coordinates generation"); + validParamList->set>( + "Aggregates", Teuchos::null, "Factory for coordinates generation"); + validParamList->set>( + "CoarseMap", Teuchos::null, "Generating factory of the coarse map"); + validParamList->set("structured aggregation", false, + "Flag specifying that the geometric data is " + "transferred for StructuredAggregationFactory"); + validParamList->set( + "aggregation coupled", false, + "Flag specifying if the aggregation algorithm was used in coupled mode."); + validParamList->set("Geometric", false, + "Flag specifying that the coordinates are " + "transferred for GeneralGeometricPFactory"); + validParamList->set>( + "coarseCoordinates", Teuchos::null, + "Factory for coarse coordinates generation"); + validParamList->set>( + "gCoarseNodesPerDim", Teuchos::null, + "Factory providing the global number of nodes per spatial dimensions of " + "the mesh"); + validParamList->set>( + "lCoarseNodesPerDim", Teuchos::null, + "Factory providing the local number of nodes per spatial dimensions of " + "the mesh"); + validParamList->set>( + "numDimensions", Teuchos::null, + "Factory providing the number of spatial dimensions of the mesh"); + validParamList->set( + "write start", -1, + "first level at which coordinates should be written to file"); + validParamList->set( + "write end", -1, + "last level at which coordinates should be 
written to file"); + validParamList->set("hybrid aggregation", false, + "Flag specifying that hybrid aggregation data is " + "transfered for HybridAggregationFactory"); + validParamList->set>( + "aggregationRegionTypeCoarse", Teuchos::null, + "Factory indicating what aggregation type is to be used on the coarse " + "level of the region"); + validParamList->set("interface aggregation", false, + "Flag specifying that interface aggregation data " + "is transfered for HybridAggregationFactory"); + validParamList->set>( + "coarseInterfacesDimensions", Teuchos::null, + "Factory providing coarseInterfacesDimensions"); + validParamList->set>( + "nodeOnCoarseInterface", Teuchos::null, + "Factory providing nodeOnCoarseInterface"); + + return validParamList; +} + +template +void CoordinatesTransferFactory::DeclareInput(Level &fineLevel, + Level &coarseLevel) const { + static bool isAvailableCoords = false; + + const ParameterList &pL = GetParameterList(); + if (pL.get("structured aggregation") == true) { + if (pL.get("aggregation coupled") == true) { + Input(fineLevel, "gCoarseNodesPerDim"); + } + Input(fineLevel, "lCoarseNodesPerDim"); + Input(fineLevel, "numDimensions"); + } else if (pL.get("Geometric") == true) { + Input(coarseLevel, "coarseCoordinates"); + Input(coarseLevel, "gCoarseNodesPerDim"); + Input(coarseLevel, "lCoarseNodesPerDim"); + } else if (pL.get("hybrid aggregation") == true) { + Input(fineLevel, "aggregationRegionTypeCoarse"); + Input(fineLevel, "lCoarseNodesPerDim"); + Input(fineLevel, "numDimensions"); + if (pL.get("interface aggregation") == true) { + Input(fineLevel, "coarseInterfacesDimensions"); + Input(fineLevel, "nodeOnCoarseInterface"); + } + } else { + if (coarseLevel.GetRequestMode() == Level::REQUEST) + isAvailableCoords = coarseLevel.IsAvailable("Coordinates", this); + + if (isAvailableCoords == false) { + Input(fineLevel, "Coordinates"); + Input(fineLevel, "Aggregates"); + Input(fineLevel, "CoarseMap"); } } +} + +template +void 
CoordinatesTransferFactory::Build(Level &fineLevel, + Level &coarseLevel) const { + FactoryMonitor m(*this, "Build", coarseLevel); + + using xdMV = + Xpetra::MultiVector::magnitudeType, + LO, GO, NO>; + + GetOStream(Runtime0) << "Transferring coordinates" << std::endl; + + int numDimensions; + RCP coarseCoords; + RCP fineCoords; + Array gCoarseNodesPerDir; + Array lCoarseNodesPerDir; + + const ParameterList &pL = GetParameterList(); + + if (pL.get("hybrid aggregation") == true) { + std::string regionType = + Get(fineLevel, "aggregationRegionTypeCoarse"); + numDimensions = Get(fineLevel, "numDimensions"); + lCoarseNodesPerDir = Get>(fineLevel, "lCoarseNodesPerDim"); + Set(coarseLevel, "aggregationRegionType", regionType); + Set(coarseLevel, "numDimensions", numDimensions); + Set>(coarseLevel, "lNodesPerDim", lCoarseNodesPerDir); + + if ((pL.get("interface aggregation") == true) && + (regionType == "uncoupled")) { + Array coarseInterfacesDimensions = + Get>(fineLevel, "coarseInterfacesDimensions"); + Array nodeOnCoarseInterface = + Get>(fineLevel, "nodeOnCoarseInterface"); + Set>(coarseLevel, "interfacesDimensions", + coarseInterfacesDimensions); + Set>(coarseLevel, "nodeOnInterface", nodeOnCoarseInterface); + } - template - void CoordinatesTransferFactory::Build(Level & fineLevel, Level &coarseLevel) const { - FactoryMonitor m(*this, "Build", coarseLevel); - - using xdMV = Xpetra::MultiVector::magnitudeType,LO,GO,NO>; - - GetOStream(Runtime0) << "Transferring coordinates" << std::endl; - - int numDimensions; - RCP coarseCoords; - RCP fineCoords; - Array gCoarseNodesPerDir; - Array lCoarseNodesPerDir; - - const ParameterList& pL = GetParameterList(); - - if(pL.get("hybrid aggregation") == true) { - std::string regionType = Get(fineLevel,"aggregationRegionTypeCoarse"); - numDimensions = Get(fineLevel, "numDimensions"); - lCoarseNodesPerDir = Get >(fineLevel, "lCoarseNodesPerDim"); - Set(coarseLevel, "aggregationRegionType", regionType); - Set (coarseLevel, 
"numDimensions", numDimensions); - Set > (coarseLevel, "lNodesPerDim", lCoarseNodesPerDir); - - if((pL.get("interface aggregation") == true) && (regionType == "uncoupled")) { - Array coarseInterfacesDimensions = Get >(fineLevel, "coarseInterfacesDimensions"); - Array nodeOnCoarseInterface = Get >(fineLevel, "nodeOnCoarseInterface"); - Set >(coarseLevel, "interfacesDimensions", coarseInterfacesDimensions); - Set >(coarseLevel, "nodeOnInterface", nodeOnCoarseInterface); - } - - } else if(pL.get("structured aggregation") == true) { - if(pL.get("aggregation coupled") == true) { - gCoarseNodesPerDir = Get >(fineLevel, "gCoarseNodesPerDim"); - Set >(coarseLevel, "gNodesPerDim", gCoarseNodesPerDir); - } - lCoarseNodesPerDir = Get >(fineLevel, "lCoarseNodesPerDim"); - Set >(coarseLevel, "lNodesPerDim", lCoarseNodesPerDir); - numDimensions = Get(fineLevel, "numDimensions"); - Set(coarseLevel, "numDimensions", numDimensions); - - } else if(pL.get("Geometric") == true) { - coarseCoords = Get >(coarseLevel, "coarseCoordinates"); - gCoarseNodesPerDir = Get >(coarseLevel, "gCoarseNodesPerDim"); - lCoarseNodesPerDir = Get >(coarseLevel, "lCoarseNodesPerDim"); - Set >(coarseLevel, "gNodesPerDim", gCoarseNodesPerDir); - Set >(coarseLevel, "lNodesPerDim", lCoarseNodesPerDir); - - Set >(coarseLevel, "Coordinates", coarseCoords); - - } else { - if (coarseLevel.IsAvailable("Coordinates", this)) { - GetOStream(Runtime0) << "Reusing coordinates" << std::endl; - return; - } - - fineCoords = Get< RCP >(fineLevel, "Coordinates"); - RCP coarseMap = Get< RCP > (fineLevel, "CoarseMap"); - - // coarseMap is being used to set up the domain map of tentative P, and therefore, the row map of Ac - // Therefore, if we amalgamate coarseMap, logical nodes in the coordinates vector would correspond to - // logical blocks in the matrix - - ArrayView elementAList = coarseMap->getLocalElementList(); - - LO blkSize = 1; - if (rcp_dynamic_cast(coarseMap) != Teuchos::null) - blkSize = 
rcp_dynamic_cast(coarseMap)->getFixedBlockSize(); - - GO indexBase = coarseMap->getIndexBase(); - size_t numElements = elementAList.size() / blkSize; - Array elementList(numElements); - - // Amalgamate the map - for (LO i = 0; i < Teuchos::as(numElements); i++) - elementList[i] = (elementAList[i*blkSize]-indexBase)/blkSize + indexBase; - - RCP uniqueMap = fineCoords->getMap(); - RCP coarseCoordMap = MapFactory ::Build(coarseMap->lib(), Teuchos::OrdinalTraits::invalid(), elementList, indexBase, coarseMap->getComm()); - coarseCoords = Xpetra::MultiVectorFactory::magnitudeType,LO,GO,NO>::Build(coarseCoordMap, fineCoords->getNumVectors()); - - - RCP aggregates; - bool aggregatesCrossProcessors; - aggregates = Get >(fineLevel, "Aggregates"); - aggregatesCrossProcessors = aggregates->AggregatesCrossProcessors(); - - // Create overlapped fine coordinates to reduce global communication - RCP ghostedCoords = fineCoords; - if (aggregatesCrossProcessors) { - RCP nonUniqueMap = aggregates->GetMap(); - RCP importer = ImportFactory::Build(uniqueMap, nonUniqueMap); - - ghostedCoords = Xpetra::MultiVectorFactory::magnitudeType,LO,GO,NO>::Build(nonUniqueMap, fineCoords->getNumVectors()); - ghostedCoords->doImport(*fineCoords, *importer, Xpetra::INSERT); - } - - // The good news is that this graph has already been constructed for the - // TentativePFactory and was cached in Aggregates. So this is a no-op. 
- auto aggGraph = aggregates->GetGraph(); - auto numAggs = aggGraph.numRows(); - - auto fineCoordsView = ghostedCoords->getDeviceLocalView(Xpetra::Access::ReadOnly); - auto coarseCoordsView = coarseCoords->getDeviceLocalView(Xpetra::Access::OverwriteAll); - - // Fill in coarse coordinates - { - SubFactoryMonitor m2(*this, "AverageCoords", coarseLevel); - - const auto dim = ghostedCoords->getNumVectors(); - - typename AppendTrait::type fineCoordsRandomView = fineCoordsView; - for (size_t j = 0; j < dim; j++) { - Kokkos::parallel_for("MueLu:CoordinatesTransferF:Build:coord", Kokkos::RangePolicy(0, numAggs), - KOKKOS_LAMBDA(const LO i) { - // A row in this graph represents all node ids in the aggregate - // Therefore, averaging is very easy - - auto aggregate = aggGraph.rowConst(i); + } else if (pL.get("structured aggregation") == true) { + if (pL.get("aggregation coupled") == true) { + gCoarseNodesPerDir = Get>(fineLevel, "gCoarseNodesPerDim"); + Set>(coarseLevel, "gNodesPerDim", gCoarseNodesPerDir); + } + lCoarseNodesPerDir = Get>(fineLevel, "lCoarseNodesPerDim"); + Set>(coarseLevel, "lNodesPerDim", lCoarseNodesPerDir); + numDimensions = Get(fineLevel, "numDimensions"); + Set(coarseLevel, "numDimensions", numDimensions); + + } else if (pL.get("Geometric") == true) { + coarseCoords = Get>(coarseLevel, "coarseCoordinates"); + gCoarseNodesPerDir = Get>(coarseLevel, "gCoarseNodesPerDim"); + lCoarseNodesPerDir = Get>(coarseLevel, "lCoarseNodesPerDim"); + Set>(coarseLevel, "gNodesPerDim", gCoarseNodesPerDir); + Set>(coarseLevel, "lNodesPerDim", lCoarseNodesPerDir); + + Set>(coarseLevel, "Coordinates", coarseCoords); + + } else { + if (coarseLevel.IsAvailable("Coordinates", this)) { + GetOStream(Runtime0) << "Reusing coordinates" << std::endl; + return; + } - typename Teuchos::ScalarTraits::magnitudeType sum = 0.0; // do not use Scalar here (Stokhos) - for (size_t colID = 0; colID < static_cast(aggregate.length); colID++) - sum += fineCoordsRandomView(aggregate(colID),j); 
+ fineCoords = Get>(fineLevel, "Coordinates"); + RCP coarseMap = Get>(fineLevel, "CoarseMap"); + + // coarseMap is being used to set up the domain map of tentative P, and + // therefore, the row map of Ac Therefore, if we amalgamate coarseMap, + // logical nodes in the coordinates vector would correspond to logical + // blocks in the matrix + + ArrayView elementAList = coarseMap->getLocalElementList(); + + LO blkSize = 1; + if (rcp_dynamic_cast(coarseMap) != Teuchos::null) + blkSize = + rcp_dynamic_cast(coarseMap)->getFixedBlockSize(); + + GO indexBase = coarseMap->getIndexBase(); + size_t numElements = elementAList.size() / blkSize; + Array elementList(numElements); + + // Amalgamate the map + for (LO i = 0; i < Teuchos::as(numElements); i++) + elementList[i] = + (elementAList[i * blkSize] - indexBase) / blkSize + indexBase; + + RCP uniqueMap = fineCoords->getMap(); + RCP coarseCoordMap = MapFactory ::Build( + coarseMap->lib(), + Teuchos::OrdinalTraits::invalid(), elementList, + indexBase, coarseMap->getComm()); + coarseCoords = Xpetra::MultiVectorFactory< + typename Teuchos::ScalarTraits::magnitudeType, LO, GO, + NO>::Build(coarseCoordMap, fineCoords->getNumVectors()); + + RCP aggregates; + bool aggregatesCrossProcessors; + aggregates = Get>(fineLevel, "Aggregates"); + aggregatesCrossProcessors = aggregates->AggregatesCrossProcessors(); + + // Create overlapped fine coordinates to reduce global communication + RCP ghostedCoords = fineCoords; + if (aggregatesCrossProcessors) { + RCP nonUniqueMap = aggregates->GetMap(); + RCP importer = + ImportFactory::Build(uniqueMap, nonUniqueMap); + + ghostedCoords = Xpetra::MultiVectorFactory< + typename Teuchos::ScalarTraits::magnitudeType, LO, GO, + NO>::Build(nonUniqueMap, fineCoords->getNumVectors()); + ghostedCoords->doImport(*fineCoords, *importer, Xpetra::INSERT); + } - coarseCoordsView(i,j) = sum / aggregate.length; - }); - } + // The good news is that this graph has already been constructed for the + // 
TentativePFactory and was cached in Aggregates. So this is a no-op. + auto aggGraph = aggregates->GetGraph(); + auto numAggs = aggGraph.numRows(); + + auto fineCoordsView = + ghostedCoords->getDeviceLocalView(Xpetra::Access::ReadOnly); + auto coarseCoordsView = + coarseCoords->getDeviceLocalView(Xpetra::Access::OverwriteAll); + + // Fill in coarse coordinates + { + SubFactoryMonitor m2(*this, "AverageCoords", coarseLevel); + + const auto dim = ghostedCoords->getNumVectors(); + + typename AppendTrait::type + fineCoordsRandomView = fineCoordsView; + for (size_t j = 0; j < dim; j++) { + Kokkos::parallel_for( + "MueLu:CoordinatesTransferF:Build:coord", + Kokkos::RangePolicy(0, + numAggs), + KOKKOS_LAMBDA(const LO i) { + // A row in this graph represents all node ids in the aggregate + // Therefore, averaging is very easy + + auto aggregate = aggGraph.rowConst(i); + + typename Teuchos::ScalarTraits::magnitudeType sum = + 0.0; // do not use Scalar here (Stokhos) + for (size_t colID = 0; + colID < static_cast(aggregate.length); colID++) + sum += fineCoordsRandomView(aggregate(colID), j); + + coarseCoordsView(i, j) = sum / aggregate.length; + }); } - - Set >(coarseLevel, "Coordinates", coarseCoords); - } - int writeStart = pL.get("write start"), writeEnd = pL.get("write end"); - if (writeStart == 0 && fineLevel.GetLevelID() == 0 && writeStart <= writeEnd) { - std::ostringstream buf; - buf << fineLevel.GetLevelID(); - std::string fileName = "coordinates_before_rebalance_level_" + buf.str() + ".m"; - Xpetra::IO::magnitudeType,LO,GO,NO>::Write(fileName,*fineCoords); - } - if (writeStart <= coarseLevel.GetLevelID() && coarseLevel.GetLevelID() <= writeEnd) { - std::ostringstream buf; - buf << coarseLevel.GetLevelID(); - std::string fileName = "coordinates_before_rebalance_level_" + buf.str() + ".m"; - Xpetra::IO::magnitudeType,LO,GO,NO>::Write(fileName,*coarseCoords); - } + Set>(coarseLevel, "Coordinates", coarseCoords); + } + + int writeStart = pL.get("write start"), + 
writeEnd = pL.get("write end"); + if (writeStart == 0 && fineLevel.GetLevelID() == 0 && + writeStart <= writeEnd) { + std::ostringstream buf; + buf << fineLevel.GetLevelID(); + std::string fileName = + "coordinates_before_rebalance_level_" + buf.str() + ".m"; + Xpetra::IO::magnitudeType, LO, GO, + NO>::Write(fileName, *fineCoords); + } + if (writeStart <= coarseLevel.GetLevelID() && + coarseLevel.GetLevelID() <= writeEnd) { + std::ostringstream buf; + buf << coarseLevel.GetLevelID(); + std::string fileName = + "coordinates_before_rebalance_level_" + buf.str() + ".m"; + Xpetra::IO::magnitudeType, LO, GO, + NO>::Write(fileName, *coarseCoords); } +} } // namespace MueLu diff --git a/packages/muelu/src/Misc/MueLu_DemoFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_DemoFactory_decl.hpp index 75931f90de75..6ae09827cd99 100644 --- a/packages/muelu/src/Misc/MueLu_DemoFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_DemoFactory_decl.hpp @@ -47,60 +47,61 @@ #define MUELU_DEMOFACTORY_DECL_HPP #include "MueLu_ConfigDefs.hpp" -#include "MueLu_SingleLevelFactoryBase.hpp" #include "MueLu_DemoFactory_fwd.hpp" +#include "MueLu_SingleLevelFactoryBase.hpp" namespace MueLu { - /*! - @class DemoFactory class. - @brief empty factory for demonstration - */ +/*! + @class DemoFactory class. + @brief empty factory for demonstration +*/ - template - class DemoFactory : public SingleLevelFactoryBase { +template +class DemoFactory : public SingleLevelFactoryBase { #undef MUELU_DEMOFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ +public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - DemoFactory(); + //! Constructor. + DemoFactory(); - //! Destructor. - virtual ~DemoFactory(); + //! Destructor. + virtual ~DemoFactory(); - //@} + //@} - //! @name Input - //@{ + //! @name Input + //@{ - /*! @brief Specifies the data that this class needs, and the factories that generate that data. + /*! 
@brief Specifies the data that this class needs, and the factories that + generate that data. - If the Build method of this class requires some data, but the generating factory is not specified in DeclareInput, then this class - will fall back to the settings in FactoryManager. - */ - void DeclareInput(Level ¤tLevel) const; + If the Build method of this class requires some data, but the generating + factory is not specified in DeclareInput, then this class will fall back to + the settings in FactoryManager. + */ + void DeclareInput(Level ¤tLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - //! Build an object with this factory. - void Build(Level & currentLevel) const; + //! Build an object with this factory. + void Build(Level ¤tLevel) const; - //@} + //@} - private: - // TODO add member variables +private: + // TODO add member variables - }; // class DemoFactory +}; // class DemoFactory } // namespace MueLu diff --git a/packages/muelu/src/Misc/MueLu_DemoFactory_def.hpp b/packages/muelu/src/Misc/MueLu_DemoFactory_def.hpp index b390142ca2f8..c5e51780b194 100644 --- a/packages/muelu/src/Misc/MueLu_DemoFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_DemoFactory_def.hpp @@ -55,23 +55,24 @@ namespace MueLu { - template - DemoFactory::DemoFactory() - { } +template +DemoFactory::DemoFactory() {} - template - DemoFactory::~DemoFactory() {} +template +DemoFactory::~DemoFactory() {} - template - void DemoFactory::DeclareInput(Level &/* currentLevel */) const { - // TODO: declare input for factory - //Input(currentLevel, varName_); - } +template +void DemoFactory::DeclareInput( + Level & /* currentLevel */) const { + // TODO: declare input for factory + // Input(currentLevel, varName_); +} - template - void DemoFactory::Build(Level & /* currentLevel */) const { - // TODO: implement factory - } +template +void DemoFactory::Build( + Level & /* currentLevel */) const { + // TODO: implement factory +} } // namespace MueLu diff 
--git a/packages/muelu/src/Misc/MueLu_DropNegativeEntriesFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_DropNegativeEntriesFactory_decl.hpp index ab99f2f9b964..0b14922ce450 100644 --- a/packages/muelu/src/Misc/MueLu_DropNegativeEntriesFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_DropNegativeEntriesFactory_decl.hpp @@ -55,58 +55,58 @@ namespace MueLu { - /*! - @class DropNegativeEntriesFactory class. - @brief Application-specific filtering for A. Can be used in context of graph coarsening and aggregation. - - This factory drops all negative entries (or entries with a magnitude < 0). Only weak positive connections are kept. - Do not use this kind of filtering for regular PDEs unless you have very good reasons. - */ - - template - class DropNegativeEntriesFactory : public SingleLevelFactoryBase { +/*! + @class DropNegativeEntriesFactory class. + @brief Application-specific filtering for A. Can be used in context of graph + coarsening and aggregation. + + This factory drops all negative entries (or entries with a magnitude < 0). + Only weak positive connections are kept. Do not use this kind of filtering for + regular PDEs unless you have very good reasons. +*/ + +template +class DropNegativeEntriesFactory : public SingleLevelFactoryBase { #undef MUELU_DROPNEGATIVEENTRIESFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! @name Constructors/Destructors. - //@{ +public: + //! @name Constructors/Destructors. + //@{ - DropNegativeEntriesFactory() { } + DropNegativeEntriesFactory() {} - //! Destructor. - virtual ~DropNegativeEntriesFactory() { } + //! Destructor. + virtual ~DropNegativeEntriesFactory() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level& currentLevel) const; + void DeclareInput(Level ¤tLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - /*! - @brief Build method. + /*! 
+ @brief Build method. - Builds filtered matrix and returns it in currentLevel. - */ - void Build(Level& currentLevel) const; + Builds filtered matrix and returns it in currentLevel. + */ + void Build(Level ¤tLevel) const; - //@} + //@} - }; //class DropNegativeEntriesFactory +}; // class DropNegativeEntriesFactory -} //namespace MueLu +} // namespace MueLu #define MUELU_DROPNEGATIVEENTRIESFACTORY_SHORT #endif // MUELU_DROPNEGATIVEENTRIESFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_DropNegativeEntriesFactory_def.hpp b/packages/muelu/src/Misc/MueLu_DropNegativeEntriesFactory_def.hpp index 1bf7b0bfd4c8..f837f1dea980 100644 --- a/packages/muelu/src/Misc/MueLu_DropNegativeEntriesFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_DropNegativeEntriesFactory_def.hpp @@ -48,10 +48,10 @@ #define MUELU_DROPNEGATIVEENTRIESFACTORY_DEF_HPP #include +#include #include -#include #include -#include +#include #include "MueLu_DropNegativeEntriesFactory_decl.hpp" @@ -60,76 +60,95 @@ namespace MueLu { - template - RCP DropNegativeEntriesFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - -#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) -#undef SET_VALID_ENTRY - - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A used for filtering"); - - return validParamList; - } - - template - void DropNegativeEntriesFactory::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "A"); - } - - template - void DropNegativeEntriesFactory::Build(Level& currentLevel) const { - FactoryMonitor m(*this, "Matrix filtering (springs)", currentLevel); - - RCP Ain = Get< RCP >(currentLevel, "A"); - - LocalOrdinal nDofsPerNode = Ain->GetFixedBlockSize(); - - // create new empty Operator - Teuchos::RCP Aout = MatrixFactory::Build(Ain->getRowMap(), Ain->getGlobalMaxNumRowEntries()); - - size_t numLocalRows = Ain->getLocalNumRows(); - for(size_t row=0; 
rowgetRowMap()->getGlobalElement(row); - - int rDofID = Teuchos::as(grid % nDofsPerNode); - - // extract row information from input matrix - Teuchos::ArrayView indices; - Teuchos::ArrayView vals; - Ain->getLocalRowView(row, indices, vals); - - // just copy all values in output - Teuchos::ArrayRCP indout(indices.size(),Teuchos::ScalarTraits::zero()); - Teuchos::ArrayRCP valout(indices.size(),Teuchos::ScalarTraits::zero()); - - size_t nNonzeros = 0; - for(size_t i=0; i<(size_t)indices.size(); i++) { - GlobalOrdinal gcid = Ain->getColMap()->getGlobalElement(indices[i]); // global column id - - int cDofID = Teuchos::as(gcid % nDofsPerNode); - if(rDofID == cDofID && Teuchos::ScalarTraits::magnitude(vals[i]) >= Teuchos::ScalarTraits::magnitude(Teuchos::ScalarTraits::zero())) { - indout [nNonzeros] = gcid; - valout [nNonzeros] = vals[i]; - nNonzeros++; - } +template +RCP +DropNegativeEntriesFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + +#define SET_VALID_ENTRY(name) \ + validParamList->setEntry(name, MasterList::getEntry(name)) +#undef SET_VALID_ENTRY + + validParamList->set>( + "A", Teuchos::null, + "Generating factory of the matrix A used for filtering"); + + return validParamList; +} + +template +void DropNegativeEntriesFactory::DeclareInput(Level ¤tLevel) const { + Input(currentLevel, "A"); +} + +template +void DropNegativeEntriesFactory::Build(Level ¤tLevel) const { + FactoryMonitor m(*this, "Matrix filtering (springs)", currentLevel); + + RCP Ain = Get>(currentLevel, "A"); + + LocalOrdinal nDofsPerNode = Ain->GetFixedBlockSize(); + + // create new empty Operator + Teuchos::RCP Aout = + MatrixFactory::Build(Ain->getRowMap(), Ain->getGlobalMaxNumRowEntries()); + + size_t numLocalRows = Ain->getLocalNumRows(); + for (size_t row = 0; row < numLocalRows; row++) { + GlobalOrdinal grid = Ain->getRowMap()->getGlobalElement(row); + + int rDofID = Teuchos::as(grid % nDofsPerNode); + + // extract row information from input matrix + 
Teuchos::ArrayView indices; + Teuchos::ArrayView vals; + Ain->getLocalRowView(row, indices, vals); + + // just copy all values in output + Teuchos::ArrayRCP indout( + indices.size(), Teuchos::ScalarTraits::zero()); + Teuchos::ArrayRCP valout(indices.size(), + Teuchos::ScalarTraits::zero()); + + size_t nNonzeros = 0; + for (size_t i = 0; i < (size_t)indices.size(); i++) { + GlobalOrdinal gcid = + Ain->getColMap()->getGlobalElement(indices[i]); // global column id + + int cDofID = Teuchos::as(gcid % nDofsPerNode); + if (rDofID == cDofID && + Teuchos::ScalarTraits::magnitude(vals[i]) >= + Teuchos::ScalarTraits::magnitude( + Teuchos::ScalarTraits::zero())) { + indout[nNonzeros] = gcid; + valout[nNonzeros] = vals[i]; + nNonzeros++; } - indout.resize(nNonzeros); - valout.resize(nNonzeros); - - Aout->insertGlobalValues(Ain->getRowMap()->getGlobalElement(row), indout.view(0,indout.size()), valout.view(0,valout.size())); } + indout.resize(nNonzeros); + valout.resize(nNonzeros); - Aout->fillComplete(Ain->getDomainMap(), Ain->getRangeMap()); + Aout->insertGlobalValues(Ain->getRowMap()->getGlobalElement(row), + indout.view(0, indout.size()), + valout.view(0, valout.size())); + } - // copy block size information - Aout->SetFixedBlockSize(nDofsPerNode); + Aout->fillComplete(Ain->getDomainMap(), Ain->getRangeMap()); - GetOStream(Statistics0, 0) << "Nonzeros in A (input): " << Ain->getGlobalNumEntries() << ", Nonzeros after filtering A: " << Aout->getGlobalNumEntries() << std::endl; + // copy block size information + Aout->SetFixedBlockSize(nDofsPerNode); - Set(currentLevel, "A", Aout); - } + GetOStream(Statistics0, 0) + << "Nonzeros in A (input): " << Ain->getGlobalNumEntries() + << ", Nonzeros after filtering A: " << Aout->getGlobalNumEntries() + << std::endl; + + Set(currentLevel, "A", Aout); +} -} //namespace MueLu +} // namespace MueLu #endif // MUELU_DROPNEGATIVEENTRIESFACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_FilteredAFactory_decl.hpp 
b/packages/muelu/src/Misc/MueLu_FilteredAFactory_decl.hpp index 23f56137596d..3b0d4fceb2d6 100644 --- a/packages/muelu/src/Misc/MueLu_FilteredAFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_FilteredAFactory_decl.hpp @@ -51,67 +51,72 @@ #include "MueLu_ConfigDefs.hpp" #include "MueLu_FilteredAFactory_fwd.hpp" +#include "MueLu_Aggregates_fwd.hpp" +#include "MueLu_AmalgamationInfo_fwd.hpp" #include "MueLu_GraphBase.hpp" #include "MueLu_Level_fwd.hpp" #include "MueLu_SingleLevelFactoryBase.hpp" -#include "MueLu_AmalgamationInfo_fwd.hpp" -#include "MueLu_Aggregates_fwd.hpp" namespace MueLu { - /*! - @class FilteredAFactory class. - @brief Factory for building filtered matrices using filtered graphs. - */ - - template - class FilteredAFactory : public SingleLevelFactoryBase { +/*! + @class FilteredAFactory class. + @brief Factory for building filtered matrices using filtered graphs. +*/ + +template +class FilteredAFactory : public SingleLevelFactoryBase { #undef MUELU_FILTEREDAFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: +public: + //! @name Constructors/Destructors. + //@{ - //! @name Constructors/Destructors. - //@{ + FilteredAFactory() {} - FilteredAFactory() { } + //! Destructor. + virtual ~FilteredAFactory() {} - //! Destructor. - virtual ~FilteredAFactory() { } + RCP GetValidParameterList() const; - RCP GetValidParameterList() const; + //@} - //@} + //! Input + //@{ - //! Input - //@{ + void DeclareInput(Level ¤tLevel) const; - void DeclareInput(Level& currentLevel) const; + //@} - //@} + //! @name Build methods. + //@{ - //! @name Build methods. - //@{ - - /*! - @brief Build method. - - Builds filtered matrix and returns it in currentLevel. 
- */ - void Build(Level& currentLevel) const; - - //@} - private: - void BuildReuse(const Matrix& A, const GraphBase& G, const bool lumping, double dirichletThresh, Matrix& filteredA) const; - void BuildNew (const Matrix& A, const GraphBase& G, const bool lumping, double dirichletThresh, Matrix& filteredA) const; - void BuildNewUsingRootStencil(const Matrix& A, const GraphBase& G, double dirichletThresh, Level& currentLevel, Matrix& filteredA, bool use_spread_lumping, double DdomAllowGrowthRate, double DdomCap) const; - void ExperimentalLumping(const Matrix& A, Matrix& filteredA, double rho, double rho2) const; - - }; //class FilteredAFactory - -} //namespace MueLu + /*! + @brief Build method. + + Builds filtered matrix and returns it in currentLevel. + */ + void Build(Level ¤tLevel) const; + + //@} +private: + void BuildReuse(const Matrix &A, const GraphBase &G, const bool lumping, + double dirichletThresh, Matrix &filteredA) const; + void BuildNew(const Matrix &A, const GraphBase &G, const bool lumping, + double dirichletThresh, Matrix &filteredA) const; + void BuildNewUsingRootStencil(const Matrix &A, const GraphBase &G, + double dirichletThresh, Level ¤tLevel, + Matrix &filteredA, bool use_spread_lumping, + double DdomAllowGrowthRate, + double DdomCap) const; + void ExperimentalLumping(const Matrix &A, Matrix &filteredA, double rho, + double rho2) const; + +}; // class FilteredAFactory + +} // namespace MueLu #define MUELU_FILTEREDAFACTORY_SHORT #endif // MUELU_FILTEREDAFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_FilteredAFactory_def.hpp b/packages/muelu/src/Misc/MueLu_FilteredAFactory_def.hpp index 2828b3c0cb0e..a2c0411a0d14 100644 --- a/packages/muelu/src/Misc/MueLu_FilteredAFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_FilteredAFactory_def.hpp @@ -46,993 +46,1099 @@ #ifndef MUELU_FILTEREDAFACTORY_DEF_HPP #define MUELU_FILTEREDAFACTORY_DEF_HPP +#include #include #include -#include #include "MueLu_FilteredAFactory_decl.hpp" +#include 
"MueLu_Aggregates.hpp" +#include "MueLu_AmalgamationInfo.hpp" #include "MueLu_Level.hpp" #include "MueLu_MasterList.hpp" #include "MueLu_Monitor.hpp" -#include "MueLu_Aggregates.hpp" -#include "MueLu_AmalgamationInfo.hpp" #include "MueLu_Utilities.hpp" // Variable to enable lots of debug output #define MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING 0 - namespace MueLu { - template - void sort_and_unique(T & array) { - std::sort(array.begin(),array.end()); - std::unique(array.begin(),array.end()); +template void sort_and_unique(T &array) { + std::sort(array.begin(), array.end()); + std::unique(array.begin(), array.end()); +} + +template +RCP FilteredAFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + +#define SET_VALID_ENTRY(name) \ + validParamList->setEntry(name, MasterList::getEntry(name)) + SET_VALID_ENTRY("filtered matrix: use lumping"); + SET_VALID_ENTRY("filtered matrix: reuse graph"); + SET_VALID_ENTRY("filtered matrix: reuse eigenvalue"); + SET_VALID_ENTRY("filtered matrix: use root stencil"); + SET_VALID_ENTRY("filtered matrix: use spread lumping"); + SET_VALID_ENTRY("filtered matrix: spread lumping diag dom growth factor"); + SET_VALID_ENTRY("filtered matrix: spread lumping diag dom cap"); + SET_VALID_ENTRY("filtered matrix: Dirichlet threshold"); +#undef SET_VALID_ENTRY + + validParamList->set>( + "A", Teuchos::null, + "Generating factory of the matrix A used for filtering"); + validParamList->set>( + "Graph", Teuchos::null, + "Generating factory for coalesced filtered graph"); + validParamList->set>( + "Filtering", Teuchos::null, "Generating factory for filtering boolean"); + + // Only need these for the "use root stencil" option + validParamList->set>( + "Aggregates", Teuchos::null, "Generating factory of the aggregates"); + validParamList->set>( + "UnAmalgamationInfo", Teuchos::null, + "Generating factory of UnAmalgamationInfo"); + return validParamList; +} + +template +void FilteredAFactory::DeclareInput( + Level 
¤tLevel) const { + Input(currentLevel, "A"); + Input(currentLevel, "Filtering"); + Input(currentLevel, "Graph"); + const ParameterList &pL = GetParameterList(); + if (pL.isParameter("filtered matrix: use root stencil") && + pL.get("filtered matrix: use root stencil") == true) { + Input(currentLevel, "Aggregates"); + Input(currentLevel, "UnAmalgamationInfo"); + } +} + +template +void FilteredAFactory::Build( + Level ¤tLevel) const { + FactoryMonitor m(*this, "Matrix filtering", currentLevel); + + RCP A = Get>(currentLevel, "A"); + if (Get(currentLevel, "Filtering") == false) { + GetOStream(Runtime0) << "Filtered matrix is not being constructed as no " + "filtering is being done" + << std::endl; + Set(currentLevel, "A", A); + return; } + const ParameterList &pL = GetParameterList(); + bool lumping = pL.get("filtered matrix: use lumping"); + if (lumping) + GetOStream(Runtime0) << "Lumping dropped entries" << std::endl; + bool use_spread_lumping = pL.get("filtered matrix: use spread lumping"); + if (use_spread_lumping && (!lumping)) + throw std::runtime_error("Must also request 'filtered matrix: use lumping' " + "in order to use spread lumping"); - template - RCP FilteredAFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - -#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("filtered matrix: use lumping"); - SET_VALID_ENTRY("filtered matrix: reuse graph"); - SET_VALID_ENTRY("filtered matrix: reuse eigenvalue"); - SET_VALID_ENTRY("filtered matrix: use root stencil"); - SET_VALID_ENTRY("filtered matrix: use spread lumping"); - SET_VALID_ENTRY("filtered matrix: spread lumping diag dom growth factor"); - SET_VALID_ENTRY("filtered matrix: spread lumping diag dom cap"); - SET_VALID_ENTRY("filtered matrix: Dirichlet threshold"); -#undef SET_VALID_ENTRY - - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A used for filtering"); - validParamList->set< 
RCP >("Graph", Teuchos::null, "Generating factory for coalesced filtered graph"); - validParamList->set< RCP >("Filtering", Teuchos::null, "Generating factory for filtering boolean"); - - - // Only need these for the "use root stencil" option - validParamList->set< RCP >("Aggregates", Teuchos::null, "Generating factory of the aggregates"); - validParamList->set< RCP >("UnAmalgamationInfo", Teuchos::null, "Generating factory of UnAmalgamationInfo"); - return validParamList; + if (use_spread_lumping) { + GetOStream(Runtime0) << "using spread lumping " << std::endl; } - template - void FilteredAFactory::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "A"); - Input(currentLevel, "Filtering"); - Input(currentLevel, "Graph"); - const ParameterList& pL = GetParameterList(); - if(pL.isParameter("filtered matrix: use root stencil") && pL.get("filtered matrix: use root stencil") == true){ - Input(currentLevel, "Aggregates"); - Input(currentLevel, "UnAmalgamationInfo"); - } + double DdomAllowGrowthRate = 1.1; + double DdomCap = 2.0; + if (use_spread_lumping) { + DdomAllowGrowthRate = pL.get( + "filtered matrix: spread lumping diag dom growth factor"); + DdomCap = pL.get("filtered matrix: spread lumping diag dom cap"); } - - template - void FilteredAFactory::Build(Level& currentLevel) const { - FactoryMonitor m(*this, "Matrix filtering", currentLevel); - - RCP A = Get< RCP >(currentLevel, "A"); - if (Get(currentLevel, "Filtering") == false) { - GetOStream(Runtime0) << "Filtered matrix is not being constructed as no filtering is being done" << std::endl; - Set(currentLevel, "A", A); - return; - } - - const ParameterList& pL = GetParameterList(); - bool lumping = pL.get("filtered matrix: use lumping"); - if (lumping) - GetOStream(Runtime0) << "Lumping dropped entries" << std::endl; - - bool use_spread_lumping = pL.get("filtered matrix: use spread lumping"); - if (use_spread_lumping && (!lumping) ) - throw std::runtime_error("Must also request 'filtered matrix: 
use lumping' in order to use spread lumping"); - - if (use_spread_lumping) { - GetOStream(Runtime0) << "using spread lumping " << std::endl; - } - - double DdomAllowGrowthRate = 1.1; - double DdomCap = 2.0; - if (use_spread_lumping) { - DdomAllowGrowthRate = pL.get("filtered matrix: spread lumping diag dom growth factor"); - DdomCap = pL.get("filtered matrix: spread lumping diag dom cap"); - } - bool use_root_stencil = lumping && pL.get("filtered matrix: use root stencil"); - if (use_root_stencil) - GetOStream(Runtime0) << "Using root stencil for dropping" << std::endl; - double dirichlet_threshold = pL.get("filtered matrix: Dirichlet threshold"); - if(dirichlet_threshold >= 0.0) - GetOStream(Runtime0) << "Filtering Dirichlet threshold of "<("filtered matrix: reuse graph")) - GetOStream(Runtime0) << "Reusing graph"< G = Get< RCP >(currentLevel, "Graph"); - if(MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING) - { - FILE * f = fopen("graph.dat","w"); - size_t numGRows = G->GetNodeNumVertices(); - for (size_t i = 0; i < numGRows; i++) { - // Set up filtering array - ArrayView indsG = G->getNeighborVertices(i); - for(size_t j=0; j<(size_t)indsG.size(); j++) { - fprintf(f,"%d %d 1.0\n",(int)i,(int)indsG[j]); - } + bool use_root_stencil = + lumping && pL.get("filtered matrix: use root stencil"); + if (use_root_stencil) + GetOStream(Runtime0) << "Using root stencil for dropping" << std::endl; + double dirichlet_threshold = + pL.get("filtered matrix: Dirichlet threshold"); + if (dirichlet_threshold >= 0.0) + GetOStream(Runtime0) << "Filtering Dirichlet threshold of " + << dirichlet_threshold << std::endl; + + if (use_root_stencil || pL.get("filtered matrix: reuse graph")) + GetOStream(Runtime0) << "Reusing graph" << std::endl; + else + GetOStream(Runtime0) << "Generating new graph" << std::endl; + + RCP G = Get>(currentLevel, "Graph"); + if (MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING) { + FILE *f = fopen("graph.dat", "w"); + size_t numGRows = G->GetNodeNumVertices(); + for (size_t i = 
0; i < numGRows; i++) { + // Set up filtering array + ArrayView indsG = G->getNeighborVertices(i); + for (size_t j = 0; j < (size_t)indsG.size(); j++) { + fprintf(f, "%d %d 1.0\n", (int)i, (int)indsG[j]); } - fclose(f); - } - - RCP fillCompleteParams(new ParameterList); - fillCompleteParams->set("No Nonlocal Changes", true); - - RCP filteredA; - if(use_root_stencil) { - filteredA = MatrixFactory::Build(A->getCrsGraph()); - filteredA->fillComplete(fillCompleteParams); - filteredA->resumeFill(); - BuildNewUsingRootStencil(*A, *G, dirichlet_threshold, currentLevel,*filteredA, use_spread_lumping,DdomAllowGrowthRate, DdomCap); - filteredA->fillComplete(fillCompleteParams); - - } - else if (pL.get("filtered matrix: reuse graph")) { - filteredA = MatrixFactory::Build(A->getCrsGraph()); - filteredA->resumeFill(); - BuildReuse(*A, *G, (lumping != use_spread_lumping), dirichlet_threshold,*filteredA); - // only lump inside BuildReuse if lumping is true and use_spread_lumping is false - // note: they use_spread_lumping cannot be true if lumping is false - - if (use_spread_lumping) ExperimentalLumping(*A, *filteredA, DdomAllowGrowthRate, DdomCap); - filteredA->fillComplete(fillCompleteParams); - - } else { - - filteredA = MatrixFactory::Build(A->getRowMap(), A->getColMap(), A->getLocalMaxNumRowEntries()); - BuildNew(*A, *G, (lumping != use_spread_lumping), dirichlet_threshold,*filteredA); - // only lump inside BuildNew if lumping is true and use_spread_lumping is false - // note: they use_spread_lumping cannot be true if lumping is false - if (use_spread_lumping) ExperimentalLumping(*A, *filteredA, DdomAllowGrowthRate, DdomCap); - filteredA->fillComplete(A->getDomainMap(), A->getRangeMap(), fillCompleteParams); } + fclose(f); + } + RCP fillCompleteParams(new ParameterList); + fillCompleteParams->set("No Nonlocal Changes", true); + + RCP filteredA; + if (use_root_stencil) { + filteredA = MatrixFactory::Build(A->getCrsGraph()); + filteredA->fillComplete(fillCompleteParams); + 
filteredA->resumeFill(); + BuildNewUsingRootStencil(*A, *G, dirichlet_threshold, currentLevel, + *filteredA, use_spread_lumping, + DdomAllowGrowthRate, DdomCap); + filteredA->fillComplete(fillCompleteParams); + + } else if (pL.get("filtered matrix: reuse graph")) { + filteredA = MatrixFactory::Build(A->getCrsGraph()); + filteredA->resumeFill(); + BuildReuse(*A, *G, (lumping != use_spread_lumping), dirichlet_threshold, + *filteredA); + // only lump inside BuildReuse if lumping is true and use_spread_lumping is + // false note: they use_spread_lumping cannot be true if lumping is false + + if (use_spread_lumping) + ExperimentalLumping(*A, *filteredA, DdomAllowGrowthRate, DdomCap); + filteredA->fillComplete(fillCompleteParams); + + } else { + + filteredA = MatrixFactory::Build(A->getRowMap(), A->getColMap(), + A->getLocalMaxNumRowEntries()); + BuildNew(*A, *G, (lumping != use_spread_lumping), dirichlet_threshold, + *filteredA); + // only lump inside BuildNew if lumping is true and use_spread_lumping is + // false note: they use_spread_lumping cannot be true if lumping is false + if (use_spread_lumping) + ExperimentalLumping(*A, *filteredA, DdomAllowGrowthRate, DdomCap); + filteredA->fillComplete(A->getDomainMap(), A->getRangeMap(), + fillCompleteParams); + } + if (MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING) { + Xpetra::IO::Write("filteredA.dat", *filteredA); + + // original filtered A and actual A + Xpetra::IO::Write("A.dat", *A); + RCP origFilteredA = MatrixFactory::Build( + A->getRowMap(), A->getColMap(), A->getLocalMaxNumRowEntries()); + BuildNew(*A, *G, lumping, dirichlet_threshold, *origFilteredA); + if (use_spread_lumping) + ExperimentalLumping(*A, *origFilteredA, DdomAllowGrowthRate, DdomCap); + origFilteredA->fillComplete(A->getDomainMap(), A->getRangeMap(), + fillCompleteParams); + Xpetra::IO::Write("origFilteredA.dat", *origFilteredA); + } - if(MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING) - { - Xpetra::IO::Write("filteredA.dat", *filteredA); - - //original filtered 
A and actual A - Xpetra::IO::Write("A.dat", *A); - RCP origFilteredA = MatrixFactory::Build(A->getRowMap(), A->getColMap(), A->getLocalMaxNumRowEntries()); - BuildNew(*A, *G, lumping, dirichlet_threshold,*origFilteredA); - if (use_spread_lumping) ExperimentalLumping(*A, *origFilteredA, DdomAllowGrowthRate, DdomCap); - origFilteredA->fillComplete(A->getDomainMap(), A->getRangeMap(), fillCompleteParams); - Xpetra::IO::Write("origFilteredA.dat", *origFilteredA); - } - - - filteredA->SetFixedBlockSize(A->GetFixedBlockSize()); - - if (pL.get("filtered matrix: reuse eigenvalue")) { - // Reuse max eigenvalue from A - // It is unclear what eigenvalue is the best for the smoothing, but we already may have - // the D^{-1}A estimate in A, may as well use it. - // NOTE: ML does that too - filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate()); - } + filteredA->SetFixedBlockSize(A->GetFixedBlockSize()); - Set(currentLevel, "A", filteredA); + if (pL.get("filtered matrix: reuse eigenvalue")) { + // Reuse max eigenvalue from A + // It is unclear what eigenvalue is the best for the smoothing, but we + // already may have the D^{-1}A estimate in A, may as well use it. NOTE: ML + // does that too + filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate()); } + Set(currentLevel, "A", filteredA); +} + // Epetra's API allows direct access to row array. // Tpetra's API does not, providing only ArrayView // But in most situations we are currently interested in, it is safe to assume // that the view is to the actual data. So this macro directs the code to do // const_cast, and modify the entries directly. This allows us to avoid // replaceLocalValues() call which is quite expensive due to all the searches. -//#define ASSUME_DIRECT_ACCESS_TO_ROW // See github issue 10883#issuecomment-1256676340 - - // Both Epetra and Tpetra matrix-matrix multiply use the following trick: - // if an entry of the left matrix is zero, it does not compute or store the - // zero value. 
- // - // This trick allows us to bypass constructing a new matrix. Instead, we - // make a deep copy of the original one, and fill it in with zeros, which - // are ignored during the prolongator smoothing. - template - void FilteredAFactory:: - BuildReuse(const Matrix& A, const GraphBase& G, const bool lumping, double dirichletThresh, Matrix& filteredA) const { - using TST = typename Teuchos::ScalarTraits; - SC zero = TST::zero(); - - - size_t blkSize = A.GetFixedBlockSize(); - - ArrayView inds; - ArrayView valsA; +//#define ASSUME_DIRECT_ACCESS_TO_ROW // See github issue +// 10883#issuecomment-1256676340 + +// Both Epetra and Tpetra matrix-matrix multiply use the following trick: +// if an entry of the left matrix is zero, it does not compute or store the +// zero value. +// +// This trick allows us to bypass constructing a new matrix. Instead, we +// make a deep copy of the original one, and fill it in with zeros, which +// are ignored during the prolongator smoothing. +template +void FilteredAFactory::BuildReuse( + const Matrix &A, const GraphBase &G, const bool lumping, + double dirichletThresh, Matrix &filteredA) const { + using TST = typename Teuchos::ScalarTraits; + SC zero = TST::zero(); + + size_t blkSize = A.GetFixedBlockSize(); + + ArrayView inds; + ArrayView valsA; #ifdef ASSUME_DIRECT_ACCESS_TO_ROW - ArrayView vals; + ArrayView vals; #else - Array vals; + Array vals; #endif - Array filter( std::max(blkSize*G.GetImportMap()->getLocalNumElements(), - A.getColMap()->getLocalNumElements()), - 0); + Array filter(std::max(blkSize * G.GetImportMap()->getLocalNumElements(), + A.getColMap()->getLocalNumElements()), + 0); - size_t numGRows = G.GetNodeNumVertices(); - for (size_t i = 0; i < numGRows; i++) { - // Set up filtering array - ArrayView indsG = G.getNeighborVertices(i); - for (size_t j = 0; j < as(indsG.size()); j++) - for (size_t k = 0; k < blkSize; k++) - filter[indsG[j]*blkSize+k] = 1; + size_t numGRows = G.GetNodeNumVertices(); + for (size_t i = 0; 
i < numGRows; i++) { + // Set up filtering array + ArrayView indsG = G.getNeighborVertices(i); + for (size_t j = 0; j < as(indsG.size()); j++) + for (size_t k = 0; k < blkSize; k++) + filter[indsG[j] * blkSize + k] = 1; - for (size_t k = 0; k < blkSize; k++) { - LO row = i*blkSize + k; + for (size_t k = 0; k < blkSize; k++) { + LO row = i * blkSize + k; - A.getLocalRowView(row, inds, valsA); + A.getLocalRowView(row, inds, valsA); - size_t nnz = inds.size(); - if (nnz == 0) - continue; + size_t nnz = inds.size(); + if (nnz == 0) + continue; #ifdef ASSUME_DIRECT_ACCESS_TO_ROW - // Transform ArrayView into ArrayView - ArrayView vals1; - filteredA.getLocalRowView(row, inds, vals1); - vals = ArrayView(const_cast(vals1.getRawPtr()), nnz); + // Transform ArrayView into ArrayView + ArrayView vals1; + filteredA.getLocalRowView(row, inds, vals1); + vals = ArrayView(const_cast(vals1.getRawPtr()), nnz); - memcpy(vals.getRawPtr(), valsA.getRawPtr(), nnz*sizeof(SC)); + memcpy(vals.getRawPtr(), valsA.getRawPtr(), nnz * sizeof(SC)); #else - vals = Array(valsA); + vals = Array(valsA); #endif - SC ZERO = Teuchos::ScalarTraits::zero(); - // SC ONE = Teuchos::ScalarTraits::one(); - SC A_rowsum = ZERO, F_rowsum = ZERO; - for(LO l = 0; l < (LO)inds.size(); l++) - A_rowsum += valsA[l]; + SC ZERO = Teuchos::ScalarTraits::zero(); + // SC ONE = Teuchos::ScalarTraits::one(); + SC A_rowsum = ZERO, F_rowsum = ZERO; + for (LO l = 0; l < (LO)inds.size(); l++) + A_rowsum += valsA[l]; + + if (lumping == false) { + for (size_t j = 0; j < nnz; j++) + if (!filter[inds[j]]) + vals[j] = zero; - if (lumping == false) { - for (size_t j = 0; j < nnz; j++) - if (!filter[inds[j]]) - vals[j] = zero; + } else { + LO diagIndex = -1; + SC diagExtra = zero; - } else { - LO diagIndex = -1; - SC diagExtra = zero; - - for (size_t j = 0; j < nnz; j++) { - if (filter[inds[j]]) { - if (inds[j] == row) { - // Remember diagonal position - diagIndex = j; - } - continue; + for (size_t j = 0; j < nnz; j++) { + if 
(filter[inds[j]]) { + if (inds[j] == row) { + // Remember diagonal position + diagIndex = j; } + continue; + } - diagExtra += vals[j]; + diagExtra += vals[j]; - vals[j] = zero; - } + vals[j] = zero; + } - // Lump dropped entries - // NOTE - // * Does it make sense to lump for elasticity? - // * Is it different for diffusion and elasticity? - //SC diagA = ZERO; - if (diagIndex != -1) { - //diagA = vals[diagIndex]; - vals[diagIndex] += diagExtra; - if(dirichletThresh >= 0.0 && TST::real(vals[diagIndex]) <= dirichletThresh) { - - // printf("WARNING: row %d diag(Afiltered) = %8.2e diag(A)=%8.2e\n",row,vals[diagIndex],diagA); - for(LO l = 0; l < (LO)nnz; l++) - F_rowsum += vals[l]; - // printf(" : A rowsum = %8.2e F rowsum = %8.2e\n",A_rowsum,F_rowsum); - vals[diagIndex] = TST::one(); - } + // Lump dropped entries + // NOTE + // * Does it make sense to lump for elasticity? + // * Is it different for diffusion and elasticity? + // SC diagA = ZERO; + if (diagIndex != -1) { + // diagA = vals[diagIndex]; + vals[diagIndex] += diagExtra; + if (dirichletThresh >= 0.0 && + TST::real(vals[diagIndex]) <= dirichletThresh) { + + // printf("WARNING: row %d diag(Afiltered) = %8.2e + // diag(A)=%8.2e\n",row,vals[diagIndex],diagA); + for (LO l = 0; l < (LO)nnz; l++) + F_rowsum += vals[l]; + // printf(" : A rowsum = %8.2e F rowsum = + // %8.2e\n",A_rowsum,F_rowsum); + vals[diagIndex] = TST::one(); } - } + } #ifndef ASSUME_DIRECT_ACCESS_TO_ROW - // Because we used a column map in the construction of the matrix - // we can just use insertLocalValues here instead of insertGlobalValues - filteredA.replaceLocalValues(row, inds, vals); + // Because we used a column map in the construction of the matrix + // we can just use insertLocalValues here instead of insertGlobalValues + filteredA.replaceLocalValues(row, inds, vals); #endif - } - - // Reset filtering array - for (size_t j = 0; j < as (indsG.size()); j++) - for (size_t k = 0; k < blkSize; k++) - filter[indsG[j]*blkSize+k] = 0; } - } - - 
template - void FilteredAFactory:: - BuildNew(const Matrix& A, const GraphBase& G, const bool lumping, double dirichletThresh, Matrix& filteredA) const { - using TST = typename Teuchos::ScalarTraits; - SC zero = Teuchos::ScalarTraits::zero(); - size_t blkSize = A.GetFixedBlockSize(); - - ArrayView indsA; - ArrayView valsA; - Array inds; - Array vals; - - Array filter(blkSize * G.GetImportMap()->getLocalNumElements(), 0); - - size_t numGRows = G.GetNodeNumVertices(); - for (size_t i = 0; i < numGRows; i++) { - // Set up filtering array - ArrayView indsG = G.getNeighborVertices(i); - for (size_t j = 0; j < as(indsG.size()); j++) - for (size_t k = 0; k < blkSize; k++) - filter[indsG[j]*blkSize+k] = 1; - - for (size_t k = 0; k < blkSize; k++) { - LO row = i*blkSize + k; - - A.getLocalRowView(row, indsA, valsA); - - size_t nnz = indsA.size(); - if (nnz == 0) - continue; - - inds.resize(indsA.size()); - vals.resize(valsA.size()); + // Reset filtering array + for (size_t j = 0; j < as(indsG.size()); j++) + for (size_t k = 0; k < blkSize; k++) + filter[indsG[j] * blkSize + k] = 0; + } +} + +template +void FilteredAFactory::BuildNew( + const Matrix &A, const GraphBase &G, const bool lumping, + double dirichletThresh, Matrix &filteredA) const { + using TST = typename Teuchos::ScalarTraits; + SC zero = Teuchos::ScalarTraits::zero(); + + size_t blkSize = A.GetFixedBlockSize(); + + ArrayView indsA; + ArrayView valsA; + Array inds; + Array vals; + + Array filter(blkSize * G.GetImportMap()->getLocalNumElements(), 0); + + size_t numGRows = G.GetNodeNumVertices(); + for (size_t i = 0; i < numGRows; i++) { + // Set up filtering array + ArrayView indsG = G.getNeighborVertices(i); + for (size_t j = 0; j < as(indsG.size()); j++) + for (size_t k = 0; k < blkSize; k++) + filter[indsG[j] * blkSize + k] = 1; + + for (size_t k = 0; k < blkSize; k++) { + LO row = i * blkSize + k; + + A.getLocalRowView(row, indsA, valsA); + + size_t nnz = indsA.size(); + if (nnz == 0) + continue; + + 
inds.resize(indsA.size()); + vals.resize(valsA.size()); + + size_t numInds = 0; + if (lumping == false) { + for (size_t j = 0; j < nnz; j++) + if (filter[indsA[j]]) { + inds[numInds] = indsA[j]; + vals[numInds] = valsA[j]; + numInds++; + } - size_t numInds = 0; - if (lumping == false) { - for (size_t j = 0; j < nnz; j++) - if (filter[indsA[j]]) { - inds[numInds] = indsA[j]; - vals[numInds] = valsA[j]; - numInds++; - } + } else { + LO diagIndex = -1; + SC diagExtra = zero; - } else { - LO diagIndex = -1; - SC diagExtra = zero; + for (size_t j = 0; j < nnz; j++) { + if (filter[indsA[j]]) { + inds[numInds] = indsA[j]; + vals[numInds] = valsA[j]; - for (size_t j = 0; j < nnz; j++) { - if (filter[indsA[j]]) { - inds[numInds] = indsA[j]; - vals[numInds] = valsA[j]; + // Remember diagonal position + if (inds[numInds] == row) + diagIndex = numInds; - // Remember diagonal position - if (inds[numInds] == row) - diagIndex = numInds; + numInds++; - numInds++; - - } else { - diagExtra += valsA[j]; - } + } else { + diagExtra += valsA[j]; } + } - // Lump dropped entries - // NOTE - // * Does it make sense to lump for elasticity? - // * Is it different for diffusion and elasticity? - if (diagIndex != -1) { - vals[diagIndex] += diagExtra; - if(dirichletThresh >= 0.0 && TST::real(vals[diagIndex]) <= dirichletThresh) { - // SC A_rowsum = ZERO, F_rowsum = ZERO; - // printf("WARNING: row %d diag(Afiltered) = %8.2e diag(A)=%8.2e\n",row,vals[diagIndex],diagA); - // for(LO l = 0; l < (LO)nnz; l++) - // F_rowsum += vals[l]; - // printf(" : A rowsum = %8.2e F rowsum = %8.2e\n",A_rowsum,F_rowsum); - vals[diagIndex] = TST::one(); - } + // Lump dropped entries + // NOTE + // * Does it make sense to lump for elasticity? + // * Is it different for diffusion and elasticity? 
+ if (diagIndex != -1) { + vals[diagIndex] += diagExtra; + if (dirichletThresh >= 0.0 && + TST::real(vals[diagIndex]) <= dirichletThresh) { + // SC A_rowsum = ZERO, F_rowsum = ZERO; + // printf("WARNING: row %d diag(Afiltered) = %8.2e + // diag(A)=%8.2e\n",row,vals[diagIndex],diagA); for(LO + // l = 0; l < (LO)nnz; l++) + // F_rowsum += vals[l]; + // printf(" : A rowsum = %8.2e F rowsum = + // %8.2e\n",A_rowsum,F_rowsum); + vals[diagIndex] = TST::one(); } - } - inds.resize(numInds); - vals.resize(numInds); - - - - // Because we used a column map in the construction of the matrix - // we can just use insertLocalValues here instead of insertGlobalValues - filteredA.insertLocalValues(row, inds, vals); } + inds.resize(numInds); + vals.resize(numInds); - // Reset filtering array - for (size_t j = 0; j < as (indsG.size()); j++) - for (size_t k = 0; k < blkSize; k++) - filter[indsG[j]*blkSize+k] = 0; + // Because we used a column map in the construction of the matrix + // we can just use insertLocalValues here instead of insertGlobalValues + filteredA.insertLocalValues(row, inds, vals); } - } - template - void FilteredAFactory:: - BuildNewUsingRootStencil(const Matrix& A, const GraphBase& G, double dirichletThresh, Level& currentLevel, Matrix& filteredA, bool use_spread_lumping, double DdomAllowGrowthRate, double DdomCap) const { - using TST = typename Teuchos::ScalarTraits; - using Teuchos::arcp_const_cast; - SC ZERO = Teuchos::ScalarTraits::zero(); - SC ONE = Teuchos::ScalarTraits::one(); - LO INVALID = Teuchos::OrdinalTraits::invalid(); - - size_t numNodes = G.GetNodeNumVertices(); - size_t blkSize = A.GetFixedBlockSize(); - size_t numRows = A.getMap()->getLocalNumElements(); - ArrayView indsA; - ArrayView valsA; - ArrayRCP rowptr; - ArrayRCP inds; - ArrayRCP vals_const; - ArrayRCP vals; - - // We're going to grab the vals array from filteredA and then blitz it with NAN as a placeholder for "entries that have - // not yey been touched." 
If I see an entry in the primary loop that has a zero, then I assume it has been nuked by - // it's symmetric pair, so I add it to the diagonal. If it has a NAN, process as normal. - RCP filteredAcrs = dynamic_cast(&filteredA)->getCrsMatrix(); - filteredAcrs->getAllValues(rowptr,inds,vals_const); - vals = arcp_const_cast(vals_const); - Array vals_dropped_indicator(vals.size(),false); - - // In the badAggNeighbors loop, if the entry has any number besides NAN, I add it to the diagExtra and then zero the guy. - RCP aggregates = Get< RCP > (currentLevel, "Aggregates"); - RCP amalgInfo = Get< RCP > (currentLevel, "UnAmalgamationInfo"); - LO numAggs = aggregates->GetNumAggregates(); - - // Check map nesting - RCP rowMap = A.getRowMap(); - RCP colMap = A.getColMap(); - bool goodMap = MueLu::Utilities::MapsAreNested(*rowMap, *colMap); - TEUCHOS_TEST_FOR_EXCEPTION(!goodMap, Exceptions::RuntimeError,"FilteredAFactory: Maps are not nested"); - - // Since we're going to symmetrize this - Array diagIndex(numRows,INVALID); - Array diagExtra(numRows,ZERO); - - // Lists of nodes in each aggregate - struct { - // GH: For now, copy everything to host until we properly set this factory to run device code - // Instead, we'll copy data into HostMirrors and run the algorithms on host, saving optimization for later. 
- typename Aggregates::LO_view ptr, nodes, unaggregated; - typename Aggregates::LO_view::HostMirror ptr_h, nodes_h, unaggregated_h; - } nodesInAgg; - aggregates->ComputeNodesInAggregate(nodesInAgg.ptr, nodesInAgg.nodes, nodesInAgg.unaggregated); - nodesInAgg.ptr_h = Kokkos::create_mirror_view(nodesInAgg.ptr); - nodesInAgg.nodes_h = Kokkos::create_mirror_view(nodesInAgg.nodes); - nodesInAgg.unaggregated_h = Kokkos::create_mirror_view(nodesInAgg.unaggregated); - Kokkos::deep_copy(nodesInAgg.ptr_h, nodesInAgg.ptr); - Kokkos::deep_copy(nodesInAgg.nodes_h, nodesInAgg.nodes); - Kokkos::deep_copy(nodesInAgg.unaggregated_h, nodesInAgg.unaggregated); - Teuchos::ArrayRCP vertex2AggId = aggregates->GetVertex2AggId()->getData(0); // GH: this is needed on device, grab the pointer after we call ComputeNodesInAggregate - - LO graphNumCols = G.GetImportMap()->getLocalNumElements(); - Array filter(graphNumCols, false); - - // Loop over the unaggregated nodes. Blitz those rows. We don't want to smooth singletons. 
- for(LO i=0; i< (LO)nodesInAgg.unaggregated_h.extent(0); i++) { - for (LO m = 0; m < (LO)blkSize; m++) { - LO row = amalgInfo->ComputeLocalDOF(nodesInAgg.unaggregated_h(i),m); - if (row >= (LO)numRows) continue; - size_t index_start = rowptr[row]; - A.getLocalRowView(row, indsA, valsA); - for(LO k=0; k<(LO)indsA.size(); k++) { - if(row == indsA[k]) { - vals[index_start+k] = ONE; - diagIndex[row] = k; - } - else - vals[index_start+k] = ZERO; - } + // Reset filtering array + for (size_t j = 0; j < as(indsG.size()); j++) + for (size_t k = 0; k < blkSize; k++) + filter[indsG[j] * blkSize + k] = 0; + } +} + +template +void FilteredAFactory:: + BuildNewUsingRootStencil(const Matrix &A, const GraphBase &G, + double dirichletThresh, Level ¤tLevel, + Matrix &filteredA, bool use_spread_lumping, + double DdomAllowGrowthRate, double DdomCap) const { + using TST = typename Teuchos::ScalarTraits; + using Teuchos::arcp_const_cast; + SC ZERO = Teuchos::ScalarTraits::zero(); + SC ONE = Teuchos::ScalarTraits::one(); + LO INVALID = Teuchos::OrdinalTraits::invalid(); + + size_t numNodes = G.GetNodeNumVertices(); + size_t blkSize = A.GetFixedBlockSize(); + size_t numRows = A.getMap()->getLocalNumElements(); + ArrayView indsA; + ArrayView valsA; + ArrayRCP rowptr; + ArrayRCP inds; + ArrayRCP vals_const; + ArrayRCP vals; + + // We're going to grab the vals array from filteredA and then blitz it with + // NAN as a placeholder for "entries that have not yey been touched." If I + // see an entry in the primary loop that has a zero, then I assume it has been + // nuked by it's symmetric pair, so I add it to the diagonal. If it has a + // NAN, process as normal. 
+ RCP filteredAcrs = + dynamic_cast(&filteredA)->getCrsMatrix(); + filteredAcrs->getAllValues(rowptr, inds, vals_const); + vals = arcp_const_cast(vals_const); + Array vals_dropped_indicator(vals.size(), false); + + // In the badAggNeighbors loop, if the entry has any number besides NAN, I add + // it to the diagExtra and then zero the guy. + RCP aggregates = Get>(currentLevel, "Aggregates"); + RCP amalgInfo = + Get>(currentLevel, "UnAmalgamationInfo"); + LO numAggs = aggregates->GetNumAggregates(); + + // Check map nesting + RCP rowMap = A.getRowMap(); + RCP colMap = A.getColMap(); + bool goodMap = + MueLu::Utilities::MapsAreNested(*rowMap, *colMap); + TEUCHOS_TEST_FOR_EXCEPTION(!goodMap, Exceptions::RuntimeError, + "FilteredAFactory: Maps are not nested"); + + // Since we're going to symmetrize this + Array diagIndex(numRows, INVALID); + Array diagExtra(numRows, ZERO); + + // Lists of nodes in each aggregate + struct { + // GH: For now, copy everything to host until we properly set this factory + // to run device code Instead, we'll copy data into HostMirrors and run the + // algorithms on host, saving optimization for later. 
+ typename Aggregates::LO_view ptr, nodes, unaggregated; + typename Aggregates::LO_view::HostMirror ptr_h, nodes_h, unaggregated_h; + } nodesInAgg; + aggregates->ComputeNodesInAggregate(nodesInAgg.ptr, nodesInAgg.nodes, + nodesInAgg.unaggregated); + nodesInAgg.ptr_h = Kokkos::create_mirror_view(nodesInAgg.ptr); + nodesInAgg.nodes_h = Kokkos::create_mirror_view(nodesInAgg.nodes); + nodesInAgg.unaggregated_h = + Kokkos::create_mirror_view(nodesInAgg.unaggregated); + Kokkos::deep_copy(nodesInAgg.ptr_h, nodesInAgg.ptr); + Kokkos::deep_copy(nodesInAgg.nodes_h, nodesInAgg.nodes); + Kokkos::deep_copy(nodesInAgg.unaggregated_h, nodesInAgg.unaggregated); + Teuchos::ArrayRCP vertex2AggId = + aggregates->GetVertex2AggId()->getData( + 0); // GH: this is needed on device, grab the pointer after we call + // ComputeNodesInAggregate + + LO graphNumCols = G.GetImportMap()->getLocalNumElements(); + Array filter(graphNumCols, false); + + // Loop over the unaggregated nodes. Blitz those rows. We don't want to smooth + // singletons. 
+ for (LO i = 0; i < (LO)nodesInAgg.unaggregated_h.extent(0); i++) { + for (LO m = 0; m < (LO)blkSize; m++) { + LO row = amalgInfo->ComputeLocalDOF(nodesInAgg.unaggregated_h(i), m); + if (row >= (LO)numRows) + continue; + size_t index_start = rowptr[row]; + A.getLocalRowView(row, indsA, valsA); + for (LO k = 0; k < (LO)indsA.size(); k++) { + if (row == indsA[k]) { + vals[index_start + k] = ONE; + diagIndex[row] = k; + } else + vals[index_start + k] = ZERO; } - }//end nodesInAgg.unaggregated.extent(0); - - - std::vector badCount(numAggs,0); - - // Find the biggest aggregate size in *nodes* - LO maxAggSize=0; - for(LO i=0; i goodAggNeighbors(G.getLocalMaxNumRowEntries()); - std::vector badAggNeighbors(std::min(G.getLocalMaxNumRowEntries()*maxAggSize,numNodes)); - - size_t numNewDrops=0; - size_t numOldDrops=0; - size_t numFixedDiags=0; - size_t numSymDrops = 0; - - for(LO i=0; iIsRoot(nodesInAgg.nodes_h(k))) { - root_node = nodesInAgg.nodes_h(k); break; - } + } + } // end nodesInAgg.unaggregated.extent(0); + + std::vector badCount(numAggs, 0); + + // Find the biggest aggregate size in *nodes* + LO maxAggSize = 0; + for (LO i = 0; i < numAggs; i++) + maxAggSize = + std::max(maxAggSize, nodesInAgg.ptr_h(i + 1) - nodesInAgg.ptr_h(i)); + + // Loop over all the aggregates + std::vector goodAggNeighbors(G.getLocalMaxNumRowEntries()); + std::vector badAggNeighbors( + std::min(G.getLocalMaxNumRowEntries() * maxAggSize, numNodes)); + + size_t numNewDrops = 0; + size_t numOldDrops = 0; + size_t numFixedDiags = 0; + size_t numSymDrops = 0; + + for (LO i = 0; i < numAggs; i++) { + LO numNodesInAggregate = nodesInAgg.ptr_h(i + 1) - nodesInAgg.ptr_h(i); + if (numNodesInAggregate == 0) + continue; + + // Find the root *node* + LO root_node = INVALID; + for (LO k = nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i + 1); k++) { + if (aggregates->IsRoot(nodesInAgg.nodes_h(k))) { + root_node = nodesInAgg.nodes_h(k); + break; } + } - TEUCHOS_TEST_FOR_EXCEPTION(root_node == INVALID, - 
Exceptions::RuntimeError,"MueLu::FilteredAFactory::BuildNewUsingRootStencil: Cannot find root node"); - - // Find the list of "good" node neighbors (aka nodes which border the root node in the Graph G) - ArrayView goodNodeNeighbors = G.getNeighborVertices(root_node); - - // Now find the list of "good" aggregate neighbors (aka the aggregates neighbor the root node in the Graph G) - goodAggNeighbors.resize(0); - for(LO k=0; k<(LO) goodNodeNeighbors.size(); k++) { - goodAggNeighbors.push_back(vertex2AggId[goodNodeNeighbors[k]]); - } - sort_and_unique(goodAggNeighbors); - - // Now we get the list of "bad" aggregate neighbors (aka aggregates which border the - // root node in the original matrix A, which are not goodNodeNeighbors). Since we - // don't have an amalgamated version of the original matrix, we use the matrix directly - badAggNeighbors.resize(0); - for(LO j = 0; j < (LO)blkSize; j++) { - LO row = amalgInfo->ComputeLocalDOF(root_node,j); - if (row >= (LO)numRows) continue; - A.getLocalRowView(row, indsA, valsA); - for(LO k=0; k<(LO)indsA.size(); k++) { - if ( (indsA[k] < (LO)numRows) && (TST::magnitude(valsA[k]) != TST::magnitude(ZERO))) { - LO node = amalgInfo->ComputeLocalNode(indsA[k]); - LO agg = vertex2AggId[node]; - if(!std::binary_search(goodAggNeighbors.begin(),goodAggNeighbors.end(),agg)) - badAggNeighbors.push_back(agg); - } - } - } - sort_and_unique(badAggNeighbors); + TEUCHOS_TEST_FOR_EXCEPTION( + root_node == INVALID, Exceptions::RuntimeError, + "MueLu::FilteredAFactory::BuildNewUsingRootStencil: Cannot find root " + "node"); - // Go through the filtered graph and count the number of connections to the badAggNeighbors - // if there are 2 or more of these connections, remove them from the bad list. 
+ // Find the list of "good" node neighbors (aka nodes which border the root + // node in the Graph G) + ArrayView goodNodeNeighbors = G.getNeighborVertices(root_node); - for (LO k=nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i+1); k++) { - ArrayView nodeNeighbors = G.getNeighborVertices(k); - for (LO kk=0; kk < nodeNeighbors.size(); kk++) { - if ( (vertex2AggId[nodeNeighbors[kk]] >= 0) && (vertex2AggId[nodeNeighbors[kk]] < numAggs)) - (badCount[vertex2AggId[nodeNeighbors[kk]]])++; + // Now find the list of "good" aggregate neighbors (aka the aggregates + // neighbor the root node in the Graph G) + goodAggNeighbors.resize(0); + for (LO k = 0; k < (LO)goodNodeNeighbors.size(); k++) { + goodAggNeighbors.push_back(vertex2AggId[goodNodeNeighbors[k]]); + } + sort_and_unique(goodAggNeighbors); + + // Now we get the list of "bad" aggregate neighbors (aka aggregates which + // border the root node in the original matrix A, which are not + // goodNodeNeighbors). Since we don't have an amalgamated version of the + // original matrix, we use the matrix directly + badAggNeighbors.resize(0); + for (LO j = 0; j < (LO)blkSize; j++) { + LO row = amalgInfo->ComputeLocalDOF(root_node, j); + if (row >= (LO)numRows) + continue; + A.getLocalRowView(row, indsA, valsA); + for (LO k = 0; k < (LO)indsA.size(); k++) { + if ((indsA[k] < (LO)numRows) && + (TST::magnitude(valsA[k]) != TST::magnitude(ZERO))) { + LO node = amalgInfo->ComputeLocalNode(indsA[k]); + LO agg = vertex2AggId[node]; + if (!std::binary_search(goodAggNeighbors.begin(), + goodAggNeighbors.end(), agg)) + badAggNeighbors.push_back(agg); } } - std::vector reallyBadAggNeighbors(std::min(G.getLocalMaxNumRowEntries()*maxAggSize,numNodes)); - reallyBadAggNeighbors.resize(0); - for (LO k=0; k < (LO) badAggNeighbors.size(); k++) { - if (badCount[badAggNeighbors[k]] <= 1 ) reallyBadAggNeighbors.push_back(badAggNeighbors[k]); + } + sort_and_unique(badAggNeighbors); + + // Go through the filtered graph and count the number of connections 
to the + // badAggNeighbors if there are 2 or more of these connections, remove them + // from the bad list. + + for (LO k = nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i + 1); k++) { + ArrayView nodeNeighbors = G.getNeighborVertices(k); + for (LO kk = 0; kk < nodeNeighbors.size(); kk++) { + if ((vertex2AggId[nodeNeighbors[kk]] >= 0) && + (vertex2AggId[nodeNeighbors[kk]] < numAggs)) + (badCount[vertex2AggId[nodeNeighbors[kk]]])++; } - for (LO k=nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i+1); k++) { - ArrayView nodeNeighbors = G.getNeighborVertices(k); - for (LO kk=0; kk < nodeNeighbors.size(); kk++) { - if ( (vertex2AggId[nodeNeighbors[kk]] >= 0) && (vertex2AggId[nodeNeighbors[kk]] < numAggs)) - badCount[vertex2AggId[nodeNeighbors[kk]]] = 0; - } + } + std::vector reallyBadAggNeighbors( + std::min(G.getLocalMaxNumRowEntries() * maxAggSize, numNodes)); + reallyBadAggNeighbors.resize(0); + for (LO k = 0; k < (LO)badAggNeighbors.size(); k++) { + if (badCount[badAggNeighbors[k]] <= 1) + reallyBadAggNeighbors.push_back(badAggNeighbors[k]); + } + for (LO k = nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i + 1); k++) { + ArrayView nodeNeighbors = G.getNeighborVertices(k); + for (LO kk = 0; kk < nodeNeighbors.size(); kk++) { + if ((vertex2AggId[nodeNeighbors[kk]] >= 0) && + (vertex2AggId[nodeNeighbors[kk]] < numAggs)) + badCount[vertex2AggId[nodeNeighbors[kk]]] = 0; } + } - // For each of the reallyBadAggNeighbors, we go and blitz their connections to dofs in this aggregate. 
- // We remove the INVALID marker when we do this so we don't wind up doubling this up later - for(LO b=0; b<(LO)reallyBadAggNeighbors.size(); b++) { - LO bad_agg = reallyBadAggNeighbors[b]; - for (LO k=nodesInAgg.ptr_h(bad_agg); k < nodesInAgg.ptr_h(bad_agg+1); k++) { - LO bad_node = nodesInAgg.nodes_h(k); - for(LO j = 0; j < (LO)blkSize; j++) { - LO bad_row = amalgInfo->ComputeLocalDOF(bad_node,j); - if (bad_row >= (LO)numRows) continue; - size_t index_start = rowptr[bad_row]; - A.getLocalRowView(bad_row, indsA, valsA); - for(LO l = 0; l < (LO)indsA.size(); l++) { - if(indsA[l] < (LO)numRows && vertex2AggId[amalgInfo->ComputeLocalNode(indsA[l])] == i && vals_dropped_indicator[index_start+l] == false) { - vals_dropped_indicator[index_start + l] = true; - vals[index_start + l] = ZERO; - diagExtra[bad_row] += valsA[l]; - numSymDrops++; - } + // For each of the reallyBadAggNeighbors, we go and blitz their connections + // to dofs in this aggregate. We remove the INVALID marker when we do this + // so we don't wind up doubling this up later + for (LO b = 0; b < (LO)reallyBadAggNeighbors.size(); b++) { + LO bad_agg = reallyBadAggNeighbors[b]; + for (LO k = nodesInAgg.ptr_h(bad_agg); k < nodesInAgg.ptr_h(bad_agg + 1); + k++) { + LO bad_node = nodesInAgg.nodes_h(k); + for (LO j = 0; j < (LO)blkSize; j++) { + LO bad_row = amalgInfo->ComputeLocalDOF(bad_node, j); + if (bad_row >= (LO)numRows) + continue; + size_t index_start = rowptr[bad_row]; + A.getLocalRowView(bad_row, indsA, valsA); + for (LO l = 0; l < (LO)indsA.size(); l++) { + if (indsA[l] < (LO)numRows && + vertex2AggId[amalgInfo->ComputeLocalNode(indsA[l])] == i && + vals_dropped_indicator[index_start + l] == false) { + vals_dropped_indicator[index_start + l] = true; + vals[index_start + l] = ZERO; + diagExtra[bad_row] += valsA[l]; + numSymDrops++; } } } } + } - // Now lets fill the rows in this aggregate and figure out the diagonal lumping - // We loop over each node in the aggregate and then over the neighbors 
of that node - - for(LO k=nodesInAgg.ptr_h(i); k indsG = G.getNeighborVertices(row_node); - for (size_t j = 0; j < as(indsG.size()); j++) - filter[indsG[j]]=true; - - for (LO m = 0; m < (LO)blkSize; m++) { - LO row = amalgInfo->ComputeLocalDOF(row_node,m); - if (row >= (LO)numRows) continue; - size_t index_start = rowptr[row]; - A.getLocalRowView(row, indsA, valsA); - - for(LO l = 0; l < (LO)indsA.size(); l++) { - int col_node = amalgInfo->ComputeLocalNode(indsA[l]); - bool is_good = filter[col_node]; - if (indsA[l] == row) { - diagIndex[row] = l; - vals[index_start + l] = valsA[l]; - continue; - } + // Now lets fill the rows in this aggregate and figure out the diagonal + // lumping We loop over each node in the aggregate and then over the + // neighbors of that node - // If we've already dropped this guy (from symmetry above), then continue onward - if(vals_dropped_indicator[index_start +l] == true) { - if(is_good) numOldDrops++; - else numNewDrops++; - continue; - } + for (LO k = nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i + 1); k++) { + LO row_node = nodesInAgg.nodes_h(k); + // Set up filtering array + ArrayView indsG = G.getNeighborVertices(row_node); + for (size_t j = 0; j < as(indsG.size()); j++) + filter[indsG[j]] = true; - // FIXME: I'm assuming vertex2AggId is only length of the rowmap, so - // we won'd do secondary dropping on off-processor neighbors - if(is_good && indsA[l] < (LO)numRows) { - int agg = vertex2AggId[col_node]; - if(std::binary_search(reallyBadAggNeighbors.begin(),reallyBadAggNeighbors.end(),agg)) - is_good = false; - } + for (LO m = 0; m < (LO)blkSize; m++) { + LO row = amalgInfo->ComputeLocalDOF(row_node, m); + if (row >= (LO)numRows) + continue; + size_t index_start = rowptr[row]; + A.getLocalRowView(row, indsA, valsA); - if(is_good){ - vals[index_start+l] = valsA[l]; - } - else { - if(!filter[col_node]) numOldDrops++; - else numNewDrops++; - diagExtra[row] += valsA[l]; - vals[index_start+l]=ZERO; - 
vals_dropped_indicator[index_start+l]=true; - } - } //end for l "indsA.size()" loop - - }//end m "blkSize" loop - - // Clear filtering array - for (size_t j = 0; j < as(indsG.size()); j++) - filter[indsG[j]]=false; - - }// end k loop over number of nodes in this agg - }//end i loop over numAggs - - if (!use_spread_lumping) { - // Now do the diagonal modifications in one, final pass - for(LO row=0; row <(LO)numRows; row++) { - if (diagIndex[row] != INVALID) { - size_t index_start = rowptr[row]; - size_t diagIndexInMatrix = index_start + diagIndex[row]; - // printf("diag_vals pre update = %8.2e\n", vals[diagIndex] ); - vals[diagIndexInMatrix] += diagExtra[row]; - SC A_rowsum=ZERO, A_absrowsum = ZERO, F_rowsum = ZERO; - - - if( (dirichletThresh >= 0.0 && TST::real(vals[diagIndexInMatrix]) <= dirichletThresh) || TST::real(vals[diagIndexInMatrix]) == ZERO) { - - if(MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING>0) { - A.getLocalRowView(row, indsA, valsA); - // SC diagA = valsA[diagIndex[row]]; - // printf("WARNING: row %d (diagIndex=%d) diag(Afiltered) = %8.2e diag(A)=%8.2e numInds = %d\n",row,diagIndex[row],vals[diagIndexInMatrix],diagA,(LO)indsA.size()); - - for(LO l = 0; l < (LO)indsA.size(); l++) { - A_rowsum += valsA[l]; - A_absrowsum+=std::abs(valsA[l]); - } - for(LO l = 0; l < (LO)indsA.size(); l++) - F_rowsum += vals[index_start+l]; - // printf(" : A rowsum = %8.2e |A| rowsum = %8.2e rowsum = %8.2e\n",A_rowsum,A_absrowsum,F_rowsum); - if(MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING > 1){ - // printf(" Avals ="); - // for(LO l = 0; l < (LO)indsA.size(); l++) - // printf("%d(%8.2e)[%d] ",(LO)indsA[l],valsA[l],(LO)l); - // printf("\n"); - // printf(" Fvals ="); - // for(LO l = 0; l < (LO)indsA.size(); l++) - // if(vals[index_start+l] != ZERO) - // printf("%d(%8.2e)[%d] ",(LO)indsA[l],vals[index_start+l],(LO)l); - } - } - // Don't know what to do, so blitz the row and dump a one on the diagonal - for(size_t l=rowptr[row]; lComputeLocalNode(indsA[l]); + bool is_good = 
filter[col_node]; + if (indsA[l] == row) { + diagIndex[row] = l; + vals[index_start + l] = valsA[l]; + continue; } - } - else { - GetOStream(Runtime0)<<"WARNING: Row "<getComm(), numNewDrops, g_newDrops); - MueLu_sumAll(A.getRowMap()->getComm(), numOldDrops, g_oldDrops); - MueLu_sumAll(A.getRowMap()->getComm(), numFixedDiags, g_fixedDiags); - GetOStream(Runtime0)<< "Filtering out "< - void FilteredAFactory:: - ExperimentalLumping(const Matrix& A, Matrix& filteredA, double irho, double irho2) const { - using TST = typename Teuchos::ScalarTraits; - SC zero = TST::zero(); - SC one = TST::one(); - - ArrayView inds; - ArrayView vals; - ArrayView finds; - ArrayView fvals; - - SC PosOffSum, NegOffSum, PosOffDropSum, NegOffDropSum; - SC diag, gamma, alpha; - LO NumPosKept, NumNegKept; - - SC noLumpDdom; - SC numer,denom; - SC PosFilteredSum, NegFilteredSum; - SC Target; - - SC rho = as(irho); - SC rho2 = as(irho2); - - for (LO row = 0; row < (LO) A.getRowMap()->getLocalNumElements(); row++) { - noLumpDdom = as(10000.0); // only used if diagonal is zero - // the whole idea sort of breaks down - // when the diagonal is zero. In particular, - // the old diag dominance ratio is infinity - // ... so what do we want for the new ddom - // ratio. Do we want to allow the diagonal - // to go negative, just to have a better ddom - // ratio? 
This current choice essentially - // changes 'Target' to a large number - // meaning that we will allow the new - // ddom number to be fairly large (because - // the old one was infinity) - - ArrayView tvals; - A.getLocalRowView(row, inds, vals); - size_t nnz = inds.size(); - if (nnz == 0) continue; - filteredA.getLocalRowView(row, finds, tvals);//assume 2 getLocalRowView()s - // have things in same order - fvals = ArrayView(const_cast(tvals.getRawPtr()), nnz); - - LO diagIndex = -1, fdiagIndex = -1; - - PosOffSum=zero; NegOffSum=zero; PosOffDropSum=zero; NegOffDropSum=zero; - diag=zero; NumPosKept=0; NumNegKept=0; - - // first record diagonal, offdiagonal sums and off diag dropped sums - for (size_t j = 0; j < nnz; j++) { - if (inds[j] == row) { - diagIndex = j; - diag = vals[j]; - } - else { // offdiagonal - if (TST::real(vals[j]) > TST::real(zero) ) PosOffSum += vals[j]; - else NegOffSum += vals[j]; + // If we've already dropped this guy (from symmetry above), then + // continue onward + if (vals_dropped_indicator[index_start + l] == true) { + if (is_good) + numOldDrops++; + else + numNewDrops++; + continue; } - } - PosOffDropSum = PosOffSum; - NegOffDropSum = NegOffSum; - NumPosKept = 0; - NumNegKept = 0; - LO j = 0; - for (size_t jj = 0; jj < (size_t) finds.size(); jj++) { - while( inds[j] != finds[jj] ) j++; // assumes that finds is in the same order as - // inds ... 
but perhaps has some entries missing - if (finds[jj] == row) fdiagIndex = jj; - else { - if (TST::real(vals[j]) > TST::real(zero) ) { - PosOffDropSum -= fvals[jj]; - if (TST::real(fvals[jj]) != TST::real(zero) ) NumPosKept++; - } - else { - NegOffDropSum -= fvals[jj]; - if (TST::real(fvals[jj]) != TST::real(zero) ) NumNegKept++; - } + + // FIXME: I'm assuming vertex2AggId is only length of the rowmap, so + // we won'd do secondary dropping on off-processor neighbors + if (is_good && indsA[l] < (LO)numRows) { + int agg = vertex2AggId[col_node]; + if (std::binary_search(reallyBadAggNeighbors.begin(), + reallyBadAggNeighbors.end(), agg)) + is_good = false; } - } - // measure of diagonal dominance if no lumping is done. - if (TST::magnitude(diag) != TST::magnitude(zero) ) - noLumpDdom = (PosOffSum - NegOffSum)/diag; + if (is_good) { + vals[index_start + l] = valsA[l]; + } else { + if (!filter[col_node]) + numOldDrops++; + else + numNewDrops++; + diagExtra[row] += valsA[l]; + vals[index_start + l] = ZERO; + vals_dropped_indicator[index_start + l] = true; + } + } // end for l "indsA.size()" loop - // Target is an acceptable diagonal dominance ratio - // which should really be larger than 1 + } // end m "blkSize" loop - Target = rho*noLumpDdom; - if (TST::magnitude(Target) <= TST::magnitude(rho)) Target = rho2; + // Clear filtering array + for (size_t j = 0; j < as(indsG.size()); j++) + filter[indsG[j]] = false; - PosFilteredSum = PosOffSum - PosOffDropSum; - NegFilteredSum = NegOffSum - NegOffDropSum; - // Note: PosNotFilterdSum is not equal to the sum of the - // positive entries after lumping. It just reflects the - // pos offdiag sum of the filtered matrix before lumping - // and does not account for negative dropped terms lumped - // to the positive kept terms. + } // end k loop over number of nodes in this agg + } // end i loop over numAggs - // dropped positive offdiags always go to the diagonal as these - // always improve diagonal dominance. 
+ if (!use_spread_lumping) { + // Now do the diagonal modifications in one, final pass + for (LO row = 0; row < (LO)numRows; row++) { + if (diagIndex[row] != INVALID) { + size_t index_start = rowptr[row]; + size_t diagIndexInMatrix = index_start + diagIndex[row]; + // printf("diag_vals pre update = %8.2e\n", vals[diagIndex] ); + vals[diagIndexInMatrix] += diagExtra[row]; + SC A_rowsum = ZERO, A_absrowsum = ZERO, F_rowsum = ZERO; + + if ((dirichletThresh >= 0.0 && + TST::real(vals[diagIndexInMatrix]) <= dirichletThresh) || + TST::real(vals[diagIndexInMatrix]) == ZERO) { + + if (MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING > 0) { + A.getLocalRowView(row, indsA, valsA); + // SC diagA = valsA[diagIndex[row]]; + // printf("WARNING: row %d (diagIndex=%d) diag(Afiltered) + // = %8.2e diag(A)=%8.2e numInds = + // %d\n",row,diagIndex[row],vals[diagIndexInMatrix],diagA,(LO)indsA.size()); + + for (LO l = 0; l < (LO)indsA.size(); l++) { + A_rowsum += valsA[l]; + A_absrowsum += std::abs(valsA[l]); + } + for (LO l = 0; l < (LO)indsA.size(); l++) + F_rowsum += vals[index_start + l]; + // printf(" : A rowsum = %8.2e |A| rowsum = + // %8.2e rowsum = + // %8.2e\n",A_rowsum,A_absrowsum,F_rowsum); + if (MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING > 1) { + // printf(" Avals ="); + // for(LO l = 0; l < (LO)indsA.size(); l++) + // printf("%d(%8.2e)[%d] + // ",(LO)indsA[l],valsA[l],(LO)l); + // printf("\n"); + // printf(" Fvals ="); + // for(LO l = 0; l < (LO)indsA.size(); l++) + // if(vals[index_start+l] != ZERO) + // printf("%d(%8.2e)[%d] + // ",(LO)indsA[l],vals[index_start+l],(LO)l); + } + } + // Don't know what to do, so blitz the row and dump a one on the + // diagonal + for (size_t l = rowptr[row]; l < rowptr[row + 1]; l++) { + vals[l] = ZERO; + } + vals[diagIndexInMatrix] = TST::one(); + numFixedDiags++; + } + } else { + GetOStream(Runtime0) + << "WARNING: Row " << row << " has no diagonal " << std::endl; + } + } /*end row "numRows" loop"*/ + } - diag += PosOffDropSum; + // Copy all the 
goop out + for (LO row = 0; row < (LO)numRows; row++) { + filteredA.replaceLocalValues( + row, inds(rowptr[row], rowptr[row + 1] - rowptr[row]), + vals(rowptr[row], rowptr[row + 1] - rowptr[row])); + } + if (use_spread_lumping) + ExperimentalLumping(A, filteredA, DdomAllowGrowthRate, DdomCap); + + size_t g_newDrops = 0, g_oldDrops = 0, g_fixedDiags = 0; + + MueLu_sumAll(A.getRowMap()->getComm(), numNewDrops, g_newDrops); + MueLu_sumAll(A.getRowMap()->getComm(), numOldDrops, g_oldDrops); + MueLu_sumAll(A.getRowMap()->getComm(), numFixedDiags, g_fixedDiags); + GetOStream(Runtime0) << "Filtering out " << g_newDrops + << " edges, in addition to the " << g_oldDrops + << " edges dropped earlier" << std::endl; + GetOStream(Runtime0) << "Fixing " << g_fixedDiags << " zero diagonal values" + << std::endl; +} + +// fancy lumping trying to not just move everything to the diagonal but to also +// consider moving some lumping to the kept off-diagonals. We basically aim to +// not increase the diagonal dominance in a row. In particular, the goal is that +// row i satisfies +// +// lumpedDiagDomMeasure_i <= rho2 +// or +// lumpedDiagDomMeasure <= rho*unlumpedDiagDomMeasure +// +// NOTE: THIS CODE assumes direct access to a row. 
See comments above concerning +// ASSUME_DIRECT_ACCESS_TO_ROW +// +template +void FilteredAFactory::ExperimentalLumping(const Matrix &A, + Matrix &filteredA, double irho, + double irho2) const { + using TST = typename Teuchos::ScalarTraits; + SC zero = TST::zero(); + SC one = TST::one(); + + ArrayView inds; + ArrayView vals; + ArrayView finds; + ArrayView fvals; + + SC PosOffSum, NegOffSum, PosOffDropSum, NegOffDropSum; + SC diag, gamma, alpha; + LO NumPosKept, NumNegKept; + + SC noLumpDdom; + SC numer, denom; + SC PosFilteredSum, NegFilteredSum; + SC Target; + + SC rho = as(irho); + SC rho2 = as(irho2); + + for (LO row = 0; row < (LO)A.getRowMap()->getLocalNumElements(); row++) { + noLumpDdom = + as(10000.0); // only used if diagonal is zero + // the whole idea sort of breaks down + // when the diagonal is zero. In particular, + // the old diag dominance ratio is infinity + // ... so what do we want for the new ddom + // ratio. Do we want to allow the diagonal + // to go negative, just to have a better ddom + // ratio? 
This current choice essentially + // changes 'Target' to a large number + // meaning that we will allow the new + // ddom number to be fairly large (because + // the old one was infinity) + + ArrayView tvals; + A.getLocalRowView(row, inds, vals); + size_t nnz = inds.size(); + if (nnz == 0) + continue; + filteredA.getLocalRowView(row, finds, tvals); // assume 2 getLocalRowView()s + // have things in same order + fvals = ArrayView(const_cast(tvals.getRawPtr()), nnz); + + LO diagIndex = -1, fdiagIndex = -1; + + PosOffSum = zero; + NegOffSum = zero; + PosOffDropSum = zero; + NegOffDropSum = zero; + diag = zero; + NumPosKept = 0; + NumNegKept = 0; + + // first record diagonal, offdiagonal sums and off diag dropped sums + for (size_t j = 0; j < nnz; j++) { + if (inds[j] == row) { + diagIndex = j; + diag = vals[j]; + } else { // offdiagonal + if (TST::real(vals[j]) > TST::real(zero)) + PosOffSum += vals[j]; + else + NegOffSum += vals[j]; + } + } + PosOffDropSum = PosOffSum; + NegOffDropSum = NegOffSum; + NumPosKept = 0; + NumNegKept = 0; + LO j = 0; + for (size_t jj = 0; jj < (size_t)finds.size(); jj++) { + while (inds[j] != finds[jj]) + j++; // assumes that finds is in the same order as + // inds ... but perhaps has some entries missing + if (finds[jj] == row) + fdiagIndex = jj; + else { + if (TST::real(vals[j]) > TST::real(zero)) { + PosOffDropSum -= fvals[jj]; + if (TST::real(fvals[jj]) != TST::real(zero)) + NumPosKept++; + } else { + NegOffDropSum -= fvals[jj]; + if (TST::real(fvals[jj]) != TST::real(zero)) + NumNegKept++; + } + } + } - // now lets work on lumping dropped negative offdiags - gamma = -NegOffDropSum - PosFilteredSum; + // measure of diagonal dominance if no lumping is done. 
+ if (TST::magnitude(diag) != TST::magnitude(zero)) + noLumpDdom = (PosOffSum - NegOffSum) / diag; + + // Target is an acceptable diagonal dominance ratio + // which should really be larger than 1 + + Target = rho * noLumpDdom; + if (TST::magnitude(Target) <= TST::magnitude(rho)) + Target = rho2; + + PosFilteredSum = PosOffSum - PosOffDropSum; + NegFilteredSum = NegOffSum - NegOffDropSum; + // Note: PosNotFilterdSum is not equal to the sum of the + // positive entries after lumping. It just reflects the + // pos offdiag sum of the filtered matrix before lumping + // and does not account for negative dropped terms lumped + // to the positive kept terms. + + // dropped positive offdiags always go to the diagonal as these + // always improve diagonal dominance. + + diag += PosOffDropSum; + + // now lets work on lumping dropped negative offdiags + gamma = -NegOffDropSum - PosFilteredSum; + + if (TST::real(gamma) < TST::real(zero)) { + // the total amount of negative dropping is less than PosFilteredSum, + // so we can distribute this dropping to pos offdiags. After lumping + // the sum of the pos offdiags is just -gamma so we just assign pos + // offdiags proportional to vals[j]/PosFilteredSum + // Note: in this case the diagonal is not changed as all lumping + // occurs to the pos offdiags + + if (fdiagIndex != -1) + fvals[fdiagIndex] = diag; + j = 0; + for (LO jj = 0; jj < (LO)finds.size(); jj++) { + while (inds[j] != finds[jj]) + j++; // assumes that finds is in the same order as + // inds ... but perhaps has some entries missing + if ((j != diagIndex) && (TST::real(vals[j]) > TST::real(zero)) && + (TST::magnitude(fvals[jj]) != TST::magnitude(zero))) + fvals[jj] = -gamma * (vals[j] / PosFilteredSum); + } + } else { + // So there are more negative values that need lumping than kept + // positive offdiags. Meaning there is enough negative lumping to + // completely clear out all pos offdiags. 
If we lump all negs + // to pos off diags, we'd actually change them to negative. We + // only do this if we are desperate. Otherwise, we'll clear out + // all the positive kept offdiags and try to lump the rest + // somewhere else. We defer the clearing of pos off diags + // to see first if we are going to be desperate. + + bool flipPosOffDiagsToNeg = false; + + // Even if we lumped by zeroing positive offdiags, we are still + // going to have more lumping to distribute to either + // 1) the diagonal + // 2) the kept negative offdiags + // 3) the kept positive offdiags (desperate) + + // Let's first considering lumping the remaining neg offdiag stuff + // to the diagonal ... if this does not increase the diagonal + // dominance ratio too much (given by rho). + + if ((TST::real(diag) > TST::real(gamma)) && + (TST::real((-NegFilteredSum) / (diag - gamma)) <= + TST::real(Target))) { + // 1st if term above insures that resulting diagonal (=diag-gamma) + // is positive. . The left side of 2nd term is the diagonal dominance + // if we lump the remaining stuff (gamma) to the diagonal. Recall, + // that now there are no positive off-diags so the sum(abs(offdiags)) + // is just the negative of NegFilteredSum + + if (fdiagIndex != -1) + fvals[fdiagIndex] = diag - gamma; + } else if (NumNegKept > 0) { + // need to do some lumping to neg offdiags to avoid a large + // increase in diagonal dominance. We first compute alpha + // which measures how much gamma should go to the + // negative offdiags. The rest will go to the diagonal + + numer = -NegFilteredSum - Target * (diag - gamma); + denom = gamma * (Target - TST::one()); + + // make sure that alpha is between 0 and 1 ... and that it doesn't + // result in a sign flip + // Note: when alpha is set to 1, then the diagonal is not modified + // and the negative offdiags just get shifted from those + // removed and those kept, meaning that the digaonal dominance + // should be the same as before + // + // can alpha be negative? 
It looks like denom should always + // be positive. The 'if' statement above + // Normally, diag-gamma should also be positive (but if it + // is negative then numer is guaranteed to be positve). + // look at the 'if' above, + // if (( TST::real(diag) > TST::real(gamma)) && + // ( TST::real((-NegFilteredSum)/(diag - gamma)) <= + // TST::real(Target))) { + // + // Should guarantee that numer is positive. This is obvious + // when the second condition is false. When it is the first + // condition that is false, it follows that the two indiviudal + // terms in the numer formula must be positive. + + if (TST::magnitude(denom) < TST::magnitude(numer)) + alpha = TST::one(); + else + alpha = numer / denom; + if (TST::real(alpha) < TST::real(zero)) + alpha = zero; + if (TST::real(diag) < TST::real((one - alpha) * gamma)) + alpha = TST::one(); + + // first change the diagonal + + if (fdiagIndex != -1) + fvals[fdiagIndex] = diag - (one - alpha) * gamma; + + // after lumping the sum of neg offdiags will be NegFilteredSum + // + alpha*gamma. That is the remaining negative entries altered + // by the percent (=alpha) of stuff (=gamma) that needs to be + // lumped after taking into account lumping to pos offdiags + + // Do this by assigning a fraction of NegFilteredSum+alpha*gamma + // proportional to vals[j]/NegFilteredSum + + SC temp = (NegFilteredSum + alpha * gamma) / NegFilteredSum; + j = 0; + for (LO jj = 0; jj < (LO)finds.size(); jj++) { + while (inds[j] != finds[jj]) + j++; // assumes that finds is in the same order as + // inds ... but perhaps has some entries missing + if ((jj != fdiagIndex) && + (TST::magnitude(fvals[jj]) != TST::magnitude(zero)) && + (TST::real(vals[j]) < TST::real(zero))) + fvals[jj] = temp * vals[j]; + } + } else { // desperate case + // So we don't have any kept negative offdiags ... 
- if (TST::real(gamma) < TST::real(zero) ) { - // the total amount of negative dropping is less than PosFilteredSum, - // so we can distribute this dropping to pos offdiags. After lumping - // the sum of the pos offdiags is just -gamma so we just assign pos - // offdiags proportional to vals[j]/PosFilteredSum - // Note: in this case the diagonal is not changed as all lumping - // occurs to the pos offdiags + if (NumPosKept > 0) { + // luckily we can push this stuff to the pos offdiags + // which now makes them negative + flipPosOffDiagsToNeg = true; - if (fdiagIndex != -1) fvals[fdiagIndex] = diag; j = 0; - for(LO jj = 0; jj < (LO)finds.size(); jj++) { - while( inds[j] != finds[jj] ) j++; // assumes that finds is in the same order as - // inds ... but perhaps has some entries missing - if ((j != diagIndex)&&(TST::real(vals[j]) > TST::real(zero) ) && (TST::magnitude(fvals[jj]) != TST::magnitude(zero))) - fvals[jj] = -gamma*(vals[j]/PosFilteredSum); - + for (LO jj = 0; jj < (LO)finds.size(); jj++) { + while (inds[j] != finds[jj]) + j++; // assumes that finds is in the same order as + // inds ... but perhaps has some entries missing + if ((j != diagIndex) && + (TST::magnitude(fvals[jj]) != TST::magnitude(zero)) && + (TST::real(vals[j]) > TST::real(zero))) + fvals[jj] = -gamma / ((SC)NumPosKept); } } - else { - // So there are more negative values that need lumping than kept - // positive offdiags. Meaning there is enough negative lumping to - // completely clear out all pos offdiags. If we lump all negs - // to pos off diags, we'd actually change them to negative. We - // only do this if we are desperate. Otherwise, we'll clear out - // all the positive kept offdiags and try to lump the rest - // somewhere else. We defer the clearing of pos off diags - // to see first if we are going to be desperate. 
- - bool flipPosOffDiagsToNeg = false; - - // Even if we lumped by zeroing positive offdiags, we are still - // going to have more lumping to distribute to either - // 1) the diagonal - // 2) the kept negative offdiags - // 3) the kept positive offdiags (desperate) - - // Let's first considering lumping the remaining neg offdiag stuff - // to the diagonal ... if this does not increase the diagonal - // dominance ratio too much (given by rho). - - if (( TST::real(diag) > TST::real(gamma)) && - ( TST::real((-NegFilteredSum)/(diag - gamma)) <= TST::real(Target))) { - // 1st if term above insures that resulting diagonal (=diag-gamma) - // is positive. . The left side of 2nd term is the diagonal dominance - // if we lump the remaining stuff (gamma) to the diagonal. Recall, - // that now there are no positive off-diags so the sum(abs(offdiags)) - // is just the negative of NegFilteredSum - - if (fdiagIndex != -1) fvals[fdiagIndex] = diag - gamma; - } - else if (NumNegKept > 0) { - // need to do some lumping to neg offdiags to avoid a large - // increase in diagonal dominance. We first compute alpha - // which measures how much gamma should go to the - // negative offdiags. The rest will go to the diagonal - - numer = -NegFilteredSum - Target*(diag-gamma); - denom = gamma*(Target - TST::one()); - - // make sure that alpha is between 0 and 1 ... and that it doesn't - // result in a sign flip - // Note: when alpha is set to 1, then the diagonal is not modified - // and the negative offdiags just get shifted from those - // removed and those kept, meaning that the digaonal dominance - // should be the same as before - // - // can alpha be negative? It looks like denom should always - // be positive. The 'if' statement above - // Normally, diag-gamma should also be positive (but if it - // is negative then numer is guaranteed to be positve). 
- // look at the 'if' above, - // if (( TST::real(diag) > TST::real(gamma)) && - // ( TST::real((-NegFilteredSum)/(diag - gamma)) <= TST::real(Target))) { - // - // Should guarantee that numer is positive. This is obvious when - // the second condition is false. When it is the first condition that - // is false, it follows that the two indiviudal terms in the numer - // formula must be positive. - - if ( TST::magnitude(denom) < TST::magnitude(numer) ) alpha = TST::one(); - else alpha = numer/denom; - if ( TST::real(alpha) < TST::real(zero)) alpha = zero; - if ( TST::real(diag) < TST::real((one-alpha)*gamma) ) alpha = TST::one(); - - // first change the diagonal - - if (fdiagIndex != -1) fvals[fdiagIndex] = diag - (one-alpha)*gamma; - - // after lumping the sum of neg offdiags will be NegFilteredSum - // + alpha*gamma. That is the remaining negative entries altered - // by the percent (=alpha) of stuff (=gamma) that needs to be - // lumped after taking into account lumping to pos offdiags - - // Do this by assigning a fraction of NegFilteredSum+alpha*gamma - // proportional to vals[j]/NegFilteredSum - - SC temp = (NegFilteredSum+alpha*gamma)/NegFilteredSum; - j = 0; - for(LO jj = 0; jj < (LO)finds.size(); jj++) { - while( inds[j] != finds[jj] ) j++; // assumes that finds is in the same order as - // inds ... but perhaps has some entries missing - if ( (jj != fdiagIndex)&&(TST::magnitude(fvals[jj]) != TST::magnitude(zero) ) && - ( TST::real(vals[j]) < TST::real(zero) ) ) - fvals[jj] = temp*vals[j]; - } - } - else { // desperate case - // So we don't have any kept negative offdiags ... - - if (NumPosKept > 0) { - // luckily we can push this stuff to the pos offdiags - // which now makes them negative - flipPosOffDiagsToNeg = true; - - j = 0; - for(LO jj = 0; jj < (LO)finds.size(); jj++) { - while( inds[j] != finds[jj] ) j++; // assumes that finds is in the same order as - // inds ... 
but perhaps has some entries missing - if ( (j != diagIndex)&&(TST::magnitude(fvals[jj]) != TST::magnitude(zero) ) && - (TST::real(vals[j]) > TST::real(zero) )) - fvals[jj] = -gamma/( (SC) NumPosKept); - } - } - // else abandon rowsum preservation and do nothing - - } - if (!flipPosOffDiagsToNeg) { // not desperate so we now zero out - // all pos terms including some - // not originally filtered - // but zeroed due to lumping - j = 0; - for(LO jj = 0; jj < (LO)finds.size(); jj++) { - while( inds[j] != finds[jj] ) j++; // assumes that finds is in the same order as - // inds ... but perhaps has some entries missing - if ((jj != fdiagIndex)&& (TST::real(vals[j]) > TST::real(zero))) fvals[jj] = zero; - } - } - } // positive gamma else - - } //loop over all rows - } + // else abandon rowsum preservation and do nothing + } + if (!flipPosOffDiagsToNeg) { // not desperate so we now zero out + // all pos terms including some + // not originally filtered + // but zeroed due to lumping + j = 0; + for (LO jj = 0; jj < (LO)finds.size(); jj++) { + while (inds[j] != finds[jj]) + j++; // assumes that finds is in the same order as + // inds ... 
but perhaps has some entries missing + if ((jj != fdiagIndex) && (TST::real(vals[j]) > TST::real(zero))) + fvals[jj] = zero; + } + } + } // positive gamma else + } // loop over all rows +} -} //namespace MueLu +} // namespace MueLu #endif // MUELU_FILTEREDAFACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_FineLevelInputDataFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_FineLevelInputDataFactory_decl.hpp index 32b5389b0e1d..ffdd5c1e9a61 100644 --- a/packages/muelu/src/Misc/MueLu_FineLevelInputDataFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_FineLevelInputDataFactory_decl.hpp @@ -47,7 +47,6 @@ #ifndef PACKAGES_MUELU_SRC_MISC_MUELU_FINELEVELINPUTDATAFACTORY_DECL_HPP_ #define PACKAGES_MUELU_SRC_MISC_MUELU_FINELEVELINPUTDATAFACTORY_DECL_HPP_ - #include #include "MueLu_ConfigDefs.hpp" @@ -57,68 +56,67 @@ #include "MueLu_SingleLevelFactoryBase.hpp" #include "MueLu_Aggregates_fwd.hpp" -#include "MueLu_SmootherPrototype_fwd.hpp" -#include "MueLu_SmootherBase_fwd.hpp" #include "MueLu_Graph_fwd.hpp" +#include "MueLu_SmootherBase_fwd.hpp" +#include "MueLu_SmootherPrototype_fwd.hpp" namespace MueLuTests { - template - class FineLevelInputDataFactoryTester; +template +class FineLevelInputDataFactoryTester; } namespace MueLu { - /*! - @class FineLevelInputData class. - @brief Factory for piping in input data from the finest level into the MueLu data dependency system - */ - - template - class FineLevelInputDataFactory : public SingleLevelFactoryBase { - friend class MueLuTests::FineLevelInputDataFactoryTester; +/*! + @class FineLevelInputData class. + @brief Factory for piping in input data from the finest level into the MueLu + data dependency system +*/ + +template +class FineLevelInputDataFactory : public SingleLevelFactoryBase { + friend class MueLuTests::FineLevelInputDataFactoryTester; #undef MUELU_FINELEVELINPUTDATAFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! @name Constructors/Destructors. - //@{ +public: + //! 
@name Constructors/Destructors. + //@{ - FineLevelInputDataFactory() { } + FineLevelInputDataFactory() {} - //! Destructor. - virtual ~FineLevelInputDataFactory() { } + //! Destructor. + virtual ~FineLevelInputDataFactory() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level& currentLevel) const; + void DeclareInput(Level ¤tLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - /*! - @brief Build method. - */ - void Build(Level& currentLevel) const; - - //@} - private: + /*! + @brief Build method. + */ + void Build(Level ¤tLevel) const; - void test() const { std::cout << "TEST" << std::endl; } + //@} +private: + void test() const { std::cout << "TEST" << std::endl; } - }; //class FineLevelInputDataFactory +}; // class FineLevelInputDataFactory -} //namespace MueLu +} // namespace MueLu #define MUELU_FINELEVELINPUTDATAFACTORY_SHORT diff --git a/packages/muelu/src/Misc/MueLu_FineLevelInputDataFactory_def.hpp b/packages/muelu/src/Misc/MueLu_FineLevelInputDataFactory_def.hpp index a54c59a6bf1f..37d692a5356d 100644 --- a/packages/muelu/src/Misc/MueLu_FineLevelInputDataFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_FineLevelInputDataFactory_def.hpp @@ -55,131 +55,150 @@ namespace MueLu { - template - RCP FineLevelInputDataFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - // Variable name (e.g. 
A or P or Coordinates) - validParamList->set< std::string >("Variable", std::string("A"), "Variable name on all coarse levels (except the finest level)."); - - // Names of generating factories (on finest level and coarse levels) - validParamList->set< RCP >("Fine level factory", Teuchos::null, "Generating factory of the fine level variable"); - validParamList->set< RCP >("Coarse level factory", Teuchos::null, "Generating factory for data on all coarse levels (except the finest)"); - - // Type of variable (see source code for a complete list of all available types) - validParamList->set ("Variable type", std::string("Matrix"), "Type of variable"); - - return validParamList; +template +RCP +FineLevelInputDataFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + + // Variable name (e.g. A or P or Coordinates) + validParamList->set( + "Variable", std::string("A"), + "Variable name on all coarse levels (except the finest level)."); + + // Names of generating factories (on finest level and coarse levels) + validParamList->set>( + "Fine level factory", Teuchos::null, + "Generating factory of the fine level variable"); + validParamList->set>( + "Coarse level factory", Teuchos::null, + "Generating factory for data on all coarse levels (except the finest)"); + + // Type of variable (see source code for a complete list of all available + // types) + validParamList->set("Variable type", std::string("Matrix"), + "Type of variable"); + + return validParamList; +} + +template +void FineLevelInputDataFactory::DeclareInput(Level ¤tLevel) const { + + const ParameterList &pL = GetParameterList(); + + std::string variableName = ""; + if (pL.isParameter("Variable")) + variableName = pL.get("Variable"); + + std::string factoryName = "NoFactory"; + if (currentLevel.GetLevelID() == 0) { + factoryName = "Fine level factory"; + } else { + factoryName = "Coarse level factory"; } - template - void FineLevelInputDataFactory::DeclareInput(Level& 
currentLevel) const { - - const ParameterList & pL = GetParameterList(); - - std::string variableName = ""; - if(pL.isParameter("Variable")) - variableName = pL.get("Variable"); + TEUCHOS_TEST_FOR_EXCEPTION( + variableName == "", MueLu::Exceptions::RuntimeError, + "FineLevelInputDataFactory: no variable name provided. Please set " + "\'Variable\' parameter in your input deck."); - std::string factoryName = "NoFactory"; - if (currentLevel.GetLevelID() == 0) { - factoryName = "Fine level factory"; - } else { - factoryName = "Coarse level factory"; - } - - TEUCHOS_TEST_FOR_EXCEPTION(variableName == "", MueLu::Exceptions::RuntimeError, "FineLevelInputDataFactory: no variable name provided. Please set \'Variable\' parameter in your input deck."); + // data must be specified in factory! (not in factory manager) + RCP fact = GetFactory(factoryName); + currentLevel.DeclareInput(variableName, fact.get(), this); +} - // data must be specified in factory! (not in factory manager) - RCP fact = GetFactory(factoryName); - currentLevel.DeclareInput(variableName, fact.get(), this); - } - - template - void FineLevelInputDataFactory::Build(Level& currentLevel) const { - FactoryMonitor m(*this, "InputUserData", currentLevel); +template +void FineLevelInputDataFactory::Build(Level ¤tLevel) const { + FactoryMonitor m(*this, "InputUserData", currentLevel); - const ParameterList& pL = GetParameterList(); + const ParameterList &pL = GetParameterList(); - std::string variableName = ""; - if (pL.isParameter("Variable")) - variableName = pL.get("Variable"); + std::string variableName = ""; + if (pL.isParameter("Variable")) + variableName = pL.get("Variable"); - std::string variableType = ""; - if(pL.isParameter("Variable type")) - variableType = pL.get("Variable type"); + std::string variableType = ""; + if (pL.isParameter("Variable type")) + variableType = pL.get("Variable type"); - std::string factoryName = "NoFactory"; - if (currentLevel.GetLevelID() == 0) { - factoryName = "Fine level 
factory"; - } else { - factoryName = "Coarse level factory"; - } - RCP fact = GetFactory(factoryName); - - GetOStream(Debug) << "Use " << variableName << " of type " << variableType << " from " << factoryName << "(" << fact.get() << ")" << std::endl; - - // check data type - //std::string strType = currentLevel.GetTypeName(variableName, fact.get()); - if (variableType == "int") { - int data = currentLevel.Get(variableName, fact.get()); + std::string factoryName = "NoFactory"; + if (currentLevel.GetLevelID() == 0) { + factoryName = "Fine level factory"; + } else { + factoryName = "Coarse level factory"; + } + RCP fact = GetFactory(factoryName); + + GetOStream(Debug) << "Use " << variableName << " of type " << variableType + << " from " << factoryName << "(" << fact.get() << ")" + << std::endl; + + // check data type + // std::string strType = currentLevel.GetTypeName(variableName, fact.get()); + if (variableType == "int") { + int data = currentLevel.Get(variableName, fact.get()); + Set(currentLevel, variableName, data); + } else if (variableType == "double") { + double data = currentLevel.Get(variableName, fact.get()); + Set(currentLevel, variableName, data); + } else if (variableType == "string") { + std::string data = currentLevel.Get(variableName, fact.get()); + Set(currentLevel, variableName, data); + } else { + size_t npos = std::string::npos; + + if (variableType.find("Aggregates") != npos) { + RCP data = + currentLevel.Get>(variableName, fact.get()); + Set(currentLevel, variableName, data); + } else if (variableType.find("Graph") != npos) { + RCP data = currentLevel.Get>(variableName, fact.get()); + Set(currentLevel, variableName, data); + } else if (variableType.find("SmootherBase") != npos) { + RCP data = + currentLevel.Get>(variableName, fact.get()); + Set(currentLevel, variableName, data); + } else if (variableType.find("SmootherPrototype") != npos) { + RCP data = + currentLevel.Get>(variableName, fact.get()); + Set(currentLevel, variableName, data); + } 
else if (variableType.find("Export") != npos) { + RCP data = + currentLevel.Get>(variableName, fact.get()); + Set(currentLevel, variableName, data); + } else if (variableType.find("Import") != npos) { + RCP data = + currentLevel.Get>(variableName, fact.get()); + Set(currentLevel, variableName, data); + } else if (variableType.find("Map") != npos) { + RCP data = currentLevel.Get>(variableName, fact.get()); + Set(currentLevel, variableName, data); + } else if (variableType.find("Matrix") != npos) { + RCP data = + currentLevel.Get>(variableName, fact.get()); Set(currentLevel, variableName, data); - } else if (variableType == "double") { - double data = currentLevel.Get(variableName, fact.get()); + } else if (variableType.find("MultiVector") != npos) { + RCP data = + currentLevel.Get>(variableName, fact.get()); Set(currentLevel, variableName, data); - } else if (variableType == "string") { - std::string data = currentLevel.Get(variableName, fact.get()); + } else if (variableType.find("Operator") != npos) { + RCP data = + currentLevel.Get>(variableName, fact.get()); Set(currentLevel, variableName, data); } else { - size_t npos = std::string::npos; - - if (variableType.find("Aggregates") != npos) { - RCP data = currentLevel.Get >(variableName, fact.get()); - Set(currentLevel, variableName, data); - } - else if (variableType.find("Graph") != npos) { - RCP data = currentLevel.Get >(variableName, fact.get()); - Set(currentLevel, variableName, data); - } - else if (variableType.find("SmootherBase") != npos) { - RCP data = currentLevel.Get >(variableName, fact.get()); - Set(currentLevel, variableName, data); - } - else if (variableType.find("SmootherPrototype") != npos) { - RCP data = currentLevel.Get >(variableName, fact.get()); - Set(currentLevel, variableName, data); - } - else if (variableType.find("Export") != npos) { - RCP data = currentLevel.Get >(variableName, fact.get()); - Set(currentLevel, variableName, data); - } - else if (variableType.find("Import") != npos) { - 
RCP data = currentLevel.Get >(variableName, fact.get()); - Set(currentLevel, variableName, data); - } - else if (variableType.find("Map") != npos) { - RCP data = currentLevel.Get >(variableName, fact.get()); - Set(currentLevel, variableName, data); - } - else if (variableType.find("Matrix") != npos) { - RCP data = currentLevel.Get >(variableName, fact.get()); - Set(currentLevel, variableName, data); - } - else if (variableType.find("MultiVector") != npos) { - RCP data = currentLevel.Get >(variableName, fact.get()); - Set(currentLevel, variableName, data); - } - else if (variableType.find("Operator") != npos) { - RCP data = currentLevel.Get >(variableName, fact.get()); - Set(currentLevel, variableName, data); - } - else { - // TAW: is this working with empty procs? - TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "FineLevelInputDataFactory: cannot detect type of variable " << variableName << " generated by " << fact.get() << ". User provided type " << variableType ); - } + // TAW: is this working with empty procs? + TEUCHOS_TEST_FOR_EXCEPTION( + true, MueLu::Exceptions::RuntimeError, + "FineLevelInputDataFactory: cannot detect type of variable " + << variableName << " generated by " << fact.get() + << ". 
User provided type " << variableType); } } +} -} //namespace MueLu +} // namespace MueLu #endif /* PACKAGES_MUELU_SRC_MISC_MUELU_FINELEVELINPUTDATAFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Misc/MueLu_InitialBlockNumberFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_InitialBlockNumberFactory_decl.hpp index 8e28f8fc1c43..5357bb83fcf4 100644 --- a/packages/muelu/src/Misc/MueLu_InitialBlockNumberFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_InitialBlockNumberFactory_decl.hpp @@ -49,65 +49,65 @@ #include "MueLu_ConfigDefs.hpp" #include "MueLu_SingleLevelFactoryBase.hpp" #include "Xpetra_Matrix_fwd.hpp" -#include "Xpetra_Vector_fwd.hpp" #include "Xpetra_VectorFactory_fwd.hpp" +#include "Xpetra_Vector_fwd.hpp" #include "MueLu_InitialBlockNumberFactory_fwd.hpp" - namespace MueLu { /*! @class InitialBlockNumberFactory class. - @brief Class for generating an initial LocalOrdinal-type BlockNumber vector, based on an input paraemter for interleaved dofs. - + @brief Class for generating an initial LocalOrdinal-type BlockNumber vector, + based on an input paraemter for interleaved dofs. + */ - template - class InitialBlockNumberFactory : public SingleLevelFactoryBase { +template +class InitialBlockNumberFactory : public SingleLevelFactoryBase { #undef MUELU_INITIALBLOCKNUMBERFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - - /*! @brief Constructor. - */ - InitialBlockNumberFactory() { } +public: + //! @name Constructors/Destructors. - //! Destructor. - virtual ~InitialBlockNumberFactory() { } + /*! @brief Constructor. + */ + InitialBlockNumberFactory() {} - RCP GetValidParameterList() const; + //! Destructor. + virtual ~InitialBlockNumberFactory() {} - //@} + RCP GetValidParameterList() const; - //! @name Input - //@{ + //@} - /*! @brief Specifies the data that this class needs, and the factories that generate that data. + //! 
@name Input + //@{ - If the Build method of this class requires some data, but the generating factory is not specified in DeclareInput, then this class - will fall back to the settings in FactoryManager. - */ - void DeclareInput(Level ¤tLevel) const; + /*! @brief Specifies the data that this class needs, and the factories that + generate that data. - //@} + If the Build method of this class requires some data, but the generating + factory is not specified in DeclareInput, then this class will fall back to + the settings in FactoryManager. + */ + void DeclareInput(Level ¤tLevel) const; - //! @name Build methods. - //@{ + //@} - //! Build an object with this factory. - void Build(Level & currentLevel) const; + //! @name Build methods. + //@{ - //@} + //! Build an object with this factory. + void Build(Level ¤tLevel) const; - private: + //@} - }; // class InitialBlockNumberFactory +private: +}; // class InitialBlockNumberFactory } // namespace MueLu diff --git a/packages/muelu/src/Misc/MueLu_InitialBlockNumberFactory_def.hpp b/packages/muelu/src/Misc/MueLu_InitialBlockNumberFactory_def.hpp index 8cfe22245925..8671e5b99188 100644 --- a/packages/muelu/src/Misc/MueLu_InitialBlockNumberFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_InitialBlockNumberFactory_def.hpp @@ -46,52 +46,61 @@ #ifndef MUELU_INITIALBLOCKNUMBER_FACTORY_DEF_HPP #define MUELU_INITIALBLOCKNUMBER_FACTORY_DEF_HPP -#include "Xpetra_VectorFactory.hpp" +#include "Xpetra_IO.hpp" #include "Xpetra_Map.hpp" #include "Xpetra_Matrix.hpp" -#include "Xpetra_IO.hpp" +#include "Xpetra_VectorFactory.hpp" #include "MueLu_InitialBlockNumberFactory_decl.hpp" -#include "MueLu_MasterList.hpp" #include "MueLu_Level.hpp" +#include "MueLu_MasterList.hpp" #include "MueLu_Monitor.hpp" namespace MueLu { - template - RCP InitialBlockNumberFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); -#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - 
SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize"); -#undef SET_VALID_ENTRY +template +RCP +InitialBlockNumberFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); +#define SET_VALID_ENTRY(name) \ + validParamList->setEntry(name, MasterList::getEntry(name)) + SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize"); +#undef SET_VALID_ENTRY + + validParamList->set>( + "A", Teuchos::null, "Generating factory of the matrix A"); - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A"); + return validParamList; +} - return validParamList; - } +template +void InitialBlockNumberFactory::DeclareInput(Level ¤tLevel) const { + Input(currentLevel, "A"); +} - template - void InitialBlockNumberFactory::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "A"); - } +template +void InitialBlockNumberFactory::Build(Level ¤tLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + const ParameterList &pL = GetParameterList(); - template - void InitialBlockNumberFactory::Build(Level & currentLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); - const ParameterList & pL = GetParameterList(); + RCP A = Get>(currentLevel, "A"); + LO blocksize = + as(pL.get("aggregation: block diagonal: interleaved blocksize")); - RCP A = Get< RCP >(currentLevel, "A"); - LO blocksize = as(pL.get("aggregation: block diagonal: interleaved blocksize")); + GetOStream(Statistics1) << "Generating interleaved blocking with " + << blocksize << " equations" << std::endl; + RCP BlockNumber = + LocalOrdinalVectorFactory::Build(A->getRowMap(), false); + Teuchos::ArrayRCP bn_data = BlockNumber->getDataNonConst(0); + for (LO i = 0; i < (LO)A->getRowMap()->getLocalNumElements(); i++) + bn_data[i] = i % blocksize; - GetOStream(Statistics1) << "Generating interleaved blocking with "< BlockNumber = LocalOrdinalVectorFactory::Build(A->getRowMap(),false); - Teuchos::ArrayRCP bn_data = 
BlockNumber->getDataNonConst(0); - for(LO i=0; i<(LO)A->getRowMap()->getLocalNumElements();i++) - bn_data[i] = i % blocksize; - - Set(currentLevel,"BlockNumber",BlockNumber); - } + Set(currentLevel, "BlockNumber", BlockNumber); +} } // namespace MueLu diff --git a/packages/muelu/src/Misc/MueLu_InterfaceAggregationFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_InterfaceAggregationFactory_decl.hpp index b992723be328..b2c6b4989be3 100644 --- a/packages/muelu/src/Misc/MueLu_InterfaceAggregationFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_InterfaceAggregationFactory_decl.hpp @@ -48,39 +48,43 @@ #include "MueLu_SingleLevelFactoryBase.hpp" -namespace MueLu -{ +namespace MueLu { /*! @class InterfaceAggregationFactory class. - @brief Factory for building aggregates for Lagrange multipliers in surface-coupled problems. + @brief Factory for building aggregates for Lagrange multipliers in + surface-coupled problems. ## Context, assumptions, and use cases ## - This factory is intended to be used for saddle-point systems of surface-coupled problems, - where constraints are enforced with Lagrange multipliers. - In addition to the primal unknowns, Lagrange multipliers are considered as dual unknowns. - The presence of Lagrange multipliers make this a primal/dual problem. + This factory is intended to be used for saddle-point systems of + surface-coupled problems, where constraints are enforced with Lagrange + multipliers. In addition to the primal unknowns, Lagrange multipliers are + considered as dual unknowns. The presence of Lagrange multipliers make this a + primal/dual problem. - It is assumed that each primal slave-side interface node (carrying primal unknowns) is replicated - with a dual node carrying the dual unknowns. - While the number of degrees of freedom (DOFs) per dual node is required to be constant for all dual nodes, - the number of dual DOFs per node can differ from the number of primal DOFs per node. 
+ It is assumed that each primal slave-side interface node (carrying primal + unknowns) is replicated with a dual node carrying the dual unknowns. While the + number of degrees of freedom (DOFs) per dual node is required to be constant + for all dual nodes, the number of dual DOFs per node can differ from the + number of primal DOFs per node. ## Idea ## - This factory will generate aggregates for the dual nodes such that they "match" the aggregates of their primal counterpart. - Instead of performing an actual aggregation procedure on the dual nodes, - we grep the existing primal aggregates and use a user-given mapping of dual-to-primal node IDs - to create the dual aggregates. + This factory will generate aggregates for the dual nodes such that they + "match" the aggregates of their primal counterpart. Instead of performing an + actual aggregation procedure on the dual nodes, we grep the existing primal + aggregates and use a user-given mapping of dual-to-primal node IDs to create + the dual aggregates. ### References ### - - Wiesner, T. A.: Flexible Aggregation-based Algebraic Multigrid Methods for Contact and Flow Problems, - PhD thesis, Technical University of Munich, 2015 + - Wiesner, T. A.: Flexible Aggregation-based Algebraic Multigrid Methods for + Contact and Flow Problems, PhD thesis, Technical University of Munich, 2015 - Wiesner, T. A., Mayr, M., Popp, A., Gee, M. W., Wall, W. A.: - Algebraic multigrid methods for saddle point systems arising from mortar contact formulations, - Int. J. Numer. Methods Eng., 122(15):3749–3779, 2021, https://doi.org/10.1002/nme.6680 + Algebraic multigrid methods for saddle point systems arising from mortar + contact formulations, Int. J. Numer. 
Methods Eng., 122(15):3749–3779, 2021, + https://doi.org/10.1002/nme.6680 @ingroup Aggregation @@ -89,46 +93,59 @@ namespace MueLu ### User parameters of InterfaceAggregationFactory ### Parameter | type | default | master.xml | validated | requested | description ----------|------|---------|:----------:|:---------:|:---------:|------------ - A | Factory | null | | * | * | Generating factory of the matrix A - Aggregates | Factory | null | | * | * | Generating factory of the aggregates (of type "Aggregates" produced, e.g., by the UncoupledAggregationFactory) - Dual/primal mapping strategy | string | vague | | * | * | Chosen strategy and type of input data to generate dual/primal mappings - DualNodeID2PrimalNodeID | Factory | null | | * | * | Generating factory of the fine dual-to-primal node mapping - Primal interface DOF map | Factory | null | | * | * | Generating factory of the fine row map of primal interface degrees of freedom - - The * in the @c master.xml column denotes that the parameter is defined in the @c master.xml file.
- The * in the @c validated column means that the parameter is declared in the list of valid input parameters (see @c GetValidParameters() ).
- The * in the @c requested column states that the data is requested as input with all dependencies (see @c DeclareInput() ). + A | Factory | null | | * | * | Generating + factory of the matrix A Aggregates | Factory | null | | * + | * | Generating factory of the aggregates (of type "Aggregates" produced, + e.g., by the UncoupledAggregationFactory) Dual/primal mapping strategy | + string | vague | | * | * | Chosen strategy and type of input data to + generate dual/primal mappings DualNodeID2PrimalNodeID | Factory | null | + | * | * | Generating factory of the fine dual-to-primal node mapping Primal + interface DOF map | Factory | null | | * | * | Generating factory of + the fine row map of primal interface degrees of freedom + + The * in the @c master.xml column denotes that the parameter is defined in the + @c master.xml file.
The * in the @c validated column means that the + parameter is declared in the list of valid input parameters (see @c + GetValidParameters() ).
The * in the @c requested column states that the + data is requested as input with all dependencies (see @c DeclareInput() ). #### Remarks - This factory support multiple dual/primal mapping strategies based on different inputs. They are: + This factory support multiple dual/primal mapping strategies based on + different inputs. They are: - - node-based: The mapping of dual-to-primal node IDs, \c DualNodeID2PrimalNodeID, is of data type \c std::map. - The 'key' refers to the local ID of the dual node, while the 'value' represents the local ID of its primal counterpart. - - dof-based: The row map of primal interface degrees of freedom (DOFs) is a subset of the row map of all primal DOFs. - It only contains the primal DOFs of interface nodes, that also carry a Lagrange multiplier in the context of a mortar method. + - node-based: The mapping of dual-to-primal node IDs, \c + DualNodeID2PrimalNodeID, is of data type \c + std::map. The 'key' refers to the local ID of the + dual node, while the 'value' represents the local ID of its primal + counterpart. + - dof-based: The row map of primal interface degrees of freedom (DOFs) is a + subset of the row map of all primal DOFs. It only contains the primal DOFs of + interface nodes, that also carry a Lagrange multiplier in the context of a + mortar method. ### Variables provided by this factory ### - After InterfaceAggregationFactory::Build the following data is available (if requested) + After InterfaceAggregationFactory::Build the following data is available (if + requested) Parameter | generated by | description ----------|--------------|------------ - | Aggregates | InterfaceAggregationFactory | Aggregates of "dual nodes" carrying Lagrange multipliers in surface-coupled problems with primal and dual variables. - | CoarseDualNodeID2PrimalNodeID | InterfaceAggregationFactory | Coarsened mapping of dual node IDs two primal node IDs. 
+ | Aggregates | InterfaceAggregationFactory | Aggregates of + "dual nodes" carrying Lagrange multipliers in surface-coupled problems with + primal and dual variables. | CoarseDualNodeID2PrimalNodeID | + InterfaceAggregationFactory | Coarsened mapping of dual node IDs two primal + node IDs. */ template -class InterfaceAggregationFactory : public SingleLevelFactoryBase -{ + class GlobalOrdinal = DefaultGlobalOrdinal, class Node = DefaultNode> +class InterfaceAggregationFactory : public SingleLevelFactoryBase { #undef MUELU_INTERFACEAGGREGATIONFACTORY_SHORT #include "MueLu_UseShortNames.hpp" public: - //! Input //@{ @@ -150,23 +167,29 @@ class InterfaceAggregationFactory : public SingleLevelFactoryBase /*! @brief Build dual aggregates based on a given dual-to-primal node mapping * * @param[in] prefix Prefix for screen output - * @param[in/out] currentLevel Level on which the aggregation needs to be performed + * @param[in/out] currentLevel Level on which the aggregation needs to be + * performed */ - void BuildBasedOnNodeMapping(const std::string& prefix, Level& currentLevel) const; + void BuildBasedOnNodeMapping(const std::string &prefix, + Level ¤tLevel) const; - /*! @brief Build dual aggregates based on a given interface row map of the primal and dual problem + /*! @brief Build dual aggregates based on a given interface row map of the + * primal and dual problem * - * The row map of the interface portion of the primal problem corresponds to the row map of the dual problem. - * This correspondence is exploited to form the dual aggregates based on available primal aggregates. + * The row map of the interface portion of the primal problem corresponds to + * the row map of the dual problem. This correspondence is exploited to form + * the dual aggregates based on available primal aggregates. 
* - * @note In the context of mortar methods, the two maps correspond to the range and domain map of the slave-sided - * mortar operator \f$D\f$, which connects primal interface unknowns and dual unknowns. + * @note In the context of mortar methods, the two maps correspond to the + * range and domain map of the slave-sided mortar operator \f$D\f$, which + * connects primal interface unknowns and dual unknowns. * * @param[in] prefix Prefix for screen output - * @param[in/out] currentLevel Level on which the aggregation needs to be performed + * @param[in/out] currentLevel Level on which the aggregation needs to be + * performed */ - void BuildBasedOnPrimalInterfaceDofMap(const std::string& prefix, Level& currentLevel) const; - + void BuildBasedOnPrimalInterfaceDofMap(const std::string &prefix, + Level ¤tLevel) const; }; } // namespace MueLu diff --git a/packages/muelu/src/Misc/MueLu_InterfaceAggregationFactory_def.hpp b/packages/muelu/src/Misc/MueLu_InterfaceAggregationFactory_def.hpp index 4382cca2523b..03edfdc32ee6 100644 --- a/packages/muelu/src/Misc/MueLu_InterfaceAggregationFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_InterfaceAggregationFactory_def.hpp @@ -58,69 +58,85 @@ #include "MueLu_InterfaceAggregationFactory_decl.hpp" -namespace MueLu -{ +namespace MueLu { template -RCP InterfaceAggregationFactory::GetValidParameterList() const -{ +RCP +InterfaceAggregationFactory::GetValidParameterList() const { RCP validParamList = rcp(new ParameterList()); - validParamList->set>("A", Teuchos::null, "Generating factory of A (matrix block related to dual DOFs)"); - validParamList->set>("Aggregates", Teuchos::null, "Generating factory of the Aggregates (for block 0,0)"); - - validParamList->set("Dual/primal mapping strategy", "vague", - "Strategy to represent mapping between dual and primal quantities [node-based, dof-based]"); - - validParamList->set>("DualNodeID2PrimalNodeID", Teuchos::null, - "Generating factory of the DualNodeID2PrimalNodeID map as input data 
in a Moertel-compatible std::map to map local IDs of dual nodes to local IDs of primal nodes"); - validParamList->set("number of DOFs per dual node", -Teuchos::ScalarTraits::one(), - "Number of DOFs per dual node"); - - validParamList->set>("Primal interface DOF map", Teuchos::null, - "Generating factory of the primal DOF row map of slave side of the coupling surface"); + validParamList->set>( + "A", Teuchos::null, + "Generating factory of A (matrix block related to dual DOFs)"); + validParamList->set>( + "Aggregates", Teuchos::null, + "Generating factory of the Aggregates (for block 0,0)"); + + validParamList->set( + "Dual/primal mapping strategy", "vague", + "Strategy to represent mapping between dual and primal quantities " + "[node-based, dof-based]"); + + validParamList->set>( + "DualNodeID2PrimalNodeID", Teuchos::null, + "Generating factory of the DualNodeID2PrimalNodeID map as input data in " + "a Moertel-compatible std::map to map local IDs of dual nodes to " + "local IDs of primal nodes"); + validParamList->set("number of DOFs per dual node", + -Teuchos::ScalarTraits::one(), + "Number of DOFs per dual node"); + + validParamList->set>( + "Primal interface DOF map", Teuchos::null, + "Generating factory of the primal DOF row map of slave side of the " + "coupling surface"); return validParamList; } // GetValidParameterList() template -void InterfaceAggregationFactory::DeclareInput(Level ¤tLevel) const -{ +void InterfaceAggregationFactory::DeclareInput(Level ¤tLevel) + const { Input(currentLevel, "A"); // matrix block of dual variables Input(currentLevel, "Aggregates"); const ParameterList &pL = GetParameterList(); - TEUCHOS_TEST_FOR_EXCEPTION(pL.get("Dual/primal mapping strategy")=="vague", Exceptions::InvalidArgument, - "Strategy for dual/primal mapping not selected. 
Please select one of the available strategies.") - if (pL.get("Dual/primal mapping strategy") == "node-based") - { - if (currentLevel.GetLevelID() == 0) - { - TEUCHOS_TEST_FOR_EXCEPTION(!currentLevel.IsAvailable("DualNodeID2PrimalNodeID", NoFactory::get()), - Exceptions::RuntimeError, "DualNodeID2PrimalNodeID was not provided by the user on level 0!"); - - currentLevel.DeclareInput("DualNodeID2PrimalNodeID", NoFactory::get(), this); - } - else - { + TEUCHOS_TEST_FOR_EXCEPTION( + pL.get("Dual/primal mapping strategy") == "vague", + Exceptions::InvalidArgument, + "Strategy for dual/primal mapping not selected. Please select one of the " + "available strategies.") + if (pL.get("Dual/primal mapping strategy") == "node-based") { + if (currentLevel.GetLevelID() == 0) { + TEUCHOS_TEST_FOR_EXCEPTION( + !currentLevel.IsAvailable("DualNodeID2PrimalNodeID", + NoFactory::get()), + Exceptions::RuntimeError, + "DualNodeID2PrimalNodeID was not provided by the user on level 0!"); + + currentLevel.DeclareInput("DualNodeID2PrimalNodeID", NoFactory::get(), + this); + } else { Input(currentLevel, "DualNodeID2PrimalNodeID"); } - } - else if (pL.get("Dual/primal mapping strategy") == "dof-based") - { + } else if (pL.get("Dual/primal mapping strategy") == + "dof-based") { if (currentLevel.GetLevelID() == 0) - currentLevel.DeclareInput("Primal interface DOF map", NoFactory::get(), this); + currentLevel.DeclareInput("Primal interface DOF map", NoFactory::get(), + this); else Input(currentLevel, "Primal interface DOF map"); - } - else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::InvalidArgument, "Unknown strategy for dual/primal mapping.") + } else + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::InvalidArgument, + "Unknown strategy for dual/primal mapping.") } // DeclareInput template -void InterfaceAggregationFactory::Build(Level ¤tLevel) const -{ +void InterfaceAggregationFactory::Build(Level ¤tLevel) const { const std::string prefix = "MueLu::InterfaceAggregationFactory::Build: "; 
FactoryMonitor m(*this, "Build", currentLevel); @@ -133,70 +149,102 @@ void InterfaceAggregationFactory::Bui else if (pL.get(parameterName) == "dof-based") BuildBasedOnPrimalInterfaceDofMap(prefix, currentLevel); else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::InvalidArgument, - "MueLu::InterfaceAggregationFactory::Builld(): Unknown strategy for dual/primal mapping. Set a valid value for the parameter \"" << parameterName << "\".") + TEUCHOS_TEST_FOR_EXCEPTION( + true, Exceptions::InvalidArgument, + "MueLu::InterfaceAggregationFactory::Builld(): Unknown strategy for " + "dual/primal mapping. Set a valid value for the parameter \"" + << parameterName << "\".") } template -void InterfaceAggregationFactory::BuildBasedOnNodeMapping(const std::string& prefix, - Level ¤tLevel) const -{ +void InterfaceAggregationFactory:: + BuildBasedOnNodeMapping(const std::string &prefix, + Level ¤tLevel) const { using Dual2Primal_type = std::map; const ParameterList &pL = GetParameterList(); RCP A = Get>(currentLevel, "A"); - const LocalOrdinal numDofsPerDualNode = pL.get("number of DOFs per dual node"); - TEUCHOS_TEST_FOR_EXCEPTION(numDofsPerDualNode::one(), Exceptions::InvalidArgument, - "Number of dual DOFs per node < 0 (default value). Specify a valid \"number of DOFs per dual node\" in the parameter list for the InterfaceAggregationFactory."); - - RCP primalAggregates = Get>(currentLevel, "Aggregates"); - ArrayRCP primalVertex2AggId = primalAggregates->GetVertex2AggId()->getData(0); + const LocalOrdinal numDofsPerDualNode = + pL.get("number of DOFs per dual node"); + TEUCHOS_TEST_FOR_EXCEPTION( + numDofsPerDualNode < Teuchos::ScalarTraits::one(), + Exceptions::InvalidArgument, + "Number of dual DOFs per node < 0 (default value). 
Specify a valid " + "\"number of DOFs per dual node\" in the parameter list for the " + "InterfaceAggregationFactory."); + + RCP primalAggregates = + Get>(currentLevel, "Aggregates"); + ArrayRCP primalVertex2AggId = + primalAggregates->GetVertex2AggId()->getData(0); // Get the user-prescribed mapping of dual to primal node IDs RCP mapNodesDualToPrimal; if (currentLevel.GetLevelID() == 0) - mapNodesDualToPrimal = currentLevel.Get>("DualNodeID2PrimalNodeID", NoFactory::get()); + mapNodesDualToPrimal = currentLevel.Get>( + "DualNodeID2PrimalNodeID", NoFactory::get()); else - mapNodesDualToPrimal = Get>(currentLevel, "DualNodeID2PrimalNodeID"); + mapNodesDualToPrimal = + Get>(currentLevel, "DualNodeID2PrimalNodeID"); RCP operatorRangeMap = A->getRangeMap(); const size_t myRank = operatorRangeMap->getComm()->getRank(); - LocalOrdinal globalNumDualNodes = operatorRangeMap->getGlobalNumElements() / numDofsPerDualNode; - LocalOrdinal localNumDualNodes = operatorRangeMap->getLocalNumElements() / numDofsPerDualNode; + LocalOrdinal globalNumDualNodes = + operatorRangeMap->getGlobalNumElements() / numDofsPerDualNode; + LocalOrdinal localNumDualNodes = + operatorRangeMap->getLocalNumElements() / numDofsPerDualNode; - TEUCHOS_TEST_FOR_EXCEPTION(localNumDualNodes != Teuchos::as(mapNodesDualToPrimal->size()), - std::runtime_error, prefix << " MueLu requires the range map and the DualNodeID2PrimalNodeID map to be compatible."); + TEUCHOS_TEST_FOR_EXCEPTION( + localNumDualNodes != + Teuchos::as(mapNodesDualToPrimal->size()), + std::runtime_error, + prefix << " MueLu requires the range map and the DualNodeID2PrimalNodeID " + "map to be compatible."); RCP dualNodeMap = Teuchos::null; if (numDofsPerDualNode == 1) dualNodeMap = operatorRangeMap; - else - { + else { GlobalOrdinal indexBase = operatorRangeMap->getIndexBase(); auto comm = operatorRangeMap->getComm(); std::vector myDualNodes = {}; - for (size_t i = 0; i < operatorRangeMap->getLocalNumElements(); i += numDofsPerDualNode) - 
myDualNodes.push_back((operatorRangeMap->getGlobalElement(i) - indexBase) / numDofsPerDualNode + indexBase); + for (size_t i = 0; i < operatorRangeMap->getLocalNumElements(); + i += numDofsPerDualNode) + myDualNodes.push_back( + (operatorRangeMap->getGlobalElement(i) - indexBase) / + numDofsPerDualNode + + indexBase); - dualNodeMap = MapFactory::Build(operatorRangeMap->lib(), globalNumDualNodes, myDualNodes, indexBase, comm); + dualNodeMap = MapFactory::Build(operatorRangeMap->lib(), globalNumDualNodes, + myDualNodes, indexBase, comm); } - TEUCHOS_TEST_FOR_EXCEPTION(localNumDualNodes != Teuchos::as(dualNodeMap->getLocalNumElements()), - std::runtime_error, prefix << " Local number of dual nodes given by user is incompatible to the dual node map."); + TEUCHOS_TEST_FOR_EXCEPTION( + localNumDualNodes != + Teuchos::as(dualNodeMap->getLocalNumElements()), + std::runtime_error, + prefix << " Local number of dual nodes given by user is incompatible to " + "the dual node map."); RCP dualAggregates = rcp(new Aggregates(dualNodeMap)); dualAggregates->setObjectLabel("InterfaceAggregation"); - // Copy setting from primal aggregates, as we copy the interface part of primal aggregates anyways - dualAggregates->AggregatesCrossProcessors(primalAggregates->AggregatesCrossProcessors()); + // Copy setting from primal aggregates, as we copy the interface part of + // primal aggregates anyways + dualAggregates->AggregatesCrossProcessors( + primalAggregates->AggregatesCrossProcessors()); - ArrayRCP dualVertex2AggId = dualAggregates->GetVertex2AggId()->getDataNonConst(0); - ArrayRCP dualProcWinner = dualAggregates->GetProcWinner()->getDataNonConst(0); + ArrayRCP dualVertex2AggId = + dualAggregates->GetVertex2AggId()->getDataNonConst(0); + ArrayRCP dualProcWinner = + dualAggregates->GetProcWinner()->getDataNonConst(0); - RCP coarseMapNodesDualToPrimal = rcp(new Dual2Primal_type()); - RCP coarseMapNodesPrimalToDual = rcp(new Dual2Primal_type()); + RCP coarseMapNodesDualToPrimal = + rcp(new 
Dual2Primal_type()); + RCP coarseMapNodesPrimalToDual = + rcp(new Dual2Primal_type()); LocalOrdinal numLocalDualAggregates = 0; @@ -205,42 +253,45 @@ void InterfaceAggregationFactory::Bui * - assign dual nodes to dual aggregates * - recursively coarsen the dual-to-primal node mapping */ - LocalOrdinal localPrimalNodeID = - Teuchos::ScalarTraits::one(); - LocalOrdinal currentPrimalAggId = - Teuchos::ScalarTraits::one(); - for (LocalOrdinal localDualNodeID = 0; localDualNodeID < localNumDualNodes; ++localDualNodeID) - { + LocalOrdinal localPrimalNodeID = -Teuchos::ScalarTraits::one(); + LocalOrdinal currentPrimalAggId = -Teuchos::ScalarTraits::one(); + for (LocalOrdinal localDualNodeID = 0; localDualNodeID < localNumDualNodes; + ++localDualNodeID) { // Get local ID of the primal node associated to the current dual node localPrimalNodeID = (*mapNodesDualToPrimal)[localDualNodeID]; // Find the primal aggregate that owns the current primal node currentPrimalAggId = primalVertex2AggId[localPrimalNodeID]; - // Test if the current primal aggregate has no associated dual aggregate, yet. - // Create new dual aggregate, if necessary. - if (coarseMapNodesPrimalToDual->count(currentPrimalAggId) == 0) - { + // Test if the current primal aggregate has no associated dual aggregate, + // yet. Create new dual aggregate, if necessary. 
+ if (coarseMapNodesPrimalToDual->count(currentPrimalAggId) == 0) { // Associate a new dual aggregate w/ the current primal aggregate - (*coarseMapNodesPrimalToDual)[currentPrimalAggId] = numLocalDualAggregates; - (*coarseMapNodesDualToPrimal)[numLocalDualAggregates] = currentPrimalAggId; + (*coarseMapNodesPrimalToDual)[currentPrimalAggId] = + numLocalDualAggregates; + (*coarseMapNodesDualToPrimal)[numLocalDualAggregates] = + currentPrimalAggId; ++numLocalDualAggregates; } // Fill the dual aggregate - dualVertex2AggId[localDualNodeID] = (*coarseMapNodesPrimalToDual)[currentPrimalAggId]; + dualVertex2AggId[localDualNodeID] = + (*coarseMapNodesPrimalToDual)[currentPrimalAggId]; dualProcWinner[localDualNodeID] = myRank; } // Store dual aggregeate data as well as coarsening information dualAggregates->SetNumAggregates(numLocalDualAggregates); Set(currentLevel, "Aggregates", dualAggregates); - Set(currentLevel, "CoarseDualNodeID2PrimalNodeID", coarseMapNodesDualToPrimal); + Set(currentLevel, "CoarseDualNodeID2PrimalNodeID", + coarseMapNodesDualToPrimal); GetOStream(Statistics1) << dualAggregates->description() << std::endl; } // BuildBasedOnNodeMapping template -void InterfaceAggregationFactory::BuildBasedOnPrimalInterfaceDofMap( - const std::string& prefix, Level ¤tLevel) const -{ +void InterfaceAggregationFactory:: + BuildBasedOnPrimalInterfaceDofMap(const std::string &prefix, + Level ¤tLevel) const { const GlobalOrdinal GO_ZERO = Teuchos::ScalarTraits::zero(); const GlobalOrdinal GO_ONE = Teuchos::ScalarTraits::one(); @@ -250,8 +301,10 @@ void InterfaceAggregationFactory::Bui // Grab the off-diagonal block (0,1) from the global blocked operator RCP A01 = Get>(currentLevel, "A"); - RCP primalAggregates = Get>(currentLevel, "Aggregates"); - ArrayRCP primalVertex2AggId = primalAggregates->GetVertex2AggId()->getData(0); + RCP primalAggregates = + Get>(currentLevel, "Aggregates"); + ArrayRCP primalVertex2AggId = + primalAggregates->GetVertex2AggId()->getData(0); auto comm 
= A01->getRowMap()->getComm(); const int myRank = comm->getRank(); @@ -259,149 +312,200 @@ void InterfaceAggregationFactory::Bui RCP primalInterfaceDofRowMap = Teuchos::null; if (currentLevel.GetLevelID() == 0) { // Use NoFactory, since the fine level asks for user data - primalInterfaceDofRowMap = currentLevel.Get>("Primal interface DOF map", NoFactory::get()); + primalInterfaceDofRowMap = currentLevel.Get>( + "Primal interface DOF map", NoFactory::get()); } else { - primalInterfaceDofRowMap = Get>(currentLevel, "Primal interface DOF map"); + primalInterfaceDofRowMap = + Get>(currentLevel, "Primal interface DOF map"); } TEUCHOS_ASSERT(!primalInterfaceDofRowMap.is_null()); - if (A01->IsView("stridedMaps") && rcp_dynamic_cast(A01->getRowMap("stridedMaps")) != Teuchos::null) { - auto stridedRowMap = rcp_dynamic_cast(A01->getRowMap("stridedMaps")); - auto stridedColMap = rcp_dynamic_cast(A01->getColMap("stridedMaps")); - numDofsPerPrimalNode = Teuchos::as(stridedRowMap->getFixedBlockSize()); - numDofsPerDualNode = Teuchos::as(stridedColMap->getFixedBlockSize()); + if (A01->IsView("stridedMaps") && + rcp_dynamic_cast(A01->getRowMap("stridedMaps")) != + Teuchos::null) { + auto stridedRowMap = + rcp_dynamic_cast(A01->getRowMap("stridedMaps")); + auto stridedColMap = + rcp_dynamic_cast(A01->getColMap("stridedMaps")); + numDofsPerPrimalNode = + Teuchos::as(stridedRowMap->getFixedBlockSize()); + numDofsPerDualNode = + Teuchos::as(stridedColMap->getFixedBlockSize()); if (numDofsPerPrimalNode != numDofsPerDualNode) { - GetOStream(Warnings) << "InterfaceAggregation attempts to work with " - << numDofsPerPrimalNode << " primal DOFs per node and " << numDofsPerDualNode << " dual DOFs per node." - << "Be careful! Algorithm is not well-tested, if number of primal and dual DOFs per node differ." 
<< std::endl; + GetOStream(Warnings) << "InterfaceAggregation attempts to work with " + << numDofsPerPrimalNode + << " primal DOFs per node and " << numDofsPerDualNode + << " dual DOFs per node." + << "Be careful! Algorithm is not well-tested, if " + "number of primal and dual DOFs per node differ." + << std::endl; } } - TEUCHOS_TEST_FOR_EXCEPTION(numDofsPerPrimalNode==0, Exceptions::RuntimeError, - "InterfaceAggregationFactory could not extract the number of primal DOFs per node from striding information. At least, make sure that StridedMap information has actually been provided."); - TEUCHOS_TEST_FOR_EXCEPTION(numDofsPerDualNode==0, Exceptions::RuntimeError, - "InterfaceAggregationFactory could not extract the number of dual DOFs per node from striding information. At least, make sure that StridedMap information has actually been provided."); + TEUCHOS_TEST_FOR_EXCEPTION( + numDofsPerPrimalNode == 0, Exceptions::RuntimeError, + "InterfaceAggregationFactory could not extract the number of primal DOFs " + "per node from striding information. At least, make sure that StridedMap " + "information has actually been provided."); + TEUCHOS_TEST_FOR_EXCEPTION( + numDofsPerDualNode == 0, Exceptions::RuntimeError, + "InterfaceAggregationFactory could not extract the number of dual DOFs " + "per node from striding information. 
At least, make sure that StridedMap " + "information has actually been provided."); /* Determine block information for primal block - * - * primalDofOffset: global offset of primal DOF GIDs (usually is zero (default)) - * primalBlockDim: block dim for fixed size blocks - * - is 2 or 3 (for 2d or 3d problems) on the finest level (# displacement dofs per node) - * - is 3 or 6 (for 2d or 3d problems) on coarser levels (# nullspace vectors) - */ + * + * primalDofOffset: global offset of primal DOF GIDs (usually is zero + * (default)) primalBlockDim: block dim for fixed size blocks + * - is 2 or 3 (for 2d or 3d problems) on the finest level (# displacement + * dofs per node) + * - is 3 or 6 (for 2d or 3d problems) on coarser levels (# nullspace vectors) + */ GlobalOrdinal primalDofOffset = GO_ZERO; LocalOrdinal primalBlockDim = numDofsPerPrimalNode; /* Determine block information for Lagrange multipliers - * - * dualDofOffset: usually > zero (set by domainOffset for Ptent11Fact) - * dualBlockDim: - * - is primalBlockDim (for 2d or 3d problems) on the finest level (1 Lagrange multiplier per - * displacement dof) - * - is 2 or 3 (for 2d or 3d problems) on coarser levels (same as on finest level, whereas there - * are 3 or 6 displacement dofs per node) - */ + * + * dualDofOffset: usually > zero (set by domainOffset for Ptent11Fact) + * dualBlockDim: + * - is primalBlockDim (for 2d or 3d problems) on the finest level (1 Lagrange + * multiplier per displacement dof) + * - is 2 or 3 (for 2d or 3d problems) on coarser levels (same as on finest + * level, whereas there are 3 or 6 displacement dofs per node) + */ GlobalOrdinal dualDofOffset = A01->getColMap()->getMinAllGlobalIndex(); LocalOrdinal dualBlockDim = numDofsPerDualNode; // Generate global replicated mapping "lagrNodeId -> dispNodeId" RCP dualDofMap = A01->getDomainMap(); GlobalOrdinal gMaxDualNodeId = AmalgamationFactory::DOFGid2NodeId( - dualDofMap->getMaxAllGlobalIndex(), dualBlockDim, dualDofOffset, 
dualDofMap->getIndexBase()); + dualDofMap->getMaxAllGlobalIndex(), dualBlockDim, dualDofOffset, + dualDofMap->getIndexBase()); GlobalOrdinal gMinDualNodeId = AmalgamationFactory::DOFGid2NodeId( - dualDofMap->getMinAllGlobalIndex(), dualBlockDim, dualDofOffset, dualDofMap->getIndexBase()); - - GetOStream(Runtime1) << " Dual DOF map: index base = " << dualDofMap->getIndexBase() - << ", block dim = " << dualBlockDim - << ", gid offset = " << dualDofOffset - << std::endl; - - GetOStream(Runtime1) << " [primal / dual] DOFs per node = [" << numDofsPerPrimalNode - << "/" << numDofsPerDualNode << "]" << std::endl; - - // Generate locally replicated vector for mapping dual node IDs to primal node IDs - Array dualNodeId2primalNodeId(gMaxDualNodeId - gMinDualNodeId + 1, -GO_ONE); - Array local_dualNodeId2primalNodeId(gMaxDualNodeId - gMinDualNodeId + 1, -GO_ONE); - - // Generate locally replicated vector for mapping dual node IDs to primal aggregate ID - Array dualNodeId2primalAggId(gMaxDualNodeId - gMinDualNodeId + 1, -GO_ONE); - Array local_dualNodeId2primalAggId(gMaxDualNodeId - gMinDualNodeId + 1, -GO_ONE); - - Array dualDofId2primalDofId(primalInterfaceDofRowMap->getGlobalNumElements(), -GO_ONE); - Array local_dualDofId2primalDofId(primalInterfaceDofRowMap->getGlobalNumElements(), -GO_ONE); + dualDofMap->getMinAllGlobalIndex(), dualBlockDim, dualDofOffset, + dualDofMap->getIndexBase()); + + GetOStream(Runtime1) << " Dual DOF map: index base = " + << dualDofMap->getIndexBase() + << ", block dim = " << dualBlockDim + << ", gid offset = " << dualDofOffset << std::endl; + + GetOStream(Runtime1) << " [primal / dual] DOFs per node = [" + << numDofsPerPrimalNode << "/" << numDofsPerDualNode + << "]" << std::endl; + + // Generate locally replicated vector for mapping dual node IDs to primal node + // IDs + Array dualNodeId2primalNodeId( + gMaxDualNodeId - gMinDualNodeId + 1, -GO_ONE); + Array local_dualNodeId2primalNodeId( + gMaxDualNodeId - gMinDualNodeId + 1, -GO_ONE); + + // 
Generate locally replicated vector for mapping dual node IDs to primal + // aggregate ID + Array dualNodeId2primalAggId( + gMaxDualNodeId - gMinDualNodeId + 1, -GO_ONE); + Array local_dualNodeId2primalAggId( + gMaxDualNodeId - gMinDualNodeId + 1, -GO_ONE); + + Array dualDofId2primalDofId( + primalInterfaceDofRowMap->getGlobalNumElements(), -GO_ONE); + Array local_dualDofId2primalDofId( + primalInterfaceDofRowMap->getGlobalNumElements(), -GO_ONE); // Fill mapping of Lagrange Node IDs to displacement aggregate IDs - const size_t numMyPrimalInterfaceDOFs = primalInterfaceDofRowMap->getLocalNumElements(); - for (size_t r = 0; r < numMyPrimalInterfaceDOFs; r += numDofsPerPrimalNode) - { + const size_t numMyPrimalInterfaceDOFs = + primalInterfaceDofRowMap->getLocalNumElements(); + for (size_t r = 0; r < numMyPrimalInterfaceDOFs; r += numDofsPerPrimalNode) { GlobalOrdinal gPrimalRowId = primalInterfaceDofRowMap->getGlobalElement(r); if (A01->getRowMap()->isNodeGlobalElement(gPrimalRowId)) // Remove this if? 
{ - const LocalOrdinal lPrimalRowId = A01->getRowMap()->getLocalElement(gPrimalRowId); - const GlobalOrdinal gPrimalNodeId = AmalgamationFactory::DOFGid2NodeId(gPrimalRowId, primalBlockDim, primalDofOffset, primalInterfaceDofRowMap->getIndexBase()); + const LocalOrdinal lPrimalRowId = + A01->getRowMap()->getLocalElement(gPrimalRowId); + const GlobalOrdinal gPrimalNodeId = AmalgamationFactory::DOFGid2NodeId( + gPrimalRowId, primalBlockDim, primalDofOffset, + primalInterfaceDofRowMap->getIndexBase()); const LocalOrdinal lPrimalNodeId = lPrimalRowId / numDofsPerPrimalNode; const LocalOrdinal primalAggId = primalVertex2AggId[lPrimalNodeId]; const GlobalOrdinal gDualDofId = A01->getColMap()->getGlobalElement(r); - const GlobalOrdinal gDualNodeId = AmalgamationFactory::DOFGid2NodeId(gDualDofId, dualBlockDim, dualDofOffset, 0); + const GlobalOrdinal gDualNodeId = AmalgamationFactory::DOFGid2NodeId( + gDualDofId, dualBlockDim, dualDofOffset, 0); - if (local_dualNodeId2primalNodeId[gDualNodeId - gMinDualNodeId] == -GO_ONE) { - local_dualNodeId2primalNodeId[gDualNodeId - gMinDualNodeId] = gPrimalNodeId; - local_dualNodeId2primalAggId[gDualNodeId - gMinDualNodeId] = primalAggId; + if (local_dualNodeId2primalNodeId[gDualNodeId - gMinDualNodeId] == + -GO_ONE) { + local_dualNodeId2primalNodeId[gDualNodeId - gMinDualNodeId] = + gPrimalNodeId; + local_dualNodeId2primalAggId[gDualNodeId - gMinDualNodeId] = + primalAggId; } else { - GetOStream(Warnings) << "PROC: " << myRank << " gDualNodeId " << gDualNodeId << " is already connected to primal nodeId " + GetOStream(Warnings) + << "PROC: " << myRank << " gDualNodeId " << gDualNodeId + << " is already connected to primal nodeId " << local_dualNodeId2primalNodeId[gDualNodeId - gMinDualNodeId] << ". 
Ignore new dispNodeId: " << gPrimalNodeId << std::endl; } - } } - const int dualNodeId2primalNodeIdSize = Teuchos::as(local_dualNodeId2primalNodeId.size()); + const int dualNodeId2primalNodeIdSize = + Teuchos::as(local_dualNodeId2primalNodeId.size()); Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, dualNodeId2primalNodeIdSize, - &local_dualNodeId2primalNodeId[0], &dualNodeId2primalNodeId[0]); + &local_dualNodeId2primalNodeId[0], + &dualNodeId2primalNodeId[0]); Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, dualNodeId2primalNodeIdSize, - &local_dualNodeId2primalAggId[0], &dualNodeId2primalAggId[0]); + &local_dualNodeId2primalAggId[0], + &dualNodeId2primalAggId[0]); // build node map for dual variables // generate "artificial nodes" for lagrange multipliers - // the node map is also used for defining the Aggregates for the lagrange multipliers + // the node map is also used for defining the Aggregates for the lagrange + // multipliers std::vector dualNodes; - for (size_t r = 0; r < A01->getDomainMap()->getLocalNumElements(); r++) - { + for (size_t r = 0; r < A01->getDomainMap()->getLocalNumElements(); r++) { // determine global Lagrange multiplier row Dof - // generate a node id using the grid, lagr_blockdim and lagr_offset // todo make sure, that - // nodeId is unique and does not interfer with the displacement nodes + // generate a node id using the grid, lagr_blockdim and lagr_offset // todo + // make sure, that nodeId is unique and does not interfer with the + // displacement nodes GlobalOrdinal gDualDofId = A01->getDomainMap()->getGlobalElement(r); - GlobalOrdinal gDualNodeId = AmalgamationFactory::DOFGid2NodeId(gDualDofId, dualBlockDim, dualDofOffset, 0); + GlobalOrdinal gDualNodeId = AmalgamationFactory::DOFGid2NodeId( + gDualDofId, dualBlockDim, dualDofOffset, 0); dualNodes.push_back(gDualNodeId); } // remove all duplicates - dualNodes.erase(std::unique(dualNodes.begin(), dualNodes.end()), dualNodes.end()); + dualNodes.erase(std::unique(dualNodes.begin(), 
dualNodes.end()), + dualNodes.end()); // define node map for Lagrange multipliers - Teuchos::RCP dualNodeMap = MapFactory::Build(A01->getRowMap()->lib(), - Teuchos::OrdinalTraits::invalid(), dualNodes, A01->getRowMap()->getIndexBase(), comm); + Teuchos::RCP dualNodeMap = MapFactory::Build( + A01->getRowMap()->lib(), + Teuchos::OrdinalTraits::invalid(), dualNodes, + A01->getRowMap()->getIndexBase(), comm); // Build aggregates using the lagrange multiplier node map - Teuchos::RCP dualAggregates = Teuchos::rcp(new Aggregates(dualNodeMap)); + Teuchos::RCP dualAggregates = + Teuchos::rcp(new Aggregates(dualNodeMap)); dualAggregates->setObjectLabel("UC (dual variables)"); // extract aggregate data structures to fill - Teuchos::ArrayRCP dualVertex2AggId = dualAggregates->GetVertex2AggId()->getDataNonConst(0); - Teuchos::ArrayRCP dualProcWinner = dualAggregates->GetProcWinner()->getDataNonConst(0); + Teuchos::ArrayRCP dualVertex2AggId = + dualAggregates->GetVertex2AggId()->getDataNonConst(0); + Teuchos::ArrayRCP dualProcWinner = + dualAggregates->GetProcWinner()->getDataNonConst(0); // loop over local lagrange multiplier node ids LocalOrdinal nLocalAggregates = 0; std::map primalAggId2localDualAggId; - for (size_t lDualNodeID = 0; lDualNodeID < dualNodeMap->getLocalNumElements(); ++lDualNodeID) - { - const GlobalOrdinal gDualNodeId = dualNodeMap->getGlobalElement(lDualNodeID); - const GlobalOrdinal primalAggId = dualNodeId2primalAggId[gDualNodeId - gMinDualNodeId]; + for (size_t lDualNodeID = 0; lDualNodeID < dualNodeMap->getLocalNumElements(); + ++lDualNodeID) { + const GlobalOrdinal gDualNodeId = + dualNodeMap->getGlobalElement(lDualNodeID); + const GlobalOrdinal primalAggId = + dualNodeId2primalAggId[gDualNodeId - gMinDualNodeId]; if (primalAggId2localDualAggId.count(primalAggId) == 0) primalAggId2localDualAggId[primalAggId] = nLocalAggregates++; dualVertex2AggId[lDualNodeID] = primalAggId2localDualAggId[primalAggId]; @@ -426,17 +530,22 @@ void 
InterfaceAggregationFactory::Bui TEUCHOS_ASSERT(A01->isFillComplete()); - RCP dualAmalgamationInfo = rcp(new AmalgamationInfo(rowTranslation, colTranslation, - A01->getDomainMap(), A01->getDomainMap(), A01->getDomainMap(), - fullblocksize, offset, blockid, nStridedOffset, stridedblocksize)); + RCP dualAmalgamationInfo = rcp(new AmalgamationInfo( + rowTranslation, colTranslation, A01->getDomainMap(), A01->getDomainMap(), + A01->getDomainMap(), fullblocksize, offset, blockid, nStridedOffset, + stridedblocksize)); dualAggregates->SetNumAggregates(nLocalAggregates); - dualAggregates->AggregatesCrossProcessors(primalAggregates->AggregatesCrossProcessors()); + dualAggregates->AggregatesCrossProcessors( + primalAggregates->AggregatesCrossProcessors()); if (dualAggregates->AggregatesCrossProcessors()) - GetOStream(Runtime1) << "Interface aggregates cross processor boundaries." << std::endl; + GetOStream(Runtime1) << "Interface aggregates cross processor boundaries." + << std::endl; else - GetOStream(Runtime1) << "Interface aggregates do not cross processor boundaries." << std::endl; + GetOStream(Runtime1) + << "Interface aggregates do not cross processor boundaries." 
+ << std::endl; currentLevel.Set("Aggregates", dualAggregates, this); currentLevel.Set("UnAmalgamationInfo", dualAmalgamationInfo, this); diff --git a/packages/muelu/src/Misc/MueLu_InterfaceMappingTransferFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_InterfaceMappingTransferFactory_decl.hpp index 0c22adb5e443..b3d6890534f1 100644 --- a/packages/muelu/src/Misc/MueLu_InterfaceMappingTransferFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_InterfaceMappingTransferFactory_decl.hpp @@ -45,15 +45,14 @@ #ifndef MUELU_INTERFACEMAPPINGTRANSFERFACTORY_DECL_HPP_ #define MUELU_INTERFACEMAPPINGTRANSFERFACTORY_DECL_HPP_ -#include "MueLu_Level.hpp" #include "MueLu_FactoryManagerBase.hpp" +#include "MueLu_Level.hpp" #include "MueLu_Monitor.hpp" #include "MueLu_ConfigDefs.hpp" #include "MueLu_TwoLevelFactoryBase.hpp" -namespace MueLu -{ +namespace MueLu { /*! @class InterfaceMappingTransferFactory @@ -67,17 +66,19 @@ namespace MueLu ### User parameters ### Parameter | type | default | master.xml | validated | requested | description ----------|------|---------|:----------:|:---------:|:---------:|------------ - CoarseDualNodeID2PrimalNodeID | Factory | null | | * | * | Generating factory of the coarse dual-to-primal node mapping + CoarseDualNodeID2PrimalNodeID | Factory | null | | * | * | Generating + factory of the coarse dual-to-primal node mapping - The * in the @c master.xml column denotes that the parameter is defined in the @c master.xml file.
- The * in the @c validated column means that the parameter is declared in the list of valid input parameters (see InterfaceAggregationFactory::GetValidParameters).
- The * in the @c requested column states that the data is requested as input with all dependencies (see InterfaceAggregationFactory::DeclareInput). + The * in the @c master.xml column denotes that the parameter is defined in the + @c master.xml file.
The * in the @c validated column means that the + parameter is declared in the list of valid input parameters (see + InterfaceAggregationFactory::GetValidParameters).
The * in the @c + requested column states that the data is requested as input with all + dependencies (see InterfaceAggregationFactory::DeclareInput). */ template -class InterfaceMappingTransferFactory : public TwoLevelFactoryBase -{ + class GlobalOrdinal = DefaultGlobalOrdinal, class Node = DefaultNode> +class InterfaceMappingTransferFactory : public TwoLevelFactoryBase { #undef MUELU_INTERFACEMAPPINGTRANSFERFACTORY_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" public: diff --git a/packages/muelu/src/Misc/MueLu_InterfaceMappingTransferFactory_def.hpp b/packages/muelu/src/Misc/MueLu_InterfaceMappingTransferFactory_def.hpp index 5daab13cbdfb..cc937a1ff902 100644 --- a/packages/muelu/src/Misc/MueLu_InterfaceMappingTransferFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_InterfaceMappingTransferFactory_def.hpp @@ -47,29 +47,34 @@ #include "MueLu_InterfaceMappingTransferFactory_decl.hpp" -namespace MueLu -{ +namespace MueLu { template -RCP InterfaceMappingTransferFactory::GetValidParameterList() const -{ +RCP +InterfaceMappingTransferFactory::GetValidParameterList() const { RCP validParamList = rcp(new ParameterList()); - validParamList->set>("CoarseDualNodeID2PrimalNodeID", Teuchos::null, "Generating factory of the CoarseDualNodeID2PrimalNodeID map"); + validParamList->set>( + "CoarseDualNodeID2PrimalNodeID", Teuchos::null, + "Generating factory of the CoarseDualNodeID2PrimalNodeID map"); return validParamList; } template -void InterfaceMappingTransferFactory::DeclareInput(Level &fineLevel, Level &coarseLevel) const -{ +void InterfaceMappingTransferFactory< + LocalOrdinal, GlobalOrdinal, Node>::DeclareInput(Level &fineLevel, + Level &coarseLevel) const { Input(fineLevel, "CoarseDualNodeID2PrimalNodeID"); } template -void InterfaceMappingTransferFactory::Build(Level &fineLevel, Level &coarseLevel) const -{ +void InterfaceMappingTransferFactory::Build( + Level &fineLevel, Level &coarseLevel) const { Monitor m(*this, "Interface Mapping transfer factory"); - RCP> 
coarseLagr2Dof = Get>>(fineLevel, "CoarseDualNodeID2PrimalNodeID"); + RCP> coarseLagr2Dof = + Get>>( + fineLevel, "CoarseDualNodeID2PrimalNodeID"); Set(coarseLevel, "DualNodeID2PrimalNodeID", coarseLagr2Dof); } diff --git a/packages/muelu/src/Misc/MueLu_InverseApproximationFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_InverseApproximationFactory_decl.hpp index 91b50b0d5c1f..f7382b215bc6 100644 --- a/packages/muelu/src/Misc/MueLu_InverseApproximationFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_InverseApproximationFactory_decl.hpp @@ -60,76 +60,85 @@ namespace MueLu { - /*! - @class InverseApproximationFactory class. - @brief Factory for building the approximate inverse of a matrix. - - ## Context, assumptions, and use cases ## - - This factory is intended to be used for building an approximate inverse of a given matrix \A. This is for now only - used in the SchurComplementFactory to generate a respective \Ainv matrix. - - For blocked matrices, the InverseApproximationFactory per default generates an approximate inverse of the A_00 term. - - ## Input/output of this factory ## - - ### User parameters of InterfaceAggregationFactory ### - Parameter | type | default | master.xml | validated | requested | description - ----------|------|---------|:----------:|:---------:|:---------:|------------ - A | Factory | null | | * | * | Generating factory of the matrix A - inverse: approximation type | string | diagonal | | * | * | Method used to approximate the inverse - inverse: fixing | bool | false | | * | * | Fix diagonal by replacing small entries with 1.0 - - The * in the master.xml column denotes that the parameter is defined in the master.xml file. - The * in the validated column means that the parameter is declared in the list of valid input parameters (see GetValidParameters() ). - The * in the requested column states that the data is requested as input with all dependencies (see DeclareInput() ). 
- - ### Variables provided by this factory ### - - After InverseApproximationFactory::Build the following data is available (if requested) - - Parameter | generated by | description - ----------|--------------|------------ - | Ainv | InverseApproximationFactory | The approximate inverse of a given matrix. - */ - - template - class InverseApproximationFactory : public SingleLevelFactoryBase { +/*! + @class InverseApproximationFactory class. + @brief Factory for building the approximate inverse of a matrix. + + ## Context, assumptions, and use cases ## + + This factory is intended to be used for building an approximate inverse of a + given matrix \A. This is for now only used in the SchurComplementFactory to + generate a respective \Ainv matrix. + + For blocked matrices, the InverseApproximationFactory per default generates an + approximate inverse of the A_00 term. + + ## Input/output of this factory ## + + ### User parameters of InterfaceAggregationFactory ### + Parameter | type | default | master.xml | validated | requested | description + ----------|------|---------|:----------:|:---------:|:---------:|------------ + A | Factory | null | | * | * | Generating + factory of the matrix A inverse: approximation type | string | diagonal | | + * | * | Method used to approximate the inverse inverse: fixing | + bool | false | | * | * | Fix diagonal by replacing small entries + with 1.0 + + The * in the master.xml column denotes that the parameter is defined in the + master.xml file. The * in the validated column means that the parameter is + declared in the list of valid input parameters (see GetValidParameters() ). + The * in the requested column states that the data is requested as input with + all dependencies (see DeclareInput() ). 
+ + ### Variables provided by this factory ### + + After InverseApproximationFactory::Build the following data is available (if + requested) + + Parameter | generated by | description + ----------|--------------|------------ + | Ainv | InverseApproximationFactory | The approximate inverse of a given + matrix. +*/ + +template +class InverseApproximationFactory : public SingleLevelFactoryBase { #undef MUELU_INVERSEAPPROXIMATIONFACTORY_SHORT - #include "MueLu_UseShortNames.hpp" +#include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ +public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - InverseApproximationFactory() = default; + //! Constructor. + InverseApproximationFactory() = default; - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level& currentLevel) const; + void DeclareInput(Level ¤tLevel) const; - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //@{ - //! @name Build methods. + //@{ + //! @name Build methods. - //! Build an object with this factory. - void Build(Level& currentLevel) const; + //! Build an object with this factory. + void Build(Level ¤tLevel) const; - //@} + //@} - private: - //! Sparse inverse calculation method. - RCP GetSparseInverse(const RCP& A, const RCP& sparsityPattern) const; +private: + //! Sparse inverse calculation method. 
+ RCP + GetSparseInverse(const RCP &A, + const RCP &sparsityPattern) const; - }; // class InverseApproximationFactory +}; // class InverseApproximationFactory } // namespace MueLu diff --git a/packages/muelu/src/Misc/MueLu_InverseApproximationFactory_def.hpp b/packages/muelu/src/Misc/MueLu_InverseApproximationFactory_def.hpp index 7ec193a94d78..cc3cdb5277d5 100644 --- a/packages/muelu/src/Misc/MueLu_InverseApproximationFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_InverseApproximationFactory_def.hpp @@ -49,166 +49,201 @@ #include #include #include -#include #include -#include -#include -#include +#include #include +#include #include +#include #include +#include -#include #include +#include #include +#include "MueLu_InverseApproximationFactory_decl.hpp" #include "MueLu_Level.hpp" #include "MueLu_Monitor.hpp" #include "MueLu_Utilities.hpp" -#include "MueLu_InverseApproximationFactory_decl.hpp" namespace MueLu { - template - RCP InverseApproximationFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - using Magnitude = typename Teuchos::ScalarTraits::magnitudeType; - - validParamList->set >("A", NoFactory::getRCP(), "Matrix to build the approximate inverse on.\n"); - - validParamList->set ("inverse: approximation type", "diagonal", "Method used to approximate the inverse."); - validParamList->set ("inverse: drop tolerance", 0.0 , "Values below this threshold are dropped from the matrix (or fixed if diagonal fixing is active)."); - validParamList->set ("inverse: fixing", false , "Keep diagonal and fix small entries with 1.0"); - - return validParamList; - } - - template - void InverseApproximationFactory::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "A"); +template +RCP +InverseApproximationFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + using Magnitude = typename Teuchos::ScalarTraits::magnitudeType; + + validParamList->set>( + "A", NoFactory::getRCP(), + "Matrix 
to build the approximate inverse on.\n"); + + validParamList->set("inverse: approximation type", "diagonal", + "Method used to approximate the inverse."); + validParamList->set( + "inverse: drop tolerance", 0.0, + "Values below this threshold are dropped from the matrix (or fixed if " + "diagonal fixing is active)."); + validParamList->set("inverse: fixing", false, + "Keep diagonal and fix small entries with 1.0"); + + return validParamList; +} + +template +void InverseApproximationFactory::DeclareInput(Level ¤tLevel) + const { + Input(currentLevel, "A"); +} + +template +void InverseApproximationFactory::Build(Level ¤tLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + + using STS = Teuchos::ScalarTraits; + const SC one = STS::one(); + using Magnitude = typename Teuchos::ScalarTraits::magnitudeType; + + const ParameterList &pL = GetParameterList(); + const bool fixing = pL.get("inverse: fixing"); + + // check which approximation type to use + const std::string method = pL.get("inverse: approximation type"); + TEUCHOS_TEST_FOR_EXCEPTION( + method != "diagonal" && method != "lumping" && + method != "sparseapproxinverse", + Exceptions::RuntimeError, + "MueLu::InverseApproximationFactory::Build: Approximation type can be " + "'diagonal' or 'lumping' or " + "'sparseapproxinverse'."); + + RCP A = Get>(currentLevel, "A"); + RCP bA = Teuchos::rcp_dynamic_cast(A); + const bool isBlocked = (bA == Teuchos::null ? false : true); + + // if blocked operator is used, defaults to A(0,0) + if (isBlocked) + A = bA->getMatrix(0, 0); + + const Magnitude tol = pL.get("inverse: drop tolerance"); + RCP Ainv = Teuchos::null; + + if (method == "diagonal") { + const auto diag = VectorFactory::Build(A->getRangeMap(), true); + A->getLocalDiagCopy(*diag); + const RCP D = + (!fixing ? 
Utilities::GetInverse(diag) + : Utilities::GetInverse(diag, tol, one)); + Ainv = MatrixFactory::Build(D); + } else if (method == "lumping") { + const auto diag = Utilities::GetLumpedMatrixDiagonal(*A); + const RCP D = + (!fixing ? Utilities::GetInverse(diag) + : Utilities::GetInverse(diag, tol, one)); + Ainv = MatrixFactory::Build(D); + } else if (method == "sparseapproxinverse") { + RCP sparsityPattern = + Utilities::GetThresholdedGraph(A, tol, A->getGlobalMaxNumRowEntries()); + GetOStream(Statistics1) + << "NNZ Graph(A): " << A->getCrsGraph()->getGlobalNumEntries() + << " , NNZ Tresholded Graph(A): " + << sparsityPattern->getGlobalNumEntries() << std::endl; + RCP pAinv = GetSparseInverse(A, sparsityPattern); + Ainv = Utilities::GetThresholdedMatrix(pAinv, tol, fixing, + pAinv->getGlobalMaxNumRowEntries()); + GetOStream(Statistics1) + << "NNZ Ainv: " << pAinv->getGlobalNumEntries() + << ", NNZ Tresholded Ainv (parameter: " << tol + << "): " << Ainv->getGlobalNumEntries() << std::endl; } - template - void InverseApproximationFactory::Build(Level& currentLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); - - using STS = Teuchos::ScalarTraits; - const SC one = STS::one(); - using Magnitude = typename Teuchos::ScalarTraits::magnitudeType; - - const ParameterList& pL = GetParameterList(); - const bool fixing = pL.get("inverse: fixing"); - - // check which approximation type to use - const std::string method = pL.get("inverse: approximation type"); - TEUCHOS_TEST_FOR_EXCEPTION(method != "diagonal" && method != "lumping" && method != "sparseapproxinverse", Exceptions::RuntimeError, - "MueLu::InverseApproximationFactory::Build: Approximation type can be 'diagonal' or 'lumping' or " - "'sparseapproxinverse'."); - - RCP A = Get >(currentLevel, "A"); - RCP bA = Teuchos::rcp_dynamic_cast(A); - const bool isBlocked = (bA == Teuchos::null ? 
false : true); - - // if blocked operator is used, defaults to A(0,0) - if(isBlocked) A = bA->getMatrix(0,0); - - const Magnitude tol = pL.get("inverse: drop tolerance"); - RCP Ainv = Teuchos::null; - - if(method=="diagonal") - { - const auto diag = VectorFactory::Build(A->getRangeMap(), true); - A->getLocalDiagCopy(*diag); - const RCP D = (!fixing ? Utilities::GetInverse(diag) : Utilities::GetInverse(diag, tol, one)); - Ainv = MatrixFactory::Build(D); - } - else if(method=="lumping") - { - const auto diag = Utilities::GetLumpedMatrixDiagonal(*A); - const RCP D = (!fixing ? Utilities::GetInverse(diag) : Utilities::GetInverse(diag, tol, one)); - Ainv = MatrixFactory::Build(D); + GetOStream(Statistics1) << "Approximate inverse calculated by: " << method + << "." << std::endl; + GetOStream(Statistics1) << "Ainv has " << Ainv->getGlobalNumRows() << "x" + << Ainv->getGlobalNumCols() << " rows and columns." + << std::endl; + + Set(currentLevel, "Ainv", Ainv); +} + +template +RCP> +InverseApproximationFactory::GetSparseInverse(const RCP &Aorg, + const RCP & + sparsityPattern) const { + + // construct the inverse matrix with the given sparsity pattern + RCP Ainv = MatrixFactory::Build(sparsityPattern); + Ainv->resumeFill(); + + // gather missing rows from other procs to generate an overlapping map + RCP rowImport = ImportFactory::Build(sparsityPattern->getRowMap(), + sparsityPattern->getColMap()); + RCP A = MatrixFactory::Build(Aorg, *rowImport); + + // loop over all rows of the inverse sparsity pattern (this can be done in + // parallel) + for (size_t k = 0; k < sparsityPattern->getLocalNumRows(); k++) { + + // 1. get column indices Ik of local row k + ArrayView Ik; + sparsityPattern->getLocalRowView(k, Ik); + + // 2. 
get all local A(Ik,:) rows + Array> J(Ik.size()); + Array> Ak(Ik.size()); + Array Jk; + for (LO i = 0; i < Ik.size(); i++) { + A->getLocalRowView(Ik[i], J[i], Ak[i]); + for (LO j = 0; j < J[i].size(); j++) + Jk.append(J[i][j]); } - else if(method=="sparseapproxinverse") - { - RCP sparsityPattern = Utilities::GetThresholdedGraph(A, tol, A->getGlobalMaxNumRowEntries()); - GetOStream(Statistics1) << "NNZ Graph(A): " << A->getCrsGraph()->getGlobalNumEntries() << " , NNZ Tresholded Graph(A): " << sparsityPattern->getGlobalNumEntries() << std::endl; - RCP pAinv = GetSparseInverse(A, sparsityPattern); - Ainv = Utilities::GetThresholdedMatrix(pAinv, tol, fixing, pAinv->getGlobalMaxNumRowEntries()); - GetOStream(Statistics1) << "NNZ Ainv: " << pAinv->getGlobalNumEntries() << ", NNZ Tresholded Ainv (parameter: " << tol << "): " << Ainv->getGlobalNumEntries() << std::endl; - } - - GetOStream(Statistics1) << "Approximate inverse calculated by: " << method << "." << std::endl; - GetOStream(Statistics1) << "Ainv has " << Ainv->getGlobalNumRows() << "x" << Ainv->getGlobalNumCols() << " rows and columns." << std::endl; - - Set(currentLevel, "Ainv", Ainv); - } - - template - RCP> - InverseApproximationFactory::GetSparseInverse(const RCP& Aorg, const RCP& sparsityPattern) const { - - // construct the inverse matrix with the given sparsity pattern - RCP Ainv = MatrixFactory::Build(sparsityPattern); - Ainv->resumeFill(); - - // gather missing rows from other procs to generate an overlapping map - RCP rowImport = ImportFactory::Build(sparsityPattern->getRowMap(), sparsityPattern->getColMap()); - RCP A = MatrixFactory::Build(Aorg, *rowImport); - - // loop over all rows of the inverse sparsity pattern (this can be done in parallel) - for(size_t k=0; kgetLocalNumRows(); k++) { - - // 1. get column indices Ik of local row k - ArrayView Ik; - sparsityPattern->getLocalRowView(k, Ik); - - // 2. 
get all local A(Ik,:) rows - Array> J(Ik.size()); - Array> Ak(Ik.size()); - Array Jk; - for (LO i = 0; i < Ik.size(); i++) { - A->getLocalRowView(Ik[i], J[i], Ak[i]); - for (LO j = 0; j < J[i].size(); j++) - Jk.append(J[i][j]); + // set of unique column indices Jk + std::sort(Jk.begin(), Jk.end()); + Jk.erase(std::unique(Jk.begin(), Jk.end()), Jk.end()); + // create map + std::map G; + for (LO i = 0; i < Jk.size(); i++) + G.insert(std::pair(Jk[i], i)); + + // 3. merge rows together + Teuchos::SerialDenseMatrix localA(Jk.size(), Ik.size(), true); + for (LO i = 0; i < Ik.size(); i++) { + for (LO j = 0; j < J[i].size(); j++) { + localA(G.at(J[i][j]), i) = Ak[i][j]; } - // set of unique column indices Jk - std::sort(Jk.begin(), Jk.end()); - Jk.erase(std::unique(Jk.begin(), Jk.end()), Jk.end()); - // create map - std::map G; - for (LO i = 0; i < Jk.size(); i++) G.insert(std::pair(Jk[i], i)); - - // 3. merge rows together - Teuchos::SerialDenseMatrix localA(Jk.size(), Ik.size(), true); - for (LO i = 0; i < Ik.size(); i++) { - for (LO j = 0; j < J[i].size(); j++) { - localA(G.at(J[i][j]), i) = Ak[i][j]; - } - } - - // 4. get direction-vector - // diagonal needs an entry! - Teuchos::SerialDenseVector ek(Jk.size(), true); - ek[std::find(Jk.begin(), Jk.end(), k) - Jk.begin()] = Teuchos::ScalarTraits::one();; - - // 5. solve linear system for x - Teuchos::SerialDenseVector localX(Ik.size()); - Teuchos::SerialQRDenseSolver qrSolver; - qrSolver.setMatrix(Teuchos::rcp(&localA, false)); - qrSolver.setVectors(Teuchos::rcp(&localX, false), Teuchos::rcp(&ek, false)); - const int err = qrSolver.solve(); - TEUCHOS_TEST_FOR_EXCEPTION(err != 0, Exceptions::RuntimeError, - "MueLu::InverseApproximationFactory::GetSparseInverse: Error in serial QR solve."); - - // 6. set calculated row into Ainv - ArrayView Mk(localX.values(), localX.length()); - Ainv->replaceLocalValues(k, Ik, Mk); - } - Ainv->fillComplete(); - return Ainv; + // 4. get direction-vector + // diagonal needs an entry! 
+ Teuchos::SerialDenseVector ek(Jk.size(), true); + ek[std::find(Jk.begin(), Jk.end(), k) - Jk.begin()] = + Teuchos::ScalarTraits::one(); + ; + + // 5. solve linear system for x + Teuchos::SerialDenseVector localX(Ik.size()); + Teuchos::SerialQRDenseSolver qrSolver; + qrSolver.setMatrix(Teuchos::rcp(&localA, false)); + qrSolver.setVectors(Teuchos::rcp(&localX, false), Teuchos::rcp(&ek, false)); + const int err = qrSolver.solve(); + TEUCHOS_TEST_FOR_EXCEPTION(err != 0, Exceptions::RuntimeError, + "MueLu::InverseApproximationFactory::" + "GetSparseInverse: Error in serial QR solve."); + + // 6. set calculated row into Ainv + ArrayView Mk(localX.values(), localX.length()); + Ainv->replaceLocalValues(k, Ik, Mk); } + Ainv->fillComplete(); + + return Ainv; +} } // namespace MueLu diff --git a/packages/muelu/src/Misc/MueLu_LineDetectionFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_LineDetectionFactory_decl.hpp index 9a8207ae430a..5106c8cb231a 100644 --- a/packages/muelu/src/Misc/MueLu_LineDetectionFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_LineDetectionFactory_decl.hpp @@ -47,9 +47,9 @@ #define MUELU_LINEDETECTIONFACTORY_DECL_HPP // same as in SemiCoarsenPFactory (TODO rework this) -#define VERTICAL 1 -#define HORIZONTAL 2 -#define GRID_SUPPLIED -1 +#define VERTICAL 1 +#define HORIZONTAL 2 +#define GRID_SUPPLIED -1 #include "MueLu_ConfigDefs.hpp" #include "MueLu_LineDetectionFactory_fwd.hpp" @@ -59,82 +59,77 @@ namespace MueLu { - /*! - @class LineDetectionFactory class. - @brief Factory for building line detection information - */ - - template - class LineDetectionFactory : public SingleLevelFactoryBase { +/*! + @class LineDetectionFactory class. 
+ @brief Factory for building line detection information +*/ + +template +class LineDetectionFactory : public SingleLevelFactoryBase { #undef MUELU_LINEDETECTIONFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: +public: + using coordinate_type = typename Teuchos::ScalarTraits::coordinateType; + using CoordinateMultiVector = + typename Xpetra::MultiVector; - using coordinate_type = typename Teuchos::ScalarTraits::coordinateType; - using CoordinateMultiVector = typename Xpetra::MultiVector; + //! @name Constructors/Destructors. + //@{ - //! @name Constructors/Destructors. - //@{ + LineDetectionFactory() : Zorientation_(VERTICAL) {} - LineDetectionFactory() : Zorientation_(VERTICAL) { } + //! Destructor. + virtual ~LineDetectionFactory() {} - //! Destructor. - virtual ~LineDetectionFactory() { } + RCP GetValidParameterList() const; - RCP GetValidParameterList() const; + //@} - //@} + //! Input + //@{ - //! Input - //@{ + void DeclareInput(Level ¤tLevel) const; - void DeclareInput(Level& currentLevel) const; + //@} - //@} + //! @name Build methods. + //@{ - //! @name Build methods. - //@{ - - /*! - @brief Build method. + /*! + @brief Build method. 
- Builds line detection information and stores it in currentLevel - */ - void Build(Level& currentLevel) const; + Builds line detection information and stores it in currentLevel + */ + void Build(Level ¤tLevel) const; - //@} + //@} - private: - void sort_coordinates(LO numCoords, LO* OrigLoc, - coordinate_type* xvals, - coordinate_type* yvals, - coordinate_type* zvals, - coordinate_type* xtemp, - coordinate_type* ytemp, - coordinate_type* ztemp, - bool flipXY = false) const; +private: + void sort_coordinates(LO numCoords, LO *OrigLoc, coordinate_type *xvals, + coordinate_type *yvals, coordinate_type *zvals, + coordinate_type *xtemp, coordinate_type *ytemp, + coordinate_type *ztemp, bool flipXY = false) const; - LO ML_compute_line_info(LO LayerId[], LO VertLineId[], - LO Ndof, LO DofsPerNode, - LO MeshNumbering, LO NumNodesPerVertLine, - coordinate_type *xvals, coordinate_type *yvals, coordinate_type *zvals, - const Teuchos::Comm& comm ) const ; + LO ML_compute_line_info(LO LayerId[], LO VertLineId[], LO Ndof, + LO DofsPerNode, LO MeshNumbering, + LO NumNodesPerVertLine, coordinate_type *xvals, + coordinate_type *yvals, coordinate_type *zvals, + const Teuchos::Comm &comm) const; - void ML_az_dsort2(coordinate_type dlist[], LO N, LO list2[]) const; + void ML_az_dsort2(coordinate_type dlist[], LO N, LO list2[]) const; - //! internally stores line detection mode - //! can be either vertical, horizontal or coordinates - //! for the first run. On the coarser levels we automatically - //! switch to vertical mode - mutable LO Zorientation_; + //! internally stores line detection mode + //! can be either vertical, horizontal or coordinates + //! for the first run. On the coarser levels we automatically + //! 
switch to vertical mode + mutable LO Zorientation_; - }; //class LineDetectionFactory +}; // class LineDetectionFactory -} //namespace MueLu +} // namespace MueLu #define MUELU_LINEDETECTIONFACTORY_SHORT #endif // MUELU_LINEDETECTIONFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_LineDetectionFactory_def.hpp b/packages/muelu/src/Misc/MueLu_LineDetectionFactory_def.hpp index f16534229ce6..3f98153dfccb 100644 --- a/packages/muelu/src/Misc/MueLu_LineDetectionFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_LineDetectionFactory_def.hpp @@ -57,452 +57,570 @@ namespace MueLu { - template - RCP LineDetectionFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - -#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("linedetection: orientation"); - SET_VALID_ENTRY("linedetection: num layers"); -#undef SET_VALID_ENTRY - - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A"); - validParamList->set< RCP >("Coordinates", Teuchos::null, "Generating factory for coorindates"); - - return validParamList; - } - - template - void LineDetectionFactory::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "A"); - - // The factory needs the information about the number of z-layers. While this information is - // provided by the user for the finest level, the factory itself is responsible to provide the - // corresponding information on the coarser levels. Since a factory cannot be dependent on itself - // we use the NoFactory class as generator class, but remove the UserData keep flag, such that - // "NumZLayers" is part of the request/release mechanism. - // Please note, that this prevents us from having several (independent) CoarsePFactory instances! 
- // TODO: allow factory to dependent on self-generated data for TwoLevelFactories -> introduce ExpertRequest/Release in Level - currentLevel.DeclareInput("NumZLayers", NoFactory::get(), this); - currentLevel.RemoveKeepFlag("NumZLayers", NoFactory::get(), MueLu::UserData); +template +RCP +LineDetectionFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + +#define SET_VALID_ENTRY(name) \ + validParamList->setEntry(name, MasterList::getEntry(name)) + SET_VALID_ENTRY("linedetection: orientation"); + SET_VALID_ENTRY("linedetection: num layers"); +#undef SET_VALID_ENTRY + + validParamList->set>( + "A", Teuchos::null, "Generating factory of the matrix A"); + validParamList->set>( + "Coordinates", Teuchos::null, "Generating factory for coorindates"); + + return validParamList; +} + +template +void LineDetectionFactory::DeclareInput(Level ¤tLevel) const { + Input(currentLevel, "A"); + + // The factory needs the information about the number of z-layers. While this + // information is provided by the user for the finest level, the factory + // itself is responsible to provide the corresponding information on the + // coarser levels. Since a factory cannot be dependent on itself we use the + // NoFactory class as generator class, but remove the UserData keep flag, such + // that "NumZLayers" is part of the request/release mechanism. Please note, + // that this prevents us from having several (independent) CoarsePFactory + // instances! 
+ // TODO: allow factory to dependent on self-generated data for + // TwoLevelFactories -> introduce ExpertRequest/Release in Level + currentLevel.DeclareInput("NumZLayers", NoFactory::get(), this); + currentLevel.RemoveKeepFlag("NumZLayers", NoFactory::get(), MueLu::UserData); +} + +template +void LineDetectionFactory::Build( + Level ¤tLevel) const { + FactoryMonitor m(*this, "Line detection (Ray style)", currentLevel); + + LO NumZDir = 0; + RCP fineCoords; + ArrayRCP x, y, z; + coordinate_type *xptr = NULL, *yptr = NULL, *zptr = NULL; + + // obtain general variables + RCP A = Get>(currentLevel, "A"); + LO BlkSize = A->GetFixedBlockSize(); + RCP rowMap = A->getRowMap(); + LO Ndofs = rowMap->getLocalNumElements(); + LO Nnodes = Ndofs / BlkSize; + + // collect information provided by user + const ParameterList &pL = GetParameterList(); + const std::string lineOrientation = + pL.get("linedetection: orientation"); + + // interpret "line orientation" parameter provided by the user on the finest + // level + if (currentLevel.GetLevelID() == 0) { + if (lineOrientation == "vertical") + Zorientation_ = VERTICAL; + else if (lineOrientation == "horizontal") + Zorientation_ = HORIZONTAL; + else if (lineOrientation == "coordinates") + Zorientation_ = GRID_SUPPLIED; + else + TEUCHOS_TEST_FOR_EXCEPTION( + false, Exceptions::RuntimeError, + "LineDetectionFactory: The parameter 'semicoarsen: line orientation' " + "must be either 'vertical', 'horizontal' or 'coordinates'."); } - template - void LineDetectionFactory::Build(Level& currentLevel) const { - FactoryMonitor m(*this, "Line detection (Ray style)", currentLevel); - - LO NumZDir = 0; - RCP fineCoords; - ArrayRCP x, y, z; - coordinate_type *xptr = NULL, *yptr = NULL, *zptr = NULL; - - // obtain general variables - RCP A = Get< RCP > (currentLevel, "A"); - LO BlkSize = A->GetFixedBlockSize(); - RCP rowMap = A->getRowMap(); - LO Ndofs = rowMap->getLocalNumElements(); - LO Nnodes = Ndofs/BlkSize; - - // collect information 
provided by user - const ParameterList& pL = GetParameterList(); - const std::string lineOrientation = pL.get("linedetection: orientation"); - - // interpret "line orientation" parameter provided by the user on the finest level - if(currentLevel.GetLevelID() == 0) { - if(lineOrientation=="vertical") - Zorientation_ = VERTICAL; - else if (lineOrientation=="horizontal") - Zorientation_ = HORIZONTAL; - else if (lineOrientation=="coordinates") - Zorientation_ = GRID_SUPPLIED; - else - TEUCHOS_TEST_FOR_EXCEPTION(false, Exceptions::RuntimeError, "LineDetectionFactory: The parameter 'semicoarsen: line orientation' must be either 'vertical', 'horizontal' or 'coordinates'."); - } - - //TEUCHOS_TEST_FOR_EXCEPTION(Zorientation_!=VERTICAL, Exceptions::RuntimeError, "LineDetectionFactory: The 'horizontal' or 'coordinates' have not been tested!!!. Please remove this exception check and carefully test these modes!"); - - // obtain number of z layers (variable over levels) - // This information is user-provided on the finest level and transferred to the coarser - // levels by the SemiCoarsenPFactor using the internal "NumZLayers" variable. 
- if(currentLevel.GetLevelID() == 0) { - if(currentLevel.IsAvailable("NumZLayers", NoFactory::get())) { - NumZDir = currentLevel.Get("NumZLayers", NoFactory::get()); //obtain info - GetOStream(Runtime1) << "Number of layers for line detection: " << NumZDir << " (information from Level(0))" << std::endl; - } else { - // check whether user provides information or it can be reconstructed from coordinates - NumZDir = pL.get("linedetection: num layers"); - if(NumZDir == -1) { - bool CoordsAvail = currentLevel.IsAvailable("Coordinates"); - - if (CoordsAvail == true) { - // try to reconstruct the number of layers from coordinates - fineCoords = Get< RCP > (currentLevel, "Coordinates"); - TEUCHOS_TEST_FOR_EXCEPTION(fineCoords->getNumVectors() != 3, Exceptions::RuntimeError, "Three coordinates arrays must be supplied if line detection orientation not given."); - x = fineCoords->getDataNonConst(0); - y = fineCoords->getDataNonConst(1); - z = fineCoords->getDataNonConst(2); - xptr = x.getRawPtr(); - yptr = y.getRawPtr(); - zptr = z.getRawPtr(); - - LO NumCoords = Ndofs/BlkSize; - - /* sort coordinates so that we can order things according to lines */ - Teuchos::ArrayRCP TOrigLoc= Teuchos::arcp(NumCoords); LO* OrigLoc= TOrigLoc.getRawPtr(); - Teuchos::ArrayRCP Txtemp = Teuchos::arcp(NumCoords); coordinate_type* xtemp = Txtemp.getRawPtr(); - Teuchos::ArrayRCP Tytemp = Teuchos::arcp(NumCoords); coordinate_type* ytemp = Tytemp.getRawPtr(); - Teuchos::ArrayRCP Tztemp = Teuchos::arcp(NumCoords); coordinate_type* ztemp = Tztemp.getRawPtr(); - - // sort coordinates in {x,y,z}vals (returned in {x,y,z}temp) so that we can order things according to lines - // switch x and y coordinates for semi-coarsening... 
- sort_coordinates(NumCoords, OrigLoc, xptr, yptr, zptr, xtemp, ytemp, ztemp, true); - - /* go through each vertical line and populate blockIndices so all */ - /* dofs within a PDE within a vertical line correspond to one block.*/ - LO NumBlocks = 0; - LO NumNodesPerVertLine = 0; - LO index = 0; - - while ( index < NumCoords ) { - coordinate_type xfirst = xtemp[index]; coordinate_type yfirst = ytemp[index]; - LO next = index+1; - while ( (next != NumCoords) && (xtemp[next] == xfirst) && - (ytemp[next] == yfirst)) - next++; - if (NumBlocks == 0) { - NumNodesPerVertLine = next-index; - } - // the number of vertical lines must be the same on all processors - // TAW: Sep 14 2015: or zero as we allow "empty" processors - //TEUCHOS_TEST_FOR_EXCEPTION(next-index != NumNodesPerVertLine,Exceptions::RuntimeError, "Error code only works for constant block size now!!!\n"); - NumBlocks++; - index = next; + // TEUCHOS_TEST_FOR_EXCEPTION(Zorientation_!=VERTICAL, + // Exceptions::RuntimeError, "LineDetectionFactory: The 'horizontal' or + // 'coordinates' have not been tested!!!. Please remove this exception check + // and carefully test these modes!"); + + // obtain number of z layers (variable over levels) + // This information is user-provided on the finest level and transferred to + // the coarser levels by the SemiCoarsenPFactor using the internal + // "NumZLayers" variable. 
+ if (currentLevel.GetLevelID() == 0) { + if (currentLevel.IsAvailable("NumZLayers", NoFactory::get())) { + NumZDir = + currentLevel.Get("NumZLayers", NoFactory::get()); // obtain info + GetOStream(Runtime1) << "Number of layers for line detection: " << NumZDir + << " (information from Level(0))" << std::endl; + } else { + // check whether user provides information or it can be reconstructed from + // coordinates + NumZDir = pL.get("linedetection: num layers"); + if (NumZDir == -1) { + bool CoordsAvail = currentLevel.IsAvailable("Coordinates"); + + if (CoordsAvail == true) { + // try to reconstruct the number of layers from coordinates + fineCoords = + Get>(currentLevel, "Coordinates"); + TEUCHOS_TEST_FOR_EXCEPTION( + fineCoords->getNumVectors() != 3, Exceptions::RuntimeError, + "Three coordinates arrays must be supplied if line detection " + "orientation not given."); + x = fineCoords->getDataNonConst(0); + y = fineCoords->getDataNonConst(1); + z = fineCoords->getDataNonConst(2); + xptr = x.getRawPtr(); + yptr = y.getRawPtr(); + zptr = z.getRawPtr(); + + LO NumCoords = Ndofs / BlkSize; + + /* sort coordinates so that we can order things according to lines */ + Teuchos::ArrayRCP TOrigLoc = Teuchos::arcp(NumCoords); + LO *OrigLoc = TOrigLoc.getRawPtr(); + Teuchos::ArrayRCP Txtemp = + Teuchos::arcp(NumCoords); + coordinate_type *xtemp = Txtemp.getRawPtr(); + Teuchos::ArrayRCP Tytemp = + Teuchos::arcp(NumCoords); + coordinate_type *ytemp = Tytemp.getRawPtr(); + Teuchos::ArrayRCP Tztemp = + Teuchos::arcp(NumCoords); + coordinate_type *ztemp = Tztemp.getRawPtr(); + + // sort coordinates in {x,y,z}vals (returned in {x,y,z}temp) so that + // we can order things according to lines switch x and y coordinates + // for semi-coarsening... 
+ sort_coordinates(NumCoords, OrigLoc, xptr, yptr, zptr, xtemp, ytemp, + ztemp, true); + + /* go through each vertical line and populate blockIndices so all */ + /* dofs within a PDE within a vertical line correspond to one block.*/ + LO NumBlocks = 0; + LO NumNodesPerVertLine = 0; + LO index = 0; + + while (index < NumCoords) { + coordinate_type xfirst = xtemp[index]; + coordinate_type yfirst = ytemp[index]; + LO next = index + 1; + while ((next != NumCoords) && (xtemp[next] == xfirst) && + (ytemp[next] == yfirst)) + next++; + if (NumBlocks == 0) { + NumNodesPerVertLine = next - index; } - - NumZDir = NumNodesPerVertLine; - GetOStream(Runtime1) << "Number of layers for line detection: " << NumZDir << " (information reconstructed from provided node coordinates)" << std::endl; - } else { - TEUCHOS_TEST_FOR_EXCEPTION(false, Exceptions::RuntimeError, "LineDetectionFactory: BuildP: User has to provide valid number of layers (e.g. using the 'line detection: num layers' parameter)."); + // the number of vertical lines must be the same on all processors + // TAW: Sep 14 2015: or zero as we allow "empty" processors + // TEUCHOS_TEST_FOR_EXCEPTION(next-index != + // NumNodesPerVertLine,Exceptions::RuntimeError, "Error code only + // works for constant block size now!!!\n"); + NumBlocks++; + index = next; } + + NumZDir = NumNodesPerVertLine; + GetOStream(Runtime1) + << "Number of layers for line detection: " << NumZDir + << " (information reconstructed from provided node coordinates)" + << std::endl; } else { - GetOStream(Runtime1) << "Number of layers for line detection: " << NumZDir << " (information provided by user through 'line detection: num layers')" << std::endl; + TEUCHOS_TEST_FOR_EXCEPTION( + false, Exceptions::RuntimeError, + "LineDetectionFactory: BuildP: User has to provide valid number " + "of layers (e.g. 
using the 'line detection: num layers' " + "parameter)."); } - } // end else (user provides information or can be reconstructed) on finest level - } else { - // coarse level information - // TODO get rid of NoFactory here and use SemiCoarsenPFactory as source of NumZLayers instead. - if(currentLevel.IsAvailable("NumZLayers", NoFactory::get())) { - NumZDir = currentLevel.Get("NumZLayers", NoFactory::get()); //obtain info - GetOStream(Runtime1) << "Number of layers for line detection: " << NumZDir << std::endl; } else { - TEUCHOS_TEST_FOR_EXCEPTION(false, Exceptions::RuntimeError, "LineDetectionFactory: BuildP: No NumZLayers variable found. This cannot be."); - } - } - - // plausibility check and further variable collection - if (Zorientation_ == GRID_SUPPLIED) { // On finest level, fetch user-provided coordinates if available... - bool CoordsAvail = currentLevel.IsAvailable("Coordinates"); - - if (CoordsAvail == false) { - if (currentLevel.GetLevelID() == 0) - throw Exceptions::RuntimeError("Coordinates must be supplied if line detection orientation not given."); - else - throw Exceptions::RuntimeError("Coordinates not generated by previous invocation of LineDetectionFactory's BuildP() method."); + GetOStream(Runtime1) + << "Number of layers for line detection: " << NumZDir + << " (information provided by user through 'line detection: num " + "layers')" + << std::endl; } - fineCoords = Get< RCP > (currentLevel, "Coordinates"); - TEUCHOS_TEST_FOR_EXCEPTION(fineCoords->getNumVectors() != 3, Exceptions::RuntimeError, "Three coordinates arrays must be supplied if line detection orientation not given."); - x = fineCoords->getDataNonConst(0); - y = fineCoords->getDataNonConst(1); - z = fineCoords->getDataNonConst(2); - xptr = x.getRawPtr(); - yptr = y.getRawPtr(); - zptr = z.getRawPtr(); - } - - // perform line detection - if (NumZDir > 0) { - LO *LayerId, *VertLineId; - Teuchos::ArrayRCP TLayerId = Teuchos::arcp(Nnodes); LayerId = TLayerId.getRawPtr(); - 
Teuchos::ArrayRCP TVertLineId= Teuchos::arcp(Nnodes); VertLineId = TVertLineId.getRawPtr(); - - NumZDir = ML_compute_line_info(LayerId, VertLineId, Ndofs, BlkSize, - Zorientation_, NumZDir,xptr,yptr,zptr, *(rowMap->getComm())); - //it is NumZDir=NCLayers*NVertLines*DofsPerNode; - - // store output data on current level - // The line detection data is used by the SemiCoarsenPFactory and the line smoothers in Ifpack/Ifpack2 - Set(currentLevel, "CoarseNumZLayers", NumZDir); - Set(currentLevel, "LineDetection_Layers", TLayerId); - Set(currentLevel, "LineDetection_VertLineIds", TVertLineId); + } // end else (user provides information or can be reconstructed) on finest + // level + } else { + // coarse level information + // TODO get rid of NoFactory here and use SemiCoarsenPFactory as source of + // NumZLayers instead. + if (currentLevel.IsAvailable("NumZLayers", NoFactory::get())) { + NumZDir = + currentLevel.Get("NumZLayers", NoFactory::get()); // obtain info + GetOStream(Runtime1) << "Number of layers for line detection: " << NumZDir + << std::endl; } else { - Teuchos::ArrayRCP TLayerId = Teuchos::arcp(0); - Teuchos::ArrayRCP TVertLineId = Teuchos::arcp(0); - Teuchos::ArrayRCP TVertLineIdSmoo= Teuchos::arcp(0); - - // store output data on current level - // The line detection data is used by the SemiCoarsenPFactory and the line smoothers in Ifpack/Ifpack2 - Set(currentLevel, "CoarseNumZLayers", NumZDir); - Set(currentLevel, "LineDetection_Layers", TLayerId); - Set(currentLevel, "LineDetection_VertLineIds", TVertLineId); + TEUCHOS_TEST_FOR_EXCEPTION(false, Exceptions::RuntimeError, + "LineDetectionFactory: BuildP: No NumZLayers " + "variable found. 
This cannot be."); } - - // automatically switch to vertical mode on the coarser levels - if(Zorientation_ != VERTICAL) - Zorientation_ = VERTICAL; } - template - LocalOrdinal LineDetectionFactory::ML_compute_line_info(LocalOrdinal LayerId[], LocalOrdinal VertLineId[], LocalOrdinal Ndof, LocalOrdinal DofsPerNode, LocalOrdinal MeshNumbering, LocalOrdinal NumNodesPerVertLine, typename Teuchos::ScalarTraits::coordinateType *xvals, typename Teuchos::ScalarTraits::coordinateType *yvals, typename Teuchos::ScalarTraits::coordinateType *zvals, const Teuchos::Comm& /* comm */) const { - - LO Nnodes, NVertLines, MyNode; - LO NumCoords, next; //, subindex, subnext; - coordinate_type xfirst, yfirst; - coordinate_type *xtemp, *ytemp, *ztemp; - LO *OrigLoc; - LO i,j,count; - LO RetVal; - - RetVal = 0; - if ((MeshNumbering != VERTICAL) && (MeshNumbering != HORIZONTAL)) { - if ( (xvals == NULL) || (yvals == NULL) || (zvals == NULL)) RetVal = -1; - } - else { - if (NumNodesPerVertLine == -1) RetVal = -4; - if ( ((Ndof/DofsPerNode)%NumNodesPerVertLine) != 0) RetVal = -3; - } - if ( (Ndof%DofsPerNode) != 0) RetVal = -2; - - TEUCHOS_TEST_FOR_EXCEPTION(RetVal == -1, Exceptions::RuntimeError, "Not semicoarsening as no mesh numbering information or coordinates are given\n"); - TEUCHOS_TEST_FOR_EXCEPTION(RetVal == -4, Exceptions::RuntimeError, "Not semicoarsening as the number of z nodes is not given.\n"); - TEUCHOS_TEST_FOR_EXCEPTION(RetVal == -3, Exceptions::RuntimeError, "Not semicoarsening as the total number of nodes is not evenly divisible by the number of z direction nodes .\n"); - TEUCHOS_TEST_FOR_EXCEPTION(RetVal == -2, Exceptions::RuntimeError, "Not semicoarsening as something is off with the number of degrees-of-freedom per node.\n"); - - Nnodes = Ndof/DofsPerNode; - for (MyNode = 0; MyNode < Nnodes; MyNode++) VertLineId[MyNode] = -1; - for (MyNode = 0; MyNode < Nnodes; MyNode++) LayerId[MyNode] = -1; - - if (MeshNumbering == VERTICAL) { - for (MyNode = 0; MyNode < Nnodes; 
MyNode++) { - LayerId[MyNode]= MyNode%NumNodesPerVertLine; - VertLineId[MyNode]= (MyNode- LayerId[MyNode])/NumNodesPerVertLine; - } - } - else if (MeshNumbering == HORIZONTAL) { - NVertLines = Nnodes/NumNodesPerVertLine; - for (MyNode = 0; MyNode < Nnodes; MyNode++) { - VertLineId[MyNode] = MyNode%NVertLines; - LayerId[MyNode] = (MyNode- VertLineId[MyNode])/NVertLines; - } - } - else { - // coordinates mode: we distinguish between vertical line numbering for semi-coarsening and line smoothing - NumCoords = Ndof/DofsPerNode; - - // reserve temporary memory - Teuchos::ArrayRCP TOrigLoc= Teuchos::arcp(NumCoords); OrigLoc= TOrigLoc.getRawPtr(); - Teuchos::ArrayRCP Txtemp = Teuchos::arcp(NumCoords); xtemp = Txtemp.getRawPtr(); - Teuchos::ArrayRCP Tytemp = Teuchos::arcp(NumCoords); ytemp = Tytemp.getRawPtr(); - Teuchos::ArrayRCP Tztemp = Teuchos::arcp(NumCoords); ztemp = Tztemp.getRawPtr(); - - // build vertical line info for semi-coarsening - - // sort coordinates in {x,y,z}vals (returned in {x,y,z}temp) so that we can order things according to lines - // switch x and y coordinates for semi-coarsening... - sort_coordinates(NumCoords, OrigLoc, xvals, yvals, zvals, xtemp, ytemp, ztemp, /*true*/ true); - - LO NumBlocks = 0; - LO index = 0; - - while ( index < NumCoords ) { - xfirst = xtemp[index]; yfirst = ytemp[index]; - next = index+1; - while ( (next != NumCoords) && (xtemp[next] == xfirst) && - (ytemp[next] == yfirst)) - next++; - if (NumBlocks == 0) { - NumNodesPerVertLine = next-index; - } - // The number of vertical lines must be the same on all processors - // TAW: Sep 14, 2015: or zero as we allow for empty processors. 
- //TEUCHOS_TEST_FOR_EXCEPTION(next-index != NumNodesPerVertLine,Exceptions::RuntimeError, "Error code only works for constant block size now!!!\n"); - count = 0; - for (j= index; j < next; j++) { - VertLineId[OrigLoc[j]] = NumBlocks; - LayerId[OrigLoc[j]] = count++; - } - NumBlocks++; - index = next; - } - } - - /* check that everyone was assigned */ + // plausibility check and further variable collection + if (Zorientation_ == GRID_SUPPLIED) { // On finest level, fetch user-provided + // coordinates if available... + bool CoordsAvail = currentLevel.IsAvailable("Coordinates"); - for (i = 0; i < Nnodes; i++) { - if (VertLineId[i] == -1) { - GetOStream(Warnings1) << "Warning: did not assign " << i << " to a vertical line?????\n" << std::endl; - } - if (LayerId[i] == -1) { - GetOStream(Warnings1) << "Warning: did not assign " << i << " to a Layer?????\n" << std::endl; - } + if (CoordsAvail == false) { + if (currentLevel.GetLevelID() == 0) + throw Exceptions::RuntimeError("Coordinates must be supplied if line " + "detection orientation not given."); + else + throw Exceptions::RuntimeError( + "Coordinates not generated by previous invocation of " + "LineDetectionFactory's BuildP() method."); } + fineCoords = Get>(currentLevel, "Coordinates"); + TEUCHOS_TEST_FOR_EXCEPTION(fineCoords->getNumVectors() != 3, + Exceptions::RuntimeError, + "Three coordinates arrays must be supplied if " + "line detection orientation not given."); + x = fineCoords->getDataNonConst(0); + y = fineCoords->getDataNonConst(1); + z = fineCoords->getDataNonConst(2); + xptr = x.getRawPtr(); + yptr = y.getRawPtr(); + zptr = z.getRawPtr(); + } - // TAW: Sep 14 2015: relax plausibility checks as we allow for empty processors - //MueLu_maxAll(&comm, NumNodesPerVertLine, i); - //if (NumNodesPerVertLine == -1) NumNodesPerVertLine = i; - //TEUCHOS_TEST_FOR_EXCEPTION(NumNodesPerVertLine != i,Exceptions::RuntimeError, "Different processors have different z direction line lengths?\n"); - - return 
NumNodesPerVertLine; + // perform line detection + if (NumZDir > 0) { + LO *LayerId, *VertLineId; + Teuchos::ArrayRCP TLayerId = Teuchos::arcp(Nnodes); + LayerId = TLayerId.getRawPtr(); + Teuchos::ArrayRCP TVertLineId = Teuchos::arcp(Nnodes); + VertLineId = TVertLineId.getRawPtr(); + + NumZDir = + ML_compute_line_info(LayerId, VertLineId, Ndofs, BlkSize, Zorientation_, + NumZDir, xptr, yptr, zptr, *(rowMap->getComm())); + // it is NumZDir=NCLayers*NVertLines*DofsPerNode; + + // store output data on current level + // The line detection data is used by the SemiCoarsenPFactory and the line + // smoothers in Ifpack/Ifpack2 + Set(currentLevel, "CoarseNumZLayers", NumZDir); + Set(currentLevel, "LineDetection_Layers", TLayerId); + Set(currentLevel, "LineDetection_VertLineIds", TVertLineId); + } else { + Teuchos::ArrayRCP TLayerId = Teuchos::arcp(0); + Teuchos::ArrayRCP TVertLineId = Teuchos::arcp(0); + Teuchos::ArrayRCP TVertLineIdSmoo = Teuchos::arcp(0); + + // store output data on current level + // The line detection data is used by the SemiCoarsenPFactory and the line + // smoothers in Ifpack/Ifpack2 + Set(currentLevel, "CoarseNumZLayers", NumZDir); + Set(currentLevel, "LineDetection_Layers", TLayerId); + Set(currentLevel, "LineDetection_VertLineIds", TVertLineId); } - /* Private member function to sort coordinates in arrays. This is an expert routine. 
Do not use or change.*/ - template - void LineDetectionFactory::sort_coordinates(LO numCoords, LO* OrigLoc, - typename Teuchos::ScalarTraits::coordinateType* xvals, - typename Teuchos::ScalarTraits::coordinateType* yvals, - typename Teuchos::ScalarTraits::coordinateType* zvals, - typename Teuchos::ScalarTraits::coordinateType* xtemp, - typename Teuchos::ScalarTraits::coordinateType* ytemp, - typename Teuchos::ScalarTraits::coordinateType* ztemp, - bool flipXY) const { - - if( flipXY == false ) { // for line-smoothing - for (LO i = 0; i < numCoords; i++) xtemp[i]= xvals[i]; - } else { // for semi-coarsening - for (LO i = 0; i < numCoords; i++) xtemp[i]= yvals[i]; + // automatically switch to vertical mode on the coarser levels + if (Zorientation_ != VERTICAL) + Zorientation_ = VERTICAL; +} + +template +LocalOrdinal LineDetectionFactory:: + ML_compute_line_info( + LocalOrdinal LayerId[], LocalOrdinal VertLineId[], LocalOrdinal Ndof, + LocalOrdinal DofsPerNode, LocalOrdinal MeshNumbering, + LocalOrdinal NumNodesPerVertLine, + typename Teuchos::ScalarTraits::coordinateType *xvals, + typename Teuchos::ScalarTraits::coordinateType *yvals, + typename Teuchos::ScalarTraits::coordinateType *zvals, + const Teuchos::Comm & /* comm */) const { + + LO Nnodes, NVertLines, MyNode; + LO NumCoords, next; //, subindex, subnext; + coordinate_type xfirst, yfirst; + coordinate_type *xtemp, *ytemp, *ztemp; + LO *OrigLoc; + LO i, j, count; + LO RetVal; + + RetVal = 0; + if ((MeshNumbering != VERTICAL) && (MeshNumbering != HORIZONTAL)) { + if ((xvals == NULL) || (yvals == NULL) || (zvals == NULL)) + RetVal = -1; + } else { + if (NumNodesPerVertLine == -1) + RetVal = -4; + if (((Ndof / DofsPerNode) % NumNodesPerVertLine) != 0) + RetVal = -3; + } + if ((Ndof % DofsPerNode) != 0) + RetVal = -2; + + TEUCHOS_TEST_FOR_EXCEPTION(RetVal == -1, Exceptions::RuntimeError, + "Not semicoarsening as no mesh numbering " + "information or coordinates are given\n"); + TEUCHOS_TEST_FOR_EXCEPTION( + RetVal 
== -4, Exceptions::RuntimeError, + "Not semicoarsening as the number of z nodes is not given.\n"); + TEUCHOS_TEST_FOR_EXCEPTION( + RetVal == -3, Exceptions::RuntimeError, + "Not semicoarsening as the total number of nodes is not evenly divisible " + "by the number of z direction nodes .\n"); + TEUCHOS_TEST_FOR_EXCEPTION(RetVal == -2, Exceptions::RuntimeError, + "Not semicoarsening as something is off with the " + "number of degrees-of-freedom per node.\n"); + + Nnodes = Ndof / DofsPerNode; + for (MyNode = 0; MyNode < Nnodes; MyNode++) + VertLineId[MyNode] = -1; + for (MyNode = 0; MyNode < Nnodes; MyNode++) + LayerId[MyNode] = -1; + + if (MeshNumbering == VERTICAL) { + for (MyNode = 0; MyNode < Nnodes; MyNode++) { + LayerId[MyNode] = MyNode % NumNodesPerVertLine; + VertLineId[MyNode] = (MyNode - LayerId[MyNode]) / NumNodesPerVertLine; } - for (LO i = 0; i < numCoords; i++) OrigLoc[i]= i; - - ML_az_dsort2(xtemp,numCoords,OrigLoc); - if( flipXY == false ) { // for line-smoothing - for (LO i = 0; i < numCoords; i++) ytemp[i]= yvals[OrigLoc[i]]; - } else { - for (LO i = 0; i < numCoords; i++) ytemp[i]= xvals[OrigLoc[i]]; + } else if (MeshNumbering == HORIZONTAL) { + NVertLines = Nnodes / NumNodesPerVertLine; + for (MyNode = 0; MyNode < Nnodes; MyNode++) { + VertLineId[MyNode] = MyNode % NVertLines; + LayerId[MyNode] = (MyNode - VertLineId[MyNode]) / NVertLines; } - + } else { + // coordinates mode: we distinguish between vertical line numbering for + // semi-coarsening and line smoothing + NumCoords = Ndof / DofsPerNode; + + // reserve temporary memory + Teuchos::ArrayRCP TOrigLoc = Teuchos::arcp(NumCoords); + OrigLoc = TOrigLoc.getRawPtr(); + Teuchos::ArrayRCP Txtemp = + Teuchos::arcp(NumCoords); + xtemp = Txtemp.getRawPtr(); + Teuchos::ArrayRCP Tytemp = + Teuchos::arcp(NumCoords); + ytemp = Tytemp.getRawPtr(); + Teuchos::ArrayRCP Tztemp = + Teuchos::arcp(NumCoords); + ztemp = Tztemp.getRawPtr(); + + // build vertical line info for semi-coarsening + + // sort 
coordinates in {x,y,z}vals (returned in {x,y,z}temp) so that we can + // order things according to lines switch x and y coordinates for + // semi-coarsening... + sort_coordinates(NumCoords, OrigLoc, xvals, yvals, zvals, xtemp, ytemp, + ztemp, /*true*/ true); + + LO NumBlocks = 0; LO index = 0; - while ( index < numCoords ) { - coordinate_type xfirst = xtemp[index]; - LO next = index+1; - while ( (next != numCoords) && (xtemp[next] == xfirst)) + while (index < NumCoords) { + xfirst = xtemp[index]; + yfirst = ytemp[index]; + next = index + 1; + while ((next != NumCoords) && (xtemp[next] == xfirst) && + (ytemp[next] == yfirst)) next++; - ML_az_dsort2(&(ytemp[index]),next-index,&(OrigLoc[index])); - for (LO i = index; i < next; i++) ztemp[i]= zvals[OrigLoc[i]]; - /* One final sort so that the ztemps are in order */ - LO subindex = index; - while (subindex != next) { - coordinate_type yfirst = ytemp[subindex]; - LO subnext = subindex+1; - while ( (subnext != next) && (ytemp[subnext] == yfirst)) subnext++; - ML_az_dsort2(&(ztemp[subindex]),subnext-subindex,&(OrigLoc[subindex])); - subindex = subnext; + if (NumBlocks == 0) { + NumNodesPerVertLine = next - index; + } + // The number of vertical lines must be the same on all processors + // TAW: Sep 14, 2015: or zero as we allow for empty processors. + // TEUCHOS_TEST_FOR_EXCEPTION(next-index != + // NumNodesPerVertLine,Exceptions::RuntimeError, "Error code only works + // for constant block size now!!!\n"); + count = 0; + for (j = index; j < next; j++) { + VertLineId[OrigLoc[j]] = NumBlocks; + LayerId[OrigLoc[j]] = count++; } + NumBlocks++; index = next; } - } - /* Sort coordinates and additional array accordingly (if provided). This is an expert routine borrowed from ML. 
Do not change.*/ - template - void LineDetectionFactory::ML_az_dsort2(typename Teuchos::ScalarTraits::coordinateType dlist[], LocalOrdinal N, LocalOrdinal list2[]) const { - LO l, r, j, i, flag; - LO RR2; - coordinate_type dRR, dK; - - // note: we use that routine for sorting coordinates only. No complex coordinates are assumed... - typedef Teuchos::ScalarTraits STS; - - if (N <= 1) return; + /* check that everyone was assigned */ - l = N / 2 + 1; - r = N - 1; - l = l - 1; - dRR = dlist[l - 1]; - dK = dlist[l - 1]; + for (i = 0; i < Nnodes; i++) { + if (VertLineId[i] == -1) { + GetOStream(Warnings1) + << "Warning: did not assign " << i << " to a vertical line?????\n" + << std::endl; + } + if (LayerId[i] == -1) { + GetOStream(Warnings1) + << "Warning: did not assign " << i << " to a Layer?????\n" + << std::endl; + } + } - if (list2 != NULL) { - RR2 = list2[l - 1]; - while (r != 0) { - j = l; - flag = 1; + // TAW: Sep 14 2015: relax plausibility checks as we allow for empty + // processors + // MueLu_maxAll(&comm, NumNodesPerVertLine, i); + // if (NumNodesPerVertLine == -1) NumNodesPerVertLine = i; + // TEUCHOS_TEST_FOR_EXCEPTION(NumNodesPerVertLine != + // i,Exceptions::RuntimeError, "Different processors have different z + // direction line lengths?\n"); + + return NumNodesPerVertLine; +} + +/* Private member function to sort coordinates in arrays. This is an expert + * routine. 
Do not use or change.*/ +template +void LineDetectionFactory:: + sort_coordinates( + LO numCoords, LO *OrigLoc, + typename Teuchos::ScalarTraits::coordinateType *xvals, + typename Teuchos::ScalarTraits::coordinateType *yvals, + typename Teuchos::ScalarTraits::coordinateType *zvals, + typename Teuchos::ScalarTraits::coordinateType *xtemp, + typename Teuchos::ScalarTraits::coordinateType *ytemp, + typename Teuchos::ScalarTraits::coordinateType *ztemp, + bool flipXY) const { + + if (flipXY == false) { // for line-smoothing + for (LO i = 0; i < numCoords; i++) + xtemp[i] = xvals[i]; + } else { // for semi-coarsening + for (LO i = 0; i < numCoords; i++) + xtemp[i] = yvals[i]; + } + for (LO i = 0; i < numCoords; i++) + OrigLoc[i] = i; + + ML_az_dsort2(xtemp, numCoords, OrigLoc); + if (flipXY == false) { // for line-smoothing + for (LO i = 0; i < numCoords; i++) + ytemp[i] = yvals[OrigLoc[i]]; + } else { + for (LO i = 0; i < numCoords; i++) + ytemp[i] = xvals[OrigLoc[i]]; + } - while (flag == 1) { - i = j; - j = j + j; + LO index = 0; + + while (index < numCoords) { + coordinate_type xfirst = xtemp[index]; + LO next = index + 1; + while ((next != numCoords) && (xtemp[next] == xfirst)) + next++; + ML_az_dsort2(&(ytemp[index]), next - index, &(OrigLoc[index])); + for (LO i = index; i < next; i++) + ztemp[i] = zvals[OrigLoc[i]]; + /* One final sort so that the ztemps are in order */ + LO subindex = index; + while (subindex != next) { + coordinate_type yfirst = ytemp[subindex]; + LO subnext = subindex + 1; + while ((subnext != next) && (ytemp[subnext] == yfirst)) + subnext++; + ML_az_dsort2(&(ztemp[subindex]), subnext - subindex, + &(OrigLoc[subindex])); + subindex = subnext; + } + index = next; + } +} + +/* Sort coordinates and additional array accordingly (if provided). This is an + * expert routine borrowed from ML. 
Do not change.*/ +template +void LineDetectionFactory:: + ML_az_dsort2(typename Teuchos::ScalarTraits::coordinateType dlist[], + LocalOrdinal N, LocalOrdinal list2[]) const { + LO l, r, j, i, flag; + LO RR2; + coordinate_type dRR, dK; + + // note: we use that routine for sorting coordinates only. No complex + // coordinates are assumed... + typedef Teuchos::ScalarTraits STS; + + if (N <= 1) + return; + + l = N / 2 + 1; + r = N - 1; + l = l - 1; + dRR = dlist[l - 1]; + dK = dlist[l - 1]; + + if (list2 != NULL) { + RR2 = list2[l - 1]; + while (r != 0) { + j = l; + flag = 1; + + while (flag == 1) { + i = j; + j = j + j; + + if (j > r + 1) + flag = 0; + else { + if (j < r + 1) + if (STS::real(dlist[j]) > STS::real(dlist[j - 1])) + j = j + 1; - if (j > r + 1) + if (STS::real(dlist[j - 1]) > STS::real(dK)) { + dlist[i - 1] = dlist[j - 1]; + list2[i - 1] = list2[j - 1]; + } else { flag = 0; - else { - if (j < r + 1) - if (STS::real(dlist[j]) > STS::real(dlist[j - 1])) j = j + 1; - - if (STS::real(dlist[j - 1]) > STS::real(dK)) { - dlist[ i - 1] = dlist[ j - 1]; - list2[i - 1] = list2[j - 1]; - } - else { - flag = 0; - } } } - dlist[ i - 1] = dRR; - list2[i - 1] = RR2; - - if (l == 1) { - dRR = dlist [r]; - RR2 = list2[r]; - dK = dlist[r]; - dlist[r ] = dlist[0]; - list2[r] = list2[0]; - r = r - 1; - } - else { - l = l - 1; - dRR = dlist[ l - 1]; - RR2 = list2[l - 1]; - dK = dlist[l - 1]; - } } - dlist[ 0] = dRR; - list2[0] = RR2; + dlist[i - 1] = dRR; + list2[i - 1] = RR2; + + if (l == 1) { + dRR = dlist[r]; + RR2 = list2[r]; + dK = dlist[r]; + dlist[r] = dlist[0]; + list2[r] = list2[0]; + r = r - 1; + } else { + l = l - 1; + dRR = dlist[l - 1]; + RR2 = list2[l - 1]; + dK = dlist[l - 1]; + } } - else { - while (r != 0) { - j = l; - flag = 1; - while (flag == 1) { - i = j; - j = j + j; - if (j > r + 1) + dlist[0] = dRR; + list2[0] = RR2; + } else { + while (r != 0) { + j = l; + flag = 1; + while (flag == 1) { + i = j; + j = j + j; + if (j > r + 1) + flag = 0; + else { + if 
(j < r + 1) + if (STS::real(dlist[j]) > STS::real(dlist[j - 1])) + j = j + 1; + if (STS::real(dlist[j - 1]) > STS::real(dK)) { + dlist[i - 1] = dlist[j - 1]; + } else { flag = 0; - else { - if (j < r + 1) - if (STS::real(dlist[j]) > STS::real(dlist[j - 1])) j = j + 1; - if (STS::real(dlist[j - 1]) > STS::real(dK)) { - dlist[ i - 1] = dlist[ j - 1]; - } - else { - flag = 0; - } } } - dlist[ i - 1] = dRR; - if (l == 1) { - dRR = dlist [r]; - dK = dlist[r]; - dlist[r ] = dlist[0]; - r = r - 1; - } - else { - l = l - 1; - dRR = dlist[ l - 1]; - dK = dlist[l - 1]; - } } - dlist[ 0] = dRR; + dlist[i - 1] = dRR; + if (l == 1) { + dRR = dlist[r]; + dK = dlist[r]; + dlist[r] = dlist[0]; + r = r - 1; + } else { + l = l - 1; + dRR = dlist[l - 1]; + dK = dlist[l - 1]; + } } - + dlist[0] = dRR; } -} //namespace MueLu +} +} // namespace MueLu #endif // MUELU_LINEDETECTIONFACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_LocalOrdinalTransferFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_LocalOrdinalTransferFactory_decl.hpp index 0fb3650ef6a2..1586977d24f9 100644 --- a/packages/muelu/src/Misc/MueLu_LocalOrdinalTransferFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_LocalOrdinalTransferFactory_decl.hpp @@ -48,8 +48,8 @@ #include "MueLu_ConfigDefs.hpp" #include "MueLu_TwoLevelFactoryBase.hpp" -#include "Xpetra_MultiVector_fwd.hpp" #include "Xpetra_CrsGraph_fwd.hpp" +#include "Xpetra_MultiVector_fwd.hpp" #include "MueLu_LocalOrdinalTransferFactory_fwd.hpp" @@ -57,108 +57,118 @@ namespace MueLu { /*! @class LocalOrdinalTransferFactory class. 
- @brief Class for transferring a vector of local ordinals from a finer level to a coarser one, where each aggregate has a unique one - + @brief Class for transferring a vector of local ordinals from a finer level to + a coarser one, where each aggregate has a unique one + ## Input/output of LocalOrdinalTransferFactory ## ### User parameters of LocalOrdinalTransferFactory ### Parameter | type | default | master.xml | validated | requested | description ----------|------|---------|:----------:|:---------:|:---------:|------------ - | TransferVec| Factory | null | | * | (*) | Factory providing vector to be transered - | Aggregates | Factory | null | | * | (*) | Factory providing aggregates - | CoarseMap | Factory | null | | * | (*) | Generating factory of the coarse map - | write start| int | -1 | | * | | first level at which coordinates should be written to file - | write end | int | -1 | | * | | last level at which coordinates should be written to file - - The * in the @c master.xml column denotes that the parameter is defined in the @c master.xml file.
- The * in the @c validated column means that the parameter is declared in the list of valid input parameters (see LocalOrdinalTransferFactory::GetValidParameters).
- The * in the @c requested column states that the data is requested as input with all dependencies (see LocalOrdinalTransferFactory::DeclareInput). - - The LocalOrdinalTransferFact first checks whether there is already valid coarse TransferVec information - available on the coarse level. If that is the case, we can skip the coarse TransferVec generation and just reuse - the available information. - Otherwise we try to build coarse grid TransverVec by using the information about the - aggregates, the fine level TransferVec and the coarse map information. + | TransferVec| Factory | null | | * | (*) | Factory providing vector to be + transered | Aggregates | Factory | null | | * | (*) | Factory providing + aggregates | CoarseMap | Factory | null | | * | (*) | Generating factory of + the coarse map | write start| int | -1 | | * | | first level at + which coordinates should be written to file | write end | int | -1 | | + * | | last level at which coordinates should be written to file + + The * in the @c master.xml column denotes that the parameter is defined in the + @c master.xml file.
The * in the @c validated column means that the + parameter is declared in the list of valid input parameters (see + LocalOrdinalTransferFactory::GetValidParameters).
The * in the @c + requested column states that the data is requested as input with all + dependencies (see LocalOrdinalTransferFactory::DeclareInput). + + The LocalOrdinalTransferFact first checks whether there is already valid + coarse TransferVec information available on the coarse level. If that is the + case, we can skip the coarse TransferVec generation and just reuse the + available information. Otherwise we try to build coarse grid TransverVec by + using the information about the aggregates, the fine level TransferVec and the + coarse map information. ### Variables provided by LocalOrdinalTransferFactory ### - After LocalOrdinalTransferFactory::Build the following data is available (if requested) + After LocalOrdinalTransferFactory::Build the following data is available (if + requested) Parameter | generated by | description ----------|--------------|------------ | TransferVec | LocalOrdinalTransferFactory | coarse level transfervec */ - - - template - class LocalOrdinalTransferFactory : public TwoLevelFactoryBase { +template +class LocalOrdinalTransferFactory : public TwoLevelFactoryBase { #undef MUELU_LOCALORDINALTRANSFERFACTORY_SHORT #include "MueLu_UseShortNamesOrdinal.hpp" - public: - //! @name Constructors/Destructors. - //@{ +public: + //! @name Constructors/Destructors. + //@{ - // Default constructor is distabled - LocalOrdinalTransferFactory() = delete; + // Default constructor is distabled + LocalOrdinalTransferFactory() = delete; - /*! @brief Constructor. + /*! @brief Constructor. - @param vectorName The name of the quantity to be restricted. - @param restrictionName The name of the restriction Matrix. + @param vectorName The name of the quantity to be restricted. + @param restrictionName The name of the restriction Matrix. - The operator associated with projectionName will be applied to the MultiVector associated with - vectorName. 
- */ - LocalOrdinalTransferFactory(const std::string & TransferVecName, const std::string & mode): TransferVecName_(TransferVecName) { - if(mode == "classical") useAggregatesMode_ = false; - else useAggregatesMode_ = true; - } + The operator associated with projectionName will be applied to the + MultiVector associated with vectorName. + */ + LocalOrdinalTransferFactory(const std::string &TransferVecName, + const std::string &mode) + : TransferVecName_(TransferVecName) { + if (mode == "classical") + useAggregatesMode_ = false; + else + useAggregatesMode_ = true; + } - //! Destructor. - virtual ~LocalOrdinalTransferFactory() { } + //! Destructor. + virtual ~LocalOrdinalTransferFactory() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! @name Input - //@{ + //! @name Input + //@{ - /*! @brief Specifies the data that this class needs, and the factories that generate that data. + /*! @brief Specifies the data that this class needs, and the factories that + generate that data. - If the Build method of this class requires some data, but the generating factory is not specified in DeclareInput, then this class - will fall back to the settings in FactoryManager. - */ - void DeclareInput(Level &finelevel, Level &coarseLevel) const; + If the Build method of this class requires some data, but the generating + factory is not specified in DeclareInput, then this class will fall back to + the settings in FactoryManager. + */ + void DeclareInput(Level &finelevel, Level &coarseLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - //! Build an object with this factory. - void Build(Level & fineLevel, Level &coarseLevel) const; + //! Build an object with this factory. 
+ void Build(Level &fineLevel, Level &coarseLevel) const; - //@} + //@} - private: +private: + void BuildAggregates(Level &fineLevel, Level &coarseLevel) const; - void BuildAggregates(Level & fineLevel, Level &coarseLevel) const; + void BuildFC(Level &fineLevel, Level &coarseLevel) const; - void BuildFC(Level & fineLevel, Level &coarseLevel) const; - - //! Use aggregates mode (as opposed to FC mode) - bool useAggregatesMode_; + //! Use aggregates mode (as opposed to FC mode) + bool useAggregatesMode_; - //! The name for the vector to be transfered. This allows us to have multiple factories for different variables - std::string TransferVecName_; + //! The name for the vector to be transfered. This allows us to have multiple + //! factories for different variables + std::string TransferVecName_; - }; // class LocalOrdinalTransferFactory +}; // class LocalOrdinalTransferFactory } // namespace MueLu diff --git a/packages/muelu/src/Misc/MueLu_LocalOrdinalTransferFactory_def.hpp b/packages/muelu/src/Misc/MueLu_LocalOrdinalTransferFactory_def.hpp index d870306b54b6..70dfc60e8da4 100644 --- a/packages/muelu/src/Misc/MueLu_LocalOrdinalTransferFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_LocalOrdinalTransferFactory_def.hpp @@ -46,10 +46,10 @@ #ifndef MUELU_LOCALORDINALTRANSFER_FACTORY_DEF_HPP #define MUELU_LOCALORDINALTRANSFER_FACTORY_DEF_HPP +#include "Xpetra_CrsGraph.hpp" #include "Xpetra_ImportFactory.hpp" -#include "Xpetra_VectorFactory.hpp" #include "Xpetra_MapFactory.hpp" -#include "Xpetra_CrsGraph.hpp" +#include "Xpetra_VectorFactory.hpp" #include "Xpetra_IO.hpp" @@ -61,201 +61,217 @@ namespace MueLu { - template - RCP LocalOrdinalTransferFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); +template +RCP +LocalOrdinalTransferFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); - validParamList->set >(TransferVecName_, Teuchos::null, "Factory for TransferVec generation"); - 
validParamList->set >("P Graph", Teuchos::null, "Factory for P generation"); - validParamList->set >("Aggregates", Teuchos::null, "Factory for aggregates generation"); - validParamList->set >("CoarseMap", Teuchos::null, "Generating factory of the coarse map"); + validParamList->set>( + TransferVecName_, Teuchos::null, "Factory for TransferVec generation"); + validParamList->set>("P Graph", Teuchos::null, + "Factory for P generation"); + validParamList->set>( + "Aggregates", Teuchos::null, "Factory for aggregates generation"); + validParamList->set>( + "CoarseMap", Teuchos::null, "Generating factory of the coarse map"); - return validParamList; - } + return validParamList; +} + +template +void LocalOrdinalTransferFactory::DeclareInput(Level &fineLevel, + Level &coarseLevel) const { + static bool isAvailableXfer = false; + if (coarseLevel.GetRequestMode() == Level::REQUEST) { + isAvailableXfer = coarseLevel.IsAvailable(TransferVecName_, this); + if (isAvailableXfer == false) { + Input(fineLevel, TransferVecName_); + Input(fineLevel, "CoarseMap"); - template - void LocalOrdinalTransferFactory::DeclareInput(Level& fineLevel, Level& coarseLevel) const { - static bool isAvailableXfer = false; - if (coarseLevel.GetRequestMode() == Level::REQUEST) { - isAvailableXfer = coarseLevel.IsAvailable(TransferVecName_, this); - if (isAvailableXfer == false) { - Input(fineLevel, TransferVecName_); - Input(fineLevel, "CoarseMap"); - - if(useAggregatesMode_) - Input(fineLevel, "Aggregates"); - else { - Input(coarseLevel, "P Graph"); - } + if (useAggregatesMode_) + Input(fineLevel, "Aggregates"); + else { + Input(coarseLevel, "P Graph"); } } - } +} + +template +void LocalOrdinalTransferFactory::Build( + Level &fineLevel, Level &coarseLevel) const { + if (useAggregatesMode_) + BuildAggregates(fineLevel, coarseLevel); + else + BuildFC(fineLevel, coarseLevel); +} + +template +void LocalOrdinalTransferFactory::BuildFC( + Level &fineLevel, Level &coarseLevel) const { + FactoryMonitor 
m(*this, "Build", coarseLevel); - template - void LocalOrdinalTransferFactory::Build(Level & fineLevel, Level &coarseLevel) const { - if(useAggregatesMode_) BuildAggregates(fineLevel,coarseLevel); - else BuildFC(fineLevel,coarseLevel); + GetOStream(Runtime0) << "Transferring " << TransferVecName_ << std::endl; + LO LO_INVALID = Teuchos::OrdinalTraits::invalid(); + + if (coarseLevel.IsAvailable(TransferVecName_, this)) { + GetOStream(Runtime0) << "Reusing " << TransferVecName_ << std::endl; + return; } - template - void LocalOrdinalTransferFactory::BuildFC(Level & fineLevel, Level &coarseLevel) const { - FactoryMonitor m(*this, "Build", coarseLevel); + // Get everything we need + RCP P = Get>(coarseLevel, "P Graph"); + RCP fineTV = + Get>(fineLevel, TransferVecName_); + RCP coarseMap = Get>(fineLevel, "CoarseMap"); + RCP uniqueMap = fineTV->getMap(); + ArrayRCP fineData = fineTV->getData(0); - GetOStream(Runtime0) << "Transferring " <::invalid(); + // Allocate new LO Vector + RCP coarseTV = + LocalOrdinalVectorFactory::Build(coarseMap, 1); + ArrayRCP coarseData = coarseTV->getDataNonConst(0); - if (coarseLevel.IsAvailable(TransferVecName_, this)) { - GetOStream(Runtime0) << "Reusing "< P = Get< RCP >(coarseLevel,"P Graph"); - RCP fineTV = Get< RCP >(fineLevel, TransferVecName_); - RCP coarseMap = Get< RCP > (fineLevel, "CoarseMap"); - RCP uniqueMap = fineTV->getMap(); - ArrayRCP fineData = fineTV->getData(0); - - // Allocate new LO Vector - RCP coarseTV = LocalOrdinalVectorFactory::Build(coarseMap,1); - ArrayRCP coarseData = coarseTV->getDataNonConst(0); - - // Invalidate everything first, to check for errors - for(LO i=0; igetDomainMap()->getLocalNumElements(); - for (LO row=0; row<(LO)P->getLocalNumRows(); row++) { - LO fineNumber = fineData[row]; - ArrayView indices; - P->getLocalRowView(row,indices); - - for(LO j=0; j<(LO)indices.size(); j++) { - LO col = indices[j]; - if (col >= domMapNumElements) { - // skip off rank entries of P - } else { - coarseData[col] = 
fineNumber; - } + // Fill in coarse TV + LO domMapNumElements = P->getDomainMap()->getLocalNumElements(); + for (LO row = 0; row < (LO)P->getLocalNumRows(); row++) { + LO fineNumber = fineData[row]; + ArrayView indices; + P->getLocalRowView(row, indices); + + for (LO j = 0; j < (LO)indices.size(); j++) { + LO col = indices[j]; + if (col >= domMapNumElements) { + // skip off rank entries of P + } else { + coarseData[col] = fineNumber; } } + } #ifdef HAVE_MUELU_DEBUG - size_t error_count = 0; - { - RCP coarseTVghosted; - RCP importer = P->getImporter(); - if (!importer.is_null()) { - coarseTVghosted = LocalOrdinalVectorFactory::Build(P->getColMap(),1); - coarseTVghosted->doImport(*coarseTV, *importer, Xpetra::INSERT); - } else { - coarseTVghosted = coarseTV; - } - ArrayRCP coarseDataGhosted = coarseTVghosted->getDataNonConst(0); - for (LO col=0; col<(LO)P->getColMap()->getLocalNumElements(); col++) { - if (coarseDataGhosted[col] == LO_INVALID) + size_t error_count = 0; + { + RCP coarseTVghosted; + RCP importer = P->getImporter(); + if (!importer.is_null()) { + coarseTVghosted = LocalOrdinalVectorFactory::Build(P->getColMap(), 1); + coarseTVghosted->doImport(*coarseTV, *importer, Xpetra::INSERT); + } else { + coarseTVghosted = coarseTV; + } + ArrayRCP coarseDataGhosted = coarseTVghosted->getDataNonConst(0); + for (LO col = 0; col < (LO)P->getColMap()->getLocalNumElements(); col++) { + if (coarseDataGhosted[col] == LO_INVALID) + error_count++; + } + for (LO row = 0; row < (LO)P->getLocalNumRows(); row++) { + LO fineNumber = fineData[row]; + ArrayView indices; + P->getLocalRowView(row, indices); + for (LO j = 0; j < (LO)indices.size(); j++) { + if (coarseDataGhosted[indices[j]] != fineNumber) error_count++; } - for (LO row=0; row<(LO)P->getLocalNumRows(); row++) { - LO fineNumber = fineData[row]; - ArrayView indices; - P->getLocalRowView(row,indices); - for(LO j=0; j<(LO)indices.size(); j++) { - if (coarseDataGhosted[indices[j]] != fineNumber) - error_count++; - } - } } 
+ } - // Error checking: All nodes in an aggregate must share a local ordinal - if(error_count > 0) { - std::ostringstream ofs; - ofs << "LocalOrdinalTransferFactory("< 0) { + std::ostringstream ofs; + ofs << "LocalOrdinalTransferFactory(" << TransferVecName_ + << "): ERROR: Each coarse dof must have a unique LO value. We had " + << std::to_string(error_count) << " unknowns that did not match."; + throw std::runtime_error(ofs.str()); + } #endif - - Set >(coarseLevel, TransferVecName_, coarseTV); + Set>(coarseLevel, TransferVecName_, coarseTV); +} + +template +void LocalOrdinalTransferFactory::BuildAggregates(Level &fineLevel, + Level &coarseLevel) + const { + FactoryMonitor m(*this, "Build", coarseLevel); + + GetOStream(Runtime0) << "Transferring " << TransferVecName_ << std::endl; + RCP coarseTV; + RCP fineTV; + LO LO_INVALID = Teuchos::OrdinalTraits::invalid(); + + if (coarseLevel.IsAvailable(TransferVecName_, this)) { + GetOStream(Runtime0) << "Reusing " << TransferVecName_ << std::endl; + return; } - + RCP aggregates = Get>(fineLevel, "Aggregates"); + fineTV = Get>(fineLevel, TransferVecName_); + RCP coarseMap = Get>(fineLevel, "CoarseMap"); + RCP uniqueMap = fineTV->getMap(); - template - void LocalOrdinalTransferFactory::BuildAggregates(Level & fineLevel, Level &coarseLevel) const { - FactoryMonitor m(*this, "Build", coarseLevel); + ArrayView elementAList = coarseMap->getLocalElementList(); - GetOStream(Runtime0) << "Transferring " < coarseTV; - RCP fineTV; - LO LO_INVALID = Teuchos::OrdinalTraits::invalid(); + coarseTV = LocalOrdinalVectorFactory::Build(coarseMap, 1); - if (coarseLevel.IsAvailable(TransferVecName_, this)) { - GetOStream(Runtime0) << "Reusing "< aggregates = Get< RCP > (fineLevel, "Aggregates"); - fineTV = Get< RCP >(fineLevel, TransferVecName_); - RCP coarseMap = Get< RCP > (fineLevel, "CoarseMap"); - RCP uniqueMap = fineTV->getMap(); - - ArrayView elementAList = coarseMap->getLocalElementList(); - - coarseTV = 
LocalOrdinalVectorFactory::Build(coarseMap,1); - - // Create overlapped fine TV to reduce global communication - RCP ghostedTV = fineTV; - if (aggregates->AggregatesCrossProcessors()) { - - RCP nonUniqueMap = aggregates->GetMap(); - RCP importer = ImportFactory::Build(uniqueMap, nonUniqueMap); - - ghostedTV = LocalOrdinalVectorFactory::Build(nonUniqueMap, 1); - ghostedTV->doImport(*fineTV, *importer, Xpetra::INSERT); - } - - // Get some info about aggregates - int myPID = uniqueMap->getComm()->getRank(); - ArrayRCP aggSizes = aggregates->ComputeAggregateSizesArrayRCP(); - const ArrayRCP vertex2AggID = aggregates->GetVertex2AggId()->getData(0); - const ArrayRCP procWinner = aggregates->GetProcWinner()->getData(0); - - - ArrayRCP fineData = ghostedTV->getData(0); - ArrayRCP coarseData = coarseTV->getDataNonConst(0); - - // Invalidate everything first, to check for errors - for(LO i=0; i ghostedTV = fineTV; + if (aggregates->AggregatesCrossProcessors()) { + + RCP nonUniqueMap = aggregates->GetMap(); + RCP importer = ImportFactory::Build(uniqueMap, nonUniqueMap); + + ghostedTV = LocalOrdinalVectorFactory::Build(nonUniqueMap, 1); + ghostedTV->doImport(*fineTV, *importer, Xpetra::INSERT); + } - // Error checking: All nodes in an aggregate must share a local ordinal - if(error_count > 0) { - std::ostringstream ofs; - ofs << "LocalOrdinalTransferFactory: ERROR: Each aggregate must have a unique LO value. 
We had "<getComm()->getRank(); + ArrayRCP aggSizes = aggregates->ComputeAggregateSizesArrayRCP(); + const ArrayRCP vertex2AggID = + aggregates->GetVertex2AggId()->getData(0); + const ArrayRCP procWinner = aggregates->GetProcWinner()->getData(0); + + ArrayRCP fineData = ghostedTV->getData(0); + ArrayRCP coarseData = coarseTV->getDataNonConst(0); + + // Invalidate everything first, to check for errors + for (LO i = 0; i < coarseData.size(); i++) + coarseData[i] = LO_INVALID; + + // Fill in coarse TV + size_t error_count = 0; + for (LO lnode = 0; lnode < vertex2AggID.size(); lnode++) { + if (procWinner[lnode] == myPID && + // lnode < vertex2AggID.size() && + lnode < fineData.size() && // TAW do not access off-processor data + vertex2AggID[lnode] < coarseData.size()) { + if (coarseData[vertex2AggID[lnode]] == LO_INVALID) + coarseData[vertex2AggID[lnode]] = fineData[lnode]; + if (coarseData[vertex2AggID[lnode]] != fineData[lnode]) + error_count++; } - - Set >(coarseLevel, TransferVecName_, coarseTV); + } + // Error checking: All nodes in an aggregate must share a local ordinal + if (error_count > 0) { + std::ostringstream ofs; + ofs << "LocalOrdinalTransferFactory: ERROR: Each aggregate must have a " + "unique LO value. We had " + << std::to_string(error_count) << " unknowns that did not match."; + throw std::runtime_error(ofs.str()); } + Set>(coarseLevel, TransferVecName_, coarseTV); +} + } // namespace MueLu #endif // MUELU_LOCALORDINALTRANSFER_FACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_LowPrecisionFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_LowPrecisionFactory_decl.hpp index 37ff1ce0e56e..c9acd283f1c2 100644 --- a/packages/muelu/src/Misc/MueLu_LowPrecisionFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_LowPrecisionFactory_decl.hpp @@ -56,141 +56,143 @@ namespace MueLu { - /*! - @class LowPrecisionFactory class. 
- @brief Factory for converting matrices to half precision operators - */ - - template - class LowPrecisionFactory : public SingleLevelFactoryBase { +/*! + @class LowPrecisionFactory class. + @brief Factory for converting matrices to half precision operators +*/ + +template +class LowPrecisionFactory : public SingleLevelFactoryBase { #undef MUELU_LOWPRECISIONFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! @name Constructors/Destructors. - //@{ +public: + //! @name Constructors/Destructors. + //@{ - LowPrecisionFactory() { } + LowPrecisionFactory() {} - //! Destructor. - virtual ~LowPrecisionFactory() { } + //! Destructor. + virtual ~LowPrecisionFactory() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level& currentLevel) const; + void DeclareInput(Level ¤tLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - /*! - @brief Build method. - - Converts a matrix to half precision operators and returns it in currentLevel. - */ - void Build(Level& currentLevel) const; + /*! + @brief Build method. - //@} + Converts a matrix to half precision operators and returns it in + currentLevel. + */ + void Build(Level ¤tLevel) const; - }; //class LowPrecisionFactory + //@} +}; // class LowPrecisionFactory #if defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_FLOAT) - template - class LowPrecisionFactory : public SingleLevelFactoryBase { - typedef double Scalar; +template +class LowPrecisionFactory + : public SingleLevelFactoryBase { + typedef double Scalar; #undef MUELU_LOWPRECISIONFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! @name Constructors/Destructors. - //@{ +public: + //! @name Constructors/Destructors. + //@{ - LowPrecisionFactory() { } + LowPrecisionFactory() {} - //! Destructor. - virtual ~LowPrecisionFactory() { } + //! Destructor. 
+ virtual ~LowPrecisionFactory() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level& currentLevel) const; + void DeclareInput(Level ¤tLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - /*! - @brief Build method. + /*! + @brief Build method. - Converts a matrix to half precision operators and returns it in currentLevel. - */ - void Build(Level& currentLevel) const; + Converts a matrix to half precision operators and returns it in + currentLevel. + */ + void Build(Level ¤tLevel) const; - //@} + //@} - }; //class LowPrecisionFactory +}; // class LowPrecisionFactory #endif - -#if defined(HAVE_TPETRA_INST_COMPLEX_DOUBLE) && defined(HAVE_TPETRA_INST_COMPLEX_FLOAT) - template - class LowPrecisionFactory,LocalOrdinal,GlobalOrdinal,Node> : public SingleLevelFactoryBase { - typedef std::complex Scalar; +#if defined(HAVE_TPETRA_INST_COMPLEX_DOUBLE) && \ + defined(HAVE_TPETRA_INST_COMPLEX_FLOAT) +template +class LowPrecisionFactory, LocalOrdinal, GlobalOrdinal, + Node> : public SingleLevelFactoryBase { + typedef std::complex Scalar; #undef MUELU_LOWPRECISIONFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! @name Constructors/Destructors. - //@{ +public: + //! @name Constructors/Destructors. + //@{ - LowPrecisionFactory() { } + LowPrecisionFactory() {} - //! Destructor. - virtual ~LowPrecisionFactory() { } + //! Destructor. + virtual ~LowPrecisionFactory() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level& currentLevel) const; + void DeclareInput(Level ¤tLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - /*! - @brief Build method. + /*! + @brief Build method. - Converts a matrix to half precision operators and returns it in currentLevel. 
- */ - void Build(Level& currentLevel) const; + Converts a matrix to half precision operators and returns it in + currentLevel. + */ + void Build(Level ¤tLevel) const; - //@} + //@} - }; //class LowPrecisionFactory +}; // class LowPrecisionFactory #endif - -} //namespace MueLu +} // namespace MueLu #define MUELU_LOWPRECISIONFACTORY_SHORT #endif // MUELU_LOWPRECISIONFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_LowPrecisionFactory_def.hpp b/packages/muelu/src/Misc/MueLu_LowPrecisionFactory_def.hpp index 5182d762e949..071427ab10bc 100644 --- a/packages/muelu/src/Misc/MueLu_LowPrecisionFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_LowPrecisionFactory_def.hpp @@ -46,152 +46,202 @@ #ifndef MUELU_LOWPRECISIONFACTORY_DEF_HPP #define MUELU_LOWPRECISIONFACTORY_DEF_HPP +#include #include #include #include -#include #include "MueLu_LowPrecisionFactory_decl.hpp" #include "MueLu_Level.hpp" #include "MueLu_Monitor.hpp" - namespace MueLu { - template - RCP LowPrecisionFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - validParamList->set("matrix key", "A", ""); - validParamList->set< RCP >("R", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); - validParamList->set< RCP >("P", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); - - return validParamList; - } - - template - void LowPrecisionFactory::DeclareInput(Level& currentLevel) const { - - const ParameterList& pL = GetParameterList(); - std::string matrixKey = pL.get("matrix key"); - Input(currentLevel, matrixKey); - } - - template - void LowPrecisionFactory::Build(Level& currentLevel) const { - using Teuchos::ParameterList; - - const ParameterList& pL = GetParameterList(); - std::string matrixKey = pL.get("matrix key"); - - FactoryMonitor m(*this, "Converting " + 
matrixKey + " to half precision", currentLevel); - - RCP A = Get< RCP >(currentLevel, matrixKey); - - GetOStream(Warnings) << "Matrix not converted to half precision. This only works for Tpetra and when both Scalar and HalfScalar have been instantiated." << std::endl; - Set(currentLevel, matrixKey, A); - } - +template +RCP +LowPrecisionFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + + validParamList->set("matrix key", "A", ""); + validParamList->set>( + "R", Teuchos::null, + "Generating factory of the matrix A to be converted to lower precision"); + validParamList->set>( + "A", Teuchos::null, + "Generating factory of the matrix A to be converted to lower precision"); + validParamList->set>( + "P", Teuchos::null, + "Generating factory of the matrix A to be converted to lower precision"); + + return validParamList; +} + +template +void LowPrecisionFactory::DeclareInput(Level ¤tLevel) const { + + const ParameterList &pL = GetParameterList(); + std::string matrixKey = pL.get("matrix key"); + Input(currentLevel, matrixKey); +} + +template +void LowPrecisionFactory::Build( + Level ¤tLevel) const { + using Teuchos::ParameterList; + + const ParameterList &pL = GetParameterList(); + std::string matrixKey = pL.get("matrix key"); + + FactoryMonitor m(*this, "Converting " + matrixKey + " to half precision", + currentLevel); + + RCP A = Get>(currentLevel, matrixKey); + + GetOStream(Warnings) + << "Matrix not converted to half precision. This only works for Tpetra " + "and when both Scalar and HalfScalar have been instantiated." 
+ << std::endl; + Set(currentLevel, matrixKey, A); +} #if defined(HAVE_TPETRA_INST_DOUBLE) && defined(HAVE_TPETRA_INST_FLOAT) - template - RCP LowPrecisionFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - validParamList->set("matrix key", "A", ""); - validParamList->set< RCP >("R", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); - validParamList->set< RCP >("P", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); - - return validParamList; +template +RCP +LowPrecisionFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + + validParamList->set("matrix key", "A", ""); + validParamList->set>( + "R", Teuchos::null, + "Generating factory of the matrix A to be converted to lower precision"); + validParamList->set>( + "A", Teuchos::null, + "Generating factory of the matrix A to be converted to lower precision"); + validParamList->set>( + "P", Teuchos::null, + "Generating factory of the matrix A to be converted to lower precision"); + + return validParamList; +} + +template +void LowPrecisionFactory::DeclareInput(Level ¤tLevel) const { + + const ParameterList &pL = GetParameterList(); + std::string matrixKey = pL.get("matrix key"); + Input(currentLevel, matrixKey); +} + +template +void LowPrecisionFactory::Build( + Level ¤tLevel) const { + using Teuchos::ParameterList; + using HalfScalar = typename Teuchos::ScalarTraits::halfPrecision; + + const ParameterList &pL = GetParameterList(); + std::string matrixKey = pL.get("matrix key"); + + FactoryMonitor m(*this, "Converting " + matrixKey + " to half precision", + currentLevel); + + RCP A = Get>(currentLevel, matrixKey); + + if ((A->getRowMap()->lib() == Xpetra::UseTpetra) && + std::is_same::value) { + auto tpA = rcp_dynamic_cast( + 
rcp_dynamic_cast(A)->getCrsMatrix(), true) + ->getTpetra_CrsMatrix(); + auto tpLowA = tpA->template convert(); + auto tpLowOpA = + rcp(new Tpetra::CrsMatrixMultiplyOp(tpLowA)); + auto xpTpLowOpA = rcp(new TpetraOperator(tpLowOpA)); + auto xpLowOpA = rcp_dynamic_cast(xpTpLowOpA); + Set(currentLevel, matrixKey, xpLowOpA); + return; } - template - void LowPrecisionFactory::DeclareInput(Level& currentLevel) const { - - const ParameterList& pL = GetParameterList(); - std::string matrixKey = pL.get("matrix key"); - Input(currentLevel, matrixKey); - } - - template - void LowPrecisionFactory::Build(Level& currentLevel) const { - using Teuchos::ParameterList; - using HalfScalar = typename Teuchos::ScalarTraits::halfPrecision; - - const ParameterList& pL = GetParameterList(); - std::string matrixKey = pL.get("matrix key"); - - FactoryMonitor m(*this, "Converting " + matrixKey + " to half precision", currentLevel); - - RCP A = Get< RCP >(currentLevel, matrixKey); - - if ((A->getRowMap()->lib() == Xpetra::UseTpetra) && std::is_same::value) { - auto tpA = rcp_dynamic_cast(rcp_dynamic_cast(A)->getCrsMatrix(), true)->getTpetra_CrsMatrix(); - auto tpLowA = tpA->template convert(); - auto tpLowOpA = rcp(new Tpetra::CrsMatrixMultiplyOp(tpLowA)); - auto xpTpLowOpA = rcp(new TpetraOperator(tpLowOpA)); - auto xpLowOpA = rcp_dynamic_cast(xpTpLowOpA); - Set(currentLevel, matrixKey, xpLowOpA); - return; - } - - GetOStream(Warnings) << "Matrix not converted to half precision. This only works for Tpetra and when both Scalar and HalfScalar have been instantiated." << std::endl; - Set(currentLevel, matrixKey, A); - } + GetOStream(Warnings) + << "Matrix not converted to half precision. This only works for Tpetra " + "and when both Scalar and HalfScalar have been instantiated." 
+ << std::endl; + Set(currentLevel, matrixKey, A); +} #endif - -#if defined(HAVE_TPETRA_INST_COMPLEX_DOUBLE) && defined(HAVE_TPETRA_INST_COMPLEX_FLOAT) - template - RCP LowPrecisionFactory, LocalOrdinal, GlobalOrdinal, Node>::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - validParamList->set("matrix key", "A", ""); - validParamList->set< RCP >("R", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); - validParamList->set< RCP >("P", Teuchos::null, "Generating factory of the matrix A to be converted to lower precision"); - - return validParamList; +#if defined(HAVE_TPETRA_INST_COMPLEX_DOUBLE) && \ + defined(HAVE_TPETRA_INST_COMPLEX_FLOAT) +template +RCP +LowPrecisionFactory, LocalOrdinal, GlobalOrdinal, + Node>::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + + validParamList->set("matrix key", "A", ""); + validParamList->set>( + "R", Teuchos::null, + "Generating factory of the matrix A to be converted to lower precision"); + validParamList->set>( + "A", Teuchos::null, + "Generating factory of the matrix A to be converted to lower precision"); + validParamList->set>( + "P", Teuchos::null, + "Generating factory of the matrix A to be converted to lower precision"); + + return validParamList; +} + +template +void LowPrecisionFactory, LocalOrdinal, GlobalOrdinal, + Node>::DeclareInput(Level ¤tLevel) const { + + const ParameterList &pL = GetParameterList(); + std::string matrixKey = pL.get("matrix key"); + Input(currentLevel, matrixKey); +} + +template +void LowPrecisionFactory, LocalOrdinal, GlobalOrdinal, + Node>::Build(Level ¤tLevel) const { + using Teuchos::ParameterList; + using HalfScalar = typename Teuchos::ScalarTraits::halfPrecision; + + const ParameterList &pL = GetParameterList(); + std::string matrixKey = pL.get("matrix key"); + + 
FactoryMonitor m(*this, "Converting " + matrixKey + " to half precision", + currentLevel); + + RCP A = Get>(currentLevel, matrixKey); + + if ((A->getRowMap()->lib() == Xpetra::UseTpetra) && + std::is_same>::value) { + auto tpA = rcp_dynamic_cast( + rcp_dynamic_cast(A)->getCrsMatrix(), true) + ->getTpetra_CrsMatrix(); + auto tpLowA = tpA->template convert(); + auto tpLowOpA = + rcp(new Tpetra::CrsMatrixMultiplyOp(tpLowA)); + auto xpTpLowOpA = rcp(new TpetraOperator(tpLowOpA)); + auto xpLowOpA = rcp_dynamic_cast(xpTpLowOpA); + Set(currentLevel, matrixKey, xpLowOpA); + return; } - template - void LowPrecisionFactory, LocalOrdinal, GlobalOrdinal, Node>::DeclareInput(Level& currentLevel) const { - - const ParameterList& pL = GetParameterList(); - std::string matrixKey = pL.get("matrix key"); - Input(currentLevel, matrixKey); - } - - template - void LowPrecisionFactory, LocalOrdinal, GlobalOrdinal, Node>::Build(Level& currentLevel) const { - using Teuchos::ParameterList; - using HalfScalar = typename Teuchos::ScalarTraits::halfPrecision; - - const ParameterList& pL = GetParameterList(); - std::string matrixKey = pL.get("matrix key"); - - FactoryMonitor m(*this, "Converting " + matrixKey + " to half precision", currentLevel); - - RCP A = Get< RCP >(currentLevel, matrixKey); - - if ((A->getRowMap()->lib() == Xpetra::UseTpetra) && std::is_same >::value) { - auto tpA = rcp_dynamic_cast(rcp_dynamic_cast(A)->getCrsMatrix(), true)->getTpetra_CrsMatrix(); - auto tpLowA = tpA->template convert(); - auto tpLowOpA = rcp(new Tpetra::CrsMatrixMultiplyOp(tpLowA)); - auto xpTpLowOpA = rcp(new TpetraOperator(tpLowOpA)); - auto xpLowOpA = rcp_dynamic_cast(xpTpLowOpA); - Set(currentLevel, matrixKey, xpLowOpA); - return; - } - - GetOStream(Warnings) << "Matrix not converted to half precision. This only works for Tpetra and when both Scalar and HalfScalar have been instantiated." 
<< std::endl; - Set(currentLevel, matrixKey, A); - } + GetOStream(Warnings) + << "Matrix not converted to half precision. This only works for Tpetra " + "and when both Scalar and HalfScalar have been instantiated." + << std::endl; + Set(currentLevel, matrixKey, A); +} #endif -} //namespace MueLu +} // namespace MueLu #endif // MUELU_LOWPRECISIONFACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_MapTransferFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_MapTransferFactory_decl.hpp index 5ca98e15b840..5569bd5982f5 100644 --- a/packages/muelu/src/Misc/MueLu_MapTransferFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_MapTransferFactory_decl.hpp @@ -52,87 +52,96 @@ namespace MueLu { - /*! - @class MapTransferFactory class. - @brief Factory to transfer a map from a fine to a coarse level - - Factory that transfers a map (given by a variable name and a generating factory) for building - a coarse version of the map. The coarse map is stored on the coarse level using the same variable name - and generating factory than the original fine level map. - - The transfer is based on the prolongator maps. The prolongator/nullspace can also contain rotational modes, - that might not be of interest for the map of interest. Use the option "nullspace vectors: limit to" to exclude such modes. - - ## Input/output ## - - ### User parameters ### - Parameter | type | default | master.xml | validated | requested | description - ----------|------|---------|:----------:|:---------:|:---------:|------------ - map: name | string | "" | | * | * | Name of the map - map: factory | string | "null" | | * | * | Name of the generating factory - P | Factory | null | | * | * | Generating factory of prolongator - nullspace vectors: limit to | string | "all" | | * | * | Use only these nullspace vectors/columns of P to transfer the map (e.g. to drop rotations) - - The * in the @c master.xml column denotes that the parameter is defined in the @c master.xml file.
- The * in the @c validated column means that the parameter is declared in the list of valid input parameters (see @c GetValidParameters() ).
- The * in the @c requested column states that the data is requested as input with all dependencies (see @c DeclareInput() ). - - ### Variables provided by this factory ### - - After \c Build() , the following data is available (if requested): - - Parameter | generated by | description - ----------|--------------|------------ - | map: name | MapTransferFactory | Coarse version of the input map - - */ - - template - class MapTransferFactory : public TwoLevelFactoryBase { +/*! + @class MapTransferFactory class. + @brief Factory to transfer a map from a fine to a coarse level + + Factory that transfers a map (given by a variable name and a generating + factory) for building a coarse version of the map. The coarse map is stored on + the coarse level using the same variable name and generating factory than the + original fine level map. + + The transfer is based on the prolongator maps. The prolongator/nullspace can + also contain rotational modes, that might not be of interest for the map of + interest. Use the option "nullspace vectors: limit to" to exclude such modes. + + ## Input/output ## + + ### User parameters ### + Parameter | type | default | master.xml | validated | requested | description + ----------|------|---------|:----------:|:---------:|:---------:|------------ + map: name | string | "" | | * | * | Name of the map + map: factory | string | "null" | | * | * | Name of the + generating factory P | Factory | null | | * | * + | Generating factory of prolongator nullspace vectors: limit to | string | + "all" | | * | * | Use only these nullspace vectors/columns of P to transfer + the map (e.g. to drop rotations) + + The * in the @c master.xml column denotes that the parameter is defined in the + @c master.xml file.
The * in the @c validated column means that the + parameter is declared in the list of valid input parameters (see @c + GetValidParameters() ).
The * in the @c requested column states that the + data is requested as input with all dependencies (see @c DeclareInput() ). + + ### Variables provided by this factory ### + + After \c Build() , the following data is available (if requested): + + Parameter | generated by | description + ----------|--------------|------------ + | map: name | MapTransferFactory | Coarse version of the input map + +*/ + +template +class MapTransferFactory : public TwoLevelFactoryBase { #undef MUELU_MAPTRANSFERFACTORY_SHORT - #include "MueLu_UseShortNames.hpp" +#include "MueLu_UseShortNames.hpp" - public: +public: + //! Input + //@{ - //! Input - //@{ + RCP GetValidParameterList() const override; - RCP GetValidParameterList() const override; + void DeclareInput(Level &fineLevel, Level &coarseLevel) const override; - void DeclareInput(Level& fineLevel, Level& coarseLevel) const override; + //@} - //@} + //@{ + //! @name Build methods. - //@{ - //! @name Build methods. + //! Build an object with this factory. + void Build(Level &fineLevel, Level &coarseLevel) const override; - //! Build an object with this factory. - void Build(Level& fineLevel, Level& coarseLevel) const override; + //@} - //@} - - private: - - /*! - @brief Get the max number of entries per row of P to be considered for map transfer +private: + /*! + @brief Get the max number of entries per row of P to be considered for map + transfer - To exclude some nullspace vectors (e.g. rotations in 2D or 3D elasticity), when doing the map transfer, - this routine translates the user wish to a maximal number of entries per row of P to be considered during the map transfer. + To exclude some nullspace vectors (e.g. rotations in 2D or 3D elasticity), + when doing the map transfer, this routine translates the user wish to a + maximal number of entries per row of P to be considered during the map + transfer. 
- \warning Rows of P are looped from left to right, so we rely on the usual ordering of the nullspace vectors (translations in x/y/z, then rotations around x,y,z). + \warning Rows of P are looped from left to right, so we rely on the usual + ordering of the nullspace vectors (translations in x/y/z, then rotations + around x,y,z). - @param[in] pL Parameter list with user-given configuration - @return Number of entries per row of the prolongator to be used for the map transfer - */ - int GetLimitOfProlongatorColumns(const ParameterList& pL) const; + @param[in] pL Parameter list with user-given configuration + @return Number of entries per row of the prolongator to be used for the map + transfer + */ + int GetLimitOfProlongatorColumns(const ParameterList &pL) const; - //! Generating factory of input variable - mutable RCP mapFact_; + //! Generating factory of input variable + mutable RCP mapFact_; - }; // class MapTransferFactory +}; // class MapTransferFactory } // namespace MueLu diff --git a/packages/muelu/src/Misc/MueLu_MapTransferFactory_def.hpp b/packages/muelu/src/Misc/MueLu_MapTransferFactory_def.hpp index b96ee324a0df..952270050e1f 100644 --- a/packages/muelu/src/Misc/MueLu_MapTransferFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_MapTransferFactory_def.hpp @@ -53,146 +53,164 @@ #include #include -#include "MueLu_Level.hpp" #include "MueLu_FactoryManagerBase.hpp" +#include "MueLu_Level.hpp" #include "MueLu_Monitor.hpp" namespace MueLu { - template - RCP MapTransferFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - validParamList->setEntry("map: name", Teuchos::ParameterEntry(std::string(""))); - validParamList->setEntry("map: factory", Teuchos::ParameterEntry(std::string("null"))); - - validParamList->set>("P", Teuchos::null, "Tentative prolongator factory"); - validParamList->set("nullspace vectors: limit to", "all", "Limit the number of nullspace vectors to be used for the map transfer (especially to exclude 
rotational vectors)."); - - return validParamList; +template +RCP +MapTransferFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + + validParamList->setEntry("map: name", + Teuchos::ParameterEntry(std::string(""))); + validParamList->setEntry("map: factory", + Teuchos::ParameterEntry(std::string("null"))); + + validParamList->set>("P", Teuchos::null, + "Tentative prolongator factory"); + validParamList->set( + "nullspace vectors: limit to", "all", + "Limit the number of nullspace vectors to be used for the map transfer " + "(especially to exclude rotational vectors)."); + + return validParamList; +} + +template +void MapTransferFactory::DeclareInput(Level &fineLevel, + Level &coarseLevel) const { + const ParameterList &pL = GetParameterList(); + const std::string mapFactName = pL.get("map: factory"); + const std::string mapName = pL.get("map: name"); + + if (fineLevel.GetLevelID() == 0) { + // Not needed, if the map is provided as user data + fineLevel.DeclareInput(mapName, NoFactory::get(), this); + } else { + // check whether user has provided a specific name for the MapFactory + if (mapFactName == "" || mapFactName == "NoFactory") + mapFact_ = MueLu::NoFactory::getRCP(); + else if (mapFactName != "null") + mapFact_ = coarseLevel.GetFactoryManager()->GetFactory(mapFactName); + + // request map generated by mapFact_ + fineLevel.DeclareInput(mapName, mapFact_.get(), this); } - template - void MapTransferFactory::DeclareInput(Level& fineLevel, Level& coarseLevel) const { - const ParameterList & pL = GetParameterList(); - const std::string mapFactName = pL.get("map: factory"); - const std::string mapName = pL.get("map: name"); - - if (fineLevel.GetLevelID() == 0) - { - // Not needed, if the map is provided as user data - fineLevel.DeclareInput(mapName, NoFactory::get(), this); - } - else - { - // check whether user has provided a specific name for the MapFactory - if (mapFactName == "" || mapFactName == "NoFactory") - mapFact_ = 
MueLu::NoFactory::getRCP(); - else if (mapFactName != "null") - mapFact_ = coarseLevel.GetFactoryManager()->GetFactory(mapFactName); - - // request map generated by mapFact_ - fineLevel.DeclareInput(mapName, mapFact_.get(), this); - } - - // request Ptent - // note that "P" provided by the user (through XML file) is supposed to be of type TentativePFactory - Teuchos::RCP tentPFact = GetFactory("P"); - if (tentPFact == Teuchos::null) - tentPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent"); - coarseLevel.DeclareInput("P", tentPFact.get(), this); + // request Ptent + // note that "P" provided by the user (through XML file) is supposed to be of + // type TentativePFactory + Teuchos::RCP tentPFact = GetFactory("P"); + if (tentPFact == Teuchos::null) + tentPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent"); + coarseLevel.DeclareInput("P", tentPFact.get(), this); +} + +template +void MapTransferFactory::Build( + Level &fineLevel, Level &coarseLevel) const { + Monitor m(*this, "MapTransferFactory"); + + const ParameterList &pL = GetParameterList(); + const std::string mapName = pL.get("map: name"); + const int maxNumProlongCols = GetLimitOfProlongatorColumns(pL); + + // fetch map from level + RCP transferMap = Teuchos::null; + if (fineLevel.GetLevelID() == 0) { + transferMap = fineLevel.Get>(mapName, NoFactory::get()); + } else { + if (fineLevel.IsAvailable(mapName, mapFact_.get()) == false) + GetOStream(Runtime0) << "MapTransferFactory::Build: User provided map \"" + << mapName << "\" not found in Level class on level " + << fineLevel.GetLevelID() << "." 
<< std::endl; + transferMap = fineLevel.Get>(mapName, mapFact_.get()); } - template - void MapTransferFactory::Build(Level& fineLevel, Level& coarseLevel) const { - Monitor m(*this, "MapTransferFactory"); - - const ParameterList & pL = GetParameterList(); - const std::string mapName = pL.get("map: name"); - const int maxNumProlongCols = GetLimitOfProlongatorColumns(pL); - - // fetch map from level - RCP transferMap = Teuchos::null; - if (fineLevel.GetLevelID() == 0) { - transferMap = fineLevel.Get>(mapName, NoFactory::get()); - } else { - if (fineLevel.IsAvailable(mapName, mapFact_.get()) == false) - GetOStream(Runtime0) << "MapTransferFactory::Build: User provided map \"" << mapName << "\" not found in Level class on level " << fineLevel.GetLevelID() << "." << std::endl; - transferMap = fineLevel.Get>(mapName, mapFact_.get()); - } - - // Get default tentative prolongator factory - // Getting it that way ensures that the same factory instance will be used for both SaPFactory and NullspaceFactory. 
- RCP tentPFact = GetFactory("P"); - if (tentPFact == Teuchos::null) - tentPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent"); - TEUCHOS_TEST_FOR_EXCEPTION(!coarseLevel.IsAvailable("P", tentPFact.get()), Exceptions::RuntimeError, - "MueLu::MapTransferFactory::Build(): P (generated by TentativePFactory) not available."); - RCP Ptent = coarseLevel.Get >("P", tentPFact.get()); - - // loop over local rows of Ptent and figure out the corresponding coarse GIDs - Array coarseMapGids; - RCP prolongColMap = Ptent->getColMap(); - GO gRowID = -1; - int numColEntries = 0; - for (size_t row = 0; row < Ptent->getLocalNumRows(); ++row) { - gRowID = Ptent->getRowMap()->getGlobalElement(row); - - if (transferMap->isNodeGlobalElement(gRowID)) { - Teuchos::ArrayView indices; - Teuchos::ArrayView vals; - Ptent->getLocalRowView(row, indices, vals); - - numColEntries = as(indices.size()); - if (maxNumProlongCols > 0) - numColEntries = std::min(numColEntries, maxNumProlongCols); - - for (size_t col = 0; col < as(numColEntries); ++col) { - // mark all (selected) columns in Ptent(gRowID,*) to be coarse Dofs of next level transferMap - GO gcid = prolongColMap->getGlobalElement(indices[col]); - coarseMapGids.push_back(gcid); - } + // Get default tentative prolongator factory + // Getting it that way ensures that the same factory instance will be used for + // both SaPFactory and NullspaceFactory. 
+ RCP tentPFact = GetFactory("P"); + if (tentPFact == Teuchos::null) + tentPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent"); + TEUCHOS_TEST_FOR_EXCEPTION(!coarseLevel.IsAvailable("P", tentPFact.get()), + Exceptions::RuntimeError, + "MueLu::MapTransferFactory::Build(): P (generated " + "by TentativePFactory) not available."); + RCP Ptent = coarseLevel.Get>("P", tentPFact.get()); + + // loop over local rows of Ptent and figure out the corresponding coarse GIDs + Array coarseMapGids; + RCP prolongColMap = Ptent->getColMap(); + GO gRowID = -1; + int numColEntries = 0; + for (size_t row = 0; row < Ptent->getLocalNumRows(); ++row) { + gRowID = Ptent->getRowMap()->getGlobalElement(row); + + if (transferMap->isNodeGlobalElement(gRowID)) { + Teuchos::ArrayView indices; + Teuchos::ArrayView vals; + Ptent->getLocalRowView(row, indices, vals); + + numColEntries = as(indices.size()); + if (maxNumProlongCols > 0) + numColEntries = std::min(numColEntries, maxNumProlongCols); + + for (size_t col = 0; col < as(numColEntries); ++col) { + // mark all (selected) columns in Ptent(gRowID,*) to be coarse Dofs of + // next level transferMap + GO gcid = prolongColMap->getGlobalElement(indices[col]); + coarseMapGids.push_back(gcid); } } - - // build coarse version of the input map - const GO INVALID = Teuchos::OrdinalTraits::invalid(); - std::sort(coarseMapGids.begin(), coarseMapGids.end()); - coarseMapGids.erase(std::unique(coarseMapGids.begin(), coarseMapGids.end()), coarseMapGids.end()); - RCP coarseTransferMap = MapFactory::Build(prolongColMap->lib(), INVALID, coarseMapGids(), - prolongColMap->getIndexBase(), prolongColMap->getComm()); - - // store map in coarse level - if (fineLevel.GetLevelID() == 0) - { - const std::string mapFactName = pL.get("map: factory"); - RCP mapFact = coarseLevel.GetFactoryManager()->GetFactory(mapFactName); - coarseLevel.Set(mapName, coarseTransferMap, mapFact.get()); - } - else - coarseLevel.Set(mapName, coarseTransferMap, mapFact_.get()); - } - 
template - int MapTransferFactory::GetLimitOfProlongatorColumns(const ParameterList& pL) const - { - const std::string useTheseNspVectors = pL.get("nullspace vectors: limit to"); - - // Leave right away, if no limit is prescribed by the user - if (useTheseNspVectors == "all" || useTheseNspVectors == "") - return -1; - - // Simplify? Maybe replace by boolean flag "nullspace: exclude rotations" - int maxNumProlongCols = -1; - if (useTheseNspVectors == "translations") - maxNumProlongCols = 1; - else - TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::InvalidArgument, "Unknown subset of nullspace vectors to be used, when performing a map transfer.") - - return maxNumProlongCols; - } + // build coarse version of the input map + const GO INVALID = Teuchos::OrdinalTraits::invalid(); + std::sort(coarseMapGids.begin(), coarseMapGids.end()); + coarseMapGids.erase(std::unique(coarseMapGids.begin(), coarseMapGids.end()), + coarseMapGids.end()); + RCP coarseTransferMap = MapFactory::Build( + prolongColMap->lib(), INVALID, coarseMapGids(), + prolongColMap->getIndexBase(), prolongColMap->getComm()); + + // store map in coarse level + if (fineLevel.GetLevelID() == 0) { + const std::string mapFactName = pL.get("map: factory"); + RCP mapFact = + coarseLevel.GetFactoryManager()->GetFactory(mapFactName); + coarseLevel.Set(mapName, coarseTransferMap, mapFact.get()); + } else + coarseLevel.Set(mapName, coarseTransferMap, mapFact_.get()); +} + +template +int MapTransferFactory:: + GetLimitOfProlongatorColumns(const ParameterList &pL) const { + const std::string useTheseNspVectors = + pL.get("nullspace vectors: limit to"); + + // Leave right away, if no limit is prescribed by the user + if (useTheseNspVectors == "all" || useTheseNspVectors == "") + return -1; + + // Simplify? 
Maybe replace by boolean flag "nullspace: exclude rotations" + int maxNumProlongCols = -1; + if (useTheseNspVectors == "translations") + maxNumProlongCols = 1; + else + TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::InvalidArgument, + "Unknown subset of nullspace vectors to be " + "used, when performing a map transfer.") + + return maxNumProlongCols; +} } // namespace MueLu -#endif /* MUELU_MAPTRANSFERFACTORY_DEF_HPP_ */ \ No newline at end of file +#endif /* MUELU_MAPTRANSFERFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Misc/MueLu_MergedBlockedMatrixFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_MergedBlockedMatrixFactory_decl.hpp index d5d4b459d55f..1fd08f3b7db1 100644 --- a/packages/muelu/src/Misc/MueLu_MergedBlockedMatrixFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_MergedBlockedMatrixFactory_decl.hpp @@ -46,61 +46,56 @@ #ifndef MUELU_MERGEDBLOCKEDMATRIXFACTORY_DECL_HPP_ #define MUELU_MERGEDBLOCKEDMATRIXFACTORY_DECL_HPP_ -#include -#include #include +#include #include +#include #include "MueLu_ConfigDefs.hpp" #include "MueLu_SingleLevelFactoryBase.hpp" -#include "MueLu_Level_fwd.hpp" #include "MueLu_FactoryBase_fwd.hpp" +#include "MueLu_Level_fwd.hpp" namespace MueLu { - /*! - @class MergedBlockedMatrix - @brief Factory provides a merged version of a blocked matrix - */ - template - class MergedBlockedMatrixFactory : public SingleLevelFactoryBase { +/*! + @class MergedBlockedMatrix + @brief Factory provides a merged version of a blocked matrix +*/ +template +class MergedBlockedMatrixFactory : public SingleLevelFactoryBase { #undef MUELU_MERGEDBLOCKEDMATRIXFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ - - MergedBlockedMatrixFactory(); - - virtual ~MergedBlockedMatrixFactory() { } - //@} - - //! @name Input - //@{ - - RCP GetValidParameterList() const; +public: + //! @name Constructors/Destructors. 
+ //@{ - void DeclareInput(Level ¤tLevel) const; + MergedBlockedMatrixFactory(); - //@} + virtual ~MergedBlockedMatrixFactory() {} + //@} - //! @name Build methods. - //@{ - void Build(Level ¤tLevel) const; - //@} + //! @name Input + //@{ + RCP GetValidParameterList() const; + void DeclareInput(Level ¤tLevel) const; - private: + //@} + //! @name Build methods. + //@{ + void Build(Level ¤tLevel) const; + //@} - }; //class MergedBlockedMatrixFactory +private: +}; // class MergedBlockedMatrixFactory -} //namespace MueLu +} // namespace MueLu #define MUELU_MERGEDBLOCKEDMATRIXFACTORY_SHORT diff --git a/packages/muelu/src/Misc/MueLu_MergedBlockedMatrixFactory_def.hpp b/packages/muelu/src/Misc/MueLu_MergedBlockedMatrixFactory_def.hpp index ce3a8cf46d0e..bc4ee795b856 100644 --- a/packages/muelu/src/Misc/MueLu_MergedBlockedMatrixFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_MergedBlockedMatrixFactory_def.hpp @@ -46,49 +46,59 @@ #ifndef MUELU_MERGEDBLOCKEDMATRIXFACTORY_DEF_HPP_ #define MUELU_MERGEDBLOCKEDMATRIXFACTORY_DEF_HPP_ -#include #include "MueLu_Level.hpp" #include "MueLu_Monitor.hpp" #include "MueLu_PerfUtils.hpp" +#include #include "MueLu_MergedBlockedMatrixFactory_decl.hpp" namespace MueLu { template -MergedBlockedMatrixFactory::MergedBlockedMatrixFactory() -{ } +MergedBlockedMatrixFactory::MergedBlockedMatrixFactory() {} template -RCP MergedBlockedMatrixFactory::GetValidParameterList() const { +RCP +MergedBlockedMatrixFactory::GetValidParameterList() const { RCP validParamList = rcp(new ParameterList()); - validParamList->set< RCP >("A", MueLu::NoFactory::getRCP()/*Teuchos::null*/, "Generating factory of the matrix A used for building SchurComplement (must be a 2x2 blocked operator, default = MueLu::NoFactory::getRCP())"); + validParamList->set>( + "A", MueLu::NoFactory::getRCP() /*Teuchos::null*/, + "Generating factory of the matrix A used for building SchurComplement " + "(must be a 2x2 blocked operator, default = MueLu::NoFactory::getRCP())"); return 
validParamList; } - template -void MergedBlockedMatrixFactory::DeclareInput(Level ¤tLevel) const { +void MergedBlockedMatrixFactory::DeclareInput(Level ¤tLevel) const { Input(currentLevel, "A"); } template -void MergedBlockedMatrixFactory::Build(Level & currentLevel) const -{ - FactoryMonitor m(*this, "MergedBlockedMatrix", currentLevel); - Teuchos::RCP A = Get >(currentLevel, "A"); +void MergedBlockedMatrixFactory::Build(Level ¤tLevel) const { + FactoryMonitor m(*this, "MergedBlockedMatrix", currentLevel); + Teuchos::RCP A = Get>(currentLevel, "A"); RCP bA = Teuchos::rcp_dynamic_cast(A); - TEUCHOS_TEST_FOR_EXCEPTION(bA == Teuchos::null, Exceptions::BadCast, "MueLu::MergedBlockedMatrixFactory::Build: input matrix A is not of type BlockedCrsMatrix! A generated by AFact_ must be a 2x2 block operator. error."); + TEUCHOS_TEST_FOR_EXCEPTION( + bA == Teuchos::null, Exceptions::BadCast, + "MueLu::MergedBlockedMatrixFactory::Build: input matrix A is not of type " + "BlockedCrsMatrix! A generated by AFact_ must be a 2x2 block operator. 
" + "error."); Teuchos::RCP mergedA = bA->Merge(); { - GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*mergedA, "A (merged)"); + GetOStream(Statistics1) + << PerfUtils::PrintMatrixInfo(*mergedA, "A (merged)"); - // note: variable "A" generated by this MergedBlockedMatrix factory is in fact the a merged version - // of the blocked matrix A (from the input) + // note: variable "A" generated by this MergedBlockedMatrix factory is in + // fact the a merged version of the blocked matrix A (from the input) Set(currentLevel, "A", mergedA); } } diff --git a/packages/muelu/src/Misc/MueLu_MultiVectorTransferFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_MultiVectorTransferFactory_decl.hpp index 75f8fdc123d2..ffd688a49e28 100644 --- a/packages/muelu/src/Misc/MueLu_MultiVectorTransferFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_MultiVectorTransferFactory_decl.hpp @@ -47,77 +47,79 @@ #define MUELU_MULTIVECTORTRANSFER_FACTORY_DECL_HPP #include "MueLu_ConfigDefs.hpp" +#include "MueLu_MultiVectorTransferFactory_fwd.hpp" #include "MueLu_TwoLevelFactoryBase.hpp" -#include "Xpetra_MultiVector_fwd.hpp" -#include "Xpetra_MultiVectorFactory_fwd.hpp" #include "Xpetra_Matrix_fwd.hpp" -#include "MueLu_MultiVectorTransferFactory_fwd.hpp" +#include "Xpetra_MultiVectorFactory_fwd.hpp" +#include "Xpetra_MultiVector_fwd.hpp" namespace MueLu { - /*! - @class MultiVectorTransferFactory class. - @brief Class for restricting a MultiVector from a finer to a coarser level. +/*! + @class MultiVectorTransferFactory class. + @brief Class for restricting a MultiVector from a finer to a coarser level. - This is to be used in conjunction with Muelu::RAPFactory::AddTransferFactory(). - */ + This is to be used in conjunction with + Muelu::RAPFactory::AddTransferFactory(). 
+*/ - template - class MultiVectorTransferFactory : public TwoLevelFactoryBase { +template +class MultiVectorTransferFactory : public TwoLevelFactoryBase { #undef MUELU_MULTIVECTORTRANSFERFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ - - /*! @brief Constructor. +public: + //! @name Constructors/Destructors. + //@{ - @param vectorName The name of the quantity to be restricted. - @param restrictionName The name of the restriction Matrix. + /*! @brief Constructor. - The operator associated with projectionName will be applied to the MultiVector associated with - vectorName. - */ - MultiVectorTransferFactory() { } + @param vectorName The name of the quantity to be restricted. + @param restrictionName The name of the restriction Matrix. - MultiVectorTransferFactory(std::string const & vectorName); // deprecated + The operator associated with projectionName will be applied to the + MultiVector associated with vectorName. + */ + MultiVectorTransferFactory() {} - //! Destructor. - virtual ~MultiVectorTransferFactory() { } + MultiVectorTransferFactory(std::string const &vectorName); // deprecated - RCP GetValidParameterList() const; + //! Destructor. + virtual ~MultiVectorTransferFactory() {} - //@} + RCP GetValidParameterList() const; - //! @name Input - //@{ + //@} - /*! @brief Specifies the data that this class needs, and the factories that generate that data. + //! @name Input + //@{ - If the Build method of this class requires some data, but the generating factory is not specified in DeclareInput, then this class - will fall back to the settings in FactoryManager. - */ - void DeclareInput(Level &finelevel, Level &coarseLevel) const; + /*! @brief Specifies the data that this class needs, and the factories that + generate that data. 
- //@} + If the Build method of this class requires some data, but the generating + factory is not specified in DeclareInput, then this class will fall back to + the settings in FactoryManager. + */ + void DeclareInput(Level &finelevel, Level &coarseLevel) const; - //! @name Build methods. - //@{ + //@} - //! Build an object with this factory. - void Build(Level & fineLevel, Level &coarseLevel) const; + //! @name Build methods. + //@{ - //@} + //! Build an object with this factory. + void Build(Level &fineLevel, Level &coarseLevel) const; - private: + //@} - static ArrayRCP expandCoordinates(ArrayRCP coord, LocalOrdinal blksize); +private: + static ArrayRCP expandCoordinates(ArrayRCP coord, + LocalOrdinal blksize); - }; // class MultiVectorTransferFactory +}; // class MultiVectorTransferFactory } // namespace MueLu diff --git a/packages/muelu/src/Misc/MueLu_MultiVectorTransferFactory_def.hpp b/packages/muelu/src/Misc/MueLu_MultiVectorTransferFactory_def.hpp index 4ecf3bedfc09..01056d1a3267 100644 --- a/packages/muelu/src/Misc/MueLu_MultiVectorTransferFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_MultiVectorTransferFactory_def.hpp @@ -54,72 +54,98 @@ namespace MueLu { - template - RCP MultiVectorTransferFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - validParamList->set< std::string > ("Vector name", "undefined", "Name of the vector that will be transferred on the coarse grid (level key)"); // TODO: how to set a validator without default value? 
- validParamList->set< RCP >("Vector factory", Teuchos::null, "Factory of the vector"); - validParamList->set< RCP >("R", Teuchos::null, "Factory of the transfer operator (restriction)"); - - return validParamList; - } - - template - MultiVectorTransferFactory::MultiVectorTransferFactory(std::string const & vectorName) { - SetParameter("Vector name", ParameterEntry(vectorName)); - } - - template - void MultiVectorTransferFactory::DeclareInput(Level &fineLevel, Level &coarseLevel) const { - const ParameterList & pL = GetParameterList(); - std::string vectorName = pL.get("Vector name"); - - fineLevel.DeclareInput(vectorName, GetFactory("Vector factory").get(), this); - Input(coarseLevel, "R"); - } - - template - void MultiVectorTransferFactory::Build(Level & fineLevel, Level &coarseLevel) const { - FactoryMonitor m(*this, "Build", coarseLevel); - - const ParameterList & pL = GetParameterList(); - std::string vectorName = pL.get("Vector name"); - - RCP fineVector = fineLevel.Get< RCP >(vectorName, GetFactory("Vector factory").get()); - RCP transferOp = Get >(coarseLevel, "R"); - - RCP coarseVector = MultiVectorFactory::Build(transferOp->getRangeMap(), fineVector->getNumVectors()); - GetOStream(Runtime0) << "Transferring multivector \"" << vectorName << "\"" << std::endl; - - RCP onesVector = MultiVectorFactory::Build(transferOp->getDomainMap(), 1); - onesVector->putScalar(Teuchos::ScalarTraits::one()); - RCP rowSumVector = MultiVectorFactory::Build(transferOp->getRangeMap(), 1); - transferOp->apply(*onesVector, *rowSumVector); - transferOp->apply(*fineVector, *coarseVector); - - if (vectorName == "Coordinates") - TEUCHOS_TEST_FOR_EXCEPTION(true,Exceptions::RuntimeError,"Use CoordinatesTransferFactory to transfer coordinates instead of MultiVectorTransferFactory."); - - Set >(coarseLevel, vectorName, coarseVector); - - } // Build - - template - ArrayRCP MultiVectorTransferFactory::expandCoordinates(ArrayRCP coordinates, LocalOrdinal blksize) { - if (blksize == 1) - 
return coordinates; - - ArrayRCP expandCoord(coordinates.size()*blksize); //TODO: how to avoid automatic initialization of the vector? using arcp()? - - for(int i=0; i +RCP +MultiVectorTransferFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + + validParamList->set( + "Vector name", "undefined", + "Name of the vector that will be transferred on the coarse grid (level " + "key)"); // TODO: how to set a validator without default value? + validParamList->set>("Vector factory", Teuchos::null, + "Factory of the vector"); + validParamList->set>( + "R", Teuchos::null, "Factory of the transfer operator (restriction)"); + + return validParamList; +} + +template +MultiVectorTransferFactory:: + MultiVectorTransferFactory(std::string const &vectorName) { + SetParameter("Vector name", ParameterEntry(vectorName)); +} + +template +void MultiVectorTransferFactory::DeclareInput(Level &fineLevel, + Level &coarseLevel) const { + const ParameterList &pL = GetParameterList(); + std::string vectorName = pL.get("Vector name"); + + fineLevel.DeclareInput(vectorName, GetFactory("Vector factory").get(), this); + Input(coarseLevel, "R"); +} + +template +void MultiVectorTransferFactory::Build(Level &fineLevel, + Level &coarseLevel) const { + FactoryMonitor m(*this, "Build", coarseLevel); + + const ParameterList &pL = GetParameterList(); + std::string vectorName = pL.get("Vector name"); + + RCP fineVector = fineLevel.Get>( + vectorName, GetFactory("Vector factory").get()); + RCP transferOp = Get>(coarseLevel, "R"); + + RCP coarseVector = MultiVectorFactory::Build( + transferOp->getRangeMap(), fineVector->getNumVectors()); + GetOStream(Runtime0) << "Transferring multivector \"" << vectorName << "\"" + << std::endl; + + RCP onesVector = + MultiVectorFactory::Build(transferOp->getDomainMap(), 1); + onesVector->putScalar(Teuchos::ScalarTraits::one()); + RCP rowSumVector = + MultiVectorFactory::Build(transferOp->getRangeMap(), 1); + 
transferOp->apply(*onesVector, *rowSumVector); + transferOp->apply(*fineVector, *coarseVector); + + if (vectorName == "Coordinates") + TEUCHOS_TEST_FOR_EXCEPTION( + true, Exceptions::RuntimeError, + "Use CoordinatesTransferFactory to transfer coordinates instead of " + "MultiVectorTransferFactory."); + + Set>(coarseLevel, vectorName, coarseVector); + +} // Build + +template +ArrayRCP +MultiVectorTransferFactory::expandCoordinates(ArrayRCP coordinates, + LocalOrdinal blksize) { + if (blksize == 1) + return coordinates; + + ArrayRCP expandCoord( + coordinates.size() * + blksize); // TODO: how to avoid automatic initialization of the vector? + // using arcp()? + + for (int i = 0; i < coordinates.size(); i++) { + for (int j = 0; j < blksize; j++) { + expandCoord[i * blksize + j] = coordinates[i]; } - return expandCoord; + } + return expandCoord; - } // expandCoordinates +} // expandCoordinates } // namespace MueLu diff --git a/packages/muelu/src/Misc/MueLu_RAPFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_RAPFactory_decl.hpp index ef345754bf94..d6d44e5f5735 100644 --- a/packages/muelu/src/Misc/MueLu_RAPFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_RAPFactory_decl.hpp @@ -49,8 +49,8 @@ #include #include -#include #include +#include #include "MueLu_ConfigDefs.hpp" @@ -63,76 +63,74 @@ #include "MueLu_Utilities_fwd.hpp" namespace MueLu { - /*! - @class RAPFactory - @brief Factory for building coarse matrices. - */ - template - class RAPFactory : public TwoLevelFactoryBase { +/*! + @class RAPFactory + @brief Factory for building coarse matrices. +*/ +template +class RAPFactory : public TwoLevelFactoryBase { #undef MUELU_RAPFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ +public: + //! @name Constructors/Destructors. + //@{ - RAPFactory(); + RAPFactory(); - virtual ~RAPFactory() { } + virtual ~RAPFactory() {} - //@} + //@} - //! @name Input - //@{ + //! 
@name Input + //@{ - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - void DeclareInput(Level& fineLevel, Level& coarseLevel) const; + void DeclareInput(Level &fineLevel, Level &coarseLevel) const; - //@} + //@} - //! @name Build methods. - //@{ - void Build(Level& fineLevel, Level& coarseLevel) const; - //@} + //! @name Build methods. + //@{ + void Build(Level &fineLevel, Level &coarseLevel) const; + //@} - //@{ - /*! @brief Add transfer factory in the end of list of transfer factories in RepartitionAcFactory. + //@{ + /*! @brief Add transfer factory in the end of list of transfer factories in + RepartitionAcFactory. - Transfer factories are derived from TwoLevelFactoryBase and project some data from the fine level to - the next coarser level. - */ - void AddTransferFactory(const RCP& factory); + Transfer factories are derived from TwoLevelFactoryBase and project some data + from the fine level to the next coarser level. + */ + void AddTransferFactory(const RCP &factory); - // TODO add a function to remove a specific transfer factory? + // TODO add a function to remove a specific transfer factory? - //! Returns number of transfer factories. - size_t NumTransferFactories() const { return transferFacts_.size(); } + //! Returns number of transfer factories. + size_t NumTransferFactories() const { return transferFacts_.size(); } - //@} + //@} - private: +private: + //@{ - //@{ - - mutable - bool hasDeclaredInput_; + mutable bool hasDeclaredInput_; - //@} + //@} - //@{ + //@{ - //! list of user-defined transfer Factories - std::vector > transferFacts_; + //! 
list of user-defined transfer Factories + std::vector> transferFacts_; - //@} + //@} - }; //class RAPFactory +}; // class RAPFactory -} //namespace MueLu +} // namespace MueLu #define MUELU_RAPFACTORY_SHORT #endif // MUELU_RAPFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_RAPFactory_def.hpp b/packages/muelu/src/Misc/MueLu_RAPFactory_def.hpp index 5e67cc295d48..74bd67e49773 100644 --- a/packages/muelu/src/Misc/MueLu_RAPFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_RAPFactory_def.hpp @@ -46,9 +46,9 @@ #ifndef MUELU_RAPFACTORY_DEF_HPP #define MUELU_RAPFACTORY_DEF_HPP - #include +#include #include #include #include @@ -56,7 +56,6 @@ #include #include #include -#include #include "MueLu_RAPFactory_decl.hpp" @@ -67,342 +66,435 @@ namespace MueLu { - template - RAPFactory::RAPFactory() - : hasDeclaredInput_(false) { } - - template - RCP RAPFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - -#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("transpose: use implicit"); - SET_VALID_ENTRY("rap: triple product"); - SET_VALID_ENTRY("rap: fix zero diagonals"); - SET_VALID_ENTRY("rap: fix zero diagonals threshold"); - SET_VALID_ENTRY("rap: fix zero diagonals replacement"); - SET_VALID_ENTRY("rap: relative diagonal floor"); -#undef SET_VALID_ENTRY - validParamList->set< RCP >("A", null, "Generating factory of the matrix A used during the prolongator smoothing process"); - validParamList->set< RCP >("P", null, "Prolongator factory"); - validParamList->set< RCP >("R", null, "Restrictor factory"); - - validParamList->set< bool > ("CheckMainDiagonal", false, "Check main diagonal for zeros"); - validParamList->set< bool > ("RepairMainDiagonal", false, "Repair zeros on main diagonal"); - - // Make sure we don't recursively validate options for the matrixmatrix kernels - ParameterList norecurse; - norecurse.disableRecursiveValidation(); - validParamList->set ("matrixmatrix: 
kernel params", norecurse, "MatrixMatrix kernel parameters"); - - return validParamList; - } - - template - void RAPFactory::DeclareInput(Level &fineLevel, Level &coarseLevel) const { - const Teuchos::ParameterList& pL = GetParameterList(); - if (pL.get("transpose: use implicit") == false) - Input(coarseLevel, "R"); - - Input(fineLevel, "A"); - Input(coarseLevel, "P"); - - // call DeclareInput of all user-given transfer factories - for (std::vector >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) - (*it)->CallDeclareInput(coarseLevel); - - hasDeclaredInput_ = true; - } - - template - void RAPFactory::Build(Level& fineLevel, Level& coarseLevel) const { - const bool doTranspose = true; - const bool doFillComplete = true; - const bool doOptimizeStorage = true; - RCP Ac; - { - FactoryMonitor m(*this, "Computing Ac", coarseLevel); - std::ostringstream levelstr; - levelstr << coarseLevel.GetLevelID(); - std::string labelstr = FormattingHelper::getColonLabel(coarseLevel.getObjectLabel()); - - TEUCHOS_TEST_FOR_EXCEPTION(hasDeclaredInput_ == false, Exceptions::RuntimeError, - "MueLu::RAPFactory::Build(): CallDeclareInput has not been called before Build!"); - - const Teuchos::ParameterList& pL = GetParameterList(); - RCP A = Get< RCP >(fineLevel, "A"); - RCP P = Get< RCP >(coarseLevel, "P"), AP; - // We don't have a valid P (e.g., # global aggregates = 0) so we bail. 
- // This level will ultimately be removed in MueLu_Hierarchy_defs.h via a resize() - if (P == Teuchos::null) { - Ac = Teuchos::null; - Set(coarseLevel, "A", Ac); - return; - } +template +RAPFactory::RAPFactory() + : hasDeclaredInput_(false) {} + +template +RCP +RAPFactory::GetValidParameterList() + const { + RCP validParamList = rcp(new ParameterList()); + +#define SET_VALID_ENTRY(name) \ + validParamList->setEntry(name, MasterList::getEntry(name)) + SET_VALID_ENTRY("transpose: use implicit"); + SET_VALID_ENTRY("rap: triple product"); + SET_VALID_ENTRY("rap: fix zero diagonals"); + SET_VALID_ENTRY("rap: fix zero diagonals threshold"); + SET_VALID_ENTRY("rap: fix zero diagonals replacement"); + SET_VALID_ENTRY("rap: relative diagonal floor"); +#undef SET_VALID_ENTRY + validParamList->set>( + "A", null, + "Generating factory of the matrix A used during the prolongator " + "smoothing process"); + validParamList->set>("P", null, "Prolongator factory"); + validParamList->set>("R", null, "Restrictor factory"); + + validParamList->set("CheckMainDiagonal", false, + "Check main diagonal for zeros"); + validParamList->set("RepairMainDiagonal", false, + "Repair zeros on main diagonal"); + + // Make sure we don't recursively validate options for the matrixmatrix + // kernels + ParameterList norecurse; + norecurse.disableRecursiveValidation(); + validParamList->set("matrixmatrix: kernel params", norecurse, + "MatrixMatrix kernel parameters"); + + return validParamList; +} + +template +void RAPFactory::DeclareInput( + Level &fineLevel, Level &coarseLevel) const { + const Teuchos::ParameterList &pL = GetParameterList(); + if (pL.get("transpose: use implicit") == false) + Input(coarseLevel, "R"); + + Input(fineLevel, "A"); + Input(coarseLevel, "P"); + + // call DeclareInput of all user-given transfer factories + for (std::vector>::const_iterator it = + transferFacts_.begin(); + it != transferFacts_.end(); ++it) + (*it)->CallDeclareInput(coarseLevel); + + hasDeclaredInput_ = true; 
+} + +template +void RAPFactory::Build( + Level &fineLevel, Level &coarseLevel) const { + const bool doTranspose = true; + const bool doFillComplete = true; + const bool doOptimizeStorage = true; + RCP Ac; + { + FactoryMonitor m(*this, "Computing Ac", coarseLevel); + std::ostringstream levelstr; + levelstr << coarseLevel.GetLevelID(); + std::string labelstr = + FormattingHelper::getColonLabel(coarseLevel.getObjectLabel()); + + TEUCHOS_TEST_FOR_EXCEPTION(hasDeclaredInput_ == false, + Exceptions::RuntimeError, + "MueLu::RAPFactory::Build(): CallDeclareInput " + "has not been called before Build!"); + + const Teuchos::ParameterList &pL = GetParameterList(); + RCP A = Get>(fineLevel, "A"); + RCP P = Get>(coarseLevel, "P"), AP; + // We don't have a valid P (e.g., # global aggregates = 0) so we bail. + // This level will ultimately be removed in MueLu_Hierarchy_defs.h via a + // resize() + if (P == Teuchos::null) { + Ac = Teuchos::null; + Set(coarseLevel, "A", Ac); + return; + } - bool isEpetra = A->getRowMap()->lib() == Xpetra::UseEpetra; - bool isGPU = + bool isEpetra = A->getRowMap()->lib() == Xpetra::UseEpetra; + bool isGPU = #ifdef KOKKOS_ENABLE_CUDA - (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosCudaWrapperNode).name()) || + (typeid(Node).name() == + typeid(Tpetra::KokkosCompat::KokkosCudaWrapperNode).name()) || #endif #ifdef KOKKOS_ENABLE_HIP - (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosHIPWrapperNode).name()) || + (typeid(Node).name() == + typeid(Tpetra::KokkosCompat::KokkosHIPWrapperNode).name()) || #endif #ifdef KOKKOS_ENABLE_SYCL - (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosSYCLWrapperNode).name()) || + (typeid(Node).name() == + typeid(Tpetra::KokkosCompat::KokkosSYCLWrapperNode).name()) || #endif - false; - - if (pL.get("rap: triple product") == false || isEpetra || isGPU) { - if (pL.get("rap: triple product") && isEpetra) - GetOStream(Warnings1) << "Switching from triple product to R x (A x P) since triple 
product has not been implemented for Epetra.\n"; -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) || defined(KOKKOS_ENABLE_SYCL) - if (pL.get("rap: triple product") && isGPU) - GetOStream(Warnings1) << "Switching from triple product to R x (A x P) since triple product has not been implemented for " - << Node::execution_space::name() << std::endl; + false; + + if (pL.get("rap: triple product") == false || isEpetra || isGPU) { + if (pL.get("rap: triple product") && isEpetra) + GetOStream(Warnings1) + << "Switching from triple product to R x (A x P) since triple " + "product has not been implemented for Epetra.\n"; +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) || \ + defined(KOKKOS_ENABLE_SYCL) + if (pL.get("rap: triple product") && isGPU) + GetOStream(Warnings1) + << "Switching from triple product to R x (A x P) since triple " + "product has not been implemented for " + << Node::execution_space::name() << std::endl; #endif - // Reuse pattern if available (multiple solve) - RCP APparams = rcp(new ParameterList); - if(pL.isSublist("matrixmatrix: kernel params")) - APparams->sublist("matrixmatrix: kernel params") = pL.sublist("matrixmatrix: kernel params"); - - // By default, we don't need global constants for A*P - APparams->set("compute global constants: temporaries",APparams->get("compute global constants: temporaries",false)); - APparams->set("compute global constants",APparams->get("compute global constants",false)); - - if (coarseLevel.IsAvailable("AP reuse data", this)) { - GetOStream(static_cast(Runtime0 | Test)) << "Reusing previous AP data" << std::endl; - - APparams = coarseLevel.Get< RCP >("AP reuse data", this); - - if (APparams->isParameter("graph")) - AP = APparams->get< RCP >("graph"); - } - - { - SubFactoryMonitor subM(*this, "MxM: A x P", coarseLevel); - - AP = MatrixMatrix::Multiply(*A, !doTranspose, *P, !doTranspose, AP, GetOStream(Statistics2), - doFillComplete, doOptimizeStorage, 
labelstr+std::string("MueLu::A*P-")+levelstr.str(), APparams); - } - - // Reuse coarse matrix memory if available (multiple solve) - RCP RAPparams = rcp(new ParameterList); - if(pL.isSublist("matrixmatrix: kernel params")) - RAPparams->sublist("matrixmatrix: kernel params") = pL.sublist("matrixmatrix: kernel params"); - - if (coarseLevel.IsAvailable("RAP reuse data", this)) { - GetOStream(static_cast(Runtime0 | Test)) << "Reusing previous RAP data" << std::endl; - - RAPparams = coarseLevel.Get< RCP >("RAP reuse data", this); - - if (RAPparams->isParameter("graph")) - Ac = RAPparams->get< RCP >("graph"); - - // Some eigenvalue may have been cached with the matrix in the previous run. - // As the matrix values will be updated, we need to reset the eigenvalue. - Ac->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits::one()); - } - - // We *always* need global constants for the RAP, but not for the temps - RAPparams->set("compute global constants: temporaries",RAPparams->get("compute global constants: temporaries",false)); - RAPparams->set("compute global constants",true); - - // Allow optimization of storage. - // This is necessary for new faster Epetra MM kernels. - // Seems to work with matrix modifications to repair diagonal entries. 
- - if (pL.get("transpose: use implicit") == true) { - SubFactoryMonitor m2(*this, "MxM: P' x (AP) (implicit)", coarseLevel); - - Ac = MatrixMatrix::Multiply(*P, doTranspose, *AP, !doTranspose, Ac, GetOStream(Statistics2), - doFillComplete, doOptimizeStorage, labelstr+std::string("MueLu::R*(AP)-implicit-")+levelstr.str(), RAPparams); - - } else { - RCP R = Get< RCP >(coarseLevel, "R"); - - SubFactoryMonitor m2(*this, "MxM: R x (AP) (explicit)", coarseLevel); - - Ac = MatrixMatrix::Multiply(*R, !doTranspose, *AP, !doTranspose, Ac, GetOStream(Statistics2), - doFillComplete, doOptimizeStorage, labelstr+std::string("MueLu::R*(AP)-explicit-")+levelstr.str(), RAPparams); - } - - Teuchos::ArrayView relativeFloor = pL.get >("rap: relative diagonal floor")(); - if(relativeFloor.size() > 0) { - Xpetra::MatrixUtils::RelativeDiagonalBoost(Ac, relativeFloor,GetOStream(Statistics2)); - } - - bool repairZeroDiagonals = pL.get("RepairMainDiagonal") || pL.get("rap: fix zero diagonals"); - bool checkAc = pL.get("CheckMainDiagonal")|| pL.get("rap: fix zero diagonals"); ; - if (checkAc || repairZeroDiagonals) { - using magnitudeType = typename Teuchos::ScalarTraits::magnitudeType; - magnitudeType threshold; - if (pL.isType("rap: fix zero diagonals threshold")) - threshold = pL.get("rap: fix zero diagonals threshold"); - else - threshold = Teuchos::as(pL.get("rap: fix zero diagonals threshold")); - Scalar replacement = Teuchos::as(pL.get("rap: fix zero diagonals replacement")); - Xpetra::MatrixUtils::CheckRepairMainDiagonal(Ac, repairZeroDiagonals, GetOStream(Warnings1), threshold, replacement); - } - - if (IsPrint(Statistics2)) { - RCP params = rcp(new ParameterList());; - params->set("printLoadBalancingInfo", true); - params->set("printCommInfo", true); - GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*Ac, "Ac", params); - } - - if(!Ac.is_null()) {std::ostringstream oss; oss << "A_" << coarseLevel.GetLevelID(); Ac->setObjectLabel(oss.str());} - Set(coarseLevel, "A", Ac); - - 
if (!isGPU) { - APparams->set("graph", AP); - Set(coarseLevel, "AP reuse data", APparams); - } - if (!isGPU) { - RAPparams->set("graph", Ac); - Set(coarseLevel, "RAP reuse data", RAPparams); - } + // Reuse pattern if available (multiple solve) + RCP APparams = rcp(new ParameterList); + if (pL.isSublist("matrixmatrix: kernel params")) + APparams->sublist("matrixmatrix: kernel params") = + pL.sublist("matrixmatrix: kernel params"); + + // By default, we don't need global constants for A*P + APparams->set( + "compute global constants: temporaries", + APparams->get("compute global constants: temporaries", false)); + APparams->set("compute global constants", + APparams->get("compute global constants", false)); + + if (coarseLevel.IsAvailable("AP reuse data", this)) { + GetOStream(static_cast(Runtime0 | Test)) + << "Reusing previous AP data" << std::endl; + + APparams = coarseLevel.Get>("AP reuse data", this); + + if (APparams->isParameter("graph")) + AP = APparams->get>("graph"); + } + + { + SubFactoryMonitor subM(*this, "MxM: A x P", coarseLevel); + + AP = MatrixMatrix::Multiply( + *A, !doTranspose, *P, !doTranspose, AP, GetOStream(Statistics2), + doFillComplete, doOptimizeStorage, + labelstr + std::string("MueLu::A*P-") + levelstr.str(), APparams); + } + + // Reuse coarse matrix memory if available (multiple solve) + RCP RAPparams = rcp(new ParameterList); + if (pL.isSublist("matrixmatrix: kernel params")) + RAPparams->sublist("matrixmatrix: kernel params") = + pL.sublist("matrixmatrix: kernel params"); + + if (coarseLevel.IsAvailable("RAP reuse data", this)) { + GetOStream(static_cast(Runtime0 | Test)) + << "Reusing previous RAP data" << std::endl; + + RAPparams = coarseLevel.Get>("RAP reuse data", this); + + if (RAPparams->isParameter("graph")) + Ac = RAPparams->get>("graph"); + + // Some eigenvalue may have been cached with the matrix in the previous + // run. As the matrix values will be updated, we need to reset the + // eigenvalue. 
+ Ac->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits::one()); + } + + // We *always* need global constants for the RAP, but not for the temps + RAPparams->set( + "compute global constants: temporaries", + RAPparams->get("compute global constants: temporaries", false)); + RAPparams->set("compute global constants", true); + + // Allow optimization of storage. + // This is necessary for new faster Epetra MM kernels. + // Seems to work with matrix modifications to repair diagonal entries. + + if (pL.get("transpose: use implicit") == true) { + SubFactoryMonitor m2(*this, "MxM: P' x (AP) (implicit)", coarseLevel); + + Ac = MatrixMatrix::Multiply( + *P, doTranspose, *AP, !doTranspose, Ac, GetOStream(Statistics2), + doFillComplete, doOptimizeStorage, + labelstr + std::string("MueLu::R*(AP)-implicit-") + levelstr.str(), + RAPparams); + } else { - RCP RAPparams = rcp(new ParameterList); - if(pL.isSublist("matrixmatrix: kernel params")) - RAPparams->sublist("matrixmatrix: kernel params") = pL.sublist("matrixmatrix: kernel params"); - - if (coarseLevel.IsAvailable("RAP reuse data", this)) { - GetOStream(static_cast(Runtime0 | Test)) << "Reusing previous RAP data" << std::endl; - - RAPparams = coarseLevel.Get< RCP >("RAP reuse data", this); - - if (RAPparams->isParameter("graph")) - Ac = RAPparams->get< RCP >("graph"); - - // Some eigenvalue may have been cached with the matrix in the previous run. - // As the matrix values will be updated, we need to reset the eigenvalue. 
- Ac->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits::one()); - } - - // We *always* need global constants for the RAP, but not for the temps - RAPparams->set("compute global constants: temporaries",RAPparams->get("compute global constants: temporaries",false)); - RAPparams->set("compute global constants",true); - - if (pL.get("transpose: use implicit") == true) { - - Ac = MatrixFactory::Build(P->getDomainMap(), Teuchos::as(0)); - - SubFactoryMonitor m2(*this, "MxMxM: R x A x P (implicit)", coarseLevel); - - Xpetra::TripleMatrixMultiply:: - MultiplyRAP(*P, doTranspose, *A, !doTranspose, *P, !doTranspose, *Ac, doFillComplete, - doOptimizeStorage, labelstr+std::string("MueLu::R*A*P-implicit-")+levelstr.str(), - RAPparams); - } else { - RCP R = Get< RCP >(coarseLevel, "R"); - Ac = MatrixFactory::Build(R->getRowMap(), Teuchos::as(0)); - - SubFactoryMonitor m2(*this, "MxMxM: R x A x P (explicit)", coarseLevel); - - Xpetra::TripleMatrixMultiply:: - MultiplyRAP(*R, !doTranspose, *A, !doTranspose, *P, !doTranspose, *Ac, doFillComplete, - doOptimizeStorage, labelstr+std::string("MueLu::R*A*P-explicit-")+levelstr.str(), - RAPparams); - } - - Teuchos::ArrayView relativeFloor = pL.get >("rap: relative diagonal floor")(); - if(relativeFloor.size() > 0) { - Xpetra::MatrixUtils::RelativeDiagonalBoost(Ac, relativeFloor,GetOStream(Statistics2)); - } - - bool repairZeroDiagonals = pL.get("RepairMainDiagonal") || pL.get("rap: fix zero diagonals"); - bool checkAc = pL.get("CheckMainDiagonal")|| pL.get("rap: fix zero diagonals"); ; - if (checkAc || repairZeroDiagonals) { - using magnitudeType = typename Teuchos::ScalarTraits::magnitudeType; - magnitudeType threshold; - if (pL.isType("rap: fix zero diagonals threshold")) - threshold = pL.get("rap: fix zero diagonals threshold"); - else - threshold = Teuchos::as(pL.get("rap: fix zero diagonals threshold")); - Scalar replacement = Teuchos::as(pL.get("rap: fix zero diagonals replacement")); - 
Xpetra::MatrixUtils::CheckRepairMainDiagonal(Ac, repairZeroDiagonals, GetOStream(Warnings1), threshold, replacement); - } - - - if (IsPrint(Statistics2)) { - RCP params = rcp(new ParameterList());; - params->set("printLoadBalancingInfo", true); - params->set("printCommInfo", true); - GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*Ac, "Ac", params); - } - - if(!Ac.is_null()) {std::ostringstream oss; oss << "A_" << coarseLevel.GetLevelID(); Ac->setObjectLabel(oss.str());} - Set(coarseLevel, "A", Ac); - - if (!isGPU) { - RAPparams->set("graph", Ac); - Set(coarseLevel, "RAP reuse data", RAPparams); - } + RCP R = Get>(coarseLevel, "R"); + + SubFactoryMonitor m2(*this, "MxM: R x (AP) (explicit)", coarseLevel); + + Ac = MatrixMatrix::Multiply( + *R, !doTranspose, *AP, !doTranspose, Ac, GetOStream(Statistics2), + doFillComplete, doOptimizeStorage, + labelstr + std::string("MueLu::R*(AP)-explicit-") + levelstr.str(), + RAPparams); } + Teuchos::ArrayView relativeFloor = + pL.get>("rap: relative diagonal floor")(); + if (relativeFloor.size() > 0) { + Xpetra::MatrixUtils::RelativeDiagonalBoost( + Ac, relativeFloor, GetOStream(Statistics2)); + } - } + bool repairZeroDiagonals = pL.get("RepairMainDiagonal") || + pL.get("rap: fix zero diagonals"); + bool checkAc = pL.get("CheckMainDiagonal") || + pL.get("rap: fix zero diagonals"); + ; + if (checkAc || repairZeroDiagonals) { + using magnitudeType = + typename Teuchos::ScalarTraits::magnitudeType; + magnitudeType threshold; + if (pL.isType("rap: fix zero diagonals threshold")) + threshold = + pL.get("rap: fix zero diagonals threshold"); + else + threshold = Teuchos::as( + pL.get("rap: fix zero diagonals threshold")); + Scalar replacement = Teuchos::as( + pL.get("rap: fix zero diagonals replacement")); + Xpetra::MatrixUtils::CheckRepairMainDiagonal( + Ac, repairZeroDiagonals, GetOStream(Warnings1), threshold, + replacement); + } -#ifdef HAVE_MUELU_DEBUG - MatrixUtils::checkLocalRowMapMatchesColMap(*Ac); -#endif // 
HAVE_MUELU_DEBUG + if (IsPrint(Statistics2)) { + RCP params = rcp(new ParameterList()); + ; + params->set("printLoadBalancingInfo", true); + params->set("printCommInfo", true); + GetOStream(Statistics2) + << PerfUtils::PrintMatrixInfo(*Ac, "Ac", params); + } - if (transferFacts_.begin() != transferFacts_.end()) { - SubFactoryMonitor m(*this, "Projections", coarseLevel); - - // call Build of all user-given transfer factories - for (std::vector >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) { - RCP fac = *it; - GetOStream(Runtime0) << "RAPFactory: call transfer factory: " << fac->description() << std::endl; - fac->CallBuild(coarseLevel); - // Coordinates transfer is marginally different from all other operations - // because it is *optional*, and not required. For instance, we may need - // coordinates only on level 4 if we start repartitioning from that level, - // but we don't need them on level 1,2,3. As our current Hierarchy setup - // assumes propagation of dependencies only through three levels, this - // means that we need to rely on other methods to propagate optional data. - // - // The method currently used is through RAP transfer factories, which are - // simply factories which are called at the end of RAP with a single goal: - // transfer some fine data to coarser level. Because these factories are - // kind of outside of the mainline factories, they behave different. In - // particular, we call their Build method explicitly, rather than through - // Get calls. This difference is significant, as the Get call is smart - // enough to know when to release all factory dependencies, and Build is - // dumb. This led to the following CoordinatesTransferFactory sequence: - // 1. Request level 0 - // 2. Request level 1 - // 3. Request level 0 - // 4. Release level 0 - // 5. Release level 1 - // - // The problem is missing "6. Release level 0". 
Because it was missing, - // we had outstanding request on "Coordinates", "Aggregates" and - // "CoarseMap" on level 0. - // - // This was fixed by explicitly calling Release on transfer factories in - // RAPFactory. I am still unsure how exactly it works, but now we have - // clear data requests for all levels. - coarseLevel.Release(*fac); + if (!Ac.is_null()) { + std::ostringstream oss; + oss << "A_" << coarseLevel.GetLevelID(); + Ac->setObjectLabel(oss.str()); } - } + Set(coarseLevel, "A", Ac); - } + if (!isGPU) { + APparams->set("graph", AP); + Set(coarseLevel, "AP reuse data", APparams); + } + if (!isGPU) { + RAPparams->set("graph", Ac); + Set(coarseLevel, "RAP reuse data", RAPparams); + } + } else { + RCP RAPparams = rcp(new ParameterList); + if (pL.isSublist("matrixmatrix: kernel params")) + RAPparams->sublist("matrixmatrix: kernel params") = + pL.sublist("matrixmatrix: kernel params"); + + if (coarseLevel.IsAvailable("RAP reuse data", this)) { + GetOStream(static_cast(Runtime0 | Test)) + << "Reusing previous RAP data" << std::endl; + + RAPparams = coarseLevel.Get>("RAP reuse data", this); + + if (RAPparams->isParameter("graph")) + Ac = RAPparams->get>("graph"); + + // Some eigenvalue may have been cached with the matrix in the previous + // run. As the matrix values will be updated, we need to reset the + // eigenvalue. 
+ Ac->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits::one()); + } + + // We *always* need global constants for the RAP, but not for the temps + RAPparams->set( + "compute global constants: temporaries", + RAPparams->get("compute global constants: temporaries", false)); + RAPparams->set("compute global constants", true); + + if (pL.get("transpose: use implicit") == true) { + + Ac = MatrixFactory::Build(P->getDomainMap(), Teuchos::as(0)); + + SubFactoryMonitor m2(*this, "MxMxM: R x A x P (implicit)", coarseLevel); + + Xpetra::TripleMatrixMultiply::MultiplyRAP( + *P, doTranspose, *A, !doTranspose, *P, !doTranspose, *Ac, + doFillComplete, doOptimizeStorage, + labelstr + std::string("MueLu::R*A*P-implicit-") + levelstr.str(), + RAPparams); + } else { + RCP R = Get>(coarseLevel, "R"); + Ac = MatrixFactory::Build(R->getRowMap(), Teuchos::as(0)); - template - void RAPFactory::AddTransferFactory(const RCP& factory) { - // check if it's a TwoLevelFactoryBase based transfer factory - TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::rcp_dynamic_cast(factory) == Teuchos::null, Exceptions::BadCast, - "MueLu::RAPFactory::AddTransferFactory: Transfer factory is not derived from TwoLevelFactoryBase. " - "This is very strange. 
(Note: you can remove this exception if there's a good reason for)"); - TEUCHOS_TEST_FOR_EXCEPTION(hasDeclaredInput_, Exceptions::RuntimeError, "MueLu::RAPFactory::AddTransferFactory: Factory is being added after we have already declared input"); - transferFacts_.push_back(factory); + SubFactoryMonitor m2(*this, "MxMxM: R x A x P (explicit)", coarseLevel); + + Xpetra::TripleMatrixMultiply::MultiplyRAP( + *R, !doTranspose, *A, !doTranspose, *P, !doTranspose, *Ac, + doFillComplete, doOptimizeStorage, + labelstr + std::string("MueLu::R*A*P-explicit-") + levelstr.str(), + RAPparams); + } + + Teuchos::ArrayView relativeFloor = + pL.get>("rap: relative diagonal floor")(); + if (relativeFloor.size() > 0) { + Xpetra::MatrixUtils::RelativeDiagonalBoost( + Ac, relativeFloor, GetOStream(Statistics2)); + } + + bool repairZeroDiagonals = pL.get("RepairMainDiagonal") || + pL.get("rap: fix zero diagonals"); + bool checkAc = pL.get("CheckMainDiagonal") || + pL.get("rap: fix zero diagonals"); + ; + if (checkAc || repairZeroDiagonals) { + using magnitudeType = + typename Teuchos::ScalarTraits::magnitudeType; + magnitudeType threshold; + if (pL.isType("rap: fix zero diagonals threshold")) + threshold = + pL.get("rap: fix zero diagonals threshold"); + else + threshold = Teuchos::as( + pL.get("rap: fix zero diagonals threshold")); + Scalar replacement = Teuchos::as( + pL.get("rap: fix zero diagonals replacement")); + Xpetra::MatrixUtils::CheckRepairMainDiagonal( + Ac, repairZeroDiagonals, GetOStream(Warnings1), threshold, + replacement); + } + + if (IsPrint(Statistics2)) { + RCP params = rcp(new ParameterList()); + ; + params->set("printLoadBalancingInfo", true); + params->set("printCommInfo", true); + GetOStream(Statistics2) + << PerfUtils::PrintMatrixInfo(*Ac, "Ac", params); + } + + if (!Ac.is_null()) { + std::ostringstream oss; + oss << "A_" << coarseLevel.GetLevelID(); + Ac->setObjectLabel(oss.str()); + } + Set(coarseLevel, "A", Ac); + + if (!isGPU) { + RAPparams->set("graph", Ac); 
+ Set(coarseLevel, "RAP reuse data", RAPparams); + } + } } -} //namespace MueLu +#ifdef HAVE_MUELU_DEBUG + MatrixUtils::checkLocalRowMapMatchesColMap(*Ac); +#endif // HAVE_MUELU_DEBUG + + if (transferFacts_.begin() != transferFacts_.end()) { + SubFactoryMonitor m(*this, "Projections", coarseLevel); + + // call Build of all user-given transfer factories + for (std::vector>::const_iterator it = + transferFacts_.begin(); + it != transferFacts_.end(); ++it) { + RCP fac = *it; + GetOStream(Runtime0) << "RAPFactory: call transfer factory: " + << fac->description() << std::endl; + fac->CallBuild(coarseLevel); + // Coordinates transfer is marginally different from all other operations + // because it is *optional*, and not required. For instance, we may need + // coordinates only on level 4 if we start repartitioning from that level, + // but we don't need them on level 1,2,3. As our current Hierarchy setup + // assumes propagation of dependencies only through three levels, this + // means that we need to rely on other methods to propagate optional data. + // + // The method currently used is through RAP transfer factories, which are + // simply factories which are called at the end of RAP with a single goal: + // transfer some fine data to coarser level. Because these factories are + // kind of outside of the mainline factories, they behave different. In + // particular, we call their Build method explicitly, rather than through + // Get calls. This difference is significant, as the Get call is smart + // enough to know when to release all factory dependencies, and Build is + // dumb. This led to the following CoordinatesTransferFactory sequence: + // 1. Request level 0 + // 2. Request level 1 + // 3. Request level 0 + // 4. Release level 0 + // 5. Release level 1 + // + // The problem is missing "6. Release level 0". Because it was missing, + // we had outstanding request on "Coordinates", "Aggregates" and + // "CoarseMap" on level 0. 
+ // + // This was fixed by explicitly calling Release on transfer factories in + // RAPFactory. I am still unsure how exactly it works, but now we have + // clear data requests for all levels. + coarseLevel.Release(*fac); + } + } +} + +template +void RAPFactory::AddTransferFactory( + const RCP &factory) { + // check if it's a TwoLevelFactoryBase based transfer factory + TEUCHOS_TEST_FOR_EXCEPTION( + Teuchos::rcp_dynamic_cast(factory) == + Teuchos::null, + Exceptions::BadCast, + "MueLu::RAPFactory::AddTransferFactory: Transfer factory is not derived " + "from TwoLevelFactoryBase. " + "This is very strange. (Note: you can remove this exception if there's a " + "good reason for)"); + TEUCHOS_TEST_FOR_EXCEPTION( + hasDeclaredInput_, Exceptions::RuntimeError, + "MueLu::RAPFactory::AddTransferFactory: Factory is being added after we " + "have already declared input"); + transferFacts_.push_back(factory); +} + +} // namespace MueLu #define MUELU_RAPFACTORY_SHORT #endif // MUELU_RAPFACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_RAPShiftFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_RAPShiftFactory_decl.hpp index 91cbf254d6fe..1d4c7cbc6369 100644 --- a/packages/muelu/src/Misc/MueLu_RAPShiftFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_RAPShiftFactory_decl.hpp @@ -48,10 +48,10 @@ #include -#include #include -#include +#include #include +#include #include "MueLu_ConfigDefs.hpp" #include "MueLu_RAPShiftFactory_fwd.hpp" @@ -62,90 +62,90 @@ #include "MueLu_TwoLevelFactoryBase.hpp" namespace MueLu { - /*! - @class RAPShiftFactory - @brief Factory for building coarse grid matrices, when the matrix - is of the form K+a*M. Useful when you want to change the shift - variable ("a") at every level. Each level must store the stiffness - matrix K and mass matrix M separately. - */ - template - class RAPShiftFactory : public TwoLevelFactoryBase { +/*! + @class RAPShiftFactory + @brief Factory for building coarse grid matrices, when the matrix + is of the form K+a*M. 
Useful when you want to change the shift + variable ("a") at every level. Each level must store the stiffness + matrix K and mass matrix M separately. +*/ +template +class RAPShiftFactory : public TwoLevelFactoryBase { #undef MUELU_RAPSHIFTFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ - - RAPShiftFactory(); +public: + //! @name Constructors/Destructors. + //@{ - virtual ~RAPShiftFactory() { } + RAPShiftFactory(); - //@} + virtual ~RAPShiftFactory() {} - //! @name Input - //@{ + //@} - RCP GetValidParameterList() const; + //! @name Input + //@{ - void DeclareInput(Level &fineLevel, Level &coarseLevel) const; + RCP GetValidParameterList() const; - //@} + void DeclareInput(Level &fineLevel, Level &coarseLevel) const; - //! @name Build methods. - //@{ - void Build(Level &fineLevel, Level &coarseLevel) const; - //@} + //@} - //! @name Handling of user-defined transfer factories - //@{ + //! @name Build methods. + //@{ + void Build(Level &fineLevel, Level &coarseLevel) const; + //@} - //! Indicate that the restriction operator action should be implicitly defined by the transpose of the prolongator. - void SetImplicitTranspose(bool const &implicit) { - implicitTranspose_ = implicit; - } + //! @name Handling of user-defined transfer factories + //@{ - void SetShifts(std::vector& shifts) { - shifts_.clear(); - shifts_ = shifts; - } + //! Indicate that the restriction operator action should be implicitly defined + //! by the transpose of the prolongator. + void SetImplicitTranspose(bool const &implicit) { + implicitTranspose_ = implicit; + } - //@} + void SetShifts(std::vector &shifts) { + shifts_.clear(); + shifts_ = shifts; + } - //@{ - /*! @brief Add transfer factory in the end of list of transfer factories in RepartitionAcFactory. + //@} - Transfer factories are derived from TwoLevelFactoryBase and project some data from the fine level to - the next coarser level. 
- */ - void AddTransferFactory(const RCP& factory); + //@{ + /*! @brief Add transfer factory in the end of list of transfer factories in + RepartitionAcFactory. - // TODO add a function to remove a specific transfer factory? - - //! Returns number of transfer factories. - size_t NumTransferFactories() const { return transferFacts_.size(); } + Transfer factories are derived from TwoLevelFactoryBase and project some data + from the fine level to the next coarser level. + */ + void AddTransferFactory(const RCP &factory); - //@} + // TODO add a function to remove a specific transfer factory? - private: + //! Returns number of transfer factories. + size_t NumTransferFactories() const { return transferFacts_.size(); } - //! If true, the action of the restriction operator action is implicitly defined by the transpose of the prolongator. - bool implicitTranspose_; + //@} +private: + //! If true, the action of the restriction operator action is implicitly + //! defined by the transpose of the prolongator. + bool implicitTranspose_; - //! list of user-defined transfer Factories - std::vector > transferFacts_; + //! 
list of user-defined transfer Factories + std::vector> transferFacts_; - // vector of shifting terms - std::vector shifts_; + // vector of shifting terms + std::vector shifts_; - }; //class RAPShiftFactory +}; // class RAPShiftFactory -} //namespace MueLu +} // namespace MueLu #define MUELU_RAPSHIFTFACTORY_SHORT #endif // MUELU_RAPSHIFTFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_RAPShiftFactory_def.hpp b/packages/muelu/src/Misc/MueLu_RAPShiftFactory_def.hpp index b3c1c0833bb4..b770446601d7 100644 --- a/packages/muelu/src/Misc/MueLu_RAPShiftFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_RAPShiftFactory_def.hpp @@ -54,346 +54,431 @@ #include #include - -#include "MueLu_RAPShiftFactory_decl.hpp" #include "MueLu_MasterList.hpp" #include "MueLu_Monitor.hpp" #include "MueLu_PerfUtils.hpp" +#include "MueLu_RAPShiftFactory_decl.hpp" namespace MueLu { - /*********************************************************************************************************/ - template - RAPShiftFactory::RAPShiftFactory() - : implicitTranspose_(false) { } - - - /*********************************************************************************************************/ - template - RCP RAPShiftFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - -#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("transpose: use implicit"); - SET_VALID_ENTRY("rap: fix zero diagonals"); - SET_VALID_ENTRY("rap: shift"); - SET_VALID_ENTRY("rap: shift array"); - SET_VALID_ENTRY("rap: cfl array"); - SET_VALID_ENTRY("rap: shift diagonal M"); - SET_VALID_ENTRY("rap: shift low storage"); - SET_VALID_ENTRY("rap: relative diagonal floor"); -#undef SET_VALID_ENTRY - - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A used during the prolongator smoothing process"); - validParamList->set< RCP >("M", Teuchos::null, "Generating factory of the matrix M used during the 
non-Galerkin RAP"); - validParamList->set< RCP >("Mdiag", Teuchos::null, "Generating factory of the matrix Mdiag used during the non-Galerkin RAP"); - validParamList->set< RCP >("K", Teuchos::null, "Generating factory of the matrix K used during the non-Galerkin RAP"); - validParamList->set< RCP >("P", Teuchos::null, "Prolongator factory"); - validParamList->set< RCP >("R", Teuchos::null, "Restrictor factory"); - - validParamList->set< bool > ("CheckMainDiagonal", false, "Check main diagonal for zeros"); - validParamList->set< bool > ("RepairMainDiagonal", false, "Repair zeros on main diagonal"); - - validParamList->set > ("deltaT", Teuchos::null, "user deltaT"); - validParamList->set > ("cfl", Teuchos::null, "user cfl"); - validParamList->set > ("cfl-based shift array", Teuchos::null, "MueLu-generated shift array for CFL-based shifting"); - - // Make sure we don't recursively validate options for the matrixmatrix kernels - ParameterList norecurse; - norecurse.disableRecursiveValidation(); - validParamList->set ("matrixmatrix: kernel params", norecurse, "MatrixMatrix kernel parameters"); - - return validParamList; +/*********************************************************************************************************/ +template +RAPShiftFactory::RAPShiftFactory() + : implicitTranspose_(false) {} + +/*********************************************************************************************************/ +template +RCP RAPShiftFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + +#define SET_VALID_ENTRY(name) \ + validParamList->setEntry(name, MasterList::getEntry(name)) + SET_VALID_ENTRY("transpose: use implicit"); + SET_VALID_ENTRY("rap: fix zero diagonals"); + SET_VALID_ENTRY("rap: shift"); + SET_VALID_ENTRY("rap: shift array"); + SET_VALID_ENTRY("rap: cfl array"); + SET_VALID_ENTRY("rap: shift diagonal M"); + SET_VALID_ENTRY("rap: shift low storage"); + SET_VALID_ENTRY("rap: relative diagonal floor"); +#undef 
SET_VALID_ENTRY + + validParamList->set>( + "A", Teuchos::null, + "Generating factory of the matrix A used during the prolongator " + "smoothing process"); + validParamList->set>( + "M", Teuchos::null, + "Generating factory of the matrix M used during the non-Galerkin RAP"); + validParamList->set>( + "Mdiag", Teuchos::null, + "Generating factory of the matrix Mdiag used during the non-Galerkin " + "RAP"); + validParamList->set>( + "K", Teuchos::null, + "Generating factory of the matrix K used during the non-Galerkin RAP"); + validParamList->set>("P", Teuchos::null, + "Prolongator factory"); + validParamList->set>("R", Teuchos::null, + "Restrictor factory"); + + validParamList->set("CheckMainDiagonal", false, + "Check main diagonal for zeros"); + validParamList->set("RepairMainDiagonal", false, + "Repair zeros on main diagonal"); + + validParamList->set>("deltaT", Teuchos::null, + "user deltaT"); + validParamList->set>("cfl", Teuchos::null, "user cfl"); + validParamList->set>( + "cfl-based shift array", Teuchos::null, + "MueLu-generated shift array for CFL-based shifting"); + + // Make sure we don't recursively validate options for the matrixmatrix + // kernels + ParameterList norecurse; + norecurse.disableRecursiveValidation(); + validParamList->set("matrixmatrix: kernel params", norecurse, + "MatrixMatrix kernel parameters"); + + return validParamList; +} + +/*********************************************************************************************************/ +template +void RAPShiftFactory::DeclareInput( + Level &fineLevel, Level &coarseLevel) const { + const Teuchos::ParameterList &pL = GetParameterList(); + + bool use_mdiag = false; + if (pL.isParameter("rap: shift diagonal M")) + use_mdiag = pL.get("rap: shift diagonal M"); + + // The low storage version requires mdiag + bool use_low_storage = false; + if (pL.isParameter("rap: shift low storage")) { + use_low_storage = pL.get("rap: shift low storage"); + use_mdiag = use_low_storage ? 
true : use_mdiag; + } + + if (implicitTranspose_ == false) { + Input(coarseLevel, "R"); + } + + if (!use_low_storage) + Input(fineLevel, "K"); + else + Input(fineLevel, "A"); + Input(coarseLevel, "P"); + + if (!use_mdiag) + Input(fineLevel, "M"); + else + Input(fineLevel, "Mdiag"); + + // CFL array stuff + if (pL.isParameter("rap: cfl array") && + pL.get>("rap: cfl array").size() > 0) { + if (fineLevel.GetLevelID() == 0) { + if (fineLevel.IsAvailable("deltaT", NoFactory::get())) { + fineLevel.DeclareInput("deltaT", NoFactory::get(), this); + } else { + TEUCHOS_TEST_FOR_EXCEPTION( + fineLevel.IsAvailable("fine deltaT", NoFactory::get()), + Exceptions::RuntimeError, + "deltaT was not provided by the user on level0!"); + } + + if (fineLevel.IsAvailable("cfl", NoFactory::get())) { + fineLevel.DeclareInput("cfl", NoFactory::get(), this); + } else { + TEUCHOS_TEST_FOR_EXCEPTION( + fineLevel.IsAvailable("fine cfl", NoFactory::get()), + Exceptions::RuntimeError, + "cfl was not provided by the user on level0!"); + } + } else { + Input(fineLevel, "cfl-based shift array"); + } } - /*********************************************************************************************************/ - template - void RAPShiftFactory::DeclareInput(Level &fineLevel, Level &coarseLevel) const { - const Teuchos::ParameterList& pL = GetParameterList(); + // call DeclareInput of all user-given transfer factories + for (std::vector>::const_iterator it = + transferFacts_.begin(); + it != transferFacts_.end(); ++it) { + (*it)->CallDeclareInput(coarseLevel); + } +} + +template +void RAPShiftFactory::Build( + Level &fineLevel, Level &coarseLevel) const { // FIXME make fineLevel const + { + FactoryMonitor m(*this, "Computing Ac", coarseLevel); + const Teuchos::ParameterList &pL = GetParameterList(); - bool use_mdiag = false; - if(pL.isParameter("rap: shift diagonal M")) - use_mdiag = pL.get("rap: shift diagonal M"); + bool M_is_diagonal = false; + if (pL.isParameter("rap: shift diagonal M")) + 
M_is_diagonal = pL.get("rap: shift diagonal M"); // The low storage version requires mdiag bool use_low_storage = false; - if(pL.isParameter("rap: shift low storage")) { + if (pL.isParameter("rap: shift low storage")) { use_low_storage = pL.get("rap: shift low storage"); - use_mdiag = use_low_storage ? true : use_mdiag; + M_is_diagonal = use_low_storage ? true : M_is_diagonal; } - if (implicitTranspose_ == false) { - Input(coarseLevel, "R"); + Teuchos::ArrayView doubleShifts; + Teuchos::ArrayRCP myshifts; + if (pL.isParameter("rap: shift array") && + pL.get>("rap: shift array").size() > 0) { + // Do we have an array of shifts? If so, we set doubleShifts_ + doubleShifts = pL.get>("rap: shift array")(); } - - if(!use_low_storage) Input(fineLevel, "K"); - else Input(fineLevel, "A"); - Input(coarseLevel, "P"); - - if(!use_mdiag) Input(fineLevel, "M"); - else Input(fineLevel, "Mdiag"); - - // CFL array stuff - if(pL.isParameter("rap: cfl array") && pL.get >("rap: cfl array").size() > 0) { - if(fineLevel.GetLevelID() == 0) { - if(fineLevel.IsAvailable("deltaT", NoFactory::get())) { - fineLevel.DeclareInput("deltaT", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(fineLevel.IsAvailable("fine deltaT", NoFactory::get()), - Exceptions::RuntimeError, - "deltaT was not provided by the user on level0!"); + if (pL.isParameter("rap: cfl array") && + pL.get>("rap: cfl array").size() > 0) { + // Do we have an array of CFLs? If so, we calculated the shifts from + // them. + Teuchos::ArrayView CFLs = + pL.get>("rap: cfl array")(); + if (fineLevel.GetLevelID() == 0) { + double dt = Get(fineLevel, "deltaT"); + double cfl = Get(fineLevel, "cfl"); + double ts_at_cfl1 = dt / cfl; + myshifts.resize(CFLs.size()); + Teuchos::Array myCFLs(CFLs.size()); + myCFLs[0] = cfl; + + // Never make the CFL bigger + for (int i = 1; i < (int)CFLs.size(); i++) + myCFLs[i] = (CFLs[i] > cfl) ? 
cfl : CFLs[i]; + + { + std::ostringstream ofs; + ofs << "RAPShiftFactory: CFL schedule = "; + for (int i = 0; i < (int)CFLs.size(); i++) + ofs << " " << myCFLs[i]; + GetOStream(Statistics0) << ofs.str() << std::endl; } - - if(fineLevel.IsAvailable("cfl", NoFactory::get())) { - fineLevel.DeclareInput("cfl", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(fineLevel.IsAvailable("fine cfl", NoFactory::get()), - Exceptions::RuntimeError, - "cfl was not provided by the user on level0!"); - } - } - else { - Input(fineLevel,"cfl-based shift array"); + GetOStream(Statistics0) << "RAPShiftFactory: Timestep at CFL=1 is " + << ts_at_cfl1 << " " << std::endl; + + // The shift array needs to be 1/dt + for (int i = 0; i < (int)myshifts.size(); i++) + myshifts[i] = 1.0 / (ts_at_cfl1 * myCFLs[i]); + doubleShifts = myshifts(); + + { + std::ostringstream ofs; + ofs << "RAPShiftFactory: shift schedule = "; + for (int i = 0; i < (int)doubleShifts.size(); i++) + ofs << " " << doubleShifts[i]; + GetOStream(Statistics0) << ofs.str() << std::endl; + } + Set(coarseLevel, "cfl-based shift array", myshifts); + } else { + myshifts = + Get>(fineLevel, "cfl-based shift array"); + doubleShifts = myshifts(); + Set(coarseLevel, "cfl-based shift array", myshifts); + // NOTE: If we're not on level zero, then we should have a shift array } } - // call DeclareInput of all user-given transfer factories - for(std::vector >::const_iterator it = transferFacts_.begin(); it!=transferFacts_.end(); ++it) { - (*it)->CallDeclareInput(coarseLevel); - } - } + // Inputs: K, M, P + // Note: In the low-storage case we do not keep a separate "K", we just use + // A + RCP K; + RCP M; + RCP Mdiag; + + if (use_low_storage) + K = Get>(fineLevel, "A"); + else + K = Get>(fineLevel, "K"); + if (!M_is_diagonal) + M = Get>(fineLevel, "M"); + else + Mdiag = Get>(fineLevel, "Mdiag"); + + RCP P = Get>(coarseLevel, "P"); + + // Build Kc = RKP, Mc = RMP + RCP KP, MP; + + // Reuse pattern if available (multiple 
solve) + // FIXME: Old style reuse doesn't work any more + // if (IsAvailable(coarseLevel, "AP Pattern")) { + // KP = Get< RCP >(coarseLevel, "AP Pattern"); + // MP = Get< RCP >(coarseLevel, "AP Pattern"); + // } - template - void RAPShiftFactory::Build(Level &fineLevel, Level &coarseLevel) const { // FIXME make fineLevel const { - FactoryMonitor m(*this, "Computing Ac", coarseLevel); - const Teuchos::ParameterList& pL = GetParameterList(); - - bool M_is_diagonal = false; - if(pL.isParameter("rap: shift diagonal M")) - M_is_diagonal = pL.get("rap: shift diagonal M"); - - // The low storage version requires mdiag - bool use_low_storage = false; - if(pL.isParameter("rap: shift low storage")) { - use_low_storage = pL.get("rap: shift low storage"); - M_is_diagonal = use_low_storage ? true : M_is_diagonal; - } - - Teuchos::ArrayView doubleShifts; - Teuchos::ArrayRCP myshifts; - if(pL.isParameter("rap: shift array") && pL.get >("rap: shift array").size() > 0 ) { - // Do we have an array of shifts? If so, we set doubleShifts_ - doubleShifts = pL.get >("rap: shift array")(); - } - if(pL.isParameter("rap: cfl array") && pL.get >("rap: cfl array").size() > 0) { - // Do we have an array of CFLs? If so, we calculated the shifts from them. - Teuchos::ArrayView CFLs = pL.get >("rap: cfl array")(); - if(fineLevel.GetLevelID() == 0) { - double dt = Get(fineLevel,"deltaT"); - double cfl = Get(fineLevel,"cfl"); - double ts_at_cfl1 = dt / cfl; - myshifts.resize(CFLs.size()); - Teuchos::Array myCFLs(CFLs.size()); - myCFLs[0] = cfl; - - // Never make the CFL bigger - for(int i=1; i<(int)CFLs.size(); i++) - myCFLs[i] = (CFLs[i]> cfl) ? 
cfl : CFLs[i]; - - { - std::ostringstream ofs; - ofs<<"RAPShiftFactory: CFL schedule = "; - for(int i=0; i<(int)CFLs.size(); i++) - ofs<<" "< > (fineLevel,"cfl-based shift array"); - doubleShifts = myshifts(); - Set(coarseLevel,"cfl-based shift array",myshifts); - // NOTE: If we're not on level zero, then we should have a shift array - } - } - - // Inputs: K, M, P - // Note: In the low-storage case we do not keep a separate "K", we just use A - RCP K; - RCP M; - RCP Mdiag; - - if(use_low_storage) K = Get< RCP >(fineLevel, "A"); - else K = Get< RCP >(fineLevel, "K"); - if(!M_is_diagonal) M = Get< RCP >(fineLevel, "M"); - else Mdiag = Get< RCP >(fineLevel, "Mdiag"); - - RCP P = Get< RCP >(coarseLevel, "P"); - - // Build Kc = RKP, Mc = RMP - RCP KP, MP; - - // Reuse pattern if available (multiple solve) - // FIXME: Old style reuse doesn't work any more - // if (IsAvailable(coarseLevel, "AP Pattern")) { - // KP = Get< RCP >(coarseLevel, "AP Pattern"); - // MP = Get< RCP >(coarseLevel, "AP Pattern"); - // } - - { - SubFactoryMonitor subM(*this, "MxM: K x P", coarseLevel); - KP = Xpetra::MatrixMatrix::Multiply(*K, false, *P, false, KP, GetOStream(Statistics2)); - if(!M_is_diagonal) { - MP = Xpetra::MatrixMatrix::Multiply(*M, false, *P, false, MP, GetOStream(Statistics2)); - } - else { - MP = Xpetra::MatrixFactory2::BuildCopy(P); - MP->leftScale(*Mdiag); - } - - Set(coarseLevel, "AP Pattern", KP); + SubFactoryMonitor subM(*this, "MxM: K x P", coarseLevel); + KP = Xpetra::MatrixMatrix::Multiply(*K, false, *P, false, KP, + GetOStream(Statistics2)); + if (!M_is_diagonal) { + MP = Xpetra::MatrixMatrix::Multiply(*M, false, *P, false, MP, + GetOStream(Statistics2)); + } else { + MP = Xpetra::MatrixFactory2::BuildCopy(P); + MP->leftScale(*Mdiag); } - bool doOptimizedStorage = true; - - RCP Ac, Kc, Mc; - - // Reuse pattern if available (multiple solve) - // if (IsAvailable(coarseLevel, "RAP Pattern")) - // Ac = Get< RCP >(coarseLevel, "RAP Pattern"); + Set(coarseLevel, "AP 
Pattern", KP); + } - bool doFillComplete=true; - if (implicitTranspose_) { - SubFactoryMonitor m2(*this, "MxM: P' x (KP) (implicit)", coarseLevel); - Kc = Xpetra::MatrixMatrix::Multiply(*P, true, *KP, false, Kc, GetOStream(Statistics2), doFillComplete, doOptimizedStorage); - Mc = Xpetra::MatrixMatrix::Multiply(*P, true, *MP, false, Mc, GetOStream(Statistics2), doFillComplete, doOptimizedStorage); - } - else { - RCP R = Get< RCP >(coarseLevel, "R"); - SubFactoryMonitor m2(*this, "MxM: R x (KP) (explicit)", coarseLevel); - Kc = Xpetra::MatrixMatrix::Multiply(*R, false, *KP, false, Kc, GetOStream(Statistics2), doFillComplete, doOptimizedStorage); - Mc = Xpetra::MatrixMatrix::Multiply(*R, false, *MP, false, Mc, GetOStream(Statistics2), doFillComplete, doOptimizedStorage); - } + bool doOptimizedStorage = true; + + RCP Ac, Kc, Mc; + + // Reuse pattern if available (multiple solve) + // if (IsAvailable(coarseLevel, "RAP Pattern")) + // Ac = Get< RCP >(coarseLevel, "RAP Pattern"); + + bool doFillComplete = true; + if (implicitTranspose_) { + SubFactoryMonitor m2(*this, "MxM: P' x (KP) (implicit)", coarseLevel); + Kc = Xpetra::MatrixMatrix::Multiply(*P, true, *KP, false, Kc, + GetOStream(Statistics2), + doFillComplete, + doOptimizedStorage); + Mc = Xpetra::MatrixMatrix::Multiply(*P, true, *MP, false, Mc, + GetOStream(Statistics2), + doFillComplete, + doOptimizedStorage); + } else { + RCP R = Get>(coarseLevel, "R"); + SubFactoryMonitor m2(*this, "MxM: R x (KP) (explicit)", coarseLevel); + Kc = Xpetra::MatrixMatrix::Multiply(*R, false, *KP, false, Kc, + GetOStream(Statistics2), + doFillComplete, + doOptimizedStorage); + Mc = Xpetra::MatrixMatrix::Multiply(*R, false, *MP, false, Mc, + GetOStream(Statistics2), + doFillComplete, + doOptimizedStorage); + } - // Get the shift - // FIXME - We should really get rid of the shifts array and drive this the same way everything else works - // If we're using the recursive "low storage" version, we need to shift by ( \prod_{i=1}^k 
shift[i] - \prod_{i=1}^{k-1} shift[i]) to - // get the recursive relationships correct - int level = coarseLevel.GetLevelID(); - Scalar shift = Teuchos::ScalarTraits::zero(); - if(!use_low_storage) { - // High Storage version - if(level < (int)shifts_.size()) shift = shifts_[level]; - else shift = Teuchos::as(pL.get("rap: shift")); - } - else { - // Low Storage Version - if(level < (int)shifts_.size()) { - if(level==1) shift = shifts_[level]; - else { - Scalar prod1 = Teuchos::ScalarTraits::one(); - for(int i=1; i < level-1; i++) { - prod1 *= shifts_[i]; - } - shift = (prod1 * shifts_[level] - prod1); - } - } - else if(doubleShifts.size() != 0) { - double d_shift = 0.0; - if(level < doubleShifts.size()) - d_shift = doubleShifts[level] - doubleShifts[level-1]; - - if(d_shift < 0.0) - GetOStream(Warnings1) << "WARNING: RAPShiftFactory has detected a negative shift... This implies a less stable coarse grid."<(d_shift); - } + // Get the shift + // FIXME - We should really get rid of the shifts array and drive this the + // same way everything else works If we're using the recursive "low storage" + // version, we need to shift by ( \prod_{i=1}^k shift[i] - \prod_{i=1}^{k-1} + // shift[i]) to get the recursive relationships correct + int level = coarseLevel.GetLevelID(); + Scalar shift = Teuchos::ScalarTraits::zero(); + if (!use_low_storage) { + // High Storage version + if (level < (int)shifts_.size()) + shift = shifts_[level]; + else + shift = Teuchos::as(pL.get("rap: shift")); + } else { + // Low Storage Version + if (level < (int)shifts_.size()) { + if (level == 1) + shift = shifts_[level]; else { - double base_shift = pL.get("rap: shift"); - if(level == 1) shift = Teuchos::as(base_shift); - else shift = Teuchos::as(pow(base_shift,level) - pow(base_shift,level-1)); + Scalar prod1 = Teuchos::ScalarTraits::one(); + for (int i = 1; i < level - 1; i++) { + prod1 *= shifts_[i]; + } + shift = (prod1 * shifts_[level] - prod1); } + } else if (doubleShifts.size() != 0) { + 
double d_shift = 0.0; + if (level < doubleShifts.size()) + d_shift = doubleShifts[level] - doubleShifts[level - 1]; + + if (d_shift < 0.0) + GetOStream(Warnings1) + << "WARNING: RAPShiftFactory has detected a negative shift... " + "This implies a less stable coarse grid." + << std::endl; + shift = Teuchos::as(d_shift); + } else { + double base_shift = pL.get("rap: shift"); + if (level == 1) + shift = Teuchos::as(base_shift); + else + shift = Teuchos::as(pow(base_shift, level) - + pow(base_shift, level - 1)); } - GetOStream(Runtime0) << "RAPShiftFactory: Using shift " << shift << std::endl; - - - // recombine to get K+shift*M - { - SubFactoryMonitor m2(*this, "Add: RKP + s*RMP", coarseLevel); - Xpetra::MatrixMatrix::TwoMatrixAdd(*Kc, false, Teuchos::ScalarTraits::one(), *Mc, false, shift, Ac, GetOStream(Statistics2)); - Ac->fillComplete(); - } - - Teuchos::ArrayView relativeFloor = pL.get >("rap: relative diagonal floor")(); - if(relativeFloor.size() > 0) - Xpetra::MatrixUtils::RelativeDiagonalBoost(Ac, relativeFloor,GetOStream(Statistics2)); - - - bool repairZeroDiagonals = pL.get("RepairMainDiagonal") || pL.get("rap: fix zero diagonals"); - bool checkAc = pL.get("CheckMainDiagonal")|| pL.get("rap: fix zero diagonals"); ; - if (checkAc || repairZeroDiagonals) - Xpetra::MatrixUtils::CheckRepairMainDiagonal(Ac, repairZeroDiagonals, GetOStream(Warnings1)); - - RCP params = rcp(new ParameterList());; - params->set("printLoadBalancingInfo", true); - GetOStream(Statistics0) << PerfUtils::PrintMatrixInfo(*Ac, "Ac", params); - - Set(coarseLevel, "A", Ac); - // We only need K in the 'high storage' mode - if(!use_low_storage) - Set(coarseLevel, "K", Kc); - - if(!M_is_diagonal) { - Set(coarseLevel, "M", Mc); - } - else { - // If M is diagonal, then we only pass that part down the hierarchy - // NOTE: Should we be doing some kind of rowsum instead? 
- RCP Mcv = Xpetra::VectorFactory::Build(Mc->getRowMap(),false); - Mc->getLocalDiagCopy(*Mcv); - Set(coarseLevel, "Mdiag", Mcv); - } + } + GetOStream(Runtime0) << "RAPShiftFactory: Using shift " << shift + << std::endl; - // Set(coarseLevel, "RAP Pattern", Ac); + // recombine to get K+shift*M + { + SubFactoryMonitor m2(*this, "Add: RKP + s*RMP", coarseLevel); + Xpetra::MatrixMatrix:: + TwoMatrixAdd(*Kc, false, Teuchos::ScalarTraits::one(), *Mc, + false, shift, Ac, GetOStream(Statistics2)); + Ac->fillComplete(); } - if (transferFacts_.begin() != transferFacts_.end()) { - SubFactoryMonitor m(*this, "Projections", coarseLevel); - - // call Build of all user-given transfer factories - for (std::vector >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) { - RCP fac = *it; - GetOStream(Runtime0) << "RAPShiftFactory: call transfer factory: " << fac->description() << std::endl; - fac->CallBuild(coarseLevel); - // AP (11/11/13): I am not sure exactly why we need to call Release, but we do need it to get rid - // of dangling data for CoordinatesTransferFactory - coarseLevel.Release(*fac); - } + Teuchos::ArrayView relativeFloor = + pL.get>("rap: relative diagonal floor")(); + if (relativeFloor.size() > 0) + Xpetra::MatrixUtils::RelativeDiagonalBoost( + Ac, relativeFloor, GetOStream(Statistics2)); + + bool repairZeroDiagonals = pL.get("RepairMainDiagonal") || + pL.get("rap: fix zero diagonals"); + bool checkAc = pL.get("CheckMainDiagonal") || + pL.get("rap: fix zero diagonals"); + ; + if (checkAc || repairZeroDiagonals) + Xpetra::MatrixUtils::CheckRepairMainDiagonal( + Ac, repairZeroDiagonals, GetOStream(Warnings1)); + + RCP params = rcp(new ParameterList()); + ; + params->set("printLoadBalancingInfo", true); + GetOStream(Statistics0) << PerfUtils::PrintMatrixInfo(*Ac, "Ac", params); + + Set(coarseLevel, "A", Ac); + // We only need K in the 'high storage' mode + if (!use_low_storage) + Set(coarseLevel, "K", Kc); + + if (!M_is_diagonal) { + 
Set(coarseLevel, "M", Mc); + } else { + // If M is diagonal, then we only pass that part down the hierarchy + // NOTE: Should we be doing some kind of rowsum instead? + RCP Mcv = + Xpetra::VectorFactory::Build(Mc->getRowMap(), false); + Mc->getLocalDiagCopy(*Mcv); + Set(coarseLevel, "Mdiag", Mcv); } - } - template - void RAPShiftFactory::AddTransferFactory(const RCP& factory) { - // check if it's a TwoLevelFactoryBase based transfer factory - TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::rcp_dynamic_cast(factory) == Teuchos::null, Exceptions::BadCast, "MueLu::RAPShiftFactory::AddTransferFactory: Transfer factory is not derived from TwoLevelFactoryBase. This is very strange. (Note: you can remove this exception if there's a good reason for)"); - transferFacts_.push_back(factory); + // Set(coarseLevel, "RAP Pattern", Ac); } -} //namespace MueLu + if (transferFacts_.begin() != transferFacts_.end()) { + SubFactoryMonitor m(*this, "Projections", coarseLevel); + + // call Build of all user-given transfer factories + for (std::vector>::const_iterator it = + transferFacts_.begin(); + it != transferFacts_.end(); ++it) { + RCP fac = *it; + GetOStream(Runtime0) << "RAPShiftFactory: call transfer factory: " + << fac->description() << std::endl; + fac->CallBuild(coarseLevel); + // AP (11/11/13): I am not sure exactly why we need to call Release, but + // we do need it to get rid of dangling data for + // CoordinatesTransferFactory + coarseLevel.Release(*fac); + } + } +} + +template +void RAPShiftFactory:: + AddTransferFactory(const RCP &factory) { + // check if it's a TwoLevelFactoryBase based transfer factory + TEUCHOS_TEST_FOR_EXCEPTION( + Teuchos::rcp_dynamic_cast(factory) == + Teuchos::null, + Exceptions::BadCast, + "MueLu::RAPShiftFactory::AddTransferFactory: Transfer factory is not " + "derived from TwoLevelFactoryBase. This is very strange. 
(Note: you can " + "remove this exception if there's a good reason for)"); + transferFacts_.push_back(factory); +} + +} // namespace MueLu #define MUELU_RAPSHIFTFACTORY_SHORT #endif // MUELU_RAPSHIFTFACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_SchurComplementFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_SchurComplementFactory_decl.hpp index e365ab23c75c..f30cb00bac94 100644 --- a/packages/muelu/src/Misc/MueLu_SchurComplementFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_SchurComplementFactory_decl.hpp @@ -60,85 +60,89 @@ #include #include - #include "MueLu_FactoryBase_fwd.hpp" #include "MueLu_SingleLevelFactoryBase.hpp" - namespace MueLu { - /*! - @class SchurComplementFactory class. - @brief Factory for building the Schur Complement for a 2x2 block matrix. +/*! + @class SchurComplementFactory class. + @brief Factory for building the Schur Complement for a 2x2 block matrix. - ## Context, assumptions, and use cases ## + ## Context, assumptions, and use cases ## - This factory is intended to be used for building the schur complement for respective smoothers. The calculation is - for now restricted to 2x2 block matrices, where the schur complement is calculated on block A_11. + This factory is intended to be used for building the schur complement for + respective smoothers. The calculation is for now restricted to 2x2 block + matrices, where the schur complement is calculated on block A_11. - For a blocked matrix \A = [A_00 A_01; A_10 A_11] it computes the Schur complement S = A_11 - 1/\omega A_10 Ainv A_01, - where \omega is some scaling factor and \Ainv an approximation of A_00^{-1} (from InverseApproximationFactory). + For a blocked matrix \A = [A_00 A_01; A_10 A_11] it computes the Schur + complement S = A_11 - 1/\omega A_10 Ainv A_01, where \omega is some scaling + factor and \Ainv an approximation of A_00^{-1} (from + InverseApproximationFactory). 
- ## Input/output of this factory ## + ## Input/output of this factory ## - ### User parameters of InterfaceAggregationFactory ### - Parameter | type | default | master.xml | validated | requested | description - ----------|------|---------|:----------:|:---------:|:---------:|------------ - A | Factory | null | | * | * | Generating factory of the matrix A - Ainv | Factory | null | | * | * | Generating factory of the approximate inverse of A (produced by the InverseApproximationFactory) - omega | double | 1.0 | | * | * | Scaling factor + ### User parameters of InterfaceAggregationFactory ### + Parameter | type | default | master.xml | validated | requested | description + ----------|------|---------|:----------:|:---------:|:---------:|------------ + A | Factory | null | | * | * | Generating factory of the matrix A + Ainv | Factory | null | | * | * | Generating factory of the approximate + inverse of A (produced by the InverseApproximationFactory) omega | double + | 1.0 | | * | * | Scaling factor - The * in the master.xml column denotes that the parameter is defined in the master.xml file. - The * in the validated column means that the parameter is declared in the list of valid input parameters (see GetValidParameters() ). - The * in the requested column states that the data is requested as input with all dependencies (see DeclareInput() ). + The * in the master.xml column denotes that the parameter is defined in the + master.xml file. The * in the validated column means that the parameter is + declared in the list of valid input parameters (see GetValidParameters() ). + The * in the requested column states that the data is requested as input with + all dependencies (see DeclareInput() ). 
- ### Variables provided by this factory ### + ### Variables provided by this factory ### - After SchurComplementFactory::Build the following data is available (if requested) + After SchurComplementFactory::Build the following data is available (if + requested) - Parameter | generated by | description - ----------|--------------|------------ - | A | SchurComplementFactory | The schur complement of the given block matrix. - */ + Parameter | generated by | description + ----------|--------------|------------ + | A | SchurComplementFactory | The schur complement of the given block matrix. +*/ - template - class SchurComplementFactory : public SingleLevelFactoryBase { +template +class SchurComplementFactory : public SingleLevelFactoryBase { #undef MUELU_SCHURCOMPLEMENTFACTORY_SHORT - #include "MueLu_UseShortNames.hpp" - - public: - //! @name Constructors/Destructors. - //@{ +#include "MueLu_UseShortNames.hpp" - //! Constructor. - SchurComplementFactory() = default; +public: + //! @name Constructors/Destructors. + //@{ - //! Input - //@{ + //! Constructor. + SchurComplementFactory() = default; - void DeclareInput(Level& currentLevel) const; + //! Input + //@{ - RCP GetValidParameterList() const; + void DeclareInput(Level ¤tLevel) const; - //@} + RCP GetValidParameterList() const; - //@{ - //! @name Build methods. + //@} - //! Build an object with this factory. - void Build(Level& currentLevel) const; + //@{ + //! @name Build methods. - //@} + //! Build an object with this factory. + void Build(Level ¤tLevel) const; + //@} - private: - //! Schur complement calculation method. - RCP ComputeSchurComplement(RCP& bA, RCP& Ainv) const; +private: + //! Schur complement calculation method. 
+ RCP ComputeSchurComplement(RCP &bA, + RCP &Ainv) const; - }; // class SchurComplementFactory +}; // class SchurComplementFactory } // namespace MueLu diff --git a/packages/muelu/src/Misc/MueLu_SchurComplementFactory_def.hpp b/packages/muelu/src/Misc/MueLu_SchurComplementFactory_def.hpp index 66cbd74fa792..e6974ea91c54 100644 --- a/packages/muelu/src/Misc/MueLu_SchurComplementFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_SchurComplementFactory_def.hpp @@ -47,14 +47,14 @@ #define MUELU_SCHURCOMPLEMENTFACTORY_DEF_HPP_ #include -#include -#include -#include +#include +#include #include +#include #include +#include #include -#include -#include +#include #include "MueLu_Level.hpp" #include "MueLu_Monitor.hpp" @@ -62,152 +62,198 @@ namespace MueLu { - template - RCP SchurComplementFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - const SC one = Teuchos::ScalarTraits::one(); - - validParamList->set >("A" , NoFactory::getRCP(), "Generating factory of the matrix A used for building Schur complement (must be a 2x2 blocked operator)"); - validParamList->set >("Ainv" , Teuchos::null, "Generating factory of the inverse matrix used in the Schur complement"); - - validParamList->set ("omega", one, "Scaling parameter in S = A(1,1) - 1/omega A(1,0) Ainv A(0,1)"); - - return validParamList; - } - - template - void SchurComplementFactory::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "A"); - - // Get default or user-given inverse approximation factory - RCP AinvFact = GetFactory("Ainv"); - currentLevel.DeclareInput("Ainv", AinvFact.get(), this); - } - - template - void SchurComplementFactory::Build(Level& currentLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); - - RCP A = Get >(currentLevel, "A"); - RCP bA = rcp_dynamic_cast(A); - - TEUCHOS_TEST_FOR_EXCEPTION(bA.is_null(), Exceptions::BadCast, - "MueLu::SchurComplementFactory::Build: input matrix A is not of type BlockedCrsMatrix!"); - 
TEUCHOS_TEST_FOR_EXCEPTION(bA->Rows() != 2 || bA->Cols() != 2, Exceptions::RuntimeError, - "MueLu::SchurComplementFactory::Build: input matrix A is a " << bA->Rows() << "x" << bA->Cols() << " block matrix. We expect a 2x2 blocked operator."); - - // Calculate Schur Complement - RCP Ainv = currentLevel.Get >("Ainv", this->GetFactory("Ainv").get()); - RCP S = ComputeSchurComplement(bA, Ainv); - - GetOStream(Statistics1) << "S has " << S->getGlobalNumRows() << "x" << S->getGlobalNumCols() << " rows and columns." << std::endl; - - // NOTE: "A" generated by this factory is actually the Schur complement - // matrix, but it is required as all smoothers expect "A" - Set(currentLevel, "A", S); - } - - template - RCP> - SchurComplementFactory::ComputeSchurComplement(RCP& bA, RCP& Ainv) const { - - using STS = Teuchos::ScalarTraits; - const SC zero = STS::zero(), one = STS::one(); - - RCP A01 = bA->getMatrix(0,1); - RCP A10 = bA->getMatrix(1,0); - RCP A11 = bA->getMatrix(1,1); - - RCP bA01 = Teuchos::rcp_dynamic_cast(A01); - const bool isBlocked = (bA01 == Teuchos::null ? 
false : true); - - const ParameterList& pL = GetParameterList(); - const SC omega = pL.get("omega"); - - TEUCHOS_TEST_FOR_EXCEPTION(omega == zero, Exceptions::RuntimeError, - "MueLu::SchurComplementFactory::Build: Scaling parameter omega must not be zero to avoid division by zero."); - - RCP S = Teuchos::null; // Schur complement - RCP D = Teuchos::null; // temporary result for A10*Ainv*A01 - - // only if the off-diagonal blocks A10 and A01 are non-zero we have to do the MM multiplication - if(A01.is_null() == false && A10.is_null() == false) { - // scale with -1/omega - Ainv->scale(Teuchos::as(-one/omega)); - - // build Schur complement operator - if (!isBlocked) { - RCP myparams = rcp(new ParameterList); - myparams->set("compute global constants", true); - - // -1/omega*Ainv*A01 - TEUCHOS_TEST_FOR_EXCEPTION(A01->getRangeMap()->isSameAs(*(Ainv->getDomainMap())) == false, Exceptions::RuntimeError, - "MueLu::SchurComplementFactory::Build: RangeMap of A01 and domain map of Ainv are not the same."); - RCP C = MatrixMatrix::Multiply(*Ainv, false, *A01, false, GetOStream(Statistics2), true, true, std::string("SchurComplementFactory"), myparams); - - // -1/omega*A10*Ainv*A01 - TEUCHOS_TEST_FOR_EXCEPTION(A01->getRangeMap()->isSameAs(*(A10->getDomainMap())) == false, Exceptions::RuntimeError, - "MueLu::SchurComplementFactory::Build: RangeMap of A10 and domain map A01 are not the same."); - D = MatrixMatrix::Multiply(*A10, false, *C, false, GetOStream(Statistics2), true, true, std::string("SchurComplementFactory"), myparams); - } - else { - // nested blocking - auto bA10 = Teuchos::rcp_dynamic_cast(A10); - auto bAinv = Teuchos::rcp_dynamic_cast(Ainv); - TEUCHOS_TEST_FOR_EXCEPTION(bAinv == Teuchos::null, Exceptions::RuntimeError, - "MueLu::SchurComplementFactory::Build: Casting Ainv to BlockedCrsMatrix not possible."); - - // -1/omega*bAinv*bA01 - TEUCHOS_TEST_FOR_EXCEPTION(bA01->Rows() != bAinv->Cols(), Exceptions::RuntimeError, - "MueLu::SchurComplementFactory::Build: 
Block rows and cols of bA01 and bAinv are not compatible."); - RCP C = MatrixMatrix::TwoMatrixMultiplyBlock(*bAinv, false, *bA01, false, GetOStream(Statistics2)); - - // -1/omega*A10*Ainv*A01 - TEUCHOS_TEST_FOR_EXCEPTION(bA10->Rows() != bA01->Cols(), Exceptions::RuntimeError, - "MueLu::SchurComplementFactory::Build: Block rows and cols of bA10 and bA01 are not compatible."); - D = MatrixMatrix::TwoMatrixMultiplyBlock(*bA10, false, *C, false, GetOStream(Statistics2)); - } - if (!A11.is_null()) { - MatrixMatrix::TwoMatrixAdd(*A11, false, one, *D, false, one, S, GetOStream(Statistics2)); - S->fillComplete(); - - TEUCHOS_TEST_FOR_EXCEPTION(A11->getRangeMap()->isSameAs(*(S->getRangeMap())) == false, Exceptions::RuntimeError, - "MueLu::SchurComplementFactory::Build: RangeMap of A11 and S are not the same."); - TEUCHOS_TEST_FOR_EXCEPTION(A11->getDomainMap()->isSameAs(*(S->getDomainMap())) == false, Exceptions::RuntimeError, - "MueLu::SchurComplementFactory::Build: DomainMap of A11 and S are not the same."); - } - else { - S = MatrixFactory::BuildCopy(D); - } +template +RCP +SchurComplementFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + + const SC one = Teuchos::ScalarTraits::one(); + + validParamList->set>( + "A", NoFactory::getRCP(), + "Generating factory of the matrix A used for building Schur complement " + "(must be a 2x2 blocked operator)"); + validParamList->set>( + "Ainv", Teuchos::null, + "Generating factory of the inverse matrix used in the Schur complement"); + + validParamList->set( + "omega", one, + "Scaling parameter in S = A(1,1) - 1/omega A(1,0) Ainv A(0,1)"); + + return validParamList; +} + +template +void SchurComplementFactory::DeclareInput(Level ¤tLevel) const { + Input(currentLevel, "A"); + + // Get default or user-given inverse approximation factory + RCP AinvFact = GetFactory("Ainv"); + currentLevel.DeclareInput("Ainv", AinvFact.get(), this); +} + +template +void SchurComplementFactory::Build( + Level 
¤tLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + + RCP A = Get>(currentLevel, "A"); + RCP bA = rcp_dynamic_cast(A); + + TEUCHOS_TEST_FOR_EXCEPTION(bA.is_null(), Exceptions::BadCast, + "MueLu::SchurComplementFactory::Build: input " + "matrix A is not of type BlockedCrsMatrix!"); + TEUCHOS_TEST_FOR_EXCEPTION( + bA->Rows() != 2 || bA->Cols() != 2, Exceptions::RuntimeError, + "MueLu::SchurComplementFactory::Build: input matrix A is a " + << bA->Rows() << "x" << bA->Cols() + << " block matrix. We expect a 2x2 blocked operator."); + + // Calculate Schur Complement + RCP Ainv = + currentLevel.Get>("Ainv", this->GetFactory("Ainv").get()); + RCP S = ComputeSchurComplement(bA, Ainv); + + GetOStream(Statistics1) << "S has " << S->getGlobalNumRows() << "x" + << S->getGlobalNumCols() << " rows and columns." + << std::endl; + + // NOTE: "A" generated by this factory is actually the Schur complement + // matrix, but it is required as all smoothers expect "A" + Set(currentLevel, "A", S); +} + +template +RCP> +SchurComplementFactory::ComputeSchurComplement(RCP &bA, + RCP &Ainv) const { + + using STS = Teuchos::ScalarTraits; + const SC zero = STS::zero(), one = STS::one(); + + RCP A01 = bA->getMatrix(0, 1); + RCP A10 = bA->getMatrix(1, 0); + RCP A11 = bA->getMatrix(1, 1); + + RCP bA01 = Teuchos::rcp_dynamic_cast(A01); + const bool isBlocked = (bA01 == Teuchos::null ? 
false : true); + + const ParameterList &pL = GetParameterList(); + const SC omega = pL.get("omega"); + + TEUCHOS_TEST_FOR_EXCEPTION( + omega == zero, Exceptions::RuntimeError, + "MueLu::SchurComplementFactory::Build: Scaling parameter omega must not " + "be zero to avoid division by zero."); + + RCP S = Teuchos::null; // Schur complement + RCP D = Teuchos::null; // temporary result for A10*Ainv*A01 + + // only if the off-diagonal blocks A10 and A01 are non-zero we have to do the + // MM multiplication + if (A01.is_null() == false && A10.is_null() == false) { + // scale with -1/omega + Ainv->scale(Teuchos::as(-one / omega)); + + // build Schur complement operator + if (!isBlocked) { + RCP myparams = rcp(new ParameterList); + myparams->set("compute global constants", true); + + // -1/omega*Ainv*A01 + TEUCHOS_TEST_FOR_EXCEPTION( + A01->getRangeMap()->isSameAs(*(Ainv->getDomainMap())) == false, + Exceptions::RuntimeError, + "MueLu::SchurComplementFactory::Build: RangeMap of A01 and domain " + "map of Ainv are not the same."); + RCP C = MatrixMatrix::Multiply( + *Ainv, false, *A01, false, GetOStream(Statistics2), true, true, + std::string("SchurComplementFactory"), myparams); + + // -1/omega*A10*Ainv*A01 + TEUCHOS_TEST_FOR_EXCEPTION( + A01->getRangeMap()->isSameAs(*(A10->getDomainMap())) == false, + Exceptions::RuntimeError, + "MueLu::SchurComplementFactory::Build: RangeMap of A10 and domain " + "map A01 are not the same."); + D = MatrixMatrix::Multiply( + *A10, false, *C, false, GetOStream(Statistics2), true, true, + std::string("SchurComplementFactory"), myparams); + } else { + // nested blocking + auto bA10 = Teuchos::rcp_dynamic_cast(A10); + auto bAinv = Teuchos::rcp_dynamic_cast(Ainv); + TEUCHOS_TEST_FOR_EXCEPTION( + bAinv == Teuchos::null, Exceptions::RuntimeError, + "MueLu::SchurComplementFactory::Build: Casting Ainv to " + "BlockedCrsMatrix not possible."); + + // -1/omega*bAinv*bA01 + TEUCHOS_TEST_FOR_EXCEPTION( + bA01->Rows() != bAinv->Cols(), 
Exceptions::RuntimeError, + "MueLu::SchurComplementFactory::Build: Block rows and cols of bA01 " + "and bAinv are not compatible."); + RCP C = MatrixMatrix::TwoMatrixMultiplyBlock( + *bAinv, false, *bA01, false, GetOStream(Statistics2)); + + // -1/omega*A10*Ainv*A01 + TEUCHOS_TEST_FOR_EXCEPTION( + bA10->Rows() != bA01->Cols(), Exceptions::RuntimeError, + "MueLu::SchurComplementFactory::Build: Block rows and cols of bA10 " + "and bA01 are not compatible."); + D = MatrixMatrix::TwoMatrixMultiplyBlock(*bA10, false, *C, false, + GetOStream(Statistics2)); } - else { - if (!A11.is_null()) { - S = MatrixFactory::BuildCopy(A11); - } else { - S = MatrixFactory::Build(A11->getRowMap(), 10 /*A11->getLocalMaxNumRowEntries()*/); - S->fillComplete(A11->getDomainMap(),A11->getRangeMap()); - } + if (!A11.is_null()) { + MatrixMatrix::TwoMatrixAdd(*A11, false, one, *D, false, one, S, + GetOStream(Statistics2)); + S->fillComplete(); + + TEUCHOS_TEST_FOR_EXCEPTION( + A11->getRangeMap()->isSameAs(*(S->getRangeMap())) == false, + Exceptions::RuntimeError, + "MueLu::SchurComplementFactory::Build: RangeMap of A11 and S are not " + "the same."); + TEUCHOS_TEST_FOR_EXCEPTION( + A11->getDomainMap()->isSameAs(*(S->getDomainMap())) == false, + Exceptions::RuntimeError, + "MueLu::SchurComplementFactory::Build: DomainMap of A11 and S are " + "not the same."); + } else { + S = MatrixFactory::BuildCopy(D); } - - // Check whether Schur complement operator is a 1x1 block matrix. - // If so, unwrap it and return the CrsMatrix based Matrix object - // We need this, as single-block smoothers expect it this way. - // In case of Thyra GIDs we obtain a Schur complement operator in Thyra GIDs - // This may make some special handling in feeding the SchurComplement solver Apply routine - // necessary! 
- if (isBlocked) { - RCP bS = Teuchos::rcp_dynamic_cast(S); - - if (bS != Teuchos::null && bS->Rows() == 1 && bS->Cols() == 1) { - RCP temp = bS->getCrsMatrix(); - S.swap(temp); - } + } else { + if (!A11.is_null()) { + S = MatrixFactory::BuildCopy(A11); + } else { + S = MatrixFactory::Build(A11->getRowMap(), + 10 /*A11->getLocalMaxNumRowEntries()*/); + S->fillComplete(A11->getDomainMap(), A11->getRangeMap()); } + } - return S; + // Check whether Schur complement operator is a 1x1 block matrix. + // If so, unwrap it and return the CrsMatrix based Matrix object + // We need this, as single-block smoothers expect it this way. + // In case of Thyra GIDs we obtain a Schur complement operator in Thyra GIDs + // This may make some special handling in feeding the SchurComplement solver + // Apply routine necessary! + if (isBlocked) { + RCP bS = Teuchos::rcp_dynamic_cast(S); + + if (bS != Teuchos::null && bS->Rows() == 1 && bS->Cols() == 1) { + RCP temp = bS->getCrsMatrix(); + S.swap(temp); + } } + return S; +} + } // namespace MueLu #endif /* MUELU_SCHURCOMPLEMENTFACTORY_DEF_HPP_ */ diff --git a/packages/muelu/src/Misc/MueLu_SegregatedAFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_SegregatedAFactory_decl.hpp index 698be7f781aa..96755c9a3ad0 100644 --- a/packages/muelu/src/Misc/MueLu_SegregatedAFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_SegregatedAFactory_decl.hpp @@ -55,62 +55,62 @@ namespace MueLu { - /*! - @class SegregatedAFactory class. - @brief Factory for building a new "segregated" A operator. Here, "segregated" means that the user - provides a map (containing a subset of the row gids of the input matrix A) and the factory - drops the off-diagonal entries (a,b) and (b,a) in A where "a" denotes a GID entry in the provided map - and "b" denotes a GID that is not contained in the provided map. 
- - The idea is to use the output matrix A as input for the aggregation factory to have control over - the aggregates and make sure that aggregates do not cross certain areas. - - Note: we have to drop the entries (i.e. not just set them to zero) as the CoalesceDropFactory - does not distinguish between matrix entries which are zero and nonzero. - */ - - template - class SegregatedAFactory : public SingleLevelFactoryBase { +/*! + @class SegregatedAFactory class. + @brief Factory for building a new "segregated" A operator. Here, "segregated" + means that the user provides a map (containing a subset of the row gids of the + input matrix A) and the factory drops the off-diagonal entries (a,b) and (b,a) + in A where "a" denotes a GID entry in the provided map and "b" denotes a GID + that is not contained in the provided map. + + The idea is to use the output matrix A as input for the aggregation + factory to have control over the aggregates and make sure that aggregates do + not cross certain areas. + + Note: we have to drop the entries (i.e. not just set them to zero) as + the CoalesceDropFactory does not distinguish between matrix entries which are + zero and nonzero. +*/ + +template +class SegregatedAFactory : public SingleLevelFactoryBase { #undef MUELU_SEGREGATEDAFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! Constructor. - SegregatedAFactory() = default; +public: + //! Constructor. + SegregatedAFactory() = default; - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level& currentLevel) const; + void DeclareInput(Level ¤tLevel) const; - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - /*! - @brief Build method. - - Builds filtered matrix and returns it in currentLevel. - */ - void Build(Level& currentLevel) const; + /*! + @brief Build method. - //@} + Builds filtered matrix and returns it in currentLevel. 
+ */ + void Build(Level ¤tLevel) const; - private: + //@} - //! Generating factory of input variable - mutable RCP mapFact_; +private: + //! Generating factory of input variable + mutable RCP mapFact_; - }; //class SegregatedAFactory +}; // class SegregatedAFactory -} //namespace MueLu +} // namespace MueLu #define MUELU_SEGREGATEDAFACTORY_SHORT #endif // MUELU_SEGREGATEDAFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_SegregatedAFactory_def.hpp b/packages/muelu/src/Misc/MueLu_SegregatedAFactory_def.hpp index c2993c84253b..192c75ec0503 100644 --- a/packages/muelu/src/Misc/MueLu_SegregatedAFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_SegregatedAFactory_def.hpp @@ -57,112 +57,135 @@ namespace MueLu { - template - RCP SegregatedAFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - -#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) -#undef SET_VALID_ENTRY - - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A used for filtering"); - - validParamList->set< std::string > ("map: name", "", "Name of map (Xpetra::Map) provided by user containing the special DOFs."); - validParamList->set< std::string > ("map: factory", "", "Name of generating factory for 'map: name'"); - - return validParamList; +template +RCP +SegregatedAFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + +#define SET_VALID_ENTRY(name) \ + validParamList->setEntry(name, MasterList::getEntry(name)) +#undef SET_VALID_ENTRY + + validParamList->set>( + "A", Teuchos::null, + "Generating factory of the matrix A used for filtering"); + + validParamList->set("map: name", "", + "Name of map (Xpetra::Map) provided by user " + "containing the special DOFs."); + validParamList->set( + "map: factory", "", "Name of generating factory for 'map: name'"); + + return validParamList; +} + +template +void SegregatedAFactory::DeclareInput(Level ¤tLevel) const { 
+ Input(currentLevel, "A"); + + const ParameterList &pL = GetParameterList(); + std::string mapName = pL.get("map: name"); + std::string mapFactName = pL.get("map: factory"); + + if (currentLevel.GetLevelID() == 0) { + // Not needed, if the map is provided as user data + currentLevel.DeclareInput(mapName, NoFactory::get(), this); + } else { + // check whether user has provided a specific name for the MapFactory + if (mapFactName == "" || mapFactName == "NoFactory") + mapFact_ = MueLu::NoFactory::getRCP(); + else if (mapFactName != "null") + mapFact_ = currentLevel.GetFactoryManager()->GetFactory(mapFactName); + + // request map generated by mapFact_ + currentLevel.DeclareInput(mapName, mapFact_.get(), this); } - - template - void SegregatedAFactory::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "A"); - - const ParameterList& pL = GetParameterList(); - std::string mapName = pL.get ("map: name"); - std::string mapFactName = pL.get ("map: factory"); - - if (currentLevel.GetLevelID() == 0) - { - // Not needed, if the map is provided as user data - currentLevel.DeclareInput(mapName, NoFactory::get(), this); - } - else - { - // check whether user has provided a specific name for the MapFactory - if (mapFactName == "" || mapFactName == "NoFactory") - mapFact_ = MueLu::NoFactory::getRCP(); - else if (mapFactName != "null") - mapFact_ = currentLevel.GetFactoryManager()->GetFactory(mapFactName); - - // request map generated by mapFact_ - currentLevel.DeclareInput(mapName, mapFact_.get(), this); - } +} + +template +void SegregatedAFactory::Build( + Level ¤tLevel) const { + FactoryMonitor m(*this, "Matrix filtering (segregation)", currentLevel); + + RCP Ain = Get>(currentLevel, "A"); + + const ParameterList &pL = GetParameterList(); + std::string mapName = pL.get("map: name"); + std::string mapFact = pL.get("map: factory"); + + // fetch map from level + RCP map = Teuchos::null; + if (currentLevel.GetLevelID() == 0) { + map = currentLevel.Get>(mapName, 
NoFactory::get()); + GetOStream(Statistics0) + << "User provided map \"" << mapName + << "\": length dimension=" << map->getGlobalNumElements() << std::endl; + } else { + if (currentLevel.IsAvailable(mapName, mapFact_.get()) == false) + GetOStream(Runtime0) << "User provided map \"" << mapName + << "\" not found in Level class on level " + << currentLevel.GetLevelID() << "." << std::endl; + map = currentLevel.Get>(mapName, mapFact_.get()); } - template - void SegregatedAFactory::Build(Level& currentLevel) const { - FactoryMonitor m(*this, "Matrix filtering (segregation)", currentLevel); - - RCP Ain = Get< RCP >(currentLevel, "A"); - - const ParameterList& pL = GetParameterList(); - std::string mapName = pL.get ("map: name"); - std::string mapFact = pL.get ("map: factory"); - - // fetch map from level - RCP map = Teuchos::null; - if (currentLevel.GetLevelID() == 0) { - map = currentLevel.Get>(mapName, NoFactory::get()); - GetOStream(Statistics0) << "User provided map \"" << mapName << "\": length dimension=" << map->getGlobalNumElements() << std::endl; - } else { - if (currentLevel.IsAvailable(mapName, mapFact_.get()) == false) - GetOStream(Runtime0) << "User provided map \"" << mapName << "\" not found in Level class on level " << currentLevel.GetLevelID() << "." 
<< std::endl; - map = currentLevel.Get>(mapName, mapFact_.get()); - } - - // create new empty Operator - Teuchos::RCP Aout = MatrixFactory::Build(Ain->getRowMap(), Ain->getGlobalMaxNumRowEntries()); - - size_t numLocalRows = Ain->getLocalNumRows(); - for(size_t row=0; rowgetRowMap()->getGlobalElement(row); // global row id - bool isInMap = map->isNodeGlobalElement(grid); - - // extract row information from input matrix - Teuchos::ArrayView indices; - Teuchos::ArrayView vals; - Ain->getLocalRowView(row, indices, vals); - - // just copy all values in output - Teuchos::ArrayRCP indout(indices.size(),Teuchos::ScalarTraits::zero()); - Teuchos::ArrayRCP valout(indices.size(),Teuchos::ScalarTraits::zero()); - - size_t nNonzeros = 0; - for(size_t i=0; i<(size_t)indices.size(); i++) { // or can be parallelize this loop? - GlobalOrdinal gcid = Ain->getColMap()->getGlobalElement(indices[i]); // global column id - bool isInMap2 = map->isNodeGlobalElement(gcid); - - if (isInMap == isInMap2) { - indout [nNonzeros] = gcid; - valout [nNonzeros] = vals[i]; - nNonzeros++; - } + // create new empty Operator + Teuchos::RCP Aout = + MatrixFactory::Build(Ain->getRowMap(), Ain->getGlobalMaxNumRowEntries()); + + size_t numLocalRows = Ain->getLocalNumRows(); + for (size_t row = 0; row < numLocalRows; + row++) { // how can i replace this by a parallel for? + GlobalOrdinal grid = + Ain->getRowMap()->getGlobalElement(row); // global row id + bool isInMap = map->isNodeGlobalElement(grid); + + // extract row information from input matrix + Teuchos::ArrayView indices; + Teuchos::ArrayView vals; + Ain->getLocalRowView(row, indices, vals); + + // just copy all values in output + Teuchos::ArrayRCP indout( + indices.size(), Teuchos::ScalarTraits::zero()); + Teuchos::ArrayRCP valout(indices.size(), + Teuchos::ScalarTraits::zero()); + + size_t nNonzeros = 0; + for (size_t i = 0; i < (size_t)indices.size(); + i++) { // or can be parallelize this loop? 
+ GlobalOrdinal gcid = + Ain->getColMap()->getGlobalElement(indices[i]); // global column id + bool isInMap2 = map->isNodeGlobalElement(gcid); + + if (isInMap == isInMap2) { + indout[nNonzeros] = gcid; + valout[nNonzeros] = vals[i]; + nNonzeros++; } - indout.resize(nNonzeros); - valout.resize(nNonzeros); - - Aout->insertGlobalValues(Ain->getRowMap()->getGlobalElement(row), indout.view(0,indout.size()), valout.view(0,valout.size())); } + indout.resize(nNonzeros); + valout.resize(nNonzeros); - Aout->fillComplete(Ain->getDomainMap(), Ain->getRangeMap()); + Aout->insertGlobalValues(Ain->getRowMap()->getGlobalElement(row), + indout.view(0, indout.size()), + valout.view(0, valout.size())); + } - // copy block size information - Aout->SetFixedBlockSize(Ain->GetFixedBlockSize()); + Aout->fillComplete(Ain->getDomainMap(), Ain->getRangeMap()); - GetOStream(Statistics0, 0) << "Nonzeros in A (input): " << Ain->getGlobalNumEntries() << ", Nonzeros after filtering A: " << Aout->getGlobalNumEntries() << std::endl; + // copy block size information + Aout->SetFixedBlockSize(Ain->GetFixedBlockSize()); - Set(currentLevel, "A", Aout); - } + GetOStream(Statistics0, 0) + << "Nonzeros in A (input): " << Ain->getGlobalNumEntries() + << ", Nonzeros after filtering A: " << Aout->getGlobalNumEntries() + << std::endl; + + Set(currentLevel, "A", Aout); +} -} //namespace MueLu +} // namespace MueLu #endif // MUELU_SEGREGATEDAFACTORY_DEF_HPP diff --git a/packages/muelu/src/Misc/MueLu_StructuredLineDetectionFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_StructuredLineDetectionFactory_decl.hpp index 86491d0ad5af..2bce3a9eb713 100644 --- a/packages/muelu/src/Misc/MueLu_StructuredLineDetectionFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_StructuredLineDetectionFactory_decl.hpp @@ -54,57 +54,54 @@ namespace MueLu { - /*! - @class StructuredLineDetectionFactory class. 
- @brief Factory building line detection information on structured meshes - */ - - template - class StructuredLineDetectionFactory : public SingleLevelFactoryBase { +/*! + @class StructuredLineDetectionFactory class. + @brief Factory building line detection information on structured meshes +*/ + +template +class StructuredLineDetectionFactory : public SingleLevelFactoryBase { #undef MUELU_STRUCTUREDLINEDETECTIONFACTORY_SHORT #include "MueLu_UseShortNames.hpp" - public: - - //! @name Constructors/Destructors. - //@{ +public: + //! @name Constructors/Destructors. + //@{ - StructuredLineDetectionFactory() { } + StructuredLineDetectionFactory() {} - //! Destructor. - virtual ~StructuredLineDetectionFactory() { } + //! Destructor. + virtual ~StructuredLineDetectionFactory() {} - RCP GetValidParameterList() const; + RCP GetValidParameterList() const; - //@} + //@} - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level& currentLevel) const; + void DeclareInput(Level ¤tLevel) const; - //@} + //@} - //! @name Build methods. - //@{ + //! @name Build methods. + //@{ - /*! - @brief Build method. - - Builds line detection information and stores it in currentLevel - */ - void Build(Level& currentLevel) const; + /*! + @brief Build method. 
- //@} + Builds line detection information and stores it in currentLevel + */ + void Build(Level ¤tLevel) const; - private: + //@} - }; //class StructuredLineDetectionFactory +private: +}; // class StructuredLineDetectionFactory -} //namespace MueLu +} // namespace MueLu #define MUELU_STRUCTUREDLINEDETECTIONFACTORY_SHORT #endif // MUELU_STRUCTUREDLINEDETECTIONFACTORY_DECL_HPP diff --git a/packages/muelu/src/Misc/MueLu_StructuredLineDetectionFactory_def.hpp b/packages/muelu/src/Misc/MueLu_StructuredLineDetectionFactory_def.hpp index 86c2e0493498..3140c2b3e69b 100644 --- a/packages/muelu/src/Misc/MueLu_StructuredLineDetectionFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_StructuredLineDetectionFactory_def.hpp @@ -54,76 +54,90 @@ namespace MueLu { - template - RCP StructuredLineDetectionFactory::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); - - validParamList->set< RCP >("A", Teuchos::null, "Generating factory of the matrix A"); - validParamList->set< std::string > ("orientation", "Z", "Lines orientation"); - validParamList->set< RCP >("lNodesPerDim", Teuchos::null, "Number of nodes per spatial dimension provided by CoordinatesTransferFactory."); - - return validParamList; - } - - template - void StructuredLineDetectionFactory::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "A"); - // Request the global number of nodes per dimensions - if(currentLevel.GetLevelID() == 0) { - if(currentLevel.IsAvailable("lNodesPerDim", NoFactory::get())) { - currentLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("gNodesPerDim", NoFactory::get()), - Exceptions::RuntimeError, - "lNodesPerDim was not provided by the user on level0!"); - } +template +RCP +StructuredLineDetectionFactory::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + + validParamList->set>( + "A", Teuchos::null, "Generating factory of the matrix A"); + 
validParamList->set("orientation", "Z", "Lines orientation"); + validParamList->set>( + "lNodesPerDim", Teuchos::null, + "Number of nodes per spatial dimension provided by " + "CoordinatesTransferFactory."); + + return validParamList; +} + +template +void StructuredLineDetectionFactory::DeclareInput(Level ¤tLevel) + const { + Input(currentLevel, "A"); + // Request the global number of nodes per dimensions + if (currentLevel.GetLevelID() == 0) { + if (currentLevel.IsAvailable("lNodesPerDim", NoFactory::get())) { + currentLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this); } else { - Input(currentLevel, "lNodesPerDim"); + TEUCHOS_TEST_FOR_EXCEPTION( + currentLevel.IsAvailable("gNodesPerDim", NoFactory::get()), + Exceptions::RuntimeError, + "lNodesPerDim was not provided by the user on level0!"); } + } else { + Input(currentLevel, "lNodesPerDim"); + } +} + +template +void StructuredLineDetectionFactory::Build(Level ¤tLevel) const { + + // The following three variables are needed by the line smoothers in + // Ifpack/Ifpack2 + LO NumZDir = 0; + Teuchos::ArrayRCP VertLineId = Teuchos::arcp(0); + + // collect information provided by user + const ParameterList &pL = GetParameterList(); + const std::string lineOrientation = pL.get("orientation"); + + // Extract data from currentLevel + RCP A = Get>(currentLevel, "A"); + Array lNodesPerDir = Get>(currentLevel, "lNodesPerDim"); + LO numNodes = lNodesPerDir[0] * lNodesPerDir[1] * lNodesPerDir[2]; + VertLineId.resize(numNodes); + if (lineOrientation == "X") { + NumZDir = lNodesPerDir[0]; + } else if (lineOrientation == "Y") { + NumZDir = lNodesPerDir[1]; + } else if (lineOrientation == "Z") { + NumZDir = lNodesPerDir[2]; } - template - void StructuredLineDetectionFactory::Build(Level& currentLevel) const { - - // The following three variables are needed by the line smoothers in Ifpack/Ifpack2 - LO NumZDir = 0; - Teuchos::ArrayRCP VertLineId = Teuchos::arcp(0); - - // collect information provided by user - const 
ParameterList& pL = GetParameterList(); - const std::string lineOrientation = pL.get("orientation"); - - // Extract data from currentLevel - RCP A = Get< RCP >(currentLevel, "A"); - Array lNodesPerDir = Get > (currentLevel, "lNodesPerDim"); - LO numNodes = lNodesPerDir[0]*lNodesPerDir[1]*lNodesPerDir[2]; - VertLineId.resize(numNodes); - if(lineOrientation == "X") { - NumZDir = lNodesPerDir[0]; - } else if(lineOrientation == "Y") { - NumZDir = lNodesPerDir[1]; - } else if(lineOrientation == "Z") { - NumZDir = lNodesPerDir[2]; - } - - for(LO k = 0; k < lNodesPerDir[2]; ++k) { - for(LO j = 0; j < lNodesPerDir[1]; ++j) { - for(LO i = 0; i < lNodesPerDir[0]; ++i) { - if(lineOrientation == "X") { - VertLineId[k*lNodesPerDir[1]*lNodesPerDir[0] + j*lNodesPerDir[0] + i] = k*lNodesPerDir[1] + j; - } else if(lineOrientation == "Y") { - VertLineId[k*lNodesPerDir[1]*lNodesPerDir[0] + j*lNodesPerDir[0] + i] = k*lNodesPerDir[0] + i; - } else if(lineOrientation == "Z") { - VertLineId[k*lNodesPerDir[1]*lNodesPerDir[0] + j*lNodesPerDir[0] + i] = j*lNodesPerDir[0] + i; - } + for (LO k = 0; k < lNodesPerDir[2]; ++k) { + for (LO j = 0; j < lNodesPerDir[1]; ++j) { + for (LO i = 0; i < lNodesPerDir[0]; ++i) { + if (lineOrientation == "X") { + VertLineId[k * lNodesPerDir[1] * lNodesPerDir[0] + + j * lNodesPerDir[0] + i] = k * lNodesPerDir[1] + j; + } else if (lineOrientation == "Y") { + VertLineId[k * lNodesPerDir[1] * lNodesPerDir[0] + + j * lNodesPerDir[0] + i] = k * lNodesPerDir[0] + i; + } else if (lineOrientation == "Z") { + VertLineId[k * lNodesPerDir[1] * lNodesPerDir[0] + + j * lNodesPerDir[0] + i] = j * lNodesPerDir[0] + i; } } } - - Set(currentLevel, "CoarseNumZLayers", NumZDir); - Set(currentLevel, "LineDetection_VertLineIds", VertLineId); } -} //namespace MueLu + Set(currentLevel, "CoarseNumZLayers", NumZDir); + Set(currentLevel, "LineDetection_VertLineIds", VertLineId); +} + +} // namespace MueLu #endif // MUELU_STRUCTUREDLINEDETECTIONFACTORY_DEF_HPP diff --git 
a/packages/muelu/src/Misc/MueLu_ThresholdAFilterFactory_decl.hpp b/packages/muelu/src/Misc/MueLu_ThresholdAFilterFactory_decl.hpp index 5f798befa2e8..875b9a8a5498 100644 --- a/packages/muelu/src/Misc/MueLu_ThresholdAFilterFactory_decl.hpp +++ b/packages/muelu/src/Misc/MueLu_ThresholdAFilterFactory_decl.hpp @@ -59,50 +59,50 @@ namespace MueLu { - /*! - @class ThresholdAFilterFactory class. - @brief Factory for building a thresholded operator. +/*! + @class ThresholdAFilterFactory class. + @brief Factory for building a thresholded operator. - */ +*/ - template - class ThresholdAFilterFactory : public SingleLevelFactoryBase { +template +class ThresholdAFilterFactory : public SingleLevelFactoryBase { #undef MUELU_THRESHOLDAFILTERFACTORY_SHORT - #include "MueLu_UseShortNames.hpp" +#include "MueLu_UseShortNames.hpp" - public: - //! @name Constructors/Destructors. - //@{ +public: + //! @name Constructors/Destructors. + //@{ - //! Constructor. - ThresholdAFilterFactory(const std::string& ename, const Scalar threshold, const bool keepDiagonal=true, const GlobalOrdinal expectedNNZperRow=-1); + //! Constructor. + ThresholdAFilterFactory(const std::string &ename, const Scalar threshold, + const bool keepDiagonal = true, + const GlobalOrdinal expectedNNZperRow = -1); - //! Input - //@{ + //! Input + //@{ - void DeclareInput(Level ¤tLevel) const; + void DeclareInput(Level ¤tLevel) const; - //@} + //@} - //@{ - //! @name Build methods. + //@{ + //! @name Build methods. - //! Build an object with this factory. - void Build(Level & currentLevel) const; + //! Build an object with this factory. 
+ void Build(Level ¤tLevel) const; - //@} + //@} - private: - std::string varName_; ///< name of input and output variable - const Scalar threshold_; ///< threshold parameter - const bool keepDiagonal_; - const GlobalOrdinal expectedNNZperRow_; +private: + std::string varName_; ///< name of input and output variable + const Scalar threshold_; ///< threshold parameter + const bool keepDiagonal_; + const GlobalOrdinal expectedNNZperRow_; - - }; // class ThresholdAFilterFactory +}; // class ThresholdAFilterFactory } // namespace MueLu diff --git a/packages/muelu/src/Misc/MueLu_ThresholdAFilterFactory_def.hpp b/packages/muelu/src/Misc/MueLu_ThresholdAFilterFactory_def.hpp index 10da9befa53d..40ce144261f2 100644 --- a/packages/muelu/src/Misc/MueLu_ThresholdAFilterFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_ThresholdAFilterFactory_def.hpp @@ -46,8 +46,8 @@ #ifndef MUELU_THRESHOLDAFILTERFACTORY_DEF_HPP #define MUELU_THRESHOLDAFILTERFACTORY_DEF_HPP -#include #include +#include #include "MueLu_ThresholdAFilterFactory_decl.hpp" @@ -56,29 +56,39 @@ namespace MueLu { - template - ThresholdAFilterFactory::ThresholdAFilterFactory(const std::string& ename, const Scalar threshold, const bool keepDiagonal, const GlobalOrdinal expectedNNZperRow) - : varName_(ename), threshold_(threshold), keepDiagonal_(keepDiagonal), expectedNNZperRow_(expectedNNZperRow) - { } +template +ThresholdAFilterFactory:: + ThresholdAFilterFactory(const std::string &ename, const Scalar threshold, + const bool keepDiagonal, + const GlobalOrdinal expectedNNZperRow) + : varName_(ename), threshold_(threshold), keepDiagonal_(keepDiagonal), + expectedNNZperRow_(expectedNNZperRow) {} - template - void ThresholdAFilterFactory::DeclareInput(Level ¤tLevel) const { - Input(currentLevel, varName_); - } +template +void ThresholdAFilterFactory::DeclareInput(Level ¤tLevel) const { + Input(currentLevel, varName_); +} - template - void ThresholdAFilterFactory:: - Build (Level & currentLevel) const - { - 
FactoryMonitor m (*this, "A filter (thresholding)", currentLevel); +template +void ThresholdAFilterFactory::Build( + Level ¤tLevel) const { + FactoryMonitor m(*this, "A filter (thresholding)", currentLevel); - RCP Ain = Get< RCP >(currentLevel, varName_); - RCP Aout = - MueLu::Utilities::GetThresholdedMatrix(Ain, threshold_, keepDiagonal_, expectedNNZperRow_); + RCP Ain = Get>(currentLevel, varName_); + RCP Aout = + MueLu::Utilities::GetThresholdedMatrix(Ain, threshold_, + keepDiagonal_, + expectedNNZperRow_); - GetOStream(Statistics0) << "Nonzeros in " << varName_ << "(input): " << Ain->getGlobalNumEntries() << ", Nonzeros after filtering " << varName_ << " (parameter: " << threshold_ << "): " << Aout->getGlobalNumEntries() << std::endl; - currentLevel.Set(varName_, Teuchos::rcp_dynamic_cast(Aout), this); - } + GetOStream(Statistics0) << "Nonzeros in " << varName_ + << "(input): " << Ain->getGlobalNumEntries() + << ", Nonzeros after filtering " << varName_ + << " (parameter: " << threshold_ + << "): " << Aout->getGlobalNumEntries() << std::endl; + currentLevel.Set(varName_, Teuchos::rcp_dynamic_cast(Aout), this); +} } // namespace MueLu diff --git a/packages/muelu/src/MueCentral/MueLu_BaseClass.hpp b/packages/muelu/src/MueCentral/MueLu_BaseClass.hpp index d291d05c4a9f..f3299f916506 100644 --- a/packages/muelu/src/MueCentral/MueLu_BaseClass.hpp +++ b/packages/muelu/src/MueCentral/MueLu_BaseClass.hpp @@ -47,48 +47,48 @@ #define MUELU_BASECLASS_HPP #include "MueLu_ConfigDefs.hpp" -#include "MueLu_VerboseObject.hpp" #include "MueLu_Describable.hpp" +#include "MueLu_VerboseObject.hpp" namespace MueLu { - /*! - @class BaseClass class. - @brief Base class for MueLu classes - - @ingroup MueLuBaseClasses - */ - class BaseClass - : public VerboseObject, public Describable - { +/*! + @class BaseClass class. + @brief Base class for MueLu classes - public: + @ingroup MueLuBaseClasses +*/ +class BaseClass : public VerboseObject, public Describable { - //! 
@name Constructors/Destructors - //@{ +public: + //! @name Constructors/Destructors + //@{ - //! Destructor. - virtual ~BaseClass() {} + //! Destructor. + virtual ~BaseClass() {} - //@} + //@} - }; // class BaseClass +}; // class BaseClass } // namespace MueLu //! Helper macro for implementing Describable::describe() for BaseClass objects. -// This macro defines ostream out0 that print only on root node. It print description() and indent the ostream. -// Note: Runtime1 displays basic parameter information when Parameters0 is not enabled. -#define MUELU_DESCRIBE \ - using std::endl; \ - Teuchos::FancyOStream& out0 = (VerboseObject::GetProcRankVerbose() == 0) ? out : VerboseObject::GetBlackHole(); \ - \ - if ((verbLevel & Runtime1) && (!(verbLevel & Parameters0))) \ - out << description() << std::endl; \ - else if (verbLevel & Runtime0) \ - out << BaseClass::description() << std::endl; \ - \ - Teuchos::OSTab tab1(out); \ +// This macro defines ostream out0 that print only on root node. It print +// description() and indent the ostream. Note: Runtime1 displays basic +// parameter information when Parameters0 is not enabled. +#define MUELU_DESCRIBE \ + using std::endl; \ + Teuchos::FancyOStream &out0 = (VerboseObject::GetProcRankVerbose() == 0) \ + ? 
out \ + : VerboseObject::GetBlackHole(); \ + \ + if ((verbLevel & Runtime1) && (!(verbLevel & Parameters0))) \ + out << description() << std::endl; \ + else if (verbLevel & Runtime0) \ + out << BaseClass::description() << std::endl; \ + \ + Teuchos::OSTab tab1(out); \ // #define MUELU_BASECLASS_SHORT diff --git a/packages/muelu/src/MueCentral/MueLu_Describable.cpp b/packages/muelu/src/MueCentral/MueLu_Describable.cpp index c2bba36bc122..ba5211cc4eda 100644 --- a/packages/muelu/src/MueCentral/MueLu_Describable.cpp +++ b/packages/muelu/src/MueCentral/MueLu_Describable.cpp @@ -50,49 +50,54 @@ namespace MueLu { - Describable::~Describable() { } +Describable::~Describable() {} - void Describable::describe(Teuchos::FancyOStream &out_arg, const VerbLevel /* verbLevel */) const { - Teuchos::RCP out = rcp(&out_arg,false); //JG: no idea why we have to do that, but it's how Teuchos::Describable::describe() is implemented - Teuchos::OSTab tab(out); - *out << this->description() << std::endl; - } +void Describable::describe(Teuchos::FancyOStream &out_arg, + const VerbLevel /* verbLevel */) const { + Teuchos::RCP out = + rcp(&out_arg, false); // JG: no idea why we have to do that, but it's how + // Teuchos::Describable::describe() is implemented + Teuchos::OSTab tab(out); + *out << this->description() << std::endl; +} - std::string Describable::description() const { - std::string str = Teuchos::Describable::description(); +std::string Describable::description() const { + std::string str = Teuchos::Describable::description(); - // remove template parameters - size_t found = str.find_first_of("<"); - if (found != std::string::npos) - return str.substr(0, found); + // remove template parameters + size_t found = str.find_first_of("<"); + if (found != std::string::npos) + return str.substr(0, found); - return str; - } + return str; +} - void Describable::describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const { describe(out, toMueLuVerbLevel(verbLevel)); 
} +void Describable::describe(Teuchos::FancyOStream &out, + const Teuchos::EVerbosityLevel verbLevel) const { + describe(out, toMueLuVerbLevel(verbLevel)); +} - std::string Describable::ShortClassName() const { - if ( shortClassName_.empty() ) - { - std::string str = Teuchos::Describable::description(); +std::string Describable::ShortClassName() const { + if (shortClassName_.empty()) { + std::string str = Teuchos::Describable::description(); - // remove template parameters - { - size_t found = str.find_first_of("<"); - if (found != std::string::npos) - str = str.substr(0, found); - } + // remove template parameters + { + size_t found = str.find_first_of("<"); + if (found != std::string::npos) + str = str.substr(0, found); + } - // remove namespace - { - size_t found = str.find_last_of(":"); - if (found != std::string::npos) - str = str.substr(found+1); - } - shortClassName_ = str; - } - return shortClassName_; + // remove namespace + { + size_t found = str.find_last_of(":"); + if (found != std::string::npos) + str = str.substr(found + 1); } + shortClassName_ = str; + } + return shortClassName_; +} } // namespace MueLu diff --git a/packages/muelu/src/MueCentral/MueLu_Describable.hpp b/packages/muelu/src/MueCentral/MueLu_Describable.hpp index 7dbb4dc08811..3cb708233e75 100644 --- a/packages/muelu/src/MueCentral/MueLu_Describable.hpp +++ b/packages/muelu/src/MueCentral/MueLu_Describable.hpp @@ -46,53 +46,57 @@ #ifndef MUELU_DESCRIBABLE_DECL_HPP #define MUELU_DESCRIBABLE_DECL_HPP -#include // for string -#include "Teuchos_FancyOStream.hpp" // for FancyOStream -#include "Teuchos_VerbosityLevel.hpp" // for EVerbosityLevel #include "Teuchos_Describable.hpp" +#include "Teuchos_FancyOStream.hpp" // for FancyOStream +#include "Teuchos_VerbosityLevel.hpp" // for EVerbosityLevel +#include // for string #include "MueLu_VerbosityLevel.hpp" namespace MueLu { - /*! - @class Describable - @brief Base class for MueLu classes +/*! 
+ @class Describable + @brief Base class for MueLu classes - @ingroup MueLuBaseClasses - */ - class Describable - : public Teuchos::Describable - { - mutable std::string shortClassName_ = ""; // cached so that we don't have to call demangleName() every time; mutable so that ShortClassName() can initialize lazily while remaining const + @ingroup MueLuBaseClasses +*/ +class Describable : public Teuchos::Describable { + mutable std::string shortClassName_ = + ""; // cached so that we don't have to call demangleName() every time; + // mutable so that ShortClassName() can initialize lazily while + // remaining const - public: +public: + //! Destructor. + virtual ~Describable(); - //! Destructor. - virtual ~Describable(); + //! @name MueLu Describe + //@{ - //! @name MueLu Describe - //@{ + virtual void describe(Teuchos::FancyOStream &out_arg, + const VerbLevel verbLevel = Default) const; - virtual void describe(Teuchos::FancyOStream &out_arg, const VerbLevel verbLevel = Default) const; + //@} - //@} + //! @name Overridden from Teuchos::Describable + //@{ - //! @name Overridden from Teuchos::Describable - //@{ + //! Return a simple one-line description of this object. + virtual std::string description() const; - //! Return a simple one-line description of this object. - virtual std::string description() const; + //! Print the object with some verbosity level to an FancyOStream object. + void describe(Teuchos::FancyOStream &out, + const Teuchos::EVerbosityLevel verbLevel = + Teuchos::Describable::verbLevel_default) const; - //! Print the object with some verbosity level to an FancyOStream object. - void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel = Teuchos::Describable::verbLevel_default) const; + //@} - //@} + //! Return the class name of the object, without template parameters and + //! without namespace + virtual std::string ShortClassName() const; - //! 
Return the class name of the object, without template parameters and without namespace - virtual std::string ShortClassName() const; - - }; // class Describable +}; // class Describable } // namespace MueLu diff --git a/packages/muelu/src/MueCentral/MueLu_Factory.cpp b/packages/muelu/src/MueCentral/MueLu_Factory.cpp index 64c7032ce5bb..2e051119a2c9 100644 --- a/packages/muelu/src/MueCentral/MueLu_Factory.cpp +++ b/packages/muelu/src/MueCentral/MueLu_Factory.cpp @@ -48,9 +48,9 @@ namespace MueLu { - bool Factory::timerSync_ = false; +bool Factory::timerSync_ = false; #ifdef HAVE_MUELU_DEBUG - Factory::multipleCallCheckEnum Factory::multipleCallCheckGlobal_ = ENABLED; +Factory::multipleCallCheckEnum Factory::multipleCallCheckGlobal_ = ENABLED; #endif } // namespace MueLu diff --git a/packages/muelu/src/MueCentral/MueLu_Factory.hpp b/packages/muelu/src/MueCentral/MueLu_Factory.hpp index 0be93d76bdff..5565d54986fd 100644 --- a/packages/muelu/src/MueCentral/MueLu_Factory.hpp +++ b/packages/muelu/src/MueCentral/MueLu_Factory.hpp @@ -46,191 +46,212 @@ #ifndef MUELU_FACTORY_HPP #define MUELU_FACTORY_HPP -#include -#include // for _Deque_iterator, operator!= -#include // for operator<<, etc #include "Teuchos_ENull.hpp" // for ENull::null -#include "Teuchos_FilteredIterator.hpp" // for FilteredIterator, etc +#include "Teuchos_FilteredIterator.hpp" // for FilteredIterator, etc #include "Teuchos_ParameterEntry.hpp" // for ParameterEntry #include "Teuchos_ParameterList.hpp" // for ParameterList, etc -#include "Teuchos_RCPDecl.hpp" // for RCP -#include "Teuchos_RCPNode.hpp" // for operator<< -#include "Teuchos_StringIndexedOrderedValueObjectContainer.hpp" #include "Teuchos_RCP.hpp" +#include "Teuchos_RCPDecl.hpp" // for RCP +#include "Teuchos_RCPNode.hpp" // for operator<< +#include "Teuchos_StringIndexedOrderedValueObjectContainer.hpp" +#include // for _Deque_iterator, operator!= +#include // for operator<<, etc +#include #include "MueLu_ConfigDefs.hpp" -#include 
"MueLu_FactoryBase.hpp" #include "MueLu_FactoryAcceptor.hpp" -#include "MueLu_ParameterListAcceptor.hpp" +#include "MueLu_FactoryBase.hpp" #include "MueLu_Level.hpp" +#include "MueLu_ParameterListAcceptor.hpp" namespace MueLu { - class Factory : public FactoryBase, public FactoryAcceptor, public ParameterListAcceptorImpl { +class Factory : public FactoryBase, + public FactoryAcceptor, + public ParameterListAcceptorImpl { - public: - //@{ Constructors/Destructors. +public: + //@{ Constructors/Destructors. - //! Constructor. - Factory() + //! Constructor. + Factory() #ifdef HAVE_MUELU_DEBUG : multipleCallCheck_(FIRSTCALL), lastLevelID_(-1) #endif - { } - - //! Destructor. - virtual ~Factory() { } - //@} - - //@{ - //! Configuration - - //! SetFactory is for expert users only. To change configuration of the preconditioner, use a factory manager. - virtual void SetFactory(const std::string& varName, const RCP& factory) { - RCP f = factory; - SetParameter(varName, ParameterEntry(f)); // parameter validation done in ParameterListAcceptorImpl - } - - //! Default implementation of FactoryAcceptor::GetFactory() - const RCP GetFactory(const std::string& varName) const { - - // Special treatment for "NoFactory" - if (varName == "NoFactory") - return MueLu::NoFactory::getRCP(); - - if (!GetParameterList().isParameter(varName)&& GetValidParameterList() == Teuchos::null) { - // If the parameter is not on the list and there is not validator, the defaults values for 'varName' is not set. - // Failback by using directly the FactoryManager - // NOTE: call to GetValidParameterList() can be costly for classes that validate parameters. - // But it get called only (lazy '&&' operator) if the parameter 'varName' is not on the paramlist and - // the parameter 'varName' is always on the list when validator is present and 'varName' is valid (at least the default value is set). 
- return Teuchos::null; - } - - return GetParameterList().get< RCP >(varName); - } - - RCP RemoveFactoriesFromList(const ParameterList& list) const { - RCP paramList = rcp(new ParameterList(list)); - // Remove FactoryBase entries from the list - // The solution would be much more elegant if ParameterList support std::list like operations - // In that case, we could simply write: - // for (ParameterList::ConstIterator it = paramList.begin(); it != paramList.end(); it++) - // if (paramList.isType >(it->first)) - // it = paramList.erase(it); - // else - // it++; - ParameterList::ConstIterator it = paramList->begin(); - while (it != paramList->end()) { - it = paramList->begin(); - - for (; it != paramList->end(); it++) - if (paramList->isType >(it->first)) - paramList->remove(it->first); - } - return paramList; - } - - // SetParameterList(...); - - // GetParameterList(...); - - //@} - - virtual RCP GetValidParameterList() const { - return Teuchos::null; // Teuchos::null == GetValidParameterList() not implemented == skip validation and no default values (dangerous) - } - - protected: - - void Input(Level& level, const std::string& varName) const { - level.DeclareInput(varName, GetFactory(varName).get(), this); - } - // Similar to the other Input, but we have an alias (varParamName) to the generated data name (varName) - void Input(Level& level, const std::string& varName, const std::string& varParamName) const { - level.DeclareInput(varName, GetFactory(varParamName).get(), this); - } - - template - T Get(Level& level, const std::string& varName) const { - return level.Get(varName, GetFactory(varName).get()); - } - // Similar to the other Get, but we have an alias (varParamName) to the generated data name (varName) - template - T Get(Level& level, const std::string& varName, const std::string& varParamName) const { - return level.Get(varName, GetFactory(varParamName).get()); + { + } + + //! Destructor. + virtual ~Factory() {} + //@} + + //@{ + //! Configuration + + //! 
SetFactory is for expert users only. To change configuration of the + //! preconditioner, use a factory manager. + virtual void SetFactory(const std::string &varName, + const RCP &factory) { + RCP f = factory; + SetParameter( + varName, + ParameterEntry( + f)); // parameter validation done in ParameterListAcceptorImpl + } + + //! Default implementation of FactoryAcceptor::GetFactory() + const RCP GetFactory(const std::string &varName) const { + + // Special treatment for "NoFactory" + if (varName == "NoFactory") + return MueLu::NoFactory::getRCP(); + + if (!GetParameterList().isParameter(varName) && + GetValidParameterList() == Teuchos::null) { + // If the parameter is not on the list and there is not validator, the + // defaults values for 'varName' is not set. Failback by using directly + // the FactoryManager NOTE: call to GetValidParameterList() can be costly + // for classes that validate parameters. But it get called only (lazy '&&' + // operator) if the parameter 'varName' is not on the paramlist and the + // parameter 'varName' is always on the list when validator is present and + // 'varName' is valid (at least the default value is set). 
+ return Teuchos::null; } - template - void Set(Level& level, const std::string& varName, const T& data) const { - return level.Set(varName, data, this); + return GetParameterList().get>(varName); + } + + RCP RemoveFactoriesFromList(const ParameterList &list) const { + RCP paramList = rcp(new ParameterList(list)); + // Remove FactoryBase entries from the list + // The solution would be much more elegant if ParameterList support + // std::list like operations In that case, we could simply write: + // for (ParameterList::ConstIterator it = paramList.begin(); it != + // paramList.end(); it++) + // if (paramList.isType >(it->first)) + // it = paramList.erase(it); + // else + // it++; + ParameterList::ConstIterator it = paramList->begin(); + while (it != paramList->end()) { + it = paramList->begin(); + + for (; it != paramList->end(); it++) + if (paramList->isType>(it->first)) + paramList->remove(it->first); } - - template - bool IsType(Level& level, const std::string& varName) const { - return level.IsType(varName, GetFactory(varName).get()); - } - - bool IsAvailable(Level& level, const std::string& varName) const { - return level.IsAvailable(varName, GetFactory(varName).get()); - } - - public: - static void EnableTimerSync() { timerSync_ = true; } - static void DisableTimerSync() { timerSync_ = false; } - - protected: - static bool timerSync_; + return paramList; + } + + // SetParameterList(...); + + // GetParameterList(...); + + //@} + + virtual RCP GetValidParameterList() const { + return Teuchos::null; // Teuchos::null == GetValidParameterList() not + // implemented == skip validation and no default + // values (dangerous) + } + +protected: + void Input(Level &level, const std::string &varName) const { + level.DeclareInput(varName, GetFactory(varName).get(), this); + } + // Similar to the other Input, but we have an alias (varParamName) to the + // generated data name (varName) + void Input(Level &level, const std::string &varName, + const std::string 
&varParamName) const { + level.DeclareInput(varName, GetFactory(varParamName).get(), this); + } + + template T Get(Level &level, const std::string &varName) const { + return level.Get(varName, GetFactory(varName).get()); + } + // Similar to the other Get, but we have an alias (varParamName) to the + // generated data name (varName) + template + T Get(Level &level, const std::string &varName, + const std::string &varParamName) const { + return level.Get(varName, GetFactory(varParamName).get()); + } + + template + void Set(Level &level, const std::string &varName, const T &data) const { + return level.Set(varName, data, this); + } + + template + bool IsType(Level &level, const std::string &varName) const { + return level.IsType(varName, GetFactory(varName).get()); + } + + bool IsAvailable(Level &level, const std::string &varName) const { + return level.IsAvailable(varName, GetFactory(varName).get()); + } + +public: + static void EnableTimerSync() { timerSync_ = true; } + static void DisableTimerSync() { timerSync_ = false; } + +protected: + static bool timerSync_; #ifdef HAVE_MUELU_DEBUG - public: - enum multipleCallCheckEnum { ENABLED, DISABLED, FIRSTCALL }; - - void EnableMultipleCallCheck() const { multipleCallCheck_ = ENABLED; } - void DisableMultipleCallCheck() const { multipleCallCheck_ = DISABLED; } - void ResetDebugData() const { - if (multipleCallCheck_ == FIRSTCALL && lastLevelID_ == -1) - return; - - multipleCallCheck_ = FIRSTCALL; - lastLevelID_ = -1; - - const ParameterList& paramList = GetParameterList(); - - // We cannot use just FactoryManager to specify which factories call ResetDebugData(). - // The problem is that some factories are not present in the manager, but - // instead are only accessible through a parameter list of some factory. - // For instance, FilteredAFactory is only accessible from SaPFactory but - // nowhere else. So we miss those, and do not reset the data, resulting - // in problems. 
- // Therefore, for each factory we need to go through its dependent - // factories, and call reset on them. - for (ParameterList::ConstIterator it = paramList.begin(); it != paramList.end(); it++) - if (paramList.isType >(it->first)) { - RCP fact = rcp_dynamic_cast(paramList.get >(it->first)); - if (fact != Teuchos::null && fact != NoFactory::getRCP()) - fact->ResetDebugData(); - } - } - - static void EnableMultipleCheckGlobally() { multipleCallCheckGlobal_ = ENABLED; } - static void DisableMultipleCheckGlobally() { multipleCallCheckGlobal_ = DISABLED; } - - protected: - mutable multipleCallCheckEnum multipleCallCheck_; - static multipleCallCheckEnum multipleCallCheckGlobal_; - mutable int lastLevelID_; +public: + enum multipleCallCheckEnum{ENABLED, DISABLED, FIRSTCALL}; + + void EnableMultipleCallCheck() const { multipleCallCheck_ = ENABLED; } + void DisableMultipleCallCheck() const { multipleCallCheck_ = DISABLED; } + void ResetDebugData() const { + if (multipleCallCheck_ == FIRSTCALL && lastLevelID_ == -1) + return; + + multipleCallCheck_ = FIRSTCALL; + lastLevelID_ = -1; + + const ParameterList ¶mList = GetParameterList(); + + // We cannot use just FactoryManager to specify which factories call + // ResetDebugData(). The problem is that some factories are not present in + // the manager, but instead are only accessible through a parameter list of + // some factory. For instance, FilteredAFactory is only accessible from + // SaPFactory but nowhere else. So we miss those, and do not reset the data, + // resulting in problems. Therefore, for each factory we need to go through + // its dependent factories, and call reset on them. 
+ for (ParameterList::ConstIterator it = paramList.begin(); + it != paramList.end(); it++) + if (paramList.isType>(it->first)) { + RCP fact = rcp_dynamic_cast( + paramList.get>(it->first)); + if (fact != Teuchos::null && fact != NoFactory::getRCP()) + fact->ResetDebugData(); + } + } + + static void EnableMultipleCheckGlobally() { + multipleCallCheckGlobal_ = ENABLED; + } + static void DisableMultipleCheckGlobally() { + multipleCallCheckGlobal_ = DISABLED; + } + +protected: + mutable multipleCallCheckEnum multipleCallCheck_; + static multipleCallCheckEnum multipleCallCheckGlobal_; + mutable int lastLevelID_; #else - public: - void EnableMultipleCallCheck() const { } - void DisableMultipleCallCheck() const { } - void ResetDebugData() const { } - static void EnableMultipleCheckGlobally() { } - static void DisableMultipleCheckGlobally() { } +public: + void EnableMultipleCallCheck() const {} + void DisableMultipleCallCheck() const {} + void ResetDebugData() const {} + static void EnableMultipleCheckGlobally() {} + static void DisableMultipleCheckGlobally() {} #endif - }; //class Factory +}; // class Factory -} //namespace MueLu +} // namespace MueLu #define MUELU_FACTORY_SHORT -#endif //ifndef MUELU_FACTORY_HPP +#endif // ifndef MUELU_FACTORY_HPP diff --git a/packages/muelu/src/MueCentral/MueLu_FactoryAcceptor.hpp b/packages/muelu/src/MueCentral/MueLu_FactoryAcceptor.hpp index efe14effc27a..b5274e2fd14c 100644 --- a/packages/muelu/src/MueCentral/MueLu_FactoryAcceptor.hpp +++ b/packages/muelu/src/MueCentral/MueLu_FactoryAcceptor.hpp @@ -48,35 +48,37 @@ #include -#include "Teuchos_RCP.hpp" #include "MueLu_ConfigDefs.hpp" #include "MueLu_FactoryBase.hpp" +#include "Teuchos_RCP.hpp" namespace MueLu { - class FactoryAcceptor { - - public: +class FactoryAcceptor { - virtual ~FactoryAcceptor() { } +public: + virtual ~FactoryAcceptor() {} - //@{ - //! Configuration + //@{ + //! Configuration - //! SetFactory is for expert users only. 
To change configuration of the preconditioner, use a factory manager. - virtual void SetFactory(const std::string & varName, const RCP & factory) = 0; + //! SetFactory is for expert users only. To change configuration of the + //! preconditioner, use a factory manager. + virtual void SetFactory(const std::string &varName, + const RCP &factory) = 0; - virtual const RCP GetFactory(const std::string & varName) const = 0; + virtual const RCP + GetFactory(const std::string &varName) const = 0; - // SetParameterList(...); + // SetParameterList(...); - // GetParameterList(...); + // GetParameterList(...); - //@} + //@} - }; //class FactoryAcceptor +}; // class FactoryAcceptor -} //namespace MueLu +} // namespace MueLu #define MUELU_FACTORYACCEPTOR_SHORT -#endif //ifndef MUELU_FACTORYACCEPTOR_HPP +#endif // ifndef MUELU_FACTORYACCEPTOR_HPP diff --git a/packages/muelu/src/MueCentral/MueLu_FactoryBase.cpp b/packages/muelu/src/MueCentral/MueLu_FactoryBase.cpp index 07c413c94e8f..fb988e71e121 100644 --- a/packages/muelu/src/MueCentral/MueLu_FactoryBase.cpp +++ b/packages/muelu/src/MueCentral/MueLu_FactoryBase.cpp @@ -48,9 +48,9 @@ namespace MueLu { - int FactoryBase::GenerateUniqueId() { - static int i = 0; - return i++; - } +int FactoryBase::GenerateUniqueId() { + static int i = 0; + return i++; +} } // namespace MueLu diff --git a/packages/muelu/src/MueCentral/MueLu_FactoryBase.hpp b/packages/muelu/src/MueCentral/MueLu_FactoryBase.hpp index b962d7f6f510..e88e74cbdc81 100644 --- a/packages/muelu/src/MueCentral/MueLu_FactoryBase.hpp +++ b/packages/muelu/src/MueCentral/MueLu_FactoryBase.hpp @@ -46,60 +46,57 @@ #ifndef MUELU_FACTORYBASE_HPP #define MUELU_FACTORYBASE_HPP -#include "MueLu_config.hpp" #include "MueLu_BaseClass.hpp" #include "MueLu_Level_fwd.hpp" +#include "MueLu_config.hpp" namespace MueLu { - /*! - @class FactoryBase - @brief Base class for factories (e.g., R, P, and A_coarse). - @ingroup MueLuBaseClasses - */ - class FactoryBase : public virtual BaseClass { +/*! 
+ @class FactoryBase + @brief Base class for factories (e.g., R, P, and A_coarse). + @ingroup MueLuBaseClasses +*/ +class FactoryBase : public virtual BaseClass { - public: - //@{ Constructors/Destructors. +public: + //@{ Constructors/Destructors. - //! Constructor. - FactoryBase() - : id_(FactoryBase::GenerateUniqueId()) - { } + //! Constructor. + FactoryBase() : id_(FactoryBase::GenerateUniqueId()) {} - //! Destructor. - virtual ~FactoryBase() { } - //@} + //! Destructor. + virtual ~FactoryBase() {} + //@} - //@{ - //! @name Build methods. + //@{ + //! @name Build methods. - virtual void CallBuild(Level & requestedLevel) const = 0; + virtual void CallBuild(Level &requestedLevel) const = 0; - virtual void CallDeclareInput(Level & requestedLevel) const = 0; - //@} + virtual void CallDeclareInput(Level &requestedLevel) const = 0; + //@} - //@{ - //! @name Access factory properties + //@{ + //! @name Access factory properties - /// return unique factory id - int GetID() const { return id_; }; + /// return unique factory id + int GetID() const { return id_; }; //@} #ifdef HAVE_MUELU_DEBUG - virtual void ResetDebugData() const = 0; + virtual void ResetDebugData() const = 0; #endif - private: - - static int GenerateUniqueId(); +private: + static int GenerateUniqueId(); - const int id_; + const int id_; - }; //class FactoryBase +}; // class FactoryBase -} //namespace MueLu +} // namespace MueLu #define MUELU_FACTORYBASE_SHORT -#endif //ifndef MUELU_FACTORYBASE_HPP +#endif // ifndef MUELU_FACTORYBASE_HPP diff --git a/packages/muelu/src/MueCentral/MueLu_FactoryManagerBase.hpp b/packages/muelu/src/MueCentral/MueLu_FactoryManagerBase.hpp index 90b2c8f86089..3b4929bf2a68 100644 --- a/packages/muelu/src/MueCentral/MueLu_FactoryManagerBase.hpp +++ b/packages/muelu/src/MueCentral/MueLu_FactoryManagerBase.hpp @@ -48,64 +48,70 @@ #include -#include "MueLu_ConfigDefs.hpp" #include "MueLu_BaseClass.hpp" +#include "MueLu_ConfigDefs.hpp" #include "MueLu_FactoryBase_fwd.hpp" namespace 
MueLu { - /*! - @class FactoryManagerBase - @brief Class that provides default factories within Needs class. - @ingroup MueLuBaseClasses - */ - class FactoryManagerBase : public BaseClass { +/*! + @class FactoryManagerBase + @brief Class that provides default factories within Needs class. + @ingroup MueLuBaseClasses +*/ +class FactoryManagerBase : public BaseClass { - public: - //@{ Constructors/Destructors. - FactoryManagerBase() : bIgnoreUserData_(false) { } +public: + //@{ Constructors/Destructors. + FactoryManagerBase() : bIgnoreUserData_(false) {} - //! Destructor. - virtual ~FactoryManagerBase() { } + //! Destructor. + virtual ~FactoryManagerBase() {} - //@} + //@} - //@{ Get/Set functions. + //@{ Get/Set functions. - //! Get - // Return ref because user also give ref to the Hierarchy. - const virtual RCP GetFactory(const std::string& varName) const = 0; - //@} + //! Get + // Return ref because user also give ref to the Hierarchy. + const virtual RCP + GetFactory(const std::string &varName) const = 0; + //@} - //! Check - // Return true if Factory associated with varName is registered - virtual bool hasFactory(const std::string& varName) const = 0; + //! Check + // Return true if Factory associated with varName is registered + virtual bool hasFactory(const std::string &varName) const = 0; - // Free temporarily hold data at the end of Hierarchy::Setup() - // This method is const because the clean concerns only mutable data. - virtual void Clean() const { } // TODO: should be used inside of MueLu::Hierarchy + // Free temporarily hold data at the end of Hierarchy::Setup() + // This method is const because the clean concerns only mutable data. + virtual void Clean() const { + } // TODO: should be used inside of MueLu::Hierarchy #ifdef HAVE_MUELU_DEBUG - virtual void ResetDebugData() const = 0; + virtual void ResetDebugData() const = 0; #endif - //! get IgnoreUserData flag - bool IgnoreUserData() const { return bIgnoreUserData_; } + //! 
get IgnoreUserData flag + bool IgnoreUserData() const { return bIgnoreUserData_; } - //! set IgnoreUserData flag - void SetIgnoreUserData(bool bIgnoreUserData = false) { bIgnoreUserData_ = bIgnoreUserData; } + //! set IgnoreUserData flag + void SetIgnoreUserData(bool bIgnoreUserData = false) { + bIgnoreUserData_ = bIgnoreUserData; + } - private: - //! boolean flag that controls behaviour of Level::GetFactory - //! if bIgnoreUserData == true, the Level::GetFactory function always asks the Factory manager for a valid factory given a variable name - //! if bIgnoreUserData == false, the Level::GetFactory prefers user-provided data for a variable name if available. Otherwise the factory manager is asked for a valid factory - //! default: bIgnoreUserData = false; - bool bIgnoreUserData_; +private: + //! boolean flag that controls behaviour of Level::GetFactory + //! if bIgnoreUserData == true, the Level::GetFactory function always asks + //! the Factory manager for a valid factory given a variable name if + //! bIgnoreUserData == false, the Level::GetFactory prefers user-provided data + //! for a variable name if available. Otherwise the factory manager is asked + //! 
for a valid factory default: bIgnoreUserData = false; + bool bIgnoreUserData_; - }; // class FactoryManagerBase +}; // class FactoryManagerBase } // namespace MueLu #define MUELU_FACTORYMANAGERBASE_SHORT -#endif //ifndef MUELU_FACTORYMANAGERBASE_HPP +#endif // ifndef MUELU_FACTORYMANAGERBASE_HPP diff --git a/packages/muelu/src/MueCentral/MueLu_FactoryManager_decl.hpp b/packages/muelu/src/MueCentral/MueLu_FactoryManager_decl.hpp index 1cb897a8f7f8..aec010e39e7e 100644 --- a/packages/muelu/src/MueCentral/MueLu_FactoryManager_decl.hpp +++ b/packages/muelu/src/MueCentral/MueLu_FactoryManager_decl.hpp @@ -47,22 +47,24 @@ #define MUELU_FACTORYMANAGER_DECL_HPP #include "MueLu_ConfigDefs.hpp" -#include "MueLu_FactoryManager_fwd.hpp" #include "MueLu_FactoryManagerBase.hpp" +#include "MueLu_FactoryManager_fwd.hpp" -#include "MueLu_AmalgamationFactory_fwd.hpp" #include "MueLu_AggregateQualityEstimateFactory_fwd.hpp" +#include "MueLu_AmalgamationFactory_fwd.hpp" #include "MueLu_CoalesceDropFactory_fwd.hpp" #include "MueLu_CoarseMapFactory_fwd.hpp" #include "MueLu_ConstraintFactory_fwd.hpp" #include "MueLu_DirectSolver_fwd.hpp" #include "MueLu_InitialBlockNumberFactory_fwd.hpp" +#include "MueLu_InterfaceAggregationFactory_fwd.hpp" +#include "MueLu_InterfaceMappingTransferFactory_fwd.hpp" #include "MueLu_LineDetectionFactory_fwd.hpp" #include "MueLu_NullspaceFactory_fwd.hpp" #include "MueLu_PatternFactory_fwd.hpp" #include "MueLu_RAPFactory_fwd.hpp" -#include "MueLu_RepartitionHeuristicFactory_fwd.hpp" #include "MueLu_RepartitionFactory_fwd.hpp" +#include "MueLu_RepartitionHeuristicFactory_fwd.hpp" #include "MueLu_SaPFactory_fwd.hpp" #include "MueLu_ScaledNullspaceFactory_fwd.hpp" #include "MueLu_SmootherFactory_fwd.hpp" @@ -72,9 +74,6 @@ #include "MueLu_TrilinosSmoother_fwd.hpp" #include "MueLu_UncoupledAggregationFactory_fwd.hpp" #include "MueLu_ZoltanInterface_fwd.hpp" -#include "MueLu_InterfaceMappingTransferFactory_fwd.hpp" -#include 
"MueLu_InterfaceAggregationFactory_fwd.hpp" - #include "MueLu_CoalesceDropFactory_kokkos_fwd.hpp" #include "MueLu_NullspaceFactory_kokkos_fwd.hpp" @@ -84,179 +83,201 @@ namespace MueLu { - /*! - @class FactoryManager class. - @brief This class specifies the default factory that should generate some data on a Level if the data does not exist and - the generating factory has not been specified. +/*! + @class FactoryManager class. + @brief This class specifies the default factory that should generate some data + on a Level if the data does not exist and the generating factory has not been + specified. + + Consider the following example. + + @code + RCP Afact; + Level currentLevel; + RCP thisLevelA; + thisLevelA = currentLevel.Get("A", Afact.get()); + @endcode + + @todo If Afact is null (actually, Teuchos::null), then the FactoryManager + associated with currentLevel will determine whether a default factory has been + specified for creating A. If "yes", then that factory will be called, A will + be stored in currentLevel, and an RCP will be returned by the Get call. If + "no", then the FactoryManager will throw an exception indicating that it + does not know how to generate A. +*/ + +template +class FactoryManager : public FactoryManagerBase { +#undef MUELU_FACTORYMANAGER_SHORT +#include "MueLu_UseShortNames.hpp" - Consider the following example. +public: + //! @name Constructor/Destructors + //@{ + + //! @brief Constructor. 
+ FactoryManager() { + SetIgnoreUserData( + false); // set IgnorUserData flag to false (default behaviour) +#ifdef HAVE_MUELU_SERIAL + if (typeid(Node).name() == + typeid(Tpetra::KokkosCompat::KokkosSerialWrapperNode).name()) + useKokkos_ = false; +#endif +#ifdef HAVE_MUELU_OPENMP + if (typeid(Node).name() == + typeid(Tpetra::KokkosCompat::KokkosOpenMPWrapperNode).name()) + useKokkos_ = true; +#endif +#ifdef HAVE_MUELU_CUDA + if (typeid(Node).name() == + typeid(Tpetra::KokkosCompat::KokkosCudaWrapperNode).name()) + useKokkos_ = true; +#endif +#ifdef HAVE_MUELU_HIP + if (typeid(Node).name() == + typeid(Tpetra::KokkosCompat::KokkosHIPWrapperNode).name()) + useKokkos_ = true; +#endif +#ifdef HAVE_MUELU_SYCL + if (typeid(Node).name() == + typeid(Tpetra::KokkosCompat::KokkosSYCLWrapperNode).name()) + useKokkos_ = true; +#endif + } + + //! Constructor used by HierarchyFactory (temporary, will be removed) + FactoryManager( + const std::map> &factoryTable) { + factoryTable_ = factoryTable; + SetIgnoreUserData( + false); // set IgnorUserData flag to false (default behaviour) //TODO: + // use parent class constructor instead +#ifdef HAVE_MUELU_SERIAL + if (typeid(Node).name() == + typeid(Tpetra::KokkosCompat::KokkosSerialWrapperNode).name()) + useKokkos_ = false; +#endif +#ifdef HAVE_MUELU_OPENMP + if (typeid(Node).name() == + typeid(Tpetra::KokkosCompat::KokkosOpenMPWrapperNode).name()) + useKokkos_ = true; +#endif +#ifdef HAVE_MUELU_CUDA + if (typeid(Node).name() == + typeid(Tpetra::KokkosCompat::KokkosCudaWrapperNode).name()) + useKokkos_ = true; +#endif +#ifdef HAVE_MUELU_HIP + if (typeid(Node).name() == + typeid(Tpetra::KokkosCompat::KokkosHIPWrapperNode).name()) + useKokkos_ = true; +#endif +#ifdef HAVE_MUELU_SYCL + if (typeid(Node).name() == + typeid(Tpetra::KokkosCompat::KokkosSYCLWrapperNode).name()) + useKokkos_ = true; +#endif + } - @code - RCP Afact; - Level currentLevel; - RCP thisLevelA; - thisLevelA = currentLevel.Get("A", Afact.get()); - @endcode + //! 
Destructor. + virtual ~FactoryManager() {} - @todo If Afact is null (actually, Teuchos::null), then the FactoryManager associated with currentLevel will determine whether a default factory has - been specified for creating A. If "yes", then that factory will be called, A will be stored in currentLevel, and an RCP will be returned by - the Get call. If "no", then the FactoryManager will throw an exception indicating that it does not know how to generate A. + //@} + + //! @name Get/Set functions. + //@{ + + /*! @brief Set Factory + + Register the factory that should generate data if said factory is not + specified in the request. + + @param[in] name of variable + @param[in] factory that generates the data */ + void SetFactory(const std::string &varName, + const RCP &factory); - template - class FactoryManager : public FactoryManagerBase { -#undef MUELU_FACTORYMANAGER_SHORT -#include "MueLu_UseShortNames.hpp" + /*! @brief Get factory associated with a particular data name. + + @param[in] varName name of variable. + + */ + const RCP GetFactory(const std::string &varName) const; + + /*! @brief Get factory associated with a particular data name (NONCONST + version) + + @param[in] varName name of variable. - public: - - //! @name Constructor/Destructors - //@{ - - //! @brief Constructor. 
- FactoryManager() { - SetIgnoreUserData(false); // set IgnorUserData flag to false (default behaviour) -# ifdef HAVE_MUELU_SERIAL - if (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosSerialWrapperNode).name()) - useKokkos_ = false; -# endif -# ifdef HAVE_MUELU_OPENMP - if (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosOpenMPWrapperNode).name()) - useKokkos_ = true; -# endif -# ifdef HAVE_MUELU_CUDA - if (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosCudaWrapperNode).name()) - useKokkos_ = true; -# endif -# ifdef HAVE_MUELU_HIP - if (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosHIPWrapperNode).name()) - useKokkos_ = true; -# endif -# ifdef HAVE_MUELU_SYCL - if (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosSYCLWrapperNode).name()) - useKokkos_ = true; -# endif - } - - //! Constructor used by HierarchyFactory (temporary, will be removed) - FactoryManager(const std::map >& factoryTable) { - factoryTable_ = factoryTable; - SetIgnoreUserData(false); // set IgnorUserData flag to false (default behaviour) //TODO: use parent class constructor instead -# ifdef HAVE_MUELU_SERIAL - if (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosSerialWrapperNode).name()) - useKokkos_ = false; -# endif -# ifdef HAVE_MUELU_OPENMP - if (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosOpenMPWrapperNode).name()) - useKokkos_ = true; -# endif -# ifdef HAVE_MUELU_CUDA - if (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosCudaWrapperNode).name()) - useKokkos_ = true; -# endif -# ifdef HAVE_MUELU_HIP - if (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosHIPWrapperNode).name()) - useKokkos_ = true; -# endif -# ifdef HAVE_MUELU_SYCL - if (typeid(Node).name() == typeid(Tpetra::KokkosCompat::KokkosSYCLWrapperNode).name()) - useKokkos_ = true; -# endif - } - - //! Destructor. - virtual ~FactoryManager() { } - - //@} - - //! @name Get/Set functions. - //@{ - - /*! 
@brief Set Factory - - Register the factory that should generate data if said factory is not specified in the request. - - @param[in] name of variable - @param[in] factory that generates the data - */ - void SetFactory(const std::string & varName, const RCP& factory); - - /*! @brief Get factory associated with a particular data name. - - @param[in] varName name of variable. - - */ - const RCP GetFactory(const std::string& varName) const; - - /*! @brief Get factory associated with a particular data name (NONCONST version) - - @param[in] varName name of variable. - - */ - const RCP GetFactoryNonConst(const std::string& varName); - - //! Check - // Return true if Factory associated with varName is registered - bool hasFactory(const std::string& varName) const; - - - //! - const RCP GetDefaultFactory(const std::string& varName) const; - - void SetKokkosRefactor(const bool useKokkos) { - useKokkos_ = useKokkos; - } - - bool GetKokkosRefactor() const { return useKokkos_; } - - //@} - - void Clean() const { defaultFactoryTable_.clear(); } + */ + const RCP GetFactoryNonConst(const std::string &varName); + + //! Check + // Return true if Factory associated with varName is registered + bool hasFactory(const std::string &varName) const; + + //! + const RCP + GetDefaultFactory(const std::string &varName) const; + + void SetKokkosRefactor(const bool useKokkos) { useKokkos_ = useKokkos; } + + bool GetKokkosRefactor() const { return useKokkos_; } + + //@} + + void Clean() const { defaultFactoryTable_.clear(); } #ifdef HAVE_MUELU_DEBUG - void ResetDebugData() const; + void ResetDebugData() const; #endif - void Print() const; - - private: + void Print() const; - //! @name Helper functions - //@{ +private: + //! @name Helper functions + //@{ - /*! Add a factory to the default factory list and return it. This helper function is used by GetDefaultFactory() + /*! Add a factory to the default factory list and return it. 
This helper + function is used by GetDefaultFactory() - @todo TODO factory->setObjectLabel("Default " + varName + "Factory"); - */ + @todo TODO factory->setObjectLabel("Default " + varName + "Factory"); + */ - const RCP SetAndReturnDefaultFactory(const std::string& varName, const RCP& factory) const; - //@} + const RCP + SetAndReturnDefaultFactory(const std::string &varName, + const RCP &factory) const; + //@} - /*! @brief User-defined factories. - * - * User may overwrite default behaviour. The user provided factories are stored in a separate table. When we try to determine - * which factory generates the data, this table is searched first. + /*! @brief User-defined factories. + * + * User may overwrite default behaviour. The user provided factories are + stored in a separate table. When we try to determine + * which factory generates the data, this table is searched first. - Note: we distinguish 'user defined factory' and 'default factory' to allow the deallocation of default factories separately. - */ - std::map > factoryTable_; + Note: we distinguish 'user defined factory' and 'default factory' to allow + the deallocation of default factories separately. + */ + std::map> factoryTable_; - /*! @brief Table that holds default factories. + /*! @brief Table that holds default factories. - -# We distinguish 'user defined factory' and 'default factory' to allow the deallocation of default factories separately. - -# defaultFactoryTable_ is mutable because default factories are only added to the list when they are requested - to avoid allocation of unused factories. - */ - mutable - std::map > defaultFactoryTable_; + -# We distinguish 'user defined factory' and 'default factory' to allow the + deallocation of default factories separately. + -# defaultFactoryTable_ is mutable because default factories are + only added to the list when they are requested to avoid allocation of unused + factories. + */ + mutable std::map> defaultFactoryTable_; - //! 
Whether or not to use kokkos factories. - bool useKokkos_; + //! Whether or not to use kokkos factories. + bool useKokkos_; - }; // class +}; // class } // namespace MueLu diff --git a/packages/muelu/src/MueCentral/MueLu_FactoryManager_def.hpp b/packages/muelu/src/MueCentral/MueLu_FactoryManager_def.hpp index f7004c043c69..bc7516132762 100644 --- a/packages/muelu/src/MueCentral/MueLu_FactoryManager_def.hpp +++ b/packages/muelu/src/MueCentral/MueLu_FactoryManager_def.hpp @@ -49,33 +49,33 @@ #include // Headers for factories used by default: +#include "MueLu_AggregateQualityEstimateFactory.hpp" #include "MueLu_AmalgamationFactory.hpp" #include "MueLu_CoalesceDropFactory.hpp" #include "MueLu_CoarseMapFactory.hpp" #include "MueLu_ConstraintFactory.hpp" -#include "MueLu_AggregateQualityEstimateFactory.hpp" #include "MueLu_DirectSolver.hpp" #include "MueLu_InitialBlockNumberFactory.hpp" #include "MueLu_LineDetectionFactory.hpp" // #include "MueLu_MultiVectorTransferFactory.hpp" +#include "MueLu_InterfaceAggregationFactory.hpp" +#include "MueLu_InterfaceMappingTransferFactory.hpp" +#include "MueLu_InverseApproximationFactory.hpp" #include "MueLu_NoFactory.hpp" #include "MueLu_NullspaceFactory.hpp" #include "MueLu_PatternFactory.hpp" #include "MueLu_RAPFactory.hpp" -#include "MueLu_RepartitionHeuristicFactory.hpp" #include "MueLu_RepartitionFactory.hpp" +#include "MueLu_RepartitionHeuristicFactory.hpp" #include "MueLu_SaPFactory.hpp" #include "MueLu_ScaledNullspaceFactory.hpp" #include "MueLu_SmootherFactory.hpp" +#include "MueLu_StructuredAggregationFactory.hpp" #include "MueLu_TentativePFactory.hpp" #include "MueLu_TransPFactory.hpp" #include "MueLu_TrilinosSmoother.hpp" #include "MueLu_UncoupledAggregationFactory.hpp" -#include "MueLu_StructuredAggregationFactory.hpp" #include "MueLu_ZoltanInterface.hpp" -#include "MueLu_InterfaceMappingTransferFactory.hpp" -#include "MueLu_InterfaceAggregationFactory.hpp" -#include "MueLu_InverseApproximationFactory.hpp" #include 
"MueLu_CoalesceDropFactory_kokkos.hpp" #include "MueLu_NullspaceFactory_kokkos.hpp" @@ -85,229 +85,317 @@ #include "MueLu_FactoryManager_decl.hpp" - namespace MueLu { -#define MUELU_KOKKOS_FACTORY(varName, oldFactory, newFactory) \ - (!useKokkos_) ? SetAndReturnDefaultFactory(varName, rcp(new oldFactory())) : \ - SetAndReturnDefaultFactory(varName, rcp(new newFactory())); - - template - void FactoryManager::SetFactory(const std::string& varName, const RCP& factory) { - factoryTable_[varName] = factory; +#define MUELU_KOKKOS_FACTORY(varName, oldFactory, newFactory) \ + (!useKokkos_) ? SetAndReturnDefaultFactory(varName, rcp(new oldFactory())) \ + : SetAndReturnDefaultFactory(varName, rcp(new newFactory())); + +template +void FactoryManager::SetFactory( + const std::string &varName, const RCP &factory) { + factoryTable_[varName] = factory; +} + +template +const RCP +FactoryManager::GetFactory( + const std::string &varName) const { + if (factoryTable_.count(varName)) { + // Search user provided factories + return factoryTable_.find(varName)->second; } - template - const RCP FactoryManager::GetFactory(const std::string& varName) const { - if (factoryTable_.count(varName)) { - // Search user provided factories - return factoryTable_.find(varName)->second; + // Search/create default factory for this name + return GetDefaultFactory(varName); +} + +template +const RCP +FactoryManager::GetFactoryNonConst( + const std::string &varName) { + return Teuchos::rcp_const_cast(GetFactory(varName)); +} + +template +bool FactoryManager::hasFactory( + const std::string &varName) const { + if (factoryTable_.count(varName)) + return true; + return false; +} + +template +const RCP +FactoryManager::GetDefaultFactory( + const std::string &varName) const { + if (defaultFactoryTable_.count(varName)) { + // The factory for this name was already created (possibly, for previous + // level, if we reuse factory manager) + return defaultFactoryTable_.find(varName)->second; + + } else { + // No 
factory was created for this name, but we may know which one to create + if (varName == "A") + return SetAndReturnDefaultFactory(varName, rcp(new RAPFactory())); + if (varName == "Ainv") + return SetAndReturnDefaultFactory(varName, + rcp(new InverseApproximationFactory())); + if (varName == "RAP Pattern") + return GetFactory("A"); + if (varName == "AP Pattern") + return GetFactory("A"); + if (varName == "Ptent") + return MUELU_KOKKOS_FACTORY(varName, TentativePFactory, + TentativePFactory_kokkos); + if (varName == "P") { + // GetFactory("Ptent"): we need to use the same factory instance for both + // "P" and "Nullspace" + RCP factory; + if (useKokkos_) + factory = rcp(new SaPFactory_kokkos()); + else + factory = rcp(new SaPFactory()); + factory->SetFactory("P", GetFactory("Ptent")); + return SetAndReturnDefaultFactory(varName, factory); } - - // Search/create default factory for this name - return GetDefaultFactory(varName); - } - - template - const RCP FactoryManager::GetFactoryNonConst(const std::string& varName) { - return Teuchos::rcp_const_cast(GetFactory(varName)); - } - - template - bool FactoryManager::hasFactory(const std::string& varName) const { - if (factoryTable_.count(varName)) return true; - return false; - } - - template - const RCP FactoryManager::GetDefaultFactory(const std::string& varName) const { - if (defaultFactoryTable_.count(varName)) { - // The factory for this name was already created (possibly, for previous level, if we reuse factory manager) - return defaultFactoryTable_.find(varName)->second; - - } else { - // No factory was created for this name, but we may know which one to create - if (varName == "A") return SetAndReturnDefaultFactory(varName, rcp(new RAPFactory())); - if (varName == "Ainv") return SetAndReturnDefaultFactory(varName, rcp(new InverseApproximationFactory())); - if (varName == "RAP Pattern") return GetFactory("A"); - if (varName == "AP Pattern") return GetFactory("A"); - if (varName == "Ptent") return 
MUELU_KOKKOS_FACTORY(varName, TentativePFactory, TentativePFactory_kokkos); - if (varName == "P") { - // GetFactory("Ptent"): we need to use the same factory instance for both "P" and "Nullspace" - RCP factory; - if (useKokkos_) - factory = rcp(new SaPFactory_kokkos()); - else - factory = rcp(new SaPFactory()); - factory->SetFactory("P", GetFactory("Ptent")); - return SetAndReturnDefaultFactory(varName, factory); - } - if (varName == "Nullspace") { - // GetFactory("Ptent"): we need to use the same factory instance for both "P" and "Nullspace" - RCP factory; - if (useKokkos_) - factory = rcp(new NullspaceFactory_kokkos()); - else - factory = rcp(new NullspaceFactory()); - factory->SetFactory("Nullspace", GetFactory("Ptent")); - return SetAndReturnDefaultFactory(varName, factory); - } - if (varName == "Scaled Nullspace") return SetAndReturnDefaultFactory(varName, rcp(new ScaledNullspaceFactory())); - - if (varName == "Coordinates") return GetFactory("Ptent"); - if (varName == "Node Comm") return GetFactory("Ptent"); - - if (varName == "R") return SetAndReturnDefaultFactory(varName, rcp(new TransPFactory())); - if (varName == "RfromPfactory") return GetFactory("P"); + if (varName == "Nullspace") { + // GetFactory("Ptent"): we need to use the same factory instance for both + // "P" and "Nullspace" + RCP factory; + if (useKokkos_) + factory = rcp(new NullspaceFactory_kokkos()); + else + factory = rcp(new NullspaceFactory()); + factory->SetFactory("Nullspace", GetFactory("Ptent")); + return SetAndReturnDefaultFactory(varName, factory); + } + if (varName == "Scaled Nullspace") + return SetAndReturnDefaultFactory(varName, + rcp(new ScaledNullspaceFactory())); + + if (varName == "Coordinates") + return GetFactory("Ptent"); + if (varName == "Node Comm") + return GetFactory("Ptent"); + + if (varName == "R") + return SetAndReturnDefaultFactory(varName, rcp(new TransPFactory())); + if (varName == "RfromPfactory") + return GetFactory("P"); #if defined(HAVE_MUELU_ZOLTAN) && 
defined(HAVE_MPI) - if (varName == "Partition") return SetAndReturnDefaultFactory(varName, rcp(new ZoltanInterface())); -#endif //ifdef HAVE_MPI + if (varName == "Partition") + return SetAndReturnDefaultFactory(varName, rcp(new ZoltanInterface())); +#endif // ifdef HAVE_MPI - if (varName == "Importer") { + if (varName == "Importer") { #ifdef HAVE_MPI - return SetAndReturnDefaultFactory(varName, rcp(new RepartitionFactory())); + return SetAndReturnDefaultFactory(varName, rcp(new RepartitionFactory())); #else - return SetAndReturnDefaultFactory(varName, NoFactory::getRCP()); + return SetAndReturnDefaultFactory(varName, NoFactory::getRCP()); #endif - } - if (varName == "number of partitions") { + } + if (varName == "number of partitions") { #ifdef HAVE_MPI - return SetAndReturnDefaultFactory(varName, rcp(new RepartitionHeuristicFactory())); + return SetAndReturnDefaultFactory(varName, + rcp(new RepartitionHeuristicFactory())); #else - return SetAndReturnDefaultFactory(varName, NoFactory::getRCP()); + return SetAndReturnDefaultFactory(varName, NoFactory::getRCP()); #endif - } - if (varName == "repartition: heuristic target rows per process") return GetFactory("number of partitions"); - - if (varName == "Graph") return MUELU_KOKKOS_FACTORY(varName, CoalesceDropFactory, CoalesceDropFactory_kokkos); - if (varName == "UnAmalgamationInfo") return SetAndReturnDefaultFactory(varName, rcp(new AmalgamationFactory())); - if (varName == "Aggregates") return MUELU_KOKKOS_FACTORY(varName, UncoupledAggregationFactory, UncoupledAggregationFactory_kokkos); - if (varName == "AggregateQualities") return SetAndReturnDefaultFactory(varName, rcp(new AggregateQualityEstimateFactory())); - if (varName == "CoarseMap") return SetAndReturnDefaultFactory(varName, rcp(new CoarseMapFactory())); - if (varName == "DofsPerNode") return GetFactory("Graph"); - if (varName == "Filtering") return GetFactory("Graph"); - if (varName == "BlockNumber") return SetAndReturnDefaultFactory(varName, rcp(new 
InitialBlockNumberFactory())); - if (varName == "LineDetection_VertLineIds") return SetAndReturnDefaultFactory(varName, rcp(new LineDetectionFactory())); - if (varName == "LineDetection_Layers") return GetFactory("LineDetection_VertLineIds"); - if (varName == "CoarseNumZLayers") return GetFactory("LineDetection_VertLineIds"); - - // Structured - if (varName == "structuredInterpolationOrder") return SetAndReturnDefaultFactory(varName, rcp(new StructuredAggregationFactory())); - - // Non-Galerkin - if (varName == "K") return GetFactory("A"); - if (varName == "M") return GetFactory("A"); - if (varName == "Mdiag") return GetFactory("A"); - if (varName == "cfl-based shift array") return GetFactory("A"); - - // Same factory for both Pre and Post Smoother. Factory for key "Smoother" can be set by users. - if (varName == "PreSmoother") return GetFactory("Smoother"); - if (varName == "PostSmoother") return GetFactory("Smoother"); - - if (varName == "Ppattern") { - RCP PpFact = rcp(new PatternFactory); - PpFact->SetFactory("P", GetFactory("Ptent")); - return SetAndReturnDefaultFactory(varName, PpFact); - } - if (varName == "Constraint") return SetAndReturnDefaultFactory(varName, rcp(new ConstraintFactory())); - - if (varName == "Smoother") { - Teuchos::ParameterList smootherParamList; - smootherParamList.set("relaxation: type", "Symmetric Gauss-Seidel"); - smootherParamList.set("relaxation: sweeps", Teuchos::OrdinalTraits::one()); - smootherParamList.set("relaxation: damping factor", Teuchos::ScalarTraits::one()); - return SetAndReturnDefaultFactory(varName, rcp(new SmootherFactory(rcp(new TrilinosSmoother("RELAXATION", smootherParamList))))); - } - if (varName == "CoarseSolver") return SetAndReturnDefaultFactory(varName, rcp(new SmootherFactory(rcp(new DirectSolver()), Teuchos::null))); - - if (varName == "DualNodeID2PrimalNodeID") return SetAndReturnDefaultFactory(varName, rcp(new InterfaceMappingTransferFactory())); - if (varName == "CoarseDualNodeID2PrimalNodeID") return 
SetAndReturnDefaultFactory(varName, rcp(new InterfaceAggregationFactory())); + } + if (varName == "repartition: heuristic target rows per process") + return GetFactory("number of partitions"); + + if (varName == "Graph") + return MUELU_KOKKOS_FACTORY(varName, CoalesceDropFactory, + CoalesceDropFactory_kokkos); + if (varName == "UnAmalgamationInfo") + return SetAndReturnDefaultFactory(varName, + rcp(new AmalgamationFactory())); + if (varName == "Aggregates") + return MUELU_KOKKOS_FACTORY(varName, UncoupledAggregationFactory, + UncoupledAggregationFactory_kokkos); + if (varName == "AggregateQualities") + return SetAndReturnDefaultFactory( + varName, rcp(new AggregateQualityEstimateFactory())); + if (varName == "CoarseMap") + return SetAndReturnDefaultFactory(varName, rcp(new CoarseMapFactory())); + if (varName == "DofsPerNode") + return GetFactory("Graph"); + if (varName == "Filtering") + return GetFactory("Graph"); + if (varName == "BlockNumber") + return SetAndReturnDefaultFactory(varName, + rcp(new InitialBlockNumberFactory())); + if (varName == "LineDetection_VertLineIds") + return SetAndReturnDefaultFactory(varName, + rcp(new LineDetectionFactory())); + if (varName == "LineDetection_Layers") + return GetFactory("LineDetection_VertLineIds"); + if (varName == "CoarseNumZLayers") + return GetFactory("LineDetection_VertLineIds"); + + // Structured + if (varName == "structuredInterpolationOrder") + return SetAndReturnDefaultFactory( + varName, rcp(new StructuredAggregationFactory())); + + // Non-Galerkin + if (varName == "K") + return GetFactory("A"); + if (varName == "M") + return GetFactory("A"); + if (varName == "Mdiag") + return GetFactory("A"); + if (varName == "cfl-based shift array") + return GetFactory("A"); + + // Same factory for both Pre and Post Smoother. Factory for key "Smoother" + // can be set by users. 
+ if (varName == "PreSmoother") + return GetFactory("Smoother"); + if (varName == "PostSmoother") + return GetFactory("Smoother"); + + if (varName == "Ppattern") { + RCP PpFact = rcp(new PatternFactory); + PpFact->SetFactory("P", GetFactory("Ptent")); + return SetAndReturnDefaultFactory(varName, PpFact); + } + if (varName == "Constraint") + return SetAndReturnDefaultFactory(varName, rcp(new ConstraintFactory())); + + if (varName == "Smoother") { + Teuchos::ParameterList smootherParamList; + smootherParamList.set("relaxation: type", "Symmetric Gauss-Seidel"); + smootherParamList.set("relaxation: sweeps", + Teuchos::OrdinalTraits::one()); + smootherParamList.set("relaxation: damping factor", + Teuchos::ScalarTraits::one()); + return SetAndReturnDefaultFactory( + varName, rcp(new SmootherFactory(rcp(new TrilinosSmoother( + "RELAXATION", smootherParamList))))); + } + if (varName == "CoarseSolver") + return SetAndReturnDefaultFactory( + varName, + rcp(new SmootherFactory(rcp(new DirectSolver()), Teuchos::null))); + + if (varName == "DualNodeID2PrimalNodeID") + return SetAndReturnDefaultFactory( + varName, rcp(new InterfaceMappingTransferFactory())); + if (varName == "CoarseDualNodeID2PrimalNodeID") + return SetAndReturnDefaultFactory(varName, + rcp(new InterfaceAggregationFactory())); #ifdef HAVE_MUELU_INTREPID2 - // If we're asking for it, find who made P - if (varName == "pcoarsen: element to node map") return GetFactory("P"); + // If we're asking for it, find who made P + if (varName == "pcoarsen: element to node map") + return GetFactory("P"); #endif - // NOTE: These are user data, but we might want to print them, so they need a default factory - if (varName == "Pnodal") return NoFactory::getRCP(); - if (varName == "NodeMatrix") return NoFactory::getRCP(); - if (varName == "NodeAggMatrix") return NoFactory::getRCP(); - - - TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "MueLu::FactoryManager::GetDefaultFactory(): No default factory available for 
building '" + varName + "'."); - } + // NOTE: These are user data, but we might want to print them, so they need + // a default factory + if (varName == "Pnodal") + return NoFactory::getRCP(); + if (varName == "NodeMatrix") + return NoFactory::getRCP(); + if (varName == "NodeAggMatrix") + return NoFactory::getRCP(); + + TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, + "MueLu::FactoryManager::GetDefaultFactory(): No " + "default factory available for building '" + + varName + "'."); } - - template - const RCP FactoryManager::SetAndReturnDefaultFactory(const std::string& varName, const RCP& factory) const { - TEUCHOS_TEST_FOR_EXCEPTION(factory.is_null(), Exceptions::RuntimeError, "The default factory for building '" << varName << "' is null"); - - GetOStream(Runtime1) << "Using default factory (" << factory->ShortClassName() <<"["<GetID()<<"]) for building '" << varName << "'." << std::endl; - - defaultFactoryTable_[varName] = factory; - - return defaultFactoryTable_[varName]; - } - - template - void FactoryManager::Print() const { - std::map >::const_iterator it; - Teuchos::FancyOStream& fancy = GetOStream(Debug); - //auto & fancy = std::cout;// For debugging - - - fancy << "Users factory table (factoryTable_):" << std::endl; - for (it = factoryTable_.begin(); it != factoryTable_.end(); it++) { - fancy << " " << it->first << " -> "; - if (it->second.get() == NoFactory::get()) fancy << "NoFactory"; - else if (!it->second.get()) fancy<< "NULL"; - else { - fancy << it->second.get()->ShortClassName()<<"["<second.get()->GetID()<<"]"; +} + +template +const RCP +FactoryManager::SetAndReturnDefaultFactory(const std::string &varName, + const RCP + &factory) const { + TEUCHOS_TEST_FOR_EXCEPTION(factory.is_null(), Exceptions::RuntimeError, + "The default factory for building '" + << varName << "' is null"); + + GetOStream(Runtime1) << "Using default factory (" << factory->ShortClassName() + << "[" << factory->GetID() << "]) for building '" + << varName << 
"'." << std::endl; + + defaultFactoryTable_[varName] = factory; + + return defaultFactoryTable_[varName]; +} + +template +void FactoryManager::Print() const { + std::map>::const_iterator it; + Teuchos::FancyOStream &fancy = GetOStream(Debug); + // auto & fancy = std::cout;// For debugging + + fancy << "Users factory table (factoryTable_):" << std::endl; + for (it = factoryTable_.begin(); it != factoryTable_.end(); it++) { + fancy << " " << it->first << " -> "; + if (it->second.get() == NoFactory::get()) + fancy << "NoFactory"; + else if (!it->second.get()) + fancy << "NULL"; + else { + fancy << it->second.get()->ShortClassName() << "[" + << it->second.get()->GetID() << "]"; #ifdef HAVE_MUELU_DEBUG - fancy<<"("<second.get()) <<")"; + fancy << "(" << Teuchos::toString(it->second.get()) << ")"; #endif - } - fancy<< std::endl; } + fancy << std::endl; + } - fancy << "Default factory table (defaultFactoryTable_):" << std::endl; - for (it = defaultFactoryTable_.begin(); it != defaultFactoryTable_.end(); it++) { - fancy << " " << it->first << " -> "; - if (it->second.get() == NoFactory::get()) fancy << "NoFactory"; - else if (!it->second.get()) fancy<< "NULL"; - else { - fancy << it->second.get()->ShortClassName()<<"["<second.get()->GetID()<<"]"; + fancy << "Default factory table (defaultFactoryTable_):" << std::endl; + for (it = defaultFactoryTable_.begin(); it != defaultFactoryTable_.end(); + it++) { + fancy << " " << it->first << " -> "; + if (it->second.get() == NoFactory::get()) + fancy << "NoFactory"; + else if (!it->second.get()) + fancy << "NULL"; + else { + fancy << it->second.get()->ShortClassName() << "[" + << it->second.get()->GetID() << "]"; #ifdef HAVE_MUELU_DEBUG - fancy<<"("<second.get()) <<")"; + fancy << "(" << Teuchos::toString(it->second.get()) << ")"; #endif - } - fancy<< std::endl; } - + fancy << std::endl; } +} #ifdef HAVE_MUELU_DEBUG - template - void FactoryManager::ResetDebugData() const { - std::map >::const_iterator it; - - for (it = 
factoryTable_.begin(); it != factoryTable_.end(); it++) - if (!it->second.is_null()) - it->second->ResetDebugData(); - - for (it = defaultFactoryTable_.begin(); it != defaultFactoryTable_.end(); it++) - if (!it->second.is_null()) - it->second->ResetDebugData(); - } +template +void FactoryManager::ResetDebugData() + const { + std::map>::const_iterator it; + + for (it = factoryTable_.begin(); it != factoryTable_.end(); it++) + if (!it->second.is_null()) + it->second->ResetDebugData(); + + for (it = defaultFactoryTable_.begin(); it != defaultFactoryTable_.end(); + it++) + if (!it->second.is_null()) + it->second->ResetDebugData(); +} #endif - #undef MUELU_KOKKOS_FACTORY } // namespace MueLu -//TODO: add operator[] -//TODO: should we use a parameterList instead of a std::map? It might be useful to tag which factory have been used and report unused factory. -//TODO: add an option 'NoDefault' to check if we are using any default factory. -//TODO: use Teuchos::ConstNonConstObjectContainer to allow user to modify factories after a GetFactory() +// TODO: add operator[] +// TODO: should we use a parameterList instead of a std::map? It might be useful +// to tag which factory have been used and report unused factory. +// TODO: add an option 'NoDefault' to check if we are using any default factory. 
+// TODO: use Teuchos::ConstNonConstObjectContainer to allow user to modify +// factories after a GetFactory() #endif // MUELU_FACTORYMANAGER_DEF_HPP diff --git a/packages/muelu/src/MueCentral/MueLu_HierarchyUtils_decl.hpp b/packages/muelu/src/MueCentral/MueLu_HierarchyUtils_decl.hpp index 2bfb4b97378b..18049698b4d2 100644 --- a/packages/muelu/src/MueCentral/MueLu_HierarchyUtils_decl.hpp +++ b/packages/muelu/src/MueCentral/MueLu_HierarchyUtils_decl.hpp @@ -48,95 +48,93 @@ #include "MueLu_ConfigDefs.hpp" -#include "MueLu_FactoryManager_fwd.hpp" #include "MueLu_FactoryManagerBase.hpp" +#include "MueLu_FactoryManager_fwd.hpp" +#include "MueLu_HierarchyManager_fwd.hpp" #include "MueLu_HierarchyUtils_fwd.hpp" +#include "MueLu_Hierarchy_fwd.hpp" #include "MueLu_Level_fwd.hpp" #include "MueLu_SingleLevelFactoryBase.hpp" #include "MueLu_TwoLevelFactoryBase.hpp" -#include "MueLu_Hierarchy_fwd.hpp" -#include "MueLu_HierarchyManager_fwd.hpp" -// Warning: on TopRAPFactory and TopSmootherFactory constructors, Teuchos::null doesn't mean "default factory" but "no build" +// Warning: on TopRAPFactory and TopSmootherFactory constructors, Teuchos::null +// doesn't mean "default factory" but "no build" namespace MueLu { - //! An exception safe way to call the method 'Level::SetFactoryManager()' - class SetFactoryManager { - - public: - - //@{ - - /*! - @brief Constructor - - Set a given factory manager on a specific level - */ - SetFactoryManager(const RCP & level, const RCP & factoryManager) - : level_(level), prevFactoryManager_(level->GetFactoryManager()) - { - // set new factory manager - level->SetFactoryManager(factoryManager); - } - - //! Destructor. - virtual ~SetFactoryManager() { - // restore previous factory manager - level_->SetFactoryManager(prevFactoryManager_); - } - - //@} - - private: - //! needed to save & restore previous factoryManager - const RCP level_; - const RCP prevFactoryManager_; - }; - - - - - template - class HierarchyUtils { +//! 
An exception safe way to call the method 'Level::SetFactoryManager()' +class SetFactoryManager { + +public: + //@{ + + /*! + @brief Constructor + + Set a given factory manager on a specific level + */ + SetFactoryManager(const RCP &level, + const RCP &factoryManager) + : level_(level), prevFactoryManager_(level->GetFactoryManager()) { + // set new factory manager + level->SetFactoryManager(factoryManager); + } + + //! Destructor. + virtual ~SetFactoryManager() { + // restore previous factory manager + level_->SetFactoryManager(prevFactoryManager_); + } + + //@} + +private: + //! needed to save & restore previous factoryManager + const RCP level_; + const RCP prevFactoryManager_; +}; + +template +class HierarchyUtils { #undef MUELU_HIERARCHYUTILS_SHORT #include "MueLu_UseShortNames.hpp" - public: - /*! - \brief Add non-serializable data to Hierarchy - - Add non-serializable data given level-specific sublist \c nonSerialList to the Hierarchy \c H. - Calling \c AddLevel() along the way, if necessary. - - Non-serializable data to be added: - - Operator "A" - - Prolongator "P" - - Restrictor "R" - - "M" - - "Mdiag" - - "K" - - Nullspace information "Nullspace" - - Coordinate information "Coordinates" - - "Node Comm" - - Primal-to-dual node mapping "DualNodeID2PrimalNodeID" - - "Primal interface DOF map" - - "pcoarsen: element to node map - - This routine is used by the CreateXpetraPreconditioner() routine. - - @param HM Hierarhcy manager - @param H Hierarchy, where non-serializable data needs to be added - @param nonSerialList Parameter list containing non-serializable data - */ - static void AddNonSerializableDataToHierarchy(HierarchyManager& HM, Hierarchy& H, const ParameterList& nonSerialList); - static void CopyBetweenHierarchies(Hierarchy& fromHierarchy, Hierarchy& toHierarchy, const std::string fromLabel, const std::string toLabel, const std::string dataType); - }; - - - +public: + /*! 
+ \brief Add non-serializable data to Hierarchy + + Add non-serializable data given level-specific sublist \c nonSerialList to the + Hierarchy \c H. Calling \c AddLevel() along the way, if necessary. + + Non-serializable data to be added: + - Operator "A" + - Prolongator "P" + - Restrictor "R" + - "M" + - "Mdiag" + - "K" + - Nullspace information "Nullspace" + - Coordinate information "Coordinates" + - "Node Comm" + - Primal-to-dual node mapping "DualNodeID2PrimalNodeID" + - "Primal interface DOF map" + - "pcoarsen: element to node map + + This routine is used by the CreateXpetraPreconditioner() routine. + + @param HM Hierarhcy manager + @param H Hierarchy, where non-serializable data needs to be added + @param nonSerialList Parameter list containing non-serializable data + */ + static void + AddNonSerializableDataToHierarchy(HierarchyManager &HM, Hierarchy &H, + const ParameterList &nonSerialList); + static void CopyBetweenHierarchies(Hierarchy &fromHierarchy, + Hierarchy &toHierarchy, + const std::string fromLabel, + const std::string toLabel, + const std::string dataType); +}; } // namespace MueLu diff --git a/packages/muelu/src/MueCentral/MueLu_HierarchyUtils_def.hpp b/packages/muelu/src/MueCentral/MueLu_HierarchyUtils_def.hpp index 788081aada5d..3a2c1e2dacf1 100644 --- a/packages/muelu/src/MueCentral/MueLu_HierarchyUtils_def.hpp +++ b/packages/muelu/src/MueCentral/MueLu_HierarchyUtils_def.hpp @@ -51,366 +51,523 @@ #include #include -#include "MueLu_HierarchyUtils_decl.hpp" -#include "MueLu_HierarchyManager.hpp" #include "MueLu_FactoryManager.hpp" +#include "MueLu_HierarchyManager.hpp" +#include "MueLu_HierarchyUtils_decl.hpp" -//TODO/FIXME: DeclareInput(, **this**) cannot be used here +// TODO/FIXME: DeclareInput(, **this**) cannot be used here #ifdef HAVE_MUELU_INTREPID2 #include "Kokkos_DynRankView.hpp" #endif namespace MueLu { - // Copy object from one hierarchy to another calling AddNewLevel as appropriate. 
- template - void HierarchyUtils::CopyBetweenHierarchies(Hierarchy& fromHierarchy, Hierarchy& toHierarchy, const std::string fromLabel, const std::string toLabel, const std::string dataType) { +// Copy object from one hierarchy to another calling AddNewLevel as appropriate. +template +void HierarchyUtils::CopyBetweenHierarchies(Hierarchy &fromHierarchy, + Hierarchy &toHierarchy, + const std::string fromLabel, + const std::string toLabel, + const std::string dataType) { - // add any necessary levels - for (int i = toHierarchy.GetNumLevels(); i < fromHierarchy.GetNumLevels(); i++) - toHierarchy.AddNewLevel(); - - for (int i = 0; i < fromHierarchy.GetNumLevels(); i++) { - RCP fromLevel = fromHierarchy.GetLevel(i); - RCP toLevel = toHierarchy.GetLevel(i); - - TEUCHOS_TEST_FOR_EXCEPTION(dataType != "RCP" && dataType != "RCP" - , Exceptions::InvalidArgument, - std::string("MueLu::Utils::CopyBetweenHierarchies: unknown data type(") + dataType + ")"); - if (fromLevel->IsAvailable(fromLabel)) { - if (dataType == "RCP" ) { - // Normally, we should only do - // toLevel->Set(toLabel,fromLevel->Get >(fromLabel)); - // The logic below is meant to handle a special case when we - // repartition a processor away, leaving behind a RCP on - // on the level instead of an RCP + // add any necessary levels + for (int i = toHierarchy.GetNumLevels(); i < fromHierarchy.GetNumLevels(); + i++) + toHierarchy.AddNewLevel(); - auto tempOp = fromLevel->Get >(fromLabel); - auto tempMatrix = rcp_dynamic_cast(tempOp); - if(!tempMatrix.is_null()) toLevel->Set(toLabel,tempMatrix); - else toLevel->Set(toLabel,tempOp); - } - if (dataType == "RCP") { - toLevel->Set(toLabel,fromLevel->Get >(fromLabel)); - } + for (int i = 0; i < fromHierarchy.GetNumLevels(); i++) { + RCP fromLevel = fromHierarchy.GetLevel(i); + RCP toLevel = toHierarchy.GetLevel(i); + + TEUCHOS_TEST_FOR_EXCEPTION( + dataType != "RCP" && dataType != "RCP", + Exceptions::InvalidArgument, + std::string( + 
"MueLu::Utils::CopyBetweenHierarchies: unknown data type(") + + dataType + ")"); + if (fromLevel->IsAvailable(fromLabel)) { + if (dataType == "RCP") { + // Normally, we should only do + // toLevel->Set(toLabel,fromLevel->Get >(fromLabel)); + // The logic below is meant to handle a special case when we + // repartition a processor away, leaving behind a RCP on + // on the level instead of an RCP + + auto tempOp = fromLevel->Get>(fromLabel); + auto tempMatrix = rcp_dynamic_cast(tempOp); + if (!tempMatrix.is_null()) + toLevel->Set(toLabel, tempMatrix); + else + toLevel->Set(toLabel, tempOp); + } + if (dataType == "RCP") { + toLevel->Set(toLabel, fromLevel->Get>(fromLabel)); } } } +} - // Adds the following non-serializable data (A,P,R,Nullspace,Coordinates) from level-specific sublist nonSerialList, - // calling AddNewLevel as appropriate. - template - void HierarchyUtils::AddNonSerializableDataToHierarchy(HierarchyManager& HM, Hierarchy& H, const ParameterList& nonSerialList) { - typedef typename Xpetra::MultiVector::coordinateType, - LocalOrdinal, GlobalOrdinal, Node> realvaluedmultivector_type; +// Adds the following non-serializable data (A,P,R,Nullspace,Coordinates) from +// level-specific sublist nonSerialList, calling AddNewLevel as appropriate. 
+template +void HierarchyUtils:: + AddNonSerializableDataToHierarchy(HierarchyManager &HM, Hierarchy &H, + const ParameterList &nonSerialList) { + typedef typename Xpetra::MultiVector< + typename Teuchos::ScalarTraits::coordinateType, LocalOrdinal, + GlobalOrdinal, Node> + realvaluedmultivector_type; - for (ParameterList::ConstIterator nonSerialEntry = nonSerialList.begin(); nonSerialEntry != nonSerialList.end(); nonSerialEntry++) { - const std::string& levelName = nonSerialEntry->first; - // Check for match of the form "level X" where X is a positive integer - if (nonSerialList.isSublist(levelName) && levelName.find("level ") == 0 && levelName.size() > 6) { - int levelID = strtol(levelName.substr(6).c_str(), 0, 0); - if (levelID > 0) - { - // Do enough level adding so we can be sure to add the data to the right place - for (int i = H.GetNumLevels(); i <= levelID; i++) - H.AddNewLevel(); - } - RCP level = H.GetLevel(levelID); + for (ParameterList::ConstIterator nonSerialEntry = nonSerialList.begin(); + nonSerialEntry != nonSerialList.end(); nonSerialEntry++) { + const std::string &levelName = nonSerialEntry->first; + // Check for match of the form "level X" where X is a positive integer + if (nonSerialList.isSublist(levelName) && levelName.find("level ") == 0 && + levelName.size() > 6) { + int levelID = strtol(levelName.substr(6).c_str(), 0, 0); + if (levelID > 0) { + // Do enough level adding so we can be sure to add the data to the right + // place + for (int i = H.GetNumLevels(); i <= levelID; i++) + H.AddNewLevel(); + } + RCP level = H.GetLevel(levelID); - RCP M = Teuchos::rcp_dynamic_cast(HM.GetFactoryManager(levelID)); - TEUCHOS_TEST_FOR_EXCEPTION(M.is_null(), Exceptions::InvalidArgument, "MueLu::Utils::AddNonSerializableDataToHierarchy: cannot get FactoryManager"); + RCP M = Teuchos::rcp_dynamic_cast( + HM.GetFactoryManager(levelID)); + TEUCHOS_TEST_FOR_EXCEPTION( + M.is_null(), Exceptions::InvalidArgument, + "MueLu::Utils::AddNonSerializableDataToHierarchy: 
cannot get " + "FactoryManager"); - // Grab the level sublist & loop over parameters - const ParameterList& levelList = nonSerialList.sublist(levelName); - for (ParameterList::ConstIterator levelListEntry = levelList.begin(); levelListEntry != levelList.end(); levelListEntry++) { - const std::string& name = levelListEntry->first; - TEUCHOS_TEST_FOR_EXCEPTION(name != "A" && name != "P" && name != "R" && name != "K" && name != "M" && name != "Mdiag" && - name != "D0" && name != "M1" && name != "Ms" && name != "M0inv" && - name != "Pnodal" && name != "NodeMatrix" && name != "NodeAggMatrix" && - name != "Nullspace" && name != "Coordinates" && name != "pcoarsen: element to node map" && - name != "Node Comm" && name != "DualNodeID2PrimalNodeID" && name != "Primal interface DOF map" && - !IsParamMuemexVariable(name), Exceptions::InvalidArgument, - std::string("MueLu::Utils::AddNonSerializableDataToHierarchy: parameter list contains unknown data type(") + name + ")"); + // Grab the level sublist & loop over parameters + const ParameterList &levelList = nonSerialList.sublist(levelName); + for (ParameterList::ConstIterator levelListEntry = levelList.begin(); + levelListEntry != levelList.end(); levelListEntry++) { + const std::string &name = levelListEntry->first; + TEUCHOS_TEST_FOR_EXCEPTION( + name != "A" && name != "P" && name != "R" && name != "K" && + name != "M" && name != "Mdiag" && name != "D0" && + name != "M1" && name != "Ms" && name != "M0inv" && + name != "Pnodal" && name != "NodeMatrix" && + name != "NodeAggMatrix" && name != "Nullspace" && + name != "Coordinates" && + name != "pcoarsen: element to node map" && + name != "Node Comm" && name != "DualNodeID2PrimalNodeID" && + name != "Primal interface DOF map" && + !IsParamMuemexVariable(name), + Exceptions::InvalidArgument, + std::string("MueLu::Utils::AddNonSerializableDataToHierarchy: " + "parameter list contains unknown data type(") + + name + ")"); - // Get a valid communicator and lib - RCP > comm; - if 
(!level->GetComm().is_null()) - comm = level->GetComm(); - else if (level->IsAvailable("A")) { + // Get a valid communicator and lib + RCP> comm; + if (!level->GetComm().is_null()) + comm = level->GetComm(); + else if (level->IsAvailable("A")) { + RCP mat; + level->Get("A", mat); + comm = mat->getMap()->getComm(); + } else { + RCP level0 = H.GetLevel(0); + if (!level0->GetComm().is_null()) + comm = level0->GetComm(); + else { RCP mat; - level->Get("A", mat); + level0->Get("A", mat); comm = mat->getMap()->getComm(); - } else { - RCP level0 = H.GetLevel(0); - if (!level0->GetComm().is_null()) - comm = level0->GetComm(); - else { - RCP mat; - level0->Get("A", mat); - comm = mat->getMap()->getComm(); - } } - Xpetra::UnderlyingLib lib = level->lib(); + } + Xpetra::UnderlyingLib lib = level->lib(); + + if (name == "A") { + RCP mat; + if (levelListEntry->second.isType()) + // We might also want to read maps here. + mat = Xpetra::IO::Read( + Teuchos::getValue(levelListEntry->second), lib, + comm); + else + mat = Teuchos::getValue>(levelListEntry->second); + level->Set(name, mat, NoFactory::get()); + M->SetFactory( + name, NoFactory::getRCP()); // TAW: not sure about this: be aware + // that this affects all levels + // However, A is accessible + // through NoFactory anyway, so + // it should be fine here. + } else if (name == "P" || name == "R" || name == "K" || name == "M") { + if (levelListEntry->second.isType>()) { + RCP mat; + mat = Teuchos::getValue>(levelListEntry->second); + + RCP fact = M->GetFactory(name); + level->AddKeepFlag(name, fact.get(), MueLu::UserData); + level->Set(name, mat, fact.get()); - if (name == "A") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, mat, NoFactory::get()); + } else { RCP mat; if (levelListEntry->second.isType()) // We might also want to read maps here. 
- mat = Xpetra::IO::Read(Teuchos::getValue(levelListEntry->second), lib, comm); + mat = Xpetra::IO::Read( + Teuchos::getValue(levelListEntry->second), lib, + comm); else - mat = Teuchos::getValue > (levelListEntry->second); - level->Set(name, mat, NoFactory::get()); - M->SetFactory(name, NoFactory::getRCP()); // TAW: not sure about this: be aware that this affects all levels - // However, A is accessible through NoFactory anyway, so it should - // be fine here. - } - else if(name == "P" || name == "R" || name == "K" || name == "M" ) { - if (levelListEntry->second.isType >()) { - RCP mat; - mat = Teuchos::getValue > (levelListEntry->second); - - RCP fact = M->GetFactory(name); - level->AddKeepFlag(name,fact.get(),MueLu::UserData); - level->Set(name, mat, fact.get()); - - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, mat, NoFactory::get()); - } else { - RCP mat; - if (levelListEntry->second.isType()) - // We might also want to read maps here. - mat = Xpetra::IO::Read(Teuchos::getValue(levelListEntry->second), lib, comm); - else - mat = Teuchos::getValue > (levelListEntry->second); + mat = Teuchos::getValue>(levelListEntry->second); - RCP fact = M->GetFactory(name); - level->AddKeepFlag(name,fact.get(),MueLu::UserData); - level->Set(name, mat, fact.get()); + RCP fact = M->GetFactory(name); + level->AddKeepFlag(name, fact.get(), MueLu::UserData); + level->Set(name, mat, fact.get()); - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, mat, NoFactory::get()); - } - } - else if (name == "D0" || name == "M1" || name == "Ms" || name == "M0inv" || name == "Pnodal" || name == "NodeMatrix" || name == "NodeAggMatrix") { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - if (levelListEntry->second.isType >()) - level->Set(name, Teuchos::getValue > (levelListEntry->second), NoFactory::get()); - else - level->Set(name, Teuchos::getValue > (levelListEntry->second), NoFactory::get()); - } - else if (name == 
"Mdiag") - { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue > (levelListEntry->second), NoFactory::get()); - } - else if (name == "Nullspace") - { - RCP vec; - if (levelListEntry->second.isType()) { - TEUCHOS_ASSERT(level->IsAvailable("A")); - RCP mat; - level->Get("A", mat); - auto map = mat->getMap(); - vec = Xpetra::IO::ReadMultiVector(Teuchos::getValue(levelListEntry->second), map); - } else - vec = Teuchos::getValue > (levelListEntry->second); level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); - level->Set(name, vec, NoFactory::get()); - //M->SetFactory(name, NoFactory::getRCP()); // TAW: generally it is a bad idea to overwrite the factory manager data here - // One should do this only in very special cases + level->Set(name, mat, NoFactory::get()); } - else if(name == "Coordinates") //Scalar of Coordinates MV is always double - { - RCP vec; - if (levelListEntry->second.isType()) { - TEUCHOS_ASSERT(level->IsAvailable("A")); - RCP mat; - level->Get("A", mat); - size_t blkSize = mat->GetFixedBlockSize(); - RCP nodeMap = mat->getRowMap(); - if (blkSize > 1) { - // Create a nodal map, as coordinates have not been expanded to a DOF map yet. 
- RCP dofMap = mat->getRowMap(); - GO indexBase = dofMap->getIndexBase(); - size_t numLocalDOFs = dofMap->getLocalNumElements(); - TEUCHOS_TEST_FOR_EXCEPTION(numLocalDOFs % blkSize, Exceptions::RuntimeError, - "HierarchyUtils: block size (" << blkSize << ") is incompatible with the number of local dofs in a row map (" << numLocalDOFs); - ArrayView GIDs = dofMap->getLocalElementList(); + } else if (name == "D0" || name == "M1" || name == "Ms" || + name == "M0inv" || name == "Pnodal" || + name == "NodeMatrix" || name == "NodeAggMatrix") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + if (levelListEntry->second.isType>()) + level->Set(name, + Teuchos::getValue>(levelListEntry->second), + NoFactory::get()); + else + level->Set(name, + Teuchos::getValue>(levelListEntry->second), + NoFactory::get()); + } else if (name == "Mdiag") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, + Teuchos::getValue>(levelListEntry->second), + NoFactory::get()); + } else if (name == "Nullspace") { + RCP vec; + if (levelListEntry->second.isType()) { + TEUCHOS_ASSERT(level->IsAvailable("A")); + RCP mat; + level->Get("A", mat); + auto map = mat->getMap(); + vec = Xpetra::IO:: + ReadMultiVector( + Teuchos::getValue(levelListEntry->second), + map); + } else + vec = Teuchos::getValue>(levelListEntry->second); + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, vec, NoFactory::get()); + // M->SetFactory(name, NoFactory::getRCP()); // TAW: generally it is a + // bad idea to overwrite the factory manager data here + // One should do this only in very special cases + } else if (name == + "Coordinates") // Scalar of Coordinates MV is always double + { + RCP vec; + if (levelListEntry->second.isType()) { + TEUCHOS_ASSERT(level->IsAvailable("A")); + RCP mat; + level->Get("A", mat); + size_t blkSize = mat->GetFixedBlockSize(); + RCP nodeMap = mat->getRowMap(); + if (blkSize > 1) { + // Create a nodal map, as coordinates 
have not been expanded to a + // DOF map yet. + RCP dofMap = mat->getRowMap(); + GO indexBase = dofMap->getIndexBase(); + size_t numLocalDOFs = dofMap->getLocalNumElements(); + TEUCHOS_TEST_FOR_EXCEPTION( + numLocalDOFs % blkSize, Exceptions::RuntimeError, + "HierarchyUtils: block size (" + << blkSize + << ") is incompatible with the number of local dofs in a " + "row map (" + << numLocalDOFs); + ArrayView GIDs = dofMap->getLocalElementList(); - Array nodeGIDs(numLocalDOFs/blkSize); - for (size_t i = 0; i < numLocalDOFs; i += blkSize) - nodeGIDs[i/blkSize] = (GIDs[i] - indexBase)/blkSize + indexBase; + Array nodeGIDs(numLocalDOFs / blkSize); + for (size_t i = 0; i < numLocalDOFs; i += blkSize) + nodeGIDs[i / blkSize] = + (GIDs[i] - indexBase) / blkSize + indexBase; - Xpetra::global_size_t INVALID = Teuchos::OrdinalTraits::invalid(); - nodeMap = MapFactory::Build(dofMap->lib(), INVALID, nodeGIDs(), indexBase, dofMap->getComm()); - } - vec = Xpetra::IO::coordinateType,LocalOrdinal,GlobalOrdinal,Node>::ReadMultiVector(Teuchos::getValue(levelListEntry->second), nodeMap); - } else - vec = Teuchos::getValue > (levelListEntry->second); - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, vec, NoFactory::get()); - //M->SetFactory(name, NoFactory::getRCP()); // TAW: generally it is a bad idea to overwrite the factory manager data here - } - else if(name == "Node Comm") - { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue > >(levelListEntry->second), NoFactory::get()); - } - else if(name == "DualNodeID2PrimalNodeID") - { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue>>(levelListEntry->second), NoFactory::get()); - } - else if(name == "Primal interface DOF map") - { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue>(levelListEntry->second), NoFactory::get()); - } + Xpetra::global_size_t INVALID = + 
Teuchos::OrdinalTraits::invalid(); + nodeMap = MapFactory::Build(dofMap->lib(), INVALID, nodeGIDs(), + indexBase, dofMap->getComm()); + } + vec = Xpetra::IO< + typename Teuchos::ScalarTraits::coordinateType, + LocalOrdinal, GlobalOrdinal, Node>:: + ReadMultiVector( + Teuchos::getValue(levelListEntry->second), + nodeMap); + } else + vec = Teuchos::getValue>( + levelListEntry->second); + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, vec, NoFactory::get()); + // M->SetFactory(name, NoFactory::getRCP()); // TAW: generally it is a + // bad idea to overwrite the factory manager data here + } else if (name == "Node Comm") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, + Teuchos::getValue>>( + levelListEntry->second), + NoFactory::get()); + } else if (name == "DualNodeID2PrimalNodeID") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set( + name, + Teuchos::getValue>>(levelListEntry->second), + NoFactory::get()); + } else if (name == "Primal interface DOF map") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, + Teuchos::getValue>(levelListEntry->second), + NoFactory::get()); + } #ifdef HAVE_MUELU_INTREPID2 - else if (name == "pcoarsen: element to node map") - { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue > >(levelListEntry->second), NoFactory::get()); - } + else if (name == "pcoarsen: element to node map") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, + Teuchos::getValue>>( + levelListEntry->second), + NoFactory::get()); + } #endif - else + else #ifdef HAVE_MUELU_MATLAB - { - //Custom variable for Muemex - size_t typeNameStart = name.find_first_not_of(' '); - size_t typeNameEnd = name.find(' ', typeNameStart); - std::string typeName = name.substr(typeNameStart, typeNameEnd - typeNameStart); - std::transform(typeName.begin(), typeName.end(), 
typeName.begin(), ::tolower); - level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); - if(typeName == "matrix") - level->Set(name, Teuchos::getValue >(levelListEntry->second), NoFactory::get()); - else if(typeName == "multivector") - level->Set(name, Teuchos::getValue >(levelListEntry->second), NoFactory::get()); - else if(typeName == "map") - level->Set(name, Teuchos::getValue > >(levelListEntry->second), NoFactory::get()); - else if(typeName == "ordinalvector") - level->Set(name, Teuchos::getValue > >(levelListEntry->second), NoFactory::get()); - else if(typeName == "scalar") - level->Set(name, Teuchos::getValue(levelListEntry->second), NoFactory::get()); - else if(typeName == "double") - level->Set(name, Teuchos::getValue(levelListEntry->second), NoFactory::get()); - else if(typeName == "complex") - level->Set(name, Teuchos::getValue >(levelListEntry->second), NoFactory::get()); - else if(typeName == "int") - level->Set(name, Teuchos::getValue(levelListEntry->second), NoFactory::get()); - else if(typeName == "string") - level->Set(name, Teuchos::getValue(levelListEntry->second), NoFactory::get()); - } + { + // Custom variable for Muemex + size_t typeNameStart = name.find_first_not_of(' '); + size_t typeNameEnd = name.find(' ', typeNameStart); + std::string typeName = + name.substr(typeNameStart, typeNameEnd - typeNameStart); + std::transform(typeName.begin(), typeName.end(), typeName.begin(), + ::tolower); + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + if (typeName == "matrix") + level->Set(name, + Teuchos::getValue>(levelListEntry->second), + NoFactory::get()); + else if (typeName == "multivector") + level->Set( + name, + Teuchos::getValue>(levelListEntry->second), + NoFactory::get()); + else if (typeName == "map") + level->Set(name, + Teuchos::getValue< + RCP>>( + levelListEntry->second), + NoFactory::get()); + else if (typeName == "ordinalvector") + level->Set( + name, + Teuchos::getValue>>( + levelListEntry->second), + 
NoFactory::get()); + else if (typeName == "scalar") + level->Set(name, Teuchos::getValue(levelListEntry->second), + NoFactory::get()); + else if (typeName == "double") + level->Set(name, Teuchos::getValue(levelListEntry->second), + NoFactory::get()); + else if (typeName == "complex") + level->Set( + name, + Teuchos::getValue>(levelListEntry->second), + NoFactory::get()); + else if (typeName == "int") + level->Set(name, Teuchos::getValue(levelListEntry->second), + NoFactory::get()); + else if (typeName == "string") + level->Set(name, + Teuchos::getValue(levelListEntry->second), + NoFactory::get()); + } #else - { - throw std::runtime_error("Invalid non-serializable data on list"); - } -#endif + { + throw std::runtime_error("Invalid non-serializable data on list"); } - } else if (nonSerialList.isSublist(levelName) && levelName.find("user data") != std::string::npos) { - // So far only put data on level 0 - int levelID = 0; - RCP level = H.GetLevel(levelID); +#endif + } + } else if (nonSerialList.isSublist(levelName) && + levelName.find("user data") != std::string::npos) { + // So far only put data on level 0 + int levelID = 0; + RCP level = H.GetLevel(levelID); - RCP M = Teuchos::rcp_dynamic_cast(HM.GetFactoryManager(levelID)); - TEUCHOS_TEST_FOR_EXCEPTION(M.is_null(), Exceptions::InvalidArgument, "MueLu::Utils::AddNonSerializableDataToHierarchy: cannot get FactoryManager"); + RCP M = Teuchos::rcp_dynamic_cast( + HM.GetFactoryManager(levelID)); + TEUCHOS_TEST_FOR_EXCEPTION( + M.is_null(), Exceptions::InvalidArgument, + "MueLu::Utils::AddNonSerializableDataToHierarchy: cannot get " + "FactoryManager"); - // Grab the user data sublist & loop over parameters - const ParameterList& userList = nonSerialList.sublist(levelName); - for (ParameterList::ConstIterator userListEntry = userList.begin(); userListEntry != userList.end(); userListEntry++) { - const std::string& name = userListEntry->first; - TEUCHOS_TEST_FOR_EXCEPTION(name != "P" && name != "R" && name != "K" && name 
!= "M" && name != "Mdiag" && - name != "D0" && name != "M1" && name != "Ms" && name != "M0inv" && - name != "Nullspace" && name != "Coordinates" && name != "pcoarsen: element to node map" && - name != "Node Comm" && name != "DualNodeID2PrimalNodeID" && name != "Primal interface DOF map" && - name != "output stream" && - !IsParamValidVariable(name), Exceptions::InvalidArgument, - std::string("MueLu::Utils::AddNonSerializableDataToHierarchy: user data parameter list contains unknown data type (") + name + ")"); - if( name == "P" || name == "R" || name == "K" || name == "M" || name == "D0" || name == "M1" || name == "Ms" || name == "M0inv" ) { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue > (userListEntry->second), NoFactory::get()); - } else if (name == "Mdiag") { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue >(userListEntry->second), NoFactory::get()); - } else if (name == "Nullspace") { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue >(userListEntry->second), NoFactory::get()); - //M->SetFactory(name, NoFactory::getRCP()); // TAW: generally it is a bad idea to overwrite the factory manager data here - // One should do this only in very special cases - } else if(name == "Coordinates") {//Scalar of Coordinates MV is always double - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue >(userListEntry->second), NoFactory::get()); - } - else if(name == "Node Comm") { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue > >(userListEntry->second), NoFactory::get()); - } - else if(name == "DualNodeID2PrimalNodeID") - { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue>>(userListEntry->second), NoFactory::get()); - } - else if(name == "Primal interface DOF map") - { - 
level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue>(userListEntry->second), NoFactory::get()); - } + // Grab the user data sublist & loop over parameters + const ParameterList &userList = nonSerialList.sublist(levelName); + for (ParameterList::ConstIterator userListEntry = userList.begin(); + userListEntry != userList.end(); userListEntry++) { + const std::string &name = userListEntry->first; + TEUCHOS_TEST_FOR_EXCEPTION( + name != "P" && name != "R" && name != "K" && name != "M" && + name != "Mdiag" && name != "D0" && name != "M1" && + name != "Ms" && name != "M0inv" && name != "Nullspace" && + name != "Coordinates" && + name != "pcoarsen: element to node map" && + name != "Node Comm" && name != "DualNodeID2PrimalNodeID" && + name != "Primal interface DOF map" && name != "output stream" && + !IsParamValidVariable(name), + Exceptions::InvalidArgument, + std::string("MueLu::Utils::AddNonSerializableDataToHierarchy: user " + "data parameter list contains unknown data type (") + + name + ")"); + if (name == "P" || name == "R" || name == "K" || name == "M" || + name == "D0" || name == "M1" || name == "Ms" || name == "M0inv") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, + Teuchos::getValue>(userListEntry->second), + NoFactory::get()); + } else if (name == "Mdiag") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, + Teuchos::getValue>(userListEntry->second), + NoFactory::get()); + } else if (name == "Nullspace") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, + Teuchos::getValue>(userListEntry->second), + NoFactory::get()); + // M->SetFactory(name, NoFactory::getRCP()); // TAW: generally it is a + // bad idea to overwrite the factory manager data here + // One should do this only in very special cases + } else if (name == + "Coordinates") { // Scalar of Coordinates MV is always double + level->AddKeepFlag(name, 
NoFactory::get(), MueLu::UserData); + level->Set(name, + Teuchos::getValue>( + userListEntry->second), + NoFactory::get()); + } else if (name == "Node Comm") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, + Teuchos::getValue>>( + userListEntry->second), + NoFactory::get()); + } else if (name == "DualNodeID2PrimalNodeID") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set( + name, + Teuchos::getValue>>(userListEntry->second), + NoFactory::get()); + } else if (name == "Primal interface DOF map") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, + Teuchos::getValue>(userListEntry->second), + NoFactory::get()); + } #ifdef HAVE_MUELU_INTREPID2 - else if (name == "pcoarsen: element to node map") - { - level->AddKeepFlag(name,NoFactory::get(),MueLu::UserData); - level->Set(name, Teuchos::getValue > >(userListEntry->second), NoFactory::get()); - } + else if (name == "pcoarsen: element to node map") { + level->AddKeepFlag(name, NoFactory::get(), MueLu::UserData); + level->Set(name, + Teuchos::getValue>>( + userListEntry->second), + NoFactory::get()); + } #endif - else if (name == "output stream") - { - H.SetMueLuOStream(Teuchos::getValue >(userListEntry->second)); - } - else { - //Custom variable - size_t typeNameStart = name.find_first_not_of(' '); - size_t typeNameEnd = name.find(' ', typeNameStart); - std::string typeName = name.substr(typeNameStart, typeNameEnd - typeNameStart); - size_t varNameStart = name.find_first_not_of(' ', typeNameEnd); - std::string varName = name.substr(varNameStart, name.size()); - std::transform(typeName.begin(), typeName.end(), typeName.begin(), ::tolower); - level->AddKeepFlag(varName, NoFactory::get(), MueLu::UserData); - if(typeName == "matrix") - level->Set(varName, Teuchos::getValue >(userListEntry->second), NoFactory::get()); - else if(typeName == "multivector") - level->Set(varName, Teuchos::getValue >(userListEntry->second), 
NoFactory::get()); - else if(typeName == "vector") - level->Set(varName, Teuchos::getValue >(userListEntry->second), NoFactory::get()); - else if(typeName == "map") - level->Set(varName, Teuchos::getValue > >(userListEntry->second), NoFactory::get()); - else if(typeName == "ordinalvector") - level->Set(varName, Teuchos::getValue > >(userListEntry->second), NoFactory::get()); - else if(typeName == "scalar") - level->Set(varName, Teuchos::getValue(userListEntry->second), NoFactory::get()); - else if(typeName == "double") - level->Set(varName, Teuchos::getValue(userListEntry->second), NoFactory::get()); - else if(typeName == "complex") - level->Set(varName, Teuchos::getValue >(userListEntry->second), NoFactory::get()); - else if(typeName == "int") - level->Set(varName, Teuchos::getValue(userListEntry->second), NoFactory::get()); - else if(typeName == "string") - level->Set(varName, Teuchos::getValue(userListEntry->second), NoFactory::get()); - else if(typeName == "array") - level->Set(varName, Teuchos::getValue > (userListEntry->second), NoFactory::get()); - else if(typeName == "array") - level->Set(varName, Teuchos::getValue >(userListEntry->second), NoFactory::get()); - else if(typeName == "arrayrcp") - level->Set(varName, Teuchos::getValue >(userListEntry->second), NoFactory::get()); - else if(typeName == "arrayrcp") - level->Set(varName, Teuchos::getValue >(userListEntry->second), NoFactory::get()); - else - throw std::runtime_error("Invalid non-serializable data on list"); - } + else if (name == "output stream") { + H.SetMueLuOStream(Teuchos::getValue>( + userListEntry->second)); + } else { + // Custom variable + size_t typeNameStart = name.find_first_not_of(' '); + size_t typeNameEnd = name.find(' ', typeNameStart); + std::string typeName = + name.substr(typeNameStart, typeNameEnd - typeNameStart); + size_t varNameStart = name.find_first_not_of(' ', typeNameEnd); + std::string varName = name.substr(varNameStart, name.size()); + std::transform(typeName.begin(), 
typeName.end(), typeName.begin(), + ::tolower); + level->AddKeepFlag(varName, NoFactory::get(), MueLu::UserData); + if (typeName == "matrix") + level->Set(varName, + Teuchos::getValue>(userListEntry->second), + NoFactory::get()); + else if (typeName == "multivector") + level->Set( + varName, + Teuchos::getValue>(userListEntry->second), + NoFactory::get()); + else if (typeName == "vector") + level->Set(varName, + Teuchos::getValue>(userListEntry->second), + NoFactory::get()); + else if (typeName == "map") + level->Set(varName, + Teuchos::getValue< + RCP>>( + userListEntry->second), + NoFactory::get()); + else if (typeName == "ordinalvector") + level->Set( + varName, + Teuchos::getValue>>( + userListEntry->second), + NoFactory::get()); + else if (typeName == "scalar") + level->Set(varName, + Teuchos::getValue(userListEntry->second), + NoFactory::get()); + else if (typeName == "double") + level->Set(varName, + Teuchos::getValue(userListEntry->second), + NoFactory::get()); + else if (typeName == "complex") + level->Set( + varName, + Teuchos::getValue>(userListEntry->second), + NoFactory::get()); + else if (typeName == "int") + level->Set(varName, Teuchos::getValue(userListEntry->second), + NoFactory::get()); + else if (typeName == "string") + level->Set(varName, + Teuchos::getValue(userListEntry->second), + NoFactory::get()); + else if (typeName == "array") + level->Set( + varName, + Teuchos::getValue>(userListEntry->second), + NoFactory::get()); + else if (typeName == "array") + level->Set( + varName, + Teuchos::getValue>(userListEntry->second), + NoFactory::get()); + else if (typeName == "arrayrcp") + level->Set(varName, + Teuchos::getValue>( + userListEntry->second), + NoFactory::get()); + else if (typeName == "arrayrcp") + level->Set(varName, + Teuchos::getValue>( + userListEntry->second), + NoFactory::get()); + else + throw std::runtime_error("Invalid non-serializable data on list"); } - // level->print(std::cout, MueLu::Debug); } + // level->print(std::cout, 
MueLu::Debug); } } +} } // namespace MueLu #define MUELU_HIERARCHY_UTILS_SHORT diff --git a/packages/muelu/src/MueCentral/MueLu_Hierarchy_decl.hpp b/packages/muelu/src/MueCentral/MueLu_Hierarchy_decl.hpp index 4c345a4e3c49..d1fa1a8eca77 100644 --- a/packages/muelu/src/MueCentral/MueLu_Hierarchy_decl.hpp +++ b/packages/muelu/src/MueCentral/MueLu_Hierarchy_decl.hpp @@ -49,16 +49,16 @@ #include #include -#include // global_size_t +#include // global_size_t #include -#include #include +#include #include -#include -#include "MueLu_ConfigDefs.hpp" #include "MueLu_BaseClass.hpp" +#include "MueLu_ConfigDefs.hpp" #include "MueLu_Hierarchy_fwd.hpp" +#include #include "MueLu_Types.hpp" @@ -68,380 +68,442 @@ #include "MueLu_Level_fwd.hpp" #include "MueLu_MasterList.hpp" #include "MueLu_NoFactory.hpp" -#include "MueLu_PerfUtils_fwd.hpp" #include "MueLu_PFactory_fwd.hpp" +#include "MueLu_PerfUtils_fwd.hpp" #include "MueLu_SmootherBase_fwd.hpp" #include "MueLu_SmootherFactory_fwd.hpp" #include "MueLu_Utilities_fwd.hpp" namespace MueLu { - enum class ConvergenceStatus { - Converged, - Unconverged, - Undefined - }; - - /*! - @class Hierarchy - @brief Provides methods to build a multigrid hierarchy and apply multigrid cycles. - - Allows users to manually populate operators at different levels within - a multigrid method and push them into the hierarchy via SetLevel() - and/or to supply factories for automatically generating prolongators, - restrictors, and coarse level discretizations. Additionally, this class contains - an apply method that supports V and W cycles. - */ - template - class Hierarchy : public BaseClass { +enum class ConvergenceStatus { Converged, Unconverged, Undefined }; + +/*! + @class Hierarchy + @brief Provides methods to build a multigrid hierarchy and apply multigrid + cycles. 
+ + Allows users to manually populate operators at different levels within + a multigrid method and push them into the hierarchy via SetLevel() + and/or to supply factories for automatically generating prolongators, + restrictors, and coarse level discretizations. Additionally, this class + contains an apply method that supports V and W cycles. +*/ +template +class Hierarchy : public BaseClass { #undef MUELU_HIERARCHY_SHORT #include "MueLu_UseShortNames.hpp" - typedef Teuchos::ScalarTraits STS; - typedef typename STS::magnitudeType MagnitudeType; - - //! Data struct for defining stopping criteria of multigrid iteration - struct ConvData { - ConvData() : maxIts_(1), tol_(-STS::magnitude(STS::one())) { } - ConvData(LO maxIts) : maxIts_(maxIts), tol_(-STS::magnitude(STS::one())) { } - ConvData(MagnitudeType tol) : maxIts_(10000), tol_(tol) { } - ConvData(std::pair p) : maxIts_(p.first), tol_(p.second) { } - - LO maxIts_; - MagnitudeType tol_; - }; - - public: - - //! @name Constructors/Destructors - //@{ - - //! Default constructor. - Hierarchy(); - //! Constructor that labels the hierarchy. - Hierarchy(const std::string& label); - - //! Constructor - Hierarchy(const RCP & A); - - //! Constructor - Hierarchy(const RCP & A, const std::string& label); - - //! Destructor. - virtual ~Hierarchy() { } - - //@} - - //! @name Set/Get Methods. - //@{ - - //! - static CycleType GetDefaultCycle() { return MasterList::getDefault("cycle type") == "V" ? 
VCYCLE : WCYCLE; } - static int GetDefaultCycleStartLevel() { return MasterList::getDefault("W cycle start level"); } - static bool GetDefaultImplicitTranspose() { return MasterList::getDefault("transpose: use implicit"); } - static bool GetDefaultFuseProlongationAndUpdate() { return MasterList::getDefault("fuse prolongation and update"); } - static Xpetra::global_size_t GetDefaultMaxCoarseSize() { return MasterList::getDefault("coarse: max size"); } - static int GetDefaultMaxLevels() { return MasterList::getDefault("max levels"); } - static bool GetDefaultPRrebalance() { return MasterList::getDefault("repartition: rebalance P and R"); } - - Xpetra::global_size_t GetMaxCoarseSize() const { return maxCoarseSize_; } - bool GetImplicitTranspose() const { return implicitTranspose_; } - bool GetFuseProlongationAndUpdate() const { return fuseProlongationAndUpdate_; } - - void SetMaxCoarseSize(Xpetra::global_size_t maxCoarseSize) { maxCoarseSize_ = maxCoarseSize; } - void SetPRrebalance(bool doPRrebalance) { doPRrebalance_ = doPRrebalance; } - void SetPRViaCopyrebalance(bool doPRViaCopyrebalance) { doPRViaCopyrebalance_ = doPRViaCopyrebalance; } - void SetImplicitTranspose(const bool& implicit) { implicitTranspose_ = implicit; } - void SetFuseProlongationAndUpdate(const bool& fuse) { fuseProlongationAndUpdate_ = fuse; } - - //@} - - //! - - template - friend class Hierarchy; + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType MagnitudeType; - private: - int LastLevelID() const { return Levels_.size() - 1; } - void DumpCurrentGraph(int level) const; + //! Data struct for defining stopping criteria of multigrid iteration + struct ConvData { + ConvData() : maxIts_(1), tol_(-STS::magnitude(STS::one())) {} + ConvData(LO maxIts) : maxIts_(maxIts), tol_(-STS::magnitude(STS::one())) {} + ConvData(MagnitudeType tol) : maxIts_(10000), tol_(tol) {} + ConvData(std::pair p) + : maxIts_(p.first), tol_(p.second) {} - public: - - //! 
Add a level at the end of the hierarchy - void AddLevel(const RCP & level); - - //! Add a new level at the end of the hierarchy - void AddNewLevel(); - - //! Retrieve a certain level from hierarchy. - RCP & GetLevel(const int levelID = 0); - - int GetNumLevels() const; - int GetGlobalNumLevels() const; - - MagnitudeType GetRate() const { return rate_; } - - // This function is global - double GetOperatorComplexity() const; - - // This function is global - double GetSmootherComplexity() const; - - //! Helper function - void CheckLevel(Level& level, int levelID); - - void SetMatvecParams(RCP matvecParams); - - //! Multi-level setup phase: build a new level of the hierarchy. - /*! This method is aimed to be used in a loop building the hierarchy level by level. See Hierarchy::Setup(manager, startLevel, numDesiredLevels) for an example of usage. - * - * @param coarseLevelID ID of the level to be built. - * @param fineLevelManager defines how to build missing data of the fineLevel (example: aggregates) - * @param coarseLevelManager defines how to build the level - * @param nextLevelManager defines how the next coarse level will be built. This is used to post corresponding request before building the coarse level to keep useful data. - - CoarseLevel is considered to be the last level if: - - input parameter isLastLevel == true - or - - Ac->getRowMap()->getGlobalNumElements() <= maxCoarseSize_ - Method return true if CoarseLevel is the last level. - - Pre-condition: - * FineLevel: - - must have kept useful data (TODO: not tested yet) - - must be Teuchos::null when Setup is called for finest level (Setup then automatically calls Request for "Smoother" and "CoarseSolver") - * CoarseLevel: - - already allocated (using Hierarchy::AddLevel()) - - requests already posted - (exception: for finest level (=fineLevelManager==null) requests are called within setup routine) - * NextLevel: - - do not need to be allocate but could (FIXME: will be deleted if lastlevel...). 
- - should be null when Setup is called for last level + LO maxIts_; + MagnitudeType tol_; + }; - Post-condition: - * FineLevel: - - temporary data have been used and released (this condition is not tested) - * CoarseLevel: - - built, requests have been used - - if it is the last level (due to input parameter isLastLevel or getGlobalNumElements() <= maxCoarseSize_), - then the coarse solver factory of the factory manager have been used instead of the smoother factory. - * NextLevel: - If input parameter isLastLevel == false: - - have been allocated - - requests already posted. - */ - bool Setup(int coarseLevelID, const RCP fineLevelManager /* = Teuchos::null */, const RCP coarseLevelManager, - const RCP nextLevelManager = Teuchos::null); +public: + //! @name Constructors/Destructors + //@{ + + //! Default constructor. + Hierarchy(); + //! Constructor that labels the hierarchy. + Hierarchy(const std::string &label); + + //! Constructor + Hierarchy(const RCP &A); + + //! Constructor + Hierarchy(const RCP &A, const std::string &label); + + //! Destructor. + virtual ~Hierarchy() {} + + //@} + + //! @name Set/Get Methods. + //@{ + + //! + static CycleType GetDefaultCycle() { + return MasterList::getDefault("cycle type") == "V" ? 
VCYCLE + : WCYCLE; + } + static int GetDefaultCycleStartLevel() { + return MasterList::getDefault("W cycle start level"); + } + static bool GetDefaultImplicitTranspose() { + return MasterList::getDefault("transpose: use implicit"); + } + static bool GetDefaultFuseProlongationAndUpdate() { + return MasterList::getDefault("fuse prolongation and update"); + } + static Xpetra::global_size_t GetDefaultMaxCoarseSize() { + return MasterList::getDefault("coarse: max size"); + } + static int GetDefaultMaxLevels() { + return MasterList::getDefault("max levels"); + } + static bool GetDefaultPRrebalance() { + return MasterList::getDefault("repartition: rebalance P and R"); + } + + Xpetra::global_size_t GetMaxCoarseSize() const { return maxCoarseSize_; } + bool GetImplicitTranspose() const { return implicitTranspose_; } + bool GetFuseProlongationAndUpdate() const { + return fuseProlongationAndUpdate_; + } + + void SetMaxCoarseSize(Xpetra::global_size_t maxCoarseSize) { + maxCoarseSize_ = maxCoarseSize; + } + void SetPRrebalance(bool doPRrebalance) { doPRrebalance_ = doPRrebalance; } + void SetPRViaCopyrebalance(bool doPRViaCopyrebalance) { + doPRViaCopyrebalance_ = doPRViaCopyrebalance; + } + void SetImplicitTranspose(const bool &implicit) { + implicitTranspose_ = implicit; + } + void SetFuseProlongationAndUpdate(const bool &fuse) { + fuseProlongationAndUpdate_ = fuse; + } + + //@} + + //! + + template friend class Hierarchy; + +private: + int LastLevelID() const { return Levels_.size() - 1; } + void DumpCurrentGraph(int level) const; + +public: + //! Add a level at the end of the hierarchy + void AddLevel(const RCP &level); + + //! Add a new level at the end of the hierarchy + void AddNewLevel(); + + //! Retrieve a certain level from hierarchy. 
+ RCP &GetLevel(const int levelID = 0); + + int GetNumLevels() const; + int GetGlobalNumLevels() const; + + MagnitudeType GetRate() const { return rate_; } + + // This function is global + double GetOperatorComplexity() const; + + // This function is global + double GetSmootherComplexity() const; + + //! Helper function + void CheckLevel(Level &level, int levelID); + + void SetMatvecParams(RCP matvecParams); + + //! Multi-level setup phase: build a new level of the hierarchy. + /*! This method is aimed to be used in a loop building the hierarchy level by + level. See Hierarchy::Setup(manager, startLevel, numDesiredLevels) for an + example of usage. + * + * @param coarseLevelID ID of the level to be built. + * @param fineLevelManager defines how to build missing data of the + fineLevel (example: aggregates) + * @param coarseLevelManager defines how to build the level + * @param nextLevelManager defines how the next coarse level will be built. + This is used to post corresponding request before building the coarse level + to keep useful data. + + CoarseLevel is considered to be the last level if: + - input parameter isLastLevel == true + or + - Ac->getRowMap()->getGlobalNumElements() <= maxCoarseSize_ + Method return true if CoarseLevel is the last level. + + Pre-condition: + * FineLevel: + - must have kept useful data (TODO: not tested yet) + - must be Teuchos::null when Setup is called for finest level (Setup then + automatically calls Request for "Smoother" and "CoarseSolver") + * CoarseLevel: + - already allocated (using Hierarchy::AddLevel()) + - requests already posted + (exception: for finest level (=fineLevelManager==null) requests are + called within setup routine) + * NextLevel: + - do not need to be allocate but could (FIXME: will be deleted if + lastlevel...). 
+ - should be null when Setup is called for last level + + Post-condition: + * FineLevel: + - temporary data have been used and released (this condition is not + tested) + * CoarseLevel: + - built, requests have been used + - if it is the last level (due to input parameter isLastLevel or + getGlobalNumElements() <= maxCoarseSize_), then the coarse solver factory of + the factory manager have been used instead of the smoother factory. + * NextLevel: + If input parameter isLastLevel == false: + - have been allocated + - requests already posted. + */ + bool + Setup(int coarseLevelID, + const RCP + fineLevelManager /* = Teuchos::null */, + const RCP coarseLevelManager, + const RCP nextLevelManager = Teuchos::null); - //! - void Setup(const FactoryManagerBase& manager = FactoryManager(), int startLevel = 0, int numDesiredLevels = GetDefaultMaxLevels()); + //! + void Setup(const FactoryManagerBase &manager = FactoryManager(), + int startLevel = 0, int numDesiredLevels = GetDefaultMaxLevels()); - void SetupRe(); + void SetupRe(); - //! Clear impermanent data from previous setup - void Clear(int startLevel = 0); - void ExpertClear(); + //! Clear impermanent data from previous setup + void Clear(int startLevel = 0); + void ExpertClear(); - //! Returns multigrid cycle type (supports VCYCLE and WCYCLE) - CycleType GetCycle() const { return Cycle_; } + //! Returns multigrid cycle type (supports VCYCLE and WCYCLE) + CycleType GetCycle() const { return Cycle_; } - //! Supports VCYCLE and WCYCLE types. - void SetCycle(CycleType Cycle) { Cycle_ = Cycle; } + //! Supports VCYCLE and WCYCLE types. + void SetCycle(CycleType Cycle) { Cycle_ = Cycle; } - void SetCycleStartLevel(int cycleStart) { WCycleStartLevel_ = cycleStart; } + void SetCycleStartLevel(int cycleStart) { WCycleStartLevel_ = cycleStart; } - //! Specify damping factor alpha such that x = x + alpha*P*c, where c is the coarse grid correction. 
- void SetProlongatorScalingFactor(double scalingFactor) { scalingFactor_ = scalingFactor; } + //! Specify damping factor alpha such that x = x + alpha*P*c, where c is the + //! coarse grid correction. + void SetProlongatorScalingFactor(double scalingFactor) { + scalingFactor_ = scalingFactor; + } - /*! - @brief Apply the multigrid preconditioner. + /*! + @brief Apply the multigrid preconditioner. + + In theory, more general cycle types than just V- and W-cycles are possible. + However, the enumerated type CycleType would have to be extended. + + @param B right-hand side of linear problem + @param X initial and final (approximate) solution of linear problem + @param ConvData struct which stores convergence criteria (maximum number of + multigrid iterations or stopping tolerance) + @param InitialGuessIsZero Indicates whether the initial guess is zero + @param startLevel index of starting level to build multigrid hierarchy + (default = 0) + */ + ConvergenceStatus Iterate(const MultiVector &B, MultiVector &X, + ConvData conv = ConvData(), + bool InitialGuessIsZero = false, LO startLevel = 0); - In theory, more general cycle types than just V- and W-cycles are possible. However, - the enumerated type CycleType would have to be extended. + /*! + @brief Print matrices in the multigrid hierarchy to file. - @param B right-hand side of linear problem - @param X initial and final (approximate) solution of linear problem - @param ConvData struct which stores convergence criteria (maximum number of multigrid iterations or stopping tolerance) - @param InitialGuessIsZero Indicates whether the initial guess is zero - @param startLevel index of starting level to build multigrid hierarchy (default = 0) - */ - ConvergenceStatus Iterate(const MultiVector& B, MultiVector& X, ConvData conv = ConvData(), - bool InitialGuessIsZero = false, LO startLevel = 0); + @param[in] start start level + @param[in] end end level - /*! - @brief Print matrices in the multigrid hierarchy to file. 
+ Default behavior is to print system and transfer matrices from the entire + hierarchy. Files are named "A_0.m", "P_1.m", "R_1.m", etc, and are in matrix + market coordinate format. + */ + void Write(const LO &start = -1, const LO &end = -1, + const std::string &suffix = ""); - @param[in] start start level - @param[in] end end level + //@} - Default behavior is to print system and transfer matrices from the entire hierarchy. - Files are named "A_0.m", "P_1.m", "R_1.m", etc, and are in matrix market coordinate format. - */ - void Write(const LO &start=-1, const LO &end=-1, const std::string &suffix=""); + //! @name Permanent storage + //@{ - //@} + //! Call Level::Keep(ename, factory) for each level of the Hierarchy. + void Keep(const std::string &ename, + const FactoryBase *factory = NoFactory::get()); - //! @name Permanent storage - //@{ + //! Call Level::Delete(ename, factory) for each level of the Hierarchy. + void Delete(const std::string &ename, + const FactoryBase *factory = NoFactory::get()); - //! Call Level::Keep(ename, factory) for each level of the Hierarchy. - void Keep(const std::string & ename, const FactoryBase* factory = NoFactory::get()); - - //! Call Level::Delete(ename, factory) for each level of the Hierarchy. - void Delete(const std::string& ename, const FactoryBase* factory = NoFactory::get()); - - //! Call Level::AddKeepFlag for each level of the Hierarchy. - void AddKeepFlag(const std::string & ename, const FactoryBase* factory = NoFactory::get(), KeepType keep = MueLu::Keep); - - //! Call Level::RemoveKeepFlag for each level of the Hierarchy - void RemoveKeepFlag(const std::string & ename, const FactoryBase* factory, KeepType keep = MueLu::All); - - //@} - - //! @name Overridden from Teuchos::Describable - //@{ - - //! Return a simple one-line description of this object. - std::string description() const; + //! Call Level::AddKeepFlag for each level of the Hierarchy. 
+ void AddKeepFlag(const std::string &ename, + const FactoryBase *factory = NoFactory::get(), + KeepType keep = MueLu::Keep); - /*! @brief Print the Hierarchy with some verbosity level to a FancyOStream object. + //! Call Level::RemoveKeepFlag for each level of the Hierarchy + void RemoveKeepFlag(const std::string &ename, const FactoryBase *factory, + KeepType keep = MueLu::All); - @param[in] out The Teuchos::FancyOstream. - @param[in] verbLevel Controls amount of output. - */ - void describe(Teuchos::FancyOStream& out, const VerbLevel verbLevel = Default) const; - void describe(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel verbLevel = Teuchos::VERB_HIGH) const; + //@} - //! Hierarchy::print is local hierarchy function, thus the statistics can be different from global ones - void print(std::ostream& out = std::cout, const VerbLevel verbLevel = (MueLu::Parameters | MueLu::Statistics0)) const; + //! @name Overridden from Teuchos::Describable + //@{ - /*! Indicate whether the multigrid method is a preconditioner or a solver. + //! Return a simple one-line description of this object. + std::string description() const; - This is used in conjunction with the verbosity level to determine whether the residuals can be printed. - */ - void IsPreconditioner(const bool flag); + /*! @brief Print the Hierarchy with some verbosity level to a FancyOStream + object. - //@} + @param[in] out The Teuchos::FancyOstream. + @param[in] verbLevel Controls amount of output. + */ + void describe(Teuchos::FancyOStream &out, + const VerbLevel verbLevel = Default) const; + void + describe(Teuchos::FancyOStream &out, + const Teuchos::EVerbosityLevel verbLevel = Teuchos::VERB_HIGH) const; + + //! Hierarchy::print is local hierarchy function, thus the statistics can be + //! different from global ones + void print(std::ostream &out = std::cout, + const VerbLevel verbLevel = (MueLu::Parameters | + MueLu::Statistics0)) const; + + /*! 
Indicate whether the multigrid method is a preconditioner or a solver. + + This is used in conjunction with the verbosity level to determine whether + the residuals can be printed. + */ + void IsPreconditioner(const bool flag); - void EnableGraphDumping(const std::string& filename, int levelID = 1) { - isDumpingEnabled_ = true; - dumpLevel_ = levelID; - dumpFile_ = filename; - } + //@} - void setlib(Xpetra::UnderlyingLib inlib) { lib_ = inlib; } - Xpetra::UnderlyingLib lib() { return lib_; } + void EnableGraphDumping(const std::string &filename, int levelID = 1) { + isDumpingEnabled_ = true; + dumpLevel_ = levelID; + dumpFile_ = filename; + } - //! force recreation of cached description_ next time description() is called: - void ResetDescription() { - description_ = ""; - } + void setlib(Xpetra::UnderlyingLib inlib) { lib_ = inlib; } + Xpetra::UnderlyingLib lib() { return lib_; } - void AllocateLevelMultiVectors(int numvecs, bool forceMapCheck=false); - void DeleteLevelMultiVectors(); + //! force recreation of cached description_ next time description() is called: + void ResetDescription() { description_ = ""; } - protected: - const RCP& GetLevelManager(const int levelID) const { - return levelManagers_[levelID]; - } + void AllocateLevelMultiVectors(int numvecs, bool forceMapCheck = false); + void DeleteLevelMultiVectors(); - private: - //! Copy constructor is not implemented. - Hierarchy(const Hierarchy &h); +protected: + const RCP & + GetLevelManager(const int levelID) const { + return levelManagers_[levelID]; + } - //! Decide if the residual needs to be computed - bool IsCalculationOfResidualRequired(const LO startLevel, const ConvData& conv) const; +private: + //! Copy constructor is not implemented. + Hierarchy(const Hierarchy &h); - /*! - \brief Decide if the multigrid iteration is converged + //! 
Decide if the residual needs to be computed + bool IsCalculationOfResidualRequired(const LO startLevel, + const ConvData &conv) const; - We judge convergence by comparing the current \c residualNorm - to the user given \c convergenceTolerance and then return the - appropriate \c ConvergenceStatus - */ - ConvergenceStatus IsConverged(const Teuchos::Array& residualNorm, - const MagnitudeType convergenceTolerance) const; + /*! + \brief Decide if the multigrid iteration is converged - //! Print \c residualNorm for this \c iteration to the screen - void PrintResidualHistory(const LO iteration, - const Teuchos::Array& residualNorm) const; + We judge convergence by comparing the current \c residualNorm + to the user given \c convergenceTolerance and then return the + appropriate \c ConvergenceStatus + */ + ConvergenceStatus + IsConverged(const Teuchos::Array &residualNorm, + const MagnitudeType convergenceTolerance) const; - //! Compute the residual norm and print it depending on the verbosity level - ConvergenceStatus ComputeResidualAndPrintHistory(const Operator& A, const MultiVector& X, - const MultiVector& B, const LO iteration, - const LO startLevel, const ConvData& conv, MagnitudeType& previousResidualNorm); + //! Print \c residualNorm for this \c iteration to the screen + void + PrintResidualHistory(const LO iteration, + const Teuchos::Array &residualNorm) const; - //! Container for Level objects - Array > Levels_; + //! Compute the residual norm and print it depending on the verbosity level + ConvergenceStatus + ComputeResidualAndPrintHistory(const Operator &A, const MultiVector &X, + const MultiVector &B, const LO iteration, + const LO startLevel, const ConvData &conv, + MagnitudeType &previousResidualNorm); - //! We replace coordinates GIDs to make them consistent with matrix GIDs, - //! even if user does not do that. Ideally, though, we should completely - //! remove any notion of coordinate GIDs, and deal only with LIDs, assuming - //! 
that they are consistent with matrix block IDs - void ReplaceCoordinateMap(Level& level); + //! Container for Level objects + Array> Levels_; - //! Minimum size of a matrix on any level. If we fall below that, we stop - //! the coarsening - Xpetra::global_size_t maxCoarseSize_; + //! We replace coordinates GIDs to make them consistent with matrix GIDs, + //! even if user does not do that. Ideally, though, we should completely + //! remove any notion of coordinate GIDs, and deal only with LIDs, assuming + //! that they are consistent with matrix block IDs + void ReplaceCoordinateMap(Level &level); - //! Potential speed up of the setup by skipping R construction, and using - //! transpose matrix-matrix product for RAP - bool implicitTranspose_; + //! Minimum size of a matrix on any level. If we fall below that, we stop + //! the coarsening + Xpetra::global_size_t maxCoarseSize_; - //! Potential speed up of the solve by fusing prolongation and update steps. - //! This can lead to more iterations to round-off error accumulation. - bool fuseProlongationAndUpdate_; + //! Potential speed up of the setup by skipping R construction, and using + //! transpose matrix-matrix product for RAP + bool implicitTranspose_; - //! Potential speed up of the setup by skipping rebalancing of P and R, and - //! doing extra import during solve - bool doPRrebalance_; - bool doPRViaCopyrebalance_; // fully explicit, needed for CombinePFactory + //! Potential speed up of the solve by fusing prolongation and update steps. + //! This can lead to more iterations to round-off error accumulation. + bool fuseProlongationAndUpdate_; - //! Hierarchy may be used in a standalone mode, or as a preconditioner - bool isPreconditioner_; + //! Potential speed up of the setup by skipping rebalancing of P and R, and + //! doing extra import during solve + bool doPRrebalance_; + bool doPRViaCopyrebalance_; // fully explicit, needed for CombinePFactory - //! V- or W-cycle - CycleType Cycle_; + //! 
Hierarchy may be used in a standalone mode, or as a preconditioner + bool isPreconditioner_; - //! Level at which to start W-cycle - int WCycleStartLevel_; + //! V- or W-cycle + CycleType Cycle_; - //! Scaling factor to be applied to coarse grid correction. - double scalingFactor_; + //! Level at which to start W-cycle + int WCycleStartLevel_; - //! Epetra/Tpetra mode - Xpetra::UnderlyingLib lib_; + //! Scaling factor to be applied to coarse grid correction. + double scalingFactor_; - //! cache description to avoid recreating in each call to description() - use ResetDescription() to force recreation in Setup, SetupRe, etc. - mutable std::string description_ = ""; // mutable so that we can lazily initialize in description(), which is declared const + //! Epetra/Tpetra mode + Xpetra::UnderlyingLib lib_; - /*! - @brief Graph dumping + //! cache description to avoid recreating in each call to description() - use + //! ResetDescription() to force recreation in Setup, SetupRe, etc. + mutable std::string description_ = + ""; // mutable so that we can lazily initialize in description(), which is + // declared const - If enabled, we dump the graph on a specified level into a specified file - */ - bool isDumpingEnabled_; - // -1 = dump all levels, -2 = dump nothing - int dumpLevel_; - std::string dumpFile_; + /*! + @brief Graph dumping - //! Convergece rate - MagnitudeType rate_; + If enabled, we dump the graph on a specified level into a specified file + */ + bool isDumpingEnabled_; + // -1 = dump all levels, -2 = dump nothing + int dumpLevel_; + std::string dumpFile_; - //! Level managers used during the Setup - Array > levelManagers_; + //! Convergece rate + MagnitudeType rate_; - //! Caching (Multi)Vectors used in Hierarchy::Iterate() - int sizeOfAllocatedLevelMultiVectors_; - Array > residual_, coarseRhs_, coarseX_, coarseImport_, coarseExport_, correction_; + //! Level managers used during the Setup + Array> levelManagers_; + //! 
Caching (Multi)Vectors used in Hierarchy::Iterate() + int sizeOfAllocatedLevelMultiVectors_; + Array> residual_, coarseRhs_, coarseX_, coarseImport_, + coarseExport_, correction_; - }; //class Hierarchy +}; // class Hierarchy -} //namespace MueLu +} // namespace MueLu #define MUELU_HIERARCHY_SHORT #endif // MUELU_HIERARCHY_DECL_HPP diff --git a/packages/muelu/src/MueCentral/MueLu_Hierarchy_def.hpp b/packages/muelu/src/MueCentral/MueLu_Hierarchy_def.hpp index 13a40368f1c4..fbd659ee1ccd 100644 --- a/packages/muelu/src/MueCentral/MueLu_Hierarchy_def.hpp +++ b/packages/muelu/src/MueCentral/MueLu_Hierarchy_def.hpp @@ -52,913 +52,1061 @@ #include #include +#include #include #include #include -#include #include "MueLu_Hierarchy_decl.hpp" #include "MueLu_FactoryManager.hpp" #include "MueLu_HierarchyUtils.hpp" -#include "MueLu_TopRAPFactory.hpp" -#include "MueLu_TopSmootherFactory.hpp" #include "MueLu_Level.hpp" #include "MueLu_Monitor.hpp" -#include "MueLu_PerfUtils.hpp" #include "MueLu_PFactory.hpp" -#include "MueLu_SmootherFactory.hpp" +#include "MueLu_PerfUtils.hpp" #include "MueLu_SmootherBase.hpp" +#include "MueLu_SmootherFactory.hpp" +#include "MueLu_TopRAPFactory.hpp" +#include "MueLu_TopSmootherFactory.hpp" #include "Teuchos_TimeMonitor.hpp" - - namespace MueLu { - template - Hierarchy::Hierarchy() - : maxCoarseSize_(GetDefaultMaxCoarseSize()), implicitTranspose_(GetDefaultImplicitTranspose()), +template +Hierarchy::Hierarchy() + : maxCoarseSize_(GetDefaultMaxCoarseSize()), + implicitTranspose_(GetDefaultImplicitTranspose()), fuseProlongationAndUpdate_(GetDefaultFuseProlongationAndUpdate()), - doPRrebalance_(GetDefaultPRrebalance()), doPRViaCopyrebalance_(false), isPreconditioner_(true), Cycle_(GetDefaultCycle()), WCycleStartLevel_(0), - scalingFactor_(Teuchos::ScalarTraits::one()), lib_(Xpetra::UseTpetra), isDumpingEnabled_(false), dumpLevel_(-2), rate_(-1), - sizeOfAllocatedLevelMultiVectors_(0) - { - AddLevel(rcp(new Level)); - } + 
doPRrebalance_(GetDefaultPRrebalance()), doPRViaCopyrebalance_(false), + isPreconditioner_(true), Cycle_(GetDefaultCycle()), WCycleStartLevel_(0), + scalingFactor_(Teuchos::ScalarTraits::one()), + lib_(Xpetra::UseTpetra), isDumpingEnabled_(false), dumpLevel_(-2), + rate_(-1), sizeOfAllocatedLevelMultiVectors_(0) { + AddLevel(rcp(new Level)); +} - template - Hierarchy::Hierarchy(const std::string& label) - : Hierarchy() - { - setObjectLabel(label); - Levels_[0]->setObjectLabel(label); - } +template +Hierarchy::Hierarchy( + const std::string &label) + : Hierarchy() { + setObjectLabel(label); + Levels_[0]->setObjectLabel(label); +} - template - Hierarchy::Hierarchy(const RCP& A) - : maxCoarseSize_(GetDefaultMaxCoarseSize()), implicitTranspose_(GetDefaultImplicitTranspose()), +template +Hierarchy::Hierarchy( + const RCP &A) + : maxCoarseSize_(GetDefaultMaxCoarseSize()), + implicitTranspose_(GetDefaultImplicitTranspose()), fuseProlongationAndUpdate_(GetDefaultFuseProlongationAndUpdate()), - doPRrebalance_(GetDefaultPRrebalance()), doPRViaCopyrebalance_(false), isPreconditioner_(true), Cycle_(GetDefaultCycle()), WCycleStartLevel_(0), - scalingFactor_(Teuchos::ScalarTraits::one()), isDumpingEnabled_(false), dumpLevel_(-2), rate_(-1), - sizeOfAllocatedLevelMultiVectors_(0) - { - lib_ = A->getDomainMap()->lib(); - - RCP Finest = rcp(new Level); - AddLevel(Finest); - - Finest->Set("A", A); - } - - template - Hierarchy::Hierarchy(const RCP& A, const std::string& label) - : Hierarchy(A) - { - setObjectLabel(label); - Levels_[0]->setObjectLabel(label); - } + doPRrebalance_(GetDefaultPRrebalance()), doPRViaCopyrebalance_(false), + isPreconditioner_(true), Cycle_(GetDefaultCycle()), WCycleStartLevel_(0), + scalingFactor_(Teuchos::ScalarTraits::one()), + isDumpingEnabled_(false), dumpLevel_(-2), rate_(-1), + sizeOfAllocatedLevelMultiVectors_(0) { + lib_ = A->getDomainMap()->lib(); - template - void Hierarchy::AddLevel(const RCP& level) { - int levelID = LastLevelID() + 1; // ID of 
the inserted level + RCP Finest = rcp(new Level); + AddLevel(Finest); - if (level->GetLevelID() != -1 && (level->GetLevelID() != levelID)) - GetOStream(Warnings1) << "Hierarchy::AddLevel(): Level with ID=" << level->GetLevelID() << - " have been added at the end of the hierarchy\n but its ID have been redefined" << - " because last level ID of the hierarchy was " << LastLevelID() << "." << std::endl; - - Levels_.push_back(level); - level->SetLevelID(levelID); - level->setlib(lib_); - - level->SetPreviousLevel( (levelID == 0) ? Teuchos::null : Levels_[LastLevelID() - 1] ); - level->setObjectLabel(this->getObjectLabel()); - } + Finest->Set("A", A); +} - template - void Hierarchy::AddNewLevel() { - RCP newLevel = Levels_[LastLevelID()]->Build(); // new coarse level, using copy constructor - newLevel->setlib(lib_); - this->AddLevel(newLevel); // add to hierarchy - } +template +Hierarchy::Hierarchy( + const RCP &A, const std::string &label) + : Hierarchy(A) { + setObjectLabel(label); + Levels_[0]->setObjectLabel(label); +} - template - RCP & Hierarchy::GetLevel(const int levelID) { - TEUCHOS_TEST_FOR_EXCEPTION(levelID < 0 || levelID > LastLevelID(), Exceptions::RuntimeError, - "MueLu::Hierarchy::GetLevel(): invalid input parameter value: LevelID = " << levelID); - return Levels_[levelID]; - } +template +void Hierarchy::AddLevel( + const RCP &level) { + int levelID = LastLevelID() + 1; // ID of the inserted level + + if (level->GetLevelID() != -1 && (level->GetLevelID() != levelID)) + GetOStream(Warnings1) << "Hierarchy::AddLevel(): Level with ID=" + << level->GetLevelID() + << " have been added at the end of the hierarchy\n " + "but its ID have been redefined" + << " because last level ID of the hierarchy was " + << LastLevelID() << "." << std::endl; + + Levels_.push_back(level); + level->SetLevelID(levelID); + level->setlib(lib_); + + level->SetPreviousLevel((levelID == 0) ? 
Teuchos::null + : Levels_[LastLevelID() - 1]); + level->setObjectLabel(this->getObjectLabel()); +} - template - int Hierarchy::GetNumLevels() const { - return Levels_.size(); - } +template +void Hierarchy::AddNewLevel() { + RCP newLevel = + Levels_[LastLevelID()] + ->Build(); // new coarse level, using copy constructor + newLevel->setlib(lib_); + this->AddLevel(newLevel); // add to hierarchy +} - template - int Hierarchy::GetGlobalNumLevels() const { - RCP A = Levels_[0]->template Get >("A"); - RCP > comm = A->getDomainMap()->getComm(); +template +RCP &Hierarchy::GetLevel( + const int levelID) { + TEUCHOS_TEST_FOR_EXCEPTION( + levelID < 0 || levelID > LastLevelID(), Exceptions::RuntimeError, + "MueLu::Hierarchy::GetLevel(): invalid input parameter value: LevelID = " + << levelID); + return Levels_[levelID]; +} - int numLevels = GetNumLevels(); - int numGlobalLevels; - Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numLevels, Teuchos::ptr(&numGlobalLevels)); +template +int Hierarchy::GetNumLevels() const { + return Levels_.size(); +} - return numGlobalLevels; - } +template +int Hierarchy::GetGlobalNumLevels() + const { + RCP A = Levels_[0]->template Get>("A"); + RCP> comm = A->getDomainMap()->getComm(); - template - double Hierarchy::GetOperatorComplexity() const { - double totalNnz = 0, lev0Nnz = 1; - for (int i = 0; i < GetNumLevels(); ++i) { - TEUCHOS_TEST_FOR_EXCEPTION(!(Levels_[i]->IsAvailable("A")) , Exceptions::RuntimeError, - "Operator complexity cannot be calculated because A is unavailable on level " << i); - RCP A = Levels_[i]->template Get >("A"); - if (A.is_null()) - break; + int numLevels = GetNumLevels(); + int numGlobalLevels; + Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numLevels, + Teuchos::ptr(&numGlobalLevels)); - RCP Am = rcp_dynamic_cast(A); - if (Am.is_null()) { - GetOStream(Warnings0) << "Some level operators are not matrices, operator complexity calculation aborted" << std::endl; - return 0.0; - } + return numGlobalLevels; +} - totalNnz 
+= as(Am->getGlobalNumEntries()); - if (i == 0) - lev0Nnz = totalNnz; - } - return totalNnz / lev0Nnz; - } +template +double +Hierarchy::GetOperatorComplexity() + const { + double totalNnz = 0, lev0Nnz = 1; + for (int i = 0; i < GetNumLevels(); ++i) { + TEUCHOS_TEST_FOR_EXCEPTION(!(Levels_[i]->IsAvailable("A")), + Exceptions::RuntimeError, + "Operator complexity cannot be calculated " + "because A is unavailable on level " + << i); + RCP A = Levels_[i]->template Get>("A"); + if (A.is_null()) + break; - template - double Hierarchy::GetSmootherComplexity() const { - double node_sc = 0, global_sc=0; - double a0_nnz =0; - const size_t INVALID = Teuchos::OrdinalTraits::invalid(); - // Get cost of fine matvec - if (GetNumLevels() <= 0) return -1.0; - if (!Levels_[0]->IsAvailable("A")) return -1.0; - - RCP A = Levels_[0]->template Get >("A"); - if (A.is_null()) return -1.0; RCP Am = rcp_dynamic_cast(A); - if(Am.is_null()) return -1.0; - a0_nnz = as(Am->getGlobalNumEntries()); - - // Get smoother complexity at each level - for (int i = 0; i < GetNumLevels(); ++i) { - size_t level_sc=0; - if(!Levels_[i]->IsAvailable("PreSmoother")) continue; - RCP S = Levels_[i]->template Get >("PreSmoother"); - if (S.is_null()) continue; - level_sc = S->getNodeSmootherComplexity(); - if(level_sc == INVALID) {global_sc=-1.0;break;} - - node_sc += as(level_sc); + if (Am.is_null()) { + GetOStream(Warnings0) << "Some level operators are not matrices, " + "operator complexity calculation aborted" + << std::endl; + return 0.0; } - double min_sc=0.0; - RCP > comm =A->getDomainMap()->getComm(); - Teuchos::reduceAll(*comm,Teuchos::REDUCE_SUM,node_sc,Teuchos::ptr(&global_sc)); - Teuchos::reduceAll(*comm,Teuchos::REDUCE_MIN,node_sc,Teuchos::ptr(&min_sc)); - - if(min_sc < 0.0) return -1.0; - else return global_sc / a0_nnz; + totalNnz += as(Am->getGlobalNumEntries()); + if (i == 0) + lev0Nnz = totalNnz; } + return totalNnz / lev0Nnz; +} +template +double +Hierarchy::GetSmootherComplexity() + const { + 
double node_sc = 0, global_sc = 0; + double a0_nnz = 0; + const size_t INVALID = Teuchos::OrdinalTraits::invalid(); + // Get cost of fine matvec + if (GetNumLevels() <= 0) + return -1.0; + if (!Levels_[0]->IsAvailable("A")) + return -1.0; + + RCP A = Levels_[0]->template Get>("A"); + if (A.is_null()) + return -1.0; + RCP Am = rcp_dynamic_cast(A); + if (Am.is_null()) + return -1.0; + a0_nnz = as(Am->getGlobalNumEntries()); + + // Get smoother complexity at each level + for (int i = 0; i < GetNumLevels(); ++i) { + size_t level_sc = 0; + if (!Levels_[i]->IsAvailable("PreSmoother")) + continue; + RCP S = + Levels_[i]->template Get>("PreSmoother"); + if (S.is_null()) + continue; + level_sc = S->getNodeSmootherComplexity(); + if (level_sc == INVALID) { + global_sc = -1.0; + break; + } + node_sc += as(level_sc); + } + double min_sc = 0.0; + RCP> comm = A->getDomainMap()->getComm(); + Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, node_sc, + Teuchos::ptr(&global_sc)); + Teuchos::reduceAll(*comm, Teuchos::REDUCE_MIN, node_sc, + Teuchos::ptr(&min_sc)); + + if (min_sc < 0.0) + return -1.0; + else + return global_sc / a0_nnz; +} - // Coherence checks todo in Setup() (using an helper function): - template - void Hierarchy::CheckLevel(Level& level, int levelID) { - TEUCHOS_TEST_FOR_EXCEPTION(level.lib() != lib_, Exceptions::RuntimeError, - "MueLu::Hierarchy::CheckLevel(): wrong underlying linear algebra library."); - TEUCHOS_TEST_FOR_EXCEPTION(level.GetLevelID() != levelID, Exceptions::RuntimeError, - "MueLu::Hierarchy::CheckLevel(): wrong level ID"); - TEUCHOS_TEST_FOR_EXCEPTION(levelID != 0 && level.GetPreviousLevel() != Levels_[levelID-1], Exceptions::RuntimeError, - "MueLu::Hierarchy::Setup(): wrong level parent"); - } +// Coherence checks todo in Setup() (using an helper function): +template +void Hierarchy::CheckLevel( + Level &level, int levelID) { + TEUCHOS_TEST_FOR_EXCEPTION(level.lib() != lib_, Exceptions::RuntimeError, + "MueLu::Hierarchy::CheckLevel(): wrong 
underlying " + "linear algebra library."); + TEUCHOS_TEST_FOR_EXCEPTION(level.GetLevelID() != levelID, + Exceptions::RuntimeError, + "MueLu::Hierarchy::CheckLevel(): wrong level ID"); + TEUCHOS_TEST_FOR_EXCEPTION(levelID != 0 && level.GetPreviousLevel() != + Levels_[levelID - 1], + Exceptions::RuntimeError, + "MueLu::Hierarchy::Setup(): wrong level parent"); +} - template - void Hierarchy::SetMatvecParams(RCP matvecParams) { - for (int i = 0; i < GetNumLevels(); ++i) { - RCP level = Levels_[i]; - if (level->IsAvailable("A")) { - RCP Aop = level->Get >("A"); - RCP A = rcp_dynamic_cast(Aop); - if (!A.is_null()) { - RCP xpImporter = A->getCrsGraph()->getImporter(); - if (!xpImporter.is_null()) - xpImporter->setDistributorParameters(matvecParams); - RCP xpExporter = A->getCrsGraph()->getExporter(); - if (!xpExporter.is_null()) - xpExporter->setDistributorParameters(matvecParams); - } - } - if (level->IsAvailable("P")) { - RCP P = level->Get >("P"); - RCP xpImporter = P->getCrsGraph()->getImporter(); - if (!xpImporter.is_null()) - xpImporter->setDistributorParameters(matvecParams); - RCP xpExporter = P->getCrsGraph()->getExporter(); - if (!xpExporter.is_null()) - xpExporter->setDistributorParameters(matvecParams); - } - if (level->IsAvailable("R")) { - RCP R = level->Get >("R"); - RCP xpImporter = R->getCrsGraph()->getImporter(); +template +void Hierarchy::SetMatvecParams( + RCP matvecParams) { + for (int i = 0; i < GetNumLevels(); ++i) { + RCP level = Levels_[i]; + if (level->IsAvailable("A")) { + RCP Aop = level->Get>("A"); + RCP A = rcp_dynamic_cast(Aop); + if (!A.is_null()) { + RCP xpImporter = A->getCrsGraph()->getImporter(); if (!xpImporter.is_null()) xpImporter->setDistributorParameters(matvecParams); - RCP xpExporter = R->getCrsGraph()->getExporter(); + RCP xpExporter = A->getCrsGraph()->getExporter(); if (!xpExporter.is_null()) xpExporter->setDistributorParameters(matvecParams); } - if (level->IsAvailable("Importer")) { - RCP xpImporter = level->Get< RCP 
>("Importer"); - if (!xpImporter.is_null()) - xpImporter->setDistributorParameters(matvecParams); - } + } + if (level->IsAvailable("P")) { + RCP P = level->Get>("P"); + RCP xpImporter = P->getCrsGraph()->getImporter(); + if (!xpImporter.is_null()) + xpImporter->setDistributorParameters(matvecParams); + RCP xpExporter = P->getCrsGraph()->getExporter(); + if (!xpExporter.is_null()) + xpExporter->setDistributorParameters(matvecParams); + } + if (level->IsAvailable("R")) { + RCP R = level->Get>("R"); + RCP xpImporter = R->getCrsGraph()->getImporter(); + if (!xpImporter.is_null()) + xpImporter->setDistributorParameters(matvecParams); + RCP xpExporter = R->getCrsGraph()->getExporter(); + if (!xpExporter.is_null()) + xpExporter->setDistributorParameters(matvecParams); + } + if (level->IsAvailable("Importer")) { + RCP xpImporter = level->Get>("Importer"); + if (!xpImporter.is_null()) + xpImporter->setDistributorParameters(matvecParams); } } +} - // The function uses three managers: fine, coarse and next coarse - // We construct the data for the coarse level, and do requests for the next coarse - template - bool Hierarchy::Setup(int coarseLevelID, - const RCP fineLevelManager, - const RCP coarseLevelManager, - const RCP nextLevelManager) { - // Use PrintMonitor/TimerMonitor instead of just a FactoryMonitor to print "Level 0" instead of Hierarchy(0) - // Print is done after the requests for next coarse level - - TEUCHOS_TEST_FOR_EXCEPTION(LastLevelID() < coarseLevelID, Exceptions::RuntimeError, - "MueLu::Hierarchy:Setup(): level " << coarseLevelID << " (specified by coarseLevelID argument) " - "must be built before calling this function."); - - Level& level = *Levels_[coarseLevelID]; - - std::string label = FormattingHelper::getColonLabel(level.getObjectLabel()); - TimeMonitor m1(*this, label + this->ShortClassName() + ": " + "Setup (total)"); - TimeMonitor m2(*this, label + this->ShortClassName() + ": " + "Setup" + " (total, level=" + Teuchos::toString(coarseLevelID) + 
")"); - - // TODO: pass coarseLevelManager by reference - TEUCHOS_TEST_FOR_EXCEPTION(coarseLevelManager == Teuchos::null, Exceptions::RuntimeError, - "MueLu::Hierarchy::Setup(): argument coarseLevelManager cannot be null"); - - typedef MueLu::TopRAPFactory TopRAPFactory; - typedef MueLu::TopSmootherFactory TopSmootherFactory; - - if (levelManagers_.size() < coarseLevelID+1) - levelManagers_.resize(coarseLevelID+1); - levelManagers_[coarseLevelID] = coarseLevelManager; - - bool isFinestLevel = (fineLevelManager.is_null()); - bool isLastLevel = (nextLevelManager.is_null()); - - int oldRank = -1; - if (isFinestLevel) { - RCP A = level.Get< RCP >("A"); - RCP domainMap = A->getDomainMap(); - RCP > comm = domainMap->getComm(); +// The function uses three managers: fine, coarse and next coarse +// We construct the data for the coarse level, and do requests for the next +// coarse +template +bool Hierarchy::Setup( + int coarseLevelID, const RCP fineLevelManager, + const RCP coarseLevelManager, + const RCP nextLevelManager) { + // Use PrintMonitor/TimerMonitor instead of just a FactoryMonitor to print + // "Level 0" instead of Hierarchy(0) Print is done after the requests for next + // coarse level + + TEUCHOS_TEST_FOR_EXCEPTION( + LastLevelID() < coarseLevelID, Exceptions::RuntimeError, + "MueLu::Hierarchy:Setup(): level " + << coarseLevelID + << " (specified by coarseLevelID argument) " + "must be built before calling this function."); + + Level &level = *Levels_[coarseLevelID]; + + std::string label = FormattingHelper::getColonLabel(level.getObjectLabel()); + TimeMonitor m1(*this, + label + this->ShortClassName() + ": " + "Setup (total)"); + TimeMonitor m2( + *this, label + this->ShortClassName() + ": " + "Setup" + + " (total, level=" + Teuchos::toString(coarseLevelID) + ")"); + + // TODO: pass coarseLevelManager by reference + TEUCHOS_TEST_FOR_EXCEPTION( + coarseLevelManager == Teuchos::null, Exceptions::RuntimeError, + "MueLu::Hierarchy::Setup(): argument 
coarseLevelManager cannot be null"); + + typedef MueLu::TopRAPFactory + TopRAPFactory; + typedef MueLu::TopSmootherFactory + TopSmootherFactory; + + if (levelManagers_.size() < coarseLevelID + 1) + levelManagers_.resize(coarseLevelID + 1); + levelManagers_[coarseLevelID] = coarseLevelManager; + + bool isFinestLevel = (fineLevelManager.is_null()); + bool isLastLevel = (nextLevelManager.is_null()); + + int oldRank = -1; + if (isFinestLevel) { + RCP A = level.Get>("A"); + RCP domainMap = A->getDomainMap(); + RCP> comm = domainMap->getComm(); + + // Initialize random seed for reproducibility + Utilities::SetRandomSeed(*comm); + + // Record the communicator on the level (used for timers sync) + level.SetComm(comm); + oldRank = SetProcRankVerbose(comm->getRank()); + + // Set the Hierarchy library to match that of the finest level matrix, + // even if it was already set + lib_ = domainMap->lib(); + level.setlib(lib_); + + } else { + // Permeate library to a coarser level + level.setlib(lib_); + + Level &prevLevel = *Levels_[coarseLevelID - 1]; + oldRank = SetProcRankVerbose(prevLevel.GetComm()->getRank()); + } - // Initialize random seed for reproducibility - Utilities::SetRandomSeed(*comm); + CheckLevel(level, coarseLevelID); + + // Attach FactoryManager to the fine level + RCP SFMFine; + if (!isFinestLevel) + SFMFine = rcp( + new SetFactoryManager(Levels_[coarseLevelID - 1], fineLevelManager)); + + if (isFinestLevel && Levels_[coarseLevelID]->IsAvailable("Coordinates")) + ReplaceCoordinateMap(*Levels_[coarseLevelID]); + + // Attach FactoryManager to the coarse level + SetFactoryManager SFMCoarse(Levels_[coarseLevelID], coarseLevelManager); + + if (isDumpingEnabled_ && (dumpLevel_ == 0 || dumpLevel_ == -1) && + coarseLevelID == 1) + DumpCurrentGraph(0); + + RCP coarseFact; + RCP smootherFact = + rcp(new TopSmootherFactory(coarseLevelManager, "Smoother")); + + int nextLevelID = coarseLevelID + 1; + + RCP SFMNext; + if (isLastLevel == false) { + // We are not at the 
coarsest level, so there is going to be another level + // ("next coarse") after this one ("coarse") + if (nextLevelID > LastLevelID()) + AddNewLevel(); + CheckLevel(*Levels_[nextLevelID], nextLevelID); + + // Attach FactoryManager to the next level (level after coarse) + SFMNext = + rcp(new SetFactoryManager(Levels_[nextLevelID], nextLevelManager)); + Levels_[nextLevelID]->Request( + TopRAPFactory(coarseLevelManager, nextLevelManager)); + + // Do smoother requests here. We don't know whether this is going to be + // the coarsest level or not, but we need to DeclareInput before we call + // coarseRAPFactory.Build(), otherwise some stuff may be erased after + // level releases + level.Request(*smootherFact); + + } else { + // Similar to smoother above, do the coarse solver request here. We don't + // know whether this is going to be the coarsest level or not, but we + // need to DeclareInput before we call coarseRAPFactory.Build(), + // otherwise some stuff may be erased after level releases. This is + // actually evident on ProjectorSmoother. It requires both "A" and + // "Nullspace". However, "Nullspace" is erased after all releases, so if + // we call the coarse factory request after RAP build we would not have + // any data, and cannot get it as we don't have previous managers. 
The + // typical trace looks like this: + // + // MueLu::Level(0)::GetFactory(Aggregates, 0): No FactoryManager + // during request for data " Aggregates" on level 0 by factory + // TentativePFactory during request for data " P" on level 1 + // by factory EminPFactory during request for data " P" on + // level 1 by factory TransPFactory during request for data " R" on level + // 1 by factory RAPFactory during request for data " A" on + // level 1 by factory TentativePFactory during request for data " + // Nullspace" on level 2 by factory NullspaceFactory during request for + // data " Nullspace" on level 2 by factory NullspacePresmoothFactory + // during request for data " Nullspace" on level 2 by factory + // ProjectorSmoother during request for data " PreSmoother" on level 2 + // by factory NoFactory + if (coarseFact.is_null()) + coarseFact = + rcp(new TopSmootherFactory(coarseLevelManager, "CoarseSolver")); + level.Request(*coarseFact); + } - // Record the communicator on the level (used for timers sync) - level.SetComm(comm); - oldRank = SetProcRankVerbose(comm->getRank()); + GetOStream(Runtime0) << std::endl; + PrintMonitor m0(*this, "Level " + Teuchos::toString(coarseLevelID), + static_cast(Runtime0 | Test)); - // Set the Hierarchy library to match that of the finest level matrix, - // even if it was already set - lib_ = domainMap->lib(); - level.setlib(lib_); + // Build coarse level hierarchy + RCP Ac = Teuchos::null; + TopRAPFactory coarseRAPFactory(fineLevelManager, coarseLevelManager); - } else { - // Permeate library to a coarser level - level.setlib(lib_); + if (level.IsAvailable("A")) { + Ac = level.Get>("A"); + } else if (!isFinestLevel) { + // We only build here, the release is done later + coarseRAPFactory.Build(*level.GetPreviousLevel(), level); + } - Level& prevLevel = *Levels_[coarseLevelID-1]; - oldRank = SetProcRankVerbose(prevLevel.GetComm()->getRank()); + bool setLastLevelviaMaxCoarseSize = false; + if (level.IsAvailable("A")) + Ac = 
level.Get>("A"); + RCP Acm = rcp_dynamic_cast(Ac); + + // Record the communicator on the level + if (!Ac.is_null()) + level.SetComm(Ac->getDomainMap()->getComm()); + + // Test if we reach the end of the hierarchy + bool isOrigLastLevel = isLastLevel; + if (isLastLevel) { + // Last level as we have achieved the max limit + isLastLevel = true; + + } else if (Ac.is_null()) { + // Last level for this processor, as it does not belong to the next + // subcommunicator. Other processors may continue working on the + // hierarchy + isLastLevel = true; + + } else { + if (!Acm.is_null() && Acm->getGlobalNumRows() <= maxCoarseSize_) { + // Last level as the size of the coarse matrix became too small + GetOStream(Runtime0) << "Max coarse size (<= " << maxCoarseSize_ + << ") achieved" << std::endl; + isLastLevel = true; + if (Acm->getGlobalNumRows() != 0) + setLastLevelviaMaxCoarseSize = true; } + } - CheckLevel(level, coarseLevelID); - - // Attach FactoryManager to the fine level - RCP SFMFine; - if (!isFinestLevel) - SFMFine = rcp(new SetFactoryManager(Levels_[coarseLevelID-1], fineLevelManager)); - - if (isFinestLevel && Levels_[coarseLevelID]->IsAvailable("Coordinates")) - ReplaceCoordinateMap(*Levels_[coarseLevelID]); - - // Attach FactoryManager to the coarse level - SetFactoryManager SFMCoarse(Levels_[coarseLevelID], coarseLevelManager); - - if (isDumpingEnabled_ && (dumpLevel_ == 0 || dumpLevel_ == -1) && coarseLevelID == 1) - DumpCurrentGraph(0); - - RCP coarseFact; - RCP smootherFact = rcp(new TopSmootherFactory(coarseLevelManager, "Smoother")); - - int nextLevelID = coarseLevelID + 1; - - RCP SFMNext; - if (isLastLevel == false) { - // We are not at the coarsest level, so there is going to be another level ("next coarse") after this one ("coarse") - if (nextLevelID > LastLevelID()) - AddNewLevel(); - CheckLevel(*Levels_[nextLevelID], nextLevelID); - - // Attach FactoryManager to the next level (level after coarse) - SFMNext = rcp(new 
SetFactoryManager(Levels_[nextLevelID], nextLevelManager)); - Levels_[nextLevelID]->Request(TopRAPFactory(coarseLevelManager, nextLevelManager)); + if (!Ac.is_null() && !isFinestLevel) { + RCP A = + Levels_[coarseLevelID - 1]->template Get>("A"); + RCP Am = rcp_dynamic_cast(A); - // Do smoother requests here. We don't know whether this is going to be - // the coarsest level or not, but we need to DeclareInput before we call - // coarseRAPFactory.Build(), otherwise some stuff may be erased after - // level releases - level.Request(*smootherFact); + const double maxCoarse2FineRatio = 0.8; + if (!Acm.is_null() && !Am.is_null() && + Acm->getGlobalNumRows() > + maxCoarse2FineRatio * Am->getGlobalNumRows()) { + // We could abort here, but for now we simply notify user. + // Couple of additional points: + // - if repartitioning is delayed until level K, but the aggregation + // procedure stagnates between levels K-1 and K. In this case, + // repartitioning could enable faster coarsening once again, but the + // hierarchy construction will abort due to the stagnation check. + // - if the matrix is small enough, we could move it to one processor. + GetOStream(Warnings0) + << "Aggregation stagnated. Please check your matrix and/or adjust " + "your configuration file." + << "Possible fixes:\n" + << " - reduce the maximum number of levels\n" + << " - enable repartitioning\n" + << " - increase the minimum coarse size." << std::endl; + } + } - } else { - // Similar to smoother above, do the coarse solver request here. We don't - // know whether this is going to be the coarsest level or not, but we - // need to DeclareInput before we call coarseRAPFactory.Build(), - // otherwise some stuff may be erased after level releases. This is - // actually evident on ProjectorSmoother. It requires both "A" and - // "Nullspace". 
However, "Nullspace" is erased after all releases, so if - // we call the coarse factory request after RAP build we would not have - // any data, and cannot get it as we don't have previous managers. The - // typical trace looks like this: - // - // MueLu::Level(0)::GetFactory(Aggregates, 0): No FactoryManager - // during request for data " Aggregates" on level 0 by factory TentativePFactory - // during request for data " P" on level 1 by factory EminPFactory - // during request for data " P" on level 1 by factory TransPFactory - // during request for data " R" on level 1 by factory RAPFactory - // during request for data " A" on level 1 by factory TentativePFactory - // during request for data " Nullspace" on level 2 by factory NullspaceFactory - // during request for data " Nullspace" on level 2 by factory NullspacePresmoothFactory - // during request for data " Nullspace" on level 2 by factory ProjectorSmoother - // during request for data " PreSmoother" on level 2 by factory NoFactory + if (isLastLevel) { + if (!isOrigLastLevel) { + // We did not expect to finish this early so we did request a smoother. + // We need a coarse solver instead. Do the magic. 
+ level.Release(*smootherFact); if (coarseFact.is_null()) - coarseFact = rcp(new TopSmootherFactory(coarseLevelManager, "CoarseSolver")); + coarseFact = + rcp(new TopSmootherFactory(coarseLevelManager, "CoarseSolver")); level.Request(*coarseFact); } - GetOStream(Runtime0) << std::endl; - PrintMonitor m0(*this, "Level " + Teuchos::toString(coarseLevelID), static_cast(Runtime0 | Test)); - - // Build coarse level hierarchy - RCP Ac = Teuchos::null; - TopRAPFactory coarseRAPFactory(fineLevelManager, coarseLevelManager); - - if (level.IsAvailable("A")) { - Ac = level.Get >("A"); - } else if (!isFinestLevel) { - // We only build here, the release is done later - coarseRAPFactory.Build(*level.GetPreviousLevel(), level); - } - - bool setLastLevelviaMaxCoarseSize = false; - if (level.IsAvailable("A")) - Ac = level.Get >("A"); - RCP Acm = rcp_dynamic_cast(Ac); - - // Record the communicator on the level + // Do the actual build, if we have any data. + // NOTE: this is not a great check, we may want to call Build() regardless. if (!Ac.is_null()) - level.SetComm(Ac->getDomainMap()->getComm()); + coarseFact->Build(level); - // Test if we reach the end of the hierarchy - bool isOrigLastLevel = isLastLevel; - if (isLastLevel) { - // Last level as we have achieved the max limit - isLastLevel = true; - - } else if (Ac.is_null()) { - // Last level for this processor, as it does not belong to the next - // subcommunicator. Other processors may continue working on the - // hierarchy - isLastLevel = true; - - } else { - if (!Acm.is_null() && Acm->getGlobalNumRows() <= maxCoarseSize_) { - // Last level as the size of the coarse matrix became too small - GetOStream(Runtime0) << "Max coarse size (<= " << maxCoarseSize_ << ") achieved" << std::endl; - isLastLevel = true; - if (Acm->getGlobalNumRows() != 0) setLastLevelviaMaxCoarseSize = true; - } - } + // Once the dirty deed is done, release stuff. The smoother has already + // been released. 
+ level.Release(*coarseFact); - if (!Ac.is_null() && !isFinestLevel) { - RCP A = Levels_[coarseLevelID-1]->template Get< RCP >("A"); - RCP Am = rcp_dynamic_cast(A); - - const double maxCoarse2FineRatio = 0.8; - if (!Acm.is_null() && !Am.is_null() && Acm->getGlobalNumRows() > maxCoarse2FineRatio * Am->getGlobalNumRows()) { - // We could abort here, but for now we simply notify user. - // Couple of additional points: - // - if repartitioning is delayed until level K, but the aggregation - // procedure stagnates between levels K-1 and K. In this case, - // repartitioning could enable faster coarsening once again, but the - // hierarchy construction will abort due to the stagnation check. - // - if the matrix is small enough, we could move it to one processor. - GetOStream(Warnings0) << "Aggregation stagnated. Please check your matrix and/or adjust your configuration file." - << "Possible fixes:\n" - << " - reduce the maximum number of levels\n" - << " - enable repartitioning\n" - << " - increase the minimum coarse size." << std::endl; + } else { + // isLastLevel = false => isOrigLastLevel = false, meaning that we have + // requested the smoother. Now we need to build it and to release it. + // We don't need to worry about the coarse solver, as we didn't request it. + if (!Ac.is_null()) + smootherFact->Build(level); - } - } + level.Release(*smootherFact); + } - if (isLastLevel) { - if (!isOrigLastLevel) { - // We did not expect to finish this early so we did request a smoother. - // We need a coarse solver instead. Do the magic. - level.Release(*smootherFact); - if (coarseFact.is_null()) - coarseFact = rcp(new TopSmootherFactory(coarseLevelManager, "CoarseSolver")); - level.Request(*coarseFact); + if (isLastLevel == true) { + int actualNumLevels = nextLevelID; + if (isOrigLastLevel == false) { + // Earlier in the function, we constructed the next coarse level, and + // requested data for the that level, assuming that we are not at the + // coarsest level. 
Now, we changed our mind, so we have to release those. + Levels_[nextLevelID]->Release( + TopRAPFactory(coarseLevelManager, nextLevelManager)); + + // We truncate/resize the hierarchy and possibly remove the last created + // level if there is something wrong with it as indicated by its P not + // being valid. This might happen if the global number of aggregates turns + // out to be zero + + if (!setLastLevelviaMaxCoarseSize) { + if (Levels_[nextLevelID - 1]->IsAvailable("P")) { + if (Levels_[nextLevelID - 1]->template Get>("P") == + Teuchos::null) + actualNumLevels = nextLevelID - 1; + } else + actualNumLevels = nextLevelID - 1; } - - // Do the actual build, if we have any data. - // NOTE: this is not a great check, we may want to call Build() regardless. - if (!Ac.is_null()) - coarseFact->Build(level); - - // Once the dirty deed is done, release stuff. The smoother has already - // been released. - level.Release(*coarseFact); - - } else { - // isLastLevel = false => isOrigLastLevel = false, meaning that we have - // requested the smoother. Now we need to build it and to release it. - // We don't need to worry about the coarse solver, as we didn't request it. - if (!Ac.is_null()) - smootherFact->Build(level); - - level.Release(*smootherFact); } - - if (isLastLevel == true) { - int actualNumLevels = nextLevelID; - if (isOrigLastLevel == false) { - // Earlier in the function, we constructed the next coarse level, and requested data for the that level, - // assuming that we are not at the coarsest level. Now, we changed our mind, so we have to release those. - Levels_[nextLevelID]->Release(TopRAPFactory(coarseLevelManager, nextLevelManager)); - - // We truncate/resize the hierarchy and possibly remove the last created level if there is - // something wrong with it as indicated by its P not being valid. 
This might happen - // if the global number of aggregates turns out to be zero - - - if (!setLastLevelviaMaxCoarseSize) { - if (Levels_[nextLevelID-1]->IsAvailable("P")) { - if (Levels_[nextLevelID-1]->template Get >("P") == Teuchos::null) actualNumLevels = nextLevelID-1; - } - else actualNumLevels = nextLevelID-1; - } - } - if (actualNumLevels == nextLevelID-1) { - // Didn't expect to finish early so we requested smoother but need coarse solver instead. - Levels_[nextLevelID-2]->Release(*smootherFact); - - if (Levels_[nextLevelID-2]->IsAvailable("PreSmoother") ) Levels_[nextLevelID-2]->RemoveKeepFlag("PreSmoother" ,NoFactory::get()); - if (Levels_[nextLevelID-2]->IsAvailable("PostSmoother")) Levels_[nextLevelID-2]->RemoveKeepFlag("PostSmoother",NoFactory::get()); - if (coarseFact.is_null()) - coarseFact = rcp(new TopSmootherFactory(coarseLevelManager, "CoarseSolver")); - Levels_[nextLevelID-2]->Request(*coarseFact); - if ( !(Levels_[nextLevelID-2]->template Get >("A").is_null() )) - coarseFact->Build( *(Levels_[nextLevelID-2])); - Levels_[nextLevelID-2]->Release(*coarseFact); - } - Levels_.resize(actualNumLevels); + if (actualNumLevels == nextLevelID - 1) { + // Didn't expect to finish early so we requested smoother but need coarse + // solver instead. 
+ Levels_[nextLevelID - 2]->Release(*smootherFact); + + if (Levels_[nextLevelID - 2]->IsAvailable("PreSmoother")) + Levels_[nextLevelID - 2]->RemoveKeepFlag("PreSmoother", + NoFactory::get()); + if (Levels_[nextLevelID - 2]->IsAvailable("PostSmoother")) + Levels_[nextLevelID - 2]->RemoveKeepFlag("PostSmoother", + NoFactory::get()); + if (coarseFact.is_null()) + coarseFact = + rcp(new TopSmootherFactory(coarseLevelManager, "CoarseSolver")); + Levels_[nextLevelID - 2]->Request(*coarseFact); + if (!(Levels_[nextLevelID - 2]->template Get>("A").is_null())) + coarseFact->Build(*(Levels_[nextLevelID - 2])); + Levels_[nextLevelID - 2]->Release(*coarseFact); } + Levels_.resize(actualNumLevels); + } - // I think this is the proper place for graph so that it shows every dependence - if (isDumpingEnabled_ && ( (dumpLevel_ > 0 && coarseLevelID == dumpLevel_) || dumpLevel_ == -1 ) ) - DumpCurrentGraph(coarseLevelID); - - if (!isFinestLevel) { - // Release the hierarchy data - // We release so late to help blocked solvers, as the smoothers for them need A blocks - // which we construct in RAPFactory - level.Release(coarseRAPFactory); - } + // I think this is the proper place for graph so that it shows every + // dependence + if (isDumpingEnabled_ && + ((dumpLevel_ > 0 && coarseLevelID == dumpLevel_) || dumpLevel_ == -1)) + DumpCurrentGraph(coarseLevelID); + + if (!isFinestLevel) { + // Release the hierarchy data + // We release so late to help blocked solvers, as the smoothers for them + // need A blocks which we construct in RAPFactory + level.Release(coarseRAPFactory); + } - if (oldRank != -1) - SetProcRankVerbose(oldRank); + if (oldRank != -1) + SetProcRankVerbose(oldRank); - return isLastLevel; - } + return isLastLevel; +} - template - void Hierarchy::SetupRe() { - int numLevels = Levels_.size(); - TEUCHOS_TEST_FOR_EXCEPTION(levelManagers_.size() != numLevels, Exceptions::RuntimeError, - "Hierarchy::SetupRe: " << Levels_.size() << " levels, but " << levelManagers_.size() << 
" level factory managers"); +template +void Hierarchy::SetupRe() { + int numLevels = Levels_.size(); + TEUCHOS_TEST_FOR_EXCEPTION( + levelManagers_.size() != numLevels, Exceptions::RuntimeError, + "Hierarchy::SetupRe: " << Levels_.size() << " levels, but " + << levelManagers_.size() + << " level factory managers"); - const int startLevel = 0; - Clear(startLevel); + const int startLevel = 0; + Clear(startLevel); #ifdef HAVE_MUELU_DEBUG - // Reset factories' data used for debugging - for (int i = 0; i < numLevels; i++) - levelManagers_[i]->ResetDebugData(); + // Reset factories' data used for debugging + for (int i = 0; i < numLevels; i++) + levelManagers_[i]->ResetDebugData(); #endif - int levelID; - for (levelID = startLevel; levelID < numLevels;) { - bool r = Setup(levelID, - (levelID != 0 ? levelManagers_[levelID-1] : Teuchos::null), - levelManagers_[levelID], - (levelID+1 != numLevels ? levelManagers_[levelID+1] : Teuchos::null)); - levelID++; - if (r) break; - } - // We may construct fewer levels for some reason, make sure we continue - // doing that in the future - Levels_ .resize(levelID); - levelManagers_.resize(levelID); - - int sizeOfVecs = sizeOfAllocatedLevelMultiVectors_; - - AllocateLevelMultiVectors(sizeOfVecs, true); - - // since the # of levels, etc. may have changed, force re-determination of description during next call to description() - ResetDescription(); - - describe(GetOStream(Statistics0), GetVerbLevel()); + int levelID; + for (levelID = startLevel; levelID < numLevels;) { + bool r = Setup(levelID, + (levelID != 0 ? levelManagers_[levelID - 1] : Teuchos::null), + levelManagers_[levelID], + (levelID + 1 != numLevels ? 
levelManagers_[levelID + 1] + : Teuchos::null)); + levelID++; + if (r) + break; } + // We may construct fewer levels for some reason, make sure we continue + // doing that in the future + Levels_.resize(levelID); + levelManagers_.resize(levelID); - template - void Hierarchy::Setup(const FactoryManagerBase& manager, int startLevel, int numDesiredLevels) { - // Use MueLu::BaseClass::description() to avoid printing "{numLevels = 1}" (numLevels is increasing...) - PrintMonitor m0(*this, "Setup (" + this->MueLu::BaseClass::description() + ")", Runtime0); + int sizeOfVecs = sizeOfAllocatedLevelMultiVectors_; - Clear(startLevel); + AllocateLevelMultiVectors(sizeOfVecs, true); - // Check Levels_[startLevel] exists. - TEUCHOS_TEST_FOR_EXCEPTION(Levels_.size() <= startLevel, Exceptions::RuntimeError, - "MueLu::Hierarchy::Setup(): fine level (" << startLevel << ") does not exist"); + // since the # of levels, etc. may have changed, force re-determination of + // description during next call to description() + ResetDescription(); - TEUCHOS_TEST_FOR_EXCEPTION(numDesiredLevels <= 0, Exceptions::RuntimeError, - "Constructing non-positive (" << numDesiredLevels << ") number of levels does not make sense."); - - // Check for fine level matrix A - TEUCHOS_TEST_FOR_EXCEPTION(!Levels_[startLevel]->IsAvailable("A"), Exceptions::RuntimeError, - "MueLu::Hierarchy::Setup(): fine level (" << startLevel << ") has no matrix A! 
" - "Set fine level matrix A using Level.Set()"); - - RCP A = Levels_[startLevel]->template Get >("A"); - lib_ = A->getDomainMap()->lib(); - - if (IsPrint(Statistics2)) { - RCP Amat = rcp_dynamic_cast(A); - - if (!Amat.is_null()) { - RCP params = rcp(new ParameterList()); - params->set("printLoadBalancingInfo", true); - params->set("printCommInfo", true); - - GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*Amat, "A0", params); - } else { - GetOStream(Warnings1) << "Fine level operator is not a matrix, statistics are not available" << std::endl; - } - } - - RCP rcpmanager = rcpFromRef(manager); - - const int lastLevel = startLevel + numDesiredLevels - 1; - GetOStream(Runtime0) << "Setup loop: startLevel = " << startLevel << ", lastLevel = " << lastLevel - << " (stop if numLevels = " << numDesiredLevels << " or Ac.size() < " << maxCoarseSize_ << ")" << std::endl; - - // Setup multigrid levels - int iLevel = 0; - if (numDesiredLevels == 1) { - iLevel = 0; - Setup(startLevel, Teuchos::null, rcpmanager, Teuchos::null); // setup finest==coarsest level (first and last managers are Teuchos::null) + describe(GetOStream(Statistics0), GetVerbLevel()); +} +template +void Hierarchy::Setup( + const FactoryManagerBase &manager, int startLevel, int numDesiredLevels) { + // Use MueLu::BaseClass::description() to avoid printing "{numLevels = 1}" + // (numLevels is increasing...) + PrintMonitor m0( + *this, "Setup (" + this->MueLu::BaseClass::description() + ")", Runtime0); + + Clear(startLevel); + + // Check Levels_[startLevel] exists. 
+ TEUCHOS_TEST_FOR_EXCEPTION(Levels_.size() <= startLevel, + Exceptions::RuntimeError, + "MueLu::Hierarchy::Setup(): fine level (" + << startLevel << ") does not exist"); + + TEUCHOS_TEST_FOR_EXCEPTION(numDesiredLevels <= 0, Exceptions::RuntimeError, + "Constructing non-positive (" + << numDesiredLevels + << ") number of levels does not make sense."); + + // Check for fine level matrix A + TEUCHOS_TEST_FOR_EXCEPTION( + !Levels_[startLevel]->IsAvailable("A"), Exceptions::RuntimeError, + "MueLu::Hierarchy::Setup(): fine level (" + << startLevel + << ") has no matrix A! " + "Set fine level matrix A using Level.Set()"); + + RCP A = Levels_[startLevel]->template Get>("A"); + lib_ = A->getDomainMap()->lib(); + + if (IsPrint(Statistics2)) { + RCP Amat = rcp_dynamic_cast(A); + + if (!Amat.is_null()) { + RCP params = rcp(new ParameterList()); + params->set("printLoadBalancingInfo", true); + params->set("printCommInfo", true); + + GetOStream(Statistics2) + << PerfUtils::PrintMatrixInfo(*Amat, "A0", params); } else { - bool bIsLastLevel = Setup(startLevel, Teuchos::null, rcpmanager, rcpmanager); // setup finest level (level 0) (first manager is Teuchos::null) - if (bIsLastLevel == false) { - for (iLevel = startLevel + 1; iLevel < lastLevel; iLevel++) { - bIsLastLevel = Setup(iLevel, rcpmanager, rcpmanager, rcpmanager); // setup intermediate levels - if (bIsLastLevel == true) - break; - } - if (bIsLastLevel == false) - Setup(lastLevel, rcpmanager, rcpmanager, Teuchos::null); // setup coarsest level (last manager is Teuchos::null) - } + GetOStream(Warnings1) + << "Fine level operator is not a matrix, statistics are not available" + << std::endl; } - - // TODO: some check like this should be done at the beginning of the routine - TEUCHOS_TEST_FOR_EXCEPTION(iLevel != Levels_.size() - 1, Exceptions::RuntimeError, - "MueLu::Hierarchy::Setup(): number of level"); - - // TODO: this is not exception safe: manager will still hold default - // factories if you exit this function with an 
exception - manager.Clean(); - - describe(GetOStream(Statistics0), GetVerbLevel()); } - template - void Hierarchy::Clear(int startLevel) { - if (startLevel < GetNumLevels()) - GetOStream(Runtime0) << "Clearing old data (if any)" << std::endl; - - for (int iLevel = startLevel; iLevel < GetNumLevels(); iLevel++) - Levels_[iLevel]->Clear(); - } - - template - void Hierarchy::ExpertClear() { - GetOStream(Runtime0) << "Clearing old data (expert)" << std::endl; - for (int iLevel = 0; iLevel < GetNumLevels(); iLevel++) - Levels_[iLevel]->ExpertClear(); + RCP rcpmanager = rcpFromRef(manager); + + const int lastLevel = startLevel + numDesiredLevels - 1; + GetOStream(Runtime0) << "Setup loop: startLevel = " << startLevel + << ", lastLevel = " << lastLevel + << " (stop if numLevels = " << numDesiredLevels + << " or Ac.size() < " << maxCoarseSize_ << ")" + << std::endl; + + // Setup multigrid levels + int iLevel = 0; + if (numDesiredLevels == 1) { + iLevel = 0; + Setup(startLevel, Teuchos::null, rcpmanager, + Teuchos::null); // setup finest==coarsest level (first and last + // managers are Teuchos::null) + + } else { + bool bIsLastLevel = Setup(startLevel, Teuchos::null, rcpmanager, + rcpmanager); // setup finest level (level 0) + // (first manager is Teuchos::null) + if (bIsLastLevel == false) { + for (iLevel = startLevel + 1; iLevel < lastLevel; iLevel++) { + bIsLastLevel = Setup(iLevel, rcpmanager, rcpmanager, + rcpmanager); // setup intermediate levels + if (bIsLastLevel == true) + break; + } + if (bIsLastLevel == false) + Setup(lastLevel, rcpmanager, rcpmanager, + Teuchos::null); // setup coarsest level (last manager is + // Teuchos::null) + } } -#if defined(HAVE_MUELU_EXPERIMENTAL) && defined(HAVE_MUELU_ADDITIVE_VARIANT) - template - ConvergenceStatus Hierarchy::Iterate(const MultiVector& B, MultiVector& X, ConvData conv, - bool InitialGuessIsZero, LO startLevel) { - LO nIts = conv.maxIts_; - MagnitudeType tol = conv.tol_; + // TODO: some check like this should be done 
at the beginning of the routine + TEUCHOS_TEST_FOR_EXCEPTION(iLevel != Levels_.size() - 1, + Exceptions::RuntimeError, + "MueLu::Hierarchy::Setup(): number of level"); - std::string prefix = this->ShortClassName() + ": "; - std::string levelSuffix = " (level=" + toString(startLevel) + ")"; - std::string levelSuffix1 = " (level=" + toString(startLevel+1) + ")"; - - using namespace Teuchos; - RCP