MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_RepartitionHeuristicFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42// Tobias Wiesner (tawiesn@sandia.gov)
43//
44// ***********************************************************************
45//
46// @HEADER
47#ifndef PACKAGES_MUELU_SRC_REBALANCING_MUELU_REPARTITIONHEURISTICFACTORY_DEF_HPP_
48#define PACKAGES_MUELU_SRC_REBALANCING_MUELU_REPARTITIONHEURISTICFACTORY_DEF_HPP_
49
50#include <algorithm>
51#include <iostream>
52#include <sstream>
53
54#ifdef HAVE_MPI
55#include <Teuchos_DefaultMpiComm.hpp>
56#include <Teuchos_CommHelpers.hpp>
57
58//#include <Xpetra_Map.hpp>
59#include <Xpetra_Matrix.hpp>
60
61#include "MueLu_Utilities.hpp"
62
63#include "MueLu_RAPFactory.hpp"
64#include "MueLu_BlockedRAPFactory.hpp"
65#include "MueLu_SubBlockAFactory.hpp"
66#include "MueLu_Level.hpp"
67#include "MueLu_MasterList.hpp"
68#include "MueLu_Monitor.hpp"
69
70#include "MueLu_RepartitionHeuristicFactory.hpp"
71
72namespace MueLu {
73
74 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
76 RCP<ParameterList> validParamList = rcp(new ParameterList());
77
78#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
79 SET_VALID_ENTRY("repartition: start level");
80 SET_VALID_ENTRY("repartition: use map");
81 SET_VALID_ENTRY("repartition: node repartition level");
82 SET_VALID_ENTRY("repartition: min rows per proc");
83 SET_VALID_ENTRY("repartition: target rows per proc");
84 SET_VALID_ENTRY("repartition: min rows per thread");
85 SET_VALID_ENTRY("repartition: target rows per thread");
86 SET_VALID_ENTRY("repartition: max imbalance");
87#undef SET_VALID_ENTRY
88
89 validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Factory of the matrix A");
90 validParamList->set< RCP<const FactoryBase> >("Map", Teuchos::null, "Factory of the map Map");
91 validParamList->set< RCP<const FactoryBase> >("Node Comm", Teuchos::null, "Generating factory of the node level communicator");
92
93 return validParamList;
94 }
95
96 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
98 const Teuchos::ParameterList & pL = GetParameterList();
99 if(pL.isParameter("repartition: use map")) {
100 const bool useMap = pL.get<bool>("repartition: use map");
101 if (useMap)
102 Input(currentLevel, "Map");
103 else
104 Input(currentLevel, "A");
105 } else
106 Input(currentLevel, "A");
107 if(pL.isParameter("repartition: node repartition level")) {
108 const int nodeRepartLevel = pL.get<int>("repartition: node repartition level");
109 if(currentLevel.GetLevelID() == nodeRepartLevel) {
110 Input(currentLevel,"Node Comm");
111 }
112 }
113 }
114
115 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
117 FactoryMonitor m(*this, "Build", currentLevel);
118
119 const Teuchos::ParameterList & pL = GetParameterList();
120 // Access parameters here to make sure that we set the parameter entry flag to "used" even in case of short-circuit evaluation.
121 // TODO (JG): I don't really know if we want to do this.
122 const int startLevel = pL.get<int> ("repartition: start level");
123 const int nodeRepartLevel = pL.get<int> ("repartition: node repartition level");
124 LO minRowsPerProcess = pL.get<LO> ("repartition: min rows per proc");
125 LO targetRowsPerProcess = pL.get<LO> ("repartition: target rows per proc");
126 LO minRowsPerThread = pL.get<LO> ("repartition: min rows per thread");
127 LO targetRowsPerThread = pL.get<LO> ("repartition: target rows per thread");
128 const double nonzeroImbalance = pL.get<double>("repartition: max imbalance");
129 const bool useMap = pL.get<bool> ("repartition: use map");
130
131 int thread_per_mpi_rank = 1;
132#if defined(KOKKOS_ENABLE_OPENMP)
133 using execution_space = typename Node::device_type::execution_space;
134 if (std::is_same<execution_space, Kokkos::OpenMP>::value)
135 thread_per_mpi_rank = execution_space().concurrency();
136#endif
137
138 if (minRowsPerThread > 0)
139 // We ignore the value given by minRowsPerProcess and repartition based on threads instead
140 minRowsPerProcess = minRowsPerThread*thread_per_mpi_rank;
141
142 if (targetRowsPerThread == 0)
143 targetRowsPerThread = minRowsPerThread;
144
145 if (targetRowsPerThread > 0)
146 // We ignore the value given by targetRowsPerProcess and repartition based on threads instead
147 targetRowsPerProcess = targetRowsPerThread*thread_per_mpi_rank;
148
149 if (targetRowsPerProcess == 0)
150 targetRowsPerProcess = minRowsPerProcess;
151
152 // Stick this on the level so Zoltan2Interface can use this later
153 Set<LO>(currentLevel,"repartition: heuristic target rows per process",targetRowsPerProcess);
154
155 // Check for validity of the node repartition option
156 TEUCHOS_TEST_FOR_EXCEPTION(nodeRepartLevel >= startLevel, Exceptions::RuntimeError, "MueLu::RepartitionHeuristicFactory::Build(): If 'repartition: node repartition level' is set, it must be less than or equal to 'repartition: start level'");
157
158
159 RCP<Matrix> A;
160 RCP<const FactoryBase> Afact;
161 RCP<const Map> map;
162 if (!useMap) {
163 Afact = GetFactory("A");
164 if(!Afact.is_null() && Teuchos::rcp_dynamic_cast<const RAPFactory>(Afact) == Teuchos::null &&
165 Teuchos::rcp_dynamic_cast<const BlockedRAPFactory>(Afact) == Teuchos::null &&
166 Teuchos::rcp_dynamic_cast<const SubBlockAFactory>(Afact) == Teuchos::null) {
167 GetOStream(Warnings) <<
168 "MueLu::RepartitionHeuristicFactory::Build: The generation factory for A must " \
169 "be a RAPFactory or a SubBlockAFactory providing the non-rebalanced matrix information! " \
170 "It specifically must not be of type Rebalance(Blocked)AcFactory or similar. " \
171 "Please check the input. Make also sure that \"number of partitions\" is provided to " \
172 "the Interface class and the RepartitionFactory instance. Instead, we have a "<<Afact->description() << std::endl;
173 }
174 // TODO: We only need a CrsGraph. This class does not have to be templated on Scalar types.
175 A = Get< RCP<Matrix> >(currentLevel, "A");
176 map = A->getRowMap();
177 } else
178 map = Get< RCP<const Map> >(currentLevel, "Map");
179
180 // ======================================================================================================
181 // Determine whether partitioning is needed
182 // ======================================================================================================
183 // NOTE: most tests include some global communication, which is why we currently only do tests until we make
184 // a decision on whether to repartition. However, there is value in knowing how "close" we are to having to
185 // rebalance an operator. So, it would probably be beneficial to do and report *all* tests.
186
187 // Test0: Should we do node repartitioning?
188 if (currentLevel.GetLevelID() == nodeRepartLevel && map->getComm()->getSize() > 1) {
189 RCP<const Teuchos::Comm<int> > NodeComm = Get< RCP<const Teuchos::Comm<int> > >(currentLevel, "Node Comm");
190 TEUCHOS_TEST_FOR_EXCEPTION(NodeComm.is_null(), Exceptions::RuntimeError, "MueLu::RepartitionHeuristicFactory::Build(): NodeComm is null.");
191
192 // If we only have one node, then we don't want to pop down to one rank
193 if(NodeComm()->getSize() != map->getComm()->getSize()) {
194 GetOStream(Statistics1) << "Repartitioning? YES: \n Within node only"<<std::endl;
195 int nodeRank = NodeComm->getRank();
196
197 // Do a reduction to get the total number of nodes
198 int isZero = (nodeRank == 0);
199 int numNodes=0;
200 Teuchos::reduceAll(*map->getComm(), Teuchos::REDUCE_SUM, isZero, Teuchos::outArg(numNodes));
201 Set(currentLevel, "number of partitions", numNodes);
202 return;
203 }
204 }
205
206 // Test1: skip repartitioning if current level is less than the specified minimum level for repartitioning
207 if (currentLevel.GetLevelID() < startLevel) {
208 GetOStream(Statistics1) << "Repartitioning? NO:" <<
209 "\n current level = " << Teuchos::toString(currentLevel.GetLevelID()) <<
210 ", first level where repartitioning can happen is " + Teuchos::toString(startLevel) << std::endl;
211
212 // a negative number of processors means: no repartitioning
213 Set(currentLevel, "number of partitions", -1);
214
215 return;
216 }
217
218 RCP<const Teuchos::Comm<int> > origComm = map->getComm();
219 RCP<const Teuchos::Comm<int> > comm = origComm;
220
221 // Test 2: check whether A is actually distributed, i.e. more than one processor owns part of A
222 // TODO: this global communication can be avoided if we store the information with the matrix (it is known when matrix is created)
223 // TODO: further improvements could be achieved when we use subcommunicator for the active set. Then we only need to check its size
224
225 // TODO: The block transfer factories do not check correctly whether or not repartitioning actually took place.
226 {
227 if (comm->getSize() == 1 && Teuchos::rcp_dynamic_cast<const RAPFactory>(Afact) != Teuchos::null) {
228 GetOStream(Statistics1) << "Repartitioning? NO:" <<
229 "\n comm size = 1" << std::endl;
230
231 Set(currentLevel, "number of partitions", -1);
232 return;
233 }
234
235 int numActiveProcesses = 0;
236 MueLu_sumAll(comm, Teuchos::as<int>((map->getLocalNumElements() > 0) ? 1 : 0), numActiveProcesses);
237
238 if (numActiveProcesses == 1) {
239 GetOStream(Statistics1) << "Repartitioning? NO:" <<
240 "\n # processes with rows = " << Teuchos::toString(numActiveProcesses) << std::endl;
241
242 Set(currentLevel, "number of partitions", 1);
243 return;
244 }
245 }
246
247 bool test3 = false, test4 = false;
248 std::string msg3, msg4;
249
250 // Test3: check whether number of rows on any processor satisfies the minimum number of rows requirement
251 // NOTE: Test2 ensures that repartitionning is not done when there is only one processor (it may or may not satisfy Test3)
252 if (minRowsPerProcess > 0) {
253 LO numMyRows = Teuchos::as<LO>(map->getLocalNumElements()), minNumRows, LOMAX = Teuchos::OrdinalTraits<LO>::max();
254 LO haveFewRows = (numMyRows < minRowsPerProcess ? 1 : 0), numWithFewRows = 0;
255 MueLu_sumAll(comm, haveFewRows, numWithFewRows);
256 MueLu_minAll(comm, (numMyRows > 0 ? numMyRows : LOMAX), minNumRows);
257
258 // TODO: we could change it to repartition only if the number of processors with numRows < minNumRows is larger than some
259 // percentage of the total number. This way, we won't repartition if 2 out of 1000 processors don't have enough elements.
260 // I'm thinking maybe 20% threshold. To implement, simply add " && numWithFewRows < .2*numProcs" to the if statement.
261 if (numWithFewRows > 0)
262 test3 = true;
263
264 msg3 = "\n min # rows per proc = " + Teuchos::toString(minNumRows) + ", min allowable = " + Teuchos::toString(minRowsPerProcess);
265 }
266
267 // Test4: check whether the balance in the number of nonzeros per processor is greater than threshold
268 if (!test3) {
269 if (useMap)
270 msg4 = "";
271 else {
272 GO minNnz, maxNnz, numMyNnz = Teuchos::as<GO>(A->getLocalNumEntries());
273 MueLu_maxAll(comm, numMyNnz, maxNnz);
274 MueLu_minAll(comm, (numMyNnz > 0 ? numMyNnz : maxNnz), minNnz); // min nnz over all active processors
275 double imbalance = Teuchos::as<double>(maxNnz)/minNnz;
276
277 if (imbalance > nonzeroImbalance)
278 test4 = true;
279
280 msg4 = "\n nonzero imbalance = " + Teuchos::toString(imbalance) + ", max allowable = " + Teuchos::toString(nonzeroImbalance);
281 }
282 }
283
284 if (!test3 && !test4) {
285 GetOStream(Statistics1) << "Repartitioning? NO:" << msg3 + msg4 << std::endl;
286
287 // A negative number of partitions means: no repartitioning
288 Set(currentLevel, "number of partitions", -1);
289 return;
290 }
291
292 GetOStream(Statistics1) << "Repartitioning? YES:" << msg3 + msg4 << std::endl;
293
294 // ======================================================================================================
295 // Calculate number of partitions
296 // ======================================================================================================
297 // FIXME Quick way to figure out how many partitions there should be (same algorithm as ML)
298 // FIXME Should take into account nnz? Perhaps only when user is using min #nnz per row threshold.
299
300 // The number of partitions is calculated by the RepartitionFactory and stored in "number of partitions" variable on
301 // the current level. If this variable is already set (e.g., by another instance of RepartitionFactory) then this number
302 // is used. The "number of partitions" variable serves as basic communication between the RepartitionFactory (which
303 // requests a certain number of partitions) and the *Interface classes which call the underlying partitioning algorithms
304 // and produce the "Partition" array with the requested number of partitions.
305 const auto globalNumRows = Teuchos::as<GO>(map->getGlobalNumElements());
306 int numPartitions = 1;
307 if (globalNumRows >= targetRowsPerProcess) {
308 // Make sure that each CPU thread has approximately targetRowsPerProcess
309 numPartitions = std::max(Teuchos::as<int>(globalNumRows / targetRowsPerProcess), 1);
310 }
311 numPartitions = std::min(numPartitions, comm->getSize());
312
313 Set(currentLevel, "number of partitions", numPartitions);
314
315 GetOStream(Statistics1) << "Number of partitions to use = " << numPartitions << std::endl;
316 } // Build
317} // namespace MueLu
318
319#endif //ifdef HAVE_MPI
320#endif /* PACKAGES_MUELU_SRC_REBALANCING_MUELU_REPARTITIONHEURISTICFACTORY_DEF_HPP_ */
#define SET_VALID_ENTRY(name)
#define MueLu_maxAll(rcpComm, in, out)
#define MueLu_sumAll(rcpComm, in, out)
#define MueLu_minAll(rcpComm, in, out)
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:76
void DeclareInput(Level &currentLevel) const
Determines the data that RepartitionHeuristicFactory needs, and the factories that generate that data...
void Build(Level &currentLevel) const
Build an object with this factory.
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
Namespace for MueLu classes and methods.
@ Warnings
Print all warning messages.
@ Statistics1
Print more statistics.