#ifndef MUELU_REPARTITIONFACTORY_DEF_HPP
#define MUELU_REPARTITIONFACTORY_DEF_HPP

#include <algorithm>
#include <iostream>
#include <sstream>

#include "MueLu_RepartitionFactory_decl.hpp"

#ifdef HAVE_MPI
#include <Teuchos_DefaultMpiComm.hpp>
#include <Teuchos_CommHelpers.hpp>
#include <Teuchos_Details_MpiTypeTraits.hpp>

#include <Xpetra_Map.hpp>
#include <Xpetra_MapFactory.hpp>
#include <Xpetra_MultiVectorFactory.hpp>
#include <Xpetra_VectorFactory.hpp>
#include <Xpetra_Import.hpp>
#include <Xpetra_ImportFactory.hpp>
#include <Xpetra_Export.hpp>
#include <Xpetra_ExportFactory.hpp>
#include <Xpetra_Matrix.hpp>
#include <Xpetra_MatrixFactory.hpp>

#include "MueLu_Utilities.hpp"

#include "MueLu_CloneRepartitionInterface.hpp"

#include "MueLu_Level.hpp"
#include "MueLu_MasterList.hpp"
#include "MueLu_Monitor.hpp"
#include "MueLu_PerfUtils.hpp"

namespace MueLu {
template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
RCP<const ParameterList> RepartitionFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::GetValidParameterList() const {
  RCP<ParameterList> validParamList = rcp(new ParameterList());

#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
  SET_VALID_ENTRY("repartition: print partition distribution");
  SET_VALID_ENTRY("repartition: remap parts");
  SET_VALID_ENTRY("repartition: remap num values");
  SET_VALID_ENTRY("repartition: remap accept partition");
  SET_VALID_ENTRY("repartition: node repartition level");
#undef SET_VALID_ENTRY

  validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null, "Factory of the matrix A");
  validParamList->set<RCP<const FactoryBase> >("number of partitions", Teuchos::null, "Instance of RepartitionHeuristicFactory.");
  validParamList->set<RCP<const FactoryBase> >("Partition", Teuchos::null, "Factory of the partition");

  return validParamList;
}
template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
void RepartitionFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::DeclareInput(Level& currentLevel) const {
  Input(currentLevel, "A");
  Input(currentLevel, "number of partitions");
  Input(currentLevel, "Partition");
}
template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
void RepartitionFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level& currentLevel) const {
  FactoryMonitor m(*this, "Build", currentLevel);

  const Teuchos::ParameterList& pL = GetParameterList();
  bool remapPartitions = pL.get<bool>("repartition: remap parts");
  RCP<Matrix> A = Get<RCP<Matrix> >(currentLevel, "A");
  if (A == Teuchos::null) {
    Set<RCP<const Import> >(currentLevel, "Importer", Teuchos::null);
    return;
  }
  RCP<const Map> rowMap = A->getRowMap();
  GO indexBase          = rowMap->getIndexBase();
  Xpetra::UnderlyingLib lib = rowMap->lib();

  RCP<const Teuchos::Comm<int> > origComm = rowMap->getComm();
  RCP<const Teuchos::Comm<int> > comm     = origComm;

  int myRank   = comm->getRank();
  int numProcs = comm->getSize();
  RCP<const Teuchos::MpiComm<int> > tmpic = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
  TEUCHOS_TEST_FOR_EXCEPTION(tmpic == Teuchos::null, Exceptions::RuntimeError, "Cannot cast base Teuchos::Comm to Teuchos::MpiComm object.");
  RCP<const Teuchos::OpaqueWrapper<MPI_Comm> > rawMpiComm = tmpic->getRawMpiComm();
  int numPartitions = Get<int>(currentLevel, "number of partitions");
  RCP<GOVector> decomposition = Get<RCP<GOVector> >(currentLevel, "Partition");
  if (remapPartitions == true && Teuchos::rcp_dynamic_cast<const CloneRepartitionInterface>(GetFactory("Partition")) != Teuchos::null) {
    remapPartitions = false;
  }
  // Check for special cases
  if (numPartitions == 1) {
    // Trivial case: the decomposition is all zeros, so skip the call to the repartitioner
    GetOStream(Runtime0) << "Only one partition: Skip call to the repartitioner." << std::endl;

  } else if (numPartitions == -1) {
    // No repartitioning necessary: the decomposition is Teuchos::null
    GetOStream(Runtime0) << "No repartitioning necessary: partitions were left unchanged by the repartitioner" << std::endl;
    Set<RCP<const Import> >(currentLevel, "Importer", Teuchos::null);
    return;
  }
  const int nodeRepartLevel = pL.get<int>("repartition: node repartition level");
  if (currentLevel.GetLevelID() == nodeRepartLevel) {
    // On the node repartition level the partition placement is already fixed, so the
    // remapping heuristic is skipped
    remapPartitions = false;
  }
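
  // ======================================================================================================
  // Remap if necessary
  // ======================================================================================================
  // From the user's perspective remapping is a performance feature: it tries to assign
  // partition k to a rank that already owns many of the rows belonging to partition k, so that
  // less data has to move. Note, however, that the resulting GID order can influence the
  // aggregation on the next level, so runs with and without remapping need not be bit-wise
  // identical.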
  if (remapPartitions) {
    SubFactoryMonitor m1(*this, "DeterminePartitionPlacement", currentLevel);

    bool acceptPartition = pL.get<bool>("repartition: remap accept partition");
    bool allSubdomainsAcceptPartitions;
    int localNumAcceptPartition = acceptPartition;
    int globalNumAcceptPartition;
    MueLu_sumAll(comm, localNumAcceptPartition, globalNumAcceptPartition);
    GetOStream(Statistics2) << "Number of ranks that accept partitions: " << globalNumAcceptPartition << std::endl;

    if (globalNumAcceptPartition < numPartitions) {
      GetOStream(Warnings0) << "Not enough ranks are willing to accept a partition, allowing partitions on all ranks." << std::endl;
      acceptPartition               = true;
      allSubdomainsAcceptPartitions = true;
    } else if (numPartitions > numProcs) {
      // Cannot restrict the placement when there are more partitions than processors
      allSubdomainsAcceptPartitions = true;
    } else {
      allSubdomainsAcceptPartitions = false;
    }

    DeterminePartitionPlacement(*A, *decomposition, numPartitions, acceptPartition, allSubdomainsAcceptPartitions);
  }
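
  // ======================================================================================================
  // Construct importer
  // ======================================================================================================
  // At this point, the following is true:
  //  * Each processor owns 0 or 1 partitions
  //  * If a processor owns a partition, that partition number is equal to the processor rank
  //  * The decomposition vector contains the partition ids that the corresponding GIDs belong to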
  ArrayRCP<const GO> decompEntries;
  if (decomposition->getLocalLength() > 0)
    decompEntries = decomposition->getData(0);
#ifdef HAVE_MUELU_DEBUG
  // Test the range of partition ids
  int incorrectRank = -1;
  for (int i = 0; i < decompEntries.size(); i++)
    if (decompEntries[i] >= numProcs || decompEntries[i] < 0) {
      incorrectRank = myRank;
      break;
    }

  int incorrectGlobalRank = -1;
  MueLu_maxAll(comm, incorrectRank, incorrectGlobalRank);
  TEUCHOS_TEST_FOR_EXCEPTION(incorrectGlobalRank > -1, Exceptions::RuntimeError, "pid " + Teuchos::toString(incorrectGlobalRank) + " encountered a partition number that is out-of-range");
#endif
  Array<GO> myGIDs;
  myGIDs.reserve(decomposition->getLocalLength());
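
  // Step 0: Construct a mapping
  //    part number -> GIDs I own which belong to this part
  // NOTE: the GIDs of my own part go to myGIDs and are not added to the map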
  typedef std::map<GO, Array<GO> > map_type;
  map_type sendMap;
  for (LO i = 0; i < decompEntries.size(); i++) {
    GO id  = decompEntries[i];
    GO GID = rowMap->getGlobalElement(i);

    if (id == myRank)
      myGIDs.push_back(GID);
    else
      sendMap[id].push_back(GID);
  }
  decompEntries = Teuchos::null;
  if (IsPrint(Statistics2)) {
    GO numLocalKept = myGIDs.size(), numGlobalKept, numGlobalRows = A->getGlobalNumRows();
    MueLu_sumAll(comm, numLocalKept, numGlobalKept);
    GetOStream(Statistics2) << "Unmoved rows: " << numGlobalKept << " / " << numGlobalRows << " (" << 100 * Teuchos::as<double>(numGlobalKept) / numGlobalRows << "%)" << std::endl;
  }
  int numSend = sendMap.size(), numRecv;

  // Arrayify the map keys
  Array<GO> myParts(numSend), myPart(1);
  int cnt   = 0;
  myPart[0] = myRank;
  for (typename map_type::const_iterator it = sendMap.begin(); it != sendMap.end(); it++)
    myParts[cnt++] = it->first;
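
  // Step 1: Find out how many processors send me data. Each processor exports a "1" for every
  // part it holds pieces of; the owner of a part receives the sum, i.e. its number of senders.
  // partsIndexBase is zero, as the processor ids start from zero.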
  GO partsIndexBase       = 0;
  RCP<Map> partsIHave     = MapFactory::Build(lib, Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(), myParts(), partsIndexBase, comm);
  RCP<Map> partsIOwn      = MapFactory::Build(lib, numProcs, myPart(), partsIndexBase, comm);
  RCP<Export> partsExport = ExportFactory::Build(partsIHave, partsIOwn);

  RCP<GOVector> partsISend    = Xpetra::VectorFactory<GO, LO, GO, NO>::Build(partsIHave);
  RCP<GOVector> numPartsIRecv = Xpetra::VectorFactory<GO, LO, GO, NO>::Build(partsIOwn);

  ArrayRCP<GO> partsISendData = partsISend->getDataNonConst(0);
  for (int i = 0; i < numSend; i++)
    partsISendData[i] = 1;
  (numPartsIRecv->getDataNonConst(0))[0] = 0;

  numPartsIRecv->doExport(*partsISend, *partsExport, Xpetra::ADD);
  numRecv = (numPartsIRecv->getData(0))[0];
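
  // Step 2: Get my GIDs from everybody else. Raw MPI is used here instead of the Teuchos
  // wrappers, since MPI_Probe/MPI_Get_count are needed to learn the incoming message sizes.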
  MPI_Datatype MpiType = Teuchos::Details::MpiTypeTraits<GO>::getType();
  int msgTag           = 12345;  // arbitrary tag; any value consistent between the sends and receives below works

  // Post sends
  Array<MPI_Request> sendReqs(numSend);
  cnt = 0;
  for (typename map_type::iterator it = sendMap.begin(); it != sendMap.end(); it++)
    MPI_Isend(static_cast<void*>(it->second.getRawPtr()), it->second.size(), MpiType, Teuchos::as<GO>(it->first), msgTag, *rawMpiComm, &sendReqs[cnt++]);
  map_type recvMap;
  size_t totalGIDs = myGIDs.size();
  for (int i = 0; i < numRecv; i++) {
    MPI_Status status;
    MPI_Probe(MPI_ANY_SOURCE, msgTag, *rawMpiComm, &status);

    // Get the sender rank and the number of elements from the status
    int fromRank = status.MPI_SOURCE, count;
    MPI_Get_count(&status, MpiType, &count);

    recvMap[fromRank].resize(count);
    MPI_Recv(static_cast<void*>(recvMap[fromRank].getRawPtr()), count, MpiType, fromRank, msgTag, *rawMpiComm, &status);

    totalGIDs += count;
  }

  // Wait on the send requests
  Array<MPI_Status> sendStatuses(numSend);
  MPI_Waitall(numSend, sendReqs.getRawPtr(), sendStatuses.getRawPtr());
  // Merge the received GIDs
  myGIDs.reserve(totalGIDs);
  for (typename map_type::const_iterator it = recvMap.begin(); it != recvMap.end(); it++) {
    int offset = myGIDs.size(), len = it->second.size();
    if (len) {
      myGIDs.resize(offset + len);
      memcpy(myGIDs.getRawPtr() + offset, it->second.getRawPtr(), len * sizeof(GO));
    }
  }

  // NOTE: myGIDs and each received chunk are already sorted, so a k-way merge would beat the
  // general sort; std::sort keeps the code simple
  std::sort(myGIDs.begin(), myGIDs.end());
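
  // Step 3: Construct the importer from the original row map to the new, rebalanced row map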
  RCP<Map> newRowMap = MapFactory::Build(lib, rowMap->getGlobalNumElements(), myGIDs(), indexBase, origComm);
  RCP<const Import> rowMapImporter;

  RCP<const BlockedMap> blockedRowMap = Teuchos::rcp_dynamic_cast<const BlockedMap>(rowMap);

  if (blockedRowMap.is_null())
    rowMapImporter = ImportFactory::Build(rowMap, newRowMap);
  else
    rowMapImporter = ImportFactory::Build(blockedRowMap->getMap(), newRowMap);
  // For a blocked matrix, chop the new row map into a blocked target map and build one
  // sub-importer per block
  if (!blockedRowMap.is_null()) {
    // GeneratedBlockedTargetMap (declared in MueLu_Utilities) blocks the target map in the
    // same way the source map is blocked
    RCP<const BlockedMap> blockedTargetMap = MueLu::UtilitiesBase<SC, LO, GO, NO>::GeneratedBlockedTargetMap(*blockedRowMap, *rowMapImporter);

    // NOTE: This code is correct but not particularly performant. If it needs to be sped up,
    // one could read data from rowMapImporter instead of building the sub-importers from scratch.
    size_t numBlocks = blockedRowMap->getNumMaps();
    std::vector<RCP<const Import> > subImports(numBlocks);

    for (size_t i = 0; i < numBlocks; i++) {
      RCP<const Map> source = blockedRowMap->getMap(i);
      RCP<const Map> target = blockedTargetMap->getMap(i);
      subImports[i]         = ImportFactory::Build(source, target);
    }
    Set(currentLevel, "SubImporters", subImports);
  }
  Set(currentLevel, "Importer", rowMapImporter);
  if (!rowMapImporter.is_null() && IsPrint(Statistics2)) {
    GetOStream(Statistics2) << PerfUtils::PrintImporterInfo(rowMapImporter, "Importer for rebalancing");
  }
  if (pL.get<bool>("repartition: print partition distribution") && IsPrint(Statistics2)) {
    // Print a compact grid of processors: '+' marks a rank that owns rows, '.' an empty one
    GetOStream(Statistics2) << "Partition distribution over cores (ownership is indicated by '+')" << std::endl;

    char amActive = (myGIDs.size() ? 1 : 0);
    std::vector<char> areActive(numProcs, 0);
    MPI_Gather(&amActive, 1, MPI_CHAR, &areActive[0], 1, MPI_CHAR, 0, *rawMpiComm);

    int rowWidth = std::min(Teuchos::as<int>(ceil(sqrt(numProcs))), 100);
    for (int proc = 0; proc < numProcs; proc += rowWidth) {
      for (int j = 0; j < rowWidth; j++)
        if (proc + j < numProcs)
          GetOStream(Statistics2) << (areActive[proc + j] ? "+" : ".");
        else
          GetOStream(Statistics2) << " ";

      GetOStream(Statistics2) << "      " << proc << ":" << std::min(proc + rowWidth, numProcs) - 1 << std::endl;
    }
  }
}  // Build
//----------------------------------------------------------------------
// Helpers for DeterminePartitionPlacement: a Triplet is one edge of the bipartite
// (processor, partition) graph. NOTE: the field names follow their uses below:
// i = processor id, j = partition id, v = edge weight.
template <typename T, typename W>
struct Triplet {
  T i, j;
  W v;
};

template <typename T, typename W>
static bool compareTriplets(const Triplet<T, W>& a, const Triplet<T, W>& b) {
  return (a.v > b.v);  // descending order
}
template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
void RepartitionFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
    DeterminePartitionPlacement(const Matrix& A, GOVector& decomposition, GO numPartitions,
                                bool willAcceptPartition, bool allSubdomainsAcceptPartitions) const {
  RCP<const Map> rowMap = A.getRowMap();

  // Use a duplicate of the communicator so the Allgather below cannot interfere with other
  // messaging on the original communicator
  RCP<const Teuchos::Comm<int> > comm = rowMap->getComm()->duplicate();
  int numProcs                        = comm->getSize();

  RCP<const Teuchos::MpiComm<int> > tmpic = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
  TEUCHOS_TEST_FOR_EXCEPTION(tmpic == Teuchos::null, Exceptions::RuntimeError, "Cannot cast base Teuchos::Comm to Teuchos::MpiComm object.");
  RCP<const Teuchos::OpaqueWrapper<MPI_Comm> > rawMpiComm = tmpic->getRawMpiComm();
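
  // The "repartition: remap num values" parameter (maxLocal below) bounds the number of
  // largest local edges each rank contributes. The idea is to not construct the full
  // bipartite graph, but only a subset of it, which requires less communication. Selecting
  // the largest local edges should give similar placement quality at lower cost.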
  const Teuchos::ParameterList& pL = GetParameterList();

  const int maxLocal = pL.get<int>("repartition: remap num values");
  const int dataSize = 2 * maxLocal;
  ArrayRCP<GO> decompEntries;
  if (decomposition.getLocalLength() > 0)
    decompEntries = decomposition.getDataNonConst(0);
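
  // Step 1: Sort the local edges by weight. An edge (i, j, v) of the bipartite graph means
  // processor i owns rows belonging to part j, with weight v equal to the total number of
  // nonzeros in those rows. This weight approximates the amount of data that would have to
  // move if part j were placed on a different processor.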
  std::map<GO, GO> lEdges;
  if (willAcceptPartition)
    for (LO i = 0; i < decompEntries.size(); i++)
      lEdges[decompEntries[i]] += A.getNumEntriesInLocalRow(i);
  // Reverse map, so that the edges can be sorted by weight; a multimap is needed since
  // several edges may carry the same weight
  std::multimap<GO, GO> revlEdges;
  for (typename std::map<GO, GO>::const_iterator it = lEdges.begin(); it != lEdges.end(); it++)
    revlEdges.insert(std::make_pair(it->second, it->first));
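
  // lData and gData hold the communicated data as pairs: data[2*i+0] is a part id and
  // data[2*i+1] the corresponding edge weight. The sender's rank is not stored, as it can be
  // recovered from the position in the gathered buffer.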
  int numEdges = 0;
  Array<GO> lData(dataSize, -1), gData(numProcs * dataSize);
  for (typename std::multimap<GO, GO>::reverse_iterator rit = revlEdges.rbegin(); rit != revlEdges.rend() && numEdges < maxLocal; rit++) {
    lData[2 * numEdges + 0] = rit->second;  // part id
    lData[2 * numEdges + 1] = rit->first;   // edge weight
    numEdges++;
  }
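
  // Step 2: Gather the top edges of all ranks; each rank contributes exactly dataSize entries
  // (maxLocal <part id, weight> pairs, padded with -1)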
  MPI_Datatype MpiType = Teuchos::Details::MpiTypeTraits<GO>::getType();
  MPI_Allgather(static_cast<void*>(lData.getRawPtr()), dataSize, MpiType, static_cast<void*>(gData.getRawPtr()), dataSize, MpiType, *rawMpiComm);
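
  // Step 3: Construct the set of global triplets, marking a rank as willing to accept a
  // partition only if it contributed at least one edge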
  Teuchos::Array<Triplet<int, int> > gEdges(numProcs * maxLocal);
  Teuchos::Array<bool> procWillAcceptPartition(numProcs, allSubdomainsAcceptPartitions);
  size_t k = 0;
  for (LO i = 0; i < gData.size(); i += 2) {
    int procNo = i / dataSize;  // the sender rank follows from the offset, as every rank contributes dataSize entries
    GO part    = gData[i + 0];
    GO weight  = gData[i + 1];
    if (part != -1) {  // skip the -1 padding of ranks with fewer than maxLocal edges
      gEdges[k].i = procNo;
      gEdges[k].j = part;
      gEdges[k].v = weight;
      procWillAcceptPartition[procNo] = true;
      k++;
    }
  }
  gEdges.resize(k);

  // Sort the edges by weight in decreasing order (compareTriplets is a reverse sort)
  std::sort(gEdges.begin(), gEdges.end(), compareTriplets<int, int>);
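
  // Do a greedy matching on the weight-sorted edges: take an edge whenever both its rank and
  // its part are still unmatched. This approximates a maximum-weight bipartite matching.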
  std::map<int, int> match;
  Teuchos::Array<char> matchedRanks(numProcs, 0), matchedParts(numPartitions, 0);
  int numMatched = 0;
  for (typename Teuchos::Array<Triplet<int, int> >::const_iterator it = gEdges.begin(); it != gEdges.end(); it++) {
    GO rank = it->i;
    GO part = it->j;
    if (matchedRanks[rank] == 0 && matchedParts[part] == 0) {
      matchedRanks[rank] = 1;
      matchedParts[part] = 1;
      match[part]        = rank;
      numMatched++;
    }
  }
  GetOStream(Statistics1) << "Number of unassigned partitions before cleanup stage: " << (numPartitions - numMatched) << " / " << numPartitions << std::endl;
  if (numPartitions - numMatched > 0) {
    Teuchos::Array<char> partitionCounts(numPartitions, 0);
    for (typename std::map<int, int>::const_iterator it = match.begin(); it != match.end(); it++)
      partitionCounts[it->first] += 1;

    for (int part = 0, matcher = 0; part < numPartitions; part++) {
      if (partitionCounts[part] == 0) {
        // Find the first rank that is not yet matched and is willing to accept a partition
        while (matchedRanks[matcher] || !procWillAcceptPartition[matcher])
          matcher++;

        match[part] = matcher++;
        numMatched++;
      }
    }
  }
  TEUCHOS_TEST_FOR_EXCEPTION(numMatched != numPartitions, Exceptions::RuntimeError,
                             "MueLu::RepartitionFactory::DeterminePartitionPlacement: Only " << numMatched << " partitions out of " << numPartitions << " got assigned to ranks.");
  for (LO i = 0; i < decompEntries.size(); i++)
    decompEntries[i] = match[decompEntries[i]];
}

}  // namespace MueLu

#endif  // HAVE_MPI

#endif  // MUELU_REPARTITIONFACTORY_DEF_HPP