46#ifndef MUELU_PERFUTILS_DEF_HPP
47#define MUELU_PERFUTILS_DEF_HPP
53#include <Teuchos_CommHelpers.hpp>
56#include <Xpetra_Export.hpp>
57#include <Xpetra_Import.hpp>
58#include <Xpetra_Matrix.hpp>
59#include <Xpetra_CrsMatrixWrap.hpp>
// calculateStats: derives summary statistics for the per-process value `v`
// across the processes of `comm`.
//
// Outputs (all passed by reference):
//   minVal, maxVal   - extreme values of `v`
//   avgVal           - real part of sumVal divided by numActiveProcs
//   devVal           - spread estimate computed from sumVal and sum2Val
//   minProc, maxProc - rank information for the extremes
//
// NOTE(review): this listing is incomplete. The template header, the
// reductions that fill minVal/maxVal/sumVal/sum2Val/minProc/maxProc and the
// declaration of `w` are not visible here; presumably sumVal and sum2Val are
// global sums of v and v*v - confirm against the full file.
68 void calculateStats(Type& minVal, Type& maxVal,
double& avgVal,
double& devVal,
int& minProc,
int& maxProc,
const RCP<
const Teuchos::Comm<int> >& comm,
int numActiveProcs,
const Type& v) {
// v2 is the square of the local value; it feeds the second-moment term.
70 Type sumVal, sum2Val, v2 = v*v;
71 using MT =
typename Teuchos::ScalarTraits<Type>::magnitudeType;
72 double zero = Teuchos::ScalarTraits<double>::zero();
// A process contributes its own rank when its local value equals the
// extreme value, and -1 otherwise.
80 w = (minVal == v) ? comm->getRank() : -1;
82 w = (maxVal == v) ? comm->getRank() : -1;
// The average is guarded against numActiveProcs == 0 and falls back to zero.
85 avgVal = (numActiveProcs > 0 ? (as<double>(Teuchos::ScalarTraits<Type>::real(sumVal)) / numActiveProcs) : zero);
// The average is converted to the magnitude type so it can be multiplied
// with a value of type Type.
86 MT avgVal_MT = Teuchos::as<MT>(avgVal);
// devVal = sqrt( real(sum2Val - sumVal*avg) / (numActiveProcs - 1) ).
// With fewer than two active processes the deviation is reported as zero,
// which also avoids dividing by zero.
87 devVal = (numActiveProcs > 1 ? sqrt((as<double>(Teuchos::ScalarTraits<Type>::real(sum2Val - sumVal*avgVal_MT)))/(numActiveProcs-1)) : zero);
// stringStats (body fragment): formats the statistics produced by
// calculateStats<Type> into a one-line, human-readable summary in `buf`.
//
// Two layouts are visible below:
//   * relative: dev/min/max are printed as percentages of the average, with
//     the raw min/max value and a rank in parentheses;
//   * absolute: min/max are printed as raw values with a rank in parentheses.
//
// NOTE(review): the function signature, the declarations of
// minVal/maxVal/minProc/maxProc, the separator between the two layouts
// (presumably an `else`) and the return statement are not visible in this
// listing.
93 double avgVal, devVal;
95 calculateStats<Type>(minVal, maxVal, avgVal, devVal, minProc, maxProc, comm, numActiveProcs, v);
97 const double zero = Teuchos::ScalarTraits<double>::zero();
98 const double one = Teuchos::ScalarTraits<double>::one();
99 std::ostringstream buf;
// The relative layout is used only when the average is non-zero (it is used
// as a divisor below) AND the caller did not request absolute output, i.e.
// paramList is null, has no "print abs" entry, or "print abs" is false.
101 if ((avgVal != zero) && (paramList.is_null() || !paramList->isParameter(
"print abs") || paramList->get<
bool>(
"print abs") ==
false)) {
// Percentages relative to the average: the deviation as a fraction of the
// average, and min/max as signed offsets from it.
102 double relDev = (devVal/avgVal)*100;
103 double relMin = (as<double>(Teuchos::ScalarTraits<Type>::real(minVal))/avgVal-one)*100;
104 double relMax = (as<double>(Teuchos::ScalarTraits<Type>::real(maxVal))/avgVal-one)*100;
// NOTE(review): std::setw(7) is applied twice before relMin in the "min = "
// segment below; the second application is redundant.
105 buf <<
"avg = " << std::scientific << std::setw(10) << std::setprecision(2) << avgVal <<
", "
106 <<
"dev = " << std::fixed << std::setw(6) << std::setprecision(1) << relDev <<
"%, "
107 <<
"min = " << std::fixed << std::setw(7) << std::setprecision(1) << std::setw(7) << relMin <<
"%"
108 <<
" (" << std::scientific << std::setw(10) << std::setprecision(2) << minVal <<
" on " << std::fixed << std::setw(4) << minProc <<
"), "
109 <<
"max = " << std::fixed << std::setw(7) << std::setprecision(1) << relMax <<
"%"
110 <<
" (" << std::scientific << std::setw(10) << std::setprecision(2) << maxVal <<
" on " << std::fixed << std::setw(4) << maxProc <<
")";
// Absolute layout: the relative deviation is still printed, but it is forced
// to zero when the average is zero so no division by zero occurs.
112 double relDev = (avgVal != zero ? (devVal/avgVal)*100 : zero);
113 buf <<
"avg = " << std::scientific << std::setw(10) << std::setprecision(2) << avgVal <<
", "
114 <<
"dev = " << std::fixed << std::setw(6) << std::setprecision(1) << relDev <<
"%, "
115 <<
"min = " << std::scientific << std::setw(10) << std::setprecision(2) << minVal
116 <<
" (on " << std::fixed << std::setw(4) << minProc <<
"), "
117 <<
"max = " << std::scientific << std::setw(10) << std::setprecision(2) << maxVal
118 <<
" (on " << std::fixed << std::setw(4) << maxProc <<
")";
// cmp_less: strict "less than" comparator for map entries that orders two
// elements by their mapped value (.second) and ignores the key (.first).
// It is passed to std::min_element / std::max_element over neighMap elsewhere
// in this file to find the smallest and largest per-neighbor counts.
// NOTE(review): the template header introducing `Map` and the closing brace
// are not visible in this listing.
124 bool cmp_less(
typename Map::value_type& v1,
typename Map::value_type& v2) {
125 return v1.second < v2.second;
// PrintMatrixInfo (body fragment): assembles a textual report about matrix
// `A` in the stream `ss`, each line prefixed with `msgTag`.
//
// The report always starts with the global size (and the global number of
// nonzeros when available). Depending on boolean entries in `params` it then
// adds:
//   "printLoadBalancingInfo" - rows / nonzeros per process
//   "printCommInfo"          - export/import sizes and message counts
//   "printEntryStats"        - min/max of diagonal entries and of all entries
// Finally the resulting string is broadcast from `root` to every rank.
//
// NOTE(review): this listing is incomplete. The function signature, several
// branch bodies, the declarations of neighMap/numMsgs/minMsg/maxMsg/root/
// outstr and the return statement are not visible here.
128 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
133 typedef Xpetra::global_size_t global_size_t;
135 std::ostringstream ss;
137 ss << msgTag <<
" size = " << A.getGlobalNumRows() <<
" x " << A.getGlobalNumCols();
// The global nonzero count is appended only when the matrix reports that its
// global constants are available.
138 if(A.haveGlobalConstants())
139 ss <<
", nnz = " << A.getGlobalNumEntries();
// Handling of a null parameter list: the body of this branch is not visible
// in this listing.
142 if (params.is_null())
// Each optional section is off by default and is switched on only when the
// corresponding parameter exists and is true.
145 bool printLoadBalanceInfo =
false, printCommInfo =
false, printEntryStats =
false;
146 if (params->isParameter(
"printLoadBalancingInfo") && params->get<
bool>(
"printLoadBalancingInfo"))
147 printLoadBalanceInfo =
true;
148 if (params->isParameter(
"printCommInfo") && params->get<
bool>(
"printCommInfo"))
149 printCommInfo =
true;
150 if (params->isParameter(
"printEntryStats") && params->get<
bool>(
"printEntryStats"))
151 printEntryStats =
true;
// Nothing optional was requested; the body of this branch is not visible in
// this listing.
153 if (!printLoadBalanceInfo && !printCommInfo && !printEntryStats)
156 RCP<const Import> importer = A.getCrsGraph()->getImporter();
157 RCP<const Export> exporter = A.getCrsGraph()->getExporter();
159 size_t numMyNnz = A.getLocalNumEntries(), numMyRows = A.getLocalNumRows();
162 RCP<const Teuchos::Comm<int> > origComm = A.getRowMap()->getComm();
163 bool activeProc =
true;
164 int numProc = origComm->getSize();
165 int numActiveProcs = 0;
// The raw MPI communicator is extracted for the MPI_Bcast calls at the end.
// NOTE(review): mpiComm is dereferenced without a visible null check; confirm
// that origComm is always a Teuchos::MpiComm on this code path (any
// surrounding preprocessor guards are not visible in this listing).
167 RCP<const Teuchos::MpiComm<int> > mpiComm = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(origComm);
168 MPI_Comm rawComm = (*mpiComm->getRawMpiComm())();
// Every rank learns the local row count of every other rank.
170 std::vector<size_t> numRowsPerProc(numProc);
171 Teuchos::gatherAll(*origComm, 1, &numMyRows, numProc, &numRowsPerProc[0]);
// Scan over ranks that own at least one row; the body of the inner branch is
// not visible in this listing.
174 bool rootFlag =
true;
175 for (
int i = 0; i < numProc; i++) {
176 if (numRowsPerProc[i]) {
// A rank without rows is flagged inactive and its nonzero count is zeroed.
185 if(numMyRows == 0) {activeProc =
false; numMyNnz = 0;}
// Parameter list requesting absolute formatting; it is passed to stringStats
// for the "# num msgs" line below.
199 ParameterList absList;
200 absList.set(
"print abs",
true);
// If A is a CrsMatrixWrap, fetch the wrapped CrsMatrix; otherwise crsA stays
// null and the entry statistics section is skipped.
202 RCP<const Matrix> rcpA = rcpFromRef(A);
203 RCP<const CrsMatrixWrap> crsWrapA = rcp_dynamic_cast<const Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node> >(rcpA);
204 RCP<const CrsMatrix> crsA;
205 if (!crsWrapA.is_null())
206 crsA = crsWrapA->getCrsMatrix();
207 if (printEntryStats && !crsA.is_null()) {
208 typedef Teuchos::ScalarTraits<Scalar> STS;
209 typedef typename STS::magnitudeType magnitudeType;
210 typedef Teuchos::ScalarTraits<magnitudeType> MTS;
211 ArrayRCP<const size_t> rowptr_RCP;
212 ArrayRCP<const LocalOrdinal> colind_RCP;
213 ArrayRCP<const Scalar> vals_RCP;
214 ArrayRCP<size_t> offsets_RCP;
215 ArrayView<const size_t> rowptr;
216 ArrayView<const Scalar> vals;
217 ArrayView<size_t> offsets;
// Fetch the row pointers, column indices and values, plus the per-row offset
// of the diagonal entry.
// NOTE(review): the assignment of `vals` is not visible in this listing.
219 crsA->getAllValues(rowptr_RCP, colind_RCP, vals_RCP);
220 crsA->getLocalDiagOffsets(offsets_RCP);
221 rowptr = rowptr_RCP();
223 offsets = offsets_RCP();
225 Scalar val, minVal, maxVal;
226 magnitudeType absVal, minAbsVal, maxAbsVal;
// Seed the running extremes before scanning the diagonal.
// NOTE(review): maxVal starts at STS::rmin(). If rmin() is the smallest
// positive value rather than the most negative one, a set of all-negative
// entries would report a wrong maximum - confirm against the
// Teuchos::ScalarTraits documentation.
228 minVal = STS::rmax();
229 maxVal = STS::rmin();
230 minAbsVal = MTS::rmax();
231 maxAbsVal = MTS::zero();
// Diagonal pass: for row i the entry is read at rowptr[i] + offsets[i].
// Signed extremes compare real parts only; the bodies of the two `if`
// statements are not visible in this listing.
233 for (
int i = 0; i < offsets.size(); i++) {
234 val = vals[rowptr[i]+offsets[i]];
235 if (STS::real(val) < STS::real(minVal))
237 if (STS::real(val) > STS::real(maxVal))
239 absVal = STS::magnitude(val);
240 minAbsVal = std::min(minAbsVal, absVal);
241 maxAbsVal = std::max(maxAbsVal, absVal);
// Each local extreme is itself summarised across processes by stringStats.
244 ss << msgTag <<
" diag min : " << stringStats<Scalar>(origComm, numActiveProcs, minVal) << std::endl;
245 ss << msgTag <<
" diag max : " << stringStats<Scalar>(origComm, numActiveProcs, maxVal) << std::endl;
246 ss << msgTag <<
" abs(diag) min : " << stringStats<Scalar>(origComm, numActiveProcs, minAbsVal) << std::endl;
247 ss << msgTag <<
" abs(diag) max : " << stringStats<Scalar>(origComm, numActiveProcs, maxAbsVal) << std::endl;
// Reset the running extremes before the pass over all stored entries.
251 minVal = STS::rmax();
252 maxVal = STS::rmin();
253 minAbsVal = MTS::rmax();
254 maxAbsVal = MTS::zero();
// Full pass over every stored value.
// NOTE(review): the statement that loads `val` inside this loop and the
// bodies of the two `if` statements are not visible in this listing.
256 for (
int i = 0; i < vals.size(); i++) {
258 if (STS::real(val) < STS::real(minVal))
260 if (STS::real(val) > STS::real(maxVal))
262 absVal = STS::magnitude(val);
263 minAbsVal = std::min(minAbsVal, absVal);
264 maxAbsVal = std::max(maxAbsVal, absVal);
267 ss << msgTag <<
" entry min : " << stringStats<Scalar>(origComm, numActiveProcs, minVal) << std::endl;
268 ss << msgTag <<
" entry max : " << stringStats<Scalar>(origComm, numActiveProcs, maxVal) << std::endl;
269 ss << msgTag <<
" abs(entry) min : " << stringStats<Scalar>(origComm, numActiveProcs, minAbsVal) << std::endl;
270 ss << msgTag <<
" abs(entry) max : " << stringStats<Scalar>(origComm, numActiveProcs, maxAbsVal) << std::endl;
// Load-balancing section: active process count and per-process row / nonzero
// statistics.
275 if (printLoadBalanceInfo) {
276 ss << msgTag <<
" Load balancing info" << std::endl;
277 ss << msgTag <<
" # active processes: " << numActiveProcs <<
"/" << numProc << std::endl;
278 ss << msgTag <<
" # rows per proc : " << stringStats<global_size_t>(origComm, numActiveProcs, numMyRows) << std::endl;
279 ss << msgTag <<
" # nnz per proc : " << stringStats<global_size_t>(origComm, numActiveProcs, numMyNnz) << std::endl;
// Communication section; skipped when exactly one process is active.
282 if (printCommInfo && numActiveProcs != 1) {
283 typedef std::map<int,size_t> map_type;
// Count how many export entries go to each destination process id.
// NOTE(review): the declaration of neighMap is not visible in this listing.
285 if (!importer.is_null()) {
286 ArrayView<const int> exportPIDs = importer->getExportPIDs();
287 if (exportPIDs.size())
288 for (
int i = 0; i < exportPIDs.size(); i++)
289 neighMap[exportPIDs[i]]++;
293 size_t numExportSend = 0;
294 size_t numImportSend = 0;
// Send counts default to 0 when the exporter / importer is null. The number
// of messages is the number of distinct destination ids; the smallest and
// largest per-destination counts default to 0 when neighMap is empty.
300 numExportSend = (!exporter.is_null() ? exporter->getNumExportIDs() : 0);
301 numImportSend = (!importer.is_null() ? importer->getNumExportIDs() : 0);
302 numMsgs = neighMap.size();
303 map_type::const_iterator it = std::min_element(neighMap.begin(), neighMap.end(), cmp_less<map_type>);
304 minMsg = (it != neighMap.end() ? it->second : 0);
305 it = std::max_element(neighMap.begin(), neighMap.end(), cmp_less<map_type>);
306 maxMsg = (it != neighMap.end() ? it->second : 0);
// Only the "# num msgs" line passes absList, i.e. requests absolute output.
309 ss << msgTag <<
" Communication info" << std::endl;
310 ss << msgTag <<
" # num export send : " << stringStats<global_size_t>(origComm, numActiveProcs, numExportSend) << std::endl;
311 ss << msgTag <<
" # num import send : " << stringStats<global_size_t>(origComm, numActiveProcs, numImportSend) << std::endl;
312 ss << msgTag <<
" # num msgs : " << stringStats<global_size_t>(origComm, numActiveProcs, numMsgs, rcpFromRef(absList)) << std::endl;
313 ss << msgTag <<
" # min msg size : " << stringStats<global_size_t>(origComm, numActiveProcs, minMsg) << std::endl;
314 ss << msgTag <<
" # max msg size : " << stringStats<global_size_t>(origComm, numActiveProcs, maxMsg) << std::endl;
// Broadcast the report from `root`: first its length, so that the other
// ranks can size their buffer, then the characters themselves.
// NOTE(review): outstr.size() is narrowed to int for MPI_Bcast.
320 int strLength = outstr.size();
321 MPI_Bcast(&strLength, 1, MPI_INT, root, rawComm);
322 if (origComm->getRank() != root)
323 outstr.resize(strLength);
324 MPI_Bcast(&outstr[0], strLength, MPI_CHAR, root, rawComm);
// PrintImporterInfo (body fragment): assembles a "Communication info" report
// for `importer` in the stream `ss`, each line prefixed with `msgTag`, and
// broadcasts the resulting string from `root` to every rank.
//
// NOTE(review): this listing is incomplete. The function signature, the
// declarations of neighMap/numMsgs/minMsg/maxMsg/root/outstr and the return
// statement are not visible here.
330 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
333 typedef Xpetra::global_size_t global_size_t;
335 std::ostringstream ss;
// The communicator comes from the importer's source map; the active process
// count is set to the full communicator size.
338 RCP<const Teuchos::Comm<int> > origComm = importer->getSourceMap()->getComm();
339 bool activeProc =
true;
340 int numActiveProcs = origComm->getSize();
// The raw MPI communicator is extracted for the MPI_Bcast calls at the end.
// NOTE(review): mpiComm is dereferenced without a visible null check; confirm
// that origComm is always a Teuchos::MpiComm on this code path.
342 RCP<const Teuchos::MpiComm<int> > mpiComm = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(origComm);
343 MPI_Comm rawComm = (*mpiComm->getRawMpiComm())();
// Parameter list requesting absolute formatting; it is passed to stringStats
// for the "# num msgs" line below.
348 ParameterList absList;
349 absList.set(
"print abs",
true);
351 typedef std::map<int,size_t> map_type;
// Count how many export entries go to each destination process id.
353 ArrayView<const int> exportPIDs = importer->getExportPIDs();
354 if (exportPIDs.size())
355 for (
int i = 0; i < exportPIDs.size(); i++)
356 neighMap[exportPIDs[i]]++;
359 size_t numImportSend = 0;
// The number of messages is the number of distinct destination ids; the
// smallest and largest per-destination counts default to 0 when neighMap is
// empty.
365 numImportSend = importer->getNumExportIDs();
366 numMsgs = neighMap.size();
367 map_type::const_iterator it = std::min_element(neighMap.begin(), neighMap.end(), cmp_less<map_type>);
368 minMsg = (it != neighMap.end() ? it->second : 0);
369 it = std::max_element(neighMap.begin(), neighMap.end(), cmp_less<map_type>);
370 maxMsg = (it != neighMap.end() ? it->second : 0);
// Only the "# num msgs" line passes absList, i.e. requests absolute output.
373 ss << msgTag <<
" Communication info" << std::endl;
374 ss << msgTag <<
" # num import send : " << stringStats<global_size_t>(origComm, numActiveProcs, numImportSend) << std::endl;
375 ss << msgTag <<
" # num msgs : " << stringStats<global_size_t>(origComm, numActiveProcs, numMsgs, rcpFromRef(absList)) << std::endl;
376 ss << msgTag <<
" # min msg size : " << stringStats<global_size_t>(origComm, numActiveProcs, minMsg) << std::endl;
377 ss << msgTag <<
" # max msg size : " << stringStats<global_size_t>(origComm, numActiveProcs, maxMsg) << std::endl;
// Broadcast the report from `root`: first its length, so that the other
// ranks can size their buffer, then the characters themselves.
// NOTE(review): outstr.size() is narrowed to int for MPI_Bcast.
383 int strLength = outstr.size();
384 MPI_Bcast(&strLength, 1, MPI_INT, root, rawComm);
385 if (origComm->getRank() != root)
386 outstr.resize(strLength);
387 MPI_Bcast(&outstr[0], strLength, MPI_CHAR, root, rawComm);
// CommPattern (body fragment): writes one line describing this rank's export
// neighbors for matrix `A` into the stream `out`. The line starts with
// `msgTag`, the rank and a colon, followed by one " neigh(weight)" item per
// run of identical process ids in the importer's export PID list.
//
// NOTE(review): this listing is incomplete. The function signature, the
// body of the null-importer branch, the declaration and updates of `weight`
// and the return statement are not visible here.
393 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
398 std::ostringstream out;
400 RCP<const Teuchos::Comm<int> > comm = A.getRowMap()->getComm();
401 int myRank = comm->getRank();
403 out << msgTag <<
" " << myRank <<
":";
// The importer is taken from the matrix graph; a missing graph yields a null
// importer instead of a null dereference.
405 RCP<const Import> importer = (A.getCrsGraph() != Teuchos::null ? A.getCrsGraph()->getImporter() : Teuchos::null);
406 if (importer.is_null()) {
411 ArrayView<const int> exportPIDs = importer->getExportPIDs();
413 if (exportPIDs.size()) {
// Walk the list and emit the current neighbor whenever the process id differs
// from the previous entry.
// NOTE(review): this yields one item per neighbor only if equal ids are
// contiguous in exportPIDs - confirm against the importer's ordering.
415 int neigh = exportPIDs[0];
417 for (
int i = 1; i < exportPIDs.size(); i++) {
418 if (exportPIDs[i] != exportPIDs[i-1]) {
419 out <<
" " << neigh <<
"(" << weight <<
")";
421 neigh = exportPIDs[i];
// The last run is emitted after the loop, together with the line terminator.
428 out <<
" " << neigh <<
"(" << weight <<
")" << std::endl;
434 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
441 bool hasCrsGraph =
true;
#define MueLu_maxAll(rcpComm, in, out)
#define MueLu_sumAll(rcpComm, in, out)
#define MueLu_minAll(rcpComm, in, out)
MueLu::DefaultScalar Scalar
static bool CheckMatrix(const Matrix &A)
static std::string PrintImporterInfo(RCP< const Import > importer, const std::string &msgTag)
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
static std::string CommPattern(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
Namespace for MueLu classes and methods.
bool cmp_less(typename Map::value_type &v1, typename Map::value_type &v2)
std::string stringStats(const RCP< const Teuchos::Comm< int > > &comm, int numActiveProcs, const Type &v, RCP< ParameterList > paramList=Teuchos::null)
void calculateStats(Type &minVal, Type &maxVal, double &avgVal, double &devVal, int &minProc, int &maxProc, const RCP< const Teuchos::Comm< int > > &comm, int numActiveProcs, const Type &v)