46#ifndef MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_
47#define MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_
49#include <Xpetra_Map.hpp>
50#include <Xpetra_Vector.hpp>
51#include <Xpetra_MultiVectorFactory.hpp>
52#include <Xpetra_MapFactory.hpp>
53#include <Xpetra_VectorFactory.hpp>
55#include "KokkosKernels_Handle.hpp"
56#include "KokkosSparse_spgemm.hpp"
57#include "KokkosSparse_spmv.hpp"
61#include "MueLu_Aggregates.hpp"
67#include "MueLu_Utilities.hpp"
69#include "MueLu_Utilities_kokkos.hpp"
73 namespace NotayUtils {
74 template <
class LocalOrdinal>
76 return min + as<LocalOrdinal>((max-min+1) * (
static_cast<double>(std::rand()) / (RAND_MAX + 1.0)));
79 template <
class LocalOrdinal>
82 LO n = Teuchos::as<LO>(list.size());
83 for(LO i = 0; i < n-1; i++)
88 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
90 RCP<ParameterList> validParamList = rcp(
new ParameterList());
93#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
102 validParamList->set< RCP<const FactoryBase> >(
"A", null,
"Generating factory of the matrix");
103 validParamList->set< RCP<const FactoryBase> >(
"Graph", null,
"Generating factory of the graph");
104 validParamList->set< RCP<const FactoryBase> >(
"DofsPerNode", null,
"Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
105 validParamList->set< RCP<const FactoryBase> >(
"AggregateQualities", null,
"Generating factory for variable \'AggregateQualities\'");
108 return validParamList;
111 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
113 const ParameterList& pL = GetParameterList();
115 Input(currentLevel,
"A");
116 Input(currentLevel,
"Graph");
117 Input(currentLevel,
"DofsPerNode");
118 if (pL.get<
bool>(
"aggregation: compute aggregate qualities")) {
119 Input(currentLevel,
"AggregateQualities");
126 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
129 using STS = Teuchos::ScalarTraits<Scalar>;
130 using MT =
typename STS::magnitudeType;
132 const MT MT_TWO = Teuchos::ScalarTraits<MT>::one() + Teuchos::ScalarTraits<MT>::one();
134 RCP<Teuchos::FancyOStream> out;
135 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
136 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
137 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
139 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
142 const ParameterList& pL = GetParameterList();
144 const MT kappa =
static_cast<MT
>(pL.get<
double>(
"aggregation: Dirichlet threshold"));
145 TEUCHOS_TEST_FOR_EXCEPTION(kappa <= MT_TWO,
147 "Pairwise requires kappa > 2"
148 " otherwise all rows are considered as Dirichlet rows.");
152 if (pL.isParameter(
"aggregation: pairwise: size"))
153 maxNumIter = pL.get<
int>(
"aggregation: pairwise: size");
154 TEUCHOS_TEST_FOR_EXCEPTION(maxNumIter < 1,
156 "NotayAggregationFactory::Build(): \"aggregation: pairwise: size\""
157 " must be a strictly positive integer");
160 RCP<const GraphBase> graph = Get< RCP<GraphBase> >(currentLevel,
"Graph");
161 RCP<const Matrix> A = Get< RCP<Matrix> >(currentLevel,
"A");
164 RCP<Aggregates> aggregates = rcp(
new Aggregates(*graph));
165 aggregates->setObjectLabel(
"PW");
167 const LO numRows = graph->GetNodeNumVertices();
170 std::vector<unsigned> aggStat(numRows,
READY);
173 const int DofsPerNode = Get<int>(currentLevel,
"DofsPerNode");
175 "Pairwise only supports one dof per node");
182 std::string orderingStr = pL.get<std::string>(
"aggregation: ordering");
189 ordering = O_NATURAL;
190 if (orderingStr ==
"random" ) ordering = O_RANDOM;
191 else if(orderingStr ==
"natural") {}
192 else if(orderingStr ==
"cuthill-mckee" || orderingStr ==
"cm") ordering = O_CUTHILL_MCKEE;
201 Array<LO> orderingVector(numRows);
202 for (LO i = 0; i < numRows; i++)
203 orderingVector[i] = i;
204 if (ordering == O_RANDOM)
206 else if (ordering == O_CUTHILL_MCKEE) {
208 auto localVector = rcmVector->getData(0);
209 for (LO i = 0; i < numRows; i++)
210 orderingVector[i] = localVector[i];
214 LO numNonAggregatedNodes = numRows, numDirichletNodes = 0;
215 BuildInitialAggregates(pL, A, orderingVector(), kappa,
216 *aggregates, aggStat, numNonAggregatedNodes, numDirichletNodes);
218 "Initial pairwise aggregation failed to aggregate all nodes");
219 LO numLocalAggregates = aggregates->GetNumAggregates();
220 GetOStream(
Statistics0) <<
"Init : " << numLocalAggregates <<
" - "
221 << A->getLocalNumRows() / numLocalAggregates << std::endl;
230 LO numLocalDirichletNodes = numDirichletNodes;
231 Array<LO> localVertex2AggId(aggregates->GetVertex2AggId()->getData(0).view(0, numRows));
232 BuildOnRankLocalMatrix(A->getLocalMatrixDevice(), coarseLocalA);
233 for(LO aggregationIter = 1; aggregationIter < maxNumIter; ++aggregationIter) {
235 BuildIntermediateProlongator(coarseLocalA.numRows(), numLocalDirichletNodes, numLocalAggregates,
236 localVertex2AggId(), intermediateP);
239 BuildCoarseLocalMatrix(intermediateP, coarseLocalA);
244 std::vector<std::vector<LO> > agg2vertex(numLocalAggregates);
245 auto vertex2AggId = aggregates->GetVertex2AggId()->getData(0);
246 for(LO i=0; i<(LO)numRows; i++) {
249 LO agg=vertex2AggId[i];
250 agg2vertex[agg].push_back(i);
253 typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum);
254 for(LO i = 0; i < numRows; i++) {
258 int agg = vertex2AggId[i];
259 std::vector<LO> & myagg = agg2vertex[agg];
261 size_t nnz = A->getNumEntriesInLocalRow(i);
262 ArrayView<const LO> indices;
263 ArrayView<const SC> vals;
264 A->getLocalRowView(i, indices, vals);
266 SC mysum = Teuchos::ScalarTraits<Scalar>::zero();
267 for (LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
269 if(indices[colidx] < numRows) {
270 for(LO j=0; j<(LO)myagg.size(); j++)
271 if (vertex2AggId[indices[colidx]] == agg)
275 *out <<
"- ADDING col "<<indices[colidx]<<
" = "<<vals[colidx] << std::endl;
276 mysum += vals[colidx];
279 *out <<
"- NOT ADDING col "<<indices[colidx]<<
" = "<<vals[colidx] << std::endl;
283 rowSum_h[agg] = mysum;
285 Kokkos::deep_copy(rowSum, rowSum_h);
289 Array<LO> localOrderingVector(numRows);
290 for (LO i = 0; i < numRows; i++)
291 localOrderingVector[i] = i;
292 if (ordering == O_RANDOM)
294 else if (ordering == O_CUTHILL_MCKEE) {
296 auto localVector = rcmVector->getData(0);
297 for (LO i = 0; i < numRows; i++)
298 localOrderingVector[i] = localVector[i];
302 numLocalAggregates = 0;
303 numNonAggregatedNodes =
static_cast<LO
>(coarseLocalA.numRows());
304 std::vector<LO> localAggStat(numNonAggregatedNodes,
READY);
305 localVertex2AggId.resize(numNonAggregatedNodes, -1);
306 BuildFurtherAggregates(pL, A, localOrderingVector, coarseLocalA, kappa, rowSum,
307 localAggStat, localVertex2AggId,
308 numLocalAggregates, numNonAggregatedNodes);
312 numLocalDirichletNodes = 0;
315 RCP<LOMultiVector> vertex2AggIdMV = aggregates->GetVertex2AggId();
316 ArrayRCP<LO> vertex2AggId = vertex2AggIdMV->getDataNonConst(0);
317 for(
size_t vertexIdx = 0; vertexIdx < A->getLocalNumRows(); ++vertexIdx) {
318 LO oldAggIdx = vertex2AggId[vertexIdx];
319 if(oldAggIdx != Teuchos::OrdinalTraits<LO>::invalid()) {
320 vertex2AggId[vertexIdx] = localVertex2AggId[oldAggIdx];
325 GetOStream(
Statistics0) <<
"Iter " << aggregationIter <<
": " << numLocalAggregates <<
" - "
326 << A->getLocalNumRows() / numLocalAggregates << std::endl;
328 aggregates->SetNumAggregates(numLocalAggregates);
329 aggregates->AggregatesCrossProcessors(
false);
330 aggregates->ComputeAggregateSizes(
true);
333 Set(currentLevel,
"Aggregates", aggregates);
334 GetOStream(
Statistics0) << aggregates->description() << std::endl;
338 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
341 const RCP<const Matrix>& A,
342 const Teuchos::ArrayView<const LO> & orderingVector,
343 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType kappa,
345 std::vector<unsigned>& aggStat,
346 LO& numNonAggregatedNodes,
347 LO& numDirichletNodes)
const {
349 Monitor m(*
this,
"BuildInitialAggregates");
350 using STS = Teuchos::ScalarTraits<Scalar>;
351 using MT =
typename STS::magnitudeType;
352 using RealValuedVector = Xpetra::Vector<MT,LocalOrdinal,GlobalOrdinal,Node>;
354 RCP<Teuchos::FancyOStream> out;
355 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
356 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
357 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
359 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
363 const SC SC_ZERO = Teuchos::ScalarTraits<SC>::zero();
364 const MT MT_ZERO = Teuchos::ScalarTraits<MT>::zero();
365 const MT MT_ONE = Teuchos::ScalarTraits<MT>::one();
366 const MT MT_TWO = MT_ONE + MT_ONE;
367 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
368 const LO LO_ZERO = Teuchos::OrdinalTraits<LO>::zero();
370 const MT kappa_init = kappa / (kappa - MT_TWO);
371 const LO numRows = aggStat.size();
372 const int myRank = A->getMap()->getComm()->getRank();
376 double tie_criterion = params.get<
double>(
"aggregation: pairwise: tie threshold");
377 double tie_less = 1.0 - tie_criterion;
378 double tie_more = 1.0 + tie_criterion;
389 const ArrayRCP<const SC> D = ghostedDiag->getData(0);
390 const ArrayRCP<const SC> S = ghostedRowSum->getData(0);
391 const ArrayRCP<const MT> AbsRs = ghostedAbsRowSum->getData(0);
394 ArrayRCP<LO> vertex2AggId_rcp = aggregates.
GetVertex2AggId()->getDataNonConst(0);
395 ArrayRCP<LO> procWinner_rcp = aggregates.
GetProcWinner() ->getDataNonConst(0);
396 ArrayView<LO> vertex2AggId = vertex2AggId_rcp();
397 ArrayView<LO> procWinner = procWinner_rcp();
403 for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
404 MT aii = STS::magnitude(D[row]);
405 MT rowsum = AbsRs[row];
407 if(aii >= kappa_init * rowsum) {
408 *out <<
"Flagging index " << row <<
" as dirichlet "
409 "aii >= kappa*rowsum = " << aii <<
" >= " << kappa_init <<
" " << rowsum << std::endl;
411 --numNonAggregatedNodes;
418 LO aggIndex = LO_ZERO;
419 for(LO i = 0; i < numRows; i++) {
420 LO current_idx = orderingVector[i];
422 if(aggStat[current_idx] !=
READY)
425 MT best_mu = MT_ZERO;
426 LO best_idx = LO_INVALID;
428 size_t nnz = A->getNumEntriesInLocalRow(current_idx);
429 ArrayView<const LO> indices;
430 ArrayView<const SC> vals;
431 A->getLocalRowView(current_idx, indices, vals);
433 MT aii = STS::real(D[current_idx]);
434 MT si = STS::real(S[current_idx]);
435 for (LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
437 LO col = indices[colidx];
438 SC val = vals[colidx];
439 if(current_idx == col || col >= numRows || aggStat[col] !=
READY || val == SC_ZERO)
442 MT aij = STS::real(val);
443 MT ajj = STS::real(D[col]);
444 MT sj = - STS::real(S[col]);
445 if(aii - si + ajj - sj >= MT_ZERO) {
447 MT mu_top = MT_TWO / ( MT_ONE / aii + MT_ONE / ajj);
448 MT mu_bottom = - aij + MT_ONE / ( MT_ONE / (aii - si) + MT_ONE / (ajj - sj) );
449 MT mu = mu_top / mu_bottom;
452 if (mu > MT_ZERO && (best_idx == LO_INVALID || mu < best_mu * tie_less ||
453 (mu < best_mu*tie_more && orderingVector[col] < orderingVector[best_idx]))) {
456 *out <<
"[" << current_idx <<
"] Column UPDATED " << col <<
": "
457 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
458 <<
" = " << aii - si + ajj - sj<<
", aij = "<<aij <<
", mu = " << mu << std::endl;
461 *out <<
"[" << current_idx <<
"] Column NOT UPDATED " << col <<
": "
462 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
463 <<
" = " << aii - si + ajj - sj <<
", aij = "<<aij<<
", mu = " << mu << std::endl;
467 *out <<
"[" << current_idx <<
"] Column FAILED " << col <<
": "
468 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
469 <<
" = " << aii - si + ajj - sj << std::endl;
473 if(best_idx == LO_INVALID) {
474 *out <<
"No node buddy found for index " << current_idx
475 <<
" [agg " << aggIndex <<
"]\n" << std::endl;
480 aggStat[current_idx] =
ONEPT;
481 vertex2AggId[current_idx] = aggIndex;
482 procWinner[current_idx] = myRank;
483 numNonAggregatedNodes--;
488 if(best_mu <= kappa) {
489 *out <<
"Node buddies (" << current_idx <<
"," << best_idx <<
") [agg " << aggIndex <<
"]" << std::endl;
493 vertex2AggId[current_idx] = aggIndex;
494 vertex2AggId[best_idx] = aggIndex;
495 procWinner[current_idx] = myRank;
496 procWinner[best_idx] = myRank;
497 numNonAggregatedNodes-=2;
501 *out <<
"No buddy found for index " << current_idx <<
","
502 " but aggregating as singleton [agg " << aggIndex <<
"]" << std::endl;
504 aggStat[current_idx] =
ONEPT;
505 vertex2AggId[current_idx] = aggIndex;
506 procWinner[current_idx] = myRank;
507 numNonAggregatedNodes--;
513 *out <<
"vertex2aggid :";
514 for(
int i = 0; i < static_cast<int>(vertex2AggId.size()); ++i) {
515 *out << i <<
"(" << vertex2AggId[i] <<
")";
523 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
526 const RCP<const Matrix>& A,
527 const Teuchos::ArrayView<const LO> & orderingVector,
528 const typename Matrix::local_matrix_type& coarseA,
529 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType kappa,
530 const Kokkos::View<
typename Kokkos::ArithTraits<Scalar>::val_type*,
532 typename Matrix::local_matrix_type::device_type>& rowSum,
533 std::vector<LocalOrdinal>& localAggStat,
534 Teuchos::Array<LocalOrdinal>& localVertex2AggID,
535 LO& numLocalAggregates,
536 LO& numNonAggregatedNodes)
const {
537 Monitor m(*
this,
"BuildFurtherAggregates");
540 RCP<Teuchos::FancyOStream> out;
541 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
542 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
543 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
545 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
548 using value_type =
typename local_matrix_type::value_type;
549 const value_type KAT_zero = Kokkos::ArithTraits<value_type>::zero();
550 const magnitude_type MT_zero = Teuchos::ScalarTraits<magnitude_type>::zero();
551 const magnitude_type MT_one = Teuchos::ScalarTraits<magnitude_type>::one();
553 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid() ;
557 double tie_criterion = params.get<
double>(
"aggregation: pairwise: tie threshold");
558 double tie_less = 1.0 - tie_criterion;
559 double tie_more = 1.0 + tie_criterion;
561 typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum);
562 Kokkos::deep_copy(rowSum_h, rowSum);
567 const LO numRows =
static_cast<LO
>(coarseA.numRows());
568 typename local_matrix_type::values_type::HostMirror diagA_h(
"diagA host", numRows);
569 typename local_matrix_type::row_map_type::HostMirror row_map_h
570 = Kokkos::create_mirror_view(coarseA.graph.row_map);
571 Kokkos::deep_copy(row_map_h, coarseA.graph.row_map);
572 typename local_matrix_type::index_type::HostMirror entries_h
573 = Kokkos::create_mirror_view(coarseA.graph.entries);
574 Kokkos::deep_copy(entries_h, coarseA.graph.entries);
575 typename local_matrix_type::values_type::HostMirror values_h
576 = Kokkos::create_mirror_view(coarseA.values);
577 Kokkos::deep_copy(values_h, coarseA.values);
578 for(LO rowIdx = 0; rowIdx < numRows; ++rowIdx) {
579 for(LO entryIdx =
static_cast<LO
>(row_map_h(rowIdx));
580 entryIdx < static_cast<LO>(row_map_h(rowIdx + 1));
582 if(rowIdx ==
static_cast<LO
>(entries_h(entryIdx))) {
583 diagA_h(rowIdx) = values_h(entryIdx);
588 for(LO currentIdx = 0; currentIdx < numRows; ++currentIdx) {
589 if(localAggStat[currentIdx] !=
READY) {
593 LO bestIdx = Teuchos::OrdinalTraits<LO>::invalid();
594 magnitude_type best_mu = Teuchos::ScalarTraits<magnitude_type>::zero();
595 const magnitude_type aii = Teuchos::ScalarTraits<value_type>::real(diagA_h(currentIdx));
596 const magnitude_type si = Teuchos::ScalarTraits<value_type>::real(rowSum_h(currentIdx));
597 for(
auto entryIdx = row_map_h(currentIdx); entryIdx < row_map_h(currentIdx + 1); ++entryIdx) {
598 const LO colIdx =
static_cast<LO
>(entries_h(entryIdx));
599 if(currentIdx == colIdx || colIdx >= numRows || localAggStat[colIdx] !=
READY || values_h(entryIdx) == KAT_zero) {
603 const magnitude_type aij = Teuchos::ScalarTraits<value_type>::real(values_h(entryIdx));
604 const magnitude_type ajj = Teuchos::ScalarTraits<value_type>::real(diagA_h(colIdx));
605 const magnitude_type sj = - Teuchos::ScalarTraits<value_type>::real(rowSum_h(colIdx));
606 if(aii - si + ajj - sj >= MT_zero) {
607 const magnitude_type mu_top = MT_two / ( MT_one/aii + MT_one/ajj );
608 const magnitude_type mu_bottom = -aij + MT_one / (MT_one / (aii - si) + MT_one / (ajj - sj));
612 if (mu > MT_zero && (bestIdx == LO_INVALID || mu < best_mu * tie_less ||
613 (mu < best_mu*tie_more && orderingVector[colIdx] < orderingVector[bestIdx]))) {
616 *out <<
"[" << currentIdx <<
"] Column UPDATED " << colIdx <<
": "
617 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
618 <<
" = " << aii - si + ajj - sj <<
", aij = "<<aij<<
" mu = " << mu << std::endl;
621 *out <<
"[" << currentIdx <<
"] Column NOT UPDATED " << colIdx <<
": "
622 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
623 <<
" = " << aii - si + ajj - sj <<
", aij = "<<aij<<
", mu = " << mu << std::endl;
626 *out <<
"[" << currentIdx <<
"] Column FAILED " << colIdx <<
": "
627 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
628 <<
" = " << aii - si + ajj - sj << std::endl;
632 if(bestIdx == Teuchos::OrdinalTraits<LO>::invalid()) {
633 localAggStat[currentIdx] =
ONEPT;
634 localVertex2AggID[currentIdx] = numLocalAggregates;
635 --numNonAggregatedNodes;
636 ++numLocalAggregates;
638 if(best_mu <= kappa) {
639 *out <<
"Node buddies (" << currentIdx <<
"," << bestIdx <<
") [agg " << numLocalAggregates <<
"]" << std::endl;
642 localVertex2AggID[currentIdx] = numLocalAggregates;
643 --numNonAggregatedNodes;
646 localVertex2AggID[bestIdx] = numLocalAggregates;
647 --numNonAggregatedNodes;
649 ++numLocalAggregates;
651 *out <<
"No buddy found for index " << currentIdx <<
","
652 " but aggregating as singleton [agg " << numLocalAggregates <<
"]" << std::endl;
654 localAggStat[currentIdx] =
ONEPT;
655 localVertex2AggID[currentIdx] = numLocalAggregates;
656 --numNonAggregatedNodes;
657 ++numLocalAggregates;
664 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
667 typename Matrix::local_matrix_type& onrankA)
const {
668 Monitor m(*
this,
"BuildOnRankLocalMatrix");
671 RCP<Teuchos::FancyOStream> out;
672 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
673 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
674 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
676 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
679 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
680 using values_type =
typename local_matrix_type::values_type;
681 using size_type =
typename local_graph_type::size_type;
682 using col_index_type =
typename local_graph_type::data_type;
683 using array_layout =
typename local_graph_type::array_layout;
684 using memory_traits =
typename local_graph_type::memory_traits;
685 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
686 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
691 const int numRows =
static_cast<int>(localA.numRows());
692 row_pointer_type rowPtr(
"onrankA row pointer", numRows + 1);
693 typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr);
694 typename local_graph_type::row_map_type::HostMirror origRowPtr_h
695 = Kokkos::create_mirror_view(localA.graph.row_map);
696 typename local_graph_type::entries_type::HostMirror origColind_h
697 = Kokkos::create_mirror_view(localA.graph.entries);
698 typename values_type::HostMirror origValues_h
699 = Kokkos::create_mirror_view(localA.values);
700 Kokkos::deep_copy(origRowPtr_h, localA.graph.row_map);
701 Kokkos::deep_copy(origColind_h, localA.graph.entries);
702 Kokkos::deep_copy(origValues_h, localA.values);
706 for(
int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
707 for(size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
708 if(origColind_h(entryIdx) < numRows) {rowPtr_h(rowIdx + 1) += 1;}
710 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx + 1) + rowPtr_h(rowIdx);
712 Kokkos::deep_copy(rowPtr, rowPtr_h);
714 const LO nnzOnrankA = rowPtr_h(numRows);
717 col_indices_type colInd(
"onrankA column indices", rowPtr_h(numRows));
718 values_type values(
"onrankA values", rowPtr_h(numRows));
719 typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd);
720 typename values_type::HostMirror values_h = Kokkos::create_mirror_view(values);
722 for(
int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
724 for(size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
725 if(origColind_h(entryIdx) < numRows) {
726 colInd_h(rowPtr_h(rowIdx) + entriesInRow) = origColind_h(entryIdx);
727 values_h(rowPtr_h(rowIdx) + entriesInRow) = origValues_h(entryIdx);
732 Kokkos::deep_copy(colInd, colInd_h);
733 Kokkos::deep_copy(values, values_h);
736 nnzOnrankA, values, rowPtr, colInd);
739 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
744 const Teuchos::ArrayView<const LocalOrdinal>& localVertex2AggID,
745 typename Matrix::local_matrix_type& intermediateP)
const {
746 Monitor m(*
this,
"BuildIntermediateProlongator");
749 RCP<Teuchos::FancyOStream> out;
750 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
751 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
752 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
754 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
757 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
758 using values_type =
typename local_matrix_type::values_type;
759 using size_type =
typename local_graph_type::size_type;
760 using col_index_type =
typename local_graph_type::data_type;
761 using array_layout =
typename local_graph_type::array_layout;
762 using memory_traits =
typename local_graph_type::memory_traits;
763 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
764 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
766 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
768 const int intermediatePnnz = numRows - numDirichletNodes;
769 row_pointer_type rowPtr(
"intermediateP row pointer", numRows + 1);
770 col_indices_type colInd(
"intermediateP column indices", intermediatePnnz);
771 values_type values(
"intermediateP values", intermediatePnnz);
772 typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr);
773 typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd);
776 for(
int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
778 if(localVertex2AggID[rowIdx] == LO_INVALID) {
779 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx);
781 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx) + 1;
782 colInd_h(rowPtr_h(rowIdx)) = localVertex2AggID[rowIdx];
786 Kokkos::deep_copy(rowPtr, rowPtr_h);
787 Kokkos::deep_copy(colInd, colInd_h);
788 Kokkos::deep_copy(values, Kokkos::ArithTraits<typename values_type::value_type>::one());
791 numRows, numLocalAggregates, intermediatePnnz,
792 values, rowPtr, colInd);
795 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
798 typename Matrix::local_matrix_type& coarseA)
const {
799 Monitor m(*
this,
"BuildCoarseLocalMatrix");
801 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
802 using values_type =
typename local_matrix_type::values_type;
803 using size_type =
typename local_graph_type::size_type;
804 using col_index_type =
typename local_graph_type::data_type;
805 using array_layout =
typename local_graph_type::array_layout;
806 using memory_traits =
typename local_graph_type::memory_traits;
807 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
808 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
811 localSpGEMM(coarseA, intermediateP,
"AP", AP);
824 row_pointer_type rowPtrPt(Kokkos::ViewAllocateWithoutInitializing(
"Pt row pointer"),
825 intermediateP.numCols() + 1);
826 col_indices_type colIndPt(Kokkos::ViewAllocateWithoutInitializing(
"Pt column indices"),
827 intermediateP.nnz());
828 values_type valuesPt(Kokkos::ViewAllocateWithoutInitializing(
"Pt values"),
829 intermediateP.nnz());
831 typename row_pointer_type::HostMirror rowPtrPt_h = Kokkos::create_mirror_view(rowPtrPt);
832 typename col_indices_type::HostMirror entries_h = Kokkos::create_mirror_view(intermediateP.graph.entries);
833 Kokkos::deep_copy(entries_h, intermediateP.graph.entries);
834 Kokkos::deep_copy(rowPtrPt_h, 0);
835 for(size_type entryIdx = 0; entryIdx < intermediateP.nnz(); ++entryIdx) {
836 rowPtrPt_h(entries_h(entryIdx) + 1) += 1;
838 for(LO rowIdx = 0; rowIdx < intermediateP.numCols(); ++rowIdx) {
839 rowPtrPt_h(rowIdx + 1) += rowPtrPt_h(rowIdx);
841 Kokkos::deep_copy(rowPtrPt, rowPtrPt_h);
843 typename row_pointer_type::HostMirror rowPtrP_h = Kokkos::create_mirror_view(intermediateP.graph.row_map);
844 Kokkos::deep_copy(rowPtrP_h, intermediateP.graph.row_map);
845 typename col_indices_type::HostMirror colIndP_h = Kokkos::create_mirror_view(intermediateP.graph.entries);
846 Kokkos::deep_copy(colIndP_h, intermediateP.graph.entries);
847 typename values_type::HostMirror valuesP_h = Kokkos::create_mirror_view(intermediateP.values);
848 Kokkos::deep_copy(valuesP_h, intermediateP.values);
849 typename col_indices_type::HostMirror colIndPt_h = Kokkos::create_mirror_view(colIndPt);
850 typename values_type::HostMirror valuesPt_h = Kokkos::create_mirror_view(valuesPt);
851 const col_index_type invalidColumnIndex = KokkosSparse::OrdinalTraits<col_index_type>::invalid();
852 Kokkos::deep_copy(colIndPt_h, invalidColumnIndex);
854 col_index_type colIdx = 0;
855 for(LO rowIdx = 0; rowIdx < intermediateP.numRows(); ++rowIdx) {
856 for(size_type entryIdxP = rowPtrP_h(rowIdx); entryIdxP < rowPtrP_h(rowIdx + 1); ++entryIdxP) {
857 colIdx = entries_h(entryIdxP);
858 for(size_type entryIdxPt = rowPtrPt_h(colIdx); entryIdxPt < rowPtrPt_h(colIdx + 1); ++entryIdxPt) {
859 if(colIndPt_h(entryIdxPt) == invalidColumnIndex) {
860 colIndPt_h(entryIdxPt) = rowIdx;
861 valuesPt_h(entryIdxPt) = valuesP_h(entryIdxP);
868 Kokkos::deep_copy(colIndPt, colIndPt_h);
869 Kokkos::deep_copy(valuesPt, valuesPt_h);
873 intermediateP.numCols(),
874 intermediateP.numRows(),
876 valuesPt, rowPtrPt, colIndPt);
879 localSpGEMM(intermediatePt, AP,
"coarseA", coarseA);
882 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
884 localSpGEMM(
const typename Matrix::local_matrix_type& A,
885 const typename Matrix::local_matrix_type& B,
886 const std::string matrixLabel,
887 typename Matrix::local_matrix_type& C)
const {
889 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
890 using values_type =
typename local_matrix_type::values_type;
891 using size_type =
typename local_graph_type::size_type;
892 using col_index_type =
typename local_graph_type::data_type;
893 using array_layout =
typename local_graph_type::array_layout;
894 using memory_space =
typename device_type::memory_space;
895 using memory_traits =
typename local_graph_type::memory_traits;
896 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
897 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
900 int team_work_size = 16;
901 std::string myalg(
"SPGEMM_KK_MEMORY");
902 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
903 KokkosKernels::Experimental::KokkosKernelsHandle<
typename row_pointer_type::const_value_type,
904 typename col_indices_type::const_value_type,
905 typename values_type::const_value_type,
909 kh.create_spgemm_handle(alg_enum);
910 kh.set_team_work_size(team_work_size);
913 row_pointer_type rowPtrC(Kokkos::ViewAllocateWithoutInitializing(
"C row pointer"),
915 col_indices_type colIndC;
919 KokkosSparse::Experimental::spgemm_symbolic(&kh, A.numRows(),
920 B.numRows(), B.numCols(),
921 A.graph.row_map, A.graph.entries,
false,
922 B.graph.row_map, B.graph.entries,
false,
926 size_t nnzC = kh.get_spgemm_handle()->get_c_nnz();
928 colIndC = col_indices_type(Kokkos::ViewAllocateWithoutInitializing(
"C column inds"), nnzC);
929 valuesC = values_type(Kokkos::ViewAllocateWithoutInitializing(
"C values"), nnzC);
933 KokkosSparse::Experimental::spgemm_numeric(&kh, A.numRows(),
934 B.numRows(), B.numCols(),
935 A.graph.row_map, A.graph.entries, A.values,
false,
936 B.graph.row_map, B.graph.entries, B.values,
false,
937 rowPtrC, colIndC, valuesC);
938 kh.destroy_spgemm_handle();
940 C =
local_matrix_type(matrixLabel, A.numRows(), B.numCols(), nnzC, valuesC, rowPtrC, colIndC);
#define SET_VALID_ENTRY(name)
MueLu::DefaultLocalOrdinal LocalOrdinal
Container class for aggregation information.
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
void SetNumAggregates(LO nAggregates)
Set number of local aggregates on current processor.
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
Class that holds all level-specific information.
Timer to be used in non-factories.
typename Kokkos::View< impl_scalar_type *, Kokkos::LayoutLeft, device_type > row_sum_type
void BuildInitialAggregates(const Teuchos::ParameterList ¶ms, const RCP< const Matrix > &A, const ArrayView< const LO > &orderingVector, const magnitude_type kappa, Aggregates &aggregates, std::vector< unsigned > &aggStat, LO &numNonAggregatedNodes, LO &numDirichletNodes) const
Initial aggregation phase.
void BuildFurtherAggregates(const Teuchos::ParameterList ¶ms, const RCP< const Matrix > &A, const Teuchos::ArrayView< const LO > &orderingVector, const local_matrix_type &coarseA, const magnitude_type kappa, const row_sum_type &rowSum, std::vector< LO > &localAggStat, Array< LO > &localVertex2AggID, LO &numLocalAggregates, LO &numNonAggregatedNodes) const
Further aggregation phase increases coarsening rate by a factor of ~2 per iteration.
typename Teuchos::ScalarTraits< Scalar >::magnitudeType magnitude_type
void BuildIntermediateProlongator(const LO numRows, const LO numDirichletNodes, const LO numLocalAggregates, const ArrayView< const LO > &localVertex2AggID, local_matrix_type &intermediateP) const
Construction of a local prolongator with values equal to 1.0.
void BuildCoarseLocalMatrix(const local_matrix_type &intermediateP, local_matrix_type &coarseA) const
Implementation of a local Galerkin projection called inside BuildFurtherAggregates.
void BuildOnRankLocalMatrix(const local_matrix_type &localA, local_matrix_type &onRankA) const
typename device_type::execution_space execution_space
typename Matrix::local_matrix_type local_matrix_type
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void DeclareInput(Level ¤tLevel) const
Input.
void Build(Level ¤tLevel) const
Build aggregates.
void localSpGEMM(const local_matrix_type &A, const local_matrix_type &B, const std::string matrixLabel, local_matrix_type &C) const
Wrapper for kokkos-kernels' spgemm that takes in CrsMatrix.
static RCP< Vector > GetMatrixOverlappedDeletedRowsum(const Matrix &A)
Extract Overlapped Matrix Deleted Rowsum.
static RCP< Xpetra::Vector< Magnitude, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixOverlappedAbsDeletedRowsum(const Matrix &A)
static RCP< Xpetra::Vector< LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node > > CuthillMcKee(const Matrix &Op)
static RCP< Xpetra::Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixOverlappedDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
void RandomReorder(Teuchos::Array< LocalOrdinal > &list)
LocalOrdinal RandomOrdinal(LocalOrdinal min, LocalOrdinal max)
Namespace for MueLu classes and methods.
@ Statistics0
Print statistics that do not involve significant additional computation.