MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_AggregationStructuredAlgorithm_kokkos_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_AGGREGATIONSTRUCTUREDALGORITHM_KOKKOS_DEF_HPP
47#define MUELU_AGGREGATIONSTRUCTUREDALGORITHM_KOKKOS_DEF_HPP
48
49
50#include <Teuchos_Comm.hpp>
51#include <Teuchos_CommHelpers.hpp>
52
53#include <Xpetra_MapFactory.hpp>
54#include <Xpetra_Map.hpp>
55#include <Xpetra_CrsGraphFactory.hpp>
56#include <Xpetra_CrsGraph.hpp>
57
58#include "MueLu_Exceptions.hpp"
59#include "MueLu_Monitor.hpp"
60
61#include "MueLu_LWGraph_kokkos.hpp"
62#include "MueLu_Aggregates_kokkos.hpp"
63#include "MueLu_IndexManager_kokkos.hpp"
65
66namespace MueLu {
67
68 template <class LocalOrdinal, class GlobalOrdinal, class Node>
70 BuildAggregates(const Teuchos::ParameterList& /* params */, const LWGraph_kokkos& graph,
71 Aggregates_kokkos& aggregates,
72 Kokkos::View<unsigned*, device_type>& aggStat,
73 LO& numNonAggregatedNodes) const {
74 Monitor m(*this, "BuildAggregates");
75
76 RCP<Teuchos::FancyOStream> out;
77 if(const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) {
78 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
79 out->setShowAllFrontMatter(false).setShowProcRank(true);
80 } else {
81 out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
82 }
83
84 RCP<IndexManager_kokkos> geoData = aggregates.GetIndexManager();
85 const LO numLocalFineNodes= geoData->getNumLocalFineNodes();
86 const LO numCoarseNodes = geoData->getNumCoarseNodes();
87 LOVectorView vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
88 LOVectorView procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite);
89
90 *out << "Loop over fine nodes and assign them to an aggregate and a rank" << std::endl;
91 LO numAggregatedNodes;
92 fillAggregatesFunctor fillAggregates(geoData,
93 graph.GetComm()->getRank(),
94 aggStat,
95 vertex2AggId,
96 procWinner);
97 Kokkos::parallel_reduce("StructuredAggregation: fill aggregates data",
98 Kokkos::RangePolicy<execution_space>(0, numLocalFineNodes),
99 fillAggregates,
100 numAggregatedNodes);
101
102 *out << "numCoarseNodes= " << numCoarseNodes
103 << ", numAggregatedNodes= " << numAggregatedNodes << std::endl;
104 numNonAggregatedNodes = numNonAggregatedNodes - numAggregatedNodes;
105
106 } // BuildAggregates()
107
108
109 template <class LocalOrdinal, class GlobalOrdinal, class Node>
111 BuildGraph(const LWGraph_kokkos& graph, RCP<IndexManager_kokkos>& geoData, const LO dofsPerNode,
112 RCP<CrsGraph>& myGraph) const {
113 Monitor m(*this, "BuildGraphP");
114
115 RCP<Teuchos::FancyOStream> out;
116 if(const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) {
117 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
118 out->setShowAllFrontMatter(false).setShowProcRank(true);
119 } else {
120 out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
121 }
122
123 // Compute the number of coarse points needed to interpolate quantities to a fine point
124 int numInterpolationPoints = 0;
125 if(geoData->getInterpolationOrder() == 0) {
126 numInterpolationPoints = 1;
127 } else if(geoData->getInterpolationOrder() == 1) {
128 // Compute 2^numDimensions using bit logic to avoid round-off errors from std::pow()
129 numInterpolationPoints = 1 << geoData->getNumDimensions();
130 }
131 *out << "numInterpolationPoints=" << numInterpolationPoints << std::endl;
132
133 const LO numLocalFineNodes = geoData->getNumLocalFineNodes();
134 const LO numCoarseNodes = geoData->getNumCoarseNodes();
135 const LO numNnzEntries = dofsPerNode*(numCoarseNodes + numInterpolationPoints
136 *(numLocalFineNodes - numCoarseNodes));
137
138 non_const_row_map_type rowPtr("Prolongator graph, rowPtr", dofsPerNode*(numLocalFineNodes + 1));
139 entries_type colIndex("Prolongator graph, colIndices", numNnzEntries);
140
141 *out << "Compute prolongatorGraph data" << std::endl;
142 if(geoData->getInterpolationOrder() == 0) {
143 computeGraphDataConstantFunctor computeGraphData(geoData,
144 numCoarseNodes,
145 dofsPerNode,
146 geoData->getCoarseningRates(),
147 geoData->getCoarseningEndRates(),
148 geoData->getLocalFineNodesPerDir(),
149 rowPtr,
150 colIndex);
151 Kokkos::parallel_for("Structured Aggregation: compute loca graph data",
152 Kokkos::RangePolicy<execution_space>(0, numLocalFineNodes),
153 computeGraphData);
154 } else if(geoData->getInterpolationOrder() == 1) {
155 // Note, lbv 2018-11-08: in the piece-wise linear case I am computing the rowPtr
156 // using a parallel scan, it might be possible to do something faster than that
157 // by including this calculation in computeGraphDataLinearFunctor but at the moment
158 // all the ideas I have include a bunch of if statements which I would like to avoid.
159 computeGraphRowPtrFunctor computeGraphRowPtr(geoData,
160 dofsPerNode,
161 numInterpolationPoints,
162 numLocalFineNodes,
163 geoData->getCoarseningRates(),
164 geoData->getLocalFineNodesPerDir(),
165 rowPtr);
166 Kokkos::parallel_scan("Structured Aggregation: compute rowPtr for prolongator graph",
167 Kokkos::RangePolicy<execution_space>(0, numLocalFineNodes + 1),
168 computeGraphRowPtr);
169
170 computeGraphDataLinearFunctor computeGraphData(geoData,
171 geoData->getNumDimensions(),
172 numCoarseNodes,
173 dofsPerNode,
174 numInterpolationPoints,
175 geoData->getCoarseningRates(),
176 geoData->getCoarseningEndRates(),
177 geoData->getLocalFineNodesPerDir(),
178 geoData->getCoarseNodesPerDir(),
179 rowPtr,
180 colIndex);
181 Kokkos::parallel_for("Structured Aggregation: compute loca graph data",
182 Kokkos::RangePolicy<execution_space>(0, numLocalFineNodes),
183 computeGraphData);
184 }
185
186 local_graph_type myLocalGraph(colIndex, rowPtr);
187
188 // Compute graph's colMap and domainMap
189 RCP<Map> colMap, domainMap;
190 *out << "Compute domain and column maps of the CrsGraph" << std::endl;
191 colMap = MapFactory::Build(graph.GetDomainMap()->lib(),
192 Teuchos::OrdinalTraits<GO>::invalid(),
193 numCoarseNodes,
194 graph.GetDomainMap()->getIndexBase(),
195 graph.GetDomainMap()->getComm());
196 domainMap = colMap;
197
198 myGraph = CrsGraphFactory::Build(myLocalGraph, graph.GetDomainMap(), colMap,
199 colMap, graph.GetDomainMap());
200
201 } // BuildGraph()
202
203
204 template <class LocalOrdinal, class GlobalOrdinal, class Node>
206 fillAggregatesFunctor::fillAggregatesFunctor(RCP<IndexManager_kokkos> geoData,
207 const int myRank,
208 Kokkos::View<unsigned*, device_type> aggStat,
209 LOVectorView vertex2AggID,
210 LOVectorView procWinner) :
211 geoData_(*geoData), myRank_(myRank), aggStat_(aggStat),
212 vertex2AggID_(vertex2AggID), procWinner_(procWinner) {}
213
214 template <class LocalOrdinal, class GlobalOrdinal, class Node>
215 KOKKOS_INLINE_FUNCTION
217 fillAggregatesFunctor::operator() (const LO nodeIdx, LO& lNumAggregatedNodes) const {
218 // Compute coarse ID associated with fine LID
219 LO rem, rate;
220 LO coarseNodeCoarseLID;
221 LO nodeFineTuple[3], coarseIdx[3];
222 auto coarseRate = geoData_.getCoarseningRates();
223 auto endRate = geoData_.getCoarseningEndRates();
224 auto lFineNodesPerDir = geoData_.getLocalFineNodesPerDir();
225 // Compute coarse ID associated with fine LID
226 geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple);
227
228 for(int dim = 0; dim < 3; ++dim) {
229 coarseIdx[dim] = nodeFineTuple[dim] / coarseRate(dim);
230 rem = nodeFineTuple[dim] % coarseRate(dim);
231 rate = (nodeFineTuple[dim] < lFineNodesPerDir(dim) - endRate(dim)) ? coarseRate(dim) : endRate(dim);
232 if(rem > (rate / 2)) {++coarseIdx[dim];}
233 }
234
235 geoData_.getCoarseTuple2CoarseLID(coarseIdx[0], coarseIdx[1], coarseIdx[2],
236 coarseNodeCoarseLID);
237
238 vertex2AggID_(nodeIdx, 0) = coarseNodeCoarseLID;
239 procWinner_(nodeIdx, 0) = myRank_;
240 aggStat_(nodeIdx) = AGGREGATED;
241 ++lNumAggregatedNodes;
242
243 } // fillAggregatesFunctor::operator()
244
245 template <class LocalOrdinal, class GlobalOrdinal, class Node>
248 computeGraphDataConstantFunctor(RCP<IndexManager_kokkos> geoData,
249 const LO NumGhostedNodes,
250 const LO dofsPerNode,
251 constIntTupleView coarseRate,
252 constIntTupleView endRate,
253 constLOTupleView lFineNodesPerDir,
255 entries_type colIndex) : geoData_(*geoData),
256 numGhostedNodes_(NumGhostedNodes), dofsPerNode_(dofsPerNode),
257 coarseRate_(coarseRate), endRate_(endRate), lFineNodesPerDir_(lFineNodesPerDir),
258 rowPtr_(rowPtr), colIndex_(colIndex) {
259
260 } // computeGraphDataConstantFunctor()
261
262 template <class LocalOrdinal, class GlobalOrdinal, class Node>
263 KOKKOS_INLINE_FUNCTION
265 computeGraphDataConstantFunctor::operator() (const LO nodeIdx) const {
266 LO nodeFineTuple[3] = {0, 0, 0};
267 LO nodeCoarseTuple[3] = {0, 0, 0};
268
269 // Compute ghosted tuple associated with fine LID
270 geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple);
271
272 // Compute coarse tuple associated with fine point
273 // then overwrite it with tuple associated with aggregate
274 LO rem, rate, coarseNodeCoarseLID;
275 for(int dim = 0; dim < 3; ++dim) {
276 nodeCoarseTuple[dim] = nodeFineTuple[dim] / coarseRate_(dim);
277 rem = nodeFineTuple[dim] % coarseRate_(dim);
278 if( nodeFineTuple[dim] < (lFineNodesPerDir_(dim) - endRate_(dim)) ) {
279 rate = coarseRate_(dim);
280 } else {
281 rate = endRate_(dim);
282 }
283 if(rem > (rate / 2)) {++nodeCoarseTuple[dim];}
284 }
285
286 // get LID associted with aggregate
287 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2],
288 coarseNodeCoarseLID);
289
290 // store data into CrsGraph taking care of multiple dofs case
291 for(LO dof = 0; dof < dofsPerNode_; ++dof) {
292 rowPtr_(nodeIdx*dofsPerNode_ + dof + 1) = nodeIdx*dofsPerNode_ + dof + 1;
293 colIndex_(nodeIdx*dofsPerNode_ + dof) = coarseNodeCoarseLID*dofsPerNode_ + dof;
294 }
295
296 } // computeGraphDataConstantFunctor::operator()
297
298 template <class LocalOrdinal, class GlobalOrdinal, class Node>
300 computeGraphRowPtrFunctor::computeGraphRowPtrFunctor(RCP<IndexManager_kokkos> geoData,
301 const LO dofsPerNode,
302 const int numInterpolationPoints,
303 const LO numLocalRows,
304 constIntTupleView coarseRate,
305 constLOTupleView lFineNodesPerDir,
306 non_const_row_map_type rowPtr) :
307 geoData_(*geoData), dofsPerNode_(dofsPerNode),
308 numInterpolationPoints_(numInterpolationPoints), numLocalRows_(numLocalRows),
309 coarseRate_(coarseRate), lFineNodesPerDir_(lFineNodesPerDir), rowPtr_(rowPtr) {}
310
311 template <class LocalOrdinal, class GlobalOrdinal, class Node>
312 KOKKOS_INLINE_FUNCTION
314 computeGraphRowPtrFunctor::operator() (const LO rowIdx, GO& update, const bool final) const {
315 if (final) {
316 // Kokkos uses a multipass algorithm to implement scan.
317 // Only update the array on the final pass. Updating the
318 // array before changing 'update' means that we do an
319 // exclusive scan. Update the array after for an inclusive
320 // scan.
321 rowPtr_(rowIdx) = update;
322 }
323 if (rowIdx < numLocalRows_) {
324 LO nodeIdx = rowIdx / dofsPerNode_;
325 bool allCoarse = true;
326 LO nodeFineTuple[3] = {0, 0, 0};
327 geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple);
328 for(int dim = 0; dim < 3; ++dim) {
329 const LO rem = nodeFineTuple[dim] % coarseRate_(dim);
330
331 // Check if Fine node lies on Coarse Node
332 allCoarse = (allCoarse && ((rem == 0) || (nodeFineTuple[dim] == lFineNodesPerDir_(dim) - 1)));
333 }
334 update += (allCoarse ? 1 : numInterpolationPoints_);
335 }
336 } // computeGraphRowPtrFunctor::operator()
337
338 template <class LocalOrdinal, class GlobalOrdinal, class Node>
341 const int numDimensions,
342 const LO numGhostedNodes,
343 const LO dofsPerNode,
344 const int numInterpolationPoints,
345 constIntTupleView coarseRate,
346 constIntTupleView endRate,
347 constLOTupleView lFineNodesPerDir,
348 constLOTupleView ghostedNodesPerDir,
350 entries_type colIndex) :
351 geoData_(*geoData), numDimensions_(numDimensions),
352 numGhostedNodes_(numGhostedNodes),
353 dofsPerNode_(dofsPerNode), numInterpolationPoints_(numInterpolationPoints),
354 coarseRate_(coarseRate), endRate_(endRate), lFineNodesPerDir_(lFineNodesPerDir),
355 ghostedNodesPerDir_(ghostedNodesPerDir), rowPtr_(rowPtr), colIndex_(colIndex) {
356
357 } // computeGraphDataLinearFunctor()
358
359 template <class LocalOrdinal, class GlobalOrdinal, class Node>
360 KOKKOS_INLINE_FUNCTION
362 computeGraphDataLinearFunctor::operator() (const LO nodeIdx) const {
363 LO nodeFineTuple[3] = {0, 0, 0};
364 LO nodeCoarseTuple[3] = {0, 0, 0};
365
366 // Compute coarse ID associated with fine LID
367 geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple);
368
369 LO coarseNodeCoarseLID;
370 bool allCoarse = false;
371 for(int dim = 0; dim < 3; ++dim) {
372 nodeCoarseTuple[dim] = nodeFineTuple[dim] / coarseRate_(dim);
373 }
374 if(rowPtr_(nodeIdx + 1) == rowPtr_(nodeIdx) + 1) {allCoarse = true;}
375
376 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2],
377 coarseNodeCoarseLID);
378
379 if(allCoarse) {
380 // Fine node lies on Coarse node, easy case, we only need the LID of the coarse node.
381 for(LO dof = 0; dof < dofsPerNode_; ++dof) {
382 colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)) = coarseNodeCoarseLID*dofsPerNode_ + dof;
383 }
384 } else {
385
386 for(int dim = 0; dim < numDimensions_; ++dim) {
387 if(nodeCoarseTuple[dim] == ghostedNodesPerDir_(dim) - 1) { --nodeCoarseTuple[dim]; }
388 }
389 // Compute Coarse Node LID
390 // Note lbv 10-06-2018: it is likely benefitial to remove the two if statments and somehow
391 // find out the number of dimensions before calling the opertor() of the functor.
392 for(LO dof = 0; dof < dofsPerNode_; ++dof) {
393 geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+0));
394 geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0]+1, nodeCoarseTuple[1], nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+1));
395 if(numDimensions_ > 1) {
396 geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0], nodeCoarseTuple[1]+1, nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+2));
397 geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0]+1, nodeCoarseTuple[1]+1, nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+3));
398 if(numDimensions_ > 2) {
399 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+4));
400 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0]+1, nodeCoarseTuple[1], nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+5));
401 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1]+1, nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+6));
402 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0]+1, nodeCoarseTuple[1]+1, nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+7));
403 }
404 }
405 }
406 }
407 } // computeGraphDataLinearFunctor::operator()
408
409} // end namespace
410
411
412#endif /* MUELU_AGGREGATIONSTRUCTUREDALGORITHM_DEF_HPP_ */
decltype(std::declval< LOVector >().getDeviceLocalView(Xpetra::Access::ReadWrite)) LOVectorView
typename Kokkos::View< const int[3], device_type > constIntTupleView
typename local_graph_type::row_map_type::non_const_type non_const_row_map_type
void BuildAggregates(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates_kokkos &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
Build aggregates object.
typename Kokkos::View< const LO[3], device_type > constLOTupleView
void BuildGraph(const LWGraph_kokkos &graph, RCP< IndexManager_kokkos > &geoData, const LO dofsPerNode, RCP< CrsGraph > &myGraph) const
Build a CrsGraph instead of aggregates.
Lightweight MueLu representation of a compressed row storage graph.
Timer to be used in non-factories.
Namespace for MueLu classes and methods.
@ AGGREGATED
Definition: MueLu_Types.hpp:73
computeGraphDataConstantFunctor(RCP< IndexManager_kokkos > geoData, const LO numGhostedNodes, const LO dofsPerNode, constIntTupleView coarseRate, constIntTupleView endRate, constLOTupleView lFineNodesPerDir, non_const_row_map_type rowPtr, entries_type colIndex)
computeGraphDataLinearFunctor(RCP< IndexManager_kokkos > geoData, const int numDimensions, const LO numGhostedNodes, const LO dofsPerNode, const int numInterpolationPoints, constIntTupleView coarseRate, constIntTupleView endRate, constLOTupleView lFineNodesPerDir, constLOTupleView ghostedNodesPerDir, non_const_row_map_type rowPtr, entries_type colIndex)
computeGraphRowPtrFunctor(RCP< IndexManager_kokkos > geoData, const LO dofsPerNode, const int numInterpolationPoints, const LO numLocalRows, constIntTupleView coarseRate, constLOTupleView lFineNodesPerDir, non_const_row_map_type rowPtr)
KOKKOS_INLINE_FUNCTION void operator()(const LO rowIdx, GO &update, const bool final) const
KOKKOS_INLINE_FUNCTION void operator()(const LO nodeIdx, LO &lNumAggregatedNodes) const
fillAggregatesFunctor(RCP< IndexManager_kokkos > geoData, const int myRank, Kokkos::View< unsigned *, device_type > aggStat, LOVectorView vertex2AggID, LOVectorView procWinner)