MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_AggregationPhase2bAlgorithm_kokkos_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP
47#define MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP
48
49#include <Teuchos_Comm.hpp>
50#include <Teuchos_CommHelpers.hpp>
51
52#include <Xpetra_Vector.hpp>
53
55
56#include "MueLu_Aggregates_kokkos.hpp"
57#include "MueLu_Exceptions.hpp"
58#include "MueLu_LWGraph_kokkos.hpp"
59#include "MueLu_Monitor.hpp"
60
61namespace MueLu {
62
63 // Try to stick unaggregated nodes into a neighboring aggregate if they are
64 // not already too big
65 template <class LocalOrdinal, class GlobalOrdinal, class Node>
67 BuildAggregates(const ParameterList& params,
68 const LWGraph_kokkos& graph,
69 Aggregates_kokkos& aggregates,
70 Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
71 LO& numNonAggregatedNodes) const {
72
73 if(params.get<bool>("aggregation: deterministic")) {
74 Monitor m(*this, "BuildAggregatesDeterministic");
75 BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes);
76 } else {
77 Monitor m(*this, "BuildAggregatesRandom");
78 BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
79 }
80
81 } // BuildAggregates
82
83 template <class LO, class GO, class Node>
85 BuildAggregatesRandom(const ParameterList& params,
86 const LWGraph_kokkos& graph,
87 Aggregates_kokkos& aggregates,
88 Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
89 LO& numNonAggregatedNodes) const {
90
91 const LO numRows = graph.GetNodeNumVertices();
92 const int myRank = graph.GetComm()->getRank();
93
94 auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
95 auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite);
96 auto colors = aggregates.GetGraphColors();
97 const LO numColors = aggregates.GetGraphNumColors();
98 const LO numLocalAggregates = aggregates.GetNumAggregates();
99
100 auto lclLWGraph = graph.getLocalLWGraph();
101
102 const LO defaultConnectWeight = 100;
103 const LO penaltyConnectWeight = 10;
104
105 Kokkos::View<LO*, device_type> aggWeight ("aggWeight", numLocalAggregates);
106 Kokkos::View<LO*, device_type> connectWeight("connectWeight", numRows);
107 Kokkos::View<LO*, device_type> aggPenalties ("aggPenalties", numLocalAggregates);
108
109 Kokkos::deep_copy(connectWeight, defaultConnectWeight);
110
111 // taw: by running the aggregation routine more than once there is a chance that also
112 // non-aggregated nodes with a node distance of two are added to existing aggregates.
113 // Assuming that the aggregate size is 3 in each direction running the algorithm only twice
114 // should be sufficient.
115 // lbv: If the prior phase of aggregation where run without specifying an aggregate size,
116 // the distance 2 coloring and phase 1 aggregation actually guarantee that only one iteration
117 // is needed to reach distance 2 neighbors.
118 int maxIters = 2;
119 int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
120 if(maxNodesPerAggregate == std::numeric_limits<int>::max()) {maxIters = 1;}
121 for (int iter = 0; iter < maxIters; ++iter) {
122 for(LO color = 1; color <= numColors; ++color) {
123 Kokkos::deep_copy(aggWeight, 0);
124
125 //the reduce counts how many nodes are aggregated by this phase,
126 //which will then be subtracted from numNonAggregatedNodes
127 LO numAggregated = 0;
128 Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion",
129 Kokkos::RangePolicy<execution_space>(0, numRows),
130 KOKKOS_LAMBDA (const LO i, LO& tmpNumAggregated) {
131 if (aggStat(i) != READY || colors(i) != color)
132 return;
133
134 auto neighOfINode = lclLWGraph.getNeighborVertices(i);
135 for (int j = 0; j < neighOfINode.length; j++) {
136 LO neigh = neighOfINode(j);
137
138 // We don't check (neigh != i), as it is covered by checking
139 // (aggStat[neigh] == AGGREGATED)
140 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
141 aggStat(neigh) == AGGREGATED)
142 Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)),
143 connectWeight(neigh));
144 }
145
146 int bestScore = -100000;
147 int bestAggId = -1;
148 int bestConnect = -1;
149
150 for (int j = 0; j < neighOfINode.length; j++) {
151 LO neigh = neighOfINode(j);
152
153 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
154 aggStat(neigh) == AGGREGATED) {
155 auto aggId = vertex2AggId(neigh, 0);
156 int score = aggWeight(aggId) - aggPenalties(aggId);
157
158 if (score > bestScore) {
159 bestAggId = aggId;
160 bestScore = score;
161 bestConnect = connectWeight(neigh);
162
163 } else if (aggId == bestAggId &&
164 connectWeight(neigh) > bestConnect) {
165 bestConnect = connectWeight(neigh);
166 }
167 }
168 }
169 if (bestScore >= 0) {
170 aggStat(i) = AGGREGATED;
171 vertex2AggId(i, 0) = bestAggId;
172 procWinner(i, 0) = myRank;
173
174 Kokkos::atomic_add(&aggPenalties(bestAggId), 1);
175 connectWeight(i) = bestConnect - penaltyConnectWeight;
176 tmpNumAggregated++;
177 }
178 }, numAggregated); //parallel_for
179 numNonAggregatedNodes -= numAggregated;
180 }
181 } // loop over maxIters
182
183 } // BuildAggregatesRandom
184
185
186
187 template <class LO, class GO, class Node>
189 BuildAggregatesDeterministic(const ParameterList& params,
190 const LWGraph_kokkos& graph,
191 Aggregates_kokkos& aggregates,
192 Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
193 LO& numNonAggregatedNodes) const {
194
195 const LO numRows = graph.GetNodeNumVertices();
196 const int myRank = graph.GetComm()->getRank();
197
198 auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
199 auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite);
200 auto colors = aggregates.GetGraphColors();
201 const LO numColors = aggregates.GetGraphNumColors();
202 LO numLocalAggregates = aggregates.GetNumAggregates();
203
204 auto lclLWGraph = graph.getLocalLWGraph();
205
206 const int defaultConnectWeight = 100;
207 const int penaltyConnectWeight = 10;
208
209 Kokkos::View<int*, device_type> connectWeight ("connectWeight", numRows);
210 Kokkos::View<int*, device_type> aggWeight ("aggWeight", numLocalAggregates);
211 Kokkos::View<int*, device_type> aggPenaltyUpdates("aggPenaltyUpdates", numLocalAggregates);
212 Kokkos::View<int*, device_type> aggPenalties ("aggPenalties", numLocalAggregates);
213
214 Kokkos::deep_copy(connectWeight, defaultConnectWeight);
215
216 // We do this cycle twice.
217 // I don't know why, but ML does it too
218 // taw: by running the aggregation routine more than once there is a chance that also
219 // non-aggregated nodes with a node distance of two are added to existing aggregates.
220 // Assuming that the aggregate size is 3 in each direction running the algorithm only twice
221 // should be sufficient.
222 int maxIters = 2;
223 int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
224 if(maxNodesPerAggregate == std::numeric_limits<int>::max()) {maxIters = 1;}
225 for (int iter = 0; iter < maxIters; ++iter) {
226 for(LO color = 1; color <= numColors; color++) {
227 Kokkos::deep_copy(aggWeight, 0);
228
229 //the reduce counts how many nodes are aggregated by this phase,
230 //which will then be subtracted from numNonAggregatedNodes
231 LO numAggregated = 0;
232 Kokkos::parallel_for("Aggregation Phase 2b: updating agg weights",
233 Kokkos::RangePolicy<execution_space>(0, numRows),
234 KOKKOS_LAMBDA (const LO i)
235 {
236 if (aggStat(i) != READY || colors(i) != color)
237 return;
238 auto neighOfINode = lclLWGraph.getNeighborVertices(i);
239 for (int j = 0; j < neighOfINode.length; j++) {
240 LO neigh = neighOfINode(j);
241 // We don't check (neigh != i), as it is covered by checking
242 // (aggStat[neigh] == AGGREGATED)
243 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
244 aggStat(neigh) == AGGREGATED)
245 Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)),
246 connectWeight(neigh));
247 }
248 });
249
250 Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion",
251 Kokkos::RangePolicy<execution_space>(0, numRows),
252 KOKKOS_LAMBDA (const LO i, LO& tmpNumAggregated)
253 {
254 if (aggStat(i) != READY || colors(i) != color)
255 return;
256 int bestScore = -100000;
257 int bestAggId = -1;
258 int bestConnect = -1;
259
260 auto neighOfINode = lclLWGraph.getNeighborVertices(i);
261 for (int j = 0; j < neighOfINode.length; j++) {
262 LO neigh = neighOfINode(j);
263
264 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
265 aggStat(neigh) == AGGREGATED) {
266 auto aggId = vertex2AggId(neigh, 0);
267 int score = aggWeight(aggId) - aggPenalties(aggId);
268
269 if (score > bestScore) {
270 bestAggId = aggId;
271 bestScore = score;
272 bestConnect = connectWeight(neigh);
273
274 } else if (aggId == bestAggId &&
275 connectWeight(neigh) > bestConnect) {
276 bestConnect = connectWeight(neigh);
277 }
278 }
279 }
280 if (bestScore >= 0) {
281 aggStat(i) = AGGREGATED;
282 vertex2AggId(i, 0) = bestAggId;
283 procWinner(i, 0) = myRank;
284
285 Kokkos::atomic_add(&aggPenaltyUpdates(bestAggId), 1);
286 connectWeight(i) = bestConnect - penaltyConnectWeight;
287 tmpNumAggregated++;
288 }
289 }, numAggregated); //parallel_reduce
290
291 Kokkos::parallel_for("Aggregation Phase 2b: updating agg penalties",
292 Kokkos::RangePolicy<execution_space>(0, numLocalAggregates),
293 KOKKOS_LAMBDA (const LO agg)
294 {
295 aggPenalties(agg) += aggPenaltyUpdates(agg);
296 aggPenaltyUpdates(agg) = 0;
297 });
298 numNonAggregatedNodes -= numAggregated;
299 }
300 } // loop over k
301 } // BuildAggregatesDeterministic
302} // end namespace
303
304#endif // MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP
void BuildAggregatesRandom(const ParameterList &params, const LWGraph_kokkos &graph, Aggregates_kokkos &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
void BuildAggregates(const ParameterList &params, const LWGraph_kokkos &graph, Aggregates_kokkos &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
Local aggregation.
void BuildAggregatesDeterministic(const ParameterList &params, const LWGraph_kokkos &graph, Aggregates_kokkos &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
Lightweight MueLu representation of a compressed row storage graph.
Timer to be used in non-factories.
Namespace for MueLu classes and methods.
@ AGGREGATED
Definition: MueLu_Types.hpp:73