Zoltan2
Loading...
Searching...
No Matches
Zoltan2_MachineDragonflyRCAForTesting.hpp
Go to the documentation of this file.
1#ifndef _ZOLTAN2_MACHINE_DRAGONFLY_RCALIBTEST_HPP_
2#define _ZOLTAN2_MACHINE_DRAGONFLY_RCALIBTEST_HPP_
3
4#include <Teuchos_Comm.hpp>
5#include <Teuchos_CommHelpers.hpp>
6#include <Zoltan2_Machine.hpp>
7
8#include <cstdlib> /* srand, rand */
9#include <fstream>
10#include <string>
11
12namespace Zoltan2{
13
21template <typename pcoord_t, typename part_t>
22class MachineDragonflyRCAForTesting : public Machine <pcoord_t, part_t> {
23
24public:
33 MachineDragonflyRCAForTesting(const Teuchos::Comm<int> &comm):
34 Machine<pcoord_t,part_t>(comm),
35 transformed_networkDim(3),
36 actual_networkDim(3),
37 transformed_procCoords(NULL),
38 actual_procCoords(NULL),
39 transformed_machine_extent(NULL),
40 actual_machine_extent(NULL),
41 num_unique_groups(0),
42 group_count(NULL),
43 is_transformed(false),
44 pl(NULL) {
45
46 actual_machine_extent = new int[actual_networkDim];
47 this->getActualMachineExtent(this->actual_machine_extent);
48
49 // Number of ranks in each Dragonfly network group
50 // (i.e. RCA's X coord == Grp g)
51 group_count = new part_t[actual_machine_extent[0]];
52
53 memset(group_count, 0, sizeof(part_t) * actual_machine_extent[0]);
54
55 // Transformed dims = 1 + N_y + N_z
56 transformed_networkDim = 1 + actual_machine_extent[1] +
57 actual_machine_extent[2];
58 transformed_machine_extent = new int[transformed_networkDim];
59
60 // Allocate memory for processor coords
61 actual_procCoords = new pcoord_t *[actual_networkDim];
62 transformed_procCoords = new pcoord_t *[transformed_networkDim];
63
64 for (int i = 0; i < actual_networkDim; ++i) {
65 actual_procCoords[i] = new pcoord_t[this->numRanks];
66 memset(actual_procCoords[i], 0,
67 sizeof(pcoord_t) * this->numRanks);
68 }
69
70 pcoord_t *xyz = new pcoord_t[transformed_networkDim];
72 for (int i = 0; i < actual_networkDim; ++i)
73 actual_procCoords[i][this->myRank] = xyz[i];
74 delete [] xyz;
75
76 // Gather number of ranks in each Dragonfly network group
77 // from across all ranks
78 part_t *tmp_vec = new part_t[actual_machine_extent[0]];
79 memset(tmp_vec, 0, sizeof(part_t) * actual_machine_extent[0]);
80
81 Teuchos::reduceAll<int, part_t>(comm, Teuchos::REDUCE_SUM,
82 actual_machine_extent[0],
83 group_count,
84 tmp_vec);
85
86 // remove zero entries from reduced array
87 num_unique_groups = 0;
88
89 for (int i = 0; i < actual_machine_extent[0]; ++i) {
90 if (tmp_vec[i] > 0) {
91 ++num_unique_groups;
92 }
93 }
94
95 // Reset group_count array to new size
96 delete[] group_count;
97 group_count = new part_t[num_unique_groups];
98
99 int pos = 0;
100 for (int i = 0; i < actual_machine_extent[0]; ++i) {
101 if (tmp_vec[i] > 0) {
102 group_count[pos] = tmp_vec[i];
103 ++pos;
104 }
105 }
106
107 delete[] tmp_vec;
108
109 // reduceAll the coordinates of each processor.
110 gatherMachineCoordinates(this->actual_procCoords,
111 this->actual_networkDim, comm);
112 }
113
114 // No necessary wrap arounds for dragonfly networks. Groups
115 // have wrap around, but group all-to-all connection makes unneccessary.
116 virtual bool getMachineExtentWrapArounds(bool *wrap_around) const {
117 return false;
118 }
119
120
130 MachineDragonflyRCAForTesting(const Teuchos::Comm<int> &comm,
131 const Teuchos::ParameterList &pl_ ):
132 Machine<pcoord_t,part_t>(comm),
133 transformed_networkDim(3),
134 actual_networkDim(3),
135 transformed_procCoords(NULL),
136 actual_procCoords(NULL),
137 transformed_machine_extent(NULL),
138 actual_machine_extent(NULL),
139 num_unique_groups(0),
140 group_count(NULL),
141 is_transformed(false),
142 pl(&pl_) {
143
144 actual_machine_extent = new int[actual_networkDim];
145 this->getActualMachineExtent(this->actual_machine_extent);
146
147 // Number of parts in each Dragonfly network group
148 // (i.e. RCA's X coord == Grp g)
149 group_count = new part_t[actual_machine_extent[0]];
150
151 memset(group_count, 0, sizeof(part_t) * actual_machine_extent[0]);
152
153 // Allocate memory for processor coords
154 actual_procCoords = new pcoord_t *[actual_networkDim];
155 transformed_procCoords = new pcoord_t *[transformed_networkDim];
156
157 pcoord_t *xyz = new pcoord_t[actual_networkDim];
159
160 // Gather number of ranks in each Dragonfly network group
161 // from across all ranks
162 part_t *tmp_vec = new part_t[actual_machine_extent[0]];
163 memset(tmp_vec, 0, sizeof(part_t) * actual_machine_extent[0]);
164
165 Teuchos::reduceAll<int, part_t>(comm, Teuchos::REDUCE_SUM,
166 actual_machine_extent[0],
167 group_count,
168 tmp_vec);
169
170 // remove zero entries from reduced vector
171 num_unique_groups = 0;
172
173 for (int i = 0; i < actual_machine_extent[0]; ++i) {
174 if (tmp_vec[i] > 0) {
175 ++num_unique_groups;
176 }
177 }
178
179 // Reset group_count array to new size (# of nonzero groups)
180 delete[] group_count;
181 group_count = new part_t[num_unique_groups];
182
183 int pos = 0;
184 for (int i = 0; i < actual_machine_extent[0]; ++i)
185 {
186 if (tmp_vec[i] > 0) {
187 group_count[pos] = tmp_vec[i];
188 ++pos;
189 }
190 }
191 delete[] tmp_vec;
192
193 const Teuchos::ParameterEntry *pe2 =
194 this->pl->getEntryPtr("Machine_Optimization_Level");
195
196 // Transform with mach opt level
197 if (pe2) {
198 int optimization_level;
199 optimization_level = pe2->getValue<int>(&optimization_level);
200
201 if (optimization_level > 0) {
202 is_transformed = true;
203
204 // Transformed dims = 1 + N_y + N_z
205 transformed_networkDim = 1 + actual_machine_extent[1] +
206 actual_machine_extent[2];
207 transformed_machine_extent = new int[transformed_networkDim];
208
209 transformed_procCoords = new pcoord_t *[transformed_networkDim];
210
211 // Allocate memory for transformed coordinates
212 for (int i = 0; i < transformed_networkDim; ++i) {
213 transformed_procCoords[i] = new pcoord_t[this->numRanks];
214 memset(transformed_procCoords[i], 0,
215 sizeof(pcoord_t) * this->numRanks);
216 }
217
218 // Calculate transformed coordinates and machine extents
219 int nx = this->actual_machine_extent[0];
220 int ny = this->actual_machine_extent[1];
221 int nz = this->actual_machine_extent[2];
222
223 const Teuchos::ParameterEntry *pe_x =
224 this->pl->getEntryPtr("Machine_X_Stretch");
225 const Teuchos::ParameterEntry *pe_y =
226 this->pl->getEntryPtr("Machine_Y_Stretch");
227 const Teuchos::ParameterEntry *pe_z =
228 this->pl->getEntryPtr("Machine_Z_Stretch");
229
230 // Default X,Y,Z stretches
231 int x_stretch = 3;
232 int y_stretch = 2;
233 int z_stretch = 1;
234
235 if (pe_x)
236 x_stretch = pe_x->getValue<int>(&x_stretch);
237 if (pe_y)
238 y_stretch = pe_y->getValue<int>(&y_stretch);
239 if (pe_x)
240 z_stretch = pe_z->getValue<int>(&z_stretch);
241
242 // Transform X coords
243 transformed_procCoords[0][this->myRank] =
244 x_stretch * xyz[0] * ny * nz;
245
246 // Transform Y coords
247 for (int i = 1; i < 1 + ny; ++i) {
248 // Shift y-coord given a group, xyz[0];
249 transformed_procCoords[i][this->myRank] = 0;
250 // Increment in the dim where y-coord present
251 if (xyz[1] == i - 1) {
252 transformed_procCoords[i][this->myRank] = y_stretch;
253 }
254 }
255 // Transform Z coords
256 for (int i = 1 + ny; i < transformed_networkDim; ++i) {
257 // Shift z-coord given a group, xyz[0];
258 transformed_procCoords[i][this->myRank] = 0;
259 // Increment in the dim where z-coord present
260 if (xyz[2] == i - (1 + ny))
261 transformed_procCoords[i][this->myRank] = z_stretch;
262 }
263
264 this->transformed_machine_extent = new int[transformed_networkDim];
265
266 // Maximum extents in shifted high dim coordinate system
267 this->transformed_machine_extent[0] = x_stretch * (nx - 1) * ny * nz;
268 for (int i = 1; i < 1 + ny; ++i) {
269 this->transformed_machine_extent[i] = y_stretch;
270 }
271 for (int i = 1 + ny; i < transformed_networkDim; ++i) {
272 this->transformed_machine_extent[i] = z_stretch;
273 }
274
275 // reduceAll the transformed coordinates of each processor.
276 gatherMachineCoordinates(this->transformed_procCoords,
277 this->transformed_networkDim, comm);
278
279 this->printAllocation();
280 }
281 }
282 // If no coordinate transformation, gather actual coords
283 if (!is_transformed) {
284
285 for (int i = 0; i < actual_networkDim; ++i) {
286 actual_procCoords[i] = new pcoord_t[this->numRanks];
287 memset(actual_procCoords[i], 0,
288 sizeof(pcoord_t) * this->numRanks);
289 }
290
291 for (int i = 0; i < actual_networkDim; ++i)
292 actual_procCoords[i][this->myRank] = xyz[i];
293
294 // reduceAll the actual coordinates of each processor
295 gatherMachineCoordinates(this->actual_procCoords,
296 this->actual_networkDim, comm);
297
298 this->printAllocation();
299 }
300 delete [] xyz;
301 }
302
303 // Destructor
305 if (is_transformed) {
306 is_transformed = false;
307 if (this->numRanks > 1) {
308 for (int i = 0; i < transformed_networkDim; ++i) {
309 delete [] transformed_procCoords[i];
310 }
311 }
312 delete [] transformed_machine_extent;
313 }
314 else {
315 if (this->numRanks > 1) {
316 for (int i = 0; i < actual_networkDim; ++i) {
317 delete [] actual_procCoords[i];
318 }
319 }
320 }
321
322 delete [] actual_procCoords;
323 delete [] transformed_procCoords;
324
325 delete [] actual_machine_extent;
326 delete [] group_count;
327 }
328
329 bool hasMachineCoordinates() const { return true; }
330
331 // Return dimensions of coords, transformed or actual
332 int getMachineDim() const {
333 if (is_transformed)
334 return this->transformed_networkDim;
335 else
336 return this->actual_networkDim;
337 }
338
339 // Return the transformed maximum machine extents
340 bool getTransformedMachineExtent(int *nxyz) const {
341 if (is_transformed) {
342 for (int dim = 0; dim < transformed_networkDim; ++dim)
343 nxyz[dim] = this->transformed_machine_extent[dim];
344
345 return true;
346 }
347 else
348 return false;
349 }
350
351 // Return the fake "RCA" machine extents for testing
352 bool getActualMachineExtent(int *nxyz) const {
353/*
354#if defined (HAVE_ZOLTAN2_RCALIB)
355 mesh_coord_t mxyz;
356 rca_get_max_dimension(&mxyz);
357
358 int dim = 0;
359 nxyz[dim++] = mxyz.mesh_x + 1; // X - group [0, ~100]
360 nxyz[dim++] = mxyz.mesh_y + 1; // Y - row within group [0, 5]
361 nxyz[dim++] = mxyz.mesh_z + 1; // Z - col within row [0, 15]
362 return true;
363#else
364 return false;
365#endif
366*/
367
368 nxyz[0] = 11; // X - group
369 nxyz[1] = 6; // Y - row within group
370 nxyz[2] = 16; // Z - col within group
371
372 // Needed for test/unit_test/Machine.cpp PASS
373// nxyz[0] = 4;
374// nxyz[1] = 8;
375// nxyz[2] = 12;
376
377 return true;
378 }
379
380 // Return machine extents, transformed or actual
381 bool getMachineExtent(int *nxyz) const {
382 if (is_transformed)
383 this->getTransformedMachineExtent(nxyz);
384 else
385 this->getActualMachineExtent(nxyz);
386
387 return true;
388 }
389
390 // Return number of groups (RCA X-dim) with allocated nodes
391 part_t getNumUniqueGroups() const override{
392 return this->num_unique_groups;
393 }
394
395 // Return number of ranks in each group (RCA X-dim) in an allocation
396 bool getGroupCount(part_t *grp_count) const override {
397
398 if (group_count != NULL) {
399 for (int i = 0; i < num_unique_groups; ++i) {
400 grp_count[i] = this->group_count[i];
401 }
402
403 return true;
404 }
405 else
406 return false;
407 }
408
409 // Print allocation coords and extents on rank 0, transformed or actual
411 if (this->myRank >= 0) {
412 // Print transformed coordinates and extents
413 if (is_transformed) {
414 for (int i = 0; i < this->numRanks; ++i) {
415 std::cout << "Rank:" << i << " ";
416 for (int j = 0; j < this->transformed_networkDim; ++j) {
417 std::cout << " " << this->transformed_procCoords[j][i];
418 }
419 std::cout << std::endl;
420 }
421
422 std::cout << std::endl << "Transformed Machine Extent: ";
423 for (int i = 0; i < this->transformed_networkDim; ++i) {
424 std::cout << " " << this->transformed_machine_extent[i];
425 }
426 std::cout << std::endl;
427 }
428 // Print actual coordinates and extents
429 else {
430 for (int i = 0; i < this->numRanks; ++i) {
431 std::cout << "Rank:" << i;
432 for (int j = 0; j < this->actual_networkDim; ++j) {
433 std::cout << " " << actual_procCoords[j][i];
434 }
435 std::cout << std::endl;
436 }
437
438 std::cout << std::endl << "Actual Machine Extent: ";
439 for (int i = 0; i < this->actual_networkDim; ++i) {
440 std::cout << " " << this->actual_machine_extent[i];
441 }
442 std::cout << std::endl;
443 }
444 }
445 }
446
447 // Return transformed coord for this rank
449 if (is_transformed) {
450 for (int i = 0; i < this->transformed_networkDim; ++i) {
451 xyz[i] = transformed_procCoords[i][this->myRank];
452 }
453
454 return true;
455 }
456 else
457 return false;
458 }
459
460 // Return the fake "RCA" coord for this rank for testing
461 bool getMyActualMachineCoordinate(pcoord_t *xyz) {
462/*
463#if defined (HAVE_ZOLTAN2_RCALIB)
464 // Cray node info for current node
465 rs_node_t nodeInfo;
466 rca_get_nodeid(&nodeInfo);
467
468 // Current node ID
469 int NIDs = (int)nodeInfo.rs_node_s._node_id;
470
471 mesh_coord_t node_coord;
472 int returnval = rca_get_meshcoord((uint16_t)NIDs, &node_coord);
473 if (returnval == -1) {
474 return false;
475 }
476 xyz[0] = node_coord.mesh_x;
477 xyz[1] = node_coord.mesh_y;
478 xyz[2] = node_coord.mesh_z;
479 return true;
480#else
481 return false;
482#endif
483*/
484 srand(this->myRank);
485
486 int x = rand() % 11;
487 int y = rand() % 6;
488 int z = rand() % 16;
489
490 xyz[0] = x;
491 xyz[1] = y;
492 xyz[2] = z;
493
494 // Needed for test/unit_test/Machine.cpp PASS
495// xyz[0] = this->myRank;
496// xyz[1] = this->numRanks;
497// xyz[2] = this->numRanks + 1;
498
499 group_count[x]++;
500
501 return true;
502 }
503
504 // Return machine coordinate for this rank, transformed or actual
505 bool getMyMachineCoordinate(pcoord_t *xyz) {
506 if (is_transformed)
508 else
510
511 return true;
512 }
513
514 // Return machine coord of given rank, transformed or actual
515 inline bool getMachineCoordinate(const int rank,
516 pcoord_t *xyz) const {
517 if (is_transformed) {
518 for (int i = 0; i < this->transformed_networkDim; ++i) {
519 xyz[i] = transformed_procCoords[i][rank];
520 }
521 }
522 else {
523 for (int i = 0; i < this->actual_networkDim; ++i) {
524 xyz[i] = actual_procCoords[i][rank];
525 }
526 }
527
528 return true;
529 }
530
531 bool getMachineCoordinate(const char *nodename, pcoord_t *xyz) {
532 return false; // cannot yet return from nodename
533 }
534
535 // Return view of all machine coords, transformed or actual
536 bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const {
537 if (is_transformed) {
538 allCoords = transformed_procCoords;
539 }
540 else {
541 allCoords = actual_procCoords;
542 }
543
544 return true;
545 }
546
547 // Return (approx) hop count from rank1 to rank2. Does not account for
548 // Dragonfly's dynamic routing.
549 virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override {
550 hops = 0;
551
552 if (rank1 == rank2)
553 return true;
554 if (rank1 >= this->numRanks || rank2 >= this->numRanks) {
555 std::cerr << "Rank outside bounds for the machine ranks";
556 exit(1);
557 }
558
559 if (this->is_transformed) {
560 // Case: ranks in different groups (i.e. different RCA x-coords)
561 // Does not account for location of group to group connection.
562 // (Most group to group messages will take 5 hops)
563 if (this->transformed_procCoords[0][rank1] !=
564 this->transformed_procCoords[0][rank2])
565 {
566 hops = 5;
567
568 return true;
569 }
570
571 // Case: ranks in same group
572 // For each 2 differences in transformed_coordinates then
573 // 1 hop
574 for (int i = 1; i < this->transformed_networkDim; ++i) {
575 if (this->transformed_procCoords[i][rank1] !=
576 this->transformed_procCoords[i][rank2])
577 ++hops;
578 }
579 hops /= 2;
580 }
581 else {
582 // Case: ranks in different groups
583 // Does not account for location of group to group connection.
584 // (Nearly all group to group messages will take 5 hops)
585 if (this->actual_procCoords[0][rank1] !=
586 this->actual_procCoords[0][rank2])
587 {
588 hops = 5;
589 return true;
590 }
591
592 // Case: ranks in same group
593 // For each difference in actual_coordinates then
594 // 1 hop
595 for (int i = 1; i < this->actual_networkDim; ++i) {
596 if (this->actual_procCoords[i][rank1] !=
597 this->actual_procCoords[i][rank2])
598 ++hops;
599 }
600 }
601
602 return true;
603 }
604
605private:
606
607 // # of dimensions in the stored coordinates, transformed or actual
608 int transformed_networkDim;
609 int actual_networkDim;
610
611 // Machine Coordinates
612 pcoord_t **transformed_procCoords;
613 pcoord_t **actual_procCoords;
614
615 // Maximum extents for each dimension, transformed or actual
616 part_t *transformed_machine_extent;
617 part_t *actual_machine_extent;
618
619 // Number of groups (RCA X-dim) with nonzero nodes allocated
620 part_t num_unique_groups;
621 // Distribution of nodes in each group (zero node groups have been trimmed)
622 part_t *group_count;
623
624 // Are out coordinates transformed?
625 bool is_transformed;
626
627 const Teuchos::ParameterList *pl;
628
629
630 // reduceAll the machine coordinates
631 void gatherMachineCoordinates(pcoord_t **&coords, int netDim,
632 const Teuchos::Comm<int> &comm) {
633 // Reduces and stores all machine coordinates.
634 pcoord_t *tmpVect = new pcoord_t [this->numRanks];
635
636 for (int i = 0; i < netDim; ++i) {
637 Teuchos::reduceAll<int, pcoord_t>(comm, Teuchos::REDUCE_SUM,
638 this->numRanks,
639 coords[i], tmpVect);
640 pcoord_t *tmp = tmpVect;
641 tmpVect = coords[i];
642 coords[i] = tmp;
643 }
644 delete [] tmpVect;
645 }
646
647};
648
649} // namespace Zoltan2
650
651#endif
A Dragonfly (e.g. Cori, Trinity, Theta) Machine Class for testing only. A more realistic machine shou...
MachineDragonflyRCAForTesting(const Teuchos::Comm< int > &comm, const Teuchos::ParameterList &pl_)
Constructor: Dragonfly (e.g. Cori & Trinity) network machine description.
virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override
getHopCount function: sets hops between rank1 and rank2; returns true if coordinates are available.
bool getMachineCoordinate(const char *nodename, pcoord_t *xyz)
part_t getNumUniqueGroups() const override
getNumUniqueGroups function return the number of unique Dragonfly network groups in provided allocati...
virtual bool getMachineExtentWrapArounds(bool *wrap_around) const
bool getGroupCount(part_t *grp_count) const override
getGroupCount function: returns the number of ranks in each group (RCA X-dim, i.e. the first dim).
bool getMachineCoordinate(const int rank, pcoord_t *xyz) const
MachineDragonflyRCAForTesting(const Teuchos::Comm< int > &comm)
Constructor: Dragonfly (e.g. Cori & Trinity) RCA network machine description.
MachineClass Base class for representing machine coordinates, networks, etc.
Created by mbenlioglu on Aug 31, 2020.
SparseMatrixAdapter_t::part_t part_t