Amesos2 - Direct Sparse Solver Interfaces Version of the Day
Amesos2_Superludist_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// Amesos2: Templated Direct Sparse Solver Package
6// Copyright 2011 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39//
40// ***********************************************************************
41//
42// @HEADER
43
52#ifndef AMESOS2_SUPERLUDIST_DEF_HPP
53#define AMESOS2_SUPERLUDIST_DEF_HPP
54
55#include <Teuchos_Tuple.hpp>
56#include <Teuchos_StandardParameterEntryValidators.hpp>
57#include <Teuchos_DefaultMpiComm.hpp>
58
61#include "Amesos2_Util.hpp"
62
63
64namespace Amesos2 {
65
66
67 template <class Matrix, class Vector>
68 Superludist<Matrix,Vector>::Superludist(Teuchos::RCP<const Matrix> A,
69 Teuchos::RCP<Vector> X,
70 Teuchos::RCP<const Vector> B)
71 : SolverCore<Amesos2::Superludist,Matrix,Vector>(A, X, B)
72 , bvals_()
73 , xvals_()
74 , in_grid_(false)
75 , is_contiguous_(true)
76 {
77 using Teuchos::Comm;
78 // It's OK to depend on MpiComm explicitly here, because
79 // SuperLU_DIST requires MPI anyway.
80 using Teuchos::MpiComm;
81 using Teuchos::outArg;
82 using Teuchos::ParameterList;
83 using Teuchos::parameterList;
84 using Teuchos::RCP;
85 using Teuchos::rcp;
86 using Teuchos::rcp_dynamic_cast;
87 using Teuchos::REDUCE_SUM;
88 using Teuchos::reduceAll;
89 typedef global_ordinal_type GO;
90 typedef Tpetra::Map<local_ordinal_type, GO, node_type> map_type;
91
93 // Set up the SuperLU_DIST processor grid //
95
96 RCP<const Comm<int> > comm = this->getComm ();
97 const int myRank = comm->getRank ();
98 const int numProcs = comm->getSize ();
99
100 SLUD::int_t nprow, npcol;
101 get_default_grid_size (numProcs, nprow, npcol);
102
103 {
104 // FIXME (mfh 16 Dec 2014) getComm() just returns
105 // matrixA_->getComm(), so it's not clear why we need to ask for
106 // the matrix's communicator separately here.
107 RCP<const Comm<int> > matComm = this->matrixA_->getComm ();
108 TEUCHOS_TEST_FOR_EXCEPTION(
109 matComm.is_null (), std::logic_error, "Amesos2::Superlustdist "
110 "constructor: The matrix's communicator is null!");
111 RCP<const MpiComm<int> > matMpiComm =
112 rcp_dynamic_cast<const MpiComm<int> > (matComm);
113 // FIXME (mfh 16 Dec 2014) If the matrix's communicator is a
114 // SerialComm, we probably could just use MPI_COMM_SELF here.
115 // I'm not sure if SuperLU_DIST is smart enough to handle that
116 // case, though.
117 TEUCHOS_TEST_FOR_EXCEPTION(
118 matMpiComm.is_null (), std::logic_error, "Amesos2::Superlustdist "
119 "constructor: The matrix's communicator is not an MpiComm!");
120 TEUCHOS_TEST_FOR_EXCEPTION(
121 matMpiComm->getRawMpiComm ().is_null (), std::logic_error, "Amesos2::"
122 "Superlustdist constructor: The matrix's communicator claims to be a "
123 "Teuchos::MpiComm<int>, but its getRawPtrComm() method returns "
124 "Teuchos::null! This means that the underlying MPI_Comm doesn't even "
125 "exist, which likely implies that the Teuchos::MpiComm was constructed "
126 "incorrectly. It means something different than if the MPI_Comm were "
127 "MPI_COMM_NULL.");
128 MPI_Comm rawMpiComm = (* (matMpiComm->getRawMpiComm ())) ();
129 data_.mat_comm = rawMpiComm;
130 // This looks a bit like ScaLAPACK's grid initialization (which
131 // technically takes place in the BLACS, not in ScaLAPACK
132 // proper). See http://netlib.org/scalapack/slug/node34.html.
133 // The main difference is that SuperLU_DIST depends explicitly
134 // on MPI, while the BLACS hides its communication protocol.
135 SLUD::superlu_gridinit(data_.mat_comm, nprow, npcol, &(data_.grid));
136 }
137
139 // Set some default parameters. //
140 // //
141 // Must do this after grid has been created in //
142 // case user specifies the nprow and npcol parameters //
144 SLUD::set_default_options_dist(&data_.options);
145
146 RCP<ParameterList> default_params =
147 parameterList (* (this->getValidParameters ()));
148 this->setParameters (default_params);
149
150 // Set some internal options
151 data_.options.Fact = SLUD::DOFACT;
152 data_.equed = SLUD::NOEQUIL; // No equilibration has yet been performed
153 data_.options.SolveInitialized = SLUD::NO;
154 data_.options.RefineInitialized = SLUD::NO;
155 data_.rowequ = false;
156 data_.colequ = false;
157 data_.perm_r.resize(this->globalNumRows_);
158 data_.perm_c.resize(this->globalNumCols_);
159
161 // Set up a communicator for the parallel column ordering and //
162 // parallel symbolic factorization. //
164 data_.symb_comm = MPI_COMM_NULL;
165
166 // domains is the next power of 2 less than nprow*npcol. This
167 // value will be used for creating an MPI communicator for the
168 // pre-ordering and symbolic factorization methods.
169 data_.domains = (int) ( pow(2.0, floor(log10((double)nprow*npcol)/log10(2.0))) );
170
171 const int color = (myRank < data_.domains) ? 0 : MPI_UNDEFINED;
172 MPI_Comm_split (data_.mat_comm, color, myRank, &(data_.symb_comm));
173
175 // Set up a row Map that only includes processes that are in the
176 // SuperLU process grid. This will be used for redistributing A.
178
179 // mfh 16 Dec 2014: We could use createWeightedContigMapWithNode
180 // with myProcParticipates as the weight, but that costs an extra
181 // all-reduce.
182
183 // Set to 1 if I am in the grid, and I get some of the matrix rows.
184 int myProcParticipates = 0;
185 if (myRank < nprow * npcol) {
186 in_grid_ = true;
187 myProcParticipates = 1;
188 }
189
190 // Compute how many processes in the communicator belong to the
191 // process grid.
192 int numParticipatingProcs = 0;
193 reduceAll<int, int> (*comm, REDUCE_SUM, myProcParticipates,
194 outArg (numParticipatingProcs));
195 TEUCHOS_TEST_FOR_EXCEPTION(
196 this->globalNumRows_ != 0 && numParticipatingProcs == 0,
197 std::logic_error, "Amesos2::Superludist constructor: The matrix has "
198 << this->globalNumRows_ << " > 0 global row(s), but no processes in the "
199 "communicator want to participate in its factorization! nprow = "
200 << nprow << " and npcol = " << npcol << ".");
201
202 // Divide up the rows among the participating processes.
203 size_t myNumRows = 0;
204 {
205 const GO GNR = static_cast<GO> (this->globalNumRows_);
206 const GO quotient = (numParticipatingProcs == 0) ? static_cast<GO> (0) :
207 GNR / static_cast<GO> (numParticipatingProcs);
208 const GO remainder =
209 GNR - quotient * static_cast<GO> (numParticipatingProcs);
210 const GO lclNumRows = (static_cast<GO> (myRank) < remainder) ?
211 (quotient + static_cast<GO> (1)) : quotient;
212 myNumRows = static_cast<size_t> (lclNumRows);
213 }
214
215 // TODO: might only need to initialize if parallel symbolic factorization is requested.
216 const GO indexBase = this->matrixA_->getRowMap ()->getIndexBase ();
218 rcp (new map_type (this->globalNumRows_, myNumRows, indexBase, comm));
219
221 // Do some other initialization //
223
224 data_.A.Store = NULL;
225 function_map::LUstructInit(this->globalNumRows_, this->globalNumCols_, &(data_.lu));
226 SLUD::PStatInit(&(data_.stat));
227 // We do not use ScalePermstructInit because we will use our own
228 // arrays for storing perm_r and perm_c
229 data_.scale_perm.perm_r = data_.perm_r.getRawPtr();
230 data_.scale_perm.perm_c = data_.perm_c.getRawPtr();
231 }
232
233
234 template <class Matrix, class Vector>
236 {
237 /* Free SuperLU_DIST data_types
238 * - Matrices
239 * - Vectors
240 * - Stat object
241 * - ScalePerm, LUstruct, grid, and solve objects
242 *
243 * Note: the function definitions are the same regardless whether
244 * complex or real, so we arbitrarily use the D namespace
245 */
246 if ( this->status_.getNumPreOrder() > 0 ){
248#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
249 SUPERLU_FREE( data_.sizes );
250 SUPERLU_FREE( data_.fstVtxSep );
251#else
252 free( data_.sizes );
253 free( data_.fstVtxSep );
254#endif
255 }
256
257 // Cleanup old matrix store memory if it's non-NULL. Our
258 // Teuchos::Array's will destroy rowind, colptr, and nzval for us
259 if( data_.A.Store != NULL ){
260 SLUD::Destroy_SuperMatrix_Store_dist( &(data_.A) );
261 }
262
263 // LU data is initialized in numericFactorization_impl()
264 if ( this->status_.getNumNumericFact() > 0 ){
265 function_map::Destroy_LU(this->globalNumRows_, &(data_.grid), &(data_.lu));
266 }
267 function_map::LUstructFree(&(data_.lu));
268
269 // If a symbolic factorization is ever performed without a
270 // follow-up numericfactorization, there are some arrays in the
271 // Pslu_freeable struct which will never be free'd by
272 // SuperLU_DIST.
273 if ( this->status_.symbolicFactorizationDone() &&
274 !this->status_.numericFactorizationDone() ){
275 if ( data_.pslu_freeable.xlsub != NULL ){
276#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
277 SUPERLU_FREE( data_.pslu_freeable.xlsub );
278 SUPERLU_FREE( data_.pslu_freeable.lsub );
279#else
280 free( data_.pslu_freeable.xlsub );
281 free( data_.pslu_freeable.lsub );
282#endif
283 }
284 if ( data_.pslu_freeable.xusub != NULL ){
285#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
286 SUPERLU_FREE( data_.pslu_freeable.xusub );
287 SUPERLU_FREE( data_.pslu_freeable.usub );
288#else
289 free( data_.pslu_freeable.xusub );
290 free( data_.pslu_freeable.usub );
291#endif
292 }
293 if ( data_.pslu_freeable.supno_loc != NULL ){
294#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
295 SUPERLU_FREE( data_.pslu_freeable.supno_loc );
296 SUPERLU_FREE( data_.pslu_freeable.xsup_beg_loc );
297 SUPERLU_FREE( data_.pslu_freeable.xsup_end_loc );
298#else
299 free( data_.pslu_freeable.supno_loc );
300 free( data_.pslu_freeable.xsup_beg_loc );
301 free( data_.pslu_freeable.xsup_end_loc );
302#endif
303 }
304#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
305 SUPERLU_FREE( data_.pslu_freeable.globToLoc );
306#else
307 free( data_.pslu_freeable.globToLoc );
308#endif
309 }
310
311 SLUD::PStatFree( &(data_.stat) ) ;
312
313 // Teuchos::Arrays will free R, C, perm_r, and perm_c
314 // SLUD::D::ScalePermstructFree(&(data_.scale_perm));
315
316 if ( data_.options.SolveInitialized == SLUD::YES )
317 function_map::SolveFinalize(&(data_.options), &(data_.solve_struct));
318
319 SLUD::superlu_gridexit(&(data_.grid)); // TODO: are there any
320 // cases where grid
321 // wouldn't be initialized?
322
323 if ( data_.symb_comm != MPI_COMM_NULL ) MPI_Comm_free(&(data_.symb_comm));
324 }
325
326 template<class Matrix, class Vector>
327 int
329 {
330 // We will always use the NATURAL row ordering to avoid the
331 // sequential bottleneck present when doing any other row
332 // ordering scheme from SuperLU_DIST
333 //
334 // Set perm_r to be the natural ordering
335 SLUD::int_t slu_rows_ub = Teuchos::as<SLUD::int_t>(this->globalNumRows_);
336 for( SLUD::int_t i = 0; i < slu_rows_ub; ++i ) data_.perm_r[i] = i;
337
338 // loadA_impl(); // Refresh matrix values
339
340 if( in_grid_ ){
341 // If this function has been called at least once, then the
342 // sizes, and fstVtxSep arrays were allocated in
343 // get_perm_c_parmetis. Delete them before calling that
344 // function again. These arrays will also be dealloc'd in the
345 // deconstructor.
346 if( this->status_.getNumPreOrder() > 0 ){
347#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
348 SUPERLU_FREE( data_.sizes );
349 SUPERLU_FREE( data_.fstVtxSep );
350#else
351 free( data_.sizes );
352 free( data_.fstVtxSep );
353#endif
354 }
355 float info = 0.0;
356 {
357#ifdef HAVE_AMESOS2_TIMERS
358 Teuchos::TimeMonitor preOrderTime( this->timers_.preOrderTime_ );
359#endif
360 info = SLUD::get_perm_c_parmetis( &(data_.A),
361 data_.perm_r.getRawPtr(), data_.perm_c.getRawPtr(),
362 data_.grid.nprow * data_.grid.npcol, data_.domains,
363 &(data_.sizes), &(data_.fstVtxSep),
364 &(data_.grid), &(data_.symb_comm) );
365 }
366 TEUCHOS_TEST_FOR_EXCEPTION( info > 0.0,
367 std::runtime_error,
368 "SuperLU_DIST pre-ordering ran out of memory after allocating "
369 << info << " bytes of memory" );
370 }
371
372 // Ordering will be applied directly before numeric factorization,
373 // after we have a chance to get updated coefficients from the
374 // matrix
375
376 return EXIT_SUCCESS;
377 }
378
379
380
381 template <class Matrix, class Vector>
382 int
384 {
385 // loadA_impl(); // Refresh matrix values
386
387 if( in_grid_ ){
388
389 float info = 0.0;
390 {
391#ifdef HAVE_AMESOS2_TIMERS
392 Teuchos::TimeMonitor symFactTime( this->timers_.symFactTime_ );
393#endif
394
395#if (SUPERLU_DIST_MAJOR_VERSION > 7)
396 info = SLUD::symbfact_dist(&(data_.options), (data_.grid.nprow) * (data_.grid.npcol),
397 data_.domains, &(data_.A), data_.perm_c.getRawPtr(),
398 data_.perm_r.getRawPtr(), data_.sizes,
399 data_.fstVtxSep, &(data_.pslu_freeable),
400 &(data_.grid.comm), &(data_.symb_comm),
401 &(data_.mem_usage));
402
403#else
404 info = SLUD::symbfact_dist((data_.grid.nprow) * (data_.grid.npcol),
405 data_.domains, &(data_.A), data_.perm_c.getRawPtr(),
406 data_.perm_r.getRawPtr(), data_.sizes,
407 data_.fstVtxSep, &(data_.pslu_freeable),
408 &(data_.grid.comm), &(data_.symb_comm),
409 &(data_.mem_usage));
410#endif
411 }
412 TEUCHOS_TEST_FOR_EXCEPTION( info > 0.0,
413 std::runtime_error,
414 "SuperLU_DIST symbolic factorization ran out of memory after"
415 " allocating " << info << " bytes of memory" );
416 }
417 same_symbolic_ = false;
418 same_solve_struct_ = false;
419
420 return EXIT_SUCCESS;
421 }
422
423
424 template <class Matrix, class Vector>
425 int
427 using Teuchos::as;
428
429 // loadA_impl(); // Refresh the matrix values
430
431 if( in_grid_ ) {
432 if( data_.options.Equil == SLUD::YES ) {
433 SLUD::int_t info = 0;
434
435 // Compute scaling
436 data_.R.resize(this->globalNumRows_);
437 data_.C.resize(this->globalNumCols_);
438 function_map::gsequ_loc(&(data_.A), data_.R.getRawPtr(), data_.C.getRawPtr(),
439 &(data_.rowcnd), &(data_.colcnd), &(data_.amax), &info, &(data_.grid));
440
441 // Apply the scalings
442 function_map::laqgs_loc(&(data_.A), data_.R.getRawPtr(), data_.C.getRawPtr(),
443 data_.rowcnd, data_.colcnd, data_.amax,
444 &(data_.equed));
445
446 data_.rowequ = (data_.equed == SLUD::ROW) || (data_.equed == SLUD::BOTH);
447 data_.colequ = (data_.equed == SLUD::COL) || (data_.equed == SLUD::BOTH);
448 }
449
450 // Apply the column ordering, so that AC is the column-permuted A, and compute etree
451 size_t nnz_loc = ((SLUD::NRformat_loc*)data_.A.Store)->nnz_loc;
452 for( size_t i = 0; i < nnz_loc; ++i ) colind_view_(i) = data_.perm_c[colind_view_(i)];
453
454 // Distribute data from the symbolic factorization
455 if( same_symbolic_ ){
456 // Note: with the SamePattern_SameRowPerm options, it does not
457 // matter that the glu_freeable member has never been
458 // initialized, because it is never accessed. It is a
459 // placeholder arg. The real work is done in data_.lu
460#if (SUPERLU_DIST_MAJOR_VERSION > 7)
461 data_.options.Fact = SLUD::SamePattern_SameRowPerm;
462 function_map::pdistribute(&(data_.options),
463 as<SLUD::int_t>(this->globalNumRows_), // aka "n"
464 &(data_.A), &(data_.scale_perm),
465 &(data_.glu_freeable), &(data_.lu),
466 &(data_.grid));
467#else
468 function_map::pdistribute(SLUD::SamePattern_SameRowPerm,
469 as<SLUD::int_t>(this->globalNumRows_), // aka "n"
470 &(data_.A), &(data_.scale_perm),
471 &(data_.glu_freeable), &(data_.lu),
472 &(data_.grid));
473#endif
474 } else {
475#if (SUPERLU_DIST_MAJOR_VERSION > 7)
476 data_.options.Fact = SLUD::DOFACT;
477 function_map::dist_psymbtonum(&(data_.options),
478 as<SLUD::int_t>(this->globalNumRows_), // aka "n"
479 &(data_.A), &(data_.scale_perm),
480 &(data_.pslu_freeable), &(data_.lu),
481 &(data_.grid));
482#else
483 function_map::dist_psymbtonum(SLUD::DOFACT,
484 as<SLUD::int_t>(this->globalNumRows_), // aka "n"
485 &(data_.A), &(data_.scale_perm),
486 &(data_.pslu_freeable), &(data_.lu),
487 &(data_.grid));
488#endif
489 }
490
491 // Retrieve the normI of A (required by gstrf).
492 bool notran = (data_.options.Trans == SLUD::NOTRANS);
493 double anorm = function_map::plangs((notran ? (char *)"1" : (char *)"I"), &(data_.A), &(data_.grid));
494
495 int info = 0;
496 {
497#ifdef HAVE_AMESOS2_TIMERS
498 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
499#endif
500 function_map::gstrf(&(data_.options), this->globalNumRows_,
501 this->globalNumCols_, anorm, &(data_.lu),
502 &(data_.grid), &(data_.stat), &info);
503 }
504
505 // Check output
506 TEUCHOS_TEST_FOR_EXCEPTION( info > 0,
507 std::runtime_error,
508 "L and U factors have been computed but U("
509 << info << "," << info << ") is exactly zero "
510 "(i.e. U is singular)");
511 }
512
513 // The other option, that info_st < 0, denotes invalid parameters
514 // to the function, but we'll assume for now that that won't
515 // happen.
516
517 data_.options.Fact = SLUD::FACTORED;
518 same_symbolic_ = true;
519
520 return EXIT_SUCCESS;
521 }
522
523
524 template <class Matrix, class Vector>
525 int
527 const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
528 {
529 using Teuchos::as;
530
531 // local_len_rhs is how many of the multivector rows belong to
532 // this processor in the SuperLU_DIST processor grid.
533 const size_t local_len_rhs = superlu_rowmap_->getLocalNumElements();
534 const global_size_type nrhs = X->getGlobalNumVectors();
535 const global_ordinal_type first_global_row_b = superlu_rowmap_->getMinGlobalIndex();
536
537 // make sure our multivector storage is sized appropriately
538 bvals_.resize(nrhs * local_len_rhs);
539 xvals_.resize(nrhs * local_len_rhs);
540
541 // We assume the global length of the two vectors have already been
542 // checked for compatibility
543
544 { // get the values from B
545#ifdef HAVE_AMESOS2_TIMERS
546 Teuchos::TimeMonitor convTimer(this->timers_.vecConvTime_);
547#endif
548 {
549 // The input dense matrix for B should be distributed in the
550 // same manner as the superlu_dist matrix. That is, if a
551 // processor has m_loc rows of A, then it should also have
552 // m_loc rows of B (and the same rows). We accomplish this by
553 // distributing the multivector rows with the same Map that
554 // the matrix A's rows are distributed.
555#ifdef HAVE_AMESOS2_TIMERS
556 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
557#endif
558 // get grid-distributed mv data. The multivector data will be
559 // distributed across the processes in the SuperLU_DIST grid.
560 typedef Util::get_1d_copy_helper<MultiVecAdapter<Vector>,slu_type> copy_helper;
561 copy_helper::do_get(B,
562 bvals_(),
563 local_len_rhs,
564 Teuchos::ptrInArg(*superlu_rowmap_));
565 }
566 } // end block for conversion time
567
568 if( in_grid_ ){
569 // if( data_.options.trans == SLUD::NOTRANS ){
570 // if( data_.rowequ ){ // row equilibration has been done on AC
571 // // scale bxvals_ by diag(R)
572 // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.R(),
573 // SLUD::slu_mt_mult<slu_type,magnitude_type>());
574 // }
575 // } else if( data_.colequ ){ // column equilibration has been done on AC
576 // // scale bxvals_ by diag(C)
577 // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.C(),
578 // SLUD::slu_mt_mult<slu_type,magnitude_type>());
579 // }
580
581 // Initialize the SOLVEstruct_t.
582 //
583 // We are able to reuse the solve struct if we have not changed
584 // the sparsity pattern of L and U since the last solve
585 if( !same_solve_struct_ ){
586 if( data_.options.SolveInitialized == SLUD::YES ){
587 function_map::SolveFinalize(&(data_.options), &(data_.solve_struct));
588 }
589 function_map::SolveInit(&(data_.options), &(data_.A), data_.perm_r.getRawPtr(),
590 data_.perm_c.getRawPtr(), as<SLUD::int_t>(nrhs), &(data_.lu),
591 &(data_.grid), &(data_.solve_struct));
592 // Flag that we can reuse this solve_struct unless another
593 // symbolicFactorization is called between here and the next
594 // solve.
595 same_solve_struct_ = true;
596 }
597
598 // Apply row-scaling if requested
599 if (data_.options.Equil == SLUD::YES && data_.rowequ) {
600 SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
601 for(global_size_type j = 0; j < nrhs; ++j) {
602 for(size_t i = 0; i < local_len_rhs; ++i) {
603 bvals_[i + j*ld] *= data_.R[first_global_row_b + i];
604 }
605 }
606 }
607
608 // Solve
609 int ierr = 0; // returned error code
610 {
611#ifdef HAVE_AMESOS2_TIMERS
612 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
613#endif
614
615#if (SUPERLU_DIST_MAJOR_VERSION > 7)
616 function_map::gstrs(&(data_.options), as<SLUD::int_t>(this->globalNumRows_), &(data_.lu),
617 &(data_.scale_perm), &(data_.grid), bvals_.getRawPtr(),
618 as<SLUD::int_t>(local_len_rhs), as<SLUD::int_t>(first_global_row_b),
619 as<SLUD::int_t>(local_len_rhs), as<int>(nrhs),
620 &(data_.solve_struct), &(data_.stat), &ierr);
621#else
622 function_map::gstrs(as<SLUD::int_t>(this->globalNumRows_), &(data_.lu),
623 &(data_.scale_perm), &(data_.grid), bvals_.getRawPtr(),
624 as<SLUD::int_t>(local_len_rhs), as<SLUD::int_t>(first_global_row_b),
625 as<SLUD::int_t>(local_len_rhs), as<int>(nrhs),
626 &(data_.solve_struct), &(data_.stat), &ierr);
627#endif
628 } // end block for solve time
629
630 TEUCHOS_TEST_FOR_EXCEPTION( ierr < 0,
631 std::runtime_error,
632 "Argument " << -ierr << " to gstrs had an illegal value" );
633
634 // "Un-scale" the solution so that it is a solution of the original system
635 // if( data_.options.trans == SLUD::NOTRANS ){
636 // if( data_.colequ ){ // column equilibration has been done on AC
637 // // scale bxvals_ by diag(C)
638 // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.C(),
639 // SLUD::slu_mt_mult<slu_type,magnitude_type>());
640 // }
641 // } else if( data_.rowequ ){ // row equilibration has been done on AC
642 // // scale bxvals_ by diag(R)
643 // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.R(),
644 // SLUD::slu_mt_mult<slu_type,magnitude_type>());
645 // }
646 { // permute B to a solution of the original system
647#ifdef HAVE_AMESOS2_TIMERS
648 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
649#endif
650 SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
651 function_map::permute_Dense_Matrix(as<SLUD::int_t>(first_global_row_b),
652 as<SLUD::int_t>(local_len_rhs),
653 data_.solve_struct.row_to_proc,
654 data_.solve_struct.inv_perm_c,
655 bvals_.getRawPtr(), ld,
656 xvals_.getRawPtr(), ld,
657 as<int>(nrhs),
658 &(data_.grid));
659 }
660
661 // Apply col-scaling if requested
662 if (data_.options.Equil == SLUD::YES && data_.colequ) {
663 SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
664 for(global_size_type j = 0; j < nrhs; ++j) {
665 for(size_t i = 0; i < local_len_rhs; ++i) {
666 xvals_[i + j*ld] *= data_.C[first_global_row_b + i];
667 }
668 }
669 }
670 }
671
672 /* Update X's global values */
673 {
674#ifdef HAVE_AMESOS2_TIMERS
675 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
676#endif
677 typedef Util::put_1d_data_helper<MultiVecAdapter<Vector>,slu_type> put_helper;
678 put_helper::do_put(X,
679 xvals_(),
680 local_len_rhs,
681 Teuchos::ptrInArg(*superlu_rowmap_));
682 }
683
684 return EXIT_SUCCESS;
685 }
686
687
688 template <class Matrix, class Vector>
689 bool
691 {
692 // SuperLU_DIST requires square matrices
693 return( this->globalNumRows_ == this->globalNumCols_ );
694 }
695
696
697 template <class Matrix, class Vector>
698 void
699 Superludist<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
700 {
701 using Teuchos::as;
702 using Teuchos::RCP;
703 using Teuchos::getIntegralValue;
704 using Teuchos::ParameterEntryValidator;
705
706 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
707
708 if( parameterList->isParameter("npcol") || parameterList->isParameter("nprow") ){
709 TEUCHOS_TEST_FOR_EXCEPTION( !(parameterList->isParameter("nprow") &&
710 parameterList->isParameter("npcol")),
711 std::invalid_argument,
712 "nprow and npcol must be set together" );
713
714 SLUD::int_t nprow = parameterList->template get<SLUD::int_t>("nprow");
715 SLUD::int_t npcol = parameterList->template get<SLUD::int_t>("npcol");
716
717 TEUCHOS_TEST_FOR_EXCEPTION( nprow * npcol > this->getComm()->getSize(),
718 std::invalid_argument,
719 "nprow and npcol combination invalid" );
720
721 if( (npcol != data_.grid.npcol) || (nprow != data_.grid.nprow) ){
722 // De-allocate the default grid that was initialized in the constructor
723 SLUD::superlu_gridexit(&(data_.grid));
724 // Create a new grid
725 SLUD::superlu_gridinit(data_.mat_comm, nprow, npcol, &(data_.grid));
726 } // else our grid has not changed size since the last initialization
727 }
728
729 TEUCHOS_TEST_FOR_EXCEPTION( this->control_.useTranspose_,
730 std::invalid_argument,
731 "SuperLU_DIST does not support solving the tranpose system" );
732
733 data_.options.Trans = SLUD::NOTRANS; // should always be set this way;
734
735 // Equilbration option
736 bool equil = parameterList->get<bool>("Equil", false);
737 data_.options.Equil = equil ? SLUD::YES : SLUD::NO;
738
739 if( parameterList->isParameter("ColPerm") ){
740 RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry("ColPerm").validator();
741 parameterList->getEntry("ColPerm").setValidator(colperm_validator);
742
743 data_.options.ColPerm = getIntegralValue<SLUD::colperm_t>(*parameterList, "ColPerm");
744 }
745
746 // Always use the "NOROWPERM" option to avoid a serial bottleneck
747 // with the weighted bipartite matching algorithm used for the
748 // "LargeDiag" RowPerm. Note the inconsistency with the SuperLU
749 // User guide (which states that the value should be "NATURAL").
750 data_.options.RowPerm = SLUD::NOROWPERM;
751
752 // TODO: Uncomment when supported
753 // if( parameterList->isParameter("IterRefine") ){
754 // RCP<const ParameterEntryValidator> iter_refine_validator = valid_params->getEntry("IterRefine").validator();
755 // parameterList->getEntry("IterRefine").setValidator(iter_refine_validator);
756
757 // data_.options.IterRefine = getIntegralValue<SLUD::IterRefine_t>(*parameterList, "IterRefine");
758 // }
759 data_.options.IterRefine = SLUD::NOREFINE;
760
761 bool replace_tiny = parameterList->get<bool>("ReplaceTinyPivot", true);
762 data_.options.ReplaceTinyPivot = replace_tiny ? SLUD::YES : SLUD::NO;
763
764 if( parameterList->isParameter("IsContiguous") ){
765 is_contiguous_ = parameterList->get<bool>("IsContiguous");
766 }
767 }
768
769
770 template <class Matrix, class Vector>
771 Teuchos::RCP<const Teuchos::ParameterList>
773 {
774 using std::string;
775 using Teuchos::tuple;
776 using Teuchos::ParameterList;
777 using Teuchos::EnhancedNumberValidator;
778 using Teuchos::setStringToIntegralParameter;
779 using Teuchos::stringToIntegralParameterEntryValidator;
780
781 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
782
783 if( is_null(valid_params) ){
784 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
785
786 Teuchos::RCP<EnhancedNumberValidator<SLUD::int_t> > col_row_validator
787 = Teuchos::rcp( new EnhancedNumberValidator<SLUD::int_t>() );
788 col_row_validator->setMin(1);
789
790 pl->set("npcol", data_.grid.npcol,
791 "Number of columns in the processor grid. "
792 "Must be set with nprow", col_row_validator);
793 pl->set("nprow", data_.grid.nprow,
794 "Number of rows in the SuperLU_DIST processor grid. "
795 "Must be set together with npcol", col_row_validator);
796
797 // validator will catch any value besides NOTRANS
798 setStringToIntegralParameter<SLUD::trans_t>("Trans", "NOTRANS",
799 "Solve for the transpose system or not",
800 tuple<string>("NOTRANS"),
801 tuple<string>("Do not solve with transpose"),
802 tuple<SLUD::trans_t>(SLUD::NOTRANS),
803 pl.getRawPtr());
804
805 // Equillbration
806 pl->set("Equil", false, "Whether to equilibrate the system before solve");
807
808 // TODO: uncomment when supported
809 // setStringToIntegralParameter<SLUD::IterRefine_t>("IterRefine", "NOREFINE",
810 // "Type of iterative refinement to use",
811 // tuple<string>("NOREFINE", "DOUBLE"),
812 // tuple<string>("Do not use iterative refinement",
813 // "Do double iterative refinement"),
814 // tuple<SLUD::IterRefine_t>(SLUD::NOREFINE,
815 // SLUD::DOUBLE),
816 // pl.getRawPtr());
817
818 pl->set("ReplaceTinyPivot", true,
819 "Specifies whether to replace tiny diagonals during LU factorization");
820
821 setStringToIntegralParameter<SLUD::colperm_t>("ColPerm", "PARMETIS",
822 "Specifies how to permute the columns of the "
823 "matrix for sparsity preservation",
824 tuple<string>("NATURAL", "PARMETIS"),
825 tuple<string>("Natural ordering",
826 "ParMETIS ordering on A^T + A"),
827 tuple<SLUD::colperm_t>(SLUD::NATURAL,
828 SLUD::PARMETIS),
829 pl.getRawPtr());
830
831 pl->set("IsContiguous", true, "Whether GIDs contiguous");
832
833 valid_params = pl;
834 }
835
836 return valid_params;
837 }
838
839
840 template <class Matrix, class Vector>
841 void
843 SLUD::int_t& nprow,
844 SLUD::int_t& npcol) const {
845 TEUCHOS_TEST_FOR_EXCEPTION( nprocs < 1,
846 std::invalid_argument,
847 "Number of MPI processes must be at least 1" );
848 SLUD::int_t c, r = 1;
849 while( r*r <= nprocs ) r++;
850 nprow = npcol = --r; // fall back to square grid
851 c = nprocs / r;
852 while( (r--)*c != nprocs ){
853 c = nprocs / r; // note integer division
854 }
855 ++r;
856 // prefer the square grid over a single row (which will only happen
857 // in the case of a prime nprocs
858 if( r > 1 || nprocs < 9){ // nprocs < 9 is a heuristic for the small cases
859 nprow = r;
860 npcol = c;
861 }
862 }
863
864
865 template <class Matrix, class Vector>
866 bool
868 // Extract the necessary information from mat and call SLU function
869 using Teuchos::Array;
870 using Teuchos::ArrayView;
871 using Teuchos::ptrInArg;
872 using Teuchos::as;
873
874 using SLUD::int_t;
875
876#ifdef HAVE_AMESOS2_TIMERS
877 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
878#endif
879
880 // Cleanup old store memory if it's non-NULL
881 if( data_.A.Store != NULL ){
882 SLUD::Destroy_SuperMatrix_Store_dist( &(data_.A) );
883 data_.A.Store = NULL;
884 }
885
886 Teuchos::RCP<const MatrixAdapter<Matrix> > redist_mat
887 = this->matrixA_->get(ptrInArg(*superlu_rowmap_));
888
889 int_t l_nnz, l_rows, g_rows, g_cols, fst_global_row;
890 l_nnz = as<int_t>(redist_mat->getLocalNNZ());
891 l_rows = as<int_t>(redist_mat->getLocalNumRows());
892 g_rows = as<int_t>(redist_mat->getGlobalNumRows());
893 g_cols = g_rows; // we deal with square matrices
894 fst_global_row = as<int_t>(superlu_rowmap_->getMinGlobalIndex());
895
896 Kokkos::resize(nzvals_view_, l_nnz);
897 Kokkos::resize(colind_view_, l_nnz);
898 Kokkos::resize(rowptr_view_, l_rows + 1);
899 int_t nnz_ret = 0;
900 {
901#ifdef HAVE_AMESOS2_TIMERS
902 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
903#endif
904
906 host_value_type_array,host_ordinal_type_array, host_size_type_array >::do_get(
907 redist_mat.ptr(),
908 nzvals_view_, colind_view_, rowptr_view_,
909 nnz_ret,
910 ptrInArg(*superlu_rowmap_),
911 ROOTED,
912 ARBITRARY);
913 }
914
915 TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != l_nnz,
916 std::runtime_error,
917 "Did not get the expected number of non-zero vals");
918
919 // Get the SLU data type for this type of matrix
920 SLUD::Dtype_t dtype = type_map::dtype;
921
922 if( in_grid_ ){
923 function_map::create_CompRowLoc_Matrix(&(data_.A),
924 g_rows, g_cols,
925 l_nnz, l_rows, fst_global_row,
926 nzvals_view_.data(),
927 colind_view_.data(),
928 rowptr_view_.data(),
929 SLUD::SLU_NR_loc,
930 dtype, SLUD::SLU_GE);
931 }
932
933 return true;
934}
935
936
937 template<class Matrix, class Vector>
938 const char* Superludist<Matrix,Vector>::name = "SuperLU_DIST";
939
940
941} // end namespace Amesos2
942
943#endif // AMESOS2_SUPERLUDIST_DEF_HPP
Provides definition of SuperLU_DIST types as well as conversions and type traits.
@ ROOTED
Definition: Amesos2_TypeDecl.hpp:127
@ ARBITRARY
Definition: Amesos2_TypeDecl.hpp:143
Utility functions for Amesos2.
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers.
Definition: Amesos2_SolverCore_decl.hpp:106
Teuchos::RCP< const MatrixAdapter< Matrix > > matrixA_
The LHS operator.
Definition: Amesos2_SolverCore_decl.hpp:455
super_type & setParameters(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Set/update internal variables and solver options.
Definition: Amesos2_SolverCore_def.hpp:518
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
Return a const parameter list of all of the valid parameters that this->setParameterList(....
Definition: Amesos2_SolverCore_def.hpp:550
global_size_type globalNumCols_
Number of global columns in matrixA_.
Definition: Amesos2_SolverCore_decl.hpp:479
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Returns a pointer to the Teuchos::Comm communicator with this operator.
Definition: Amesos2_SolverCore_decl.hpp:363
global_size_type globalNumRows_
Number of global rows in matrixA_.
Definition: Amesos2_SolverCore_decl.hpp:476
Amesos2 interface to the distributed memory version of SuperLU.
Definition: Amesos2_Superludist_decl.hpp:91
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > superlu_rowmap_
Maps rows of the matrix to processors in the SuperLU_DIST processor grid.
Definition: Amesos2_Superludist_decl.hpp:333
bool in_grid_
true if this processor is in SuperLU_DISTS's 2D process grid
Definition: Amesos2_Superludist_decl.hpp:326
~Superludist()
Destructor.
Definition: Amesos2_Superludist_def.hpp:235
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_Superludist_def.hpp:690
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Definition: Amesos2_Superludist_def.hpp:699
int numericFactorization_impl()
SuperLU_DIST specific numeric factorization.
Definition: Amesos2_Superludist_def.hpp:426
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal solver structures.
Definition: Amesos2_Superludist_def.hpp:867
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_Superludist_def.hpp:328
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
SuperLU_DIST specific solve.
Definition: Amesos2_Superludist_def.hpp:526
void get_default_grid_size(int nprocs, SLUD::int_t &nprow, SLUD::int_t &npcol) const
Definition: Amesos2_Superludist_def.hpp:842
Superludist(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition: Amesos2_Superludist_def.hpp:68
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using SuperLU_DIST.
Definition: Amesos2_Superludist_def.hpp:383
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_Superludist_def.hpp:772
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:65
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:176
Helper class for getting 1-D copies of multivectors.
Definition: Amesos2_MultiVecAdapter_decl.hpp:267
Similar to get_ccs_helper , but used to get a CRS representation of the given matrix.
Definition: Amesos2_Util.hpp:663
Helper class for putting 1-D data arrays into multivectors.
Definition: Amesos2_MultiVecAdapter_decl.hpp:373