Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_DenseContainer_def.hpp
1/*@HEADER
2// ***********************************************************************
3//
4// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5// Copyright (2009) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38//
39// ***********************************************************************
40//@HEADER
41*/
42
43#ifndef IFPACK2_DENSECONTAINER_DEF_HPP
44#define IFPACK2_DENSECONTAINER_DEF_HPP
45
46#include "Tpetra_CrsMatrix.hpp"
47#include "Teuchos_LAPACK.hpp"
48#include "Tpetra_BlockMultiVector.hpp"
49
50#ifdef HAVE_MPI
51# include <mpi.h>
52# include "Teuchos_DefaultMpiComm.hpp"
53#else
54# include "Teuchos_DefaultSerialComm.hpp"
55#endif // HAVE_MPI
56
57namespace Ifpack2 {
58
59template<class MatrixType, class LocalScalarType>
61DenseContainer (const Teuchos::RCP<const row_matrix_type>& matrix,
62 const Teuchos::Array<Teuchos::Array<LO> >& partitions,
63 const Teuchos::RCP<const import_type>&,
64 bool pointIndexed) :
65 ContainerImpl<MatrixType, LocalScalarType> (matrix, partitions, pointIndexed),
66 scalarOffsets_ (this->numBlocks_)
67{
68 TEUCHOS_TEST_FOR_EXCEPTION(
69 !matrix->hasColMap(), std::invalid_argument, "Ifpack2::DenseContainer: "
70 "The constructor's input matrix must have a column Map.");
71
72 //compute scalarOffsets_
73 GO totalScalars = 0;
74 for(LO i = 0; i < this->numBlocks_; i++)
75 {
76 scalarOffsets_[i] = totalScalars;
77 totalScalars += this->blockSizes_[i] * this->scalarsPerRow_ *
78 this->blockSizes_[i] * this->scalarsPerRow_;
79 }
80 scalars_.resize(totalScalars);
81 for(int i = 0; i < this->numBlocks_; i++)
82 {
83 LO denseRows = this->blockSizes_[i] * this->scalarsPerRow_;
84 //create square dense matrix (stride is same as rows and cols)
85 diagBlocks_.emplace_back(Teuchos::View, scalars_.data() + scalarOffsets_[i], denseRows, denseRows, denseRows);
86 }
87
88 ipiv_.resize(this->blockRows_.size() * this->scalarsPerRow_);
89}
90
91template<class MatrixType, class LocalScalarType>
94
95template<class MatrixType, class LocalScalarType>
96void
99{
100 // Fill the diagonal block and LU permutation array with zeros.
101 for(int i = 0; i < this->numBlocks_; i++)
102 diagBlocks_[i].putScalar(Teuchos::ScalarTraits<LSC>::zero());
103 std::fill (ipiv_.begin (), ipiv_.end (), 0);
104
105 this->IsInitialized_ = true;
106 // We assume that if you called this method, you intend to recompute
107 // everything.
108 this->IsComputed_ = false;
109}
110
111template<class MatrixType, class LocalScalarType>
112void
114compute ()
115{
116// FIXME: I am commenting this out because it breaks block CRS support
117// TEUCHOS_TEST_FOR_EXCEPTION(
118// static_cast<size_t> (ipiv_.size ()) != numRows_, std::logic_error,
119// "Ifpack2::DenseContainer::compute: ipiv_ array has the wrong size. "
120// "Please report this bug to the Ifpack2 developers.");
121
122 this->IsComputed_ = false;
123 if (!this->isInitialized ()) {
124 this->initialize();
125 }
126
127 extract (); // Extract the submatrices
128 factor (); // Factor them
129 this->IsComputed_ = true;
130}
131
132template<class MatrixType, class LocalScalarType>
134{
135 using Teuchos::Array;
136 using Teuchos::ArrayView;
137 using STS = Teuchos::ScalarTraits<SC>;
138 SC zero = STS::zero();
139 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
140 //To extract diagonal blocks, need to translate local rows to local columns.
141 //Strategy: make a lookup table that translates local cols in the matrix to offsets in blockRows_:
142 //blockOffsets_[b] <= offset < blockOffsets_[b+1]: tests whether the column is in block b.
143 //offset - blockOffsets_[b]: gives the column within block b.
144 //
145 //This provides the block and col within a block in O(1).
146 if(this->scalarsPerRow_ > 1)
147 {
148 Array<LO> colToBlockOffset(this->inputBlockMatrix_->getLocalNumCols(), INVALID);
149 for(int i = 0; i < this->numBlocks_; i++)
150 {
151 //Get the interval where block i is defined in blockRows_
152 LO blockStart = this->blockOffsets_[i];
153 LO blockEnd = blockStart + this->blockSizes_[i];
154 ArrayView<const LO> blockRows = this->getBlockRows(i);
155 //Set the lookup table entries for the columns appearing in block i.
156 //If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
157 //this is OK. The values updated here are only needed to process block i's entries.
158 for(size_t j = 0; j < size_t(blockRows.size()); j++)
159 {
160 LO localCol = this->translateRowToCol(blockRows[j]);
161 colToBlockOffset[localCol] = blockStart + j;
162 }
163 using h_inds_type = typename block_crs_matrix_type::local_inds_host_view_type;
164 using h_vals_type = typename block_crs_matrix_type::values_host_view_type;
165 for(LO blockRow = 0; blockRow < LO(blockRows.size()); blockRow++)
166 {
167 //get a raw view of the whole block row
168 h_inds_type indices;
169 h_vals_type values;
170 LO inputRow = this->blockRows_[blockStart + blockRow];
171 this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values);
172 LO numEntries = (LO) indices.size();
173 for(LO k = 0; k < numEntries; k++)
174 {
175 LO colOffset = colToBlockOffset[indices[k]];
176 if(blockStart <= colOffset && colOffset < blockEnd)
177 {
178 //This entry does appear in the diagonal block.
179 //(br, bc) identifies the scalar's position in the BlockCrs block.
180 //Convert this to (r, c) which is its position in the container block.
181 LO blockCol = colOffset - blockStart;
182 for(LO bc = 0; bc < this->bcrsBlockSize_; bc++)
183 {
184 for(LO br = 0; br < this->bcrsBlockSize_; br++)
185 {
186 LO r = this->bcrsBlockSize_ * blockRow + br;
187 LO c = this->bcrsBlockSize_ * blockCol + bc;
188 auto val = values[k * (this->bcrsBlockSize_ * this->bcrsBlockSize_) + (br + this->bcrsBlockSize_ * bc)];
189 if(val != zero)
190 diagBlocks_[i](r, c) = val;
191 }
192 }
193 }
194 }
195 }
196 }
197 }
198 else
199 {
200 //get the mapping from point-indexed matrix columns to offsets in blockRows_
201 //(this includes regular CrsMatrix columns, in which case bcrsBlockSize_ == 1)
202 Array<LO> colToBlockOffset(this->inputMatrix_->getLocalNumCols() * this->bcrsBlockSize_, INVALID);
203 for(int i = 0; i < this->numBlocks_; i++)
204 {
205 //Get the interval where block i is defined in blockRows_
206 LO blockStart = this->blockOffsets_[i];
207 LO blockEnd = blockStart + this->blockSizes_[i];
208 ArrayView<const LO> blockRows = this->getBlockRows(i);
209 //Set the lookup table entries for the columns appearing in block i.
210 //If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
211 //this is OK. The values updated here are only needed to process block i's entries.
212 for(size_t j = 0; j < size_t(blockRows.size()); j++)
213 {
214 //translateRowToCol will return the corresponding split column
215 LO localCol = this->translateRowToCol(blockRows[j]);
216 colToBlockOffset[localCol] = blockStart + j;
217 }
218 for(size_t blockRow = 0; blockRow < size_t(blockRows.size()); blockRow++)
219 {
220 //get a view of the split row
221 LO inputPointRow = this->blockRows_[blockStart + blockRow];
222 auto rowView = this->getInputRowView(inputPointRow);
223 for(size_t k = 0; k < rowView.size(); k++)
224 {
225 LO colOffset = colToBlockOffset[rowView.ind(k)];
226 if(blockStart <= colOffset && colOffset < blockEnd)
227 {
228 LO blockCol = colOffset - blockStart;
229 auto val = rowView.val(k);
230 if(val != zero)
231 diagBlocks_[i](blockRow, blockCol) = rowView.val(k);
232 }
233 }
234 }
235 }
236 }
237}
238
239
240template<class MatrixType, class LocalScalarType>
241void
242DenseContainer<MatrixType, LocalScalarType>::
243factor ()
244{
245 Teuchos::LAPACK<int, LSC> lapack;
246 for(int i = 0; i < this->numBlocks_; i++)
247 {
248 int INFO = 0;
249 int* blockIpiv = &ipiv_[this->blockOffsets_[i] * this->scalarsPerRow_];
250 lapack.GETRF(diagBlocks_[i].numRows(),
251 diagBlocks_[i].numCols(),
252 diagBlocks_[i].values(),
253 diagBlocks_[i].stride(),
254 blockIpiv, &INFO);
255 // INFO < 0 is a bug.
256 TEUCHOS_TEST_FOR_EXCEPTION(
257 INFO < 0, std::logic_error, "Ifpack2::DenseContainer::factor: "
258 "LAPACK's _GETRF (LU factorization with partial pivoting) was called "
259 "incorrectly. INFO = " << INFO << " < 0. "
260 "Please report this bug to the Ifpack2 developers.");
261 // INFO > 0 means the matrix is singular. This is probably an issue
262 // either with the choice of rows the rows we extracted, or with the
263 // input matrix itself.
264 TEUCHOS_TEST_FOR_EXCEPTION(
265 INFO > 0, std::runtime_error, "Ifpack2::DenseContainer::factor: "
266 "LAPACK's _GETRF (LU factorization with partial pivoting) reports that the "
267 "computed U factor is exactly singular. U(" << INFO << "," << INFO << ") "
268 "(one-based index i) is exactly zero. This probably means that the input "
269 "matrix has a singular diagonal block.");
270 }
271}
272
273template<class MatrixType, class LocalScalarType>
274void
275DenseContainer<MatrixType, LocalScalarType>::
276solveBlock(ConstHostSubviewLocal X,
277 HostSubviewLocal Y,
278 int blockIndex,
279 Teuchos::ETransp mode,
280 LSC alpha,
281 LSC beta) const
282{
283 #ifdef HAVE_IFPACK2_DEBUG
284 TEUCHOS_TEST_FOR_EXCEPTION(
285 X.extent (0) != Y.extent (0),
286 std::logic_error, "Ifpack2::DenseContainer::solveBlock: X and Y have "
287 "incompatible dimensions (" << X.extent (0) << " resp. "
288 << Y.extent (0) << "). Please report this bug to "
289 "the Ifpack2 developers.");
290
291 TEUCHOS_TEST_FOR_EXCEPTION(
292 X.extent (1) != Y.extent(1),
293 std::logic_error, "Ifpack2::DenseContainer::solveBlock: X and Y have "
294 "incompatible numbers of vectors (" << X.extent (1) << " resp. "
295 << Y.extent (1) << "). Please report this bug to "
296 "the Ifpack2 developers.");
297 #endif
298
299 typedef Teuchos::ScalarTraits<LSC> STS;
300 size_t numRows = X.extent(0);
301 size_t numVecs = X.extent(1);
302 if(alpha == STS::zero()) { // don't need to solve the linear system
303 if(beta == STS::zero()) {
304 // Use BLAS AXPY semantics for beta == 0: overwrite, clobbering
305 // any Inf or NaN values in Y (rather than multiplying them by
306 // zero, resulting in NaN values).
307 for(size_t j = 0; j < numVecs; j++)
308 {
309 for(size_t i = 0; i < numRows; i++)
310 Y(i, j) = STS::zero();
311 }
312 }
313 else // beta != 0
314 for(size_t j = 0; j < numVecs; j++)
315 {
316 for(size_t i = 0; i < numRows; i++)
317 Y(i, j) = beta * Y(i, j);
318 }
319 }
320 else { // alpha != 0; must solve the linear system
321 Teuchos::LAPACK<int, LSC> lapack;
322 // If beta is nonzero or Y is not constant stride, we have to use
323 // a temporary output multivector. It gets a (deep) copy of X,
324 // since GETRS overwrites its (multi)vector input with its output.
325 std::vector<LSC> yTemp(numVecs * numRows);
326 for(size_t j = 0; j < numVecs; j++)
327 {
328 for(size_t i = 0; i < numRows; i++)
329 yTemp[j * numRows + i] = X(i, j);
330 }
331
332 int INFO = 0;
333 int* blockIpiv = &ipiv_[this->blockOffsets_[blockIndex] * this->scalarsPerRow_];
334 const char trans =
335 (mode == Teuchos::CONJ_TRANS ? 'C' : (mode == Teuchos::TRANS ? 'T' : 'N'));
336 lapack.GETRS (trans,
337 diagBlocks_[blockIndex].numRows (),
338 numVecs,
339 diagBlocks_[blockIndex].values (),
340 diagBlocks_[blockIndex].stride (),
341 blockIpiv,
342 yTemp.data(),
343 numRows,
344 &INFO);
345
346 if (beta != STS::zero ()) {
347 for(size_t j = 0; j < numVecs; j++)
348 {
349 for(size_t i = 0; i < numRows; i++)
350 {
351 Y(i, j) *= ISC(beta);
352 Y(i, j) += ISC(alpha * yTemp[j * numRows + i]);
353 }
354 }
355 }
356 else {
357 for(size_t j = 0; j < numVecs; j++)
358 {
359 for(size_t i = 0; i < numRows; i++)
360 Y(i, j) = ISC(yTemp[j * numRows + i]);
361 }
362 }
363
364 TEUCHOS_TEST_FOR_EXCEPTION(
365 INFO != 0, std::runtime_error, "Ifpack2::DenseContainer::solveBlock: "
366 "LAPACK's _GETRS (solve using LU factorization with partial pivoting) "
367 "failed with INFO = " << INFO << " != 0.");
368 }
369}
370
371template<class MatrixType, class LocalScalarType>
372std::ostream&
374print (std::ostream& os) const
375{
376 Teuchos::FancyOStream fos (Teuchos::rcpFromRef (os));
377 fos.setOutputToRootOnly (0);
378 this->describe (fos);
379 return os;
380}
381
382template<class MatrixType, class LocalScalarType>
383std::string
385description () const
386{
387 std::ostringstream oss;
388 oss << "Ifpack::DenseContainer: ";
389 if (this->isInitialized()) {
390 if (this->isComputed()) {
391 oss << "{status = initialized, computed";
392 }
393 else {
394 oss << "{status = initialized, not computed";
395 }
396 }
397 else {
398 oss << "{status = not initialized, not computed";
399 }
400
401 oss << "}";
402 return oss.str();
403}
404
405template<class MatrixType, class LocalScalarType>
406void
408describe (Teuchos::FancyOStream& os,
409 const Teuchos::EVerbosityLevel verbLevel) const
410{
411 using std::endl;
412 if(verbLevel==Teuchos::VERB_NONE) return;
413 os << "================================================================================" << endl;
414 os << "Ifpack2::DenseContainer" << endl;
415 for(int i = 0; i < this->numBlocks_; i++)
416 {
417 os << "Block " << i << " number of rows = " << this->blockSizes_[i] << endl;
418 }
419 os << "isInitialized() = " << this->IsInitialized_ << endl;
420 os << "isComputed() = " << this->IsComputed_ << endl;
421 os << "================================================================================" << endl;
422 os << endl;
423}
424
425template<class MatrixType, class LocalScalarType>
427{
428 diagBlocks_.clear();
429 scalars_.clear();
431}
432
433template<class MatrixType, class LocalScalarType>
435{
436 return "Dense";
437}
438
439} // namespace Ifpack2
440
441// There's no need to instantiate for CrsMatrix too. All Ifpack2
442// preconditioners can and should do dynamic casts if they need a type
443// more specific than RowMatrix.
444
445#define IFPACK2_DENSECONTAINER_INSTANT(S,LO,GO,N) \
446 template class Ifpack2::DenseContainer< Tpetra::RowMatrix<S, LO, GO, N>, S >;
447
448#endif // IFPACK2_DENSECONTAINER_DEF_HPP
The implementation of the numerical features of Container (Jacobi, Gauss-Seidel, SGS)....
Definition: Ifpack2_Container_decl.hpp:344
int numBlocks_
The number of blocks (partitions) in the container.
Definition: Ifpack2_Container_decl.hpp:292
LO scalarsPerRow_
Definition: Ifpack2_Container_decl.hpp:317
Teuchos::Array< LO > blockSizes_
Number of rows in each block.
Definition: Ifpack2_Container_decl.hpp:296
Teuchos::Array< LO > blockRows_
Local indices of the rows of the input matrix that belong to this block.
Definition: Ifpack2_Container_decl.hpp:294
Store and solve a local dense linear problem.
Definition: Ifpack2_DenseContainer_decl.hpp:105
virtual std::string description() const
A one-line description of this object.
Definition: Ifpack2_DenseContainer_def.hpp:385
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object with some verbosity level to the given FancyOStream.
Definition: Ifpack2_DenseContainer_def.hpp:408
static std::string getName()
Get the name of this container type for Details::constructContainer()
Definition: Ifpack2_DenseContainer_def.hpp:434
virtual std::ostream & print(std::ostream &os) const
Print information about this object to the given output stream.
Definition: Ifpack2_DenseContainer_def.hpp:374
virtual void compute()
Extract the local diagonal block and prepare the solver.
Definition: Ifpack2_DenseContainer_def.hpp:114
DenseContainer(const Teuchos::RCP< const row_matrix_type > &matrix, const Teuchos::Array< Teuchos::Array< LO > > &partitions, const Teuchos::RCP< const import_type > &importer, bool pointIndexed)
Constructor.
Definition: Ifpack2_DenseContainer_def.hpp:61
virtual ~DenseContainer()
Destructor (declared virtual for memory safety of derived classes).
Definition: Ifpack2_DenseContainer_def.hpp:93
virtual void initialize()
Do all set-up operations that only require matrix structure.
Definition: Ifpack2_DenseContainer_def.hpp:98
Preconditioners and smoothers for Tpetra sparse matrices.
Definition: Ifpack2_AdditiveSchwarz_decl.hpp:74