#ifndef TPETRA_DETAILS_RESIDUAL_HPP
#define TPETRA_DETAILS_RESIDUAL_HPP

#include "TpetraCore_config.h"
#include "Tpetra_CrsMatrix.hpp"
#include "Tpetra_LocalCrsMatrixOperator.hpp"
#include "Tpetra_MultiVector.hpp"
#include "Teuchos_RCP.hpp"
#include "Teuchos_ScalarTraits.hpp"
#include "Tpetra_Details_Behavior.hpp"
#include "Tpetra_Details_Profiling.hpp"
#include "KokkosSparse_spmv_impl.hpp"
/// \brief Functor for computing the residual.
template<class AMatrix, class MV, class ConstMV, class Offsets,
         bool is_MV, bool restrictedMode, bool skipOffRank>
struct LocalResidualFunctor {
  using execution_space = typename AMatrix::execution_space;
  using LO = typename AMatrix::non_const_ordinal_type;
  using value_type = typename AMatrix::non_const_value_type;
  using team_policy = typename Kokkos::TeamPolicy<execution_space>;
  using team_member = typename team_policy::member_type;
  using ATV = Kokkos::ArithTraits<value_type>;
  AMatrix A_lcl;
  ConstMV X_colmap_lcl;
  ConstMV B_lcl;
  MV R_lcl;
  int rows_per_team;
  Offsets offsets;
  ConstMV X_domainmap_lcl;
  LocalResidualFunctor (const AMatrix& A_lcl_,
                        const ConstMV& X_colmap_lcl_,
                        const ConstMV& B_lcl_,
                        const MV& R_lcl_,
                        const int rows_per_team_,
                        const Offsets& offsets_,
                        const ConstMV& X_domainmap_lcl_) :
    A_lcl(A_lcl_), X_colmap_lcl(X_colmap_lcl_), B_lcl(B_lcl_), R_lcl(R_lcl_),
    rows_per_team(rows_per_team_), offsets(offsets_),
    X_domainmap_lcl(X_domainmap_lcl_)
  { }
  KOKKOS_INLINE_FUNCTION
  void operator() (const team_member& dev) const
  {
    Kokkos::parallel_for(Kokkos::TeamThreadRange (dev, 0, rows_per_team),[&] (const LO& loop) {
        const LO lclRow = static_cast<LO> (dev.league_rank ()) * rows_per_team + loop;

        if (lclRow >= A_lcl.numRows ()) {
          return;
        }
        if (!is_MV) {
          // Vector case: only column 0 of X, B, and R is touched.

          value_type A_x = ATV::zero ();

          if (!restrictedMode) {
            const auto A_row = A_lcl.rowConst(lclRow);
            const LO row_length = static_cast<LO> (A_row.length);

            Kokkos::parallel_reduce(Kokkos::ThreadVectorRange (dev, row_length), [&] (const LO iEntry, value_type& lsum) {
                const auto A_val = A_row.value(iEntry);
                lsum += A_val * X_colmap_lcl(A_row.colidx(iEntry),0);
              }, A_x);
          }
          else {
            // Restricted mode: entries in [row_map(lclRow), offRankOffset) are
            // on-rank and read X through the domain Map view; the rest are
            // off-rank and read the (Imported) column Map view.
            const LO offRankOffset = offsets(lclRow);
            const size_t start = A_lcl.graph.row_map(lclRow);
            const size_t end = A_lcl.graph.row_map(lclRow+1);

            Kokkos::parallel_reduce(Kokkos::ThreadVectorRange (dev, start, end), [&] (const LO iEntry, value_type& lsum) {
                const auto A_val = A_lcl.values(iEntry);
                const auto lclCol = A_lcl.graph.entries(iEntry);
                if (iEntry < offRankOffset)
                  lsum += A_val * X_domainmap_lcl(lclCol,0);
                else if (!skipOffRank)
                  lsum += A_val * X_colmap_lcl(lclCol,0);
              }, A_x);
          }

          Kokkos::single(Kokkos::PerThread(dev),[&] () {
              R_lcl(lclRow,0) = B_lcl(lclRow,0) - A_x;
            });
        }
        else {
          // MultiVector case: loop over the columns of X, B, and R.

          const LO numVectors = static_cast<LO>(X_colmap_lcl.extent(1));

          for(LO v=0; v<numVectors; v++) {

            value_type A_x = ATV::zero ();

            if (!restrictedMode) {

              const auto A_row = A_lcl.rowConst(lclRow);
              const LO row_length = static_cast<LO> (A_row.length);

              Kokkos::parallel_reduce(Kokkos::ThreadVectorRange (dev, row_length), [&] (const LO iEntry, value_type& lsum) {
                  const auto A_val = A_row.value(iEntry);
                  lsum += A_val * X_colmap_lcl(A_row.colidx(iEntry),v);
                }, A_x);
            }
            else {
              const LO offRankOffset = offsets(lclRow);
              const size_t start = A_lcl.graph.row_map(lclRow);
              const size_t end = A_lcl.graph.row_map(lclRow+1);

              Kokkos::parallel_reduce(Kokkos::ThreadVectorRange (dev, start, end), [&] (const LO iEntry, value_type& lsum) {
                  const auto A_val = A_lcl.values(iEntry);
                  const auto lclCol = A_lcl.graph.entries(iEntry);
                  if (iEntry < offRankOffset)
                    lsum += A_val * X_domainmap_lcl(lclCol,v);
                  else if (!skipOffRank)
                    lsum += A_val * X_colmap_lcl(lclCol,v);
                }, A_x);
            }

            Kokkos::single(Kokkos::PerThread(dev),[&] () {
                R_lcl(lclRow,v) = B_lcl(lclRow,v) - A_x;
              });
          }
        }
      });
  }
};
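// --- Example (editorial addition, not part of the original header) ---------
// A minimal sketch of how LocalResidualFunctor is launched.  Each team
// (league rank) handles a contiguous block of rows_per_team rows, and each
// row's dot product is reduced across the vector lanes of one thread.  The
// helper name and its parameters are illustrative placeholders; localResidual()
// below performs this same launch but derives rows_per_team, team size, and
// vector length from KokkosSparse's SpMV launch heuristics.
template<class AMatrix, class MV, class ConstMV, class Offsets>
void exampleLaunchLocalResidual (const AMatrix& A_lcl,
                                 const ConstMV& X_colmap_lcl,
                                 const ConstMV& B_lcl,
                                 const MV& R_lcl,
                                 const Offsets& offsets,
                                 const int rows_per_team)
{
  using execution_space = typename AMatrix::execution_space;
  using policy_type = Kokkos::TeamPolicy<execution_space>;

  // One team per block of rows_per_team rows, rounded up.
  const int64_t worksets = (A_lcl.numRows () + rows_per_team - 1) / rows_per_team;
  policy_type policy (worksets, Kokkos::AUTO);

  // is_MV=false, restrictedMode=false, skipOffRank=false: compute
  // R(:,0) = B(:,0) - A*X(:,0), reading X through the column Map view only.
  using functor_type =
    LocalResidualFunctor<AMatrix, MV, ConstMV, Offsets, false, false, false>;
  functor_type func (A_lcl, X_colmap_lcl, B_lcl, R_lcl, rows_per_team, offsets,
                     ConstMV ());
  Kokkos::parallel_for ("example-residual-vector", policy, func);
}
// ----------------------------------------------------------------------------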
/// \brief Functor for computing R -= A_offRank*X_colmap.
template<class AMatrix, class MV, class ConstMV, class Offsets, bool is_MV>
struct OffRankUpdateFunctor {
  using execution_space = typename AMatrix::execution_space;
  using LO = typename AMatrix::non_const_ordinal_type;
  using value_type = typename AMatrix::non_const_value_type;
  using team_policy = typename Kokkos::TeamPolicy<execution_space>;
  using team_member = typename team_policy::member_type;
  using ATV = Kokkos::ArithTraits<value_type>;
  AMatrix A_lcl;
  ConstMV X_colmap_lcl;
  MV R_lcl;
  int rows_per_team;
  Offsets offsets;
  OffRankUpdateFunctor (const AMatrix& A_lcl_,
                        const ConstMV& X_colmap_lcl_,
                        const MV& R_lcl_,
                        const int rows_per_team_,
                        const Offsets& offsets_) :
    A_lcl(A_lcl_), X_colmap_lcl(X_colmap_lcl_), R_lcl(R_lcl_),
    rows_per_team(rows_per_team_), offsets(offsets_)
  { }
  KOKKOS_INLINE_FUNCTION
  void operator() (const team_member& dev) const
  {
    Kokkos::parallel_for(Kokkos::TeamThreadRange (dev, 0, rows_per_team),[&] (const LO& loop) {
        const LO lclRow = static_cast<LO> (dev.league_rank ()) * rows_per_team + loop;

        if (lclRow >= A_lcl.numRows ()) {
          return;
        }
        const LO offRankOffset = offsets(lclRow);
        const size_t end = A_lcl.graph.row_map(lclRow+1);

        if (!is_MV) {
          // Vector case: subtract the off-rank contribution from column 0.

          value_type A_x = ATV::zero ();

          Kokkos::parallel_reduce(Kokkos::ThreadVectorRange (dev, offRankOffset, end), [&] (const LO iEntry, value_type& lsum) {
              const auto A_val = A_lcl.values(iEntry);
              const auto lclCol = A_lcl.graph.entries(iEntry);
              lsum += A_val * X_colmap_lcl(lclCol,0);
            }, A_x);

          Kokkos::single(Kokkos::PerThread(dev),[&] () {
              R_lcl(lclRow,0) -= A_x;
            });
        }
        else {
          // MultiVector case: loop over the columns of X and R.

          const LO numVectors = static_cast<LO>(X_colmap_lcl.extent(1));

          for(LO v=0; v<numVectors; v++) {

            value_type A_x = ATV::zero ();

            Kokkos::parallel_reduce(Kokkos::ThreadVectorRange (dev, offRankOffset, end), [&] (const LO iEntry, value_type& lsum) {
                const auto A_val = A_lcl.values(iEntry);
                const auto lclCol = A_lcl.graph.entries(iEntry);
                lsum += A_val * X_colmap_lcl(lclCol,v);
              }, A_x);

            Kokkos::single(Kokkos::PerThread(dev),[&] () {
                R_lcl(lclRow,v) -= A_x;
              });
          }
        }
      });
  }
};
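// --- Example (editorial addition, not part of the original header) ---------
// The convention behind the Offsets view used by both functors above: the
// entries of local row i are ordered so that [row_map(i), offsets(i)) reference
// columns that are also in the domain Map ("on-rank"), while
// [offsets(i), row_map(i+1)) reference off-rank columns whose X values only
// become valid once the halo Import finishes.  This serial helper (an
// illustrative placeholder, assuming locally fitted domain and column Maps)
// shows how one row's contribution splits into those two partial sums.
template<class AMatrix, class ConstMV, class Offsets>
KOKKOS_INLINE_FUNCTION
typename AMatrix::non_const_value_type
exampleRowSplitSum (const AMatrix& A_lcl,
                    const ConstMV& X_domainmap_lcl,
                    const ConstMV& X_colmap_lcl,
                    const Offsets& offsets,
                    const typename AMatrix::non_const_ordinal_type lclRow)
{
  using value_type = typename AMatrix::non_const_value_type;
  value_type sum = Kokkos::ArithTraits<value_type>::zero ();
  const auto offRankOffset = offsets(lclRow);
  const auto start = A_lcl.graph.row_map(lclRow);
  const auto end   = A_lcl.graph.row_map(lclRow+1);
  for (auto k = start; k < end; ++k) {
    const auto lclCol = A_lcl.graph.entries(k);
    // On-rank entries read X through the domain Map view; off-rank entries
    // read the (Imported) column Map view.
    sum += (k < offRankOffset) ?
      A_lcl.values(k) * X_domainmap_lcl(lclCol,0) :
      A_lcl.values(k) * X_colmap_lcl(lclCol,0);
  }
  return sum;
}
// ----------------------------------------------------------------------------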
template<class SC, class LO, class GO, class NO>
void localResidual(const CrsMatrix<SC,LO,GO,NO> & A,
                   const MultiVector<SC,LO,GO,NO> & X_colmap,
                   const MultiVector<SC,LO,GO,NO> & B,
                   MultiVector<SC,LO,GO,NO> & R,
                   const Kokkos::View<const size_t*, typename NO::device_type>& offsets,
                   const MultiVector<SC,LO,GO,NO> * X_domainmap = nullptr) {
  using Teuchos::NO_TRANS;

  using local_matrix_device_type = typename CrsMatrix<SC,LO,GO,NO>::local_matrix_device_type;
  using local_view_device_type = typename MultiVector<SC,LO,GO,NO>::dual_view_type::t_dev::non_const_type;
  using const_local_view_device_type = typename MultiVector<SC,LO,GO,NO>::dual_view_type::t_dev::const_type;
  using offset_type = Kokkos::View<const size_t*, typename NO::device_type>;
  local_matrix_device_type A_lcl = A.getLocalMatrixDevice ();
  const_local_view_device_type X_colmap_lcl = X_colmap.getLocalViewDevice(Access::ReadOnly);
  const_local_view_device_type B_lcl = B.getLocalViewDevice(Access::ReadOnly);
  local_view_device_type R_lcl = R.getLocalViewDevice(Access::OverwriteAll);
  const_local_view_device_type X_domainmap_lcl;
  const bool debug = ::Tpetra::Details::Behavior::debug ();
  if (debug) {
    TEUCHOS_TEST_FOR_EXCEPTION
      (X_colmap.getNumVectors () != R.getNumVectors (), std::runtime_error,
       "X.getNumVectors() = " << X_colmap.getNumVectors () << " != "
       "R.getNumVectors() = " << R.getNumVectors () << ".");
    TEUCHOS_TEST_FOR_EXCEPTION
      (X_colmap.getLocalLength () !=
       A.getColMap ()->getLocalNumElements (), std::runtime_error,
       "X has the wrong number of local rows. "
       "X.getLocalLength() = " << X_colmap.getLocalLength () << " != "
       "A.getColMap()->getLocalNumElements() = " <<
       A.getColMap ()->getLocalNumElements () << ".");
    TEUCHOS_TEST_FOR_EXCEPTION
      (R.getLocalLength () !=
       A.getRowMap ()->getLocalNumElements (), std::runtime_error,
       "R has the wrong number of local rows. "
       "R.getLocalLength() = " << R.getLocalLength () << " != "
       "A.getRowMap()->getLocalNumElements() = " <<
       A.getRowMap ()->getLocalNumElements () << ".");
    TEUCHOS_TEST_FOR_EXCEPTION
      (B.getLocalLength () !=
       A.getRowMap ()->getLocalNumElements (), std::runtime_error,
       "B has the wrong number of local rows. "
       "B.getLocalLength() = " << B.getLocalLength () << " != "
       "A.getRowMap()->getLocalNumElements() = " <<
       A.getRowMap ()->getLocalNumElements () << ".");

    TEUCHOS_TEST_FOR_EXCEPTION
      (! A.isFillComplete (), std::runtime_error, "The matrix A is not "
       "fill complete. You must call fillComplete() (possibly with "
       "domain and range Map arguments) without an intervening "
       "resumeFill() call before you may call this method.");
    TEUCHOS_TEST_FOR_EXCEPTION
      (! X_colmap.isConstantStride () || ! R.isConstantStride () || ! B.isConstantStride (),
       std::runtime_error, "X, Y and B must be constant stride.");
    TEUCHOS_TEST_FOR_EXCEPTION
      ((X_colmap_lcl.data () == R_lcl.data () && X_colmap_lcl.data () != nullptr) ||
       (X_colmap_lcl.data () == B_lcl.data () && X_colmap_lcl.data () != nullptr),
       std::runtime_error, "X, Y and R may not alias one another.");
  }
  SC one = Teuchos::ScalarTraits<SC>::one(), negone = -one, zero = Teuchos::ScalarTraits<SC>::zero();
#ifdef TPETRA_DETAILS_USE_REFERENCE_RESIDUAL
  // Reference implementation: R = A*X followed by R = B - R.
  A.localApply(X_colmap,R,Teuchos::NO_TRANS, one, zero);
  R.update(one,B,negone);
#else
  if (A_lcl.numRows() == 0) {
    return;
  }

  int64_t numLocalRows = A_lcl.numRows ();
  int64_t myNnz = A_lcl.nnz();

  // Check for imbalanced rows.  If the row lengths are imbalanced enough that
  // the SpMV would switch to the merge-path algorithm, fall back to the local
  // multiply operator and compute the residual as R = B - A*X.
  LO maxRowImbalance = 0;
  if(numLocalRows != 0)
    maxRowImbalance = A.getLocalMaxNumRowEntries() - (myNnz / numLocalRows);

  if(size_t(maxRowImbalance) >= Tpetra::Details::Behavior::rowImbalanceThreshold()) {
    auto lclOp = A.getLocalMultiplyOperator();
    // R = A*X
    lclOp->applyImbalancedRows (X_colmap_lcl, R_lcl, Teuchos::NO_TRANS, one, zero);
    // R = B - R
    R.update(one,B,negone);
    return;
  }
  int team_size = -1;
  int vector_length = -1;
  int64_t rows_per_thread = -1;

  using execution_space = typename CrsMatrix<SC,LO,GO,NO>::execution_space;
  using policy_type = typename Kokkos::TeamPolicy<execution_space>;

  int64_t rows_per_team = KokkosSparse::Impl::spmv_launch_parameters<execution_space>(numLocalRows, myNnz, rows_per_thread, team_size, vector_length);
  int64_t worksets = (B_lcl.extent (0) + rows_per_team - 1) / rows_per_team;

  policy_type policy (1, 1);
  if (team_size < 0) {
    policy = policy_type (worksets, Kokkos::AUTO, vector_length);
  }
  else {
    policy = policy_type (worksets, team_size, vector_length);
  }

  bool is_vector = (X_colmap_lcl.extent(1) == 1);
  if (is_vector) {

    if (X_domainmap == nullptr) {
      // Vector case, no domain-Map view: read X only through the column Map.
      using functor_type = LocalResidualFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,false,false,false>;
      functor_type func (A_lcl, X_colmap_lcl, B_lcl, R_lcl, rows_per_team, offsets, X_domainmap_lcl);
      Kokkos::parallel_for("residual-vector",policy,func);
    }
    else {
      // Restricted mode: on-rank entries read the domain-Map view of X.
      X_domainmap_lcl = X_domainmap->getLocalViewDevice(Access::ReadOnly);
      using functor_type = LocalResidualFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,false,true,false>;
      functor_type func (A_lcl, X_colmap_lcl, B_lcl, R_lcl, rows_per_team, offsets, X_domainmap_lcl);
      Kokkos::parallel_for("residual-vector",policy,func);
    }

  }
  else {

    if (X_domainmap == nullptr) {
      using functor_type = LocalResidualFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,true,false,false>;
      functor_type func (A_lcl, X_colmap_lcl, B_lcl, R_lcl, rows_per_team, offsets, X_domainmap_lcl);
      Kokkos::parallel_for("residual-multivector",policy,func);
    }
    else {
      X_domainmap_lcl = X_domainmap->getLocalViewDevice(Access::ReadOnly);
      using functor_type = LocalResidualFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,true,true,false>;
      functor_type func (A_lcl, X_colmap_lcl, B_lcl, R_lcl, rows_per_team, offsets, X_domainmap_lcl);
      Kokkos::parallel_for("residual-multivector",policy,func);
    }

  }
#endif
}
template<class SC, class LO, class GO, class NO>
void localResidualWithCommCompOverlap(const CrsMatrix<SC,LO,GO,NO> & A,
                                      MultiVector<SC,LO,GO,NO> & X_colmap,
                                      const MultiVector<SC,LO,GO,NO> & B,
                                      MultiVector<SC,LO,GO,NO> & R,
                                      const Kokkos::View<const size_t*, typename NO::device_type>& offsets,
                                      const MultiVector<SC,LO,GO,NO> & X_domainmap) {
  using Tpetra::Details::ProfilingRegion;
  using Teuchos::NO_TRANS;
  using Teuchos::RCP;
  ProfilingRegion regionLocalApply ("Tpetra::CrsMatrix::localResidualWithCommCompOverlap");

  using import_type = typename CrsMatrix<SC,LO,GO,NO>::import_type;
  using local_matrix_device_type = typename CrsMatrix<SC,LO,GO,NO>::local_matrix_device_type;
  using local_view_device_type = typename MultiVector<SC,LO,GO,NO>::dual_view_type::t_dev::non_const_type;
  using const_local_view_device_type = typename MultiVector<SC,LO,GO,NO>::dual_view_type::t_dev::const_type;
  using offset_type = Kokkos::View<const size_t*, typename NO::device_type>;
  local_matrix_device_type A_lcl = A.getLocalMatrixDevice ();
  const_local_view_device_type X_colmap_lcl = X_colmap.getLocalViewDevice(Access::ReadOnly);
  const_local_view_device_type B_lcl = B.getLocalViewDevice(Access::ReadOnly);
  local_view_device_type R_lcl = R.getLocalViewDevice(Access::OverwriteAll);
  const_local_view_device_type X_domainmap_lcl = X_domainmap.getLocalViewDevice(Access::ReadOnly);
  const bool debug = ::Tpetra::Details::Behavior::debug ();
  if (debug) {
    TEUCHOS_TEST_FOR_EXCEPTION
      (X_colmap.getNumVectors () != R.getNumVectors (), std::runtime_error,
       "X.getNumVectors() = " << X_colmap.getNumVectors () << " != "
       "R.getNumVectors() = " << R.getNumVectors () << ".");
    TEUCHOS_TEST_FOR_EXCEPTION
      (X_colmap.getLocalLength () !=
       A.getColMap ()->getLocalNumElements (), std::runtime_error,
       "X has the wrong number of local rows. "
       "X.getLocalLength() = " << X_colmap.getLocalLength () << " != "
       "A.getColMap()->getLocalNumElements() = " <<
       A.getColMap ()->getLocalNumElements () << ".");
    TEUCHOS_TEST_FOR_EXCEPTION
      (R.getLocalLength () !=
       A.getRowMap ()->getLocalNumElements (), std::runtime_error,
       "R has the wrong number of local rows. "
       "R.getLocalLength() = " << R.getLocalLength () << " != "
       "A.getRowMap()->getLocalNumElements() = " <<
       A.getRowMap ()->getLocalNumElements () << ".");
    TEUCHOS_TEST_FOR_EXCEPTION
      (B.getLocalLength () !=
       A.getRowMap ()->getLocalNumElements (), std::runtime_error,
       "B has the wrong number of local rows. "
       "B.getLocalLength() = " << B.getLocalLength () << " != "
       "A.getRowMap()->getLocalNumElements() = " <<
       A.getRowMap ()->getLocalNumElements () << ".");

    TEUCHOS_TEST_FOR_EXCEPTION
      (! A.isFillComplete (), std::runtime_error, "The matrix A is not "
       "fill complete. You must call fillComplete() (possibly with "
       "domain and range Map arguments) without an intervening "
       "resumeFill() call before you may call this method.");
    TEUCHOS_TEST_FOR_EXCEPTION
      (! X_colmap.isConstantStride () || ! R.isConstantStride () || ! B.isConstantStride (),
       std::runtime_error, "X, Y and B must be constant stride.");
    TEUCHOS_TEST_FOR_EXCEPTION
      ((X_colmap_lcl.data () == R_lcl.data () && X_colmap_lcl.data () != nullptr) ||
       (X_colmap_lcl.data () == B_lcl.data () && X_colmap_lcl.data () != nullptr),
       std::runtime_error, "X, Y and R may not alias one another.");
  }
  if (A_lcl.numRows() == 0) {
    return;
  }

  int64_t numLocalRows = A_lcl.numRows ();
  int64_t myNnz = A_lcl.nnz();
  int team_size = -1;
  int vector_length = -1;
  int64_t rows_per_thread = -1;

  using execution_space = typename CrsMatrix<SC,LO,GO,NO>::execution_space;
  using policy_type = typename Kokkos::TeamPolicy<execution_space>;

  int64_t rows_per_team = KokkosSparse::Impl::spmv_launch_parameters<execution_space>(numLocalRows, myNnz, rows_per_thread, team_size, vector_length);
  int64_t worksets = (B_lcl.extent (0) + rows_per_team - 1) / rows_per_team;

  policy_type policy (1, 1);
  if (team_size < 0) {
    policy = policy_type (worksets, Kokkos::AUTO, vector_length);
  }
  else {
    policy = policy_type (worksets, team_size, vector_length);
  }

  bool is_vector = (X_colmap_lcl.extent(1) == 1);
  if (is_vector) {
    // Vector case: overlap the halo Import of X with the on-rank part of the
    // residual, then apply the off-rank columns once the Import has finished.
    using functor_type = LocalResidualFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,false,true,true>;
    functor_type func (A_lcl, X_colmap_lcl, B_lcl, R_lcl, rows_per_team, offsets, X_domainmap_lcl);
    Kokkos::parallel_for("residual-vector",policy,func);

    RCP<const import_type> importer = A.getGraph ()->getImporter ();
    X_colmap.endImport (X_domainmap, *importer, INSERT, true);

    using functor_type2 = OffRankUpdateFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,false>;
    functor_type2 func2 (A_lcl, X_colmap_lcl, R_lcl, rows_per_team, offsets);
    Kokkos::parallel_for("residual-vector-offrank",policy,func2);
  }
  else {
    // MultiVector case.
    using functor_type = LocalResidualFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,true,true,true>;
    functor_type func (A_lcl, X_colmap_lcl, B_lcl, R_lcl, rows_per_team, offsets, X_domainmap_lcl);
    Kokkos::parallel_for("residual-multivector",policy,func);

    RCP<const import_type> importer = A.getGraph ()->getImporter ();
    X_colmap.endImport (X_domainmap, *importer, INSERT, true);

    using functor_type2 = OffRankUpdateFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,true>;
    functor_type2 func2 (A_lcl, X_colmap_lcl, R_lcl, rows_per_team, offsets);
    Kokkos::parallel_for("residual-vector-offrank",policy,func2);
  }
}
//! Computes R = B - A * X
template<class SC, class LO, class GO, class NO>
void residual(const Operator<SC,LO,GO,NO> &   Aop,
              const MultiVector<SC,LO,GO,NO> & X_in,
              const MultiVector<SC,LO,GO,NO> & B_in,
              MultiVector<SC,LO,GO,NO> & R_in) {
  using Tpetra::Details::ProfilingRegion;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcp_const_cast;
  using Teuchos::rcpFromRef;

  const bool debug = ::Tpetra::Details::Behavior::debug ();
  const bool skipCopyAndPermuteIfPossible = ::Tpetra::Details::Behavior::skipCopyAndPermuteIfPossible ();
  const bool overlapCommunicationAndComputation = ::Tpetra::Details::Behavior::overlapCommunicationAndComputation ();
  if (overlapCommunicationAndComputation)
    TEUCHOS_ASSERT(skipCopyAndPermuteIfPossible);

  // Restricted mode skips the copy and permutation of the locally owned part
  // of X during the Import; it is only used when the domain and column Maps
  // are locally fitted.
  bool restrictedMode = false;

  const CrsMatrix<SC,LO,GO,NO> * Apt = dynamic_cast<const CrsMatrix<SC,LO,GO,NO>*>(&Aop);
  if (!Apt) {
    // Aop is not a CrsMatrix, so the fused kernels above do not apply.
    // Fall back to R = B - A*X via apply() followed by update().
    SC one = Teuchos::ScalarTraits<SC>::one(), negone = -one, zero = Teuchos::ScalarTraits<SC>::zero();
    Aop.apply(X_in,R_in,Teuchos::NO_TRANS, one, zero);
    R_in.update(one,B_in,negone);
    return;
  }
  const CrsMatrix<SC,LO,GO,NO> & A = *Apt;
  using import_type = typename CrsMatrix<SC,LO,GO,NO>::import_type;
  using export_type = typename CrsMatrix<SC,LO,GO,NO>::export_type;
  using MV = MultiVector<SC,LO,GO,NO>;
  using graph_type = typename CrsMatrix<SC,LO,GO,NO>::crs_graph_type;
  using offset_type = typename graph_type::offset_device_view_type;

  // Whether R is locally replicated while the matrix is distributed; in that
  // case the contributions must be summed across processes at the end.
  const bool R_is_replicated =
    (! R_in.isDistributed () && A.getComm ()->getSize () != 1);

  RCP<const import_type> importer = A.getGraph ()->getImporter ();
  RCP<const export_type> exporter = A.getGraph ()->getExporter ();
  // Temporary MV for the Import.  After this block, X_colMap is a column Map
  // MultiVector ready to hand to the local residual kernel.
  RCP<MV> X_colMap;
  if (importer.is_null ()) {
    if (! X_in.isConstantStride ()) {
      // Some kernels cannot handle an input MV with nonconstant stride, so
      // copy X_in into the cached column (== domain) Map MultiVector.
      RCP<MV> X_colMapNonConst = A.getColumnMapMultiVector (X_in, true);
      Tpetra::deep_copy (*X_colMapNonConst, X_in);
      X_colMap = X_colMapNonConst;
    }
    else {
      // The domain and column Maps are the same, so use X_in directly.
      X_colMap = rcp_const_cast<MV> (rcpFromRef (X_in) );
    }
  }
  else { // Import X from the domain Map to the column Map.
    X_colMap = A.getColumnMapMultiVector (X_in);

    // Restricted mode is only legal when the domain and column Maps are
    // locally fitted.
    restrictedMode = skipCopyAndPermuteIfPossible && importer->isLocallyFitted();

    if (debug && restrictedMode) {
      TEUCHOS_TEST_FOR_EXCEPTION
        (!importer->getTargetMap()->isLocallyFitted(*importer->getSourceMap()), std::runtime_error,
         "Source map and target map are not locally fitted, but Tpetra::residual thinks they are.");
    }

    // Start the asynchronous Import; it is finished right before the off-rank
    // columns of X are needed.
    X_colMap->beginImport (X_in, *importer, INSERT, restrictedMode);
  }
  // Row Map versions of the residual R and right-hand side B, handling the
  // nonconstant-stride and Export cases.
  RCP<MV> R_rowMap;
  if(exporter.is_null()) {
    if (! R_in.isConstantStride ()) {
      R_rowMap = A.getRowMapMultiVector (R_in);
    }
    else {
      R_rowMap = rcpFromRef (R_in);
    }
  }
  else {
    R_rowMap = A.getRowMapMultiVector (R_in);
  }

  RCP<const MV> B_rowMap;
  if(exporter.is_null()) {
    if (! B_in.isConstantStride ()) {
      // Allocate row Map storage and copy B into it.
      RCP<MV> B_rowMapNonConst = rcp(new MV(A.getRowMap(),B_in.getNumVectors()));
      Tpetra::deep_copy (*B_rowMapNonConst, B_in);
      B_rowMap = rcp_const_cast<const MV> (B_rowMapNonConst);
    }
    else {
      B_rowMap = rcpFromRef (B_in);
    }
  }
  else {
    // The row Map and range Map differ, so bring B into row Map form first.
    ProfilingRegion regionExport ("Tpetra::CrsMatrix::residual: B Import");
    RCP<MV> B_rowMapNonConst = rcp(new MV(A.getRowMap(),B_in.getNumVectors()));
    B_rowMapNonConst->doImport(B_in, *exporter, ADD);
    B_rowMap = rcp_const_cast<const MV> (B_rowMapNonConst);
  }
  // Per-row boundaries between on-rank and off-rank entries of A, needed by
  // the restricted-mode and overlapped kernels.
  offset_type offsets;
  if (restrictedMode || overlapCommunicationAndComputation)
    A.getCrsGraph()->getLocalOffRankOffsets(offsets);

  // With a nontrivial Export, compute the residual into the row Map
  // MultiVector and then Export it into R_in.
  if (! exporter.is_null ()) {
    if ( ! importer.is_null ())
      X_colMap->endImport (X_in, *importer, INSERT, restrictedMode);
    if (restrictedMode && !importer.is_null ())
      localResidual (A, *X_colMap, *B_rowMap, *R_rowMap, offsets, &X_in);
    else
      localResidual (A, *X_colMap, *B_rowMap, *R_rowMap, offsets);

    {
      ProfilingRegion regionExport ("Tpetra::CrsMatrix::residual: R Export");
      // Export (sum) the row Map residual into R_in.
      R_in.doExport (*R_rowMap, *exporter, ADD);
    }
  }
  else { // No Export: the row Map and range Map are the same.

    if (restrictedMode) {
      if (overlapCommunicationAndComputation) {
        // Finish the Import inside the kernel driver, overlapping the halo
        // exchange with the on-rank part of the residual.
        localResidualWithCommCompOverlap (A, *X_colMap, *B_rowMap, *R_rowMap, offsets, X_in);
      }
      else {
        X_colMap->endImport (X_in, *importer, INSERT, restrictedMode);
        localResidual (A, *X_colMap, *B_rowMap, *R_rowMap, offsets, &X_in);
      }
    }
    else {
      if ( ! importer.is_null ())
        X_colMap->endImport (X_in, *importer, INSERT, restrictedMode);
      localResidual (A, *X_colMap, *B_rowMap, *R_rowMap, offsets);
    }

    // If R_in does not have constant stride, the kernels wrote into the cached
    // row Map MultiVector, so copy the result back out.
    if (! R_in.isConstantStride ()) {
      Tpetra::deep_copy (R_in, *R_rowMap);
    }
  }
  // If the range Map is locally replicated, sum the contributions from all
  // processes into R_in.
  if (R_is_replicated) {
    ProfilingRegion regionReduce ("Tpetra::CrsMatrix::residual: Reduce Y");
    R_in.reduce ();
  }
}