#ifndef TPETRA_DETAILS_DISTRIBUTOR_ACTOR_HPP
#define TPETRA_DETAILS_DISTRIBUTOR_ACTOR_HPP

#include "Tpetra_Details_DistributorPlan.hpp"

#include "Teuchos_Array.hpp"
#include "Teuchos_Comm.hpp"
#include "Teuchos_RCP.hpp"
#include "Teuchos_Time.hpp"

#include "Kokkos_TeuchosCommAdapters.hpp"

template <class View1, class View2>
constexpr bool areKokkosViews = Kokkos::is_view<View1>::value && Kokkos::is_view<View2>::value;

class DistributorActor {
  static constexpr int DEFAULT_MPI_TAG = 1;

public:
  DistributorActor();
  DistributorActor(const DistributorActor& otherActor);

  template <class ExpView, class ImpView>
  void doPostsAndWaits(const DistributorPlan& plan,
                       const ExpView& exports,
                       size_t numPackets,
                       const ImpView& imports);

  template <class ExpView, class ImpView>
  void doPostsAndWaits(const DistributorPlan& plan,
                       const ExpView& exports,
                       const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
                       const ImpView& imports,
                       const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);

  template <class ExpView, class ImpView>
  void doPosts(const DistributorPlan& plan,
               const ExpView& exports,
               size_t numPackets,
               const ImpView& imports);

  template <class ExpView, class ImpView>
  void doPosts(const DistributorPlan& plan,
               const ExpView& exports,
               const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
               const ImpView& imports,
               const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);

  void doWaits(const DistributorPlan& plan);

private:
  int mpiTag_;
  Teuchos::Array<Teuchos::RCP<Teuchos::CommRequest<int>>> requests_;

#ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
  Teuchos::RCP<Teuchos::Time> timer_doPosts3KV_;
  Teuchos::RCP<Teuchos::Time> timer_doPosts4KV_;
  Teuchos::RCP<Teuchos::Time> timer_doWaits_;
  Teuchos::RCP<Teuchos::Time> timer_doPosts3KV_recvs_;
  Teuchos::RCP<Teuchos::Time> timer_doPosts4KV_recvs_;
  Teuchos::RCP<Teuchos::Time> timer_doPosts3KV_barrier_;
  Teuchos::RCP<Teuchos::Time> timer_doPosts4KV_barrier_;
  Teuchos::RCP<Teuchos::Time> timer_doPosts3KV_sends_;
  Teuchos::RCP<Teuchos::Time> timer_doPosts4KV_sends_;
  Teuchos::RCP<Teuchos::Time> timer_doPosts3KV_sends_slow_;
  Teuchos::RCP<Teuchos::Time> timer_doPosts4KV_sends_slow_;
  Teuchos::RCP<Teuchos::Time> timer_doPosts3KV_sends_fast_;
  Teuchos::RCP<Teuchos::Time> timer_doPosts4KV_sends_fast_;
#endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS
};
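
// Illustrative usage sketch (not part of this header): a DistributorActor
// executes the communication pattern described by a DistributorPlan that is
// assumed to have been built elsewhere (e.g., by Tpetra::Distributor). The
// 'plan', 'exports', 'imports', and 'numPackets' names below are placeholders.
//
//   DistributorActor actor;
//   actor.doPostsAndWaits(plan, exports, numPackets, imports);
//
//   // Or, to overlap communication with local work:
//   actor.doPosts(plan, exports, numPackets, imports);
//   // ... local work ...
//   actor.doWaits(plan);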

template <class ExpView, class ImpView>
void DistributorActor::doPostsAndWaits(const DistributorPlan& plan,
                                       const ExpView& exports,
                                       size_t numPackets,
                                       const ImpView& imports)
{
  static_assert(areKokkosViews<ExpView, ImpView>,
                "Data arrays for DistributorActor::doPostsAndWaits must be Kokkos::Views");
  doPosts(plan, exports, numPackets, imports);
  doWaits(plan);
}

template <class ExpView, class ImpView>
void DistributorActor::doPostsAndWaits(const DistributorPlan& plan,
                                       const ExpView& exports,
                                       const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
                                       const ImpView& imports,
                                       const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
{
  static_assert(areKokkosViews<ExpView, ImpView>,
                "Data arrays for DistributorActor::doPostsAndWaits must be Kokkos::Views");
  doPosts(plan, exports, numExportPacketsPerLID, imports, numImportPacketsPerLID);
  doWaits(plan);
}

template <typename ViewType>
using HostAccessibility = Kokkos::SpaceAccessibility<Kokkos::DefaultHostExecutionSpace, typename ViewType::memory_space>;

template <typename DstViewType, typename SrcViewType>
using enableIfHostAccessible = std::enable_if_t<HostAccessibility<DstViewType>::accessible &&
                                                HostAccessibility<SrcViewType>::accessible>;

template <typename DstViewType, typename SrcViewType>
using enableIfNotHostAccessible = std::enable_if_t<!HostAccessibility<DstViewType>::accessible ||
                                                   !HostAccessibility<SrcViewType>::accessible>;
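
// These aliases steer overload selection for packOffset below: if both Views
// are accessible from the default host execution space, the memcpy overload is
// chosen; otherwise the copy goes through Kokkos::Compat::deep_copy_offset.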

template <typename DstViewType, typename SrcViewType>
enableIfHostAccessible<DstViewType, SrcViewType>
packOffset(const DstViewType& dst,
           const SrcViewType& src,
           const size_t dst_offset,
           const size_t src_offset,
           const size_t size)
{
  memcpy(dst.data()+dst_offset, src.data()+src_offset, size*sizeof(typename DstViewType::value_type));
}

template <typename DstViewType, typename SrcViewType>
enableIfNotHostAccessible<DstViewType, SrcViewType>
packOffset(const DstViewType& dst,
           const SrcViewType& src,
           const size_t dst_offset,
           const size_t src_offset,
           const size_t size)
{
  Kokkos::Compat::deep_copy_offset(dst, src, dst_offset, src_offset, size);
}
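
// Illustrative sketch of packOffset (hypothetical buffers, not part of this
// header): copy 'size' entries from 'src' starting at 'src_offset' into 'dst'
// starting at 'dst_offset'; the overload is selected at compile time from the
// Views' memory spaces.
//
//   Kokkos::View<double*> dst("dst", 100), src("src", 100);
//   packOffset(dst, src, /*dst_offset=*/0, /*src_offset=*/10, /*size=*/5);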

template <class ExpView, class ImpView>
void DistributorActor::doPosts(const DistributorPlan& plan,
                               const ExpView& exports,
                               size_t numPackets,
                               const ImpView& imports)
{
  static_assert(areKokkosViews<ExpView, ImpView>,
                "Data arrays for DistributorActor::doPosts must be Kokkos::Views");
  using Teuchos::Array;
  using Teuchos::as;
  using Teuchos::FancyOStream;
  using Teuchos::includesVerbLevel;
  using Teuchos::ireceive;
  using Teuchos::isend;
  using Teuchos::send;
  using Teuchos::TypeNameTraits;
  using Teuchos::typeName;
  using Kokkos::Compat::create_const_view;
  using Kokkos::Compat::create_view;
  using Kokkos::Compat::subview_offset;
  using Kokkos::Compat::deep_copy_offset;
  typedef Array<size_t>::size_type size_type;
  typedef ExpView exports_view_type;
  typedef ImpView imports_view_type;

#ifdef KOKKOS_ENABLE_CUDA
  static_assert
    (! std::is_same<typename ExpView::memory_space, Kokkos::CudaUVMSpace>::value &&
     ! std::is_same<typename ImpView::memory_space, Kokkos::CudaUVMSpace>::value,
     "Please do not use Tpetra::Distributor with UVM allocations. "
     "See Trilinos GitHub issue #1088.");
#endif // KOKKOS_ENABLE_CUDA

#ifdef KOKKOS_ENABLE_SYCL
  static_assert
    (! std::is_same<typename ExpView::memory_space, Kokkos::Experimental::SYCLSharedUSMSpace>::value &&
     ! std::is_same<typename ImpView::memory_space, Kokkos::Experimental::SYCLSharedUSMSpace>::value,
     "Please do not use Tpetra::Distributor with SharedUSM allocations. "
     "See Trilinos GitHub issue #1088 (corresponding to CUDA).");
#endif // KOKKOS_ENABLE_SYCL

#ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
  Teuchos::TimeMonitor timeMon (*timer_doPosts3KV_);
#endif

  const int myRank = plan.getComm()->getRank ();
  const Details::EDistributorSendType sendType = plan.getSendType();

  if (sendType == Details::DISTRIBUTOR_ALLTOALL) {
#ifdef HAVE_TPETRA_MPI
    TEUCHOS_TEST_FOR_EXCEPTION(!plan.getIndicesTo().is_null(),
                               std::runtime_error,
                               "Send Type=\"Alltoall\" only works for fast-path communication.");

    auto comm = plan.getComm();
    std::vector<int> sendcounts (comm->getSize(), 0);
    std::vector<int> sdispls (comm->getSize(), 0);
    std::vector<int> recvcounts (comm->getSize(), 0);
    std::vector<int> rdispls (comm->getSize(), 0);

    size_t numBlocks = plan.getNumSends() + plan.hasSelfMessage();
    for (size_t p = 0; p < numBlocks; ++p) {
      sdispls[plan.getProcsTo()[p]] = plan.getStartsTo()[p] * numPackets;
      sendcounts[plan.getProcsTo()[p]] = plan.getLengthsTo()[p] * numPackets;
    }

    const size_type actualNumReceives = as<size_type> (plan.getNumReceives()) +
      as<size_type> (plan.hasSelfMessage() ? 1 : 0);
    size_t curBufferOffset = 0;
    for (size_type i = 0; i < actualNumReceives; ++i) {
      const size_t curBufLen = plan.getLengthsFrom()[i] * numPackets;
      TEUCHOS_TEST_FOR_EXCEPTION(
        curBufferOffset + curBufLen > static_cast<size_t> (imports.size ()),
        std::logic_error, "Tpetra::Distributor::doPosts(3 args, Kokkos): "
        "Exceeded size of 'imports' array in packing loop on Process " <<
        myRank << ". imports.size() = " << imports.size () << " < "
        "curBufferOffset(" << curBufferOffset << ") + curBufLen(" <<
        curBufLen << ").");
      rdispls[plan.getProcsFrom()[i]] = curBufferOffset;
      recvcounts[plan.getProcsFrom()[i]] = curBufLen;
      curBufferOffset += curBufLen;
    }

    Teuchos::RCP<const Teuchos::MpiComm<int> > mpiComm = Teuchos::rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
    Teuchos::RCP<const Teuchos::OpaqueWrapper<MPI_Comm> > rawComm = mpiComm->getRawMpiComm();
    using T = typename exports_view_type::non_const_value_type;
    T t;
    MPI_Datatype rawType = ::Tpetra::Details::MpiTypeTraits<T>::getType (t);

    const int err = MPI_Alltoallv(exports.data(), sendcounts.data(), sdispls.data(), rawType,
                                  imports.data(), recvcounts.data(), rdispls.data(), rawType,
                                  (*rawComm)());

    TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
                               "MPI_Alltoallv failed with error \""
                               << Teuchos::mpiErrorCodeToString (err) << "\".");

    return;
#else
    if (plan.hasSelfMessage()) {
      // Without MPI, the only possible message is the self message;
      // copy it directly from exports to imports.
      size_t selfReceiveOffset = 0;
      deep_copy_offset(imports, exports, selfReceiveOffset,
                       plan.getStartsTo()[0]*numPackets,
                       plan.getLengthsTo()[0]*numPackets);
    }
    return;
#endif // HAVE_TPETRA_MPI
  }

  size_t selfReceiveOffset = 0;

#ifdef HAVE_TPETRA_DEBUG
  TEUCHOS_TEST_FOR_EXCEPTION
    (requests_.size () != 0, std::logic_error,
     "Tpetra::Distributor::doPosts(3 args, Kokkos): Process "
     << myRank << ": requests_.size() = " << requests_.size () << " != 0.");
#endif // HAVE_TPETRA_DEBUG

  // getNumReceives() does _not_ include the self message, if there is
  // one, so include it here in the "actual" number of receives to post.
  const size_type actualNumReceives = as<size_type> (plan.getNumReceives()) +
    as<size_type> (plan.hasSelfMessage() ? 1 : 0);
  requests_.resize (0);

  // Post the nonblocking receives before the sends.
#ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
  Teuchos::TimeMonitor timeMonRecvs (*timer_doPosts3KV_recvs_);
#endif

  size_t curBufferOffset = 0;
  for (size_type i = 0; i < actualNumReceives; ++i) {
    const size_t curBufLen = plan.getLengthsFrom()[i] * numPackets;
    if (plan.getProcsFrom()[i] != myRank) {
      // Not a self-receive: set up the persisting view (recvBuf) of the
      // imports buffer and post the nonblocking receive.
      TEUCHOS_TEST_FOR_EXCEPTION(
        curBufferOffset + curBufLen > static_cast<size_t> (imports.size ()),
        std::logic_error, "Tpetra::Distributor::doPosts(3 args, Kokkos): "
        "Exceeded size of 'imports' array in packing loop on Process " <<
        myRank << ". imports.size() = " << imports.size () << " < "
        "curBufferOffset(" << curBufferOffset << ") + curBufLen(" <<
        curBufLen << ").");
      imports_view_type recvBuf =
        subview_offset (imports, curBufferOffset, curBufLen);
      requests_.push_back (ireceive<int> (recvBuf, plan.getProcsFrom()[i],
                                          mpiTag_, *plan.getComm()));
    }
    else { // Receiving from myself
      selfReceiveOffset = curBufferOffset; // Remember the self-receive offset
    }
    curBufferOffset += curBufLen;
  }

#ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
  Teuchos::TimeMonitor timeMonSends (*timer_doPosts3KV_sends_);
#endif

  size_t numBlocks = plan.getNumSends() + plan.hasSelfMessage();
  size_t procIndex = 0;
  while ((procIndex < numBlocks) && (plan.getProcsTo()[procIndex] < myRank)) {
    ++procIndex;
  }
  if (procIndex == numBlocks) {
    procIndex = 0;
  }

  size_t selfNum = 0;
  size_t selfIndex = 0;
  if (plan.getIndicesTo().is_null()) {
#ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
    Teuchos::TimeMonitor timeMonSends2 (*timer_doPosts3KV_sends_fast_);
#endif

    // Data are already laid out (blocked) by process, so send directly
    // from the exports buffer.
    for (size_t i = 0; i < numBlocks; ++i) {
      size_t p = i + procIndex;
      if (p > (numBlocks - 1)) {
        p -= numBlocks;
      }

      if (plan.getProcsTo()[p] != myRank) {
        exports_view_type tmpSend = subview_offset(
          exports, plan.getStartsTo()[p]*numPackets, plan.getLengthsTo()[p]*numPackets);

        if (sendType == Details::DISTRIBUTOR_ISEND) {
          exports_view_type tmpSendBuf =
            subview_offset (exports, plan.getStartsTo()[p] * numPackets,
                            plan.getLengthsTo()[p] * numPackets);
          requests_.push_back (isend<int> (tmpSendBuf, plan.getProcsTo()[p],
                                           mpiTag_, *plan.getComm()));
        }
        else { // DISTRIBUTOR_SEND (blocking send)
          send<int> (tmpSend,
                     as<int> (tmpSend.size ()),
                     plan.getProcsTo()[p], mpiTag_, *plan.getComm());
        }
      }
      else { // "Sending" the message to myself
        selfNum = p;
      }
    }

    if (plan.hasSelfMessage()) {
      // "Send a message to myself" by copying directly from the
      // export buffer to the import buffer.
      deep_copy_offset(imports, exports, selfReceiveOffset,
                       plan.getStartsTo()[selfNum]*numPackets,
                       plan.getLengthsTo()[selfNum]*numPackets);
    }
  }
  else { // data are not blocked by process; use a send buffer
#ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
    Teuchos::TimeMonitor timeMonSends2 (*timer_doPosts3KV_sends_slow_);
#endif

    typedef typename ExpView::non_const_value_type Packet;
    typedef typename ExpView::array_layout Layout;
    typedef typename ExpView::device_type Device;
    typedef typename ExpView::memory_traits Mem;

    // This buffer is long enough for only one message at a time, so a
    // blocking send is used here regardless of the requested send type.
#ifdef HAVE_TPETRA_DEBUG
    if (sendType != Details::DISTRIBUTOR_SEND) {
      if (plan.getComm()->getRank() == 0)
        std::cout << "The requested Tpetra send type "
                  << DistributorSendTypeEnumToString(sendType)
                  << " requires Distributor data to be ordered by"
                  << " the receiving processor rank.  Since these"
                  << " data are not ordered, Tpetra will use Send"
                  << " instead." << std::endl;
    }
#endif

    Kokkos::View<Packet*,Layout,Device,Mem> sendArray ("sendArray",
                                                       plan.getMaxSendLength() * numPackets);

    for (size_t i = 0; i < numBlocks; ++i) {
      size_t p = i + procIndex;
      if (p > (numBlocks - 1)) {
        p -= numBlocks;
      }

      if (plan.getProcsTo()[p] != myRank) {
        size_t sendArrayOffset = 0;
        size_t j = plan.getStartsTo()[p];
        for (size_t k = 0; k < plan.getLengthsTo()[p]; ++k, ++j) {
          packOffset(sendArray, exports, sendArrayOffset, plan.getIndicesTo()[j]*numPackets, numPackets);
          sendArrayOffset += numPackets;
        }
        typename ExpView::execution_space().fence();

        ImpView tmpSend =
          subview_offset(sendArray, size_t(0), plan.getLengthsTo()[p]*numPackets);

        send<int> (tmpSend,
                   as<int> (tmpSend.size ()),
                   plan.getProcsTo()[p], mpiTag_, *plan.getComm());
      }
      else { // "Sending" the message to myself
        selfNum = p;
        selfIndex = plan.getStartsTo()[p];
      }
    }

    if (plan.hasSelfMessage()) {
      for (size_t k = 0; k < plan.getLengthsTo()[selfNum]; ++k) {
        packOffset(imports, exports, selfReceiveOffset, plan.getIndicesTo()[selfIndex]*numPackets, numPackets);
        ++selfIndex;
        selfReceiveOffset += numPackets;
      }
    }
  }
}

template <class ExpView, class ImpView>
void DistributorActor::doPosts(const DistributorPlan& plan,
                               const ExpView& exports,
                               const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
                               const ImpView& imports,
                               const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
{
  static_assert(areKokkosViews<ExpView, ImpView>,
                "Data arrays for DistributorActor::doPosts must be Kokkos::Views");
  using Teuchos::Array;
  using Teuchos::as;
  using Teuchos::ireceive;
  using Teuchos::isend;
  using Teuchos::send;
  using Teuchos::TypeNameTraits;
  using Kokkos::Compat::create_const_view;
  using Kokkos::Compat::create_view;
  using Kokkos::Compat::subview_offset;
  using Kokkos::Compat::deep_copy_offset;
  typedef Array<size_t>::size_type size_type;
  typedef ExpView exports_view_type;
  typedef ImpView imports_view_type;

#ifdef KOKKOS_ENABLE_CUDA
  static_assert (! std::is_same<typename ExpView::memory_space, Kokkos::CudaUVMSpace>::value &&
                 ! std::is_same<typename ImpView::memory_space, Kokkos::CudaUVMSpace>::value,
                 "Please do not use Tpetra::Distributor with UVM "
                 "allocations. See GitHub issue #1088.");
#endif // KOKKOS_ENABLE_CUDA

#ifdef KOKKOS_ENABLE_SYCL
  static_assert (! std::is_same<typename ExpView::memory_space, Kokkos::Experimental::SYCLSharedUSMSpace>::value &&
                 ! std::is_same<typename ImpView::memory_space, Kokkos::Experimental::SYCLSharedUSMSpace>::value,
                 "Please do not use Tpetra::Distributor with SharedUSM "
                 "allocations. See GitHub issue #1088 (corresponding to CUDA).");
#endif // KOKKOS_ENABLE_SYCL

#ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
  Teuchos::TimeMonitor timeMon (*timer_doPosts4KV_);
#endif

  const Details::EDistributorSendType sendType = plan.getSendType();

  if (sendType == Details::DISTRIBUTOR_ALLTOALL) {
#ifdef HAVE_TPETRA_MPI
    TEUCHOS_TEST_FOR_EXCEPTION(!plan.getIndicesTo().is_null(),
                               std::runtime_error,
                               "Send Type=\"Alltoall\" only works for fast-path communication.");

    auto comm = plan.getComm();
    std::vector<int> sendcounts (comm->getSize(), 0);
    std::vector<int> sdispls (comm->getSize(), 0);
    std::vector<int> recvcounts (comm->getSize(), 0);
    std::vector<int> rdispls (comm->getSize(), 0);

    size_t curPKToffset = 0;
    for (size_t pp=0; pp<plan.getNumSends(); ++pp) {
      sdispls[plan.getProcsTo()[pp]] = curPKToffset;
      size_t numPackets = 0;
      for (size_t j=plan.getStartsTo()[pp]; j<plan.getStartsTo()[pp]+plan.getLengthsTo()[pp]; ++j) {
        numPackets += numExportPacketsPerLID[j];
      }
      sendcounts[plan.getProcsTo()[pp]] = numPackets;
      curPKToffset += numPackets;
    }

    const size_type actualNumReceives = as<size_type> (plan.getNumReceives()) +
      as<size_type> (plan.hasSelfMessage() ? 1 : 0);

    size_t curBufferOffset = 0;
    size_t curLIDoffset = 0;
    for (size_type i = 0; i < actualNumReceives; ++i) {
      size_t totalPacketsFrom_i = 0;
      for (size_t j = 0; j < plan.getLengthsFrom()[i]; ++j) {
        totalPacketsFrom_i += numImportPacketsPerLID[curLIDoffset+j];
      }
      curLIDoffset += plan.getLengthsFrom()[i];

      rdispls[plan.getProcsFrom()[i]] = curBufferOffset;
      recvcounts[plan.getProcsFrom()[i]] = totalPacketsFrom_i;
      curBufferOffset += totalPacketsFrom_i;
    }

    Teuchos::RCP<const Teuchos::MpiComm<int> > mpiComm = Teuchos::rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
    Teuchos::RCP<const Teuchos::OpaqueWrapper<MPI_Comm> > rawComm = mpiComm->getRawMpiComm();
    using T = typename exports_view_type::non_const_value_type;
    T t;
    MPI_Datatype rawType = ::Tpetra::Details::MpiTypeTraits<T>::getType (t);

    const int err = MPI_Alltoallv(exports.data(), sendcounts.data(), sdispls.data(), rawType,
                                  imports.data(), recvcounts.data(), rdispls.data(), rawType,
                                  (*rawComm)());

    TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
                               "MPI_Alltoallv failed with error \""
                               << Teuchos::mpiErrorCodeToString (err) << "\".");

    return;
#else
    if (plan.hasSelfMessage()) {

      size_t selfReceiveOffset = 0;

      // Compute the starting offset into 'exports' for each send, and
      // the number of packets in each send.
      Array<size_t> sendPacketOffsets(plan.getNumSends(),0), packetsPerSend(plan.getNumSends(),0);
      size_t maxNumPackets = 0;
      size_t curPKToffset = 0;
      for (size_t pp=0; pp<plan.getNumSends(); ++pp) {
        sendPacketOffsets[pp] = curPKToffset;
        size_t numPackets = 0;
        for (size_t j=plan.getStartsTo()[pp]; j<plan.getStartsTo()[pp]+plan.getLengthsTo()[pp]; ++j) {
          numPackets += numExportPacketsPerLID[j];
        }
        if (numPackets > maxNumPackets) maxNumPackets = numPackets;
        packetsPerSend[pp] = numPackets;
        curPKToffset += numPackets;
      }

      deep_copy_offset(imports, exports, selfReceiveOffset,
                       sendPacketOffsets[0], packetsPerSend[0]);
    }
    return;
#endif // HAVE_TPETRA_MPI
  }

  const int myProcID = plan.getComm()->getRank ();
  size_t selfReceiveOffset = 0;

#ifdef HAVE_TPETRA_DEBUG
  // Different messages may have different numbers of packets.
  size_t totalNumImportPackets = 0;
  for (size_type ii = 0; ii < numImportPacketsPerLID.size (); ++ii) {
    totalNumImportPackets += numImportPacketsPerLID[ii];
  }
  TEUCHOS_TEST_FOR_EXCEPTION(
    imports.extent (0) < totalNumImportPackets, std::runtime_error,
    "Tpetra::Distributor::doPosts(4 args, Kokkos): The 'imports' array must have "
    "enough entries to hold the expected number of import packets.  "
    "imports.extent(0) = " << imports.extent (0) << " < "
    "totalNumImportPackets = " << totalNumImportPackets << ".");
  TEUCHOS_TEST_FOR_EXCEPTION
    (requests_.size () != 0, std::logic_error, "Tpetra::Distributor::"
     "doPosts(4 args, Kokkos): Process " << myProcID << ": requests_.size () = "
     << requests_.size () << " != 0.");
#endif // HAVE_TPETRA_DEBUG

  // getNumReceives() does _not_ include the self message, if there is
  // one, so include it here in the "actual" number of receives to post.
  const size_type actualNumReceives = as<size_type> (plan.getNumReceives()) +
    as<size_type> (plan.hasSelfMessage() ? 1 : 0);
  requests_.resize (0);

  // Post the nonblocking receives before the sends.
#ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
  Teuchos::TimeMonitor timeMonRecvs (*timer_doPosts4KV_recvs_);
#endif

  size_t curBufferOffset = 0;
  size_t curLIDoffset = 0;
  for (size_type i = 0; i < actualNumReceives; ++i) {
    size_t totalPacketsFrom_i = 0;
    for (size_t j = 0; j < plan.getLengthsFrom()[i]; ++j) {
      totalPacketsFrom_i += numImportPacketsPerLID[curLIDoffset+j];
    }
    curLIDoffset += plan.getLengthsFrom()[i];
    if (plan.getProcsFrom()[i] != myProcID && totalPacketsFrom_i) {
      // Not a self-receive, and at least one packet to receive: set up
      // the persisting view of the imports buffer and post the receive.
      imports_view_type recvBuf =
        subview_offset (imports, curBufferOffset, totalPacketsFrom_i);
      requests_.push_back (ireceive<int> (recvBuf, plan.getProcsFrom()[i],
                                          mpiTag_, *plan.getComm()));
    }
    else { // Receiving these packet(s) from myself
      selfReceiveOffset = curBufferOffset; // Remember the self-receive offset
    }
    curBufferOffset += totalPacketsFrom_i;
  }

#ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
  Teuchos::TimeMonitor timeMonSends (*timer_doPosts4KV_sends_);
#endif

  // Compute the starting offset into 'exports' for each send, and the
  // number of packets in each send.
  Array<size_t> sendPacketOffsets(plan.getNumSends(),0), packetsPerSend(plan.getNumSends(),0);
  size_t maxNumPackets = 0;
  size_t curPKToffset = 0;
  for (size_t pp=0; pp<plan.getNumSends(); ++pp) {
    sendPacketOffsets[pp] = curPKToffset;
    size_t numPackets = 0;
    for (size_t j=plan.getStartsTo()[pp]; j<plan.getStartsTo()[pp]+plan.getLengthsTo()[pp]; ++j) {
      numPackets += numExportPacketsPerLID[j];
    }
    if (numPackets > maxNumPackets) maxNumPackets = numPackets;
    packetsPerSend[pp] = numPackets;
    curPKToffset += numPackets;
  }

  size_t numBlocks = plan.getNumSends() + plan.hasSelfMessage();
  size_t procIndex = 0;
  while ((procIndex < numBlocks) && (plan.getProcsTo()[procIndex] < myProcID)) {
    ++procIndex;
  }
  if (procIndex == numBlocks) {
    procIndex = 0;
  }

  size_t selfNum = 0;
  size_t selfIndex = 0;
  if (plan.getIndicesTo().is_null()) {
#ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
    Teuchos::TimeMonitor timeMonSends2 (*timer_doPosts4KV_sends_fast_);
#endif

    // Data are already laid out (blocked) by process, so send directly
    // from the exports buffer.
    for (size_t i = 0; i < numBlocks; ++i) {
      size_t p = i + procIndex;
      if (p > (numBlocks - 1)) {
        p -= numBlocks;
      }

      if (plan.getProcsTo()[p] != myProcID && packetsPerSend[p] > 0) {
        exports_view_type tmpSend =
          subview_offset(exports, sendPacketOffsets[p], packetsPerSend[p]);

        if (sendType == Details::DISTRIBUTOR_ISEND) {
          exports_view_type tmpSendBuf =
            subview_offset (exports, sendPacketOffsets[p], packetsPerSend[p]);
          requests_.push_back (isend<int> (tmpSendBuf, plan.getProcsTo()[p],
                                           mpiTag_, *plan.getComm()));
        }
        else { // DISTRIBUTOR_SEND (blocking send)
          send<int> (tmpSend,
                     as<int> (tmpSend.size ()),
                     plan.getProcsTo()[p], mpiTag_, *plan.getComm());
        }
      }
      else { // "Sending" the message to myself
        selfNum = p;
      }
    }

    if (plan.hasSelfMessage()) {
      deep_copy_offset(imports, exports, selfReceiveOffset,
                       sendPacketOffsets[selfNum], packetsPerSend[selfNum]);
    }
  }
  else { // data are not blocked by process; use a send buffer
#ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
    Teuchos::TimeMonitor timeMonSends2 (*timer_doPosts4KV_sends_slow_);
#endif

    typedef typename ExpView::non_const_value_type Packet;
    typedef typename ExpView::array_layout Layout;
    typedef typename ExpView::device_type Device;
    typedef typename ExpView::memory_traits Mem;

    // This buffer is long enough for only one message at a time, so a
    // blocking send is used here regardless of the requested send type.
#ifdef HAVE_TPETRA_DEBUG
    if (sendType != Details::DISTRIBUTOR_SEND) {
      if (plan.getComm()->getRank() == 0)
        std::cout << "The requested Tpetra send type "
                  << DistributorSendTypeEnumToString(sendType)
                  << " requires Distributor data to be ordered by"
                  << " the receiving processor rank.  Since these"
                  << " data are not ordered, Tpetra will use Send"
                  << " instead." << std::endl;
    }
#endif

    Kokkos::View<Packet*,Layout,Device,Mem> sendArray ("sendArray",
                                                       maxNumPackets);
    Array<size_t> indicesOffsets (numExportPacketsPerLID.size(), 0);
    size_t ioffset = 0;
    for (int j=0; j<numExportPacketsPerLID.size(); ++j) {
      indicesOffsets[j] = ioffset;
      ioffset += numExportPacketsPerLID[j];
    }

    for (size_t i = 0; i < numBlocks; ++i) {
      size_t p = i + procIndex;
      if (p > (numBlocks - 1)) {
        p -= numBlocks;
      }

      if (plan.getProcsTo()[p] != myProcID) {
        size_t sendArrayOffset = 0;
        size_t j = plan.getStartsTo()[p];
        size_t numPacketsTo_p = 0;
        for (size_t k = 0; k < plan.getLengthsTo()[p]; ++k, ++j) {
          numPacketsTo_p += numExportPacketsPerLID[j];
          deep_copy_offset(sendArray, exports, sendArrayOffset,
                           indicesOffsets[j], numExportPacketsPerLID[j]);
          sendArrayOffset += numExportPacketsPerLID[j];
        }
        typename ExpView::execution_space().fence();

        if (numPacketsTo_p > 0) {
          ImpView tmpSend =
            subview_offset(sendArray, size_t(0), numPacketsTo_p);

          send<int> (tmpSend,
                     as<int> (tmpSend.size ()),
                     plan.getProcsTo()[p], mpiTag_, *plan.getComm());
        }
      }
      else { // "Sending" the message to myself
        selfNum = p;
        selfIndex = plan.getStartsTo()[p];
      }
    }

    if (plan.hasSelfMessage()) {
      for (size_t k = 0; k < plan.getLengthsTo()[selfNum]; ++k) {
        deep_copy_offset(imports, exports, selfReceiveOffset,
                         indicesOffsets[selfIndex],
                         numExportPacketsPerLID[selfIndex]);
        selfReceiveOffset += numExportPacketsPerLID[selfIndex];
        ++selfIndex;
      }
    }
  }
}