17#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
18#include <Kokkos_Macros.hpp>
20 "Including non-public Kokkos header files is not allowed.");
22#ifndef KOKKOS_EXECPOLICY_HPP
23#define KOKKOS_EXECPOLICY_HPP
25#include <Kokkos_Core_fwd.hpp>
26#include <impl/Kokkos_Traits.hpp>
27#include <impl/Kokkos_Error.hpp>
28#include <impl/Kokkos_AnalyzePolicy.hpp>
29#include <Kokkos_Concepts.hpp>
/// Empty tag type identifying the parallel-for pattern.
struct ParallelForTag {};

/// Empty tag type identifying the parallel-scan pattern.
struct ParallelScanTag {};

/// Empty tag type identifying the parallel-reduce pattern.
struct ParallelReduceTag {};
42 ChunkSize(
int value_) : value(value_) {}
66template <
class... Properties>
69 using traits = Impl::PolicyTraits<Properties...>;
72 typename traits::execution_space m_space;
73 typename traits::index_type m_begin;
74 typename traits::index_type m_end;
75 typename traits::index_type m_granularity;
76 typename traits::index_type m_granularity_mask;
78 template <
class... OtherProperties>
84 using member_type =
typename traits::index_type;
85 using index_type =
typename traits::index_type;
87 KOKKOS_INLINE_FUNCTION
const typename traits::execution_space& space()
const {
90 KOKKOS_INLINE_FUNCTION member_type begin()
const {
return m_begin; }
91 KOKKOS_INLINE_FUNCTION member_type end()
const {
return m_end; }
98 void operator()(
const int&)
const {}
100 template <
class... OtherProperties>
101 RangePolicy(
const RangePolicy<OtherProperties...>& p)
106 m_granularity(p.m_granularity),
107 m_granularity_mask(p.m_granularity_mask) {}
114 m_granularity_mask(0) {}
117 inline RangePolicy(
const typename traits::execution_space& work_space,
118 const member_type work_begin,
const member_type work_end)
119 : m_space(work_space),
120 m_begin(work_begin < work_end ? work_begin : 0),
121 m_end(work_begin < work_end ? work_end : 0),
123 m_granularity_mask(0) {
124 set_auto_chunk_size();
128 inline RangePolicy(
const member_type work_begin,
const member_type work_end)
129 :
RangePolicy(typename traits::execution_space(), work_begin, work_end) {
130 set_auto_chunk_size();
134 template <
class... Args>
135 inline RangePolicy(
const typename traits::execution_space& work_space,
136 const member_type work_begin,
const member_type work_end,
138 : m_space(work_space),
139 m_begin(work_begin < work_end ? work_begin : 0),
140 m_end(work_begin < work_end ? work_end : 0),
142 m_granularity_mask(0) {
143 set_auto_chunk_size();
148 template <
class... Args>
149 inline RangePolicy(
const member_type work_begin,
const member_type work_end,
151 :
RangePolicy(typename traits::execution_space(), work_begin, work_end) {
152 set_auto_chunk_size();
160 template <
class... Args>
161 inline void set(Args...) {
163 0 ==
sizeof...(Args),
164 "Kokkos::RangePolicy: unhandled constructor arguments encountered.");
167 template <
class... Args>
168 inline void set(
const ChunkSize& chunksize, Args... args) {
169 m_granularity = chunksize.value;
170 m_granularity_mask = m_granularity - 1;
176 inline member_type
chunk_size()
const {
return m_granularity; }
181 m_granularity_mask = m_granularity - 1;
187 inline void set_auto_chunk_size() {
188#ifdef KOKKOS_ENABLE_SYCL
189 if (std::is_same_v<
typename traits::execution_space,
190 Kokkos::Experimental::SYCL>) {
194 m_granularity_mask = 0;
198 auto concurrency =
static_cast<int64_t
>(m_space.concurrency());
199 if (concurrency == 0) concurrency = 1;
201 if (m_granularity > 0) {
202 if (!Impl::is_integral_power_of_two(m_granularity))
203 Kokkos::abort(
"RangePolicy blocking granularity must be power of two");
206 int64_t new_chunk_size = 1;
207 while (new_chunk_size * 100 * concurrency <
208 static_cast<int64_t
>(m_end - m_begin))
210 if (new_chunk_size < 128) {
212 while ((new_chunk_size * 40 * concurrency <
213 static_cast<int64_t
>(m_end - m_begin)) &&
214 (new_chunk_size < 128))
217 m_granularity = new_chunk_size;
218 m_granularity_mask = m_granularity - 1;
227 using work_tag =
typename RangePolicy<Properties...>::work_tag;
228 using member_type =
typename RangePolicy<Properties...>::member_type;
230 KOKKOS_INLINE_FUNCTION member_type begin()
const {
return m_begin; }
231 KOKKOS_INLINE_FUNCTION member_type end()
const {
return m_end; }
237 KOKKOS_INLINE_FUNCTION
240 : m_begin(0), m_end(0) {
243 const member_type work_part =
244 ((((range.end() - range.begin()) + (part_size - 1)) / part_size) +
245 range.m_granularity_mask) &
246 ~member_type(range.m_granularity_mask);
248 m_begin = range.begin() + work_part * part_rank;
249 m_end = m_begin + work_part;
251 if (range.end() < m_begin) m_begin = range.end();
252 if (range.end() < m_end) m_end = range.end();
273template <
class ExecSpace,
class... Properties>
274class TeamPolicyInternal :
public Impl::PolicyTraits<Properties...> {
276 using traits = Impl::PolicyTraits<Properties...>;
279 using index_type =
typename traits::index_type;
292 template <
class FunctorType>
293 static int team_size_max(
const FunctorType&);
305 template <
class FunctorType>
306 static int team_size_recommended(
const FunctorType&);
308 template <
class FunctorType>
309 static int team_size_recommended(
const FunctorType&,
const int&);
311 template <
class FunctorType>
312 int team_size_recommended(
const FunctorType& functor,
313 const int vector_length);
317 TeamPolicyInternal(
const typename traits::execution_space&,
318 int league_size_request,
int team_size_request,
319 int vector_length_request = 1);
321 TeamPolicyInternal(
const typename traits::execution_space&,
322 int league_size_request,
const Kokkos::AUTO_t&,
323 int vector_length_request = 1);
327 TeamPolicyInternal(
int league_size_request,
int team_size_request,
328 int vector_length_request = 1);
330 TeamPolicyInternal(
int league_size_request,
const Kokkos::AUTO_t&,
331 int vector_length_request = 1);
342 KOKKOS_INLINE_FUNCTION
int league_size()
const;
349 KOKKOS_INLINE_FUNCTION
int team_size()
const;
353 inline bool impl_auto_team_size()
const;
356 inline bool impl_auto_vector_length()
const;
358 static int vector_length_max();
360 KOKKOS_INLINE_FUNCTION
int impl_vector_length()
const;
362 inline typename traits::index_type chunk_size()
const;
364 inline TeamPolicyInternal& set_chunk_size(
int chunk_size);
371 KOKKOS_INLINE_FUNCTION
372 typename traits::execution_space::scratch_memory_space
team_shmem()
const;
391 template <
class JoinOp>
393 const typename JoinOp::value_type,
const JoinOp&)
const;
400 template <
typename Type>
401 KOKKOS_INLINE_FUNCTION Type
team_scan(
const Type& value)
const;
412 template <
typename Type>
413 KOKKOS_INLINE_FUNCTION Type
team_scan(
const Type& value,
414 Type*
const global_accum)
const;
420 PerTeamValue(
size_t arg);
423struct PerThreadValue {
425 PerThreadValue(
size_t arg);
428template <
class iType,
class... Args>
429struct ExtractVectorLength {
430 static inline iType value(
431 std::enable_if_t<std::is_integral<iType>::value, iType> val, Args...) {
434 static inline std::enable_if_t<!std::is_integral<iType>::value,
int> value(
435 std::enable_if_t<!std::is_integral<iType>::value, iType>, Args...) {
440template <
class iType,
class... Args>
441inline std::enable_if_t<std::is_integral<iType>::value, iType>
442extract_vector_length(iType val, Args...) {
446template <
class iType,
class... Args>
447inline std::enable_if_t<!std::is_integral<iType>::value,
int>
448extract_vector_length(iType, Args...) {
454Impl::PerTeamValue PerTeam(
const size_t& arg);
455Impl::PerThreadValue PerThread(
const size_t& arg);
457struct ScratchRequest {
463 inline ScratchRequest(
const int& level_,
464 const Impl::PerTeamValue& team_value) {
466 per_team = team_value.value;
470 inline ScratchRequest(
const int& level_,
471 const Impl::PerThreadValue& thread_value) {
474 per_thread = thread_value.value;
477 inline ScratchRequest(
const int& level_,
const Impl::PerTeamValue& team_value,
478 const Impl::PerThreadValue& thread_value) {
480 per_team = team_value.value;
481 per_thread = thread_value.value;
484 inline ScratchRequest(
const int& level_,
485 const Impl::PerThreadValue& thread_value,
486 const Impl::PerTeamValue& team_value) {
488 per_team = team_value.value;
489 per_thread = thread_value.value;
/// Checks a scratch-storage `level` argument; called by the TeamPolicy
/// set_scratch_size overloads before they forward to the internal policy.
/// NOTE(review): the accepted values and the failure behaviour live in the
/// out-of-line definition, which is not visible here.
void team_policy_check_valid_storage_level_argument(int level);
522template <
class... Properties>
524 :
public Impl::TeamPolicyInternal<
525 typename Impl::PolicyTraits<Properties...>::execution_space,
527 using internal_policy = Impl::TeamPolicyInternal<
528 typename Impl::PolicyTraits<Properties...>::execution_space,
531 template <
class... OtherProperties>
535 using traits = Impl::PolicyTraits<Properties...>;
543 int league_size_request,
int team_size_request,
544 int vector_length_request = 1)
545 : internal_policy(space_, league_size_request, team_size_request,
546 vector_length_request) {}
548 TeamPolicy(
const typename traits::execution_space& space_,
549 int league_size_request,
const Kokkos::AUTO_t&,
550 int vector_length_request = 1)
551 : internal_policy(space_, league_size_request, Kokkos::AUTO(),
552 vector_length_request) {}
554 TeamPolicy(
const typename traits::execution_space& space_,
555 int league_size_request,
const Kokkos::AUTO_t&,
556 const Kokkos::AUTO_t&)
557 : internal_policy(space_, league_size_request, Kokkos::AUTO(),
559 TeamPolicy(
const typename traits::execution_space& space_,
560 int league_size_request,
const int team_size_request,
561 const Kokkos::AUTO_t&)
562 : internal_policy(space_, league_size_request, team_size_request,
567 int vector_length_request = 1)
568 : internal_policy(league_size_request, team_size_request,
569 vector_length_request) {}
571 TeamPolicy(
int league_size_request,
const Kokkos::AUTO_t&,
572 int vector_length_request = 1)
573 : internal_policy(league_size_request, Kokkos::AUTO(),
574 vector_length_request) {}
576 TeamPolicy(
int league_size_request,
const Kokkos::AUTO_t&,
577 const Kokkos::AUTO_t&)
578 : internal_policy(league_size_request, Kokkos::AUTO(), Kokkos::AUTO()) {}
579 TeamPolicy(
int league_size_request,
const int team_size_request,
580 const Kokkos::AUTO_t&)
581 : internal_policy(league_size_request, team_size_request,
584 template <
class... OtherProperties>
585 TeamPolicy(
const TeamPolicy<OtherProperties...> p) : internal_policy(p) {
588 internal_policy::traits::operator=(p);
592 TeamPolicy(
const internal_policy& p) : internal_policy(p) {}
595 inline TeamPolicy& set_chunk_size(
int chunk) {
596 static_assert(std::is_same<
decltype(internal_policy::set_chunk_size(chunk)),
597 internal_policy&>::value,
598 "internal set_chunk_size should return a reference");
599 return static_cast<TeamPolicy&
>(internal_policy::set_chunk_size(chunk));
602 inline TeamPolicy& set_scratch_size(
const int& level,
603 const Impl::PerTeamValue& per_team) {
604 static_assert(std::is_same<
decltype(internal_policy::set_scratch_size(
606 internal_policy&>::value,
607 "internal set_chunk_size should return a reference");
609 team_policy_check_valid_storage_level_argument(level);
610 return static_cast<TeamPolicy&
>(
611 internal_policy::set_scratch_size(level, per_team));
613 inline TeamPolicy& set_scratch_size(
const int& level,
614 const Impl::PerThreadValue& per_thread) {
615 team_policy_check_valid_storage_level_argument(level);
616 return static_cast<TeamPolicy&
>(
617 internal_policy::set_scratch_size(level, per_thread));
619 inline TeamPolicy& set_scratch_size(
const int& level,
620 const Impl::PerTeamValue& per_team,
621 const Impl::PerThreadValue& per_thread) {
622 team_policy_check_valid_storage_level_argument(level);
623 return static_cast<TeamPolicy&
>(
624 internal_policy::set_scratch_size(level, per_team, per_thread));
626 inline TeamPolicy& set_scratch_size(
const int& level,
627 const Impl::PerThreadValue& per_thread,
628 const Impl::PerTeamValue& per_team) {
629 team_policy_check_valid_storage_level_argument(level);
630 return static_cast<TeamPolicy&
>(
631 internal_policy::set_scratch_size(level, per_team, per_thread));
637template <
typename iType,
class TeamMemberType>
638struct TeamThreadRangeBoundariesStruct {
640 KOKKOS_INLINE_FUNCTION
static iType ibegin(
const iType& arg_begin,
641 const iType& arg_end,
642 const iType& arg_rank,
643 const iType& arg_size) {
645 ((arg_end - arg_begin + arg_size - 1) / arg_size) * arg_rank;
648 KOKKOS_INLINE_FUNCTION
static iType iend(
const iType& arg_begin,
649 const iType& arg_end,
650 const iType& arg_rank,
651 const iType& arg_size) {
654 ((arg_end - arg_begin + arg_size - 1) / arg_size) * (arg_rank + 1);
655 return end_ < arg_end ? end_ : arg_end;
659 using index_type = iType;
662 enum { increment = 1 };
663 const TeamMemberType& thread;
665 KOKKOS_INLINE_FUNCTION
666 TeamThreadRangeBoundariesStruct(
const TeamMemberType& arg_thread,
667 const iType& arg_end)
669 ibegin(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
670 end(iend(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
671 thread(arg_thread) {}
673 KOKKOS_INLINE_FUNCTION
674 TeamThreadRangeBoundariesStruct(
const TeamMemberType& arg_thread,
675 const iType& arg_begin,
const iType& arg_end)
676 : start(ibegin(arg_begin, arg_end, arg_thread.team_rank(),
677 arg_thread.team_size())),
678 end(iend(arg_begin, arg_end, arg_thread.team_rank(),
679 arg_thread.team_size())),
680 thread(arg_thread) {}
683template <
typename iType,
class TeamMemberType>
684struct TeamVectorRangeBoundariesStruct {
686 KOKKOS_INLINE_FUNCTION
static iType ibegin(
const iType& arg_begin,
687 const iType& arg_end,
688 const iType& arg_rank,
689 const iType& arg_size) {
691 ((arg_end - arg_begin + arg_size - 1) / arg_size) * arg_rank;
694 KOKKOS_INLINE_FUNCTION
static iType iend(
const iType& arg_begin,
695 const iType& arg_end,
696 const iType& arg_rank,
697 const iType& arg_size) {
700 ((arg_end - arg_begin + arg_size - 1) / arg_size) * (arg_rank + 1);
701 return end_ < arg_end ? end_ : arg_end;
705 using index_type = iType;
708 enum { increment = 1 };
709 const TeamMemberType& thread;
711 KOKKOS_INLINE_FUNCTION
712 TeamVectorRangeBoundariesStruct(
const TeamMemberType& arg_thread,
713 const iType& arg_end)
715 ibegin(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
716 end(iend(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
717 thread(arg_thread) {}
719 KOKKOS_INLINE_FUNCTION
720 TeamVectorRangeBoundariesStruct(
const TeamMemberType& arg_thread,
721 const iType& arg_begin,
const iType& arg_end)
722 : start(ibegin(arg_begin, arg_end, arg_thread.team_rank(),
723 arg_thread.team_size())),
724 end(iend(arg_begin, arg_end, arg_thread.team_rank(),
725 arg_thread.team_size())),
726 thread(arg_thread) {}
729template <
typename iType,
class TeamMemberType>
730struct ThreadVectorRangeBoundariesStruct {
731 using index_type = iType;
732 const index_type start;
733 const index_type end;
734 enum { increment = 1 };
736 KOKKOS_INLINE_FUNCTION
737 constexpr ThreadVectorRangeBoundariesStruct(
const TeamMemberType,
738 const index_type& count) noexcept
739 : start(
static_cast<index_type
>(0)), end(count) {}
741 KOKKOS_INLINE_FUNCTION
742 constexpr ThreadVectorRangeBoundariesStruct(
const index_type& count) noexcept
743 : start(
static_cast<index_type
>(0)), end(count) {}
745 KOKKOS_INLINE_FUNCTION
746 constexpr ThreadVectorRangeBoundariesStruct(
747 const TeamMemberType,
const index_type& arg_begin,
748 const index_type& arg_end) noexcept
749 : start(
static_cast<index_type
>(arg_begin)), end(arg_end) {}
751 KOKKOS_INLINE_FUNCTION
752 constexpr ThreadVectorRangeBoundariesStruct(
753 const index_type& arg_begin,
const index_type& arg_end) noexcept
754 : start(
static_cast<index_type
>(arg_begin)), end(arg_end) {}
757template <
class TeamMemberType>
758struct ThreadSingleStruct {
759 const TeamMemberType& team_member;
760 KOKKOS_INLINE_FUNCTION
761 ThreadSingleStruct(
const TeamMemberType& team_member_)
762 : team_member(team_member_) {}
765template <
class TeamMemberType>
766struct VectorSingleStruct {
767 const TeamMemberType& team_member;
768 KOKKOS_INLINE_FUNCTION
769 VectorSingleStruct(
const TeamMemberType& team_member_)
770 : team_member(team_member_) {}
782template <
typename iType,
class TeamMemberType,
class _never_use_this_overload>
783KOKKOS_INLINE_FUNCTION_DELETED
784 Impl::TeamThreadRangeBoundariesStruct<iType, TeamMemberType>
785 TeamThreadRange(
const TeamMemberType&,
const iType& count) =
delete;
794template <
typename iType1,
typename iType2,
class TeamMemberType,
795 class _never_use_this_overload>
796KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct<
797 std::common_type_t<iType1, iType2>, TeamMemberType>
798TeamThreadRange(
const TeamMemberType&,
const iType1& begin,
799 const iType2& end) =
delete;
808template <
typename iType,
class TeamMemberType,
class _never_use_this_overload>
809KOKKOS_INLINE_FUNCTION_DELETED
810 Impl::TeamThreadRangeBoundariesStruct<iType, TeamMemberType>
811 TeamVectorRange(
const TeamMemberType&,
const iType& count) =
delete;
820template <
typename iType1,
typename iType2,
class TeamMemberType,
821 class _never_use_this_overload>
822KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct<
823 std::common_type_t<iType1, iType2>, TeamMemberType>
824TeamVectorRange(
const TeamMemberType&,
const iType1& begin,
825 const iType2& end) =
delete;
834template <
typename iType,
class TeamMemberType,
class _never_use_this_overload>
835KOKKOS_INLINE_FUNCTION_DELETED
836 Impl::ThreadVectorRangeBoundariesStruct<iType, TeamMemberType>
837 ThreadVectorRange(
const TeamMemberType&,
const iType& count) =
delete;
839template <
typename iType1,
typename iType2,
class TeamMemberType,
840 class _never_use_this_overload>
841KOKKOS_INLINE_FUNCTION_DELETED Impl::ThreadVectorRangeBoundariesStruct<
842 std::common_type_t<iType1, iType2>, TeamMemberType>
843ThreadVectorRange(
const TeamMemberType&,
const iType1& arg_begin,
844 const iType2& arg_end) =
delete;
// Two-state flags with `bool` as the underlying type. In each enum the
// first enumerator has value false and the second has value true.

/// Whether a nest level is the last one.
enum class TeamMDRangeLastNestLevel : bool {
  NotLastNestLevel,
  LastNestLevel
};

/// Whether a level is parallelized over threads.
enum class TeamMDRangeParThread : bool {
  NotParThread,
  ParThread
};

/// Whether a level is parallelized over vector lanes.
enum class TeamMDRangeParVector : bool {
  NotParVector,
  ParVector
};

/// Whether thread and vector parallelism are both in use.
enum class TeamMDRangeThreadAndVector : bool {
  NotBoth,
  Both
};
853template <
typename Rank, TeamMDRangeThreadAndVector ThreadAndVector>
854struct HostBasedNestLevel;
856template <
typename Rank, TeamMDRangeThreadAndVector ThreadAndVector>
857struct AcceleratorBasedNestLevel;
867template <
typename Rank,
typename ExecSpace,
868 TeamMDRangeThreadAndVector ThreadAndVector>
869struct ThreadAndVectorNestLevel;
/// Empty tag passed in place of a reduction value by the nested
/// multi-dimensional parallel_for overloads in this file.
struct NoReductionTag {};
873template <
typename Rank,
typename TeamMDPolicy,
typename Lambda,
874 typename ReductionValueType>
875KOKKOS_INLINE_FUNCTION
void md_parallel_impl(TeamMDPolicy
const& policy,
876 Lambda
const& lambda,
877 ReductionValueType&& val);
/// Primary template, declared only; the usable definition is the partial
/// specialization on Rank<N, OuterDir, InnerDir>.
template <typename Rank, typename TeamHandle>
struct TeamThreadMDRange;
883template <
unsigned N, Iterate OuterDir, Iterate InnerDir,
typename TeamHandle>
884struct TeamThreadMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
885 using NestLevelType = int;
886 using BoundaryType = int;
887 using TeamHandleType = TeamHandle;
888 using ExecutionSpace =
typename TeamHandleType::execution_space;
889 using ArrayLayout =
typename ExecutionSpace::array_layout;
891 static constexpr NestLevelType total_nest_level =
892 Rank<N, OuterDir, InnerDir>::rank;
893 static constexpr Iterate iter = OuterDir;
894 static constexpr auto par_thread = Impl::TeamMDRangeParThread::ParThread;
895 static constexpr auto par_vector = Impl::TeamMDRangeParVector::NotParVector;
897 static constexpr Iterate direction =
898 OuterDir == Iterate::Default
899 ? layout_iterate_type_selector<ArrayLayout>::outer_iteration_pattern
902 template <
class... Args>
903 KOKKOS_FUNCTION TeamThreadMDRange(TeamHandleType
const& team_, Args&&... args)
904 : team(team_), boundaries{static_cast<BoundaryType>(args)...} {
905 static_assert(
sizeof...(Args) == total_nest_level);
908 TeamHandleType
const& team;
909 BoundaryType boundaries[total_nest_level];
912template <
typename TeamHandle,
typename... Args>
913TeamThreadMDRange(TeamHandle
const&, Args&&...)
914 ->TeamThreadMDRange<Rank<
sizeof...(Args), Iterate::Default>, TeamHandle>;
/// Primary template, declared only; the usable definition is the partial
/// specialization on Rank<N, OuterDir, InnerDir>.
template <typename Rank, typename TeamHandle>
struct ThreadVectorMDRange;
919template <
unsigned N, Iterate OuterDir, Iterate InnerDir,
typename TeamHandle>
920struct ThreadVectorMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
921 using NestLevelType = int;
922 using BoundaryType = int;
923 using TeamHandleType = TeamHandle;
924 using ExecutionSpace =
typename TeamHandleType::execution_space;
925 using ArrayLayout =
typename ExecutionSpace::array_layout;
927 static constexpr NestLevelType total_nest_level =
928 Rank<N, OuterDir, InnerDir>::rank;
929 static constexpr Iterate iter = OuterDir;
930 static constexpr auto par_thread = Impl::TeamMDRangeParThread::NotParThread;
931 static constexpr auto par_vector = Impl::TeamMDRangeParVector::ParVector;
933 static constexpr Iterate direction =
934 OuterDir == Iterate::Default
935 ? layout_iterate_type_selector<ArrayLayout>::outer_iteration_pattern
938 template <
class... Args>
939 KOKKOS_INLINE_FUNCTION ThreadVectorMDRange(TeamHandleType
const& team_,
941 : team(team_), boundaries{static_cast<BoundaryType>(args)...} {
942 static_assert(
sizeof...(Args) == total_nest_level);
945 TeamHandleType
const& team;
946 BoundaryType boundaries[total_nest_level];
949template <
typename TeamHandle,
typename... Args>
950ThreadVectorMDRange(TeamHandle
const&, Args&&...)
951 ->ThreadVectorMDRange<Rank<
sizeof...(Args), Iterate::Default>, TeamHandle>;
/// Primary template, declared only; the usable definition is the partial
/// specialization on Rank<N, OuterDir, InnerDir>.
template <typename Rank, typename TeamHandle>
struct TeamVectorMDRange;
956template <
unsigned N, Iterate OuterDir, Iterate InnerDir,
typename TeamHandle>
957struct TeamVectorMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
958 using NestLevelType = int;
959 using BoundaryType = int;
960 using TeamHandleType = TeamHandle;
961 using ExecutionSpace =
typename TeamHandleType::execution_space;
962 using ArrayLayout =
typename ExecutionSpace::array_layout;
964 static constexpr NestLevelType total_nest_level =
965 Rank<N, OuterDir, InnerDir>::rank;
966 static constexpr Iterate iter = OuterDir;
967 static constexpr auto par_thread = Impl::TeamMDRangeParThread::ParThread;
968 static constexpr auto par_vector = Impl::TeamMDRangeParVector::ParVector;
970 static constexpr Iterate direction =
971 iter == Iterate::Default
972 ? layout_iterate_type_selector<ArrayLayout>::outer_iteration_pattern
975 template <
class... Args>
976 KOKKOS_INLINE_FUNCTION TeamVectorMDRange(TeamHandleType
const& team_,
978 : team(team_), boundaries{static_cast<BoundaryType>(args)...} {
979 static_assert(
sizeof...(Args) == total_nest_level);
982 TeamHandleType
const& team;
983 BoundaryType boundaries[total_nest_level];
986template <
typename TeamHandle,
typename... Args>
987TeamVectorMDRange(TeamHandle
const&, Args&&...)
988 ->TeamVectorMDRange<Rank<
sizeof...(Args), Iterate::Default>, TeamHandle>;
990template <
typename Rank,
typename TeamHandle,
typename Lambda,
991 typename ReducerValueType>
992KOKKOS_INLINE_FUNCTION
void parallel_reduce(
993 TeamThreadMDRange<Rank, TeamHandle>
const& policy, Lambda
const& lambda,
994 ReducerValueType& val) {
995 Impl::md_parallel_impl<Rank>(policy, lambda, val);
998template <
typename Rank,
typename TeamHandle,
typename Lambda>
999KOKKOS_INLINE_FUNCTION
void parallel_for(
1000 TeamThreadMDRange<Rank, TeamHandle>
const& policy, Lambda
const& lambda) {
1001 Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
1004template <
typename Rank,
typename TeamHandle,
typename Lambda,
1005 typename ReducerValueType>
1006KOKKOS_INLINE_FUNCTION
void parallel_reduce(
1007 ThreadVectorMDRange<Rank, TeamHandle>
const& policy, Lambda
const& lambda,
1008 ReducerValueType& val) {
1009 Impl::md_parallel_impl<Rank>(policy, lambda, val);
1012template <
typename Rank,
typename TeamHandle,
typename Lambda>
1013KOKKOS_INLINE_FUNCTION
void parallel_for(
1014 ThreadVectorMDRange<Rank, TeamHandle>
const& policy, Lambda
const& lambda) {
1015 Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
1018template <
typename Rank,
typename TeamHandle,
typename Lambda,
1019 typename ReducerValueType>
1020KOKKOS_INLINE_FUNCTION
void parallel_reduce(
1021 TeamVectorMDRange<Rank, TeamHandle>
const& policy, Lambda
const& lambda,
1022 ReducerValueType& val) {
1023 Impl::md_parallel_impl<Rank>(policy, lambda, val);
1026template <
typename Rank,
typename TeamHandle,
typename Lambda>
1027KOKKOS_INLINE_FUNCTION
void parallel_for(
1028 TeamVectorMDRange<Rank, TeamHandle>
const& policy, Lambda
const& lambda) {
1029 Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
/// Resolves the name to report for a parallel construct.
///
/// If the user-supplied label is non-empty it is used as is. Otherwise a
/// default is built from `typeid(FunctorType).name()`, followed by "/" and
/// `typeid(TagType).name()` when a (non-void) work tag is present.
///
/// The label is held by reference, so the string passed to the constructor
/// must outlive this object.
template <typename FunctorType, typename TagType,
          bool HasTag = !std::is_void<TagType>::value>
struct ParallelConstructName;

/// Variant used when a work tag is present: default is "<functor>/<tag>".
template <typename FunctorType, typename TagType>
struct ParallelConstructName<FunctorType, TagType, true> {
  ParallelConstructName(std::string const& label) : label_ref(label) {
    // Build the fallback only when it will actually be needed.
    if (label.empty()) {
      default_name = std::string(typeid(FunctorType).name()) + "/" +
                     typeid(TagType).name();
    }
  }

  /// Returns the user label, or the generated default if the label is empty.
  std::string const& get() const {
    return (label_ref.empty()) ? default_name : label_ref;
  }

  std::string const& label_ref;
  std::string default_name;
};

/// Variant used when there is no work tag: default is "<functor>".
template <typename FunctorType, typename TagType>
struct ParallelConstructName<FunctorType, TagType, false> {
  ParallelConstructName(std::string const& label) : label_ref(label) {
    // Build the fallback only when it will actually be needed.
    if (label.empty()) {
      default_name = std::string(typeid(FunctorType).name());
    }
  }

  /// Returns the user label, or the generated default if the label is empty.
  std::string const& get() const {
    return (label_ref.empty()) ? default_name : label_ref;
  }

  std::string const& label_ref;
  std::string default_name;
};
1075template <
class PatternTag,
class... Args>
1076struct PatternImplSpecializationFromTag;
1078template <
class... Args>
1079struct PatternImplSpecializationFromTag<Kokkos::ParallelForTag, Args...>
1080 : type_identity<ParallelFor<Args...>> {};
1082template <
class... Args>
1083struct PatternImplSpecializationFromTag<Kokkos::ParallelReduceTag, Args...>
1084 : type_identity<ParallelReduce<Args...>> {};
1086template <
class... Args>
1087struct PatternImplSpecializationFromTag<Kokkos::ParallelScanTag, Args...>
1088 : type_identity<ParallelScan<Args...>> {};
1090template <
class PatternImpl>
1091struct PatternTagFromImplSpecialization;
1093template <
class... Args>
1094struct PatternTagFromImplSpecialization<ParallelFor<Args...>>
1095 : type_identity<ParallelForTag> {};
1097template <
class... Args>
1098struct PatternTagFromImplSpecialization<ParallelReduce<Args...>>
1099 : type_identity<ParallelReduceTag> {};
1101template <
class... Args>
1102struct PatternTagFromImplSpecialization<ParallelScan<Args...>>
1103 : type_identity<ParallelScanTag> {};
Execution policy for work over a range of an integral type.
RangePolicy(const member_type work_begin, const member_type work_end)
Total range.
RangePolicy & set_chunk_size(int chunk_size)
Set chunk_size to a discrete value.
member_type chunk_size() const
Return the chunk size.
RangePolicy(const typename traits::execution_space &work_space, const member_type work_begin, const member_type work_end, Args... args)
Total range.
RangePolicy(const member_type work_begin, const member_type work_end, Args... args)
Total range.
RangePolicy(const typename traits::execution_space &work_space, const member_type work_begin, const member_type work_end)
Total range.
Execution policy for parallel work over a league of teams of threads.
TeamPolicy(int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the default instance of the execution space.
TeamPolicy(const typename traits::execution_space &space_, int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the given instance of the execution space.
Parallel execution of a functor calls the functor once with each member of the execution policy.
KOKKOS_INLINE_FUNCTION JoinOp::value_type team_reduce(const typename JoinOp::value_type, const JoinOp &) const
Intra-team reduction. Returns join of all values of the team members.
KOKKOS_INLINE_FUNCTION Type team_scan(const Type &value, Type *const global_accum) const
Intra-team exclusive prefix sum with team_rank() ordering with intra-team non-deterministic ordering accumulation.
KOKKOS_INLINE_FUNCTION int team_size() const
Number of threads in this team.
KOKKOS_INLINE_FUNCTION traits::execution_space::scratch_memory_space team_shmem() const
Handle to the currently executing team shared scratch memory.
KOKKOS_INLINE_FUNCTION int team_rank() const
Rank of this thread within this team.
KOKKOS_INLINE_FUNCTION int league_size() const
Number of teams in the league.
KOKKOS_INLINE_FUNCTION int league_rank() const
Rank of this team within the league of teams.
KOKKOS_INLINE_FUNCTION void team_barrier() const
Barrier among the threads of this team.
KOKKOS_INLINE_FUNCTION Type team_scan(const Type &value) const
Intra-team exclusive prefix sum with team_rank() ordering.
Subrange for a partition's rank and size.
KOKKOS_INLINE_FUNCTION WorkRange(const RangePolicy &range, const int part_rank, const int part_size)
Subrange for a partition's rank and size.