Kokkos Core Kernels Package Version of the Day
Loading...
Searching...
No Matches
Kokkos_ScratchSpace.hpp
1//@HEADER
2// ************************************************************************
3//
4// Kokkos v. 4.0
5// Copyright (2022) National Technology & Engineering
6// Solutions of Sandia, LLC (NTESS).
7//
8// Under the terms of Contract DE-NA0003525 with NTESS,
9// the U.S. Government retains certain rights in this software.
10//
11// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12// See https://kokkos.org/LICENSE for license information.
13// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14//
15//@HEADER
16
17#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
18#include <Kokkos_Macros.hpp>
19static_assert(false,
20 "Including non-public Kokkos header files is not allowed.");
21#endif
22#ifndef KOKKOS_SCRATCHSPACE_HPP
23#define KOKKOS_SCRATCHSPACE_HPP
24
25#include <cstdio>
26#include <cstddef>
27#include <Kokkos_Core_fwd.hpp>
28#include <Kokkos_Concepts.hpp>
29
30/*--------------------------------------------------------------------------*/
31
32namespace Kokkos {
33
37template <class ExecSpace>
39 static_assert(
40 is_execution_space<ExecSpace>::value,
41 "Instantiating ScratchMemorySpace on non-execution-space type.");
42
43 public:
// Smallest over-alignment, in bytes, that view scratch allocations use.
static constexpr int ALIGN = 8;
46
47 private:
48 mutable char* m_iter_L0 = nullptr;
49 mutable char* m_iter_L1 = nullptr;
50 char* m_end_L0 = nullptr;
51 char* m_end_L1 = nullptr;
52
53 mutable int m_multiplier = 0;
54 mutable int m_offset = 0;
55 mutable int m_default_level = 0;
56
57#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
58 constexpr static int DEFAULT_ALIGNMENT_MASK = ALIGN - 1;
59#endif
60
61 public:
64 using execution_space = ExecSpace;
66 using device_type = Kokkos::Device<execution_space, memory_space>;
67
68 using array_layout = typename ExecSpace::array_layout;
69 using size_type = typename ExecSpace::size_type;
70
// Returns the fixed, human-readable label of this memory space.
static constexpr const char* name() {
  constexpr const char* label = "ScratchMemorySpace";
  return label;
}
72
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
// Deprecated and unused.
// Adds DEFAULT_ALIGNMENT_MASK to `size` and clears the mask bits, i.e. for
// non-negative sizes rounds `size` up to the next multiple of
// DEFAULT_ALIGNMENT_MASK + 1.
template <typename IntType>
KOKKOS_DEPRECATED KOKKOS_INLINE_FUNCTION static constexpr IntType align(
    const IntType& size) {
  const auto bumped = size + DEFAULT_ALIGNMENT_MASK;
  return bumped & ~DEFAULT_ALIGNMENT_MASK;
}
#endif
81
82 template <typename IntType>
83 KOKKOS_INLINE_FUNCTION void* get_shmem(const IntType& size,
84 int level = -1) const {
85 return get_shmem_common</*alignment_requested*/ false>(size, 1, level);
86 }
87
88 template <typename IntType>
89 KOKKOS_INLINE_FUNCTION void* get_shmem_aligned(const IntType& size,
90 const ptrdiff_t alignment,
91 int level = -1) const {
92 return get_shmem_common</*alignment_requested*/ true>(size, alignment,
93 level);
94 }
95
96 private:
97 template <bool alignment_requested, typename IntType>
98 KOKKOS_INLINE_FUNCTION void* get_shmem_common(
99 const IntType& size, [[maybe_unused]] const ptrdiff_t alignment,
100 int level = -1) const {
101 if (level == -1) level = m_default_level;
102 auto& m_iter = (level == 0) ? m_iter_L0 : m_iter_L1;
103 auto m_iter_old = m_iter;
104 if constexpr (alignment_requested) {
105 const ptrdiff_t missalign = size_t(m_iter) % alignment;
106 if (missalign) m_iter += alignment - missalign;
107 }
108
109 // This is each thread's start pointer for its allocation
110 // Note: for team scratch m_offset is 0, since every
111 // thread will get back the same shared pointer
112 void* tmp = m_iter + m_offset * size;
113 uintptr_t increment = size * m_multiplier;
114
115 // Cast to uintptr_t to avoid problems with pointer arithmetic using SYCL
116 const auto end_iter =
117 reinterpret_cast<uintptr_t>((level == 0) ? m_end_L0 : m_end_L1);
118 auto current_iter = reinterpret_cast<uintptr_t>(m_iter);
119 auto capacity = end_iter - current_iter;
120
121 if (increment > capacity) {
122 // Request did overflow: return nullptr and reset m_iter
123 m_iter = m_iter_old;
124 tmp = nullptr;
125#ifdef KOKKOS_ENABLE_DEBUG
126 // mfh 23 Jun 2015: printf call consumes 25 registers
127 // in a CUDA build, so only print in debug mode. The
128 // function still returns nullptr if not enough memory.
129 KOKKOS_IMPL_DO_NOT_USE_PRINTF(
130 "ScratchMemorySpace<...>::get_shmem: Failed to allocate "
131 "%ld byte(s); remaining capacity is %ld byte(s)\n",
132 long(size), long(capacity));
133#endif // KOKKOS_ENABLE_DEBUG
134 } else {
135 m_iter += increment;
136 }
137 return tmp;
138 }
139
140 public:
141 KOKKOS_DEFAULTED_FUNCTION
142 ScratchMemorySpace() = default;
143
144 template <typename IntType>
145 KOKKOS_INLINE_FUNCTION ScratchMemorySpace(void* ptr_L0,
146 const IntType& size_L0,
147 void* ptr_L1 = nullptr,
148 const IntType& size_L1 = 0)
149 : m_iter_L0(static_cast<char*>(ptr_L0)),
150 m_iter_L1(static_cast<char*>(ptr_L1)),
151 m_end_L0(static_cast<char*>(ptr_L0) + size_L0),
152 m_end_L1(static_cast<char*>(ptr_L1) + size_L1),
153 m_multiplier(1),
154 m_offset(0),
155 m_default_level(0) {}
156
157 KOKKOS_INLINE_FUNCTION
158 const ScratchMemorySpace& set_team_thread_mode(const int& level,
159 const int& multiplier,
160 const int& offset) const {
161 m_default_level = level;
162 m_multiplier = multiplier;
163 m_offset = offset;
164 return *this;
165 }
166};
167
168} // namespace Kokkos
169
170#endif /* #ifndef KOKKOS_SCRATCHSPACE_HPP */
Scratch memory space associated with an execution space.
Kokkos::Device< execution_space, memory_space > device_type
This execution space's preferred device_type.