Sacado Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
mat_vec.cpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Sacado Package
5// Copyright (2006) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// This library is free software; you can redistribute it and/or modify
11// it under the terms of the GNU Lesser General Public License as
12// published by the Free Software Foundation; either version 2.1 of the
13// License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful, but
16// WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23// USA
24// Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps
25// (etphipp@sandia.gov).
26//
27// ***********************************************************************
28// @HEADER
29
30//#define SACADO_DISABLE_FAD_VIEW_SPEC
31
32#include "Sacado.hpp"
33
34#include "mat_vec.hpp"
35
36#include "Kokkos_Timer.hpp"
37
38template <typename ViewTypeA, typename ViewTypeB, typename ViewTypeC>
39void run_mat_vec(const ViewTypeA& A, const ViewTypeB& b, const ViewTypeC& c) {
40 typedef typename ViewTypeC::value_type scalar_type;
41 typedef typename ViewTypeC::execution_space execution_space;
42
43 const int m = A.extent(0);
44 const int n = A.extent(1);
45 Kokkos::parallel_for(
46 Kokkos::RangePolicy<execution_space>( 0,m ),
47 KOKKOS_LAMBDA (const int i) {
48 scalar_type t = 0.0;
49 for (int j=0; j<n; ++j)
50 t += A(i,j)*b(j);
51 c(i) = t;
52 }
53 );
54}
55
56template <typename ViewTypeA, typename ViewTypeB, typename ViewTypeC>
57void
58run_mat_vec_scratch(const ViewTypeA& A, const ViewTypeB& b, const ViewTypeC& c)
59{
60 typedef typename ViewTypeC::value_type scalar_type;
61 typedef typename ViewTypeC::execution_space execution_space;
62 typedef Kokkos::TeamPolicy<execution_space> Policy;
63 typedef typename Policy::member_type team_member;
64 typedef Kokkos::View<scalar_type*,Kokkos::LayoutLeft, typename execution_space::scratch_memory_space, Kokkos::MemoryUnmanaged> TmpScratchSpace;
65
66 const int m = A.extent(0);
67 const int n = A.extent(1);
68 const int p = dimension_scalar(A);
69
70#ifdef KOKKOS_ENABLE_CUDA
71 const bool is_cuda = std::is_same<execution_space,Kokkos::Cuda>::value;
72#else
73 const bool is_cuda = false;
74#endif
75 const int TeamSize = is_cuda ? 128 : 1;
76 const int N = (m+TeamSize-1)/TeamSize;
77 Policy policy(N, TeamSize, 1);
78 const size_t bytes = TmpScratchSpace::shmem_size(TeamSize,p);
79 Kokkos::parallel_for(
80 policy.set_scratch_size(0, Kokkos::PerTeam(bytes)),
81 KOKKOS_LAMBDA (const team_member& team) {
82 const int team_rank = team.team_rank();
83 const int team_size = team.team_size();
84 TmpScratchSpace t(team.team_scratch(0), team_size, p);
85 const int i = team.league_rank()*team_size + team_rank;
86 if (i < m) {
87 t(team_rank) = 0.0;
88 for (int j=0; j<n; ++j)
89 t(team_rank) += A(i,j)*b(j);
90 c(i) = t(team_rank);
91 }
92 }
93 );
94}
95
96template <typename ViewTypeA, typename ViewTypeB, typename ViewTypeC>
97void
98run_mat_vec_deriv(const ViewTypeA& A, const ViewTypeB& b, const ViewTypeC& c)
99{
100 typedef typename ViewTypeC::execution_space execution_space;
101
102 const int m = A.extent(0);
103 const int n = A.extent(1);
104 const int p = A.extent(2)-1;
105 Kokkos::parallel_for(
106 Kokkos::RangePolicy<execution_space>( 0,m ),
107 KOKKOS_LAMBDA (const int i) {
108 c(i,p) = 0.0;
109 for (int k=0; k<p; ++k)
110 c(i,k) = 0.0;
111 for (int j=0; j<n; ++j) {
112 c(i,p) += A(i,j,p)*b(j,p);
113 for (int k=0; k<p; ++k) {
114 c(i,k) += A(i,j,k)*b(j,p) + A(i,j,p)*b(j,k);
115 }
116 }
117 }
118 );
119}
120
121template <int MaxP, typename ViewTypeA, typename ViewTypeB, typename ViewTypeC>
122void
123run_mat_vec_deriv_sl(const ViewTypeA& A, const ViewTypeB& b, const ViewTypeC& c)
124{
125 typedef typename ViewTypeC::value_type scalar_type;
126 typedef typename ViewTypeC::execution_space execution_space;
127
128 const int m = A.extent(0);
129 const int n = A.extent(1);
130 const int p = A.extent(2)-1;
131 Kokkos::parallel_for(
132 Kokkos::RangePolicy<execution_space>( 0,m ),
133 KOKKOS_LAMBDA (const int i) {
134 scalar_type cv = 0.0;
135 scalar_type t[MaxP];
136 for (int k=0; k<p; ++k)
137 t[k] = 0.0;
138
139 for (int j=0; j<n; ++j) {
140 scalar_type av = A(i,j,p);
141 scalar_type bv = b(j,p);
142 cv += av*bv;
143 for (int k=0; k<p; ++k) {
144 t[k] += A(i,j,k)*bv + av*b(j,k);
145 }
146 }
147
148 for (int k=0; k<p; ++k)
149 c(i,k) = t[k];
150 c(i,p) = cv;
151 }
152 );
153}
154
155template <int p, typename ViewTypeA, typename ViewTypeB, typename ViewTypeC>
156void
157run_mat_vec_deriv_s(const ViewTypeA& A, const ViewTypeB& b, const ViewTypeC& c)
158{
159 typedef typename ViewTypeC::value_type scalar_type;
160 typedef typename ViewTypeC::execution_space execution_space;
161
162 const int m = A.extent(0);
163 const int n = A.extent(1);
164 Kokkos::parallel_for(
165 Kokkos::RangePolicy<execution_space>( 0,m ),
166 KOKKOS_LAMBDA (const int i) {
167 scalar_type cv = 0.0;
168 scalar_type t[p];
169 for (int k=0; k<p; ++k)
170 t[k] = 0.0;
171
172 for (int j=0; j<n; ++j) {
173 const scalar_type av = A(i,j,p);
174 const scalar_type bv = b(j,p);
175 cv += av*bv;
176
177// Using simd here results in much better performance. Othewise the compiler
178// appears to try and vectorize the j loop with gather instructions, which
179// doesn't work very well.
180#if defined(__INTEL_COMPILER) && ! defined(__CUDA_ARCH__)
181#pragma simd
182#endif
183 for (int k=0; k<p; ++k) {
184 t[k] += A(i,j,k)*bv + av*b(j,k);
185 }
186 }
187
188 for (int k=0; k<p; ++k)
189 c(i,k) = t[k];
190 c(i,p) = cv;
191 }
192 );
193}
194
195template <typename ViewTypeA, typename ViewTypeB, typename ViewTypeC>
196void
197check_val(const ViewTypeA& A, const ViewTypeB& b, const ViewTypeC& c)
198{
199 const double tol = 1.0e-14;
200 typedef typename ViewTypeC::value_type value_type;
201 typename ViewTypeC::HostMirror h_c = Kokkos::create_mirror_view(c);
202 Kokkos::deep_copy(h_c, c);
203 const size_t m = A.extent(0);
204 const size_t n = A.extent(1);
205 for (size_t i=0; i<m; ++i) {
206 value_type t = n;
207 if (std::abs(h_c(i)- t) > tol) {
208 std::cout << "Comparison failed! " << i << " : " << h_c(i) << " , " << t
209 << std::endl;
210 }
211 }
212}
213
214template <typename ViewTypeA, typename ViewTypeB, typename ViewTypeC>
215void
216check_deriv(const ViewTypeA& A, const ViewTypeB& b, const ViewTypeC& c)
217{
218 const double tol = 1.0e-14;
219 typedef typename ViewTypeC::value_type value_type;
220 typename ViewTypeC::HostMirror h_c = Kokkos::create_mirror_view(c);
221 Kokkos::deep_copy(h_c, c);
222 const size_t m = A.extent(0);
223 const size_t n = A.extent(1);
224 const size_t p = A.extent(2);
225 for (size_t i=0; i<m; ++i) {
226 for (size_t j=0; j<p; ++j) {
227 value_type t = (j == p-1 ? n : 2*n);
228 if (std::abs(h_c(i,j)- t) > tol) {
229 std::cout << "Comparison failed! " << i << "," << j << " : "
230 << h_c(i,j) << " , " << t << std::endl;
231 }
232 }
233 }
234}
235
236template <typename ... ViewArgs>
237Perf
238do_time_val(const size_t m, const size_t n, const size_t nloop,
239 const bool check)
240{
241 typedef Kokkos::View<double**, ViewArgs...> ViewTypeA;
242 typedef Kokkos::View<double*, ViewArgs...> ViewTypeB;
243 typedef Kokkos::View<double*, ViewArgs...> ViewTypeC;
244 typedef typename ViewTypeA::execution_space execution_space;
245
246 ViewTypeA A("A",m,n);
247 ViewTypeB b("B",n);
248 ViewTypeC c("c",m);
249
250 Kokkos::deep_copy(A, 1.0);
251 Kokkos::deep_copy(b, 1.0);
252
253 Kokkos::Timer wall_clock;
254 Perf perf;
255
256 // Execute the kernel once to warm up
257 run_mat_vec( A, b, c );
258 execution_space().fence();
259
260 wall_clock.reset();
261 for (size_t l=0; l<nloop; l++) {
262 run_mat_vec( A, b, c );
263 }
264 execution_space().fence();
265
266 perf.time = wall_clock.seconds() / nloop;
267 perf.flops = m*n*2;
268 perf.throughput = perf.flops / perf.time / 1.0e9;
269
270 if (check)
271 check_val(A,b,c);
272
273 return perf;
274}
275
276template <typename FadType, typename ... ViewArgs>
277Perf
278do_time_fad(const size_t m, const size_t n, const size_t p, const size_t nloop,
279 const bool check)
280{
281 typedef Kokkos::View<FadType**, ViewArgs...> ViewTypeA;
282 typedef Kokkos::View<FadType*, ViewArgs...> ViewTypeB;
283 typedef Kokkos::View<FadType*, ViewArgs...> ViewTypeC;
284 typedef typename ViewTypeA::execution_space execution_space;
285
286 // Set amount of memory available for dynamic memory allocation on GPU
287#ifdef KOKKOS_ENABLE_CUDA
288 if (std::is_same<execution_space,Kokkos::Cuda>::value &&
290 const size_t concurrency = execution_space().concurrency();
291 const size_t mem = std::min(m,concurrency) * p * sizeof(double);
292 //std::cout << "mem = " << mem / (1024*1024) << " MB" << std::endl;
293 cudaDeviceSetLimit(cudaLimitMallocHeapSize, mem);
294 }
295#endif
296
297#ifndef SACADO_DISABLE_FAD_VIEW_SPEC
298 ViewTypeA A("A",m,n,p+1);
299 ViewTypeB b("B",n,p+1);
300 ViewTypeC c("c",m,p+1);
301#else
302 ViewTypeA A("A",m,n);
303 ViewTypeB b("B",n);
304 ViewTypeC c("c",m);
305#endif
306
307 // FadType a(p, 1.0);
308 // for (size_t k=0; k<p; ++k)
309 // a.fastAccessDx(k) = 1.0;
310 Kokkos::deep_copy(typename ViewTypeA::array_type(A), 1.0);
311 Kokkos::deep_copy(typename ViewTypeB::array_type(b), 1.0);
312
313 Kokkos::Timer wall_clock;
314 Perf perf;
315
316 // Execute the kernel once to warm up
317 run_mat_vec( A, b, c );
318 execution_space().fence();
319
320 wall_clock.reset();
321 for (size_t l=0; l<nloop; l++) {
322 run_mat_vec( A, b, c );
323 }
324 execution_space().fence();
325
326 perf.time = wall_clock.seconds() / nloop;
327 perf.flops = m*n*(2+4*p);
328 perf.throughput = perf.flops / perf.time / 1.0e9;
329
330#ifndef SACADO_DISABLE_FAD_VIEW_SPEC
331 if (check) {
332 typename ViewTypeA::array_type A_flat = A;
333 typename ViewTypeB::array_type b_flat = b;
334 typename ViewTypeC::array_type c_flat = c;
335 check_deriv(A_flat, b_flat, c_flat);
336 }
337#endif
338
339 return perf;
340}
341
342template <typename FadType, typename ... ViewArgs>
343Perf
344do_time_scratch(const size_t m, const size_t n, const size_t p, const size_t nloop,
345 const bool check)
346{
347 typedef Kokkos::View<FadType**, ViewArgs...> ViewTypeA;
348 typedef Kokkos::View<FadType*, ViewArgs...> ViewTypeB;
349 typedef Kokkos::View<FadType*, ViewArgs...> ViewTypeC;
350 typedef typename ViewTypeA::execution_space execution_space;
351
352#ifndef SACADO_DISABLE_FAD_VIEW_SPEC
353 ViewTypeA A("A",m,n,p+1);
354 ViewTypeB b("B",n,p+1);
355 ViewTypeC c("c",m,p+1);
356#else
357 ViewTypeA A("A",m,n);
358 ViewTypeB b("B",n);
359 ViewTypeC c("c",m);
360#endif
361
362 // FadType a(p, 1.0);
363 // for (size_t k=0; k<p; ++k)
364 // a.fastAccessDx(k) = 1.0;
365 Kokkos::deep_copy(typename ViewTypeA::array_type(A), 1.0);
366 Kokkos::deep_copy(typename ViewTypeB::array_type(b), 1.0);
367
368 Kokkos::Timer wall_clock;
369 Perf perf;
370
371 // Execute the kernel once to warm up
372 run_mat_vec_scratch( A, b, c );
373 execution_space().fence();
374
375 wall_clock.reset();
376 for (size_t l=0; l<nloop; l++) {
377 run_mat_vec_scratch( A, b, c );
378 }
379 execution_space().fence();
380
381 perf.time = wall_clock.seconds() / nloop;
382 perf.flops = m*n*(2+4*p);
383 perf.throughput = perf.flops / perf.time / 1.0e9;
384
385#ifndef SACADO_DISABLE_FAD_VIEW_SPEC
386 if (check) {
387 typename ViewTypeA::array_type A_flat = A;
388 typename ViewTypeB::array_type b_flat = b;
389 typename ViewTypeC::array_type c_flat = c;
390 check_deriv(A_flat, b_flat, c_flat);
391 }
392#endif
393
394 return perf;
395}
396
397template <typename ... ViewArgs>
398Perf
399do_time_analytic(const size_t m, const size_t n, const size_t p,
400 const size_t nloop, const bool check)
401{
402 typedef Kokkos::View<double***, ViewArgs...> ViewTypeA;
403 typedef Kokkos::View<double**, ViewArgs...> ViewTypeB;
404 typedef Kokkos::View<double**, ViewArgs...> ViewTypeC;
405 typedef typename ViewTypeA::execution_space execution_space;
406
407 ViewTypeA A("A",m,n,p+1);
408 ViewTypeB b("B",n,p+1);
409 ViewTypeC c("c",m,p+1);
410
411 Kokkos::deep_copy(A, 1.0);
412 Kokkos::deep_copy(b, 1.0);
413
414 Kokkos::Timer wall_clock;
415 Perf perf;
416
417 // Execute the kernel once to warm up
418 run_mat_vec_deriv( A, b, c );
419 execution_space().fence();
420
421 for (size_t l=0; l<nloop; l++) {
422 run_mat_vec_deriv( A, b, c );
423 }
424 execution_space().fence();
425
426 perf.time = wall_clock.seconds() / nloop;
427 perf.flops = m*n*(2+4*p);
428 perf.throughput = perf.flops / perf.time / 1.0e9;
429
430 if (check)
431 check_deriv(A,b,c);
432
433 return perf;
434}
435
436template <int MaxP, typename ... ViewArgs>
437Perf
438do_time_analytic_sl(const size_t m, const size_t n, const size_t p,
439 const size_t nloop, const bool check)
440{
441 typedef Kokkos::View<double***, ViewArgs...> ViewTypeA;
442 typedef Kokkos::View<double**, ViewArgs...> ViewTypeB;
443 typedef Kokkos::View<double**, ViewArgs...> ViewTypeC;
444 typedef typename ViewTypeA::execution_space execution_space;
445
446 ViewTypeA A("A",m,n,p+1);
447 ViewTypeB b("B",n,p+1);
448 ViewTypeC c("c",m,p+1);
449
450 Kokkos::deep_copy(A, 1.0);
451 Kokkos::deep_copy(b, 1.0);
452
453 Kokkos::Timer wall_clock;
454 Perf perf;
455
456 // Execute the kernel once to warm up
457 run_mat_vec_deriv_sl<MaxP>( A, b, c );
458 execution_space().fence();
459
460 for (size_t l=0; l<nloop; l++) {
461 run_mat_vec_deriv_sl<MaxP>( A, b, c );
462 }
463 execution_space().fence();
464
465 perf.time = wall_clock.seconds() / nloop;
466 perf.flops = m*n*(2+4*p);
467 perf.throughput = perf.flops / perf.time / 1.0e9;
468
469 if (check)
470 check_deriv(A,b,c);
471
472 return perf;
473}
474
475template <int p, typename ... ViewArgs>
476Perf
477do_time_analytic_s(const size_t m, const size_t n,
478 const size_t nloop, const bool check)
479{
480 typedef Kokkos::View<double**[p+1], ViewArgs...> ViewTypeA;
481 typedef Kokkos::View<double**, ViewArgs...> ViewTypeB;
482 typedef Kokkos::View<double**, ViewArgs...> ViewTypeC;
483 typedef typename ViewTypeA::execution_space execution_space;
484
485 ViewTypeA A("A",m,n);
486 ViewTypeB b("B",n,p+1);
487 ViewTypeC c("c",m,p+1);
488
489 Kokkos::deep_copy(A, 1.0);
490 Kokkos::deep_copy(b, 1.0);
491
492 Kokkos::Timer wall_clock;
493 Perf perf;
494
495 // Execute the kernel once to warm up
496 run_mat_vec_deriv_s<p>( A, b, c );
497 execution_space().fence();
498
499 for (size_t l=0; l<nloop; l++) {
500 run_mat_vec_deriv_s<p>( A, b, c );
501 }
502 execution_space().fence();
503
504 perf.time = wall_clock.seconds() / nloop;
505 perf.flops = m*n*(2+4*p);
506 perf.throughput = perf.flops / perf.time / 1.0e9;
507
508 if (check)
509 check_deriv(A,b,c);
510
511 return perf;
512}
513
517
518#define INST_FUNC_VAL_DEV(DEV) \
519 template Perf do_time_val< Kokkos::LayoutLeft, DEV > ( const size_t m, const size_t n, const size_t nloop, const bool check ); \
520 template Perf do_time_val< Kokkos::LayoutRight, DEV > ( const size_t m, const size_t n, const size_t nloop, const bool check ); \
521 template Perf do_time_val< DEV > ( const size_t m, const size_t n, const size_t nloop, const bool check ); \
522 template Perf do_time_analytic< Kokkos::LayoutLeft, DEV > ( const size_t m, const size_t n, const size_t p, const size_t nloop, const bool check); \
523 template Perf do_time_analytic< Kokkos::LayoutRight, DEV > ( const size_t m, const size_t n, const size_t p, const size_t nloop, const bool check); \
524 template Perf do_time_analytic< DEV > ( const size_t m, const size_t n, const size_t p, const size_t nloop, const bool check); \
525 template Perf do_time_analytic_sl< SLFadSize, Kokkos::LayoutLeft, DEV > ( const size_t m, const size_t n, const size_t p, const size_t nloop, const bool check); \
526 template Perf do_time_analytic_sl< SLFadSize, Kokkos::LayoutRight, DEV > ( const size_t m, const size_t n, const size_t p, const size_t nloop, const bool check); \
527 template Perf do_time_analytic_sl< SLFadSize, DEV > ( const size_t m, const size_t n, const size_t p, const size_t nloop, const bool check); \
528 template Perf do_time_analytic_s< SFadSize, Kokkos::LayoutLeft, DEV > ( const size_t m, const size_t n, const size_t nloop, const bool check); \
529 template Perf do_time_analytic_s< SFadSize, Kokkos::LayoutRight, DEV > ( const size_t m, const size_t n, const size_t nloop, const bool check); \
530 template Perf do_time_analytic_s< SFadSize, DEV > ( const size_t m, const size_t n, const size_t nloop, const bool check);
531
532#define INST_FUNC_FAD_DEV(FAD,DEV) \
533 template Perf do_time_fad< FAD, Kokkos::LayoutLeft, DEV > ( const size_t m, const size_t n, const size_t p, const size_t nloop, const bool check ); \
534 template Perf do_time_fad< FAD, Kokkos::LayoutRight, DEV > ( const size_t m, const size_t n, const size_t p, const size_t nloop, const bool check ); \
535 template Perf do_time_fad< FAD, DEV > ( const size_t m, const size_t n, const size_t p, const size_t nloop, const bool check ); \
536 template Perf do_time_scratch< FAD, Kokkos::LayoutLeft, DEV > ( const size_t m, const size_t n, const size_t p, const size_t nloop, const bool check ); \
537 template Perf do_time_scratch< FAD, Kokkos::LayoutRight, DEV > ( const size_t m, const size_t n, const size_t p, const size_t nloop, const bool check ); \
538 template Perf do_time_scratch< FAD, DEV > ( const size_t m, const size_t n, const size_t p, const size_t nloop, const bool check );
539
540#define INST_FUNC_DEV(DEV) \
541 INST_FUNC_VAL_DEV( DEV ) \
542 INST_FUNC_FAD_DEV( SFad_type, DEV ) \
543 INST_FUNC_FAD_DEV( SLFad_type, DEV ) \
544 INST_FUNC_FAD_DEV( DFad_type, DEV )
545
546#ifdef KOKKOS_ENABLE_SERIAL
547INST_FUNC_DEV(Kokkos::Serial)
548#endif
549
550#ifdef KOKKOS_ENABLE_OPENMP
551INST_FUNC_DEV(Kokkos::OpenMP)
552#endif
553
554#ifdef KOKKOS_ENABLE_THREADS
555INST_FUNC_DEV(Kokkos::Threads)
556#endif
557
558#ifdef KOKKOS_ENABLE_CUDA
559INST_FUNC_DEV(Kokkos::Cuda)
560#endif
expr expr1 expr1 expr1 c expr2 expr1 expr2 expr1 expr2 expr1 expr1 expr1 expr1 c expr2 expr1 expr2 expr1 expr2 expr1 expr1 expr1 expr1 c *expr2 expr1 expr2 expr1 expr2 expr1 expr1 expr1 expr1 c expr2 expr1 expr2 expr1 expr2 expr1 expr1 expr1 expr2 expr1 expr2 expr1 expr1 expr1 expr2 expr1 expr2 expr1 expr1 expr1 c
#define A
Definition: Sacado_rad.hpp:572
const int N
Sacado::Fad::DFad< double > FadType
int check(Epetra_CrsGraph &A, int NumMyRows1, int NumGlobalRows1, int NumMyNonzeros1, int NumGlobalNonzeros1, int *MyGlobalElements, bool verbose)
int value
const char * p
void run_mat_vec_scratch(const ViewTypeA &A, const ViewTypeB &b, const ViewTypeC &c)
Definition: mat_vec.cpp:58
Perf do_time_val(const size_t m, const size_t n, const size_t nloop, const bool check)
Definition: mat_vec.cpp:238
Perf do_time_analytic_s(const size_t m, const size_t n, const size_t nloop, const bool check)
Definition: mat_vec.cpp:477
#define INST_FUNC_DEV(DEV)
Definition: mat_vec.cpp:540
Perf do_time_analytic(const size_t m, const size_t n, const size_t p, const size_t nloop, const bool check)
Definition: mat_vec.cpp:399
Sacado::Fad::SLFad< double, SLFadSize > SLFad_type
Definition: mat_vec.cpp:515
void run_mat_vec(const ViewTypeA &A, const ViewTypeB &b, const ViewTypeC &c)
Definition: mat_vec.cpp:39
void check_val(const ViewTypeA &A, const ViewTypeB &b, const ViewTypeC &c)
Definition: mat_vec.cpp:197
void check_deriv(const ViewTypeA &A, const ViewTypeB &b, const ViewTypeC &c)
Definition: mat_vec.cpp:216
void run_mat_vec_deriv_s(const ViewTypeA &A, const ViewTypeB &b, const ViewTypeC &c)
Definition: mat_vec.cpp:157
Sacado::Fad::SFad< double, SFadSize > SFad_type
Definition: mat_vec.cpp:514
Perf do_time_fad(const size_t m, const size_t n, const size_t p, const size_t nloop, const bool check)
Definition: mat_vec.cpp:278
Perf do_time_scratch(const size_t m, const size_t n, const size_t p, const size_t nloop, const bool check)
Definition: mat_vec.cpp:344
void run_mat_vec_deriv_sl(const ViewTypeA &A, const ViewTypeB &b, const ViewTypeC &c)
Definition: mat_vec.cpp:123
Sacado::Fad::DFad< double > DFad_type
Definition: mat_vec.cpp:516
void run_mat_vec_deriv(const ViewTypeA &A, const ViewTypeB &b, const ViewTypeC &c)
Definition: mat_vec.cpp:98
Perf do_time_analytic_sl(const size_t m, const size_t n, const size_t p, const size_t nloop, const bool check)
Definition: mat_vec.cpp:438
double time
double flops
double throughput
const double tol