Teuchos - Trilinos Tools Package Version of the Day
Loading...
Searching...
No Matches
Teuchos_TimeMonitor.cpp
1// @HEADER
2// ***********************************************************************
3//
4// Teuchos: Common Tools Package
5// Copyright (2004) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38//
39// ***********************************************************************
40// @HEADER
41
43#include "Teuchos_CommHelpers.hpp"
44#include "Teuchos_DefaultComm.hpp"
47#include "Teuchos_StandardParameterEntryValidators.hpp"
49#include "Teuchos_StackedTimer.hpp"
50
51#include <functional>
52#include <iomanip>
53#ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
54#include <sstream>
55#endif
56
57namespace Teuchos {
110 template<class Ordinal, class ScalarType, class IndexType>
111 class MaxLoc :
112 public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
113 public:
114 void
115 reduce (const Ordinal count,
116 const std::pair<ScalarType, IndexType> inBuffer[],
117 std::pair<ScalarType, IndexType> inoutBuffer[]) const;
118 };
119
120 template<class Ordinal>
121 class MaxLoc<Ordinal, double, int> :
122 public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
123 public:
124 void
125 reduce (const Ordinal count,
126 const std::pair<double, int> inBuffer[],
127 std::pair<double, int> inoutBuffer[]) const
128 {
129 for (Ordinal ind = 0; ind < count; ++ind) {
130 const std::pair<double, int>& in = inBuffer[ind];
131 std::pair<double, int>& inout = inoutBuffer[ind];
132
133 if (in.first > inout.first) {
134 inout.first = in.first;
135 inout.second = in.second;
136 } else if (in.first < inout.first) {
137 // Don't need to do anything; inout has the values.
138 } else { // equal, or at least one is NaN.
139 inout.first = in.first;
140 inout.second = std::min (in.second, inout.second);
141 }
142 }
143 }
144 };
145
172 template<class Ordinal, class ScalarType, class IndexType>
173 class MinLoc :
174 public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
175 public:
176 void
177 reduce (const Ordinal count,
178 const std::pair<ScalarType, IndexType> inBuffer[],
179 std::pair<ScalarType, IndexType> inoutBuffer[]) const;
180 };
181
182 template<class Ordinal>
183 class MinLoc<Ordinal, double, int> :
184 public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
185 public:
186 void
187 reduce (const Ordinal count,
188 const std::pair<double, int> inBuffer[],
189 std::pair<double, int> inoutBuffer[]) const
190 {
191 for (Ordinal ind = 0; ind < count; ++ind) {
192 const std::pair<double, int>& in = inBuffer[ind];
193 std::pair<double, int>& inout = inoutBuffer[ind];
194
195 if (in.first < inout.first) {
196 inout.first = in.first;
197 inout.second = in.second;
198 } else if (in.first > inout.first) {
199 // Don't need to do anything; inout has the values.
200 } else { // equal, or at least one is NaN.
201 inout.first = in.first;
202 inout.second = std::min (in.second, inout.second);
203 }
204 }
205 }
206 };
207
211 template<class Ordinal, class ScalarType, class IndexType>
213 public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
214 public:
215 void
216 reduce (const Ordinal count,
217 const std::pair<ScalarType, IndexType> inBuffer[],
218 std::pair<ScalarType, IndexType> inoutBuffer[]) const;
219 };
220
221 template<class Ordinal>
222 class MinLocNonzero<Ordinal, double, int> :
223 public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
224 public:
225 void
226 reduce (const Ordinal count,
227 const std::pair<double, int> inBuffer[],
228 std::pair<double, int> inoutBuffer[]) const
229 {
230 for (Ordinal ind = 0; ind < count; ++ind) {
231 const std::pair<double, int>& in = inBuffer[ind];
232 std::pair<double, int>& inout = inoutBuffer[ind];
233
234 if ( (in.first < inout.first && in.first != 0) || (inout.first == 0 && in.first != 0) ) {
235 inout.first = in.first;
236 inout.second = in.second;
237 } else if (in.first > inout.first) {
238 // Don't need to do anything; inout has the values.
239 } else { // equal, or at least one is NaN.
240 inout.first = in.first;
241 inout.second = std::min (in.second, inout.second);
242 }
243 }
244 }
245 };
246
// Typedef used internally by TimeMonitor::summarize() and its
// helper functions.  The map is keyed on timer label (a string).
// Each value is a pair: (total number of seconds over all calls to
// that timer, total number of calls to that timer).  Because it is a
// std::map, iteration visits the labels in sorted order.
typedef std::map<std::string, std::pair<double, int> > timer_map_t;
252
253 // static initialization
255
256 TimeMonitor::TimeMonitor (Time& timer, bool reset)
257 : PerformanceMonitorBase<Time>(timer, reset)
258 {
259 if (!isRecursiveCall()) {
260 counter().start(reset);
261#ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
263 stackedTimer_->start(counter().name(),false);
264#endif
265 }
266 }
267
269 if (!isRecursiveCall()) {
270 counter().stop();
271#ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
272 try {
274 stackedTimer_->stop(counter().name(),false);
275 }
276 catch (std::runtime_error& e) {
277 std::ostringstream warning;
278 warning <<
279 "\n*********************************************************************\n"
280 "WARNING: Overlapping timers detected! Near: " <<counter().name()<<"\n"
281 "A TimeMonitor timer was stopped before a nested subtimer was\n"
282 "stopped. This is not allowed by the StackedTimer. This corner case\n"
283 "typically occurs if the TimeMonitor is stored in an RCP and the RCP is\n"
284 "assigned to a new timer. To disable this warning, either fix the\n"
285 "ordering of timer creation and destuction or disable the StackedTimer\n"
286 "support in the TimeMonitor by setting the StackedTimer to null\n"
287 "Example:\n"
288 " RCP<TimeMonitor> MM = rcp(new TimeMonitor(*(TimeMonitor::getNewTimer(\"Junk\"))));\n"
289 "///code to time \n"
290 "MM = Teuchos::null;\n"
291 "MM = rcp(new TimeMonitor(*(TimeMonitor::getNewTimer(\"SecondJunk\"))));\n"
292 "*********************************************************************\n";
293 std::cout << warning.str() << std::endl << e.what() << std::endl;
295 }
296#endif
297 }
298 }
299
300 void
301 TimeMonitor::disableTimer (const std::string& name)
302 {
303 RCP<Time> timer = lookupCounter (name);
305 timer == null, std::invalid_argument,
306 "TimeMonitor::disableTimer: Invalid timer \"" << name << "\"");
307 timer->disable ();
308 }
309
310 void
311 TimeMonitor::enableTimer (const std::string& name)
312 {
313 RCP<Time> timer = lookupCounter (name);
315 timer == null, std::invalid_argument,
316 "TimeMonitor::enableTimer: Invalid timer \"" << name << "\"");
317 timer->enable ();
318 }
319
320 void
322 {
323 typedef std::map<std::string, RCP<Time> > map_type;
324 typedef map_type::iterator iter_type;
325 map_type& ctrs = counters ();
326
327 // In debug mode, loop first to check whether any of the timers
328 // are running, before resetting them. This ensures that this
329 // method satisfies the strong exception guarantee (either it
330 // completes normally, or there are no side effects).
331#ifdef TEUCHOS_DEBUG
332 for (iter_type it = ctrs.begin(); it != ctrs.end(); ++it) {
333 // We throw a runtime_error rather than a logic_error, because
334 // logic_error suggests a bug in the implementation of
335 // TimeMonitor. Calling zeroOutTimers() when a timer is running
336 // is not TimeMonitor's fault.
338 it->second->isRunning (), std::runtime_error,
339 "Timer \"" << it->second->name () << "\" is currently running. "
340 "You are not allowed to reset running timers.");
341 }
342#endif // TEUCHOS_DEBUG
343
344 for (iter_type it = ctrs.begin(); it != ctrs.end(); ++it) {
345 it->second->reset ();
346 }
347 }
348
349 // An anonymous namespace is the standard way of limiting linkage of
350 // its contained routines to file scope.
351 namespace {
// \brief Build a (name, (0 seconds, 0 calls)) entry for a timer map.
//
// No timer object is created; only the map entry is returned.
//
// \param name [in] Label to store as the entry's key.
//
// \return Pair of the label and a (double 0, int 0) statistics pair.
std::pair<std::string, std::pair<double, int> >
makeEmptyTimerDatum (const std::string& name)
{
  const std::pair<double, int> zeroStats (0.0, 0);
  return std::pair<std::string, std::pair<double, int> > (name, zeroStats);
}
363
364 // \fn collectLocalTimerData
365 // \brief Collect and sort local timer data by timer names.
366 //
367 // \param localData [out] Map whose keys are the timer names, and
368 // whose value for each key is the total elapsed time (in
369 // seconds) and the call count for the timer with that name.
370 //
371 // \param localCounters [in] Timers from which to extract data.
372 //
373 // \param filter [in] Filter for timer labels. If filter is not
374 // empty, this method will only collect data for local timers
375 // whose labels begin with this string.
376 //
377 // Extract the total elapsed time and call count from each timer
378 // in the given array. Merge results for timers with duplicate
379 // labels, by summing their total elapsed times and call counts
380 // pairwise.
381 void
382 collectLocalTimerData (timer_map_t& localData,
383 const std::map<std::string, RCP<Time> >& localCounters,
384 const std::string& filter="")
385 {
386 using std::make_pair;
387 typedef timer_map_t::iterator iter_t;
388
389 timer_map_t theLocalData;
390 for (std::map<std::string, RCP<Time> >::const_iterator it = localCounters.begin();
391 it != localCounters.end(); ++it) {
392 const std::string& name = it->second->name ();
393
394 // Filter current timer name, if provided filter is nonempty.
395 // Filter string must _start_ the timer label, not just be in it.
396 const bool skipThisOne = (filter != "" && name.find (filter) != 0);
397 if (! skipThisOne) {
398 const double timing = it->second->totalElapsedTime ();
399 const int numCalls = it->second->numCalls ();
400
401 // Merge timers with duplicate labels, by summing their
402 // total elapsed times and call counts.
403 iter_t loc = theLocalData.find (name);
404 if (loc == theLocalData.end()) {
405 // Use loc as an insertion location hint.
406 theLocalData.insert (loc, make_pair (name, make_pair (timing, numCalls)));
407 }
408 else {
409 loc->second.first += timing;
410 loc->second.second += numCalls;
411 }
412 }
413 }
414 // This avoids copying the map, and also makes this method
415 // satisfy the strong exception guarantee.
416 localData.swap (theLocalData);
417 }
418
419 // \brief Locally filter out timer data with zero call counts.
420 //
421 // \param timerData [in/out]
422 void
423 filterZeroData (timer_map_t& timerData)
424 {
425 // FIXME (mfh 15 Mar 2013) Should use std::map::erase with
426 // iterator hint, instead of rebuilding the map completely.
427 timer_map_t newTimerData;
428 for (timer_map_t::const_iterator it = timerData.begin();
429 it != timerData.end(); ++it) {
430 if (it->second.second > 0) {
431 newTimerData[it->first] = it->second;
432 }
433 }
434 timerData.swap (newTimerData);
435 }
436
458 void
459 collectLocalTimerDataAndNames (timer_map_t& localTimerData,
460 Array<std::string>& localTimerNames,
461 const std::map<std::string, RCP<Time> >& localTimers,
462 const bool writeZeroTimers,
463 const std::string& filter="")
464 {
465 // Collect and sort local timer data by timer names.
466 collectLocalTimerData (localTimerData, localTimers, filter);
467
468 // Filter out zero data locally first. This ensures that if we
469 // are writing global stats, and if a timer name exists in the
470 // set of global names, then that timer has a nonzero call count
471 // on at least one MPI process.
472 if (! writeZeroTimers) {
473 filterZeroData (localTimerData);
474 }
475
476 // Extract the set of local timer names. The std::map keeps
477 // them sorted alphabetically.
478 localTimerNames.reserve (localTimerData.size());
479 for (timer_map_t::const_iterator it = localTimerData.begin();
480 it != localTimerData.end(); ++it) {
481 localTimerNames.push_back (it->first);
482 }
483 }
484
519 void
520 collectGlobalTimerData (timer_map_t& globalTimerData,
521 Array<std::string>& globalTimerNames,
522 timer_map_t& localTimerData,
523 Array<std::string>& localTimerNames,
524 Ptr<const Comm<int> > comm,
525 const bool alwaysWriteLocal,
526 const ECounterSetOp setOp)
527 {
528 // There may be some global timers that are not local timers on
529 // the calling MPI process(es). In that case, if
530 // alwaysWriteLocal is true, then we need to fill in the
531 // "missing" local timers. That will ensure that both global
532 // and local timer columns in the output table have the same
533 // number of rows. The collectLocalTimerDataAndNames() method
534 // may have already filtered out local timers with zero call
535 // counts (if its writeZeroTimers argument was false), but we
536 // won't be filtering again. Thus, any local timer data we
537 // insert here won't get filtered out.
538 //
539 // Note that calling summarize() with writeZeroTimers == false
540 // will still do what it says, even if we insert local timers
541 // with zero call counts here.
542
543 // This does the correct and inexpensive thing (just copies the
544 // timer data) if numProcs == 1. Otherwise, it initiates a
545 // communication with \f$O(\log P)\f$ messages along the
546 // critical path, where \f$P\f$ is the number of participating
547 // processes.
548 mergeCounterNames (*comm, localTimerNames, globalTimerNames, setOp);
549
550#ifdef TEUCHOS_DEBUG
551 {
552 // Sanity check that all processes have the name number of
553 // global timer names.
554 const timer_map_t::size_type myNumGlobalNames = globalTimerNames.size();
555 timer_map_t::size_type minNumGlobalNames = 0;
556 timer_map_t::size_type maxNumGlobalNames = 0;
557 reduceAll (*comm, REDUCE_MIN, myNumGlobalNames,
558 outArg (minNumGlobalNames));
559 reduceAll (*comm, REDUCE_MAX, myNumGlobalNames,
560 outArg (maxNumGlobalNames));
561 TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalNames != maxNumGlobalNames,
562 std::logic_error, "Min # global timer names = " << minNumGlobalNames
563 << " != max # global timer names = " << maxNumGlobalNames
564 << ". Please report this bug to the Teuchos developers.");
565 TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalNames != minNumGlobalNames,
566 std::logic_error, "My # global timer names = " << myNumGlobalNames
567 << " != min # global timer names = " << minNumGlobalNames
568 << ". Please report this bug to the Teuchos developers.");
569 }
570#endif // TEUCHOS_DEBUG
571
572 // mergeCounterNames() just merges the counters' names, not
573 // their actual data. Now we need to fill globalTimerData with
574 // this process' timer data for the timers in globalTimerNames.
575 //
576 // All processes need the full list of global timers, since
577 // there may be some global timers that are not local timers.
578 // That's why mergeCounterNames() has to be an all-reduce, not
579 // just a reduction to Proc 0.
580 //
581 // Insertion optimization: if the iterator given to map::insert
582 // points right before where we want to insert, insertion is
583 // O(1). globalTimerNames is sorted, so feeding the iterator
584 // output of map::insert into the next invocation's input should
585 // make the whole insertion O(N) where N is the number of
586 // entries in globalTimerNames.
587 timer_map_t::iterator globalMapIter = globalTimerData.begin();
588 timer_map_t::iterator localMapIter;
589 for (Array<string>::const_iterator it = globalTimerNames.begin();
590 it != globalTimerNames.end(); ++it) {
591 const std::string& globalName = *it;
592 localMapIter = localTimerData.find (globalName);
593
594 if (localMapIter == localTimerData.end()) {
595 if (alwaysWriteLocal) {
596 // If there are some global timers that are not local
597 // timers, and if we want to print local timers, we insert
598 // a local timer datum with zero elapsed time and zero
599 // call count into localTimerData as well. This will
600 // ensure that both global and local timer columns in the
601 // output table have the same number of rows.
602 //
603 // We really only need to do this on Proc 0, which is the
604 // only process that currently may print local timers.
605 // However, we do it on all processes, just in case
606 // someone later wants to modify this function to print
607 // out local timer data for some process other than Proc
608 // 0. This extra computation won't affect the cost along
609 // the critical path, for future computations in which
610 // Proc 0 participates.
611 localMapIter = localTimerData.insert (localMapIter, makeEmptyTimerDatum (globalName));
612
613 // Make sure the missing global name gets added to the
614 // list of local names. We'll re-sort the list of local
615 // names below.
616 localTimerNames.push_back (globalName);
617 }
618 // There's a global timer that's not a local timer. Add it
619 // to our pre-merge version of the global timer data so that
620 // we can safely merge the global timer data later.
621 globalMapIter = globalTimerData.insert (globalMapIter, makeEmptyTimerDatum (globalName));
622 }
623 else {
624 // We have this global timer name in our local timer list.
625 // Fill in our pre-merge version of the global timer data
626 // with our local data.
627 globalMapIter = globalTimerData.insert (globalMapIter, std::make_pair (globalName, localMapIter->second));
628 }
629 }
630
631 if (alwaysWriteLocal) {
632 // Re-sort the list of local timer names, since we may have
633 // inserted "missing" names above.
634 std::sort (localTimerNames.begin(), localTimerNames.end());
635 }
636
637#ifdef TEUCHOS_DEBUG
638 {
639 // Sanity check that all processes have the name number of
640 // global timers.
641 const timer_map_t::size_type myNumGlobalTimers = globalTimerData.size();
642 timer_map_t::size_type minNumGlobalTimers = 0;
643 timer_map_t::size_type maxNumGlobalTimers = 0;
644 reduceAll (*comm, REDUCE_MIN, myNumGlobalTimers,
645 outArg (minNumGlobalTimers));
646 reduceAll (*comm, REDUCE_MAX, myNumGlobalTimers,
647 outArg (maxNumGlobalTimers));
648 TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalTimers != maxNumGlobalTimers,
649 std::logic_error, "Min # global timers = " << minNumGlobalTimers
650 << " != max # global timers = " << maxNumGlobalTimers
651 << ". Please report this bug to the Teuchos developers.");
652 TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalTimers != minNumGlobalTimers,
653 std::logic_error, "My # global timers = " << myNumGlobalTimers
654 << " != min # global timers = " << minNumGlobalTimers
655 << ". Please report this bug to the Teuchos developers.");
656 }
657#endif // TEUCHOS_DEBUG
658 }
659
706 void
707 computeGlobalTimerStats (stat_map_type& statData,
708 std::vector<std::string>& statNames,
709 Ptr<const Comm<int> > comm,
710 const timer_map_t& globalTimerData,
711 const bool ignoreZeroTimers)
712 {
714
715 const int numTimers = static_cast<int> (globalTimerData.size());
716 const int numProcs = comm->getSize();
717
718 // Extract pre-reduction timings and call counts into a
719 // sequential array. This array will be in the same order as
720 // the global timer names are in the map.
721 Array<std::pair<double, int> > timingsAndCallCounts;
722 timingsAndCallCounts.reserve (numTimers);
723 for (timer_map_t::const_iterator it = globalTimerData.begin();
724 it != globalTimerData.end(); ++it) {
725 timingsAndCallCounts.push_back (it->second);
726 }
727
728 // For each timer name, compute the min timing and its
729 // corresponding call count. If two processes have the same
730 // timing but different call counts, the minimum call count will
731 // be used.
732 Array<std::pair<double, int> > minTimingsAndCallCounts (numTimers);
733 if (numTimers > 0) {
734 if (ignoreZeroTimers)
735 reduceAll (*comm, MinLocNonzero<int, double, int>(), numTimers,
736 &timingsAndCallCounts[0], &minTimingsAndCallCounts[0]);
737 else
738 reduceAll (*comm, MinLoc<int, double, int>(), numTimers,
739 &timingsAndCallCounts[0], &minTimingsAndCallCounts[0]);
740 }
741
742 // For each timer name, compute the max timing and its
743 // corresponding call count. If two processes have the same
744 // timing but different call counts, the minimum call count will
745 // be used.
746 Array<std::pair<double, int> > maxTimingsAndCallCounts (numTimers);
747 if (numTimers > 0) {
748 reduceAll (*comm, MaxLoc<int, double, int>(), numTimers,
749 &timingsAndCallCounts[0], &maxTimingsAndCallCounts[0]);
750 }
751
752 // For each timer name, compute the mean-over-processes timing,
753 // the mean call count, and the mean-over-call-counts timing.
754 // The mean call count is reported as a double to allow a
755 // fractional value.
756 //
757 // Each local timing is really the total timing over all local
758 // invocations. The number of local invocations is the call
759 // count. Thus, the mean-over-call-counts timing is the sum of
760 // all the timings (over all processes), divided by the sum of
761 // all the call counts (over all processes). We compute it in a
762 // different way to over unnecessary overflow.
763 Array<double> meanOverCallCountsTimings (numTimers);
764 Array<double> meanOverProcsTimings (numTimers);
765 Array<double> meanCallCounts (numTimers);
766 Array<int> ICallThisTimer (numTimers);
767 Array<int> numProcsCallingEachTimer (numTimers);
768 {
769 // Figure out how many processors actually call each timer.
770 if (ignoreZeroTimers) {
771 for (int k = 0; k < numTimers; ++k) {
772 const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
773 if (callCount > 0) ICallThisTimer[k] = 1;
774 else ICallThisTimer[k] = 0;
775 }
776 if (numTimers > 0) {
777 reduceAll (*comm, REDUCE_SUM, numTimers, &ICallThisTimer[0],
778 &numProcsCallingEachTimer[0]);
779 }
780 }
781
782 // When summing, first scale by the number of processes. This
783 // avoids unnecessary overflow, and also gives us the mean
784 // call count automatically.
785 Array<double> scaledTimings (numTimers);
786 Array<double> scaledCallCounts (numTimers);
787 const double P = static_cast<double> (numProcs);
788
789 if (ignoreZeroTimers) {
790 for (int k = 0; k < numTimers; ++k) {
791 const double timing = timingsAndCallCounts[k].first;
792 const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
793
794 scaledTimings[k] = timing / numProcsCallingEachTimer[k];
795 scaledCallCounts[k] = callCount / numProcsCallingEachTimer[k];
796 }
797 }
798 else {
799 for (int k = 0; k < numTimers; ++k) {
800 const double timing = timingsAndCallCounts[k].first;
801 const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
802
803 scaledTimings[k] = timing / P;
804 scaledCallCounts[k] = callCount / P;
805 }
806 }
807
808 if (numTimers > 0) {
809 reduceAll (*comm, REDUCE_SUM, numTimers, &scaledTimings[0],
810 &meanOverProcsTimings[0]);
811 reduceAll (*comm, REDUCE_SUM, numTimers, &scaledCallCounts[0],
812 &meanCallCounts[0]);
813 }
814 // We don't have to undo the scaling for the mean timings;
815 // just divide by the scaled call count.
816 for (int k = 0; k < numTimers; ++k) {
817 if (meanCallCounts[k] > ScalarTraits<double>::zero ()) {
818 meanOverCallCountsTimings[k] = meanOverProcsTimings[k] / meanCallCounts[k];
819 }
820 else {
821 meanOverCallCountsTimings[k] = ScalarTraits<double>::zero ();
822 }
823 }
824 }
825
826 // Reformat the data into the map of statistics. Be sure that
827 // each value (the std::vector of (timing, call count) pairs,
828 // each entry of which is a different statistic) preserves the
829 // order of statNames.
830 statNames.resize (4);
831 statNames[0] = "MinOverProcs";
832 statNames[1] = "MeanOverProcs";
833 statNames[2] = "MaxOverProcs";
834 statNames[3] = "MeanOverCallCounts";
835
836 stat_map_type::iterator statIter = statData.end();
837 timer_map_t::const_iterator it = globalTimerData.begin();
838 for (int k = 0; it != globalTimerData.end(); ++k, ++it) {
839 std::vector<std::pair<double, double> > curData (4);
840 curData[0] = minTimingsAndCallCounts[k];
841 curData[1] = std::make_pair (meanOverProcsTimings[k], meanCallCounts[k]);
842 curData[2] = maxTimingsAndCallCounts[k];
843 curData[3] = std::make_pair (meanOverCallCountsTimings[k], meanCallCounts[k]);
844
845 // statIter gives an insertion location hint that makes each
846 // insertion O(1), since we remember the location of the last
847 // insertion.
848 statIter = statData.insert (statIter, std::make_pair (it->first, curData));
849 }
850 }
851
852
869 RCP<const Comm<int> >
870 getDefaultComm ()
871 {
872 // The default communicator. If Trilinos was built with MPI
873 // enabled, this should be MPI_COMM_WORLD. (If MPI has not yet
874 // been initialized, it's not valid to use the communicator!)
875 // Otherwise, this should be a "serial" (no MPI, one "process")
876 // communicator.
877 RCP<const Comm<int> > comm = DefaultComm<int>::getComm ();
878
879#ifdef HAVE_MPI
880 {
881 int mpiHasBeenStarted = 0;
882 MPI_Initialized (&mpiHasBeenStarted);
883 if (! mpiHasBeenStarted) {
884 // Make pComm a new "serial communicator."
885 comm = rcp_implicit_cast<const Comm<int> > (rcp (new SerialComm<int> ()));
886 }
887 }
888#endif // HAVE_MPI
889 return comm;
890 }
891
892 } // namespace (anonymous)
893
894
895 void
897 std::vector<std::string>& statNames,
898 Ptr<const Comm<int> > comm,
899 const ECounterSetOp setOp,
900 const std::string& filter)
901 {
902 // Collect local timer data and names. Filter out timers with
903 // zero call counts if writeZeroTimers is false. Also, apply the
904 // timer label filter at this point, so we don't have to compute
905 // statistics on timers we don't want to display anyway.
906 timer_map_t localTimerData;
907 Array<std::string> localTimerNames;
908 const bool writeZeroTimers = false;
909 collectLocalTimerDataAndNames (localTimerData, localTimerNames,
910 counters(), writeZeroTimers, filter);
911 // Merge the local timer data and names into global timer data and
912 // names.
913 timer_map_t globalTimerData;
914 Array<std::string> globalTimerNames;
915 const bool alwaysWriteLocal = false;
916 collectGlobalTimerData (globalTimerData, globalTimerNames,
917 localTimerData, localTimerNames,
918 comm, alwaysWriteLocal, setOp);
919 // Compute statistics on the data.
920 computeGlobalTimerStats (statData, statNames, comm, globalTimerData, false);
921 }
922
923
924 void
926 std::ostream& out,
927 const bool alwaysWriteLocal,
928 const bool writeGlobalStats,
929 const bool writeZeroTimers,
930 const ECounterSetOp setOp,
931 const std::string& filter,
932 const bool ignoreZeroTimers)
933 {
934 //
935 // We can't just call computeGlobalTimerStatistics(), since
936 // summarize() has different options that affect whether global
937 // statistics are computed and printed.
938 //
939 const int numProcs = comm->getSize();
940 const int myRank = comm->getRank();
941
942 // Collect local timer data and names. Filter out timers with
943 // zero call counts if writeZeroTimers is false. Also, apply the
944 // timer label filter at this point, so we don't have to compute
945 // statistics on timers we don't want to display anyway.
946 timer_map_t localTimerData;
947 Array<std::string> localTimerNames;
948 collectLocalTimerDataAndNames (localTimerData, localTimerNames,
949 counters(), writeZeroTimers, filter);
950
951 // If we're computing global statistics, merge the local timer
952 // data and names into global timer data and names, and compute
953 // global timer statistics. Otherwise, leave the global data
954 // empty.
955 timer_map_t globalTimerData;
956 Array<std::string> globalTimerNames;
957 stat_map_type statData;
958 std::vector<std::string> statNames;
959 if (writeGlobalStats) {
960 collectGlobalTimerData (globalTimerData, globalTimerNames,
961 localTimerData, localTimerNames,
962 comm, alwaysWriteLocal, setOp);
963 // Compute statistics on the data, but only if the communicator
964 // contains more than one process. Otherwise, statistics don't
965 // make sense and we don't print them (see below).
966 if (numProcs > 1) {
967 computeGlobalTimerStats (statData, statNames, comm, globalTimerData, ignoreZeroTimers);
968 }
969 }
970
971 // Precision of floating-point numbers in the table.
972 const int precision = format().precision();
973 const std::ios_base::fmtflags& flags = out.flags();
974
975 // All columns of the table, in order.
976 Array<TableColumn> tableColumns;
977
978 // Labels of all the columns of the table.
979 // We will append to this when we add each column.
980 Array<std::string> titles;
981
982 // Widths (in number of characters) of each column.
983 // We will append to this when we add each column.
984 Array<int> columnWidths;
985
986 // Table column containing all timer names. If writeGlobalStats
987 // is true, we use the global timer names, otherwise we use the
988 // local timer names. We build the table on all processes
989 // redundantly, but only print on Rank 0.
990 {
991 titles.append ("Timer Name");
992
993 // The column labels depend on whether we are computing global statistics.
994 TableColumn nameCol (writeGlobalStats ? globalTimerNames : localTimerNames);
995 tableColumns.append (nameCol);
996
997 // Each column is as wide as it needs to be to hold both its
998 // title and all of the column data. This column's title is the
999 // current last entry of the titles array.
1000 columnWidths.append (format().computeRequiredColumnWidth (titles.back(), nameCol));
1001 }
1002
1003 // Table column containing local timer stats, if applicable. We
1004 // only write local stats if asked, only on MPI Proc 0, and only
1005 // if there is more than one MPI process in the communicator
1006 // (otherwise local stats == global stats, so we just print the
1007 // global stats). In this case, we've padded the local data on
1008 // Proc 0 if necessary to match the global timer list, so that the
1009 // columns have the same number of rows.
1010 if (alwaysWriteLocal && numProcs > 1 && myRank == 0) {
1011 titles.append ("Local time (num calls)");
1012
1013 // Copy local timer data out of the array-of-structs into
1014 // separate arrays, for display in the table.
1015 Array<double> localTimings;
1016 Array<double> localNumCalls;
1017 for (timer_map_t::const_iterator it = localTimerData.begin();
1018 it != localTimerData.end(); ++it) {
1019 localTimings.push_back (it->second.first);
1020 localNumCalls.push_back (static_cast<double> (it->second.second));
1021 }
1022 TableColumn timeAndCalls (localTimings, localNumCalls, precision, flags, true);
1023 tableColumns.append (timeAndCalls);
1024 columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
1025 }
1026
1027 if (writeGlobalStats) {
1028 // If there's only 1 process in the communicator, don't display
1029 // statistics; statistics don't make sense in that case. Just
1030 // display the timings and call counts. If there's more than 1
1031 // process, do display statistics.
1032 if (numProcs == 1) {
1033 // Extract timings and the call counts from globalTimerData.
1034 Array<double> globalTimings;
1035 Array<double> globalNumCalls;
1036 for (timer_map_t::const_iterator it = globalTimerData.begin();
1037 it != globalTimerData.end(); ++it) {
1038 globalTimings.push_back (it->second.first);
1039 globalNumCalls.push_back (static_cast<double> (it->second.second));
1040 }
1041 // Print the table column.
1042 titles.append ("Global time (num calls)");
1043 TableColumn timeAndCalls (globalTimings, globalNumCalls, precision, flags, true);
1044 tableColumns.append (timeAndCalls);
1045 columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
1046 }
1047 else { // numProcs > 1
1048 // Print a table column for each statistic. statNames and
1049 // each value in statData use the same ordering, so we can
1050 // iterate over valid indices of statNames to display the
1051 // statistics in the right order.
1052 const timer_map_t::size_type numGlobalTimers = globalTimerData.size();
1053 for (std::vector<std::string>::size_type statInd = 0; statInd < statNames.size(); ++statInd) {
1054 // Extract lists of timings and their call counts for the
1055 // current statistic.
1056 Array<double> statTimings (numGlobalTimers);
1057 Array<double> statCallCounts (numGlobalTimers);
1058 stat_map_type::const_iterator it = statData.begin();
1059 for (int k = 0; it != statData.end(); ++it, ++k) {
1060 statTimings[k] = (it->second[statInd]).first;
1061 statCallCounts[k] = (it->second[statInd]).second;
1062 }
1063 // Print the table column.
1064 const std::string& statisticName = statNames[statInd];
1065 const std::string titleString = statisticName;
1066 titles.append (titleString);
1067 TableColumn timeAndCalls (statTimings, statCallCounts, precision, flags, true);
1068 tableColumns.append (timeAndCalls);
1069 columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
1070 }
1071 }
1072 }
1073
1074 // Print the whole table to the given output stream on MPI Rank 0.
1075 format().setColumnWidths (columnWidths);
1076 if (myRank == 0) {
1077 std::ostringstream theTitle;
1078 theTitle << "TimeMonitor results over " << numProcs << " processor"
1079 << (numProcs > 1 ? "s" : "");
1080 format().writeWholeTable (out, theTitle.str(), titles, tableColumns);
1081 }
1082 }
1083
1084 void
1085 TimeMonitor::summarize (std::ostream &out,
1086 const bool alwaysWriteLocal,
1087 const bool writeGlobalStats,
1088 const bool writeZeroTimers,
1089 const ECounterSetOp setOp,
1090 const std::string& filter,
1091 const bool ignoreZeroTimers)
1092 {
1093 // The default communicator. If Trilinos was built with MPI
1094 // enabled, this should be MPI_COMM_WORLD. Otherwise, this should
1095 // be a "serial" (no MPI, one "process") communicator.
1096 RCP<const Comm<int> > comm = getDefaultComm();
1097
1098 summarize (comm.ptr(), out, alwaysWriteLocal,
1099 writeGlobalStats, writeZeroTimers, setOp, filter, ignoreZeroTimers);
1100 }
1101
1102 void
1104 std::vector<std::string>& statNames,
1105 const ECounterSetOp setOp,
1106 const std::string& filter)
1107 {
1108 // The default communicator. If Trilinos was built with MPI
1109 // enabled, this should be MPI_COMM_WORLD. Otherwise, this should
1110 // be a "serial" (no MPI, one "process") communicator.
1111 RCP<const Comm<int> > comm = getDefaultComm();
1112
1113 computeGlobalTimerStatistics (statData, statNames, comm.ptr(), setOp, filter);
1114 }
1115
1116 SyncTimeMonitor::SyncTimeMonitor(Time& timer, Ptr<const Comm<int> > comm, bool reset)
1117 : TimeMonitor(timer, reset), comm_(comm)
1118 { }
1119
1121 comm_->barrier();
1122 }
1123
1124
namespace {
  /// \brief Return \c label in a form that is safe to print as a YAML
  ///   key or sequence entry.
  ///
  /// Rules applied:
  ///   - An empty label is returned unchanged (YAML allows empty keys;
  ///     see Section 7.2 of the YAML 1.2 spec).
  ///   - If the label already starts and ends with a double quote, the
  ///     outer quotes are kept and only the interior is processed.
  ///   - Every double quote or backslash in the (interior of the)
  ///     label is escaped with a backslash.
  ///   - The result is wrapped in double quotes if the input was
  ///     already quoted, if anything was escaped, or if the label
  ///     contains a colon, a flow indicator (',', '[', ']', '{', '}')
  ///     or a '#'.  The flow indicators matter because labels are also
  ///     printed inside flow-style sequences and mappings, where an
  ///     unquoted ',' or closing bracket would end the entry early.
  ///
  /// This is not a complete YAML scalar quoter; it covers the
  /// characters listed above only.
  ///
  /// \param label [in] Timer or statistic name to print.
  /// \return The label, escaped and double-quoted as needed.
  std::string
  quoteLabelForYaml (const std::string& label)
  {
    if (label.empty ()) {
      return label;
    }

    // Already wrapped in double quotes?  Then we must not add a second
    // pair, but the interior still needs its quotes escaped.
    const bool alreadyQuoted = label.size () >= 2 &&
      label[0] == '"' && label[label.size () - 1] == '"';

    // Set as soon as we see a character that forces quoting.
    bool needToQuote = false;

    std::string out; // Escaped interior, without the outer quotes.
    out.reserve (label.size ());

    const size_t startPos = alreadyQuoted ? 1 : 0;
    const size_t endPos = alreadyQuoted ? label.size () - 1 : label.size ();
    for (size_t i = startPos; i < endPos; ++i) {
      const char c = label[i];
      switch (c) {
      case '"':
      case '\\':
        out.push_back ('\\'); // Escape the quote or backslash.
        needToQuote = true;
        break;
      case ':': // key/value separator
      case ',': // flow entry separator
      case '[':
      case ']':
      case '{':
      case '}':
      case '#': // comment indicator
        needToQuote = true;
        break;
      default:
        break;
      }
      out.push_back (c);
    }

    if (needToQuote || alreadyQuoted) {
      // out never contains the outer quotes (they were skipped above
      // if present), so add them here.
      return "\"" + out + "\"";
    }
    return out;
  }

} // namespace (anonymous)
1198
1199
1200 void TimeMonitor::
1201 summarizeToYaml (Ptr<const Comm<int> > comm,
1202 std::ostream &out,
1203 const ETimeMonitorYamlFormat yamlStyle,
1204 const std::string& filter)
1205 {
1207 using Teuchos::fancyOStream;
1208 using Teuchos::getFancyOStream;
1209 using Teuchos::OSTab;
1210 using Teuchos::RCP;
1211 using Teuchos::rcpFromRef;
1212 using std::endl;
1213 typedef std::vector<std::string>::size_type size_type;
1214
1215 const bool compact = (yamlStyle == YAML_FORMAT_COMPACT);
1216
1217 // const bool writeGlobalStats = true;
1218 // const bool writeZeroTimers = true;
1219 // const bool alwaysWriteLocal = false;
1220 const ECounterSetOp setOp = Intersection;
1221
1222 stat_map_type statData;
1223 std::vector<std::string> statNames;
1224 computeGlobalTimerStatistics (statData, statNames, comm, setOp, filter);
1225
1226 const int numProcs = comm->getSize();
1227
1228 // HACK (mfh 20 Aug 2012) For some reason, creating OSTab with "-
1229 // " as the line prefix does not work, else I would prefer that
1230 // method for printing each line of a YAML block sequence (see
1231 // Section 8.2.1 of the YAML 1.2 spec).
1232 //
1233 // Also, I have to set the tab indent string here, rather than in
1234 // OSTab's constructor. This is because line prefix (which for
1235 // some reason is what OSTab's constructor takes, rather than tab
1236 // indent string) means something different from tab indent
1237 // string, and turning on the line prefix prints all sorts of
1238 // things including "|" for some reason.
1239 RCP<FancyOStream> pfout = getFancyOStream (rcpFromRef (out));
1240 pfout->setTabIndentStr (" ");
1241 FancyOStream& fout = *pfout;
1242
1243 fout << "# Teuchos::TimeMonitor report" << endl
1244 << "---" << endl;
1245
1246 // mfh 19 Aug 2012: An important goal of our chosen output format
1247 // was to minimize the nesting depth. We have managed to keep the
1248 // nesting depth to 3, which is the limit that the current version
1249 // of PylotDB imposes for its YAML input.
1250
1251 // Outermost level is a dictionary. (Individual entries of a
1252 // dictionary do _not_ begin with "- ".) We always print the
1253 // outermost level in standard style, not flow style, for better
1254 // readability. We begin the outermost level with metadata.
1255 fout << "Output mode: " << (compact ? "compact" : "spacious") << endl
1256 << "Number of processes: " << numProcs << endl
1257 << "Time unit: s" << endl;
1258 // For a key: value pair where the value is a sequence or
1259 // dictionary on the following line, YAML requires a space after
1260 // the colon.
1261 fout << "Statistics collected: ";
1262 // Print list of the names of all the statistics we collected.
1263 if (compact) {
1264 fout << " [";
1265 for (size_type i = 0; i < statNames.size (); ++i) {
1266 fout << quoteLabelForYaml (statNames[i]);
1267 if (i + 1 < statNames.size ()) {
1268 fout << ", ";
1269 }
1270 }
1271 fout << "]" << endl;
1272 }
1273 else {
1274 fout << endl;
1275 OSTab tab1 (pfout);
1276 for (size_type i = 0; i < statNames.size (); ++i) {
1277 fout << "- " << quoteLabelForYaml (statNames[i]) << endl;
1278 }
1279 }
1280
1281 // Print the list of timer names.
1282 //
1283 // It might be nicer instead to print a map from timer name to all
1284 // of its data, but keeping the maximum nesting depth small
1285 // ensures better compatibility with different parsing tools.
1286 fout << "Timer names: ";
1287 if (compact) {
1288 fout << " [";
1289 size_type ind = 0;
1290 for (stat_map_type::const_iterator it = statData.begin();
1291 it != statData.end(); ++it, ++ind) {
1292 fout << quoteLabelForYaml (it->first);
1293 if (ind + 1 < statData.size ()) {
1294 fout << ", ";
1295 }
1296 }
1297 fout << "]" << endl;
1298 }
1299 else {
1300 fout << endl;
1301 OSTab tab1 (pfout);
1302 for (stat_map_type::const_iterator it = statData.begin();
1303 it != statData.end(); ++it) {
1304 fout << "- " << quoteLabelForYaml (it->first) << endl;
1305 }
1306 }
1307
1308 // Print times for each timer, as a map from statistic name to its time.
1309 fout << "Total times: ";
1310 if (compact) {
1311 fout << " {";
1312 size_type outerInd = 0;
1313 for (stat_map_type::const_iterator outerIter = statData.begin();
1314 outerIter != statData.end(); ++outerIter, ++outerInd) {
1315 // Print timer name.
1316 fout << quoteLabelForYaml (outerIter->first) << ": ";
1317 // Print that timer's data.
1318 const std::vector<std::pair<double, double> >& curData = outerIter->second;
1319 fout << "{";
1320 for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1321 fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1322 << curData[innerInd].first;
1323 if (innerInd + 1 < curData.size ()) {
1324 fout << ", ";
1325 }
1326 }
1327 fout << "}";
1328 if (outerInd + 1 < statData.size ()) {
1329 fout << ", ";
1330 }
1331 }
1332 fout << "}" << endl;
1333 }
1334 else {
1335 fout << endl;
1336 OSTab tab1 (pfout);
1337 size_type outerInd = 0;
1338 for (stat_map_type::const_iterator outerIter = statData.begin();
1339 outerIter != statData.end(); ++outerIter, ++outerInd) {
1340 // Print timer name.
1341 fout << quoteLabelForYaml (outerIter->first) << ": " << endl;
1342 // Print that timer's data.
1343 OSTab tab2 (pfout);
1344 const std::vector<std::pair<double, double> >& curData = outerIter->second;
1345 for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1346 fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1347 << curData[innerInd].first << endl;
1348 }
1349 }
1350 }
1351
1352 // Print call counts for each timer, for each statistic name.
1353 fout << "Call counts:";
1354 if (compact) {
1355 fout << " {";
1356 size_type outerInd = 0;
1357 for (stat_map_type::const_iterator outerIter = statData.begin();
1358 outerIter != statData.end(); ++outerIter, ++outerInd) {
1359 // Print timer name.
1360 fout << quoteLabelForYaml (outerIter->first) << ": ";
1361 // Print that timer's data.
1362 const std::vector<std::pair<double, double> >& curData = outerIter->second;
1363 fout << "{";
1364 for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1365 fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1366 << curData[innerInd].second;
1367 if (innerInd + 1 < curData.size ()) {
1368 fout << ", ";
1369 }
1370 }
1371 fout << "}";
1372 if (outerInd + 1 < statData.size ()) {
1373 fout << ", ";
1374 }
1375 }
1376 fout << "}" << endl;
1377 }
1378 else {
1379 fout << endl;
1380 OSTab tab1 (pfout);
1381 size_type outerInd = 0;
1382 for (stat_map_type::const_iterator outerIter = statData.begin();
1383 outerIter != statData.end(); ++outerIter, ++outerInd) {
1384 // Print timer name.
1385 fout << quoteLabelForYaml (outerIter->first) << ": " << endl;
1386 // Print that timer's data.
1387 OSTab tab2 (pfout);
1388 const std::vector<std::pair<double, double> >& curData = outerIter->second;
1389 for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1390 fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1391 << curData[innerInd].second << endl;
1392 }
1393 }
1394 }
1395 }
1396
1397 void TimeMonitor::
1398 summarizeToYaml (std::ostream &out,
1399 const ETimeMonitorYamlFormat yamlStyle,
1400 const std::string& filter)
1401 {
1402 // The default communicator. If Trilinos was built with MPI
1403 // enabled, this should be MPI_COMM_WORLD. Otherwise, this should
1404 // be a "serial" (no MPI, one "process") communicator.
1405 RCP<const Comm<int> > comm = getDefaultComm ();
1406
1407 summarizeToYaml (comm.ptr (), out, yamlStyle, filter);
1408 }
1409
1410 // Default value is false. We'll set to true once
1411 // setReportParameters() completes successfully.
1412 bool TimeMonitor::setParams_ = false;
1413
1414 // We have to declare all of these here in order to avoid linker errors.
1415 TimeMonitor::ETimeMonitorReportFormat TimeMonitor::reportFormat_ = TimeMonitor::REPORT_FORMAT_TABLE;
1416 TimeMonitor::ETimeMonitorYamlFormat TimeMonitor::yamlStyle_ = TimeMonitor::YAML_FORMAT_SPACIOUS;
1417 ECounterSetOp TimeMonitor::setOp_ = Intersection;
1418 bool TimeMonitor::alwaysWriteLocal_ = false;
1419 bool TimeMonitor::writeGlobalStats_ = true;
1420 bool TimeMonitor::writeZeroTimers_ = true;
1421
1422 void
1423 TimeMonitor::setReportFormatParameter (ParameterList& plist)
1424 {
1425 const std::string name ("Report format");
1426 const std::string defaultValue ("Table");
1427 const std::string docString ("Output format for report of timer statistics");
1428 Array<std::string> strings;
1429 Array<std::string> docs;
1430 Array<ETimeMonitorReportFormat> values;
1431
1432 strings.push_back ("YAML");
1433 docs.push_back ("YAML (see yaml.org) format");
1434 values.push_back (REPORT_FORMAT_YAML);
1435 strings.push_back ("Table");
1436 docs.push_back ("Tabular format via Teuchos::TableFormat");
1437 values.push_back (REPORT_FORMAT_TABLE);
1438
1439 setStringToIntegralParameter<ETimeMonitorReportFormat> (name, defaultValue,
1440 docString,
1441 strings (), docs (),
1442 values (), &plist);
1443 }
1444
1445 void
1446 TimeMonitor::setYamlFormatParameter (ParameterList& plist)
1447 {
1448 const std::string name ("YAML style");
1449 const std::string defaultValue ("spacious");
1450 const std::string docString ("YAML-specific output format");
1451 Array<std::string> strings;
1452 Array<std::string> docs;
1453 Array<ETimeMonitorYamlFormat> values;
1454
1455 strings.push_back ("compact");
1456 docs.push_back ("Compact format: use \"flow style\" (see YAML 1.2 spec at "
1457 "yaml.org) for most sequences except the outermost sequence");
1458 values.push_back (YAML_FORMAT_COMPACT);
1459
1460 strings.push_back ("spacious");
1461 docs.push_back ("Spacious format: avoid flow style");
1462 values.push_back (YAML_FORMAT_SPACIOUS);
1463
1464 setStringToIntegralParameter<ETimeMonitorYamlFormat> (name, defaultValue,
1465 docString,
1466 strings (), docs (),
1467 values (), &plist);
1468 }
1469
1470 void
1471 TimeMonitor::setSetOpParameter (ParameterList& plist)
1472 {
1473 const std::string name ("How to merge timer sets");
1474 const std::string defaultValue ("Intersection");
1475 const std::string docString ("How to merge differing sets of timers "
1476 "across processes");
1477 Array<std::string> strings;
1478 Array<std::string> docs;
1479 Array<ECounterSetOp> values;
1480
1481 strings.push_back ("Intersection");
1482 docs.push_back ("Compute intersection of timer sets over processes");
1483 values.push_back (Intersection);
1484 strings.push_back ("Union");
1485 docs.push_back ("Compute union of timer sets over processes");
1486 values.push_back (Union);
1487
1488 setStringToIntegralParameter<ECounterSetOp> (name, defaultValue, docString,
1489 strings (), docs (), values (),
1490 &plist);
1491 }
1492
1493 void
1495 {
1496 stackedTimer_ = t;
1497 }
1498
1501 {
1502 return stackedTimer_;
1503 }
1504
1507 {
1508 // Our implementation favors recomputation over persistent
1509 // storage. That is, we simply recreate the list every time we
1510 // need it.
1511 RCP<ParameterList> plist = parameterList ("TimeMonitor::report");
1512
1513 const bool alwaysWriteLocal = false;
1514 const bool writeGlobalStats = true;
1515 const bool writeZeroTimers = true;
1516
1517 setReportFormatParameter (*plist);
1518 setYamlFormatParameter (*plist);
1519 setSetOpParameter (*plist);
1520 plist->set ("alwaysWriteLocal", alwaysWriteLocal,
1521 "Always output local timers' values on Proc 0");
1522 plist->set ("writeGlobalStats", writeGlobalStats, "Always output global "
1523 "statistics, even if there is only one process in the "
1524 "communicator");
1525 plist->set ("writeZeroTimers", writeZeroTimers, "Generate output for "
1526 "timers that have never been called");
1527
1528 return rcp_const_cast<const ParameterList> (plist);
1529 }
1530
1531 void
1532 TimeMonitor::setReportParameters (const RCP<ParameterList>& params)
1533 {
1534 ETimeMonitorReportFormat reportFormat = REPORT_FORMAT_TABLE;
1535 ETimeMonitorYamlFormat yamlStyle = YAML_FORMAT_SPACIOUS;
1536 ECounterSetOp setOp = Intersection;
1537 bool alwaysWriteLocal = false;
1538 bool writeGlobalStats = true;
1539 bool writeZeroTimers = true;
1540
1541 if (params.is_null ()) {
1542 // If we've set parameters before, leave their current values.
1543 // Otherwise, set defaults (below).
1544 if (setParams_) {
1545 return;
1546 }
1547 }
1548 else { // params is nonnull. Let's read it!
1549 params->validateParametersAndSetDefaults (*getValidReportParameters ());
1550
1551 reportFormat = getIntegralValue<ETimeMonitorReportFormat> (*params, "Report format");
1552 yamlStyle = getIntegralValue<ETimeMonitorYamlFormat> (*params, "YAML style");
1553 setOp = getIntegralValue<ECounterSetOp> (*params, "How to merge timer sets");
1554 alwaysWriteLocal = params->get<bool> ("alwaysWriteLocal");
1555 writeGlobalStats = params->get<bool> ("writeGlobalStats");
1556 writeZeroTimers = params->get<bool> ("writeZeroTimers");
1557 }
1558 // Defer setting state until here, to ensure the strong exception
1559 // guarantee for this method (either it throws with no externally
1560 // visible state changes, or it returns normally).
1561 reportFormat_ = reportFormat;
1562 yamlStyle_ = yamlStyle;
1563 setOp_ = setOp;
1564 alwaysWriteLocal_ = alwaysWriteLocal;
1565 writeGlobalStats_ = writeGlobalStats;
1566 writeZeroTimers_ = writeZeroTimers;
1567
1568 setParams_ = true; // Yay, we successfully set parameters!
1569 }
1570
1571 void
1573 std::ostream& out,
1574 const std::string& filter,
1575 const RCP<ParameterList>& params)
1576 {
1577 setReportParameters (params);
1578
1579 if (reportFormat_ == REPORT_FORMAT_YAML) {
1580 summarizeToYaml (comm, out, yamlStyle_, filter);
1581 }
1582 else if (reportFormat_ == REPORT_FORMAT_TABLE) {
1583 summarize (comm, out, alwaysWriteLocal_, writeGlobalStats_,
1584 writeZeroTimers_, setOp_, filter);
1585 }
1586 else {
1587 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "TimeMonitor::report: "
1588 "Invalid report format. This should never happen; ParameterList "
1589 "validation should have caught this. Please report this bug to the "
1590 "Teuchos developers.");
1591 }
1592 }
1593
1594 void
1596 std::ostream& out,
1597 const RCP<ParameterList>& params)
1598 {
1599 report (comm, out, "", params);
1600 }
1601
1602 void
1603 TimeMonitor::report (std::ostream& out,
1604 const std::string& filter,
1605 const RCP<ParameterList>& params)
1606 {
1607 RCP<const Comm<int> > comm = getDefaultComm ();
1608 report (comm.ptr (), out, filter, params);
1609 }
1610
1611 void
1612 TimeMonitor::report (std::ostream& out,
1613 const RCP<ParameterList>& params)
1614 {
1615 RCP<const Comm<int> > comm = getDefaultComm ();
1616 report (comm.ptr (), out, "", params);
1617 }
1618
1619} // namespace Teuchos
Defines basic traits for the scalar field type.
A column of TableEntry objects.
Provides utilities for formatting tabular output.
Scope guard for Teuchos::Time, with MPI collective timer reporting.
Replacement for std::vector that is compatible with the Teuchos Memory Management classes.
reference back()
void push_back(const value_type &x)
std::vector< T >::const_iterator const_iterator
The type of a const forward iterator.
Array< T > & append(const T &x)
Add a new entry at the end of the array.
Abstract interface for distributed-memory communication.
static Teuchos::RCP< const Comm< OrdinalType > > getComm()
Return the default global communicator.
Teuchos version of MPI_MAXLOC.
void reduce(const Ordinal count, const std::pair< ScalarType, IndexType > inBuffer[], std::pair< ScalarType, IndexType > inoutBuffer[]) const
same as MinLoc, but don't allow zero
void reduce(const Ordinal count, const std::pair< ScalarType, IndexType > inBuffer[], std::pair< ScalarType, IndexType > inoutBuffer[]) const
Teuchos version of MPI_MINLOC.
void reduce(const Ordinal count, const std::pair< ScalarType, IndexType > inBuffer[], std::pair< ScalarType, IndexType > inoutBuffer[]) const
Common capabilities for collecting and reporting performance data across processors.
static TableFormat & format()
Table format that will be used to print a summary of timer results.
static std::map< std::string, RCP< Time > > & counters()
Array of all counters that were created with getNewCounter() on the calling (MPI) process.
static RCP< Time > lookupCounter(const std::string &name)
Return the first counter with the given name, or null if none.
const Time & counter() const
Constant access to the instance's counter reference.
bool isRecursiveCall() const
Whether we are currently in a recursive call of the counter.
Simple wrapper class for raw pointers to single objects where no persisting relationship exists.
Smart reference counting pointer class for automatic garbage collection.
bool is_null() const
Returns true if the underlying pointer is null.
Ptr< T > ptr() const
Get a safer wrapper raw C++ pointer to the underlying object.
T * get() const
Get the raw C++ pointer to the underlying object.
This class allows one to push and pop timers on and off a stack.
void start(const std::string name, const bool push_kokkos_profiling_region=true)
void stop(const std::string &name, const bool pop_kokkos_profiling_region=true)
~SyncTimeMonitor() override
Destructor: stops the timer.
SyncTimeMonitor()=delete
Default constructor is deleted, since it would be unsafe.
void setColumnWidths(const Array< int > &colWidths)
Set the column widths to be used for subsequent rows.
int precision() const
Get the precision for writing doubles. Default is 4.
void writeWholeTable(std::ostream &out, const std::string &tableTitle, const Array< std::string > &columnNames, const Array< TableColumn > &columns) const
Scope guard for Time, that can compute MPI collective timer statistics.
~TimeMonitor() override
Destructor: stops the timer.
static void computeGlobalTimerStatistics(stat_map_type &statData, std::vector< std::string > &statNames, Ptr< const Comm< int > > comm, const ECounterSetOp setOp=Intersection, const std::string &filter="")
Compute global timer statistics for all timers on the given communicator.
static Teuchos::RCP< Teuchos::StackedTimer > getStackedTimer()
The StackedTimer used by the TimeMonitor.
static void enableTimer(const std::string &name)
Enable the timer with the given name.
static void zeroOutTimers()
Reset all global timers to zero.
static void setStackedTimer(const Teuchos::RCP< Teuchos::StackedTimer > &t)
Sets the StackedTimer into which the TimeMonitor will insert timings.
static void report(Ptr< const Comm< int > > comm, std::ostream &out, const std::string &filter, const RCP< ParameterList > &params=null)
Report timer statistics to the given output stream.
static void summarize(Ptr< const Comm< int > > comm, std::ostream &out=std::cout, const bool alwaysWriteLocal=false, const bool writeGlobalStats=true, const bool writeZeroTimers=true, const ECounterSetOp setOp=Intersection, const std::string &filter="", const bool ignoreZeroTimers=false)
Print summary statistics for all timers on the given communicator.
static Teuchos::RCP< Teuchos::StackedTimer > stackedTimer_
Stacked timer for optional injection of timing from TimeMonitor-enabled objects.
static RCP< const ParameterList > getValidReportParameters()
Default parameters (with validators) for report().
static void disableTimer(const std::string &name)
Disable the timer with the given name.
TimeMonitor()=delete
Default constructor is deleted, since it would be unsafe.
Wall-clock timer.
const std::string & name() const
The name of this timer.
void start(bool reset=false)
Start the timer, if the timer is enabled (see disable()).
double stop()
Stop the timer, if the timer is enabled (see disable()).
Base interface class for user-defined reduction operations for objects that use value semantics.
std::ostream subclass that performs the magic of indenting data sent to an std::ostream object among ...
Tabbing class for helping to create formatted, indented output for a basic_FancyOStream object.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
bool nonnull(const std::shared_ptr< T > &p)
Returns true if p.get()!=NULL.
basic_FancyOStream< char > FancyOStream
basic_OSTab< char > OSTab
The Teuchos namespace contains all of the classes, structs and enums used by Teuchos,...
ECounterSetOp
Set operation type for mergeCounterNames() to perform.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Deprecated.
void mergeCounterNames(const Comm< int > &comm, const Array< std::string > &localNames, Array< std::string > &globalNames, const ECounterSetOp setOp)
Merge counter names over all processors.
std::map< std::string, std::vector< std::pair< double, double > > > stat_map_type
Global statistics collected from timer data.
This structure defines some basic traits for a scalar field type.
static T zero()
Returns representation of zero for this scalar type.