DNDSR 0.1.0.dev1+gcd065ad
Distributed Numeric Data Structure for CFV
Loading...
Searching...
No Matches
Profiling.hpp
Go to the documentation of this file.
1#pragma once
2/// @file Profiling.hpp
3/// @brief Wall-clock performance timer and running scalar statistics utilities.
4
5#include "Defines.hpp"
6#include "MPI.hpp"
7
8namespace DNDS
9{
10 /**
11 * @brief Process-wide singleton aggregating wall-clock timings by category.
12 *
13 * @details Provides a fixed set of named timer slots (RHS, Comm, LinSolve, ...)
14 * that callers start / stop at well-known phases of the solver. Calls are
15 * expected to pair up: each @ref StartTimer is matched by a @ref StopTimer for the same id.
16 *
17 * Both buffers hold `Ntype_All = Ntype + Ntype_Past` entries (64 + 64 = 128 with the current
18 * constants), so current and previous-iteration timings can be retained for reporting.
19 *
20 * Thread-safe C++11 singleton; not reentrant for the same timer id (no lock or atomic member is declared here).
21 */
22 class PerformanceTimer // cxx11 + thread-safe singleton
23 {
24 public:
25 /// @brief Named timer slots. New categories can be added before `__EndTimerType`.
/// @note The named ids below run from 1 to 18; `__EndTimerType` is pinned to 64, so values 19..63 currently have no name.
27 {
29 RHS = 1, ///< Total RHS evaluation.
30 Dt = 2, ///< Time-step computation.
31 Reconstruction = 3, ///< Variational reconstruction.
32 ReconstructionCR = 4,///< CR (compact reconstruction) branch.
33 Limiter = 5, ///< Slope / variable limiter.
34 LimiterA = 6, ///< Limiter sub-phase A.
35 LimiterB = 7, ///< Limiter sub-phase B.
36 Basis = 8, ///< Basis-function evaluation.
37 Comm = 9, ///< Catch-all MPI comm.
38 Comm1 = 10, ///< Comm phase 1 (e.g., cell-ghost).
39 Comm2 = 11, ///< Comm phase 2 (e.g., face-ghost).
40 Comm3 = 12, ///< Comm phase 3.
41 LinSolve = 13, ///< Linear solve (total).
42 LinSolve1 = 14, ///< Linear solve phase 1.
43 LinSolve2 = 15, ///< Linear solve phase 2.
44 LinSolve3 = 16, ///< Linear solve phase 3.
45 Positivity = 17, ///< Positivity preservation.
46 PositivityOuter = 18,///< Outer-iteration positivity.
// NOTE(review): identifiers containing "__" are reserved to the implementation by the C++ standard;
// renaming this enumerator would change the public enum, so it is only flagged here.
47 __EndTimerType = 64 ///< One past the last valid id; also the value of `Ntype`.
48 };
49
50 static const int Ntype = __EndTimerType; ///< Number of slots in the first half of the buffers (64).
51 static const int Ntype_Past = 64; ///< Extra slots appended after the first `Ntype` entries. NOTE(review): presumably the "past" copies; confirm in Profiling.cpp.
52 static const int Ntype_All = Ntype + Ntype_Past; ///< Length of `timer` and `tStart` (128 with the current constants).
53
54 private:
55 std::array<real, Ntype_All> timer = {0}; // every entry starts at zero (first one explicitly, the rest value-initialized)
56 std::array<real, Ntype_All> tStart; // no in-class initializer, unlike `timer`; presumably written by StartTimer before StopTimer reads it (TODO confirm)
57 PerformanceTimer() = default; // private: outside code cannot construct one and goes through Instance()
59 PerformanceTimer &operator=(const PerformanceTimer &); // private and not defined in this header, so outside code cannot copy-assign
60
61 public:
62 /// @brief Access the process-wide singleton.
63 static PerformanceTimer &Instance();
64 /// @brief Record the current wall time in the "start" slot for timer `t`.
65 void StartTimer(TimerType t);
66 /// @brief Integer-id overload of @ref StartTimer.
67 void StartTimer(int t);
68 /// @brief Add (now - start) to the accumulated time for timer `t`.
69 void StopTimer(TimerType t);
70 /// @brief Integer-id overload of @ref StopTimer.
71 void StopTimer(int t);
72 /// @brief Current local (this-rank) accumulated wall time (seconds).
74 real getTimer(int t);
75 /// @brief Global maximum across ranks (collective on `mpi.comm`).
77 real getTimerCollective(int t, const MPIInfo &mpi);
78 /// @brief Either #getTimerCollective (when `col == true`) or #getTimer.
/// @tparam T Timer id type; `t` is passed unchanged to the selected getter, so overload resolution picks the matching overload.
/// @param t Timer id to query.
/// @param mpi MPI context; only forwarded when `col` is true.
/// @param col Selects the collective (true) or the local (false) query.
/// @return The value returned by the selected getter.
79 template <typename T>
80 real getTimerColOrLoc(T t, const MPIInfo &mpi, bool col)
81 {
82 return col ? getTimerCollective(t, mpi) : getTimer(t); // only the selected branch is evaluated
83 }
84 /// @brief Zero the accumulated time for one timer slot.
85 void clearTimer(TimerType t);
/// @brief Integer-id overload of @ref clearTimer.
86 void clearTimer(int t);
87 /// @brief Zero every timer slot.
88 void clearAllTimer();
89 };
90
91 /**
92 * @brief Running-statistics accumulator using Welford's online algorithm.
93 *
94 * @details Updates the mean and the population variance (normalized by the sample count,
95 * not by count - 1) in a single pass without storing the sample history. Numerically stable for long sequences. Used to report
96 * residual / CFL / iteration-count statistics during solver runs.
97 */
99 {
100 real average = 0; // running mean of the samples seen so far
101 index count = 0; // number of samples incorporated since construction or the last clear()
102 real sigmaS = 0; // running mean of squared deviations from the mean (population variance)
103
104 public:
105 /// @brief Reset counts and moments to zero.
106 void clear()
107 {
108 average = 0;
109 count = 0;
110 sigmaS = 0;
111 }
112 /// @brief Incorporate a new sample `v`.
/// @param v New sample value.
/// @return `*this`, so successive calls can be chained.
114 {
115 count++; // incremented first, so the divisions below never divide by zero
116 real newAverage = average + (v - average) / real(count); // incremental mean: old mean plus the new sample's share of the difference
117 sigmaS += ((v - newAverage) * (v - average) - sigmaS) / real(count); // same incremental form applied to the Welford term (v - new mean) * (v - old mean)
118 average = newAverage; // committed last because the line above needs the old mean
119 return *this;
120 }
121
122 /// @brief `(mean, stddev)` pair for the samples so far.
/// @details The second entry is the square root of `sigmaS`, i.e. the population standard
/// deviation. `sigmaS` is clamped at zero first so the square root never receives a negative
/// argument. The literal `0.` is a double, which matches `real` (double per Defines.hpp).
/// @return `(average, sqrt(max(0, sigmaS)))`; `(0, 0)` when no sample has been added.
123 [[nodiscard]] std::tuple<real, real> get()
124 {
125 return std::make_tuple(average, std::sqrt(std::max(0., sigmaS)));
126 }
127
128 /// @brief Total sum of the samples (reconstructed from the running mean).
/// @return `average * count`; the individual samples are not stored, so the result carries the rounding of the running mean.
129 [[nodiscard]] real getSum() const
130 {
131 return average * real(count);
132 }
133 };
134
135 /// @brief Short-hand accessor to the @ref DNDS::PerformanceTimer "PerformanceTimer" singleton.
/// @return Reference to the singleton (the page index lists the signature as `PerformanceTimer & Timer()`).
/// NOTE(review): the declaration and the statement inside the braces are not present in this listing; presumably it forwards to PerformanceTimer::Instance() (confirm against the header).
137 {
139 }
140
141}
Core type aliases, constants, and metaprogramming utilities for the DNDS framework.
MPI wrappers: MPIInfo, collective operations, type mapping, CommStrategy.
Process-wide singleton aggregating wall-clock timings by category.
Definition Profiling.hpp:23
static const int Ntype_Past
Definition Profiling.hpp:51
void clearAllTimer()
Zero every timer slot.
Definition Profiling.cpp:69
static const int Ntype
Definition Profiling.hpp:50
TimerType
Named timer slots. New categories can be added before __EndTimerType.
Definition Profiling.hpp:27
@ LinSolve3
Linear solve phase 3.
Definition Profiling.hpp:44
@ Positivity
Positivity preservation.
Definition Profiling.hpp:45
@ ReconstructionCR
CR (compact reconstruction) branch.
Definition Profiling.hpp:32
@ LinSolve1
Linear solve phase 1.
Definition Profiling.hpp:42
@ Limiter
Slope / variable limiter.
Definition Profiling.hpp:33
@ Comm3
Comm phase 3.
Definition Profiling.hpp:40
@ PositivityOuter
Outer-iteration positivity.
Definition Profiling.hpp:46
@ LimiterA
Limiter sub-phase A.
Definition Profiling.hpp:34
@ Comm2
Comm phase 2 (e.g., face-ghost).
Definition Profiling.hpp:39
@ Comm1
Comm phase 1 (e.g., cell-ghost).
Definition Profiling.hpp:38
@ Comm
Catch-all MPI comm.
Definition Profiling.hpp:37
@ LinSolve2
Linear solve phase 2.
Definition Profiling.hpp:43
@ Dt
Time-step computation.
Definition Profiling.hpp:30
@ __EndTimerType
One past the last valid id.
Definition Profiling.hpp:47
@ Basis
Basis-function evaluation.
Definition Profiling.hpp:36
@ LinSolve
Linear solve (total).
Definition Profiling.hpp:41
@ Reconstruction
Variational reconstruction.
Definition Profiling.hpp:31
@ LimiterB
Limiter sub-phase B.
Definition Profiling.hpp:35
@ RHS
Total RHS evaluation.
Definition Profiling.hpp:29
static PerformanceTimer & Instance()
Access the process-wide singleton.
Definition Profiling.cpp:9
void clearTimer(TimerType t)
Zero the accumulated time for one timer slot.
Definition Profiling.cpp:59
void StartTimer(TimerType t)
Record the current wall time in the "start" slot for timer t.
Definition Profiling.cpp:15
real getTimerCollective(TimerType t, const MPIInfo &mpi)
Global maximum across ranks (collective on mpi.comm).
Definition Profiling.cpp:45
static const int Ntype_All
Definition Profiling.hpp:52
void StopTimer(TimerType t)
Add (now - start) to the accumulated time for timer t.
Definition Profiling.cpp:25
real getTimer(TimerType t)
Current local (this-rank) accumulated wall time (seconds).
Definition Profiling.cpp:35
real getTimerColOrLoc(T t, const MPIInfo &mpi, bool col)
Either getTimerCollective (when col == true) or getTimer.
Definition Profiling.hpp:80
Running-statistics accumulator using Welford's online algorithm.
Definition Profiling.hpp:99
std::tuple< real, real > get()
(mean, stddev) pair for the samples so far.
void clear()
Reset counts and moments to zero.
ScalarStatistics & update(real v)
Incorporate a new sample v.
real getSum() const
Total sum of the samples (reconstructed from the running mean).
the host side operators are provided as implemented
int64_t index
Global row / DOF index type (signed 64-bit; handles multi-billion-cell meshes).
Definition Defines.hpp:107
double real
Canonical floating-point scalar used throughout DNDSR (double precision).
Definition Defines.hpp:105
PerformanceTimer & Timer()
Short-hand accessor to the PerformanceTimer singleton.
Lightweight bundle of an MPI communicator and the calling rank's coordinates.
Definition MPI.hpp:215
Eigen::Matrix< real, 5, 1 > v