DNDSR 0.1.0.dev1+gcd065ad
Distributed Numeric Data Structure for CFV
Loading...
Searching...
No Matches
EulerP_Evaluator.cpp
Go to the documentation of this file.
1/** @file EulerP_Evaluator.cpp
2 * @brief Host-side implementation of EulerP Evaluator methods.
3 *
4 * Provides the constructor for EvaluatorConfig (with default JSON settings),
5 * configuration validation, VTK-HDF5 output, and the dispatch methods for each
6 * evaluation stage. Each dispatch method validates arguments, detects the active
7 * device backend, constructs the corresponding Evaluator_impl<B> args, and calls
8 * the backend-specific kernel methods.
9 */
10#include "EulerP_Evaluator.hpp"
12#include "DNDS/Errors.hpp"
14#include "EulerP/EulerP.hpp"
16#include <type_traits>
17
18namespace DNDS::EulerP
19{
20
21    /** @brief Initializes EvaluatorConfig with default JSON values.
22     *
23     * Default settings:
24     * - threadsPerBlock: 128 (CUDA block size)
25     * - pullAllInputArgs: true (wait for all MPI pulls before kernel dispatch)
26     * - serializeCUDAExecution: false (allow concurrent CUDA operations)
27     */
    // NOTE(review): the constructor signature (original line 28) was dropped by
    // the doc extraction; per the member index it is EvaluatorConfig().
29    {
        // The keys written here form the complete set of legal configuration
        // keys: valid_patch_keys() rejects any user key absent from this object,
        // so new options must be added here first.
30        auto &c = this->config_data = t_jsonconfig::object();
31        c["threadsPerBlock"] = 128;
32        c["pullAllInputArgs"] = true;
33        c["serializeCUDAExecution"] = false;
34    }
35
36    /**
37     * @brief Recursively validates that all keys in the user config exist in the defaults.
38     *
39     * Walks both the user-provided and default JSON objects in parallel. If any key in
40     * the user config does not exist in the defaults, throws a std::runtime_error with
41     * the offending key path.
42     *
43     * @param config_in User-provided JSON configuration patch to validate.
     * @throws std::runtime_error if a key in config_in has no counterpart in the
     *         defaults (config_data); the message carries the dotted key path.
44     */
    // NOTE(review): the function signature (original line 45) is missing from this
    // extraction; per the member index it is
    // void valid_patch_keys(const t_jsonconfig &config_in).
46    {
        // Self-referencing lambda: recursion is achieved by passing the lambda to
        // itself as the trailing parameter (validate_keys_runF), since a lambda
        // cannot name itself directly.
47        auto validate_keys_run = [](const t_jsonconfig &user, const t_jsonconfig &def, const std::string &path, auto &&validate_keys_runF)
48        {
            // Non-object nodes (scalars/arrays) terminate the recursion: only key
            // sets are validated here, not value types.
49            if (!user.is_object() || !def.is_object())
50                return;
51            for (const auto &item : user.items())
52            {
53                const std::string &key = item.key();
54                const t_jsonconfig &uval = item.value();
55                if (!def.contains(key))
56                    throw std::runtime_error(
57                        "Invalid configuration key at path '" + path + key + "'");
58
59                const t_jsonconfig &dval = def.at(key);
60
61                // If both sides are objects, recurse
62                if (uval.is_object() && dval.is_object())
63                    validate_keys_runF(uval, dval, path + key + ".", validate_keys_runF);
64            }
65        };
        // Kick off recursion at the root with an empty path prefix.
66        validate_keys_run(config_in, config_data, "", validate_keys_run);
67    }
68
69    /**
70     * @brief Writes cell-centered and node-centered field data to a parallel VTK-HDF5 file.
71     *
72     * Prepends primitive variable fields (density as scalar, velocity as vector) if
73     * uPrimCell / uPrimNode are provided, then appends all user-specified scalar and
74     * vector field arrays. Validates array sizes against the mesh topology before output.
75     * Uses a duplicated MPI communicator for thread-safe HDF5 I/O.
     *
     * @throws std::runtime_error (via DNDS_check_throw) if any array lacks a
     *         father block or its size disagrees with the mesh cell/node count.
76     */
77    void Evaluator::PrintDataVTKHDF(std::string fname, std::string series_name,
78                                    std::vector<ssp<TUScalar>> &arrCellCentScalar,
79                                    const std::vector<std::string> &arrCellCentScalar_names_in,
80                                    std::vector<ssp<TUVec>> &arrCellCentVec,
81                                    const std::vector<std::string> &arrCellCentVec_names_in,
82                                    std::vector<ssp<TUScalar>> &arrNodeScalar,
83                                    const std::vector<std::string> &arrNodeScalar_names_in,
84                                    std::vector<ssp<TUVec>> &arrNodeVec,
85                                    const std::vector<std::string> &arrNodeVec_names_in,
                                    // NOTE(review): original lines 86-87 (the
                                    // ssp<TUDof> uPrimCell and uPrimNode
                                    // parameters, per the member index) were
                                    // dropped by the doc extraction.
88                                    double t)
89    {
        // Duplicate the communicator so HDF5 I/O below cannot interleave with
        // other traffic on the mesh communicator; freed at the end.
90        MPI_Comm commDup = MPI_COMM_NULL;
91        MPI_Comm_dup(fv->mesh->getMPI().comm, &commDup);
96        std::vector<std::string> arrCellCentScalar_names;
97        std::vector<std::string> arrCellCentVec_names;
98        std::vector<std::string> arrNodeScalar_names;
99        std::vector<std::string> arrNodeVec_names;
        // Offsets count how many leading entries of each output list come from
        // the primitive fields rather than the user-supplied arrays.
        // NOTE(review): the declaration of arrCellCentScalar_offset (original
        // line 100) is missing from this extraction, though it is referenced
        // in the getters below.
101        int arrCellCentVec_offset = 0;
102        int arrNodeScalar_offset = 0;
103        int arrNodeVec_offset = 0;
104        if (uPrimCell)
105        {
106            arrCellCentScalar_names.emplace_back("R");
107            arrCellCentVec_names.emplace_back("velo");
            // NOTE(review): original lines 108-109 (presumably setting the cell
            // offsets to 1) are missing from this extraction — confirm.
110        }
111
112        if (uPrimNode)
113        {
114            arrNodeScalar_names.emplace_back("R");
115            arrNodeVec_names.emplace_back("velo");
            // NOTE(review): original lines 116-117 (presumably setting the node
            // offsets to 1) are missing from this extraction — confirm.
118        }
119
        // Append user-supplied names after the (optional) primitive fields.
120        for (auto &s : arrCellCentScalar_names_in)
121            arrCellCentScalar_names.emplace_back(s)
122        for (auto &s : arrCellCentVec_names_in)
123            arrCellCentVec_names.emplace_back(s);
124        for (auto &s : arrNodeScalar_names_in)
125            arrNodeScalar_names.emplace_back(s);
126        for (auto &s : arrNodeVec_names_in)
127            arrNodeVec_names.emplace_back(s);
128
        // Size validation: every array must have a father block (presumably the
        // locally-owned portion — confirm) sized to the mesh topology.
129        for (auto &arr : arrCellCentScalar)
130        {
131            DNDS_check_throw(arr->father);
132            DNDS_check_throw(arr->father->Size() == fv->mesh->NumCell());
133        }
134        for (auto &arr : arrCellCentVec)
135        {
136            DNDS_check_throw(arr->father);
137            DNDS_check_throw(arr->father->Size() == fv->mesh->NumCell());
138        }
139        for (auto &arr : arrNodeScalar)
140        {
141            DNDS_check_throw(arr->father);
142            DNDS_check_throw(arr->father->Size() == fv->mesh->NumNode());
143        }
144        for (auto &arr : arrNodeVec)
145        {
146            DNDS_check_throw(arr->father);
147            DNDS_check_throw(arr->father->Size() == fv->mesh->NumNode());
148        }
149
        // The getter lambdas below serve index i from the primitive field when
        // i < offset, otherwise from the user array at (i - offset).
150        fv->mesh->PrintParallelVTKHDFDataArray(
151            fname,
            // NOTE(review): original lines 152-156 (leading arguments, likely
            // series_name and the per-category array counts) are missing from
            // this extraction.
157            [&](int i)
158            { return arrCellCentScalar_names.at(i); },
159            [&](int i, index iC)
160            {
161                return i < arrCellCentScalar_offset
162                           ? uPrimCell->father->operator()(iC, 0)
163                           : arrCellCentScalar.at(i - arrCellCentScalar_offset)->father->operator[](iC)(0);
164            },
165            [&](int i)
166            { return arrCellCentVec_names.at(i); },
167            [&](int i, index iC, rowsize iV)
168            {
                // iV + 1 skips the leading density component of the primitive
                // DOF layout to address the velocity components.
169                return i < arrCellCentVec_offset
170                           ? uPrimCell->father->operator()(iC, iV + 1)
171                           : arrCellCentVec.at(i - arrCellCentVec_offset)->father->operator[](iC)(iV);
172            },
173            [&](int i)
174            { return arrNodeScalar_names.at(i); },
175            [&](int i, index iN)
176            {
177                return i < arrNodeScalar_offset
178                           ? uPrimNode->father->operator()(iN, 0)
179                           : arrNodeScalar.at(i - arrNodeScalar_offset)->father->operator[](iN)(0);
180            },
181            [&](int i)
182            { return arrNodeVec_names.at(i); },
183            [&](int i, index iN, rowsize iV)
184            {
185                return i < arrNodeVec_offset
186                           ? uPrimNode->father->operator()(iN, iV + 1)
187                           : arrNodeVec.at(i - arrNodeVec_offset)->father->operator[](iN)(iV);
188            },
189            t,
190            commDup);
191        MPI_Comm_free(&commDup);
192    }
193
194    /**
195     * @brief Dispatches Green-Gauss gradient reconstruction + Barth-Jespersen limiter.
196     *
197     * Validates arguments, prepares face buffers, detects the active backend (Host or CUDA),
198     * constructs the impl-level arg struct with device views, and calls both the GG
199     * reconstruction and Barth limiter sub-kernels.
200     */
    // NOTE(review): the method signature (original lines 201-202) is missing from
    // this extraction; per the member index it is
    // void RecGradient(RecGradient_Arg &arg).
203    {
204        DeviceBackend B = this->device();
205        arg.Validate(*this);
206        arg.WaitAllPull(B);
207
208        // DNDS_check_throw(u_face_bufferL.father);
209        // DNDS_check_throw(uScalar_face_bufferL.size() >= uScalar.size());
210        // for (int i = 0; i < uScalar.size(); i++)
211        //     DNDS_check_throw(uScalar_face_bufferL[i].father);
212        PrepareFaceBuffer(arg.uScalar.size());
213
        // NOTE(review): the Host-branch condition (original line 214) and the
        // kernel-call lines in both branches (216-221, 227-231) were dropped by
        // the doc extraction; per the impl-layer index these presumably invoke
        // RecGradient_GGRec and RecGradient_BarthLimiter — confirm.
215        {
218
220
222
223#ifdef DNDS_USE_CUDA
224        else if (B == DeviceBackend::CUDA)
225        {
            // Shadow the runtime backend value with a compile-time constant so
            // the impl layer can be instantiated for the CUDA backend.
226            constexpr DeviceBackend B = DeviceBackend::CUDA;
228
230
232
233#endif
234        else
            // Unknown / unsupported backend: hard failure.
235            DNDS_check_throw(false);
236    }
237
238    /**
239     * @brief Dispatches conservative-to-primitive conversion with viscosity computation.
240     *
241     * Validates arguments, waits for MPI data pulls, and dispatches to the backend-specific
242     * Cons2PrimMu kernel via a compile-time lambda parameterized by DeviceBackend.
243     */
    // NOTE(review): the method signature (original line 244) is missing from this
    // extraction; per the member index it is void Cons2PrimMu(Cons2PrimMu_Arg &arg).
245    {
246        DeviceBackend B = this->device();
247        arg.Validate(*this);
248        arg.WaitAllPull(B);
249
        // Generic lambda: the backend is passed as an integral_constant so the
        // body sees it as a constexpr value (compile-time dispatch). The default
        // argument is never used — callers below always pass the tag explicitly.
250        auto execute = [&](auto b = std::integral_constant<DeviceBackend, DeviceBackend::Host>())
251        {
252            constexpr DeviceBackend B = decltype(b)::value;
253
            // NOTE(review): the kernel-call lines (original lines 254 and 264)
            // are missing from this extraction; presumably they invoke
            // Evaluator_impl<B>::Cons2PrimMu — confirm.
255
256            // todo: conditionally disable comm
257            // arg.u->trans.waitPersistentPull();
258            // for (auto &uS : arg.uScalar)
259            //     uS->trans.waitPersistentPull();
260            // arg.uGrad->trans.waitPersistentPull();
261            // for (auto &uS : arg.uScalarGrad)
262            //     uS->trans.waitPersistentPull();
263
265
266            // todo: conditionally disable comm
267            // arg.uPrim->trans.startPersistentPull();
268            // for (auto &uS : arg.uScalarPrim)
269            //     uS->trans.startPersistentPull();
270            // arg.p->trans.startPersistentPull();
271            // arg.T->trans.startPersistentPull();
272            // arg.a->trans.startPersistentPull();
273            // arg.mu->trans.startPersistentPull();
274            // for (auto &uS : arg.muComp)
275            //     uS->trans.startPersistentPull();
276        };
277
        // NOTE(review): the Host-branch condition (original line 278) is missing
        // from this extraction; by symmetry with the CUDA branch it presumably
        // reads if (B == DeviceBackend::Host).
279        {
280            execute(std::integral_constant<DeviceBackend, DeviceBackend::Host>());
281        }
282#ifdef DNDS_USE_CUDA
283        else if (B == DeviceBackend::CUDA)
284        {
285            execute(std::integral_constant<DeviceBackend, DeviceBackend::CUDA>());
286        }
287#endif
288        else
289            DNDS_check_throw(false);
290    }
291
292    /**
293     * @brief Dispatches conservative-to-primitive conversion (no gradients or viscosity).
294     *
295     * Simplified dispatch: validates, waits for MPI pulls, and calls the Cons2Prim kernel.
296     */
    // NOTE(review): the method signature (original line 297) is missing from this
    // extraction; per the member index it is void Cons2Prim(Cons2Prim_Arg &arg).
298    {
299        DeviceBackend B = this->device();
300        arg.Validate(*this);
301        arg.WaitAllPull(B);
302
        // Unlike the sibling dispatchers, this lambda's parameter carries no
        // default argument; it is always called with an explicit backend tag.
303        auto execute = [&](auto b)
304        {
305            constexpr DeviceBackend B = decltype(b)::value;
306
            // NOTE(review): the kernel-call lines (original lines 307 and 314)
            // are missing from this extraction; presumably they invoke
            // Evaluator_impl<B>::Cons2Prim — confirm.
308
309            // todo: conditionally disable comm
310            // arg.u->trans.waitPersistentPull();
311            // for (auto &uS : arg.uScalar)
312            //     uS->trans.waitPersistentPull();
313
315
316            // todo: conditionally disable comm
317            // arg.uPrim->trans.startPersistentPull();
318            // for (auto &uS : arg.uScalarPrim)
319            //     uS->trans.startPersistentPull();
320            // arg.p->trans.startPersistentPull();
321            // arg.T->trans.startPersistentPull();
322            // arg.a->trans.startPersistentPull();
323        };
324
        // NOTE(review): the Host-branch condition (original line 325) is missing;
        // by symmetry it presumably reads if (B == DeviceBackend::Host).
326        {
327            execute(std::integral_constant<DeviceBackend, DeviceBackend::Host>());
328        }
329#ifdef DNDS_USE_CUDA
330        else if (B == DeviceBackend::CUDA)
331        {
332            execute(std::integral_constant<DeviceBackend, DeviceBackend::CUDA>());
333        }
334#endif
335        else
336            DNDS_check_throw(false);
337    }
338
339    /**
340     * @brief Dispatches eigenvalue estimation and local time-step computation.
341     *
342     * Two-pass dispatch: first computes per-face eigenvalue estimates (GetFaceLam),
343     * then accumulates to per-cell local time steps (FaceLam2CellDt).
344     */
    // NOTE(review): the method signature (original line 345) is missing from this
    // extraction; per the member index it is void EstEigenDt(EstEigenDt_Arg &arg).
346    {
347        DeviceBackend B = this->device();
348        arg.Validate(*this);
349        arg.WaitAllPull(B);
350
        // Backend tag passed as an integral_constant: the lambda body sees the
        // backend as a constexpr value for impl-layer instantiation.
351        auto execute = [&](auto b = std::integral_constant<DeviceBackend, DeviceBackend::Host>())
352        {
353            constexpr DeviceBackend B = decltype(b)::value;
354
            // NOTE(review): the kernel-call lines (original lines 355, 357, 368)
            // are missing from this extraction; per the impl-layer index these
            // presumably invoke EstEigenDt_GetFaceLam and
            // EstEigenDt_FaceLam2CellDt — confirm.
356
358
359            //! this is not needed, as we have ensured that all valid-internal faces have valid state
360            // faceLamEst->trans.startPersistentPull();
361            // faceLamVisEst->trans.startPersistentPull();
362            // deltaLamFace->trans.startPersistentPull();
363
364            // faceLamEst->trans.waitPersistentPull();
365            // faceLamVisEst->trans.waitPersistentPull();
366            // deltaLamFace->trans.waitPersistentPull();
367
369        };
370
        // NOTE(review): the Host-branch condition (original line 371) is missing;
        // by symmetry it presumably reads if (B == DeviceBackend::Host).
372        {
373            execute(std::integral_constant<DeviceBackend, DeviceBackend::Host>());
374        }
375#ifdef DNDS_USE_CUDA
376        else if (B == DeviceBackend::CUDA)
377        {
378            execute(std::integral_constant<DeviceBackend, DeviceBackend::CUDA>());
379        }
380#endif
381        else
382            DNDS_check_throw(false);
383    }
384
385    /**
386     * @brief Dispatches 2nd-order face value reconstruction from cell-centered data.
387     *
388     * Validates arguments, waits for MPI pulls, and calls the RecFace2nd kernel which
389     * extrapolates cell values/gradients to face quadrature points.
390     */
    // NOTE(review): the method signature (original line 391) is missing from this
    // extraction; per the member index it is void RecFace2nd(RecFace2nd_Arg &arg).
392    {
393        DeviceBackend B = this->device();
394        arg.Validate(*this);
395        arg.WaitAllPull(B);
396        auto execute = [&](auto b = std::integral_constant<DeviceBackend, DeviceBackend::Host>())
397        {
398            constexpr DeviceBackend B = decltype(b)::value;
399
            // NOTE(review): the kernel-call lines (original lines 400, 402) are
            // missing from this extraction; presumably they invoke
            // Evaluator_impl<B>::RecFace2nd — confirm.
401
403        };
404
        // NOTE(review): the Host-branch condition (original line 405) is missing;
        // by symmetry it presumably reads if (B == DeviceBackend::Host).
406        {
407            execute(std::integral_constant<DeviceBackend, DeviceBackend::Host>());
408        }
409#ifdef DNDS_USE_CUDA
410        else if (B == DeviceBackend::CUDA)
411        {
412            execute(std::integral_constant<DeviceBackend, DeviceBackend::CUDA>());
413        }
414#endif
415        else
416            DNDS_check_throw(false);
417    }
418
419    /**
420     * @brief Dispatches 2nd-order Roe flux evaluation and RHS accumulation.
421     *
422     * Validates arguments, prepares face buffers, waits for all scalar MPI pulls,
423     * and dispatches the Flux2nd kernel which computes per-face Roe flux and scatters
424     * to per-cell RHS.
425     */
    // NOTE(review): the method signature (original line 426) is missing from this
    // extraction; per the member index it is void Flux2nd(Flux2nd_Arg &arg).
427    {
428        DeviceBackend B = this->device();
429        arg.Validate(*this);
        // Face buffers must exist before the flux kernel runs; sized by the
        // number of scalar fields carried alongside the DOF state.
430        PrepareFaceBuffer(arg.uScalar.size());
431        arg.WaitAllPull(B);
432
433        auto execute = [&](auto b = std::integral_constant<DeviceBackend, DeviceBackend::Host>())
434        {
435            constexpr DeviceBackend B = decltype(b)::value;
436
            // NOTE(review): the kernel-call lines (original lines 437, 448) are
            // missing from this extraction; presumably they invoke
            // Evaluator_impl<B>::Flux2nd — confirm.
438
            // Block until the halo exchange of every scalar field (values,
            // primitives, and both gradient sets) has completed.
439            for (auto &uS : arg.uScalar)
440                uS->trans.waitPersistentPull();
441            for (auto &uS : arg.uScalarPrim)
442                uS->trans.waitPersistentPull();
443            for (auto &uS : arg.uScalarGrad)
444                uS->trans.waitPersistentPull();
445            for (auto &uS : arg.uScalarGradPrim)
446                uS->trans.waitPersistentPull();
447
449
450            // arg.rhs->trans.startPersistentPull();
451            // for (auto &uS : arg.rhsScalar)
452            //     uS->trans.startPersistentPull();
453        };
454
        // NOTE(review): the Host-branch condition (original line 455) is missing;
        // by symmetry it presumably reads if (B == DeviceBackend::Host).
456        {
457            execute(std::integral_constant<DeviceBackend, DeviceBackend::Host>());
458        }
459#ifdef DNDS_USE_CUDA
460        else if (B == DeviceBackend::CUDA)
461        {
462            execute(std::integral_constant<DeviceBackend, DeviceBackend::CUDA>());
463        }
464#endif
465        else
466            DNDS_check_throw(false);
467    }
468}
Helpers for shipping an array-of-views (e.g., ArrayDeviceView) to the device in one contiguous buffer...
Device memory abstraction layer with backend-specific storage and factory creation.
Assertion / error-handling macros and supporting helper functions.
#define DNDS_check_throw(expr)
Runtime check active in both debug and release builds. Throws std::runtime_error if expr evaluates to...
Definition Errors.hpp:89
Core type definitions and utilities for the EulerP alternative Navier-Stokes evaluator module.
Main evaluator class for the EulerP compressible Navier-Stokes solver module.
Backend-specific implementation layer for EulerP Evaluator kernel dispatch.
EvaluatorConfig()
Constructs the config with default values (threadsPerBlock=128, etc.).
void valid_patch_keys(const t_jsonconfig &config_in)
Validates that all keys in config_in exist in the current defaults.
void RecGradient(RecGradient_Arg &arg)
Performs Green-Gauss gradient reconstruction with Barth-Jespersen limiting.
void Flux2nd(Flux2nd_Arg &arg)
Evaluates 2nd-order inviscid (Roe) flux and accumulates into the cell RHS residual.
DeviceBackend device()
Queries and validates the device backend across all sub-objects.
void PrintDataVTKHDF(std::string fname, std::string series_name, std::vector< ssp< TUScalar > > &arrCellCentScalar, const std::vector< std::string > &arrCellCentScalar_names_in, std::vector< ssp< TUVec > > &arrCellCentVec, const std::vector< std::string > &arrCellCentVec_names_in, std::vector< ssp< TUScalar > > &arrNodeScalar, const std::vector< std::string > &arrNodeScalar_names_in, std::vector< ssp< TUVec > > &arrNodeVec, const std::vector< std::string > &arrNodeVec_names_in, ssp< TUDof > uPrimCell, ssp< TUDof > uPrimNode, double t)
Writes cell-centered and node-centered field data to a VTK-HDF5 file.
void PrepareFaceBuffer(int nVarsScalar)
Prepares all face buffers (DOF + scalars) for left and right states.
void EstEigenDt(EstEigenDt_Arg &arg)
Estimates per-face eigenvalues and computes per-cell local time steps.
void Cons2Prim(Cons2Prim_Arg &arg)
Converts conservative variables to primitive variables (no gradients or viscosity).
t_fv fv
Shared pointer to the Compact Finite Volume (CFV) reconstruction object.
void RecFace2nd(RecFace2nd_Arg &arg)
Performs 2nd-order face value reconstruction from cell-centered data.
void Cons2PrimMu(Cons2PrimMu_Arg &arg)
Converts conservative variables to primitive variables and computes viscosity.
Namespace for the EulerP alternative evaluator module with GPU support.
Definition EulerP.hpp:29
DeviceBackend
Enumerates the backends a DeviceStorage / Array can live on.
@ Unknown
Unset / sentinel.
@ Host
Plain CPU memory.
int32_t rowsize
Row-width / per-row element-count type (signed 32-bit).
Definition Defines.hpp:109
int64_t index
Global row / DOF index type (signed 64-bit; handles multi-billion-cell meshes).
Definition Defines.hpp:107
nlohmann::ordered_json t_jsonconfig
Project-wide JSON type alias: nlohmann/json with ordered keys.
Definition JsonUtil.hpp:19
Packed argument struct for conservative-to-primitive conversion with viscosity computation.
void Validate(Evaluator &self)
Validates all member arrays against the mesh topology and device backend.
Packed argument struct for conservative-to-primitive conversion without gradients or viscosity.
void Validate(Evaluator &self)
Validates all member arrays against the mesh topology and device backend.
Packed argument struct for eigenvalue estimation and local time-step computation.
void Validate(Evaluator &self)
Validates all member arrays, distinguishing cell-located from face-located fields.
Packed argument struct for 2nd-order inviscid (and eventually viscous) flux evaluation.
void Validate(Evaluator &self)
Validates member arrays, auto-detecting cell vs. face location by name suffix.
Packed argument struct for 2nd-order face value reconstruction.
void Validate(Evaluator &self)
Validates member arrays, auto-detecting cell vs. face location by name suffix.
Packed argument struct for Green-Gauss gradient reconstruction with Barth-Jespersen limiting.
void Validate(Evaluator &self)
Validates all member arrays against the mesh topology and device backend.
Non-trivially-copyable device view holding shared_ptr to fv and device views of BC/physics.
Device-side argument struct for conservative-to-primitive + viscosity kernel.
Device-side argument struct for conservative-to-primitive conversion (no gradients/viscosity).
Device-side argument struct for eigenvalue estimation and time-step computation.
Device-side argument struct for 2nd-order flux evaluation and RHS accumulation.
Device-side argument struct for 2nd-order face value reconstruction.
Device-side argument struct for gradient reconstruction kernels.
static void Flux2nd(Flux2nd_Arg &arg)
Evaluates 2nd-order Roe flux per face and scatters to cell RHS.
static void Cons2PrimMu(Cons2PrimMu_Arg &arg)
Executes conservative-to-primitive conversion with viscosity computation.
static void EstEigenDt_GetFaceLam(EstEigenDt_Arg &arg)
First pass: computes per-face eigenvalue estimates from cell states.
static void Cons2Prim(Cons2Prim_Arg &arg)
Executes conservative-to-primitive conversion (no gradients/viscosity).
static void EstEigenDt_FaceLam2CellDt(EstEigenDt_Arg &arg)
Second pass: accumulates face eigenvalues to cells and computes local dt.
static void RecFace2nd(RecFace2nd_Arg &arg)
Executes 2nd-order face value reconstruction from cell-centered data.
static void RecGradient_BarthLimiter(RecGradient_Arg &arg)
Barth-Jespersen gradient limiter applied to reconstructed gradients.
static void RecGradient_GGRec(RecGradient_Arg &arg)
Green-Gauss gradient reconstruction: boundary ghost values + cell gradient computation.
tVec b(NCells)