DNDSR 0.1.0.dev1+gcd065ad
Distributed Numeric Data Structure for CFV
Loading...
Searching...
No Matches
EulerP_Evaluator_impl_common.hxx
Go to the documentation of this file.
1/** @file EulerP_Evaluator_impl_common.hxx
2 * @brief Shared device-callable kernel implementations for the EulerP Evaluator.
3 *
4 * Contains the per-element kernel functions (per-cell or per-face) that implement the
5 * computational core of each EulerP evaluation stage. These kernels are called from both
6 * the Host backend (via OpenMP loops in EulerP_Evaluator_impl.cpp) and the CUDA backend
7 * (via CUDA kernel launches).
8 *
9 * Each kernel operates on a single element index range [i, iEnd) and uses the Portable
10 * sub-struct from the corresponding Evaluator_impl<B> arg for data access. CUDA-specific
11 * shared memory buffers are conditionally compiled with DNDS_USE_CUDA.
12 *
13 * Kernels defined here:
14 * - RecGradient_GGRec_Kernel_BndVal — boundary ghost value generation for gradient reconstruction
15 * - RecGradient_GGRec_Kernel_GG — Green-Gauss gradient computation (per-cell)
16 * - RecGradient_BarthLimiter_Kernel_FlowPart — Barth-Jespersen limiter for flow variables (per-cell)
17 * - RecGradient_BarthLimiter_Kernel_ScalarPart — Barth-Jespersen limiter for scalar variables (per-cell)
18 * - Cons2PrimMu_Kernel — conservative-to-primitive + viscosity (per-cell)
19 * - Cons2Prim_Kernel — conservative-to-primitive only (per-cell)
20 * - EstEigenDt_GetFaceLam_Kernel — face eigenvalue estimation (per-face)
21 * - EstEigenDt_FaceLam2CellDt_Kernel — face-to-cell eigenvalue accumulation + dt (per-cell)
22 * - RecFace2nd_Kernel — 2nd-order face reconstruction (per-face)
23 * - Flux2nd_Kernel_FluxFace — Roe inviscid flux computation (per-face)
24 * - Flux2nd_Kernel_Face2Cell — face flux scatter to cell RHS (per-cell)
25 */
26#include "DNDS/Defines.hpp"
28#include "DNDS/Errors.hpp"
29#include "EulerP/EulerP.hpp"
30#include "EulerP_ARS.hpp"
32#include "Geom/Geometric.hpp"
34namespace DNDS::EulerP
35{
36 /**
37 * @brief Generates boundary ghost values for Green-Gauss gradient reconstruction.
38 *
39 * For a given boundary face, applies the boundary condition to produce a ghost-cell
40 * state stored in the face BC buffer. Internal faces are skipped. The ghost values
41 * are later used by the Green-Gauss kernel in place of a neighbor cell's state.
42 *
43 * @tparam B DeviceBackend (Host or CUDA), defaulting to Host.
44 * @param self_view Device view providing mesh, BC handler, and physics access.
45 * @param arg Portable struct with device views of state arrays and face BC buffers.
46 * @param iBnd Boundary index to process.
47 * @param iBndEnd Upper bound of the boundary index range (used for bounds checking).
48 * @param nVars Total number of variables (flow + scalar).
49 * @param nVarsScalar Number of additional transported scalar variables.
50 */
    // NOTE(review): this listing lost the kernel's declaration line and the `arg`
    // parameter line in extraction (kernel is RecGradient_GGRec_Kernel_BndVal per the
    // header comment above, and several DNDS_EULERP_IMPL_ARG_GET_REF lines for
    // u / uScalar / faceBCBuffer are also missing) — restore from version control.
    template <DeviceBackend B = DeviceBackend::Host>
        EvaluatorDeviceView<B> &self_view,
        index iBnd, index iBndEnd,
        int nVars, int nVarsScalar)
    {
        using namespace Geom;
        auto &mesh = self_view.fv.mesh;
        auto &fv = self_view.fv;
        auto &bcHandler = self_view.bcHandler;
        auto &phy = self_view.physics;
        DNDS_EULERP_IMPL_ARG_GET_REF(faceBCScalarBuffer)

        if (iBnd >= iBndEnd)
            return; // out-of-range index (e.g. CUDA grid overshoot) — nothing to do

        auto bcid = mesh.GetBndZone(iBnd);
        if (Geom::FaceIDIsInternal(bcid))
            return; // internal faces need no ghost state

        index iFace = mesh.bnd2face(iBnd, 0);
        index iCell = mesh.bnd2cell(iBnd, 0);

        // Flat component accessor over the inside-cell state: flow variables
        // [0, nVarsFlow) first, transported scalars after.
        auto uI = [u, uScalar, iCell] DNDS_DEVICE(int i) mutable -> real &
        {
            if (i < nVarsFlow)
                return u(iCell, i);
            else
                return uScalar[i - nVarsFlow](iCell);
        };
        // Flat component accessor over the ghost output — both buffers are face-indexed.
        auto uOut = [faceBCBuffer, faceBCScalarBuffer, iFace] DNDS_DEVICE(int i) mutable -> real &
        {
            if (i < nVarsFlow)
                return faceBCBuffer(iFace, i);
            else
                return faceBCScalarBuffer[i - nVarsFlow](iFace);
        };

        // The boundary-condition object writes the ghost state through uOut, given the
        // face quadrature point, the face normal, and the physics model.
        auto bc = bcHandler.id2bc(bcid);
        bc.apply(uI, uOut, nVars,
                 fv.GetFaceQuadraturePPhys(iFace, -1),
                 fv.GetFaceNorm(iFace, -1),
                 phy);
    }
101 // only for intellisense hint
105 index iBnd, index iBndEnd,
106 int nVars, int nVarsScalar);
107
108 /**
109 * @brief Green-Gauss gradient reconstruction kernel (per-cell).
110 *
111 * Computes the cell gradient of the conservative state using the Green-Gauss theorem:
112 * grad(u)_cell = (1/V) * sum_faces [ 0.5 * (u_neighbor - u_cell) (x) n * A ].
113 * Boundary faces use ghost values from the face BC buffer. Also computes gradients
114 * for transported scalar variables.
115 *
116 * On CUDA, uses shared memory for coalesced global writes via CUDA_Local2GlobalAssign.
117 *
118 * @tparam B DeviceBackend (Host or CUDA), defaulting to Host.
119 * @param self_view Device view providing mesh, BC handler, and physics access.
120 * @param arg Portable struct with device views of state, gradient, and BC buffer arrays.
121 * @param iCell Cell index to process.
122 * @param iCellEnd Upper bound of cell index range (used for bounds checking).
123 * @param nVars Total number of variables (flow + scalar).
124 * @param nVarsScalar Number of additional transported scalar variables.
125 */
126 template <DeviceBackend B = DeviceBackend::Host>
128 EvaluatorDeviceView<B> &self_view,
130 index iCell, index iCellEnd,
131 int nVars, int nVarsScalar)
132 {
133 using namespace Geom;
134 auto &mesh = self_view.fv.mesh;
135 auto &fv = self_view.fv;
136 auto &bcHandler = self_view.bcHandler;
137 auto &phy = self_view.physics;
142 DNDS_EULERP_IMPL_ARG_GET_REF(faceBCBuffer)
143 DNDS_EULERP_IMPL_ARG_GET_REF(faceBCScalarBuffer)
144
145#ifdef DNDS_USE_CUDA
146 __shared__ CUDA::SharedBuffer<index, 128> buf_idx;
147 __shared__ CUDA::SharedBuffer<real, 128 * (3 * nVarsFlow)> buf_val;
148#endif
149
150 TDiffU grad_flow;
151 grad_flow.setZero();
152 if (iCell < iCellEnd)
153 {
154 for (int iVarS = 0; iVarS < nVarsScalar; iVarS++)
155 uScalarGrad[iVarS].father[iCell].setZero();
156 auto c2f = mesh.cell2face[iCell];
157 TU uI = u[iCell];
158
159 for (long iFace : c2f)
160 {
161 index iCellOther = mesh.CellFaceOther(iCell, iFace);
162 rowsize if2c = mesh.CellIsFaceBack(iCell, iFace) ? 0 : 1;
163 tPoint norm = 0.5 * fv.GetFaceNormFromCell(iFace, iCell, if2c, -1) *
164 ((if2c ? -1.0 : 1.0) * fv.GetFaceArea(iFace) / fv.GetCellVol(iCell));
165 // ! the 0.5 is because we use arithmetic mean
166 // ! change to 0.5 * OtherWeight if needed
167
168 TU uOther;
169 uOther.setZero();
170 if (iCellOther != UnInitIndex)
171 uOther = u[iCellOther];
172 else
173 uOther = faceBCBuffer.father[iFace];
174
175 grad_flow.noalias() += norm * (uOther - uI).transpose();
176 for (int iVarS = 0; iVarS < nVarsScalar; iVarS++)
177 {
178 real uI = uScalar[iVarS].father(iCell);
179 real uOther = 0.;
180 if (iCellOther != UnInitIndex)
181 uOther = uScalar[iVarS](iCell);
182 else
183 uOther = faceBCScalarBuffer[iVarS](iCell);
184 uScalarGrad[iVarS].father[iCell].noalias() += norm * (uOther - uI);
185 }
186 }
187 }
188#ifdef DNDS_USE_CUDA
189 if constexpr (B == DeviceBackend::CUDA && true)
190 {
191# ifdef __CUDA_ARCH__
192 // using t_BufMatrix = Eigen::Matrix<real, 3, nVarsFlow * 128>;
193 // __shared__ real grad_buf_data[3 * (nVarsFlow * 128)];
194 // Eigen::Map<t_BufMatrix> grad_buf(grad_buf_data);
195
196 // __shared__ index iCellThread[128];
197 // int tid = threadIdx.x;
198 // int bDim = blockDim.x;
199 // DNDS_HD_assert(tid < 128 && tid >= 0);
200 // iCellThread[tid] = iCell;
201 // grad_buf.block(0, nVarsFlow * tid, 3, nVarsFlow) = grad_flow;
202 // __syncthreads();
203 // for (int i = 0; i < 3 * nVarsFlow; i++)
204 // {
205 // int iComp = (i * bDim + tid);
206 // int iCellInBlock = iComp / (3 * nVarsFlow);
207 // int iCompSub = iComp % (3 * nVarsFlow);
208 // // int iComp = (i * bDim + tid) % (3 * nVarsFlow);
209 // index iCellC = iCellThread[iCellInBlock];
210 // uGrad.father[iCellC](iCompSub) = grad_buf(iComp);
211 // }
212# endif
213 detail::CUDA_Local2GlobalAssign<3 * nVarsFlow, 128>(
214 [grad_flow] DNDS_DEVICE(int i) mutable -> real &
215 { return grad_flow(i); },
216 [uGrad] DNDS_DEVICE(index iCellC, int i) mutable -> real &
217 { return uGrad.father[iCellC](i); },
218 buf_idx, buf_val,
219 iCell, iCellEnd);
220 }
221 else
222#endif
223 {
224 if (iCell < iCellEnd)
225 uGrad.father[iCell] = grad_flow;
226 }
227 }
228 // only for intellisense hint
232 index iCell, index iCellEnd,
233 int nVars, int nVarsScalar);
234
235 /**
236 * @brief Barth-Jespersen gradient limiter kernel for flow variables (per-cell).
237 *
238 * Limits cell gradients to prevent the reconstructed face values from creating
239 * new extrema. Operates on density, total energy, and velocity magnitude independently.
240 * Also enforces positivity of internal energy by computing a secondary limiting factor
241 * based on the minimum reconstructed internal energy across cell faces.
242 *
243 * @tparam B DeviceBackend (Host or CUDA), defaulting to Host.
244 * @param self_view Device view providing mesh, BC handler, and physics access.
245 * @param arg Portable struct with device views of state and gradient arrays.
246 * @param iCell Cell index to process.
247 * @param iCellEnd Upper bound of cell index range.
248 * @param nVars Total number of variables (flow + scalar).
249 * @param nVarsScalar Number of additional transported scalar variables.
250 */
    // NOTE(review): this listing lost the kernel's declaration line and the `arg`
    // parameter line in extraction (kernel is RecGradient_BarthLimiter_Kernel_FlowPart
    // per the header comment above; GET_REF lines for u / uGrad are also missing) —
    // restore from version control.
    template <DeviceBackend B = DeviceBackend::Host>
        EvaluatorDeviceView<B> &self_view,
        index iCell, index iCellEnd,
        int nVars, int nVarsScalar)
    {
        using namespace Geom;
        auto &mesh = self_view.fv.mesh;
        auto &fv = self_view.fv;
        auto &bcHandler = self_view.bcHandler;
        auto &phy = self_view.physics;

#ifdef DNDS_USE_CUDA
        __shared__ CUDA::SharedBuffer<index, 128> buf_idx;
        __shared__ CUDA::SharedBuffer<real, 128 * (3 * nVarsFlow)> buf_val;
#endif

        TDiffU grad;

        if (iCell < iCellEnd)
        {
            auto c2f = mesh.cell2face[iCell];
            TU uI = u[iCell];
            grad = uGrad.father[iCell];

            // Limiting monitors three quantities: density, total energy, velocity magnitude.
            tPoint uI_mag;
            uI_mag << uI(0), uI(I4), U123(uI).norm();
            tPoint uIIncMax;
            tPoint uIIncMin;

            tPoint uOtherMax = uI_mag;
            tPoint uOtherMin = uI_mag;

            // we use 0.0 here to avoid invert situation
            uIIncMax.setConstant(0.0);
            uIIncMin.setConstant(0.0);

            real EInternalI = phy.Cons2EInternal(uI, nVarsFlow);
            real EInternalMin = EInternalI;

            // Pass 1: reconstructed increments at face quadrature points, plus
            // neighbor-cell min/max of the monitor quantities and of internal energy.
            for (int ic2f = 0; ic2f < c2f.size(); ic2f++)
            {
                index iFace = c2f[ic2f];
                index iCellOther = mesh.CellFaceOther(iCell, iFace);
                TU uIncPoint = grad.transpose() *
                               (fv.GetFaceQuadraturePPhysFromCell(iFace, iCell, -1, -1) -
                                fv.GetCellQuadraturePPhys(iCell, -1));
                tPoint uIncPoint_mag;
                uIncPoint_mag << uIncPoint(0), uIncPoint(I4), U123(uIncPoint).norm();
                uIIncMax = uIIncMax.array().max(uIncPoint_mag.array());
                uIIncMin = uIIncMin.array().min(uIncPoint_mag.array());

                if (iCellOther != UnInitIndex)
                {
                    // NOTE: uIncPoint is reused below to hold the neighbor state.
                    uIncPoint = u[iCellOther];
                    uIncPoint_mag << uIncPoint(0), uIncPoint(I4), U123(uIncPoint).norm();
                    uOtherMax = uOtherMax.array().max(uIncPoint_mag.array());
                    uOtherMin = uOtherMin.array().min(uIncPoint_mag.array());
                    real EOther = phy.Cons2EInternal(uIncPoint, nVarsFlow);
                    EInternalMin = std::min(EOther, EInternalMin);
                }
            }

            // Limiter factor per monitor quantity: |allowed jump| / |increment|,
            // most restrictive of max-side and min-side, clamped to [0,1].
            uOtherMax -= uI_mag;
            uOtherMin -= uI_mag;
            uOtherMax = (uOtherMax.array().abs()) / (uIIncMax.array().abs() + verySmallReal);
            uOtherMin = (uOtherMin.array().abs()) / (uIIncMin.array().abs() + verySmallReal);
            uOtherMin = uOtherMin.array().min(uOtherMax.array());
            uOtherMin = uOtherMin.array().max(0.0).min(1.0);
            // grad.array().rowwise() *= (uOtherMax.array().min(uOtherMin.array())).transpose();
            // grad *= uOtherMin(0.0);
            grad.col(0) *= uOtherMin(0);  // density column
            grad.col(I4) *= uOtherMin(1); // total-energy column
            for (int i = 1; i < 1 + 3; i++)
                grad.col(i) *= uOtherMin(2); // momentum columns share the velocity-magnitude factor

            // use the new grad for EInternal reconstruction!
            // Pass 2: with the limited gradient, find the minimum reconstructed
            // internal energy over the face points to enforce positivity.
            real EInternalPointMin = EInternalI;
            for (int ic2f = 0; ic2f < c2f.size(); ic2f++)
            {
                index iFace = c2f[ic2f];
                index iCellOther = mesh.CellFaceOther(iCell, iFace);
                TU uIncPoint = grad.transpose() *
                               (fv.GetFaceQuadraturePPhysFromCell(iFace, iCell, -1, -1) -
                                fv.GetCellQuadraturePPhys(iCell, -1));
                uIncPoint += uI;
                real EOther = phy.Cons2EInternal(uIncPoint, nVarsFlow);
                EInternalPointMin = std::min(EOther, EInternalPointMin);
            }
            // Secondary factor keeping reconstructed e_int above 10% of the stencil minimum.
            real alphaE =
                (EInternalI - EInternalMin * 0.1) / std::max(EInternalI - EInternalPointMin, verySmallReal);
            grad *= std::clamp(alphaE, 0., 1.);

            // for (int ic2f = 0; ic2f < c2f.size(); ic2f++)
            // {
            //     index iFace = c2f[ic2f];
            //     index iCellOther = mesh.CellFaceOther(iCell, iFace);
            //     TU uIncPoint = grad.transpose() *
            //                    (fv.GetFaceQuadraturePPhysFromCell(iFace, iCell, -1, -1) -
            //                     fv.GetCellQuadraturePPhys(iCell, -1));
            //     uIncPoint += uI;
            //     real EOther = phy.Cons2EInternal(uIncPoint, nVarsFlow);
            //     DNDS_HD_assert(EOther > 0);
            // }
        }
#ifdef DNDS_USE_CUDA
        if constexpr (B == DeviceBackend::CUDA)
        {
            detail::CUDA_Local2GlobalAssign<3 * nVarsFlow, 128>(
                [grad] DNDS_DEVICE(int i) mutable -> real &
                { return grad(i); },
                [uGrad] DNDS_DEVICE(index iCellC, int i) mutable -> real &
                { return uGrad.father[iCellC](i); },
                buf_idx, buf_val,
                iCell, iCellEnd);
        }
        else
#endif
        {
            if (iCell < iCellEnd)
                uGrad.father[iCell] = grad;
        }
    }
377 // only for intellisense hint
381 index iCell, index iCellEnd,
382 int nVars, int nVarsScalar);
383
384 /**
385 * @brief Barth-Jespersen gradient limiter kernel for transported scalar variables (per-cell).
386 *
387 * Applies the same min/max bounding logic as the flow limiter but for additional
388 * transported scalar fields. Processes scalars in batches of bufSize (nVarsFlow)
389 * to reuse the TU-sized buffers.
390 *
391 * @tparam B DeviceBackend (Host or CUDA), defaulting to Host.
392 * @param self_view Device view providing mesh, BC handler, and physics access.
393 * @param arg Portable struct with device views of scalar and scalar gradient arrays.
394 * @param iCell Cell index to process.
395 * @param iCellEnd Upper bound of cell index range.
396 * @param nVars Total number of variables (flow + scalar).
397 * @param nVarsScalar Number of additional transported scalar variables.
398 */
399 template <DeviceBackend B = DeviceBackend::Host>
401 EvaluatorDeviceView<B> &self_view,
403 index iCell, index iCellEnd,
404 int nVars, int nVarsScalar)
405 {
406 using namespace Geom;
407 auto &mesh = self_view.fv.mesh;
408 auto &fv = self_view.fv;
409 auto &bcHandler = self_view.bcHandler;
410 auto &phy = self_view.physics;
413 DNDS_EULERP_IMPL_ARG_GET_REF(faceBCBuffer)
414 DNDS_EULERP_IMPL_ARG_GET_REF(faceBCScalarBuffer)
415
416 if (iCell >= iCellEnd)
417 return;
418
419 auto c2f = mesh.cell2face[iCell];
420 TU uI;
421 TU uIIncMax;
422 TU uIIncMin;
423
424 TU uOtherMax;
425 TU uOtherMin;
426
427 constexpr int bufSize = nVarsFlow; // todo: use another bufsize
428 for (int iVar = 0; iVar < nVarsScalar; iVar += bufSize)
429 {
430 int nCur = std::min(nVarsScalar - iVar, bufSize);
431 for (int iVarS = 0; iVarS < nCur; iVarS++)
432 uI(iVarS) = uScalar[iVar + iVarS](iCell);
433 // we use 0.0 here to avoid invert situation
434 uIIncMax.setConstant(-0.0);
435 uIIncMin.setConstant(0.0);
436 uOtherMax = uOtherMin = uI;
437
438 for (auto iFace : c2f)
439 {
440 index iCellOther = mesh.CellFaceOther(iCell, iFace);
441 tPoint p = (fv.GetFaceQuadraturePPhysFromCell(iFace, iCell, -1, -1) -
442 fv.GetCellQuadraturePPhys(iCell, -1));
443 for (int iVarS = 0; iVarS < nCur; iVarS++)
444 {
445 real uIncPoint = uScalarGrad[iVar + iVarS].father[iCell].dot(p);
446 uIIncMax[iVarS] = std::max(uIIncMax[iVarS], uIncPoint);
447 uIIncMin[iVarS] = std::max(uIIncMin[iVarS], uIncPoint);
448 }
449 if (iCellOther != UnInitIndex)
450 {
451 for (int iVarS = 0; iVarS < nCur; iVarS++)
452 {
453 real uIncPoint = uScalar[iVar + iVarS](iCellOther);
454 uOtherMax[iVarS] = std::max(uIIncMax[iVarS], uIncPoint);
455 uOtherMin[iVarS] = std::min(uIIncMin[iVarS], uIncPoint);
456 }
457 }
458 }
459 uOtherMax -= uI;
460 uOtherMin -= uI;
461 uOtherMax = uOtherMax.array().abs() / (uIIncMax.array().abs() + verySmallReal);
462 uOtherMin = uOtherMin.array().abs() / (uIIncMin.array().abs() + verySmallReal);
463 uOtherMax = uOtherMax.array().max(0.0).min(1.0);
464 uOtherMin = uOtherMin.array().max(0.0).min(1.0);
465 for (int iVarS = 0; iVarS < nCur; iVarS++)
466 uScalarGrad[iVar + iVarS].father[iCell] *= std::min(uOtherMax[iVarS], uOtherMin[iVarS]);
467 }
468 }
469 // only for intellisense hint
473 index iCell, index iCellEnd,
474 int nVars, int nVarsScalar);
475
476 /**
477 * @brief Conservative-to-primitive conversion with gradient transformation and viscosity (per-cell).
478 *
479 * For each cell, converts conservative variables (rho, rho*u, rho*E, ...) to primitive
480 * variables (rho, u, v, w, p_or_E_internal, ...) and transforms the conservative
481 * gradients into primitive gradients using the Jacobian of the transformation.
482 * Also computes thermodynamic quantities (p, T, a, gamma) and total viscosity (mu).
483 *
484 * @tparam B DeviceBackend (Host or CUDA), defaulting to Host.
485 * @param self_view Device view providing mesh, BC handler, and physics access.
486 * @param arg Portable struct with all input/output device views.
487 * @param iPt Point (cell) index to process.
488 * @param iPtEnd Upper bound of point index range.
489 * @param nVars Total number of variables (flow + scalar).
490 * @param nVarsScalar Number of additional transported scalar variables.
491 */
    // NOTE(review): this listing lost the kernel's declaration line and the `arg`
    // parameter line in extraction (kernel is Cons2PrimMu_Kernel per the header comment
    // above; GET_REF lines for u / uScalar / uGrad / uScalarGrad / uPrim / uScalarPrim /
    // uGradPrim / T / p / a / gamma / mu are also missing) — restore from version control.
    template <DeviceBackend B = DeviceBackend::Host>
        EvaluatorDeviceView<B> &self_view,
        index iPt, index iPtEnd, int nVars, int nVarsScalar)
    {
        using namespace Geom;
        auto &mesh = self_view.fv.mesh;
        auto &fv = self_view.fv;
        auto &bcHandler = self_view.bcHandler;
        auto &phy = self_view.physics;
        //
        DNDS_EULERP_IMPL_ARG_GET_REF(uScalarGradPrim)

#ifdef DNDS_USE_CUDA
        __shared__ CUDA::SharedBuffer<index, 128> buf_idx;
        __shared__ CUDA::SharedBuffer<real, 128 * (3 * nVarsFlow)> buf_val;
#endif

        // Local primitive state/gradient; the Maps give flat component access for the
        // accessor lambdas and the CUDA coalesced write-back below.
        TU uPrimC;
        Eigen::Map<TU> uPrimCM(uPrimC.data());
        TDiffU uGradPrimC;
        Eigen::Map<TDiffU> uGradPrimCM(uGradPrimC.data());
        if (iPt < iPtEnd)
        {
            // uI(Seq01234) = u[iPt];
            // for (int iVarS = 0; iVarS < nVarsScalar; iVarS++)
            //     uI[nVarsFlow + iVarS] = uScalar[iVarS](iPt);

            // gradI(EigenAll, Seq01234) = u[iPt];
            // for (int iVarS = 0; iVarS < nVarsScalar; iVarS++)
            //     gradI(EigenAll, nVarsFlow + iVarS) = uScalarGrad[iVarS][iPt];

            // Flat accessors: flow components [0, nVarsFlow) first, scalars after.
            auto uConsI = [u, uScalar, iPt] DNDS_DEVICE(int i) mutable -> real &
            {
                if (i < nVarsFlow)
                    return u(iPt, i);
                else
                    return uScalar[i - nVarsFlow](iPt);
            };
            // Flow primitives go to the local buffer; scalar primitives straight to global.
            auto uPrimI = [uPrimCM, uScalarPrim, iPt] DNDS_DEVICE(int i) mutable -> real &
            {
                if (i < nVarsFlow)
                    return uPrimCM(i);
                else
                    return uScalarPrim[i - nVarsFlow](iPt);
            };
            auto diffUConsI = [uGrad, uScalarGrad, iPt] DNDS_DEVICE(int d, int i) mutable -> real &
            {
                if (i < nVarsFlow)
                    return uGrad[iPt](d, i);
                else
                    return uScalarGrad[i - nVarsFlow](iPt, d);
            };
            auto diffUPrimI = [uGradPrimCM, uScalarGradPrim, iPt] DNDS_DEVICE(int d, int i) mutable -> real &
            {
                if (i < nVarsFlow)
                    return uGradPrimCM(d, i);
                else
                    return uScalarGradPrim[i - nVarsFlow](iPt, d);
            };
            phy.Cons2Prim(uConsI, uPrimI, nVars);
            phy.Cons2PrimDiff(uConsI, uPrimI,
                              diffUConsI, diffUPrimI, nVars);

            // Thermodynamic scalars and total viscosity from the primitive state.
            real TT = phy.Prim2Temperature(uPrimI, nVars);
            real pp = phy.Prim2Pressure(uPrimI, nVars, TT);
            auto [gammaG, aa] = phy.Prim2GammaAcousticSpeed(uPrimI, nVars, pp);
            real muTot = phy.getMuTot(uPrimI, diffUPrimI, nVars, pp, TT);

            T(iPt) = TT;
            p(iPt) = pp;
            a(iPt) = aa;
            gamma(iPt) = gammaG;
            mu(iPt) = muTot;
            // TODO: tend to muComp!!
        }
#ifdef DNDS_USE_CUDA
        if constexpr (B == DeviceBackend::CUDA)
        {
            // Coalesced write-back of the local primitive gradient, then primitive state.
            detail::CUDA_Local2GlobalAssign<3 * nVarsFlow, 128>(
                [uGradPrimCM] DNDS_DEVICE(int i) mutable -> real &
                { return uGradPrimCM(i); },
                [uGradPrim] DNDS_DEVICE(index iPtC, int i) mutable -> real &
                { return uGradPrim[iPtC](i); },
                buf_idx, buf_val,
                iPt, iPtEnd);

            detail::CUDA_Local2GlobalAssign<nVarsFlow, 128>(
                [uPrimCM] DNDS_DEVICE(int i) mutable -> real &
                { return uPrimCM(i); },
                [uPrim] DNDS_DEVICE(index iPtC, int i) mutable -> real &
                { return uPrim[iPtC](i); },
                buf_idx, buf_val,
                iPt, iPtEnd);
        }
        else
#endif
        {
            if (iPt < iPtEnd)
            {
                uPrim[iPt] = uPrimC;
                uGradPrim[iPt] = uGradPrimC;
            }
        }
    }
610 // only for intellisense hint
614 index iPt, index iPtEnd,
615 int nVars, int nVarsScalar);
616
617 /**
618 * @brief Conservative-to-primitive conversion without gradient transformation or viscosity (per-cell).
619 *
620 * Simplified version of Cons2PrimMu_Kernel that only converts conservative to primitive
621 * variables and computes thermodynamic scalars (T, p, a, gamma). No gradient
622 * transformation or viscosity computation is performed.
623 *
624 * @tparam B DeviceBackend (Host or CUDA), defaulting to Host.
625 * @param self_view Device view providing mesh, BC handler, and physics access.
626 * @param arg Portable struct with input/output device views.
627 * @param iPt Point (cell) index to process.
628 * @param iPtEnd Upper bound of point index range.
629 * @param nVars Total number of variables (flow + scalar).
630 * @param nVarsScalar Number of additional transported scalar variables.
631 */
    // NOTE(review): this listing lost the kernel's declaration line and the `arg`
    // parameter line in extraction (kernel is Cons2Prim_Kernel per the header comment
    // above; GET_REF lines for u / uScalar / uPrim / uScalarPrim / T / p / a / gamma
    // are also missing) — restore from version control.
    template <DeviceBackend B = DeviceBackend::Host>
        EvaluatorDeviceView<B> &self_view,
        index iPt, index iPtEnd,
        int nVars, int nVarsScalar)
    {
        using namespace Geom;
        auto &mesh = self_view.fv.mesh;
        auto &fv = self_view.fv;
        auto &bcHandler = self_view.bcHandler;
        auto &phy = self_view.physics;

        //

#ifdef DNDS_USE_CUDA
        __shared__ CUDA::SharedBuffer<index, 128> buf_idx;
        __shared__ CUDA::SharedBuffer<real, 128 * (1 * nVarsFlow)> buf_val;
#endif
        // Local primitive flow state; the Map gives flat component access.
        TU uPrimC;
        Eigen::Map<TU> uPrimCM(uPrimC.data());
        if (iPt < iPtEnd)
        {
            // Flat accessors: flow components [0, nVarsFlow) first, scalars after.
            auto uConsI = [u, uScalar, iPt] DNDS_DEVICE(int i) mutable -> real &
            {
                if (i < nVarsFlow)
                    return u(iPt, i);
                else
                    return uScalar[i - nVarsFlow](iPt);
            };
            // Flow primitives go to the local buffer; scalar primitives straight to global.
            auto uPrimI = [uPrimCM, uScalarPrim, iPt] DNDS_DEVICE(int i) mutable -> real &
            {
                if (i < nVarsFlow)
                    return uPrimCM(i);
                else
                    return uScalarPrim[i - nVarsFlow](iPt);
            };
            phy.Cons2Prim(uConsI, uPrimI, nVars);

            // Thermodynamic scalars from the primitive state (no viscosity here,
            // unlike Cons2PrimMu_Kernel).
            real TT = phy.Prim2Temperature(uPrimI, nVars);
            real pp = phy.Prim2Pressure(uPrimI, nVars, TT);
            auto [gammaG, aa] = phy.Prim2GammaAcousticSpeed(uPrimI, nVars, pp);

            T(iPt) = TT;
            p(iPt) = pp;
            a(iPt) = aa;
            gamma(iPt) = gammaG;
        }
#ifdef DNDS_USE_CUDA
        if constexpr (B == DeviceBackend::CUDA)
        {
            // Coalesced write-back of the local primitive state.
            detail::CUDA_Local2GlobalAssign<nVarsFlow, 128>(
                [uPrimCM] DNDS_DEVICE(int i) mutable -> real &
                { return uPrimCM(i); },
                [uPrim] DNDS_DEVICE(index iPtC, int i) mutable -> real &
                { return uPrim[iPtC](i); },
                buf_idx, buf_val,
                iPt, iPtEnd);
        }
        else
#endif
        {
            if (iPt < iPtEnd)
            {
                uPrim[iPt] = uPrimC;
            }
        }
    }
708 // only for intellisense hint
712 index iPt, index iPtEnd,
713 int nVars, int nVarsScalar);
714
715 /**
716 * @brief Per-face eigenvalue estimation kernel for time-step computation.
717 *
718 * For each face, estimates the convective eigenvalues (lam-, lam0, lam+) as the
719 * maximum of left/right normal velocity +/- speed of sound. Also estimates the
720 * viscous eigenvalue contribution based on face-averaged viscosity and the
721 * face-area-to-volume ratio. Computes deltaLamFace for H-correction.
722 *
723 * @tparam B DeviceBackend (Host or CUDA), defaulting to Host.
724 * @param self_view Device view providing mesh, BC handler, and physics access.
725 * @param arg Portable struct with device views of state, eigenvalue, and dt arrays.
726 * @param iFace Face index to process.
727 * @param iFaceEnd Upper bound of face index range.
728 * @param nVars Total number of variables (flow + scalar).
729 * @param nVarsScalar Number of additional transported scalar variables.
730 */
731 template <DeviceBackend B = DeviceBackend::Host>
733 EvaluatorDeviceView<B> &self_view,
735 index iFace, index iFaceEnd,
736 int nVars, int nVarsScalar)
737 {
738 using namespace Geom;
739 auto &mesh = self_view.fv.mesh;
740 auto &fv = self_view.fv;
741 auto &bcHandler = self_view.bcHandler;
742 auto &phy = self_view.physics;
743
748 DNDS_EULERP_IMPL_ARG_GET_REF(faceLamVisEst)
749 DNDS_EULERP_IMPL_ARG_GET_REF(deltaLamFace)
750
751 if (iFace >= iFaceEnd)
752 return;
753
754 index iCellL = mesh.face2cell(iFace, 0);
755 index iCellR = mesh.face2cell(iFace, 1);
756 if ((0 > iCellL || iCellL >= mesh.NumCell()) &&
757 (0 > iCellR || iCellR >= mesh.NumCell()) && iCellR != UnInitIndex)
758 {
759 faceLamEst(iFace, 0) = UnInitReal;
760 faceLamEst(iFace, 1) = UnInitReal;
761 faceLamEst(iFace, 2) = UnInitReal;
762 faceLamVisEst(iFace) = UnInitReal;
763 deltaLamFace(iFace) = UnInitReal;
764 // return; // skip if either side is not needed
765 }
766 TU uL = u[iCellL];
767 TU uR = uL;
768 tPoint nFace = fv.GetFaceNorm(iFace, -1);
769 real volL = fv.GetCellVol(iCellL);
770 real volR = volL;
771 real vnL = U123(uL).dot(nFace) / uL[0];
772 real vnR = U123(uR).dot(nFace) / uR[0];
773 real aL = aCell(iCellL);
774 real aR = aL;
775 real muFace = muCell(iCellL);
776 if (iCellR != UnInitIndex)
777 {
778 uR = u[iCellR];
779 aR = aCell(iCellR);
780 muFace = 0.5 * (muFace + muCell(iCellR));
781 volR = fv.GetCellVol(iCellR);
782 }
783 faceLamEst(iFace, 0) = std::max(std::abs(vnL - aL), std::abs(vnR - aR));
784 faceLamEst(iFace, 1) = std::max(std::abs(vnL), std::abs(vnR));
785 faceLamEst(iFace, 2) = std::max(std::abs(vnL + aL), std::abs(vnR + aR));
786 real rhoMean = 0.5 * (uL[0] + uR[0]);
787 real nuVis = muFace / rhoMean * 2.;
788 faceLamVisEst(iFace) = nuVis * fv.GetFaceArea(iFace) *
789 (1. / volL + 1. / volR);
790
791 //! need to verify this
792 deltaLamFace(iFace) = std::max({std::abs(vnL - aL - vnR + aR),
793 std::abs(vnL - vnR),
794 std::abs(vnL - aL - vnR + aR)});
795 }
796
797 // only for intellisense hint
801 index iFace, index iFaceEnd,
802 int nVars, int nVarsScalar);
803
804 /**
805 * @brief Per-cell kernel converting face eigenvalues to a local CFL time step.
806 *
807 * For each cell, sums the maximum face eigenvalue (convective + viscous) weighted by
808 * face area across all cell faces. The local time step is computed as:
809 * dt = V * smoothScaleRatio / sum(lambda_face * A_face).
810 * Also computes the per-cell maximum deltaLamFace for H-correction.
811 *
812 * @tparam B DeviceBackend (Host or CUDA), defaulting to Host.
813 * @param self_view Device view providing mesh and FV geometry access.
814 * @param arg Portable struct with device views of eigenvalue and dt arrays.
815 * @param iCell Cell index to process.
816 * @param iCellEnd Upper bound of cell index range.
817 * @param nVars Total number of variables (flow + scalar).
818 * @param nVarsScalar Number of additional transported scalar variables.
819 */
820 template <DeviceBackend B = DeviceBackend::Host>
822 EvaluatorDeviceView<B> &self_view,
824 index iCell, index iCellEnd,
825 int nVars, int nVarsScalar)
826 {
827 using namespace Geom;
828 auto &mesh = self_view.fv.mesh;
829 auto &fv = self_view.fv;
830 auto &bcHandler = self_view.bcHandler;
831 auto &phy = self_view.physics;
832
834 DNDS_EULERP_IMPL_ARG_GET_REF(faceLamVisEst)
835 DNDS_EULERP_IMPL_ARG_GET_REF(deltaLamFace)
836 DNDS_EULERP_IMPL_ARG_GET_REF(deltaLamCell)
838
839 if (iCell >= iCellEnd)
840 return;
841
842 real lambdaCellC = 0.0;
843 real dLambdaCellC = 0.0;
844 auto c2f = mesh.cell2face.father[iCell];
845 for (auto iFace : c2f)
846 {
847 real lambdaFace =
848 std::max({faceLamEst(iFace, 0),
849 faceLamEst(iFace, 1),
850 faceLamEst(iFace, 2)}) +
851 faceLamVisEst(iFace);
852 lambdaCellC += lambdaFace * fv.GetFaceArea(iFace);
853 dLambdaCellC = std::max(dLambdaCellC, deltaLamFace(iFace));
854 }
855 dt(iCell) = fv.GetCellVol(iCell) * fv.GetCellSmoothScaleRatio(iCell) /
856 (lambdaCellC + verySmallReal);
857 deltaLamCell(iCell) = dLambdaCellC;
858 }
859
860 // only for intellisense hint
864 index iCell, index iCellEnd,
865 int nVars, int nVarsScalar);
866
867 /**
868 * @brief 2nd-order face value reconstruction kernel (per-face).
869 *
870 * For each face, extrapolates cell-centered values using the cell gradient:
871 * u_face = u_cell + grad(u) . (x_face - x_cell).
872 * For internal faces, computes both left and right states and a face-averaged gradient
873 * with a correction term proportional to the jump (uR - uL) / distance.
874 * For boundary faces, applies the BC handler to generate the right (ghost) state.
875 * Also reconstructs transported scalar face values and gradients.
876 *
877 * @tparam B DeviceBackend (Host or CUDA), defaulting to Host.
878 * @param self_view Device view providing mesh, BC handler, and physics access.
879 * @param arg Portable struct with device views of state, gradient, and face output arrays.
880 * @param iFace Face index to process.
881 * @param iFaceEnd Upper bound of face index range.
882 * @param nVars Total number of variables (flow + scalar).
883 * @param nVarsScalar Number of additional transported scalar variables.
884 */
885 template <DeviceBackend B = DeviceBackend::Host>
887 EvaluatorDeviceView<B> &self_view,
889 index iFace, index iFaceEnd,
890 int nVars, int nVarsScalar)
891 {
892 using namespace Geom;
893 auto &mesh = self_view.fv.mesh;
894 auto &fv = self_view.fv;
895 auto &bcHandler = self_view.bcHandler;
896 auto &phy = self_view.physics;
897
902
906
909 DNDS_EULERP_IMPL_ARG_GET_REF(uScalarGradFF)
910
911#ifdef DNDS_USE_CUDA
912 __shared__ CUDA::SharedBuffer<index, 128> buf_idx;
913 __shared__ CUDA::SharedBuffer<real, 128 * (3 * nVarsFlow)> buf_val;
914#endif
915
916 TDiffU uGradFFC;
917 Eigen::Map<TDiffU> uGradFFCM(uGradFFC.data());
918
919 if (iFace < iFaceEnd)
920 {
921 index iCellL = mesh.face2cell(iFace, 0);
922 index iCellR = mesh.face2cell(iFace, 1);
923 if ((0 > iCellL || iCellL >= mesh.NumCell()) &&
924 (0 > iCellR || iCellR >= mesh.NumCell()) && iCellR != UnInitIndex)
925 {
926 //?
927 // return; // skip if either side is not needed
928 }
929 tPoint xrel_L = (fv.GetFaceQuadraturePPhys(iFace, -1) -
930 fv.GetCellQuadraturePPhys(iCellL, -1));
931 tPoint xrel_R;
932 tPoint faceNorm = fv.GetFaceNorm(iFace, -1);
933 real faceLScale = veryLargeReal;
934 if (iCellR != UnInitIndex)
935 {
936 // relative to cellR!
937 xrel_R = (fv.GetFaceQuadraturePPhysFromCell(iFace, iCellR, 1, -1) -
938 fv.GetCellQuadraturePPhys(iCellR, -1));
939 faceLScale = (fv.GetOtherCellBaryFromCell(iCellL, iCellR, iFace) - fv.GetCellBary(iCellL)).norm();
940 }
941
942 uFL[iFace].noalias() = u[iCellL] + uGrad[iCellL].transpose() * xrel_L;
943 uGradFFC = uGrad[iCellL];
944 if (iCellR != UnInitIndex)
945 {
946 uFR[iFace].noalias() = u[iCellR] + uGrad[iCellR].transpose() * xrel_R;
947 uGradFFC.noalias() += uGrad[iCellR];
948 uGradFFC *= 0.5;
949 uGradFFC.noalias() += faceNorm * (uFR[iFace] - uFL[iFace]).transpose() * (1. / faceLScale);
950 }
951
952 for (int iVarS = 0; iVarS < nVarsScalar; iVarS++)
953 {
954 uScalarFL[iVarS](iFace) = uScalarGrad[iVarS][iCellL].dot(xrel_L);
955 uScalarGradFF[iVarS][iFace] = uScalarGrad[iVarS][iCellL];
956 if (iCellR != UnInitIndex)
957 {
958 uScalarFR[iVarS](iFace) = uScalarGrad[iVarS][iCellL].dot(xrel_L);
959 uScalarGradFF[iVarS][iFace].noalias() += uScalarGrad[iVarS][iCellR];
960 uScalarGradFF[iVarS][iFace] *= 0.5;
961 uScalarGradFF[iVarS][iFace].noalias() += faceNorm * (uScalarFR[iVarS](iFace) - uScalarFL[iVarS](iFace)) * (1. / faceLScale);
962 }
963 }
964
965 auto uL = [uFL, uScalarFL, iFace] DNDS_DEVICE(int i) mutable -> real &
966 {
967 if (i < nVarsFlow)
968 return uFL(iFace, i);
969 else
970 return uScalarFL[i - nVarsFlow](iFace);
971 };
972 auto uR = [uFR, uScalarFR, iFace] DNDS_DEVICE(int i) mutable -> real &
973 {
974 if (i < nVarsFlow)
975 return uFR(iFace, i);
976 else
977 return uScalarFR[i - nVarsFlow](iFace);
978 };
979 if (iCellR == UnInitIndex)
980 {
981 auto bc = bcHandler.id2bc(mesh.GetFaceZone(iFace));
982 bc.apply(uL, uR, nVars,
983 fv.GetFaceQuadraturePPhys(iFace, -1),
984 fv.GetFaceNorm(iFace, -1),
985 phy);
986 }
987 // TODO: periodic handling of vectors from cell to face
988 }
989#ifdef DNDS_USE_CUDA
990 if constexpr (B == DeviceBackend::CUDA)
991 {
992 detail::CUDA_Local2GlobalAssign<3 * nVarsFlow, 128>(
993 [uGradFFCM] DNDS_DEVICE(int i) mutable -> real &
994 { return uGradFFCM(i); },
995 [uGradFF] DNDS_DEVICE(index iFaceC, int i) mutable -> real &
996 { return uGradFF[iFaceC](i); },
997 buf_idx, buf_val,
998 iFace, iFaceEnd);
999 }
1000 else
1001#endif
1002 {
1003 if (iFace < iFaceEnd)
1004 {
1005 uGradFF[iFace] = uGradFFC;
1006 }
1007 }
1008 }
1009
1010 // only for intellisense hint
1014 index iFace, index iFaceEnd,
1015 int nVars, int nVarsScalar);
1016
1017 /**
1018 * @brief 2nd-order Roe inviscid flux computation kernel (per-face).
1019 *
1020 * For each face, computes the Roe-averaged state (velocity, enthalpy, speed of sound),
1021 * evaluates the three characteristic eigenvalues with H-correction fixing, and assembles
1022 * the numerical flux via RoeFluxFlow. The result is stored in fluxFF.
1023 *
1024 * @note Scalar flux and viscous flux are not yet implemented (marked TODO).
1025 *
1026 * @tparam B DeviceBackend (Host or CUDA), defaulting to Host.
1027 * @param self_view Device view providing mesh, BC handler, and physics access.
1028 * @param arg Portable struct with device views of all state, face, thermodynamic, and flux arrays.
1029 * @param iFace Face index to process.
1030 * @param iFaceEnd Upper bound of face index range.
1031 * @param nVars Total number of variables (flow + scalar).
1032 * @param nVarsScalar Number of additional transported scalar variables.
1033 */
1034 template <DeviceBackend B = DeviceBackend::Host>
1036 EvaluatorDeviceView<B> &self_view,
1038 index iFace, index iFaceEnd,
1039 int nVars, int nVarsScalar)
1040 {
1041 using namespace Geom;
     // Short-hand references into the device view: mesh topology, FV
     // geometry, boundary-condition handler and the gas physics model.
1042 auto &mesh = self_view.fv.mesh;
1043 auto &fv = self_view.fv;
1044 auto &bcHandler = self_view.bcHandler;
1045 auto &phy = self_view.physics;
1046
     // Bind references to the device arrays out of the Portable arg struct.
     // NOTE(review): several DNDS_EULERP_IMPL_ARG_GET_REF lines are elided in
     // this generated listing; the names used below (u, uScalar, uPrim, p, a,
     // uFL, uFR, pFL, pFR, fluxFF, ...) are presumably bound the same way --
     // confirm against the full source file.
1050 DNDS_EULERP_IMPL_ARG_GET_REF(uScalarGrad)
1051
1054 DNDS_EULERP_IMPL_ARG_GET_REF(uScalarPrim)
1055 DNDS_EULERP_IMPL_ARG_GET_REF(uScalarGradPrim)
1062 DNDS_EULERP_IMPL_ARG_GET_REF(deltaLamCell)
1063
1067
1070 DNDS_EULERP_IMPL_ARG_GET_REF(uScalarGradFF)
1071
1074
1076 DNDS_EULERP_IMPL_ARG_GET_REF(fluxScalarFF)
1077 // DNDS_EULERP_IMPL_ARG_GET_REF(rhs)
1078 // DNDS_EULERP_IMPL_ARG_GET_REF(rhsScalar)
1079
1080#ifdef DNDS_USE_CUDA
     // Block-shared staging buffers (128 matches the template block size used
     // below) consumed by detail::CUDA_Local2GlobalAssign to write the
     // per-face flux back to global memory.
1081 __shared__ CUDA::SharedBuffer<index, 128> buf_idx;
1082 __shared__ CUDA::SharedBuffer<real, 128 * (1 * nVarsFlow)> buf_val;
1083#endif
1084
     // Thread-local flux accumulator; FFlowM maps its storage so the CUDA
     // write-back lambda below can address it element-wise.
1085 TU FFlow;
1086 Eigen::Map<TU> FFlowM(FFlow.data());
1087
1088 if (iFace < iFaceEnd)
1089 {
1090 index iCellL = mesh.face2cell(iFace, 0);
1091 index iCellR = mesh.face2cell(iFace, 1);
     // NOTE(review): this range test currently has an empty body (the skip is
     // commented out), so no face is actually skipped here.
1092 if ((0 > iCellL || iCellL >= mesh.NumCell()) &&
1093 (0 > iCellR || iCellR >= mesh.NumCell()) && iCellR != UnInitIndex)
1094 {
1095 //?
1096 // continue; // skip if either side is not needed
1097 }
1098 tPoint n = fv.GetFaceNorm(iFace, -1);
1099
     // For boundary faces (iCellR == UnInitIndex) the left cell stands in for
     // the "right" cell-centred data; the face values uFL/uFR themselves were
     // produced earlier (with ghost states applied on boundaries).
1100 index iCellRR = iCellR == UnInitIndex ? iCellL : iCellR;
     // Flat accessors over the combined (flow + scalar) variable vector:
     // index i < nVarsFlow addresses the flow state, otherwise the
     // transported scalar i - nVarsFlow.
1101 auto uLm = [u, uScalar, iCellL] DNDS_DEVICE(int i) mutable -> real &
1102 {
1103 if (i < nVarsFlow)
1104 return u(iCellL, i);
1105 else
1106 return uScalar[i - nVarsFlow](iCellL);
1107 };
1108 auto uRm = [u, uScalar, iCellRR] DNDS_DEVICE(int i) mutable -> real &
1109 {
1110 if (i < nVarsFlow)
1111 return u(iCellRR, i);
1112 else
1113 return uScalar[i - nVarsFlow](iCellRR);
1114 };
1115 auto uLmPrim = [uPrim, uScalarPrim, iCellL] DNDS_DEVICE(int i) mutable -> real &
1116 {
1117 if (i < nVarsFlow)
1118 return uPrim(iCellL, i);
1119 else
1120 return uScalarPrim[i - nVarsFlow](iCellL);
1121 };
1122 auto uRmPrim = [uPrim, uScalarPrim, iCellRR] DNDS_DEVICE(int i) mutable -> real &
1123 {
1124 if (i < nVarsFlow)
1125 return uPrim(iCellRR, i);
1126 else
1127 return uScalarPrim[i - nVarsFlow](iCellRR);
1128 };
1129 tPoint veloRoe;
1130 real vsqrRoe{0}, HRoe{0}, rhoRoe{0}, aSqrRoe{0};
1131
     // Roe-averaged state from the two cell-centred states, then the three
     // characteristic wave speeds |u_n - a|, |u_n|, |u_n + a|.
1132 RoeAverageNS(uLm, uRm, uLmPrim, uRmPrim, nVars, p(iCellL), p(iCellRR),
1133 phy, veloRoe, vsqrRoe, HRoe, rhoRoe, aSqrRoe);
1134 real veloRoeN = veloRoe.dot(n);
1135 real aRoe = std::sqrt(std::abs(aSqrRoe));
1136 real lam0 = std::abs(veloRoeN - aRoe);
1137 real lam123 = std::abs(veloRoeN);
1138 real lam4 = std::abs(veloRoeN + aRoe);
1139
1140 tPoint vL = U123(uPrim[iCellL]);
1141 tPoint vR = U123(uPrim[iCellRR]);
1142 real aL = a(iCellL);
1143 real aR = a(iCellRR);
1144 real pL = p(iCellL);
1145 real pR = p(iCellRR);
1146 TU UL = uFL[iFace];
1147 TU UR = uFR[iFace];
1148
     // Entropy fix on the Roe eigenvalues (the RoeEigenValueFixer( call head
     // is elided in this listing); the fix width is the larger deltaLamCell
     // of the two adjacent cells, with fixScale = 1.0.
1150 aRoe, aRoe, veloRoeN, veloRoeN,
1151 std::max(deltaLamCell(iCellL), deltaLamCell(iCellRR)),
1152 1.0, lam0, lam123, lam4);
1153
     // Max local wave speed (Rusanov-style bound); only consumed by the
     // commented-out eigenvalue override below.
     // NOTE(review): lamm, pL and pR appear unused in the active code path --
     // candidates for removal once the override experiment is settled.
1154 real lamm = std::max(aL + std::abs(vL.dot(n)), aR + std::abs(vR.dot(n)));
1155 // lam0 = lam123 = lam4 = lamm;
1156
1157 FFlow.setZero();
1158
     // Assemble the Roe numerical flux from the reconstructed face states
     // UL/UR and face pressures pFL/pFR; vgn = 0 (no mesh motion yet).
1159 RoeFluxFlow(UL, UR,
1160 pFL(iFace), pFR(iFace), veloRoe,
1161 vsqrRoe, 0.0 /* vgn = 0 for now*/,
1162 n, aSqrRoe, aRoe,
1163 HRoe, phy, lam0, lam123, lam4, FFlow);
1164
     // NOTE(review): fluxFF[iFace] is stored here unconditionally AND again
     // through the CUDA staging path / host else-branch below -- one of the
     // stores looks redundant; confirm which is intended.
1165 fluxFF[iFace] = FFlow;
1166
1167 // TODO: scalar's flux
1168 // TODO: viscous flux
1169 }
1170#ifdef DNDS_USE_CUDA
     // CUDA path: stage the per-thread flux through shared memory so the
     // global stores into fluxFF are coalesced across the thread block.
1171 if constexpr (B == DeviceBackend::CUDA)
1172 {
1173 detail::CUDA_Local2GlobalAssign<1 * nVarsFlow, 128>(
1174 [FFlowM] DNDS_DEVICE(int i) mutable -> real &
1175 { return FFlowM(i); },
1176 [fluxFF] DNDS_DEVICE(index iFaceC, int i) mutable -> real &
1177 { return fluxFF[iFaceC](i); },
1178 buf_idx, buf_val,
1179 iFace, iFaceEnd);
1180 }
1181 else
1182#endif
     // Host path: plain direct store of the computed flux.
1183 {
1184 if (iFace < iFaceEnd)
1185 {
1186 fluxFF[iFace] = FFlow;
1187 }
1188 }
1189 }
1190
1191 // only for intellisense hint
1195 index iFace, index iFaceEnd,
1196 int nVars, int nVarsScalar);
1197
1198 /**
1199 * @brief Scatters face fluxes to cell RHS residual (per-cell).
1200 *
1201 * For each cell, accumulates the face-level numerical fluxes into the cell's RHS
1202 * vector. Each face flux is weighted by -Area / Volume * sign, where sign accounts
1203 * for the face normal orientation relative to the cell. Also accumulates scalar
1204 * fluxes into rhsScalar.
1205 *
1206 * @tparam B DeviceBackend (Host or CUDA), defaulting to Host.
1207 * @param self_view Device view providing mesh and FV geometry access.
1208 * @param arg Portable struct with device views of flux and RHS arrays.
1209 * @param iCell Cell index to process.
1210 * @param iCellEnd Upper bound of cell index range.
1211 * @param nVars Total number of variables (flow + scalar).
1212 * @param nVarsScalar Number of additional transported scalar variables.
1213 */
1214 template <DeviceBackend B = DeviceBackend::Host>
1216 EvaluatorDeviceView<B> &self_view,
1218 index iCell, index iCellEnd,
1219 int nVars, int nVarsScalar)
1220 {
1221 using namespace Geom;
     // View aliases; bcHandler and phy are not referenced in the visible body
     // -- presumably kept for symmetry with the sibling kernels (confirm).
1222 auto &mesh = self_view.fv.mesh;
1223 auto &fv = self_view.fv;
1224 auto &bcHandler = self_view.bcHandler;
1225 auto &phy = self_view.physics;
1226
     // NOTE(review): the ARG_GET_REF bindings for fluxFF, rhs and rhsScalar
     // are elided in this generated listing; they are used below -- confirm
     // against the full source file.
1228 DNDS_EULERP_IMPL_ARG_GET_REF(fluxScalarFF)
1231
1232#ifdef DNDS_USE_CUDA
     // Block-shared staging buffers for the coalesced rhs write-back.
1233 __shared__ CUDA::SharedBuffer<index, 128> buf_idx;
1234 __shared__ CUDA::SharedBuffer<real, 128 * (1 * nVarsFlow)> buf_val;
1235#endif
1236
     // Thread-local accumulator for this cell's flow RHS; seeded from the
     // existing rhs and written back exactly once at the end.
1237 TU rhsC;
1238 Eigen::Map<TU> rhsCM(rhsC.data());
1239
1240 if (iCell < iCellEnd)
1241 {
1242 rhsC = rhs[iCell];
1243 auto c2f = mesh.cell2face.father[iCell];
1244 for (auto iFace : c2f)
1245 {
         // face_sign orients the stored face normal outward w.r.t. this cell;
         // each face contributes -Area/Volume * sign times its flux
         // (standard finite-volume surface integral).
1246 real face_sign = mesh.CellIsFaceBack(iCell, iFace) ? 1 : -1;
1247 real face_coef = -fv.GetFaceArea(iFace) / fv.GetCellVol(iCell) * face_sign;
1248 rhsC += fluxFF[iFace] * face_coef;
         // NOTE(review): unlike the flow rhs, the scalar rhs is accumulated
         // straight into global memory with no local/shared staging --
         // asymmetric with the flow path; confirm intended on CUDA.
1249 for (int iVarS = 0; iVarS < nVarsScalar; iVarS++)
1250 rhsScalar[iVarS](iCell) += fluxScalarFF[iVarS](iFace) * face_coef;
1251 }
1252 }
1253#ifdef DNDS_USE_CUDA
     // CUDA path: stage rhsC through shared memory so the global stores into
     // rhs are coalesced across the thread block (assignment, not addition --
     // rhsC already contains the prior rhs[iCell] value).
1254 if constexpr (B == DeviceBackend::CUDA)
1255 {
1256 detail::CUDA_Local2GlobalAssign<1 * nVarsFlow, 128>(
1257 [rhsCM] DNDS_DEVICE(int i) mutable -> real &
1258 { return rhsCM(i); },
1259 [rhs] DNDS_DEVICE(index iCellC, int i) mutable -> real &
1260 { return rhs[iCellC](i); },
1261 buf_idx, buf_val,
1262 iCell, iCellEnd);
1263 }
1264 else
1265#endif
     // Host path: plain write-back of the accumulated RHS.
1266 {
1267 if (iCell < iCellEnd)
1268 {
1269 rhs[iCell] = rhsC;
1270 }
1271 }
1272 }
1273
1274 // only for intellisense hint
1278 index iCell, index iCellEnd,
1279 int nVars, int nVarsScalar);
1280}
Core type aliases, constants, and metaprogramming utilities for the DNDS framework.
#define DNDS_DEVICE
Definition Defines.hpp:77
Device memory abstraction layer with backend-specific storage and factory creation.
Assertion / error-handling macros and supporting helper functions.
Core type definitions and utilities for the EulerP alternative Navier-Stokes evaluator module.
Approximate Riemann Solver (Roe scheme) for the EulerP 5-equation Navier-Stokes system.
Backend-specific implementation layer for EulerP Evaluator kernel dispatch.
#define DNDS_EULERP_IMPL_ARG_GET_REF(member)
Internal utility types for the EulerP Evaluator kernel implementations.
Geom::UnstructuredMeshDeviceView< B > mesh
Device-callable view of the Evaluator, combining finite volume, BC handler, and physics views.
t_bcHandler bcHandler
Device view of the boundary condition handler.
t_fv fv
Device view of the finite volume / VFV reconstruction object.
t_physics physics
Device view of the gas physics model.
Namespace for the EulerP alternative evaluator module with GPU support.
Definition EulerP.hpp:29
DNDS_DEVICE void EstEigenDt_FaceLam2CellDt_Kernel(EvaluatorDeviceView< B > &self_view, typename Evaluator_impl< B >::EstEigenDt_Arg::Portable &arg, index iCell, index iCellEnd, int nVars, int nVarsScalar)
Per-cell kernel converting face eigenvalues to a local CFL time step.
DNDS_DEVICE_CALLABLE void RoeAverageNS(TUL &&UL, TUR &&UR, TULPrim &&ULPrim, TURPrim &&URPrim, int nVars, real pL, real pR, PhysicsDeviceView< B > &phy, Geom::tPoint &veloRoe, real &vsqrRoe, real &HRoe, real &rhoRoe, real &aSqrRoe)
Computes Roe-averaged quantities from left and right states.
template DNDS_DEVICE void EstEigenDt_GetFaceLam_Kernel< DeviceBackend::Host >(EvaluatorDeviceView< DeviceBackend::Host > &self_view, typename Evaluator_impl< DeviceBackend::Host >::EstEigenDt_Arg::Portable &arg, index iFace, index iFaceEnd, int nVars, int nVarsScalar)
DNDS_DEVICE void Flux2nd_Kernel_FluxFace(EvaluatorDeviceView< B > &self_view, typename Evaluator_impl< B >::Flux2nd_Arg::Portable &arg, index iFace, index iFaceEnd, int nVars, int nVarsScalar)
2nd-order Roe inviscid flux computation kernel (per-face).
template DNDS_DEVICE void Flux2nd_Kernel_FluxFace< DeviceBackend::Host >(EvaluatorDeviceView< DeviceBackend::Host > &self_view, typename Evaluator_impl< DeviceBackend::Host >::Flux2nd_Arg::Portable &arg, index iFace, index iFaceEnd, int nVars, int nVarsScalar)
template DNDS_DEVICE void RecGradient_BarthLimiter_Kernel_FlowPart< DeviceBackend::Host >(EvaluatorDeviceView< DeviceBackend::Host > &self_view, typename Evaluator_impl< DeviceBackend::Host >::RecGradient_Arg::Portable &arg, index iCell, index iCellEnd, int nVars, int nVarsScalar)
template DNDS_DEVICE void Flux2nd_Kernel_Face2Cell< DeviceBackend::Host >(EvaluatorDeviceView< DeviceBackend::Host > &self_view, typename Evaluator_impl< DeviceBackend::Host >::Flux2nd_Arg::Portable &arg, index iCell, index iCellEnd, int nVars, int nVarsScalar)
DNDS_DEVICE void RecGradient_BarthLimiter_Kernel_FlowPart(EvaluatorDeviceView< B > &self_view, typename Evaluator_impl< B >::RecGradient_Arg::Portable &arg, index iCell, index iCellEnd, int nVars, int nVarsScalar)
Barth-Jespersen gradient limiter kernel for flow variables (per-cell).
DNDS_DEVICE void RecFace2nd_Kernel(EvaluatorDeviceView< B > &self_view, typename Evaluator_impl< B >::RecFace2nd_Arg::Portable &arg, index iFace, index iFaceEnd, int nVars, int nVarsScalar)
2nd-order face value reconstruction kernel (per-face).
DNDS_DEVICE void RecGradient_GGRec_Kernel_GG(EvaluatorDeviceView< B > &self_view, typename Evaluator_impl< B >::RecGradient_Arg::Portable &arg, index iCell, index iCellEnd, int nVars, int nVarsScalar)
Green-Gauss gradient reconstruction kernel (per-cell).
template DNDS_DEVICE void RecFace2nd_Kernel< DeviceBackend::Host >(EvaluatorDeviceView< DeviceBackend::Host > &self_view, typename Evaluator_impl< DeviceBackend::Host >::RecFace2nd_Arg::Portable &arg, index iFace, index iFaceEnd, int nVars, int nVarsScalar)
DNDS_DEVICE_CALLABLE void RoeFluxFlow(const TU &UL, const TU &UR, real pL, real pR, const Geom::tPoint &veloRoe, real vsqrRoe, real vgn, const Geom::tPoint &n, real asqrRoe, real aRoe, real HRoe, PhysicsDeviceView< B > &phy, real lam0, real lam123, real lam4, TU &F)
Computes the complete Roe numerical flux for the 5-equation flow system.
DNDS_DEVICE void Cons2Prim_Kernel(EvaluatorDeviceView< B > &self_view, typename Evaluator_impl< B >::Cons2Prim_Arg::Portable &arg, index iPt, index iPtEnd, int nVars, int nVarsScalar)
Conservative-to-primitive conversion without gradient transformation or viscosity (per-cell).
DNDS_DEVICE_CALLABLE DNDS_FORCEINLINE auto U123(TU &&v)
Extracts the momentum components (indices 1,2,3) from a state vector as a 3x1 block.
Definition EulerP.hpp:60
DNDS_DEVICE void RecGradient_BarthLimiter_Kernel_ScalarPart(EvaluatorDeviceView< B > &self_view, typename Evaluator_impl< B >::RecGradient_Arg::Portable &arg, index iCell, index iCellEnd, int nVars, int nVarsScalar)
Barth-Jespersen gradient limiter kernel for transported scalar variables (per-cell).
template DNDS_DEVICE void EstEigenDt_FaceLam2CellDt_Kernel< DeviceBackend::Host >(EvaluatorDeviceView< DeviceBackend::Host > &self_view, typename Evaluator_impl< DeviceBackend::Host >::EstEigenDt_Arg::Portable &arg, index iCell, index iCellEnd, int nVars, int nVarsScalar)
template DNDS_DEVICE void RecGradient_GGRec_Kernel_BndVal< DeviceBackend::Host >(EvaluatorDeviceView< DeviceBackend::Host > &self_view, typename Evaluator_impl< DeviceBackend::Host >::RecGradient_Arg::Portable &arg, index iBnd, index iBndEnd, int nVars, int nVarsScalar)
template DNDS_DEVICE void RecGradient_BarthLimiter_Kernel_ScalarPart< DeviceBackend::Host >(EvaluatorDeviceView< DeviceBackend::Host > &self_view, typename Evaluator_impl< DeviceBackend::Host >::RecGradient_Arg::Portable &arg, index iCell, index iCellEnd, int nVars, int nVarsScalar)
DNDS_DEVICE void Flux2nd_Kernel_Face2Cell(EvaluatorDeviceView< B > &self_view, typename Evaluator_impl< B >::Flux2nd_Arg::Portable &arg, index iCell, index iCellEnd, int nVars, int nVarsScalar)
Scatters face fluxes to cell RHS residual (per-cell).
template DNDS_DEVICE void Cons2PrimMu_Kernel< DeviceBackend::Host >(EvaluatorDeviceView< DeviceBackend::Host > &self_view, typename Evaluator_impl< DeviceBackend::Host >::Cons2PrimMu_Arg::Portable &arg, index iPt, index iPtEnd, int nVars, int nVarsScalar)
DNDS_DEVICE void RecGradient_GGRec_Kernel_BndVal(EvaluatorDeviceView< B > &self_view, typename Evaluator_impl< B >::RecGradient_Arg::Portable &arg, index iBnd, index iBndEnd, int nVars, int nVarsScalar)
Generates boundary ghost values for Green-Gauss gradient reconstruction.
Eigen::Vector< real, nVarsFlow > TU
Fixed-size 5-component conservative state vector (rho, rhoU, rhoV, rhoW, E).
Definition EulerP.hpp:33
DNDS_DEVICE void EstEigenDt_GetFaceLam_Kernel(EvaluatorDeviceView< B > &self_view, typename Evaluator_impl< B >::EstEigenDt_Arg::Portable &arg, index iFace, index iFaceEnd, int nVars, int nVarsScalar)
Per-face eigenvalue estimation kernel for time-step computation.
DNDS_DEVICE_CALLABLE void RoeEigenValueFixer(real aL, real aR, real vnL, real vnR, real dLambda, real fixScale, real &lam0, real &lam123, real &lam4)
Applies entropy fix to Roe eigenvalues to prevent expansion shocks.
template DNDS_DEVICE void RecGradient_GGRec_Kernel_GG< DeviceBackend::Host >(EvaluatorDeviceView< DeviceBackend::Host > &self_view, typename Evaluator_impl< DeviceBackend::Host >::RecGradient_Arg::Portable &arg, index iCell, index iCellEnd, int nVars, int nVarsScalar)
DNDS_DEVICE void Cons2PrimMu_Kernel(EvaluatorDeviceView< B > &self_view, typename Evaluator_impl< B >::Cons2PrimMu_Arg::Portable &arg, index iPt, index iPtEnd, int nVars, int nVarsScalar)
Conservative-to-primitive conversion with gradient transformation and viscosity (per-cell).
template DNDS_DEVICE void Cons2Prim_Kernel< DeviceBackend::Host >(EvaluatorDeviceView< DeviceBackend::Host > &self_view, typename Evaluator_impl< DeviceBackend::Host >::Cons2Prim_Arg::Portable &arg, index iPt, index iPtEnd, int nVars, int nVarsScalar)
Eigen::Matrix< real, 3, nVarsFlow > TDiffU
Fixed-size 3x5 spatial gradient of the conservative state (one row per spatial dimension).
Definition EulerP.hpp:34
DNDS_DEVICE_CALLABLE bool FaceIDIsInternal(t_index id)
DNDS_CONSTANT const index UnInitIndex
Sentinel "not initialised" index value (= INT64_MIN).
Definition Defines.hpp:176
DNDS_CONSTANT const real verySmallReal
Catch-all lower bound ("effectively zero").
Definition Defines.hpp:194
int32_t rowsize
Row-width / per-row element-count type (signed 32-bit).
Definition Defines.hpp:109
DNDS_CONSTANT const real UnInitReal
Sentinel "not initialised" real value (NaN). Cheap to detect with std::isnan or IsUnInitReal; survive...
Definition Defines.hpp:174
int64_t index
Global row / DOF index type (signed 64-bit; handles multi-billion-cell meshes).
Definition Defines.hpp:107
double real
Canonical floating-point scalar used throughout DNDSR (double precision).
Definition Defines.hpp:105
DNDS_CONSTANT const real veryLargeReal
Catch-all upper bound ("practically infinity") for physical scalars.
Definition Defines.hpp:190
Trivially-copyable payload for Cons2PrimMu kernel data.
Trivially-copyable payload holding device views of all kernel data.
Eigen::Vector3d n(1.0, 0.0, 0.0)