DNDSR 0.1.0.dev1+gcd065ad
Distributed Numeric Data Structure for CFV
Loading...
Searching...
No Matches
test_Direct.cpp
Go to the documentation of this file.
1/**
2 * @file test_Direct.cpp
3 * @brief Unit tests for block-sparse LU factorization in Solver/Direct.hpp.
4 *
5 * Tests a 2D periodic Laplacian with 2x2 blocks on an NxN grid:
6 * A = 5-point stencil with periodic boundary (4 neighbors per cell).
7 * Each cell (i,j) connects to (i±1,j) and (i,j±1) with wrap-around.
8 * Diag = 4*I + delta*diag(i,2i), off-diag = -I (non-symmetric diag
9 * to exercise the full LU, not just LDLT).
10 *
11 * Verifies:
12 * - SerialSymLUStructure: symbolic factorization with fill-in
13 * - LocalLUBase: MatMul (A*x == b), InPlaceDecompose, Solve (A^{-1}b == x)
14 * - ILU(0): approximate solve, residual reduction
15 * - Complete LU (iluCode=-1): exact solve
16 * - ILU(1): intermediate fill, better than ILU(0)
17 */
18
19#define DOCTEST_CONFIG_IMPLEMENT
20#include "doctest.h"
21
22#include "DNDS/Defines.hpp"
23#include "DNDS/MPI.hpp"
24#include "Solver/Direct.hpp"
25#include <Eigen/Dense>
26#include <iostream>
27#include <iomanip>
28#include <vector>
29#include <memory>
30#include <cmath>
31
32using namespace DNDS;
33
34static constexpr int BS = 2; // block size
35using Block = Eigen::Matrix<real, BS, BS>;
36using BVec = Eigen::Vector<real, BS>;
37
38// ===================================================================
39// Concrete LocalLU derived class for 2x2 dense blocks
40// ===================================================================
41struct TestBlockLU;
42using tVec = std::vector<BVec>;
44
45struct TestBlockLU : public tBase
46{
47 std::vector<Block> diag;
48 std::vector<std::vector<Block>> lower;
49 std::vector<std::vector<Block>> upper;
50
52 : tBase(s), diag(N), lower(N), upper(N)
53 {
54 for (DNDS::index i = 0; i < N; i++)
55 {
56 diag[i].setZero();
57 lower[i].resize(s->lowerTriStructure[i].size());
58 upper[i].resize(s->upperTriStructure[i].size());
59 for (auto &b : lower[i]) b.setZero();
60 for (auto &b : upper[i]) b.setZero();
61 }
62 }
63
64 Block &GetDiag(DNDS::index i) { return diag[i]; }
65 Block &GetLower(DNDS::index i, int ij) { return lower[i][ij]; }
66 Block &GetUpper(DNDS::index i, int ij) { return upper[i][ij]; }
67 Block InvertDiag(const Block &v) { return v.inverse(); }
68};
69
70// ===================================================================
71// 2D periodic grid: NxN cells, cell (r,c) has 4 neighbors with wrap
72// ===================================================================
73static constexpr int G = 4; // grid side length => G*G = 16 cells
74static const int NCells = G * G;
75
76static DNDS::index cellIdx(int r, int c)
77{
78 return ((r % G) + G) % G * G + ((c % G) + G) % G;
79}
80
81static std::vector<std::vector<DNDS::index>> build2DPeriodicAdj()
82{
83 std::vector<std::vector<DNDS::index>> adj(NCells);
84 for (int r = 0; r < G; r++)
85 for (int c = 0; c < G; c++)
86 {
87 DNDS::index me = cellIdx(r, c);
88 adj[me].push_back(cellIdx(r - 1, c));
89 adj[me].push_back(cellIdx(r + 1, c));
90 adj[me].push_back(cellIdx(r, c - 1));
91 adj[me].push_back(cellIdx(r, c + 1));
92 }
93 return adj;
94}
95
96/// Fill 2D periodic Laplacian with non-symmetric diagonal perturbation.
97/// Diag_i = 4*I + delta * diag(i, 2*i) (makes the diagonal non-symmetric).
98/// Off-diag entries corresponding to original adjacency = -I.
99/// Fill-in entries (from ILU(k>0) or complete LU) = 0 (zeroed by constructor).
100/// Uses cell2cellFaceVLocal2FullRowPos to identify which entries in
101/// lower/upper correspond to original neighbors.
102static void fill2DLaplacian(TestBlockLU &lu, double delta = 0.1)
103{
104 auto adj = build2DPeriodicAdj();
105 Block I2 = Block::Identity();
106 auto &symLU = *lu.symLU;
107
108 for (int i = 0; i < NCells; i++)
109 {
110 Block D = 4.0 * I2;
111 D(0, 0) += delta * i;
112 D(1, 1) += delta * 2 * i;
113 lu.diag[i] = D;
114
115 // Only fill original adjacency entries with -I; fill-in stays 0
116 auto &c2cPos = symLU.cell2cellFaceVLocal2FullRowPos[i];
117 for (int ic2c = 0; ic2c < (int)adj[i].size(); ic2c++)
118 {
119 int pos = c2cPos[ic2c]; // 0 = diag, 1..nLower = lower, nLower+1.. = upper
120 if (pos == 0)
121 continue; // diagonal (self)
122 int nLower = symLU.lowerTriStructure[i].size();
123 if (pos <= nLower)
124 lu.lower[i][pos - 1] = -I2;
125 else
126 lu.upper[i][pos - nLower - 1] = -I2;
127 }
128 }
129}
130
131static MPIInfo g_mpi;
132
133// ===================================================================
134// Tests
135// ===================================================================
136
137TEST_CASE("Direct 2D periodic: symbolic factorization has fill-in")
138{
139 auto symLU = std::make_shared<Direct::SerialSymLUStructure>(g_mpi, NCells);
140 auto adj = build2DPeriodicAdj();
141 symLU->ObtainSymmetricSymbolicFactorization(adj, {0, NCells}, -1); // complete LU
142
143 // Complete LU on a 2D grid has fill-in: some rows should have > 4 entries
144 int maxLower = 0;
145 for (int i = 0; i < NCells; i++)
146 maxLower = std::max(maxLower, (int)symLU->lowerTriStructure[i].size());
147
148 std::cout << "[2D-sym] maxLower entries = " << maxLower
149 << " (> 4 means fill-in)" << std::endl;
150 CHECK(maxLower > 2); // definitely more than the original 2 lower neighbors
151}
152
153TEST_CASE("Direct 2D periodic: MatMul is correct")
154{
155 auto symLU = std::make_shared<Direct::SerialSymLUStructure>(g_mpi, NCells);
156 auto adj = build2DPeriodicAdj();
157 symLU->ObtainSymmetricSymbolicFactorization(adj, {0, NCells}, 0);
158
159 TestBlockLU lu(symLU, NCells);
160 fill2DLaplacian(lu);
161
162 // Build a known x vector
163 tVec x(NCells), Ax(NCells);
164 for (int i = 0; i < NCells; i++)
165 {
166 x[i] = BVec::Zero();
167 x[i](0) = std::sin(0.5 * i);
168 x[i](1) = std::cos(0.3 * i);
169 Ax[i] = BVec::Zero();
170 }
171
172 lu.MatMul(x, Ax);
173
174 // Verify by manual computation for cell 0
175 int r0 = 0, c0 = 0;
176 BVec expected = lu.diag[0] * x[0];
177 // 4 neighbors of cell 0 (periodic: left=G-1, right=1, up=(G-1)*G, down=G)
178 DNDS::index neighbors[] = {cellIdx(r0 - 1, c0), cellIdx(r0 + 1, c0),
179 cellIdx(r0, c0 - 1), cellIdx(r0, c0 + 1)};
180 for (auto nb : neighbors)
181 expected -= x[nb]; // off-diag = -I
182
183 real err0 = (Ax[0] - expected).norm();
184 CHECK(err0 < 1e-14);
185
186 // Check all entries are finite
187 for (int i = 0; i < NCells; i++)
188 CHECK_FALSE(Ax[i].hasNaN());
189}
190
191TEST_CASE("Direct 2D periodic: complete LU solve is exact")
192{
193 auto symLU = std::make_shared<Direct::SerialSymLUStructure>(g_mpi, NCells);
194 auto adj = build2DPeriodicAdj();
195 symLU->ObtainSymmetricSymbolicFactorization(adj, {0, NCells}, -1); // complete LU
196
197 // Build A for MatMul, then decompose a copy for Solve
198 TestBlockLU luMat(symLU, NCells);
199 fill2DLaplacian(luMat);
200
201 tVec x_known(NCells), b(NCells), x_solved(NCells);
202 for (int i = 0; i < NCells; i++)
203 {
204 x_known[i] = BVec::Zero();
205 x_known[i](0) = std::sin(0.7 * i + 0.1);
206 x_known[i](1) = std::cos(0.4 * i + 0.2);
207 b[i] = BVec::Zero();
208 x_solved[i] = BVec::Zero();
209 }
210
211 luMat.MatMul(x_known, b); // b = A * x_known
212
213 TestBlockLU luSolve(symLU, NCells);
214 fill2DLaplacian(luSolve);
215 luSolve.InPlaceDecompose();
216 luSolve.Solve(b, x_solved);
217
218 real maxErr = 0;
219 for (int i = 0; i < NCells; i++)
220 maxErr = std::max(maxErr, (x_solved[i] - x_known[i]).norm());
221
222 std::cout << "[2D-LU-full] maxErr = " << std::scientific << maxErr << std::endl;
223 CHECK(maxErr < 1e-10);
224}
225
226TEST_CASE("Direct 2D periodic: ILU(0) is approximate but reduces residual")
227{
228 auto symLU0 = std::make_shared<Direct::SerialSymLUStructure>(g_mpi, NCells);
229 auto adj = build2DPeriodicAdj();
230 symLU0->ObtainSymmetricSymbolicFactorization(adj, {0, NCells}, 0); // ILU(0)
231
232 // Build b = A * x_known using the original (non-decomposed) matrix
233 auto symLUorig = std::make_shared<Direct::SerialSymLUStructure>(g_mpi, NCells);
234 symLUorig->ObtainSymmetricSymbolicFactorization(adj, {0, NCells}, 0);
236 fill2DLaplacian(luOrig);
237
238 tVec x_known(NCells), b(NCells), x_ilu(NCells), Ax_ilu(NCells);
239 for (int i = 0; i < NCells; i++)
240 {
241 x_known[i] = BVec::Zero();
242 x_known[i](0) = std::sin(0.7 * i + 0.1);
243 x_known[i](1) = std::cos(0.4 * i + 0.2);
244 b[i] = BVec::Zero();
245 x_ilu[i] = BVec::Zero();
246 Ax_ilu[i] = BVec::Zero();
247 }
248
250
251 // ILU(0) solve
252 TestBlockLU luILU(symLU0, NCells);
253 fill2DLaplacian(luILU);
256
257 // Compute residual ||b - A*x_ilu||
258 // Need a fresh non-decomposed matrix for MatMul
259 auto symLUmul = std::make_shared<Direct::SerialSymLUStructure>(g_mpi, NCells);
260 symLUmul->ObtainSymmetricSymbolicFactorization(adj, {0, NCells}, 0);
262 fill2DLaplacian(luMul);
264
266 for (int i = 0; i < NCells; i++)
267 {
268 residualNorm += (b[i] - Ax_ilu[i]).squaredNorm();
269 bNorm += b[i].squaredNorm();
270 }
271 residualNorm = std::sqrt(residualNorm);
272 bNorm = std::sqrt(bNorm);
273
275 std::cout << "[2D-ILU0] relResidual = " << std::scientific << relResidual << std::endl;
276
277 // ILU(0) on a 2D periodic grid is NOT exact (has fill-in), but should reduce residual
278 CHECK(relResidual < 0.5); // significant reduction from initial residual of ~1
279 CHECK(relResidual > 1e-14); // not exact
280}
281
282/// Use ILU as preconditioner in fixed-point iteration: x_{k+1} = x_k + M^{-1}(b - Ax_k).
283/// Returns error norm after nIter iterations.
284static real iluPrecIterSolve(int iluCode, const tVec &b, const tVec &x_known, int nIter)
285{
286 auto adj = build2DPeriodicAdj();
287
288 // Build non-decomposed A for MatMul
289 auto symA = std::make_shared<Direct::SerialSymLUStructure>(g_mpi, NCells);
290 symA->ObtainSymmetricSymbolicFactorization(adj, {0, NCells}, 0);
291 TestBlockLU luA(symA, NCells);
292 fill2DLaplacian(luA);
293
294 // Build ILU preconditioner M
295 auto symM = std::make_shared<Direct::SerialSymLUStructure>(g_mpi, NCells);
296 symM->ObtainSymmetricSymbolicFactorization(adj, {0, NCells}, iluCode);
298 fill2DLaplacian(luM);
300
301 tVec x(NCells), r(NCells), z(NCells), Ax(NCells);
302 for (int i = 0; i < NCells; i++)
303 x[i] = BVec::Zero();
304
305 for (int iter = 0; iter < nIter; iter++)
306 {
307 luA.MatMul(x, Ax);
308 for (int i = 0; i < NCells; i++)
309 r[i] = b[i] - Ax[i];
310 luM.Solve(r, z);
311 for (int i = 0; i < NCells; i++)
312 x[i] += z[i];
313 }
314
316 for (int i = 0; i < NCells; i++)
317 err += (x[i] - x_known[i]).squaredNorm();
318 return std::sqrt(err);
319}
320
321TEST_CASE("Direct 2D periodic: ILU(1) converges faster than ILU(0) as preconditioner")
322{
323 auto adj = build2DPeriodicAdj();
324
325 // Build b = A * x_known
326 auto symLUorig = std::make_shared<Direct::SerialSymLUStructure>(g_mpi, NCells);
327 symLUorig->ObtainSymmetricSymbolicFactorization(adj, {0, NCells}, 0);
329 fill2DLaplacian(luOrig);
330
331 tVec x_known(NCells), b(NCells);
332 for (int i = 0; i < NCells; i++)
333 {
334 x_known[i] = BVec::Zero();
335 x_known[i](0) = std::sin(0.7 * i + 0.1);
336 x_known[i](1) = std::cos(0.4 * i + 0.2);
337 b[i] = BVec::Zero();
338 }
340
341 int nIter = 10;
342 real errILU0 = iluPrecIterSolve(0, b, x_known, nIter);
343 real errILU1 = iluPrecIterSolve(1, b, x_known, nIter);
344 real errFull = iluPrecIterSolve(-1, b, x_known, nIter);
345
346 std::cout << "[2D-ILU-prec-iter] ILU(0)=" << std::scientific << errILU0
347 << " ILU(1)=" << errILU1
348 << " Full=" << errFull
349 << " (after " << nIter << " iters)" << std::endl;
350
351 CHECK(errFull < 1e-10); // complete LU converges in 1 iteration
352 CHECK(errILU1 < errILU0); // ILU(1) converges faster
353 CHECK(errILU0 < 1e-3); // ILU(0) also converges
354}
355
356// ===================================================================
357// LDLT: concrete derived class for symmetric 2x2 blocks
358// ===================================================================
359struct TestBlockLDLT;
361
363{
364 std::vector<Block> diag;
365 std::vector<std::vector<Block>> lower;
366
368 : tLDLTBase(s), diag(N), lower(N)
369 {
370 for (DNDS::index i = 0; i < N; i++)
371 {
372 diag[i].setZero();
373 lower[i].resize(s->lowerTriStructure[i].size());
374 for (auto &b : lower[i]) b.setZero();
375 }
376 }
377
378 Block &GetDiag(DNDS::index i) { return diag[i]; }
379 Block &GetLower(DNDS::index i, int ij) { return lower[i][ij]; }
380 Block InvertDiag(const Block &v) { return v.inverse(); }
381};
382
383/// Fill 2D periodic Laplacian with SYMMETRIC diagonal: Diag = (4+0.1*i)*I.
384/// Lower = -I at original adjacency positions; fill-in positions stay 0.
385static void fill2DLaplacianSym(TestBlockLDLT &lu, double delta = 0.1)
386{
387 auto adj = build2DPeriodicAdj();
388 Block I2 = Block::Identity();
389 auto &symLU = *lu.symLU;
390
391 for (int i = 0; i < NCells; i++)
392 {
393 lu.diag[i] = (4.0 + delta * i) * I2;
394 auto &c2cPos = symLU.cell2cellFaceVLocal2FullRowPos[i];
395 int nLower = symLU.lowerTriStructure[i].size();
396 for (int ic2c = 0; ic2c < (int)adj[i].size(); ic2c++)
397 {
398 int pos = c2cPos[ic2c];
399 if (pos == 0)
400 continue;
401 // LDLT only stores lower; pos 1..nLower is lower
402 if (pos <= nLower)
403 lu.lower[i][pos - 1] = -I2;
404 // Upper positions: LDLT uses lower^T, no explicit upper storage
405 }
406 }
407}
408
409// Symmetric version for LU (MatMul reference) — same fill logic
410static void fill2DLaplacianSymLU(TestBlockLU &lu, double delta = 0.1)
411{
412 auto adj = build2DPeriodicAdj();
413 Block I2 = Block::Identity();
414 auto &symLU = *lu.symLU;
415
416 for (int i = 0; i < NCells; i++)
417 {
418 lu.diag[i] = (4.0 + delta * i) * I2;
419 auto &c2cPos = symLU.cell2cellFaceVLocal2FullRowPos[i];
420 for (int ic2c = 0; ic2c < (int)adj[i].size(); ic2c++)
421 {
422 int pos = c2cPos[ic2c];
423 if (pos == 0)
424 continue;
425 int nLower = symLU.lowerTriStructure[i].size();
426 if (pos <= nLower)
427 lu.lower[i][pos - 1] = -I2;
428 else
429 lu.upper[i][pos - nLower - 1] = -I2;
430 }
431 }
432}
433
434// ===================================================================
435// LDLT tests
436// ===================================================================
437
438TEST_CASE("Direct 2D periodic LDLT: MatMul matches LU MatMul for symmetric system")
439{
440 auto adj = build2DPeriodicAdj();
441
442 auto symLU = std::make_shared<Direct::SerialSymLUStructure>(g_mpi, NCells);
443 symLU->ObtainSymmetricSymbolicFactorization(adj, {0, NCells}, -1);
444
445 // LU reference
446 TestBlockLU luRef(symLU, NCells);
447 fill2DLaplacianSymLU(luRef);
448
449 // LDLT
450 TestBlockLDLT ldlt(symLU, NCells);
451 fill2DLaplacianSym(ldlt);
452
453 tVec x(NCells), Ax_lu(NCells), Ax_ldlt(NCells);
454 for (int i = 0; i < NCells; i++)
455 {
456 x[i] = BVec::Zero();
457 x[i](0) = std::sin(0.3 * i);
458 x[i](1) = std::cos(0.7 * i);
459 Ax_lu[i] = BVec::Zero();
460 Ax_ldlt[i] = BVec::Zero();
461 }
462
463 luRef.MatMul(x, Ax_lu);
464 ldlt.MatMul(x, Ax_ldlt);
465
466 real maxDiff = 0;
467 for (int i = 0; i < NCells; i++)
468 maxDiff = std::max(maxDiff, (Ax_lu[i] - Ax_ldlt[i]).norm());
469
470 std::cout << "[2D-LDLT-MatMul] maxDiff = " << std::scientific << maxDiff << std::endl;
471 CHECK(maxDiff < 1e-14);
472}
473
474TEST_CASE("Direct 2D periodic LDLT: complete decompose + solve is exact")
475{
476 auto adj = build2DPeriodicAdj();
477
478 // Build reference b = A*x_known using LU MatMul
479 auto symLU = std::make_shared<Direct::SerialSymLUStructure>(g_mpi, NCells);
480 symLU->ObtainSymmetricSymbolicFactorization(adj, {0, NCells}, -1);
481 TestBlockLU luRef(symLU, NCells);
482 fill2DLaplacianSymLU(luRef);
483
484 tVec x_known(NCells), b(NCells), x_solved(NCells);
485 for (int i = 0; i < NCells; i++)
486 {
487 x_known[i] = BVec::Zero();
488 x_known[i](0) = std::sin(0.7 * i + 0.1);
489 x_known[i](1) = std::cos(0.4 * i + 0.2);
490 b[i] = BVec::Zero();
491 x_solved[i] = BVec::Zero();
492 }
493 luRef.MatMul(x_known, b);
494
495 // Solve with LDLT
496 auto symLU2 = std::make_shared<Direct::SerialSymLUStructure>(g_mpi, NCells);
497 symLU2->ObtainSymmetricSymbolicFactorization(adj, {0, NCells}, -1);
498 TestBlockLDLT ldlt(symLU2, NCells);
499 fill2DLaplacianSym(ldlt);
500 ldlt.InPlaceDecompose();
501 ldlt.Solve(b, x_solved);
502
503 real maxErr = 0;
504 for (int i = 0; i < NCells; i++)
505 maxErr = std::max(maxErr, (x_solved[i] - x_known[i]).norm());
506
507 std::cout << "[2D-LDLT-Solve] maxErr = " << std::scientific << maxErr << std::endl;
508 CHECK(maxErr < 1e-10);
509}
510
511// ===================================================================
512// main with MPI (needed for SerialSymLUStructure internals)
513// ===================================================================
514int main(int argc, char **argv)
515{
516 MPI_Init(&argc, &argv);
517 g_mpi.setWorld();
518 doctest::Context ctx;
519 ctx.applyCommandLine(argc, argv);
520 int res = ctx.run();
521 MPI_Finalize();
522 return res;
523}
Core type aliases, constants, and metaprogramming utilities for the DNDS framework.
MPI wrappers: MPIInfo, collective operations, type mapping, CommStrategy.
int main()
Definition dummy.cpp:3
the host side operators are provided as implemented
int64_t index
Global row / DOF index type (signed 64-bit; handles multi-billion-cell meshes).
Definition Defines.hpp:107
std::shared_ptr< T > ssp
Shortened alias for std::shared_ptr used pervasively in DNDSR.
Definition Defines.hpp:138
double real
Canonical floating-point scalar used throughout DNDSR (double precision).
Definition Defines.hpp:105
ssp< Direct::SerialSymLUStructure > symLU
Definition Direct.hpp:529
void Solve(tVec &b, tVec &result)
Definition Direct.hpp:621
void MatMul(tVec &x, tVec &result)
Definition Direct.hpp:598
ssp< Direct::SerialSymLUStructure > symLU
Definition Direct.hpp:291
void Solve(tVec &b, tVec &result)
Definition Direct.hpp:485
void MatMul(tVec &x, tVec &result)
Definition Direct.hpp:468
Lightweight bundle of an MPI communicator and the calling rank's coordinates.
Definition MPI.hpp:215
void setWorld()
Initialise the object to MPI_COMM_WORLD. Requires MPI_Init to have run.
Definition MPI.hpp:242
Block & GetLower(DNDS::index i, int ij)
Block InvertDiag(const Block &v)
std::vector< std::vector< Block > > lower
TestBlockLDLT(ssp< Direct::SerialSymLUStructure > s, int N)
std::vector< Block > diag
Block & GetDiag(DNDS::index i)
Block & GetDiag(DNDS::index i)
TestBlockLU(ssp< Direct::SerialSymLUStructure > s, int N)
std::vector< std::vector< Block > > lower
std::vector< std::vector< Block > > upper
std::vector< Block > diag
Block InvertDiag(const Block &v)
Block & GetLower(DNDS::index i, int ij)
Block & GetUpper(DNDS::index i, int ij)
Eigen::Matrix< real, 5, 1 > v
constexpr DNDS::index N
TestBlockLU luILU(symLU0, NCells)
real bNorm
tVec x_known(NCells)
Eigen::Matrix< real, BS, BS > Block
Eigen::Vector< real, BS > BVec
auto symM
tVec Ax(NCells)
tVec x_ilu(NCells)
real err
TestBlockLU luMul(symLUmul, NCells)
TestBlockLU luM(symM, NCells)
tVec r(NCells)
TestBlockLU luA(symA, NCells)
tVec z(NCells)
auto symLUorig
real relResidual
tVec x(NCells)
tVec b(NCells)
TestBlockLU luOrig(symLUorig, NCells)
auto adj
real residualNorm
std::vector< BVec > tVec
tVec Ax_ilu(NCells)
auto symLUmul
CHECK(result.size()==3)
real expected
auto res
Definition test_ODE.cpp:196
TEST_CASE("3D: VFV P2 HQM error < P1 on sinCos3D")