DNDSR 0.1.0.dev1+gcd065ad
Distributed Numeric Data Structure for CFV
Loading...
Searching...
No Matches
Direct.hpp
Go to the documentation of this file.
1#pragma once
2
3// #ifndef __DNDS_REALLY_COMPILING__
4// #define __DNDS_REALLY_COMPILING__
5// #define __DNDS_REALLY_COMPILING__HEADER_ON__
6// #endif
7#include "DNDS/JsonUtil.hpp"
9#include "DNDS/Defines.hpp"
10// #ifdef __DNDS_REALLY_COMPILING__HEADER_ON__
11// #undef __DNDS_REALLY_COMPILING__
12// #endif
13
14#include <unordered_set>
15
16namespace DNDS::Direct
17{
19 {
20 bool useDirectPrec = false;
21 int32_t iluCode = 0; // 0 for no fill, -1 for complete
22 int32_t orderingCode = INT32_MIN; // INT32_MIN for auto 0 for natural, 1 for metis, 2 for MMD
23
25 {
26 DNDS_FIELD(useDirectPrec, "Enable direct preconditioner");
27 DNDS_FIELD(iluCode, "ILU fill level: 0=no fill, -1=complete");
28 DNDS_FIELD(orderingCode, "Ordering: INT32_MIN=auto, 0=natural, 1=metis, 2=MMD");
29 }
30
31 [[nodiscard]] int getOrderingCode() const
32 {
33 return orderingCode == INT32_MIN
34 ? (iluCode == -1 ? 2 : 0) // auto decide
36 }
37 [[nodiscard]] int getILUCode() const { return iluCode; }
38 };
39}
40
41namespace DNDS::Direct
42{
44 {
47 using tLocalMatStruct = std::vector<std::vector<index>>;
55 std::vector<index> localFillOrderingOld2New;
56 std::vector<index> localFillOrderingNew2Old;
57
58 std::vector<index> localPartStarts;
59
60 SerialSymLUStructure(const MPIInfo &nMpi, index nN) : mpi(nMpi), N(nN) {};
61
62 [[nodiscard]] index Num() const { return N; }
63
72
73 /**
74 * @brief get symmetric symbolic matrix factorization over cell2cellFaceV
75 *
76 * @tparam TAdj
77 * @param cell2cellFaceV is std::vector<std::vector<index>> -like which holds **symmetric** local adjacency
78 * @param iluCode -1: full LU, 0,1,2... incomplete LU defined using expanded stencil
79 */
80 template <class TAdj>
82 const TAdj &cell2cellFaceV,
83 const std::vector<index> localPartStarts_in,
84 int iluCode)
85 {
86 localPartStarts = localPartStarts_in;
87 { // assert on partition
88 int nParts = localPartStarts.size() - 1;
89 if (nParts >= 1)
90 DNDS_assert(localPartStarts[0] == 0 && localPartStarts.back() == this->Num());
91 else // handle when localPartStarts_in is not large enough
92 {
93 localPartStarts.resize(2);
94 localPartStarts[0] = 0;
95 localPartStarts[1] = this->Num();
96 }
97
98 for (int iPart = 0; iPart < nParts; iPart++)
99 {
100 index start = localPartStarts[iPart];
101 index end = localPartStarts[iPart + 1];
102 DNDS_assert(start <= end);
103 for (index iCell = localPartStarts[iPart]; iCell < localPartStarts[iPart + 1]; iCell++)
104 {
105 index n2o = this->FillingReorderNew2Old(iCell);
106 index o2n = this->FillingReorderOld2New(iCell);
107 DNDS_assert(start <= n2o && n2o < end);
108 DNDS_assert(start <= o2n && o2n < end);
109 }
110 }
111 }
112
113 std::vector<std::unordered_set<index>> cell2cellFaceVEnlarged;
114 if (iluCode > 0) // do expansion of stencil
115 {
116 DNDS_assert(iluCode <= 5);
117 cell2cellFaceVEnlarged.resize(this->Num());
118 for (index iCell = 0; iCell < this->Num(); iCell++)
119 for (auto iCO : cell2cellFaceV[iCell])
120 cell2cellFaceVEnlarged[iCell].insert(iCO);
121 for (int iFill = 0; iFill < iluCode; iFill++)
122 {
123 for (index iCell = 0; iCell < this->Num(); iCell++)
124 {
125 std::unordered_set<index> newNeighbor;
126 for (auto iCO : cell2cellFaceVEnlarged[iCell])
127 for (auto iCOO : cell2cellFaceV[iCO])
128 newNeighbor.insert(iCOO);
129 for (auto iCN : newNeighbor)
130 cell2cellFaceVEnlarged[iCell].insert(iCN);
131 }
132 }
133 for (index iCell = 0; iCell < this->Num(); iCell++)
134 cell2cellFaceVEnlarged[iCell].erase(iCell); // this is redundant
135 }
136
137 std::vector<std::unordered_set<index>> triLowRows;
138 std::vector<std::unordered_set<index>> triUppRows;
139 std::vector<std::unordered_set<index>> midSymMatCols;
140 triLowRows.resize(this->Num());
141 triUppRows.resize(this->Num());
142 midSymMatCols.resize(this->Num());
143 index nnzOrig{this->Num()};
144
145 for (index iCellP = 0; iCellP < this->Num(); iCellP++) // iterate over the columns
146 {
147 index iCell = this->FillingReorderNew2Old(iCellP);
148 midSymMatCols[iCellP].insert(iCellP);
149 for (auto iCellOther : cell2cellFaceV[iCell])
150 {
151 index iCellOtherP = this->FillingReorderOld2New(iCellOther);
152 midSymMatCols[iCellP].insert(iCellOtherP); // assuming cell2cellFaceV is symmetric
153 }
154 nnzOrig += cell2cellFaceV[iCell].size();
155 }
156
157 for (index iCellP = 0; iCellP < this->Num(); iCellP++) // iterate over the columns
158 {
159 for (auto iCellOtherP : midSymMatCols[iCellP]) // emulate the symmetric factorization A L1L2L3...LN D LNT...L1T
160 if (iCellOtherP > iCellP)
161 {
162 index iCellOther = this->FillingReorderNew2Old(iCellOtherP);
163 for (auto iCellOtherPP : midSymMatCols[iCellP])
164 if (iCellOtherPP > iCellOtherP)
165 {
166 index iCellOtherOther = this->FillingReorderNew2Old(iCellOtherPP);
167 // to be always symmetric
168 // control over here to get incomplete LU structure
169 if (iluCode < 0 || (iluCode > 0 && cell2cellFaceVEnlarged[iCellOther].count(iCellOtherOther)))
170 {
171 midSymMatCols[iCellOtherPP].insert(iCellOtherP); // upper part
172 midSymMatCols[iCellOtherP].insert(iCellOtherPP); // lower part
173 }
174 }
175 triLowRows[iCellOtherP].insert(iCellP); // iCellP is iCol, iCellOtherP is iRow
176 triUppRows[iCellP].insert(iCellOtherP);
177 }
178 }
179
180 lowerTriStructure.resize(this->Num());
181 upperTriStructure.resize(this->Num());
182 lowerTriStructureNew.resize(this->Num());
183 upperTriStructureNew.resize(this->Num());
184 index nnzLower{0}, nnzUpper{0};
185 for (index iCellP = 0; iCellP < this->Num(); iCellP++)
186 {
187 index iCell = this->FillingReorderNew2Old(iCellP);
188 lowerTriStructure[iCell].reserve(triLowRows[iCellP].size());
189 upperTriStructure[iCell].reserve(triUppRows[iCellP].size());
190 lowerTriStructureNew[iCellP].reserve(triLowRows[iCellP].size());
191 upperTriStructureNew[iCellP].reserve(triUppRows[iCellP].size());
192 for (auto iCellOtherP : triLowRows[iCellP])
193 lowerTriStructureNew[iCellP].push_back(iCellOtherP);
194 for (auto iCellOtherP : triUppRows[iCellP])
195 upperTriStructureNew[iCellP].push_back(iCellOtherP);
196 std::sort(lowerTriStructureNew[iCellP].begin(), lowerTriStructureNew[iCellP].end());
197 std::sort(upperTriStructureNew[iCellP].begin(), upperTriStructureNew[iCellP].end());
198 for (auto iCellOtherP : lowerTriStructureNew[iCellP])
199 lowerTriStructure[iCell].push_back(this->FillingReorderNew2Old(iCellOtherP));
200 for (auto iCellOtherP : upperTriStructureNew[iCellP])
201 upperTriStructure[iCell].push_back(this->FillingReorderNew2Old(iCellOtherP));
202
203 nnzLower += lowerTriStructure[iCell].size();
204 nnzUpper += upperTriStructure[iCell].size();
205 // the lowerTriStructure and upperTriStructure's col indices are corresponding to
206 // those of in *New Rows
207 // col indices in *New Rows are sorted
208 }
209 if (mpi.rank == 0)
210 log() << "Direct::ObtainSymmetricSymbolicFactorization(): Factorizing Done" << std::endl;
211 MPISerialDo(mpi, [&]()
212 { log() << fmt::format(" ({}, {}/{})", mpi.rank,
213 nnzLower + nnzUpper + this->Num(),
214 nnzOrig)
215 << (((mpi.rank + 1) % 10) ? "" : "\n")
216 << std::flush; });
218 if (mpi.rank == 0)
219 log() << std::endl;
220 DNDS_assert_info(nnzLower == nnzUpper, fmt::format("nnzLower,Upper: [{},{}]", nnzLower, nnzUpper));
221
222 // pre-search
224 for (index iP = 0; iP < this->Num(); iP++)
225 {
226 auto &&lowerRow = lowerTriStructureNew[iP];
227 lowerTriStructureNewInUpper[iP].resize(lowerRow.size());
228 for (int ijP = 0; ijP < lowerRow.size(); ijP++)
229 {
230 index jP = lowerRow[ijP];
231 auto &&upperRow = upperTriStructureNew[jP];
232 auto ret = std::lower_bound(upperRow.begin(), upperRow.end(), iP);
233 // DNDS_assert(ret != upperRow.end()); // has to be found
234 lowerTriStructureNewInUpper[iP][ijP] = ret != upperRow.end()
235 ? (ret - upperRow.begin())
236 : -1; // !not found in upper
237 }
238 }
239
240 // pre-search
242 for (index iP = 0; iP < this->Num(); iP++)
243 {
244 auto &&upperRow = upperTriStructureNew[iP];
245 upperTriStructureNewInLower[iP].resize(upperRow.size());
246 for (int ijP = 0; ijP < upperRow.size(); ijP++)
247 {
248 index jP = upperRow[ijP];
249 auto &&lowerRow = lowerTriStructureNew[jP];
250 auto ret = std::lower_bound(lowerRow.begin(), lowerRow.end(), iP);
251 // DNDS_assert(ret != upperRow.end()); // has to be found
252 upperTriStructureNewInLower[iP][ijP] = ret != lowerRow.end()
253 ? (ret - lowerRow.begin())
254 : -1; // !not found in upper
255 }
256 }
257
258 cell2cellFaceVLocal2FullRowPos.resize(this->Num());
259 for (index i = 0; i < this->Num(); i++)
260 {
261 index iP = this->FillingReorderOld2New(i);
262 cell2cellFaceVLocal2FullRowPos[i].resize(cell2cellFaceV[i].size(), -1);
263 for (int ic2c = 0; ic2c < cell2cellFaceV[i].size(); ic2c++)
264 {
265 index j = cell2cellFaceV[i][ic2c];
266 index jP = this->FillingReorderOld2New(j);
267 if (jP < iP)
268 {
269 auto &&row = lowerTriStructureNew[iP];
270 auto ret = std::lower_bound(row.begin(), row.end(), jP);
271 DNDS_assert(ret != row.end());
273 (ret - row.begin()) + 1;
274 }
275 else if (jP > iP)
276 {
277 auto &&row = upperTriStructureNew[iP];
278 auto ret = std::lower_bound(row.begin(), row.end(), jP);
279 DNDS_assert(ret != row.end());
281 (ret - row.begin()) + lowerTriStructureNew[iP].size() + 1;
282 }
283 }
284 }
285 }
286 };
287
288 template <class Derived, class tComponent, class tVec>
290 {
292 bool isDecomposed = false;
293
297
298 virtual ~LocalLUBase() = default;
299
300 void GetDiag(index i); // pure "virtual" do not implement
301 void GetLower(index i, int ij); // pure "virtual" do not implement
302 void GetUpper(index i, int ij); // pure "virtual" do not implement
303 void InvertDiag(const tComponent &v);
304
306 {
308 }
309
311 {
312 auto dThis = static_cast<Derived *>(this);
313 const auto &localPartStarts = symLU->localPartStarts;
314 int nParts = localPartStarts.size() - 1;
315#if defined(DNDS_DIST_MT_USE_OMP)
316#pragma omp parallel for schedule(static)
317#endif
318 for (int iPart = 0; iPart < nParts; iPart++)
319 for (index iP = localPartStarts.at(iPart); iP < localPartStarts.at(iPart + 1); iP++)
320 {
321 index i = symLU->FillingReorderNew2Old(iP);
322
323 auto &&lowerRow = symLU->lowerTriStructureNew[iP];
324 /*********************/
325 // lower part
326 for (int ijP = 0; ijP < lowerRow.size(); ijP++) //! for(int jP = 0; jP < iP; jP++)
327 {
328 auto jP = lowerRow[ijP];
329 DNDS_assert(jP < iP);
330 auto j = symLU->FillingReorderNew2Old(jP);
331 // handle last j's job
332 if (ijP > 0) //! if(jP > 0)
333 {
334 auto &&lowerRowJP = symLU->lowerTriStructureNew[jP];
335 iterateIdentical( //! for(int kP = 0; kP < jP; kP++)
336 lowerRow.begin(), lowerRow.end(), lowerRowJP.begin(), lowerRowJP.end(),
337 [&](index kP, auto pos1, auto pos2)
338 {
339 if (kP > jP - 1)
340 return true; // early end
341 DNDS_assert(kP < symLU->Num()); // a safe guarantee
342 auto k = symLU->FillingReorderNew2Old(kP);
343 int jPInUpperPos = symLU->lowerTriStructureNewInUpper[jP][pos2];
344 if (jPInUpperPos != -1) // in case not symbolically symmetric
345 dThis->GetLower(i, ijP) -=
346 dThis->GetLower(i, pos1) * dThis->GetUpper(k, jPInUpperPos);
347 //! LU(iP, jP) -= LU(iP, kP) * LU(kP, jP);
348 // if (iP < 3)
349 // log() << fmt::format("Lower Add at {},{},{} === {}", iP, jP - 1, kP, (dThis->GetLower(i, pos1) * dThis->GetUpper(k, jPInUpperPos))(0)) << std::endl;
350 return false;
351 });
352 }
353
354 // auto luDiag = dThis->GetDiag(j).fullPivLu();
355 // tComponent Aij = luDiag.solve(dThis->GetLower(i, ijP));
356 dThis->GetLower(i, ijP) *= dThis->GetDiag(j); //! LU(iP, jP) *= LU(jP, jP);
357 }
358 /*********************/
359 // diag part
360 for (int ikP = 0; ikP < lowerRow.size(); ikP++) //! for(int kP = 0; kP < iP; kP++)
361 {
362 auto kP = lowerRow[ikP];
363 auto k = symLU->FillingReorderNew2Old(kP);
364 int iPInUpperPos = symLU->lowerTriStructureNewInUpper[iP][ikP];
365 if (iPInUpperPos != -1)
366 dThis->GetDiag(i) -= dThis->GetLower(i, ikP) * dThis->GetUpper(k, iPInUpperPos);
367 //! LU(iP, iP) -= LU(iP, kP) * LU(kP, iP);
368 }
369
370 dThis->GetDiag(i) = dThis->InvertDiag(dThis->GetDiag(i)); // * note here only stores
371 //! LU(iP, iP) = inv(LU(iP, iP));
372 /*********************/
373 // upper part
374 auto &&upperRow = symLU->upperTriStructureNew[iP];
375 for (int ijP = 0; ijP < upperRow.size(); ijP++) //! for(int jP = iP+1; jP < N; jP++)
376 {
377 auto jP = upperRow[ijP];
378 DNDS_assert(jP > iP);
379 auto j = symLU->FillingReorderNew2Old(jP);
380 auto &&lowerRowJP = symLU->lowerTriStructureNew[jP];
381
382 iterateIdentical( //! for(int kP = 0; kP < iP; kP++)
383 lowerRow.begin(), lowerRow.end(), lowerRowJP.begin(), lowerRowJP.end(),
384 [&](index kP, auto pos1, auto pos2)
385 {
386 if (kP >= iP)
387 return true;
388 DNDS_assert(kP < symLU->Num());
389 auto k = symLU->FillingReorderNew2Old(kP);
390 int jPInUpperPos = symLU->lowerTriStructureNewInUpper[jP][pos2];
391 if (jPInUpperPos != -1)
392 dThis->GetUpper(i, ijP) -= dThis->GetLower(i, pos1) * dThis->GetUpper(k, jPInUpperPos);
393 //! LU(iP, jP) -= LU(iP, kP) * LU(kP, jP);
394 // if (iP < 3)
395 // log() << fmt::format("Upper Add at {},{},{} === {}", iP, jP, kP,
396 // (this->GetLower(i, pos1) * this->GetUpper(k, jPInUpperPos))(0))
397 // << std::endl;
398 return false;
399 });
400 }
401 }
402 isDecomposed = true;
403 }
404
406 {
407 auto dThis = static_cast<Derived *>(this);
408 for (index iP = 0; iP < symLU->Num(); iP++)
409 {
410 index i = symLU->FillingReorderNew2Old(iP);
411
412 auto &&lowerRow = symLU->lowerTriStructureNew[iP];
413
414 /*********************/
415 // diag part
416 for (int ikP = 0; ikP < lowerRow.size(); ikP++) //! for(int kP = 0; kP < iP; kP++)
417 {
418 auto kP = lowerRow[ikP];
419 auto k = symLU->FillingReorderNew2Old(kP);
420 int iPInUpperPos = symLU->lowerTriStructureNewInUpper[iP][ikP];
421 if (iPInUpperPos != -1)
422 dThis->GetDiag(i) -= dThis->GetLower(i, ikP) * dThis->GetUpper(k, iPInUpperPos);
423 //! LU(iP, iP) -= LU(iP, kP) * LU(kP, iP);
424 }
425
426 dThis->GetDiag(i) = dThis->InvertDiag(dThis->GetDiag(i)); // * note here only stores
427 //! LU(iP, iP) = inv(LU(iP, iP));
428 /*********************/
429 // upper and lower part
430 auto &&upperRow = symLU->upperTriStructureNew[iP];
431
432 // #if defined(DNDS_DIST_MT_USE_OMP) // this OMP does not improve
433 // #pragma omp parallel for schedule(static)
434 // #endif
435 for (int ijP = 0; ijP < upperRow.size(); ijP++) //! for(int jP = iP+1; jP < N; jP++)
436 {
437 auto jP = upperRow[ijP];
438 DNDS_assert(jP > iP);
439 auto j = symLU->FillingReorderNew2Old(jP);
440 auto &&lowerRowJP = symLU->lowerTriStructureNew[jP];
441 int iPInLowerPosOfJRow = symLU->upperTriStructureNewInLower[iP][ijP];
442
443 iterateIdentical( //! for(int kP = 0; kP < iP; kP++)
444 lowerRow.begin(), lowerRow.end(), lowerRowJP.begin(), lowerRowJP.end(),
445 [&](index kP, auto pos1, auto pos2)
446 {
447 if (kP >= iP)
448 return true;
449 DNDS_assert(kP < symLU->Num());
450 auto k = symLU->FillingReorderNew2Old(kP);
451 int jPInUpperPos = symLU->lowerTriStructureNewInUpper[jP][pos2];
452 int iPInUpperPos = symLU->lowerTriStructureNewInUpper[iP][pos1];
453 if (jPInUpperPos != -1)
454 dThis->GetUpper(i, ijP) -= dThis->GetLower(i, pos1) * dThis->GetUpper(k, jPInUpperPos);
455 //! LU(iP, jP) -= LU(iP, kP) * LU(kP, jP);
456 if (iPInUpperPos != -1)
457 dThis->GetLower(j, iPInLowerPosOfJRow) -= dThis->GetLower(j, pos2) * dThis->GetUpper(k, iPInUpperPos);
458 // !LU(jP, iP) -= LU(jP, kP) * LU(kP, iP);
459 return false;
460 });
461 dThis->GetLower(j, iPInLowerPosOfJRow) *= dThis->GetDiag(i);
462 // !LU(jP, iP) *= inv(LU(iP, iP));
463 }
464 }
465 isDecomposed = true;
466 }
467
469 {
470 auto dThis = static_cast<Derived *>(this);
472#if defined(DNDS_DIST_MT_USE_OMP)
473#pragma omp parallel for schedule(static)
474#endif
475 for (index iCell = 0; iCell < symLU->Num(); iCell++)
476 {
477 result[iCell] = dThis->GetDiag(iCell) * x[iCell];
478 for (int ij = 0; ij < symLU->lowerTriStructure[iCell].size(); ij++)
479 result[iCell] += dThis->GetLower(iCell, ij) * x[symLU->lowerTriStructure[iCell][ij]];
480 for (int ij = 0; ij < symLU->upperTriStructure[iCell].size(); ij++)
481 result[iCell] += dThis->GetUpper(iCell, ij) * x[symLU->upperTriStructure[iCell][ij]];
482 }
483 }
484
486 {
487 auto dThis = static_cast<Derived *>(this);
489 const auto &localPartStarts = symLU->localPartStarts;
490 int nParts = localPartStarts.size() - 1;
491#if defined(DNDS_DIST_MT_USE_OMP)
492#pragma omp parallel for schedule(static)
493#endif
494 for (int iPart = 0; iPart < nParts; iPart++)
495 for (index iP = localPartStarts.at(iPart); iP < localPartStarts.at(iPart + 1); iP++)
496 {
497 index i = symLU->FillingReorderNew2Old(iP);
498 result[i] = b[i];
499 auto &&lowerRowOld = symLU->lowerTriStructure[i];
500 for (int ij = 0; ij < lowerRowOld.size(); ij++)
501 {
502 index j = lowerRowOld[ij];
503 result[i] -= dThis->GetLower(i, ij) * result[j];
504 }
505 }
506#if defined(DNDS_DIST_MT_USE_OMP)
507#pragma omp parallel for schedule(static)
508#endif
509 for (int iPart = 0; iPart < nParts; iPart++)
510 for (index iP = localPartStarts.at(iPart + 1) - 1; iP >= localPartStarts.at(iPart); iP--)
511 {
512 index i = symLU->FillingReorderNew2Old(iP);
513 auto &&upperRowOld = symLU->upperTriStructure[i];
514 for (int ij = 0; ij < upperRowOld.size(); ij++)
515 {
516 index j = upperRowOld[ij];
517 result[i] -= dThis->GetUpper(i, ij) * result[j];
518 }
519 // auto luDiag = dThis->GetDiag(i).fullPivLu();
520 // result[i] = luDiag.solve(result[i]);
521 result[i] = dThis->GetDiag(i) * result[i];
522 }
523 }
524 };
525
526 template <class Derived, class tComponent, class tVec>
528 {
530 bool isDecomposed = false;
531
535
536 virtual ~LocalLDLTBase() = default;
537
538 void GetDiag(index i);
539 void GetLower(index i, int ij);
540 void InvertDiag(const tComponent &v);
541
543 {
544 //todo: add pseudo code
545 //todo: make multithread
546 auto dThis = static_cast<Derived *>(this);
547 std::vector<tComponent> diagNoInv(symLU->Num());
548 for (index iP = 0; iP < symLU->Num(); iP++)
549 {
550 index i = symLU->FillingReorderNew2Old(iP);
551
552 auto &&lowerRow = symLU->lowerTriStructureNew[iP];
553 /*********************/
554 // lower part
555 for (int ijP = 0; ijP < lowerRow.size(); ijP++)
556 {
557 auto jP = lowerRow[ijP];
558 DNDS_assert(jP < iP);
559 auto j = symLU->FillingReorderNew2Old(jP);
560 // handle last j's job
561 if (ijP > 0)
562 {
563 auto &&lowerRowJP = symLU->lowerTriStructureNew[jP];
565 lowerRow.begin(), lowerRow.end(), lowerRowJP.begin(), lowerRowJP.end(),
566 [&](index kP, auto pos1, auto pos2)
567 {
568 if (kP > jP - 1)
569 return true; // early end
570 DNDS_assert(kP < symLU->Num()); // a safe guarantee
571 auto k = symLU->FillingReorderNew2Old(kP);
572 dThis->GetLower(i, ijP) -=
573 dThis->GetLower(i, pos1) * diagNoInv[k] * dThis->GetLower(j, pos2).transpose();
574 // if (iP < 3)
575 // log() << fmt::format("Lower Add at {},{},{} === {}", iP, jP - 1, kP, (dThis->GetLower(i, pos1) * dThis->GetUpper(k, jPInUpperPos))(0)) << std::endl;
576 return false;
577 });
578 }
579
580 // auto luDiag = dThis->GetDiag(j).fullPivLu();
581 // tComponent Aij = luDiag.solve(dThis->GetLower(i, ijP));
582 dThis->GetLower(i, ijP) *= dThis->GetDiag(j);
583 }
584 /*********************/
585 // diag part
586 for (int ikP = 0; ikP < lowerRow.size(); ikP++)
587 {
588 auto kP = lowerRow[ikP];
589 auto k = symLU->FillingReorderNew2Old(kP);
590 dThis->GetDiag(i) -= dThis->GetLower(i, ikP) * diagNoInv[k] * dThis->GetLower(i, ikP).transpose();
591 }
592 diagNoInv[i] = dThis->GetDiag(i);
593 dThis->GetDiag(i) = dThis->InvertDiag(dThis->GetDiag(i)); // * note here only stores the inverse
594 }
595 isDecomposed = true;
596 }
597
599 {
600 auto dThis = static_cast<Derived *>(this);
601 DNDS_assert(!isDecomposed); // being before the decomposition
602#if defined(DNDS_DIST_MT_USE_OMP)
603#pragma omp parallel for schedule(static)
604#endif
605 for (index iCell = 0; iCell < symLU->Num(); iCell++)
606 {
607 result[iCell] = dThis->GetDiag(iCell) * x[iCell];
608 for (int ij = 0; ij < symLU->lowerTriStructure[iCell].size(); ij++)
609 result[iCell] += dThis->GetLower(iCell, ij) * x[symLU->lowerTriStructure[iCell][ij]];
610 }
611#if defined(DNDS_DIST_MT_USE_OMP)
612#pragma omp parallel for schedule(static)
613#endif
614 for (index iCell = 0; iCell < symLU->Num(); iCell++)
615 {
616 for (int ij = 0; ij < symLU->lowerTriStructure[iCell].size(); ij++)
617 result[symLU->lowerTriStructure[iCell][ij]] += dThis->GetLower(iCell, ij).transpose() * x[iCell]; // transposed mat-vec
618 }
619 }
620
622 {
623 auto dThis = static_cast<Derived *>(this);
625 const auto &localPartStarts = symLU->localPartStarts;
626 int nParts = localPartStarts.size() - 1;
627#if defined(DNDS_DIST_MT_USE_OMP)
628#pragma omp parallel for schedule(static)
629#endif
630 for (int iPart = 0; iPart < nParts; iPart++)
631 for (index iP = localPartStarts.at(iPart); iP < localPartStarts.at(iPart + 1); iP++)
632 {
633 index i = symLU->FillingReorderNew2Old(iP);
634 result[i] = b[i];
635 auto &&lowerRowOld = symLU->lowerTriStructure[i];
636 for (int ij = 0; ij < lowerRowOld.size(); ij++)
637 {
638 index j = lowerRowOld[ij];
639 result[i] -= dThis->GetLower(i, ij) * result[j];
640 }
641 }
642#if defined(DNDS_DIST_MT_USE_OMP)
643#pragma omp parallel for schedule(static)
644#endif
645 for (index i = 0; i < symLU->Num(); i++)
646 {
647 result[i] = dThis->GetDiag(i) * result[i];
648 }
649#if defined(DNDS_DIST_MT_USE_OMP)
650#pragma omp parallel for schedule(static)
651#endif
652 for (int iPart = 0; iPart < nParts; iPart++)
653 for (index iP = localPartStarts.at(iPart + 1) - 1; iP >= localPartStarts.at(iPart); iP--)
654 {
655 index i = symLU->FillingReorderNew2Old(iP);
656 auto &&upperRow = symLU->upperTriStructureNew[iP];
657 for (int ij = 0; ij < upperRow.size(); ij++)
658 {
659 index jP = upperRow[ij];
660 index j = symLU->FillingReorderNew2Old(jP);
661 index ji = symLU->upperTriStructureNewInLower[iP][ij];
662 DNDS_assert(ji != -1); // has to be found
663 result[i] -= dThis->GetLower(j, ji).transpose() * result[j];
664 }
665 }
666 }
667 };
668}
pybind11-style configuration registration with macro-based field declaration and namespace-scoped tag...
#define DNDS_FIELD(name_, desc_,...)
Register a field inside a DNDS_DECLARE_CONFIG body.
Core type aliases, constants, and metaprogramming utilities for the DNDS framework.
#define DNDS_assert_info(expr, info)
Debug-only assertion with an extra std::string info message.
Definition Errors.hpp:113
#define DNDS_assert(expr)
Debug-only assertion (compiled out when DNDS_NDEBUG is defined). Prints the expression + file/line + ...
Definition Errors.hpp:108
JSON-to-Eigen conversion utilities and nlohmann_json helper macros.
MPI_int Barrier(MPI_Comm comm)
Wrapper over MPI_Barrier.
Definition MPI.cpp:248
void MPISerialDo(const MPIInfo &mpi, F f)
Execute f on each rank serially, in rank order.
Definition MPI.hpp:698
int64_t index
Global row / DOF index type (signed 64-bit; handles multi-billion-cell meshes).
Definition Defines.hpp:107
std::shared_ptr< T > ssp
Shortened alias for std::shared_ptr used pervasively in DNDSR.
Definition Defines.hpp:138
std::ostream & log()
Return the current DNDSR log stream (either std::cout or the installed file).
Definition Defines.cpp:49
bool iterateIdentical(tIt1 it1, tIt1end it1end, tIt2 it2, tIt2end it2end, tF F)
Walk two ordered ranges in lockstep, calling F on each match.
Definition Defines.hpp:596
DNDS_DECLARE_CONFIG(DirectPrecControl)
Definition Direct.hpp:24
void GetLower(index i, int ij)
ssp< Direct::SerialSymLUStructure > symLU
Definition Direct.hpp:529
void Solve(tVec &b, tVec &result)
Definition Direct.hpp:621
virtual ~LocalLDLTBase()=default
void InvertDiag(const tComponent &v)
LocalLDLTBase(ssp< Direct::SerialSymLUStructure > _symLU)
Definition Direct.hpp:532
void MatMul(tVec &x, tVec &result)
Definition Direct.hpp:598
ssp< Direct::SerialSymLUStructure > symLU
Definition Direct.hpp:291
void Solve(tVec &b, tVec &result)
Definition Direct.hpp:485
void GetLower(index i, int ij)
LocalLUBase(ssp< Direct::SerialSymLUStructure > _symLU)
Definition Direct.hpp:294
virtual ~LocalLUBase()=default
void MatMul(tVec &x, tVec &result)
Definition Direct.hpp:468
void InvertDiag(const tComponent &v)
void GetUpper(index i, int ij)
index FillingReorderOld2New(index v)
Definition Direct.hpp:64
tLocalMatStruct upperTriStructureNew
Definition Direct.hpp:51
tLocalMatStruct lowerTriStructureNewInUpper
Definition Direct.hpp:52
tLocalMatStruct lowerTriStructure
Definition Direct.hpp:48
std::vector< index > localFillOrderingNew2Old
Definition Direct.hpp:56
tLocalMatStruct upperTriStructureNewInLower
Definition Direct.hpp:53
void ObtainSymmetricSymbolicFactorization(const TAdj &cell2cellFaceV, const std::vector< index > localPartStarts_in, int iluCode)
get symmetric symbolic matrix factorization over cell2cellFaceV
Definition Direct.hpp:81
index FillingReorderNew2Old(index v)
Definition Direct.hpp:68
SerialSymLUStructure(const MPIInfo &nMpi, index nN)
Definition Direct.hpp:60
tLocalMatStruct upperTriStructure
Definition Direct.hpp:49
tLocalMatStruct cell2cellFaceVLocal2FullRowPos
Definition Direct.hpp:54
std::vector< index > localFillOrderingOld2New
Definition Direct.hpp:55
std::vector< index > localPartStarts
Definition Direct.hpp:58
tLocalMatStruct lowerTriStructureNew
Definition Direct.hpp:50
std::vector< std::vector< index > > tLocalMatStruct
Definition Direct.hpp:47
Lightweight bundle of an MPI communicator and the calling rank's coordinates.
Definition MPI.hpp:215
int rank
This rank's 0-based index within comm (-1 until initialised).
Definition MPI.hpp:219
MPI_Comm comm
The underlying MPI communicator handle.
Definition MPI.hpp:217
Eigen::Matrix< real, 5, 1 > v
tVec x(NCells)
tVec b(NCells)
std::vector< BVec > tVec
auto result