DNDSR 0.2.1
Distributed Numeric Data Structure for CFV
Loading...
Searching...
No Matches
Direct.hpp
Go to the documentation of this file.
1#pragma once
2
3// #ifndef __DNDS_REALLY_COMPILING__
4// #define __DNDS_REALLY_COMPILING__
5// #define __DNDS_REALLY_COMPILING__HEADER_ON__
6// #endif
9#include "DNDS/Defines.hpp"
10// #ifdef __DNDS_REALLY_COMPILING__HEADER_ON__
11// #undef __DNDS_REALLY_COMPILING__
12// #endif
13
14#include <unordered_set>
15
16namespace DNDS::Direct
17{
19 {
20 bool useDirectPrec = false;
21 int32_t iluCode = 0; // 0 for no fill, -1 for complete
22 int32_t orderingCode = INT32_MIN; // INT32_MIN for auto 0 for natural, 1 for metis, 2 for MMD
23
25 {
26 // clang-format off
27 DNDS_FIELD(useDirectPrec, "Enable direct preconditioner");
28 DNDS_FIELD(iluCode, "ILU fill level: 0=no fill, -1=complete");
29 DNDS_FIELD(orderingCode, "Ordering: INT32_MIN=auto, 0=natural, 1=metis, 2=MMD");
30 // clang-format on
31 }
32
33 [[nodiscard]] int getOrderingCode() const
34 {
35 return orderingCode == INT32_MIN
36 ? (iluCode == -1 ? 2 : 0) // auto decide
38 }
39 [[nodiscard]] int getILUCode() const { return iluCode; }
40 };
41}
42
43namespace DNDS::Direct
44{
46 {
49 using tLocalMatStruct = std::vector<std::vector<index>>;
57 std::vector<index> localFillOrderingOld2New;
58 std::vector<index> localFillOrderingNew2Old;
59
60 std::vector<index> localPartStarts;
61
62 SerialSymLUStructure(const MPIInfo &nMpi, index nN) : mpi(nMpi), N(nN){};
63
64 [[nodiscard]] index Num() const { return N; }
65
74
75 /**
76 * @brief get symmetric symbolic matrix factorization over cell2cellFaceV
77 *
78 * @tparam TAdj
79 * @param cell2cellFaceV is std::vector<std::vector<index>> -like which holds **symmetric** local adjacency
80 * @param iluCode -1: full LU, 0,1,2... incomplete LU defined using expanded stencil
81 */
82 template <class TAdj>
84 const TAdj &cell2cellFaceV,
85 const std::vector<index> localPartStarts_in,
86 int iluCode)
87 {
88 localPartStarts = localPartStarts_in;
89 { // assert on partition
90 int nParts = localPartStarts.size() - 1;
91 if (nParts >= 1)
92 DNDS_assert(localPartStarts[0] == 0 && localPartStarts.back() == this->Num());
93 else // handle when localPartStarts_in is not large enough
94 {
95 localPartStarts.resize(2);
96 localPartStarts[0] = 0;
97 localPartStarts[1] = this->Num();
98 }
99
100 for (int iPart = 0; iPart < nParts; iPart++)
101 {
102 index start = localPartStarts[iPart];
103 index end = localPartStarts[iPart + 1];
104 DNDS_assert(start <= end);
105 for (index iCell = localPartStarts[iPart]; iCell < localPartStarts[iPart + 1]; iCell++)
106 {
107 index n2o = this->FillingReorderNew2Old(iCell);
108 index o2n = this->FillingReorderOld2New(iCell);
109 DNDS_assert(start <= n2o && n2o < end);
110 DNDS_assert(start <= o2n && o2n < end);
111 }
112 }
113 }
114
115 std::vector<std::unordered_set<index>> cell2cellFaceVEnlarged;
116 if (iluCode > 0) // do expansion of stencil
117 {
118 DNDS_assert(iluCode <= 5);
119 cell2cellFaceVEnlarged.resize(this->Num());
120 for (index iCell = 0; iCell < this->Num(); iCell++)
121 for (auto iCO : cell2cellFaceV[iCell])
122 cell2cellFaceVEnlarged[iCell].insert(iCO);
123 for (int iFill = 0; iFill < iluCode; iFill++)
124 {
125 for (index iCell = 0; iCell < this->Num(); iCell++)
126 {
127 std::unordered_set<index> newNeighbor;
128 for (auto iCO : cell2cellFaceVEnlarged[iCell])
129 for (auto iCOO : cell2cellFaceV[iCO])
130 newNeighbor.insert(iCOO);
131 for (auto iCN : newNeighbor)
132 cell2cellFaceVEnlarged[iCell].insert(iCN);
133 }
134 }
135 for (index iCell = 0; iCell < this->Num(); iCell++)
136 cell2cellFaceVEnlarged[iCell].erase(iCell); // this is redundant
137 }
138
139 std::vector<std::unordered_set<index>> triLowRows;
140 std::vector<std::unordered_set<index>> triUppRows;
141 std::vector<std::unordered_set<index>> midSymMatCols;
142 triLowRows.resize(this->Num());
143 triUppRows.resize(this->Num());
144 midSymMatCols.resize(this->Num());
145 index nnzOrig{this->Num()};
146
147 for (index iCellP = 0; iCellP < this->Num(); iCellP++) // iterate over the columns
148 {
149 index iCell = this->FillingReorderNew2Old(iCellP);
150 midSymMatCols[iCellP].insert(iCellP);
151 for (auto iCellOther : cell2cellFaceV[iCell])
152 {
153 index iCellOtherP = this->FillingReorderOld2New(iCellOther);
154 midSymMatCols[iCellP].insert(iCellOtherP); // assuming cell2cellFaceV is symmetric
155 }
156 nnzOrig += cell2cellFaceV[iCell].size();
157 }
158
159 for (index iCellP = 0; iCellP < this->Num(); iCellP++) // iterate over the columns
160 {
161 for (auto iCellOtherP : midSymMatCols[iCellP]) // emulate the symmetric factorization A L1L2L3...LN D LNT...L1T
162 if (iCellOtherP > iCellP)
163 {
164 index iCellOther = this->FillingReorderNew2Old(iCellOtherP);
165 for (auto iCellOtherPP : midSymMatCols[iCellP])
166 if (iCellOtherPP > iCellOtherP)
167 {
168 index iCellOtherOther = this->FillingReorderNew2Old(iCellOtherPP);
169 // to be always symmetric
170 // control over here to get incomplete LU structure
171 if (iluCode < 0 || (iluCode > 0 && cell2cellFaceVEnlarged[iCellOther].count(iCellOtherOther)))
172 {
173 midSymMatCols[iCellOtherPP].insert(iCellOtherP); // upper part
174 midSymMatCols[iCellOtherP].insert(iCellOtherPP); // lower part
175 }
176 }
177 triLowRows[iCellOtherP].insert(iCellP); // iCellP is iCol, iCellOtherP is iRow
178 triUppRows[iCellP].insert(iCellOtherP);
179 }
180 }
181
182 lowerTriStructure.resize(this->Num());
183 upperTriStructure.resize(this->Num());
184 lowerTriStructureNew.resize(this->Num());
185 upperTriStructureNew.resize(this->Num());
186 index nnzLower{0}, nnzUpper{0};
187 for (index iCellP = 0; iCellP < this->Num(); iCellP++)
188 {
189 index iCell = this->FillingReorderNew2Old(iCellP);
190 lowerTriStructure[iCell].reserve(triLowRows[iCellP].size());
191 upperTriStructure[iCell].reserve(triUppRows[iCellP].size());
192 lowerTriStructureNew[iCellP].reserve(triLowRows[iCellP].size());
193 upperTriStructureNew[iCellP].reserve(triUppRows[iCellP].size());
194 for (auto iCellOtherP : triLowRows[iCellP])
195 lowerTriStructureNew[iCellP].push_back(iCellOtherP);
196 for (auto iCellOtherP : triUppRows[iCellP])
197 upperTriStructureNew[iCellP].push_back(iCellOtherP);
198 std::sort(lowerTriStructureNew[iCellP].begin(), lowerTriStructureNew[iCellP].end());
199 std::sort(upperTriStructureNew[iCellP].begin(), upperTriStructureNew[iCellP].end());
200 for (auto iCellOtherP : lowerTriStructureNew[iCellP])
201 lowerTriStructure[iCell].push_back(this->FillingReorderNew2Old(iCellOtherP));
202 for (auto iCellOtherP : upperTriStructureNew[iCellP])
203 upperTriStructure[iCell].push_back(this->FillingReorderNew2Old(iCellOtherP));
204
205 nnzLower += lowerTriStructure[iCell].size();
206 nnzUpper += upperTriStructure[iCell].size();
207 // the lowerTriStructure and upperTriStructure's col indices are corresponding to
208 // those of in *New Rows
209 // col indices in *New Rows are sorted
210 }
211 if (mpi.rank == 0)
212 log() << "Direct::ObtainSymmetricSymbolicFactorization(): Factorizing Done" << std::endl;
213 MPISerialDo(mpi, [&]()
214 { log() << fmt::format(" ({}, {}/{})", mpi.rank,
215 nnzLower + nnzUpper + this->Num(),
216 nnzOrig)
217 << (((mpi.rank + 1) % 10) ? "" : "\n")
218 << std::flush; });
220 if (mpi.rank == 0)
221 log() << std::endl;
222 DNDS_assert_info(nnzLower == nnzUpper, fmt::format("nnzLower,Upper: [{},{}]", nnzLower, nnzUpper));
223
224 // pre-search
226 for (index iP = 0; iP < this->Num(); iP++)
227 {
228 auto &&lowerRow = lowerTriStructureNew[iP];
229 lowerTriStructureNewInUpper[iP].resize(lowerRow.size());
230 for (int ijP = 0; ijP < lowerRow.size(); ijP++)
231 {
232 index jP = lowerRow[ijP];
233 auto &&upperRow = upperTriStructureNew[jP];
234 auto ret = std::lower_bound(upperRow.begin(), upperRow.end(), iP);
235 // DNDS_assert(ret != upperRow.end()); // has to be found
236 lowerTriStructureNewInUpper[iP][ijP] = ret != upperRow.end()
237 ? (ret - upperRow.begin())
238 : -1; // !not found in upper
239 }
240 }
241
242 // pre-search
244 for (index iP = 0; iP < this->Num(); iP++)
245 {
246 auto &&upperRow = upperTriStructureNew[iP];
247 upperTriStructureNewInLower[iP].resize(upperRow.size());
248 for (int ijP = 0; ijP < upperRow.size(); ijP++)
249 {
250 index jP = upperRow[ijP];
251 auto &&lowerRow = lowerTriStructureNew[jP];
252 auto ret = std::lower_bound(lowerRow.begin(), lowerRow.end(), iP);
253 // DNDS_assert(ret != upperRow.end()); // has to be found
254 upperTriStructureNewInLower[iP][ijP] = ret != lowerRow.end()
255 ? (ret - lowerRow.begin())
256 : -1; // !not found in upper
257 }
258 }
259
260 cell2cellFaceVLocal2FullRowPos.resize(this->Num());
261 for (index i = 0; i < this->Num(); i++)
262 {
263 index iP = this->FillingReorderOld2New(i);
264 cell2cellFaceVLocal2FullRowPos[i].resize(cell2cellFaceV[i].size(), -1);
265 for (int ic2c = 0; ic2c < cell2cellFaceV[i].size(); ic2c++)
266 {
267 index j = cell2cellFaceV[i][ic2c];
268 index jP = this->FillingReorderOld2New(j);
269 if (jP < iP)
270 {
271 auto &&row = lowerTriStructureNew[iP];
272 auto ret = std::lower_bound(row.begin(), row.end(), jP);
273 DNDS_assert(ret != row.end());
275 (ret - row.begin()) + 1;
276 }
277 else if (jP > iP)
278 {
279 auto &&row = upperTriStructureNew[iP];
280 auto ret = std::lower_bound(row.begin(), row.end(), jP);
281 DNDS_assert(ret != row.end());
283 (ret - row.begin()) + lowerTriStructureNew[iP].size() + 1;
284 }
285 }
286 }
287 }
288 };
289
290 template <class Derived, class tComponent, class tVec>
292 {
294 bool isDecomposed = false;
295
299
300 virtual ~LocalLUBase() = default;
301
302 void GetDiag(index i); // pure "virtual" do not implement
303 void GetLower(index i, int ij); // pure "virtual" do not implement
304 void GetUpper(index i, int ij); // pure "virtual" do not implement
305 void InvertDiag(const tComponent &v);
306
308 {
310 }
311
313 {
314 auto dThis = static_cast<Derived *>(this);
315 const auto &localPartStarts = symLU->localPartStarts;
316 int nParts = localPartStarts.size() - 1;
317#if defined(DNDS_DIST_MT_USE_OMP)
318# pragma omp parallel for schedule(static)
319#endif
320 for (int iPart = 0; iPart < nParts; iPart++)
321 for (index iP = localPartStarts.at(iPart); iP < localPartStarts.at(iPart + 1); iP++)
322 {
323 index i = symLU->FillingReorderNew2Old(iP);
324
325 auto &&lowerRow = symLU->lowerTriStructureNew[iP];
326 /*********************/
327 // lower part
328 for (int ijP = 0; ijP < lowerRow.size(); ijP++) //! for(int jP = 0; jP < iP; jP++)
329 {
330 auto jP = lowerRow[ijP];
331 DNDS_assert(jP < iP);
332 auto j = symLU->FillingReorderNew2Old(jP);
333 // handle last j's job
334 if (ijP > 0) //! if(jP > 0)
335 {
336 auto &&lowerRowJP = symLU->lowerTriStructureNew[jP];
337 iterateIdentical( //! for(int kP = 0; kP < jP; kP++)
338 lowerRow.begin(), lowerRow.end(), lowerRowJP.begin(), lowerRowJP.end(),
339 [&](index kP, auto pos1, auto pos2)
340 {
341 if (kP > jP - 1)
342 return true; // early end
343 DNDS_assert(kP < symLU->Num()); // a safe guarantee
344 auto k = symLU->FillingReorderNew2Old(kP);
345 int jPInUpperPos = symLU->lowerTriStructureNewInUpper[jP][pos2];
346 if (jPInUpperPos != -1) // in case not symbolically symmetric
347 dThis->GetLower(i, ijP) -=
348 dThis->GetLower(i, pos1) * dThis->GetUpper(k, jPInUpperPos);
349 //! LU(iP, jP) -= LU(iP, kP) * LU(kP, jP);
350 // if (iP < 3)
351 // log() << fmt::format("Lower Add at {},{},{} === {}", iP, jP - 1, kP, (dThis->GetLower(i, pos1) * dThis->GetUpper(k, jPInUpperPos))(0)) << std::endl;
352 return false;
353 });
354 }
355
356 // auto luDiag = dThis->GetDiag(j).fullPivLu();
357 // tComponent Aij = luDiag.solve(dThis->GetLower(i, ijP));
358 dThis->GetLower(i, ijP) *= dThis->GetDiag(j); //! LU(iP, jP) *= LU(jP, jP);
359 }
360 /*********************/
361 // diag part
362 for (int ikP = 0; ikP < lowerRow.size(); ikP++) //! for(int kP = 0; kP < iP; kP++)
363 {
364 auto kP = lowerRow[ikP];
365 auto k = symLU->FillingReorderNew2Old(kP);
366 int iPInUpperPos = symLU->lowerTriStructureNewInUpper[iP][ikP];
367 if (iPInUpperPos != -1)
368 dThis->GetDiag(i) -= dThis->GetLower(i, ikP) * dThis->GetUpper(k, iPInUpperPos);
369 //! LU(iP, iP) -= LU(iP, kP) * LU(kP, iP);
370 }
371
372 dThis->GetDiag(i) = dThis->InvertDiag(dThis->GetDiag(i)); // * note here only stores
373 //! LU(iP, iP) = inv(LU(iP, iP));
374 /*********************/
375 // upper part
376 auto &&upperRow = symLU->upperTriStructureNew[iP];
377 for (int ijP = 0; ijP < upperRow.size(); ijP++) //! for(int jP = iP+1; jP < N; jP++)
378 {
379 auto jP = upperRow[ijP];
380 DNDS_assert(jP > iP);
381 auto j = symLU->FillingReorderNew2Old(jP);
382 auto &&lowerRowJP = symLU->lowerTriStructureNew[jP];
383
384 iterateIdentical( //! for(int kP = 0; kP < iP; kP++)
385 lowerRow.begin(), lowerRow.end(), lowerRowJP.begin(), lowerRowJP.end(),
386 [&](index kP, auto pos1, auto pos2)
387 {
388 if (kP >= iP)
389 return true;
390 DNDS_assert(kP < symLU->Num());
391 auto k = symLU->FillingReorderNew2Old(kP);
392 int jPInUpperPos = symLU->lowerTriStructureNewInUpper[jP][pos2];
393 if (jPInUpperPos != -1)
394 dThis->GetUpper(i, ijP) -= dThis->GetLower(i, pos1) * dThis->GetUpper(k, jPInUpperPos);
395 //! LU(iP, jP) -= LU(iP, kP) * LU(kP, jP);
396 // if (iP < 3)
397 // log() << fmt::format("Upper Add at {},{},{} === {}", iP, jP, kP,
398 // (this->GetLower(i, pos1) * this->GetUpper(k, jPInUpperPos))(0))
399 // << std::endl;
400 return false;
401 });
402 }
403 }
404 isDecomposed = true;
405 }
406
408 {
409 auto dThis = static_cast<Derived *>(this);
410 for (index iP = 0; iP < symLU->Num(); iP++)
411 {
412 index i = symLU->FillingReorderNew2Old(iP);
413
414 auto &&lowerRow = symLU->lowerTriStructureNew[iP];
415
416 /*********************/
417 // diag part
418 for (int ikP = 0; ikP < lowerRow.size(); ikP++) //! for(int kP = 0; kP < iP; kP++)
419 {
420 auto kP = lowerRow[ikP];
421 auto k = symLU->FillingReorderNew2Old(kP);
422 int iPInUpperPos = symLU->lowerTriStructureNewInUpper[iP][ikP];
423 if (iPInUpperPos != -1)
424 dThis->GetDiag(i) -= dThis->GetLower(i, ikP) * dThis->GetUpper(k, iPInUpperPos);
425 //! LU(iP, iP) -= LU(iP, kP) * LU(kP, iP);
426 }
427
428 dThis->GetDiag(i) = dThis->InvertDiag(dThis->GetDiag(i)); // * note here only stores
429 //! LU(iP, iP) = inv(LU(iP, iP));
430 /*********************/
431 // upper and lower part
432 auto &&upperRow = symLU->upperTriStructureNew[iP];
433
434 // #if defined(DNDS_DIST_MT_USE_OMP) // this OMP does not improve
435 // #pragma omp parallel for schedule(static)
436 // #endif
437 for (int ijP = 0; ijP < upperRow.size(); ijP++) //! for(int jP = iP+1; jP < N; jP++)
438 {
439 auto jP = upperRow[ijP];
440 DNDS_assert(jP > iP);
441 auto j = symLU->FillingReorderNew2Old(jP);
442 auto &&lowerRowJP = symLU->lowerTriStructureNew[jP];
443 int iPInLowerPosOfJRow = symLU->upperTriStructureNewInLower[iP][ijP];
444
445 iterateIdentical( //! for(int kP = 0; kP < iP; kP++)
446 lowerRow.begin(), lowerRow.end(), lowerRowJP.begin(), lowerRowJP.end(),
447 [&](index kP, auto pos1, auto pos2)
448 {
449 if (kP >= iP)
450 return true;
451 DNDS_assert(kP < symLU->Num());
452 auto k = symLU->FillingReorderNew2Old(kP);
453 int jPInUpperPos = symLU->lowerTriStructureNewInUpper[jP][pos2];
454 int iPInUpperPos = symLU->lowerTriStructureNewInUpper[iP][pos1];
455 if (jPInUpperPos != -1)
456 dThis->GetUpper(i, ijP) -= dThis->GetLower(i, pos1) * dThis->GetUpper(k, jPInUpperPos);
457 //! LU(iP, jP) -= LU(iP, kP) * LU(kP, jP);
458 if (iPInUpperPos != -1)
459 dThis->GetLower(j, iPInLowerPosOfJRow) -= dThis->GetLower(j, pos2) * dThis->GetUpper(k, iPInUpperPos);
460 // !LU(jP, iP) -= LU(jP, kP) * LU(kP, iP);
461 return false;
462 });
463 dThis->GetLower(j, iPInLowerPosOfJRow) *= dThis->GetDiag(i);
464 // !LU(jP, iP) *= inv(LU(iP, iP));
465 }
466 }
467 isDecomposed = true;
468 }
469
471 {
472 auto dThis = static_cast<Derived *>(this);
474#if defined(DNDS_DIST_MT_USE_OMP)
475# pragma omp parallel for schedule(static)
476#endif
477 for (index iCell = 0; iCell < symLU->Num(); iCell++)
478 {
479 result[iCell] = dThis->GetDiag(iCell) * x[iCell];
480 for (int ij = 0; ij < symLU->lowerTriStructure[iCell].size(); ij++)
481 result[iCell] += dThis->GetLower(iCell, ij) * x[symLU->lowerTriStructure[iCell][ij]];
482 for (int ij = 0; ij < symLU->upperTriStructure[iCell].size(); ij++)
483 result[iCell] += dThis->GetUpper(iCell, ij) * x[symLU->upperTriStructure[iCell][ij]];
484 }
485 }
486
488 {
489 auto dThis = static_cast<Derived *>(this);
491 const auto &localPartStarts = symLU->localPartStarts;
492 int nParts = localPartStarts.size() - 1;
493#if defined(DNDS_DIST_MT_USE_OMP)
494# pragma omp parallel for schedule(static)
495#endif
496 for (int iPart = 0; iPart < nParts; iPart++)
497 for (index iP = localPartStarts.at(iPart); iP < localPartStarts.at(iPart + 1); iP++)
498 {
499 index i = symLU->FillingReorderNew2Old(iP);
500 result[i] = b[i];
501 auto &&lowerRowOld = symLU->lowerTriStructure[i];
502 for (int ij = 0; ij < lowerRowOld.size(); ij++)
503 {
504 index j = lowerRowOld[ij];
505 result[i] -= dThis->GetLower(i, ij) * result[j];
506 }
507 }
508#if defined(DNDS_DIST_MT_USE_OMP)
509# pragma omp parallel for schedule(static)
510#endif
511 for (int iPart = 0; iPart < nParts; iPart++)
512 for (index iP = localPartStarts.at(iPart + 1) - 1; iP >= localPartStarts.at(iPart); iP--)
513 {
514 index i = symLU->FillingReorderNew2Old(iP);
515 auto &&upperRowOld = symLU->upperTriStructure[i];
516 for (int ij = 0; ij < upperRowOld.size(); ij++)
517 {
518 index j = upperRowOld[ij];
519 result[i] -= dThis->GetUpper(i, ij) * result[j];
520 }
521 // auto luDiag = dThis->GetDiag(i).fullPivLu();
522 // result[i] = luDiag.solve(result[i]);
523 result[i] = dThis->GetDiag(i) * result[i];
524 }
525 }
526 };
527
528 template <class Derived, class tComponent, class tVec>
530 {
532 bool isDecomposed = false;
533
537
538 virtual ~LocalLDLTBase() = default;
539
541 void GetLower(index i, int ij);
542 void InvertDiag(const tComponent &v);
543
545 {
546 // todo: add pseudo code
547 // todo: make multithread
548 auto dThis = static_cast<Derived *>(this);
549 std::vector<tComponent> diagNoInv(symLU->Num());
550 for (index iP = 0; iP < symLU->Num(); iP++)
551 {
552 index i = symLU->FillingReorderNew2Old(iP);
553
554 auto &&lowerRow = symLU->lowerTriStructureNew[iP];
555 /*********************/
556 // lower part
557 for (int ijP = 0; ijP < lowerRow.size(); ijP++)
558 {
559 auto jP = lowerRow[ijP];
560 DNDS_assert(jP < iP);
561 auto j = symLU->FillingReorderNew2Old(jP);
562 // handle last j's job
563 if (ijP > 0)
564 {
565 auto &&lowerRowJP = symLU->lowerTriStructureNew[jP];
567 lowerRow.begin(), lowerRow.end(), lowerRowJP.begin(), lowerRowJP.end(),
568 [&](index kP, auto pos1, auto pos2)
569 {
570 if (kP > jP - 1)
571 return true; // early end
572 DNDS_assert(kP < symLU->Num()); // a safe guarantee
573 auto k = symLU->FillingReorderNew2Old(kP);
574 dThis->GetLower(i, ijP) -=
575 dThis->GetLower(i, pos1) * diagNoInv[k] * dThis->GetLower(j, pos2).transpose();
576 // if (iP < 3)
577 // log() << fmt::format("Lower Add at {},{},{} === {}", iP, jP - 1, kP, (dThis->GetLower(i, pos1) * dThis->GetUpper(k, jPInUpperPos))(0)) << std::endl;
578 return false;
579 });
580 }
581
582 // auto luDiag = dThis->GetDiag(j).fullPivLu();
583 // tComponent Aij = luDiag.solve(dThis->GetLower(i, ijP));
584 dThis->GetLower(i, ijP) *= dThis->GetDiag(j);
585 }
586 /*********************/
587 // diag part
588 for (int ikP = 0; ikP < lowerRow.size(); ikP++)
589 {
590 auto kP = lowerRow[ikP];
591 auto k = symLU->FillingReorderNew2Old(kP);
592 dThis->GetDiag(i) -= dThis->GetLower(i, ikP) * diagNoInv[k] * dThis->GetLower(i, ikP).transpose();
593 }
594 diagNoInv[i] = dThis->GetDiag(i);
595 dThis->GetDiag(i) = dThis->InvertDiag(dThis->GetDiag(i)); // * note here only stores the inverse
596 }
597 isDecomposed = true;
598 }
599
601 {
602 auto dThis = static_cast<Derived *>(this);
603 DNDS_assert(!isDecomposed); // being before the decomposition
604#if defined(DNDS_DIST_MT_USE_OMP)
605# pragma omp parallel for schedule(static)
606#endif
607 for (index iCell = 0; iCell < symLU->Num(); iCell++)
608 {
609 result[iCell] = dThis->GetDiag(iCell) * x[iCell];
610 for (int ij = 0; ij < symLU->lowerTriStructure[iCell].size(); ij++)
611 result[iCell] += dThis->GetLower(iCell, ij) * x[symLU->lowerTriStructure[iCell][ij]];
612 }
613#if defined(DNDS_DIST_MT_USE_OMP)
614# pragma omp parallel for schedule(static)
615#endif
616 for (index iCell = 0; iCell < symLU->Num(); iCell++)
617 {
618 for (int ij = 0; ij < symLU->lowerTriStructure[iCell].size(); ij++)
619 result[symLU->lowerTriStructure[iCell][ij]] += dThis->GetLower(iCell, ij).transpose() * x[iCell]; // transposed mat-vec
620 }
621 }
622
624 {
625 auto dThis = static_cast<Derived *>(this);
627 const auto &localPartStarts = symLU->localPartStarts;
628 int nParts = localPartStarts.size() - 1;
629#if defined(DNDS_DIST_MT_USE_OMP)
630# pragma omp parallel for schedule(static)
631#endif
632 for (int iPart = 0; iPart < nParts; iPart++)
633 for (index iP = localPartStarts.at(iPart); iP < localPartStarts.at(iPart + 1); iP++)
634 {
635 index i = symLU->FillingReorderNew2Old(iP);
636 result[i] = b[i];
637 auto &&lowerRowOld = symLU->lowerTriStructure[i];
638 for (int ij = 0; ij < lowerRowOld.size(); ij++)
639 {
640 index j = lowerRowOld[ij];
641 result[i] -= dThis->GetLower(i, ij) * result[j];
642 }
643 }
644#if defined(DNDS_DIST_MT_USE_OMP)
645# pragma omp parallel for schedule(static)
646#endif
647 for (index i = 0; i < symLU->Num(); i++)
648 {
649 result[i] = dThis->GetDiag(i) * result[i];
650 }
651#if defined(DNDS_DIST_MT_USE_OMP)
652# pragma omp parallel for schedule(static)
653#endif
654 for (int iPart = 0; iPart < nParts; iPart++)
655 for (index iP = localPartStarts.at(iPart + 1) - 1; iP >= localPartStarts.at(iPart); iP--)
656 {
657 index i = symLU->FillingReorderNew2Old(iP);
658 auto &&upperRow = symLU->upperTriStructureNew[iP];
659 for (int ij = 0; ij < upperRow.size(); ij++)
660 {
661 index jP = upperRow[ij];
662 index j = symLU->FillingReorderNew2Old(jP);
663 index ji = symLU->upperTriStructureNewInLower[iP][ij];
664 DNDS_assert(ji != -1); // has to be found
665 result[i] -= dThis->GetLower(j, ji).transpose() * result[j];
666 }
667 }
668 }
669 };
670}
pybind11-style configuration registration with macro-based field declaration and namespace-scoped tag...
#define DNDS_FIELD(name_, desc_,...)
Register a field inside a DNDS_DECLARE_CONFIG body.
Core type aliases, constants, and metaprogramming utilities for the DNDS framework.
#define DNDS_assert_info(expr, info)
Debug-only assertion with an extra std::string info message.
Definition Errors.hpp:117
#define DNDS_assert(expr)
Debug-only assertion (compiled out when DNDS_NDEBUG is defined). Prints the expression + file/line + ...
Definition Errors.hpp:112
JSON-to-Eigen conversion utilities and nlohmann_json helper macros.
MPI_int Barrier(MPI_Comm comm)
Wrapper over MPI_Barrier.
Definition MPI.cpp:247
void MPISerialDo(const MPIInfo &mpi, F f)
Execute f on each rank serially, in rank order.
Definition MPI.hpp:749
int64_t index
Global row / DOF index type (signed 64-bit; handles multi-billion-cell meshes).
Definition Defines.hpp:112
std::shared_ptr< T > ssp
Shortened alias for std::shared_ptr used pervasively in DNDSR.
Definition Defines.hpp:143
std::ostream & log()
Return the current DNDSR log stream (either std::cout or the installed file).
Definition Defines.cpp:50
bool iterateIdentical(tIt1 it1, tIt1end it1end, tIt2 it2, tIt2end it2end, tF F)
Walk two ordered ranges in lockstep, calling F on each match.
Definition Defines.hpp:601
DNDS_DECLARE_CONFIG(DirectPrecControl)
Definition Direct.hpp:24
void GetLower(index i, int ij)
ssp< Direct::SerialSymLUStructure > symLU
Definition Direct.hpp:531
void Solve(tVec &b, tVec &result)
Definition Direct.hpp:623
virtual ~LocalLDLTBase()=default
void InvertDiag(const tComponent &v)
LocalLDLTBase(ssp< Direct::SerialSymLUStructure > _symLU)
Definition Direct.hpp:534
void MatMul(tVec &x, tVec &result)
Definition Direct.hpp:600
ssp< Direct::SerialSymLUStructure > symLU
Definition Direct.hpp:293
void Solve(tVec &b, tVec &result)
Definition Direct.hpp:487
void GetLower(index i, int ij)
LocalLUBase(ssp< Direct::SerialSymLUStructure > _symLU)
Definition Direct.hpp:296
virtual ~LocalLUBase()=default
void MatMul(tVec &x, tVec &result)
Definition Direct.hpp:470
void InvertDiag(const tComponent &v)
void GetUpper(index i, int ij)
index FillingReorderOld2New(index v)
Definition Direct.hpp:66
tLocalMatStruct upperTriStructureNew
Definition Direct.hpp:53
tLocalMatStruct lowerTriStructureNewInUpper
Definition Direct.hpp:54
tLocalMatStruct lowerTriStructure
Definition Direct.hpp:50
std::vector< index > localFillOrderingNew2Old
Definition Direct.hpp:58
tLocalMatStruct upperTriStructureNewInLower
Definition Direct.hpp:55
void ObtainSymmetricSymbolicFactorization(const TAdj &cell2cellFaceV, const std::vector< index > localPartStarts_in, int iluCode)
get symmetric symbolic matrix factorization over cell2cellFaceV
Definition Direct.hpp:83
index FillingReorderNew2Old(index v)
Definition Direct.hpp:70
SerialSymLUStructure(const MPIInfo &nMpi, index nN)
Definition Direct.hpp:62
tLocalMatStruct upperTriStructure
Definition Direct.hpp:51
tLocalMatStruct cell2cellFaceVLocal2FullRowPos
Definition Direct.hpp:56
std::vector< index > localFillOrderingOld2New
Definition Direct.hpp:57
std::vector< index > localPartStarts
Definition Direct.hpp:60
tLocalMatStruct lowerTriStructureNew
Definition Direct.hpp:52
std::vector< std::vector< index > > tLocalMatStruct
Definition Direct.hpp:49
Lightweight bundle of an MPI communicator and the calling rank's coordinates.
Definition MPI.hpp:231
int rank
This rank's 0-based index within comm (-1 until initialised).
Definition MPI.hpp:235
MPI_Comm comm
The underlying MPI communicator handle.
Definition MPI.hpp:233
Eigen::Matrix< real, 5, 1 > v
tVec x(NCells)
tVec b(NCells)
std::vector< BVec > tVec
auto result
input explicitMaps push_back(EntityReorderMap{EntityKind::Cell, cellPartition})