DNDSR 0.1.0.dev1+gcd065ad
Distributed Numeric Data Structure for CFV
Loading...
Searching...
No Matches
ArrayPair.hpp
Go to the documentation of this file.
1#pragma once
2/// @file ArrayPair.hpp
3/// @brief Father-son array pairs with device views and ghost communication.
4
12#include "DNDS/Defines.hpp"
14#include "DNDS/Errors.hpp"
15#include "DeviceView.hpp"
16#include <fmt/format.h>
17namespace DNDS
18{
19
20 /**
21 * @brief CRTP base implementing the unified-index accessors shared by
22 * @ref DNDS::ArrayPairDeviceView "ArrayPairDeviceView" and @ref DNDS::ArrayPairDeviceViewConst "ArrayPairDeviceViewConst".
23 *
24 * @details Indices in `[0, father.Size())` map to the owned-side view; indices
25 * in `[father.Size(), father.Size() + son.Size())` map to the ghost-side view
26 * with an offset subtraction. This lets stencil loops treat the father/son
27 * pair as one contiguous array. Device-callable.
28 */
29 template <class Derived>
31 {
32 /// @brief Combined father + son row count.
33 DNDS_DEVICE_CALLABLE [[nodiscard]] index Size() const
34 {
35 auto dThis = static_cast<const Derived *>(this);
36 return dThis->father.Size() + dThis->son.Size();
37 }
38
39 /// @brief Uniform row width (delegates to father; father/son share it).
41 {
42 auto dThis = static_cast<const Derived *>(this);
43 return dThis->father.RowSize();
44 }
45
46 /// @brief Per-row width in the combined address space.
48 {
49 auto dThis = static_cast<const Derived *>(this);
50 if (i >= 0 && i < dThis->father.Size())
51 return dThis->father.RowSize(i);
52 else
53 return dThis->son.RowSize(i - dThis->father.Size());
54 }
55
56 /// @brief Row pointer for index `i` in the combined address space (const).
58 {
59 auto dThis = static_cast<const Derived *>(this);
60 if (i >= 0 && i < dThis->father.Size())
61 return dThis->father.operator[](i);
62 else
63 return dThis->son.operator[](i - dThis->father.Size());
64 }
65
66 /// @brief Row pointer for index `i` (mutable).
68 {
69 auto dThis = static_cast<Derived *>(this);
70 if (i >= 0 && i < dThis->father.Size())
71 return dThis->father.operator[](i);
72 else
73 return dThis->son.operator[](i - dThis->father.Size());
74 }
75
76 /// @brief N-ary element access in the combined address space (mutable).
77 /// Forwards extra arguments to the underlying `operator()`.
78 template <class... TOthers>
79 DNDS_DEVICE_CALLABLE decltype(auto) operator()(index i, TOthers... aOthers)
80 {
81 auto dThis = static_cast<Derived *>(this);
82 if (i >= 0 && i < dThis->father.Size())
83 return dThis->father.operator()(i, aOthers...);
84 else
85 return dThis->son.operator()(i - dThis->father.Size(), aOthers...);
86 }
87
88 /// @brief N-ary element access (const).
89 template <class... TOthers>
90 DNDS_DEVICE_CALLABLE decltype(auto) operator()(index i, TOthers... aOthers) const
91 {
92 auto dThis = static_cast<const Derived *>(this);
93 if (i >= 0 && i < dThis->father.Size())
94 return dThis->father.operator()(i, aOthers...);
95 else
96 return dThis->son.operator()(i - dThis->father.Size(), aOthers...);
97 }
98 };
99
100 /// @brief Mutable device view onto an @ref DNDS::ArrayPair "ArrayPair" (for CUDA kernels).
101 /// @details Captures both father and son device views by value; must not
102 /// outlive the owning pair.
103 template <DeviceBackend B, class TArray = ParArray<real, 1>>
104 struct ArrayPairDeviceView : public ArrayPairDeviceView_Base<ArrayPairDeviceView<B, TArray>>
105 {
106 using t_arrayDeviceView = typename TArray::template t_deviceView<B>;
107
110
112
114
116 : father(n_father), son(n_son) {}
117 };
118
119 /// @brief Const device view of a father-son array pair.
120 template <DeviceBackend B, class TArray = ParArray<real, 1>>
121 struct ArrayPairDeviceViewConst : public ArrayPairDeviceView_Base<ArrayPairDeviceViewConst<B, TArray>>
122 {
123 using t_arrayDeviceView = typename TArray::template t_deviceViewConst<B>; //! the only difference from non-const
124
127
129
131
133 : father(n_father), son(n_son) {}
134 };
135
136 /**
137 * @brief Convenience bundle of a father, son, and attached @ref DNDS::ArrayTransformer "ArrayTransformer".
138 *
139 * @details @ref DNDS::ArrayPair "ArrayPair" is what most application code uses instead of
140 * manipulating a raw transformer. It wraps:
141 * - `father` (owned rows) and `son` (ghost rows) as `shared_ptr<TArray>`,
142 * - a `trans` transformer that binds the two together.
143 *
144 * `operator[]` / `operator()` treat the pair as one contiguous array of
145 * size `father->Size() + son->Size()`. Typical construction pattern:
146 *
147 * ```cpp
148 * ArrayPair<ParArray<real, 5>> u;
149 * u.InitPair("u", mpi); // allocates father and son
150 * u.father->Resize(nLocal); // fill father with local data
151 * u.BorrowAndPull(primaryPair); // ghost layout inherited; pull
152 * ```
153 *
154 * See `docs/guides/array_usage.md` for the broader "primary pair"
155 * pattern: one pair (typically `cell2cell`) does the full four-step
156 * ghost setup; every other pair on the same partition borrows from it.
157 *
158 * @tparam TArray Underlying array type (e.g., `ParArray<real, 5>`,
159 * @ref DNDS::ArrayAdjacency "ArrayAdjacency", @ref DNDS::ArrayEigenVector "ArrayEigenVector").
160 */
161 template <class TArray = ParArray<real, 1>>
163 {
165 using t_arr = TArray;
166 /// @brief Whether the underlying array uses CSR storage.
167 static constexpr bool IsCSR() { return t_arr::IsCSR(); }
168
169 /// @brief Owned-side array (must be resized before ghost setup).
171 /// @brief Ghost-side array (sized automatically by #createMPITypes / @ref BorrowAndPull).
174 /// @brief Ghost-communication engine bound to #father and #son.
176
177 /// @brief Deep-copy: allocate new father / son and copy their data; rebind trans.
178 /// @details Recreates the arrays through @ref TArray's copy ctor, then
179 /// assigns `trans` from `R`. If the source's transformer was already
180 /// attached, re-attaches to the new local arrays.
181 void clone(const t_self &R)
182 {
184 //! rely on TArray's copy ctor!
185 father = make_ssp<TArray>(*(R.father)); // call TArray copy ctor
186 son = make_ssp<TArray>(*(R.son)); // call TArray copy ctor
187 DNDS_check_throw(father->getMPI().comm == son->getMPI().comm);
188 //! rely on TTrans's copy assignment!
189 trans = R.trans;
190 //! if R.trans already attached, then self trans attach self arrays
191 if (R.trans.father)
193 if (R.trans.son)
194 trans.son = son;
195 }
196
197 /// @brief Read-only row-pointer access in the combined address space.
198 decltype(father->operator[](index(0))) operator[](index i) const
200 if (i >= 0 && i < father->Size())
201 return father->operator[](i);
202 else
203 return son->operator[](i - father->Size());
204 }
205
206 /// @brief Mutable row-pointer access in the combined address space.
207 decltype(father->operator[](index(0))) operator[](index i)
208 {
209 if (i >= 0 && i < father->Size())
210 return father->operator[](i);
211 else
212 return son->operator[](i - father->Size());
213 }
214
215 // decltype(father->operator()(index(0), rowsize(0))) operator()(index i, rowsize j)
216 // {
217 // if (i >= 0 && i < father->Size())
218 // return father->operator()(i, j);
219 // else
220 // return son->operator()(i - father->Size(), j);
221 // }
222
223 /// @brief N-ary element access in the combined space (mutable). Arguments
224 /// after the row index are forwarded to the underlying `operator()`.
225 template <class... TOthers>
226 decltype(auto) operator()(index i, TOthers... aOthers)
227 {
228 if (i >= 0 && i < father->Size())
229 return father->operator()(i, aOthers...);
230 else
231 return son->operator()(i - father->Size(), aOthers...);
232 }
233
234 /// @brief N-ary element access (const).
235 template <class... TOthers>
236 decltype(auto) operator()(index i, TOthers... aOthers) const
237 {
238 if (i >= 0 && i < father->Size())
239 return father->operator()(i, aOthers...);
240 else
241 return son->operator()(i - father->Size(), aOthers...);
242 }
243
244 /// @brief Invoke `F(array, localIndex)` on either father or son
245 /// depending on which range `i` falls into.
246 /// @details Useful when per-side state must be updated alongside the
247 /// indexed row (e.g., logging father vs son modifications).
248 template <class TF>
250 {
251 if (i >= 0 && i < father->Size())
252 return F(*father, i);
253 else
254 return F(*son, i - father->Size());
255 }
256
257 /// @brief Uniform row width (delegates to father).
258 auto RowSize() const
259 {
260 return father->RowSize();
261 }
262
263 /// @brief Per-row width in the combined address space.
264 auto RowSize(index i) const
265 {
266 if (i >= 0 && i < father->Size())
267 return father->RowSize(i);
268 else
269 return son->RowSize(i - father->Size());
270 }
271
272 /// @brief Resize a single row in the combined address space.
274 {
275 if (i >= 0 && i < father->Size())
276 father->ResizeRow(i, rs);
277 else
278 son->ResizeRow(i - father->Size(), rs);
279 }
280
281 /// @brief Variadic ResizeRow overload that forwards extra args.
282 template <class... TOthers>
284 {
285 if (i >= 0 && i < father->Size())
286 father->ResizeRow(i, aOthers...);
287 else
288 son->ResizeRow(i - father->Size(), aOthers...);
289 }
290
291 /// @brief Combined row count (`father->Size() + son->Size()`).
292 [[nodiscard]] index Size() const
293 {
295 return father->Size() + son->Size();
296 }
297
298 /// @brief Bind the transformer to the current father / son pointers.
299 /// @details First step of the four-step ghost setup when not using
300 /// @ref BorrowAndPull. Both arrays must already be allocated.
302 {
303 DNDS_check_throw_info(bool(father) && bool(son),
304 fmt::format("father and son need to be constructed before Trans Attach. Array is {}",
305 father ? father->getObjectIdentity(TArray::GetArrayName()) : TArray::GetArrayName()));
306 trans.setFatherSon(father, son);
307 }
308
309 /// @brief Allocate both father and son arrays, forwarding all args to TArray constructor.
310 ///
311 /// Replaces the common two-line DNDS_MAKE_SSP(pair.father, ...) +
312 /// DNDS_MAKE_SSP(pair.son, ...) pattern.
313 ///
314 /// The name tag is set on both arrays as "name.father" / "name.son".
315 /// Constructor args are forwarded as-is to TArray (same order as ParArray
316 /// constructors, e.g., `(mpi)` or `(dataType, commMult, mpi)`).
317 ///
318 /// Usage:
319 /// pair.InitPair("cell2node", mpi);
320 /// pair.InitPair("cellElemInfo", ElemInfo::CommType(), ElemInfo::CommMult(), mpi);
321 template <typename... Args>
322 void InitPair(const std::string &name, Args &&...args)
323 {
324 father = make_ssp<TArray>(ObjName{name + ".father"}, std::forward<Args>(args)...);
325 son = make_ssp<TArray>(ObjName{name + ".son"}, std::forward<Args>(args)...);
326 }
327
328 /// @brief Attach, borrow ghost indexing from a primary pair, create MPI types, and pull once.
329 ///
330 /// Replaces the 4-line sequence:
331 /// this->TransAttach();
332 /// this->trans.BorrowGGIndexing(primary.trans);
333 /// this->trans.createMPITypes();
334 /// this->trans.pullOnce();
335 template <class TPrimaryPair>
337 {
338 this->TransAttach();
339 this->trans.BorrowGGIndexing(primary.trans);
340 this->trans.createMPITypes();
341 this->trans.pullOnce();
342 }
343
344 /// @brief Attach, borrow ghost indexing from a primary pair, and create MPI types (no pull).
345 ///
346 /// Useful when you need to set up communication but defer the pull
347 /// (e.g., for persistent communication patterns).
348 template <class TPrimaryPair>
350 {
351 this->TransAttach();
352 this->trans.BorrowGGIndexing(primary.trans);
353 this->trans.createMPITypes();
354 }
355
356 /// @brief Compress both father and son CSR arrays (no-op for non-CSR layouts).
358 {
359 father->Compress();
360 son->Compress();
361 }
362
363 /// @brief Copy only the father's data from another pair (shallow).
365 {
366 father->CopyData(*R.father);
367 }
368
369 /**
370 * @brief Swap both father and son data with another pair of the same type.
371 * @warning Because the data pointers change, persistent MPI requests
372 * (if any) are rebuilt on both sides via #reInitPersistentPullPush.
373 */
374 // TODO: make a data change listener in transformer?
375 //! a situation: the data pointer should remain static as long as initPersistentPuxx is done
377 {
378 father->SwapData(*R.father);
379 son->SwapData(*R.son);
380 trans.reInitPersistentPullPush();
381 R.trans.reInitPersistentPullPush();
382 }
383
384 /// @brief Combined hash across ranks. Used for determinism / equality checks in tests.
385 std::size_t hash()
386 {
387 auto fatherHash = father->hash();
388 auto sonHash = son->hash();
390 MPIInfo mpi = father->getMPI();
391 std::vector<index> hashes;
392 hashes.resize(mpi.size);
393 MPI::Allgather(&localHash, 1, DNDS_MPI_INDEX, hashes.data(), 1, DNDS_MPI_INDEX, mpi.comm);
394 return vector_hash<index>()(hashes);
395 }
396
397 /// @brief Writes the ArrayPair (father, optional son, optional ghost mapping).
398 ///
399 /// Creates a sub-path `name` containing:
400 /// - `MPIRank` (per-rank only), `MPISize` — partition metadata.
401 /// - `father` — the father array via ParArray::WriteSerializer (Parts offset).
402 /// - `son` — the son (ghost) array, if `includeSon` is true.
403 /// - `pullingIndexGlobal` — ghost pull indices, if `includePIG` is true.
404 ///
405 /// All writes are collective for H5 serializers. Every rank must call this.
406 void WriteSerialize(Serializer::SerializerBaseSSP serializerP, const std::string &name, bool includePIG = true, bool includeSon = true)
407 {
408 if (includePIG)
409 DNDS_check_throw_info(trans.pLGlobalMapping && trans.pLGhostMapping, "pair's trans not having ghost info");
410
411 auto cwd = serializerP->GetCurrentPath();
412 serializerP->CreatePath(name);
413 serializerP->GoToPath(name);
414
415 if (serializerP->IsPerRank())
416 serializerP->WriteIndex("MPIRank", father->getMPI().rank);
417 serializerP->WriteIndex("MPISize", father->getMPI().size);
418 // std::cout << trans.pLGlobalMapping->operator()(trans.mpi.rank, 0) << ",,," << trans.pLGlobalMapping->globalSize() << std::endl;
419 // ! this is wrong as pLGlobalMapping stores the row index, not the data index!!
420 // father->WriteSerializer(serializerP, "father",
421 // Serializer::ArrayGlobalOffset{
422 // trans.pLGlobalMapping->globalSize(),
423 // trans.pLGlobalMapping->operator()(trans.mpi.rank, 0),
424 // }); // trans.pLGlobalMapping == father->pLGlobalMapping
425 // TODO: overwrite all the Resize()/ResizeRow() for ParArray so that it handles global size and offset internally?
426
427 // now using the parts (calculate offsets)
428 father->WriteSerializer(serializerP, "father", Serializer::ArrayGlobalOffset_Parts);
429 if (includeSon)
430 son->WriteSerializer(serializerP, "son", Serializer::ArrayGlobalOffset_Parts);
431 /***************************/
432 // ghost info
433 // static_assert(std::is_same_v<rowsize, MPI_int>);
434 // *writing pullingIndexGlobal, trusting the GlobalMapping to remain the same
435 if (includePIG)
436 serializerP->WriteIndexVector("pullingIndexGlobal", trans.pLGhostMapping->ghostIndex, Serializer::ArrayGlobalOffset_Parts);
437 /***************************/
438
439 serializerP->GoToPath(cwd);
440 }
441
442 /// @brief Writes the ArrayPair with an origIndex companion dataset for redistribution support.
443 ///
444 /// When origIndex is provided and the serializer is H5 (collective), an additional
445 /// dataset "origIndex" is written alongside the father data. This enables reading
446 /// the data back with a different MPI partition.
447 ///
448 /// @param serializerP The serializer (H5 for redistribution support, JSON ignores origIndex)
449 /// @param name Path name for this array in the serializer hierarchy
450 /// @param origIndex Partition-independent key for each row (e.g., from cell2cellOrig).
451 /// Must have size == father->Size().
452 /// @param includePIG Whether to include ghost pull-index-global data
453 /// @param includeSon Whether to include the son (ghost) array
455 const std::vector<index> &origIndex,
456 bool includePIG = true, bool includeSon = true)
457 {
459 fmt::format("origIndex size {} != father size {}", origIndex.size(), father->Size()));
460
461 // Write the array data normally
463
464 // Write the origIndex companion dataset alongside the father data (H5 only)
465 if (!serializerP->IsPerRank())
466 {
467 auto cwd = serializerP->GetCurrentPath();
468 serializerP->GoToPath(name);
469 serializerP->WriteIndexVector("origIndex", origIndex, Serializer::ArrayGlobalOffset_Parts);
470 serializerP->WriteInt("redistributable", 1);
471 serializerP->GoToPath(cwd);
472 }
473 }
474
475 /// @brief Reads an ArrayPair written by WriteSerialize (same partition count).
476 ///
477 /// Reads father (and optionally son and ghost mapping) from sub-path `name`.
478 /// Requires the file to have been written with the same MPI size.
479 /// The father and son arrays are resized internally by Array::ReadSerializer.
480 ///
481 /// All reads are collective for H5 serializers. Every rank must call this,
482 /// including ranks whose local size is 0.
483 ///
484 /// @warning If `includePIG` is true, the caller must call
485 /// `trans.createMPITypes()` after this method returns.
486 void ReadSerialize(Serializer::SerializerBaseSSP serializerP, const std::string &name, bool includePIG = true, bool includeSon = true)
487 {
489 this->TransAttach();
490
491 auto cwd = serializerP->GetCurrentPath();
492 // serializerP->CreatePath(name); //!remember no create!
493 serializerP->GoToPath(name);
494
495 index readRank{0}, readSize{0};
496 if (serializerP->IsPerRank())
497 serializerP->ReadIndex("MPIRank", readRank);
498 serializerP->ReadIndex("MPISize", readSize);
499 DNDS_check_throw((!serializerP->IsPerRank() || readRank == father->getMPI().rank) &&
500 readSize == father->getMPI().size);
501 auto offsetV_father = Serializer::ArrayGlobalOffset_Unknown;
502 auto offsetV_son = Serializer::ArrayGlobalOffset_Unknown;
503 father->ReadSerializer(serializerP, "father", offsetV_father);
504 if (includeSon)
505 son->ReadSerializer(serializerP, "son", offsetV_son);
506 /***************************/
507 // ghost info
508 // static_assert(std::is_same_v<rowsize, MPI_int>);
509 // *reading pullingIndexGlobal, trusting the GlobalMapping to remain the same
510 if (includePIG)
511 {
512 std::vector<index> pullingIndexGlobal;
513 auto offsetV_PIG = Serializer::ArrayGlobalOffset_Unknown; // TODO: check the offsets?
514 serializerP->ReadIndexVector("pullingIndexGlobal", pullingIndexGlobal, offsetV_PIG);
515 trans.createFatherGlobalMapping();
516 trans.createGhostMapping(pullingIndexGlobal);
517 }
518 /***************************/
519
520 serializerP->GoToPath(cwd);
521 }
522
523 /// @brief Reads ArrayPair data from HDF5 with redistribution support.
524 ///
525 /// Handles three cases depending on the file contents and partition count:
526 ///
527 /// 1. **No origIndex in file, same np**: falls back to ReadSerialize
528 /// (same-partition read, no redistribution).
529 ///
530 /// 2. **Has origIndex, same np**: reads father via ReadSerialize, reads
531 /// origIndex, then uses RedistributeArrayWithTransformer to move data
532 /// from the file's partition layout to the caller's partition layout.
533 ///
534 /// 3. **Has origIndex, different np**: reads father via EvenSplit (each
535 /// rank reads ~nGlobal/nRanks rows regardless of the original partition),
536 /// reads origIndex the same way, then uses
537 /// RedistributeArrayWithTransformer to pull each rank's needed cells.
538 ///
539 /// In case 3, some ranks may get 0 rows from EvenSplit when nGlobal < nRanks.
540 /// This is handled correctly: the H5 collective reads proceed with 0-count
541 /// hyperslab selections, and the redistribution operates on empty arrays.
542 ///
543 /// All operations are collective. Every rank must call this method.
544 ///
545 /// @param serializerP The serializer (must be H5 / collective)
546 /// @param name Path name in the serializer hierarchy
547 /// @param newOrigIndex Partition-independent keys for this rank's cells
548 /// (e.g., from cell2cellOrig). Size must equal
549 /// father->Size(). May be empty for ranks with 0 cells.
552 const std::string &name,
553 const std::vector<index> &newOrigIndex)
554 {
557 "Redistribution read only supported for collective (H5) serializers");
558
559 auto cwd = serializerP->GetCurrentPath();
560 serializerP->GoToPath(name);
561
562 // Check if origIndex exists in the file
563 auto pathContents = serializerP->ListCurrentPath();
564 bool hasOrigIndex = pathContents.count("origIndex") > 0;
565
566 index readSize{0};
567 serializerP->ReadIndex("MPISize", readSize);
568 bool sameNumPartition = (readSize == father->getMPI().size);
569
570 if (!hasOrigIndex)
571 {
572 // No origIndex in file -- fall back to same-partition read
573 serializerP->GoToPath(cwd);
575 fmt::format("File has no origIndex and was written with np={}, "
576 "but reading with np={}. Cannot redistribute.",
577 readSize, father->getMPI().size));
578 DNDS_check_throw_info(false, "Redistribution fallback requires same-partition read");
579 ReadSerialize(serializerP, name, /*includePIG*/ false, /*includeSon*/ false);
580 return;
581 }
582
583 // Use redistribution: even-split read + rendezvous.
584 // This works for both same-np (different layout) and different-np cases.
585 serializerP->GoToPath(cwd); // go back so we can navigate cleanly
586
587 // Read the full ArrayPair normally into a temporary (same-np read)
588 // or use even-split for different-np.
589 auto mpi = father->getMPI();
590
592 {
593 // Same np: we can read normally (rank slices match), then redistribute locally
595 readPair.InitPair("readPair", mpi);
596 readPair.ReadSerialize(serializerP, name, /*includePIG*/ false, /*includeSon*/ false);
597
598 // Read origIndex from the file
599 serializerP->GoToPath(name);
600 std::vector<index> fileOrigIndex;
601 auto offsetOrigIdx = Serializer::ArrayGlobalOffset_Unknown;
602 serializerP->ReadIndexVector("origIndex", fileOrigIndex, offsetOrigIdx);
603 serializerP->GoToPath(cwd);
604
606 fmt::format("readPair.father size {} != fileOrigIndex size {}",
607 readPair.father->Size(), fileOrigIndex.size()));
608
609 // Redistribute using ArrayTransformer
611 fmt::format("father size {} != newOrigIndex size {}",
612 father->Size(), newOrigIndex.size()));
615 }
616 else
617 {
618 // Different np: use even-split read + rendezvous redistribution
619 serializerP->GoToPath(name);
620
621 // Read origIndex from file using even-split
622 std::vector<index> fileOrigIndex;
623 auto offsetOrigIdx = Serializer::ArrayGlobalOffset_EvenSplit;
624 serializerP->ReadIndexVector("origIndex", fileOrigIndex, offsetOrigIdx);
625 DNDS_assert(offsetOrigIdx.isDist()); // resolved by even-split read
626
627 // Read father array data using even-split via TArray::ReadSerializer.
628 // Pass EvenSplit offset so Array::ReadSerializer reads size correctly.
629 auto readFather = std::make_shared<TArray>(mpi);
630 {
631 auto offsetFather = Serializer::ArrayGlobalOffset_EvenSplit;
632 readFather->ReadSerializer(serializerP, "father", offsetFather);
633 }
634
636 fmt::format("readFather size {} != fileOrigIndex size {}",
637 readFather->Size(), fileOrigIndex.size()));
638
639 // Redistribute using ArrayTransformer
641 fmt::format("father size {} != newOrigIndex size {}",
642 father->Size(), newOrigIndex.size()));
645
646 serializerP->GoToPath(cwd);
647 }
648 }
649
650 /// @brief Device-view template alias: `t_deviceView<DeviceBackend::CUDA>`
651 /// gives the mutable CUDA view type for this pair.
652 template <DeviceBackend B>
654
655 /// @brief Const-device-view template alias.
656 template <DeviceBackend B>
658
659 /// @brief Produce a mutable device view; both father and son must be allocated.
660 template <DeviceBackend B>
662 {
664 fmt::format("need both father and son to exist for device view: {}",
665 father ? father->getObjectIdentity(TArray::GetArrayName()) : TArray::GetArrayName()));
666 return t_deviceView<B>{
667 father->template deviceView<B>(),
668 son->template deviceView<B>()};
669 }
670
671 /// @brief Produce a const device view.
672 template <DeviceBackend B>
673 auto deviceView() const
674 {
676 fmt::format("need both father and son to exist for device view: {}",
677 father ? father->getObjectIdentity(TArray::GetArrayName()) : TArray::GetArrayName()));
679 std::as_const(*father).template deviceView<B>(),
680 std::as_const(*son).template deviceView<B>()};
681 }
682
683 /// @brief Mirror both father and son to the given device backend.
685 {
686 if (father)
687 father->to_device(backend);
688 if (son)
689 son->to_device(backend);
690 }
691
692 /// @brief Bring both father and son mirrors back to host memory.
693 void to_host()
694 {
695 if (father)
696 father->to_host();
697 if (son)
698 son->to_host();
699 }
700 };
701
702 /// @brief @ref DNDS::ArrayPair "ArrayPair" alias for mesh adjacency (variable-width integer rows).
703 template <rowsize _row_size = 1, rowsize _row_max = _row_size, rowsize _align = NoAlign>
705
706 /// @brief @ref DNDS::ArrayPair "ArrayPair" alias for per-row Eigen vectors (e.g., node coords with N=3).
707 template <rowsize _vec_size = 1, rowsize _row_max = _vec_size, rowsize _align = NoAlign>
709
710 /// @brief @ref DNDS::ArrayPair "ArrayPair" alias for per-row Eigen matrices.
711 template <rowsize _mat_ni = 1, rowsize _mat_nj = 1,
712 rowsize _mat_ni_max = _mat_ni, rowsize _mat_nj_max = _mat_nj, rowsize _align = NoAlign>
714
715 /// @brief @ref DNDS::ArrayPair "ArrayPair" alias for per-row variable-size Eigen matrix batches.
717
718 /// @brief @ref DNDS::ArrayPair "ArrayPair" alias for per-row batches of uniform `_n_row x _n_col` matrices.
719 /// @details Used by @ref FiniteVolume / @ref VariationalReconstruction to store
720 /// per-quadrature-point Jacobians and basis coefficients.
721 template <int _n_row, int _n_col>
723}
Adjacency array (CSR-like index storage) built on ParArray.
Batch of variable-sized Eigen matrices stored in CSR layout.
Eigen-matrix array: each row is an Eigen::Map<Matrix> over contiguous real storage.
Batch of uniform-sized Eigen matrices per row, with variable batch count.
Eigen-vector array: each row is an Eigen::Map over contiguous real storage.
Redistributes ArrayPair data across different MPI partitions using ArrayTransformer.
ParArray (MPI-aware array) and ArrayTransformer (ghost/halo communication).
Core type aliases, constants, and metaprogramming utilities for the DNDS framework.
#define DNDS_DEVICE_TRIVIAL_COPY_DEFINE(T, T_Self)
Definition Defines.hpp:83
#define DNDS_DEVICE_CALLABLE
Definition Defines.hpp:76
Device memory abstraction layer with backend-specific storage and factory creation.
Non-owning device-side views of Array objects for host and CUDA backends.
Assertion / error-handling macros and supporting helper functions.
#define DNDS_assert_info(expr, info)
Debug-only assertion with an extra std::string info message.
Definition Errors.hpp:113
#define DNDS_assert(expr)
Debug-only assertion (compiled out when DNDS_NDEBUG is defined). Prints the expression + file/line + ...
Definition Errors.hpp:108
#define DNDS_check_throw_info(expr, info)
Same as DNDS_check_throw but attaches a user-supplied info message to the thrown std::runtime_error.
Definition Errors.hpp:96
#define DNDS_check_throw(expr)
Runtime check active in both debug and release builds. Throws std::runtime_error if expr evaluates to...
Definition Errors.hpp:89
ArrayDofDeviceView< B, n_m, n_n > t_deviceView
Mutable device view alias.
Definition ArrayDOF.hpp:179
Ghost-communication engine for a father / son ParArray pair.
MPI_int Allgather(const void *sendbuf, MPI_int sendcount, MPI_Datatype sendtype, void *recvbuf, MPI_int recvcount, MPI_Datatype recvtype, MPI_Comm comm)
Wrapper over MPI_Allgather.
Definition MPI.cpp:230
ssp< SerializerBase > SerializerBaseSSP
the host side operators are provided as implemented
const MPI_Datatype DNDS_MPI_INDEX
MPI datatype matching index (= MPI_INT64_T).
Definition MPI.hpp:90
DeviceBackend
Enumerates the backends a DeviceStorage / Array can live on.
DNDS_CONSTANT const rowsize NoAlign
Alignment flag: no padding applied to rows (the only currently-supported value).
Definition Defines.hpp:282
int32_t rowsize
Row-width / per-row element-count type (signed 32-bit).
Definition Defines.hpp:109
int64_t index
Global row / DOF index type (signed 64-bit; handles multi-billion-cell meshes).
Definition Defines.hpp:107
Const device view of a father-son array pair.
t_arrayDeviceView father
the only difference from non-const
typename TArray::template t_deviceViewConst< B > t_arrayDeviceView
CRTP base implementing the unified-index accessors shared by ArrayPairDeviceView and ArrayPairDeviceV...
Definition ArrayPair.hpp:31
DNDS_DEVICE_CALLABLE auto operator[](index i)
Row pointer for index i (mutable).
Definition ArrayPair.hpp:67
DNDS_DEVICE_CALLABLE auto RowSize() const
Uniform row width (delegates to father; father/son share it).
Definition ArrayPair.hpp:40
DNDS_DEVICE_CALLABLE index Size() const
Combined father + son row count.
Definition ArrayPair.hpp:33
DNDS_DEVICE_CALLABLE auto operator[](index i) const
Row pointer for index i in the combined address space (const).
Definition ArrayPair.hpp:57
DNDS_DEVICE_CALLABLE auto RowSize(index i) const
Per-row width in the combined address space.
Definition ArrayPair.hpp:47
Mutable device view onto an ArrayPair (for CUDA kernels).
t_arrayDeviceView father
t_arrayDeviceView son
typename TArray::template t_deviceView< B > t_arrayDeviceView
Convenience bundle of a father, son, and attached ArrayTransformer.
void TransAttach()
Bind the transformer to the current father / son pointers.
void ReadSerializeRedistributed(Serializer::SerializerBaseSSP serializerP, const std::string &name, const std::vector< index > &newOrigIndex)
Reads ArrayPair data from HDF5 with redistribution support.
decltype(father->operator[](index(0))) operator[](index i) const
Read-only row-pointer access in the combined address space.
void to_device(DeviceBackend backend)
Mirror both father and son to the given device backend.
ArrayPairDeviceView< B, TArray > t_deviceView
Device-view template alias: t_deviceView<DeviceBackend::CUDA> gives the mutable CUDA view type for th...
void SwapDataFatherSon(t_self &R)
Swap both father and son data with another pair of the same type.
void to_host()
Bring both father and son mirrors back to host memory.
ssp< TArray > father
Owned-side array (must be resized before ghost setup).
auto deviceView()
Produce a mutable device view; both father and son must be allocated.
void WriteSerialize(Serializer::SerializerBaseSSP serializerP, const std::string &name, bool includePIG=true, bool includeSon=true)
Writes the ArrayPair (father, optional son, optional ghost mapping).
auto deviceView() const
Produce a const device view.
void ResizeRow(index i, TOthers... aOthers)
Variadic ResizeRow overload that forwards extra args.
index Size() const
Combined row count (father->Size() + son->Size()).
void BorrowSetup(TPrimaryPair &primary)
Attach, borrow ghost indexing from a primary pair, and create MPI types (no pull).
static constexpr bool IsCSR()
Whether the underlying array uses CSR storage.
void InitPair(const std::string &name, Args &&...args)
Allocate both father and son arrays, forwarding all args to TArray constructor.
ssp< TArray > son
Ghost-side array (sized automatically by createMPITypes / BorrowAndPull).
std::size_t hash()
Combined hash across ranks. Used for determinism / equality checks in tests.
void clone(const t_self &R)
Deep-copy: allocate new father / son and copy their data; rebind trans.
void BorrowAndPull(TPrimaryPair &primary)
Attach, borrow ghost indexing from a primary pair, create MPI types, and pull once.
auto RowSize() const
Uniform row width (delegates to father).
void ResizeRow(index i, rowsize rs)
Resize a single row in the combined address space.
void ReadSerialize(Serializer::SerializerBaseSSP serializerP, const std::string &name, bool includePIG=true, bool includeSon=true)
Reads an ArrayPair written by WriteSerialize (same partition count).
TTrans trans
Ghost-communication engine bound to father and son.
void WriteSerialize(Serializer::SerializerBaseSSP serializerP, const std::string &name, const std::vector< index > &origIndex, bool includePIG=true, bool includeSon=true)
Writes the ArrayPair with an origIndex companion dataset for redistribution support.
decltype(father->operator[](index(0))) operator[](index i)
Mutable row-pointer access in the combined address space.
void CopyFather(t_self &R)
Copy only the father's data from another pair (shallow).
auto RowSize(index i) const
Per-row width in the combined address space.
void CompressBoth()
Compress both father and son CSR arrays (no-op for non-CSR layouts).
auto runFunctionAppendedIndex(index i, TF &&F)
Invoke F(array, localIndex) on either father or son depending on which range i falls into.
typename ArrayTransformerType< TArray >::Type TTrans
Lightweight bundle of an MPI communicator and the calling rank's coordinates.
Definition MPI.hpp:215
Tag type for naming objects created via make_ssp.
Definition Defines.hpp:249