DNDSR 0.2.1
Distributed Numeric Data Structure for CFV
Loading...
Searching...
No Matches
ArrayPair.hpp
Go to the documentation of this file.
1#pragma once
2/// @file ArrayPair.hpp
3/// @brief Father-son array pairs with device views and ghost communication.
4
12#include "DNDS/Defines.hpp"
14#include "DNDS/Errors.hpp"
15#include "Device/DeviceView.hpp"
16#include <fmt/format.h>
17namespace DNDS
18{
19
20 /**
21 * @brief CRTP base implementing the unified-index accessors shared by
22 * @ref DNDS::ArrayPairDeviceView "ArrayPairDeviceView" and @ref DNDS::ArrayPairDeviceViewConst "ArrayPairDeviceViewConst".
23 *
24 * @details Indices in `[0, father.Size())` map to the owned-side view; indices
25 * in `[father.Size(), father.Size() + son.Size())` map to the ghost-side view
26 * with an offset subtraction. This lets stencil loops treat the father/son
27 * pair as one contiguous array. Device-callable.
28 */
29 template <class Derived>
31 {
32 /// @brief Combined father + son row count.
33 DNDS_DEVICE_CALLABLE [[nodiscard]] index Size() const
34 {
35 auto dThis = static_cast<const Derived *>(this);
36 return dThis->father.Size() + dThis->son.Size();
37 }
38
39 /// @brief Uniform row width (delegates to father; father/son share it).
41 {
42 auto dThis = static_cast<const Derived *>(this);
43 return dThis->father.RowSize();
44 }
45
46 /// @brief Per-row width in the combined address space.
48 {
49 auto dThis = static_cast<const Derived *>(this);
50 if (i >= 0 && i < dThis->father.Size())
51 return dThis->father.RowSize(i);
52 else
53 return dThis->son.RowSize(i - dThis->father.Size());
54 }
55
56 /// @brief Row pointer for index `i` in the combined address space (const).
58 {
59 auto dThis = static_cast<const Derived *>(this);
60 if (i >= 0 && i < dThis->father.Size())
61 return dThis->father.operator[](i);
62 else
63 return dThis->son.operator[](i - dThis->father.Size());
64 }
65
66 /// @brief Row pointer for index `i` (mutable).
68 {
69 auto dThis = static_cast<Derived *>(this);
70 if (i >= 0 && i < dThis->father.Size())
71 return dThis->father.operator[](i);
72 else
73 return dThis->son.operator[](i - dThis->father.Size());
74 }
75
76 /// @brief N-ary element access in the combined address space (mutable).
77 /// Forwards extra arguments to the underlying `operator()`.
78 template <class... TOthers>
79 DNDS_DEVICE_CALLABLE decltype(auto) operator()(index i, TOthers... aOthers)
80 {
81 auto dThis = static_cast<Derived *>(this);
82 if (i >= 0 && i < dThis->father.Size())
83 return dThis->father.operator()(i, aOthers...);
84 else
85 return dThis->son.operator()(i - dThis->father.Size(), aOthers...);
86 }
87
88 /// @brief N-ary element access (const).
89 template <class... TOthers>
90 DNDS_DEVICE_CALLABLE decltype(auto) operator()(index i, TOthers... aOthers) const
91 {
92 auto dThis = static_cast<const Derived *>(this);
93 if (i >= 0 && i < dThis->father.Size())
94 return dThis->father.operator()(i, aOthers...);
95 else
96 return dThis->son.operator()(i - dThis->father.Size(), aOthers...);
97 }
98 };
99
100 /// @brief Mutable device view onto an @ref DNDS::ArrayPair "ArrayPair" (for CUDA kernels).
101 /// @details Captures both father and son device views by value; must not
102 /// outlive the owning pair.
103 template <DeviceBackend B, class TArray = ParArray<real, 1>>
104 struct ArrayPairDeviceView : public ArrayPairDeviceView_Base<ArrayPairDeviceView<B, TArray>>
105 {
106 using t_arrayDeviceView = typename TArray::template t_deviceView<B>;
107
110
112
114
116 : father(n_father), son(n_son) {}
117 };
118
119 /// @brief Const device view of a father-son array pair.
120 template <DeviceBackend B, class TArray = ParArray<real, 1>>
121 struct ArrayPairDeviceViewConst : public ArrayPairDeviceView_Base<ArrayPairDeviceViewConst<B, TArray>>
122 {
123 using t_arrayDeviceView = typename TArray::template t_deviceViewConst<B>; //! the only difference from non-const
124
127
129
131
133 : father(n_father), son(n_son) {}
134 };
135
136 /**
137 * @brief Convenience bundle of a father, son, and attached @ref DNDS::ArrayTransformer "ArrayTransformer".
138 *
139 * @details @ref DNDS::ArrayPair "ArrayPair" is what most application code uses instead of
140 * manipulating a raw transformer. It wraps:
141 * - `father` (owned rows) and `son` (ghost rows) as `shared_ptr<TArray>`,
142 * - a `trans` transformer that binds the two together.
143 *
144 * `operator[]` / `operator()` treat the pair as one contiguous array of
145 * size `father->Size() + son->Size()`. Typical construction pattern:
146 *
147 * ```cpp
148 * ArrayPair<ParArray<real, 5>> u;
149 * u.InitPair("u", mpi); // allocates father and son
150 * u.father->Resize(nLocal); // fill father with local data
151 * u.BorrowAndPull(primaryPair); // ghost layout inherited; pull
152 * ```
153 *
154 * See `docs/guides/array_usage.md` for the broader "primary pair"
155 * pattern: one pair (typically `cell2cell`) does the full four-step
156 * ghost setup; every other pair on the same partition borrows from it.
157 *
158 * @tparam TArray Underlying array type (e.g., `ParArray<real, 5>`,
159 * @ref DNDS::ArrayAdjacency "ArrayAdjacency", @ref DNDS::ArrayEigenVector "ArrayEigenVector").
160 */
161 template <class TArray = ParArray<real, 1>>
163 {
165 using t_arr = TArray;
166 /// @brief Whether the underlying array uses CSR storage.
167 static constexpr bool IsCSR() { return t_arr::IsCSR(); }
168
169 /// @brief Owned-side array (must be resized before ghost setup).
171 /// @brief Ghost-side array (sized automatically by #createMPITypes / @ref BorrowAndPull).
174 /// @brief Ghost-communication engine bound to #father and #son.
176
177 /// @brief Deep-copy: allocate new father / son and copy their data; rebind trans.
178 /// @details Recreates the arrays through @ref TArray's copy ctor, then
179 /// assigns `trans` from `R`. If the source's transformer was already
180 /// attached, re-attaches to the new local arrays.
181 void clone(const t_self &R)
182 {
184 //! rely on TArray's copy ctor!
185 father = make_ssp<TArray>(*(R.father)); // call TArray copy ctor
186 son = make_ssp<TArray>(*(R.son)); // call TArray copy ctor
187 DNDS_check_throw(father->getMPI().comm == son->getMPI().comm);
188 //! rely on TTrans's copy assignment!
189 trans = R.trans;
190 //! if R.trans already attached, then self trans attach self arrays
191 if (R.trans.father)
193 if (R.trans.son)
194 trans.son = son;
195 //! Re-create persistent MPI requests pointing to the NEW arrays.
196 //! Without this, persistent requests still reference R's buffers.
197 if (R.trans.father && R.trans.son && trans.pLGhostMapping)
198 trans.createMPITypes();
199 }
200
201 /// @brief Read-only row-pointer access in the combined address space.
202 decltype(father->operator[](index(0))) operator[](index i) const
204 if (i >= 0 && i < father->Size())
205 return father->operator[](i);
206 else
207 return son->operator[](i - father->Size());
208 }
209
210 /// @brief Mutable row-pointer access in the combined address space.
211 decltype(father->operator[](index(0))) operator[](index i)
212 {
213 if (i >= 0 && i < father->Size())
214 return father->operator[](i);
215 else
216 return son->operator[](i - father->Size());
217 }
218
219 // decltype(father->operator()(index(0), rowsize(0))) operator()(index i, rowsize j)
220 // {
221 // if (i >= 0 && i < father->Size())
222 // return father->operator()(i, j);
223 // else
224 // return son->operator()(i - father->Size(), j);
225 // }
226
227 /// @brief N-ary element access in the combined space (mutable). Arguments
228 /// after the row index are forwarded to the underlying `operator()`.
229 template <class... TOthers>
230 decltype(auto) operator()(index i, TOthers... aOthers)
231 {
232 if (i >= 0 && i < father->Size())
233 return father->operator()(i, aOthers...);
234 else
235 return son->operator()(i - father->Size(), aOthers...);
236 }
237
238 /// @brief N-ary element access (const).
239 template <class... TOthers>
240 decltype(auto) operator()(index i, TOthers... aOthers) const
241 {
242 if (i >= 0 && i < father->Size())
243 return father->operator()(i, aOthers...);
244 else
245 return son->operator()(i - father->Size(), aOthers...);
246 }
247
248 /// @brief Invoke `F(array, localIndex)` on either father or son
249 /// depending on which range `i` falls into.
250 /// @details Useful when per-side state must be updated alongside the
251 /// indexed row (e.g., logging father vs son modifications).
252 template <class TF>
254 {
255 if (i >= 0 && i < father->Size())
256 return F(*father, i);
257 else
258 return F(*son, i - father->Size());
259 }
260
261 /// @brief Uniform row width (delegates to father).
262 [[nodiscard]] auto RowSize() const
263 {
264 return father->RowSize();
265 }
266
267 /// @brief Per-row width in the combined address space.
268 [[nodiscard]] auto RowSize(index i) const
269 {
270 if (i >= 0 && i < father->Size())
271 return father->RowSize(i);
272 else
273 return son->RowSize(i - father->Size());
274 }
275
276 /// @brief Resize a single row in the combined address space.
278 {
279 if (i >= 0 && i < father->Size())
280 father->ResizeRow(i, rs);
281 else
282 son->ResizeRow(i - father->Size(), rs);
283 }
284
285 /// @brief Variadic ResizeRow overload that forwards extra args.
286 template <class... TOthers>
288 {
289 if (i >= 0 && i < father->Size())
290 father->ResizeRow(i, aOthers...);
291 else
292 son->ResizeRow(i - father->Size(), aOthers...);
293 }
294
295 /// @brief Combined row count (`father->Size() + son->Size()`).
296 [[nodiscard]] index Size() const
297 {
299 return father->Size() + son->Size();
300 }
301
302 /// @brief Bind the transformer to the current father / son pointers.
303 /// @details First step of the four-step ghost setup when not using
304 /// @ref BorrowAndPull. Both arrays must already be allocated.
306 {
307 DNDS_check_throw_info(bool(father) && bool(son),
308 fmt::format("father and son need to be constructed before Trans Attach. Array is {}",
309 father ? father->getObjectIdentity(TArray::GetArrayName()) : TArray::GetArrayName()));
310 trans.setFatherSon(father, son);
311 }
312
313 /// @brief Allocate both father and son arrays, forwarding all args to TArray constructor.
314 ///
315 /// Replaces the common two-line DNDS_MAKE_SSP(pair.father, ...) +
316 /// DNDS_MAKE_SSP(pair.son, ...) pattern.
317 ///
318 /// The name tag is set on both arrays as "name.father" / "name.son".
319 /// Constructor args are forwarded as-is to TArray (same order as ParArray
320 /// constructors, e.g., `(mpi)` or `(dataType, commMult, mpi)`).
321 ///
322 /// Usage:
323 /// pair.InitPair("cell2node", mpi);
324 /// pair.InitPair("cellElemInfo", ElemInfo::CommType(), ElemInfo::CommMult(), mpi);
325 template <typename... Args>
326 void InitPair(const std::string &name, Args &&...args)
327 {
328 father = make_ssp<TArray>(ObjName{name + ".father"}, std::forward<Args>(args)...);
329 son = make_ssp<TArray>(ObjName{name + ".son"}, std::forward<Args>(args)...);
330 }
331
332 /// @brief Attach, borrow ghost indexing from a primary pair, create MPI types, and pull once.
333 ///
334 /// Replaces the 4-line sequence:
335 /// this->TransAttach();
336 /// this->trans.BorrowGGIndexing(primary.trans);
337 /// this->trans.createMPITypes();
338 /// this->trans.pullOnce();
339 template <class TPrimaryPair>
341 {
342 this->TransAttach();
343 this->trans.BorrowGGIndexing(primary.trans);
344 this->trans.createMPITypes();
345 this->trans.pullOnce();
346 }
347
348 /// @brief Attach, borrow ghost indexing from a primary pair, and create MPI types (no pull).
349 ///
350 /// Useful when you need to set up communication but defer the pull
351 /// (e.g., for persistent communication patterns).
352 template <class TPrimaryPair>
354 {
355 this->TransAttach();
356 this->trans.BorrowGGIndexing(primary.trans);
357 this->trans.createMPITypes();
358 }
359
360 /// @brief Compress both father and son CSR arrays (no-op for non-CSR layouts).
362 {
363 father->Compress();
364 son->Compress();
365 }
366
367 /// @brief Copy only the father's data from another pair (shallow).
369 {
370 father->CopyData(*R.father);
371 }
372
373 /**
374 * @brief Swap both father and son data with another pair of the same type.
375 * @warning Because the data pointers change, persistent MPI requests
376 * (if any) are rebuilt on both sides via #reInitPersistentPullPush.
377 */
378 // TODO: make a data change listener in transformer?
379 //! a situation: the data pointer should remain static as long as initPersistentPuxx is done
381 {
382 father->SwapData(*R.father);
383 son->SwapData(*R.son);
384 trans.reInitPersistentPullPush();
385 R.trans.reInitPersistentPullPush();
386 }
387
388 /// @brief Combined hash across ranks. Used for determinism / equality checks in tests.
389 std::size_t hash()
390 {
391 auto fatherHash = father->hash();
392 auto sonHash = son->hash();
394 MPIInfo mpi = father->getMPI();
395 std::vector<index> hashes;
396 hashes.resize(mpi.size);
397 MPI::Allgather(&localHash, 1, DNDS_MPI_INDEX, hashes.data(), 1, DNDS_MPI_INDEX, mpi.comm);
398 return vector_hash<index>()(hashes);
399 }
400
401 /// @brief Writes the ArrayPair (father, optional son, optional ghost mapping).
402 ///
403 /// Creates a sub-path `name` containing:
404 /// - `MPIRank` (per-rank only), `MPISize` — partition metadata.
405 /// - `father` — the father array via ParArray::WriteSerializer (Parts offset).
406 /// - `son` — the son (ghost) array, if `includeSon` is true.
407 /// - `pullingIndexGlobal` — ghost pull indices, if `includePIG` is true.
408 ///
409 /// All writes are collective for H5 serializers. Every rank must call this.
410 void WriteSerialize(Serializer::SerializerBaseSSP serializerP, const std::string &name, bool includePIG = true, bool includeSon = true)
411 {
412 if (includePIG)
413 DNDS_check_throw_info(trans.pLGlobalMapping && trans.pLGhostMapping, "pair's trans not having ghost info");
414
415 auto cwd = serializerP->GetCurrentPath();
416 serializerP->CreatePath(name);
417 serializerP->GoToPath(name);
418
419 if (serializerP->IsPerRank())
420 serializerP->WriteIndex("MPIRank", father->getMPI().rank);
421 serializerP->WriteIndex("MPISize", father->getMPI().size);
422 // std::cout << trans.pLGlobalMapping->operator()(trans.mpi.rank, 0) << ",,," << trans.pLGlobalMapping->globalSize() << std::endl;
423 // ! this is wrong as pLGlobalMapping stores the row index, not the data index!!
424 // father->WriteSerializer(serializerP, "father",
425 // Serializer::ArrayGlobalOffset{
426 // trans.pLGlobalMapping->globalSize(),
427 // trans.pLGlobalMapping->operator()(trans.mpi.rank, 0),
428 // }); // trans.pLGlobalMapping == father->pLGlobalMapping
429 // TODO: overwrite all the Resize()/ResizeRow() for ParArray so that it handles global size and offset internally?
430
431 // now using the parts (calculate offsets)
432 father->WriteSerializer(serializerP, "father", Serializer::ArrayGlobalOffset_Parts);
433 if (includeSon)
434 son->WriteSerializer(serializerP, "son", Serializer::ArrayGlobalOffset_Parts);
435 /***************************/
436 // ghost info
437 // static_assert(std::is_same_v<rowsize, MPI_int>);
438 // *writing pullingIndexGlobal, trusting the GlobalMapping to remain the same
439 if (includePIG)
440 serializerP->WriteIndexVector("pullingIndexGlobal", trans.pLGhostMapping->ghostIndex, Serializer::ArrayGlobalOffset_Parts);
441 /***************************/
442
443 serializerP->GoToPath(cwd);
444 }
445
446 /// @brief Writes the ArrayPair with an origIndex companion dataset for redistribution support.
447 ///
448 /// When origIndex is provided and the serializer is H5 (collective), an additional
449 /// dataset "origIndex" is written alongside the father data. This enables reading
450 /// the data back with a different MPI partition.
451 ///
452 /// @param serializerP The serializer (H5 for redistribution support, JSON ignores origIndex)
453 /// @param name Path name for this array in the serializer hierarchy
454 /// @param origIndex Partition-independent key for each row (e.g., from cell2cellOrig).
455 /// Must have size == father->Size().
456 /// @param includePIG Whether to include ghost pull-index-global data
457 /// @param includeSon Whether to include the son (ghost) array
459 const std::vector<index> &origIndex,
460 bool includePIG = true, bool includeSon = true)
461 {
463 fmt::format("origIndex size {} != father size {}", origIndex.size(), father->Size()));
464
465 // Write the array data normally
467
468 // Write the origIndex companion dataset alongside the father data (H5 only)
469 if (!serializerP->IsPerRank())
470 {
471 auto cwd = serializerP->GetCurrentPath();
472 serializerP->GoToPath(name);
473 serializerP->WriteIndexVector("origIndex", origIndex, Serializer::ArrayGlobalOffset_Parts);
474 serializerP->WriteInt("redistributable", 1);
475 serializerP->GoToPath(cwd);
476 }
477 }
478
479 /// @brief Reads an ArrayPair written by WriteSerialize (same partition count).
480 ///
481 /// Reads father (and optionally son and ghost mapping) from sub-path `name`.
482 /// Requires the file to have been written with the same MPI size.
483 /// The father and son arrays are resized internally by Array::ReadSerializer.
484 ///
485 /// All reads are collective for H5 serializers. Every rank must call this,
486 /// including ranks whose local size is 0.
487 ///
488 /// @warning If `includePIG` is true, the caller must call
489 /// `trans.createMPITypes()` after this method returns.
490 void ReadSerialize(Serializer::SerializerBaseSSP serializerP, const std::string &name, bool includePIG = true, bool includeSon = true)
491 {
493 this->TransAttach();
494
495 auto cwd = serializerP->GetCurrentPath();
496 // serializerP->CreatePath(name); //!remember no create!
497 serializerP->GoToPath(name);
498
499 index readRank{0}, readSize{0};
500 if (serializerP->IsPerRank())
501 serializerP->ReadIndex("MPIRank", readRank);
502 serializerP->ReadIndex("MPISize", readSize);
503 DNDS_check_throw((!serializerP->IsPerRank() || readRank == father->getMPI().rank) &&
504 readSize == father->getMPI().size);
505 auto offsetV_father = Serializer::ArrayGlobalOffset_Unknown;
506 auto offsetV_son = Serializer::ArrayGlobalOffset_Unknown;
507 father->ReadSerializer(serializerP, "father", offsetV_father);
508 if (includeSon)
509 son->ReadSerializer(serializerP, "son", offsetV_son);
510 /***************************/
511 // ghost info
512 // static_assert(std::is_same_v<rowsize, MPI_int>);
513 // *writing pullingIndexGlobal, trusting the GlobalMapping to remain the same
514 if (includePIG)
515 {
516 std::vector<index> pullingIndexGlobal;
517 auto offsetV_PIG = Serializer::ArrayGlobalOffset_Unknown; // TODO: check the offsets?
518 serializerP->ReadIndexVector("pullingIndexGlobal", pullingIndexGlobal, offsetV_PIG);
519 trans.createFatherGlobalMapping();
520 trans.createGhostMapping(pullingIndexGlobal);
521 }
522 /***************************/
523
524 serializerP->GoToPath(cwd);
525 }
526
527 /// @brief Reads ArrayPair data from HDF5 with redistribution support.
528 ///
529 /// Handles three cases depending on the file contents and partition count:
530 ///
531 /// 1. **No origIndex in file, same np**: falls back to ReadSerialize
532 /// (same-partition read, no redistribution).
533 ///
534 /// 2. **Has origIndex, same np**: reads father via ReadSerialize, reads
535 /// origIndex, then uses RedistributeArrayWithTransformer to move data
536 /// from the file's partition layout to the caller's partition layout.
537 ///
538 /// 3. **Has origIndex, different np**: reads father via EvenSplit (each
539 /// rank reads ~nGlobal/nRanks rows regardless of the original partition),
540 /// reads origIndex the same way, then uses
541 /// RedistributeArrayWithTransformer to pull each rank's needed cells.
542 ///
543 /// In case 3, some ranks may get 0 rows from EvenSplit when nGlobal < nRanks.
544 /// This is handled correctly: the H5 collective reads proceed with 0-count
545 /// hyperslab selections, and the redistribution operates on empty arrays.
546 ///
547 /// All operations are collective. Every rank must call this method.
548 ///
549 /// @param serializerP The serializer (must be H5 / collective)
550 /// @param name Path name in the serializer hierarchy
551 /// @param newOrigIndex Partition-independent keys for this rank's cells
552 /// (e.g., from cell2cellOrig). Size must equal
553 /// father->Size(). May be empty for ranks with 0 cells.
556 const std::string &name,
557 const std::vector<index> &newOrigIndex)
558 {
561 "Redistribution read only supported for collective (H5) serializers");
562
563 auto cwd = serializerP->GetCurrentPath();
564 serializerP->GoToPath(name);
565
566 // Check if origIndex exists in the file
567 auto pathContents = serializerP->ListCurrentPath();
568 bool hasOrigIndex = pathContents.count("origIndex") > 0;
569
570 index readSize{0};
571 serializerP->ReadIndex("MPISize", readSize);
572 bool sameNumPartition = (readSize == father->getMPI().size);
573
574 if (!hasOrigIndex)
575 {
576 // No origIndex in file -- fall back to same-partition read
577 serializerP->GoToPath(cwd);
579 fmt::format("File has no origIndex and was written with np={}, "
580 "but reading with np={}. Cannot redistribute.",
581 readSize, father->getMPI().size));
582 DNDS_check_throw_info(false, "Redistribution fallback requires same-partition read");
583 ReadSerialize(serializerP, name, /*includePIG*/ false, /*includeSon*/ false);
584 return;
585 }
586
587 // Use redistribution: even-split read + rendezvous.
588 // This works for both same-np (different layout) and different-np cases.
589 serializerP->GoToPath(cwd); // go back so we can navigate cleanly
590
591 // Read the full ArrayPair normally into a temporary (same-np read)
592 // or use even-split for different-np.
593 auto mpi = father->getMPI();
594
596 {
597 // Same np: we can read normally (rank slices match), then redistribute locally
599 readPair.InitPair("readPair", mpi);
600 readPair.ReadSerialize(serializerP, name, /*includePIG*/ false, /*includeSon*/ false);
601
602 // Read origIndex from the file
603 serializerP->GoToPath(name);
604 std::vector<index> fileOrigIndex;
605 auto offsetOrigIdx = Serializer::ArrayGlobalOffset_Unknown;
606 serializerP->ReadIndexVector("origIndex", fileOrigIndex, offsetOrigIdx);
607 serializerP->GoToPath(cwd);
608
610 fmt::format("readPair.father size {} != fileOrigIndex size {}",
611 readPair.father->Size(), fileOrigIndex.size()));
612
613 // Redistribute using ArrayTransformer
615 fmt::format("father size {} != newOrigIndex size {}",
616 father->Size(), newOrigIndex.size()));
619 }
620 else
621 {
622 // Different np: use even-split read + rendezvous redistribution
623 serializerP->GoToPath(name);
624
625 // Read origIndex from file using even-split
626 std::vector<index> fileOrigIndex;
627 auto offsetOrigIdx = Serializer::ArrayGlobalOffset_EvenSplit;
628 serializerP->ReadIndexVector("origIndex", fileOrigIndex, offsetOrigIdx);
629 DNDS_assert(offsetOrigIdx.isDist()); // resolved by even-split read
630
631 // Read father array data using even-split via TArray::ReadSerializer.
632 // Pass EvenSplit offset so Array::ReadSerializer reads size correctly.
633 auto readFather = std::make_shared<TArray>(mpi);
634 {
635 auto offsetFather = Serializer::ArrayGlobalOffset_EvenSplit;
636 readFather->ReadSerializer(serializerP, "father", offsetFather);
637 }
638
640 fmt::format("readFather size {} != fileOrigIndex size {}",
641 readFather->Size(), fileOrigIndex.size()));
642
643 // Redistribute using ArrayTransformer
645 fmt::format("father size {} != newOrigIndex size {}",
646 father->Size(), newOrigIndex.size()));
649
650 serializerP->GoToPath(cwd);
651 }
652 }
653
654 /// @brief Device-view template alias: `t_deviceView<DeviceBackend::CUDA>`
655 /// gives the mutable CUDA view type for this pair.
656 template <DeviceBackend B>
658
659 /// @brief Const-device-view template alias.
660 template <DeviceBackend B>
662
663 /// @brief Produce a mutable device view; both father and son must be allocated.
664 template <DeviceBackend B>
666 {
668 fmt::format("need both father and son to exist for device view: {}",
669 father ? father->getObjectIdentity(TArray::GetArrayName()) : TArray::GetArrayName()));
670 return t_deviceView<B>{
671 father->template deviceView<B>(),
672 son->template deviceView<B>()};
673 }
674
675 /// @brief Produce a const device view.
676 template <DeviceBackend B>
677 [[nodiscard]] auto deviceView() const
678 {
680 fmt::format("need both father and son to exist for device view: {}",
681 father ? father->getObjectIdentity(TArray::GetArrayName()) : TArray::GetArrayName()));
683 std::as_const(*father).template deviceView<B>(),
684 std::as_const(*son).template deviceView<B>()};
685 }
686
687 /// @brief Mirror both father and son to the given device backend.
689 {
690 if (father)
691 father->to_device(backend);
692 if (son)
693 son->to_device(backend);
694 }
695
696 /// @brief Bring both father and son mirrors back to host memory.
697 void to_host()
698 {
699 if (father)
700 father->to_host();
701 if (son)
702 son->to_host();
703 }
704 };
705
706 /// @brief @ref DNDS::ArrayPair "ArrayPair" alias for mesh adjacency (variable-width integer rows).
707 template <rowsize _row_size = 1, rowsize _row_max = _row_size, rowsize _align = NoAlign>
709
710 /// @brief @ref DNDS::ArrayPair "ArrayPair" alias for per-row Eigen vectors (e.g., node coords with N=3).
711 template <rowsize _vec_size = 1, rowsize _row_max = _vec_size, rowsize _align = NoAlign>
713
714 /// @brief @ref DNDS::ArrayPair "ArrayPair" alias for per-row Eigen matrices.
715 template <rowsize _mat_ni = 1, rowsize _mat_nj = 1,
716 rowsize _mat_ni_max = _mat_ni, rowsize _mat_nj_max = _mat_nj, rowsize _align = NoAlign>
718
719 /// @brief @ref DNDS::ArrayPair "ArrayPair" alias for per-row variable-size Eigen matrix batches.
721
722 /// @brief @ref DNDS::ArrayPair "ArrayPair" alias for per-row batches of uniform `_n_row x _n_col` matrices.
723 /// @details Used by @ref FiniteVolume / @ref VariationalReconstruction to store
724 /// per-quadrature-point Jacobians and basis coefficients.
725 template <int _n_row, int _n_col>
727}
Adjacency array (CSR-like index storage) built on ParArray.
Batch of variable-sized Eigen matrices stored in CSR layout.
Eigen-matrix array: each row is an Eigen::Map<Matrix> over contiguous real storage.
Batch of uniform-sized Eigen matrices per row, with variable batch count.
Eigen-vector array: each row is an Eigen::Map over contiguous real storage.
Redistributes ArrayPair data across different MPI partitions using ArrayTransformer.
ParArray (MPI-aware array) and ArrayTransformer (ghost/halo communication).
Core type aliases, constants, and metaprogramming utilities for the DNDS framework.
#define DNDS_DEVICE_TRIVIAL_COPY_DEFINE(T, T_Self)
Definition Defines.hpp:87
#define DNDS_DEVICE_CALLABLE
Definition Defines.hpp:76
Device memory abstraction layer with backend-specific storage and factory creation.
Non-owning device-side views of Array objects for host and CUDA backends.
Assertion / error-handling macros and supporting helper functions.
#define DNDS_assert_info(expr, info)
Debug-only assertion with an extra std::string info message.
Definition Errors.hpp:117
#define DNDS_assert(expr)
Debug-only assertion (compiled out when DNDS_NDEBUG is defined). Prints the expression + file/line + ...
Definition Errors.hpp:112
#define DNDS_check_throw_info(expr, info)
Same as DNDS_check_throw but attaches a user-supplied info message to the thrown std::runtime_error.
Definition Errors.hpp:100
#define DNDS_check_throw(expr)
Runtime check active in both debug and release builds. Throws std::runtime_error if expr evaluates to...
Definition Errors.hpp:93
ArrayDofDeviceView< B, n_m, n_n > t_deviceView
Mutable device view alias.
Definition ArrayDOF.hpp:183
Ghost-communication engine for a father / son ParArray pair.
MPI_int Allgather(const void *sendbuf, MPI_int sendcount, MPI_Datatype sendtype, void *recvbuf, MPI_int recvcount, MPI_Datatype recvtype, MPI_Comm comm)
Wrapper over MPI_Allgather.
Definition MPI.cpp:229
ssp< SerializerBase > SerializerBaseSSP
the host side operators are provided as implemented
const MPI_Datatype DNDS_MPI_INDEX
MPI datatype matching index (= MPI_INT64_T).
Definition MPI.hpp:106
DeviceBackend
Enumerates the backends a DeviceStorage / Array can live on.
DNDS_CONSTANT const rowsize NoAlign
Alignment flag: no padding applied to rows (the only currently-supported value).
Definition Defines.hpp:287
int32_t rowsize
Row-width / per-row element-count type (signed 32-bit).
Definition Defines.hpp:114
int64_t index
Global row / DOF index type (signed 64-bit; handles multi-billion-cell meshes).
Definition Defines.hpp:112
Const device view of a father-son array pair.
t_arrayDeviceView father
the only difference from non-const
typename TArray::template t_deviceViewConst< B > t_arrayDeviceView
CRTP base implementing the unified-index accessors shared by ArrayPairDeviceView and ArrayPairDeviceV...
Definition ArrayPair.hpp:31
DNDS_DEVICE_CALLABLE auto operator[](index i)
Row pointer for index i (mutable).
Definition ArrayPair.hpp:67
DNDS_DEVICE_CALLABLE auto RowSize() const
Uniform row width (delegates to father; father/son share it).
Definition ArrayPair.hpp:40
DNDS_DEVICE_CALLABLE index Size() const
Combined father + son row count.
Definition ArrayPair.hpp:33
DNDS_DEVICE_CALLABLE auto operator[](index i) const
Row pointer for index i in the combined address space (const).
Definition ArrayPair.hpp:57
DNDS_DEVICE_CALLABLE auto RowSize(index i) const
Per-row width in the combined address space.
Definition ArrayPair.hpp:47
Mutable device view onto an ArrayPair (for CUDA kernels).
t_arrayDeviceView father
t_arrayDeviceView son
typename TArray::template t_deviceView< B > t_arrayDeviceView
Convenience bundle of a father, son, and attached ArrayTransformer.
void TransAttach()
Bind the transformer to the current father / son pointers.
void ReadSerializeRedistributed(Serializer::SerializerBaseSSP serializerP, const std::string &name, const std::vector< index > &newOrigIndex)
Reads ArrayPair data from HDF5 with redistribution support.
decltype(father->operator[](index(0))) operator[](index i) const
Read-only row-pointer access in the combined address space.
void to_device(DeviceBackend backend)
Mirror both father and son to the given device backend.
ArrayPairDeviceView< B, TArray > t_deviceView
Device-view template alias: t_deviceView<DeviceBackend::CUDA> gives the mutable CUDA view type for th...
void SwapDataFatherSon(t_self &R)
Swap both father and son data with another pair of the same type.
void to_host()
Bring both father and son mirrors back to host memory.
ssp< TArray > father
Owned-side array (must be resized before ghost setup).
auto deviceView()
Produce a mutable device view; both father and son must be allocated.
void WriteSerialize(Serializer::SerializerBaseSSP serializerP, const std::string &name, bool includePIG=true, bool includeSon=true)
Writes the ArrayPair (father, optional son, optional ghost mapping).
auto deviceView() const
Produce a const device view.
void ResizeRow(index i, TOthers... aOthers)
Variadic ResizeRow overload that forwards extra args.
index Size() const
Combined row count (father->Size() + son->Size()).
void BorrowSetup(TPrimaryPair &primary)
Attach, borrow ghost indexing from a primary pair, and create MPI types (no pull).
static constexpr bool IsCSR()
Whether the underlying array uses CSR storage.
void InitPair(const std::string &name, Args &&...args)
Allocate both father and son arrays, forwarding all args to TArray constructor.
ssp< TArray > son
Ghost-side array (sized automatically by createMPITypes / BorrowAndPull).
std::size_t hash()
Combined hash across ranks. Used for determinism / equality checks in tests.
void clone(const t_self &R)
Deep-copy: allocate new father / son and copy their data; rebind trans.
void BorrowAndPull(TPrimaryPair &primary)
Attach, borrow ghost indexing from a primary pair, create MPI types, and pull once.
auto RowSize() const
Uniform row width (delegates to father).
void ResizeRow(index i, rowsize rs)
Resize a single row in the combined address space.
void ReadSerialize(Serializer::SerializerBaseSSP serializerP, const std::string &name, bool includePIG=true, bool includeSon=true)
Reads an ArrayPair written by WriteSerialize (same partition count).
TTrans trans
Ghost-communication engine bound to father and son.
void WriteSerialize(Serializer::SerializerBaseSSP serializerP, const std::string &name, const std::vector< index > &origIndex, bool includePIG=true, bool includeSon=true)
Writes the ArrayPair with an origIndex companion dataset for redistribution support.
decltype(father->operator[](index(0))) operator[](index i)
Mutable row-pointer access in the combined address space.
void CopyFather(t_self &R)
Copy only the father's data from another pair (shallow).
auto RowSize(index i) const
Per-row width in the combined address space.
void CompressBoth()
Compress both father and son CSR arrays (no-op for non-CSR layouts).
auto runFunctionAppendedIndex(index i, TF &&F)
Invoke F(array, localIndex) on either father or son depending on which range i falls into.
typename ArrayTransformerType< TArray >::Type TTrans
Lightweight bundle of an MPI communicator and the calling rank's coordinates.
Definition MPI.hpp:231
Tag type for naming objects created via make_ssp.
Definition Defines.hpp:254