DNDSR 0.2.1
Distributed Numeric Data Structure for CFV
Loading...
Searching...
No Matches
ArrayTransformer.hpp
Go to the documentation of this file.
1#pragma once
2/// @file ArrayTransformer.hpp
3/// @brief ParArray (MPI-aware array) and ArrayTransformer (ghost/halo communication).
4/// @par Unit Test Coverage (test_ArrayTransformer.cpp, MPI np=1,2,4)
5/// - ParArray: setMPI, Resize, createGlobalMapping, globalSize, AssertConsistent
6/// - ArrayTransformer pull-based ghost: setFatherSon, createFatherGlobalMapping,
7/// createGhostMapping (pull), createMPITypes, pullOnce
8/// -- layouts: TABLE_StaticFixed, TABLE_Fixed, CSR, std::array compound type
9/// - Persistent pull: initPersistentPull, startPersistentPull, waitPersistentPull,
10/// clearPersistentPull (with father data update between pulls)
11/// - BorrowGGIndexing: second array shares ghost mapping of first
12/// - pushOnce: write to son, push back to father
13/// @par Not Yet Tested
14/// - Push-based createGhostMapping(pushingIndexLocal, pushStarts)
15/// - Persistent push (initPersistentPush, startPersistentPush, etc.)
16/// - clearMPITypes, clearGlobalMapping, clearGhostMapping (independent)
17/// - reInitPersistentPullPush
18/// - getFatherSonData(DeviceBackend), AssertDataType, setDataType
19
20#include "Array.hpp"
22#include "DNDS/Errors.hpp"
23#include "IndexMapping.hpp"
24#include "Profiling.hpp"
25#include <utility>
26#include "VectorUtils.hpp"
27
28namespace DNDS
29{
30 /// @brief Shared pointer to a @ref DNDS::GlobalOffsetsMapping "GlobalOffsetsMapping" (globally replicated).
32 /// @brief Shared pointer to an @ref DNDS::OffsetAscendIndexMapping "OffsetAscendIndexMapping" (per-rank ghost layout).
33 using t_pLGhostMapping = ssp<OffsetAscendIndexMapping>; // TODO: change to unique_ptr and modify corresponding copy constructor/assigner
34
35 /**
36 * @brief MPI-aware @ref DNDS::Array "Array": adds a communicator, rank, and global index mapping.
37 *
38 * @details Inherits everything from @ref DNDS::Array "Array" and layers on:
39 * - an @ref DNDS::MPIInfo "MPIInfo" `mpi` context;
40 * - a @ref DNDS::GlobalOffsetsMapping "GlobalOffsetsMapping" `pLGlobalMapping` that maps local row indices
41 * to the global index space (populated by #createGlobalMapping);
42 * - collective serialization I/O that coordinates writes / reads across ranks.
43 *
44 * Typical usage:
45 * ```cpp
46 * auto father = std::make_shared<ParArray<real, 5>>(mpi);
47 * father->Resize(nLocal);
48 * father->createGlobalMapping(); // collective
49 * index nGlobal = father->globalSize(); // total rows across ranks
50 * ```
51 *
52 * Ghost (halo) data is not managed here; pair with @ref DNDS::ArrayTransformer "ArrayTransformer" or
53 * wrap in an @ref DNDS::ArrayPair "ArrayPair" for that.
54 *
55 * @sa ArrayTransformer, ArrayPair, docs/architecture/array_infrastructure.md.
56 */
57 template <class T, rowsize _row_size = 1, rowsize _row_max = _row_size, rowsize _align = NoAlign>
58 class ParArray : public Array<T, _row_size, _row_max, _align>
59 {
60 public:
65
66 using TArray::Array;
67 // TODO: privatize these
68 /// @brief Shared pointer to the global-offsets table. Populated by
69 /// #createGlobalMapping; may be pointed at an existing table to skip
70 /// the collective setup.
72 /// @brief MPI context associated with this array (must be set before collectives).
75
76 public:
77 // default copy
78 ParArray(const t_self &R) = default;
79 t_self &operator=(const t_self &R) = default;
80 // Move (suppressed by explicit copy above, re-declare).
84
85 // operator= handled automatically
86
87 /// @brief Copy-assign from another ParArray. Shallow copy semantics
88 /// (mirrors @ref DNDS::Array "Array"::clone): shares structural/data buffers.
90 {
91 this->operator=(R);
92 }
93
94 public:
95 /// @brief Serialize (write) the parallel array with MPI-aware metadata.
96 ///
97 /// Delegates to Array::WriteSerializer for metadata, structure, and data.
98 /// Additionally for collective (H5) serializers:
99 /// - Writes `sizeGlobal` (sum of all ranks' _size) as a scalar attribute.
100 /// - For CSR: computes global data offsets via MPI_Scan and writes pRowStart
101 /// in global data coordinates as a contiguous (nRowsGlobal+1) dataset.
102 /// Non-last ranks write nRows entries (dropping the redundant tail),
103 /// last rank writes nRows+1 (including the global data total).
104 ///
105 /// Asserts MPI context consistency with the serializer.
106 ///
107 /// @param serializerP Serializer instance.
108 /// @param name Sub-path name for this array.
109 /// @param offset [in] Row-level partitioning (typically ArrayGlobalOffset_Parts).
111 {
112 if (!serializerP->IsPerRank())
113 {
115 mpi == serializerP->getMPI(),
116 fmt::format("ParArray MPI context (rank={}, size={}) doesn't match serializer (rank={}, size={})",
117 mpi.rank, mpi.size, serializerP->GetMPIRank(), serializerP->GetMPISize()));
118 }
119
120 // For collective CSR, compute global data offset and pass to Array
121 // so it skips its own pRowStart write.
122 Serializer::ArrayGlobalOffset dataOffset = Serializer::ArrayGlobalOffset_Unknown;
123 if constexpr (_dataLayout == CSR)
124 {
125 if (!this->IfCompressed())
126 this->Compress();
127 if (!serializerP->IsPerRank() && this->_pRowStart)
128 {
129 index localDataCount = this->_pRowStart->at(this->_size);
134 }
135 }
136
138
139 if (!serializerP->IsPerRank())
140 {
141 auto cwd = serializerP->GetCurrentPath();
142 serializerP->GoToPath(name);
143
144 // Write sizeGlobal
145 index sizeGlobal = 0;
146 MPI::Allreduce(&this->_size, &sizeGlobal, 1, DNDS_MPI_INDEX, MPI_SUM, mpi.comm);
147 serializerP->WriteIndex("sizeGlobal", sizeGlobal);
148
149 // For CSR, write pRowStart in global data coordinates.
150 // Non-last ranks write nRows entries; last rank writes nRows+1.
151 // Total = nRowsGlobal + 1 (no overlap, contiguous).
152 if constexpr (_dataLayout == CSR)
153 {
154 if (dataOffset.isDist())
155 {
157 index nWrite = (mpi.rank == mpi.size - 1) ? (this->_size + 1) : this->_size;
158 auto prsGlobal = std::make_shared<host_device_vector<index>>(nWrite);
159 for (index i = 0; i < nWrite; i++)
161 serializerP->WriteSharedIndexVector("pRowStart", prsGlobal,
162 Serializer::ArrayGlobalOffset_Parts);
163 }
164 }
165
166 serializerP->GoToPath(cwd);
167 }
168 }
169
170 /// @brief Deserialize (read) the parallel array with MPI-aware metadata.
171 ///
172 /// Resolves the input `offset` before delegating to Array::ReadSerializer:
173 /// - EvenSplit: reads `sizeGlobal`, computes even-split range, resolves to
174 /// isDist({localRows, globalRowStart}).
175 /// - CSR with collective serializer: reads per-rank size, computes row offset
176 /// via MPI_Scan, resolves to isDist. This is required because CSR pRowStart
177 /// is stored in global coordinates.
178 /// - Otherwise: passes offset through unchanged.
179 ///
180 /// Asserts MPI context consistency with the serializer.
181 ///
182 /// @param serializerP Serializer instance.
183 /// @param name Sub-path name for this array.
184 /// @param offset [in/out] Row-level offset. EvenSplit is resolved here.
185 /// After return, reflects the resolved row-level position.
187 {
188 if (!serializerP->IsPerRank())
189 {
191 mpi == serializerP->getMPI(),
192 fmt::format("ParArray MPI context (rank={}, size={}) doesn't match serializer (rank={}, size={})",
193 mpi.rank, mpi.size, serializerP->GetMPIRank(), serializerP->GetMPISize()));
194 }
195
196 if (!serializerP->IsPerRank() && !offset.isDist())
197 {
198 if (offset == Serializer::ArrayGlobalOffset_EvenSplit)
199 {
200 // Read sizeGlobal, compute even-split range
201 auto cwd = serializerP->GetCurrentPath();
202 serializerP->GoToPath(name);
203 index sizeGlobal = 0;
204 serializerP->ReadIndex("sizeGlobal", sizeGlobal);
205 serializerP->GoToPath(cwd);
206
209 }
210 else if constexpr (_dataLayout == CSR)
211 {
212 // For CSR with collective serializer, pRowStart is stored in global
213 // coordinates (nRowsGlobal+1 contiguous entries). We must always
214 // resolve to isDist offset so Array reads the correct slice.
215 // Read this rank's _size from the per-rank size dataset, compute
216 // row offset via MPI_Scan, then set isDist offset.
217 auto cwd = serializerP->GetCurrentPath();
218 serializerP->GoToPath(name);
219 std::vector<index> _size_vv;
220 Serializer::ArrayGlobalOffset offsetV = Serializer::ArrayGlobalOffset_Unknown;
221 serializerP->ReadIndexVector("size", _size_vv, offsetV);
222 DNDS_check_throw(_size_vv.size() == 1);
223 index localSize = _size_vv.front();
224 serializerP->GoToPath(cwd);
225
226 index globalEnd = 0;
230 }
231 }
232
233 TArray::ReadSerializer(serializerP, name, offset);
234 }
235
236 private:
237 MPI_Datatype dataType = BasicType_To_MPIIntType<T>().first;
238 MPI_int typeMult = BasicType_To_MPIIntType<T>().second;
239
240 public:
241 /// @brief MPI element datatype used for ghost exchange (deduced from `T`).
242 MPI_Datatype getDataType() { return dataType; }
243 /// @brief Per-element count multiplier that goes with #getDataType.
244 MPI_int getTypeMult() { return typeMult; }
245
246 public:
247 /**
248 * @brief Install the MPI context after default construction.
249 * @details Calls @ref AssertDataType to verify the deduced datatype / multiplier
250 * match `sizeof(T)`.
251 */
252 void setMPI(const MPIInfo &n_mpi)
253 {
254 mpi = n_mpi;
256 }
257
258 /// @brief Mutable MPI context accessor.
260 {
261 return mpi;
262 }
263
264 /// @brief Read-only MPI context accessor.
265 [[nodiscard]] const MPIInfo &getMPI() const
266 {
267 return mpi;
268 }
269
270 /// @brief Override the deduced MPI datatype and element multiplier
271 /// (advanced; needed for custom compound element types).
273 {
274 dataType = n_dType;
275 typeMult = n_TypeMult;
276 }
277
278 /// @brief Default-construct an uninitialised ParArray; call #setMPI and @ref Resize later.
279 ParArray() = default;
280
281 /// @brief Construct a ParArray bound to the given MPI context.
283 {
285 }
286 /// @brief Construct with a custom (MPI datatype, multiplier) pair.
287 /// @details Useful for element types whose in-memory layout differs from
288 /// the default `BasicType_To_MPIIntType<T>()` deduction.
290 : mpi(n_mpi), dataType(n_dType), typeMult(n_TypeMult)
291 {
293 }
294
295 /// @brief Named constructor: sets the object name for tracing/debugging.
296 /// All existing constructor overloads are supported via perfect forwarding.
297 /// Inherited by derived classes (ArrayAdjacency, ArrayEigenVector, etc.)
298 /// through `using t_base::t_base`.
299 ///
300 /// Usage:
301 /// ParArray<index> arr(ObjName{"cell2node"}, mpi);
302 /// ArrayAdjacency<> adj(ObjName{"cell2cell"}, mpi);
303 template <typename... Args>
305 : ParArray(std::forward<Args>(args)...)
306 {
307 this->setObjectName(std::move(objName.name));
308 }
309
310 /// @brief Assert the MPI datatype matches `sizeof(T)` exactly.
311 /// @details Called from constructors / #setMPI / #setDataType. Guards
312 /// against size mismatches that would silently corrupt comms.
314 {
318 MPI_Type_get_extent(dataType, &lb, &extent);
319 DNDS_check_throw(lb == 0 && extent * typeMult == sizeof(T));
320 }
321
322 /**
323 * @brief Check array consistency across all ranks.
324 *
325 * @details Uses `MPI_Allgather` to verify that row sizes (for `TABLE_Fixed`
326 * and `TABLE_Max` layouts) and MPI type multipliers are the same on every
327 * rank. Intended as a post-setup sanity check before entering ghost exchange.
328 *
329 * @warning Must be called collectively. O(nRanks) memory and communication.
330 * @return true Always (failures are reported via @ref DNDS_check_throw_info).
331 */
333 {
335 MPI::Barrier(mpi.comm); // must be globally existent
336 if constexpr (_dataLayout == TABLE_Max ||
337 _dataLayout == TABLE_Fixed) // must have the same dynamic size
338 {
339 // checking if is uniform across all procs
341 MPI_int rowsizeC = this->RowSizeField();
342 static_assert(sizeof(MPI_int) == sizeof(rowsize));
344 for (auto i : uniformSizes)
345 DNDS_check_throw_info(i == rowsizeC, "sizes not uniform across procs");
346 }
347
348 std::vector<MPI_int> uniform_typeMult(mpi.size);
350 for (auto i : uniform_typeMult)
351 DNDS_check_throw_info(i == typeMult, "typeMults not uniform across procs");
352
353 return true; // currently all errors aborts inside
354 }
355
356 /**
357 * @brief Collective: build the global offsets table.
358 *
359 * @details Every rank broadcasts its local `Size()`; after the call,
360 * #pLGlobalMapping holds the full #GlobalOffsetsMapping::RLengths /
361 * #GlobalOffsetsMapping::ROffsets on every
362 * rank. Must be invoked before #globalSize, @ref DNDS::ArrayTransformer "ArrayTransformer"::createFatherGlobalMapping,
363 * or any CSR collective serialization.
364 *
365 * @warning Must be called collectively on `mpi.comm`.
366 */
367 void createGlobalMapping() // collective;
368 {
370 // phase1.1: create localGlobal mapping (broadcast)
371 pLGlobalMapping = std::make_shared<GlobalOffsetsMapping>();
372 pLGlobalMapping->setMPIAlignBcast(mpi, this->Size());
373 }
374
375 /**
376 * @brief Returns the total global size (sum of sizes across all ranks).
377 *
378 * @note This method was previously collective (using MPI_Allreduce) but is now
379 * non-collective. It requires that the global mapping has been created
380 * first (which is done via the collective createGlobalMapping() method
381 * on the underlying ParArray).
382 *
383 * @pre createGlobalMapping() must have been called on the underlying array.
384 * @return index The global size (cached from global mapping).
385 */
387 {
389 "globalSize() requires global mapping. "
390 "Ensure createGlobalMapping() was called first (typically via ArrayPair operations).");
391 return pLGlobalMapping->globalSize();
392 }
393 };
394 /********************************************************************************************************/
395
396 /********************************************************************************************************/
397
398 /**
399 * @brief Ghost-communication engine for a father / son @ref DNDS::ParArray "ParArray" pair.
400 *
401 * @details Distributed-mesh stencil schemes need data from cells owned by
402 * other ranks. @ref DNDS::ArrayTransformer "ArrayTransformer" stores two @ref DNDS::ParArray "ParArray" pointers -- the
403 * *father* (owned rows) and the *son* (incoming ghost rows) -- plus the
404 * MPI machinery to move data between them.
405 *
406 * ## Setup (done once)
407 * 1. #setFatherSon -- attach the two arrays.
408 * 2. #createFatherGlobalMapping -- collective; populate global offsets.
409 * 3. #createGhostMapping -- specify which global rows this rank needs as ghosts.
410 * 4. #createMPITypes -- build `MPI_Type_create_hindexed` derived
411 * types (or in-situ pack buffers) for send/recv.
412 *
413 * ## Communication
414 * - One-shot: #pullOnce / #pushOnce (short-lived `MPI_Isend`/`MPI_Irecv` pair).
415 * - Persistent: #initPersistentPull -> repeated #startPersistentPull /
416 * #waitPersistentPull cycles -> #clearPersistentPull when done.
417 * Persistent requests avoid re-posting sends and receives on every
418 * iteration, saving per-step overhead.
419 *
420 * ## Reuse
421 * When multiple arrays share the same ghost pattern (e.g. the DOF array
422 * and the gradient array both use the `cell2cell` partition), call
423 * @ref BorrowGGIndexing on the secondary transformer to copy the mapping
424 * without redoing collective setup -- only #createMPITypes must be redone,
425 * because the element size differs.
426 *
427 * @sa ArrayPair, ArrayDof, docs/architecture/array_infrastructure.md.
428 */
429 template <class T, rowsize _row_size = 1, rowsize _row_max = _row_size, rowsize _align = NoAlign>
430 // template <class TArray>
432 {
434
435 public:
438 // using T = TArray::value_type;
439 // static const rowsize _align = TArray::al;
440 // static const rowsize _row_size = TArray::rs;
441 // static const rowsize _row_max = TArray::rm;
442
445
446 /*********************************/
447 /* MEMBER */
448 /*********************************/
449
450 /// @brief MPI context; copied from the attached father array.
452 /// @brief Ghost index mapping (rank-local layout). Populated by #createGhostMapping.
454 /// @brief The "owned" side of the father/son pair.
456 /// @brief The "ghost" side of the father/son pair (receives from other ranks).
458
459 /// @brief Shared pointer to the global offsets table (shared with father).
460 t_pLGlobalMapping pLGlobalMapping; // reference from father
461
462 /// @brief Cached `(count, MPI_Datatype)` pairs for push (son -> father).
464 /// @brief Cached `(count, MPI_Datatype)` pairs for pull (father -> son).
466
467 // ** comm aux info: comm running structures **
468 // TODO: make these aux info (sized) shared and thread-safe
469 /// @brief Persistent request handles for push.
471 /// @brief Persistent request handles for pull.
473 /// @brief Device currently holding push buffers (@ref Unknown if not initialised).
475 /// @brief Device currently holding pull buffers (@ref Unknown if not initialised).
477 /// @brief Number of receive requests in #PushReqVec (the rest are sends).
479 /// @brief Number of receive requests in #PullReqVec.
481 /// @brief Status buffer for push completion.
483 /// @brief Status buffer for pull completion.
485 /// @brief Total bytes sent per push call (for buffer sizing).
486 MPI_Aint pushSendSize;
487 /// @brief Total bytes sent per pull call.
488 MPI_Aint pullSendSize;
489
490 tMPI_intVec pushingSizes; ///< temp: per-peer count for #createMPITypes.
491 tMPI_AintVec pushingDisps; ///< temp: per-peer byte displacements for #createMPITypes.
492 std::vector<index> pushingIndexLocal; ///< for InSituPack strategy
493 std::vector<std::vector<T>> inSituBuffer; ///< for InSituPack strategy
494
495 /*********************************/
496 /* MEMBER FUNCTIONS */
497 /*********************************/
498
499 /// @brief Copy-assign the transformer state. Persistent requests are
500 /// re-created rather than shared because they point to different
501 /// memory than the source object.
503 {
504 if (this == &R)
505 return *this;
506 // must have commTypeCurrent copied as a result of createMPITypes()
507 commTypeCurrent = R.commTypeCurrent;
508
509 mpi = R.mpi;
511 father = R.father;
512 son = R.son;
513
515
516 // these are shared as results of createMPITypes()
519
520 // ** comm aux info: comm running structures **
521 // PushReqVec;
522 // PullReqVec;
523 // pushDevice;
524 // pullDevice;
525 // nRecvPushReq{-1};
526 // nRecvPullReq{-1};
527 // PushStatVec;
528 // PullStatVec;
529 // pushSendSize;
530 // pullSendSize;
531 // ! check comm aux info status and correctly duplicate them
532 // ! cannot share because point to different data
533 if (R.PullReqVec)
534 this->initPersistentPull();
535 if (R.PushReqVec)
536 this->initPersistentPush();
537
538 // these are createMPITypes() temporaries,
539 // TODO: maybe remove from member?
540 // pushingSizes;
541 // inSituBuffer;
542
543 // comm aux info but created in createMPITypes()
544 // TODO (remove from createMPITypes() maybe?)
545 pushingIndexLocal = R.pushingIndexLocal;
547 return *this;
548 }
549
550 /// @brief Default-construct an empty transformer; attach arrays later via #setFatherSon.
551 ArrayTransformer() = default;
552
553 /// @brief Copy-construct via operator=.
555 {
556 // initial-safe operator= call
557 this->operator=(R);
558 }
559
560 /// @brief Move constructor: transfers all handles (shared_ptrs, MPI
561 /// request holders). Source is left in a valid but uninitialized state.
562 ArrayTransformer(TSelf &&) noexcept = default;
563 TSelf &operator=(TSelf &&) noexcept = default;
564 ~ArrayTransformer() = default;
565
566 /**
567 * @brief Attach father and son arrays. First setup step.
568 *
569 * @details Both arrays must share the same MPI context and element
570 * MPI datatype. The transformer cannot be used until the remaining
571 * setup calls (#createFatherGlobalMapping, #createGhostMapping,
572 * #createMPITypes) have run.
573 *
574 * @param n_father Owned-side array (must not be null).
575 * @param n_son Ghost-side array (must not be null).
576 */
577 void setFatherSon(const t_pArray &n_father, const t_pArray &n_son)
578 {
579 DNDS_check_throw(n_father && n_son);
580 father = n_father;
581 son = n_son;
582 mpi = father->getMPI();
583 DNDS_check_throw_info(son->getMPI() == father->getMPI(), "MPI inconsistent between father & son");
584 DNDS_check_throw_info(father->getDataType() == son->getDataType(), "MPI datatype inconsistent between father & son");
585 DNDS_check_throw_info(father->getTypeMult() == son->getTypeMult(), "MPI datatype multiplication inconsistent between father & son");
586 DNDS_check_throw_info(father->getDataType() != MPI_DATATYPE_NULL, "MPI datatype invalid");
587 DNDS_check_throw_info(father->getTypeMult() > 0, "MPI datatype multiplication invalid");
588 pLGhostMapping.reset();
589 pLGlobalMapping.reset();
590 pLGlobalMapping = father->pLGlobalMapping;
591 }
592
593 /**
594 * @brief Borrow the ghost and global mapping from another transformer.
595 *
596 * @details Intended for the common case where several arrays share the
597 * same partition (e.g., the DOF array and the gradient array both
598 * live on the same cell partitioning). Copies the shared pointers --
599 * no collective work is performed. After this call, #createMPITypes
600 * still needs to be invoked because the element size differs.
601 *
602 * @tparam TRArrayTrans A compatible transformer type; must expose
603 * `father`, `pLGhostMapping`, `pLGlobalMapping`.
604 */
605 template <class TRArrayTrans>
606 void BorrowGGIndexing(const TRArrayTrans &RArrayTrans)
607 {
608 // DNDS_check_throw(father && Rarray.father); // Rarray's father is not visible...
609 // DNDS_check_throw(father->obtainTotalSize() == Rarray.father->obtainTotalSize());
610 DNDS_check_throw(RArrayTrans.father && father);
611 DNDS_check_throw(RArrayTrans.pLGhostMapping && RArrayTrans.pLGlobalMapping);
612 DNDS_check_throw(RArrayTrans.father->Size() == father->Size());
613 pLGhostMapping = RArrayTrans.pLGhostMapping;
614 pLGlobalMapping = RArrayTrans.pLGlobalMapping;
615 father->pLGlobalMapping = RArrayTrans.pLGlobalMapping;
616 }
617
618 /// @brief Collective: build the global offsets table on the father array.
619 /// @details Thin wrapper over `father->createGlobalMapping()` that also
620 /// caches the pointer in this transformer. Second setup step.
622 {
623 father->createGlobalMapping();
624 pLGlobalMapping = father->pLGlobalMapping;
625 }
626
627 /** @brief Create the ghost mapping from a *pull* specification. Collective.
628 * @details
629 * "Pulled" data is the data placed in son, i.e. what this rank receives in a pull operation.
630 * pullingIndexGlobal lists the global indices of the rows to be held in son.
631 * The entries of pullingIndexGlobal should be mutually distinct; otherwise the behavior is undefined.
632 *
633 * @warning pullingIndexGlobal is **sorted and deduplicated in-place** by
634 * OffsetAscendIndexMapping. After this call the input vector's element order
635 * is destroyed. If you need to keep the original order (e.g., for a
636 * redistribution mapping), save a copy before calling this method.
637 * The son array after pullOnce() will contain data in the sorted order
638 * of pullingIndexGlobal, NOT in the original input order.
639 */
640 template <class TPullSet>
641 void createGhostMapping(TPullSet &&pullingIndexGlobal) // collective;
642 {
643 DNDS_check_throw(bool(father) && bool(son));
644 DNDS_check_throw_info(bool(father->pLGlobalMapping), "Father needs to createGlobalMapping");
645 pLGlobalMapping = father->pLGlobalMapping;
646 // phase1.2: count how many to pull and allocate the localGhost mapping, fill the mapping
647 // counting could overflow
648 // tMPI_intVec ghostSizes(mpi.size, 0); // == pulling sizes
649 pLGhostMapping = std::make_shared<OffsetAscendIndexMapping>(
650 (*pLGlobalMapping)(mpi.rank, 0), father->Size(),
651 std::forward<TPullSet>(pullingIndexGlobal),
653 mpi);
654 }
655
656 /**
657 * @brief Create the ghost mapping from a *push* specification. Collective.
658 *
659 * @details Each rank supplies, grouped per receiver, the local indices it
660 * will push to that receiver. Row `i` of this rank's father will be sent
661 * to every rank listed for `i` across the CSR `(pushingIndexLocal, pushStarts)`.
662 * The son array is resized to hold the incoming entries on return from
663 * #createMPITypes.
664 *
665 * @param pushingIndexLocal Flat vector of local indices to push, grouped
666 * by receiver in ascending rank order.
667 * @param pushStarts Prefix-sum offsets into `pushingIndexLocal`,
668 * size `mpi.size + 1`.
669 *
670 * @warning Each local index must appear at most once across the entire
671 * CSR, otherwise the resulting ghost layout is undefined.
672 */
673 template <class TPushSet, class TPushStart>
674 void createGhostMapping(TPushSet &&pushingIndexLocal, TPushStart &&pushStarts) // collective;
675 {
676 DNDS_check_throw(bool(father) && bool(son));
677 DNDS_check_throw_info(bool(father->pLGlobalMapping), "Father needs to createGlobalMapping");
678 pLGlobalMapping = father->pLGlobalMapping;
679 // phase1.2: calculate over pushing
680 // counting could overflow
681 pLGhostMapping = std::make_shared<OffsetAscendIndexMapping>(
682 (*pLGlobalMapping)(mpi.rank, 0), father->Size(),
683 std::forward<TPushSet>(pushingIndexLocal),
684 std::forward<TPushStart>(pushStarts),
686 mpi);
687 }
688 /**
689 * @brief Collective: build the MPI derived datatypes (or in-situ buffers)
690 * that describe the ghost send/recv layout. Resizes the son array.
691 *
692 * @details Fourth (and final) setup step. Consumes the per-rank push
693 * sizes and the ghost mapping produced by #createGhostMapping, then:
694 * - for @ref HIndexed: builds `MPI_Type_create_hindexed` types that
695 * describe the scattered memory layout of the rows being sent and
696 * received;
697 * - for @ref InSituPack: allocates contiguous pack buffers.
698 *
699 * Also resizes `son` to hold exactly the incoming ghost rows.
700 *
701 * @pre `father`, `son`, #pLGlobalMapping, #pLGhostMapping are set.
702 * @post `pPullTypeVec` and `pPushTypeVec` (or the in-situ buffers) are
703 * valid; son has been resized.
704 */
705 void createMPITypes() // collective;
706 {
707 DNDS_check_throw(bool(father) && bool(son));
709 commTypeCurrent = MPI::CommStrategy::Instance().GetArrayStrategy();
710 if (commTypeCurrent == MPI::CommStrategy::HIndexed)
711 father->Compress(); //! assure CSR is in compressed form
712 // TODO: support comm for uncompressed: add in-situ packaging working mode
713 // TODO: support actual MAX arrays' size communicating: append comm types: ? needed?
714 // TODO: add manual packaging mode
715
716 /*********************************************/ // starts to deal with actual byte sizes
717
718 //*phase2.1: build push sizes and push disps
719 index nSend = pLGhostMapping->pushingIndexGlobal.size();
720 pushingSizes.resize(nSend); // pushing sizes xx in bytes xx now in num of remove_all_extents_t<T>
721
722 pushingDisps.resize(nSend); // pushing disps in bytes
723
725 auto fatherDataStart = father->operator[](0);
726 if (commTypeCurrent == MPI::CommStrategy::InSituPack)
727 pushingIndexLocal.resize(nSend);
728 for (index i = 0; i < index(nSend); i++)
729 {
730 MPI_int rank = -1;
731 index loc = -1;
732 bool found = pLGhostMapping->search(pLGhostMapping->pushingIndexGlobal[i], rank, loc);
733 DNDS_check_throw_info(found && rank == -1, "must be at local main"); // must be at local main
734 pushingDisps[i] = (father->operator[](loc) - father->operator[](0)) * sizeof(T); //* in bytes
735 if constexpr (_dataLayout == CSR)
736 pushingSizes[i] = father->RowSizeField(loc) * father->getTypeMult();
737 if constexpr (isTABLE_Max(_dataLayout)) //! init sizes
738 pushingSizes[i] = father->RowSize(loc) * father->getTypeMult();
739 if constexpr (isTABLE_Fixed(_dataLayout))
740 pushingSizes[i] = father->RowSizeField() * father->getTypeMult();
741
742 if (commTypeCurrent == MPI::CommStrategy::InSituPack)
743 pushingIndexLocal[i] = loc;
744 }
745 // PrintVec(pushingSizes, std::cout);
746 // std::cout << std::endl;
747
748 //*phase2.2: be informed of pulled sub-indexer
749 // equals to: building pullingSizes and pullingDisps, bytes size and disps of ghost
750 // - legacy: indexer.buildAsGhostAlltoall(father->indexer, pushingSizes, *pLGhostMapping, mpi); // cascade from father
751 auto do_son_resizing = [&]()
752 {
753 auto &LGhostMapping = *pLGhostMapping;
754 index ghostArraySiz = LGhostMapping.ghostStart[LGhostMapping.ghostStart.size() - 1];
755 DNDS_check_throw(mpi.size == LGhostMapping.ghostStart.size() - 1);
756 if constexpr (_dataLayout == TABLE_StaticFixed)
757 {
758 son->Resize(ghostArraySiz);
759 return;
760 }
761 if constexpr (_dataLayout == TABLE_Fixed)
762 {
763 son->Resize(ghostArraySiz, father->RowSize()); // using father's row size
764 return;
765 }
766 if constexpr (_dataLayout == TABLE_Max)
767 {
768 son->Resize(ghostArraySiz, father->RowSizeMax());
769 // and go on for non-uniform resizing
770 }
771 if constexpr (_dataLayout == TABLE_StaticMax)
772 {
773 son->Resize(ghostArraySiz);
774 // and go on for non-uniform resizing
775 }
776
777 // obtain pulling sizes with pushing sizes
778 tMPI_intVec pullingSizes(ghostArraySiz);
779 MPI_Alltoallv(pushingSizes.data(), LGhostMapping.pushIndexSizes.data(), LGhostMapping.pushIndexStarts.data(), MPI_INT,
780 pullingSizes.data(), LGhostMapping.ghostSizes.data(), LGhostMapping.ghostStart.data(), MPI_INT,
781 mpi.comm);
782
783 // std::cout << LGhostMapping.gStarts().size() << std::endl;
784 if constexpr (_dataLayout == CSR)
785 son->Resize(ghostArraySiz, [&](index i)
786 { return pullingSizes[i] / father->getTypeMult(); });
787 if constexpr (_dataLayout == TABLE_Max)
788 {
789 son->Resize(ghostArraySiz, father->RowSizeMax());
790 for (index i = 0; i < son->Size(); i++)
791 son->ResizeRow(i, pullingSizes[i] / father->getTypeMult());
792 }
793 if constexpr (_dataLayout == TABLE_StaticMax)
794 {
795 son->Resize(ghostArraySiz);
796 for (index i = 0; i < son->Size(); i++)
797 son->ResizeRow(i, pullingSizes[i] / father->getTypeMult());
798 }
799 // is actually pulling disps, but is contiguous anyway
800
801 // DNDS_MPI_InsertCheck(mpi);
802 // std::cout << mpi.rank << " VEC ";
803 // PrintVec(pullingSizes, std::cout);
804 // std::cout << std::endl;
805 // DNDS_MPI_InsertCheck(mpi);
806
807 // note that Rowstart and pullingSizes are in bytes
808 // pullingSizes is actual but Rowstart is before indexModder(), use indexModder[] to invert
809 };
810 do_son_resizing();
811
812 // phase3: create and register MPI types of pushing and pulling
813 if constexpr (isTABLE_Max(_dataLayout)) // convert back to full pushing sizes
814 {
815 for (auto &i : pushingSizes)
816 i = son->RowSizeField() * father->getTypeMult();
817 }
818
819 if (commTypeCurrent == MPI::CommStrategy::HIndexed) // record types
820 {
823 for (MPI_int r = 0; r < mpi.size; r++)
824 {
825 /************************************************************/
826 // push
827 MPI_int pushNumber = pLGhostMapping->pushIndexSizes[r];
828 // std::cout << "PN" << pushNumber << std::endl;
829 MPI_Aint *pPushDisps = pushingDisps.data() + pLGhostMapping->pushIndexStarts[r];
830 MPI_int *pPushSizes = pushingSizes.data() + pLGhostMapping->pushIndexStarts[r];
831 index sumPushSizes = 0; // using upgraded integer
832 for (MPI_int i = 0; i < pushNumber; i++)
833 sumPushSizes += pPushSizes[i];
834 if (sumPushSizes > 0) // if no actuall data is to be sent
835 {
836 // std::cout <<mpi.rank<< " pushSlice " << pPushDisps[0] << outputDelim << pPushSizes[0] << std::endl;
837
838 // if (mpi.rank == 0)
839 // {
840 // std::cout << "pushing to " << r << " size" << pushNumber << "\n";
841 // for (int i = 0; i < pushNumber; i++)
842 // std::cout << "b[" << i << "] = " << pPushSizes[i] << std::endl;
843 // for (int i = 0; i < pushNumber; i++)
844 // std::cout << "d[" << i << "] = " << pPushDisps[i] << std::endl;
845 // }
846 // std::cout << "=== PUSH TYPE : " << mpi.rank << " from " << r << std::endl;
847
848 MPI_Datatype dtype = MPI_DATATYPE_NULL;
849 int sizeof_T = MPI_UNDEFINED;
850 MPI_Type_size(father->getDataType(), &sizeof_T);
851 DNDS_check_throw(sizeof_T != MPI_UNDEFINED);
852 auto [n_number, new_Sizes, new_Disps] =
853 optimize_hindexed_layout(pushNumber, pPushSizes, pPushDisps, sizeof_T);
854 MPI_Type_create_hindexed(n_number, new_Sizes.data(), new_Disps.data(), father->getDataType(), &dtype);
855 // MPI_Type_create_hindexed(PushDispsMPI.size(), PushSizesMPI.data(), PushDispsMPI.data(), father->getDataType(), &dtype);
856
857 MPI_Type_commit(&dtype);
858 pPushTypeVec->push_back(std::make_pair(r, dtype));
859 // OPT: could use MPI_Type_create_hindexed_block to save some space
860 }
861 /************************************************************/
862 // pull
863 std::array<MPI_Aint, 1> pullDisp;
864
865 std::array<MPI_int, 1> pullSizes; // same as pushSizes
866 auto gRPtr = son->operator[](index(pLGhostMapping->ghostStart[r + 1]));
867 auto gLPtr = son->operator[](index(pLGhostMapping->ghostStart[r]));
868 auto gStartPtr = son->operator[](index(0));
869 auto ghostSpan = gRPtr - gLPtr;
870 auto ghostStart = gLPtr - gStartPtr;
871 DNDS_check_throw(ghostSpan < INT_MAX && ghostStart < INT_MAX);
872 pullSizes[0] = MPI_int(ghostSpan) * father->getTypeMult();
873 pullDisp[0] = ghostStart * sizeof(T);
874 if (pullSizes[0] > 0)
875 {
876 // std::cout << "=== PULL TYPE : " << mpi.rank << " from " << r << std::endl;
877 MPI_Datatype dtype = MPI_DATATYPE_NULL;
878
879 MPI_Type_create_hindexed(1, pullSizes.data(), pullDisp.data(), father->getDataType(), &dtype);
880
881 // std::cout << mpi.rank << " pullSlice " << pullDisp[0] << outputDelim << pullBytes[0] << std::endl;
882 MPI_Type_commit(&dtype);
883 pPullTypeVec->push_back(std::make_pair(r, dtype));
884 }
885 }
886 pPullTypeVec->shrink_to_fit();
887 pPushTypeVec->shrink_to_fit();
888
889 pushingDisps.clear();
890 pushingSizes.clear(); // no need
891 }
892 else if (commTypeCurrent == MPI::CommStrategy::CommStrategy::InSituPack)
893 {
894 // could simplify some info on sparse comm?
895 pushingDisps.clear();
896 }
897 else
898 {
899 DNDS_check_throw(false);
900 }
901 }
902 /******************************************************************************************************************************/
903
/// @brief Resolve the raw data pointers of #father and #son for backend B.
/// @details With `DeviceBackend::Unknown` the argument-less `data()` pointers are used.
///          Otherwise B must equal both `father->device()` and `son->device()`, and
///          `data(B)` is used; the CUDA branch additionally requires CUDA-aware MPI.
///          A backend that no case of the switch handles makes the function throw.
/// @return `std::make_pair(fatherData, sonData)`.
905 {
906 T *fatherData = nullptr;
907 T *sonData = nullptr;
908 if (B == DeviceBackend::Unknown) // no backend requested: take the plain data() pointers
909 {
910 fatherData = father->data();
911 sonData = son->data();
912 }
913 else
914 {
915 DNDS_check_throw(B == father->device()); // the requested backend must be the one father reports
916 DNDS_check_throw(B == son->device()); // ... and the one son reports
917 switch (B)
918 {
920 { // NOTE(review): the case label of this branch (listing line 919) is not shown here; presumably the host backend -- confirm
921 fatherData = father->data(B);
922 sonData = son->data(B);
923 }
924 break;
925#ifdef DNDS_USE_CUDA
926 case DeviceBackend::CUDA:
927 {
928 //! the pointers obtained below are returned to the caller as communication buffers, hence the check
929 DNDS_check_throw_info(MPI::isCudaAware(), "we require CUDA-aware MPI here");
930 fatherData = father->data(B);
931 sonData = son->data(B);
932 }
933 break;
934#endif
935 default: // any backend without a case above is rejected
936 {
937 DNDS_check_throw(false);
938 }
939 }
940 }
941 return std::make_pair(fatherData, sonData);
942 }
943
944 /******************************************************************************************************************************/
945 /**
946 * @brief Initialise persistent, non-blocking MPI requests
947 * for the push direction (son -> father).
948 *
949 * @details Once persistent requests are created, many push cycles may
950 * be run via #startPersistentPush / #waitPersistentPush without
951 * re-posting sends and receives.
 *
 * In the HIndexed branch one `MPI_Recv_init` on the father buffer is created per
 * entry of #pPushTypeVec and one send-init on the son buffer per entry of
 * #pPullTypeVec. The send slots occupy the front of @ref PushReqVec and the
 * receive slots follow them. Sends use `MPI_Send_init`, or `MPI_Bsend_init`
 * when `ARRAY_COMM_USE_BUFFERED_SEND` is defined.
 * In the InSituPack branch no statement is visible in this listing.
 * Any other value of `commTypeCurrent` throws.
952 *
953 * @pre #createMPITypes has been called; #pPullTypeVec and #pPushTypeVec are valid.
954 * @post @ref PushReqVec is populated with `MPI_Send_init` / `MPI_Recv_init` requests (HIndexed branch).
955 *
956 * @param B Device backend for the send/recv buffers
957 * (`DeviceBackend::Unknown` to use host; requires CUDA-aware
958 * MPI for non-host backends).
959 * @warning After init, the raw data pointers of both father and son
960 * must remain stable until #clearPersistentPush is called.
961 */
963 {
964 if (commTypeCurrent == MPI::CommStrategy::HIndexed)
965 {
966 pushDevice = B; // remember which backend the requests were built for
967 auto [fatherData, sonData] = getFatherSonData(B);
968 // DNDS_check_throw(pPullTypeVec && pPushTypeVec);
969 DNDS_check_throw(pPullTypeVec.use_count() > 0 && pPushTypeVec.use_count() > 0);
970 pushSendSize = 0; // only the commented-out buffer sizing below would accumulate into this
971 auto nReqs = pPullTypeVec->size() + pPushTypeVec->size();
972 // DNDS_check_throw(nReqs > 0);
974 PushReqVec->resize(nReqs, (MPI_REQUEST_NULL)), PushStatVec.resize(nReqs);
975 nRecvPushReq = 0;
 // NOTE(review): `auto ip = 0` is an int compared against size(); initPersistentPull uses the container's size_type instead
976 for (auto ip = 0; ip < pPushTypeVec->size(); ip++)
977 {
978 auto dtypeInfo = (*pPushTypeVec)[ip]; // (peer rank, datatype) pair
979 MPI_int rankOther = dtypeInfo.first;
980 MPI_int tag = rankOther + mpi.rank; // the peer computes the same sum, so both sides agree on the tag
 // NOTE(review): receives are written after the send slots (offset pPullTypeVec->size()), whereas
 // waitPersistentPush in one-by-one mode starts the FIRST nRecvPushReq slots; initPersistentPull
 // puts its receives at the front. Confirm this layout is intended.
981 MPI_Recv_init(fatherData, 1, dtypeInfo.second, rankOther, tag, mpi.comm, PushReqVec->data() + pPullTypeVec->size() + ip);
982 // cascade from father
983 nRecvPushReq++;
984 }
985 for (auto ip = 0; ip < pPullTypeVec->size(); ip++)
986 {
987 auto dtypeInfo = (*pPullTypeVec)[ip];
988 MPI_int rankOther = dtypeInfo.first;
989 MPI_int tag = rankOther + mpi.rank;
990
991#ifndef ARRAY_COMM_USE_BUFFERED_SEND
992 // MPI_Ssend_init
993 MPI_Send_init
994#else
995 MPI_Bsend_init
996#endif
997 (sonData, 1, dtypeInfo.second, rankOther, tag, mpi.comm, PushReqVec->data() + ip);
998
999 // cascade from father
1000
1001 // // buffer calculate //!deprecated because of size limit
1002 // MPI_Aint csize;
1003 // MPI_Pack_external_size(1, dtypeInfo.second, mpi.comm, &csize);
1004 // csize += MPI_BSEND_OVERHEAD;
1005 // DNDS_check_throw(MAX_MPI_Aint - pushSendSize >= csize && csize > 0);
1006 // pushSendSize += csize * 2;
1007 }
1008#ifdef ARRAY_COMM_USE_BUFFERED_SEND
1009 // MPIBufferHandler::Instance().claim(pushSendSize, mpi.rank);
1010#endif
1011 }
1012 else if (commTypeCurrent == MPI::CommStrategy::CommStrategy::InSituPack)
1013 {
1014 // could simplify some info on sparse comm?
1016 }
1017 else
1018 {
1019 DNDS_check_throw(false); // unknown communication strategy
1020 }
1021 }
1022 /******************************************************************************************************************************/
1023
1024 /******************************************************************************************************************************/
1025 /**
1026 * @brief Initialise persistent, non-blocking MPI requests for the pull
1027 * direction (father -> son). Counterpart to #initPersistentPush.
 *
 * @details In the HIndexed branch one `MPI_Recv_init` on the son buffer is created
 * per entry of #pPullTypeVec (stored at the front of @ref PullReqVec and counted in
 * #nRecvPullReq), followed by one send-init on the father buffer per entry of
 * #pPushTypeVec. Sends use `MPI_Send_init`, or `MPI_Bsend_init` when
 * `ARRAY_COMM_USE_BUFFERED_SEND` is defined.
 * In the InSituPack branch no statement is visible in this listing.
 * Any other value of `commTypeCurrent` throws.
1028 *
1029 * @pre #createMPITypes has been called; #pPullTypeVec and #pPushTypeVec are valid.
1030 * @post @ref PullReqVec is populated (HIndexed branch).
1031 *
1032 * @param B Device backend for the send/recv buffers.
1033 * @warning Raw data pointers for both father and son must remain stable
1034 * until #clearPersistentPull.
1035 */
1037 {
1038 if (commTypeCurrent == MPI::CommStrategy::HIndexed)
1039 {
1040 pullDevice = B; // remember which backend the requests were built for
1041 auto [fatherData, sonData] = getFatherSonData(B);
1042 // DNDS_check_throw(pPullTypeVec && pPushTypeVec);
1043 DNDS_check_throw(pPullTypeVec.use_count() > 0 && pPushTypeVec.use_count() > 0);
1044 auto nReqs = pPullTypeVec->size() + pPushTypeVec->size();
1045 pullSendSize = 0; // only the commented-out buffer sizing below would accumulate into this
1046 // DNDS_check_throw(nReqs > 0);
1048 PullReqVec->resize(nReqs, (MPI_REQUEST_NULL)), PullStatVec.resize(nReqs);
1049 nRecvPullReq = 0;
 // receives occupy slots [0, pPullTypeVec->size())
1050 for (typename decltype(pPullTypeVec)::element_type::size_type ip = 0; ip < pPullTypeVec->size(); ip++)
1051 {
1052 auto dtypeInfo = (*pPullTypeVec)[ip]; // (peer rank, datatype) pair
1053 MPI_int rankOther = dtypeInfo.first;
1054 MPI_int tag = rankOther + mpi.rank; //! receives a lot of messages, this distinguishes them
1055 // std::cout << mpi.rank << " Recv " << rankOther << std::endl;
1056 MPI_Recv_init(sonData, 1, dtypeInfo.second, rankOther, tag, mpi.comm, PullReqVec->data() + ip);
1057 nRecvPullReq++;
1058 // std::cout << *(real *)(dataGhost.data() + 8 * 0) << std::endl;
1059 // cascade from father
1060 }
 // sends occupy the slots after the receives
1061 for (typename decltype(pPullTypeVec)::element_type::size_type ip = 0; ip < pPushTypeVec->size(); ip++)
1062 {
1063 auto dtypeInfo = (*pPushTypeVec)[ip];
1064 MPI_int rankOther = dtypeInfo.first;
1065 MPI_int tag = rankOther + mpi.rank; // same sum as computed on the peer
1066 // std::cout << mpi.rank << " Send " << rankOther << std::endl;
1067#ifndef ARRAY_COMM_USE_BUFFERED_SEND
1068 // MPI_Ssend_init
1069 MPI_Send_init
1070#else
1071 MPI_Bsend_init
1072#endif
1073 (fatherData, 1, dtypeInfo.second, rankOther, tag, mpi.comm, PullReqVec->data() + pPullTypeVec->size() + ip);
1074 // std::cout << *(real *)(data.data() + 8 * 1) << std::endl;
1075 // cascade from father
1076
1077 // // buffer calculate //!deprecated because of size limit
1078 // MPI_Aint csize;
1079 // MPI_Pack_external_size(1, dtypeInfo.second, mpi.comm, &csize);
1080 // csize += MPI_BSEND_OVERHEAD * 8;
1081 // DNDS_check_throw(MAX_MPI_Aint - pullSendSize >= csize && csize > 0);
1082 // pullSendSize += csize * 2;
1083 }
1084#ifdef ARRAY_COMM_USE_BUFFERED_SEND
1085 // MPIBufferHandler::Instance().claim(pullSendSize, mpi.rank);
1086#endif
1087 }
1088 else if (commTypeCurrent == MPI::CommStrategy::CommStrategy::InSituPack)
1089 {
1090 // could simplify some info on sparse comm?
1092 }
1093 else
1094 {
1095 DNDS_check_throw(false); // unknown communication strategy
1096 }
1097 }
1098 /******************************************************************************************************************************/
1099
/// @brief Post the non-blocking transfers of one push (son -> father) for the in-situ pack strategy.
/// @details For every rank with a non-zero `pushIndexSizes[r]` a receive buffer is appended to
///          #inSituBuffer and an `MPI_Irecv` into it is posted (counted in #nRecvPushReq).
///          For every rank whose ghost range in #son is non-empty an `MPI_Issend` is posted
///          directly from the son rows. All requests are appended to @ref PushReqVec.
/// @param B Must be `DeviceBackend::Unknown`; any other value throws.
1101 {
1102 if (B != DeviceBackend::Unknown)
1103 DNDS_check_throw_info(false, "in-situ pack not yet implemented for device");
1104 nRecvPushReq = 0;
1105 for (MPI_int r = 0; r < mpi.size; r++)
1106 {
1107 // push: father rows exchanged with rank r (pushIndexSizes[r] entries of pushingIndexLocal)
1108 MPI_int pushNumber = pLGhostMapping->pushIndexSizes[r];
1109 // std::cout << "PN" << pushNumber << std::endl;
1110 if (pushNumber > 0)
1111 {
1112 index nPushData{0}; // total number of T elements expected from rank r
1113 for (index i = 0; i < pushNumber; i++)
1114 {
1115 auto loc = pushingIndexLocal.at(pLGhostMapping->pushIndexStarts[r] + i);
1116 index nPush = 0;
 // exactly one of the following layout branches supplies the row width
1117 if constexpr (_dataLayout == CSR)
1118 nPush = father->RowSizeField(loc);
1119 if constexpr (isTABLE_Max(_dataLayout)) //! init sizes
1120 nPush = father->RowSize(loc);
1121 if constexpr (isTABLE_Fixed(_dataLayout))
1122 nPush = father->RowSizeField();
1123 nPushData += nPush;
1124 }
1125 inSituBuffer.emplace_back(nPushData); // contiguous receive buffer; unpacked in waitPersistentPush
1126 PushReqVec->emplace_back(MPI_REQUEST_NULL);
1127 MPI_Irecv(inSituBuffer.back().data(), nPushData * father->getTypeMult(), father->getDataType(),
1128 r, mpi.rank + r, mpi.comm, &PushReqVec->back());
1129 nRecvPushReq++;
1130 }
1131 }
1132 for (MPI_int r = 0; r < mpi.size; r++)
1133 {
1134 // pull: ghost rows of son that belong to rank r are sent back to it
1135 MPI_Aint pullDisp = UnInitMPIAint; // NOTE(review): never read in this function
1136 MPI_int pullSize = UnInitMPIInt; // same as pushSizes
1137 auto gRPtr = son->operator[](index(pLGhostMapping->ghostStart[r + 1]));
1138 auto gLPtr = son->operator[](index(pLGhostMapping->ghostStart[r]));
1139 auto ghostSpan = gRPtr - gLPtr; // distance between the two row pointers
1140 pullSize = MPI_int(ghostSpan); // NOTE(review): narrowing without the INT_MAX check that createMPITypes performs
1141
1142 if (pullSize > 0)
1143 {
1144 PushReqVec->emplace_back(MPI_REQUEST_NULL);
1145 MPI_Issend(gLPtr, pullSize * father->getTypeMult(), father->getDataType(), r, r + mpi.rank, mpi.comm, &PushReqVec->back());
1146 }
1147 }
1148 }
1149
1150 /// @brief Start all persistent push requests (@ref MPI_Startall).
/// @details HIndexed: if @ref PushReqVec is non-empty and `GetUseAsyncOneByOne()` is false,
///          all requests are started with `MPI_Startall`; when it is true nothing is started
///          here and the requests are started inside #waitPersistentPush instead.
///          Any strategy other than HIndexed / InSituPack throws.
1151 /// @param B Device backend; must match the one passed to #initPersistentPush.
1153 {
1154 if (commTypeCurrent == MPI::CommStrategy::HIndexed)
1155 {
1157 // req already ready
1158 DNDS_check_throw(nRecvPushReq <= PushReqVec->size());
1159 if (!PushReqVec->empty())
1160 {
1161 if (MPI::CommStrategy::Instance().GetUseAsyncOneByOne())
1162 { // intentionally empty: one-by-one mode defers the start to waitPersistentPush
1163 }
1164 else
1165 MPI_Startall(PushReqVec->size(), PushReqVec->data()); // NOTE(review): startPersistentPull casts the count with int(...); this call does not
1166 }
1167 }
1168 else if (commTypeCurrent == MPI::CommStrategy::InSituPack)
1169 { // NOTE(review): the statement of this branch (listing line 1170) is not shown in this listing
1171 }
1172 else
1173 {
1174 DNDS_check_throw(false); // unknown communication strategy
1175 }
1177#ifdef ARRAY_COMM_USE_BUFFERED_SEND
1179#endif
1180
1182 }
1183
/// @brief Post the non-blocking transfers of one pull (father -> son) for the in-situ pack strategy.
/// @details For every rank whose ghost range in #son is non-empty an `MPI_Irecv` is posted
///          directly into the son rows (counted in #nRecvPullReq). For every rank with a
///          non-zero `pushIndexSizes[r]` the referenced father rows are copied back to back
///          into a new #inSituBuffer entry, which is then sent with `MPI_Issend`.
///          All requests are appended to @ref PullReqVec.
/// @param B Must be `DeviceBackend::Unknown`; any other value throws.
1185 {
1186 if (B != DeviceBackend::Unknown)
1187 DNDS_check_throw_info(false, "in-situ pack not yet implemented for device");
1188 nRecvPullReq = 0;
1189 for (MPI_int r = 0; r < mpi.size; r++)
1190 {
1191 // pull: ghost rows of son that come from rank r
1192 MPI_Aint pullDisp = UnInitMPIAint; // NOTE(review): never read in this function
1193 MPI_int pullSize = UnInitMPIInt; // same as pushSizes
1194 auto gRPtr = son->operator[](index(pLGhostMapping->ghostStart[r + 1]));
1195 auto gLPtr = son->operator[](index(pLGhostMapping->ghostStart[r]));
1196 auto ghostSpan = gRPtr - gLPtr; // distance between the two row pointers
1197 pullSize = MPI_int(ghostSpan); // NOTE(review): narrowing without the INT_MAX check that createMPITypes performs
1198
1199 if (pullSize > 0)
1200 {
1201 PullReqVec->emplace_back(MPI_REQUEST_NULL);
1202 MPI_Irecv(gLPtr, pullSize * father->getTypeMult(), father->getDataType(), r, r + mpi.rank, mpi.comm, &PullReqVec->back());
1203 nRecvPullReq++;
1204 }
1205 }
1206 for (MPI_int r = 0; r < mpi.size; r++)
1207 {
1208 // push: father rows sent to rank r (pushIndexSizes[r] entries of pushingIndexLocal)
1209 MPI_int pushNumber = pLGhostMapping->pushIndexSizes[r];
1210 // std::cout << "PN" << pushNumber << std::endl;
1211 if (pushNumber > 0)
1212 {
 // first pass: count the elements so the buffer can be sized once
1213 index nPushData{0};
1214 for (index i = 0; i < pushNumber; i++)
1215 {
1216 auto loc = pushingIndexLocal.at(pLGhostMapping->pushIndexStarts[r] + i);
1217 index nPush = 0;
1218 if constexpr (_dataLayout == CSR)
1219 nPush = father->RowSizeField(loc);
1220 if constexpr (isTABLE_Max(_dataLayout)) //! init sizes
1221 nPush = father->RowSize(loc);
1222 if constexpr (isTABLE_Fixed(_dataLayout))
1223 nPush = father->RowSizeField();
1224 nPushData += nPush;
1225 }
1226 inSituBuffer.emplace_back(nPushData);
 // second pass: nPushData is reused as the write offset while packing the rows
1227 nPushData = 0;
1228 for (index i = 0; i < pushNumber; i++)
1229 {
1230 auto loc = pushingIndexLocal.at(pLGhostMapping->pushIndexStarts[r] + i);
1231 index nPush = 0;
1232 if constexpr (_dataLayout == CSR)
1233 nPush = father->RowSizeField(loc);
1234 if constexpr (isTABLE_Max(_dataLayout)) //! init sizes
1235 nPush = father->RowSize(loc);
1236 if constexpr (isTABLE_Fixed(_dataLayout))
1237 nPush = father->RowSizeField();
1238 std::copy((*father)[loc], (*father)[loc] + nPush, inSituBuffer.back().begin() + nPushData);
1239 nPushData += nPush;
1240 }
1241 PullReqVec->emplace_back(MPI_REQUEST_NULL);
1242 MPI_Issend(inSituBuffer.back().data(), nPushData * father->getTypeMult(), father->getDataType(),
1243 r, mpi.rank + r, mpi.comm, &PullReqVec->back());
1244 }
1245 }
1246 }
1247
1248 /// @brief Start all persistent pull requests (@ref MPI_Startall).
1249 /// @details After this call the sends/recvs are in flight; call
1250 /// #waitPersistentPull to consume the incoming ghost data.
/// Exception (HIndexed): when `GetUseAsyncOneByOne()` is true nothing is started here and
/// the requests are started inside #waitPersistentPull instead.
/// Any strategy other than HIndexed / InSituPack throws.
1251 /// @param B Device backend; must match the one passed to #initPersistentPull.
1253 {
1255 if (commTypeCurrent == MPI::CommStrategy::HIndexed)
1256 {
1258 DNDS_check_throw(nRecvPullReq <= PullReqVec->size());
1259 // req already ready
1260 if (!PullReqVec->empty())
1261 {
1262 if (MPI::CommStrategy::Instance().GetUseAsyncOneByOne())
1263 { // intentionally empty: one-by-one mode defers the start to waitPersistentPull
1264 }
1265 else
1266 MPI_Startall(int(PullReqVec->size()), PullReqVec->data());
1267 }
1268 }
1269 else if (commTypeCurrent == MPI::CommStrategy::InSituPack)
1270 { // NOTE(review): the statement of this branch (listing line 1271) is not shown in this listing
1272 }
1273 else
1274 {
1275 DNDS_check_throw(false); // unknown communication strategy
1276 }
1277#ifdef ARRAY_COMM_USE_BUFFERED_SEND
1279#endif
1280
1282 }
1283
1284 /// @brief Wait for all outstanding persistent push requests to complete.
/// @details HIndexed: waits on every request in @ref PushReqVec. In one-by-one mode
///          (`GetUseAsyncOneByOne()`) the first #nRecvPushReq requests are started together,
///          each remaining request is started and waited for individually, and finally the
///          first #nRecvPushReq requests are waited for.
///          InSituPack: waits on all requests, copies each received buffer of #inSituBuffer
///          into the father rows listed in #pushingIndexLocal, then clears #inSituBuffer and
///          @ref PushReqVec. B must be `DeviceBackend::Unknown` in that branch.
///          Any other strategy throws.
1286 {
1287 if (MPI::CommStrategy::Instance().GetUseStrongSyncWait())
 // NOTE(review): the statement guarded by the `if` above (listing lines 1288-1289) is not shown in this
 // listing; as printed here the `if` would appear to guard the resize below
1290 PushStatVec.resize(PushReqVec->size());
1291#ifdef ARRAY_COMM_USE_BUFFERED_SEND
1293#endif
1294 if (commTypeCurrent == MPI::CommStrategy::HIndexed)
1295 {
1296 // data alright
1297 if (!PushReqVec->empty())
1298 {
1299 DNDS_check_throw(nRecvPushReq <= PushReqVec->size());
1300 if (MPI::CommStrategy::Instance().GetUseAsyncOneByOne())
1301 {
 // NOTE(review): this assumes the receives occupy the first nRecvPushReq slots, but
 // initPersistentPush stores its receives after the send slots -- confirm
1302 MPI_Startall(nRecvPushReq, PushReqVec->data());
1303 for (int iReq = nRecvPushReq; iReq < PushReqVec->size(); iReq++) // int index compared against size()
1304 {
1305 MPI_Start(&PushReqVec->operator[](iReq));
1306 MPI_Wait(&PushReqVec->operator[](iReq), MPI_STATUS_IGNORE);
1307 }
1308 MPI::WaitallAuto(nRecvPushReq, PushReqVec->data(), MPI_STATUSES_IGNORE);
1309 }
1310 else
1311 MPI::WaitallAuto(PushReqVec->size(), PushReqVec->data(), MPI_STATUSES_IGNORE);
1312 }
1313 }
1314 else if (commTypeCurrent == MPI::CommStrategy::InSituPack)
1315 {
1316 if (B != DeviceBackend::Unknown)
1317 DNDS_check_throw_info(false, "in-situ pack not yet implemented for device");
1318 if (!PushReqVec->empty())
1319 MPI::WaitallAuto(PushReqVec->size(), PushReqVec->data(), PushStatVec.data());
1320 auto bufferVec = inSituBuffer.begin(); // walks the receive buffers in the order they were created
1321 for (MPI_int r = 0; r < mpi.size; r++)
1322 {
1323 // push
 // NOTE(review): this check runs for every rank, also when pushNumber == 0 below. InSituPackStartPush
 // creates one buffer per rank with pushNumber > 0 only, so once the last buffer has been consumed
 // (or when there is no buffer at all) the check fails for the remaining ranks. It probably belongs
 // inside the `pushNumber > 0` branch -- confirm.
1324 DNDS_check_throw(bufferVec < inSituBuffer.end());
1325 MPI_int pushNumber = pLGhostMapping->pushIndexSizes[r];
1326 // std::cout << "PN" << pushNumber << std::endl;
1327 if (pushNumber > 0)
1328 {
1329 index nPushData = 0; // read offset into the current buffer
1330 for (index i = 0; i < pushNumber; i++)
1331 {
1332 auto loc = pushingIndexLocal.at(pLGhostMapping->pushIndexStarts[r] + i);
1333 index nPush = 0;
1334 if constexpr (_dataLayout == CSR)
1335 nPush = father->RowSizeField(loc);
1336 if constexpr (isTABLE_Max(_dataLayout)) //! init sizes
1337 nPush = father->RowSize(loc);
1338 if constexpr (isTABLE_Fixed(_dataLayout))
1339 nPush = father->RowSizeField();
 // unpack: overwrite father row `loc` with the received values
1340 std::copy(bufferVec->begin() + nPushData, bufferVec->begin() + nPushData + nPush, (*father)[loc]);
1341 nPushData += nPush;
1342 }
1343 bufferVec++;
1344 }
1345 }
1346 inSituBuffer.clear();
1347 PushReqVec->clear();
1348 }
1349 else
1350 {
1351 DNDS_check_throw(false); // unknown communication strategy
1352 }
1354 if (MPI::CommStrategy::Instance().GetUseStrongSyncWait()) // NOTE(review): the guarded statement (listing line 1355) is not shown in this listing
1356 }
1357 /// @brief Wait for all outstanding persistent pull requests. After this
1358 /// returns, the son array holds fresh ghost data.
/// @details HIndexed: waits on every request in @ref PullReqVec. In one-by-one mode
///          (`GetUseAsyncOneByOne()`) the first #nRecvPullReq requests are started together,
///          each remaining request is started and waited for individually, and finally the
///          first #nRecvPullReq requests are waited for.
///          InSituPack: waits on all requests, then clears #inSituBuffer and @ref PullReqVec;
///          B must be `DeviceBackend::Unknown` in that branch.
///          Any other strategy throws.
1360 {
1362 PullStatVec.resize(PullReqVec->size()); // one status slot per request
1363
1364#ifdef ARRAY_COMM_USE_BUFFERED_SEND
1366#endif
1367 if (commTypeCurrent == MPI::CommStrategy::HIndexed)
1368 {
1369 // data alright
1370 if (!PullReqVec->empty())
1371 {
1372 DNDS_check_throw(nRecvPullReq <= PullReqVec->size());
1373 if (MPI::CommStrategy::Instance().GetUseAsyncOneByOne())
1374 {
 // initPersistentPull stores its receives in the first nRecvPullReq slots: start those first
1375 MPI_Startall(nRecvPullReq, PullReqVec->data());
1376 for (int iReq = nRecvPullReq; iReq < PullReqVec->size(); iReq++) // int index compared against size()
1377 {
1378 MPI_Start(&PullReqVec->operator[](iReq));
1379 MPI_Wait(&PullReqVec->operator[](iReq), MPI_STATUS_IGNORE);
1380 // if (mpi.rank == 0)
1381 // log() << "waited a req" << std::endl;
1382 }
1383 MPI::WaitallAuto(nRecvPullReq, PullReqVec->data(), MPI_STATUSES_IGNORE);
1384 }
1385 else
1386 {
1387 MPI::WaitallAuto(PullReqVec->size(), PullReqVec->data(), MPI_STATUSES_IGNORE);
1388 }
1389 }
1390 }
1391 else if (commTypeCurrent == MPI::CommStrategy::InSituPack)
1392 {
1393 if (B != DeviceBackend::Unknown)
1394 DNDS_check_throw_info(false, "in-situ pack not yet implemented for device");
1395 if (!PullReqVec->empty())
1396 MPI::WaitallAuto(PullReqVec->size(), PullReqVec->data(), PullStatVec.data());
1397 // std::cout << "waiting DONE" << std::endl;
1398 inSituBuffer.clear(); // the packed send buffers are no longer needed once all requests completed
1399 PullReqVec->clear();
1400 }
1401 else
1402 {
1403 DNDS_check_throw(false); // unknown communication strategy
1404 }
1406 }
1407
1408 /// @brief Wait on outstanding push requests then free them.
/// @details The visible body clears @ref PushReqVec; #PushStatVec keeps its contents.
/// NOTE(review): the first body statement (listing line 1411), presumably the wait, is not shown in this listing.
1409 void clearPersistentPush() // collective;
1410 {
1412 PushReqVec->clear(); // stat vec is left untouched here
1413 }
1414 /// @brief Wait on outstanding pull requests then free them.
/// @details The visible body clears @ref PullReqVec.
/// NOTE(review): the first body statement (listing line 1417), presumably the wait, is not shown in this listing.
1415 void clearPersistentPull() // collective;
1416 {
1418 PullReqVec->clear();
1419 }
1420
1421 /// @brief Release the MPI derived datatypes built by #createMPITypes.
1422 /// Rebuild with another call if you wish to continue using the transformer.
/// @details Resets this transformer's #pPullTypeVec and #pPushTypeVec pointers.
/// NOTE(review): whether the datatypes themselves are freed at this point presumably depends on
/// other holders of the same vectors and on their destructor -- confirm.
1423 void clearMPITypes() // collective;
1424 {
1425 pPullTypeVec.reset();
1426 pPushTypeVec.reset();
1427 }
1428
1429 /// @brief Drop the shared pointer to the global offsets table.
/// @details Only this transformer's #pLGlobalMapping pointer is reset.
1430 void clearGlobalMapping() // collective;
1431 {
1432 pLGlobalMapping.reset();
1433 }
1434
1435 /// @brief Drop the ghost mapping (#pLGhostMapping).
/// @details Only this transformer's #pLGhostMapping pointer is reset.
1436 void clearGhostMapping() // collective;
1437 {
1438 pLGhostMapping.reset();
1439 }
1440
1441 /// @brief Convenience: init + start + wait + clear a single pull.
1442 /// @details Suitable when ghosts are updated only once (e.g., post-restart);
1443 /// use the persistent API in hot loops.
/// NOTE(review): the body statements (listing lines 1446-1449) are not shown in this listing.
1444 void pullOnce() // collective;
1445 {
1450 }
1451
1452 /// @brief Convenience: init + start + wait + clear a single push.
/// NOTE(review): the body statements (listing lines 1455-1458) are not shown in this listing.
1453 void pushOnce() // collective;
1454 {
1459 }
1460
1461 /// @brief Re-initialise persistent requests; useful after rebuilding MPI
1462 /// types but wanting to resume persistent comms. Idempotent w.r.t.
1463 /// whichever direction(s) were previously initialised.
/// @details A direction is treated as previously initialised when its request vector
///          (@ref PullReqVec / @ref PushReqVec) is non-empty.
/// NOTE(review): the clear / re-init calls themselves (listing lines 1470-1471, 1476-1477, 1480, 1482)
/// are not shown in this listing.
1465 {
1466 bool clearedPull{false}, clearedPush{false};
1467 if (!PullReqVec->empty()) // pull requests exist
1468 {
1469 clearedPull = true;
1472 }
1473 if (!PushReqVec->empty()) // push requests exist
1474 {
1475 clearedPush = true;
1478 }
1479 if (clearedPull) // guarded statement not shown in this listing
1481 if (clearedPush) // guarded statement not shown in this listing
1483 }
1484 };
1485
1486 /// @brief Type trait computing the ArrayTransformer type for a given Array type.
1487 template <class TArray>
1492
1493 template <class TArray>
1495
1496}
Core 2D variable-length array container with five data layouts.
#define DNDS_INDEX_MAX
Definition Defines.hpp:121
Device memory abstraction layer with backend-specific storage and factory creation.
Assertion / error-handling macros and supporting helper functions.
#define DNDS_assert_info(expr, info)
Debug-only assertion with an extra std::string info message.
Definition Errors.hpp:117
#define DNDS_check_throw_info(expr, info)
Same as DNDS_check_throw but attaches a user-supplied info message to the thrown std::runtime_error.
Definition Errors.hpp:100
#define DNDS_check_throw(expr)
Runtime check active in both debug and release builds. Throws std::runtime_error if expr evaluates to...
Definition Errors.hpp:93
Global-to-local index mapping for distributed arrays.
Wall-clock performance timer and running scalar statistics utilities.
Small utilities for MPI-indexed type layouts (hindexed optimisation).
Non-owning device-callable view of an Array, specialised per DeviceBackend.
static const DataLayout _dataLayout
Ghost-communication engine for a father / son ParArray pair.
DeviceBackend pullDevice
Device currently holding pull buffers (Unknown if not initialised).
ssp< tMPI_typePairVec > pPushTypeVec
Cached (count, MPI_Datatype) pairs for push (son -> father).
tMPI_statVec PullStatVec
Status buffer for pull completion.
MPI_int nRecvPushReq
Number of receive requests in PushReqVec (the rest are sends).
ArrayTransformer()=default
Default-construct an empty transformer; attach arrays later via setFatherSon.
void createGhostMapping(TPushSet &&pushingIndexLocal, TPushStart &&pushStarts)
Create the ghost mapping from a push specification. Collective.
void startPersistentPull(DeviceBackend B=DeviceBackend::Unknown)
Start all persistent pull requests (MPI_Startall).
void clearPersistentPush()
Wait on outstanding push requests then free them.
t_pLGlobalMapping pLGlobalMapping
Shared pointer to the global offsets table (shared with father).
TSelf & operator=(const TSelf &R)
Copy-assign the transformer state. Persistent requests are re-created rather than shared because they...
t_pArray son
The "ghost" side of the father/son pair (receives from other ranks).
t_pLGhostMapping pLGhostMapping
Ghost index mapping (rank-local layout). Populated by createGhostMapping.
void createMPITypes()
Collective: build the MPI derived datatypes (or in-situ buffers) that describe the ghost send/recv la...
void pullOnce()
Convenience: init + start + wait + clear a single pull.
MPIInfo mpi
MPI context; copied from the attached father array.
void setFatherSon(const t_pArray &n_father, const t_pArray &n_son)
Attach father and son arrays. First setup step.
MPI_int nRecvPullReq
Number of receive requests in PullReqVec.
tMPI_statVec PushStatVec
Status buffer for push completion.
void initPersistentPull(DeviceBackend B=DeviceBackend::Unknown)
Initialise persistent, non-blocking MPI requests for the pull direction (father -> son)....
void reInitPersistentPullPush()
Re-initialise persistent requests; useful after rebuilding MPI types but wanting to resume persistent...
void startPersistentPush(DeviceBackend B=DeviceBackend::Unknown)
Start all persistent push requests (MPI_Startall).
void InSituPackStartPull(DeviceBackend B)
void pushOnce()
Convenience: init + start + wait + clear a single push.
MPI_Aint pushSendSize
Total bytes sent per push call (for buffer sizing).
DeviceBackend pushDevice
Device currently holding push buffers (Unknown if not initialised).
tMPI_intVec pushingSizes
temp: per-peer count for createMPITypes.
ssp< MPIReqHolder > PushReqVec
Persistent request handles for push.
void clearPersistentPull()
Wait on outstanding pull requests then free them.
ssp< MPIReqHolder > PullReqVec
Persistent request handles for pull.
MPI_Aint pullSendSize
Total bytes sent per pull call.
void clearGlobalMapping()
Drop the shared pointer to the global offsets table.
void InSituPackStartPush(DeviceBackend B)
static const DataLayout _dataLayout
void clearGhostMapping()
Drop the ghost mapping (pLGhostMapping).
void createGhostMapping(TPullSet &&pullingIndexGlobal)
create ghost by pulling data
std::vector< std::vector< T > > inSituBuffer
for InSituPack strategy
void waitPersistentPush(DeviceBackend B=DeviceBackend::Unknown)
Wait for all outstanding persistent push requests to complete.
void BorrowGGIndexing(const TRArrayTrans &RArrayTrans)
Borrow the ghost and global mapping from another transformer.
auto getFatherSonData(DeviceBackend B)
ssp< tMPI_typePairVec > pPullTypeVec
Cached (count, MPI_Datatype) pairs for pull (father -> son).
void initPersistentPush(DeviceBackend B=DeviceBackend::Unknown)
Initialise persistent, non-blocking, non-buffered MPI requests for the push direction (son -> father)...
t_pArray father
The "owned" side of the father/son pair.
ArrayTransformer(TSelf &&) noexcept=default
Move constructor: transfers all handles (shared_ptrs, MPI request holders). Source is left in a valid...
void clearMPITypes()
Release the MPI derived datatypes built by createMPITypes. Rebuild with another call if you wish to c...
ArrayTransformer(const TSelf &R)
Copy-construct via operator=.
void createFatherGlobalMapping()
Collective: build the global offsets table on the father array.
void waitPersistentPull(DeviceBackend B=DeviceBackend::Unknown)
Wait for all outstanding persistent pull requests. After this returns, the son array holds fresh ghos...
tMPI_AintVec pushingDisps
temp: per-peer byte displacements for createMPITypes.
std::vector< index > pushingIndexLocal
for InSituPack strategy
const T & at(index iRow, rowsize iCol) const
Bounds-checked element read (not device-callable because CSR decompressed uses std::vector::at which ...
DNDS_DEVICE_CALLABLE T * data()
Raw pointer to the start of the flat data buffer.
Core 2D variable-length array container, the storage foundation of DNDSR.
Definition Array.hpp:97
index _size
Definition Array.hpp:143
t_pRowStart _pRowStart
Definition Array.hpp:136
void WriteSerializer(Serializer::SerializerBaseSSP serializerP, const std::string &name, Serializer::ArrayGlobalOffset offset, Serializer::ArrayGlobalOffset dataOffset=Serializer::ArrayGlobalOffset_Unknown)
Serialize (write) array data to a serializer.
Definition Array.hpp:946
bool IfCompressed() const
(CSR only) Whether the array is in packed / flat form.
Definition Array.hpp:294
ArrayDeviceView< B, T, _row_size, _row_max, _align > t_deviceView
Definition Array.hpp:1247
Array()=default
Default-constructed array: empty, no storage.
ssp< t_RowSizes > t_pRowSizes
Definition Array.hpp:133
void Compress()
Layout-polymorphic compress: no-op for non-CSR, calls CSRCompress for CSR.
Definition Array.hpp:355
index Size() const
Number of rows currently stored. O(1).
Definition Array.hpp:171
rowsize RowSizeField() const
"Logical" row-field width used by derived (Eigen) arrays: max for padded layouts, uniform width for f...
Definition Array.hpp:255
iterator< B > end()
Iterator one past the last row, viewed on device backend B.
Definition Array.hpp:1341
void ReadSerializer(Serializer::SerializerBaseSSP serializerP, const std::string &name, Serializer::ArrayGlobalOffset &offset)
Convenience overload that discards the dataOffset output.
Definition Array.hpp:999
void unclaim(MPI_int cs)
Release cs previously-claimed bytes (only updates accounting; does not shrink the buffer).
Definition MPI.hpp:663
static MPIBufferHandler & Instance()
Access the process-wide singleton.
Definition MPI.cpp:106
void claim(MPI_Aint cs, int reportRank=0)
Reserve cs additional bytes, growing and re-attaching the MPI buffer if needed. reportRank is only us...
Definition MPI.hpp:640
static CommStrategy & Instance()
Access the process-wide singleton.
Definition MPI.cpp:434
ArrayCommType GetArrayStrategy()
Current array-pack strategy.
Definition MPI.cpp:440
ArrayCommType
Which derived-type strategy ArrayTransformer should use.
Definition MPI.hpp:784
@ InSituPack
Manually pack / unpack into contiguous buffers.
Definition MPI.hpp:787
@ UnknownArrayCommType
Sentinel / uninitialised.
Definition MPI.hpp:785
@ HIndexed
Use MPI_Type_create_hindexed derived types (default).
Definition MPI.hpp:786
void setObjectName(const std::string &name)
Definition Defines.hpp:237
MPI-aware Array: adds a communicator, rank, and global index mapping.
void setDataType(MPI_Datatype n_dType, MPI_int n_TypeMult)
Override the deduced MPI datatype and element multiplier (advanced; needed for custom compound elemen...
ParArray()=default
Default-construct an uninitialised ParArray; call setMPI and Resize later.
t_self & operator=(const t_self &R)=default
void setMPI(const MPIInfo &n_mpi)
Install the MPI context after default construction.
typename TArray::t_pRowSizes t_pRowSizes
void clone(const t_self &R)
Copy-assign from another ParArray. Shallow copy semantics (mirrors Array::clone): shares structural/dat...
static const DataLayout _dataLayout
MPI_int getTypeMult()
Per-element count multiplier that goes with getDataType.
void AssertDataType()
Assert the MPI datatype matches sizeof(T) exactly.
index globalSize() const
Returns the total global size (sum of sizes across all ranks).
bool AssertConsistent()
Check array consistency across all ranks.
const MPIInfo & getMPI() const
Read-only MPI context accessor.
MPI_Datatype getDataType()
MPI element datatype used for ghost exchange (deduced from T).
t_pLGlobalMapping pLGlobalMapping
Shared pointer to the global-offsets table. Populated by createGlobalMapping; may be pointed at an ex...
void WriteSerializer(Serializer::SerializerBaseSSP serializerP, const std::string &name, Serializer::ArrayGlobalOffset offset)
Serialize (write) the parallel array with MPI-aware metadata.
void createGlobalMapping()
Collective: build the global offsets table.
ParArray(const t_self &R)=default
ParArray(ObjName objName, Args &&...args)
Named constructor: sets the object name for tracing/debugging. All existing constructor overloads are...
MPIInfo mpi
MPI context associated with this array (must be set before collectives).
ParArray(MPI_Datatype n_dType, MPI_int n_TypeMult, const MPIInfo &n_mpi)
Construct with a custom (MPI datatype, multiplier) pair.
ParArray(const MPIInfo &n_mpi)
Construct a ParArray bound to the given MPI context.
ParArray(t_self &&) noexcept=default
MPIInfo & getMPI()
Mutable MPI context accessor.
void ReadSerializer(Serializer::SerializerBaseSSP serializerP, const std::string &name, Serializer::ArrayGlobalOffset &offset)
Deserialize (read) the parallel array with MPI-aware metadata.
@ Comm
Catch-all MPI comm.
Definition Profiling.hpp:37
static PerformanceTimer & Instance()
Access the process-wide singleton.
Definition Profiling.cpp:9
void StartTimer(TimerType t)
Record the current wall time in the "start" slot for timer t.
Definition Profiling.cpp:15
void StopTimer(TimerType t)
Add (now - start) to the accumulated time for timer t.
Definition Profiling.cpp:25
Describes one rank's window into a globally-distributed dataset.
bool isDist() const
Whether this descriptor carries a real distributed offset (rather than a sentinel like Offset_Parts).
MPI_int Allgather(const void *sendbuf, MPI_int sendcount, MPI_Datatype sendtype, void *recvbuf, MPI_int recvcount, MPI_Datatype recvtype, MPI_Comm comm)
Wrapper over MPI_Allgather.
Definition MPI.cpp:229
MPI_int WaitallAuto(MPI_int count, MPI_Request *reqs, MPI_Status *statuses)
Wait on an array of requests, choosing between MPI_Waitall and the lazy-poll variant based on CommStr...
Definition MPI.cpp:282
MPI_int Scan(const void *sendbuf, void *recvbuf, MPI_int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
Wrapper over MPI_Scan (inclusive prefix reduction).
Definition MPI.cpp:219
MPI_int Allreduce(const void *sendbuf, void *recvbuf, MPI_int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
Wrapper over MPI_Allreduce.
Definition MPI.cpp:202
MPI_int Barrier(MPI_Comm comm)
Wrapper over MPI_Barrier.
Definition MPI.cpp:247
bool isCudaAware()
Runtime probe: is the current MPI implementation configured with CUDA-aware support?...
Definition MPI.cpp:297
ssp< SerializerBase > SerializerBaseSSP
The host-side operators are provided as implemented.
constexpr MPI_Aint UnInitMPIAint
Sentinel "not initialised" MPI_Aint value (= -1).
Definition MPI.hpp:68
ssp< GlobalOffsetsMapping > t_pLGlobalMapping
Shared pointer to a GlobalOffsetsMapping (globally replicated).
std::vector< rowsize > t_RowsizeVec
Vector of row widths (one rowsize per row).
Definition Defines.hpp:164
ssp< OffsetAscendIndexMapping > t_pLGhostMapping
Shared pointer to an OffsetAscendIndexMapping (per-rank ghost layout).
const MPI_Datatype DNDS_MPI_INDEX
MPI datatype matching index (= MPI_INT64_T).
Definition MPI.hpp:106
constexpr bool isTABLE_Fixed(DataLayout lo)
Whether the layout has a uniform row width (no per-row size needed).
DeviceBackend
Enumerates the backends a DeviceStorage / Array can live on.
@ Unknown
Unset / sentinel.
@ Host
Plain CPU memory.
tMPI_indexVec tMPI_AintVec
Alias for tMPI_indexVec to match MPI_Aint terminology.
Definition MPI.hpp:74
int32_t rowsize
Row-width / per-row element-count type (signed 32-bit).
Definition Defines.hpp:114
DataLayout
Enumeration of the five concrete data layouts supported by Array.
@ TABLE_StaticFixed
Fixed row width, known at compile time.
@ TABLE_Max
Padded variable rows; max width set at runtime.
@ CSR
Compressed Sparse Row (flat buffer + row-start index).
@ TABLE_StaticMax
Padded variable rows; max width fixed at compile time.
@ TABLE_Fixed
Fixed row width, set at runtime (uniform across rows).
constexpr MPI_int UnInitMPIInt
Sentinel "not initialised" MPI_int value (= -1, invalid rank).
Definition MPI.hpp:66
std::pair< index, index > EvenSplitRange(int rank, int nRanks, index nGlobal)
Split a global range [0, nGlobal) evenly among nRanks workers.
Definition Defines.hpp:134
constexpr bool isTABLE_Max(DataLayout lo)
Whether the layout is a padded-max variant (uses _pRowSizes).
int64_t index
Global row / DOF index type (signed 64-bit; handles multi-billion-cell meshes).
Definition Defines.hpp:112
tMPI_sizeVec tMPI_intVec
Alias for tMPI_sizeVec; used where the name "int vec" reads better.
Definition MPI.hpp:70
std::shared_ptr< T > ssp
Shortened alias for std::shared_ptr used pervasively in DNDSR.
Definition Defines.hpp:143
auto optimize_hindexed_layout(index o_size, TBlkSiz *blk_sizes, TDisp *disps, TSizeof sizeofElem)
Coalesce contiguous blocks in an MPI_Type_create_hindexed layout.
std::vector< MPI_Status > tMPI_statVec
Vector of MPI_Status, for MPI_Waitall / MPI_Testall.
Definition MPI.hpp:77
typename ArrayTransformerType< TArray >::Type ArrayTransformerType_t
int MPI_int
Integer type used for counts and ranks in MPI calls (= C int).
Definition MPI.hpp:54
Type trait computing the ArrayTransformer type for a given Array type.
Lightweight bundle of an MPI communicator and the calling rank's coordinates.
Definition MPI.hpp:231
int size
Number of ranks in comm (-1 until initialised).
Definition MPI.hpp:237
int rank
This rank's 0-based index within comm (-1 until initialised).
Definition MPI.hpp:235
MPI_Comm comm
The underlying MPI communicator handle.
Definition MPI.hpp:233
static ssp< MPIReqHolder > create(Args &&...args)
Only public path to construct an instance.
Definition MPI.hpp:441
static ssp< MPITypePairHolder > create(Args &&...args)
Only public path to construct an instance; forwards to the private constructor.
Definition MPI.hpp:381
Tag type for naming objects created via make_ssp.
Definition Defines.hpp:254
tVec r(NCells)