DNDSR 0.2.1
Distributed Numeric Data Structure for CFV
Loading...
Searching...
No Matches
PermutationTransfer.hpp
Go to the documentation of this file.
1#pragma once
2/// @file PermutationTransfer.hpp
3/// @brief Utility for distributed or local row permutation/transfer of arrays.
4///
5/// Encapsulates the common pattern: given a partition assignment (or forward map)
6/// for a set of entities, compute new global indices and transfer array rows to
7/// their target ranks. Supports both distributed (MPI push) and local-only
8/// (in-place permutation) paths.
9
11#include "DNDS/ArrayPair.hpp"
12#include "DNDS/MPI.hpp"
13
14namespace DNDS
15{
16 /// Encapsulates a distributed or local permutation of array rows.
17 ///
18 /// Given a per-slot target rank assignment, computes:
19 /// - New global indices (prefix-sum across ranks grouped by target)
20 /// - Push CSR indices for MPI communication
21 /// - Local permutation vector (when all rows stay on the same rank)
22 ///
23 /// Then provides `transferRows` to actually move/permute the data,
24 /// and `buildLookup` to create a ghost-pullable old->new global map.
26 {
27 /// Per father slot: target rank after reorder.
28 std::vector<MPI_int> targetRanks;
29
30 /// New global index for each father slot (same size as targetRanks).
31 std::vector<index> newGlobalIndices;
32
33 /// Push CSR: pushIndex[pushStart[r]..pushStart[r+1]) are the local
34 /// father indices that go to rank r.
35 std::vector<index> pushIndex;
36 std::vector<index> pushStart; // size = nRanks + 1
37
38 /// Local permutation: localOld2New[i] = new local index for old local i.
39 /// Only valid when isLocalOnly == true. Empty otherwise.
40 std::vector<index> localOld2New;
41
42 /// Whether this is a pure rank-local permutation (no cross-rank traffic).
43 bool isLocalOnly{false};
44
45 /// New global offsets: newGlobalOffsets[r] = first global index owned by
46 /// rank r after reorder. Size = nRanks + 1.
47 std::vector<index> newGlobalOffsets;
48
49 // =============================================================
50 // Factory methods
51 // =============================================================
52
53 /// Build from partition assignment (target ranks only).
54 /// New global indices are computed automatically via prefix-sum.
55 ///
56 /// @param partition Per-slot target rank. Size == father size.
57 /// @param oldGlobalMapping Current global offsets mapping for this entity.
58 /// @param mpi MPI communicator.
59 /// @warning Collective.
61 const std::vector<MPI_int> &partition,
62 const ssp<GlobalOffsetsMapping> &oldGlobalMapping,
63 const MPIInfo &mpi);
64
65 /// Build from a local-only permutation vector.
66 /// All entities stay on the same rank. targetRanks all == mpi.rank.
67 ///
68 /// @param old2new Local permutation: old local index -> new local index.
69 /// Must be a valid permutation of [0, N).
70 /// @param oldGlobalMapping Current global offsets mapping.
71 /// @param mpi MPI communicator (used only for rank/size, no communication).
72 /// @note Non-collective: performs zero MPI communication.
74 const std::vector<index> &old2new,
75 const ssp<GlobalOffsetsMapping> &oldGlobalMapping,
76 const MPIInfo &mpi);
77
78 // =============================================================
79 // Core operations
80 // =============================================================
81
82 /// Transfer (or permute) rows of an ArrayPair.
83 ///
84 /// - Local-only: in-place row permutation via PermuteRows.
85 /// - Distributed: father=old, son=new ArrayTransformer push trick.
86 ///
87 /// After return, pair.father contains the new data.
88 /// pair.son is reset (stale after distributed transfer).
89 ///
90 /// @warning Collective (when !isLocalOnly).
91 template <class TPair>
92 void transferRows(TPair &pair, const MPIInfo &mpi) const;
93
94 /// Result of buildLookup: ghost-pullable old-global -> new-global map.
96 {
97 ArrayAdjacencyPair<1> pair; // pair(localSlot, 0) = newGlobalIndices[localSlot]
98 // Ghost-pulled for off-rank entries.
99
100 /// Resolve an old global index to its new global index.
101 /// The old global must be in the local father or ghost (son) range.
102 [[nodiscard]] index resolve(index oldGlobal) const
103 {
104 DNDS_assert(pair.trans.pLGhostMapping);
105 if (oldGlobal == UnInitIndex)
106 return UnInitIndex;
107 MPI_int rank = UnInitMPIInt;
108 index val = UnInitIndex;
109 bool found = pair.trans.pLGhostMapping->search_indexAppend(
110 oldGlobal, rank, val);
111 DNDS_assert_info(found,
112 fmt::format("LookupResult::resolve: old global {} not found", oldGlobal));
113 return pair(val, 0);
114 }
115 };
116
117 /// Build a ghost-pullable lookup array for old->new global conversion.
118 ///
119 /// @param pullSet Sorted, deduplicated set of off-rank old globals that
120 /// need to be resolvable. Typically collected from adj
121 /// entries pointing to this entity kind.
122 /// @param mpi MPI communicator.
123 /// @return LookupResult with resolve() method.
124 /// @warning Collective.
125 [[nodiscard]] LookupResult buildLookup(
126 const std::vector<index> &pullSet,
127 const MPIInfo &mpi) const;
128
129 // =============================================================
130 // Queries
131 // =============================================================
132
133 /// Number of entities (father slots) in this transfer.
134 [[nodiscard]] index size() const { return static_cast<index>(targetRanks.size()); }
135 };
136
137 // =====================================================================
138 // Implementation: fromPartition
139 // =====================================================================
140
142 const std::vector<MPI_int> &partition,
143 const ssp<GlobalOffsetsMapping> &oldGlobalMapping,
144 const MPIInfo &mpi)
145 {
146 DNDS_assert(oldGlobalMapping);
148 pt.targetRanks = partition;
149 const index nLocal = static_cast<index>(partition.size());
150
151 // --- Push CSR ---
152 std::vector<index> pushSizes(mpi.size, 0);
153 for (auto r : partition)
154 {
155 DNDS_assert(r >= 0 && r < mpi.size);
156 pushSizes[r]++;
157 }
158 AccumulateRowSize(pushSizes, pt.pushStart);
159 pt.pushIndex.resize(pt.pushStart[mpi.size]);
160 pushSizes.assign(mpi.size, 0);
161 for (index i = 0; i < nLocal; i++)
162 pt.pushIndex[pt.pushStart[partition[i]] + (pushSizes[partition[i]]++)] = i;
163
164 // --- New global indices (prefix-sum grouped by target rank) ---
165 // Count how many entities each rank sends to each target:
166 std::vector<index> localCounts(mpi.size, 0);
167 for (auto r : partition)
168 localCounts[r]++;
169
170 // Total entities per target rank (across all senders):
171 std::vector<index> totalCounts(mpi.size);
172 MPI_Allreduce(localCounts.data(), totalCounts.data(), mpi.size,
173 DNDS_MPI_INDEX, MPI_SUM, mpi.comm);
174
175 // Global offsets per target rank:
176 pt.newGlobalOffsets.resize(mpi.size + 1);
177 pt.newGlobalOffsets[0] = 0;
178 for (int r = 0; r < mpi.size; r++)
179 pt.newGlobalOffsets[r + 1] = pt.newGlobalOffsets[r] + totalCounts[r];
180
181 // Exclusive prefix per target rank (how many entities before mine):
182 std::vector<index> prevCounts(mpi.size);
183 MPI_Scan(localCounts.data(), prevCounts.data(), mpi.size,
184 DNDS_MPI_INDEX, MPI_SUM, mpi.comm);
185 // MPI_Scan is inclusive, convert to exclusive:
186 for (int r = 0; r < mpi.size; r++)
187 prevCounts[r] -= localCounts[r];
188
189 // Assign new globals: within each target rank bucket, entities are
190 // ordered by (sender rank, local slot within sender).
191 pt.newGlobalIndices.resize(nLocal);
192 std::vector<index> fillCounters(mpi.size, 0);
193 for (index i = 0; i < nLocal; i++)
194 {
195 MPI_int target = partition[i];
196 pt.newGlobalIndices[i] =
197 pt.newGlobalOffsets[target] + prevCounts[target] + fillCounters[target];
198 fillCounters[target]++;
199 }
200
201 // --- Detect local-only ---
202 int localFlag = 1;
203 for (auto r : partition)
204 if (r != mpi.rank)
205 {
206 localFlag = 0;
207 break;
208 }
209 int globalFlag = 0;
210 MPI_Allreduce(&localFlag, &globalFlag, 1, MPI_INT, MPI_LAND, mpi.comm);
211 pt.isLocalOnly = (globalFlag != 0);
212
213 // --- Build local permutation if local-only ---
214 if (pt.isLocalOnly)
215 {
216 index myOffset = pt.newGlobalOffsets[mpi.rank];
217 DNDS_assert(myOffset == (*oldGlobalMapping)(mpi.rank, 0));
218 pt.localOld2New.resize(nLocal);
219 for (index i = 0; i < nLocal; i++)
220 pt.localOld2New[i] = pt.newGlobalIndices[i] - myOffset;
221 }
222
223 return pt;
224 }
225
226 // =====================================================================
227 // Implementation: fromLocalPermutation
228 // =====================================================================
229
231 const std::vector<index> &old2new,
232 const ssp<GlobalOffsetsMapping> &oldGlobalMapping,
233 const MPIInfo &mpi)
234 {
235 DNDS_assert(oldGlobalMapping);
237 const index nLocal = static_cast<index>(old2new.size());
238
239#ifndef NDEBUG
240 // Validate that old2new is a valid permutation of [0, nLocal)
241 std::vector<bool> seen(nLocal, false);
242 for (index i = 0; i < nLocal; i++)
243 {
244 DNDS_assert_info(old2new[i] >= 0 && old2new[i] < nLocal,
245 fmt::format("fromLocalPermutation: old2new[{}] = {} out of [0, {})",
246 i, old2new[i], nLocal));
247 DNDS_assert_info(!seen[old2new[i]],
248 fmt::format("fromLocalPermutation: duplicate mapping to {}",
249 old2new[i]));
250 seen[old2new[i]] = true;
251 }
252#endif
253
254 pt.isLocalOnly = true;
255 pt.localOld2New = old2new;
256 pt.targetRanks.assign(nLocal, mpi.rank);
257
258 index myOffset = (*oldGlobalMapping)(mpi.rank, 0);
259 pt.newGlobalIndices.resize(nLocal);
260 for (index i = 0; i < nLocal; i++)
261 pt.newGlobalIndices[i] = myOffset + old2new[i];
262
263 // Push CSR (trivial: all go to self)
264 pt.pushStart.assign(mpi.size + 1, 0);
265 pt.pushStart[mpi.rank + 1] = nLocal;
266 for (int r = mpi.rank + 2; r <= mpi.size; r++)
267 pt.pushStart[r] = nLocal;
268 pt.pushIndex.resize(nLocal);
269 std::iota(pt.pushIndex.begin(), pt.pushIndex.end(), index{0});
270
271 // Global offsets unchanged
272 pt.newGlobalOffsets.resize(mpi.size + 1);
273 for (int r = 0; r < mpi.size; r++)
274 pt.newGlobalOffsets[r] = (*oldGlobalMapping)(r, 0);
275 pt.newGlobalOffsets[mpi.size] = oldGlobalMapping->globalSize();
276
277 return pt;
278 }
279
280 // =====================================================================
281 // Implementation: transferRows
282 // =====================================================================
283
284 template <class TPair>
285 void PermutationTransfer::transferRows(TPair &pair, const MPIInfo &mpi) const
286 {
287 DNDS_assert(pair.father);
288 DNDS_assert(pair.father->Size() == size());
289
290 if (isLocalOnly)
291 {
292 // In-place permutation
293 DNDS_assert_info(static_cast<index>(localOld2New.size()) == size(),
294 "transferRows: localOld2New size mismatch");
295 using TArr = typename decltype(pair.father)::element_type;
296 auto tmp = std::make_shared<TArr>(*pair.father); // deep copy
297#ifndef NDEBUG
298 for (index i = 0; i < size(); i++)
300 fmt::format("transferRows: localOld2New[{}] = {} out of [0, {})",
301 i, localOld2New[i], size()));
302#endif
303 if constexpr (TPair::IsCSR())
304 pair.father->Decompress();
305 for (index i = 0; i < size(); i++)
306 {
307 index iNew = localOld2New[i];
308 if constexpr (TPair::IsCSR())
309 pair.father->ResizeRow(iNew, tmp->RowSize(i));
310 for (rowsize j = 0; j < tmp->RowSize(i); j++)
311 pair.father->operator()(iNew, j) = (*tmp)(i, j);
312 }
313 if constexpr (TPair::IsCSR())
314 pair.father->Compress();
315 }
316 else
317 {
318 // Distributed: father=old, son=new ArrayTransformer push
319 using TArr = typename decltype(pair.father)::element_type;
320 auto oldFather = pair.father;
321 pair.father = make_ssp<TArr>(ObjName{"PermTransfer.new"}, mpi);
322
324 trans.setFatherSon(oldFather, pair.father);
325 trans.createFatherGlobalMapping();
326 trans.createGhostMapping(pushIndex, pushStart);
327 trans.createMPITypes();
328 trans.pullOnce();
329 // pair.father is now the new array with redistributed data.
330 // Rebuild its global mapping so the result is self-contained.
331 pair.father->createGlobalMapping();
332 }
333
334 // Son is stale after either path — reset it
335 if (pair.son)
336 pair.son.reset();
337 }
338
339 // =====================================================================
340 // Implementation: buildLookup
341 // =====================================================================
342
344 const std::vector<index> &pullSet,
345 const MPIInfo &mpi) const
346 {
348 result.pair.InitPair("PermTransfer_lookup", mpi);
349 result.pair.father->Resize(size());
350 for (index i = 0; i < size(); i++)
351 result.pair(i, 0) = newGlobalIndices[i];
352
353 result.pair.TransAttach();
354 result.pair.trans.createFatherGlobalMapping();
355 std::vector<index> pullSetMut(pullSet); // mutable copy for createGhostMapping
356 result.pair.trans.createGhostMapping(pullSetMut);
357 result.pair.trans.createMPITypes();
358 result.pair.trans.pullOnce();
359
360 return result;
361 }
362
363} // namespace DNDS
Father-son array pairs with device views and ghost communication.
ParArray (MPI-aware array) and ArrayTransformer (ghost/halo communication).
#define DNDS_assert_info(expr, info)
Debug-only assertion with an extra std::string info message.
Definition Errors.hpp:117
#define DNDS_assert(expr)
Debug-only assertion (compiled out when DNDS_NDEBUG is defined). Prints the expression + file/line + ...
Definition Errors.hpp:112
MPI wrappers: MPIInfo, collective operations, type mapping, CommStrategy.
Ghost-communication engine for a father / son ParArray pair.
void setFatherSon(const t_pArray &n_father, const t_pArray &n_son)
Attach father and son arrays. First setup step.
the host side operators are provided as implemented
const MPI_Datatype DNDS_MPI_INDEX
MPI datatype matching index (= MPI_INT64_T).
Definition MPI.hpp:106
DNDS_CONSTANT const index UnInitIndex
Sentinel "not initialised" index value (= INT64_MIN).
Definition Defines.hpp:181
int32_t rowsize
Row-width / per-row element-count type (signed 32-bit).
Definition Defines.hpp:114
void AccumulateRowSize(const TtRowsizeVec &rowsizes, TtIndexVec &rowstarts)
Build a prefix-sum table from a row-size vector.
Definition Defines.hpp:457
constexpr MPI_int UnInitMPIInt
Sentinel "not initialised" MPI_int value (= -1, invalid rank).
Definition MPI.hpp:66
int64_t index
Global row / DOF index type (signed 64-bit; handles multi-billion-cell meshes).
Definition Defines.hpp:112
std::shared_ptr< T > ssp
Shortened alias for std::shared_ptr used pervasively in DNDSR.
Definition Defines.hpp:143
int MPI_int
MPI counterpart type for MPI_int (= C int). Used for counts and ranks in MPI calls.
Definition MPI.hpp:54
Convenience bundle of a father, son, and attached ArrayTransformer.
Lightweight bundle of an MPI communicator and the calling rank's coordinates.
Definition MPI.hpp:231
Tag type for naming objects created via make_ssp.
Definition Defines.hpp:254
Result of buildLookup: ghost-pullable old-global -> new-global map.
LookupResult buildLookup(const std::vector< index > &pullSet, const MPIInfo &mpi) const
std::vector< index > newGlobalIndices
New global index for each father slot (same size as targetRanks).
index size() const
Number of entities (father slots) in this transfer.
std::vector< index > localOld2New
static PermutationTransfer fromPartition(const std::vector< MPI_int > &partition, const ssp< GlobalOffsetsMapping > &oldGlobalMapping, const MPIInfo &mpi)
std::vector< MPI_int > targetRanks
Per father slot: target rank after reorder.
std::vector< index > newGlobalOffsets
void transferRows(TPair &pair, const MPIInfo &mpi) const
static PermutationTransfer fromLocalPermutation(const std::vector< index > &old2new, const ssp< GlobalOffsetsMapping > &oldGlobalMapping, const MPIInfo &mpi)
bool isLocalOnly
Whether this is a pure rank-local permutation (no cross-rank traffic).
ArrayTransformer< DNDS::real, DynamicSize > trans
constexpr DNDS::index nLocal
tVec r(NCells)
auto result