72 const std::vector<index> &readOrigIndex,
73 const std::vector<index> &newOrigIndex,
90 const int nRanks = mpi.size;
91 index nGlobal = readGlobalMapping->globalSize();
// directoryRank: chooses the rank that holds the directory entry for an original index.
// The result is origIdx * nRanks / nGlobal in integer arithmetic (the multiplication is
// evaluated before the division), capped from above at nRanks - 1.
// @param origIdx  original index to place
// @return         rank number as int; always 0 when nGlobal is 0
// This same mapping is applied to readOrigIndex entries when they are registered and to
// newOrigIndex entries when they are queried, so a key is looked up on the rank it was
// sent to -- presumably nRanks and nGlobal are identical on every process; confirm.
// NOTE(review): only the upper end is capped. A negative origIdx would yield a negative
// rank, and callers use the result directly as a vector subscript -- confirm inputs are >= 0.
// NOTE(review): whether origIdx * nRanks can overflow depends on the width of `index`,
// which is not visible here -- confirm.
93 auto directoryRank = [&](
index origIdx) ->
int
// Guard against dividing by zero below: with an empty global range everything maps to rank 0.
95 if (nGlobal == 0)
return 0;
// Proportional position of origIdx within [0, nGlobal), scaled to the number of ranks.
96 return static_cast<int>(std::min(
index(nRanks - 1), origIdx *
index(nRanks) / nGlobal));
103 std::vector<int> sendCounts(nRanks, 0);
104 for (
index i = 0; i <
index(readOrigIndex.size()); i++)
106 int dr = directoryRank(readOrigIndex[i]);
110 std::vector<int> sendDisps(nRanks + 1, 0);
111 std::partial_sum(sendCounts.begin(), sendCounts.end(), sendDisps.begin() + 1);
114 std::vector<index> sendBuf(
index(sendDisps[nRanks]) * 2);
115 std::vector<int> sendPos(nRanks, 0);
116 for (
index i = 0; i <
index(readOrigIndex.size()); i++)
118 int dr = directoryRank(readOrigIndex[i]);
119 index pos = sendDisps[dr] + sendPos[dr];
120 sendBuf[pos * 2] = readOrigIndex[i];
121 sendBuf[pos * 2 + 1] = readGlobalMapping->operator()(mpi.rank, i);
126 std::vector<int> recvCounts(nRanks, 0);
127 MPI_Alltoall(sendCounts.data(), 1, MPI_INT, recvCounts.data(), 1, MPI_INT, mpi.comm);
129 std::vector<int> recvDisps(nRanks + 1, 0);
130 std::partial_sum(recvCounts.begin(), recvCounts.end(), recvDisps.begin() + 1);
134 std::vector<int> sendCounts2(nRanks), sendDisps2(nRanks);
135 std::vector<int> recvCounts2(nRanks), recvDisps2(nRanks);
136 for (
int r = 0;
r < nRanks;
r++)
138 sendCounts2[
r] = sendCounts[
r] * 2;
139 sendDisps2[
r] = sendDisps[
r] * 2;
140 recvCounts2[
r] = recvCounts[
r] * 2;
141 recvDisps2[
r] = recvDisps[
r] * 2;
144 std::vector<index> recvBuf(
index(recvDisps[nRanks]) * 2);
145 MPI_Alltoallv(sendBuf.data(), sendCounts2.data(), sendDisps2.data(),
DNDS_MPI_INDEX,
146 recvBuf.data(), recvCounts2.data(), recvDisps2.data(),
DNDS_MPI_INDEX,
150 std::unordered_map<index, index> directoryMap;
151 directoryMap.reserve(recvDisps[nRanks]);
152 for (
index i = 0; i < recvDisps[nRanks]; i++)
154 directoryMap[recvBuf[i * 2]] = recvBuf[i * 2 + 1];
159 std::vector<int> querySendCounts(nRanks, 0);
160 for (
index i = 0; i <
index(newOrigIndex.size()); i++)
162 int dr = directoryRank(newOrigIndex[i]);
163 querySendCounts[dr]++;
166 std::vector<int> querySendDisps(nRanks + 1, 0);
167 std::partial_sum(querySendCounts.begin(), querySendCounts.end(), querySendDisps.begin() + 1);
170 std::vector<index> querySendBuf(querySendDisps[nRanks]);
172 std::vector<index> queryOrderMap(querySendDisps[nRanks]);
173 std::vector<int> queryPos(nRanks, 0);
174 for (
index i = 0; i <
index(newOrigIndex.size()); i++)
176 int dr = directoryRank(newOrigIndex[i]);
177 index pos = querySendDisps[dr] + queryPos[dr];
178 querySendBuf[pos] = newOrigIndex[i];
179 queryOrderMap[pos] = i;
184 std::vector<int> queryRecvCounts(nRanks, 0);
185 MPI_Alltoall(querySendCounts.data(), 1, MPI_INT, queryRecvCounts.data(), 1, MPI_INT, mpi.comm);
187 std::vector<int> queryRecvDisps(nRanks + 1, 0);
188 std::partial_sum(queryRecvCounts.begin(), queryRecvCounts.end(), queryRecvDisps.begin() + 1);
191 std::vector<index> queryRecvBuf(queryRecvDisps[nRanks]);
192 MPI_Alltoallv(querySendBuf.data(), querySendCounts.data(), querySendDisps.data(),
DNDS_MPI_INDEX,
193 queryRecvBuf.data(), queryRecvCounts.data(), queryRecvDisps.data(),
DNDS_MPI_INDEX,
197 std::vector<index> queryReplyBuf(queryRecvDisps[nRanks]);
198 for (
index i = 0; i < queryRecvDisps[nRanks]; i++)
200 auto it = directoryMap.find(queryRecvBuf[i]);
202 fmt::format(
"origIdx {} not found in directory on rank {}", queryRecvBuf[i], mpi.rank));
203 queryReplyBuf[i] = it->second;
207 std::vector<index> replyRecvBuf(querySendDisps[nRanks]);
208 MPI_Alltoallv(queryReplyBuf.data(), queryRecvCounts.data(), queryRecvDisps.data(),
DNDS_MPI_INDEX,
209 replyRecvBuf.data(), querySendCounts.data(), querySendDisps.data(),
DNDS_MPI_INDEX,
213 std::vector<index> pullingIndexGlobal(newOrigIndex.size());
214 for (
index i = 0; i < querySendDisps[nRanks]; i++)
216 pullingIndexGlobal[queryOrderMap[i]] = replyRecvBuf[i];
219 return pullingIndexGlobal;
238 const std::vector<index> &readOrigIndex,
239 const std::vector<index> &newOrigIndex,
247 readFather->createGlobalMapping();
248 auto readGlobalMapping = readFather->pLGlobalMapping;
252 mpi, readOrigIndex, newOrigIndex, readGlobalMapping);
255 std::vector<index> pullingIndexOrig(pullingIndexGlobal);
258 auto readSon = std::make_shared<TArray>(mpi);
261 TArrayTransformer
trans;
263 trans.createFatherGlobalMapping();
264 trans.createGhostMapping(pullingIndexGlobal);
267 trans.createMPITypes();
277 std::unordered_map<index, index> globalIdx2SonPos;
284 if constexpr (TArray::_dataLayout ==
CSR)
286 outFather->ResizeRowsAndCompress(
289 auto it = globalIdx2SonPos.find(pullingIndexOrig[i]);
291 return readSon->RowSize(it->second);
295 for (
index i = 0; i <
index(newOrigIndex.size()); i++)
297 index globalReadIdx = pullingIndexOrig[i];
298 auto it = globalIdx2SonPos.find(globalReadIdx);
300 fmt::format(
"globalReadIdx {} not found in ghostIndex on rank {}", globalReadIdx, mpi.rank));
301 index sonPos = it->second;
302 outFather->CopyRowFrom(i, *readSon, sonPos);