// Fragment of a routine whose name and full signature are not visible in this listing.
// From the visible code: every entry of readOrigIndex is published, together with the
// value readGlobalMapping->operator()(mpi.rank, i), to a "directory" rank chosen by
// directoryRank(); every entry of newOrigIndex is then sent as a query to its directory
// rank, and the looked-up values are returned in a vector with one slot per
// newOrigIndex entry (pullingIndexGlobal).
// NOTE(review): this listing omits some original lines (braces, several loop headers,
// trailing call arguments, counter increments). Comments describe only what is visible;
// anything inferred is marked as an assumption.
72 const std::vector<index> &readOrigIndex,
73 const std::vector<index> &newOrigIndex,
// Number of ranks and the total size reported by the read-side global mapping.
90 const int nRanks = mpi.size;
91 index nGlobal = readGlobalMapping->globalSize();
// directoryRank: maps an original index to a rank as
//   min(nRanks - 1, origIdx * nRanks / nGlobal).
// NOTE(review): divides by nGlobal; no guard for nGlobal == 0 is visible here - confirm
// the caller guarantees a non-empty mapping whenever this lambda is invoked.
93 auto directoryRank = [&](
index origIdx) ->
int
97 return static_cast<int>(std::min(
index(nRanks - 1), origIdx *
index(nRanks) / nGlobal));
// Per-destination-rank counters for the publish phase, initialised to zero.
// NOTE(review): the statement that consumes `dr` is not visible in this listing;
// presumably it increments sendCounts[dr], as the query loop further down does - confirm.
// NOTE(review): the loop variable is declared `long` while the elements are `index`;
// confirm both have the same width on every supported platform.
104 std::vector<int> sendCounts(nRanks, 0);
105 for (
long i : readOrigIndex)
107 int dr = directoryRank(
i);
// Exclusive prefix sum of sendCounts: sendDisps[r] is the first slot for rank r and
// sendDisps[nRanks] is the total number of published items.
111 std::vector<int> sendDisps(nRanks + 1, 0);
112 std::partial_sum(sendCounts.begin(), sendCounts.end(), sendDisps.begin() + 1);
// Each published item occupies two consecutive index entries in sendBuf.
115 std::vector<index> sendBuf(
index(sendDisps[nRanks]) * 2);
// Per-rank write offset inside that rank's segment of sendBuf.
// NOTE(review): the enclosing loop header and the update of sendPos[dr] are not visible
// here; presumably i runs over readOrigIndex and sendPos[dr] is advanced per item - confirm.
116 std::vector<int> sendPos(nRanks, 0);
119 int dr = directoryRank(readOrigIndex[
i]);
120 index pos = sendDisps[dr] + sendPos[dr];
// First entry of the pair: the original index itself.
121 sendBuf[pos * 2] = readOrigIndex[
i];
// Second entry of the pair: the mapping's value for (this rank, local position i).
122 sendBuf[pos * 2 + 1] = readGlobalMapping->operator()(mpi.rank,
i);
// Exchange the per-rank item counts so each rank knows how many items it will receive
// from every other rank.
127 std::vector<int> recvCounts(nRanks, 0);
128 MPI_Alltoall(sendCounts.data(), 1, MPI_INT, recvCounts.data(), 1, MPI_INT, mpi.comm);
// Exclusive prefix sum of recvCounts; recvDisps[nRanks] is the total number received.
130 std::vector<int> recvDisps(nRanks + 1, 0);
131 std::partial_sum(recvCounts.begin(), recvCounts.end(), recvDisps.begin() + 1);
// Counts and displacements expressed in index entries rather than items: every value is
// doubled because each item is a pair.
// NOTE(review): the doubled values are stored in int; confirm they cannot exceed INT_MAX.
135 std::vector<int> sendCounts2(nRanks), sendDisps2(nRanks);
136 std::vector<int> recvCounts2(nRanks), recvDisps2(nRanks);
137 for (
int r = 0;
r < nRanks;
r++)
139 sendCounts2[
r] = sendCounts[
r] * 2;
140 sendDisps2[
r] = sendDisps[
r] * 2;
141 recvCounts2[
r] = recvCounts[
r] * 2;
142 recvDisps2[
r] = recvDisps[
r] * 2;
// Receive buffer for the pairs, then the variable-sized all-to-all that moves them.
// (The trailing arguments of this call are not visible in this listing.)
145 std::vector<index> recvBuf(
index(recvDisps[nRanks]) * 2);
146 MPI_Alltoallv(sendBuf.data(), sendCounts2.data(), sendDisps2.data(),
DNDS_MPI_INDEX,
147 recvBuf.data(), recvCounts2.data(), recvDisps2.data(),
DNDS_MPI_INDEX,
// Local directory: key = first entry of each received pair, value = second entry.
// Because operator[] assignment is used, a key received more than once keeps the value
// of its last occurrence in recvBuf.
151 std::unordered_map<index, index> directoryMap;
152 directoryMap.reserve(recvDisps[nRanks]);
153 for (
index i = 0;
i < recvDisps[nRanks];
i++)
155 directoryMap[recvBuf[
i * 2]] = recvBuf[
i * 2 + 1];
// Query phase: count how many entries of newOrigIndex go to each directory rank.
// NOTE(review): same `long` vs `index` loop-variable remark as in the publish loop above.
160 std::vector<int> querySendCounts(nRanks, 0);
161 for (
long i : newOrigIndex)
163 int dr = directoryRank(
i);
164 querySendCounts[dr]++;
// Exclusive prefix sum of the query counts; querySendDisps[nRanks] is the total.
167 std::vector<int> querySendDisps(nRanks + 1, 0);
168 std::partial_sum(querySendCounts.begin(), querySendCounts.end(), querySendDisps.begin() + 1);
// querySendBuf holds the queried original indices grouped by destination rank.
171 std::vector<index> querySendBuf(querySendDisps[nRanks]);
// queryOrderMap[pos] remembers which position i of newOrigIndex was placed in slot pos,
// so the replies can be written back in the caller's order.
173 std::vector<index> queryOrderMap(querySendDisps[nRanks]);
// Per-rank write offset inside that rank's segment of querySendBuf.
// NOTE(review): the enclosing loop header and the update of queryPos[dr] are not visible
// here; presumably i runs over newOrigIndex and queryPos[dr] is advanced per item - confirm.
174 std::vector<int> queryPos(nRanks, 0);
177 int dr = directoryRank(newOrigIndex[
i]);
178 index pos = querySendDisps[dr] + queryPos[dr];
179 querySendBuf[pos] = newOrigIndex[
i];
180 queryOrderMap[pos] =
i;
// Exchange the per-rank query counts.
185 std::vector<int> queryRecvCounts(nRanks, 0);
186 MPI_Alltoall(querySendCounts.data(), 1, MPI_INT, queryRecvCounts.data(), 1, MPI_INT, mpi.comm);
// Exclusive prefix sum of the received query counts.
188 std::vector<int> queryRecvDisps(nRanks + 1, 0);
189 std::partial_sum(queryRecvCounts.begin(), queryRecvCounts.end(), queryRecvDisps.begin() + 1);
// Deliver the queries (one index entry each) to their directory ranks.
// (The trailing arguments of this call are not visible in this listing.)
192 std::vector<index> queryRecvBuf(queryRecvDisps[nRanks]);
193 MPI_Alltoallv(querySendBuf.data(), querySendCounts.data(), querySendDisps.data(),
DNDS_MPI_INDEX,
194 queryRecvBuf.data(), queryRecvCounts.data(), queryRecvDisps.data(),
DNDS_MPI_INDEX,
// Answer every received query from the local directory; replies are stored in the same
// slot order as the queries were received.
198 std::vector<index> queryReplyBuf(queryRecvDisps[nRanks]);
199 for (
index i = 0;
i < queryRecvDisps[nRanks];
i++)
201 auto it = directoryMap.find(queryRecvBuf[
i]);
// Diagnostic text for a query whose key is absent from the directory.
// NOTE(review): the check/assert that consumes this message is not visible here.
203 fmt::format(
"origIdx {} not found in directory on rank {}", queryRecvBuf[
i], mpi.rank));
204 queryReplyBuf[
i] = it->second;
// Send the replies back along the reverse route: the query receive counts/displacements
// act as send parameters and the query send counts/displacements as receive parameters,
// so reply slot k lines up with query slot k in querySendBuf.
// (The trailing arguments of this call are not visible in this listing.)
208 std::vector<index> replyRecvBuf(querySendDisps[nRanks]);
209 MPI_Alltoallv(queryReplyBuf.data(), queryRecvCounts.data(), queryRecvDisps.data(),
DNDS_MPI_INDEX,
210 replyRecvBuf.data(), querySendCounts.data(), querySendDisps.data(),
DNDS_MPI_INDEX,
// Undo the grouping-by-rank: place each reply at the newOrigIndex position it answers.
214 std::vector<index> pullingIndexGlobal(newOrigIndex.size());
215 for (
index i = 0;
i < querySendDisps[nRanks];
i++)
217 pullingIndexGlobal[queryOrderMap[
i]] = replyRecvBuf[
i];
// One looked-up value per entry of newOrigIndex, in the same order.
220 return pullingIndexGlobal;
// Fragment of a second routine whose name, remaining parameters and end are not visible
// in this listing. From the visible code: it builds the global mapping of readFather,
// resolves newOrigIndex to pulling indices, sets up a transformer with a ghost mapping
// over those indices, and copies rows from readSon into outFather.
// NOTE(review): several original lines (callee name, lambda header, loop headers, the
// population of globalIdx2SonPos, closing braces) are not visible; comments cover only
// what is shown and mark assumptions explicitly.
239 const std::vector<index> &readOrigIndex,
240 const std::vector<index> &newOrigIndex,
// Build the global mapping on the array that was read and keep a handle to it.
248 readFather->createGlobalMapping();
249 auto readGlobalMapping = readFather->pLGlobalMapping;
// Argument list of a call whose callee and assignment target are not visible here;
// presumably this produces pullingIndexGlobal - confirm.
253 mpi, readOrigIndex, newOrigIndex, readGlobalMapping);
// Separate copy of the pulling indices, taken before pullingIndexGlobal is handed to
// createGhostMapping below; the later lookups use this copy.
// NOTE(review): presumably needed because createGhostMapping may modify its argument - confirm.
256 std::vector<index> pullingIndexOrig(pullingIndexGlobal);
// Array that the row sizes and row contents below are read from.
259 auto readSon = std::make_shared<TArray>(mpi);
// Transformer setup: father global mapping, ghost mapping over the pulling indices,
// then MPI type creation. (How father/son are attached to `trans` is not visible here.)
262 TArrayTransformer
trans;
264 trans.createFatherGlobalMapping();
265 trans.createGhostMapping(pullingIndexGlobal);
268 trans.createMPITypes();
// Lookup table used below to turn a pulling index into a row position in readSon.
// NOTE(review): the code that fills this map is not visible in this listing.
278 std::unordered_map<index, index> globalIdx2SonPos;
// For the CSR data layout only: size outFather's rows using a callback that returns the
// size of the matching readSon row.
285 if constexpr (TArray::_dataLayout ==
CSR)
287 outFather->ResizeRowsAndCompress(
290 auto it = globalIdx2SonPos.find(pullingIndexOrig[
i]);
292 return readSon->RowSize(it->second);
// Row copy: look up row i's pulling index, find its position in readSon, and copy that
// row into row i of outFather. (The enclosing loop header is not visible here.)
298 index globalReadIdx = pullingIndexOrig[
i];
299 auto it = globalIdx2SonPos.find(globalReadIdx);
// Diagnostic text for an index missing from the lookup table.
// NOTE(review): the check/assert that consumes this message is not visible here.
301 fmt::format(
"globalReadIdx {} not found in ghostIndex on rank {}", globalReadIdx, mpi.rank));
302 index sonPos = it->second;
303 outFather->CopyRowFrom(
i, *readSon, sonPos);