17# include <sys/ptrace.h>
21#if defined(_WIN32) || defined(__WINDOWS_)
28# define NDEBUG_DISABLED
38 std::ifstream fin(
"/proc/self/status");
44 if (buf ==
"TracerPid:")
53#if defined(_WIN32) || defined(__WINDOWS_)
54 return IsDebuggerPresent();
62 {
log() <<
"Rank " << mpi.rank <<
" PID: " << getpid() << std::endl; });
64#if defined(_WIN32) || defined(__WINDOWS_)
66 {
log() <<
"Rank " << mpi.rank <<
" PID: " << _getpid() << std::endl; });
71 for (
MPI_int ir = 0; ir < mpi.size; ir++)
77 MPI_Bcast(&newDebugFlag, 1, MPI_INT, ir, mpi.comm);
80 MPI_Bcast(&newDebugFlag, 1, MPI_INT, ir, mpi.comm);
97 std::cerr <<
"\033[91m DNDS_assertion failed\033[39m: \"" << expr <<
"\" at [ " << file <<
":" << line <<
" ]\n"
100 MPI_Barrier(mpi.comm);
117 auto result =
static_cast<int64_t
>(std::time(
nullptr));
118 std::array<char, 512> bufTime{};
119 std::array<char, 512 + 32> buf{};
125#if defined(_WIN32) || defined(__WINDOWS_)
127 pid = GetCurrentProcessId();
129 MPI_Bcast(&
result, 1, MPI_INT64_T, 0, mpi.comm);
130 MPI_Bcast(&pid, 1, MPI_INT64_T, 0, mpi.comm);
132 auto time_result =
static_cast<time_t
>(
result);
134 std::strftime(bufTime.data(), 512,
"%F_%H-%M-%S", std::localtime(&time_result));
136 long pidc =
static_cast<long>(pid);
137 std::sprintf(buf.data(),
"%s_%ld", bufTime.data(), pidc);
146#define start_timer PerformanceTimer::Instance().StartTimer(PerformanceTimer::Comm)
147#define stop_timer PerformanceTimer::Instance().StopTimer(PerformanceTimer::Comm)
154 ret = MPI_Bcast(buf, num, type, source_rank, comm);
157 MPI_Request req{MPI_REQUEST_NULL};
158 ret = MPI_Ibcast(buf, num, type, source_rank, comm, &req);
170 ret = MPI_Alltoall(send, sendNum, typeSend, recv, recvNum, typeRecv, comm);
173 MPI_Request req{MPI_REQUEST_NULL};
174 ret = MPI_Ialltoall(send, sendNum, typeSend, recv, recvNum, typeRecv, comm, &req);
182 void *send,
MPI_int *sendSizes,
MPI_int *sendStarts, MPI_Datatype sendType,
183 void *recv,
MPI_int *recvSizes,
MPI_int *recvStarts, MPI_Datatype recvType, MPI_Comm comm)
189 send, sendSizes, sendStarts, sendType,
190 recv, recvSizes, recvStarts, recvType, comm);
193 MPI_Request req{MPI_REQUEST_NULL};
194 ret = MPI_Ialltoallv(send, sendSizes, sendStarts, sendType,
195 recv, recvSizes, recvStarts, recvType, comm, &req);
203 MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
208 ret = MPI_Allreduce(sendbuf, recvbuf, count, datatype, op, comm);
211 MPI_Request req{MPI_REQUEST_NULL};
212 ret = MPI_Iallreduce(sendbuf, recvbuf, count, datatype, op, comm, &req);
220 MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
224 ret = MPI_Scan(sendbuf, recvbuf, count, datatype, op, comm);
230 void *recvbuf,
MPI_int recvcount,
231 MPI_Datatype recvtype, MPI_Comm comm)
236 ret = MPI_Allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm);
239 MPI_Request req{MPI_REQUEST_NULL};
240 ret = MPI_Iallgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, &req);
252 ret = MPI_Barrier(comm);
261 MPI_Request req{MPI_REQUEST_NULL};
263 MPI_Ibarrier(comm, &req);
265 if (req != MPI_REQUEST_NULL)
266 MPI_Request_free(&req);
276 ret = MPI_Testall(count, reqs, &flag, statuses);
277 std::this_thread::sleep_for(std::chrono::nanoseconds(checkNanoSecs));
285 return MPI_Waitall(count, reqs, statuses);
300 return 1 == MPIX_Query_cuda_support();
318 cleaners.emplace(
p, std::move(nCleaner));
329 for (
auto &[k, f] : cleaners)
336 CommStrategy::CommStrategy()
340 auto *ret = std::getenv(
"DNDS_USE_LAZY_WAIT");
341 if (ret !=
nullptr && (std::stod(ret) != 0))
343 _use_lazy_wait = std::stod(ret);
348 log() <<
"Detected DNDS_USE_LAZY_WAIT, setting to " << _use_lazy_wait << std::endl;
362 auto *ret = std::getenv(
"DNDS_ARRAY_STRATEGY_USE_IN_SITU");
363 if (ret !=
nullptr && (std::stoi(ret) != 0))
366 auto mpi = MPIInfo();
369 log() <<
"Detected DNDS_ARRAY_STRATEGY_USE_IN_SITU, setting" << std::endl;
373 MPI_Barrier(
mpi.comm);
386 auto *ret = std::getenv(
"DNDS_USE_STRONG_SYNC_WAIT");
387 if (ret !=
nullptr && (std::stoi(ret) != 0))
389 _use_strong_sync_wait =
true;
390 auto mpi = MPIInfo();
393 log() <<
"Detected DNDS_USE_STRONG_SYNC_WAIT, setting" << std::endl;
397 MPI_Barrier(
mpi.comm);
410 auto *ret = std::getenv(
"DNDS_USE_ASYNC_ONE_BY_ONE");
411 if (ret !=
nullptr && (std::stoi(ret) != 0))
413 _use_async_one_by_one =
true;
414 auto mpi = MPIInfo();
417 log() <<
"Detected DNDS_USE_ASYNC_ONE_BY_ONE, setting" << std::endl;
418 if (
bool(_use_lazy_wait))
421 MPI_Barrier(
mpi.comm);
442 return _array_strategy;
452 return _use_strong_sync_wait;
457 return _use_async_one_by_one;
462 return _use_lazy_wait;
471#ifdef NDEBUG_DISABLED
473# undef NDEBUG_DISABLED
#define DNDS_assert(expr)
Debug-only assertion (compiled out when DNDS_NDEBUG is defined). Prints the expression + file/line + ...
MPI wrappers: MPIInfo, collective operations, type mapping, CommStrategy.
Wall-clock performance timer and running scalar statistics utilities.
Process-singleton managing the buffer attached to MPI for MPI_Bsend (buffered sends).
static MPIBufferHandler & Instance()
Access the process-wide singleton.
Process-wide singleton that selects how ArrayTransformer packs and waits for MPI messages.
static CommStrategy & Instance()
Access the process-wide singleton.
bool GetUseStrongSyncWait() const
Whether barriers are inserted around Waitall for profiling.
double GetUseLazyWait() const
Polling interval (ns) for MPI::WaitallLazy. 0 means use MPI_Waitall.
ArrayCommType GetArrayStrategy()
Current array-pack strategy.
bool GetUseAsyncOneByOne() const
Whether transformers should use one-by-one Isend/Irecv.
ArrayCommType
Which derived-type strategy ArrayTransformer should use.
@ InSituPack
Manually pack / unpack into contiguous buffers.
void SetArrayStrategy(ArrayCommType t)
Override the array-pack strategy (affects subsequently-created transformers).
Singleton that tracks and releases long-lived MPI resources at MPI_Finalize time.
static ResourceRecycler & Instance()
Access the process-wide singleton.
void RegisterCleaner(void *p, std::function< void()> nCleaner)
Register a cleanup callback keyed by p.
void clean()
Invoke all registered cleaners and drop them. Called by MPI::Finalize().
void RemoveCleaner(void *p)
Remove a previously-registered cleaner.
bool IsDebugged()
Whether the current process is running under a debugger. Implemented via /proc/self/status TracerPid ...
void MPIDebugHold(const MPIInfo &mpi)
If isDebugging is set, block every rank in a busy-wait loop so the user can attach a debugger and ins...
bool isDebugging
Flag consulted by MPIDebugHold and assert_false_info_mpi.
MPI_int Allgather(const void *sendbuf, MPI_int sendcount, MPI_Datatype sendtype, void *recvbuf, MPI_int recvcount, MPI_Datatype recvtype, MPI_Comm comm)
Wrapper over MPI_Allgather.
MPI_int Bcast(void *buf, MPI_int num, MPI_Datatype type, MPI_int source_rank, MPI_Comm comm)
Wrapper over MPI_Bcast.
MPI_int WaitallAuto(MPI_int count, MPI_Request *reqs, MPI_Status *statuses)
Wait on an array of requests, choosing between MPI_Waitall and the lazy-poll variant based on CommStr...
MPI_int BarrierLazy(MPI_Comm comm, uint64_t checkNanoSecs)
Polling barrier that sleeps checkNanoSecs ns between MPI_Test calls. Reduces CPU spin when many ranks...
MPI_int Alltoall(void *send, MPI_int sendNum, MPI_Datatype typeSend, void *recv, MPI_int recvNum, MPI_Datatype typeRecv, MPI_Comm comm)
Wrapper over MPI_Alltoall (fixed per-peer count).
MPI_int Scan(const void *sendbuf, void *recvbuf, MPI_int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
Wrapper over MPI_Scan (inclusive prefix reduction).
MPI_int Alltoallv(void *send, MPI_int *sendSizes, MPI_int *sendStarts, MPI_Datatype sendType, void *recv, MPI_int *recvSizes, MPI_int *recvStarts, MPI_Datatype recvType, MPI_Comm comm)
Wrapper over MPI_Alltoallv (variable per-peer counts + displacements).
MPI_int Allreduce(const void *sendbuf, void *recvbuf, MPI_int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
Wrapper over MPI_Allreduce.
MPI_int Barrier(MPI_Comm comm)
Wrapper over MPI_Barrier.
bool isCudaAware()
Runtime probe: is the current MPI implementation configured with CUDA-aware support?...
MPI_int WaitallLazy(MPI_int count, MPI_Request *reqs, MPI_Status *statuses, uint64_t checkNanoSecs)
Wait on an array of requests by polling with MPI_Testall, sleeping checkNanoSecs ns between polls.
the host side operators are provided as implemented
void MPISerialDo(const MPIInfo &mpi, F f)
Execute f on each rank serially, in rank order.
std::string getTraceString()
Return a symbolicated stack trace for the calling thread.
std::string getTimeStamp(const MPIInfo &mpi)
Format a timestamp string ("%F_%H-%M-%S" followed by "_<pid>") from the time and PID broadcast from rank 0 over mpi.comm, so every rank receives the same values.
std::ostream & log()
Return the current DNDSR log stream (either std::cout or the installed file).
void assert_false_info_mpi(const char *expr, const char *file, int line, const std::string &info, const DNDS::MPIInfo &mpi)
MPI-aware assertion-failure reporter.
std::mutex HDF_mutex
Global mutex serialising host-side HDF5 calls.
int MPI_int
Integer type (= C int) used for counts and ranks in MPI calls.
Lightweight bundle of an MPI communicator and the calling rank's coordinates.
const tPoint const tPoint const tPoint & p