28#ifndef MPICH_SKIP_MPICXX
29# define MPICH_SKIP_MPICXX 1
31#ifndef OMPI_SKIP_MPICXX
32# define OMPI_SKIP_MPICXX 1
45# define NDEBUG_DISABLED
58#define MAX_MPI_int INT32_MAX
59#define MAX_MPI_Aint INT64_MAX
60 static_assert(
sizeof(MPI_Aint) == 8);
86 template <
class Tbasic>
89 static_assert(
sizeof(Tbasic) == 8 ||
sizeof(Tbasic) == 4,
"DNDS::Tbasic is not right size");
90 return sizeof(Tbasic) == 8 ? MPI_INT64_T : (
sizeof(Tbasic) == 4 ? MPI_INT32_T : MPI_DATATYPE_NULL);
98 template <
class Tbasic>
101 static_assert(
sizeof(Tbasic) == 8 ||
sizeof(Tbasic) == 4,
"DNDS::Tbasic is not right size");
102 return sizeof(Tbasic) == 8 ? MPI_REAL8 : (
sizeof(Tbasic) == 4 ? MPI_REAL4 : MPI_DATATYPE_NULL);
114 template <
typename T,
typename =
void>
119 template <
typename T>
125 template <
typename T,
typename =
void>
130 template <
typename T>
143 return std::make_pair(T::CommType(), T::CommMult());
145 return T::CommPair();
167 static const auto badReturn = std::make_pair(MPI_DATATYPE_NULL,
MPI_int(-1));
168 if constexpr (std::is_scalar_v<T>)
170 if constexpr (std::is_same_v<T, float>)
171 return std::make_pair(MPI_FLOAT,
MPI_int(1));
172 if constexpr (std::is_same_v<T, double>)
173 return std::make_pair(MPI_DOUBLE,
MPI_int(1));
174 if constexpr (std::is_same_v<T, long double>)
175 return std::make_pair(MPI_LONG_DOUBLE,
MPI_int(1));
177 if constexpr (std::is_same_v<T, int8_t>)
178 return std::make_pair(MPI_INT8_T,
MPI_int(1));
179 if constexpr (std::is_same_v<T, int16_t>)
180 return std::make_pair(MPI_INT16_T,
MPI_int(1));
181 if constexpr (std::is_same_v<T, int32_t>)
182 return std::make_pair(MPI_INT32_T,
MPI_int(1));
183 if constexpr (std::is_same_v<T, int64_t>)
184 return std::make_pair(MPI_INT64_T,
MPI_int(1));
186 if constexpr (
sizeof(T) == 1)
187 return std::make_pair(MPI_UINT8_T,
MPI_int(1));
188 else if constexpr (
sizeof(T) == 2)
189 return std::make_pair(MPI_UINT16_T,
MPI_int(1));
190 else if constexpr (
sizeof(T) == 4)
191 return std::make_pair(MPI_UINT32_T,
MPI_int(1));
192 else if constexpr (
sizeof(T) == 8)
193 return std::make_pair(MPI_UINT64_T,
MPI_int(1));
195 return BasicType_To_MPIIntType_Custom<T>();
197 else if constexpr (std::is_array_v<T>)
199 std::pair<MPI_Datatype, MPI_int> SizCom = BasicType_To_MPIIntType<std::remove_extent_t<T>>();
200 return std::make_pair(SizCom.first, SizCom.second * std::extent_v<T>);
202 else if constexpr (std::is_trivially_copyable_v<T>)
204 if constexpr (Meta::is_std_array_v<T>)
205 return std::make_pair(
206 BasicType_To_MPIIntType<typename T::value_type>().first,
207 BasicType_To_MPIIntType<typename T::value_type>().second * T().size());
209 return BasicType_To_MPIIntType_Custom<T>();
211 else if constexpr (Meta::is_fixed_data_real_eigen_matrix_v<T>)
214 return BasicType_To_MPIIntType_Custom<T>();
260 comm = MPI_COMM_WORLD;
291 std::unordered_map<
void *, std::function<void()>> cleaners;
325 MPI_Comm_size(MPI_COMM_WORLD, &ret);
333 MPI_Comm_rank(MPI_COMM_WORLD, &ret);
343#define DNDS_MPI_InsertCheck(mpi, info) \
344 InsertCheck(mpi, info, __FUNCTION__, __FILE__, __LINE__)
364 struct shared_ctor_guard
370 template <
typename... Args>
373 if (!(std::shared_ptr<tSelf>(
this, [](
tSelf *) {}).use_count() == 1))
374 throw std::runtime_error(
"tSelf must be created via shared_ptr");
380 template <
typename... Args>
383 return std::make_shared<MPITypePairHolder>(shared_ctor_guard{}, std::forward<Args>(args)...);
401 for (
auto &
i : (*this))
402 if (
i.first >= 0 &&
i.second != 0 &&
i.second != MPI_DATATYPE_NULL)
403 MPI_Type_free(&
i.second);
404 this->tMPI_typePairVec::clear();
424 struct shared_ctor_guard
430 template <
typename... Args>
433 if (!(std::shared_ptr<tSelf>(
this, [](
tSelf *) {}).use_count() == 1))
434 throw std::runtime_error(
"tSelf must be created via shared_ptr");
440 template <
typename... Args>
443 return std::make_shared<MPIReqHolder>(shared_ctor_guard{}, std::forward<Args>(args)...);
461 for (
auto &
i : (*this))
462 if (
i != MPI_REQUEST_NULL)
463 MPI_Request_free(&
i);
464 this->tMPI_reqVec::clear();
493# define DNDS_assert_info_mpi(expr, mpi, info) (void(0))
497# define DNDS_assert_info_mpi(expr, mpi, info) \
498 ((static_cast<bool>(expr)) \
500 : ::DNDS::assert_false_info_mpi(#expr, __FILE__, __LINE__, info, mpi))
517 ierr = MPI_Query_thread(&ret),
DNDS_assert(ierr == MPI_SUCCESS);
537 MPI_Initialized(&init_flag);
539 int provided_MPI_THREAD_LEVEL{0};
540 int needed_MPI_THREAD_LEVEL = MPI_THREAD_MULTIPLE;
542 auto *env = std::getenv(
"DNDS_DISABLE_ASYNC_MPI");
543 if (env !=
nullptr && (std::stod(env) != 0))
545 int ienv =
static_cast<int>(std::stod(env));
547 needed_MPI_THREAD_LEVEL = MPI_THREAD_SERIALIZED;
549 needed_MPI_THREAD_LEVEL = MPI_THREAD_FUNNELED;
551 needed_MPI_THREAD_LEVEL = MPI_THREAD_SINGLE;
555 ret = MPI_Init_thread(argc, argv, needed_MPI_THREAD_LEVEL, &provided_MPI_THREAD_LEVEL);
559 if (provided_MPI_THREAD_LEVEL < needed_MPI_THREAD_LEVEL)
561 printf(
"ERROR: The MPI library does not have full thread support\n");
562 MPI_Abort(MPI_COMM_WORLD, 1);
574 int err = MPI_Finalized(&finalized);
576 err |= MPI_Finalize();
583#define MPIBufferHandler_REPORT_CHANGE
602 std::vector<uint8_t> buf;
613 uint8_t *obuf =
nullptr;
615 MPI_Buffer_detach(
reinterpret_cast<void *
>(&obuf) , &osize);
617 buf.resize(1024ULL * 1024ULL);
618 MPI_Buffer_attach(buf.data(),
int(buf.size()));
640 void claim(MPI_Aint cs,
int reportRank = 0)
642 if (buf.size() - claimed <
static_cast<size_type>(cs))
645 uint8_t *obuf =
nullptr;
647 MPI_Buffer_detach(
reinterpret_cast<void *
>(&obuf) , &osize);
648#ifdef MPIBufferHandler_REPORT_CHANGE
649 std::cout <<
"MPIBufferHandler: New BUf at " << reportRank << std::endl
650 << osize << std::endl;
653 buf.resize(claimed + cs);
654 MPI_Buffer_attach(buf.data(), size_t_to_signed<MPI_int>(buf.size()));
655#ifdef MPIBufferHandler_REPORT_CHANGE
656 std::cout <<
" -> " << buf.size() << std::endl;
665 DNDS_assert(size_t_to_signed<MPI_int>(claimed) >= cs);
671 return (
void *)(buf.data());
687 void *send,
MPI_int *sendSizes,
MPI_int *sendStarts, MPI_Datatype sendType,
688 void *recv,
MPI_int *recvSizes,
MPI_int *recvStarts, MPI_Datatype recvType, MPI_Comm comm);
692 MPI_Datatype datatype, MPI_Op op, MPI_Comm comm);
696 MPI_Datatype datatype, MPI_Op op, MPI_Comm comm);
700 void *recvbuf,
MPI_int recvcount,
701 MPI_Datatype recvtype, MPI_Comm comm);
794 bool _use_strong_sync_wait =
false;
795 bool _use_async_one_by_one =
false;
796 double _use_lazy_wait = 0;
837 const std::string &FUNCTION =
"",
const std::string &FILE =
"",
int LINE = -1)
839#if !(defined(NDEBUG) || defined(NINSERT))
841 std::cout <<
"=== CHECK \"" << info <<
"\" RANK " << mpi.rank <<
" ==="
842 <<
" @ FName: " << FUNCTION
843 <<
" @ Place: " << FILE <<
":" << LINE << std::endl;
849#ifdef NDEBUG_DISABLED
851# undef NDEBUG_DISABLED
Core type aliases, constants, and metaprogramming utilities for the DNDS framework.
#define DNDS_assert(expr)
Debug-only assertion (compiled out when DNDS_NDEBUG is defined). Prints the expression + file/line + ...
#define DISABLE_WARNING_PUSH
#define DISABLE_WARNING_UNUSED_VALUE
#define DISABLE_WARNING_POP
Process-singleton managing the buffer attached to MPI for MPI_Bsend (buffered sends).
decltype(buf)::size_type size_type
void unclaim(MPI_int cs)
Release cs bytes previously reserved via claim (only updates accounting; does not shrink the buffer).
MPIBufferHandler & operator=(MPIBufferHandler &&)=delete
MPI_int size()
Current buffer size in bytes (fits in MPI_int; asserted).
MPIBufferHandler(MPIBufferHandler &&)=delete
static MPIBufferHandler & Instance()
Access the process-wide singleton.
void claim(MPI_Aint cs, int reportRank=0)
Reserve cs additional bytes, growing and re-attaching the MPI buffer if needed. reportRank is only us...
void * getBuf()
Direct pointer to the attached buffer (for diagnostics).
MPIBufferHandler(const MPIBufferHandler &)=delete
~MPIBufferHandler()=default
MPIBufferHandler & operator=(const MPIBufferHandler &)=delete
Process-wide singleton that selects how ArrayTransformer packs and waits for MPI messages.
static CommStrategy & Instance()
Access the process-wide singleton.
bool GetUseStrongSyncWait() const
Whether barriers are inserted around Waitall for profiling.
CommStrategy & operator=(CommStrategy &&)=delete
double GetUseLazyWait() const
Polling interval (ns) for MPI::WaitallLazy. 0 means use MPI_Waitall.
CommStrategy(CommStrategy &&)=delete
ArrayCommType GetArrayStrategy()
Current array-pack strategy.
bool GetUseAsyncOneByOne() const
Whether transformers should use one-by-one Isend/Irecv.
ArrayCommType
Which derived-type strategy ArrayTransformer should use.
@ InSituPack
Manually pack / unpack into contiguous buffers.
@ UnknownArrayCommType
Sentinel / uninitialised.
@ HIndexed
Use MPI_Type_create_hindexed derived types (default).
CommStrategy(const CommStrategy &)=delete
void SetArrayStrategy(ArrayCommType t)
Override the array-pack strategy (affects subsequently-created transformers).
CommStrategy & operator=(const CommStrategy &)=delete
Singleton that tracks and releases long-lived MPI resources at MPI_Finalize time.
~ResourceRecycler()=default
ResourceRecycler(ResourceRecycler &&)=delete
static ResourceRecycler & Instance()
Access the process-wide singleton.
ResourceRecycler & operator=(ResourceRecycler &&)=delete
void RegisterCleaner(void *p, std::function< void()> nCleaner)
Register a cleanup callback keyed by p.
void clean()
Invoke all registered cleaners and drop them. Called by MPI::Finalize().
ResourceRecycler(const ResourceRecycler &)=delete
ResourceRecycler & operator=(const ResourceRecycler &)=delete
void RemoveCleaner(void *p)
Remove a previously-registered cleaner.
bool IsDebugged()
Whether the current process is running under a debugger. Implemented via /proc/self/status TracerPid ...
void MPIDebugHold(const MPIInfo &mpi)
If isDebugging is set, block every rank in a busy-wait loop so the user can attach a debugger and ins...
bool isDebugging
Flag consulted by MPIDebugHold and assert_false_info_mpi.
MPI_int Allgather(const void *sendbuf, MPI_int sendcount, MPI_Datatype sendtype, void *recvbuf, MPI_int recvcount, MPI_Datatype recvtype, MPI_Comm comm)
Wrapper over MPI_Allgather.
MPI_int Bcast(void *buf, MPI_int num, MPI_Datatype type, MPI_int source_rank, MPI_Comm comm)
Thin wrapper over MPI_Bcast.
MPI_int WaitallAuto(MPI_int count, MPI_Request *reqs, MPI_Status *statuses)
Wait on an array of requests, choosing between MPI_Waitall and the lazy-poll variant based on CommStr...
MPI_int BarrierLazy(MPI_Comm comm, uint64_t checkNanoSecs)
Polling barrier that sleeps checkNanoSecs ns between MPI_Test calls. Reduces CPU spin when many ranks...
MPI_int Alltoall(void *send, MPI_int sendNum, MPI_Datatype typeSend, void *recv, MPI_int recvNum, MPI_Datatype typeRecv, MPI_Comm comm)
Wrapper over MPI_Alltoall (fixed per-peer count).
void AllreduceOneReal(real &v, MPI_Op op, const MPIInfo &mpi)
Single-scalar Allreduce helper for reals (in-place, count = 1).
MPI_int Scan(const void *sendbuf, void *recvbuf, MPI_int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
Wrapper over MPI_Scan (inclusive prefix reduction).
int Finalize()
Release DNDSR-registered MPI resources then call MPI_Finalize.
MPI_int Alltoallv(void *send, MPI_int *sendSizes, MPI_int *sendStarts, MPI_Datatype sendType, void *recv, MPI_int *recvSizes, MPI_int *recvStarts, MPI_Datatype recvType, MPI_Comm comm)
Wrapper over MPI_Alltoallv (variable per-peer counts + displacements).
MPI_int Allreduce(const void *sendbuf, void *recvbuf, MPI_int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
Wrapper over MPI_Allreduce.
MPI_int Barrier(MPI_Comm comm)
Wrapper over MPI_Barrier.
MPI_int Init_thread(int *argc, char ***argv)
Initialise MPI with thread support, honouring the DNDS_DISABLE_ASYNC_MPI environment override.
bool isCudaAware()
Runtime probe: is the current MPI implementation configured with CUDA-aware support?...
MPI_int WaitallLazy(MPI_int count, MPI_Request *reqs, MPI_Status *statuses, uint64_t checkNanoSecs)
Like WaitallAuto but sleeps checkNanoSecs ns between polls.
int GetMPIThreadLevel()
Return the MPI thread-support level the current process was initialised with.
void AllreduceOneIndex(index &v, MPI_Op op, const MPIInfo &mpi)
Single-scalar Allreduce helper for indices (in-place, count = 1).
the host side operators are provided as implemented
constexpr MPI_Aint UnInitMPIAint
Sentinel "not initialised" MPI_Aint value (= -1).
MPI_Aint MPI_index
MPI-compatible address/offset type (= MPI_Aint, 64-bit on all supported platforms)....
std::vector< MPI_int > tMPI_sizeVec
Vector of MPI counts.
const MPI_Datatype DNDS_MPI_INDEX
MPI datatype matching index (= MPI_INT64_T).
tMPI_indexVec tMPI_AintVec
Alias for tMPI_indexVec to match MPI_Aint terminology.
ssp< MPITypePairHolder > tpMPITypePairHolder
Shared-pointer alias to MPITypePairHolder.
std::vector< MPI_index > tMPI_indexVec
Vector of MPI_Aint byte-offsets for hindexed datatypes.
void MPISerialDo(const MPIInfo &mpi, F f)
Execute f on each rank serially, in rank order.
std::vector< std::pair< MPI_int, MPI_Datatype > > tMPI_typePairVec
MPI_int MPIWorldRank()
Convenience: MPI_Comm_rank(MPI_COMM_WORLD).
constexpr T divide_ceil(T a, T b)
Integer ceiling division ceil(a / b). Correct for all signs.
constexpr MPI_int UnInitMPIInt
Sentinel "not initialised" MPI_int value (= -1, invalid rank).
std::pair< MPI_Datatype, MPI_int > BasicType_To_MPIIntType_Custom()
Dispatch to a user-provided CommPair, or a CommMult + CommType pair, on T.
MPI_int MPIWorldSize()
Convenience: MPI_Comm_size(MPI_COMM_WORLD).
constexpr MPI_Datatype DNDSToMPITypeInt()
Map a DNDS integer type size to an MPI signed-integer datatype.
std::vector< MPI_Request > tMPI_reqVec
Vector of MPI_Request, for persistent / nonblocking calls.
int64_t index
Global row / DOF index type (signed 64-bit; handles multi-billion-cell meshes).
void InsertCheck(const MPIInfo &mpi, const std::string &info="", const std::string &FUNCTION="", const std::string &FILE="", int LINE=-1)
Barrier + annotated print used by DNDS_MPI_InsertCheck.
tMPI_sizeVec tMPI_intVec
Alias for tMPI_sizeVec; used where the name "int vec" reads better.
std::shared_ptr< T > ssp
Shortened alias for std::shared_ptr used pervasively in DNDSR.
double real
Canonical floating-point scalar used throughout DNDSR (double precision).
constexpr MPI_Datatype DNDSToMPITypeFloat()
Map a DNDS floating-point type size to an MPI datatype.
std::string getTimeStamp(const MPIInfo &mpi)
Format a human-readable timestamp using the calling rank as context.
void assert_false_info_mpi(const char *expr, const char *file, int line, const std::string &info, const DNDS::MPIInfo &mpi)
MPI-aware assertion-failure reporter.
std::pair< MPI_Datatype, MPI_int > BasicType_To_MPIIntType()
Deduce an (MPI_Datatype, count) pair that represents a T value.
std::vector< MPI_Status > tMPI_statVec
Vector of MPI_Status, for MPI_Waitall / MPI_Testall.
std::mutex HDF_mutex
Global mutex serialising host-side HDF5 calls.
int MPI_int
MPI integer type (= C int). Used for counts and ranks in MPI calls.
const MPI_Datatype DNDS_MPI_REAL
MPI datatype matching real (= MPI_REAL8).
Lightweight bundle of an MPI communicator and the calling rank's coordinates.
int size
Number of ranks in comm (-1 until initialised).
MPIInfo(MPI_Comm nc, int r, int s)
Low-level constructor for callers that already know (rank, size).
MPIInfo(MPI_Comm ncomm)
Wrap an existing MPI communicator; queries rank and size.
int rank
This rank's 0-based index within comm (-1 until initialised).
MPI_Comm comm
The underlying MPI communicator handle.
void setWorld()
Initialise the object to MPI_COMM_WORLD. Requires MPI_Init to have run.
bool operator==(const MPIInfo &r) const
Exact triple equality.
RAII vector of MPI_Requests that frees each non-null handle when destroyed.
MPIReqHolder(MPIReqHolder &&)=delete
static ssp< MPIReqHolder > create(Args &&...args)
Only public path to construct an instance.
void clear()
Free every non-null request and empty the vector.
MPIReqHolder(shared_ctor_guard g, Args &&...args)
Perfect-forwarding factory; returns shared_ptr<MPIReqHolder>.
MPIReqHolder & operator=(const MPIReqHolder &)=delete
MPIReqHolder(const MPIReqHolder &)=delete
MPIReqHolder & operator=(MPIReqHolder &&)=delete
RAII vector of (count, MPI_Datatype) pairs that frees every committed datatype when destroyed.
MPITypePairHolder(shared_ctor_guard g, Args &&...args)
Perfect-forwarding factory; returns shared_ptr<MPITypePairHolder>.
MPITypePairHolder(MPITypePairHolder &&)=delete
void clear()
Free every committed datatype and empty the vector.
MPITypePairHolder(const MPITypePairHolder &)=delete
static ssp< MPITypePairHolder > create(Args &&...args)
Only public path to construct an instance; forwards to the private constructor.
MPITypePairHolder & operator=(const MPITypePairHolder &)=delete
MPITypePairHolder & operator=(MPITypePairHolder &&)=delete
here are some reasons to upgrade to C++20...
SFINAE trait detecting a static CommType member in T.
Eigen::Matrix< real, 5, 1 > v
const tPoint const tPoint const tPoint & p