DNDSR 0.2.1
Distributed Numeric Data Structure for CFV
Loading...
Searching...
No Matches
SerializerBase.hpp
Go to the documentation of this file.
1#pragma once
2/// @file SerializerBase.hpp
3/// @brief Base types and abstract interface for array serialization.
4
5#include "DNDS/Defines.hpp"
6#include "DNDS/MPI.hpp"
7#include <limits>
8#include <set>
10#include "DNDS/Vector.hpp"
11
12namespace DNDS::Serializer
13{
14 /// @brief Sentinel: "offset = Parts", indicating each rank writes its own slab.
15 static const index Offset_Parts = -1;
16 /// @brief Sentinel: "offset = One", indicating the first rank owns the whole dataset.
17 static const index Offset_One = -2;
18 /// @brief Sentinel: "even-split read" (see @ref ArrayGlobalOffset_EvenSplit).
19 static const index Offset_EvenSplit = -3;
20 /// @brief Sentinel: offset unknown / uninitialised.
21 static const index Offset_Unknown = UnInitIndex;
22
23 /**
24 * @brief Describes one rank's window into a globally-distributed dataset.
25 *
26 * @details Stores a `(localSize, globalOffset)` pair. The offset is
27 * overloaded to carry the special sentinels @ref Offset_Parts / @ref Offset_One /
28 * @ref Offset_EvenSplit / @ref Offset_Unknown (negative values). Multiplication /
29 * division are defined so that byte offsets can be derived from element
30 * offsets (`offset * sizeof(T)`) with overflow guards.
31 */
33 {
34 index _size{0};
35 index _offset{0};
36
37 public:
38 static_assert(UnInitIndex < 0);
39 /// @brief Construct with explicit local size and global offset.
40 ArrayGlobalOffset(index sz, index ofs) : _size(sz), _offset(ofs) {}
41
42 /// @brief Local size this rank owns (in element units of the caller's choosing).
43 [[nodiscard]] index size() const { return _size; }
44 /// @brief Global offset of this rank's data (or a sentinel value, see
45 /// @ref Offset_Parts etc.).
46 [[nodiscard]] index offset() const { return _offset; }
47
48 /// @brief Scale the descriptor's element count by `R` (and the offset
49 /// too if it is a real offset, not a sentinel). Used for byte-level
50 /// offsets: `elemOffset * sizeof(T)`.
52 {
53 if (_offset >= 0)
54 {
55 DNDS_assert_info(R == 0 || _size <= std::numeric_limits<index>::max() / R,
56 "Overflow in ArrayGlobalOffset size multiplication");
57 DNDS_assert_info(R == 0 || _offset <= std::numeric_limits<index>::max() / R,
58 "Overflow in ArrayGlobalOffset offset multiplication");
59 return ArrayGlobalOffset{_size * R, _offset * R};
60 }
61 else
62 return ArrayGlobalOffset{_size * R, _offset};
63 }
64
65 /// @brief Inverse of #operator*. Real-offset descriptors scale both
66 /// size and offset; sentinel descriptors only scale the size.
68 {
69 if (_offset >= 0)
70 return ArrayGlobalOffset{_size / R, _offset / R};
71 else
72 return ArrayGlobalOffset{_size / R, _offset};
73 }
74
75 /// @brief Assert that both size and offset are multiples of `R`.
76 /// @details Used when translating between element and byte offsets.
77 void CheckMultipleOf(index R) const
78 {
79 if (_offset >= 0)
80 {
81 DNDS_assert_info(_size % R == 0, fmt::format("_size [{}] must be multiple of R [{}]", _size, R));
82 DNDS_assert_info(_offset % R == 0, fmt::format("_offset [{}] must be multiple of R [{}]", _offset, R));
83 }
84 }
85
86 /// @brief Equality: sentinel offsets compare only by offset,
87 /// real-offset descriptors compare by the full `(size, offset)` pair.
88 bool operator==(const ArrayGlobalOffset &other) const
89 {
90 if (_offset >= 0)
91 return _size == other._size && _offset == other._offset;
92 else
93 return _offset == other._offset;
94 }
95
96 /// @brief Pretty-printed representation for logging.
97 operator std::string() const
98 {
99 return fmt::format("ArrayGlobalOffset{{size: {}, offset: {}}}", _size, _offset);
100 }
101
102 /// @brief Whether this descriptor carries a real distributed offset
103 /// (rather than a sentinel like @ref Offset_Parts).
104 [[nodiscard]] bool isDist() const
105 {
106 return _offset >= 0;
107 }
108
109 friend std::ostream &operator<<(std::ostream &o, const ArrayGlobalOffset &v)
110 {
111 o << (std::string)(v);
112 return o;
113 }
114 };
115
116 /// @brief Sentinel: offset unknown / uninitialised.
117 static const ArrayGlobalOffset ArrayGlobalOffset_Unknown = ArrayGlobalOffset{0, Offset_Unknown};
118 /// @brief Sentinel: rank 0 owns the whole dataset.
119 static const ArrayGlobalOffset ArrayGlobalOffset_One = ArrayGlobalOffset{0, Offset_One};
120 /// @brief Sentinel: each rank writes its slab; serializer infers offsets via MPI_Scan.
121 static const ArrayGlobalOffset ArrayGlobalOffset_Parts = ArrayGlobalOffset{0, Offset_Parts};
122 /// @brief Sentinel: even-split read (each rank reads ~N_global/nRanks rows
123 /// starting at `rank * N_global / nRanks`). Used during repartitioned restarts.
124 static const ArrayGlobalOffset ArrayGlobalOffset_EvenSplit = ArrayGlobalOffset{0, Offset_EvenSplit};
125
126 /// @brief Abstract interface for reading/writing scalars, vectors, and byte arrays.
127 ///
128 /// ## Collective semantics (SerializerH5)
129 ///
130 /// For the HDF5 implementation, all Read/Write vector and array methods are
131 /// **MPI-collective**: every rank must call the same method in the same order,
132 /// even if a rank reads/writes 0 elements. Failing to participate causes a
133 /// hang because HDF5 collective I/O synchronizes across the communicator.
134 ///
135 /// ## Zero-size reads
136 ///
137 /// Some ranks may legitimately have 0 elements to read (e.g., when
138 /// `nGlobal < nRanks` in an even-split read). The Read*Vector and
139 /// ReadShared*Vector implementations handle this by passing a dummy
140 /// non-null pointer to the internal HDF5 read when the output container
141 /// is empty, so that `std::vector<>::data()` returning nullptr does not
142 /// cause the rank to skip the collective H5Dread call.
143 ///
144 /// @warning **ReadUint8Array** uses an explicit two-pass pattern: the
145 /// caller first calls with `data == nullptr` to query the size, then
146 /// calls again with a buffer. When the queried size is 0, the caller
147 /// must still pass a non-null `data` pointer on the second call so
148 /// that the collective H5Dread is not skipped. Use a stack dummy:
149 /// @code
150 /// uint8_t dummy;
151 /// ser->ReadUint8Array(name, bufferSize == 0 ? &dummy : buf, bufferSize, offset);
152 /// @endcode
154 {
155 protected:
156 /// Shared-pointer deduplication map: raw pointer -> (kept-alive shared_ptr, H5/JSON path).
157 /// By storing the shared_ptr itself we prevent the pointed-to memory from being
158 /// freed and its address reused, which would cause false dedup hits.
159 std::map<void *, std::pair<std::shared_ptr<void>, std::string>> ptr_2_pth;
160
161 /// Reverse map for read-side dedup: path -> raw pointer to the ssp local variable.
162 std::map<std::string, void *> pth_2_ssp;
163
164 /// Check if a shared pointer was already written; if so return its path.
165 template <class T>
166 bool dedupLookup(const ssp<T> &v, std::string &outPath)
167 {
168 auto it = ptr_2_pth.find(v.get());
169 if (it != ptr_2_pth.end())
170 {
171 outPath = it->second.second;
172 return true;
173 }
174 return false;
175 }
176
177 /// Register a shared pointer after writing its data.
178 template <class T>
179 void dedupRegister(const ssp<T> &v, const std::string &path)
180 {
181 ptr_2_pth[v.get()] = {std::static_pointer_cast<void>(v), path};
182 }
183
184 /// Clear all dedup state (call on CloseFile).
186 {
187 ptr_2_pth.clear();
188 pth_2_ssp.clear();
189 }
190
191 public:
192 virtual ~SerializerBase(); // define in CPP
193
194 // Polymorphic RAII base (subclasses hold file handles / H5 IDs):
195 // delete copy / move to prevent slicing and double-close.
196 SerializerBase() = default;
201 /// @brief Open a backing file (H5 file or JSON file depending on subclass).
202 /// @param read `true` for reading, `false` for writing.
203 virtual void OpenFile(const std::string &fName, bool read) = 0;
204 /// @brief Close the backing file, flushing buffers.
205 virtual void CloseFile() = 0;
206 /// @brief Create a sub-path (H5 group / JSON object) at the current location.
207 virtual void CreatePath(const std::string &p) = 0;
208 /// @brief Navigate to an existing path. Supports `/` -separated segments.
209 virtual void GoToPath(const std::string &p) = 0;
210 /// @brief Whether this serializer is per-rank (JSON-file-per-rank) or
211 /// collective (shared H5 file). Controls which API entry points are used.
212 virtual bool IsPerRank() = 0;
213 /// @brief String form of the current path.
214 virtual std::string GetCurrentPath() = 0;
215 /// @brief Names of direct children of the current path.
216 virtual std::set<std::string> ListCurrentPath() = 0;
217 /// @brief Rank index cached by the serializer (relevant for collective I/O).
218 virtual int GetMPIRank() = 0;
219 /// @brief Rank count cached by the serializer.
220 virtual int GetMPISize() = 0;
221 /// @brief MPI context the serializer was opened with.
222 virtual const MPIInfo &getMPI() = 0;
223
224 /// @brief Write a scalar int under `name` at the current path.
225 virtual void WriteInt(const std::string &name, int v) = 0;
226 /// @brief Write a scalar #index under `name`.
227 virtual void WriteIndex(const std::string &name, index v) = 0;
228 /// @brief Write a scalar #real under `name`.
229 virtual void WriteReal(const std::string &name, real v) = 0;
230 /// @brief Write a UTF-8 string under `name`.
231 virtual void WriteString(const std::string &name, const std::string &v) = 0;
232
233 /// @brief Write an index vector (collective for H5). `offset` carries the
234 /// distribution mode (#ArrayGlobalOffset_Parts, explicit offset, etc.).
235 virtual void WriteIndexVector(const std::string &name, const std::vector<index> &v, ArrayGlobalOffset offset) = 0;
236 /// @brief Write a rowsize vector (collective for H5).
237 virtual void WriteRowsizeVector(const std::string &name, const std::vector<rowsize> &v, ArrayGlobalOffset offset) = 0;
238 /// @brief Write a real vector (collective for H5).
239 virtual void WriteRealVector(const std::string &name, const std::vector<real> &v, ArrayGlobalOffset offset) = 0;
240 /// @brief Write a shared index vector; deduplicated across multiple writes
241 /// that share the same `shared_ptr`.
242 virtual void WriteSharedIndexVector(const std::string &name, const ssp<host_device_vector<index>> &v, ArrayGlobalOffset offset) = 0;
243 /// @brief Write a shared rowsize vector; deduplicated across multiple writes.
244 virtual void WriteSharedRowsizeVector(const std::string &name, const ssp<host_device_vector<rowsize>> &v, ArrayGlobalOffset offset) = 0;
245 /// @brief Write a raw byte buffer under `name`. `offset.isDist()` = true
246 /// means the caller provides the exact per-rank slab; otherwise the
247 /// buffer is treated according to the offset's sentinel.
248 virtual void WriteUint8Array(const std::string &name, const uint8_t *data, index size, ArrayGlobalOffset offset) = 0;
249
250 /**
251 * @brief Write a per-rank index vector (replicated name, independent values).
252 * @details Every rank writes its own vector under `name`; in the H5 case
253 * each rank's slab is placed in a separate dataset.
254 * @param name Dataset name (identical on every rank).
255 * @param v Rank-local vector; size may differ between ranks.
256 */
257 virtual void WriteIndexVectorPerRank(const std::string &name, const std::vector<index> &v) = 0;
258 // virtual void WriteIndexVectorParallel(const std::string &name, const host_device_vector<index> &v, ArrayGlobalOffset offset) = 0;
259 // virtual void WriteRowsizeVectorParallel(const std::string &name, const host_device_vector<rowsize> &v, ArrayGlobalOffset offset) = 0;
260 // virtual void WriteRealVectorParallel(const std::string &name, const host_device_vector<real> &v, ArrayGlobalOffset offset) = 0;
261 // virtual void WriteSharedIndexVectorParallel(const std::string &name, const ssp<host_device_vector<index>> &v, ArrayGlobalOffset offset) = 0;
262 // virtual void WriteSharedRowsizeVectorParallel(const std::string &name, const ssp<host_device_vector<rowsize>> &v, ArrayGlobalOffset offset) = 0;
263 // virtual void WriteUint8ArrayParallel(const std::string &name, const uint8_t *data, index size, ArrayGlobalOffset offset) = 0;
264
265 /// @brief Read a scalar int into `v`.
266 virtual void ReadInt(const std::string &name, int &v) = 0;
267 /// @brief Read a scalar #index into `v`.
268 virtual void ReadIndex(const std::string &name, index &v) = 0;
269 /// @brief Read a scalar #real into `v`.
270 virtual void ReadReal(const std::string &name, real &v) = 0;
271 /// @brief Read a UTF-8 string into `v`.
272 virtual void ReadString(const std::string &name, std::string &v) = 0;
273
274 /// Read methods resize the output container and populate it.
275 /// Internally these use a two-pass HDF5 pattern (size query, then data read).
276 /// Both passes are collective. When the local size is 0, a dummy non-null
277 /// pointer is passed to the second pass so the rank participates in H5Dread.
278 /// @{
279 virtual void ReadIndexVector(const std::string &name, std::vector<index> &v, ArrayGlobalOffset &offset) = 0;
280 virtual void ReadRowsizeVector(const std::string &name, std::vector<rowsize> &v, ArrayGlobalOffset &offset) = 0;
281 virtual void ReadRealVector(const std::string &name, std::vector<real> &v, ArrayGlobalOffset &offset) = 0;
282 virtual void ReadSharedIndexVector(const std::string &name, ssp<host_device_vector<index>> &v, ArrayGlobalOffset &offset) = 0;
283 virtual void ReadSharedRowsizeVector(const std::string &name, ssp<host_device_vector<rowsize>> &v, ArrayGlobalOffset &offset) = 0;
284 /// @}
285
286 /// @brief Two-pass byte array read.
287 ///
288 /// Pass 1 (data == nullptr): queries the local element count into `size`
289 /// and resolves `offset`.
290 /// Pass 2 (data != nullptr): reads `size` bytes into `data`.
291 ///
292 /// @warning When `size` is 0 after pass 1, the caller **must** still pass
293 /// a non-null `data` pointer on pass 2 so the rank participates in the
294 /// collective HDF5 read. Use a stack dummy (`uint8_t dummy; ... &dummy`).
295 virtual void ReadUint8Array(const std::string &name, uint8_t *data, index &size, ArrayGlobalOffset &offset) = 0;
296
297 // virtual void ReadIndexVectorParallel(const std::string &name, const host_device_vector<index> &v, ArrayGlobalOffset offset) = 0;
298 // virtual void ReadRowsizeVectorParallel(const std::string &name, const host_device_vector<rowsize> &v, ArrayGlobalOffset offset) = 0;
299 // virtual void ReadRealVectorParallel(const std::string &name, const host_device_vector<real> &v, ArrayGlobalOffset offset) = 0;
300 // virtual void ReadSharedIndexVectorParallel(const std::string &name, const ssp<host_device_vector<index>> v, ArrayGlobalOffset offset) = 0;
301 // virtual void ReadSharedRowsizeVectorParallel(const std::string &name, const ssp<host_device_vector<rowsize>> v, ArrayGlobalOffset offset) = 0;
302 // virtual void ReadUint8ArrayParallel(const std::string &name, const uint8_t *data, index size, ArrayGlobalOffset offset) = 0;
303 };
304
306}
Core type aliases, constants, and metaprogramming utilities for the DNDS framework.
Device memory abstraction layer with backend-specific storage and factory creation.
#define DNDS_assert_info(expr, info)
Debug-only assertion with an extra std::string info message.
Definition Errors.hpp:117
MPI wrappers: MPIInfo, collective operations, type mapping, CommStrategy.
Host-device vector types with optional GPU storage and device-side views.
Describes one rank's window into a globally-distributed dataset.
index offset() const
Global offset of this rank's data (or a sentinel value, see Offset_Parts etc.).
ArrayGlobalOffset(index sz, index ofs)
Construct with explicit local size and global offset.
index size() const
Local size this rank owns (in element units of the caller's choosing).
ArrayGlobalOffset operator*(index R) const
Scale the descriptor's element count by R (and the offset too if it is a real offset,...
bool isDist() const
Whether this descriptor carries a real distributed offset (rather than a sentinel like Offset_Parts).
friend std::ostream & operator<<(std::ostream &o, const ArrayGlobalOffset &v)
bool operator==(const ArrayGlobalOffset &other) const
Equality: sentinel offsets compare only by offset, real-offset descriptors compare by the full (size,...
ArrayGlobalOffset operator/(index R) const
Inverse of operator*. Real-offset descriptors scale both size and offset; sentinel descriptors only s...
void CheckMultipleOf(index R) const
Assert that both size and offset are multiples of R.
Abstract interface for reading/writing scalars, vectors, and byte arrays.
virtual bool IsPerRank()=0
Whether this serializer is per-rank (JSON-file-per-rank) or collective (shared H5 file)....
virtual std::set< std::string > ListCurrentPath()=0
Names of direct children of the current path.
virtual void CloseFile()=0
Close the backing file, flushing buffers.
SerializerBase & operator=(SerializerBase &&)=delete
virtual void ReadReal(const std::string &name, real &v)=0
Read a scalar real into v.
void dedupRegister(const ssp< T > &v, const std::string &path)
Register a shared pointer after writing its data.
virtual void WriteIndexVector(const std::string &name, const std::vector< index > &v, ArrayGlobalOffset offset)=0
Write an index vector (collective for H5). offset carries the distribution mode (ArrayGlobalOffset_Pa...
SerializerBase(SerializerBase &&)=delete
virtual void WriteString(const std::string &name, const std::string &v)=0
Write a UTF-8 string under name.
virtual void ReadInt(const std::string &name, int &v)=0
Read a scalar int into v.
std::map< void *, std::pair< std::shared_ptr< void >, std::string > > ptr_2_pth
virtual void ReadIndexVector(const std::string &name, std::vector< index > &v, ArrayGlobalOffset &offset)=0
virtual void ReadString(const std::string &name, std::string &v)=0
Read a UTF-8 string into v.
virtual void CreatePath(const std::string &p)=0
Create a sub-path (H5 group / JSON object) at the current location.
virtual const MPIInfo & getMPI()=0
MPI context the serializer was opened with.
virtual void WriteSharedIndexVector(const std::string &name, const ssp< host_device_vector< index > > &v, ArrayGlobalOffset offset)=0
Write a shared index vector; deduplicated across multiple writes that share the same shared_ptr.
virtual void WriteRealVector(const std::string &name, const std::vector< real > &v, ArrayGlobalOffset offset)=0
Write a real vector (collective for H5).
virtual void WriteSharedRowsizeVector(const std::string &name, const ssp< host_device_vector< rowsize > > &v, ArrayGlobalOffset offset)=0
Write a shared rowsize vector; deduplicated across multiple writes.
virtual void ReadUint8Array(const std::string &name, uint8_t *data, index &size, ArrayGlobalOffset &offset)=0
Two-pass byte array read.
virtual std::string GetCurrentPath()=0
String form of the current path.
bool dedupLookup(const ssp< T > &v, std::string &outPath)
Check if a shared pointer was already written; if so return its path.
SerializerBase(const SerializerBase &)=delete
std::map< std::string, void * > pth_2_ssp
Reverse map for read-side dedup: path -> raw pointer to the ssp local variable.
virtual void ReadRealVector(const std::string &name, std::vector< real > &v, ArrayGlobalOffset &offset)=0
virtual int GetMPIRank()=0
Rank index cached by the serializer (relevant for collective I/O).
virtual void WriteIndex(const std::string &name, index v)=0
Write a scalar index under name.
void dedupClear()
Clear all dedup state (call on CloseFile).
virtual void WriteIndexVectorPerRank(const std::string &name, const std::vector< index > &v)=0
Write a per-rank index vector (replicated name, independent values).
virtual void WriteInt(const std::string &name, int v)=0
Write a scalar int under name at the current path.
virtual void ReadRowsizeVector(const std::string &name, std::vector< rowsize > &v, ArrayGlobalOffset &offset)=0
virtual void WriteUint8Array(const std::string &name, const uint8_t *data, index size, ArrayGlobalOffset offset)=0
Write a raw byte buffer under name. offset.isDist() = true means the caller provides the exact per-ra...
virtual int GetMPISize()=0
Rank count cached by the serializer.
SerializerBase & operator=(const SerializerBase &)=delete
virtual void OpenFile(const std::string &fName, bool read)=0
Open a backing file (H5 file or JSON file depending on subclass).
virtual void ReadSharedRowsizeVector(const std::string &name, ssp< host_device_vector< rowsize > > &v, ArrayGlobalOffset &offset)=0
virtual void WriteReal(const std::string &name, real v)=0
Write a scalar real under name.
virtual void ReadIndex(const std::string &name, index &v)=0
Read a scalar index into v.
virtual void WriteRowsizeVector(const std::string &name, const std::vector< rowsize > &v, ArrayGlobalOffset offset)=0
Write a rowsize vector (collective for H5).
virtual void ReadSharedIndexVector(const std::string &name, ssp< host_device_vector< index > > &v, ArrayGlobalOffset &offset)=0
virtual void GoToPath(const std::string &p)=0
Navigate to an existing path. Supports / -separated segments.
Host + optional device vector of trivially copyable T.
Definition Vector.hpp:217
ssp< SerializerBase > SerializerBaseSSP
DNDS_CONSTANT const index UnInitIndex
Sentinel "not initialised" index value (= INT64_MIN).
Definition Defines.hpp:181
int64_t index
Global row / DOF index type (signed 64-bit; handles multi-billion-cell meshes).
Definition Defines.hpp:112
std::shared_ptr< T > ssp
Shortened alias for std::shared_ptr used pervasively in DNDSR.
Definition Defines.hpp:143
double real
Canonical floating-point scalar used throughout DNDSR (double precision).
Definition Defines.hpp:110
Lightweight bundle of an MPI communicator and the calling rank's coordinates.
Definition MPI.hpp:231
Eigen::Matrix< real, 5, 1 > v
const tPoint const tPoint const tPoint & p