DNDSR 0.1.0.dev1+gcd065ad
Distributed Numeric Data Structure for CFV
Loading...
Searching...
No Matches
SerializerBase.hpp
Go to the documentation of this file.
1#pragma once
2/// @file SerializerBase.hpp
3/// @brief Base types and abstract interface for array serialization.
4
5#include "Defines.hpp"
6#include "MPI.hpp"
7#include <limits>
8#include <set>
9#include "DeviceStorage.hpp"
10#include "Vector.hpp"
11
12namespace DNDS::Serializer
13{
14 /// @brief Sentinel: "offset = Parts", indicating each rank writes its own slab.
15 static const index Offset_Parts = -1;
16 /// @brief Sentinel: "offset = One", indicating the first rank owns the whole dataset.
17 static const index Offset_One = -2;
18 /// @brief Sentinel: "even-split read" (see @ref ArrayGlobalOffset_EvenSplit).
19 static const index Offset_EvenSplit = -3;
20 /// @brief Sentinel: offset unknown / uninitialised.
21 static const index Offset_Unknown = UnInitIndex;
22
23 /**
24 * @brief Describes one rank's window into a globally-distributed dataset.
25 *
26 * @details Stores a `(localSize, globalOffset)` pair. The offset is
27 * overloaded to carry the special sentinels @ref Offset_Parts / @ref Offset_One /
28 * @ref Offset_EvenSplit / @ref Offset_Unknown (negative values). Multiplication /
29 * division are defined so that byte offsets can be derived from element
30 * offsets (`offset * sizeof(T)`) with overflow guards.
31 */
33 {
34 index _size{0};
35 index _offset{0};
36
37 public:
38 static_assert(UnInitIndex < 0);
39 /// @brief Construct with explicit local size and global offset.
40 ArrayGlobalOffset(index __size, index __offset) : _size(__size), _offset(__offset) {}
41
42 /// @brief Local size this rank owns (in element units of the caller's choosing).
43 [[nodiscard]] index size() const { return _size; }
44 /// @brief Global offset of this rank's data (or a sentinel value, see
45 /// @ref Offset_Parts etc.).
46 [[nodiscard]] index offset() const { return _offset; }
47
48 /// @brief Scale the descriptor's element count by `R` (and the offset
49 /// too if it is a real offset, not a sentinel). Used for byte-level
50 /// offsets: `elemOffset * sizeof(T)`.
52 {
53 if (_offset >= 0)
54 {
55 DNDS_assert_info(R == 0 || _size <= std::numeric_limits<index>::max() / R,
56 "Overflow in ArrayGlobalOffset size multiplication");
57 DNDS_assert_info(R == 0 || _offset <= std::numeric_limits<index>::max() / R,
58 "Overflow in ArrayGlobalOffset offset multiplication");
59 return ArrayGlobalOffset{_size * R, _offset * R};
60 }
61 else
62 return ArrayGlobalOffset{_size * R, _offset};
63 }
64
65 /// @brief Inverse of #operator*. Real-offset descriptors scale both
66 /// size and offset; sentinel descriptors only scale the size.
68 {
69 if (_offset >= 0)
70 return ArrayGlobalOffset{_size / R, _offset / R};
71 else
72 return ArrayGlobalOffset{_size / R, _offset};
73 }
74
75 /// @brief Assert that both size and offset are multiples of `R`.
76 /// @details Used when translating between element and byte offsets.
77 void CheckMultipleOf(index R) const
78 {
79 if (_offset >= 0)
80 {
81 DNDS_assert_info(_size % R == 0, fmt::format("_size [{}] must be multiple of R [{}]", _size, R));
82 DNDS_assert_info(_offset % R == 0, fmt::format("_offset [{}] must be multiple of R [{}]", _offset, R));
83 }
84 }
85
86 /// @brief Equality: sentinel offsets compare only by offset,
87 /// real-offset descriptors compare by the full `(size, offset)` pair.
88 bool operator==(const ArrayGlobalOffset &other) const
89 {
90 if (_offset >= 0)
91 return _size == other._size && _offset == other._offset;
92 else
93 return _offset == other._offset;
94 }
95
96 /// @brief Pretty-printed representation for logging.
97 operator std::string() const
98 {
99 return fmt::format("ArrayGlobalOffset{{size: {}, offset: {}}}", _size, _offset);
100 }
101
102 /// @brief Whether this descriptor carries a real distributed offset
103 /// (rather than a sentinel like @ref Offset_Parts).
104 [[nodiscard]] bool isDist() const
105 {
106 return _offset >= 0;
107 }
108
109 friend std::ostream &operator<<(std::ostream &o, const ArrayGlobalOffset &v)
110 {
111 o << (std::string)(v);
112 return o;
113 }
114 };
115
116 /// @brief Sentinel: offset unknown / uninitialised.
117 static const ArrayGlobalOffset ArrayGlobalOffset_Unknown = ArrayGlobalOffset{0, Offset_Unknown};
118 /// @brief Sentinel: rank 0 owns the whole dataset.
119 static const ArrayGlobalOffset ArrayGlobalOffset_One = ArrayGlobalOffset{0, Offset_One};
120 /// @brief Sentinel: each rank writes its slab; serializer infers offsets via MPI_Scan.
121 static const ArrayGlobalOffset ArrayGlobalOffset_Parts = ArrayGlobalOffset{0, Offset_Parts};
122 /// @brief Sentinel: even-split read (each rank reads ~N_global/nRanks rows
123 /// starting at `rank * N_global / nRanks`). Used during repartitioned restarts.
124 static const ArrayGlobalOffset ArrayGlobalOffset_EvenSplit = ArrayGlobalOffset{0, Offset_EvenSplit};
125
126 /// @brief Abstract interface for reading/writing scalars, vectors, and byte arrays.
127 ///
128 /// ## Collective semantics (SerializerH5)
129 ///
130 /// For the HDF5 implementation, all Read/Write vector and array methods are
131 /// **MPI-collective**: every rank must call the same method in the same order,
132 /// even if a rank reads/writes 0 elements. Failing to participate causes a
133 /// hang because HDF5 collective I/O synchronizes across the communicator.
134 ///
135 /// ## Zero-size reads
136 ///
137 /// Some ranks may legitimately have 0 elements to read (e.g., when
138 /// `nGlobal < nRanks` in an even-split read). The Read*Vector and
139 /// ReadShared*Vector implementations handle this by passing a dummy
140 /// non-null pointer to the internal HDF5 read when the output container
141 /// is empty, so that `std::vector<>::data()` returning nullptr does not
142 /// cause the rank to skip the collective H5Dread call.
143 ///
144 /// @warning **ReadUint8Array** uses an explicit two-pass pattern: the
145 /// caller first calls with `data == nullptr` to query the size, then
146 /// calls again with a buffer. When the queried size is 0, the caller
147 /// must still pass a non-null `data` pointer on the second call so
148 /// that the collective H5Dread is not skipped. Use a stack dummy:
149 /// @code
150 /// uint8_t dummy;
151 /// ser->ReadUint8Array(name, bufferSize == 0 ? &dummy : buf, bufferSize, offset);
152 /// @endcode
154 {
155 protected:
156 /// Shared-pointer deduplication map: raw pointer -> (kept-alive shared_ptr, H5/JSON path).
157 /// By storing the shared_ptr itself we prevent the pointed-to memory from being
158 /// freed and its address reused, which would cause false dedup hits.
159 std::map<void *, std::pair<std::shared_ptr<void>, std::string>> ptr_2_pth;
160
161 /// Reverse map for read-side dedup: path -> raw pointer to the ssp local variable.
162 std::map<std::string, void *> pth_2_ssp;
163
164 /// Check if a shared pointer was already written; if so return its path.
165 template <class T>
166 bool dedupLookup(const ssp<T> &v, std::string &outPath)
167 {
168 auto it = ptr_2_pth.find(v.get());
169 if (it != ptr_2_pth.end())
170 {
171 outPath = it->second.second;
172 return true;
173 }
174 return false;
175 }
176
177 /// Register a shared pointer after writing its data.
178 template <class T>
179 void dedupRegister(const ssp<T> &v, const std::string &path)
180 {
181 ptr_2_pth[v.get()] = {std::static_pointer_cast<void>(v), path};
182 }
183
184 /// Clear all dedup state (call on CloseFile).
186 {
187 ptr_2_pth.clear();
188 pth_2_ssp.clear();
189 }
190
191 public:
192 virtual ~SerializerBase(); // define in CPP
193 /// @brief Open a backing file (H5 file or JSON file depending on subclass).
194 /// @param read `true` for reading, `false` for writing.
195 virtual void OpenFile(const std::string &fName, bool read) = 0;
196 /// @brief Close the backing file, flushing buffers.
197 virtual void CloseFile() = 0;
198 /// @brief Create a sub-path (H5 group / JSON object) at the current location.
199 virtual void CreatePath(const std::string &p) = 0;
200 /// @brief Navigate to an existing path. Supports `/` -separated segments.
201 virtual void GoToPath(const std::string &p) = 0;
202 /// @brief Whether this serializer is per-rank (JSON-file-per-rank) or
203 /// collective (shared H5 file). Controls which API entry points are used.
204 virtual bool IsPerRank() = 0;
205 /// @brief String form of the current path.
206 virtual std::string GetCurrentPath() = 0;
207 /// @brief Names of direct children of the current path.
208 virtual std::set<std::string> ListCurrentPath() = 0;
209 /// @brief Rank index cached by the serializer (relevant for collective I/O).
210 virtual int GetMPIRank() = 0;
211 /// @brief Rank count cached by the serializer.
212 virtual int GetMPISize() = 0;
213 /// @brief MPI context the serializer was opened with.
214 virtual const MPIInfo& getMPI() = 0;
215
216 /// @brief Write a scalar int under `name` at the current path.
217 virtual void WriteInt(const std::string &name, int v) = 0;
218 /// @brief Write a scalar #index under `name`.
219 virtual void WriteIndex(const std::string &name, index v) = 0;
220 /// @brief Write a scalar #real under `name`.
221 virtual void WriteReal(const std::string &name, real v) = 0;
222 /// @brief Write a UTF-8 string under `name`.
223 virtual void WriteString(const std::string &name, const std::string &v) = 0;
224
225 /// @brief Write an index vector (collective for H5). `offset` carries the
226 /// distribution mode (#ArrayGlobalOffset_Parts, explicit offset, etc.).
227 virtual void WriteIndexVector(const std::string &name, const std::vector<index> &v, ArrayGlobalOffset offset) = 0;
228 /// @brief Write a rowsize vector (collective for H5).
229 virtual void WriteRowsizeVector(const std::string &name, const std::vector<rowsize> &v, ArrayGlobalOffset offset) = 0;
230 /// @brief Write a real vector (collective for H5).
231 virtual void WriteRealVector(const std::string &name, const std::vector<real> &v, ArrayGlobalOffset offset) = 0;
232 /// @brief Write a shared index vector; deduplicated across multiple writes
233 /// that share the same `shared_ptr`.
234 virtual void WriteSharedIndexVector(const std::string &name, const ssp<host_device_vector<index>> &v, ArrayGlobalOffset offset) = 0;
235 /// @brief Write a shared rowsize vector; deduplicated across multiple writes.
236 virtual void WriteSharedRowsizeVector(const std::string &name, const ssp<host_device_vector<rowsize>> &v, ArrayGlobalOffset offset) = 0;
237 /// @brief Write a raw byte buffer under `name`. `offset.isDist()` = true
238 /// means the caller provides the exact per-rank slab; otherwise the
239 /// buffer is treated according to the offset's sentinel.
240 virtual void WriteUint8Array(const std::string &name, const uint8_t *data, index size, ArrayGlobalOffset offset) = 0;
241
242 /**
243 * @brief Write a per-rank index vector (replicated name, independent values).
244 * @details Every rank writes its own vector under `name`; in the H5 case
245 * each rank's slab is placed in a separate dataset.
246 * @param name Dataset name (identical on every rank).
247 * @param v Rank-local vector; size may differ between ranks.
248 */
249 virtual void WriteIndexVectorPerRank(const std::string &name, const std::vector<index> &v) = 0;
250 // virtual void WriteIndexVectorParallel(const std::string &name, const host_device_vector<index> &v, ArrayGlobalOffset offset) = 0;
251 // virtual void WriteRowsizeVectorParallel(const std::string &name, const host_device_vector<rowsize> &v, ArrayGlobalOffset offset) = 0;
252 // virtual void WriteRealVectorParallel(const std::string &name, const host_device_vector<real> &v, ArrayGlobalOffset offset) = 0;
253 // virtual void WriteSharedIndexVectorParallel(const std::string &name, const ssp<host_device_vector<index>> &v, ArrayGlobalOffset offset) = 0;
254 // virtual void WriteSharedRowsizeVectorParallel(const std::string &name, const ssp<host_device_vector<rowsize>> &v, ArrayGlobalOffset offset) = 0;
255 // virtual void WriteUint8ArrayParallel(const std::string &name, const uint8_t *data, index size, ArrayGlobalOffset offset) = 0;
256
257 /// @brief Read a scalar int into `v`.
258 virtual void ReadInt(const std::string &name, int &v) = 0;
259 /// @brief Read a scalar #index into `v`.
260 virtual void ReadIndex(const std::string &name, index &v) = 0;
261 /// @brief Read a scalar #real into `v`.
262 virtual void ReadReal(const std::string &name, real &v) = 0;
263 /// @brief Read a UTF-8 string into `v`.
264 virtual void ReadString(const std::string &name, std::string &v) = 0;
265
266 /// Read methods resize the output container and populate it.
267 /// Internally these use a two-pass HDF5 pattern (size query, then data read).
268 /// Both passes are collective. When the local size is 0, a dummy non-null
269 /// pointer is passed to the second pass so the rank participates in H5Dread.
270 /// @{
271 virtual void ReadIndexVector(const std::string &name, std::vector<index> &v, ArrayGlobalOffset &offset) = 0;
272 virtual void ReadRowsizeVector(const std::string &name, std::vector<rowsize> &v, ArrayGlobalOffset &offset) = 0;
273 virtual void ReadRealVector(const std::string &name, std::vector<real> &v, ArrayGlobalOffset &offset) = 0;
274 virtual void ReadSharedIndexVector(const std::string &name, ssp<host_device_vector<index>> &v, ArrayGlobalOffset &offset) = 0;
275 virtual void ReadSharedRowsizeVector(const std::string &name, ssp<host_device_vector<rowsize>> &v, ArrayGlobalOffset &offset) = 0;
276 /// @}
277
278 /// @brief Two-pass byte array read.
279 ///
280 /// Pass 1 (data == nullptr): queries the local element count into `size`
281 /// and resolves `offset`.
282 /// Pass 2 (data != nullptr): reads `size` bytes into `data`.
283 ///
284 /// @warning When `size` is 0 after pass 1, the caller **must** still pass
285 /// a non-null `data` pointer on pass 2 so the rank participates in the
286 /// collective HDF5 read. Use a stack dummy (`uint8_t dummy; ... &dummy`).
287 virtual void ReadUint8Array(const std::string &name, uint8_t *data, index &size, ArrayGlobalOffset &offset) = 0;
288
289 // virtual void ReadIndexVectorParallel(const std::string &name, const host_device_vector<index> &v, ArrayGlobalOffset offset) = 0;
290 // virtual void ReadRowsizeVectorParallel(const std::string &name, const host_device_vector<rowsize> &v, ArrayGlobalOffset offset) = 0;
291 // virtual void ReadRealVectorParallel(const std::string &name, const host_device_vector<real> &v, ArrayGlobalOffset offset) = 0;
292 // virtual void ReadSharedIndexVectorParallel(const std::string &name, const ssp<host_device_vector<index>> v, ArrayGlobalOffset offset) = 0;
293 // virtual void ReadSharedRowsizeVectorParallel(const std::string &name, const ssp<host_device_vector<rowsize>> v, ArrayGlobalOffset offset) = 0;
294 // virtual void ReadUint8ArrayParallel(const std::string &name, const uint8_t *data, index size, ArrayGlobalOffset offset) = 0;
295 };
296
298}
Core type aliases, constants, and metaprogramming utilities for the DNDS framework.
Device memory abstraction layer with backend-specific storage and factory creation.
#define DNDS_assert_info(expr, info)
Debug-only assertion with an extra std::string info message.
Definition Errors.hpp:113
MPI wrappers: MPIInfo, collective operations, type mapping, CommStrategy.
Host-device vector types with optional GPU storage and device-side views.
Describes one rank's window into a globally-distributed dataset.
index offset() const
Global offset of this rank's data (or a sentinel value, see Offset_Parts etc.).
index size() const
Local size this rank owns (in element units of the caller's choosing).
ArrayGlobalOffset operator*(index R) const
Scale the descriptor's element count by R (and the offset too if it is a real offset,...
bool isDist() const
Whether this descriptor carries a real distributed offset (rather than a sentinel like Offset_Parts).
friend std::ostream & operator<<(std::ostream &o, const ArrayGlobalOffset &v)
ArrayGlobalOffset(index __size, index __offset)
Construct with explicit local size and global offset.
bool operator==(const ArrayGlobalOffset &other) const
Equality: sentinel offsets compare only by offset, real-offset descriptors compare by the full (size,...
ArrayGlobalOffset operator/(index R) const
Inverse of operator*. Real-offset descriptors scale both size and offset; sentinel descriptors only s...
void CheckMultipleOf(index R) const
Assert that both size and offset are multiples of R.
Abstract interface for reading/writing scalars, vectors, and byte arrays.
virtual bool IsPerRank()=0
Whether this serializer is per-rank (JSON-file-per-rank) or collective (shared H5 file)....
virtual std::set< std::string > ListCurrentPath()=0
Names of direct children of the current path.
virtual void CloseFile()=0
Close the backing file, flushing buffers.
virtual void ReadReal(const std::string &name, real &v)=0
Read a scalar real into v.
void dedupRegister(const ssp< T > &v, const std::string &path)
Register a shared pointer after writing its data.
virtual void WriteIndexVector(const std::string &name, const std::vector< index > &v, ArrayGlobalOffset offset)=0
Write an index vector (collective for H5). offset carries the distribution mode (ArrayGlobalOffset_Pa...
virtual void WriteString(const std::string &name, const std::string &v)=0
Write a UTF-8 string under name.
virtual void ReadInt(const std::string &name, int &v)=0
Read a scalar int into v.
std::map< void *, std::pair< std::shared_ptr< void >, std::string > > ptr_2_pth
virtual void ReadIndexVector(const std::string &name, std::vector< index > &v, ArrayGlobalOffset &offset)=0
virtual void ReadString(const std::string &name, std::string &v)=0
Read a UTF-8 string into v.
virtual void CreatePath(const std::string &p)=0
Create a sub-path (H5 group / JSON object) at the current location.
virtual const MPIInfo & getMPI()=0
MPI context the serializer was opened with.
virtual void WriteSharedIndexVector(const std::string &name, const ssp< host_device_vector< index > > &v, ArrayGlobalOffset offset)=0
Write a shared index vector; deduplicated across multiple writes that share the same shared_ptr.
virtual void WriteRealVector(const std::string &name, const std::vector< real > &v, ArrayGlobalOffset offset)=0
Write a real vector (collective for H5).
virtual void WriteSharedRowsizeVector(const std::string &name, const ssp< host_device_vector< rowsize > > &v, ArrayGlobalOffset offset)=0
Write a shared rowsize vector; deduplicated across multiple writes.
virtual void ReadUint8Array(const std::string &name, uint8_t *data, index &size, ArrayGlobalOffset &offset)=0
Two-pass byte array read.
virtual std::string GetCurrentPath()=0
String form of the current path.
bool dedupLookup(const ssp< T > &v, std::string &outPath)
Check if a shared pointer was already written; if so return its path.
std::map< std::string, void * > pth_2_ssp
Reverse map for read-side dedup: path -> raw pointer to the ssp local variable.
virtual void ReadRealVector(const std::string &name, std::vector< real > &v, ArrayGlobalOffset &offset)=0
virtual int GetMPIRank()=0
Rank index cached by the serializer (relevant for collective I/O).
virtual void WriteIndex(const std::string &name, index v)=0
Write a scalar index under name.
void dedupClear()
Clear all dedup state (call on CloseFile).
virtual void WriteIndexVectorPerRank(const std::string &name, const std::vector< index > &v)=0
Write a per-rank index vector (replicated name, independent values).
virtual void WriteInt(const std::string &name, int v)=0
Write a scalar int under name at the current path.
virtual void ReadRowsizeVector(const std::string &name, std::vector< rowsize > &v, ArrayGlobalOffset &offset)=0
virtual void WriteUint8Array(const std::string &name, const uint8_t *data, index size, ArrayGlobalOffset offset)=0
Write a raw byte buffer under name. offset.isDist() = true means the caller provides the exact per-ra...
virtual int GetMPISize()=0
Rank count cached by the serializer.
virtual void OpenFile(const std::string &fName, bool read)=0
Open a backing file (H5 file or JSON file depending on subclass).
virtual void ReadSharedRowsizeVector(const std::string &name, ssp< host_device_vector< rowsize > > &v, ArrayGlobalOffset &offset)=0
virtual void WriteReal(const std::string &name, real v)=0
Write a scalar real under name.
virtual void ReadIndex(const std::string &name, index &v)=0
Read a scalar index into v.
virtual void WriteRowsizeVector(const std::string &name, const std::vector< rowsize > &v, ArrayGlobalOffset offset)=0
Write a rowsize vector (collective for H5).
virtual void ReadSharedIndexVector(const std::string &name, ssp< host_device_vector< index > > &v, ArrayGlobalOffset &offset)=0
virtual void GoToPath(const std::string &p)=0
Navigate to an existing path. Supports / -separated segments.
Host + optional device vector of trivially copyable T.
Definition Vector.hpp:217
ssp< SerializerBase > SerializerBaseSSP
DNDS_CONSTANT const index UnInitIndex
Sentinel "not initialised" index value (= INT64_MIN).
Definition Defines.hpp:176
int64_t index
Global row / DOF index type (signed 64-bit; handles multi-billion-cell meshes).
Definition Defines.hpp:107
std::shared_ptr< T > ssp
Shortened alias for std::shared_ptr used pervasively in DNDSR.
Definition Defines.hpp:138
double real
Canonical floating-point scalar used throughout DNDSR (double precision).
Definition Defines.hpp:105
Lightweight bundle of an MPI communicator and the calling rank's coordinates.
Definition MPI.hpp:215
Eigen::Matrix< real, 5, 1 > v