DNDSR 0.1.0.dev1+gcd065ad
Distributed Numeric Data Structure for CFV
Loading...
Searching...
No Matches
SerializerH5.hpp
Go to the documentation of this file.
1#pragma once
2/// @file SerializerH5.hpp
3/// @brief MPI-parallel HDF5 serializer implementing the SerializerBase interface.
4/// @par Unit Test Coverage (test_Serializer.cpp, MPI np=1,2,4)
5/// - Scalar round-trip: WriteInt/ReadInt, WriteIndex/ReadIndex,
6/// WriteReal/ReadReal, WriteString/ReadString
7/// - Vector round-trip: WriteRealVector, WriteIndexVector with explicit
8/// ArrayGlobalOffset; ReadRealVector/ReadIndexVector with ArrayGlobalOffset_Unknown
9/// - Distributed vector: non-uniform per-rank sizes, write with explicit offset,
10/// read with both ArrayGlobalOffset_Unknown (auto-detect from ::rank_offsets)
11/// and explicit offset
12/// - uint8 distributed round-trip: two-pass read (nullptr size query, then read)
13/// - Path operations: CreatePath, GoToPath, GetCurrentPath, ListCurrentPath
14/// (groups materialized by writing content), WriteInt/ReadInt on nested paths
15/// - String round-trip: WriteString/ReadString with fixed-length HDF5 attributes
16/// @par Collective I/O and zero-size partitions
17/// All Read/Write vector and byte-array methods use MPI-collective HDF5 calls.
18/// Every rank must call them in the same order, even when its local element
19/// count is 0 (which happens when nGlobal < nRanks under EvenSplit).
20///
21/// Internally, ReadDataVector uses a two-pass pattern:
22/// - Pass 1 (buf == nullptr): queries dataset size and resolves the offset.
23/// - Pass 2 (buf != nullptr): performs the collective H5Dread.
24///
25/// When local size is 0, std::vector<>::data() / host_device_vector<>::data()
26/// may return nullptr, which would skip the H5Dread block (guarded by
27/// `if (buf != nullptr)`) and hang the other ranks. To prevent this, each
28/// Read*Vector / ReadShared*Vector caller passes a dummy stack pointer when
29/// size == 0. Callers of ReadUint8Array must do the same (see SerializerBase).
30///
31/// H5_ReadDataset and H5_WriteDataset accept nLocal == 0: the hyperslab
32/// selection with count == 0 selects nothing, so no data is transferred,
33/// but the rank still participates in the collective call.
34///
35/// @par Not Yet Tested
36/// - SetChunkAndDeflate impact, SetCollectiveRW impact
37/// - WriteSharedIndexVector / ReadSharedIndexVector (H5 deduplication)
38/// - WriteRowsizeVector / ReadRowsizeVector
39/// - WriteIndexVectorPerRank
40/// - ArrayGlobalOffset_One semantics (rank-0-only write)
41/// - ArrayGlobalOffset_Parts auto-offset
42#include "SerializerBase.hpp"
43
44#include <hdf5.h>
45#include <fstream>
46#include <map>
47#include <set>
48
49namespace DNDS::Serializer
50{
51 /// @brief MPI-parallel HDF5 serializer; all ranks collectively read/write a single .h5 file.
53 {
54 bool reading = true;
55 std::vector<std::string> cPathSplit;
56 std::string cP; // current path
57 // ptr_2_pth and pth_2_ssp are inherited from SerializerBase
58
59 MPIInfo mpi;
60 MPI_Comm commDup{MPI_COMM_NULL};
61
62 hid_t h5file{NULL};
63
64 int64_t chunksize{0};
65 int deflateLevel{0};
66 bool collectiveMetadataRW = true;
67 bool collectiveDataRW = false;
68
69 public:
70 SerializerH5(const MPIInfo &_mpi) : SerializerBase(), mpi(_mpi)
71 {
72 MPI_Comm_dup(mpi.comm, &commDup);
73 }
74
75 void SetChunkAndDeflate(int64_t n_chunksize, int n_deflateLevel)
76 {
77 if (n_deflateLevel > 0)
78 DNDS_assert_info(n_chunksize > 0, "chunksize must be positive when using deflate!");
79 chunksize = n_chunksize;
80 deflateLevel = n_deflateLevel;
81 }
82
83 void SetCollectiveRW(bool metadata, bool data)
84 {
85 collectiveMetadataRW = metadata;
86 collectiveDataRW = data;
87 }
88
89 void OpenFile(const std::string &fName, bool read) override;
90 void CloseFile() override;
92 void CreatePath(const std::string &p) override;
93 void GoToPath(const std::string &p) override;
94 bool IsPerRank() override { return false; }
95 std::string GetCurrentPath() override;
96 std::set<std::string> ListCurrentPath() override;
97 int GetMPIRank() override { return mpi.rank; }
98 int GetMPISize() override { return mpi.size; }
99 const MPIInfo &getMPI() override { return mpi; }
100
101 void WriteInt(const std::string &name, int v) override;
102 void WriteIndex(const std::string &name, index v) override;
103 void WriteReal(const std::string &name, real v) override;
104 void WriteString(const std::string &name, const std::string &v) override;
105
106 void WriteIndexVector(const std::string &name, const std::vector<index> &v, ArrayGlobalOffset offset) override;
107 void WriteRowsizeVector(const std::string &name, const std::vector<rowsize> &v, ArrayGlobalOffset offset) override;
108 void WriteRealVector(const std::string &name, const std::vector<real> &v, ArrayGlobalOffset offset) override;
109 void WriteSharedIndexVector(const std::string &name, const ssp<host_device_vector<index>> &v, ArrayGlobalOffset offset) override;
110 void WriteSharedRowsizeVector(const std::string &name, const ssp<host_device_vector<rowsize>> &v, ArrayGlobalOffset offset) override;
111
112 void WriteUint8Array(const std::string &name, const uint8_t *data, index size, ArrayGlobalOffset offset) override;
113
114 void WriteIndexVectorPerRank(const std::string &name, const std::vector<index> &v) override
115 {
116 this->WriteIndexVector(name, v, ArrayGlobalOffset{index(mpi.size) * index(v.size()), index(mpi.rank) * index(v.size())});
117 }
118
119 void ReadInt(const std::string &name, int &v) override;
120 void ReadIndex(const std::string &name, index &v) override;
121 void ReadReal(const std::string &name, real &v) override;
122 void ReadString(const std::string &name, std::string &v) override;
123
124 void ReadIndexVector(const std::string &name, std::vector<index> &v, ArrayGlobalOffset &offset) override;
125 void ReadRowsizeVector(const std::string &name, std::vector<rowsize> &v, ArrayGlobalOffset &offset) override;
126 void ReadRealVector(const std::string &name, std::vector<real> &v, ArrayGlobalOffset &offset) override;
127 void ReadSharedIndexVector(const std::string &name, ssp<host_device_vector<index>> &v, ArrayGlobalOffset &offset) override;
128 void ReadSharedRowsizeVector(const std::string &name, ssp<host_device_vector<rowsize>> &v, ArrayGlobalOffset &offset) override;
129
130 void ReadUint8Array(const std::string &name, uint8_t *data, index &size, ArrayGlobalOffset &offset) override;
131
132 ~SerializerH5() override
133 {
135 MPI_Comm_free(&commDup);
136 }
137 };
138}
#define DNDS_assert_info(expr, info)
Debug-only assertion with an extra std::string info message.
Definition Errors.hpp:113
Base types and abstract interface for array serialization.
Describes one rank's window into a globally-distributed dataset.
Abstract interface for reading/writing scalars, vectors, and byte arrays.
MPI-parallel HDF5 serializer; all ranks collectively read/write a single .h5 file.
void WriteIndex(const std::string &name, index v) override
Write a scalar index under name.
void ReadRealVector(const std::string &name, std::vector< real > &v, ArrayGlobalOffset &offset) override
std::string GetCurrentPath() override
String form of the current path.
void WriteSharedIndexVector(const std::string &name, const ssp< host_device_vector< index > > &v, ArrayGlobalOffset offset) override
Write a shared index vector; deduplicated across multiple writes that share the same shared_ptr.
void ReadSharedRowsizeVector(const std::string &name, ssp< host_device_vector< rowsize > > &v, ArrayGlobalOffset &offset) override
void ReadIndexVector(const std::string &name, std::vector< index > &v, ArrayGlobalOffset &offset) override
void ReadIndex(const std::string &name, index &v) override
Read a scalar index into v.
void WriteInt(const std::string &name, int v) override
Write a scalar int under name at the current path.
void WriteUint8Array(const std::string &name, const uint8_t *data, index size, ArrayGlobalOffset offset) override
Write a raw byte buffer under name. offset.isDist() = true means the caller provides the exact per-ra...
std::set< std::string > ListCurrentPath() override
Names of direct children of the current path.
int GetMPISize() override
Rank count cached by the serializer.
void ReadInt(const std::string &name, int &v) override
Read a scalar int into v.
SerializerH5(const MPIInfo &_mpi)
void OpenFile(const std::string &fName, bool read) override
Open a backing file (H5 file or JSON file depending on subclass).
void WriteRealVector(const std::string &name, const std::vector< real > &v, ArrayGlobalOffset offset) override
Write a real vector (collective for H5).
const MPIInfo & getMPI() override
MPI context the serializer was opened with.
void GoToPath(const std::string &p) override
Navigate to an existing path. Supports / -separated segments.
void WriteReal(const std::string &name, real v) override
Write a scalar real under name.
void ReadReal(const std::string &name, real &v) override
Read a scalar real into v.
bool IsPerRank() override
Whether this serializer is per-rank (JSON-file-per-rank) or collective (shared H5 file)....
void ReadRowsizeVector(const std::string &name, std::vector< rowsize > &v, ArrayGlobalOffset &offset) override
void CloseFile() override
Close the backing file, flushing buffers.
int GetMPIRank() override
Rank index cached by the serializer (relevant for collective I/O).
void WriteString(const std::string &name, const std::string &v) override
Write a UTF-8 string under name.
void CreatePath(const std::string &p) override
Create a sub-path (H5 group / JSON object) at the current location.
void WriteIndexVectorPerRank(const std::string &name, const std::vector< index > &v) override
Write a per-rank index vector (replicated name, independent values).
void SetChunkAndDeflate(int64_t n_chunksize, int n_deflateLevel)
void SetCollectiveRW(bool metadata, bool data)
void ReadString(const std::string &name, std::string &v) override
Read a UTF-8 string into v.
void WriteSharedRowsizeVector(const std::string &name, const ssp< host_device_vector< rowsize > > &v, ArrayGlobalOffset offset) override
Write a shared rowsize vector; deduplicated across multiple writes.
void WriteRowsizeVector(const std::string &name, const std::vector< rowsize > &v, ArrayGlobalOffset offset) override
Write a rowsize vector (collective for H5).
void ReadSharedIndexVector(const std::string &name, ssp< host_device_vector< index > > &v, ArrayGlobalOffset &offset) override
void WriteIndexVector(const std::string &name, const std::vector< index > &v, ArrayGlobalOffset offset) override
Write an index vector (collective for H5). offset carries the distribution mode (ArrayGlobalOffset_Pa...
void ReadUint8Array(const std::string &name, uint8_t *data, index &size, ArrayGlobalOffset &offset) override
Two-pass byte array read.
Host + optional device vector of trivially copyable T.
Definition Vector.hpp:217
int64_t index
Global row / DOF index type (signed 64-bit; handles multi-billion-cell meshes).
Definition Defines.hpp:107
std::shared_ptr< T > ssp
Shortened alias for std::shared_ptr used pervasively in DNDSR.
Definition Defines.hpp:138
double real
Canonical floating-point scalar used throughout DNDSR (double precision).
Definition Defines.hpp:105
Lightweight bundle of an MPI communicator and the calling rank's coordinates.
Definition MPI.hpp:215
Eigen::Matrix< real, 5, 1 > v