Functional test for ArrayPair redistribution via Euler solver restart.

The test verifies that DOF data written in a restart file by one MPI partition
can be correctly read by a different partition (different np), producing
identical solver behaviour.

Steps:
 1. Run the Euler IV solver for 20 time steps with np=2, writing an H5 restart.
 2. Load that restart and immediately write it back at np=2 (reference).
 3. Load that restart and immediately write it back at np=3 (redistributed read).
 4. Compare the two restart H5 files: the DOF data must match to machine precision.

The file contains three test variants: same-np reseed, cross-np redistribution,
and a large-mesh multi-np (np=4 → np=4..8) case.

Run via pytest:
    pytest test/Euler/test_restart_redistribute.py -s
or standalone:
    python test/Euler/test_restart_redistribute.py
# Paths of the build tree, solver binary, configuration templates and meshes,
# all resolved relative to this test file's own directory.
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
PROJECT_ROOT = os.path.realpath(os.path.join(SCRIPT_DIR, "..", ".."))
BUILD_DIR = os.path.join(PROJECT_ROOT, "build")
EULER_EXE = os.path.join(BUILD_DIR, "app", "euler.exe")
BASE_CONFIG = os.path.join(PROJECT_ROOT, "cases", "euler", "euler_config_IV.json")
DEFAULT_CONFIG = os.path.join(PROJECT_ROOT, "cases", "euler", "euler_default_config.json")
MESH_SMALL = os.path.join(PROJECT_ROOT, "data", "mesh", "IV10_10.cgns")
MESH_LARGE = os.path.join(PROJECT_ROOT, "data", "mesh", "IV10_20.cgns")
49 """Remove // comments from JSON-with-comments (DNDS convention)."""
50 return re.sub(
r"//.*",
"", text)
# NOTE(review): fragment of a JSON-writing helper — its `def` line (original
# lines ~55-58) is missing from this chunk, so the function name is unknown.
# Writes `obj` to `path` as pretty-printed JSON (indent=4) — TODO recover
# the full definition from the original file.
59 with open(path,
"w")
as f:
60 json.dump(obj, f, indent=4)
def _run_solver(np_count, config_path, work_dir, overrides=None, timeout=300):
    """Run the Euler solver via mpirun and return (CompletedProcess, stdout text).

    Parameters
    ----------
    np_count : int
        Number of MPI ranks, passed to ``mpirun -np``.
    config_path : str
        Path of the JSON config handed to euler.exe.
    work_dir : str
        Working directory for the solver process.
    overrides : iterable of (key, value) pairs, optional
        Forwarded as ``-k key -v value`` command-line config overrides.
    timeout : float
        Seconds before subprocess.run raises TimeoutExpired.

    On a nonzero exit code the tail of the solver output is printed to ease
    debugging; callers are expected to assert on ``result.returncode``.
    """
    cmd = [
        "mpirun", "--oversubscribe",
        "-np", str(np_count),
        EULER_EXE, config_path,
    ]
    # Robustness fix: tolerate overrides=None (the default) instead of
    # iterating it unconditionally.
    for k, v in overrides or []:
        cmd.extend(["-k", k, "-v", v])
    env = os.environ.copy()
    result = subprocess.run(
        cmd, cwd=work_dir, timeout=timeout,
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
        env=env,
    )
    # stderr is merged into stdout above; decode defensively for later grep.
    stdout_text = result.stdout.decode("utf-8", errors="replace")
    if result.returncode != 0:
        print(f"=== SOLVER FAILED (np={np_count}) ===")
        print(stdout_text[-4000:])
    return result, stdout_text
# NOTE(review): reader for the DOF payload of a restart H5 file.  The `def`
# line (original lines ~85-86) is missing from this chunk, so the function's
# name is unknown; it clearly takes an `h5_path` argument and requires h5py.
87 """Read the DOF 'u' data from a restart H5 file.
89 Returns (origIndex, data) where:
90 - origIndex: 1-D array of original cell indices (global key)
91 - data: 2-D array of DOF values, shape (nGlobal, nVars)
93 with h5py.File(h5_path,
"r")
as f:
# Global key: original cell indices stored under /u/origIndex.
98 orig_idx = f[
"u"][
"origIndex"][:]
100 father_grp = f[
"u"][
"father"]
# The raw DOF payload may live at /u/father/data or /u/father/array/data
# depending on the writer layout; anything else raises KeyError below.
101 if "data" in father_grp:
102 raw_data = father_grp[
"data"][:]
103 elif "array" in father_grp
and "data" in father_grp[
"array"]:
104 raw_data = father_grp[
"array"][
"data"][:]
106 raise KeyError(f
"Cannot find data in {h5_path}: /u/father/ contains {list(father_grp.keys())}")
# Reshape the flat payload to (nGlobal, nVars); its size must divide evenly.
108 n_global = len(orig_idx)
109 assert raw_data.size % n_global == 0, (
110 f
"data size {raw_data.size} not divisible by nGlobal {n_global}"
112 n_vars = raw_data.size // n_global
113 data = raw_data.reshape(n_global, n_vars)
114 return orig_idx, data
118 """Reorder data rows by original index so that global ordering is canonical."""
119 order = np.argsort(orig_idx)
120 return orig_idx[order], data[order]
# NOTE(review): pytest fixture creating a module-scoped temporary work
# directory.  The `def` line (original line 127) is missing from this chunk;
# presumably the fixture yields `d` (around original lines 130-131) before
# the rmtree cleanup — confirm against the original file.
126@pytest.fixture(scope="module")
128 """Create a temporary working directory for the test."""
129 d = tempfile.mkdtemp(prefix=
"dnds_test_redist_")
132 shutil.rmtree(d, ignore_errors=
True)
# NOTE(review): builder for the step-1 solver config (signature per the
# trailing index: _make_step1_config(work_dir, mesh_file=MESH_SMALL)).
# Missing from this chunk: the `def` line, the lines that load the base
# config into `cfg` (original ~137-139), a few option lines, and the final
# JSON write / return (original ~182-186).
136 """Config for the initial 20-step run (produces the restart)."""
# --- time marching: 20 fixed implicit steps; restart reading disabled ---
140 cfg[
"timeMarchControl"][
"nTimeStep"] = 20
141 cfg[
"timeMarchControl"][
"tEnd"] = 1e10
142 cfg[
"timeMarchControl"][
"dtImplicit"] = 1.25e-3
143 cfg[
"timeMarchControl"][
"odeCode"] = 0
144 cfg[
"timeMarchControl"][
"steadyQuit"] =
False
145 cfg[
"timeMarchControl"][
"useRestart"] =
False
# --- output: effectively disable plot/data output; restart written at step 20 ---
148 cfg[
"outputControl"][
"nDataOut"] = 1000000
149 cfg[
"outputControl"][
"nDataOutC"] = 1000000
150 cfg[
"outputControl"][
"nConsoleCheck"] = 5
151 cfg[
"outputControl"][
"dataOutAtInit"] =
False
153 cfg[
"outputControl"][
"nRestartOut"] = 20
154 cfg[
"outputControl"][
"nRestartOutC"] = 1000000
# --- I/O: mesh under test, outputs under <work_dir>/step1/ ---
157 cfg[
"dataIOControl"][
"meshFile"] = mesh_file
158 out_base = os.path.join(work_dir,
"step1",
"out")
159 os.makedirs(os.path.dirname(out_base), exist_ok=
True)
160 cfg[
"dataIOControl"][
"outPltName"] = out_base
# Restart writer options: uncompressed HDF data, collective I/O on metadata
# only (hdfCollOnMeta=True, hdfCollOnData=False).
163 cfg[
"dataIOControl"][
"restartWriter"] = {
165 "hdfDeflateLevel": 0,
167 "hdfCollOnMeta":
True,
168 "hdfCollOnData":
False,
169 "jsonBinaryDeflateLevel": 5,
170 "jsonUseCodecOnUInt8":
True,
# --- low-order reconstruction; no mesh bisect/reorder for the write run ---
174 cfg[
"vfvSettings"][
"maxOrder"] = 1
175 cfg[
"vfvSettings"][
"intOrder"] = 1
178 cfg[
"dataIOControl"][
"meshDirectBisect"] = 0
179 cfg[
"dataIOControl"][
"meshReorderCells"] =
False
181 path = os.path.join(work_dir,
"step1_config.json")
# NOTE(review): builder for the step-2 (restart read-back) config.  Full
# signature per the trailing index: _make_step2_config(work_dir,
# restart_h5_path, tag, np_count, reorder_cells=False, mesh_bisect=0,
# mesh_file=MESH_SMALL).  The first `def` line, the cfg-loading lines, and
# the final JSON write / return are missing from this chunk; the visible
# line below is the signature's continuation.
187 reorder_cells=False, mesh_bisect=0, mesh_file=MESH_SMALL):
188 """Config for the 1-step restart run."""
# --- time marching: zero steps; just load the restart and stop ---
192 cfg[
"timeMarchControl"][
"nTimeStep"] = 0
193 cfg[
"timeMarchControl"][
"tEnd"] = 1e10
194 cfg[
"timeMarchControl"][
"dtImplicit"] = 1.25e-3
195 cfg[
"timeMarchControl"][
"odeCode"] = 0
196 cfg[
"timeMarchControl"][
"steadyQuit"] =
False
197 cfg[
"timeMarchControl"][
"useRestart"] =
True
# --- point the solver at the restart file written by step 1 ---
200 cfg[
"restartState"] = {
204 "lastRestartFile": restart_h5_path,
205 "otherRestartFile":
"",
206 "otherRestartStoreDim": [0],
# --- output: write a restart immediately at init, everything else disabled ---
210 cfg[
"outputControl"][
"nDataOut"] = 1000000
211 cfg[
"outputControl"][
"nDataOutC"] = 1000000
212 cfg[
"outputControl"][
"nConsoleCheck"] = 1
213 cfg[
"outputControl"][
"dataOutAtInit"] =
False
214 cfg[
"outputControl"][
"restartOutAtInit"] =
True
215 cfg[
"outputControl"][
"nRestartOut"] = 1000000
216 cfg[
"outputControl"][
"nRestartOutC"] = 1000000
# --- I/O: outputs under <work_dir>/step2_<tag>/ ---
219 cfg[
"dataIOControl"][
"meshFile"] = mesh_file
220 out_base = os.path.join(work_dir, f
"step2_{tag}",
"out")
221 os.makedirs(os.path.dirname(out_base), exist_ok=
True)
222 cfg[
"dataIOControl"][
"outPltName"] = out_base
# Restart writer options: same settings as the step-1 config.
225 cfg[
"dataIOControl"][
"restartWriter"] = {
227 "hdfDeflateLevel": 0,
229 "hdfCollOnMeta":
True,
230 "hdfCollOnData":
False,
231 "jsonBinaryDeflateLevel": 5,
232 "jsonUseCodecOnUInt8":
True,
# --- low-order reconstruction; bisect/reorder configurable per test variant ---
235 cfg[
"vfvSettings"][
"maxOrder"] = 1
236 cfg[
"vfvSettings"][
"intOrder"] = 1
237 cfg[
"dataIOControl"][
"meshDirectBisect"] = mesh_bisect
238 cfg[
"dataIOControl"][
"meshReorderCells"] = reorder_cells
240 path = os.path.join(work_dir, f
"step2_{tag}_config.json")
246 """Find restart H5 files in directory tree."""
248 for root, dirs, files
in os.walk(search_dir):
250 if f.endswith(
".restart.dnds.h5"):
251 restart_files.append(os.path.join(root, f))
252 assert len(restart_files) >= 1, (
253 f
"No restart H5 files found in {search_dir} ({label})"
255 return restart_files[0]
# NOTE(review): comparator for two restart H5 files (signature per trailing
# index: _compare_restart_h5(restart_a, restart_b, tol=1e-10)).  Missing from
# this chunk: the `def` line and the lines (~260-265) that read both files
# and produce the *_sorted arrays — presumably via the DOF reader plus a
# sort-by-origIndex step; confirm against the original file.
259 """Compare two restart H5 files by origIndex. Returns max_diff, rel_norm."""
# Both restarts must cover exactly the same set of original cell indices.
266 assert np.array_equal(orig_a_sorted, orig_b_sorted), (
267 "Original index sets differ between restarts"
# Max elementwise difference and relative L2 difference of the DOF data;
# the 1e-300 guard avoids division by zero for an all-zero reference.
270 max_diff = np.max(np.abs(data_a_sorted - data_b_sorted))
271 rel_norm = np.linalg.norm(data_a_sorted - data_b_sorted) / (np.linalg.norm(data_a_sorted) + 1e-300)
273 print(f
" nGlobal cells: {len(orig_a_sorted)}")
274 print(f
" nVars per cell: {data_a_sorted.shape[1]}")
275 print(f
" Max abs diff: {max_diff:.6e}")
276 print(f
" Rel L2 diff: {rel_norm:.6e}")
# Fail when the maximum difference exceeds the tolerance.
278 assert max_diff < tol, (
279 f
"DOF data differs too much: max_diff={max_diff:.6e}, rel={rel_norm:.6e}"
281 return max_diff, rel_norm
# NOTE(review): step-1 driver (signature per trailing index:
# _run_step1(work_dir, np_write, mesh_file=MESH_SMALL)).  Missing from this
# chunk: the `def` line, the line building `step1_config` (~286), the line
# assigning `restart_h5` (~290), and the trailing return.
285 """Run step 1: initial 20-step run producing a restart."""
# The solver apparently expects euler_default_config.json inside work_dir.
287 shutil.copy(DEFAULT_CONFIG, os.path.join(work_dir,
"euler_default_config.json"))
288 result, stdout =
_run_solver(np_write, step1_config, work_dir)
289 assert result.returncode == 0, f
"Step 1 solver failed:\n{stdout[-2000:]}"
291 print(f
" Restart file: {restart_h5}")
# NOTE(review): missing from this chunk: the line assigning `restart_out`
# (original ~300) and the trailing return — confirm against the original.
295def _run_step2(work_dir, restart_h5, tag, np_count, overrides=None, mesh_file=MESH_SMALL):
296 """Run step 2: load restart and immediately write it back."""
# Build a read-back config (nTimeStep=0, restartOutAtInit=True) and run it
# at the requested rank count, forwarding any config overrides.
297 step2_config, _ =
_make_step2_config(work_dir, restart_h5, tag, np_count, mesh_file=mesh_file)
298 result, stdout =
_run_solver(np_count, step2_config, work_dir, overrides=overrides)
299 assert result.returncode == 0, f
"Step 2 ({tag}) solver failed:\n{stdout[-2000:]}"
301 print(f
" Output restart: {restart_out}")
# NOTE(review): module-scoped temp-dir fixture.  The `def` line (original
# ~309) and the yield (~312) are missing; presumably this is `work_dir`
# given the test signatures.  Its body appears identical to the fixture
# around original lines 126-132 — possible duplicate definition; verify.
308@pytest.fixture(scope="module")
310 """Create a temporary working directory for the test."""
311 d = tempfile.mkdtemp(prefix=
"dnds_test_redist_")
313 shutil.rmtree(d, ignore_errors=
True)
# NOTE(review): module-scoped temp-dir fixture for the large-mesh test.
# The `def` line (original ~317) and the yield (~320) are missing;
# presumably this is `work_dir_large` given the test signatures — confirm.
316@pytest.fixture(scope="module")
318 """Create a temporary working directory for the large-mesh test."""
319 d = tempfile.mkdtemp(prefix=
"dnds_test_redist_large_")
321 shutil.rmtree(d, ignore_errors=
True)
# NOTE(review): same-np redistribution test (signature per trailing index:
# test_restart_redistribute_same_np(work_dir)).  Missing from this chunk:
# the `def` line, the `np_write` assignment (~328-330), the head of the
# `restart_b = _run_step2(` call (~338), and the final
# `_compare_restart_h5(restart_a, restart_b)` call (~345) implied by the
# "Comparing" banner.
325 """Write restart at np=2, read at np=2 with different Metis seed."""
# Preconditions: solver binary and small mesh must exist.
326 assert os.path.isfile(EULER_EXE), f
"euler.exe not found: {EULER_EXE}"
327 assert os.path.isfile(MESH_SMALL), f
"Mesh file not found: {MESH_SMALL}"
331 print(f
"\n=== Step 1: 20-step run with np={np_write} ===")
332 restart_h5 =
_run_step1(work_dir, np_write, mesh_file=MESH_SMALL)
334 print(f
"\n=== Step 2a: load with np=2, same partition (reference) ===")
335 restart_a =
_run_step2(work_dir, restart_h5,
"ref", 2, mesh_file=MESH_SMALL)
336 print(f
337 print(f
"\n=== Step 2b: load with np=2, different Metis seed ===")
# Same rank count, but a different Metis seed forces a different partition.
339 work_dir, restart_h5,
"reseed", 2,
340 overrides=[(
"/dataIOControl/meshPartitionOptions/metisSeed",
"42")],
341 mesh_file=MESH_SMALL,
344 print(
"\n=== Comparing: same-np redistribution ===")
346 print(
"=== PASS ===")
# NOTE(review): cross-np test (signature per trailing index:
# test_restart_redistribute_different_np(work_dir)).  Reuses the step-1 and
# step-2a artifacts left in the module-scoped work_dir by the same-np test.
# Missing from this chunk: the `def` line and the final
# `_compare_restart_h5(restart_ref, restart_c)` call implied by the banner.
350 """Write restart at np=2, read at np=3 (cross-np redistribution)."""
351 assert os.path.isfile(EULER_EXE), f
"euler.exe not found: {EULER_EXE}"
# Reuse the restart written by the earlier step-1 run.
354 restart_h5 =
_find_restart_h5(os.path.join(work_dir,
"step1"),
"step1 (reuse)")
355 print(f
"\n Reusing Step 1 restart: {restart_h5}")
# Reference restart from the earlier np=2 read-back.
357 restart_ref =
_find_restart_h5(os.path.join(work_dir,
"step2_ref"),
"ref (reuse)")
359 print(f
"\n=== Step 2c: load with np=3 (cross-np redistribution) ===")
360 restart_c =
_run_step2(work_dir, restart_h5,
"np3", 3, mesh_file=MESH_SMALL)
362 print(
"\n=== Comparing: cross-np redistribution ===")
364 print(
"=== PASS ===")
# NOTE(review): large-mesh multi-np test (signature per trailing index:
# test_restart_redistribute_large_mesh_multi_np(work_dir_large)).  Missing
# from this chunk: the `def` line, the `np_write` assignment (docstring says
# np=4), the else-branch for np_read != np_write (its overrides/tag), the
# head of the `_run_step2(` call (~389), and the compare call (~395).
368 """Write restart at np=4 with 20x20 mesh, read at np=4..8."""
369 assert os.path.isfile(EULER_EXE), f
"euler.exe not found: {EULER_EXE}"
370 assert os.path.isfile(MESH_LARGE), f
"Mesh file not found: {MESH_LARGE}"
374 print(f
"\n=== Step 1: 20-step run with np={np_write}, mesh=IV10_20 (400 cells) ===")
375 restart_h5 =
_run_step1(work_dir_large, np_write, mesh_file=MESH_LARGE)
377 print(f
"\n=== Reference: load with np={np_write} ===")
378 restart_ref =
_run_step2(work_dir_large, restart_h5,
"ref", np_write, mesh_file=MESH_LARGE)
# Read back at every rank count 4..8 and compare against the reference.
380 for np_read
in [4, 5, 6, 7, 8]:
# Same rank count as the writer: perturb the Metis seed instead.
383 if np_read == np_write:
385 overrides = [(
"/dataIOControl/meshPartitionOptions/metisSeed",
"99")]
386 tag = f
"np{np_read}_reseed"
388 print(f
"\n=== Load with np={np_read} ({tag}) ===")
390 work_dir_large, restart_h5, tag, np_read,
391 overrides=overrides, mesh_file=MESH_LARGE,
394 print(f
"=== Comparing np={np_write} vs np={np_read} ===")
396 print(f
"=== PASS: np={np_read} ===")
398 print(
"\n=== ALL np=4..8 PASS ===")
# NOTE(review): standalone entry point.  The call lines inside each `with`
# block (original ~403-404 and ~406-407) are missing from this chunk;
# presumably they invoke the test functions with `d` as the work dir —
# confirm against the original file.
401if __name__ ==
"__main__":
402 with tempfile.TemporaryDirectory(prefix=
"dnds_test_redist_")
as d:
405 with tempfile.TemporaryDirectory(prefix=
"dnds_test_redist_large_")
as d:
# NOTE(review): the statements that stood here were stray top-level call
# expressions (an apparently machine-appended signature index).  Executed at
# module import they would raise NameError on undefined names such as
# ``np_count`` and ``config_path``, so they have been converted to a
# documentation comment.  Module API as listed by that index:
#
#   _strip_json_comments(text)
#   _run_solver(np_count, config_path, work_dir, overrides=None, timeout=300)
#   _make_step1_config(work_dir, mesh_file=MESH_SMALL)
#   _make_step2_config(work_dir, restart_h5_path, tag, np_count,
#                      reorder_cells=False, mesh_bisect=0, mesh_file=MESH_SMALL)
#   _find_restart_h5(search_dir, label="")
#   _gather_by_orig_index(orig_idx, data)
#   _compare_restart_h5(restart_a, restart_b, tol=1e-10)
#   _run_step1(work_dir, np_write, mesh_file=MESH_SMALL)
#   _run_step2(work_dir, restart_h5, tag, np_count, overrides=None, mesh_file=MESH_SMALL)
#   test_restart_redistribute_same_np(work_dir)
#   test_restart_redistribute_different_np(work_dir)
#   test_restart_redistribute_large_mesh_multi_np(work_dir_large)