"""
Functional test for ArrayPair redistribution via Euler solver restart.

The test verifies that DOF data written in a restart file by one MPI partition
can be correctly read by a different partition (different np), producing
identical solver behaviour.

Steps:
  1. Run the Euler IV solver for 20 time steps with np=2, writing an H5 restart.
  2. Load that restart and immediately write it back at np=2 (reference).
  3. Load that restart and immediately write it back at np=3 (redistributed read).
  4. Compare the two restart H5 files: the DOF data must match to machine precision.

The file contains three test variants: same-np reseed, cross-np redistribution,
and a large-mesh multi-np (np=4 → np=4..8) case.

Run with pytest:
  pytest test/Euler/test_restart_redistribute.py -s
or directly:
  python test/Euler/test_restart_redistribute.py
"""
# Filesystem layout, resolved relative to this test file.  The project root is
# two directory levels above the directory that contains this script.
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
PROJECT_ROOT = os.path.realpath(os.path.join(SCRIPT_DIR, "..", ".."))
BUILD_DIR = os.path.join(PROJECT_ROOT, "build")
# Solver executable launched through mpirun by the tests.
EULER_EXE = os.path.join(BUILD_DIR, "app", "euler.exe")
# Base solver configuration (JSON with // comments) that each step customises.
BASE_CONFIG = os.path.join(
    PROJECT_ROOT, "cases", "euler", "euler_config_IV.json")
44MESH_SMALL = os.path.join(PROJECT_ROOT,
"data",
"mesh",
46MESH_LARGE = os.path.join(PROJECT_ROOT,
"data",
"mesh",
51 """Remove // comments from JSON-with-comments (DNDS convention)."""
52 return re.sub(
r"//.*",
"", text)
61 with open(path,
"w")
as f:
62 json.dump(obj, f, indent=4)
def _run_solver(np_count, config_path, work_dir, overrides=None, timeout=300):
    """Run the Euler solver via mpirun and return the process result.

    Args:
        np_count: Number of MPI ranks passed to ``mpirun -np``.
        config_path: Path of the solver configuration file.
        work_dir: Directory the solver process is started in.
        overrides: Optional iterable of ``(key, value)`` pairs; each pair is
            appended to the command line as ``-k key -v value``.
        timeout: Seconds to wait before ``subprocess.run`` raises
            ``subprocess.TimeoutExpired``.

    Returns:
        ``(result, stdout_text)`` where ``result`` is the
        ``subprocess.CompletedProcess`` and ``stdout_text`` is the decoded,
        combined stdout/stderr of the solver.
    """
    cmd = [
        "mpirun", "--oversubscribe", "-np", str(np_count),
        EULER_EXE, config_path,
    ]
    # `overrides or ()` covers both None and an empty list; str() keeps the
    # argv list valid even if a caller passes a numeric value.
    for key, value in overrides or ():
        cmd.extend(["-k", str(key), "-v", str(value)])

    env = os.environ.copy()
    result = subprocess.run(
        cmd, cwd=work_dir, timeout=timeout,
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
        env=env,
    )
    stdout_text = result.stdout.decode("utf-8", errors="replace")
    if result.returncode != 0:
        # Only the tail of the log is echoed to keep failure output readable.
        print(f"=== SOLVER FAILED (np={np_count}) ===")
        print(stdout_text[-4000:])
    return result, stdout_text
89 """Read the DOF 'u' data from a restart H5 file.
91 Returns (origIndex, data) where:
92 - origIndex: 1-D array of original cell indices (global key)
93 - data: 2-D array of DOF values, shape (nGlobal, nVars)
95 with h5py.File(h5_path,
"r")
as f:
100 orig_idx = f[
"u"][
"origIndex"][:]
102 father_grp = f[
"u"][
"father"]
103 if "data" in father_grp:
104 raw_data = father_grp[
"data"][:]
105 elif "array" in father_grp
and "data" in father_grp[
"array"]:
106 raw_data = father_grp[
"array"][
"data"][:]
109 f
"Cannot find data in {h5_path}: /u/father/ contains {list(father_grp.keys())}")
111 n_global = len(orig_idx)
112 assert raw_data.size % n_global == 0, (
113 f
"data size {raw_data.size} not divisible by nGlobal {n_global}"
115 n_vars = raw_data.size // n_global
116 data = raw_data.reshape(n_global, n_vars)
117 return orig_idx, data
121 """Reorder data rows by original index so that global ordering is canonical."""
122 order = np.argsort(orig_idx)
123 return orig_idx[order], data[order]
129@pytest.fixture(scope="module")
131 """Create a temporary working directory for the test."""
132 d = tempfile.mkdtemp(prefix=
"dnds_test_redist_")
135 shutil.rmtree(d, ignore_errors=
True)
139 """Config for the initial 20-step run (produces the restart)."""
143 cfg[
"timeMarchControl"][
"nTimeStep"] = 20
144 cfg[
"timeMarchControl"][
"tEnd"] = 1e10
145 cfg[
"timeMarchControl"][
"dtImplicit"] = 1.25e-3
146 cfg[
"timeMarchControl"][
"odeCode"] = 0
147 cfg[
"timeMarchControl"][
"steadyQuit"] =
False
148 cfg[
"timeMarchControl"][
"useRestart"] =
False
151 cfg[
"outputControl"][
"nDataOut"] = 1000000
152 cfg[
"outputControl"][
"nDataOutC"] = 1000000
153 cfg[
"outputControl"][
"nConsoleCheck"] = 5
154 cfg[
"outputControl"][
"dataOutAtInit"] =
False
156 cfg[
"outputControl"][
"nRestartOut"] = 20
157 cfg[
"outputControl"][
"nRestartOutC"] = 1000000
160 cfg[
"dataIOControl"][
"meshFile"] = mesh_file
161 out_base = os.path.join(work_dir,
"step1",
"out")
162 os.makedirs(os.path.dirname(out_base), exist_ok=
True)
163 cfg[
"dataIOControl"][
"outPltName"] = out_base
166 cfg[
"dataIOControl"][
"restartWriter"] = {
168 "hdfDeflateLevel": 0,
170 "hdfCollOnMeta":
True,
171 "hdfCollOnData":
False,
172 "jsonBinaryDeflateLevel": 5,
173 "jsonUseCodecOnUInt8":
True,
177 cfg[
"vfvSettings"][
"maxOrder"] = 1
178 cfg[
"vfvSettings"][
"intOrder"] = 1
181 cfg[
"dataIOControl"][
"meshDirectBisect"] = 0
182 cfg[
"dataIOControl"][
"meshReorderCells"] =
False
184 path = os.path.join(work_dir,
"step1_config.json")
190 reorder_cells=False, mesh_bisect=0, mesh_file=MESH_SMALL):
191 """Config for the 1-step restart run."""
195 cfg[
"timeMarchControl"][
"nTimeStep"] = 1
196 cfg[
"timeMarchControl"][
"tEnd"] = 1e10
197 cfg[
"timeMarchControl"][
"dtImplicit"] = 1.25e-3
198 cfg[
"timeMarchControl"][
"odeCode"] = 0
199 cfg[
"timeMarchControl"][
"steadyQuit"] =
False
200 cfg[
"timeMarchControl"][
"useRestart"] =
True
203 cfg[
"restartState"] = {
207 "lastRestartFile": restart_h5_path,
208 "otherRestartFile":
"",
209 "otherRestartStoreDim": [0],
213 cfg[
"outputControl"][
"nDataOut"] = 1000000
214 cfg[
"outputControl"][
"nDataOutC"] = 1000000
215 cfg[
"outputControl"][
"nConsoleCheck"] = 1
216 cfg[
"outputControl"][
"dataOutAtInit"] =
False
217 cfg[
"outputControl"][
"restartOutAtInit"] =
True
218 cfg[
"outputControl"][
"nRestartOut"] = 1000000
219 cfg[
"outputControl"][
"nRestartOutC"] = 1000000
222 cfg[
"dataIOControl"][
"meshFile"] = mesh_file
223 out_base = os.path.join(work_dir, f
"step2_{tag}",
"out")
224 os.makedirs(os.path.dirname(out_base), exist_ok=
True)
225 cfg[
"dataIOControl"][
"outPltName"] = out_base
228 cfg[
"dataIOControl"][
"restartWriter"] = {
230 "hdfDeflateLevel": 0,
232 "hdfCollOnMeta":
True,
233 "hdfCollOnData":
False,
234 "jsonBinaryDeflateLevel": 5,
235 "jsonUseCodecOnUInt8":
True,
238 cfg[
"vfvSettings"][
"maxOrder"] = 1
239 cfg[
"vfvSettings"][
"intOrder"] = 1
240 cfg[
"dataIOControl"][
"meshDirectBisect"] = mesh_bisect
241 cfg[
"dataIOControl"][
"meshReorderCells"] = reorder_cells
243 path = os.path.join(work_dir, f
"step2_{tag}_config.json")
249 """Find restart H5 files in directory tree."""
251 for root, dirs, files
in os.walk(search_dir):
253 if f.endswith(
".restart.dnds.h5"):
254 restart_files.append(os.path.join(root, f))
255 assert len(restart_files) >= 1, (
256 f
"No restart H5 files found in {search_dir} ({label})"
258 return restart_files[0]
262 """Compare two restart H5 files by origIndex. Returns max_diff, rel_norm."""
269 assert np.array_equal(orig_a_sorted, orig_b_sorted), (
270 "Original index sets differ between restarts"
273 max_diff = np.max(np.abs(data_a_sorted - data_b_sorted))
274 rel_norm = np.linalg.norm(
275 data_a_sorted - data_b_sorted) / (np.linalg.norm(data_a_sorted) + 1e-300)
277 print(f
" nGlobal cells: {len(orig_a_sorted)}")
278 print(f
" nVars per cell: {data_a_sorted.shape[1]}")
279 print(f
" Max abs diff: {max_diff:.6e}")
280 print(f
" Rel L2 diff: {rel_norm:.6e}")
282 assert max_diff < tol, (
283 f
"DOF data differs too much: max_diff={max_diff:.6e}, rel={rel_norm:.6e}"
285 return max_diff, rel_norm
def _run_step1(work_dir, np_write, mesh_file=MESH_SMALL):
    """Run step 1: initial 20-step run producing a restart.

    Args:
        work_dir: Working directory; the step-1 config and output live here.
        np_write: Number of MPI ranks used for the run that writes the restart.
        mesh_file: Mesh file forwarded to the step-1 configuration.

    Returns:
        Path of the restart H5 file found under ``<work_dir>/step1``.
    """
    # NOTE(review): the config call, the restart lookup and the return were
    # reconstructed from the surrounding code - confirm against upstream.
    step1_config = _make_step1_config(work_dir, mesh_file=mesh_file)
    result, stdout = _run_solver(np_write, step1_config, work_dir)
    assert result.returncode == 0, f"Step 1 solver failed:\n{stdout[-2000:]}"
    restart_h5 = _find_restart_h5(os.path.join(work_dir, "step1"), "step1")
    print(f" Restart file: {restart_h5}")
    return restart_h5
def _run_step2(work_dir, restart_h5, tag, np_count, overrides=None, mesh_file=MESH_SMALL):
    """Run step 2: load restart and immediately write it back.

    Args:
        work_dir: Working directory shared with step 1.
        restart_h5: Restart file produced by step 1 that the solver loads.
        tag: Short label; it names the ``step2_<tag>`` output directory and
            appears in failure messages.
        np_count: Number of MPI ranks for this run.
        overrides: Optional ``(key, value)`` command-line overrides forwarded
            to the solver launcher.
        mesh_file: Mesh file forwarded to the step-2 configuration.

    Returns:
        Path of the restart H5 file found under ``<work_dir>/step2_<tag>``.
    """
    # NOTE(review): the call heads, the restart lookup and the return were
    # reconstructed from the surrounding code - confirm against upstream.
    step2_config = _make_step2_config(
        work_dir, restart_h5, tag, np_count, mesh_file=mesh_file)
    result, stdout = _run_solver(
        np_count, step2_config, work_dir, overrides=overrides)
    assert result.returncode == 0, f"Step 2 ({tag}) solver failed:\n{stdout[-2000:]}"
    restart_out = _find_restart_h5(
        os.path.join(work_dir, f"step2_{tag}"), f"step2_{tag}")
    print(f" Output restart: {restart_out}")
    return restart_out
@pytest.fixture(scope="module")
def work_dir():
    """Create a temporary working directory for the test.

    The fixture is module-scoped, so every test in this file that requests it
    receives the same directory; the directory is removed when the module's
    tests have finished.
    """
    d = tempfile.mkdtemp(prefix="dnds_test_redist_")
    try:
        yield d
    finally:
        # Best-effort cleanup: a leftover temp dir must not fail the run.
        shutil.rmtree(d, ignore_errors=True)
@pytest.fixture(scope="module")
def work_dir_large():
    """Create a temporary working directory for the large-mesh test.

    Kept separate from the small-mesh directory so the ``step1`` / ``step2_*``
    outputs of the two cases cannot be confused with each other.
    """
    d = tempfile.mkdtemp(prefix="dnds_test_redist_large_")
    try:
        yield d
    finally:
        # Best-effort cleanup: a leftover temp dir must not fail the run.
        shutil.rmtree(d, ignore_errors=True)
def test_restart_redistribute_same_np(work_dir):
    """Write restart at np=2, read at np=2 with different Metis seed.

    Skipped when the solver executable or the mesh file is missing.  The
    reference read and the re-seeded read must produce matching restart data.
    """
    if not os.path.isfile(EULER_EXE):
        pytest.skip(f"euler.exe not found: {EULER_EXE}")
    if not os.path.isfile(MESH_SMALL):
        pytest.skip(f"Mesh file not found: {MESH_SMALL}")

    # NOTE(review): value reconstructed from the docstring ("np=2").
    np_write = 2

    print(f"\n=== Step 1: 20-step run with np={np_write} ===")
    restart_h5 = _run_step1(work_dir, np_write, mesh_file=MESH_SMALL)

    print("\n=== Step 2a: load with np=2, same partition (reference) ===")
    restart_a = _run_step2(work_dir, restart_h5, "ref",
                           2, mesh_file=MESH_SMALL)

    print("\n=== Step 2b: load with np=2, different Metis seed ===")
    restart_b = _run_step2(
        work_dir, restart_h5, "reseed", 2,
        overrides=[("/dataIOControl/meshPartitionOptions/metisSeed", "42")],
        mesh_file=MESH_SMALL,
    )

    print("\n=== Comparing: same-np redistribution ===")
    _compare_restart_h5(restart_a, restart_b)
    print("=== PASS ===")
def test_restart_redistribute_different_np(work_dir):
    """Write restart at np=2, read at np=3 (cross-np redistribution).

    The step-1 restart and the np=2 reference are reused from ``work_dir``
    when an earlier test already produced them.  If they are absent (for
    example when this test is selected on its own) they are generated here,
    so the test does not depend on execution order.
    """
    if not os.path.isfile(EULER_EXE):
        pytest.skip(f"euler.exe not found: {EULER_EXE}")
    if not os.path.isfile(MESH_SMALL):
        pytest.skip(f"Mesh file not found: {MESH_SMALL}")

    try:
        restart_h5 = _find_restart_h5(
            os.path.join(work_dir, "step1"), "step1 (reuse)")
        print(f"\n Reusing Step 1 restart: {restart_h5}")
    except AssertionError:
        # Nothing to reuse: produce the np=2 restart ourselves.
        restart_h5 = _run_step1(work_dir, 2, mesh_file=MESH_SMALL)

    try:
        restart_a = _find_restart_h5(
            os.path.join(work_dir, "step2_ref"), "ref (reuse)")
    except AssertionError:
        # Nothing to reuse: produce the np=2 reference read ourselves.
        restart_a = _run_step2(work_dir, restart_h5, "ref",
                               2, mesh_file=MESH_SMALL)

    print("\n=== Step 2c: load with np=3 (cross-np redistribution) ===")
    restart_c = _run_step2(work_dir, restart_h5, "np3",
                           3, mesh_file=MESH_SMALL)

    print("\n=== Comparing: cross-np redistribution ===")
    _compare_restart_h5(restart_a, restart_c)
    print("=== PASS ===")
def test_restart_redistribute_large_mesh_multi_np(work_dir_large):
    """Write restart at np=4 with 20x20 mesh, read at np=4..8.

    Every read is compared against a reference read at the writer's rank
    count.  The read that uses the same rank count as the writer is given a
    different Metis seed and a ``_reseed`` tag.
    """
    if not os.path.isfile(EULER_EXE):
        pytest.skip(f"euler.exe not found: {EULER_EXE}")
    if not os.path.isfile(MESH_LARGE):
        pytest.skip(f"Mesh file not found: {MESH_LARGE}")

    # NOTE(review): value reconstructed from the docstring ("np=4").
    np_write = 4

    print(
        f"\n=== Step 1: 20-step run with np={np_write}, mesh=IV10_20 (400 cells) ===")
    restart_h5 = _run_step1(work_dir_large, np_write, mesh_file=MESH_LARGE)

    print(f"\n=== Reference: load with np={np_write} ===")
    restart_ref = _run_step2(work_dir_large, restart_h5,
                             "ref", np_write, mesh_file=MESH_LARGE)

    for np_read in [4, 5, 6, 7, 8]:
        # NOTE(review): the defaults for `overrides` and `tag` were
        # reconstructed; the tag pattern follows the "np3" tag used by the
        # cross-np test - confirm against upstream.
        overrides = None
        tag = f"np{np_read}"
        if np_read == np_write:
            overrides = [
                ("/dataIOControl/meshPartitionOptions/metisSeed", "99")]
            tag = f"np{np_read}_reseed"

        print(f"\n=== Load with np={np_read} ({tag}) ===")
        restart_read = _run_step2(
            work_dir_large, restart_h5, tag, np_read,
            overrides=overrides, mesh_file=MESH_LARGE,
        )

        print(f"=== Comparing np={np_write} vs np={np_read} ===")
        _compare_restart_h5(restart_ref, restart_read)
        print(f"=== PASS: np={np_read} ===")

    print("\n=== ALL np=4..8 PASS ===")
if __name__ == "__main__":
    # Direct execution without pytest: each temporary directory stands in for
    # the corresponding fixture.
    # NOTE(review): the calls inside both `with` bodies were reconstructed
    # from the test signatures - confirm against upstream.
    with tempfile.TemporaryDirectory(prefix="dnds_test_redist_") as d:
        test_restart_redistribute_same_np(d)
        test_restart_redistribute_different_np(d)
    with tempfile.TemporaryDirectory(prefix="dnds_test_redist_large_") as d:
        test_restart_redistribute_large_mesh_multi_np(d)
_run_solver(np_count, config_path, work_dir, overrides=None, timeout=300)
test_restart_redistribute_large_mesh_multi_np(work_dir_large)
_run_step1(work_dir, np_write, mesh_file=MESH_SMALL)
test_restart_redistribute_same_np(work_dir)
_make_step2_config(work_dir, restart_h5_path, tag, np_count, reorder_cells=False, mesh_bisect=0, mesh_file=MESH_SMALL)
_strip_json_comments(text)
_find_restart_h5(search_dir, label="")
_make_step1_config(work_dir, mesh_file=MESH_SMALL)
_compare_restart_h5(restart_a, restart_b, tol=1e-10)
_gather_by_orig_index(orig_idx, data)
test_restart_redistribute_different_np(work_dir)
_run_step2(work_dir, restart_h5, tag, np_count, overrides=None, mesh_file=MESH_SMALL)