DNDSR 0.1.0.dev1+gcd065ad
Distributed Numeric Data Structure for CFV
Loading...
Searching...
No Matches
test_basic_fv.py
Go to the documentation of this file.
1from DNDSR import DNDS, Geom, CFV
2from DNDSR.Geom.utils import *
3import numpy as np
4import json
5import time
6import pprint
7
8import time
9
10
def time_function_until_limit(
    func,
    time_limit=1.0,
    max_executions=None,
    iter_pack=1,
    report=None,
    *args,
    **kwargs,
):
    """Call ``func`` repeatedly until a time or call-count budget is exhausted.

    ``func(*args, **kwargs)`` is invoked in packs of ``iter_pack`` calls; the
    clock is only sampled between packs, so a larger pack lowers the timing
    overhead at the cost of a coarser stopping granularity.

    Args:
        func: Callable to benchmark.
        time_limit: Wall-clock budget in seconds (measured with
            ``time.perf_counter``). Must be positive. At least one pack is
            always executed before the budget is checked.
        max_executions: Optional upper bound on the number of calls. Must be
            positive when given. The last pack is shortened so the bound is
            never exceeded.
        iter_pack: Number of calls made between two clock samples. Must be
            positive.
        report: Optional ``report(elapsed, executions)`` callback, invoked
            after a pack whenever the elapsed time has crossed the next whole
            second since the previous report.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Tuple ``(executions, total_time, avg_time)`` where ``avg_time`` is
        ``total_time / executions``.

    Raises:
        ValueError: If ``time_limit``, ``max_executions`` or ``iter_pack`` is
            not positive.
    """
    import math

    if time_limit <= 0:
        raise ValueError("time_limit must be positive")
    if max_executions is not None and max_executions <= 0:
        raise ValueError("max_executions must be positive if provided")
    if iter_pack <= 0:
        # A zero pack would spin without ever calling func; a negative one
        # would drive the execution counter below zero.
        raise ValueError("iter_pack must be positive")

    executions = 0
    start_time = time.perf_counter()

    # Elapsed-time threshold (seconds) at which the next report is due.
    next_report = 1

    while max_executions is None or executions < max_executions:
        pack = iter_pack
        if max_executions is not None:
            # Shorten the final pack so the cap is honoured exactly; ceil()
            # keeps a non-integer cap usable as a range() bound.
            pack = min(pack, math.ceil(max_executions - executions))

        for _ in range(pack):
            func(*args, **kwargs)
        executions += pack

        elapsed = time.perf_counter() - start_time
        if elapsed >= next_report:
            # Schedule the next report at the following whole second.
            next_report = math.floor(elapsed) + 1
            if report is not None:
                report(elapsed, executions)
        if elapsed >= time_limit:
            break

    total_time = time.perf_counter() - start_time
    avg_time = total_time / executions if executions > 0 else 0.0

    return (executions, total_time, avg_time)
55
56
def get_fv(mpi):
    """Build the mesh and the finite-volume object used by the benchmarks.

    Reads ``NACA0012_H2.cgns`` from ``../../data/mesh`` relative to this file,
    creates the boundary mesh, configures a ``CFV.FiniteVolume`` with
    ``intOrder = maxOrder = 3`` and runs its cell and face construction steps.
    Rank 0 prints the parsed settings and the array memory footprints.

    Args:
        mpi: MPI handle passed to the mesh reader and to ``CFV.FiniteVolume``;
            its ``rank`` attribute gates the printing.

    Returns:
        Tuple ``(mesh, reader, name2Id, meshBnd, readerBnd, fv)``.
    """
    # Imported explicitly instead of relying on `os` leaking in through
    # `from DNDSR.Geom.utils import *`.
    import os

    print(CFV.tUDof_1)
    # CFV.VariationalReconstruction_2()
    mesh_dir = os.path.join(os.path.dirname(__file__), "..", "..", "data", "mesh")
    # Alternative mesh in the same directory: "Uniform_3x3.cgns".
    meshFile = os.path.join(mesh_dir, "NACA0012_H2.cgns")

    mesh, reader, name2Id = create_mesh_from_CGNS(
        meshFile,
        mpi,
        2,  # presumably the spatial dimension - TODO confirm against create_mesh_from_CGNS
        periodic_geometry={
            "translation1": [3, 0, 0],
            "translation2": [0, 3, 0],
        },
        meshDirectBisect=2,
    )
    meshBnd, readerBnd = create_bnd_mesh(mesh)

    fv = CFV.FiniteVolume(mpi, mesh)
    settings = fv.GetSettings()
    settings["intOrder"] = 3
    settings["maxOrder"] = 3
    fv.ParseSettings(settings)
    if mpi.rank == 0:
        print(fv.GetSettings())

    # Weight 1.0 for key (bc_id, 0) of every named boundary, plus keys 1..3
    # for boundaries whose name starts with "PERIODIC".
    # NOTE(review): this map is not consumed anywhere in this function.
    bcid_2_bcweight_map = {}
    for name, bc_id in name2Id.n2id_map.items():
        bcid_2_bcweight_map[(bc_id, 0)] = 1.0
        if name.startswith("PERIODIC"):
            for k in (1, 2, 3):
                bcid_2_bcweight_map[(bc_id, k)] = 1.0

    # construction: volume
    fv.SetCellAtrBasic()
    fv.ConstructCellVolume()
    fv.ConstructCellBary()
    fv.ConstructCellCent()
    fv.ConstructCellIntJacobiDet()
    fv.ConstructCellIntPPhysics()
    fv.ConstructCellAlignedHBox()
    fv.ConstructCellMajorHBoxCoordInertia()
    # construction: face
    fv.SetFaceAtrBasic()
    fv.ConstructFaceCent()
    fv.ConstructFaceArea()
    fv.ConstructFaceIntJacobiDet()
    fv.ConstructFaceIntPPhysics()
    fv.ConstructFaceUnitNorm()
    fv.ConstructFaceMeanNorm()

    fv.ConstructCellSmoothScale()

    # Array footprints converted from bytes to MiB (divide by 1024**2).
    nB = fv.getArrayBytes() / 1024**2
    nBMesh = mesh.getArrayBytes() / 1024**2
    if mpi.rank == 0:
        print(f"Bytes : {nB:.4g} MB")
        print(f"Bytes mesh: {nBMesh:.4g} MB")

    return mesh, reader, name2Id, meshBnd, readerBnd, fv
122
123
def _test_fv_Op(
    mpi,
    fv: CFV.FiniteVolume,
    mesh: Geom.UnstructuredMesh,
    nvars=1,
    test_time=5.0,
    max_iter=1000 * 1000 * 1000,
):
    """Time the dynamic-size cell-operator test on host and on CUDA.

    Builds a ``tUDof_D`` field and two ``tUGrad_3xD`` arrays with ``nvars``
    variables, fills the field from the cell barycenters, times
    ``CFV.finiteVolumeCellOpTest_main_Host`` and then
    ``CFV.finiteVolumeCellOpTest_main_CUDA``, and prints (on rank 0) the
    iteration counts, average times, norms, the norm of the difference between
    the two results and the host/CUDA time ratio.

    Args:
        mpi: MPI handle; only ``mpi.rank`` is read, to gate printing.
        fv: Finite-volume object used to build the arrays and run the kernels.
        mesh: Mesh providing ``NumCell()``; moved to CUDA before the device run.
        nvars: Number of variables per cell.
        test_time: Time budget in seconds for each of the two timing loops.
        max_iter: Upper bound on kernel invocations for each timing loop.
    """
    u = CFV.tUDof_D()
    grad_u_arrs = [CFV.tUGrad_3xD() for _ in range(2)]
    grad_u, grad_u1 = grad_u_arrs

    fv.BuildUDof_D(u, nvars)
    for arr in grad_u_arrs:
        fv.BuildUGrad_3xD(arr, nvars)
    grad_u1 = grad_u1.clone()

    u.setConstant(1.23)
    for iCell in range(mesh.NumCell()):
        x = fv.GetCellBary(iCell)
        # copy=False: presumably a writable view into u's storage - TODO confirm
        ui = np.array(u[iCell], copy=False)
        ui[:] = x[0] + np.sin(x[1] * np.pi)
    u.trans.startPersistentPull()
    u.trans.waitPersistentPull()

    def test_CUDA():
        CFV.finiteVolumeCellOpTest_main_CUDA(
            fv,
            u,
            grad_u,
            100,
            {
                "threadsPerBlock": 128,
                # "method": "pervar",
            },
        )

    def test_Host():
        CFV.finiteVolumeCellOpTest_main_Host(fv, u, grad_u, 100)

    print("AAA0")
    grad_u.setConstant(0)
    print("AAA1")
    grad_u_norm2 = grad_u.norm2()
    if mpi.rank == 0:
        print(f"norm: {grad_u_norm2}")

    executions, total_time, avg_time = time_function_until_limit(
        test_Host,
        test_time,
        max_iter,
        iter_pack=1,
        report=lambda t, n: print(f" Host iter [{n:8}] time [{t:10.4e}]"),
    )
    # Norms are evaluated on every rank, outside the rank-0 branch.
    grad_u_norm2 = grad_u.norm2()
    grad_u_cnorm1 = grad_u.componentWiseNorm1()  # result currently unused
    if mpi.rank == 0:
        print("--- HOST ---")
        print(f"[{executions}] times, avg [{avg_time:8.04e}] s")
        # Print the value computed above rather than calling norm2() again on
        # rank 0 only: if the reduction is collective (not visible from here),
        # a rank-0-only call would hang the other ranks.
        print(f"norm: {grad_u_norm2}")
    avg_time_host = avg_time

    # Keep the host result in grad_u1 as the reference, then reset grad_u.
    grad_u1.assign_value(grad_u)
    grad_u.setConstant(0)

    mesh.to_device("CUDA")
    fv.to_device("CUDA")
    u.to_device("CUDA")
    grad_u.to_device("CUDA")
    grad_u1.to_device("CUDA")

    executions, total_time, avg_time = time_function_until_limit(
        test_CUDA,
        test_time,
        max_iter,
        iter_pack=1,
        report=lambda t, n: print(f" CUDA iter [{n:8}] time [{t:8.04e}]"),
    )
    # grad_u.to_host()
    grad_u_norm = grad_u.norm2()
    grad_u_cnorm1 = grad_u.componentWiseNorm1()  # result currently unused
    # Multiplies the reference by an all-ones (3, nvars) array.
    grad_u1 *= np.ones((3, nvars))
    grad_u_err_norm = grad_u.norm2(grad_u1)
    grad_u_err_cnorm1 = grad_u.componentWiseNorm1(grad_u1)
    if mpi.rank == 0:
        print("--- CUDA ---")
        print(f"[{executions}] times, avg [{avg_time:8.4e}] s")
        print(f"norm: {grad_u_norm}, diff_norm: {grad_u_err_norm:.4e}")
        pprint.pprint(grad_u_err_cnorm1.tolist())
        print(f" -- acc [{avg_time_host / avg_time:.4g}]")
212
213
def _test_fv_Op_Fix_Run(
    fv,
    mesh,
    u,
    grad_u,
    grad_u1,
    test_Host,
    test_CUDA,
    test_time,
    max_iter,
    mpi=None,
):
    """Shared host-then-CUDA timing driver for the fixed-size and SOA tests.

    ``u``, ``grad_u`` and ``grad_u1`` may each be a single array object or an
    iterable of such objects; the single/iterable case is detected with
    ``hasattr`` on the methods being called.

    Steps: move ``u`` to host, zero ``grad_u``, time ``test_Host``, copy the
    host result into ``grad_u1``, zero ``grad_u``, move everything to CUDA,
    time ``test_CUDA``, then print (on rank 0) the norms, the norm of the
    difference against the host result and the host/CUDA time ratio. For
    iterables the printed norms are the sums of the per-array norms.

    Args:
        fv: Finite-volume object; moved to CUDA before the device run.
        mesh: Mesh; moved to CUDA before the device run.
        u: Input field(s).
        grad_u: Output array(s) written by the kernels under test.
        grad_u1: Array(s) receiving a copy of the host result for comparison.
        test_Host: Zero-argument callable running the host kernel.
        test_CUDA: Zero-argument callable running the CUDA kernel.
        test_time: Time budget in seconds for each timing loop.
        max_iter: Upper bound on invocations for each timing loop.
        mpi: Optional MPI handle whose ``rank`` gates printing. When omitted,
            the module-level global named ``mpi`` is used, which is what this
            function implicitly relied on before the parameter existed.

    Raises:
        ValueError: If ``mpi`` is not passed and no module-level ``mpi`` exists.
    """
    if mpi is None:
        # Existing callers do not pass mpi; fall back to the module global
        # and fail before the benchmark rather than at the first print.
        mpi = globals().get("mpi")
        if mpi is None:
            raise ValueError(
                "mpi must be passed or defined as a module-level global"
            )

    # mesh.to_device("Host")
    # fv.to_device("Host")
    if hasattr(u, "to_device"):
        u.to_device("Host")
    else:
        for e in u:
            e.to_device("Host")
    # grad_u.to_device("Host")
    # grad_u1.to_device("Host")

    if hasattr(grad_u, "setConstant"):
        grad_u.setConstant(0)
    else:
        for e in grad_u:
            e.setConstant(0)

    executions, total_time, avg_time = time_function_until_limit(
        test_Host,
        test_time,
        max_iter,
        iter_pack=1,
        report=lambda t, n: print(f" Host iter [{n:8}] time [{t:10.4e}]"),
    )
    if hasattr(grad_u, "norm2"):
        grad_u_norm2 = grad_u.norm2()
    else:
        grad_u_norm2 = 0.0
        for e in grad_u:
            grad_u_norm2 += e.norm2()

    if mpi.rank == 0:
        print("--- HOST ---")
        print(f"[{executions}] times, avg [{avg_time:8.04e}] s")
        print(f"norm: {grad_u_norm2}")
    avg_time_host = avg_time

    # Keep the host result as the reference, then reset the output array(s).
    if hasattr(grad_u, "setConstant"):
        grad_u1.assign_value(grad_u)
        grad_u.setConstant(0)
    else:
        for a, b in zip(grad_u, grad_u1):
            b.assign_value(a)
            a.setConstant(0)

    mesh.to_device("CUDA")
    fv.to_device("CUDA")
    # The single/iterable decision for all three arrays follows u.
    if hasattr(u, "to_device"):
        u.to_device("CUDA")
        grad_u.to_device("CUDA")
        grad_u1.to_device("CUDA")
    else:
        for e in u:
            e.to_device("CUDA")
        for e in grad_u:
            e.to_device("CUDA")
        for e in grad_u1:
            e.to_device("CUDA")

    executions, total_time, avg_time = time_function_until_limit(
        test_CUDA,
        test_time,
        max_iter,
        iter_pack=1,
        report=lambda t, n: print(f" CUDA iter [{n:8}] time [{t:8.04e}]"),
    )

    # grad_u.to_host()
    if hasattr(grad_u, "norm2"):
        grad_u_norm2 = grad_u.norm2()
        grad_u_err_norm = grad_u.norm2(grad_u1)
    else:
        grad_u_norm2 = 0.0
        grad_u_err_norm = 0.0
        for e, e1 in zip(grad_u, grad_u1):
            grad_u_norm2 += e.norm2()
            grad_u_err_norm += e.norm2(e1)

    if mpi.rank == 0:
        print("--- CUDA ---")
        print(f"[{executions}] times, avg [{avg_time:8.4e}] s")
        print(f"norm: {grad_u_norm2}, diff_norm: {grad_u_err_norm:.4e}")
        print(f" -- acc [{avg_time_host / avg_time:.4g}]")
300
301
def _test_fv_Op_Fix1(
    mpi,
    fv: CFV.FiniteVolume,
    mesh: Geom.UnstructuredMesh,
    test_time=5.0,
    max_iter=1000 * 1000 * 1000,
):
    """Run the fixed-size, one-variable cell-operator benchmark.

    Sets up ``tUDof_1`` / ``tUGrad_3x1`` arrays, fills the field from the cell
    barycenters and delegates the host/CUDA timing to ``_test_fv_Op_Fix_Run``
    with the ``..._Fixed_main_Host_N1`` / ``..._Fixed_main_CUDA_N1`` kernels.

    Args:
        mpi: MPI handle (accepted for signature parity; not read here).
        fv: Finite-volume object used to build the arrays and run the kernels.
        mesh: Mesh providing ``NumCell()``.
        test_time: Time budget in seconds for each timing loop.
        max_iter: Upper bound on kernel invocations for each timing loop.
    """
    nvars = 1

    u = CFV.tUDof_1()
    grad_u = CFV.tUGrad_3x1()
    grad_u_ref = CFV.tUGrad_3x1()

    fv.BuildUDof_1(u, nvars)
    fv.BuildUGrad_3x1(grad_u, nvars)
    fv.BuildUGrad_3x1(grad_u_ref, nvars)
    grad_u_ref = grad_u_ref.clone()

    u.setConstant(1.23)
    for cell in range(mesh.NumCell()):
        bary = fv.GetCellBary(cell)
        cell_view = np.array(u[cell], copy=False)
        cell_view[:] = bary[0] + np.sin(bary[1] * np.pi)

    transfer = u.trans
    transfer.startPersistentPull()
    u.trans.waitPersistentPull()

    def run_on_cuda():
        # The options dict is rebuilt on every call.
        options = {"threadsPerBlock": 128, "method": "pervar"}
        CFV.finiteVolumeCellOpTest_Fixed_main_CUDA_N1(fv, u, grad_u, 100, options)

    def run_on_host():
        CFV.finiteVolumeCellOpTest_Fixed_main_Host_N1(fv, u, grad_u, 100)

    _test_fv_Op_Fix_Run(
        fv, mesh, u, grad_u, grad_u_ref, run_on_host, run_on_cuda, test_time, max_iter
    )
344
345
def _test_fv_Op_Fix5(
    mpi,
    fv: CFV.FiniteVolume,
    mesh: Geom.UnstructuredMesh,
    test_time=5.0,
    max_iter=1000 * 1000 * 1000,
):
    """Run the fixed-size, five-variable cell-operator benchmark.

    Sets up ``tUDof_5`` / ``tUGrad_3x5`` arrays, fills the field from the cell
    barycenters and delegates the host/CUDA timing to ``_test_fv_Op_Fix_Run``
    with the ``..._Fixed_main_Host_N5`` / ``..._Fixed_main_CUDA_N5`` kernels.

    Args:
        mpi: MPI handle (accepted for signature parity; not read here).
        fv: Finite-volume object used to build the arrays and run the kernels.
        mesh: Mesh providing ``NumCell()``.
        test_time: Time budget in seconds for each timing loop.
        max_iter: Upper bound on kernel invocations for each timing loop.
    """
    nvars = 5

    u = CFV.tUDof_5()
    grad_u = CFV.tUGrad_3x5()
    grad_u_ref = CFV.tUGrad_3x5()

    fv.BuildUDof_5(u, nvars)
    fv.BuildUGrad_3x5(grad_u, nvars)
    fv.BuildUGrad_3x5(grad_u_ref, nvars)
    grad_u_ref = grad_u_ref.clone()

    u.setConstant(1.23)
    for cell in range(mesh.NumCell()):
        bary = fv.GetCellBary(cell)
        cell_view = np.array(u[cell], copy=False)
        # The scalar is assigned to the whole per-cell slice.
        cell_view[:] = bary[0] + np.sin(bary[1] * np.pi)

    transfer = u.trans
    transfer.startPersistentPull()
    u.trans.waitPersistentPull()

    def run_on_cuda():
        # The options dict is rebuilt on every call.
        options = {
            "threadsPerBlock": 64,
            # "method": "pervar",
        }
        CFV.finiteVolumeCellOpTest_Fixed_main_CUDA_N5(fv, u, grad_u, 100, options)

    def run_on_host():
        CFV.finiteVolumeCellOpTest_Fixed_main_Host_N5(fv, u, grad_u, 100)

    _test_fv_Op_Fix_Run(
        fv, mesh, u, grad_u, grad_u_ref, run_on_host, run_on_cuda, test_time, max_iter
    )
388
389
def _test_fv_Op_SOA_ver0_X(
    mpi,
    fv: CFV.FiniteVolume,
    mesh: Geom.UnstructuredMesh,
    nvars=1,
    test_time=5.0,
    max_iter=1000 * 1000 * 1000,
):
    """Run the "SOA ver0" cell-operator benchmark with one array per variable.

    Builds one ``tUDof_1`` / ``tUGrad_3x1`` prototype, clones it ``nvars``
    times into lists, fills every field from the cell barycenters and
    delegates the host/CUDA timing to ``_test_fv_Op_Fix_Run`` with the
    ``..._SOA_ver0_main_*_N1`` or ``..._N5`` kernels.

    Args:
        mpi: MPI handle (accepted for signature parity; not read here).
        fv: Finite-volume object used to build the arrays and run the kernels.
        mesh: Mesh providing ``NumCell()``.
        nvars: Number of variables; only 1 and 5 have kernels.
        test_time: Time budget in seconds for each timing loop.
        max_iter: Upper bound on kernel invocations for each timing loop.

    Raises:
        ValueError: If ``nvars`` is neither 1 nor 5.
    """
    if nvars not in (1, 5):
        raise ValueError("nvars needs to be 1 or 5")

    u_e = CFV.tUDof_1()
    grad_u_e = CFV.tUGrad_3x1()

    fv.BuildUDof_1(u_e, 1)
    fv.BuildUGrad_3x1(grad_u_e, 1)

    # One independent single-variable array per variable.
    u = [u_e.clone() for _ in range(nvars)]
    grad_u = [grad_u_e.clone() for _ in range(nvars)]
    grad_u1 = [e.clone() for e in grad_u]

    for e in u:
        e.setConstant(1.23)
    for iCell in range(mesh.NumCell()):
        # The barycenter and the initial value depend only on the cell, so
        # they are evaluated once per cell instead of once per variable.
        x = fv.GetCellBary(iCell)
        value = x[0] + np.sin(x[1] * np.pi)
        for e in u:
            ui = np.array(e[iCell], copy=False)
            ui[:] = value
    print("X2")
    for e in u:
        e.trans.startPersistentPull()
        e.trans.waitPersistentPull()
    print("X3")

    # nvars was validated above, so the else branch is the nvars == 5 case.
    # Only the selected pair of kernels is looked up on CFV.
    if nvars == 1:
        run_cuda = CFV.finiteVolumeCellOpTest_SOA_ver0_main_CUDA_N1
        run_host = CFV.finiteVolumeCellOpTest_SOA_ver0_main_Host_N1
    else:
        run_cuda = CFV.finiteVolumeCellOpTest_SOA_ver0_main_CUDA_N5
        run_host = CFV.finiteVolumeCellOpTest_SOA_ver0_main_Host_N5

    def test_CUDA():
        run_cuda(
            fv,
            u,
            grad_u,
            100,
            {
                "threadsPerBlock": 128,
                # "method": "pervar",
            },
        )

    def test_Host():
        run_host(fv, u, grad_u, 100)

    _test_fv_Op_Fix_Run(
        fv, mesh, u, grad_u, grad_u1, test_Host, test_CUDA, test_time, max_iter
    )
449
450
451def _run_basic(mpi):
452 mesh, reader, name2Id, meshBnd, readerBnd, fv = get_fv(mpi)
453
454 test_time = 5.0
455
457 mpi,
458 fv,
459 mesh,
460 nvars=5,
461 test_time=test_time,
462 )
463
464 print("\n\nTesting Fixed")
465
467 mpi,
468 fv,
469 mesh,
470 test_time=test_time,
471 )
472
473 print("\n\nTesting SOA ver0")
474
476 mpi,
477 fv,
478 mesh,
479 nvars=5,
480 test_time=test_time,
481 )
482
483 # print(input("type anything: "))
484
485
486if __name__ == "__main__":
488 mpi.setWorld()
489 _run_basic(mpi)
_test_fv_Op_Fix5(mpi, CFV.FiniteVolume fv, Geom.UnstructuredMesh mesh, test_time=5.0, max_iter=1000 *1000 *1000)
_test_fv_Op_SOA_ver0_X(mpi, CFV.FiniteVolume fv, Geom.UnstructuredMesh mesh, nvars=1, test_time=5.0, max_iter=1000 *1000 *1000)
_test_fv_Op_Fix_Run(fv, mesh, u, grad_u, grad_u1, test_Host, test_CUDA, test_time, max_iter)
_test_fv_Op_Fix1(mpi, CFV.FiniteVolume fv, Geom.UnstructuredMesh mesh, test_time=5.0, max_iter=1000 *1000 *1000)
time_function_until_limit(func, time_limit=1.0, max_executions=None, iter_pack=1, report=None, *args, **kwargs)
_test_fv_Op(mpi, CFV.FiniteVolume fv, Geom.UnstructuredMesh mesh, nvars=1, test_time=5.0, max_iter=1000 *1000 *1000)
Lightweight bundle of an MPI communicator and the calling rank's coordinates.
Definition MPI.hpp:215