23 raise ValueError(
"time_limit must be positive")
24 if max_executions
is not None and max_executions <= 0:
25 raise ValueError(
"max_executions must be positive if provided")
28 start_time = time.perf_counter()
34 if max_executions
is not None and executions >= max_executions:
38 for _
in range(iter_pack):
40 executions += iter_pack
43 elapsed = time.perf_counter() - start_time
44 if elapsed >= reportTime:
45 reportTime = math.floor(elapsed / 1) * 1 + 1
46 if report
is not None:
47 report(elapsed, executions)
48 if elapsed >= time_limit:
51 total_time = time.perf_counter() - start_time
52 avg_time = total_time / executions
if executions > 0
else 0.0
54 return (executions, total_time, avg_time)
60 meshFile = os.path.join(
61 os.path.dirname(__file__),
"..",
"..",
"data",
"mesh",
"NACA0012_H2.cgns"
66 mesh, reader, name2Id = create_mesh_from_CGNS(
71 "translation1": [3, 0, 0],
72 "translation2": [0, 3, 0],
76 meshBnd, readerBnd = create_bnd_mesh(mesh)
77 fv = CFV.FiniteVolume(mpi, mesh)
78 settings = fv.GetSettings()
79 settings[
"intOrder"] = 3
80 settings[
"maxOrder"] = 3
81 fv.ParseSettings(settings)
83 print(fv.GetSettings())
85 bcid_2_bcweight_map = {}
86 for name, id
in name2Id.n2id_map.items():
88 bcid_2_bcweight_map[(id, 0)] = 1.0
89 if name.startswith(
"PERIODIC"):
90 bcid_2_bcweight_map[(id, 0)] = 1.0
91 bcid_2_bcweight_map[(id, 1)] = 1.0
92 bcid_2_bcweight_map[(id, 2)] = 1.0
93 bcid_2_bcweight_map[(id, 3)] = 1.0
97 fv.ConstructCellVolume()
98 fv.ConstructCellBary()
99 fv.ConstructCellCent()
100 fv.ConstructCellIntJacobiDet()
101 fv.ConstructCellIntPPhysics()
102 fv.ConstructCellAlignedHBox()
103 fv.ConstructCellMajorHBoxCoordInertia()
106 fv.ConstructFaceCent()
107 fv.ConstructFaceArea()
108 fv.ConstructFaceIntJacobiDet()
109 fv.ConstructFaceIntPPhysics()
110 fv.ConstructFaceUnitNorm()
111 fv.ConstructFaceMeanNorm()
113 fv.ConstructCellSmoothScale()
115 nB = fv.getArrayBytes() / 1024**2
116 nBMesh = mesh.getArrayBytes() / 1024**2
118 print(f
"Bytes : {nB:.4g} MB")
119 print(f
"Bytes mesh: {nBMesh:.4g} MB")
121 return mesh, reader, name2Id, meshBnd, readerBnd, fv
126 fv: CFV.FiniteVolume,
127 mesh: Geom.UnstructuredMesh,
130 max_iter=1000 * 1000 * 1000,
133 grad_u_arrs = [CFV.tUGrad_3xD()
for _
in range(2)]
134 grad_u, grad_u1 = grad_u_arrs
136 fv.BuildUDof_D(u, nvars)
137 for arr
in grad_u_arrs:
138 fv.BuildUGrad_3xD(arr, nvars)
139 grad_u1 = grad_u1.clone()
141 for iCell
in range(mesh.NumCell()):
142 x = fv.GetCellBary(iCell)
143 ui = np.array(u[iCell], copy=
False)
144 ui[:] = x[0] + np.sin(x[1] * np.pi)
145 u.trans.startPersistentPull()
146 u.trans.waitPersistentPull()
149 CFV.finiteVolumeCellOpTest_main_CUDA(
155 "threadsPerBlock": 128,
161 CFV.finiteVolumeCellOpTest_main_Host(fv, u, grad_u, 100)
164 grad_u.setConstant(0)
166 grad_u_norm2 = grad_u.norm2()
168 print(f
"norm: {grad_u_norm2}")
174 report=
lambda t, n: print(f
" Host iter [{n:8}] time [{t:10.4e}]"),
176 grad_u_norm2 = grad_u.norm2()
177 grad_u_cnorm1 = grad_u.componentWiseNorm1()
179 print(
"--- HOST ---")
180 print(f
"[{executions}] times, avg [{avg_time:8.04e}] s")
181 print(f
"norm: {grad_u.norm2()}")
182 avg_time_host = avg_time
184 grad_u1.assign_value(grad_u)
185 grad_u.setConstant(0)
187 mesh.to_device(
"CUDA")
190 grad_u.to_device(
"CUDA")
191 grad_u1.to_device(
"CUDA")
198 report=
lambda t, n: print(f
" CUDA iter [{n:8}] time [{t:8.04e}]"),
201 grad_u_norm = grad_u.norm2()
202 grad_u_cnorm1 = grad_u.componentWiseNorm1()
203 grad_u1 *= np.ones((3, nvars))
204 grad_u_err_norm = grad_u.norm2(grad_u1)
205 grad_u_err_cnorm1 = grad_u.componentWiseNorm1(grad_u1)
207 print(
"--- CUDA ---")
208 print(f
"[{executions}] times, avg [{avg_time:8.4e}] s")
209 print(f
"norm: {grad_u_norm}, diff_norm: {grad_u_err_norm:.4e}")
210 pprint.pprint(grad_u_err_cnorm1.tolist())
211 print(f
" -- acc [{avg_time_host / avg_time:.4g}]")
215 fv, mesh, u, grad_u, grad_u1, test_Host, test_CUDA, test_time, max_iter
220 if hasattr(u,
"to_device"):
228 if hasattr(grad_u,
"setConstant"):
229 grad_u.setConstant(0)
239 report=
lambda t, n: print(f
" Host iter [{n:8}] time [{t:10.4e}]"),
241 if hasattr(grad_u,
"norm2"):
242 grad_u_norm2 = grad_u.norm2()
246 grad_u_norm2 += e.norm2()
249 print(
"--- HOST ---")
250 print(f
"[{executions}] times, avg [{avg_time:8.04e}] s")
251 print(f
"norm: {grad_u_norm2}")
252 avg_time_host = avg_time
254 if hasattr(grad_u,
"setConstant"):
255 grad_u1.assign_value(grad_u)
256 grad_u.setConstant(0)
258 for a, b
in zip(grad_u, grad_u1):
262 mesh.to_device(
"CUDA")
264 if hasattr(u,
"to_device"):
266 grad_u.to_device(
"CUDA")
267 grad_u1.to_device(
"CUDA")
281 report=
lambda t, n: print(f
" CUDA iter [{n:8}] time [{t:8.04e}]"),
285 if hasattr(grad_u,
"norm2"):
286 grad_u_norm2 = grad_u.norm2()
287 grad_u_err_norm = grad_u.norm2(grad_u1)
290 grad_u_err_norm = 0.0
291 for e, e1
in zip(grad_u, grad_u1):
292 grad_u_norm2 += e.norm2()
293 grad_u_err_norm += e.norm2(e1)
296 print(
"--- CUDA ---")
297 print(f
"[{executions}] times, avg [{avg_time:8.4e}] s")
298 print(f
"norm: {grad_u_norm2}, diff_norm: {grad_u_err_norm:.4e}")
299 print(f
" -- acc [{avg_time_host / avg_time:.4g}]")
304 fv: CFV.FiniteVolume,
305 mesh: Geom.UnstructuredMesh,
307 max_iter=1000 * 1000 * 1000,
311 grad_u_arrs = [CFV.tUGrad_3x1()
for _
in range(2)]
312 grad_u, grad_u1 = grad_u_arrs
314 fv.BuildUDof_1(u, nvars)
315 for arr
in grad_u_arrs:
316 fv.BuildUGrad_3x1(arr, nvars)
317 grad_u1 = grad_u1.clone()
319 for iCell
in range(mesh.NumCell()):
320 x = fv.GetCellBary(iCell)
321 ui = np.array(u[iCell], copy=
False)
322 ui[:] = x[0] + np.sin(x[1] * np.pi)
323 u.trans.startPersistentPull()
324 u.trans.waitPersistentPull()
327 CFV.finiteVolumeCellOpTest_Fixed_main_CUDA_N1(
333 "threadsPerBlock": 128,
339 CFV.finiteVolumeCellOpTest_Fixed_main_Host_N1(fv, u, grad_u, 100)
342 fv, mesh, u, grad_u, grad_u1, test_Host, test_CUDA, test_time, max_iter
348 fv: CFV.FiniteVolume,
349 mesh: Geom.UnstructuredMesh,
351 max_iter=1000 * 1000 * 1000,
355 grad_u_arrs = [CFV.tUGrad_3x5()
for _
in range(2)]
356 grad_u, grad_u1 = grad_u_arrs
358 fv.BuildUDof_5(u, nvars)
359 for arr
in grad_u_arrs:
360 fv.BuildUGrad_3x5(arr, nvars)
361 grad_u1 = grad_u1.clone()
363 for iCell
in range(mesh.NumCell()):
364 x = fv.GetCellBary(iCell)
365 ui = np.array(u[iCell], copy=
False)
366 ui[:] = x[0] + np.sin(x[1] * np.pi)
367 u.trans.startPersistentPull()
368 u.trans.waitPersistentPull()
371 CFV.finiteVolumeCellOpTest_Fixed_main_CUDA_N5(
377 "threadsPerBlock": 64,
383 CFV.finiteVolumeCellOpTest_Fixed_main_Host_N5(fv, u, grad_u, 100)
386 fv, mesh, u, grad_u, grad_u1, test_Host, test_CUDA, test_time, max_iter
392 fv: CFV.FiniteVolume,
393 mesh: Geom.UnstructuredMesh,
396 max_iter=1000 * 1000 * 1000,
398 if nvars != 1
and nvars != 5:
399 raise ValueError(
"nvars needs to be 1 or 5")
401 grad_u_e = CFV.tUGrad_3x1()
403 fv.BuildUDof_1(u_e, 1)
404 fv.BuildUGrad_3x1(grad_u_e, 1)
406 u = [u_e.clone()
for _
in range(nvars)]
407 grad_u = [grad_u_e.clone()
for _
in range(nvars)]
409 grad_u1 = [e.clone()
for e
in grad_u]
413 for iCell
in range(mesh.NumCell()):
415 x = fv.GetCellBary(iCell)
416 ui = np.array(e[iCell], copy=
False)
417 ui[:] = x[0] + np.sin(x[1] * np.pi)
420 e.trans.startPersistentPull()
421 e.trans.waitPersistentPull()
425 run_cuda = CFV.finiteVolumeCellOpTest_SOA_ver0_main_CUDA_N1
426 run_host = CFV.finiteVolumeCellOpTest_SOA_ver0_main_Host_N1
428 run_cuda = CFV.finiteVolumeCellOpTest_SOA_ver0_main_CUDA_N5
429 run_host = CFV.finiteVolumeCellOpTest_SOA_ver0_main_Host_N5
438 "threadsPerBlock": 128,
444 run_host(fv, u, grad_u, 100)
447 fv, mesh, u, grad_u, grad_u1, test_Host, test_CUDA, test_time, max_iter
_test_fv_Op_Fix_Run(fv, mesh, u, grad_u, grad_u1, test_Host, test_CUDA, test_time, max_iter)