DNDSR 0.1.0.dev1+gcd065ad
Distributed Numeric Data Structure for CFV
Loading...
Searching...
No Matches
ArrayDOF_op.hxx
Go to the documentation of this file.
1#pragma once
2/// @file ArrayDOF_op.hxx
3/// @brief Host-side implementations of #ArrayDofOp vector-space operations.
4///
5/// Each function (setConstant, +=, -=, *=, /=, addTo, norm2, dot, reduction,
6/// componentWiseNorm1, ...) is defined once here as a template over `(n_m, n_n)`.
7/// Explicit instantiations for the supported `(n_m, n_n)` combinations live in
8/// `ArrayDOF_inst/<...>.cpp`; the CUDA mirror lives in `ArrayDOF_op_CUDA.cuh`.
9///
10/// The file uses `#pragma omp` to parallelise row-wise loops when #DNDS_DIST_MT_USE_OMP
11/// is defined, so the choice of distributed-vs-threaded execution is a
12/// build-time decision.
13
14#include "ArrayDOF.hpp"
15#include "DNDS/Defines.hpp"
16#include "DNDS/Errors.hpp"
17
18namespace DNDS
19{
20 template <int n_m, int n_n>
21 void ArrayDofOp<DeviceBackend::Host, n_m, n_n>::setConstant(t_self &self, real R)
22 {
23 DNDS_assert(self.father && self.son);
24 index iTop = self.Size();
25#if defined(DNDS_DIST_MT_USE_OMP)
26# pragma omp parallel for schedule(static)
27#endif
28 for (index i = 0; i < iTop; i++)
29 self.operator[](i).setConstant(R);
30 }
31
32 template <int n_m, int n_n>
33 void ArrayDofOp<DeviceBackend::Host, n_m, n_n>::setConstant(t_self &self, const Eigen::Ref<const t_element_mat> &R)
34 {
35 DNDS_assert(self.father && self.son);
36 index iTop = self.Size();
37#if defined(DNDS_DIST_MT_USE_OMP)
38# pragma omp parallel for schedule(static)
39#endif
40 for (index i = 0; i < iTop; i++)
41 self.operator[](i) = R;
42 }
43
44 template <int n_m, int n_n>
45 void ArrayDofOp<DeviceBackend::Host, n_m, n_n>::operator_plus_assign(t_self &self, const t_self &R)
46 {
47 DNDS_assert(self.father && self.son);
48 DNDS_assert(R.father && R.son);
49 index iTop = self.Size();
50#if defined(DNDS_DIST_MT_USE_OMP)
51# pragma omp parallel for schedule(static)
52#endif
53 for (index i = 0; i < iTop; i++)
54 self.operator[](i) += R.operator[](i);
55 }
56
57 template <int n_m, int n_n>
58 void ArrayDofOp<DeviceBackend::Host, n_m, n_n>::operator_plus_assign(t_self &self, real R)
59 {
60 DNDS_assert(self.father && self.son);
61 index iTop = self.Size();
62#if defined(DNDS_DIST_MT_USE_OMP)
63# pragma omp parallel for schedule(static)
64#endif
65 for (index i = 0; i < iTop; i++)
66 self.operator[](i).array() += R;
67 }
68
69 template <int n_m, int n_n>
70 void ArrayDofOp<DeviceBackend::Host, n_m, n_n>::operator_plus_assign(t_self &self, const Eigen::Ref<const t_element_mat> &R)
71 {
72 DNDS_assert(self.father && self.son);
73 index iTop = self.Size();
74#if defined(DNDS_DIST_MT_USE_OMP)
75# pragma omp parallel for schedule(static)
76#endif
77 for (index i = 0; i < iTop; i++)
78 self.operator[](i) += R;
79 }
80
81 template <int n_m, int n_n>
82 void ArrayDofOp<DeviceBackend::Host, n_m, n_n>::operator_minus_assign(t_self &self, const t_self &R)
83 {
84 DNDS_assert(self.father && self.son);
85 DNDS_assert(R.father && R.son);
86 index iTop = self.Size();
87#if defined(DNDS_DIST_MT_USE_OMP)
88# pragma omp parallel for schedule(static)
89#endif
90 for (index i = 0; i < iTop; i++)
91 self.operator[](i) -= R.operator[](i);
92 }
93
94 template <int n_m, int n_n>
95 void ArrayDofOp<DeviceBackend::Host, n_m, n_n>::operator_mult_assign(t_self &self, real R)
96 {
97 DNDS_assert(self.father && self.son);
98 if (R == 1)
99 return;
100 index iTop = self.Size();
101#if defined(DNDS_DIST_MT_USE_OMP)
102# pragma omp parallel for schedule(static)
103#endif
104 for (index i = 0; i < iTop; i++)
105 self.operator[](i) *= R;
106 }
107
108 template <int n_m, int n_n>
109 void ArrayDofOp<DeviceBackend::Host, n_m, n_n>::operator_mult_assign_scalar_arr(t_self &self, const ArrayDof<1, 1> &R)
110 {
111 DNDS_assert(self.father && self.son);
112 DNDS_assert(R.father && R.son);
113 index iTop = self.Size();
114#if defined(DNDS_DIST_MT_USE_OMP)
115# pragma omp parallel for schedule(static)
116#endif
117 for (index i = 0; i < iTop; i++)
118 self.operator[](i).array() *= R[i](0);
119 }
120
121 template <int n_m, int n_n>
122 void ArrayDofOp<DeviceBackend::Host, n_m, n_n>::operator_mult_assign(t_self &self, const Eigen::Ref<const t_element_mat> &R)
123 {
124 DNDS_assert(self.father && self.son);
125 index iTop = self.Size();
126#if defined(DNDS_DIST_MT_USE_OMP)
127# pragma omp parallel for schedule(static)
128#endif
129 for (index i = 0; i < iTop; i++)
130 self.operator[](i).array() *= R.array();
131 }
132
133 template <int n_m, int n_n>
134 void ArrayDofOp<DeviceBackend::Host, n_m, n_n>::operator_mult_assign(t_self &self, const t_self &R)
135 {
136 DNDS_assert(self.father && self.son);
137 DNDS_assert(R.father && R.son);
138 index iTop = self.Size();
139#if defined(DNDS_DIST_MT_USE_OMP)
140# pragma omp parallel for schedule(static)
141#endif
142 for (index i = 0; i < iTop; i++)
143 self.operator[](i).array() *= R.operator[](i).array();
144 }
145
146 template <int n_m, int n_n>
147 void ArrayDofOp<DeviceBackend::Host, n_m, n_n>::operator_div_assign(t_self &self, const t_self &R)
148 {
149 DNDS_assert(self.father && self.son);
150 DNDS_assert(R.father && R.son);
151 index iTop = self.Size();
152#if defined(DNDS_DIST_MT_USE_OMP)
153# pragma omp parallel for schedule(static)
154#endif
155 for (index i = 0; i < iTop; i++)
156 self.operator[](i).array() /= R.operator[](i).array();
157 }
158
159 template <int n_m, int n_n>
160 void ArrayDofOp<DeviceBackend::Host, n_m, n_n>::operator_assign(t_self &self, const t_self &R)
161 {
162 DNDS_assert(self.father && self.son);
163 DNDS_assert(R.father && R.son);
164 // TODO: OMP
165 // for (index i = 0; i < this->Size(); i++)
166 // this->operator[](i) = R.operator[](i);
167 DNDS_assert(R.father->RawDataVector().size() == self.father->RawDataVector().size());
168 DNDS_assert(R.son->RawDataVector().size() == self.son->RawDataVector().size());
169
170 // #if defined(DNDS_DIST_MT_USE_OMP)
171 // {
172 // size_t part_size = R.father->RawDataVector().size() / omp_get_max_threads();
173 // #pragma omp parallel for schedule(static)
174 // for (int iT = 0; iT < omp_get_max_threads(); iT++)
175 // std::copy(R.father->RawDataVector().begin() + part_size * iT,
176 // (iT == omp_get_max_threads() - 1)
177 // ? R.father->RawDataVector().end()
178 // : R.father->RawDataVector().begin() + part_size * (iT + 1),
179 // this->father->RawDataVector().begin() + part_size * iT);
180 // }
181 // {
182 // size_t part_size = R.son->RawDataVector().size() / omp_get_max_threads();
183 // #pragma omp parallel for schedule(static)
184 // for (int iT = 0; iT < omp_get_max_threads(); iT++)
185 // std::copy(R.son->RawDataVector().begin() + part_size * iT,
186 // (iT == omp_get_max_threads() - 1)
187 // ? R.son->RawDataVector().end()
188 // : R.son->RawDataVector().begin() + part_size * (iT + 1),
189 // this->son->RawDataVector().begin() + part_size * iT);
190 // }
191 // #else
192 std::copy(R.father->RawDataVector().begin(), R.father->RawDataVector().end(), self.father->RawDataVector().begin());
193 std::copy(R.son->RawDataVector().begin(), R.son->RawDataVector().end(), self.son->RawDataVector().begin());
194 // #endif
195 }
196
197 template <int n_m, int n_n>
198 void ArrayDofOp<DeviceBackend::Host, n_m, n_n>::addTo(t_self &self, const t_self &R, real r)
199 {
200 DNDS_assert(self.father && self.son);
201 DNDS_assert(R.father && R.son);
202 // for (index i = 0; i < this->Size(); i++)
203 // this->operator[](i) += R.operator[](i) * r;
204 DNDS_assert(R.father->RawDataVector().size() == self.father->RawDataVector().size());
205 auto &RVF = R.father->RawDataVector();
206 auto &TVF = self.father->RawDataVector();
207#if defined(DNDS_DIST_MT_USE_OMP)
208# pragma omp parallel for schedule(static)
209#endif
210 for (size_t i = 0; i < RVF.size(); i++)
211 TVF[i] += r * RVF[i];
212 DNDS_assert(R.son->RawDataVector().size() == self.son->RawDataVector().size());
213 auto &RVS = R.son->RawDataVector();
214 auto &TVS = self.son->RawDataVector();
215#if defined(DNDS_DIST_MT_USE_OMP)
216# pragma omp parallel for schedule(static)
217#endif
218 for (size_t i = 0; i < RVS.size(); i++)
219 TVS[i] += r * RVS[i];
220 }
221
222 template <int n_m, int n_n>
223 real ArrayDofOp<DeviceBackend::Host, n_m, n_n>::norm2(t_self &self)
224 {
225 DNDS_assert(self.father && self.son);
226 real sqrSum{0}, sqrSumAll{0};
227 index iTop = self.father->Size();
228#if defined(DNDS_DIST_MT_USE_OMP)
229# pragma omp parallel for schedule(static) reduction(+ : sqrSum)
230#endif
231 for (index i = 0; i < iTop; i++) //*note that only father is included
232 sqrSum += self.father->operator[](i).squaredNorm();
233 MPI::Allreduce(&sqrSum, &sqrSumAll, 1, DNDS_MPI_REAL, MPI_SUM, self.father->getMPI().comm);
234 // std::cout << "norm2is " << std::scientific << sqrSumAll << std::endl;
235 return std::sqrt(sqrSumAll);
236 }
237
238 template <int n_m, int n_n>
239 real ArrayDofOp<DeviceBackend::Host, n_m, n_n>::norm2(t_self &self, const t_self &R)
240 {
241 DNDS_assert(self.father && self.son);
242 real sqrSum{0}, sqrSumAll{0};
243 index iTop = self.father->Size();
244#if defined(DNDS_DIST_MT_USE_OMP)
245# pragma omp parallel for schedule(static) reduction(+ : sqrSum)
246#endif
247 for (index i = 0; i < iTop; i++) //*note that only father is included
248 sqrSum += (self.father->operator[](i) - R.father->operator[](i)).squaredNorm();
249 MPI::Allreduce(&sqrSum, &sqrSumAll, 1, DNDS_MPI_REAL, MPI_SUM, self.father->getMPI().comm);
250 // std::cout << "norm2is " << std::scientific << sqrSumAll << std::endl;
251 return std::sqrt(sqrSumAll);
252 }
253
254 template <int n_m, int n_n>
255 real ArrayDofOp<DeviceBackend::Host, n_m, n_n>::reduction(t_self &self, const std::string &op)
256 {
257 DNDS_assert(self.father && self.son);
258 real sqrSum{0}, sqrSumAll{0};
259 index iTop = self.father->DataSize();
260 if (op == "min")
261 {
262 sqrSum = sqrSumAll = std::numeric_limits<real>::max();
263#if defined(DNDS_DIST_MT_USE_OMP)
264# pragma omp parallel for schedule(static) reduction(min : sqrSum)
265#endif
266 for (index i = 0; i < iTop; i++) //*note that only father is included
267 sqrSum = std::min(sqrSum, self.father->data()[i]);
268 MPI::Allreduce(&sqrSum, &sqrSumAll, 1, DNDS_MPI_REAL, MPI_MIN, self.father->getMPI().comm);
269 }
270 else if (op == "max")
271 {
272 sqrSum = sqrSumAll = std::numeric_limits<real>::lowest();
273#if defined(DNDS_DIST_MT_USE_OMP)
274# pragma omp parallel for schedule(static) reduction(max : sqrSum)
275#endif
276 for (index i = 0; i < iTop; i++) //*note that only father is included
277 sqrSum = std::max(sqrSum, self.father->data()[i]);
278 MPI::Allreduce(&sqrSum, &sqrSumAll, 1, DNDS_MPI_REAL, MPI_MAX, self.father->getMPI().comm);
279 }
280 else if (op == "sum")
281 {
282 sqrSum = sqrSumAll = 0.0;
283#if defined(DNDS_DIST_MT_USE_OMP)
284# pragma omp parallel for schedule(static) reduction(+ : sqrSum)
285#endif
286 for (index i = 0; i < iTop; i++) //*note that only father is included
287 sqrSum = sqrSum + self.father->data()[i];
288 MPI::Allreduce(&sqrSum, &sqrSumAll, 1, DNDS_MPI_REAL, MPI_SUM, self.father->getMPI().comm);
289 }
290 else
291 DNDS_assert_info(false, op);
292 // std::cout << "norm2is " << std::scientific << sqrSumAll << std::endl;
293 return sqrSumAll;
294 }
295
    /// @brief Global (MPI-collective) per-component 1-norm: sums |entry| over
    /// all father rows on every rank, returning one element-shaped matrix.
    /// Only the father part is accumulated; son rows are excluded from the sum.
    /// @return element matrix whose (r,c) entry is sum_i |self[i](r,c)| over all ranks.
    template <int n_m, int n_n>
    typename ArrayDofOp<DeviceBackend::Host, n_m, n_n>::t_element_mat
    ArrayDofOp<DeviceBackend::Host, n_m, n_n>::componentWiseNorm1(t_self &self)
    {
        DNDS_assert(self.father && self.son);
        t_element_mat minLocal, min;
        //! let it fail if size not compatible
        // Pick the element shape: use the father's row-0 shape when the father is
        // non-empty, the sizes are compile-time fixed, or the son is empty too;
        // otherwise fall back to the son's row-0 shape.
        // NOTE(review): the extra `|| (self.son->Size() == 0)` term is absent from the
        // two-argument overload below — with an empty father, dynamic sizes, and an
        // empty son this calls MatRowSize(0) on the empty father; confirm intended.
        if (self.father->Size() || (n_m >= 0 && n_n >= 0) || (self.son->Size() == 0))
            minLocal.resize(self.father->MatRowSize(0), self.father->MatColSize(0));
        else if (self.son->Size())
            minLocal.resize(self.son->MatRowSize(0), self.son->MatColSize(0));
        minLocal.setConstant(0);
        min = minLocal;
        index iTop = self.father->Size();
#if defined(DNDS_DIST_MT_USE_OMP)
# pragma omp declare reduction(EigenVecAdd:t_element_mat : omp_out += omp_in) initializer(omp_priv = omp_orig)
# pragma omp parallel for schedule(static) reduction(EigenVecAdd : minLocal)
#endif
        for (index i = 0; i < iTop; i++) //*note that only father is included
            minLocal += (self.operator[](i).array().abs()).matrix();
        MPI::Allreduce(minLocal.data(), min.data(), minLocal.size(), DNDS_MPI_REAL, MPI_SUM, self.father->getMPI().comm);
        return min;
    }
319
320 template <int n_m, int n_n>
321 typename ArrayDofOp<DeviceBackend::Host, n_m, n_n>::t_element_mat
322 ArrayDofOp<DeviceBackend::Host, n_m, n_n>::componentWiseNorm1(t_self &self, const t_self &R)
323 {
324 DNDS_assert(self.father && self.son);
325 t_element_mat minLocal, min;
326 //! let it fail if size not compatible
327 if (self.father->Size() || (n_m >= 0 && n_n >= 0))
328 minLocal.resize(self.father->MatRowSize(0), self.father->MatColSize(0));
329 else if (self.son->Size())
330 minLocal.resize(self.son->MatRowSize(0), self.son->MatColSize(0));
331 minLocal.setConstant(0);
332 min = minLocal;
333 index iTop = self.father->Size();
334#if defined(DNDS_DIST_MT_USE_OMP)
335# pragma omp declare reduction(EigenVecAdd:t_element_mat : omp_out += omp_in) initializer(omp_priv = omp_orig)
336# pragma omp parallel for schedule(static) reduction(EigenVecAdd : minLocal)
337#endif
338 for (index i = 0; i < iTop; i++) //*note that only father is included
339 minLocal += ((self.operator[](i) - R.operator[](i)).array().abs()).matrix();
340 MPI::Allreduce(minLocal.data(), min.data(), minLocal.size(), DNDS_MPI_REAL, MPI_SUM, self.father->getMPI().comm);
341 return min;
342 }
343
344 template <int n_m, int n_n>
345 real ArrayDofOp<DeviceBackend::Host, n_m, n_n>::dot(t_self &self, const t_self &R)
346 {
347 DNDS_assert(self.father && self.son);
348 DNDS_assert(R.father && R.son);
349 real sqrSum{0}, sqrSumAll;
350 index iTop = self.father->Size();
351#if defined(DNDS_DIST_MT_USE_OMP)
352# pragma omp parallel for schedule(static) reduction(+ : sqrSum)
353#endif
354 for (index i = 0; i < iTop; i++) //*note that only father is included
355 sqrSum += (self.operator[](i).array() * (R.operator[](i)).array()).sum();
356 MPI::Allreduce(&sqrSum, &sqrSumAll, 1, DNDS_MPI_REAL, MPI_SUM, self.father->getMPI().comm);
357 return sqrSumAll;
358 }
359}
Degree-of-freedom array with vector-space operations (MPI-collective).
Core type aliases, constants, and metaprogramming utilities for the DNDS framework.
Assertion / error-handling macros and supporting helper functions.
#define DNDS_assert_info(expr, info)
Debug-only assertion with an extra std::string info message.
Definition Errors.hpp:113
#define DNDS_assert(expr)
Debug-only assertion (compiled out when DNDS_NDEBUG is defined). Prints the expression + file/line + ...
Definition Errors.hpp:108
MPI_int Allreduce(const void *sendbuf, void *recvbuf, MPI_int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
Wrapper over MPI_Allreduce.
Definition MPI.cpp:203
the host side operators are provided as implemented
int64_t index
Global row / DOF index type (signed 64-bit; handles multi-billion-cell meshes).
Definition Defines.hpp:107
double real
Canonical floating-point scalar used throughout DNDSR (double precision).
Definition Defines.hpp:105
const MPI_Datatype DNDS_MPI_REAL
MPI datatype matching real (= MPI_REAL8).
Definition MPI.hpp:92
tVec r(NCells)