32 template <DeviceBackend B,
int n_m,
int n_n>
46 template <DeviceBackend B,
int n_m,
int n_n>
59 template <
int n_m,
int n_n>
62 template <DeviceBackend B,
int n_m,
int n_n>
/// Scope qualifier placed between the return type and the function name of
/// every entry in DNDS_ARRAY_DOF_OP_FUNC_LIST. It is defined empty here, so the
/// list expands to unqualified declarations; further down the file it is
/// #undef'd and redefined as `ArrayDofOp<B, n_m, n_n>::`.
65#define DNDS_ARRAY_DOF_OP_FUNC_LIST_SCOPE(B, n_m, n_n)
/// Expands to the full list of ArrayDofOp operation declarations, one per line,
/// each terminated by `;`.
///
/// Every entry has the shape
///   `spec <return type> DNDS_ARRAY_DOF_OP_FUNC_LIST_SCOPE(B, n_m, n_n) <name>(t_self &self, ...)`
/// so the same list can be reused with different prefixes and scope qualifiers,
/// depending on how DNDS_ARRAY_DOF_OP_FUNC_LIST_SCOPE is defined at the point of
/// expansion.
///
/// @param B    Forwarded to the scope macro and used in the
///             `ArrayDofOp<B, n_m, n_n>::t_element_mat` return types.
/// @param n_m  Forwarded in the same way as @p B.
/// @param n_n  Forwarded in the same way as @p B.
/// @param spec Token sequence prepended verbatim to every declaration
///             (DNDS_ARRAY_DOF_OP_FUNC_SEQ_INST forwards its `exttmp` argument here).
///
/// The operations listed are: setConstant, operator_plus_assign,
/// operator_minus_assign, operator_mult_assign, operator_mult_assign_scalar_arr,
/// operator_div_assign, operator_assign, addTo, norm2, reduction,
/// componentWiseNorm1 and dot. All take the target array as an explicit
/// `t_self &self` first argument.
///
/// NOTE: the body is one logical line joined by `\` continuations, so no `//`
/// comment may be placed between the entries.
66#define DNDS_ARRAY_DOF_OP_FUNC_LIST(B, n_m, n_n, spec) \
67 spec void DNDS_ARRAY_DOF_OP_FUNC_LIST_SCOPE(B, n_m, n_n) setConstant(t_self &self, real R); \
68 spec void DNDS_ARRAY_DOF_OP_FUNC_LIST_SCOPE(B, n_m, n_n) setConstant(t_self &self, const Eigen::Ref<const t_element_mat> &R); \
69 spec void DNDS_ARRAY_DOF_OP_FUNC_LIST_SCOPE(B, n_m, n_n) operator_plus_assign(t_self &self, const t_self &R); \
70 spec void DNDS_ARRAY_DOF_OP_FUNC_LIST_SCOPE(B, n_m, n_n) operator_plus_assign(t_self &self, real R); \
71 spec void DNDS_ARRAY_DOF_OP_FUNC_LIST_SCOPE(B, n_m, n_n) operator_plus_assign(t_self &self, const Eigen::Ref<const t_element_mat> &R); \
72 spec void DNDS_ARRAY_DOF_OP_FUNC_LIST_SCOPE(B, n_m, n_n) operator_minus_assign(t_self &self, const t_self &R); \
73 spec void DNDS_ARRAY_DOF_OP_FUNC_LIST_SCOPE(B, n_m, n_n) operator_mult_assign(t_self &self, real R); \
74 spec void DNDS_ARRAY_DOF_OP_FUNC_LIST_SCOPE(B, n_m, n_n) operator_mult_assign_scalar_arr(t_self &self, const ArrayDof<1, 1> &R); \
75 spec void DNDS_ARRAY_DOF_OP_FUNC_LIST_SCOPE(B, n_m, n_n) operator_mult_assign(t_self &self, const Eigen::Ref<const t_element_mat> &R); \
76 spec void DNDS_ARRAY_DOF_OP_FUNC_LIST_SCOPE(B, n_m, n_n) operator_mult_assign(t_self &self, const t_self &R); \
77 spec void DNDS_ARRAY_DOF_OP_FUNC_LIST_SCOPE(B, n_m, n_n) operator_div_assign(t_self &self, const t_self &R); \
78 spec void DNDS_ARRAY_DOF_OP_FUNC_LIST_SCOPE(B, n_m, n_n) operator_assign(t_self &self, const t_self &R); \
79 spec void DNDS_ARRAY_DOF_OP_FUNC_LIST_SCOPE(B, n_m, n_n) addTo(t_self &self, const t_self &R, real r); \
80 spec real DNDS_ARRAY_DOF_OP_FUNC_LIST_SCOPE(B, n_m, n_n) norm2(t_self &self); \
81 spec real DNDS_ARRAY_DOF_OP_FUNC_LIST_SCOPE(B, n_m, n_n) norm2(t_self &self, const t_self &R); \
82 spec real DNDS_ARRAY_DOF_OP_FUNC_LIST_SCOPE(B, n_m, n_n) reduction(t_self &self, const std::string &op); \
83 spec ArrayDofOp<B, n_m, n_n>::t_element_mat DNDS_ARRAY_DOF_OP_FUNC_LIST_SCOPE(B, n_m, n_n) componentWiseNorm1(t_self &self); \
84 spec ArrayDofOp<B, n_m, n_n>::t_element_mat DNDS_ARRAY_DOF_OP_FUNC_LIST_SCOPE(B, n_m, n_n) componentWiseNorm1(t_self &self, const t_self &R); \
85 spec real DNDS_ARRAY_DOF_OP_FUNC_LIST_SCOPE(B, n_m, n_n) dot(t_self &self, const t_self &R);
97 template <
int n_m,
int n_n>
111 template <
int n_m,
int n_n>
122# define DNDS_ARRAY_OP_SWITCH_CUDA_CASE(expr) \
123 case DeviceBackend::CUDA: \
125 return (t_ops<DeviceBackend::CUDA>::expr); \
128# define DNDS_ARRAY_OP_SWITCH_CUDA_CASE(expr)
131#define DNDS_ARRAY_OP_SWITCHER(Backend, expr) \
134 case DeviceBackend::Host: \
135 case DeviceBackend::Unknown: \
137 return (t_ops<DeviceBackend::Host>::expr); \
139 DNDS_ARRAY_OP_SWITCH_CUDA_CASE(expr) \
141 DNDS_assert(false); \
142 return (t_ops<DeviceBackend::Host>::expr); \
170 template <
int n_m,
int n_n>
175 using t_base::t_base;
178 template <DeviceBackend B>
181 template <DeviceBackend B>
185 template <DeviceBackend B>
192 template <DeviceBackend B>
204 template <DeviceBackend B>
263 template <
int n_m_T = n_m>
264 std::enable_if_t<!(
n_m_T == 1 &&
n_n == 1)>
/// Re-point the scope qualifier used by DNDS_ARRAY_DOF_OP_FUNC_LIST: from here
/// on every entry of the list names the function as a member qualified with
/// `ArrayDofOp<B, n_m, n_n>::` instead of the earlier empty qualifier.
354#undef DNDS_ARRAY_DOF_OP_FUNC_LIST_SCOPE
355#define DNDS_ARRAY_DOF_OP_FUNC_LIST_SCOPE(B, n_m, n_n) ArrayDofOp<B, n_m, n_n>::
/// Expands DNDS_ARRAY_DOF_OP_FUNC_LIST once for each supported first size
/// argument: the fixed values 1 through 8, then DynamicSize and NonUniformSize.
/// The second size argument is the same in every expansion: `1 + (offset)`.
///
/// @param B      Backend argument forwarded unchanged to every expansion.
/// @param offset Added to 1 to form the third argument (`n_n`) of
///               DNDS_ARRAY_DOF_OP_FUNC_LIST; parenthesised so that an
///               expression argument keeps its own precedence.
/// @param exttmp Forwarded as the `spec` prefix of every generated declaration.
///               NOTE(review): the name suggests `extern template` / `template`
///               for explicit instantiation — confirm against the call sites.
357#define DNDS_ARRAY_DOF_OP_FUNC_SEQ_INST(B, offset, exttmp) \
358 DNDS_ARRAY_DOF_OP_FUNC_LIST(B, 1, 1 + (offset), exttmp); \
359 DNDS_ARRAY_DOF_OP_FUNC_LIST(B, 2, 1 + (offset), exttmp); \
360 DNDS_ARRAY_DOF_OP_FUNC_LIST(B, 3, 1 + (offset), exttmp); \
361 DNDS_ARRAY_DOF_OP_FUNC_LIST(B, 4, 1 + (offset), exttmp); \
362 DNDS_ARRAY_DOF_OP_FUNC_LIST(B, 5, 1 + (offset), exttmp); \
363 DNDS_ARRAY_DOF_OP_FUNC_LIST(B, 6, 1 + (offset), exttmp); \
364 DNDS_ARRAY_DOF_OP_FUNC_LIST(B, 7, 1 + (offset), exttmp); \
365 DNDS_ARRAY_DOF_OP_FUNC_LIST(B, 8, 1 + (offset), exttmp); \
366 DNDS_ARRAY_DOF_OP_FUNC_LIST(B, DynamicSize, 1 + (offset), exttmp); \
367 DNDS_ARRAY_DOF_OP_FUNC_LIST(B, NonUniformSize, 1 + (offset), exttmp);
#define DNDS_ARRAY_DOF_OP_FUNC_SEQ_INST(B, offset, exttmp)
#define DNDS_ARRAY_DOF_OP_FUNC_LIST(B, n_m, n_n, spec)
#define DNDS_ARRAY_OP_SWITCHER(Backend, expr)
Host-side implementations of #ArrayDofOp vector-space operations.
Eigen-matrix array: each row is an Eigen::Map<Matrix> over contiguous real storage.
Eigen-vector array: each row is an Eigen::Map over contiguous real storage.
Father-son array pairs with device views and ghost communication.
Core type aliases, constants, and metaprogramming utilities for the DNDS framework.
#define DNDS_DEVICE_TRIVIAL_COPY_DEFINE(T, T_Self)
#define DNDS_DEVICE_CALLABLE
Device memory abstraction layer with backend-specific storage and factory creation.
Const device view of an ArrayDof father/son pair.
DNDS_DEVICE_CALLABLE ArrayDofDeviceViewConst(t_base &&base_view)
Mutable device view of an ArrayDof father/son pair.
DNDS_DEVICE_CALLABLE ArrayDofDeviceView(t_base &&base_view)
Eigen::Matrix< real, RowSize_To_EigenSize(n_m), RowSize_To_EigenSize(n_n)> t_element_mat
Primary solver state container: an ArrayEigenMatrix pair with MPI-collective vector-space operations.
real norm2()
Global L2 norm (MPI-collective). sqrt(sum_i sum_j x_ij^2).
t_element_mat componentWiseNorm1()
Per-component global L1 norm, returned as an n_m x n_n matrix (collective).
void operator-=(const t_self &R)
In-place element-wise subtract: this -= R.
real dot(const t_self &R)
Global inner product: sum_i sum_j x_ij * R_ij (collective).
void operator+=(real R)
Add the scalar R to every entry.
real max()
Global maximum across all entries (collective).
void operator+=(const t_self &R)
In-place element-wise add: this += R.
void clone(const t_self &R)
Deep copy from another ArrayDof. Delegates to the base-class clone.
t_element_mat componentWiseNorm1(const t_self &R)
Per-component global L1 distance between this and R (collective).
t_deviceView< B > deviceView()
Build a mutable device view (wraps the base-class implementation).
t_deviceViewConst< B > deviceView() const
Build a const device view.
real min()
Global minimum across all entries (collective).
real norm2(const t_self &R)
Global L2 distance between this and R (collective).
void operator/=(const t_self &R)
Element-wise divide: this /= R.
void operator*=(real R)
Scalar multiply in place.
void setConstant(real R)
Set every entry of every (father+son) row to the scalar R.
void operator*=(const Eigen::Ref< const t_element_mat > &R)
In-place multiplication by a small fixed matrix (same applied to every row).
void operator*=(const t_self &R)
Element-wise multiply: this *= R (Hadamard).
void setConstant(const Eigen::Ref< const t_element_mat > &R)
Set every row to the matrix R (must have shape n_m x n_n).
void operator+=(const Eigen::Ref< const t_element_mat > &R)
Add a per-row matrix R (same to every row).
void addTo(const t_self &R, real r)
AXPY: this += r * R. One of the hot-path solver primitives.
real sum()
Global sum of all entries (collective).
ArrayDofDeviceView< B, n_m, n_n > t_deviceView
Mutable device view alias.
Eigen::Matrix< real, RowSize_To_EigenSize(n_m), RowSize_To_EigenSize(n_n)> t_element_mat
Shape of one DOF row as an Eigen matrix.
void operator=(const t_self &R)
Value-copy assignment from another ArrayDof of identical layout.
The host-side operators are provided as implemented.
DeviceBackend
Enumerates the backends a DeviceStorage / Array can live on.
constexpr int RowSize_To_EigenSize(rowsize rs)
Convert a rowsize constant to the corresponding Eigen compile-time size. Fixed >= 0 -> the value; Dyn...
DNDS_CONSTANT const rowsize DynamicSize
Template parameter flag: "row width is set at runtime but uniform".
DNDS_CONSTANT const rowsize NonUniformSize
Template parameter flag: "each row has an independent width".
double real
Canonical floating-point scalar used throughout DNDSR (double precision).
Const device view of a father-son array pair.
Mutable device view onto an ArrayPair (for CUDA kernels).
Convenience bundle of a father, son, and attached ArrayTransformer.
void to_device(DeviceBackend backend)
Mirror both father and son to the given device backend.
void to_host()
Bring both father and son mirrors back to host memory.
ssp< TArray > father
Owned-side array (must be resized before ghost setup).
void clone(const t_self &R)
Deep-copy: allocate new father / son and copy their data; rebind trans.