Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 47 additions & 3 deletions Common/include/geometry/CGeometry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,17 @@ class CGeometry {

CEdgeToNonZeroMapUL edgeToCSRMap; /*!< \brief Map edges to CSR entries referenced by them (i,j) and (j,i). */

CCompressedSparsePatternUL finiteVolumeLSparse; /*!< \brief Strictly-lower 0-fill FVM sparsity (L of LDU). */
CCompressedSparsePatternUL finiteVolumeUSparse; /*!< \brief Strictly-upper 0-fill FVM sparsity (U of LDU). */
CCompressedSparsePatternUL finiteElementLSparse; /*!< \brief Strictly-lower 0-fill FEM sparsity (L of LDU). */
CCompressedSparsePatternUL finiteElementUSparse; /*!< \brief Strictly-upper 0-fill FEM sparsity (U of LDU). */
su2vector<unsigned long>
edgeToLMap; /*!< \brief Map from edge index to L-pattern index (U-index == edge index by construction). */
su2vector<unsigned long> finiteVolumeLToUTranspMap; /*!< \brief FVM L-entry -> U-entry of its transpose. */
su2vector<unsigned long> finiteVolumeUToLTranspMap; /*!< \brief FVM U-entry -> L-entry of its transpose. */
su2vector<unsigned long> finiteElementLToUTranspMap; /*!< \brief FEM L-entry -> U-entry of its transpose. */
su2vector<unsigned long> finiteElementUToLTranspMap; /*!< \brief FEM U-entry -> L-entry of its transpose. */

/*--- Edge and element colorings. ---*/

CCompressedSparsePatternUL edgeColoring, /*!< \brief Edge coloring structure for thread-based parallelization. */
Expand Down Expand Up @@ -1878,11 +1889,44 @@ class CGeometry {
const CEdgeToNonZeroMapUL& GetEdgeToSparsePatternMap();

/*!
* \brief Get the transpose of the (main, i.e 0 fill) sparse pattern (e.g. CSR becomes CSC).
* \brief Get the strictly-lower CSR pattern (L of LDU split) for the 0-fill pattern.
* \note Builds the pattern (and the full 0-fill pattern with diagonal pointer) if needed.
* \param[in] type - Finite volume or finite element.
* \return Reference to the map.
* \return Reference to the strictly-lower pattern.
*/
const CCompressedSparsePatternUL& GetLSparsePattern(ConnectivityType type);

/*!
* \brief Get the strictly-upper CSR pattern (U of LDU split) for the 0-fill pattern.
* \note Builds the pattern (and the full 0-fill pattern with diagonal pointer) if needed.
* \param[in] type - Finite volume or finite element.
* \return Reference to the strictly-upper pattern.
*/
const CCompressedSparsePatternUL& GetUSparsePattern(ConnectivityType type);

/*!
* \brief Get the edge→L-index map for the FVM LDU-split pattern.
* \note The U-index equals the edge index by construction (edges are ordered to match the U pattern 1:1).
* This is verified with a debug check the first time this map is built.
* \return Reference to the per-edge L-index array.
*/
const su2vector<unsigned long>& GetEdgeToLSparsePatternMap();

/*!
* \brief Get the bijective map from L-entry indices to U-entry indices of their transposes.
* \note Requires symmetric pattern. Builds both LU transpose maps if not already built.
* \param[in] type - Finite volume or finite element.
* \return Reference to the l_to_u map.
*/
const su2vector<unsigned long>& GetLToUTransposeSparsePatternMap(ConnectivityType type);

/*!
* \brief Get the bijective map from U-entry indices to L-entry indices of their transposes.
* \note Requires symmetric pattern. Builds both LU transpose maps if not already built.
* \param[in] type - Finite volume or finite element.
* \return Reference to the u_to_l map.
*/
const su2vector<unsigned long>& GetTransposeSparsePatternMap(ConnectivityType type);
const su2vector<unsigned long>& GetUToLTransposeSparsePatternMap(ConnectivityType type);

/*!
* \brief Get the edge coloring.
Expand Down
114 changes: 78 additions & 36 deletions Common/include/linear_algebra/CSysMatrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,18 +138,43 @@ class CSysMatrix {
unsigned long nVar; /*!< \brief Number of variables (and rows of the blocks). */
unsigned long nEqn; /*!< \brief Number of equations (and columns of the blocks). */

ScalarType* matrix; /*!< \brief Entries of the sparse matrix. */
unsigned long nnz; /*!< \brief Number of possible nonzero entries in the matrix. */
const unsigned long* row_ptr; /*!< \brief Pointers to the first element in each row. */
const unsigned long* dia_ptr; /*!< \brief Pointers to the diagonal element in each row. */
const unsigned long* col_ind; /*!< \brief Column index for each of the elements in val(). */
const unsigned long* col_ptr; /*!< \brief The transpose of col_ind, pointer to blocks with the same column index. */

ScalarType* d_matrix; /*!< \brief Device Pointer to store the matrix values on the GPU. */
const unsigned long* d_row_ptr; /*!< \brief Device Pointers to the first element in each row. */
const unsigned long* d_col_ind; /*!< \brief Device Column index for each of the elements in val(). */
bool useCuda = false; /*!< \brief Boolean that indicates whether user has enabled CUDA or not.
Mainly used to conditionally free GPU memory in the class destructor. */
const unsigned long*
row_ptr; /*!< \brief Row pointers for the full unified CSR (geometry-owned; used by PaStiX and ILU-n). */
const unsigned long* col_ind; /*!< \brief Column indices for the full unified CSR (geometry-owned). */

ScalarType* matrix_d; /*!< \brief Diagonal blocks of the LDU split (nPoint * nVar * nEqn). */
ScalarType* matrix_l; /*!< \brief Strictly-lower off-diagonal blocks (nnz_l * nVar * nEqn). */
ScalarType* matrix_u; /*!< \brief Strictly-upper off-diagonal blocks (nnz_u * nVar * nEqn). */

ScalarType* d_matrix_d; /*!< \brief Device diagonal blocks. */
ScalarType* d_matrix_l; /*!< \brief Device strictly-lower blocks. */
ScalarType* d_matrix_u; /*!< \brief Device strictly-upper blocks. */
const unsigned long* d_row_ptr_l; /*!< \brief Device row pointers for the L pattern. */
const unsigned long* d_col_ind_l; /*!< \brief Device column indices for the L pattern. */
const unsigned long* d_row_ptr_u; /*!< \brief Device row pointers for the U pattern. */
const unsigned long* d_col_ind_u; /*!< \brief Device column indices for the U pattern. */
bool useCuda = false; /*!< \brief Whether CUDA is enabled. */

const unsigned long* row_ptr_l; /*!< \brief Row pointers for the strictly-lower CSR pattern. */
const unsigned long* col_ind_l; /*!< \brief Column indices for the strictly-lower CSR pattern. */
unsigned long nnz_l; /*!< \brief Number of non-zeros in the strictly-lower part. */
const unsigned long* row_ptr_u; /*!< \brief Row pointers for the strictly-upper CSR pattern. */
const unsigned long* col_ind_u; /*!< \brief Column indices for the strictly-upper CSR pattern. */
unsigned long nnz_u; /*!< \brief Number of non-zeros in the strictly-upper part. */
const unsigned long* l_to_u_transp; /*!< \brief L-entry index -> U-entry index of its transpose. */
const unsigned long* u_to_l_transp; /*!< \brief U-entry index -> L-entry index of its transpose. */

/*!
* \brief Lookup table from edges to the (U-index, L-index) pair in the LDU split.
* U-index == edge index (edges are ordered 1:1 with the U pattern by construction).
* edge_ptr_lu.l(iEdge) = L-index for (GetNode(1), GetNode(0)).
*/
struct {
const unsigned long* ptr = nullptr; /*!< ptr[iEdge] = L-index; U-index is iEdge itself. */
unsigned long nEdge = 0;
operator bool() const { return nEdge != 0; }
inline unsigned long l(unsigned long edge) const { return ptr[edge]; }
} edge_ptr_lu;

ScalarType* ILU_matrix; /*!< \brief Entries of the ILU sparse matrix. */
unsigned long nnz_ilu; /*!< \brief Number of possible nonzero entries in the matrix (ILU). */
Expand Down Expand Up @@ -285,6 +310,13 @@ class CSysMatrix {
for (auto iVar = 0ul; iVar < nVar * nEqn; ++iVar) mat[iVar] = 0;
}

/*!
* \brief Writes the LDU blocks into a flat buffer in unified CSR row order (L blocks, diagonal, U blocks per row).
* Used to feed PaStiX which expects a standard CSR layout.
* \param[out] out - Buffer of size (nnz_l + nnz_u + nPoint) * nVar * nEqn.
*/
void GatherCSR(ScalarType* out) const;

/*!
* \brief Solve a small (nVar x nVar) linear system using Gaussian elimination.
* \param[in,out] matrix - On entry the system matrix, on exit the factorized matrix.
Expand Down Expand Up @@ -372,6 +404,9 @@ class CSysMatrix {
*/
void RowProduct(const CSysVector<ScalarType>& vec, unsigned long row_i, ScalarType* prod) const;

FORCEINLINE ScalarType* GetDiagBlock(unsigned long i) { return &matrix_d[i * nVar * nEqn]; }
FORCEINLINE const ScalarType* GetDiagBlock(unsigned long i) const { return &matrix_d[i * nVar * nEqn]; }

public:
/*!
* \brief Constructor of the class.
Expand All @@ -392,7 +427,7 @@ class CSysMatrix {
* \param[in] neqn - Number of equations (and columns of the blocks).
* \param[in] geometry - Geometrical definition of the problem.
* \param[in] config - Definition of the particular problem.
* \param[in] needTranspPtr - If "col_ptr" should be created, used for "SetDiagonalAsColumnSum".
* \param[in] needTranspPtr - If the L/U transpose maps should be built, used for "SetDiagonalAsColumnSum".
*/
void Initialize(unsigned long npoint, unsigned long npointdomain, unsigned short nvar, unsigned short neqn,
bool EdgeConnect, CGeometry* geometry, const CConfig* config, bool needTranspPtr = false,
Expand Down Expand Up @@ -421,10 +456,14 @@ class CSysMatrix {
* \return Pointer to location in memory where the block starts.
*/
FORCEINLINE const ScalarType* GetBlock(unsigned long block_i, unsigned long block_j) const {
/*--- The position of the diagonal block is known which allows halving the search space. ---*/
const auto end = (block_j < block_i) ? dia_ptr[block_i] : row_ptr[block_i + 1];
for (auto index = (block_j < block_i) ? row_ptr[block_i] : dia_ptr[block_i]; index < end; ++index)
if (col_ind[index] == block_j) return &matrix[index * nVar * nEqn];
if (block_i == block_j) return &matrix_d[block_i * nVar * nEqn];
if (block_j < block_i) {
for (auto index = row_ptr_l[block_i]; index < row_ptr_l[block_i + 1]; ++index)
if (col_ind_l[index] == block_j) return &matrix_l[index * nVar * nEqn];
return nullptr;
}
for (auto index = row_ptr_u[block_i]; index < row_ptr_u[block_i + 1]; ++index)
if (col_ind_u[index] == block_j) return &matrix_u[index * nVar * nEqn];
return nullptr;
}

Expand Down Expand Up @@ -574,10 +613,11 @@ class CSysMatrix {
*/
inline void GetBlocks(unsigned long iEdge, unsigned long iPoint, unsigned long jPoint, ScalarType*& bii,
ScalarType*& bij, ScalarType*& bji, ScalarType*& bjj) {
bii = &matrix[dia_ptr[iPoint] * nVar * nEqn];
bjj = &matrix[dia_ptr[jPoint] * nVar * nEqn];
bij = &matrix[edge_ptr(iEdge, 0) * nVar * nEqn];
bji = &matrix[edge_ptr(iEdge, 1) * nVar * nEqn];
bii = GetDiagBlock(iPoint);
bjj = GetDiagBlock(jPoint);
const auto blkSz = nVar * nEqn;
bij = &matrix_u[iEdge * blkSz];
bji = &matrix_l[edge_ptr_lu.l(iEdge) * blkSz];
}

/*!
Expand Down Expand Up @@ -647,10 +687,10 @@ class CSysMatrix {
if (mask[k] == 0) continue;

/*--- Fetch the blocks. ---*/
auto bii = &matrix[dia_ptr[iPoint[k]] * blkSz];
auto bjj = &matrix[dia_ptr[jPoint[k]] * blkSz];
auto bij = &matrix[edge_ptr(iEdge[k], 0) * blkSz];
auto bji = &matrix[edge_ptr(iEdge[k], 1) * blkSz];
auto bii = GetDiagBlock(iPoint[k]);
auto bjj = GetDiagBlock(jPoint[k]);
ScalarType* bij = &matrix_u[iEdge[k] * blkSz];
ScalarType* bji = &matrix_l[edge_ptr_lu.l(iEdge[k]) * blkSz];

/*--- Update, block i was negated during transpose in the
* hope the assignments below become non-temporal stores. ---*/
Expand All @@ -677,8 +717,9 @@ class CSysMatrix {
template <class MatrixType, class OtherType = ScalarType, bool Overwrite = true>
inline void SetBlocks(unsigned long iEdge, const MatrixType& block_i, const MatrixType& block_j,
OtherType scale = 1) {
ScalarType* bij = &matrix[edge_ptr(iEdge, 0) * nVar * nEqn];
ScalarType* bji = &matrix[edge_ptr(iEdge, 1) * nVar * nEqn];
const auto blkSz = nVar * nEqn;
ScalarType* bij = &matrix_u[iEdge * blkSz];
ScalarType* bji = &matrix_l[edge_ptr_lu.l(iEdge) * blkSz];

unsigned long iVar, jVar, offset = 0;

Expand Down Expand Up @@ -737,8 +778,8 @@ class CSysMatrix {
if (mask[k] == 0) continue;

/*--- Fetch the blocks. ---*/
auto bij = &matrix[edge_ptr(iEdge[k], 0) * blkSz];
auto bji = &matrix[edge_ptr(iEdge[k], 1) * blkSz];
ScalarType* bij = &matrix_u[iEdge[k] * blkSz];
ScalarType* bji = &matrix_l[edge_ptr_lu.l(iEdge[k]) * blkSz];

/*--- Update, block i was negated during transpose in the
* hope the assignments below become non-temporal stores. ---*/
Expand All @@ -760,7 +801,7 @@ class CSysMatrix {
*/
template <class OtherType, bool Overwrite = true, class T = ScalarType>
inline void SetBlock2Diag(unsigned long block_i, const OtherType& val_block, T alpha = 1.0) {
auto mat_ii = &matrix[dia_ptr[block_i] * nVar * nEqn];
auto mat_ii = GetDiagBlock(block_i);

for (auto iVar = 0ul; iVar < nVar; iVar++)
for (auto jVar = 0ul; jVar < nEqn; jVar++) {
Expand Down Expand Up @@ -793,8 +834,8 @@ class CSysMatrix {
*/
template <class OtherType>
inline void AddVal2Diag(unsigned long block_i, OtherType val_matrix) {
for (auto iVar = 0ul; iVar < nVar; iVar++)
matrix[dia_ptr[block_i] * nVar * nVar + iVar * (nVar + 1)] += PassiveAssign(val_matrix);
auto* d = GetDiagBlock(block_i);
for (auto iVar = 0ul; iVar < nVar; iVar++) d[iVar * (nVar + 1)] += PassiveAssign(val_matrix);
}

/*!
Expand All @@ -806,7 +847,7 @@ class CSysMatrix {
*/
template <class OtherType>
inline void AddVal2Diag(unsigned long block_i, unsigned long iVar, OtherType val) {
matrix[dia_ptr[block_i] * nVar * nVar + iVar * (nVar + 1)] += PassiveAssign(val);
GetDiagBlock(block_i)[iVar * (nVar + 1)] += PassiveAssign(val);
}

/*!
Expand All @@ -817,13 +858,14 @@ class CSysMatrix {
*/
template <class OtherType>
inline void SetVal2Diag(unsigned long block_i, OtherType val_matrix) {
unsigned long iVar, index = dia_ptr[block_i] * nVar * nVar;
auto* d = GetDiagBlock(block_i);
unsigned long iVar;

/*--- Clear entire block before setting its diagonal. ---*/
SU2_OMP_SIMD
for (iVar = 0; iVar < nVar * nVar; iVar++) matrix[index + iVar] = 0.0;
for (iVar = 0; iVar < nVar * nVar; iVar++) d[iVar] = 0.0;

for (iVar = 0; iVar < nVar; iVar++) matrix[index + iVar * (nVar + 1)] = PassiveAssign(val_matrix);
for (iVar = 0; iVar < nVar; iVar++) d[iVar * (nVar + 1)] = PassiveAssign(val_matrix);
}

/*!
Expand Down
29 changes: 16 additions & 13 deletions Common/include/linear_algebra/CSysMatrix.inl
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ template <class ScalarType>
FORCEINLINE void CSysMatrix<ScalarType>::Gauss_Elimination(unsigned long block_i, ScalarType* rhs) const {
/*--- Copy block, as the algorithm modifies the matrix ---*/
ScalarType block[MAXNVAR * MAXNVAR];
MatrixCopy(&matrix[dia_ptr[block_i] * nVar * nVar], block);
MatrixCopy(GetDiagBlock(block_i), block);

Gauss_Elimination(block, rhs);
}
Expand All @@ -154,7 +154,7 @@ template <class ScalarType>
FORCEINLINE void CSysMatrix<ScalarType>::InverseDiagonalBlock(unsigned long block_i, ScalarType* invBlock) const {
/*--- Copy block, as the algorithm modifies the matrix ---*/
ScalarType block[MAXNVAR * MAXNVAR];
MatrixCopy(&matrix[dia_ptr[block_i] * nVar * nVar], block);
MatrixCopy(GetDiagBlock(block_i), block);

MatrixInverse(block, invBlock);
}
Expand All @@ -174,22 +174,25 @@ FORCEINLINE void CSysMatrix<ScalarType>::RowProduct(const CSysVector<ScalarType>
ScalarType* prod) const {
for (auto iVar = 0ul; iVar < nVar; iVar++) prod[iVar] = 0.0;

for (auto index = row_ptr[row_i]; index < row_ptr[row_i + 1]; index++) {
auto col_j = col_ind[index];
MatrixVectorProductAdd(&matrix[index * nVar * nEqn], &vec[col_j * nEqn], prod);
}
for (auto index = row_ptr_l[row_i]; index < row_ptr_l[row_i + 1]; index++)
MatrixVectorProductAdd(&matrix_l[index * nVar * nEqn], &vec[col_ind_l[index] * nEqn], prod);

MatrixVectorProductAdd(GetDiagBlock(row_i), &vec[row_i * nEqn], prod);

for (auto index = row_ptr_u[row_i]; index < row_ptr_u[row_i + 1]; index++)
MatrixVectorProductAdd(&matrix_u[index * nVar * nEqn], &vec[col_ind_u[index] * nEqn], prod);
}

template <class ScalarType>
FORCEINLINE void CSysMatrix<ScalarType>::UpperProduct(const CSysVector<ScalarType>& vec, unsigned long row_i,
unsigned long col_ub, ScalarType* prod) const {
for (auto iVar = 0ul; iVar < nVar; iVar++) prod[iVar] = 0.0;

for (auto index = dia_ptr[row_i] + 1; index < row_ptr[row_i + 1]; index++) {
auto col_j = col_ind[index];
for (auto index = row_ptr_u[row_i]; index < row_ptr_u[row_i + 1]; index++) {
auto col_j = col_ind_u[index];
/*--- Always include halos. ---*/
if (col_j < col_ub || col_j >= nPointDomain)
MatrixVectorProductAdd(&matrix[index * nVar * nEqn], &vec[col_j * nEqn], prod);
MatrixVectorProductAdd(&matrix_u[index * nVar * nEqn], &vec[col_j * nEqn], prod);
}
}

Expand All @@ -198,14 +201,14 @@ FORCEINLINE void CSysMatrix<ScalarType>::LowerProduct(const CSysVector<ScalarTyp
unsigned long col_lb, ScalarType* prod) const {
for (auto iVar = 0ul; iVar < nVar; iVar++) prod[iVar] = 0.0;

for (auto index = row_ptr[row_i]; index < dia_ptr[row_i]; index++) {
auto col_j = col_ind[index];
if (col_j >= col_lb) MatrixVectorProductAdd(&matrix[index * nVar * nEqn], &vec[col_j * nEqn], prod);
for (auto index = row_ptr_l[row_i]; index < row_ptr_l[row_i + 1]; index++) {
auto col_j = col_ind_l[index];
if (col_j >= col_lb) MatrixVectorProductAdd(&matrix_l[index * nVar * nEqn], &vec[col_j * nEqn], prod);
}
}

template <class ScalarType>
FORCEINLINE void CSysMatrix<ScalarType>::DiagonalProduct(const CSysVector<ScalarType>& vec, unsigned long row_i,
ScalarType* prod) const {
MatrixVectorProduct(&matrix[dia_ptr[row_i] * nVar * nEqn], &vec[row_i * nEqn], prod);
MatrixVectorProduct(GetDiagBlock(row_i), &vec[row_i * nEqn], prod);
}
Loading
Loading