su2code · pcarruscag · Jun 27, 2026 · Jun 27, 2026 · Jun 27, 2026 · Jun 29, 2026
diff --git a/Common/include/basic_types/datatype_structure.hpp b/Common/include/basic_types/datatype_structure.hpp
@@ -135,6 +135,10 @@ template <>
 struct Passive<su2double> {
   FORCEINLINE static passivedouble Value(const su2double& val) { return GetValue(val); }
 };
+template <class T>
+FORCEINLINE auto PassiveValue(const T& val) {
+  return Passive<T>::Value(val);
+}
 
 /*!
  * \brief Casts the primitive value to int (uses GetValue, already implemented for each type).

diff --git a/Common/include/linear_algebra/CSysMatrix.hpp b/Common/include/linear_algebra/CSysMatrix.hpp
@@ -33,6 +33,7 @@
 #include "CPastixWrapper.hpp"
 #include "../toolboxes/graph_toolbox.hpp"
 
+#include <cstdint>
 #include <cstdlib>
 #include <vector>
 #include <cassert>
@@ -125,6 +126,16 @@ class CSysMatrix {
    * size is needed for fast, per-thread, static memory allocation. */
   enum : size_t { MAXNVAR = 20 };
 
+  /*--- Quantized off-diagonal storage. ---*/
+  using QuantType = int8_t;
+  /*! \brief When true, off-diagonal blocks are read from the quantized representation
+   *         during matrix-vector products and LU-SGS; the diagonal block always uses
+   *         the original full-precision storage. Call QuantizeOffDiagonalBlocks() first. */
+  static constexpr bool USE_QUANTIZATION = true;
+
+  QuantType* q_scale;   /*!< \brief Per-row signed binary exponent e; scale = 2^e, [nnz * nVar]. */
+  QuantType* q_offdiag; /*!< \brief Quantized entries per block, [nnz * nVar * nVar]. */
+
   enum { OMP_MAX_SIZE_L = 8192 }; /*!< \brief Max. chunk size used in light parallel for loops. */
   enum { OMP_MAX_SIZE_H = 512 };  /*!< \brief Max. chunk size used in heavy parallel for loops. */
   enum { OMP_MIN_SIZE = 32 };     /*!< \brief Chunk size for finer grain operations. */
@@ -372,6 +383,15 @@ class CSysMatrix {
    */
   void RowProduct(const CSysVector<ScalarType>& vec, unsigned long row_i, ScalarType* prod) const;
 
+  /*!
+   * \brief Computes product += A_k * vec using the quantized representation of block k.
+   * \note Only valid after QuantizeOffDiagonalBlocks() has been called.
+   * \param[in] k - Block index in the CSR flat storage.
+   * \param[in] vec - Input vector (nEqn entries).
+   * \param[in,out] prod - Accumulation output (nVar entries).
+   */
+  inline void QuantizedMatVecAdd(unsigned long k, const ScalarType* vec, ScalarType* prod) const;
+
  public:
   /*!
    * \brief Constructor of the class.
@@ -398,6 +418,11 @@ class CSysMatrix {
                   bool EdgeConnect, CGeometry* geometry, const CConfig* config, bool needTranspPtr = false,
                   bool grad_mode = false);
 
+  /*!
+   * \brief Compresses off-diagonal blocks into quantized form for use with USE_QUANTIZATION.
+   */
+  void QuantizeOffDiagonalBlocks();
+
   /*!
    * \brief Sets to zero all the entries of the sparse matrix.
    */

diff --git a/Common/include/linear_algebra/CSysMatrix.inl b/Common/include/linear_algebra/CSysMatrix.inl
@@ -145,8 +145,19 @@ template <class ScalarType>
 FORCEINLINE void CSysMatrix<ScalarType>::Gauss_Elimination(unsigned long block_i, ScalarType* rhs) const {
   /*--- Copy block, as the algorithm modifies the matrix ---*/
   ScalarType block[MAXNVAR * MAXNVAR];
-  MatrixCopy(&matrix[dia_ptr[block_i] * nVar * nVar], block);
-
+  if (USE_QUANTIZATION && nVar > 1) {
+    const auto k = dia_ptr[block_i];
+    const QuantType* __restrict qscale = &q_scale[k * nVar];
+    const QuantType* __restrict q = &q_offdiag[k * nVar * nVar];
+    for (auto r = 0ul; r < nVar; ++r) {
+      const uint32_t rs_bits = static_cast<uint32_t>(std::max(0, static_cast<int>(qscale[r]) + 127)) << 23;
+      float row_scale;
+      memcpy(&row_scale, &rs_bits, sizeof(rs_bits));
+      for (auto c = 0ul; c < nVar; ++c) block[r * nVar + c] = row_scale * q[r * nVar + c];
+    }
+  } else {
+    MatrixCopy(&matrix[dia_ptr[block_i] * nVar * nVar], block);
+  }
   Gauss_Elimination(block, rhs);
 }
 
@@ -169,14 +180,31 @@ FORCEINLINE const ScalarType* CSysMatrix<ScalarType>::InvertDiagonalBlockILUMatr
   return Uii;
 }
 
+template <class ScalarType>
+FORCEINLINE void CSysMatrix<ScalarType>::QuantizedMatVecAdd(unsigned long k, const ScalarType* vec,
+                                                            ScalarType* prod) const {
+  const QuantType* __restrict qscale = &q_scale[k * nVar];
+  const QuantType* __restrict q = &q_offdiag[k * nVar * nVar];
+  for (auto r = 0ul; r < nVar; ++r) {
+    const auto row_scale = ldexpf(1, static_cast<int>(qscale[r]));
+    auto sum = ScalarType(0);
+    for (auto c = 0ul; c < nVar; ++c) sum += q[r * nVar + c] * vec[c];
+    prod[r] += row_scale * sum;
+  }
+}
+
 template <class ScalarType>
 FORCEINLINE void CSysMatrix<ScalarType>::RowProduct(const CSysVector<ScalarType>& vec, unsigned long row_i,
                                                     ScalarType* prod) const {
   for (auto iVar = 0ul; iVar < nVar; iVar++) prod[iVar] = 0.0;
 
   for (auto index = row_ptr[row_i]; index < row_ptr[row_i + 1]; index++) {
     auto col_j = col_ind[index];
-    MatrixVectorProductAdd(&matrix[index * nVar * nEqn], &vec[col_j * nEqn], prod);
+    if (USE_QUANTIZATION && nVar > 1) {
+      QuantizedMatVecAdd(index, &vec[col_j * nEqn], prod);
+    } else {
+      MatrixVectorProductAdd(&matrix[index * nVar * nEqn], &vec[col_j * nEqn], prod);
+    }
   }
 }
 
@@ -188,8 +216,13 @@ FORCEINLINE void CSysMatrix<ScalarType>::UpperProduct(const CSysVector<ScalarTyp
   for (auto index = dia_ptr[row_i] + 1; index < row_ptr[row_i + 1]; index++) {
     auto col_j = col_ind[index];
     /*--- Always include halos. ---*/
-    if (col_j < col_ub || col_j >= nPointDomain)
-      MatrixVectorProductAdd(&matrix[index * nVar * nEqn], &vec[col_j * nEqn], prod);
+    if (col_j < col_ub || col_j >= nPointDomain) {
+      if (USE_QUANTIZATION && nVar > 1) {
+        QuantizedMatVecAdd(index, &vec[col_j * nEqn], prod);
+      } else {
+        MatrixVectorProductAdd(&matrix[index * nVar * nEqn], &vec[col_j * nEqn], prod);
+      }
+    }
   }
 }
 
@@ -200,12 +233,23 @@ FORCEINLINE void CSysMatrix<ScalarType>::LowerProduct(const CSysVector<ScalarTyp
 
   for (auto index = row_ptr[row_i]; index < dia_ptr[row_i]; index++) {
     auto col_j = col_ind[index];
-    if (col_j >= col_lb) MatrixVectorProductAdd(&matrix[index * nVar * nEqn], &vec[col_j * nEqn], prod);
+    if (col_j >= col_lb) {
+      if (USE_QUANTIZATION && nVar > 1) {
+        QuantizedMatVecAdd(index, &vec[col_j * nEqn], prod);
+      } else {
+        MatrixVectorProductAdd(&matrix[index * nVar * nEqn], &vec[col_j * nEqn], prod);
+      }
+    }
   }
 }
 
 template <class ScalarType>
 FORCEINLINE void CSysMatrix<ScalarType>::DiagonalProduct(const CSysVector<ScalarType>& vec, unsigned long row_i,
                                                          ScalarType* prod) const {
-  MatrixVectorProduct(&matrix[dia_ptr[row_i] * nVar * nEqn], &vec[row_i * nEqn], prod);
+  if (USE_QUANTIZATION && nVar > 1) {
+    for (auto iVar = 0ul; iVar < nVar; iVar++) prod[iVar] = 0.0;
+    QuantizedMatVecAdd(dia_ptr[row_i], &vec[row_i * nEqn], prod);
+  } else {
+    MatrixVectorProduct(&matrix[dia_ptr[row_i] * nVar * nEqn], &vec[row_i * nEqn], prod);
+  }
 }
diff --git a/Common/src/linear_algebra/CSysMatrix.cpp b/Common/src/linear_algebra/CSysMatrix.cpp
@@ -31,6 +31,7 @@
 #include "../../include/toolboxes/allocation_toolbox.hpp"
 
 #include <cmath>
+#include <limits>
 
 template <class ScalarType>
 CSysMatrix<ScalarType>::CSysMatrix() : rank(SU2_MPI::GetRank()), size(SU2_MPI::GetSize()) {
@@ -48,6 +49,9 @@ CSysMatrix<ScalarType>::CSysMatrix() : rank(SU2_MPI::GetRank()), size(SU2_MPI::G
   col_ind = nullptr;
   col_ptr = nullptr;
 
+  q_offdiag = nullptr;
+  q_scale = nullptr;
+
   ILU_matrix = nullptr;
   row_ptr_ilu = nullptr;
   dia_ptr_ilu = nullptr;
@@ -71,6 +75,8 @@ CSysMatrix<ScalarType>::~CSysMatrix() {
   MemoryAllocation::aligned_free(ILU_matrix);
   MemoryAllocation::aligned_free(matrix);
   MemoryAllocation::aligned_free(invM);
+  MemoryAllocation::aligned_free(q_offdiag);
+  MemoryAllocation::aligned_free(q_scale);
 
   if (useCuda) {
     GPUMemoryAllocation::gpu_free(d_matrix);
@@ -488,6 +494,51 @@ void CSysMatrixComms::Complete(CSysVector<T>& x, CGeometry* geometry, const CCon
 #endif
 }
 
+template <class ScalarType>
+void CSysMatrix<ScalarType>::QuantizeOffDiagonalBlocks() {
+  SU2_ZONE_SCOPED
+
+  if (nVar == 1) return;
+
+  if (q_scale == nullptr) {
+    q_scale = MemoryAllocation::aligned_alloc<QuantType, true>(64, nnz * nVar * sizeof(QuantType));
+    q_offdiag = MemoryAllocation::aligned_alloc<QuantType, true>(64, nnz * nVar * nVar * sizeof(QuantType));
+  }
+
+  SU2_OMP_FOR_DYN(omp_heavy_size)
+  for (auto k = 0ul; k < nnz; ++k) {
+    for (auto r = 0ul; r < nVar; ++r) {
+      const ScalarType* __restrict row = &matrix[(k * nVar + r) * nVar];
+      QuantType* __restrict q_row = &q_offdiag[(k * nVar + r) * nVar];
+
+      /*--- Integer max over sign-cleared float bits gives max-abs with no float compare.
+       *    The biased exponent is then free — no extra extraction step needed. ---*/
+      constexpr uint32_t eps_bits = 0x34000000u;  // FLT_EPSILON ≈ 1.19e-7, floor for degenerate rows
+      uint32_t max_abs_bits = eps_bits;
+      for (auto c = 0ul; c < nVar; ++c) {
+        uint32_t fb;
+        const float fv = SU2_TYPE::PassiveValue(row[c]);
+        memcpy(&fb, &fv, sizeof(fb));
+        max_abs_bits = std::max(max_abs_bits, fb & 0x7FFFFFFFu);
+      }
+
+      /*--- e = biased_exp - 133 puts quantized max-abs in [64,128). ---*/
+      const int e_clamped = std::min(127, std::max(-128, static_cast<int>(max_abs_bits >> 23) - 133));
+      q_scale[k * nVar + r] = static_cast<QuantType>(e_clamped);
+
+      /*--- 2^(-e) is a float with biased_exp = (127-e), zero mantissa — no ldexpf needed. ---*/
+      const uint32_t inv_bits = static_cast<uint32_t>(127 - e_clamped) << 23;
+      float inv_rscale;
+      memcpy(&inv_rscale, &inv_bits, sizeof(inv_rscale));
+      for (auto c = 0ul; c < nVar; ++c) {
+        q_row[c] = static_cast<QuantType>(
+            std::max(-128.f, std::min(127.f, roundf(SU2_TYPE::PassiveValue(row[c]) * inv_rscale))));
+      }
+    }
+  }
+  END_SU2_OMP_FOR
+}
+
 template <class ScalarType>
 void CSysMatrix<ScalarType>::SetValZero() {
   SU2_ZONE_SCOPED
@@ -628,7 +679,6 @@ void CSysMatrix<ScalarType>::MatrixInverse(ScalarType* matrix, ScalarType* inver
 
 template <class ScalarType>
 void CSysMatrix<ScalarType>::DeleteValsRowi(unsigned long block_i, unsigned long row) {
-  SU2_ZONE_SCOPED
   for (auto index = row_ptr[block_i]; index < row_ptr[block_i + 1]; index++) {
     for (auto iVar = 0u; iVar < nVar; iVar++)
       matrix[index * nVar * nVar + row * nVar + iVar] = 0.0;  // Delete row values in the block

diff --git a/Common/src/linear_algebra/CSysSolve.cpp b/Common/src/linear_algebra/CSysSolve.cpp
@@ -212,11 +212,15 @@ bool CSysSolve<ScalarType>::ModGramSchmidt(bool shared_hsbg, int i, su2matrix<Sc
   LinearCombination(
       shared_hsbg, i + 1, w, [&h_i](int k) { return -h_i(0, k); }, w[i + 1], true);
 
-  const auto& dh_i = CSysVector<ScalarType>::multiDot(w, i + 1, 1, w, i + 1);
-  LinearCombination(
-      shared_hsbg, i + 1, w, [&dh_i](int k) { return -dh_i(0, k); }, w[i + 1], true);
+  if (i < 5) {
+    for (int k = 0; k < i + 1; k++) SetHsbg(k, i, h_i(0, k));
+  } else {
+    const auto& dh_i = CSysVector<ScalarType>::multiDot(w, i + 1, 1, w, i + 1);
+    LinearCombination(
+        shared_hsbg, i + 1, w, [&dh_i](int k) { return -dh_i(0, k); }, w[i + 1], true);
 
-  for (int k = 0; k < i + 1; k++) SetHsbg(k, i, h_i(0, k) + dh_i(0, k));
+    for (int k = 0; k < i + 1; k++) SetHsbg(k, i, h_i(0, k) + dh_i(0, k));
+  }
 
   /*--- The norm of w[i+1] is 0 or NaN: the input vector from mat_vec is
    * zero or contains NaN. Cannot proceed with orthogonalization. ---*/

diff --git a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl
@@ -649,6 +649,7 @@ void CFVMFlowSolverBase<V, R>::ImplicitEuler_Iteration(CGeometry *geometry, CSol
   }
   END_SU2_OMP_FOR
 
+  Jacobian.QuantizeOffDiagonalBlocks();
   auto iter = System.Solve(Jacobian, LinSysRes, LinSysSol, geometry, config);
 
   BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {

diff --git a/SU2_CFD/include/solvers/CScalarSolver.inl b/SU2_CFD/include/solvers/CScalarSolver.inl
@@ -572,6 +572,7 @@ void CScalarSolver<VariableType>::ImplicitEuler_Iteration(CGeometry* geometry, C
   }
   END_SU2_OMP_FOR
 
+  Jacobian.QuantizeOffDiagonalBlocks();
   auto iter = System.Solve(Jacobian, LinSysRes, LinSysSol, geometry, config);
 
   BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {