Skip to content

Commit f3de6e5

Browse files
committed
fix race condition to access Hsbg
1 parent abc6ea3 commit f3de6e5

2 files changed

Lines changed: 25 additions & 9 deletions

File tree

Common/include/linear_algebra/CSysSolve.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ class CSysSolve {
168168
* \brief Modified Gram-Schmidt orthogonalization
169169
* \author Based on Kesheng John Wu's mgsro subroutine in Saad's SPARSKIT
170170
*
171+
* \param[in] shared_hsbg - if the Hessenberg matrix is shared by multiple threads
171172
* \param[in] i - index indicating which vector in w is being orthogonalized
172173
* \param[in,out] Hsbg - the upper Hessenberg begin updated
173174
* \param[in,out] w - the (i+1)th vector of w is orthogonalized against the
@@ -181,7 +182,7 @@ class CSysSolve {
181182
* vector is kept in nrm0 and updated after operating with each vector
182183
*
183184
*/
184-
void ModGramSchmidt(int i, su2matrix<ScalarType>& Hsbg, std::vector<VectorType>& w) const;
185+
void ModGramSchmidt(bool shared_hsbg, int i, su2matrix<ScalarType>& Hsbg, std::vector<VectorType>& w) const;
185186

186187
/*!
187188
* \brief writes header information for a CSysSolve residual history

Common/src/linear_algebra/CSysSolve.cpp

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,19 @@ void CSysSolve<ScalarType>::SolveReduced(int n, const su2matrix<ScalarType>& Hsb
109109
}
110110

111111
template <class ScalarType>
112-
void CSysSolve<ScalarType>::ModGramSchmidt(int i, su2matrix<ScalarType>& Hsbg,
112+
void CSysSolve<ScalarType>::ModGramSchmidt(bool shared_hsbg, int i, su2matrix<ScalarType>& Hsbg,
113113
vector<CSysVector<ScalarType> >& w) const {
114+
const auto thread = omp_get_thread_num();
115+
116+
/*--- If Hsbg is shared by multiple threads calling this function, only one
117+
* thread can write into it. If Hsbg is private, all threads need to write. ---*/
118+
119+
auto SetHsbg = [&](int row, int col, const ScalarType& value) {
120+
if (!shared_hsbg || thread == 0) {
121+
Hsbg(row, col) = value;
122+
}
123+
};
124+
114125
/*--- Parameter for reorthonormalization ---*/
115126

116127
const ScalarType reorth = 0.98;
@@ -132,28 +143,29 @@ void CSysSolve<ScalarType>::ModGramSchmidt(int i, su2matrix<ScalarType>& Hsbg,
132143

133144
for (int k = 0; k < i + 1; k++) {
134145
ScalarType prod = w[i + 1].dot(w[k]);
135-
Hsbg(k, i) = prod;
146+
ScalarType h_ki = prod;
136147
w[i + 1] -= prod * w[k];
137148

138149
/*--- Check if reorthogonalization is necessary ---*/
139150

140151
if (prod * prod > thr) {
141152
prod = w[i + 1].dot(w[k]);
142-
Hsbg(k, i) += prod;
153+
h_ki += prod;
143154
w[i + 1] -= prod * w[k];
144155
}
156+
SetHsbg(k, i, h_ki);
145157

146158
/*--- Update the norm and check its size ---*/
147159

148-
nrm -= pow(Hsbg(k, i), 2);
160+
nrm -= pow(h_ki, 2);
149161
nrm = max<ScalarType>(nrm, 0.0);
150162
thr = nrm * reorth;
151163
}
152164

153165
/*--- Test the resulting vector ---*/
154166

155167
nrm = w[i + 1].norm();
156-
Hsbg(i + 1, i) = nrm;
168+
SetHsbg(i + 1, i, nrm);
157169

158170
/*--- Scale the resulting vector ---*/
159171

@@ -343,6 +355,9 @@ unsigned long CSysSolve<ScalarType>::FGMRES_LinSolver(const CSysVector<ScalarTyp
343355
const CConfig* config) const {
344356
const bool masterRank = (SU2_MPI::GetRank() == MASTER_NODE);
345357
const bool flexible = !precond.IsIdentity();
358+
/*--- If we call the solver outside of a parallel region, but the number of threads allows,
359+
* we still want to parallelize some of the expensive operations. ---*/
360+
const bool nestedParallel = !omp_in_parallel() && omp_get_max_threads() > 1;
346361

347362
/*--- Check the subspace size ---*/
348363

@@ -452,8 +467,8 @@ unsigned long CSysSolve<ScalarType>::FGMRES_LinSolver(const CSysVector<ScalarTyp
452467

453468
/*--- Modified Gram-Schmidt orthogonalization ---*/
454469

455-
SU2_OMP_PARALLEL_(if (!omp_in_parallel()))
456-
ModGramSchmidt(i, H, W);
470+
SU2_OMP_PARALLEL_(if (nestedParallel))
471+
ModGramSchmidt(nestedParallel, i, H, W);
457472
END_SU2_OMP_PARALLEL
458473

459474
/*--- Apply old Givens rotations to new column of the Hessenberg matrix then generate the
@@ -482,7 +497,7 @@ unsigned long CSysSolve<ScalarType>::FGMRES_LinSolver(const CSysVector<ScalarTyp
482497

483498
const auto& basis = flexible ? Z : W;
484499

485-
SU2_OMP_PARALLEL_(if (!omp_in_parallel()))
500+
SU2_OMP_PARALLEL_(if (nestedParallel))
486501
for (unsigned long k = 0; k < i; k++) {
487502
x += y[k] * basis[k];
488503
}

0 commit comments

Comments
 (0)