Skip to content

Commit 7a2eb43

Browse files
committed
Further applications of SU2_OMP_SAFE_GLOBAL_ACCESS.
1 parent 0b021a7 commit 7a2eb43

29 files changed

Lines changed: 126 additions & 260 deletions

Common/include/linear_algebra/CSysSolve.hpp

Lines changed: 8 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -219,12 +219,11 @@ class CSysSolve {
219219
void HandleTemporariesIn(const CSysVector<OtherType>& LinSysRes, CSysVector<OtherType>& LinSysSol) {
220220

221221
/*--- Set the pointers. ---*/
222-
SU2_OMP_MASTER {
222+
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
223223
LinSysRes_ptr = &LinSysRes;
224224
LinSysSol_ptr = &LinSysSol;
225225
}
226-
END_SU2_OMP_MASTER
227-
SU2_OMP_BARRIER
226+
END_SU2_OMP_SAFE_GLOBAL_ACCESS
228227
}
229228

230229
/*!
@@ -241,12 +240,11 @@ class CSysSolve {
241240
LinSysSol_tmp.PassiveCopy(LinSysSol);
242241

243242
/*--- Set the pointers. ---*/
244-
SU2_OMP_MASTER {
243+
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
245244
LinSysRes_ptr = &LinSysRes_tmp;
246245
LinSysSol_ptr = &LinSysSol_tmp;
247246
}
248-
END_SU2_OMP_MASTER
249-
SU2_OMP_BARRIER
247+
END_SU2_OMP_SAFE_GLOBAL_ACCESS
250248
}
251249

252250
/*!
@@ -258,13 +256,11 @@ class CSysSolve {
258256
void HandleTemporariesOut(CSysVector<OtherType>& LinSysSol) {
259257

260258
/*--- Reset the pointers. ---*/
261-
SU2_OMP_BARRIER
262-
SU2_OMP_MASTER {
259+
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
263260
LinSysRes_ptr = nullptr;
264261
LinSysSol_ptr = nullptr;
265262
}
266-
END_SU2_OMP_MASTER
267-
SU2_OMP_BARRIER
263+
END_SU2_OMP_SAFE_GLOBAL_ACCESS
268264
}
269265

270266
/*!
@@ -279,13 +275,11 @@ class CSysSolve {
279275
LinSysSol.PassiveCopy(LinSysSol_tmp);
280276

281277
/*--- Reset the pointers. ---*/
282-
SU2_OMP_BARRIER
283-
SU2_OMP_MASTER {
278+
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
284279
LinSysRes_ptr = nullptr;
285280
LinSysSol_ptr = nullptr;
286281
}
287-
END_SU2_OMP_MASTER
288-
SU2_OMP_BARRIER
282+
END_SU2_OMP_SAFE_GLOBAL_ACCESS
289283
}
290284

291285
public:

Common/include/linear_algebra/CSysVector.hpp

Lines changed: 4 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -186,10 +186,7 @@ class CSysVector : public VecExpr::CVecExpr<CSysVector<ScalarType>, ScalarType>
186186
/*--- check if self-assignment, otherwise perform deep copy ---*/
187187
if ((const void*)this == (const void*)&other) return;
188188

189-
SU2_OMP_MASTER
190-
Initialize(other.GetNBlk(), other.GetNBlkDomain(), other.GetNVar(), nullptr, true, false);
191-
END_SU2_OMP_MASTER
192-
SU2_OMP_BARRIER
189+
SU2_OMP_SAFE_GLOBAL_ACCESS(Initialize(other.GetNBlk(), other.GetNBlkDomain(), other.GetNVar(), nullptr, true, false);)
193190

194191
CSYSVEC_PARFOR
195192
for (auto i = 0ul; i < nElm; i++) vec_val[i] = SU2_TYPE::GetValue(other[i]);
@@ -297,11 +294,7 @@ class CSysVector : public VecExpr::CVecExpr<CSysVector<ScalarType>, ScalarType>
297294
ScalarType dot(const VecExpr::CVecExpr<T, ScalarType>& expr) const {
298295
static ScalarType dotRes;
299296
/*--- All threads get the same "view" of the vectors and shared variable. ---*/
300-
SU2_OMP_BARRIER
301-
SU2_OMP_MASTER
302-
dotRes = 0.0;
303-
END_SU2_OMP_MASTER
304-
SU2_OMP_BARRIER
297+
SU2_OMP_SAFE_GLOBAL_ACCESS(dotRes = 0.0;)
305298

306299
/*--- Local dot product for each thread. ---*/
307300
ScalarType sum = 0.0;
@@ -317,16 +310,13 @@ class CSysVector : public VecExpr::CVecExpr<CSysVector<ScalarType>, ScalarType>
317310

318311
#ifdef HAVE_MPI
319312
/*--- Reduce across all mpi ranks, only master thread communicates. ---*/
320-
SU2_OMP_BARRIER
321-
SU2_OMP_MASTER {
313+
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
322314
sum = dotRes;
323315
const auto mpi_type = (sizeof(ScalarType) < sizeof(double)) ? MPI_FLOAT : MPI_DOUBLE;
324316
SelectMPIWrapper<ScalarType>::W::Allreduce(&sum, &dotRes, 1, mpi_type, MPI_SUM, SU2_MPI::GetComm());
325317
}
326-
END_SU2_OMP_MASTER
318+
END_SU2_OMP_SAFE_GLOBAL_ACCESS
327319
#endif
328-
/*--- Make view of result consistent across threads. ---*/
329-
SU2_OMP_BARRIER
330320

331321
return dotRes;
332322
}

Common/include/parallelization/omp_structure.hpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -185,6 +185,10 @@ void omp_finalize();
185185

186186
#endif
187187

188+
/* The SU2_OMP_SAFE_GLOBAL_ACCESS constructs are used to safeguard code that should only be executed by the master
189+
* thread, with all threads and memory views synchronized both beforehand and afterwards.
190+
*/
191+
188192
#define BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS \
189193
SU2_OMP_BARRIER \
190194
SU2_OMP_MASTER

Common/src/geometry/CGeometry.cpp

Lines changed: 10 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -357,8 +357,7 @@ void CGeometry::AllocateP2PComms(unsigned short countPerPoint) {
357357

358358
if (countPerPoint <= maxCountPerPoint) return;
359359

360-
SU2_OMP_BARRIER
361-
SU2_OMP_MASTER {
360+
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
362361

363362
/*--- Store the larger packet size to the class data. ---*/
364363

@@ -379,8 +378,7 @@ void CGeometry::AllocateP2PComms(unsigned short countPerPoint) {
379378
bufS_P2PRecv = new unsigned short[maxCountPerPoint*nPoint_P2PRecv[nP2PRecv]] ();
380379

381380
}
382-
END_SU2_OMP_MASTER
383-
SU2_OMP_BARRIER
381+
END_SU2_OMP_SAFE_GLOBAL_ACCESS
384382

385383
}
386384

@@ -763,10 +761,7 @@ void CGeometry::CompleteComms(CGeometry *geometry,
763761
/*--- For efficiency, recv the messages dynamically based on
764762
the order they arrive. ---*/
765763

766-
SU2_OMP_MASTER
767-
SU2_MPI::Waitany(nP2PRecv, req_P2PRecv, &ind, &status);
768-
END_SU2_OMP_MASTER
769-
SU2_OMP_BARRIER
764+
SU2_OMP_SAFE_GLOBAL_ACCESS(SU2_MPI::Waitany(nP2PRecv, req_P2PRecv, &ind, &status);)
770765

771766
/*--- Once we have recv'd a message, get the source rank. ---*/
772767

@@ -831,12 +826,8 @@ void CGeometry::CompleteComms(CGeometry *geometry,
831826
data in the loop above at this point. ---*/
832827

833828
#ifdef HAVE_MPI
834-
SU2_OMP_MASTER
835-
SU2_MPI::Waitall(nP2PSend, req_P2PSend, MPI_STATUS_IGNORE);
836-
END_SU2_OMP_MASTER
829+
SU2_OMP_SAFE_GLOBAL_ACCESS(SU2_MPI::Waitall(nP2PSend, req_P2PSend, MPI_STATUS_IGNORE);)
837830
#endif
838-
SU2_OMP_BARRIER
839-
840831
}
841832

842833
void CGeometry::PreprocessPeriodicComms(CGeometry *geometry,
@@ -1186,8 +1177,7 @@ void CGeometry::AllocatePeriodicComms(unsigned short countPerPeriodicPoint) {
11861177

11871178
if (countPerPeriodicPoint <= maxCountPerPeriodicPoint) return;
11881179

1189-
SU2_OMP_BARRIER
1190-
SU2_OMP_MASTER {
1180+
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
11911181

11921182
/*--- Store the larger packet size to the class data. ---*/
11931183

@@ -1213,8 +1203,7 @@ void CGeometry::AllocatePeriodicComms(unsigned short countPerPeriodicPoint) {
12131203
bufS_PeriodicRecv = new unsigned short[nRecv] ();
12141204

12151205
}
1216-
END_SU2_OMP_MASTER
1217-
SU2_OMP_BARRIER
1206+
END_SU2_OMP_SAFE_GLOBAL_ACCESS
12181207
}
12191208

12201209
void CGeometry::PostPeriodicRecvs(CGeometry *geometry,
@@ -3136,7 +3125,7 @@ void CGeometry::FilterValuesAtElementCG(const vector<su2double> &filter_radius,
31363125
END_SU2_OMP_FOR
31373126

31383127
/*--- Share with all processors ---*/
3139-
SU2_OMP_MASTER
3128+
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS
31403129
{
31413130
su2double* dbl_buffer = new su2double [Global_nElemDomain*nDim];
31423131
SU2_MPI::Allreduce(cg_elem,dbl_buffer,Global_nElemDomain*nDim,MPI_DOUBLE,MPI_SUM,SU2_MPI::GetComm());
@@ -3150,8 +3139,7 @@ void CGeometry::FilterValuesAtElementCG(const vector<su2double> &filter_radius,
31503139
MPI_Allreduce(halo_detect.data(),char_buffer.data(),Global_nElemDomain,MPI_CHAR,MPI_SUM,SU2_MPI::GetComm());
31513140
halo_detect.swap(char_buffer);
31523141
}
3153-
END_SU2_OMP_MASTER
3154-
SU2_OMP_BARRIER
3142+
END_SU2_OMP_SAFE_GLOBAL_ACCESS
31553143

31563144
SU2_OMP_FOR_STAT(256)
31573145
for(auto iElem=0ul; iElem<Global_nElemDomain; ++iElem) {
@@ -3190,14 +3178,13 @@ void CGeometry::FilterValuesAtElementCG(const vector<su2double> &filter_radius,
31903178

31913179
#ifdef HAVE_MPI
31923180
/*--- Share with all processors ---*/
3193-
SU2_OMP_MASTER
3181+
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS
31943182
{
31953183
su2double *buffer = new su2double [Global_nElemDomain];
31963184
SU2_MPI::Allreduce(work_values,buffer,Global_nElemDomain,MPI_DOUBLE,MPI_SUM,SU2_MPI::GetComm());
31973185
swap(buffer, work_values); delete [] buffer;
31983186
}
3199-
END_SU2_OMP_MASTER
3200-
SU2_OMP_BARRIER
3187+
END_SU2_OMP_SAFE_GLOBAL_ACCESS
32013188

32023189
/*--- Account for duplication ---*/
32033190
SU2_OMP_FOR_STAT(256)

Common/src/geometry/CMultiGridGeometry.cpp

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -920,7 +920,7 @@ void CMultiGridGeometry::MatchPeriodic(const CConfig *config, unsigned short val
920920

921921
void CMultiGridGeometry::SetControlVolume(const CGeometry *fine_grid, unsigned short action) {
922922

923-
SU2_OMP_MASTER {
923+
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
924924

925925
unsigned long iFinePoint, iCoarsePoint, iEdge, iParent;
926926
long FineEdge, CoarseEdge;
@@ -983,13 +983,12 @@ void CMultiGridGeometry::SetControlVolume(const CGeometry *fine_grid, unsigned s
983983
}
984984

985985
}
986-
END_SU2_OMP_MASTER
987-
SU2_OMP_BARRIER
986+
END_SU2_OMP_SAFE_GLOBAL_ACCESS
988987
}
989988

990989
void CMultiGridGeometry::SetBoundControlVolume(const CGeometry *fine_grid, unsigned short action) {
991990

992-
SU2_OMP_MASTER {
991+
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
993992

994993
unsigned long iCoarsePoint, iFinePoint, FineVertex, iVertex;
995994
unsigned short iMarker, iChildren, iDim;
@@ -1027,8 +1026,7 @@ void CMultiGridGeometry::SetBoundControlVolume(const CGeometry *fine_grid, unsig
10271026
}
10281027

10291028
}
1030-
END_SU2_OMP_MASTER
1031-
SU2_OMP_BARRIER
1029+
END_SU2_OMP_SAFE_GLOBAL_ACCESS
10321030
}
10331031

10341032
void CMultiGridGeometry::SetCoord(const CGeometry *fine_grid) {

Common/src/geometry/CPhysicalGeometry.cpp

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -4706,7 +4706,7 @@ void CPhysicalGeometry::SetPoint_Connectivity() {
47064706
unsigned long jElem, Point_Neighbor, iPoint, iElem;
47074707

47084708
/*--- Loop over all the elements ---*/
4709-
SU2_OMP_MASTER
4709+
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS
47104710
{
47114711
vector<vector<long> > elems(nPoint);
47124712

@@ -4720,8 +4720,7 @@ void CPhysicalGeometry::SetPoint_Connectivity() {
47204720
}
47214721
nodes->SetElems(elems);
47224722
}
4723-
END_SU2_OMP_MASTER
4724-
SU2_OMP_BARRIER
4723+
END_SU2_OMP_SAFE_GLOBAL_ACCESS
47254724

47264725
/*--- Loop over all the points ---*/
47274726

@@ -7378,7 +7377,7 @@ void CPhysicalGeometry::SetControlVolume(CConfig *config, unsigned short action)
73787377
END_SU2_OMP_FOR
73797378
}
73807379

7381-
SU2_OMP_MASTER { /*--- The following is difficult to parallelize with threads. ---*/
7380+
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { /*--- The following is difficult to parallelize with threads. ---*/
73827381

73837382
su2double my_DomainVolume = 0.0;
73847383
for (auto iElem = 0ul; iElem < nElem; iElem++) {
@@ -7511,8 +7510,7 @@ void CPhysicalGeometry::SetControlVolume(CConfig *config, unsigned short action)
75117510
}
75127511

75137512
}
7514-
END_SU2_OMP_MASTER
7515-
SU2_OMP_BARRIER
7513+
END_SU2_OMP_SAFE_GLOBAL_ACCESS
75167514

75177515
/*--- Check if there is a normal with null area ---*/
75187516
SU2_OMP_FOR_STAT(1024)

Common/src/linear_algebra/CSysMatrix.cpp

Lines changed: 5 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -378,10 +378,7 @@ void CSysMatrixComms::Complete(CSysVector<T>& x, CGeometry *geometry,
378378
/*--- For efficiency, recv the messages dynamically based on
379379
the order they arrive. ---*/
380380

381-
SU2_OMP_MASTER
382-
SU2_MPI::Waitany(geometry->nP2PRecv, geometry->req_P2PRecv, &ind, &status);
383-
END_SU2_OMP_MASTER
384-
SU2_OMP_BARRIER
381+
SU2_OMP_SAFE_GLOBAL_ACCESS(SU2_MPI::Waitany(geometry->nP2PRecv, geometry->req_P2PRecv, &ind, &status);)
385382

386383
/*--- Once we have recv'd a message, get the source rank. ---*/
387384

@@ -475,12 +472,8 @@ void CSysMatrixComms::Complete(CSysVector<T>& x, CGeometry *geometry,
475472
data in the loop above at this point. ---*/
476473

477474
#ifdef HAVE_MPI
478-
SU2_OMP_MASTER
479-
SU2_MPI::Waitall(geometry->nP2PSend, geometry->req_P2PSend, MPI_STATUS_IGNORE);
480-
END_SU2_OMP_MASTER
475+
SU2_OMP_SAFE_GLOBAL_ACCESS(SU2_MPI::Waitall(geometry->nP2PSend, geometry->req_P2PSend, MPI_STATUS_IGNORE);)
481476
#endif
482-
SU2_OMP_BARRIER
483-
484477
}
485478

486479
template<class ScalarType>
@@ -1392,13 +1385,12 @@ void CSysMatrix<ScalarType>::BuildPastixPreconditioner(CGeometry *geometry, cons
13921385
unsigned short kind_fact) {
13931386
#ifdef HAVE_PASTIX
13941387
/*--- Pastix will launch nested threads. ---*/
1395-
SU2_OMP_MASTER
1388+
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS
13961389
{
13971390
pastix_wrapper.SetMatrix(nVar,nPoint,nPointDomain,row_ptr,col_ind,matrix);
13981391
pastix_wrapper.Factorize(geometry, config, kind_fact);
13991392
}
1400-
END_SU2_OMP_MASTER
1401-
SU2_OMP_BARRIER
1393+
END_SU2_OMP_SAFE_GLOBAL_ACCESS
14021394
#else
14031395
SU2_MPI::Error("SU2 was not compiled with -DHAVE_PASTIX", CURRENT_FUNCTION);
14041396
#endif
@@ -1408,11 +1400,7 @@ template<class ScalarType>
14081400
void CSysMatrix<ScalarType>::ComputePastixPreconditioner(const CSysVector<ScalarType> & vec, CSysVector<ScalarType> & prod,
14091401
CGeometry *geometry, const CConfig *config) const {
14101402
#ifdef HAVE_PASTIX
1411-
SU2_OMP_BARRIER
1412-
SU2_OMP_MASTER
1413-
pastix_wrapper.Solve(vec,prod);
1414-
END_SU2_OMP_MASTER
1415-
SU2_OMP_BARRIER
1403+
SU2_OMP_SAFE_GLOBAL_ACCESS(pastix_wrapper.Solve(vec,prod);)
14161404

14171405
CSysMatrixComms::Initiate(prod, geometry, config);
14181406
CSysMatrixComms::Complete(prod, geometry, config);

Common/src/linear_algebra/CSysSolve.cpp

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -866,12 +866,11 @@ unsigned long CSysSolve<ScalarType>::Solve(CSysMatrix<ScalarType> & Jacobian, co
866866

867867
TapeActive = AD::getGlobalTape().isActive();
868868

869-
SU2_OMP_MASTER {
869+
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
870870
AD::StartExtFunc(false, false);
871871
AD::SetExtFuncIn(&LinSysRes[0], LinSysRes.GetLocSize());
872872
}
873-
END_SU2_OMP_MASTER
874-
SU2_OMP_BARRIER
873+
END_SU2_OMP_SAFE_GLOBAL_ACCESS
875874

876875
AD::StopRecording();
877876
#endif
@@ -982,11 +981,8 @@ unsigned long CSysSolve<ScalarType>::Solve(CSysMatrix<ScalarType> & Jacobian, co
982981
END_SU2_OMP_SAFE_GLOBAL_ACCESS
983982

984983
AD::FuncHelper->addToTape(CSysSolve_b<ScalarType>::Solve_b);
985-
SU2_OMP_BARRIER
986984

987-
SU2_OMP_MASTER
988-
AD::EndExtFunc();
989-
END_SU2_OMP_MASTER
985+
SU2_OMP_SAFE_GLOBAL_ACCESS(AD::EndExtFunc();)
990986
#endif
991987
}
992988

SU2_CFD/include/limiters/CLimiterDetails.hpp

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -178,13 +178,12 @@ struct CLimiterDetails<LIMITER::VENKATAKRISHNAN_WANG>
178178
/*--- Allocate the static members (shared between threads) to
179179
* perform the reduction across all threads in the rank. ---*/
180180

181-
SU2_OMP_MASTER
181+
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS
182182
{
183183
sharedMin.resize(varEnd) = largeNum;
184184
sharedMax.resize(varEnd) =-largeNum;
185185
}
186-
END_SU2_OMP_MASTER
187-
SU2_OMP_BARRIER
186+
END_SU2_OMP_SAFE_GLOBAL_ACCESS
188187

189188
/*--- Per thread reduction. ---*/
190189

0 commit comments

Comments
 (0)