Skip to content

Commit 3a9f8d6

Browse files
authored
Merge pull request #1679 from su2code/fix_race_conditions
Hybrid Parallel (AD): Fix Race Conditions
2 parents 40e82f4 + 9673bb9 commit 3a9f8d6

50 files changed

Lines changed: 430 additions & 520 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

Common/include/basic_types/ad_structure.hpp

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -559,21 +559,13 @@ namespace AD{
559559
FORCEINLINE bool PausePreaccumulation() {
560560
const auto current = PreaccEnabled;
561561
if (!current) return false;
562-
SU2_OMP_BARRIER
563-
SU2_OMP_MASTER
564-
PreaccEnabled = false;
565-
END_SU2_OMP_MASTER
566-
SU2_OMP_BARRIER
562+
SU2_OMP_SAFE_GLOBAL_ACCESS(PreaccEnabled = false;)
567563
return true;
568564
}
569565

570566
FORCEINLINE void ResumePreaccumulation(bool wasActive) {
571567
if (!wasActive) return;
572-
SU2_OMP_BARRIER
573-
SU2_OMP_MASTER
574-
PreaccEnabled = true;
575-
END_SU2_OMP_MASTER
576-
SU2_OMP_BARRIER
568+
SU2_OMP_SAFE_GLOBAL_ACCESS(PreaccEnabled = true;)
577569
}
578570

579571
FORCEINLINE void StartNoSharedReading() {

Common/include/geometry/dual_grid/CEdge.hpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ class CEdge {
4343
using NodeArray = C2DContainer<Index, Index, StorageType::ColumnMajor, 64, DynamicSize, 2>;
4444
NodeArray Nodes; /*!< \brief Vector to store the node indices of the edge. */
4545
su2activematrix Normal; /*!< \brief Normal (area) of the edge. */
46+
const Index nEdge, nEdgeSIMD;
4647

4748
friend class CPhysicalGeometry;
4849

@@ -70,13 +71,27 @@ class CEdge {
7071
inline unsigned long GetNode(unsigned long iEdge, unsigned long iNode) const { return Nodes(iEdge,iNode); }
7172

7273
/*!
73-
* \brief SIMD version of GetNode, iNode returned for multiple contiguous iEdges
74+
* \brief SIMD version of GetNode, iNode returned for contiguous iEdges.
7475
*/
7576
template<class T, size_t N>
7677
FORCEINLINE simd::Array<T,N> GetNode(simd::Array<T,N> iEdge, unsigned long iNode) const {
7778
return simd::Array<T,N>(&Nodes(iEdge[0],iNode));
7879
}
7980

81+
/*!
82+
* \brief Sets the tail of "Nodes" to repeat one of the last edges.
83+
* \note This is needed when using the SIMD version of GetNode and
84+
* the number of edges is not a multiple of the simd width.
85+
*/
86+
void SetPaddingNodes() {
87+
for (auto iEdge = nEdge; iEdge < nEdgeSIMD; ++iEdge) {
88+
/*--- Pad nodes by repeating the first edge in the last SIMD group. ---*/
89+
const auto iEdge0 = nEdgeSIMD - simd::preferredLen<su2double>();
90+
Nodes(iEdge, LEFT) = Nodes(iEdge0, LEFT);
91+
Nodes(iEdge, RIGHT) = Nodes(iEdge0, RIGHT);
92+
}
93+
}
94+
8095
/*!
8196
* \brief Set the node indices of an edge.
8297
* \param[in] iEdge - Edge index.

Common/include/linear_algebra/CSysSolve.hpp

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -219,12 +219,11 @@ class CSysSolve {
219219
void HandleTemporariesIn(const CSysVector<OtherType>& LinSysRes, CSysVector<OtherType>& LinSysSol) {
220220

221221
/*--- Set the pointers. ---*/
222-
SU2_OMP_MASTER {
222+
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
223223
LinSysRes_ptr = &LinSysRes;
224224
LinSysSol_ptr = &LinSysSol;
225225
}
226-
END_SU2_OMP_MASTER
227-
SU2_OMP_BARRIER
226+
END_SU2_OMP_SAFE_GLOBAL_ACCESS
228227
}
229228

230229
/*!
@@ -241,12 +240,11 @@ class CSysSolve {
241240
LinSysSol_tmp.PassiveCopy(LinSysSol);
242241

243242
/*--- Set the pointers. ---*/
244-
SU2_OMP_MASTER {
243+
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
245244
LinSysRes_ptr = &LinSysRes_tmp;
246245
LinSysSol_ptr = &LinSysSol_tmp;
247246
}
248-
END_SU2_OMP_MASTER
249-
SU2_OMP_BARRIER
247+
END_SU2_OMP_SAFE_GLOBAL_ACCESS
250248
}
251249

252250
/*!
@@ -258,13 +256,11 @@ class CSysSolve {
258256
void HandleTemporariesOut(CSysVector<OtherType>& LinSysSol) {
259257

260258
/*--- Reset the pointers. ---*/
261-
SU2_OMP_BARRIER
262-
SU2_OMP_MASTER {
259+
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
263260
LinSysRes_ptr = nullptr;
264261
LinSysSol_ptr = nullptr;
265262
}
266-
END_SU2_OMP_MASTER
267-
SU2_OMP_BARRIER
263+
END_SU2_OMP_SAFE_GLOBAL_ACCESS
268264
}
269265

270266
/*!
@@ -279,13 +275,11 @@ class CSysSolve {
279275
LinSysSol.PassiveCopy(LinSysSol_tmp);
280276

281277
/*--- Reset the pointers. ---*/
282-
SU2_OMP_BARRIER
283-
SU2_OMP_MASTER {
278+
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
284279
LinSysRes_ptr = nullptr;
285280
LinSysSol_ptr = nullptr;
286281
}
287-
END_SU2_OMP_MASTER
288-
SU2_OMP_BARRIER
282+
END_SU2_OMP_SAFE_GLOBAL_ACCESS
289283
}
290284

291285
public:

Common/include/linear_algebra/CSysSolve_b.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@
3232
#ifdef CODI_REVERSE_TYPE
3333
template<class ScalarType>
3434
struct CSysSolve_b {
35-
static void Solve_b(const codi::RealReverse::Real* x, codi::RealReverse::Real* x_b, size_t m,
36-
const codi::RealReverse::Real* y, const codi::RealReverse::Real* y_b, size_t n,
35+
static void Solve_b(const su2double::Real* x, su2double::Real* x_b, size_t m,
36+
const su2double::Real* y, const su2double::Real* y_b, size_t n,
3737
codi::DataStore* d);
3838
};
3939
#endif

Common/include/linear_algebra/CSysVector.hpp

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -186,10 +186,7 @@ class CSysVector : public VecExpr::CVecExpr<CSysVector<ScalarType>, ScalarType>
186186
/*--- check if self-assignment, otherwise perform deep copy ---*/
187187
if ((const void*)this == (const void*)&other) return;
188188

189-
SU2_OMP_MASTER
190-
Initialize(other.GetNBlk(), other.GetNBlkDomain(), other.GetNVar(), nullptr, true, false);
191-
END_SU2_OMP_MASTER
192-
SU2_OMP_BARRIER
189+
SU2_OMP_SAFE_GLOBAL_ACCESS(Initialize(other.GetNBlk(), other.GetNBlkDomain(), other.GetNVar(), nullptr, true, false);)
193190

194191
CSYSVEC_PARFOR
195192
for (auto i = 0ul; i < nElm; i++) vec_val[i] = SU2_TYPE::GetValue(other[i]);
@@ -297,11 +294,7 @@ class CSysVector : public VecExpr::CVecExpr<CSysVector<ScalarType>, ScalarType>
297294
ScalarType dot(const VecExpr::CVecExpr<T, ScalarType>& expr) const {
298295
static ScalarType dotRes;
299296
/*--- All threads get the same "view" of the vectors and shared variable. ---*/
300-
SU2_OMP_BARRIER
301-
SU2_OMP_MASTER
302-
dotRes = 0.0;
303-
END_SU2_OMP_MASTER
304-
SU2_OMP_BARRIER
297+
SU2_OMP_SAFE_GLOBAL_ACCESS(dotRes = 0.0;)
305298

306299
/*--- Local dot product for each thread. ---*/
307300
ScalarType sum = 0.0;
@@ -317,16 +310,16 @@ class CSysVector : public VecExpr::CVecExpr<CSysVector<ScalarType>, ScalarType>
317310

318311
#ifdef HAVE_MPI
319312
/*--- Reduce across all mpi ranks, only master thread communicates. ---*/
320-
SU2_OMP_BARRIER
321-
SU2_OMP_MASTER {
313+
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
322314
sum = dotRes;
323315
const auto mpi_type = (sizeof(ScalarType) < sizeof(double)) ? MPI_FLOAT : MPI_DOUBLE;
324316
SelectMPIWrapper<ScalarType>::W::Allreduce(&sum, &dotRes, 1, mpi_type, MPI_SUM, SU2_MPI::GetComm());
325317
}
326-
END_SU2_OMP_MASTER
327-
#endif
318+
END_SU2_OMP_SAFE_GLOBAL_ACCESS
319+
#else
328320
/*--- Make view of result consistent across threads. ---*/
329321
SU2_OMP_BARRIER
322+
#endif
330323

331324
return dotRes;
332325
}

Common/include/parallelization/omp_structure.hpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,25 @@ void omp_finalize();
185185

186186
#endif
187187

188+
/* The SU2_OMP_SAFE_GLOBAL_ACCESS constructs are used to safeguard code that should only be executed by the master
189+
* thread, with all threads and memory views synchronized both beforehand and afterwards.
190+
*/
191+
192+
#define BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS \
193+
SU2_OMP_BARRIER \
194+
SU2_OMP_MASTER
195+
196+
#define END_SU2_OMP_SAFE_GLOBAL_ACCESS \
197+
END_SU2_OMP_MASTER \
198+
SU2_OMP_BARRIER
199+
200+
#define SU2_OMP_SAFE_GLOBAL_ACCESS(...) \
201+
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS \
202+
{ \
203+
__VA_ARGS__ \
204+
} \
205+
END_SU2_OMP_SAFE_GLOBAL_ACCESS
206+
188207
/*--- Convenience functions (e.g. to compute chunk sizes). ---*/
189208

190209
/*!

Common/include/parallelization/vectorization.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ constexpr size_t PREFERRED_SIZE = 8;
5656
*/
5757
template<class T>
5858
constexpr size_t preferredLen() { return PREFERRED_SIZE / sizeof(T); }
59+
5960
template<>
6061
constexpr size_t preferredLen<su2double>() { return PREFERRED_SIZE / sizeof(passivedouble); }
6162

Common/src/geometry/CGeometry.cpp

Lines changed: 36 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -357,8 +357,7 @@ void CGeometry::AllocateP2PComms(unsigned short countPerPoint) {
357357

358358
if (countPerPoint <= maxCountPerPoint) return;
359359

360-
SU2_OMP_BARRIER
361-
SU2_OMP_MASTER {
360+
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
362361

363362
/*--- Store the larger packet size to the class data. ---*/
364363

@@ -379,8 +378,7 @@ void CGeometry::AllocateP2PComms(unsigned short countPerPoint) {
379378
bufS_P2PRecv = new unsigned short[maxCountPerPoint*nPoint_P2PRecv[nP2PRecv]] ();
380379

381380
}
382-
END_SU2_OMP_MASTER
383-
SU2_OMP_BARRIER
381+
END_SU2_OMP_SAFE_GLOBAL_ACCESS
384382

385383
}
386384

@@ -763,10 +761,7 @@ void CGeometry::CompleteComms(CGeometry *geometry,
763761
/*--- For efficiency, recv the messages dynamically based on
764762
the order they arrive. ---*/
765763

766-
SU2_OMP_MASTER
767-
SU2_MPI::Waitany(nP2PRecv, req_P2PRecv, &ind, &status);
768-
END_SU2_OMP_MASTER
769-
SU2_OMP_BARRIER
764+
SU2_OMP_SAFE_GLOBAL_ACCESS(SU2_MPI::Waitany(nP2PRecv, req_P2PRecv, &ind, &status);)
770765

771766
/*--- Once we have recv'd a message, get the source rank. ---*/
772767

@@ -831,12 +826,8 @@ void CGeometry::CompleteComms(CGeometry *geometry,
831826
data in the loop above at this point. ---*/
832827

833828
#ifdef HAVE_MPI
834-
SU2_OMP_MASTER
835-
SU2_MPI::Waitall(nP2PSend, req_P2PSend, MPI_STATUS_IGNORE);
836-
END_SU2_OMP_MASTER
829+
SU2_OMP_SAFE_GLOBAL_ACCESS(SU2_MPI::Waitall(nP2PSend, req_P2PSend, MPI_STATUS_IGNORE);)
837830
#endif
838-
SU2_OMP_BARRIER
839-
840831
}
841832

842833
void CGeometry::PreprocessPeriodicComms(CGeometry *geometry,
@@ -1186,8 +1177,7 @@ void CGeometry::AllocatePeriodicComms(unsigned short countPerPeriodicPoint) {
11861177

11871178
if (countPerPeriodicPoint <= maxCountPerPeriodicPoint) return;
11881179

1189-
SU2_OMP_BARRIER
1190-
SU2_OMP_MASTER {
1180+
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
11911181

11921182
/*--- Store the larger packet size to the class data. ---*/
11931183

@@ -1213,8 +1203,7 @@ void CGeometry::AllocatePeriodicComms(unsigned short countPerPeriodicPoint) {
12131203
bufS_PeriodicRecv = new unsigned short[nRecv] ();
12141204

12151205
}
1216-
END_SU2_OMP_MASTER
1217-
SU2_OMP_BARRIER
1206+
END_SU2_OMP_SAFE_GLOBAL_ACCESS
12181207
}
12191208

12201209
void CGeometry::PostPeriodicRecvs(CGeometry *geometry,
@@ -1409,6 +1398,7 @@ void CGeometry::SetEdges(void) {
14091398
}
14101399
}
14111400
}
1401+
edges->SetPaddingNodes();
14121402
}
14131403

14141404
void CGeometry::SetFaces(void) {
@@ -2506,38 +2496,41 @@ void CGeometry::UpdateCustomBoundaryConditions(CGeometry **geometry_container, C
25062496
}
25072497

25082498
void CGeometry::ComputeSurfaceAreaCfgFile(const CConfig *config) {
2509-
const auto nMarker_Global = config->GetnMarker_CfgFile();
2510-
SurfaceAreaCfgFile.resize(nMarker_Global);
2511-
vector<su2double> LocalSurfaceArea(nMarker_Global, 0.0);
2499+
SU2_OMP_MASTER
2500+
{
2501+
const auto nMarker_Global = config->GetnMarker_CfgFile();
2502+
SurfaceAreaCfgFile.resize(nMarker_Global);
2503+
vector<su2double> LocalSurfaceArea(nMarker_Global, 0.0);
25122504

2513-
/*--- Loop over all local markers ---*/
2514-
for (unsigned short iMarker = 0; iMarker < nMarker; iMarker++) {
2505+
/*--- Loop over all local markers ---*/
2506+
for (unsigned short iMarker = 0; iMarker < nMarker; iMarker++) {
25152507

2516-
const auto Local_TagBound = config->GetMarker_All_TagBound(iMarker);
2508+
const auto Local_TagBound = config->GetMarker_All_TagBound(iMarker);
25172509

2518-
/*--- Loop over all global markers, and find the local-global pair via
2519-
matching unique string tags. ---*/
2520-
for (unsigned short iMarker_Global = 0; iMarker_Global < nMarker_Global; iMarker_Global++) {
2510+
/*--- Loop over all global markers, and find the local-global pair via
2511+
matching unique string tags. ---*/
2512+
for (unsigned short iMarker_Global = 0; iMarker_Global < nMarker_Global; iMarker_Global++) {
25212513

2522-
const auto Global_TagBound = config->GetMarker_CfgFile_TagBound(iMarker_Global);
2523-
if (Local_TagBound == Global_TagBound) {
2514+
const auto Global_TagBound = config->GetMarker_CfgFile_TagBound(iMarker_Global);
2515+
if (Local_TagBound == Global_TagBound) {
25242516

2525-
for(auto iVertex = 0ul; iVertex < nVertex[iMarker]; iVertex++ ) {
2517+
for(auto iVertex = 0ul; iVertex < nVertex[iMarker]; iVertex++ ) {
25262518

2527-
const auto iPoint = vertex[iMarker][iVertex]->GetNode();
2519+
const auto iPoint = vertex[iMarker][iVertex]->GetNode();
25282520

2529-
if(!nodes->GetDomain(iPoint)) continue;
2521+
if(!nodes->GetDomain(iPoint)) continue;
25302522

2531-
const auto AreaNormal = vertex[iMarker][iVertex]->GetNormal();
2532-
const auto Area = GeometryToolbox::Norm(nDim, AreaNormal);
2523+
const auto AreaNormal = vertex[iMarker][iVertex]->GetNormal();
2524+
const auto Area = GeometryToolbox::Norm(nDim, AreaNormal);
25332525

2534-
LocalSurfaceArea[iMarker_Global] += Area;
2535-
}// for iVertex
2536-
}//if Local == Global
2537-
}//for iMarker_Global
2538-
}//for iMarker
2526+
LocalSurfaceArea[iMarker_Global] += Area;
2527+
}// for iVertex
2528+
}//if Local == Global
2529+
}//for iMarker_Global
2530+
}//for iMarker
25392531

2540-
SU2_MPI::Allreduce(LocalSurfaceArea.data(), SurfaceAreaCfgFile.data(), SurfaceAreaCfgFile.size(), MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
2532+
SU2_MPI::Allreduce(LocalSurfaceArea.data(), SurfaceAreaCfgFile.data(), SurfaceAreaCfgFile.size(), MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
2533+
} END_SU2_OMP_MASTER
25412534
}
25422535

25432536
su2double CGeometry::GetSurfaceArea(const CConfig *config, unsigned short val_marker) const {
@@ -3133,7 +3126,7 @@ void CGeometry::FilterValuesAtElementCG(const vector<su2double> &filter_radius,
31333126
END_SU2_OMP_FOR
31343127

31353128
/*--- Share with all processors ---*/
3136-
SU2_OMP_MASTER
3129+
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS
31373130
{
31383131
su2double* dbl_buffer = new su2double [Global_nElemDomain*nDim];
31393132
SU2_MPI::Allreduce(cg_elem,dbl_buffer,Global_nElemDomain*nDim,MPI_DOUBLE,MPI_SUM,SU2_MPI::GetComm());
@@ -3147,8 +3140,7 @@ void CGeometry::FilterValuesAtElementCG(const vector<su2double> &filter_radius,
31473140
MPI_Allreduce(halo_detect.data(),char_buffer.data(),Global_nElemDomain,MPI_CHAR,MPI_SUM,SU2_MPI::GetComm());
31483141
halo_detect.swap(char_buffer);
31493142
}
3150-
END_SU2_OMP_MASTER
3151-
SU2_OMP_BARRIER
3143+
END_SU2_OMP_SAFE_GLOBAL_ACCESS
31523144

31533145
SU2_OMP_FOR_STAT(256)
31543146
for(auto iElem=0ul; iElem<Global_nElemDomain; ++iElem) {
@@ -3187,14 +3179,13 @@ void CGeometry::FilterValuesAtElementCG(const vector<su2double> &filter_radius,
31873179

31883180
#ifdef HAVE_MPI
31893181
/*--- Share with all processors ---*/
3190-
SU2_OMP_MASTER
3182+
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS
31913183
{
31923184
su2double *buffer = new su2double [Global_nElemDomain];
31933185
SU2_MPI::Allreduce(work_values,buffer,Global_nElemDomain,MPI_DOUBLE,MPI_SUM,SU2_MPI::GetComm());
31943186
swap(buffer, work_values); delete [] buffer;
31953187
}
3196-
END_SU2_OMP_MASTER
3197-
SU2_OMP_BARRIER
3188+
END_SU2_OMP_SAFE_GLOBAL_ACCESS
31983189

31993190
/*--- Account for duplication ---*/
32003191
SU2_OMP_FOR_STAT(256)

0 commit comments

Comments
 (0)