Skip to content

Commit 0c1b07b

Browse files
committed
Merge branch 'vectorize_when_possible' into improve_doxydocs
2 parents e616fa1 + c9af050 commit 0c1b07b

27 files changed

Lines changed: 119 additions & 125 deletions

File tree

.github/workflows/release-management.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ jobs:
2929
runs-on: ubuntu-latest
3030
steps:
3131
- name: Cache Object Files
32-
uses: actions/cache@v1
32+
uses: actions/cache@v3
3333
with:
3434
path: ccache
3535
key: ${{ matrix.os_bin }}-${{ github.sha }}
@@ -44,7 +44,7 @@ jobs:
4444
zip -r ../${{matrix.os_bin}}.zip bin/*
4545
# Uploads binaries as artifacts (just as a backup)
4646
- name: Upload Binaries
47-
uses: actions/upload-artifact@v1
47+
uses: actions/upload-artifact@v3
4848
with:
4949
name: ${{matrix.os_bin}}
5050
path: ${{matrix.os_bin}}.zip

Common/include/basic_types/datatype_structure.hpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,21 @@ namespace SU2_TYPE {
114114
FORCEINLINE void SetDerivative(su2double &, const passivedouble &) {}
115115
#endif
116116

117+
/*!
118+
* \brief Get the passive value of any variable. For most types return directly,
119+
* specialize for su2double to call GetValue.
120+
* \note This is a struct instead of a function because the return type of the
121+
* su2double specialization changes.
122+
*/
123+
template <class T>
124+
struct Passive {
125+
FORCEINLINE static T Value(const T& val) {return val;}
126+
};
127+
template <>
128+
struct Passive<su2double> {
129+
FORCEINLINE static passivedouble Value(const su2double& val) {return GetValue(val);}
130+
};
131+
117132
/*!
118133
* \brief Casts the primitive value to int (uses GetValue, already implemented for each type).
119134
* \param[in] data - The non-primitive datatype.

Common/include/linear_algebra/vector_expressions.hpp

Lines changed: 35 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include <cassert>
3434
#include <cstdlib>
3535
#include <cmath>
36+
#include <cstdint>
3637

3738
namespace VecExpr {
3839

@@ -158,21 +159,28 @@ FORCEINLINE auto FUN(decay_t<S> u, const CVecExpr<V,S>& v) \
158159
RETURNS( EXPR<Bcast<S>,V,S>(Bcast<S>(u), v.derived()) \
159160
) \
160161

161-
/*--- std::max/min have issues (maybe because they return by reference).
162-
* For AD codi::max/min need to be used to avoid issues in debug builds. ---*/
163-
164-
#if defined(CODI_REVERSE_TYPE) || defined(CODI_FORWARD_TYPE)
165-
#define max_impl math::max
166-
#define min_impl math::min
167-
#else
168-
#define max_impl(a,b) a<b? Scalar(b) : Scalar(a)
169-
#define min_impl(a,b) b<a? Scalar(b) : Scalar(a)
170-
#endif
171-
MAKE_BINARY_FUN(fmax, max_, max_impl)
172-
MAKE_BINARY_FUN(fmin, min_, min_impl)
162+
/*--- std::max/min have issues (because they return by reference).
163+
* fmin and fmax return by value and thus are fine, but they would force
164+
* conversions to double, to avoid that we provide integer overloads.
165+
* We use int32/64 instead of int/long to avoid issues with Windows,
166+
* where long is 32 bits (instead of 64 bits). ---*/
167+
168+
#define MAKE_FMINMAX_OVERLOADS(TYPE) \
169+
FORCEINLINE TYPE fmax(TYPE a, TYPE b) { return a<b? b : a; } \
170+
FORCEINLINE TYPE fmin(TYPE a, TYPE b) { return a<b? a : b; }
171+
MAKE_FMINMAX_OVERLOADS(int32_t)
172+
MAKE_FMINMAX_OVERLOADS(int64_t)
173+
MAKE_FMINMAX_OVERLOADS(uint32_t)
174+
MAKE_FMINMAX_OVERLOADS(uint64_t)
175+
/*--- Make the float and double versions of fmin/max available in this
176+
* namespace to avoid ambiguous overloads. ---*/
177+
using std::fmax;
178+
using std::fmin;
179+
#undef MAKE_FMINMAX_OVERLOADS
180+
181+
MAKE_BINARY_FUN(fmax, max_, fmax)
182+
MAKE_BINARY_FUN(fmin, min_, fmin)
173183
MAKE_BINARY_FUN(pow, pow_, math::pow)
174-
#undef max_impl
175-
#undef min_impl
176184

177185
/*--- sts::plus and co. were tried, the code was horrendous (due to the forced
178186
* conversion between different types) and creating functions for these ops
@@ -191,20 +199,25 @@ MAKE_BINARY_FUN(operator/, div_, div_impl)
191199
#undef mul_impl
192200
#undef div_impl
193201

194-
/*--- Relational operators need to be cast to the scalar type to allow vectorization. ---*/
195-
196-
#define le_impl(a,b) Scalar(a<=b)
197-
#define ge_impl(a,b) Scalar(a>=b)
198-
#define eq_impl(a,b) Scalar(a==b)
199-
#define ne_impl(a,b) Scalar(a!=b)
200-
#define lt_impl(a,b) Scalar(a<b)
201-
#define gt_impl(a,b) Scalar(a>b)
202+
/*--- Relational operators need to be cast to the scalar type to allow vectorization.
203+
* TO_PASSIVE is used to convert active scalars to passive, which CoDi will then capture
204+
* by value in its expressions, and thus dangling references are avoided. No AD info
205+
* is lost since these operators are non-differentiable. ---*/
206+
207+
#define TO_PASSIVE(IMPL) SU2_TYPE::Passive<Scalar>::Value(IMPL)
208+
#define le_impl(a,b) TO_PASSIVE(a<=b)
209+
#define ge_impl(a,b) TO_PASSIVE(a>=b)
210+
#define eq_impl(a,b) TO_PASSIVE(a==b)
211+
#define ne_impl(a,b) TO_PASSIVE(a!=b)
212+
#define lt_impl(a,b) TO_PASSIVE(a<b)
213+
#define gt_impl(a,b) TO_PASSIVE(a>b)
202214
MAKE_BINARY_FUN(operator<=, le_, le_impl)
203215
MAKE_BINARY_FUN(operator>=, ge_, ge_impl)
204216
MAKE_BINARY_FUN(operator==, eq_, eq_impl)
205217
MAKE_BINARY_FUN(operator!=, ne_, ne_impl)
206218
MAKE_BINARY_FUN(operator<, lt_, lt_impl)
207219
MAKE_BINARY_FUN(operator>, gt_, gt_impl)
220+
#undef TO_PASSIVE
208221
#undef le_impl
209222
#undef ge_impl
210223
#undef eq_impl

Common/src/grid_movement/CFreeFormDefBox.cpp

Lines changed: 61 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -102,19 +102,34 @@ CFreeFormDefBox::CFreeFormDefBox(const unsigned short Degree[], unsigned short B
102102
CFreeFormDefBox::~CFreeFormDefBox(void) {
103103
unsigned short iOrder, jOrder, kOrder, iCornerPoints, iDim;
104104

105-
for (iOrder = 0; iOrder < lOrder; iOrder++)
106-
for (jOrder = 0; jOrder < mOrder; jOrder++)
105+
for (iOrder = 0; iOrder < lOrder; iOrder++) {
106+
for (jOrder = 0; jOrder < mOrder; jOrder++) {
107107
for (kOrder = 0; kOrder < nOrder; kOrder++) {
108108
delete [] Coord_Control_Points[iOrder][jOrder][kOrder];
109109
delete [] ParCoord_Control_Points[iOrder][jOrder][kOrder];
110110
delete [] Coord_Control_Points_Copy[iOrder][jOrder][kOrder];
111+
if (Coord_SupportCP != nullptr) delete [] Coord_SupportCP[iOrder][jOrder][kOrder];
111112
}
113+
delete [] Coord_Control_Points[iOrder][jOrder];
114+
delete [] ParCoord_Control_Points[iOrder][jOrder];
115+
delete [] Coord_Control_Points_Copy[iOrder][jOrder];
116+
if (Coord_SupportCP != nullptr) delete [] Coord_SupportCP[iOrder][jOrder];
117+
}
118+
delete [] Coord_Control_Points[iOrder];
119+
delete [] ParCoord_Control_Points[iOrder];
120+
delete [] Coord_Control_Points_Copy[iOrder];
121+
if (Coord_SupportCP != nullptr) delete [] Coord_SupportCP[iOrder];
122+
}
123+
112124
delete [] Coord_Control_Points;
113125
delete [] ParCoord_Control_Points;
114126
delete [] Coord_Control_Points_Copy;
127+
delete [] Coord_SupportCP;
115128

116129
delete [] ParamCoord;
130+
delete [] ParamCoord_;
117131
delete [] cart_coord;
132+
delete [] cart_coord_;
118133
delete [] Gradient;
119134

120135
for (iCornerPoints = 0; iCornerPoints < nCornerPoints; iCornerPoints++)
@@ -969,26 +984,37 @@ su2double *CFreeFormDefBox::GetParametricCoord_Iterative(unsigned long iPoint, s
969984
}
970985

971986
bool CFreeFormDefBox::GetPointFFD(CGeometry *geometry, CConfig *config, unsigned long iPoint) const {
972-
su2double Coord[3] = {0.0, 0.0, 0.0};
973-
unsigned short iVar, jVar, iDim;
974-
su2double X_0, Y_0, Z_0, Xbar, Ybar, Zbar;
975987

976-
bool Inside = false;
988+
bool Inside = true;
977989
bool cylindrical = (config->GetFFD_CoordSystem() == CYLINDRICAL);
978990
bool spherical = (config->GetFFD_CoordSystem() == SPHERICAL);
979991
bool polar = (config->GetFFD_CoordSystem() == POLAR);
980992

981-
unsigned short Index[5][7] = {
982-
{0, 1, 2, 5, 0, 1, 2},
983-
{0, 2, 7, 5, 0, 2, 7},
984-
{0, 2, 3, 7, 0, 2, 3},
985-
{0, 5, 7, 4, 0, 5, 7},
986-
{2, 7, 5, 6, 2, 7, 5}};
993+
/*--- indices of the FFD box. Note that the front face is labelled 0,1,2,3 and the back face is 4,5,6,7 ---*/
994+
995+
unsigned short Index[6][5] = {
996+
{0,1,2,3,0}, // front side
997+
{1,5,6,2,1}, // right side
998+
{2,6,7,3,2}, // top side
999+
{3,7,4,0,3}, // left side
1000+
{4,5,1,0,4}, // bottom side
1001+
{4,7,6,5,4}}; // back side
1002+
1003+
/*--- The current approach is to subdivide each of the 6 faces of the hexahedral FFD box into 4 triangles
1004+
by defining a supporting middle point. This allows nonplanar FFD boxes.
1005+
Note that the definition of the FFD box is as follows: the FFD box is a 6-sided die and we are looking at the side "1".
1006+
The opposite side is side "6".
1007+
If we are looking at side "1", we define the nodes counterclockwise.
1008+
If we are looking at side "6", we define the face clockwise ---*/
1009+
9871010
unsigned short nDim = geometry->GetnDim();
9881011

989-
for (iDim = 0; iDim < nDim; iDim++)
1012+
su2double Coord[3] = {0.0, 0.0, 0.0};
1013+
for (unsigned short iDim = 0; iDim < nDim; iDim++)
9901014
Coord[iDim] = geometry->nodes->GetCoord(iPoint, iDim);
9911015

1016+
su2double X_0, Y_0, Z_0, Xbar, Ybar, Zbar;
1017+
9921018
if (cylindrical) {
9931019

9941020
X_0 = config->GetFFD_Axis(0); Y_0 = config->GetFFD_Axis(1); Z_0 = config->GetFFD_Axis(2);
@@ -1013,78 +1039,38 @@ bool CFreeFormDefBox::GetPointFFD(CGeometry *geometry, CConfig *config, unsigned
10131039

10141040
}
10151041

1016-
/*--- 1st tetrahedron {V0, V1, V2, V5}
1017-
2nd tetrahedron {V0, V2, V7, V5}
1018-
3th tetrahedron {V0, V2, V3, V7}
1019-
4th tetrahedron {V0, V5, V7, V4}
1020-
5th tetrahedron {V2, V7, V5, V6} ---*/
1042+
/*--- loop over the faces of the FFD box ---*/
1043+
1044+
for (unsigned short iVar = 0; iVar < 6; iVar++) {
1045+
1046+
su2double P[3] = {0.0, 0.0, 0.0};
1047+
1048+
/*--- every face needs an interpolated middle point for the triangles ---*/
1049+
1050+
for (int p = 0; p < 4; p++){
1051+
P[0] += 0.25*Coord_Corner_Points[Index[iVar][p]][0];
1052+
P[1] += 0.25*Coord_Corner_Points[Index[iVar][p]][1];
1053+
P[2] += 0.25*Coord_Corner_Points[Index[iVar][p]][2];
1054+
}
1055+
1056+
/*--- loop over the 4 triangles making up the FFD box. The sign is equal for all distances ---*/
10211057

1022-
for (iVar = 0; iVar < 5; iVar++) {
1023-
Inside = true;
1024-
for (jVar = 0; jVar < 4; jVar++) {
1058+
for (unsigned short jVar = 0; jVar < 4; jVar++) {
10251059
su2double Distance_Point = geometry->Point2Plane_Distance(Coord,
1060+
Coord_Corner_Points[Index[iVar][jVar]],
10261061
Coord_Corner_Points[Index[iVar][jVar+1]],
1027-
Coord_Corner_Points[Index[iVar][jVar+2]],
1028-
Coord_Corner_Points[Index[iVar][jVar+3]]);
1029-
1030-
su2double Distance_Vertex = geometry->Point2Plane_Distance(Coord_Corner_Points[Index[iVar][jVar]],
1031-
Coord_Corner_Points[Index[iVar][jVar+1]],
1032-
Coord_Corner_Points[Index[iVar][jVar+2]],
1033-
Coord_Corner_Points[Index[iVar][jVar+3]]);
1034-
if (Distance_Point*Distance_Vertex < 0.0) Inside = false;
1062+
P);
1063+
if (Distance_Point < 0) {
1064+
Inside = false;
1065+
return Inside;
1066+
}
10351067
}
1036-
if (Inside) break;
10371068
}
10381069

10391070
return Inside;
10401071

10411072
}
10421073

1043-
void CFreeFormDefBox::SetDeformationZone(CGeometry *geometry, CConfig *config, unsigned short iFFDBox) const {
1044-
su2double *Coord;
1045-
unsigned short iMarker, iVar, jVar;
1046-
unsigned long iVertex, iPoint;
1047-
bool Inside = false;
1048-
1049-
unsigned short Index[5][7] = {
1050-
{0, 1, 2, 5, 0, 1, 2},
1051-
{0, 2, 7, 5, 0, 2, 7},
1052-
{0, 2, 3, 7, 0, 2, 3},
1053-
{0, 5, 7, 4, 0, 5, 7},
1054-
{2, 7, 5, 6, 2, 7, 5}};
1055-
1056-
for (iMarker = 0; iMarker < config->GetnMarker_All(); iMarker++) {
1057-
if (config->GetMarker_All_DV(iMarker) == YES) {
1058-
for (iVertex = 0; iVertex < geometry->nVertex[iMarker]; iVertex++) {
1059-
iPoint = geometry->vertex[iMarker][iVertex]->GetNode();
1060-
1061-
Coord = geometry->nodes->GetCoord(iPoint);
1062-
1063-
/*--- 1st tetrahedron {V0, V1, V2, V5}
1064-
2nd tetrahedron {V0, V2, V7, V5}
1065-
3th tetrahedron {V0, V2, V3, V7}
1066-
4th tetrahedron {V0, V5, V7, V4}
1067-
5th tetrahedron {V2, V7, V5, V6} ---*/
1068-
1069-
for (iVar = 0; iVar < 5; iVar++) {
1070-
Inside = true;
1071-
for (jVar = 0; jVar < 4; jVar++) {
1072-
su2double Distance_Point = geometry->Point2Plane_Distance(Coord,
1073-
Coord_Corner_Points[Index[iVar][jVar+1]],
1074-
Coord_Corner_Points[Index[iVar][jVar+2]],
1075-
Coord_Corner_Points[Index[iVar][jVar+3]]);
1076-
su2double Distance_Vertex = geometry->Point2Plane_Distance(Coord_Corner_Points[Index[iVar][jVar]],
1077-
Coord_Corner_Points[Index[iVar][jVar+1]],
1078-
Coord_Corner_Points[Index[iVar][jVar+2]],
1079-
Coord_Corner_Points[Index[iVar][jVar+3]]);
1080-
if (Distance_Point*Distance_Vertex < 0.0) Inside = false;
1081-
}
1082-
if (Inside) break;
1083-
}
1084-
}
1085-
}
1086-
}
1087-
}
10881074

10891075
su2double CFreeFormDefBox::GetDerivative1(su2double *uvw, unsigned short val_diff, unsigned short *ijk, unsigned short *lmn) const {
10901076

SU2_CFD/include/numerics_simd/flow/convection/common.hpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -133,10 +133,8 @@ FORCEINLINE CPair<ReconVarType> reconstructPrimitives(Int iEdge, Int iPoint, Int
133133
break;
134134
}
135135
/*--- Detect a non-physical reconstruction based on negative pressure or density. ---*/
136-
/*--- Some weird issues with forward AD if this is all done in one line. ---*/
137-
const Double neg_p = fmin(V.i.pressure(), V.j.pressure()) < 0.0;
138-
const Double neg_rho = fmin(V.i.density(), V.j.density()) < 0.0;
139-
const Double neg_p_or_rho = fmax(neg_p, neg_rho);
136+
const Double neg_p_or_rho = fmax(fmin(V.i.pressure(), V.j.pressure()) < 0.0,
137+
fmin(V.i.density(), V.j.density()) < 0.0);
140138
/*--- Test the sign of the Roe-averaged speed of sound. ---*/
141139
const Double R = sqrt(abs(V.j.density() / V.i.density()));
142140
/*--- Delay dividing by R+1 until comparing enthalpy and velocity magnitude. ---*/

TestCases/disc_adj_rans/naca0012/turb_NACA0012_sst.cfg

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,6 @@ MG_DAMP_PROLONGATION= 0.75
143143
% Convective numerical method (JST, LAX-FRIEDRICH, CUSP, ROE, AUSM, HLLC,
144144
% TURKEL_PREC, MSW)
145145
CONV_NUM_METHOD_FLOW= ROE
146-
USE_VECTORIZATION= YES
147146
%
148147
% Spatial numerical order integration (1ST_ORDER, 2ND_ORDER, 2ND_ORDER_LIMITER)
149148
MUSCL_FLOW= YES

TestCases/navierstokes/flatplate/lam_flatplate.cfg

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,6 @@ MG_DAMP_PROLONGATION= 0.75
6969
% -------------------- FLOW NUMERICAL METHOD DEFINITION -----------------------%
7070
%
7171
CONV_NUM_METHOD_FLOW= ROE
72-
USE_VECTORIZATION= YES
7372
MUSCL_FLOW= YES
7473
SLOPE_LIMITER_FLOW= NONE
7574
TIME_DISCRE_FLOW= EULER_IMPLICIT

TestCases/navierstokes/flatplate/lam_flatplate_unst.cfg

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,6 @@ LINEAR_SOLVER_ITER= 2
6969
% -------------------- FLOW NUMERICAL METHOD DEFINITION -----------------------%
7070
%
7171
CONV_NUM_METHOD_FLOW= ROE
72-
USE_VECTORIZATION= YES
7372
MUSCL_FLOW= YES
7473
SLOPE_LIMITER_FLOW= NONE
7574

TestCases/navierstokes/periodic2D/config.cfg

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ LINEAR_SOLVER_ITER= 4
5555
% -------------------- FLOW NUMERICAL METHOD DEFINITION -----------------------%
5656
%
5757
CONV_NUM_METHOD_FLOW= ROE
58-
USE_VECTORIZATION= YES
5958
MUSCL_FLOW= YES
6059
SLOPE_LIMITER_FLOW= VENKATAKRISHNAN_WANG
6160
VENKAT_LIMITER_COEFF= 0.05

TestCases/py_wrapper/translating_NACA0012/config.cfg

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@ VENKAT_LIMITER_COEFF= 0.1
5050

5151
% SOLUTION ACCELERATION
5252
%
53-
USE_VECTORIZATION= YES
5453
CFL_NUMBER= 1e3
5554
CFL_ADAPT= NO
5655
%

0 commit comments

Comments
 (0)