Skip to content

Commit 360b390

Browse files
committed
optimization and reset before multizone inner iters (works better)
1 parent 07b3c51 commit 360b390

3 files changed

Lines changed: 35 additions & 26 deletions

File tree

Common/include/toolboxes/CQuasiNewtonDriver.hpp

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ class CQuasiNewtonDriver {
5353
static_assert(std::is_floating_point<Scalar>::value,"");
5454

5555
private:
56+
using MPI_Wrapper = typename SelectMPIWrapper<Scalar>::W;
57+
5658
enum: size_t {BLOCK_SIZE = 1024}; /*!< \brief Loop tiling parameter. */
5759
std::vector<su2matrix<Scalar> > X, R; /*!< \brief Input and residual history of the FP. */
5860
su2matrix<Scalar> corr; /*!< \brief Correction to the natural FP result. */
@@ -70,13 +72,11 @@ class CQuasiNewtonDriver {
7072
}
7173

7274
void computeNormalEquations() {
73-
/*--- Lower triangular packed storage. ---*/
74-
mat.resize(iSample*(iSample+1)/2) = Scalar(0);
75-
rhs.resize(iSample) = Scalar(0);
76-
7775
/*--- Size for the dot products. ---*/
7876
const auto end = std::min<Index>(nPtDomain,corr.rows())*corr.cols();
7977

78+
mat = Scalar(0); rhs = Scalar(0);
79+
8080
/*--- Tiled part of the loop. ---*/
8181
Index begin = 0;
8282
while (end-begin >= BLOCK_SIZE) {
@@ -94,13 +94,13 @@ class CQuasiNewtonDriver {
9494
const auto type = (sizeof(Scalar) < sizeof(double))? MPI_FLOAT : MPI_DOUBLE;
9595

9696
su2vector<Scalar> tmp(mat.size());
97-
SelectMPIWrapper<Scalar>::W::Allreduce(mat.data(), tmp.data(), mat.size(),
98-
type, MPI_SUM, MPI_COMM_WORLD);
97+
MPI_Wrapper::Allreduce(mat.data(), tmp.data(), iSample*(iSample+1)/2,
98+
type, MPI_SUM, MPI_COMM_WORLD);
9999
mat = std::move(tmp);
100100

101-
sol = rhs;
102-
SelectMPIWrapper<Scalar>::W::Allreduce(sol.data(), rhs.data(), rhs.size(),
103-
type, MPI_SUM, MPI_COMM_WORLD);
101+
MPI_Wrapper::Allreduce(rhs.data(), sol.data(), iSample,
102+
type, MPI_SUM, MPI_COMM_WORLD);
103+
std::swap(rhs, sol);
104104
}
105105
}
106106

@@ -115,9 +115,11 @@ class CQuasiNewtonDriver {
115115
for (Index i = 0; i < iSample; ++i) {
116116
const auto ri1 = R[i+1].data() + start;
117117
const auto ri0 = R[i].data() + start;
118-
for (Index j = 0; j <= i; ++j) {
118+
/*--- Off-diagonal coefficients. ---*/
119+
for (Index j = 0; j < i; ++j) {
119120
const auto rj1 = R[j+1].data() + start;
120121
const auto rj0 = R[j].data() + start;
122+
/*--- Sum of partial sums to reduce trunc. error. ---*/
121123
Scalar sum = 0;
122124
SU2_OMP_SIMD
123125
for (Index k = 0; k < blkSize; ++k) {
@@ -126,13 +128,16 @@ class CQuasiNewtonDriver {
126128
const auto iCoeff = i*(i+1)/2 + j;
127129
mat(iCoeff) += sum;
128130
}
131+
/*--- Diagonal coeff and residual fused. ---*/
132+
Scalar diag = 0, res = 0;
129133
const auto r = R[iSample].data() + start;
130-
Scalar sum = 0;
131134
SU2_OMP_SIMD
132135
for (Index k = 0; k < blkSize; ++k) {
133-
sum += (ri1[k]-ri0[k]) * r[k];
136+
diag += pow(ri1[k]-ri0[k], 2);
137+
res += (ri1[k]-ri0[k]) * r[k];
134138
}
135-
vec(i) -= sum;
139+
mat(i*(i+3)/2) += diag;
140+
vec(i) -= res;
136141
}
137142
}
138143

@@ -165,6 +170,10 @@ class CQuasiNewtonDriver {
165170
R.emplace_back(npt,nvar);
166171
}
167172
X[0] = Scalar(0);
173+
/*--- Lower triangular packed storage. ---*/
174+
mat.resize(nsample*(nsample-1)/2);
175+
rhs.resize(nsample-1);
176+
sol.resize(nsample-1);
168177
}
169178

170179
/*! \brief Size of the object, the number of samples. */
@@ -211,11 +220,10 @@ class CQuasiNewtonDriver {
211220
for (Index j = 0; j <= i; ++j)
212221
pseudoInv(i,j) = mat(k++);
213222
pseudoInv.Invert(true);
214-
sol.resize(iSample);
215223
pseudoInv.MatVecMult(rhs.data(), sol.data());
216224

217225
/*--- Compute correction, cleared before for less trunc. error. ---*/
218-
for (Index k = 0; k < sol.size(); ++k) {
226+
for (Index k = 0; k < iSample; ++k) {
219227
const auto x1 = X[k+1].data();
220228
const auto r1 = R[k+1].data();
221229
const auto x0 = X[k].data();

SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -262,11 +262,11 @@ void CDiscAdjMultizoneDriver::Run() {
262262
const bool restart = config_container[iZone]->GetRestart();
263263
const bool no_restart = (iOuterIter > 0) || !restart;
264264

265-
/*--- When restarting, we start using the QN driver only after the first iteration. ---*/
265+
/*--- Reset QN driver for new inner iterations. ---*/
266266

267-
if (QNDriver[iZone].size() && restart && (iOuterIter==1)) {
267+
if (QNDriver[iZone].size()) {
268268
QNDriver[iZone].reset();
269-
GetAllSolutions(iZone, true, QNDriver[iZone].solution());
269+
if(restart && (iOuterIter==1)) GetAllSolutions(iZone, true, QNDriver[iZone]);
270270
}
271271

272272
for (unsigned long iInnerIter = 0; iInnerIter < nInnerIter[iZone]; iInnerIter++) {
@@ -297,18 +297,19 @@ void CDiscAdjMultizoneDriver::Run() {
297297
solver_container, numerics_container, config_container,
298298
surface_movement, grid_movement, FFDBox, iZone, INST_0);
299299

300-
/*--- This is done explicitly here for multizone cases, only in inner iterations and not when
301-
* extracting cross terms so that the adjoint residuals in each zone still make sense. ---*/
302-
303-
Set_SolutionOld_To_Solution(iZone);
304-
305-
/*--- Use QN driver to improve solution. ---*/
300+
/*--- Use QN driver to improve the solution. ---*/
306301

307302
if (QNDriver[iZone].size()) {
308303
GetAllSolutions(iZone, true, QNDriver[iZone].FPresult());
309-
SetAllSolutions(iZone, true, QNDriver[iZone].compute());
304+
QNDriver[iZone].compute();
305+
if(iInnerIter) SetAllSolutions(iZone, true, QNDriver[iZone]);
310306
}
311307

308+
/*--- This is done explicitly here for multizone cases, only in inner iterations and not when
309+
* extracting cross terms so that the adjoint residuals in each zone still make sense. ---*/
310+
311+
Set_SolutionOld_To_Solution(iZone);
312+
312313
/*--- Print out the convergence data to screen and history file. ---*/
313314

314315
bool converged = iteration_container[iZone][INST_0]->Monitor(output_container[iZone], integration_container,

SU2_CFD/src/drivers/CDiscAdjSinglezoneDriver.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ void CDiscAdjSinglezoneDriver::Run() {
159159
GetTotalNumberOfVariables(ZONE_0,true),
160160
geometry_container[ZONE_0][INST_0][MESH_0]->GetnPointDomain());
161161

162-
GetAllSolutions(ZONE_0, true, QNDriver.solution());
162+
if (TimeIter != 0) GetAllSolutions(ZONE_0, true, QNDriver);
163163
}
164164

165165
for (auto iter = 0ul; iter < nAdjoint_Iter; iter++) {

0 commit comments

Comments
 (0)