Skip to content

Commit 875b954

Browse files
committed
fix walltime output nompi + omp, some simd optimization
1 parent 1f86ab4 commit 875b954

9 files changed

Lines changed: 38 additions & 29 deletions

File tree

Common/include/linear_algebra/CSysMatrix.hpp

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#pragma once
3030

3131
#include "../../include/mpi_structure.hpp"
32+
#include "../../include/omp_structure.hpp"
3233
#include "CSysVector.hpp"
3334
#include "CPastixWrapper.hpp"
3435

@@ -169,58 +170,60 @@ class CSysMatrix {
169170
* \param[in] vector
170171
* \param[out] product
171172
*/
172-
inline void MatrixVectorProduct(const ScalarType *matrix, const ScalarType *vector, ScalarType *product) const;
173+
void MatrixVectorProduct(const ScalarType *matrix, const ScalarType *vector, ScalarType *product) const;
173174

174175
/*!
175176
* \brief Calculates the matrix-vector product: product += matrix*vector
176177
* \param[in] matrix
177178
* \param[in] vector
178179
* \param[in,out] product
179180
*/
180-
inline void MatrixVectorProductAdd(const ScalarType *matrix, const ScalarType *vector, ScalarType *product) const;
181+
void MatrixVectorProductAdd(const ScalarType *matrix, const ScalarType *vector, ScalarType *product) const;
181182

182183
/*!
183184
* \brief Calculates the matrix-vector product: product -= matrix*vector
184185
* \param[in] matrix
185186
* \param[in] vector
186187
* \param[in,out] product
187188
*/
188-
inline void MatrixVectorProductSub(const ScalarType *matrix, const ScalarType *vector, ScalarType *product) const;
189+
void MatrixVectorProductSub(const ScalarType *matrix, const ScalarType *vector, ScalarType *product) const;
189190

190191
/*!
191192
* \brief Calculates the matrix-vector product: product += matrix^T * vector
192193
* \param[in] matrix
193194
* \param[in] vector
194195
* \param[in,out] product
195196
*/
196-
inline void MatrixVectorProductTransp(const ScalarType *matrix, const ScalarType *vector, ScalarType *product) const;
197+
void MatrixVectorProductTransp(const ScalarType *matrix, const ScalarType *vector, ScalarType *product) const;
197198

198199
/*!
199200
* \brief Calculates the matrix-matrix product
200201
*/
201-
inline void MatrixMatrixProduct(const ScalarType *matrix_a, const ScalarType *matrix_b, ScalarType *product) const;
202+
void MatrixMatrixProduct(const ScalarType *matrix_a, const ScalarType *matrix_b, ScalarType *product) const;
202203

203204
/*!
204205
* \brief Subtract b from a and store the result in c.
205206
*/
206-
inline void VectorSubtraction(const ScalarType *a, const ScalarType *b, ScalarType *c) const {
207+
FORCEINLINE void VectorSubtraction(const ScalarType *a, const ScalarType *b, ScalarType *c) const {
207208
for(unsigned long iVar = 0; iVar < nVar; iVar++)
208209
c[iVar] = a[iVar] - b[iVar];
209210
}
210211

211212
/*!
212213
* \brief Subtract b from a and store the result in c.
213214
*/
214-
inline void MatrixSubtraction(const ScalarType *a, const ScalarType *b, ScalarType *c) const {
215+
FORCEINLINE void MatrixSubtraction(const ScalarType *a, const ScalarType *b, ScalarType *c) const {
216+
SU2_OMP_SIMD
215217
for(unsigned long iVar = 0; iVar < nVar*nEqn; iVar++)
216218
c[iVar] = a[iVar] - b[iVar];
217219
}
218220

219221
/*!
220222
* \brief Copy matrix src into dst, transpose if required.
221223
*/
222-
inline void MatrixCopy(const ScalarType *src, ScalarType *dst, bool transposed = false) const {
224+
FORCEINLINE void MatrixCopy(const ScalarType *src, ScalarType *dst, bool transposed = false) const {
223225
if (!transposed) {
226+
SU2_OMP_SIMD
224227
for(auto iVar = 0ul; iVar < nVar*nEqn; ++iVar)
225228
dst[iVar] = src[iVar];
226229
}
@@ -446,6 +449,7 @@ class CSysMatrix {
446449

447450
auto mat_ij = GetBlock(block_i, block_j);
448451
if (!mat_ij) return;
452+
SU2_OMP_SIMD
449453
for (auto iVar = 0ul; iVar < nVar*nEqn; ++iVar) {
450454
mat_ij[iVar] = (Overwrite? ScalarType(0) : mat_ij[iVar]) + PassiveAssign(alpha * val_block[iVar]);
451455
}
@@ -657,6 +661,7 @@ class CSysMatrix {
657661
unsigned long iVar, index = dia_ptr[block_i]*nVar*nVar;
658662

659663
/*--- Clear entire block before setting its diagonal. ---*/
664+
SU2_OMP_SIMD
660665
for (iVar = 0; iVar < nVar*nVar; iVar++)
661666
matrix[index+iVar] = 0.0;
662667

Common/include/linear_algebra/CSysMatrix.inl

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,18 +47,15 @@ FORCEINLINE void CSysMatrix<ScalarType>::SetBlock_ILUMatrix(unsigned long block_
4747

4848
auto ilu_ij = GetBlock_ILUMatrix(block_i, block_j);
4949
if (!ilu_ij) return;
50-
for (auto iVar = 0ul; iVar < nVar*nVar; ++iVar)
51-
ilu_ij[iVar] = val_block[iVar];
50+
MatrixCopy(val_block, ilu_ij, false);
5251
}
5352

5453
template<class ScalarType>
5554
FORCEINLINE void CSysMatrix<ScalarType>::SetBlockTransposed_ILUMatrix(unsigned long block_i, unsigned long block_j, ScalarType *val_block) {
5655

5756
auto ilu_ij = GetBlock_ILUMatrix(block_i, block_j);
5857
if (!ilu_ij) return;
59-
for (auto iVar = 0ul; iVar < nVar; iVar++)
60-
for (auto jVar = 0ul; jVar < nVar; jVar++)
61-
ilu_ij[iVar*nVar+jVar] = val_block[jVar*nVar+iVar];
58+
MatrixCopy(val_block, ilu_ij, true);
6259
}
6360

6461
template<class T, bool alpha, bool beta, bool transp>

Common/include/mpi_structure.inl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -512,7 +512,11 @@ inline void CMediMPIWrapper::Waitany(int nrequests, Request *request,
512512
}
513513
#endif
514514
#else // HAVE_MPI
515+
#ifdef _OPENMP
516+
#include <omp.h>
517+
#else
515518
#include <ctime>
519+
#endif
516520

517521
inline void CBaseMPIWrapper::Error(std::string ErrorMsg, std::string FunctionName){
518522
if (Rank == 0){
@@ -688,6 +692,10 @@ inline void CBaseMPIWrapper::CopyData(void *sendbuf, void *recvbuf, int size, Da
688692
}
689693

690694
inline passivedouble CBaseMPIWrapper::Wtime(void) {
695+
#ifdef _OPENMP
696+
return omp_get_wtime();
697+
#else
691698
return passivedouble(clock()) / CLOCKS_PER_SEC;
699+
#endif
692700
}
693701
#endif

Common/include/omp_structure.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,11 @@ inline void omp_destroy_lock(omp_lock_t*){}
110110
#define SU2_OMP_FOR_DYN(CHUNK) SU2_OMP(for schedule(dynamic,CHUNK))
111111
#define SU2_OMP_FOR_STAT(CHUNK) SU2_OMP(for schedule(static,CHUNK))
112112

113+
/*--- Disable some unsupported features on MSVC. ---*/
114+
#if defined(_MSC_VER)
115+
#undef SU2_OMP_SIMD
116+
#define SU2_OMP_SIMD
117+
#endif
113118

114119
/*--- Convenience functions (e.g. to compute chunk sizes). ---*/
115120

SU2_CFD/include/output/CMultizoneOutput.hpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,6 @@
3737
#endif
3838
#include <fstream>
3939
#include <cmath>
40-
#include <time.h>
41-
#include <fstream>
4240

4341
#include "COutput.hpp"
4442
#include "../../../Common/include/CConfig.hpp"

SU2_CFD/include/output/COutput.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
#include <fstream>
3131
#include <cmath>
3232
#include <map>
33-
#include <time.h>
3433
#include <sstream>
3534
#include <iomanip>
3635
#include <limits>

SU2_CFD/include/output/COutputLegacy.hpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,6 @@
3838
#endif
3939
#include <fstream>
4040
#include <cmath>
41-
#include <time.h>
42-
#include <fstream>
4341
#include <vector>
4442

4543
#include "../../../Common/include/option_structure.hpp"

SU2_CFD/include/output/filewriter/CFileWriter.hpp

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333
#include <string>
3434
#include <cstring>
3535
#include <fstream>
36-
#include <time.h>
3736

3837
#include "../../output/filewriter/CParallelDataSorter.hpp"
3938

@@ -83,7 +82,7 @@ class CFileWriter{
8382
* \brief The parallel data sorter
8483
*/
8584
CParallelDataSorter* dataSorter;
86-
85+
8786
#ifdef HAVE_MPI
8887
/*!
8988
* \brief The displacement that every process has in the current file view
@@ -143,9 +142,9 @@ class CFileWriter{
143142
* \return The time used to write to file.
144143
*/
145144
su2double Get_UsedTime() const {return usedTime;}
146-
145+
147146
protected:
148-
147+
149148
/*!
150149
* \brief Collectively write a binary data array distributed over all processors to file using MPI I/O.
151150
* \param[in] data - Pointer to the data to write.
@@ -165,27 +164,27 @@ class CFileWriter{
165164
* \return Boolean indicating whether the writing was successful.
166165
*/
167166
bool WriteMPIBinaryData(const void *data, unsigned long sizeInBytes, unsigned short processor);
168-
167+
169168
/*!
170169
* \brief Write a string to a currently opened file using MPI I/O. Note: routine must be called collectively,
171170
* although only one processor writes the string.
172171
* \param[in] str - The string to write to file.
173172
* \param[in] processor - Rank of the processor that should write the string.
174-
* \return
173+
* \return Boolean indicating whether the writing was successful.
175174
*/
176175
bool WriteMPIString(const std::string& str, unsigned short processor);
177-
176+
178177
/*!
179178
* \brief Open a file to write using MPI I/O. Already existing file is deleted.
180179
* \return Boolean indicating whether the opening was successful.
181180
*/
182181
bool OpenMPIFile();
183-
182+
184183
/*!
185-
* \brief Close a file using MPI I/O.
184+
* \brief Close a file using MPI I/O.
186185
* \return Boolean indicating whether the closing was successful.
187186
*/
188187
bool CloseMPIFile();
189-
188+
190189
};
191190

SU2_CFD/src/solvers/CFEASolver.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1959,7 +1959,7 @@ void CFEASolver::Postprocessing(CGeometry *geometry, CSolver **solver_container,
19591959

19601960
SU2_OMP_PARALLEL
19611961
{
1962-
#if !defined(CODI_REVERSE_TYPE) && !defined(USE_MIXED_PRECISION)
1962+
#if !(defined(CODI_REVERSE_TYPE) || defined(USE_MIXED_PRECISION)) || defined(CODI_FORWARD_TYPE)
19631963
Jacobian.ComputeResidual(LinSysSol, LinSysRes, LinSysAux);
19641964
#else
19651965
sol.PassiveCopy(LinSysSol);

0 commit comments

Comments
 (0)