Skip to content

Commit 7572391

Browse files
committed
fixing energy calculation. Test passes for 2D!
1 parent 01b2a22 commit 7572391

7 files changed

Lines changed: 138 additions & 13 deletions

File tree

include/kernels.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,12 @@ __global__ void is_eq(bool *a, bool *b, bool *ans);
5959
* @return Magnitude of complex number
6060
*/
6161
__device__ double complexMagnitude(double2 in);
62+
__global__ void complexMultiply(double2 *in1, double2 *in2, double2 *out);
63+
__host__ __device__ double2 complexMultiply(double2 in1, double2 in2);
6264

6365
__device__ double2 make_complex(double in, int evolution_type);
6466

67+
__global__ void complexAbsSum(double2 *in1, double2 *in2, double *out);
6568
__global__ void complexMagnitude(double2 *in, double *out);
6669
/**
6770
* @brief Return the squared magnitude of a complex number. $|(a+\textrm{i}b)*(a-\textrm{i}b)|$
@@ -286,6 +289,8 @@ __global__ void zeros(bool *in, bool *out);
286289

287290
__global__ void set_eq(double *in1, double *in2);
288291

292+
__global__ void print_ds(double *vector);
293+
289294
//##############################################################################
290295
/**
291296
* Non-implemented functions.

include/split_op.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,5 +97,6 @@ void optLatSetup(const std::shared_ptr<Vtx::Vortex> centre, const double* V,
9797
* @return $\langle \Psi | H | \Psi \rangle$
9898
*/
9999
double energy_angmom(double2 *gpuWfc, int gState, Grid &par);
100+
double energy_calc(Grid &par, double2* wfc);
100101

101102
#endif

src/evolution.cu

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -191,8 +191,8 @@ void evolve(Grid &par,
191191
time_spent = (double) (end - begin) / CLOCKS_PER_SEC;
192192
printf("Time spent: %lf\n", time_spent);
193193
std::string fileName = "";
194-
printf("ramp=%d gstate=%d rg=%d \n",
195-
ramp, gstate, ramp | (gstate << 1));
194+
//printf("ramp=%d gstate=%d rg=%d \n",
195+
// ramp, gstate, ramp | (gstate << 1));
196196
switch (ramp | (gstate << 1)) {
197197
case 0: //Groundstate solver, constant Omega value.
198198
{
@@ -464,7 +464,6 @@ void evolve(Grid &par,
464464
num_vortices[1] = num_vortices[0];
465465
vortCoords->getVortices().swap(vortCoordsP->getVortices());
466466
vortCoords->getVortices().clear();
467-
std::cout << "I am here" << std::endl;
468467

469468
}
470469
fileName = "wfc_ev";
@@ -488,7 +487,7 @@ void evolve(Grid &par,
488487
}
489488
//std::cout << "written" << '\n';
490489
if (par.bval("energy_calc")){
491-
double energy = energy_angmom(gpuWfc,gstate, par);
490+
double energy = energy_calc(par,gpuWfc);
492491
// Now opening and closing file for writing.
493492
std::ofstream energy_out;
494493
std::string mode = "energyi.dat";

src/kernels.cu

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,15 @@ __device__ double2 realCompMult(double scalar, double2 comp){
159159
/**
 * Device helper: Euclidean magnitude sqrt(x^2 + y^2) of a complex number
 * stored as a double2 (x = real part, y = imaginary part).
 */
__device__ double complexMagnitude(double2 in){
    const double re = in.x;
    const double im = in.y;
    return sqrt(re*re + im*im);
}
162+
163+
/**
 * Element-wise |in1 + in2|: adds the two complex inputs component by
 * component and writes the magnitude of that sum to out.
 *
 * The element index comes from getGid3d3d(); there is no bounds check here,
 * so the launch configuration is expected to match the array length
 * (NOTE(review): confirm against the callers' grid/threads setup).
 */
__global__ void complexAbsSum(double2 *in1, double2 *in2, double *out){
    const int idx = getGid3d3d();
    const double2 lhs = in1[idx];
    const double2 rhs = in2[idx];
    const double re = lhs.x + rhs.x;
    const double im = lhs.y + rhs.y;
    out[idx] = sqrt(re*re + im*im);
}
170+
162171
__global__ void complexMagnitude(double2 *in, double *out){
163172
int gid = getGid3d3d();
164173
out[gid] = sqrt(in[gid].x*in[gid].x + in[gid].y*in[gid].y);
@@ -186,6 +195,12 @@ __host__ __device__ double2 complexMultiply(double2 in1, double2 in2){
186195
return result;
187196
}
188197

198+
/**
 * Kernel overload of complexMultiply: applies the double2 x double2
 * complexMultiply helper to each pair of elements and stores the product
 * in out. One element per thread, indexed by getGid3d3d().
 */
__global__ void complexMultiply(double2 *in1, double2 *in2, double2 *out){
    const int idx = getGid3d3d();
    const double2 lhs = in1[idx];
    const double2 rhs = in2[idx];
    out[idx] = complexMultiply(lhs, rhs);
}
202+
203+
189204
/*
190205
* Used to perform conj(in1)*in2; == < in1 | in2 >
191206
*/
@@ -220,8 +235,8 @@ __global__ void cMultPhi(double2* in1, double* in2, double2* out){
220235
/**
 * Scales each complex element of in by the matching real entry of factor
 * and writes the result to out (both components are multiplied by the same
 * real value). One element per thread, indexed by getGid3d3d().
 */
__global__ void vecMult(double2 *in, double *factor, double2 *out){
    const unsigned int idx = getGid3d3d();
    const double scale = factor[idx];
    double2 scaled = in[idx];
    scaled.x = scaled.x * scale;
    scaled.y = scaled.y * scale;
    out[idx] = scaled;
}
227242

@@ -381,6 +396,7 @@ __global__ void scalarPow(double2* in, double param, double2* out){
381396
/**
 * Writes the complex conjugate of each element of in to out: the real part
 * is copied unchanged and the imaginary part is negated.
 * One element per thread, indexed by getGid3d3d().
 */
__global__ void vecConjugate(double2 *in, double2 *out){
    const unsigned int idx = getGid3d3d();
    const double2 src = in[idx];
    double2 conj;
    conj.x = src.x;
    conj.y = -src.y;
    out[idx] = conj;
}
@@ -465,7 +481,6 @@ __global__ void multipass(double* input, double* output){
465481
}
466482
}
467483

468-
469484
/*
470485
* Calculates all of the energy of the current state. sqrt_omegaz_mass = sqrt(omegaZ/mass), part of the nonlin interaction term
471486
*/
@@ -650,6 +665,11 @@ __global__ void set_eq(double *in1, double *in2){
650665
in2[gid] = in1[gid];
651666
}
652667

668+
/**
 * Debug kernel: every thread prints its global index and the value of
 * vector at that index, tab-separated, one pair per line.
 */
__global__ void print_ds(double *vector){
    const int idx = getGid3d3d();
    const double value = vector[idx];
    printf("%d\t%e\n", idx, value);
}
653673

654674
//##############################################################################
655675
//##############################################################################

src/operators.cu

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -826,12 +826,10 @@ void generate_fields(Grid &par){
826826
cudaFree(V_gpu);
827827
}
828828
else{
829-
par.store("V_gpu", V_gpu);
830-
par.store("K_gpu", K_gpu);
829+
par.store("V_gpu",V_gpu);
831830
}
832831

833832
par.store("V",V);
834-
//par.store("V_gpu",V_gpu);
835833
par.store("items", items);
836834
//par.store("items_gpu", items_gpu);
837835
par.store("wfc", wfc);

src/split_op.cu

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,89 @@ void optLatSetup(std::shared_ptr<Vtx::Vortex> centre, const double* V,
241241
par.store("V_opt",v_opt);
242242
}
243243

244+
/**
 * @brief Computes the energy of a wavefunction from the stored momentum-space
 *        (K_gpu) and position-space (V_gpu) operators.
 *
 * Steps, all on the device:
 *   1. wfc_c    = conj(wfc)
 *   2. energy_k = wfc_c (x) IFFT( K * FFT(wfc) ), with each transform scaled
 *                 by 1/sqrt(gsize)
 *   3. energy_r = wfc_c (x) ( V * wfc )
 *   4. energy   = | energy_r + energy_k |   (element-wise, complexAbsSum)
 * The per-element values are copied to the host, summed, and multiplied by
 * the volume element dx*dy*dz. Only the K and V terms enter this sum.
 *
 * (x) is whatever cMult computes element-wise; presumably a plain complex
 * product -- NOTE(review): confirm against cMult's definition.
 *
 * @param par Simulation parameters. Reads K_gpu, V_gpu, xDim/yDim/zDim,
 *            dx/dy/dz, dimnum and the matching plan_1d/plan_2d/plan_3d
 *            cuFFT handle, plus par.grid / par.threads for kernel launches.
 * @param wfc Device pointer to the wavefunction (xDim*yDim*zDim elements).
 *            It is only read, never written.
 * @return    Summed energy over the grid, scaled by dx*dy*dz.
 *
 * On an unsupported dimnum, an allocation failure, a cuFFT failure or a
 * failed device-to-host copy, an error is printed and the process exits with
 * status 1 instead of returning a meaningless number.
 */
double energy_calc(Grid &par, double2* wfc){
    double* K = par.dsval("K_gpu");
    double* V = par.dsval("V_gpu");

    dim3 grid = par.grid;
    dim3 threads = par.threads;

    int xDim = par.ival("xDim");
    int yDim = par.ival("yDim");
    int zDim = par.ival("zDim");
    int gsize = xDim*yDim*zDim;

    double dx = par.dval("dx");
    double dy = par.dval("dy");
    double dz = par.dval("dz");
    double dg = dx*dy*dz;

    // Pick the FFT plan for the current dimensionality. Anything other than
    // 1, 2 or 3 used to leave the handle uninitialised; fail loudly instead.
    int dimnum = par.ival("dimnum");
    cufftHandle plan;
    switch (dimnum){
        case 1:
            plan = par.ival("plan_1d");
            break;
        case 2:
            plan = par.ival("plan_2d");
            break;
        case 3:
            plan = par.ival("plan_3d");
            break;
        default:
            printf("energy_calc: unsupported dimnum %d (expected 1, 2 or 3)\n",
                   dimnum);
            exit(1);
    }

    // Applied after each transform so the forward/inverse pair is unitary.
    double renorm_factor = 1.0/sqrt((double)gsize);

    size_t complex_bytes = sizeof(double2)*gsize;
    size_t real_bytes = sizeof(double)*gsize;

    double2 *wfc_c = NULL, *wfc_k = NULL;
    double2 *energy_r = NULL, *energy_k = NULL;
    double *energy = NULL;

    bool alloc_ok =
        cudaMalloc((void **) &wfc_c, complex_bytes) == cudaSuccess &&
        cudaMalloc((void **) &wfc_k, complex_bytes) == cudaSuccess &&
        cudaMalloc((void **) &energy_r, complex_bytes) == cudaSuccess &&
        cudaMalloc((void **) &energy_k, complex_bytes) == cudaSuccess &&
        cudaMalloc((void **) &energy, real_bytes) == cudaSuccess;

    double *energy_cpu = (double *)malloc(real_bytes);

    if (!alloc_ok || energy_cpu == NULL){
        // cudaFree / free accept NULL, so partially completed allocations
        // are released safely here.
        free(energy_cpu);
        cudaFree(energy);
        cudaFree(energy_k);
        cudaFree(energy_r);
        cudaFree(wfc_k);
        cudaFree(wfc_c);
        printf("energy_calc: failed to allocate work buffers\n");
        exit(1);
    }

    // Finding conjugate
    vecConjugate<<<grid, threads>>>(wfc, wfc_c);

    // Momentum-space energy
    bool fft_ok =
        cufftExecZ2Z(plan, wfc, wfc_k, CUFFT_FORWARD) == CUFFT_SUCCESS;
    scalarMult<<<grid, threads>>>(wfc_k, renorm_factor, wfc_k);

    vecMult<<<grid, threads>>>(wfc_k, K, energy_k);

    fft_ok =
        (cufftExecZ2Z(plan, energy_k, energy_k, CUFFT_INVERSE)
             == CUFFT_SUCCESS) && fft_ok;
    scalarMult<<<grid, threads>>>(energy_k, renorm_factor, energy_k);

    cMult<<<grid, threads>>>(wfc_c, energy_k, energy_k);

    // Position-space energy
    vecMult<<<grid, threads>>>(wfc, V, energy_r);
    cMult<<<grid, threads>>>(wfc_c, energy_r, energy_r);

    complexAbsSum<<<grid, threads>>>(energy_r, energy_k, energy);

    // Blocking copy: also surfaces any error raised by the kernels above.
    cudaError_t copy_status = cudaMemcpy(energy_cpu, energy, real_bytes,
                                         cudaMemcpyDeviceToHost);

    cudaFree(energy_r);
    cudaFree(energy_k);
    cudaFree(energy);
    cudaFree(wfc_c);
    cudaFree(wfc_k);

    if (!fft_ok || copy_status != cudaSuccess){
        free(energy_cpu);
        if (!fft_ok){
            printf("energy_calc: cufftExecZ2Z failed\n");
        }
        if (copy_status != cudaSuccess){
            printf("energy_calc: CUDA error: %s\n",
                   cudaGetErrorString(copy_status));
        }
        exit(1);
    }

    // Accumulate first, then apply the (loop-invariant) volume element once.
    double sum = 0;
    for (int i = 0; i < gsize; ++i){
        sum += energy_cpu[i];
    }
    sum *= dg;

    free(energy_cpu);

    return sum;
}
326+
244327
/**
245328
** Calculates energy and angular momentum of current state.
246329
** Implementation not fully finished.

src/unit_test.cu

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -388,9 +388,11 @@ void dynamic_test(){
388388

389389
cudaMemcpy(array, array_gpu, sizeof(double)*n, cudaMemcpyDeviceToHost);
390390

391+
/*
391392
for (int i = 0; i < n; ++i){
392393
std::cout << array[i] << '\n';
393394
}
395+
*/
394396

395397
std::cout << "Dynamic tests passed" <<'\n';
396398
}
@@ -441,6 +443,8 @@ void bessel_test(){
441443
// These will be checked against 1d
442444
void fft_test(){
443445

446+
std::cout << "Beginning cufft test.\n";
447+
444448
// For these tests, we are assuming that the x, y and z dimensions are
445449
// All the same (2x2x2)
446450
// Note that yDim needs to be singled out differently, but z/x need no loops
@@ -500,9 +504,11 @@ void fft_test(){
500504
exit(1);
501505
}
502506

507+
/*
503508
for (int i = 0; i < gsize; i++){
504509
std::cout << array[i].x << '\t' << array[i].y << '\n';
505510
}
511+
*/
506512

507513
// Now to try the inverse direction
508514

@@ -521,9 +527,13 @@ void fft_test(){
521527
exit(1);
522528
}
523529

530+
/*
524531
for (int i = 0; i < gsize; i++){
525532
std::cout << array[i].x << '\t' << array[i].y << '\n';
526533
}
534+
*/
535+
536+
std::cout << "cufft test passed!\n";
527537

528538

529539

@@ -764,6 +774,8 @@ void grid_test3d(){
764774
// Test of the parSum function in 3d
765775
void parSum_test(){
766776

777+
std::cout << "Beginning test of parallel summation.\n";
778+
767779
// Setting error
768780
cudaError_t err;
769781

@@ -886,6 +898,8 @@ void parSum_test(){
886898
}
887899
}
888900

901+
std::cout << "Parallel summation test passed in 2 and 3D!\n";
902+
889903
}
890904

891905
// Test for the Grid structure with paramters in it
@@ -1141,7 +1155,7 @@ void evolve_test(){
11411155
par.store("thresh_const", 1.0);
11421156

11431157

1144-
double thresh = 0.0001;
1158+
double thresh = 0.01;
11451159
std::string buffer;
11461160
int gsteps = 30001;
11471161
int esteps = 30001;
@@ -1154,14 +1168,15 @@ void evolve_test(){
11541168
par.store("omegaY", 1.0);
11551169
par.store("esteps", esteps);
11561170
par.store("gsteps", gsteps);
1157-
par.store("printSteps", 1000);
1171+
par.store("printSteps", 30000);
11581172
par.store("write_file", false);
1159-
par.store("write_it", true);
1173+
par.store("write_it", false);
11601174
par.store("energy_calc", true);
11611175
par.store("box_size", 0.00007);
11621176
par.store("yDim", 1);
11631177
par.store("zDim", 1);
11641178

1179+
11651180
// Running through all the dimensions to check the energy
11661181
for (int i = 2; i <= 3; ++i){
11671182
if (i == 2){
@@ -1173,6 +1188,10 @@ void evolve_test(){
11731188
par.store("dimnum",i);
11741189
init(par);
11751190

1191+
if (par.bval("write_file")){
1192+
FileIO::writeOutParam(buffer, par, "data/Params.dat");
1193+
}
1194+
11761195
double omegaX = par.dval("omegaX");
11771196
set_variables(par, 0);
11781197

0 commit comments

Comments
 (0)