Skip to content

Commit

Permalink
GPU
Browse files Browse the repository at this point in the history
  • Loading branch information
GregorySchwing committed Mar 9, 2024
1 parent 3a282b8 commit 81424f6
Show file tree
Hide file tree
Showing 9 changed files with 167 additions and 25 deletions.
7 changes: 6 additions & 1 deletion src/FFParticle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,12 @@ void FFParticle::Init(ff_setup::Particle const &mie,
forcefield.isMartini, count, forcefield.rCut,
forcefield.rCutCoulomb, forcefield.rCutLow,
forcefield.rswitch, forcefield.alpha, forcefield.ewald,
diElectric_1);
diElectric_1,
forcefield.wolf, forcefield.dsf,
forcefield.wolf_alpha,
forcefield.wolf_factor_1,
forcefield.wolf_factor_2,
forcefield.wolf_factor_3);
#endif
}

Expand Down
28 changes: 24 additions & 4 deletions src/GPU/CalculateEnergyCUDAKernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,12 @@ void CallBoxInterGPU(VariablesCUDA *vars, const std::vector<int> &cellVector,
vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box],
vars->gpu_Invcell_z[box], sc_coul, sc_sigma_6, sc_alpha, sc_power,
vars->gpu_rMin, vars->gpu_rMaxSq, vars->gpu_expConst, vars->gpu_molIndex,
vars->gpu_lambdaVDW, vars->gpu_lambdaCoulomb, vars->gpu_isFraction, box);
vars->gpu_lambdaVDW, vars->gpu_lambdaCoulomb, vars->gpu_isFraction, box,
vars->gpu_wolf,
vars->gpu_dsf,
vars->gpu_wolf_alpha,
vars->gpu_wolf_factor_1,
vars->gpu_wolf_factor_2);
cudaDeviceSynchronize();
checkLastErrorCUDA(__FILE__, __LINE__);

Expand Down Expand Up @@ -164,7 +169,12 @@ BoxInterGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList,
double sc_sigma_6, double sc_alpha, uint sc_power, double *gpu_rMin,
double *gpu_rMaxSq, double *gpu_expConst, int *gpu_molIndex,
double *gpu_lambdaVDW, double *gpu_lambdaCoulomb,
bool *gpu_isFraction, int box) {
bool *gpu_isFraction, int box,
int *gpu_wolf,
int *gpu_dsf,
double * gpu_wolf_alpha,
double * gpu_wolf_factor_1,
double * gpu_wolf_factor_2) {
int threadID = blockIdx.x * blockDim.x + threadIdx.x;
double REn = 0.0, LJEn = 0.0;
double cutoff = fmax(gpu_rCut[0], gpu_rCutCoulomb[box]);
Expand Down Expand Up @@ -228,7 +238,12 @@ BoxInterGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList,
distSq, kA, kB, qi_qj_fact, gpu_rCutLow[0], gpu_ewald[0],
gpu_VDW_Kind[0], gpu_alpha[box], gpu_rCutCoulomb[box],
gpu_isMartini[0], gpu_diElectric_1[0], lambdaCoulomb, sc_coul,
sc_sigma_6, sc_alpha, sc_power, gpu_sigmaSq, gpu_count[0]);
sc_sigma_6, sc_alpha, sc_power, gpu_sigmaSq, gpu_count[0],
gpu_wolf[0],
gpu_dsf[0],
gpu_wolf_alpha[box],
gpu_wolf_factor_1[box],
gpu_wolf_factor_2[box]);
}
}
}
Expand All @@ -246,7 +261,12 @@ CalcCoulombGPU(double distSq, int kind1, int kind2, double qi_qj_fact,
double gpu_alpha, double gpu_rCutCoulomb, int gpu_isMartini,
double gpu_diElectric_1, double gpu_lambdaCoulomb, bool sc_coul,
double sc_sigma_6, double sc_alpha, uint sc_power,
double *gpu_sigmaSq, int gpu_count) {
double *gpu_sigmaSq, int gpu_count,
int gpu_wolf,
int gpu_dsf,
double gpu_wolf_alpha,
double gpu_wolf_factor_1,
double gpu_wolf_factor_2) {
if ((gpu_rCutCoulomb * gpu_rCutCoulomb) < distSq) {
return 0.0;
}
Expand Down
14 changes: 12 additions & 2 deletions src/GPU/CalculateEnergyCUDAKernel.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,12 @@ __global__ void BoxInterGPU(int *gpu_cellStartIndex,
double *gpu_lambdaVDW,
double *gpu_lambdaCoulomb,
bool *gpu_isFraction,
int box);
int box,
int *gpu_wolf,
int *gpu_dsf,
double * gpu_wolf_alpha,
double * gpu_wolf_factor_1,
double * gpu_wolf_factor_2);


__device__ double CalcCoulombGPU(double distSq, int kind1, int kind2,
Expand All @@ -88,7 +93,12 @@ __device__ double CalcCoulombGPU(double distSq, int kind1, int kind2,
double gpu_lambdaCoulomb, bool sc_coul,
double sc_sigma_6, double sc_alpha,
uint sc_power, double *gpu_sigmaSq,
int gpu_count);
int gpu_count,
int gpu_wolf,
int gpu_dsf,
double gpu_wolf_alpha,
double gpu_wolf_factor_1,
double gpu_wolf_factor_2);
__device__ double CalcCoulombVirGPU(double distSq, double qi_qj,
double gpu_rCutCoulomb, double gpu_alpha,
int gpu_VDW_Kind, int gpu_ewald,
Expand Down
51 changes: 44 additions & 7 deletions src/GPU/CalculateForceCUDAKernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,12 @@ void CallBoxInterForceGPU(
vars->gpu_Invcell_y[box], vars->gpu_Invcell_z[box], vars->gpu_nonOrth,
sc_coul, sc_sigma_6, sc_alpha, sc_power, vars->gpu_rMin, vars->gpu_rMaxSq,
vars->gpu_expConst, vars->gpu_molIndex, vars->gpu_lambdaVDW,
vars->gpu_lambdaCoulomb, vars->gpu_isFraction, box);
vars->gpu_lambdaCoulomb, vars->gpu_isFraction, box,
vars->gpu_wolf,
vars->gpu_dsf,
vars->gpu_wolf_alpha,
vars->gpu_wolf_factor_2,
vars->gpu_wolf_factor_3);
checkLastErrorCUDA(__FILE__, __LINE__);
cudaDeviceSynchronize();
// ReduceSum // Virial of LJ
Expand Down Expand Up @@ -310,7 +315,13 @@ void CallBoxForceGPU(VariablesCUDA *vars, const std::vector<int> &cellVector,
vars->gpu_aForcez, vars->gpu_mForcex, vars->gpu_mForcey,
vars->gpu_mForcez, sc_coul, sc_sigma_6, sc_alpha, sc_power,
vars->gpu_rMin, vars->gpu_rMaxSq, vars->gpu_expConst, vars->gpu_molIndex,
vars->gpu_lambdaVDW, vars->gpu_lambdaCoulomb, vars->gpu_isFraction, box);
vars->gpu_lambdaVDW, vars->gpu_lambdaCoulomb, vars->gpu_isFraction, box,
vars->gpu_wolf,
vars->gpu_dsf,
vars->gpu_wolf_alpha,
vars->gpu_wolf_factor_1,
vars->gpu_wolf_factor_2,
vars->gpu_wolf_factor_3);
cudaDeviceSynchronize();
checkLastErrorCUDA(__FILE__, __LINE__);
// LJ ReduceSum
Expand Down Expand Up @@ -475,7 +486,12 @@ __global__ void BoxInterForceGPU(
double *gpu_Invcell_z, int *gpu_nonOrth, bool sc_coul, double sc_sigma_6,
double sc_alpha, uint sc_power, double *gpu_rMin, double *gpu_rMaxSq,
double *gpu_expConst, int *gpu_molIndex, double *gpu_lambdaVDW,
double *gpu_lambdaCoulomb, bool *gpu_isFraction, int box) {
double *gpu_lambdaCoulomb, bool *gpu_isFraction, int box,
int *gpu_wolf,
int *gpu_dsf,
double * gpu_wolf_alpha,
double * gpu_wolf_factor_2,
double * gpu_wolf_factor_3) {
double distSq;
double3 virComponents;

Expand Down Expand Up @@ -579,7 +595,12 @@ __global__ void BoxInterForceGPU(
distSq, qi_qj, gpu_VDW_Kind[0], gpu_ewald[0], gpu_isMartini[0],
gpu_alpha[box], gpu_rCutCoulomb[box], gpu_diElectric_1[0],
gpu_sigmaSq, sc_coul, sc_sigma_6, sc_alpha, sc_power,
lambdaCoulomb, gpu_count[0], kA, kB);
lambdaCoulomb, gpu_count[0], kA, kB,
gpu_wolf[0],
gpu_dsf[0],
gpu_wolf_alpha[box],
gpu_wolf_factor_2[box],
gpu_wolf_factor_3[box]);

gpu_rT11[threadID] += pRF * (virComponents.x * diff_com.x);
gpu_rT22[threadID] += pRF * (virComponents.y * diff_com.y);
Expand Down Expand Up @@ -617,7 +638,13 @@ BoxForceGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList,
bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power,
double *gpu_rMin, double *gpu_rMaxSq, double *gpu_expConst,
int *gpu_molIndex, double *gpu_lambdaVDW, double *gpu_lambdaCoulomb,
bool *gpu_isFraction, int box) {
bool *gpu_isFraction, int box,
int *gpu_wolf,
int *gpu_dsf,
double * gpu_wolf_alpha,
double * gpu_wolf_factor_1,
double * gpu_wolf_factor_2,
double * gpu_wolf_factor_3) {
__shared__ double shr_cutoff;
__shared__ int shr_particlesInsideCurrentCell, shr_numberOfPairs;
__shared__ int shr_currentCellStartIndex, shr_neighborCellStartIndex;
Expand Down Expand Up @@ -704,13 +731,23 @@ BoxForceGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList,
distSq, kA, kB, qi_qj_fact, gpu_rCutLow[0], gpu_ewald[0],
gpu_VDW_Kind[0], gpu_alpha[box], gpu_rCutCoulomb[box],
gpu_isMartini[0], gpu_diElectric_1[0], lambdaCoulomb, sc_coul,
sc_sigma_6, sc_alpha, sc_power, gpu_sigmaSq, gpu_count[0]);
sc_sigma_6, sc_alpha, sc_power, gpu_sigmaSq, gpu_count[0],
gpu_wolf[0],
gpu_dsf[0],
gpu_wolf_alpha[box],
gpu_wolf_factor_1[box],
gpu_wolf_factor_2[box]);

forces += CalcCoulombForceGPU(
distSq, qi_qj_fact, gpu_VDW_Kind[0], gpu_ewald[0],
gpu_isMartini[0], gpu_alpha[box], gpu_rCutCoulomb[box],
gpu_diElectric_1[0], gpu_sigmaSq, sc_coul, sc_sigma_6, sc_alpha,
sc_power, lambdaCoulomb, gpu_count[0], kA, kB);
sc_power, lambdaCoulomb, gpu_count[0], kA, kB,
gpu_wolf[0],
gpu_dsf[0],
gpu_wolf_alpha[box],
gpu_wolf_factor_2[box],
gpu_wolf_factor_3[box]);
}
}

Expand Down
22 changes: 19 additions & 3 deletions src/GPU/CalculateForceCUDAKernel.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,13 @@ __global__ void BoxForceGPU(int *gpu_cellStartIndex,
double *gpu_lambdaVDW,
double *gpu_lambdaCoulomb,
bool *gpu_isFraction,
int box);
int box,
int *gpu_wolf,
int *gpu_dsf,
double * gpu_wolf_alpha,
double * gpu_wolf_factor_1,
double * gpu_wolf_factor_2,
double * gpu_wolf_factor_3);

__global__ void BoxInterForceGPU(int *gpu_cellStartIndex,
int *gpu_cellVector,
Expand Down Expand Up @@ -203,7 +209,12 @@ __global__ void BoxInterForceGPU(int *gpu_cellStartIndex,
double *gpu_lambdaVDW,
double *gpu_lambdaCoulomb,
bool *gpu_isFraction,
int box);
int box,
int *gpu_wolf,
int *gpu_dsf,
double * gpu_wolf_alpha,
double * gpu_wolf_factor_2,
double * gpu_wolf_factor_3);

__global__ void VirialReciprocalGPU(double *gpu_x,
double *gpu_y,
Expand Down Expand Up @@ -369,7 +380,12 @@ __device__ inline double CalcCoulombForceGPU(double distSq, double qi_qj,
uint sc_power,
double gpu_lambdaCoulomb,
int gpu_count, int kind1,
int kind2)
int kind2,
int gpu_wolf,
int gpu_dsf,
double gpu_wolf_alpha,
double gpu_wolf_factor_2,
double gpu_wolf_factor_3)
{
if((gpu_rCutCoulomb * gpu_rCutCoulomb) < distSq) {
return 0.0;
Expand Down
34 changes: 33 additions & 1 deletion src/GPU/ConstantDefinitionsCUDAKernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,13 @@ void InitGPUForceField(VariablesCUDA &vars, double const *sigmaSq,
double const *epsilon_Cn, double const *n, int VDW_Kind,
int isMartini, int count, double Rcut,
double const *rCutCoulomb, double RcutLow, double Ron,
double const *alpha, int ewald, double diElectric_1) {
double const *alpha, int ewald, double diElectric_1,
int wolf,
int dsf,
double const * wolf_alpha,
double const * wolf_factor_1,
double const * wolf_factor_2,
double const * wolf_factor_3) {
int countSq = count * count;
CUMALLOC((void **)&vars.gpu_sigmaSq, countSq * sizeof(double));
CUMALLOC((void **)&vars.gpu_epsilon_Cn, countSq * sizeof(double));
Expand All @@ -55,6 +61,14 @@ void InitGPUForceField(VariablesCUDA &vars, double const *sigmaSq,
CUMALLOC((void **)&vars.gpu_lambdaCoulomb, (int)BOX_TOTAL * sizeof(double));
CUMALLOC((void **)&vars.gpu_isFraction, (int)BOX_TOTAL * sizeof(bool));

// allocate gpu memory for wolf variables
CUMALLOC((void**) &vars.gpu_wolf, sizeof(int));
CUMALLOC((void**) &vars.gpu_dsf, sizeof(int));
CUMALLOC((void**) &vars.gpu_wolf_alpha, (int)BOX_TOTAL * sizeof(double));
CUMALLOC((void**) &vars.gpu_wolf_factor_1, (int)BOX_TOTAL * sizeof(double));
CUMALLOC((void**) &vars.gpu_wolf_factor_2, (int)BOX_TOTAL * sizeof(double));
CUMALLOC((void**) &vars.gpu_wolf_factor_3, (int)BOX_TOTAL * sizeof(double));

cudaMemcpy(vars.gpu_sigmaSq, sigmaSq, countSq * sizeof(double),
cudaMemcpyHostToDevice);
cudaMemcpy(vars.gpu_epsilon_Cn, epsilon_Cn, countSq * sizeof(double),
Expand All @@ -75,6 +89,18 @@ void InitGPUForceField(VariablesCUDA &vars, double const *sigmaSq,
cudaMemcpy(vars.gpu_ewald, &ewald, sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(vars.gpu_diElectric_1, &diElectric_1, sizeof(double),
cudaMemcpyHostToDevice);

cudaMemcpy(vars.gpu_wolf, &wolf, sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(vars.gpu_dsf, &dsf, sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(vars.gpu_wolf_alpha, wolf_alpha, BOX_TOTAL * sizeof(double),
cudaMemcpyHostToDevice);
cudaMemcpy(vars.gpu_wolf_factor_1, wolf_factor_1, BOX_TOTAL * sizeof(double),
cudaMemcpyHostToDevice);
cudaMemcpy(vars.gpu_wolf_factor_2, wolf_factor_2, BOX_TOTAL * sizeof(double),
cudaMemcpyHostToDevice);
cudaMemcpy(vars.gpu_wolf_factor_3, wolf_factor_3, BOX_TOTAL * sizeof(double),
cudaMemcpyHostToDevice);

checkLastErrorCUDA(__FILE__, __LINE__);
}

Expand Down Expand Up @@ -328,6 +354,12 @@ void DestroyCUDAVars(VariablesCUDA *vars) {
CUFREE(vars->gpu_rOn);
CUFREE(vars->gpu_alpha);
CUFREE(vars->gpu_ewald);
CUFREE(vars->gpu_wolf);
CUFREE(vars->gpu_dsf);
CUFREE(vars->gpu_wolf_alpha);
CUFREE(vars->gpu_wolf_factor_1);
CUFREE(vars->gpu_wolf_factor_2);
CUFREE(vars->gpu_wolf_factor_3);
CUFREE(vars->gpu_diElectric_1);
CUFREE(vars->gpu_x);
CUFREE(vars->gpu_y);
Expand Down
8 changes: 7 additions & 1 deletion src/GPU/ConstantDefinitionsCUDAKernel.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,13 @@ void InitGPUForceField(VariablesCUDA &vars, double const *sigmaSq,
int VDW_Kind, int isMartini, int count,
double Rcut, double const *rCutCoulomb,
double RcutLow, double Ron, double const *alpha,
int ewald, double diElectric_1);
int ewald, double diElectric_1,
int wolf,
int dsf,
double const * wolf_alpha,
double const * wolf_factor_1,
double const * wolf_factor_2,
double const * wolf_factor_3);
void InitCoordinatesCUDA(VariablesCUDA *vars, uint atomNumber,
uint maxAtomsInMol, uint maxMolNumber);
void InitEwaldVariablesCUDA(VariablesCUDA *vars, uint imageTotal);
Expand Down
16 changes: 16 additions & 0 deletions src/GPU/VariablesCUDA.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,14 @@ public:
gpu_lambdaVDW = NULL;
gpu_lambdaCoulomb = NULL;
gpu_isFraction = NULL;

// set wolf variables to null
gpu_wolf = NULL;
gpu_dsf = NULL;
gpu_wolf_alpha = NULL;
gpu_wolf_factor_1 = NULL;
gpu_wolf_factor_2 = NULL;
gpu_wolf_factor_3 = NULL;
}
double *gpu_sigmaSq;
double *gpu_epsilon_Cn;
Expand Down Expand Up @@ -133,5 +141,13 @@ public:

// new pair interaction calculation done on GPU
int *gpu_cellVector, *gpu_mapParticleToCell;

// Wolf Variables
int *gpu_wolf;
int *gpu_dsf;
double * gpu_wolf_alpha;
double * gpu_wolf_factor_1;
double * gpu_wolf_factor_2;
double * gpu_wolf_factor_3;
};
#endif
12 changes: 6 additions & 6 deletions src/WolfCalibrationOutput.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -348,9 +348,9 @@ void WolfCalibrationOutput::CalculateGrid() {
double rCutCoulomb = wolfCutoffCoulombStart[b] + RCutIndex*wolfCutoffCoulombDelta[b];
// Wolf class has references to these forcefield values
statValRef.forcefield.SetWolfAlphaAndWolfFactors(rCutCoulomb, alpha, b);
#ifdef GOMC_CUDA
statValRef.forcefield.particles->updateWolfEwald();
#endif
//#ifdef GOMC_CUDA
//statValRef.forcefield.particles->updateWolfEwald();
//#endif
SystemPotential wolfTot = calcEn.SystemTotal();
wolfTot.Total();
sumRelativeErrorVec[b][wolfKind][coulKind][GetIndex(RCutIndex, alphaIndex, b)].add_value(wolfTot.boxEnergy[b].totalElect);
Expand All @@ -366,9 +366,9 @@ void WolfCalibrationOutput::CalculateGrid() {
for (uint b = 0; b < BOXES_WITH_U_NB; ++b) {
statValRef.forcefield.SetRCutCoulomb(originalCutoffCoulomb[b], b);
}
#ifdef GOMC_CUDA
statValRef.forcefield.particles->updateWolfEwald();
#endif
//#ifdef GOMC_CUDA
//statValRef.forcefield.particles->updateWolfEwald();
//#endif

}

Expand Down

0 comments on commit 81424f6

Please sign in to comment.