30
__device__ void twobody(int iparam, F_FLOAT rsq, F_FLOAT &fforce,
31
int eflag, ENERGY_FLOAT &eng)
30
__device__ void twobody(int iparam, F_CFLOAT rsq, F_CFLOAT &fforce,
31
int eflag, ENERGY_CFLOAT &eng)
33
F_FLOAT r, rp, rq, rainv, expsrainv;
33
F_CFLOAT r, rp, rq, rainv, expsrainv;
36
36
rp = pow(r, -params_sw[iparam].powerp);
46
46
__device__ void threebody(int paramij, int paramik, int paramijk,
49
F_FLOAT3 &fj, F_FLOAT3 &fk, int eflag, ENERGY_FLOAT &eng)
49
F_CFLOAT3 &fj, F_CFLOAT3 &fk, int eflag, ENERGY_CFLOAT &eng)
51
F_FLOAT r1, rinvsq1, rainv1, gsrainv1, gsrainvsq1, expgsrainv1;
52
F_FLOAT r2, rinvsq2, rainv2, gsrainv2, gsrainvsq2, expgsrainv2;
53
F_FLOAT rinv12, cs, delcs, delcssq, facexp, facrad, frad1, frad2;
54
F_FLOAT facang, facang12, csfacang, csfac1, csfac2;
51
F_CFLOAT r1, rinvsq1, rainv1, gsrainv1, gsrainvsq1, expgsrainv1;
52
F_CFLOAT r2, rinvsq2, rainv2, gsrainv2, gsrainvsq2, expgsrainv2;
53
F_CFLOAT rinv12, cs, delcs, delcssq, facexp, facrad, frad1, frad2;
54
F_CFLOAT facang, facang12, csfacang, csfac1, csfac2;
56
56
r1 = sqrt(delr1.w);
57
57
rinvsq1 = F_F(1.0) / delr1.w;
101
101
__device__ void threebody_fj(int paramij, int paramik, int paramijk,
106
F_FLOAT r1, rinvsq1, rainv1, gsrainv1, gsrainvsq1, expgsrainv1;
107
F_FLOAT r2, rainv2, gsrainv2, expgsrainv2;
108
F_FLOAT rinv12, cs, delcs, delcssq, facexp, facrad, frad1;
109
F_FLOAT facang, facang12, csfacang, csfac1;
106
F_CFLOAT r1, rinvsq1, rainv1, gsrainv1, gsrainvsq1, expgsrainv1;
107
F_CFLOAT r2, rainv2, gsrainv2, expgsrainv2;
108
F_CFLOAT rinv12, cs, delcs, delcssq, facexp, facrad, frad1;
109
F_CFLOAT facang, facang12, csfacang, csfac1;
111
111
r1 = sqrt(delr1.w);
112
112
rinvsq1 = F_F(1.0) / delr1.w;
146
__global__ void Pair_SW_Kernel_TpA_RIJ()//F_FLOAT4* _glob_r_ij,int* _glob_numneigh_red,int* _glob_neighbors_red,int* _glob_neightype_red)
146
__global__ void Pair_SW_Kernel_TpA_RIJ()//F_CFLOAT4* _glob_r_ij,int* _glob_numneigh_red,int* _glob_neighbors_red,int* _glob_neightype_red)
148
148
int ii = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;
150
150
if(ii >= _nall) return;
154
F_FLOAT xtmp, ytmp, ztmp;
154
F_CFLOAT xtmp, ytmp, ztmp;
155
155
int itype, jnum, i, j;
157
157
int neigh_red = 0;
197
197
template <int eflag, int vflagm>
198
__global__ void Pair_SW_Kernel_TpA(int eflag_atom, int vflag_atom) //,F_FLOAT* _glob_zeta_ij,F_FLOAT4* _glob_r_ij,int* _glob_numneigh_red,int* _glob_neighbors_red,int* _glob_neightype_red)
198
__global__ void Pair_SW_Kernel_TpA(int eflag_atom, int vflag_atom) //,F_CFLOAT* _glob_zeta_ij,F_CFLOAT4* _glob_r_ij,int* _glob_numneigh_red,int* _glob_neighbors_red,int* _glob_neightype_red)
200
ENERGY_FLOAT evdwl = ENERGY_F(0.0);
202
ENERGY_FLOAT* sharedE = &sharedmem[threadIdx.x];
203
ENERGY_FLOAT* sharedV = &sharedmem[threadIdx.x];
205
F_FLOAT* shared_F_F = (F_FLOAT*) sharedmem;
207
if((eflag || eflag_atom) && (vflagm || vflag_atom)) shared_F_F = (F_FLOAT*) &sharedmem[7 * blockDim.x];
208
else if(eflag) shared_F_F = (F_FLOAT*) &sharedmem[blockDim.x];
209
else if(vflagm) shared_F_F = (F_FLOAT*) &sharedmem[6 * blockDim.x];
200
ENERGY_CFLOAT evdwl = ENERGY_F(0.0);
202
ENERGY_CFLOAT* sharedE = &sharedmem[threadIdx.x];
203
ENERGY_CFLOAT* sharedV = &sharedmem[threadIdx.x];
205
F_CFLOAT* shared_F_F = (F_CFLOAT*) sharedmem;
207
if((eflag || eflag_atom) && (vflagm || vflag_atom)) shared_F_F = (F_CFLOAT*) &sharedmem[7 * blockDim.x];
208
else if(eflag) shared_F_F = (F_CFLOAT*) &sharedmem[blockDim.x];
209
else if(vflagm) shared_F_F = (F_CFLOAT*) &sharedmem[6 * blockDim.x];
211
211
shared_F_F += threadIdx.x;
231
231
//#define jnum_red (static_cast <int> (shared_F_F[3*blockDim.x]))
233
233
int ii = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;
234
X_FLOAT4 myxtype_i, myxtype_j, myxtype_k;
235
F_FLOAT4 delij, delik, deljk;
234
X_CFLOAT4 myxtype_i, myxtype_j, myxtype_k;
235
F_CFLOAT4 delij, delik, deljk;
277
277
volatile int iparam_ji = elem2param[(jtype * nelements + itype) * nelements + itype];
279
279
if(delij.w < params_sw[iparam_ij].cutsq) {
280
F_FLOAT dxfp, dyfp, dzfp;
280
F_CFLOAT dxfp, dyfp, dzfp;
281
281
twobody(iparam_ij, delij.w, fpair, eflag, evdwl);
282
282
fxtmp += dxfp = delij.x * fpair;
283
283
fytmp += dyfp = delij.y * fpair;
316
316
vec3_scale(F_F(-1.0), delik, delik);
318
318
if(delik.w <= params_sw[iparam_ijk].cutsq) {
320
320
threebody(iparam_ij, iparam_ik, iparam_ijk,
321
321
delij, delik, fj, fk, eflag, evdwl);
322
322
fxtmp -= fj.x + fk.x;
377
377
vec3_scale(F_F(-1.0), delij, delij);
379
379
if(deljk.w <= params_sw[iparam_jik].cutsq) {
382
382
threebody_fj(iparam_ji, iparam_jk, iparam_jik,
383
383
delij, deljk, fj);