53
53
__global__ void PairEAMCuda_Kernel1(int eflag, int vflag, int eflag_atom, int vflag_atom)
55
ENERGY_FLOAT* sharedE;
56
ENERGY_FLOAT* sharedV = &sharedmem[threadIdx.x];
55
ENERGY_CFLOAT* sharedE;
56
ENERGY_CFLOAT* sharedV = &sharedmem[threadIdx.x];
59
59
if(eflag || eflag_atom) {
74
74
int ii = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;
76
X_FLOAT xtmp, ytmp, ztmp;
78
F_FLOAT delx, dely, delz;
76
X_CFLOAT xtmp, ytmp, ztmp;
78
F_CFLOAT delx, dely, delz;
109
109
dely = ytmp - myxtype.y;
110
110
delz = ztmp - myxtype.z;
111
111
int jtype = static_cast <int>(myxtype.w);
112
const F_FLOAT rsq = delx * delx + dely * dely + delz * delz;
112
const F_CFLOAT rsq = delx * delx + dely * dely + delz * delz;
114
114
if(rsq < _cutsq_global) {
115
F_FLOAT p = sqrt(rsq) * _rdr + F_F(1.0);
115
F_CFLOAT p = sqrt(rsq) * _rdr + F_F(1.0);
116
116
int m = static_cast<int>(p);
117
117
m = MIN(m, _nr - 1);
119
119
p = MIN(p, F_F(1.0));
121
121
int k = (static_cast <int>(_type2rhor[jtype * _cuda_ntypes + itype]) * (_nr + 1) + m) * 2;
122
F_FLOAT4 c = fetchRhor(k + 1);
122
F_CFLOAT4 c = fetchRhor(k + 1);
123
123
_rho[i] += ((c.w * p + c.x) * p + c.y) * p + c.z;
130
F_FLOAT p = _rho[i] * _rdrho + F_F(1.0);
130
F_CFLOAT p = _rho[i] * _rdrho + F_F(1.0);
131
131
int m = static_cast<int>(p);
132
132
m = MAX(1, MIN(m, _nrho - 1));
134
134
p = MIN(p, F_F(1.0));
135
F_FLOAT* coeff = &_frho_spline[(static_cast <int>(_type2frho[itype]) * (_nrho + 1) + m) * EAM_COEFF_LENGTH];
135
F_CFLOAT* coeff = &_frho_spline[(static_cast <int>(_type2frho[itype]) * (_nrho + 1) + m) * EAM_COEFF_LENGTH];
136
136
_fp[i] = (coeff[0] * p + coeff[1]) * p + coeff[2];
138
138
if(eflag || eflag_atom) {
148
148
_eatom[i] += sharedmem[threadIdx.x];
150
150
reduceBlock(sharedmem);
151
ENERGY_FLOAT* buffer = (ENERGY_FLOAT*) _buffer;
151
ENERGY_CFLOAT* buffer = (ENERGY_CFLOAT*) _buffer;
152
152
buffer[blockIdx.x * gridDim.y + blockIdx.y] = ENERGY_F(2.0) * sharedmem[0];
156
156
__global__ void PairEAMCuda_Kernel2(int eflag, int vflag, int eflag_atom, int vflag_atom)
158
ENERGY_FLOAT evdwl = ENERGY_F(0.0);
158
ENERGY_CFLOAT evdwl = ENERGY_F(0.0);
160
ENERGY_FLOAT* sharedE;
161
ENERGY_FLOAT* sharedV = &sharedmem[threadIdx.x];
160
ENERGY_CFLOAT* sharedE;
161
ENERGY_CFLOAT* sharedV = &sharedmem[threadIdx.x];
164
164
if(eflag || eflag_atom) {
179
179
int ii = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;
181
X_FLOAT xtmp, ytmp, ztmp;
183
F_FLOAT fxtmp, fytmp, fztmp, fpair;
184
F_FLOAT delx, dely, delz;
181
X_CFLOAT xtmp, ytmp, ztmp;
183
F_CFLOAT fxtmp, fytmp, fztmp, fpair;
184
F_CFLOAT delx, dely, delz;
219
219
dely = ytmp - myxtype.y;
220
220
delz = ztmp - myxtype.z;
221
221
int jtype = static_cast <int>(myxtype.w);
222
const F_FLOAT rsq = delx * delx + dely * dely + delz * delz;
222
const F_CFLOAT rsq = delx * delx + dely * dely + delz * delz;
224
224
if(rsq < _cutsq_global) {
225
F_FLOAT r = _SQRT_(rsq);
226
F_FLOAT p = r * _rdr + F_F(1.0);
225
F_CFLOAT r = _SQRT_(rsq);
226
F_CFLOAT p = r * _rdr + F_F(1.0);
227
227
int m = static_cast<int>(p);
228
228
m = MIN(m, _nr - 1);
230
230
p = MIN(p, F_F(1.0));
232
232
int k = (static_cast <int>(_type2rhor[itype * _cuda_ntypes + jtype]) * (_nr + 1) + m) * 2;
233
F_FLOAT4 c = fetchRhor(k);
234
F_FLOAT rhoip = (c.x * p + c.y) * p + c.z;
233
F_CFLOAT4 c = fetchRhor(k);
234
F_CFLOAT rhoip = (c.x * p + c.y) * p + c.z;
235
235
k = (static_cast <int>(_type2rhor[jtype * _cuda_ntypes + itype]) * (_nr + 1) + m) * 2;
236
236
c = fetchRhor(k);
237
F_FLOAT rhojp = (c.x * p + c.y) * p + c.z;
237
F_CFLOAT rhojp = (c.x * p + c.y) * p + c.z;
238
238
k = (static_cast <int>(_type2z2r[itype * _cuda_ntypes + jtype]) * (_nr + 1) + m) * 2;
240
F_FLOAT z2p = (c.x * p + c.y) * p + c.z;
240
F_CFLOAT z2p = (c.x * p + c.y) * p + c.z;
241
241
c = fetchZ2r(k + 1);
242
F_FLOAT z2 = ((c.w * p + c.x) * p + c.y) * p + c.z;
242
F_CFLOAT z2 = ((c.w * p + c.x) * p + c.y) * p + c.z;
244
F_FLOAT recip = F_F(1.0) / r;
245
F_FLOAT phi = z2 * recip;
246
F_FLOAT phip = z2p * recip - phi * recip;
247
F_FLOAT psip = _fp[i] * rhojp + _fp[j] * rhoip + phip;
244
F_CFLOAT recip = F_F(1.0) / r;
245
F_CFLOAT phi = z2 * recip;
246
F_CFLOAT phip = z2p * recip - phi * recip;
247
F_CFLOAT psip = _fp[i] * rhojp + _fp[j] * rhoip + phip;
248
248
fpair = -psip * recip;
250
F_FLOAT dxfp, dyfp, dzfp;
250
F_CFLOAT dxfp, dyfp, dzfp;
251
251
fxtmp += dxfp = delx * fpair;
252
252
fytmp += dyfp = dely * fpair;
253
253
fztmp += dzfp = delz * fpair;
320
320
if(vflag || eflag) PairVirialCompute_A_Kernel(eflag, vflag, 0);
323
__global__ void PairEAMCuda_PackComm_Kernel(int* sendlist, int n, int maxlistlength, int iswap, F_FLOAT* buffer)
323
__global__ void PairEAMCuda_PackComm_Kernel(int* sendlist, int n, int maxlistlength, int iswap, F_CFLOAT* buffer)
325
325
int i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;
326
326
int* list = sendlist + iswap * maxlistlength;
334
__global__ void PairEAMCuda_UnpackComm_Kernel(int n, int first, F_FLOAT* buffer)
334
__global__ void PairEAMCuda_UnpackComm_Kernel(int n, int first, F_CFLOAT* buffer)
336
336
int i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;