1
// **************************************************************************
6
// Device code for acceleration of the coul/debye pair style
8
// __________________________________________________________________________
9
// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
10
// __________________________________________________________________________
13
// email : ndtrung@umich.edu
14
// ***************************************************************************/
18
// Project header; presumably the source of the helpers the kernels below call
// (atom_info, nbor_info, fetch/fetch4, store_answers_q) -- TODO confirm.
#include "lal_aux_fun1.h"
19
// Declaration of pos_tex, the texture the kernels pass to fetch4() when
// reading per-atom numtyp4 data. This branch applies when _DOUBLE_DOUBLE is
// not defined.
#ifndef _DOUBLE_DOUBLE
20
texture<float4> pos_tex;
23
// NOTE(review): second declaration of pos_tex with a different element type.
// It presumably belongs to an #else branch, but neither #else nor #endif is
// visible in this excerpt -- confirm against the full file.
texture<int4,1> pos_tex;
32
// Kernel k_coul_debye: for atom i, walks its neighbor entries and, for each
// neighbor j inside the type-pair cutoff, evaluates a Coulomb term screened by
// exp(-kappa*r), then accumulates the Coulomb energy and six virial terms.
//
// NOTE(review): this is an incomplete view of the kernel. The bare integers
// between statements appear to be line-number residue from whatever produced
// this text, and several statements the visible code depends on are missing
// (flagged individually below). Do not treat this listing as compilable.
//
// Parameters, as far as the visible lines show:
//   x_         per-atom numtyp4 buffer; the trailing "//x_[i]" / "//x_[j]"
//              comments suggest fetch4() stands in for reading it
//   scale      multiplier indexed by mtype
//   sp_cl_in   not read by any visible line; presumably the source of the
//              sp_cl[] factors -- TODO confirm
//   dev_nbor, dev_packed
//              neighbor data passed to nbor_info(); dev_packed is also indexed
//              directly to obtain each neighbor j
//   ans, engv  not referenced by any visible line; presumably written through
//              store_answers_q(), whose argument list is cut off -- TODO confirm
//   eflag, inum
//              forwarded to store_answers_q()
//   vflag      not used by any visible line
//   q_         not referenced directly; charges are read via fetch(...,q_tex)
//   cutsq      squared cutoff indexed by mtype, compared against rsq
//   qqrd2e     constant prefactor of the force and energy expressions
//   kappa      screening parameter used in exp(-kappa*r) and (kappa+rinv)
//   t_per_atom passed to atom_info(), nbor_info() and store_answers_q()
// NOTE(review): nbor_pitch and lj_types are used in the body but their
// declarations are not visible in this parameter list.
__kernel void k_coul_debye(const __global numtyp4 *restrict x_,
33
const __global numtyp *restrict scale,
35
const __global numtyp *restrict sp_cl_in,
36
const __global int *dev_nbor,
37
const __global int *dev_packed,
38
__global acctyp4 *restrict ans,
39
__global acctyp *restrict engv,
40
const int eflag, const int vflag, const int inum,
42
const __global numtyp *restrict q_ ,
43
const __global numtyp *restrict cutsq,
44
const numtyp qqrd2e, const numtyp kappa,
45
const int t_per_atom) {
47
// Helper defined outside this excerpt; ii, tid and offset are not declared in
// any visible line, so it presumably introduces them -- TODO confirm.
atom_info(t_per_atom,ii,tid,offset);
49
// Four-entry __local table, later indexed by sbmask(j). The code that fills it
// is not visible here.
__local numtyp sp_cl[4];
55
// Per-atom accumulators start at zero.
acctyp energy=(acctyp)0;
56
acctyp e_coul=(acctyp)0;
58
// NOTE(review): the declaration of f is not visible.
f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
60
// NOTE(review): the body of this six-iteration loop is not visible; it
// presumably zeroes virial[0..5] -- TODO confirm.
for (int i=0; i<6; i++)
64
int i, numj, nbor, nbor_end;
66
// Helper defined outside this excerpt. The loop below relies on i, nbor,
// nbor_end and n_stride having been set by this point.
// NOTE(review): the declaration of n_stride is not visible in this kernel.
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
67
n_stride,nbor_end,nbor);
69
// Data for the central atom i: numtyp4 entry and charge.
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
70
numtyp qtmp; fetch(qtmp,i,q_tex);
74
// Visit neighbor entries from nbor up to nbor_end in steps of n_stride.
for ( ; nbor<nbor_end; nbor+=n_stride) {
76
int j=dev_packed[nbor];
77
// Per-pair factor selected from sp_cl by sbmask(j).
// NOTE(review): the declaration of factor_coul, and any step that strips
// flag bits from j before it is used as an index, are not visible.
factor_coul = sp_cl[sbmask(j)];
80
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
84
// Separation vector i - j and its squared length.
numtyp delx = ix.x-jx.x;
85
numtyp dely = ix.y-jx.y;
86
numtyp delz = ix.z-jx.z;
87
numtyp rsq = delx*delx+dely*dely+delz*delz;
89
// Flattened type-pair index into scale[] and cutsq[].
// NOTE(review): itype and jtype are not declared in any visible line.
int mtype=itype*lj_types+jtype;
90
// Only pairs closer than the type-pair cutoff contribute.
if (rsq<cutsq[mtype]) {
91
numtyp r2inv=ucl_recip(rsq);
92
numtyp forcecoul, force, r, rinv, screening;
96
// NOTE(review): r and rinv are used below but no assignment to them is
// visible in this excerpt.
// screening = q_j * exp(-kappa*r): charge of j folded in with the screening.
fetch(screening,j,q_tex);
97
screening *= ucl_exp(-kappa*r);
98
forcecoul = qqrd2e*qtmp*scale[mtype]*(kappa+rinv)*screening*factor_coul;
99
// Dividing by r^2 yields the factor that multiplies the separation components
// in the virial sums below.
force = forcecoul * r2inv;
106
// NOTE(review): the lines that add force into f, and any eflag/vflag guards
// around the energy and virial sums, are not visible.
e_coul+=qqrd2e*scale[mtype]*qtmp*rinv*screening*factor_coul;
109
// Virial terms in the order xx, yy, zz, xy, xz, yz.
virial[0] += delx*delx*force;
110
virial[1] += dely*dely*force;
111
virial[2] += delz*delz*force;
112
virial[3] += delx*dely*force;
113
virial[4] += delx*delz*force;
114
virial[5] += dely*delz*force;
119
// Hand the accumulated values to a helper defined outside this excerpt.
// NOTE(review): the closing braces of the if/for and the rest of this
// argument list are not visible.
store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,
124
// Kernel k_coul_debye_fast: same screened-Coulomb evaluation as k_coul_debye,
// but the per-type-pair scale and cutsq tables are first copied into __local
// arrays of MAX_SHARED_TYPES*MAX_SHARED_TYPES entries and indexed with
// itype + jx.w.
//
// NOTE(review): this is an incomplete view of the kernel. The bare integers
// between statements appear to be line-number residue from whatever produced
// this text, several statements the visible code depends on are missing
// (flagged individually below), and the listing ends inside the final call.
//
// Parameters, as far as the visible lines show:
//   x_         per-atom numtyp4 buffer; the trailing "//x_[i]" / "//x_[j]"
//              comments suggest fetch4() stands in for reading it
//   scale_in   source for the __local scale[] copy
//   sp_cl_in   source for the __local sp_cl[] copy
//   dev_nbor, dev_packed
//              neighbor data passed to nbor_info(); dev_packed is also indexed
//              directly to obtain each neighbor j
//   ans, engv  not referenced by any visible line; presumably written through
//              store_answers_q(), whose argument list is cut off -- TODO confirm
//   eflag, inum
//              forwarded to store_answers_q()
//   vflag      not used by any visible line
//   nbor_pitch forwarded to nbor_info()
//   q_         not referenced directly; charges are read via fetch(...,q_tex)
//   _cutsq     source for the __local cutsq[] copy
//   qqrd2e     constant prefactor of the force and energy expressions
//   kappa      screening parameter used in exp(-kappa*r) and (kappa+rinv)
//   t_per_atom passed to atom_info(), nbor_info() and store_answers_q()
__kernel void k_coul_debye_fast(const __global numtyp4 *restrict x_,
125
const __global numtyp *restrict scale_in,
126
const __global numtyp *restrict sp_cl_in,
127
const __global int *dev_nbor,
128
const __global int *dev_packed,
129
__global acctyp4 *restrict ans,
130
__global acctyp *restrict engv,
131
const int eflag, const int vflag, const int inum,
132
const int nbor_pitch,
133
const __global numtyp *restrict q_,
134
const __global numtyp *restrict _cutsq,
135
const numtyp qqrd2e, const numtyp kappa,
136
const int t_per_atom) {
138
// Helper defined outside this excerpt; ii, tid and offset are not declared in
// any visible line, so it presumably introduces them -- TODO confirm.
atom_info(t_per_atom,ii,tid,offset);
140
// __local copies of the per-type-pair tables and the four sp_cl factors.
__local numtyp scale[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
141
__local numtyp cutsq[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
142
__local numtyp sp_cl[4];
144
// NOTE(review): sp_cl has only four entries; whatever bounds tid for this
// store (e.g. a guard on the preceding line) is not visible in this excerpt.
sp_cl[tid]=sp_cl_in[tid];
145
// Each tid below the table size copies one scale entry and one cutsq entry.
// NOTE(review): the closing brace of this if, and any synchronization before
// the tables are read, are not visible.
if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES) {
146
scale[tid]=scale_in[tid];
147
cutsq[tid]=_cutsq[tid];
150
// Per-atom accumulators start at zero.
acctyp energy=(acctyp)0;
151
acctyp e_coul=(acctyp)0;
153
// NOTE(review): the declaration of f is not visible.
f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
155
// NOTE(review): the body of this six-iteration loop is not visible; it
// presumably zeroes virial[0..5] -- TODO confirm.
for (int i=0; i<6; i++)
161
int i, numj, nbor, nbor_end;
162
__local int n_stride;
163
// Helper defined outside this excerpt. The loop below relies on i, nbor,
// nbor_end and n_stride having been set by this point.
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
164
n_stride,nbor_end,nbor);
166
// Data for the central atom i: numtyp4 entry and charge.
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
167
numtyp qtmp; fetch(qtmp,i,q_tex);
169
// Row offset into the MAX_SHARED_TYPES-wide tables.
// NOTE(review): iw is not declared in any visible line.
int itype=fast_mul((int)MAX_SHARED_TYPES,iw);
172
// Visit neighbor entries from nbor up to nbor_end in steps of n_stride.
for ( ; nbor<nbor_end; nbor+=n_stride) {
174
int j=dev_packed[nbor];
175
// Per-pair factor selected from sp_cl by sbmask(j).
// NOTE(review): the declaration of factor_coul, and any step that strips
// flag bits from j before it is used as an index, are not visible.
factor_coul = sp_cl[sbmask(j)];
178
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
179
// Table index: row offset for i plus the w component of j's entry.
int mtype=itype+jx.w;
182
// Separation vector i - j and its squared length.
numtyp delx = ix.x-jx.x;
183
numtyp dely = ix.y-jx.y;
184
numtyp delz = ix.z-jx.z;
185
numtyp rsq = delx*delx+dely*dely+delz*delz;
187
// Only pairs closer than the type-pair cutoff contribute.
if (rsq<cutsq[mtype]) {
188
numtyp r2inv=ucl_recip(rsq);
189
numtyp forcecoul, force, r, rinv, screening;
193
// NOTE(review): r and rinv are used below but no assignment to them is
// visible in this excerpt.
// screening = q_j * exp(-kappa*r): charge of j folded in with the screening.
fetch(screening,j,q_tex);
194
screening *= ucl_exp(-kappa*r);
195
forcecoul = qqrd2e*scale[mtype]*qtmp*(kappa+rinv)*screening*factor_coul;
196
// Dividing by r^2 yields the factor that multiplies the separation components
// in the virial sums below.
force = forcecoul * r2inv;
203
// NOTE(review): the lines that add force into f, and any eflag/vflag guards
// around the energy and virial sums, are not visible.
e_coul+=qqrd2e*scale[mtype]*qtmp*rinv*screening*factor_coul;
206
// Virial terms in the order xx, yy, zz, xy, xz, yz.
virial[0] += delx*delx*force;
207
virial[1] += dely*dely*force;
208
virial[2] += delz*delz*force;
209
virial[3] += delx*dely*force;
210
virial[4] += delx*delz*force;
211
virial[5] += dely*delz*force;
216
// Hand the accumulated values to a helper defined outside this excerpt.
// NOTE(review): the closing braces of the if/for and the rest of this
// argument list are not visible.
store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,