1
/* ----------------------------------------------------------------------
2
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
3
http://lammps.sandia.gov, Sandia National Laboratories
4
Steve Plimpton, sjplimp@sandia.gov
6
Copyright (2003) Sandia Corporation. Under the terms of Contract
7
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
8
certain rights in this software. This software is distributed under
9
the GNU General Public License.
11
See the README file in the top-level LAMMPS directory.
12
------------------------------------------------------------------------- */
14
/* ----------------------------------------------------------------------
15
Contributing author: W. Michael Brown (Intel)
16
------------------------------------------------------------------------- */
22
#include "pair_sw_intel.h"
25
#include "neigh_request.h"
30
#include "neigh_list.h"
36
using namespace LAMMPS_NS;
38
#define FC_PACKED0_T typename ForceConst<flt_t>::fc_packed0
39
#define FC_PACKED1_T typename ForceConst<flt_t>::fc_packed1
40
#define FC_PACKED2_T typename ForceConst<flt_t>::fc_packed2
41
#define FC_PACKED3_T typename ForceConst<flt_t>::fc_packed3
46
/* ---------------------------------------------------------------------- */
48
PairSWIntel::PairSWIntel(LAMMPS *lmp) : PairSW(lmp)
50
suffix_flag |= Suffix::INTEL;
53
/* ---------------------------------------------------------------------- */
55
PairSWIntel::~PairSWIntel()
59
/* ---------------------------------------------------------------------- */
61
void PairSWIntel::compute(int eflag, int vflag)
63
if (fix->precision() == FixIntel::PREC_MODE_MIXED)
64
compute<float,double>(eflag, vflag, fix->get_mixed_buffers(),
66
else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
67
compute<double,double>(eflag, vflag, fix->get_double_buffers(),
70
compute<float,float>(eflag, vflag, fix->get_single_buffers(),
77
/* ---------------------------------------------------------------------- */
79
template <class flt_t, class acc_t>
80
void PairSWIntel::compute(int eflag, int vflag,
81
IntelBuffers<flt_t,acc_t> *buffers,
82
const ForceConst<flt_t> &fc)
85
ev_setup(eflag, vflag);
86
} else evflag = vflag_fdotr = 0;
88
const int inum = list->inum;
89
const int nthreads = comm->nthreads;
90
const int host_start = fix->host_start_pair();
91
const int offload_end = fix->offload_end_pair();
92
const int ago = neighbor->ago;
94
if (ago != 0 && fix->separate_buffers() == 0) {
95
fix->start_watch(TIME_PACK);
98
#pragma omp parallel default(none) shared(eflag,vflag,buffers,fc)
102
IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal + atom->nghost,
103
nthreads, sizeof(ATOM_T));
104
buffers->thr_pack(ifrom, ito, ago);
107
fix->stop_watch(TIME_PACK);
111
if (evflag || vflag_fdotr) {
113
if (vflag_fdotr) ovflag = 2;
114
else if (vflag) ovflag = 1;
116
eval<1,1,1>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad);
117
eval<1,1,1>(0, ovflag, buffers, fc, host_start, inum, _host_pad);
119
eval<1,1,0>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad);
120
eval<1,1,0>(0, ovflag, buffers, fc, host_start, inum, _host_pad);
123
eval<1,0,0>(1, 0, buffers, fc, 0, offload_end, _offload_pad);
124
eval<1,0,0>(0, 0, buffers, fc, host_start, inum, _host_pad);
127
if (evflag || vflag_fdotr) {
129
if (vflag_fdotr) ovflag = 2;
130
else if (vflag) ovflag = 1;
132
eval<0,1,1>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad);
133
eval<0,1,1>(0, ovflag, buffers, fc, host_start, inum, _host_pad);
135
eval<0,1,0>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad);
136
eval<0,1,0>(0, ovflag, buffers, fc, host_start, inum, _host_pad);
139
eval<0,0,0>(1, 0, buffers, fc, 0, offload_end, _offload_pad);
140
eval<0,0,0>(0, 0, buffers, fc, host_start, inum, _host_pad);
145
/* ---------------------------------------------------------------------- */
147
template <int SPQ, int EVFLAG, int EFLAG, class flt_t, class acc_t>
148
void PairSWIntel::eval(const int offload, const int vflag,
149
IntelBuffers<flt_t,acc_t> *buffers,
150
const ForceConst<flt_t> &fc, const int astart,
151
const int aend, const int pad_width)
153
const int inum = aend - astart;
154
if (inum == 0) return;
155
int nlocal, nall, minlocal;
156
fix->get_buffern(offload, nlocal, nall, minlocal);
158
const int ago = neighbor->ago;
159
IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, nlocal, nall);
161
ATOM_T * _noalias const x = buffers->get_x(offload);
162
const int * _noalias const numneighhalf = buffers->get_atombin();
163
const int * _noalias const numneigh = list->numneigh;
164
const int * _noalias const cnumneigh = buffers->cnumneigh(list);
165
const int * _noalias const firstneigh = buffers->firstneigh(list);
167
const FC_PACKED0_T * _noalias const p2 = fc.p2[0];
168
const FC_PACKED1_T * _noalias const p2f = fc.p2f[0];
169
const FC_PACKED2_T * _noalias const p2e = fc.p2e[0];
170
const FC_PACKED3_T * _noalias const p3 = fc.p3[0][0];
172
flt_t * _noalias const ccachex = buffers->get_ccachex();
173
flt_t * _noalias const ccachey = buffers->get_ccachey();
174
flt_t * _noalias const ccachez = buffers->get_ccachez();
175
flt_t * _noalias const ccachew = buffers->get_ccachew();
176
int * _noalias const ccachei = buffers->get_ccachei();
177
int * _noalias const ccachej = buffers->get_ccachej();
178
const int ccache_stride = _ccache_stride;
180
const int ntypes = atom->ntypes + 1;
181
const int eatom = this->eflag_atom;
183
// Determine how much data to transfer
184
int x_size, q_size, f_stride, ev_size, separate_flag;
185
IP_PRE_get_transfern(ago, /* NEWTON_PAIR*/ 1, EVFLAG, EFLAG, vflag,
186
buffers, offload, fix, separate_flag,
187
x_size, q_size, ev_size, f_stride);
190
FORCE_T * _noalias f_start;
191
acc_t * _noalias ev_global;
192
IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global);
193
const int nthreads = tc;
195
#ifdef _LMP_INTEL_OFFLOAD
196
double *timer_compute = fix->off_watch_pair();
197
int *overflow = fix->get_off_overflow_flag();
199
if (offload) fix->start_watch(TIME_OFFLOAD_LATENCY);
200
#pragma offload target(mic:_cop) if(offload) \
201
in(p2,p2f,p2e,p3:length(0) alloc_if(0) free_if(0)) \
202
in(firstneigh:length(0) alloc_if(0) free_if(0)) \
203
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
204
in(numneigh:length(0) alloc_if(0) free_if(0)) \
205
in(x:length(x_size) alloc_if(0) free_if(0)) \
206
in(numneighhalf:length(0) alloc_if(0) free_if(0)) \
207
in(overflow:length(0) alloc_if(0) free_if(0)) \
208
in(ccachex,ccachey,ccachez,ccachew:length(0) alloc_if(0) free_if(0)) \
209
in(ccachei,ccachej:length(0) alloc_if(0) free_if(0)) \
210
in(ccache_stride,nthreads,inum,nall,ntypes,vflag,eatom,offload) \
211
in(astart,nlocal,f_stride,minlocal,separate_flag,pad_width) \
212
out(f_start:length(f_stride) alloc_if(0) free_if(0)) \
213
out(ev_global:length(ev_size) alloc_if(0) free_if(0)) \
214
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
219
*timer_compute = MIC_Wtime();
222
IP_PRE_repack_for_offload(1, separate_flag, nlocal, nall,
225
acc_t oevdwl, ov0, ov1, ov2, ov3, ov4, ov5;
228
if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
232
#pragma omp parallel default(none) \
233
shared(f_start,f_stride,nlocal,nall,minlocal) \
234
reduction(+:oevdwl,ov0,ov1,ov2,ov3,ov4,ov5)
237
int iifrom, iito, tid;
238
IP_PRE_omp_range_id(iifrom, iito, tid, inum, nthreads);
242
FORCE_T * _noalias const f = f_start - minlocal + (tid * f_stride);
243
memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));
245
const int toffs = tid * ccache_stride;
246
flt_t * _noalias const tdelx = ccachex + toffs;
247
flt_t * _noalias const tdely = ccachey + toffs;
248
flt_t * _noalias const tdelz = ccachez + toffs;
249
flt_t * _noalias const trsq = ccachew + toffs;
250
int * _noalias const tj = ccachei + toffs;
251
int * _noalias const tjtype = ccachej + toffs;
253
// loop over full neighbor list of my atoms
255
for (int i = iifrom; i < iito; ++i) {
256
const flt_t xtmp = x[i].x;
257
const flt_t ytmp = x[i].y;
258
const flt_t ztmp = x[i].z;
259
const int itype = x[i].w;
261
const int ptr_off = itype * ntypes;
262
const FC_PACKED0_T * _noalias const p2i = p2 + ptr_off;
263
const FC_PACKED1_T * _noalias const p2fi = p2f + ptr_off;
264
const FC_PACKED2_T * _noalias const p2ei = p2e + ptr_off;
265
const FC_PACKED3_T * _noalias const p3i = p3 + ptr_off*ntypes;
267
const int * _noalias const jlist = firstneigh + cnumneigh[i];
268
const int jnum = numneigh[i];
269
const int jnumhalf = numneighhalf[i];
271
acc_t fxtmp, fytmp, fztmp, fwtmp;
272
acc_t sevdwl, sv0, sv1, sv2, sv3, sv4, sv5;
273
fxtmp = fytmp = fztmp = (acc_t)0.0;
275
if (EFLAG) fwtmp = sevdwl = (acc_t)0;
276
if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
279
int ejnum = 0, ejnumhalf = 0;
280
for (int jj = 0; jj < jnum; jj++) {
283
const flt_t delx = x[j].x - xtmp;
284
const flt_t dely = x[j].y - ytmp;
285
const flt_t delz = x[j].z - ztmp;
286
const int jtype = x[j].w;
287
const flt_t rsq1 = delx * delx + dely * dely + delz * delz;
288
if (rsq1 < p2i[jtype].cutsq) {
294
tjtype[ejnum] = jtype;
296
if (jj < jnumhalf) ejnumhalf++;
299
int ejnum_pad = ejnum;
300
while ( (ejnum_pad % pad_width) != 0) {
301
tdelx[ejnum_pad] = (flt_t)0.0;
302
tdely[ejnum_pad] = (flt_t)0.0;
303
tdelz[ejnum_pad] = (flt_t)0.0;
304
trsq[ejnum_pad] = (flt_t)1.0;
305
tj[ejnum_pad] = nall;
306
tjtype[ejnum_pad] = 0;
310
#if defined(__INTEL_COMPILER)
311
#pragma vector aligned
312
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
313
sv0, sv1, sv2, sv3, sv4, sv5)
315
for (int jj = 0; jj < ejnum_pad; jj++) {
316
acc_t fjxtmp, fjytmp, fjztmp, fjtmp;
317
fjxtmp = fjytmp = fjztmp = (acc_t)0.0;
318
if (EFLAG) fjtmp = (acc_t)0.0;
320
const flt_t delx = tdelx[jj];
321
const flt_t dely = tdely[jj];
322
const flt_t delz = tdelz[jj];
323
const int jtype = tjtype[jj];
324
const flt_t rsq1 = trsq[jj];
326
const flt_t r1 = sqrt(rsq1);
327
const flt_t rinvsq1 = (flt_t)1.0 / rsq1;
328
const flt_t rainv1 = (flt_t)1.0 / (r1 - p2fi[jtype].cut);
330
// two-body interactions, skip half of them
335
rp = (flt_t)1.0 / rp;
338
rp = pow(r1, -p2fi[jtype].powerp);
339
rq = pow(r1, -p2fi[jtype].powerq);
341
const flt_t rainvsq = rainv1 * rainv1 * r1;
342
flt_t expsrainv = exp(p2fi[jtype].sigma * rainv1);
343
if (jj >= ejnumhalf) expsrainv = (flt_t)0.0;
344
const flt_t fpair = (p2fi[jtype].c1 * rp - p2fi[jtype].c2 * rq +
345
(p2fi[jtype].c3 * rp -p2fi[jtype].c4 * rq) *
346
rainvsq) * expsrainv * rinvsq1;
348
fxtmp -= delx * fpair;
349
fytmp -= dely * fpair;
350
fztmp -= delz * fpair;
351
fjxtmp += delx * fpair;
352
fjytmp += dely * fpair;
353
fjztmp += delz * fpair;
358
evdwl = (p2ei[jtype].c5 * rp - p2ei[jtype].c6 * rq) *
362
fwtmp += (acc_t)0.5 * evdwl;
363
fjtmp += (acc_t)0.5 * evdwl;
366
IP_PRE_ev_tally_nbor(vflag, (flt_t)1.0, fpair,
367
-delx, -dely, -delz);
370
/*---------------------------------------------*/
372
flt_t gsrainv1 = p2i[jtype].sigma_gamma * rainv1;
373
flt_t gsrainvsq1 = gsrainv1 * rainv1 / r1;
374
flt_t expgsrainv1 = exp(gsrainv1);
375
const int joffset = jtype * ntypes;
377
for (int kk = 0; kk < ejnum; kk++) {
379
delr2[0] = tdelx[kk];
380
delr2[1] = tdely[kk];
381
delr2[2] = tdelz[kk];
382
const int ktype = tjtype[kk];
383
const flt_t rsq2 = trsq[kk];
385
const flt_t r2 = sqrt(rsq2);
386
const flt_t rinvsq2 = (flt_t)1.0 / rsq2;
387
const flt_t rainv2 = (flt_t)1.0 / (r2 - p2i[ktype].cut);
388
const flt_t gsrainv2 = p2i[ktype].sigma_gamma * rainv2;
389
const flt_t gsrainvsq2 = gsrainv2 * rainv2 / r2;
390
const flt_t expgsrainv2 = exp(gsrainv2);
392
const flt_t rinv12 = (flt_t)1.0 / (r1 * r2);
393
const flt_t cs = (delx * delr2[0] + dely * delr2[1] +
394
delz * delr2[2]) * rinv12;
395
const flt_t delcs = cs - p3i[joffset + ktype].costheta;
396
const flt_t delcssq = delcs*delcs;
399
if (jj == kk) kfactor = (flt_t)0.0;
400
else kfactor = (flt_t)1.0;
402
const flt_t facexp = expgsrainv1*expgsrainv2*kfactor;
403
const flt_t facrad = p3i[joffset + ktype].lambda_epsilon *
405
const flt_t frad1 = facrad*gsrainvsq1;
406
const flt_t frad2 = facrad*gsrainvsq2;
407
const flt_t facang = p3i[joffset + ktype].lambda_epsilon2 *
409
const flt_t facang12 = rinv12*facang;
410
const flt_t csfacang = cs*facang;
411
const flt_t csfac1 = rinvsq1*csfacang;
413
const flt_t fjx = delx*(frad1+csfac1)-delr2[0]*facang12;
414
const flt_t fjy = dely*(frad1+csfac1)-delr2[1]*facang12;
415
const flt_t fjz = delz*(frad1+csfac1)-delr2[2]*facang12;
426
const flt_t evdwl = facrad * (flt_t)0.5;
429
fwtmp += (acc_t)0.33333333 * evdwl;
430
fjtmp += (acc_t)0.33333333 * facrad;
433
IP_PRE_ev_tally_nbor3v(vflag, fjx, fjy, fjz,
437
const int j = tj[jj];
442
if (eatom) f[j].w += fjtmp;
448
IP_PRE_ev_tally_atom(EVFLAG, EFLAG, vflag, f, fwtmp);
453
IP_PRE_fdotr_acc_force(1, EVFLAG, EFLAG, vflag, eatom, nall,
454
nlocal, minlocal, nthreads, f_start, f_stride,
459
ev_global[0] = oevdwl;
460
ev_global[1] = (acc_t)0.0;
472
*timer_compute = MIC_Wtime() - *timer_compute;
476
fix->stop_watch(TIME_OFFLOAD_LATENCY);
478
fix->stop_watch(TIME_HOST_PAIR);
481
fix->add_result_array(f_start, ev_global, offload, eatom);
483
fix->add_result_array(f_start, 0, offload);
486
/* ---------------------------------------------------------------------- */
488
void PairSWIntel::allocate()
493
/* ----------------------------------------------------------------------
494
init specific to this pair style
495
------------------------------------------------------------------------- */
497
void PairSWIntel::init_style()
499
PairSW::init_style();
500
neighbor->requests[neighbor->nrequest-1]->intel = 1;
503
int ifix = modify->find_fix("package_intel");
506
"The 'package intel' command is required for /intel styles");
507
fix = static_cast<FixIntel *>(modify->fix[ifix]);
509
fix->pair_init_check();
510
#ifdef _LMP_INTEL_OFFLOAD
511
_cop = fix->coprocessor_number();
514
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
515
pack_force_const(force_const_single, fix->get_mixed_buffers());
516
fix->get_mixed_buffers()->need_tag(1);
517
} else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
518
pack_force_const(force_const_double, fix->get_double_buffers());
519
fix->get_double_buffers()->need_tag(1);
521
pack_force_const(force_const_single, fix->get_single_buffers());
522
fix->get_single_buffers()->need_tag(1);
525
#ifdef _LMP_INTEL_OFFLOAD
526
if (fix->offload_noghost())
527
error->all(FLERR,"The 'ghost no' option cannot be used with sw/intel.");
530
#if defined(__INTEL_COMPILER)
531
if (__INTEL_COMPILER_BUILD_DATE < 20141023)
532
error->all(FLERR, "Intel compiler versions before "
533
"15 Update 1 not supported for sw/intel");
537
/* ---------------------------------------------------------------------- */
539
template <class flt_t, class acc_t>
540
void PairSWIntel::pack_force_const(ForceConst<flt_t> &fc,
541
IntelBuffers<flt_t,acc_t> *buffers)
544
#ifdef _LMP_INTEL_OFFLOAD
545
if (_cop >= 0) off_ccache = 1;
547
buffers->grow_ccache(off_ccache, comm->nthreads);
548
_ccache_stride = buffers->ccache_stride();
550
int tp1 = atom->ntypes + 1;
551
fc.set_ntypes(tp1,memory,_cop);
552
buffers->set_ntypes(tp1);
553
flt_t **cutneighsq = buffers->get_cutneighsq();
555
// Repeat cutsq calculation because done after call to init_style
556
double cut, cutneigh;
557
for (int i = 1; i <= atom->ntypes; i++) {
558
for (int j = i; j <= atom->ntypes; j++) {
559
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
561
cutneigh = cut + neighbor->skin;
562
cutsq[i][j] = cutsq[j][i] = cut*cut;
563
cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh;
569
for (int ii = 0; ii < tp1; ii++) {
571
for (int jj = 0; jj < tp1; jj++) {
573
if (i < 0 || j < 0 || ii == 0 || jj == 0) {
574
fc.p2[ii][jj].cutsq = 0;
575
fc.p2[ii][jj].cut = 0;
576
fc.p2[ii][jj].sigma_gamma = 0;
577
fc.p2f[ii][jj].cut = 0;
578
fc.p2f[ii][jj].powerp = 0;
579
fc.p2f[ii][jj].powerq = 0;
580
fc.p2f[ii][jj].sigma = 0;
581
fc.p2f[ii][jj].c1 = 0;
582
fc.p2f[ii][jj].c2 = 0;
583
fc.p2f[ii][jj].c3 = 0;
584
fc.p2f[ii][jj].c4 = 0;
585
fc.p2e[ii][jj].c5 = 0;
586
fc.p2e[ii][jj].c6 = 0;
588
int ijparam = elem2param[i][j][j];
589
fc.p2[ii][jj].cutsq = params[ijparam].cutsq;
590
fc.p2[ii][jj].cut = params[ijparam].cut;
591
fc.p2[ii][jj].sigma_gamma = params[ijparam].sigma_gamma;
592
fc.p2f[ii][jj].cut = params[ijparam].cut;
593
fc.p2f[ii][jj].powerp = params[ijparam].powerp;
594
fc.p2f[ii][jj].powerq = params[ijparam].powerq;
595
fc.p2f[ii][jj].sigma = params[ijparam].sigma;
596
fc.p2f[ii][jj].c1 = params[ijparam].c1;
597
fc.p2f[ii][jj].c2 = params[ijparam].c2;
598
fc.p2f[ii][jj].c3 = params[ijparam].c3;
599
fc.p2f[ii][jj].c4 = params[ijparam].c4;
600
fc.p2e[ii][jj].c5 = params[ijparam].c5;
601
fc.p2e[ii][jj].c6 = params[ijparam].c6;
603
double cutcut = params[ijparam].cut * params[ijparam].cut;
604
if (params[ijparam].cutsq >= cutcut)
605
fc.p2[ii][jj].cutsq *= 0.98;
607
if (params[ijparam].powerp != 4.0 || params[ijparam].powerq != 0.0)
611
for (int kk = 0; kk < tp1; kk++) {
613
if (i < 0 || j < 0 || k < 0 || ii == 0 || jj == 0 || kk == 0) {
614
fc.p3[ii][jj][kk].costheta = 0;
615
fc.p3[ii][jj][kk].lambda_epsilon = 0;
616
fc.p3[ii][jj][kk].lambda_epsilon2 = 0;
618
int ijkparam = elem2param[i][j][k];
619
fc.p3[ii][jj][kk].costheta = params[ijkparam].costheta;
620
fc.p3[ii][jj][kk].lambda_epsilon = params[ijkparam].lambda_epsilon;
621
fc.p3[ii][jj][kk].lambda_epsilon2 = params[ijkparam].lambda_epsilon2;
630
if (INTEL_NBOR_PAD > 1)
631
_host_pad = INTEL_NBOR_PAD * sizeof(float) / sizeof(flt_t);
633
#ifdef _LMP_INTEL_OFFLOAD
634
if (_cop < 0) return;
635
FC_PACKED0_T *op2 = fc.p2[0];
636
FC_PACKED1_T *op2f = fc.p2f[0];
637
FC_PACKED2_T *op2e = fc.p2e[0];
638
FC_PACKED3_T *op3 = fc.p3[0][0];
639
flt_t * ocutneighsq = cutneighsq[0];
640
int tp1sq = tp1 * tp1;
641
int tp1cu = tp1sq * tp1;
642
if (op2 != NULL && op2f != NULL && op2e != NULL && op3 != NULL &&
643
ocutneighsq != NULL) {
644
#pragma offload_transfer target(mic:_cop) \
645
in(op2,op2f,op2e: length(tp1sq) alloc_if(0) free_if(0)) \
646
in(op3: length(tp1cu) alloc_if(0) free_if(0)) \
647
in(ocutneighsq: length(tp1sq))
652
/* ---------------------------------------------------------------------- */
654
template <class flt_t>
655
void PairSWIntel::ForceConst<flt_t>::set_ntypes(const int ntypes,
658
if (ntypes != _ntypes) {
660
fc_packed0 *op2 = p2[0];
661
fc_packed1 *op2f = p2f[0];
662
fc_packed2 *op2e = p2e[0];
663
fc_packed3 *op3 = p3[0][0];
665
#ifdef _LMP_INTEL_OFFLOAD
666
if (op2 != NULL && op2f != NULL && op2e != NULL && op3 != NULL &&
668
#pragma offload_transfer target(mic:_cop) \
669
nocopy(op2, op2f, op2e, op3: alloc_if(0) free_if(1))
673
_memory->destroy(op2);
674
_memory->destroy(op2f);
675
_memory->destroy(op2e);
676
_memory->destroy(op3);
680
memory->create(p2,ntypes,ntypes,"fc.p2");
681
memory->create(p2f,ntypes,ntypes,"fc.p2f");
682
memory->create(p2e,ntypes,ntypes,"fc.p2e");
683
memory->create(p3,ntypes,ntypes,ntypes,"fc.p3");
685
#ifdef _LMP_INTEL_OFFLOAD
686
fc_packed0 *op2 = p2[0];
687
fc_packed1 *op2f = p2f[0];
688
fc_packed2 *op2e = p2e[0];
689
fc_packed3 *op3 = p3[0][0];
690
int tp1sq = ntypes * ntypes;
691
int tp1cu = tp1sq * ntypes;
692
if (op2 != NULL && op2f != NULL && op2e != NULL && op3 != NULL &&
694
#pragma offload_transfer target(mic:cop) \
695
nocopy(op2,op2f,op2e: length(tp1sq) alloc_if(1) free_if(0)) \
696
nocopy(op3: length(tp1cu) alloc_if(1) free_if(0))