1
/* ----------------------------------------------------------------------
2
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
3
http://lammps.sandia.gov, Sandia National Laboratories
4
Steve Plimpton, sjplimp@sandia.gov
6
Copyright (2003) Sandia Corporation. Under the terms of Contract
7
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
8
certain rights in this software. This software is distributed under
9
the GNU General Public License.
11
See the README file in the top-level LAMMPS directory.
12
------------------------------------------------------------------------- */
14
/* ----------------------------------------------------------------------
15
Contributing authors: Aidan Thompson, Christian Trott, SNL
16
------------------------------------------------------------------------- */
20
#include "math_const.h"
21
#include "math_extra.h"
24
#include "openmp_snap.h"
32
using namespace LAMMPS_NS;
33
using namespace MathConst;
35
/* ----------------------------------------------------------------------
37
this implementation is based on the method outlined
38
in Bartok[1], using formulae from VMK[2].
40
for the Clebsch-Gordan coefficients, we
41
convert the VMK half-integral labels
42
a, b, c, alpha, beta, gamma
43
to array offsets j1, j2, j, m1, m2, m
44
using the following relations:
50
m1 = alpha+a 2*alpha = 2*m1 - j1
51
m2 = beta+b or 2*beta = 2*m2 - j2
52
m = gamma+c 2*gamma = 2*m - j
66
and the requirement that
67
a+b+c be integral implies that
75
2*m - j = 2*m1 - j1 + 2*m2 - j2
77
Similarly, for the Wigner U-functions U(J,m,m') we
78
convert the half-integral labels J,m,m' to
79
array offsets j,ma,mb:
90
For the bispectrum components B(J1,J2,J) we convert to:
98
|J1-J2| <= J <= J1+J2, for j1+j2+j integral
102
|j1-j2| <= j <= j1+j2, for j1+j2+j even integer
106
j = |j1-j2|, |j1-j2|+2,...,j1+j2-2,j1+j2
108
[1] Albert Bartok-Partay, "Gaussian Approximation..."
109
Doctoral Thesis, Cambrindge University, (2009)
111
[2] D. A. Varshalovich, A. N. Moskalev, and V. K. Khersonskii,
112
"Quantum Theory of Angular Momentum," World Scientific (1988)
114
------------------------------------------------------------------------- */
116
SNA::SNA(LAMMPS* lmp, double rfac0_in,
117
int twojmax_in, int diagonalstyle_in, int use_shared_arrays_in,
118
double rmin0_in, int switch_flag_in) : Pointers(lmp)
122
use_shared_arrays = use_shared_arrays_in;
125
switch_flag = switch_flag_in;
127
twojmax = twojmax_in;
128
diagonalstyle = diagonalstyle_in;
130
ncoeff = compute_ncoeff();
132
create_twojmax_arrays();
136
memory->create(bvec, ncoeff, "pair:bvec");
137
memory->create(dbvec, ncoeff, 3, "pair:dbvec");
146
timers = new double[20];
147
for(int i = 0; i < 20; i++) timers[i] = 0;
156
/* ---------------------------------------------------------------------- */
160
if(!use_shared_arrays) {
161
destroy_twojmax_arrays();
162
memory->destroy(rij);
163
memory->destroy(inside);
165
memory->destroy(rcutij);
166
memory->destroy(bvec);
167
memory->destroy(dbvec);
172
void SNA::build_indexlist()
174
if(diagonalstyle == 0) {
177
for(int j1 = 0; j1 <= twojmax; j1++)
178
for(int j2 = 0; j2 <= j1; j2++)
179
for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2)
182
// indexList can be changed here
184
idxj = new SNA_LOOPINDICES[idxj_count];
185
idxj_max = idxj_count;
189
for(int j1 = 0; j1 <= twojmax; j1++)
190
for(int j2 = 0; j2 <= j1; j2++)
191
for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) {
192
idxj[idxj_count].j1 = j1;
193
idxj[idxj_count].j2 = j2;
194
idxj[idxj_count].j = j;
199
if(diagonalstyle == 1) {
202
for(int j1 = 0; j1 <= twojmax; j1++)
203
for(int j = 0; j <= MIN(twojmax, 2 * j1); j += 2) {
207
// indexList can be changed here
209
idxj = new SNA_LOOPINDICES[idxj_count];
210
idxj_max = idxj_count;
214
for(int j1 = 0; j1 <= twojmax; j1++)
215
for(int j = 0; j <= MIN(twojmax, 2 * j1); j += 2) {
216
idxj[idxj_count].j1 = j1;
217
idxj[idxj_count].j2 = j1;
218
idxj[idxj_count].j = j;
223
if(diagonalstyle == 2) {
226
for(int j1 = 0; j1 <= twojmax; j1++) {
230
// indexList can be changed here
232
idxj = new SNA_LOOPINDICES[idxj_count];
233
idxj_max = idxj_count;
237
for(int j1 = 0; j1 <= twojmax; j1++) {
238
idxj[idxj_count].j1 = j1;
239
idxj[idxj_count].j2 = j1;
240
idxj[idxj_count].j = j1;
245
if(diagonalstyle == 3) {
248
for(int j1 = 0; j1 <= twojmax; j1++)
249
for(int j2 = 0; j2 <= j1; j2++)
250
for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2)
251
if (j >= j1) idxj_count++;
253
// indexList can be changed here
255
idxj = new SNA_LOOPINDICES[idxj_count];
256
idxj_max = idxj_count;
260
for(int j1 = 0; j1 <= twojmax; j1++)
261
for(int j2 = 0; j2 <= j1; j2++)
262
for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2)
264
idxj[idxj_count].j1 = j1;
265
idxj[idxj_count].j2 = j2;
266
idxj[idxj_count].j = j;
272
/* ---------------------------------------------------------------------- */
276
init_clebsch_gordan();
281
void SNA::grow_rij(int newnmax)
283
if(newnmax <= nmax) return;
287
if(!use_shared_arrays) {
288
memory->destroy(rij);
289
memory->destroy(inside);
291
memory->destroy(rcutij);
292
memory->create(rij, nmax, 3, "pair:rij");
293
memory->create(inside, nmax, "pair:inside");
294
memory->create(wj, nmax, "pair:wj");
295
memory->create(rcutij, nmax, "pair:rcutij");
298
/* ----------------------------------------------------------------------
299
compute Ui by summing over neighbors j
300
------------------------------------------------------------------------- */
302
void SNA::compute_ui(int jnum)
304
double rsq, r, x, y, z, z0, theta0;
306
// utot(j,ma,mb) = 0 for all j,ma,ma
307
// utot(j,ma,ma) = 1 for all j,ma
308
// for j in neighbors of i:
309
// compute r0 = (x,y,z,z0)
310
// utot(j,ma,mb) += u(r0;j,ma,mb) for all j,ma,mb
313
addself_uarraytot(wself);
316
clock_gettime(CLOCK_REALTIME, &starttime);
319
for(int j = 0; j < jnum; j++) {
323
rsq = x * x + y * y + z * z;
326
theta0 = (r - rmin0) * rfac0 * MY_PI / (rcutij[j] - rmin0);
327
// theta0 = (r - rmin0) * rscale0;
328
z0 = r / tan(theta0);
330
compute_uarray(x, y, z, z0, r);
331
add_uarraytot(r, wj[j], rcutij[j]);
335
clock_gettime(CLOCK_REALTIME, &endtime);
336
timers[0] += (endtime.tv_sec - starttime.tv_sec + 1.0 *
337
(endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
342
void SNA::compute_ui_omp(int jnum, int sub_threads)
344
double rsq, r, x, y, z, z0, theta0;
346
// utot(j,ma,mb) = 0 for all j,ma,ma
347
// utot(j,ma,ma) = 1 for all j,ma
348
// for j in neighbors of i:
349
// compute r0 = (x,y,z,z0)
350
// utot(j,ma,mb) += u(r0;j,ma,mb) for all j,ma,mb
353
addself_uarraytot(wself);
355
for(int j = 0; j < jnum; j++) {
359
rsq = x * x + y * y + z * z;
361
theta0 = (r - rmin0) * rfac0 * MY_PI / (rcutij[j] - rmin0);
362
// theta0 = (r - rmin0) * rscale0;
363
z0 = r / tan(theta0);
364
omp_set_num_threads(sub_threads);
367
#pragma omp parallel shared(x,y,z,z0,r,sub_threads) default(none)
370
compute_uarray_omp(x, y, z, z0, r, sub_threads);
372
add_uarraytot(r, wj[j], rcutij[j]);
378
/* ----------------------------------------------------------------------
379
compute Zi by summing over products of Ui
380
------------------------------------------------------------------------- */
382
void SNA::compute_zi()
384
// for j1 = 0,...,twojmax
385
// for j2 = 0,twojmax
386
// for j = |j1-j2|,Min(twojmax,j1+j2),2
388
// for mb = 0,...,jmid
389
// z(j1,j2,j,ma,mb) = 0
390
// for ma1 = Max(0,ma+(j1-j2-j)/2),Min(j1,ma+(j1+j2-j)/2)
392
// ma2 = ma-ma1+(j1+j2-j)/2;
393
// for mb1 = Max(0,mb+(j1-j2-j)/2),Min(j1,mb+(j1+j2-j)/2)
394
// mb2 = mb-mb1+(j1+j2-j)/2;
395
// sumb1 += cg(j1,mb1,j2,mb2,j) *
396
// u(j1,ma1,mb1) * u(j2,ma2,mb2)
397
// z(j1,j2,j,ma,mb) += sumb1*cg(j1,ma1,j2,ma2,j)
400
clock_gettime(CLOCK_REALTIME, &starttime);
403
// compute_dbidrj() requires full j1/j2/j chunk of z elements
404
// use zarray j1/j2 symmetry
406
for(int j1 = 0; j1 <= twojmax; j1++)
407
for(int j2 = 0; j2 <= j1; j2++) {
408
for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) {
409
double sumb1_r, sumb1_i;
411
for(int mb = 0; 2*mb <= j; mb++)
412
for(int ma = 0; ma <= j; ma++) {
413
zarray_r[j1][j2][j][ma][mb] = 0.0;
414
zarray_i[j1][j2][j][ma][mb] = 0.0;
416
for(int ma1 = MAX(0, (2 * ma - j - j2 + j1) / 2);
417
ma1 <= MIN(j1, (2 * ma - j + j2 + j1) / 2); ma1++) {
421
ma2 = (2 * ma - j - (2 * ma1 - j1) + j2) / 2;
423
for(int mb1 = MAX(0, (2 * mb - j - j2 + j1) / 2);
424
mb1 <= MIN(j1, (2 * mb - j + j2 + j1) / 2); mb1++) {
426
mb2 = (2 * mb - j - (2 * mb1 - j1) + j2) / 2;
427
sumb1_r += cgarray[j1][j2][j][mb1][mb2] *
428
(uarraytot_r[j1][ma1][mb1] * uarraytot_r[j2][ma2][mb2] -
429
uarraytot_i[j1][ma1][mb1] * uarraytot_i[j2][ma2][mb2]);
430
sumb1_i += cgarray[j1][j2][j][mb1][mb2] *
431
(uarraytot_r[j1][ma1][mb1] * uarraytot_i[j2][ma2][mb2] +
432
uarraytot_i[j1][ma1][mb1] * uarraytot_r[j2][ma2][mb2]);
433
} // end loop over mb1
435
zarray_r[j1][j2][j][ma][mb] +=
436
sumb1_r * cgarray[j1][j2][j][ma1][ma2];
437
zarray_i[j1][j2][j][ma][mb] +=
438
sumb1_i * cgarray[j1][j2][j][ma1][ma2];
439
} // end loop over ma1
440
} // end loop over ma, mb
442
} // end loop over j1, j2
445
clock_gettime(CLOCK_REALTIME, &endtime);
446
timers[1] += (endtime.tv_sec - starttime.tv_sec + 1.0 *
447
(endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
451
void SNA::compute_zi_omp(int sub_threads)
453
// for j1 = 0,...,twojmax
454
// for j2 = 0,twojmax
455
// for j = |j1-j2|,Min(twojmax,j1+j2),2
458
// z(j1,j2,j,ma,mb) = 0
459
// for ma1 = Max(0,ma+(j1-j2-j)/2),Min(j1,ma+(j1+j2-j)/2)
461
// ma2 = ma-ma1+(j1+j2-j)/2;
462
// for mb1 = Max(0,mb+(j1-j2-j)/2),Min(j1,mb+(j1+j2-j)/2)
463
// mb2 = mb-mb1+(j1+j2-j)/2;
464
// sumb1 += cg(j1,mb1,j2,mb2,j) *
465
// u(j1,ma1,mb1) * u(j2,ma2,mb2)
466
// z(j1,j2,j,ma,mb) += sumb1*cg(j1,ma1,j2,ma2,j)
468
if(omp_in_parallel())
469
omp_set_num_threads(sub_threads);
471
// compute_dbidrj() requires full j1/j2/j chunk of z elements
472
// use zarray j1/j2 symmetry
475
#pragma omp parallel for schedule(auto) default(none)
477
for(int j1 = 0; j1 <= twojmax; j1++)
478
for(int j2 = 0; j2 <= j1; j2++)
479
for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) {
481
double sumb1_r, sumb1_i;
484
for(int ma = 0; ma <= j; ma++)
485
for(int mb = 0; mb <= j; mb++) {
486
zarray_r[j1][j2][j][ma][mb] = 0.0;
487
zarray_i[j1][j2][j][ma][mb] = 0.0;
489
for(int ma1 = MAX(0, (2 * ma - j - j2 + j1) / 2);
490
ma1 <= MIN(j1, (2 * ma - j + j2 + j1) / 2); ma1++) {
494
ma2 = (2 * ma - j - (2 * ma1 - j1) + j2) / 2;
496
for(int mb1 = MAX(0, (2 * mb - j - j2 + j1) / 2);
497
mb1 <= MIN(j1, (2 * mb - j + j2 + j1) / 2); mb1++) {
499
mb2 = (2 * mb - j - (2 * mb1 - j1) + j2) / 2;
500
sumb1_r += cgarray[j1][j2][j][mb1][mb2] *
501
(uarraytot_r[j1][ma1][mb1] * uarraytot_r[j2][ma2][mb2] -
502
uarraytot_i[j1][ma1][mb1] * uarraytot_i[j2][ma2][mb2]);
503
sumb1_i += cgarray[j1][j2][j][mb1][mb2] *
504
(uarraytot_r[j1][ma1][mb1] * uarraytot_i[j2][ma2][mb2] +
505
uarraytot_i[j1][ma1][mb1] * uarraytot_r[j2][ma2][mb2]);
508
zarray_r[j1][j2][j][ma][mb] +=
509
sumb1_r * cgarray[j1][j2][j][ma1][ma2];
510
zarray_i[j1][j2][j][ma][mb] +=
511
sumb1_i * cgarray[j1][j2][j][ma1][ma2];
517
/* ----------------------------------------------------------------------
518
compute Bi by summing conj(Ui)*Zi
519
------------------------------------------------------------------------- */
521
void SNA::compute_bi()
523
// for j1 = 0,...,twojmax
524
// for j2 = 0,twojmax
525
// for j = |j1-j2|,Min(twojmax,j1+j2),2
527
// for mb = 0,...,jmid
530
// 2*Conj(u(j,ma,mb))*z(j1,j2,j,ma,mb)
533
clock_gettime(CLOCK_REALTIME, &starttime);
536
for(int j1 = 0; j1 <= twojmax; j1++)
537
for(int j2 = 0; j2 <= j1; j2++) {
538
for(int j = abs(j1 - j2);
539
j <= MIN(twojmax, j1 + j2); j += 2) {
540
barray[j1][j2][j] = 0.0;
542
for(int mb = 0; 2*mb < j; mb++) {
543
for(int ma = 0; ma <= j; ma++) {
545
uarraytot_r[j][ma][mb] * zarray_r[j1][j2][j][ma][mb] +
546
uarraytot_i[j][ma][mb] * zarray_i[j1][j2][j][ma][mb];
550
// For j even, special treatment for middle column
554
for(int ma = 0; ma < mb; ma++)
556
uarraytot_r[j][ma][mb] * zarray_r[j1][j2][j][ma][mb] +
557
uarraytot_i[j][ma][mb] * zarray_i[j1][j2][j][ma][mb];
560
(uarraytot_r[j][ma][mb] * zarray_r[j1][j2][j][ma][mb] +
561
uarraytot_i[j][ma][mb] * zarray_i[j1][j2][j][ma][mb])*0.5;
564
barray[j1][j2][j] *= 2.0;
569
clock_gettime(CLOCK_REALTIME, &endtime);
570
timers[2] += (endtime.tv_sec - starttime.tv_sec + 1.0 *
571
(endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
576
/* ----------------------------------------------------------------------
577
copy Bi array to a vector
578
------------------------------------------------------------------------- */
580
void SNA::copy_bi2bvec()
582
int ncount, j1, j2, j;
586
for(j1 = 0; j1 <= twojmax; j1++)
587
if(diagonalstyle == 0) {
588
for(j2 = 0; j2 <= j1; j2++)
589
for(j = abs(j1 - j2);
590
j <= MIN(twojmax, j1 + j2); j += 2) {
591
bvec[ncount] = barray[j1][j2][j];
594
} else if(diagonalstyle == 1) {
596
for(j = abs(j1 - j2);
597
j <= MIN(twojmax, j1 + j2); j += 2) {
598
bvec[ncount] = barray[j1][j2][j];
601
} else if(diagonalstyle == 2) {
603
bvec[ncount] = barray[j1][j2][j];
605
} else if(diagonalstyle == 3) {
606
for(j2 = 0; j2 <= j1; j2++)
607
for(j = abs(j1 - j2);
608
j <= MIN(twojmax, j1 + j2); j += 2)
610
bvec[ncount] = barray[j1][j2][j];
616
/* ----------------------------------------------------------------------
617
calculate derivative of Ui w.r.t. atom j
618
------------------------------------------------------------------------- */
620
void SNA::compute_duidrj(double* rij, double wj, double rcut)
622
double rsq, r, x, y, z, z0, theta0, cs, sn;
628
rsq = x * x + y * y + z * z;
630
double rscale0 = rfac0 * MY_PI / (rcut - rmin0);
631
theta0 = (r - rmin0) * rscale0;
635
dz0dr = z0 / r - (r*rscale0) * (rsq + z0 * z0) / rsq;
638
clock_gettime(CLOCK_REALTIME, &starttime);
641
compute_duarray(x, y, z, z0, r, dz0dr, wj, rcut);
644
clock_gettime(CLOCK_REALTIME, &endtime);
645
timers[3] += (endtime.tv_sec - starttime.tv_sec + 1.0 *
646
(endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
651
/* ----------------------------------------------------------------------
652
calculate derivative of Bi w.r.t. atom j
653
variant using indexlist for j1,j2,j
654
variant not using symmetry relation
655
------------------------------------------------------------------------- */
657
void SNA::compute_dbidrj_nonsymm()
659
// for j1 = 0,...,twojmax
660
// for j2 = 0,twojmax
661
// for j = |j1-j2|,Min(twojmax,j1+j2),2
666
// for ma1 = Max(0,ma+(j1-j2-j)/2),Min(j1,ma+(j1+j2-j)/2)
668
// ma2 = ma-ma1+(j1+j2-j)/2;
669
// for mb1 = Max(0,mb+(j1-j2-j)/2),Min(j1,mb+(j1+j2-j)/2)
670
// mb2 = mb-mb1+(j1+j2-j)/2;
671
// sumb1 += cg(j1,mb1,j2,mb2,j) *
672
// (dudr(j1,ma1,mb1) * u(j2,ma2,mb2) +
673
// u(j1,ma1,mb1) * dudr(j2,ma2,mb2))
674
// dzdr += sumb1*cg(j1,ma1,j2,ma2,j)
676
// Conj(dudr(j,ma,mb))*z(j1,j2,j,ma,mb) +
677
// Conj(u(j,ma,mb))*dzdr
680
double* dudr_r, *dudr_i;
681
double sumb1_r[3], sumb1_i[3], dzdr_r[3], dzdr_i[3];
685
clock_gettime(CLOCK_REALTIME, &starttime);
688
for(int JJ = 0; JJ < idxj_max; JJ++) {
689
const int j1 = idxj[JJ].j1;
690
const int j2 = idxj[JJ].j2;
691
const int j = idxj[JJ].j;
693
dbdr = dbarray[j1][j2][j];
698
double** *j1duarray_r = duarray_r[j1];
699
double** *j2duarray_r = duarray_r[j2];
700
double** *j1duarray_i = duarray_i[j1];
701
double** *j2duarray_i = duarray_i[j2];
702
double** j1uarraytot_r = uarraytot_r[j1];
703
double** j2uarraytot_r = uarraytot_r[j2];
704
double** j1uarraytot_i = uarraytot_i[j1];
705
double** j2uarraytot_i = uarraytot_i[j2];
706
double** j1j2jcgarray = cgarray[j1][j2][j];
708
for(int ma = 0; ma <= j; ma++)
709
for(int mb = 0; mb <= j; mb++) {
717
const int max_mb1 = MIN(j1, (2 * mb - j + j2 + j1) / 2) + 1;
718
const int max_ma1 = MIN(j1, (2 * ma - j + j2 + j1) / 2) + 1;
720
for(int ma1 = MAX(0, (2 * ma - j - j2 + j1) / 2);
721
ma1 < max_ma1; ma1++) {
723
ma2 = (2 * ma - j - (2 * ma1 - j1) + j2) / 2;
731
//inside loop 54 operations (mul and add)
732
for(int mb1 = MAX(0, (2 * mb - j - j2 + j1) / 2),
733
mb2 = mb + (j1 + j2 - j) / 2 - mb1;
734
mb1 < max_mb1; mb1++, mb2--) {
736
double* dudr1_r, *dudr1_i, *dudr2_r, *dudr2_i;
738
dudr1_r = j1duarray_r[ma1][mb1];
739
dudr2_r = j2duarray_r[ma2][mb2];
740
dudr1_i = j1duarray_i[ma1][mb1];
741
dudr2_i = j2duarray_i[ma2][mb2];
743
const double cga_mb1mb2 = j1j2jcgarray[mb1][mb2];
744
const double uat_r_ma2mb2 = cga_mb1mb2 * j2uarraytot_r[ma2][mb2];
745
const double uat_r_ma1mb1 = cga_mb1mb2 * j1uarraytot_r[ma1][mb1];
746
const double uat_i_ma2mb2 = cga_mb1mb2 * j2uarraytot_i[ma2][mb2];
747
const double uat_i_ma1mb1 = cga_mb1mb2 * j1uarraytot_i[ma1][mb1];
749
for(int k = 0; k < 3; k++) {
750
sumb1_r[k] += dudr1_r[k] * uat_r_ma2mb2;
751
sumb1_r[k] -= dudr1_i[k] * uat_i_ma2mb2;
752
sumb1_i[k] += dudr1_r[k] * uat_i_ma2mb2;
753
sumb1_i[k] += dudr1_i[k] * uat_r_ma2mb2;
755
sumb1_r[k] += dudr2_r[k] * uat_r_ma1mb1;
756
sumb1_r[k] -= dudr2_i[k] * uat_i_ma1mb1;
757
sumb1_i[k] += dudr2_r[k] * uat_i_ma1mb1;
758
sumb1_i[k] += dudr2_i[k] * uat_r_ma1mb1;
760
} // end loop over mb1,mb2
762
// dzdr += sumb1*cg(j1,ma1,j2,ma2,j)
764
dzdr_r[0] += sumb1_r[0] * j1j2jcgarray[ma1][ma2];
765
dzdr_r[1] += sumb1_r[1] * j1j2jcgarray[ma1][ma2];
766
dzdr_r[2] += sumb1_r[2] * j1j2jcgarray[ma1][ma2];
767
dzdr_i[0] += sumb1_i[0] * j1j2jcgarray[ma1][ma2];
768
dzdr_i[1] += sumb1_i[1] * j1j2jcgarray[ma1][ma2];
769
dzdr_i[2] += sumb1_i[2] * j1j2jcgarray[ma1][ma2];
770
} // end loop over ma1,ma2
773
// Conj(dudr(j,ma,mb))*z(j1,j2,j,ma,mb) +
774
// Conj(u(j,ma,mb))*dzdr
776
dudr_r = duarray_r[j][ma][mb];
777
dudr_i = duarray_i[j][ma][mb];
779
for(int k = 0; k < 3; k++)
781
(dudr_r[k] * zarray_r[j1][j2][j][ma][mb] +
782
dudr_i[k] * zarray_i[j1][j2][j][ma][mb]) +
783
(uarraytot_r[j][ma][mb] * dzdr_r[k] +
784
uarraytot_i[j][ma][mb] * dzdr_i[k]);
785
} //end loop over ma mb
787
} //end loop over j1 j2 j
790
clock_gettime(CLOCK_REALTIME, &endtime);
791
timers[4] += (endtime.tv_sec - starttime.tv_sec + 1.0 *
792
(endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
797
/* ----------------------------------------------------------------------
798
calculate derivative of Bi w.r.t. atom j
799
variant using indexlist for j1,j2,j
800
variant using symmetry relation
801
------------------------------------------------------------------------- */
803
void SNA::compute_dbidrj()
805
// for j1 = 0,...,twojmax
806
// for j2 = 0,twojmax
807
// for j = |j1-j2|,Min(twojmax,j1+j2),2
809
// for mb = 0,...,jmid
812
// Conj(dudr(j,ma,mb))*z(j1,j2,j,ma,mb)
813
// dbdr(j1,j2,j) += 2*zdb
815
// for mb1 = 0,...,j1mid
816
// for ma1 = 0,...,j1
818
// Conj(dudr(j1,ma1,mb1))*z(j1,j2,j,ma1,mb1)
819
// dbdr(j1,j2,j) += 2*zdb*(j+1)/(j1+1)
821
// for mb2 = 0,...,j2mid
822
// for ma2 = 0,...,j2
824
// Conj(dudr(j2,ma2,mb2))*z(j1,j,j2,ma2,mb2)
825
// dbdr(j1,j2,j) += 2*zdb*(j+1)/(j2+1)
828
double* dudr_r, *dudr_i;
830
double** jjjzarray_r;
831
double** jjjzarray_i;
832
double jjjmambzarray_r;
833
double jjjmambzarray_i;
836
clock_gettime(CLOCK_REALTIME, &starttime);
839
for(int JJ = 0; JJ < idxj_max; JJ++) {
840
const int j1 = idxj[JJ].j1;
841
const int j2 = idxj[JJ].j2;
842
const int j = idxj[JJ].j;
844
dbdr = dbarray[j1][j2][j];
849
// Sum terms Conj(dudr(j,ma,mb))*z(j1,j2,j,ma,mb)
851
for(int k = 0; k < 3; k++)
854
// use zarray j1/j2 symmetry (optional)
857
jjjzarray_r = zarray_r[j1][j2][j];
858
jjjzarray_i = zarray_i[j1][j2][j];
860
jjjzarray_r = zarray_r[j2][j1][j];
861
jjjzarray_i = zarray_i[j2][j1][j];
864
for(int mb = 0; 2*mb < j; mb++)
865
for(int ma = 0; ma <= j; ma++) {
867
dudr_r = duarray_r[j][ma][mb];
868
dudr_i = duarray_i[j][ma][mb];
869
jjjmambzarray_r = jjjzarray_r[ma][mb];
870
jjjmambzarray_i = jjjzarray_i[ma][mb];
871
for(int k = 0; k < 3; k++)
873
dudr_r[k] * jjjmambzarray_r +
874
dudr_i[k] * jjjmambzarray_i;
876
} //end loop over ma mb
878
// For j even, handle middle column
882
for(int ma = 0; ma < mb; ma++) {
883
dudr_r = duarray_r[j][ma][mb];
884
dudr_i = duarray_i[j][ma][mb];
885
jjjmambzarray_r = jjjzarray_r[ma][mb];
886
jjjmambzarray_i = jjjzarray_i[ma][mb];
887
for(int k = 0; k < 3; k++)
889
dudr_r[k] * jjjmambzarray_r +
890
dudr_i[k] * jjjmambzarray_i;
893
dudr_r = duarray_r[j][ma][mb];
894
dudr_i = duarray_i[j][ma][mb];
895
jjjmambzarray_r = jjjzarray_r[ma][mb];
896
jjjmambzarray_i = jjjzarray_i[ma][mb];
897
for(int k = 0; k < 3; k++)
899
(dudr_r[k] * jjjmambzarray_r +
900
dudr_i[k] * jjjmambzarray_i)*0.5;
903
for(int k = 0; k < 3; k++)
904
dbdr[k] += 2.0*sumzdu_r[k];
906
// Sum over Conj(dudr(j1,ma1,mb1))*z(j,j2,j1,ma1,mb1)
908
double j1fac = (j+1)/(j1+1.0);
910
for(int k = 0; k < 3; k++)
913
// use zarray j1/j2 symmetry (optional)
916
jjjzarray_r = zarray_r[j][j2][j1];
917
jjjzarray_i = zarray_i[j][j2][j1];
919
jjjzarray_r = zarray_r[j2][j][j1];
920
jjjzarray_i = zarray_i[j2][j][j1];
923
for(int mb1 = 0; 2*mb1 < j1; mb1++)
924
for(int ma1 = 0; ma1 <= j1; ma1++) {
926
dudr_r = duarray_r[j1][ma1][mb1];
927
dudr_i = duarray_i[j1][ma1][mb1];
928
jjjmambzarray_r = jjjzarray_r[ma1][mb1];
929
jjjmambzarray_i = jjjzarray_i[ma1][mb1];
930
for(int k = 0; k < 3; k++)
932
dudr_r[k] * jjjmambzarray_r +
933
dudr_i[k] * jjjmambzarray_i;
935
} //end loop over ma1 mb1
937
// For j1 even, handle middle column
941
for(int ma1 = 0; ma1 < mb1; ma1++) {
942
dudr_r = duarray_r[j1][ma1][mb1];
943
dudr_i = duarray_i[j1][ma1][mb1];
944
jjjmambzarray_r = jjjzarray_r[ma1][mb1];
945
jjjmambzarray_i = jjjzarray_i[ma1][mb1];
946
for(int k = 0; k < 3; k++)
948
dudr_r[k] * jjjmambzarray_r +
949
dudr_i[k] * jjjmambzarray_i;
952
dudr_r = duarray_r[j1][ma1][mb1];
953
dudr_i = duarray_i[j1][ma1][mb1];
954
jjjmambzarray_r = jjjzarray_r[ma1][mb1];
955
jjjmambzarray_i = jjjzarray_i[ma1][mb1];
956
for(int k = 0; k < 3; k++)
958
(dudr_r[k] * jjjmambzarray_r +
959
dudr_i[k] * jjjmambzarray_i)*0.5;
962
for(int k = 0; k < 3; k++)
963
dbdr[k] += 2.0*sumzdu_r[k]*j1fac;
965
// Sum over Conj(dudr(j2,ma2,mb2))*z(j1,j,j2,ma2,mb2)
967
double j2fac = (j+1)/(j2+1.0);
969
for(int k = 0; k < 3; k++)
972
// use zarray j1/j2 symmetry (optional)
975
jjjzarray_r = zarray_r[j1][j][j2];
976
jjjzarray_i = zarray_i[j1][j][j2];
978
jjjzarray_r = zarray_r[j][j1][j2];
979
jjjzarray_i = zarray_i[j][j1][j2];
982
for(int mb2 = 0; 2*mb2 < j2; mb2++)
983
for(int ma2 = 0; ma2 <= j2; ma2++) {
985
dudr_r = duarray_r[j2][ma2][mb2];
986
dudr_i = duarray_i[j2][ma2][mb2];
987
jjjmambzarray_r = jjjzarray_r[ma2][mb2];
988
jjjmambzarray_i = jjjzarray_i[ma2][mb2];
989
for(int k = 0; k < 3; k++)
991
dudr_r[k] * jjjmambzarray_r +
992
dudr_i[k] * jjjmambzarray_i;
994
} //end loop over ma2 mb2
996
// For j2 even, handle middle column
1000
for(int ma2 = 0; ma2 < mb2; ma2++) {
1001
dudr_r = duarray_r[j2][ma2][mb2];
1002
dudr_i = duarray_i[j2][ma2][mb2];
1003
jjjmambzarray_r = jjjzarray_r[ma2][mb2];
1004
jjjmambzarray_i = jjjzarray_i[ma2][mb2];
1005
for(int k = 0; k < 3; k++)
1007
dudr_r[k] * jjjmambzarray_r +
1008
dudr_i[k] * jjjmambzarray_i;
1011
dudr_r = duarray_r[j2][ma2][mb2];
1012
dudr_i = duarray_i[j2][ma2][mb2];
1013
jjjmambzarray_r = jjjzarray_r[ma2][mb2];
1014
jjjmambzarray_i = jjjzarray_i[ma2][mb2];
1015
for(int k = 0; k < 3; k++)
1017
(dudr_r[k] * jjjmambzarray_r +
1018
dudr_i[k] * jjjmambzarray_i)*0.5;
1021
for(int k = 0; k < 3; k++)
1022
dbdr[k] += 2.0*sumzdu_r[k]*j2fac;
1024
} //end loop over j1 j2 j
1027
clock_gettime(CLOCK_REALTIME, &endtime);
1028
timers[4] += (endtime.tv_sec - starttime.tv_sec + 1.0 *
1029
(endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
1034
/* ----------------------------------------------------------------------
1035
copy Bi derivatives into a vector
1036
------------------------------------------------------------------------- */
1038
void SNA::copy_dbi2dbvec()
1040
int ncount, j1, j2, j;
1044
for(j1 = 0; j1 <= twojmax; j1++) {
1045
if(diagonalstyle == 0) {
1046
for(j2 = 0; j2 <= j1; j2++)
1047
for(j = abs(j1 - j2);
1048
j <= MIN(twojmax, j1 + j2); j += 2) {
1049
dbvec[ncount][0] = dbarray[j1][j2][j][0];
1050
dbvec[ncount][1] = dbarray[j1][j2][j][1];
1051
dbvec[ncount][2] = dbarray[j1][j2][j][2];
1054
} else if(diagonalstyle == 1) {
1056
for(j = abs(j1 - j2);
1057
j <= MIN(twojmax, j1 + j2); j += 2) {
1058
dbvec[ncount][0] = dbarray[j1][j2][j][0];
1059
dbvec[ncount][1] = dbarray[j1][j2][j][1];
1060
dbvec[ncount][2] = dbarray[j1][j2][j][2];
1063
} else if(diagonalstyle == 2) {
1065
dbvec[ncount][0] = dbarray[j1][j2][j][0];
1066
dbvec[ncount][1] = dbarray[j1][j2][j][1];
1067
dbvec[ncount][2] = dbarray[j1][j2][j][2];
1069
} else if(diagonalstyle == 3) {
1070
for(j2 = 0; j2 <= j1; j2++)
1071
for(j = abs(j1 - j2);
1072
j <= MIN(twojmax, j1 + j2); j += 2)
1074
dbvec[ncount][0] = dbarray[j1][j2][j][0];
1075
dbvec[ncount][1] = dbarray[j1][j2][j][1];
1076
dbvec[ncount][2] = dbarray[j1][j2][j][2];
1083
/* ---------------------------------------------------------------------- */
1085
void SNA::zero_uarraytot()
1087
for (int j = 0; j <= twojmax; j++)
1088
for (int ma = 0; ma <= j; ma++)
1089
for (int mb = 0; mb <= j; mb++) {
1090
uarraytot_r[j][ma][mb] = 0.0;
1091
uarraytot_i[j][ma][mb] = 0.0;
1095
/* ---------------------------------------------------------------------- */
1097
void SNA::addself_uarraytot(double wself_in)
1099
for (int j = 0; j <= twojmax; j++)
1100
for (int ma = 0; ma <= j; ma++) {
1101
uarraytot_r[j][ma][ma] = wself_in;
1102
uarraytot_i[j][ma][ma] = 0.0;
1106
/* ----------------------------------------------------------------------
1107
add Wigner U-functions for one neighbor to the total
1108
------------------------------------------------------------------------- */
1110
void SNA::add_uarraytot(double r, double wj, double rcut)
1114
sfac = compute_sfac(r, rcut);
1118
for (int j = 0; j <= twojmax; j++)
1119
for (int ma = 0; ma <= j; ma++)
1120
for (int mb = 0; mb <= j; mb++) {
1121
uarraytot_r[j][ma][mb] +=
1122
sfac * uarray_r[j][ma][mb];
1123
uarraytot_i[j][ma][mb] +=
1124
sfac * uarray_i[j][ma][mb];
1128
void SNA::add_uarraytot_omp(double r, double wj, double rcut)
1132
sfac = compute_sfac(r, rcut);
1136
#if defined(_OPENMP)
1139
for (int j = 0; j <= twojmax; j++)
1140
for (int ma = 0; ma <= j; ma++)
1141
for (int mb = 0; mb <= j; mb++) {
1142
uarraytot_r[j][ma][mb] +=
1143
sfac * uarray_r[j][ma][mb];
1144
uarraytot_i[j][ma][mb] +=
1145
sfac * uarray_i[j][ma][mb];
1149
/* ----------------------------------------------------------------------
1150
compute Wigner U-functions for one neighbor
1151
------------------------------------------------------------------------- */
1153
void SNA::compute_uarray(double x, double y, double z,
1154
double z0, double r)
1157
double a_r, b_r, a_i, b_i;
1160
// compute Cayley-Klein parameters for unit quaternion
1162
r0inv = 1.0 / sqrt(r * r + z0 * z0);
1168
// VMK Section 4.8.2
1170
uarray_r[0][0][0] = 1.0;
1171
uarray_i[0][0][0] = 0.0;
1173
for (int j = 1; j <= twojmax; j++) {
1175
// fill in left side of matrix layer from previous layer
1177
for (int mb = 0; 2*mb <= j; mb++) {
1178
uarray_r[j][0][mb] = 0.0;
1179
uarray_i[j][0][mb] = 0.0;
1181
for (int ma = 0; ma < j; ma++) {
1182
rootpq = rootpqarray[j - ma][j - mb];
1183
uarray_r[j][ma][mb] +=
1185
(a_r * uarray_r[j - 1][ma][mb] +
1186
a_i * uarray_i[j - 1][ma][mb]);
1187
uarray_i[j][ma][mb] +=
1189
(a_r * uarray_i[j - 1][ma][mb] -
1190
a_i * uarray_r[j - 1][ma][mb]);
1192
rootpq = rootpqarray[ma + 1][j - mb];
1193
uarray_r[j][ma + 1][mb] =
1195
(b_r * uarray_r[j - 1][ma][mb] +
1196
b_i * uarray_i[j - 1][ma][mb]);
1197
uarray_i[j][ma + 1][mb] =
1199
(b_r * uarray_i[j - 1][ma][mb] -
1200
b_i * uarray_r[j - 1][ma][mb]);
1204
// copy left side to right side with inversion symmetry VMK 4.4(2)
1205
// u[ma-j][mb-j] = (-1)^(ma-mb)*Conj([u[ma][mb])
1208
for (int mb = 0; 2*mb <= j; mb++) {
1211
for (int ma = 0; ma <= j; ma++) {
1214
uarray_r[j][j-ma][j-mb] = uarray_r[j][ma][mb];
1215
uarray_i[j][j-ma][j-mb] = -uarray_i[j][ma][mb];
1217
uarray_r[j][j-ma][j-mb] = -uarray_r[j][ma][mb];
1218
uarray_i[j][j-ma][j-mb] = uarray_i[j][ma][mb];
1225
void SNA::compute_uarray_omp(double x, double y, double z,
1226
double z0, double r, int sub_threads)
1229
double a_r, b_r, a_i, b_i;
1232
// compute Cayley-Klein parameters for unit quaternion
1234
r0inv = 1.0 / sqrt(r * r + z0 * z0);
1240
// VMK Section 4.8.2
1242
uarray_r[0][0][0] = 1.0;
1243
uarray_i[0][0][0] = 0.0;
1245
for (int j = 1; j <= twojmax; j++) {
1246
#if defined(_OPENMP)
1249
for (int mb = 0; mb < j; mb++) {
1250
uarray_r[j][0][mb] = 0.0;
1251
uarray_i[j][0][mb] = 0.0;
1253
for (int ma = 0; ma < j; ma++) {
1254
rootpq = rootpqarray[j - ma][j - mb];
1255
uarray_r[j][ma][mb] +=
1257
(a_r * uarray_r[j - 1][ma][mb] +
1258
a_i * uarray_i[j - 1][ma][mb]);
1259
uarray_i[j][ma][mb] +=
1261
(a_r * uarray_i[j - 1][ma][mb] -
1262
a_i * uarray_r[j - 1][ma][mb]);
1264
rootpq = rootpqarray[ma + 1][j - mb];
1265
uarray_r[j][ma + 1][mb] =
1267
(b_r * uarray_r[j - 1][ma][mb] +
1268
b_i * uarray_i[j - 1][ma][mb]);
1269
uarray_i[j][ma + 1][mb] =
1271
(b_r * uarray_i[j - 1][ma][mb] -
1272
b_i * uarray_r[j - 1][ma][mb]);
1277
uarray_r[j][0][mb] = 0.0;
1278
uarray_i[j][0][mb] = 0.0;
1280
#if defined(_OPENMP)
1283
for (int ma = 0; ma < j; ma++) {
1284
rootpq = rootpqarray[j - ma][mb];
1285
uarray_r[j][ma][mb] +=
1287
(b_r * uarray_r[j - 1][ma][mb - 1] -
1288
b_i * uarray_i[j - 1][ma][mb - 1]);
1289
uarray_i[j][ma][mb] +=
1291
(b_r * uarray_i[j - 1][ma][mb - 1] +
1292
b_i * uarray_r[j - 1][ma][mb - 1]);
1294
rootpq = rootpqarray[ma + 1][mb];
1295
uarray_r[j][ma + 1][mb] =
1297
(a_r * uarray_r[j - 1][ma][mb - 1] -
1298
a_i * uarray_i[j - 1][ma][mb - 1]);
1299
uarray_i[j][ma + 1][mb] =
1301
(a_r * uarray_i[j - 1][ma][mb - 1] +
1302
a_i * uarray_r[j - 1][ma][mb - 1]);
1307
/* ----------------------------------------------------------------------
1308
compute derivatives of Wigner U-functions for one neighbor
1309
see comments in compute_uarray()
1310
------------------------------------------------------------------------- */
1312
void SNA::compute_duarray(double x, double y, double z,
1313
double z0, double r, double dz0dr,
1314
double wj, double rcut)
1317
double a_r, a_i, b_r, b_i;
1318
double da_r[3], da_i[3], db_r[3], db_i[3];
1319
double dz0[3], dr0inv[3], dr0invdr;
1322
double rinv = 1.0 / r;
1323
double ux = x * rinv;
1324
double uy = y * rinv;
1325
double uz = z * rinv;
1327
r0inv = 1.0 / sqrt(r * r + z0 * z0);
1333
dr0invdr = -pow(r0inv, 3.0) * (r + z0 * dz0dr);
1335
dr0inv[0] = dr0invdr * ux;
1336
dr0inv[1] = dr0invdr * uy;
1337
dr0inv[2] = dr0invdr * uz;
1339
dz0[0] = dz0dr * ux;
1340
dz0[1] = dz0dr * uy;
1341
dz0[2] = dz0dr * uz;
1343
for (int k = 0; k < 3; k++) {
1344
da_r[k] = dz0[k] * r0inv + z0 * dr0inv[k];
1345
da_i[k] = -z * dr0inv[k];
1350
for (int k = 0; k < 3; k++) {
1351
db_r[k] = y * dr0inv[k];
1352
db_i[k] = -x * dr0inv[k];
1358
uarray_r[0][0][0] = 1.0;
1359
duarray_r[0][0][0][0] = 0.0;
1360
duarray_r[0][0][0][1] = 0.0;
1361
duarray_r[0][0][0][2] = 0.0;
1362
uarray_i[0][0][0] = 0.0;
1363
duarray_i[0][0][0][0] = 0.0;
1364
duarray_i[0][0][0][1] = 0.0;
1365
duarray_i[0][0][0][2] = 0.0;
1367
for (int j = 1; j <= twojmax; j++) {
1368
for (int mb = 0; 2*mb <= j; mb++) {
1369
uarray_r[j][0][mb] = 0.0;
1370
duarray_r[j][0][mb][0] = 0.0;
1371
duarray_r[j][0][mb][1] = 0.0;
1372
duarray_r[j][0][mb][2] = 0.0;
1373
uarray_i[j][0][mb] = 0.0;
1374
duarray_i[j][0][mb][0] = 0.0;
1375
duarray_i[j][0][mb][1] = 0.0;
1376
duarray_i[j][0][mb][2] = 0.0;
1378
for (int ma = 0; ma < j; ma++) {
1379
rootpq = rootpqarray[j - ma][j - mb];
1380
uarray_r[j][ma][mb] += rootpq *
1381
(a_r * uarray_r[j - 1][ma][mb] +
1382
a_i * uarray_i[j - 1][ma][mb]);
1383
uarray_i[j][ma][mb] += rootpq *
1384
(a_r * uarray_i[j - 1][ma][mb] -
1385
a_i * uarray_r[j - 1][ma][mb]);
1387
for (int k = 0; k < 3; k++) {
1388
duarray_r[j][ma][mb][k] +=
1389
rootpq * (da_r[k] * uarray_r[j - 1][ma][mb] +
1390
da_i[k] * uarray_i[j - 1][ma][mb] +
1391
a_r * duarray_r[j - 1][ma][mb][k] +
1392
a_i * duarray_i[j - 1][ma][mb][k]);
1393
duarray_i[j][ma][mb][k] +=
1394
rootpq * (da_r[k] * uarray_i[j - 1][ma][mb] -
1395
da_i[k] * uarray_r[j - 1][ma][mb] +
1396
a_r * duarray_i[j - 1][ma][mb][k] -
1397
a_i * duarray_r[j - 1][ma][mb][k]);
1400
rootpq = rootpqarray[ma + 1][j - mb];
1401
uarray_r[j][ma + 1][mb] =
1402
-rootpq * (b_r * uarray_r[j - 1][ma][mb] +
1403
b_i * uarray_i[j - 1][ma][mb]);
1404
uarray_i[j][ma + 1][mb] =
1405
-rootpq * (b_r * uarray_i[j - 1][ma][mb] -
1406
b_i * uarray_r[j - 1][ma][mb]);
1408
for (int k = 0; k < 3; k++) {
1409
duarray_r[j][ma + 1][mb][k] =
1410
-rootpq * (db_r[k] * uarray_r[j - 1][ma][mb] +
1411
db_i[k] * uarray_i[j - 1][ma][mb] +
1412
b_r * duarray_r[j - 1][ma][mb][k] +
1413
b_i * duarray_i[j - 1][ma][mb][k]);
1414
duarray_i[j][ma + 1][mb][k] =
1415
-rootpq * (db_r[k] * uarray_i[j - 1][ma][mb] -
1416
db_i[k] * uarray_r[j - 1][ma][mb] +
1417
b_r * duarray_i[j - 1][ma][mb][k] -
1418
b_i * duarray_r[j - 1][ma][mb][k]);
1424
for (int mb = 0; 2*mb <= j; mb++) {
1427
for (int ma = 0; ma <= j; ma++) {
1430
uarray_r[j][j-ma][j-mb] = uarray_r[j][ma][mb];
1431
uarray_i[j][j-ma][j-mb] = -uarray_i[j][ma][mb];
1432
for (int k = 0; k < 3; k++) {
1433
duarray_r[j][j-ma][j-mb][k] = duarray_r[j][ma][mb][k];
1434
duarray_i[j][j-ma][j-mb][k] = -duarray_i[j][ma][mb][k];
1437
uarray_r[j][j-ma][j-mb] = -uarray_r[j][ma][mb];
1438
uarray_i[j][j-ma][j-mb] = uarray_i[j][ma][mb];
1439
for (int k = 0; k < 3; k++) {
1440
duarray_r[j][j-ma][j-mb][k] = -duarray_r[j][ma][mb][k];
1441
duarray_i[j][j-ma][j-mb][k] = duarray_i[j][ma][mb][k];
1448
double sfac = compute_sfac(r, rcut);
1449
double dsfac = compute_dsfac(r, rcut);
1454
for (int j = 0; j <= twojmax; j++)
1455
for (int ma = 0; ma <= j; ma++)
1456
for (int mb = 0; mb <= j; mb++) {
1457
duarray_r[j][ma][mb][0] = dsfac * uarray_r[j][ma][mb] * ux +
1458
sfac * duarray_r[j][ma][mb][0];
1459
duarray_i[j][ma][mb][0] = dsfac * uarray_i[j][ma][mb] * ux +
1460
sfac * duarray_i[j][ma][mb][0];
1461
duarray_r[j][ma][mb][1] = dsfac * uarray_r[j][ma][mb] * uy +
1462
sfac * duarray_r[j][ma][mb][1];
1463
duarray_i[j][ma][mb][1] = dsfac * uarray_i[j][ma][mb] * uy +
1464
sfac * duarray_i[j][ma][mb][1];
1465
duarray_r[j][ma][mb][2] = dsfac * uarray_r[j][ma][mb] * uz +
1466
sfac * duarray_r[j][ma][mb][2];
1467
duarray_i[j][ma][mb][2] = dsfac * uarray_i[j][ma][mb] * uz +
1468
sfac * duarray_i[j][ma][mb][2];
1472
/* ----------------------------------------------------------------------
1473
memory usage of arrays
1474
------------------------------------------------------------------------- */
1476
double SNA::memory_usage()
1478
int jdim = twojmax + 1;
1480
bytes = jdim * jdim * jdim * jdim * jdim * sizeof(double);
1481
bytes += 2 * jdim * jdim * jdim * sizeof(complex<double>);
1482
bytes += 2 * jdim * jdim * jdim * sizeof(double);
1483
bytes += jdim * jdim * jdim * 3 * sizeof(complex<double>);
1484
bytes += jdim * jdim * jdim * 3 * sizeof(double);
1485
bytes += ncoeff * sizeof(double);
1486
bytes += jdim * jdim * jdim * jdim * jdim * sizeof(complex<double>);
1490
/* ---------------------------------------------------------------------- */
1492
void SNA::create_twojmax_arrays()
1494
int jdim = twojmax + 1;
1496
memory->create(cgarray, jdim, jdim, jdim, jdim, jdim,
1498
memory->create(rootpqarray, jdim+1, jdim+1,
1500
memory->create(barray, jdim, jdim, jdim,
1502
memory->create(dbarray, jdim, jdim, jdim, 3,
1505
memory->create(duarray_r, jdim, jdim, jdim, 3,
1507
memory->create(duarray_i, jdim, jdim, jdim, 3,
1510
memory->create(uarray_r, jdim, jdim, jdim,
1512
memory->create(uarray_i, jdim, jdim, jdim,
1515
if(!use_shared_arrays) {
1516
memory->create(uarraytot_r, jdim, jdim, jdim,
1518
memory->create(zarray_r, jdim, jdim, jdim, jdim, jdim,
1520
memory->create(uarraytot_i, jdim, jdim, jdim,
1522
memory->create(zarray_i, jdim, jdim, jdim, jdim, jdim,
1528
/* ---------------------------------------------------------------------- */
1530
void SNA::destroy_twojmax_arrays()
1532
memory->destroy(cgarray);
1533
memory->destroy(rootpqarray);
1534
memory->destroy(barray);
1536
memory->destroy(dbarray);
1538
memory->destroy(duarray_r);
1539
memory->destroy(duarray_i);
1541
memory->destroy(uarray_r);
1542
memory->destroy(uarray_i);
1544
if(!use_shared_arrays) {
1545
memory->destroy(uarraytot_r);
1546
memory->destroy(zarray_r);
1547
memory->destroy(uarraytot_i);
1548
memory->destroy(zarray_i);
1552
/* ----------------------------------------------------------------------
1553
store n! and return it as needed
1554
------------------------------------------------------------------------- */
1556
double SNA::factorial(int n)
1558
const int nmax = 167; // Largest n supported
1559
double nfac_table[nmax+1] = {
1579
1.21645100408832e+17,
1580
2.43290200817664e+18,
1581
5.10909421717094e+19,
1582
1.12400072777761e+21,
1583
2.5852016738885e+22,
1584
6.20448401733239e+23,
1585
1.5511210043331e+25,
1586
4.03291461126606e+26,
1587
1.08888694504184e+28,
1588
3.04888344611714e+29,
1589
8.8417619937397e+30,
1590
2.65252859812191e+32,
1591
8.22283865417792e+33,
1592
2.63130836933694e+35,
1593
8.68331761881189e+36,
1594
2.95232799039604e+38,
1595
1.03331479663861e+40,
1596
3.71993326789901e+41,
1597
1.37637530912263e+43,
1598
5.23022617466601e+44,
1599
2.03978820811974e+46,
1600
8.15915283247898e+47,
1601
3.34525266131638e+49,
1602
1.40500611775288e+51,
1603
6.04152630633738e+52,
1604
2.65827157478845e+54,
1605
1.1962222086548e+56,
1606
5.50262215981209e+57,
1607
2.58623241511168e+59,
1608
1.24139155925361e+61,
1609
6.08281864034268e+62,
1610
3.04140932017134e+64,
1611
1.55111875328738e+66,
1612
8.06581751709439e+67,
1613
4.27488328406003e+69,
1614
2.30843697339241e+71,
1615
1.26964033536583e+73,
1616
7.10998587804863e+74,
1617
4.05269195048772e+76,
1618
2.35056133128288e+78,
1619
1.3868311854569e+80,
1620
8.32098711274139e+81,
1621
5.07580213877225e+83,
1622
3.14699732603879e+85,
1623
1.98260831540444e+87,
1624
1.26886932185884e+89,
1625
8.24765059208247e+90,
1626
5.44344939077443e+92,
1627
3.64711109181887e+94,
1628
2.48003554243683e+96,
1629
1.71122452428141e+98,
1630
1.19785716699699e+100,
1631
8.50478588567862e+101,
1632
6.12344583768861e+103,
1633
4.47011546151268e+105,
1634
3.30788544151939e+107,
1635
2.48091408113954e+109,
1636
1.88549470166605e+111,
1637
1.45183092028286e+113,
1638
1.13242811782063e+115,
1639
8.94618213078297e+116,
1640
7.15694570462638e+118,
1641
5.79712602074737e+120,
1642
4.75364333701284e+122,
1643
3.94552396972066e+124,
1644
3.31424013456535e+126,
1645
2.81710411438055e+128,
1646
2.42270953836727e+130,
1647
2.10775729837953e+132,
1648
1.85482642257398e+134,
1649
1.65079551609085e+136,
1650
1.48571596448176e+138,
1651
1.3520015276784e+140,
1652
1.24384140546413e+142,
1653
1.15677250708164e+144,
1654
1.08736615665674e+146,
1655
1.03299784882391e+148,
1656
9.91677934870949e+149,
1657
9.61927596824821e+151,
1658
9.42689044888324e+153,
1659
9.33262154439441e+155,
1660
9.33262154439441e+157,
1661
9.42594775983835e+159,
1662
9.61446671503512e+161,
1663
9.90290071648618e+163,
1664
1.02990167451456e+166,
1665
1.08139675824029e+168,
1666
1.14628056373471e+170,
1667
1.22652020319614e+172,
1668
1.32464181945183e+174,
1669
1.44385958320249e+176,
1670
1.58824554152274e+178,
1671
1.76295255109024e+180,
1672
1.97450685722107e+182,
1673
2.23119274865981e+184,
1674
2.54355973347219e+186,
1675
2.92509369349301e+188,
1676
3.3931086844519e+190,
1677
3.96993716080872e+192,
1678
4.68452584975429e+194,
1679
5.5745857612076e+196,
1680
6.68950291344912e+198,
1681
8.09429852527344e+200,
1682
9.8750442008336e+202,
1683
1.21463043670253e+205,
1684
1.50614174151114e+207,
1685
1.88267717688893e+209,
1686
2.37217324288005e+211,
1687
3.01266001845766e+213,
1688
3.8562048236258e+215,
1689
4.97450422247729e+217,
1690
6.46685548922047e+219,
1691
8.47158069087882e+221,
1692
1.118248651196e+224,
1693
1.48727070609069e+226,
1694
1.99294274616152e+228,
1695
2.69047270731805e+230,
1696
3.65904288195255e+232,
1697
5.01288874827499e+234,
1698
6.91778647261949e+236,
1699
9.61572319694109e+238,
1700
1.34620124757175e+241,
1701
1.89814375907617e+243,
1702
2.69536413788816e+245,
1703
3.85437071718007e+247,
1704
5.5502938327393e+249,
1705
8.04792605747199e+251,
1706
1.17499720439091e+254,
1707
1.72724589045464e+256,
1708
2.55632391787286e+258,
1709
3.80892263763057e+260,
1710
5.71338395644585e+262,
1711
8.62720977423323e+264,
1712
1.31133588568345e+267,
1713
2.00634390509568e+269,
1714
3.08976961384735e+271,
1715
4.78914290146339e+273,
1716
7.47106292628289e+275,
1717
1.17295687942641e+278,
1718
1.85327186949373e+280,
1719
2.94670227249504e+282,
1720
4.71472363599206e+284,
1721
7.59070505394721e+286,
1722
1.22969421873945e+289,
1723
2.0044015765453e+291,
1724
3.28721858553429e+293,
1725
5.42391066613159e+295,
1726
9.00369170577843e+297,
1727
1.503616514865e+300,
1730
if(n < 0 || n > nmax) {
1732
sprintf(str, "Invalid argument to factorial %d", n);
1733
error->all(FLERR, str);
1736
return nfac_table[n];
1739
/* ----------------------------------------------------------------------
1740
the function delta given by VMK Eq. 8.2(1)
1741
------------------------------------------------------------------------- */
1743
double SNA::deltacg(int j1, int j2, int j)
1745
double sfaccg = factorial((j1 + j2 + j) / 2 + 1);
1746
return sqrt(factorial((j1 + j2 - j) / 2) *
1747
factorial((j1 - j2 + j) / 2) *
1748
factorial((-j1 + j2 + j) / 2) / sfaccg);
1751
/* ----------------------------------------------------------------------
1752
assign Clebsch-Gordan coefficients using
1753
the quasi-binomial formula VMK 8.2.1(3)
1754
------------------------------------------------------------------------- */
1756
void SNA::init_clebsch_gordan()
1758
double sum,dcg,sfaccg;
1759
int m, aa2, bb2, cc2;
1762
for (int j1 = 0; j1 <= twojmax; j1++)
1763
for (int j2 = 0; j2 <= twojmax; j2++)
1764
for (int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2)
1765
for (int m1 = 0; m1 <= j1; m1 += 1) {
1768
for (int m2 = 0; m2 <= j2; m2 += 1) {
1773
m = (aa2 + bb2 + j) / 2;
1775
if(m < 0 || m > j) continue;
1779
for (int z = MAX(0, MAX(-(j - j2 + aa2)
1780
/ 2, -(j - j1 - bb2) / 2));
1781
z <= MIN((j1 + j2 - j) / 2,
1782
MIN((j1 - aa2) / 2, (j2 + bb2) / 2));
1784
ifac = z % 2 ? -1 : 1;
1787
factorial((j1 + j2 - j) / 2 - z) *
1788
factorial((j1 - aa2) / 2 - z) *
1789
factorial((j2 + bb2) / 2 - z) *
1790
factorial((j - j2 + aa2) / 2 + z) *
1791
factorial((j - j1 - bb2) / 2 + z));
1795
dcg = deltacg(j1, j2, j);
1796
sfaccg = sqrt(factorial((j1 + aa2) / 2) *
1797
factorial((j1 - aa2) / 2) *
1798
factorial((j2 + bb2) / 2) *
1799
factorial((j2 - bb2) / 2) *
1800
factorial((j + cc2) / 2) *
1801
factorial((j - cc2) / 2) *
1804
cgarray[j1][j2][j][m1][m2] = sum * dcg * sfaccg;
1809
/* ----------------------------------------------------------------------
1810
pre-compute table of sqrt[p/m2], p, q = 1,twojmax
1811
the p = 0, q = 0 entries are allocated and skipped for convenience.
1812
------------------------------------------------------------------------- */
1814
void SNA::init_rootpqarray()
1816
for (int p = 1; p <= twojmax; p++)
1817
for (int q = 1; q <= twojmax; q++)
1818
rootpqarray[p][q] = sqrt(static_cast<double>(p)/q);
1821
/* ----------------------------------------------------------------------
1823
------------------------------------------------------------------------- */
1825
void SNA::jtostr(char* str, int j)
1828
sprintf(str, "%d", j / 2);
1830
sprintf(str, "%d/2", j);
1833
/* ----------------------------------------------------------------------
1835
------------------------------------------------------------------------- */
1837
void SNA::mtostr(char* str, int j, int m)
1840
sprintf(str, "%d", m - j / 2);
1842
sprintf(str, "%d/2", 2 * m - j);
1845
/* ----------------------------------------------------------------------
1846
list values of Clebsch-Gordan coefficients
1847
using notation of VMK Table 8.11
1848
------------------------------------------------------------------------- */
1850
void SNA::print_clebsch_gordan(FILE* file)
1852
char stra[20], strb[20], strc[20], straa[20], strbb[20], strcc[20];
1855
fprintf(file, "a, aa, b, bb, c, cc, c(a,aa,b,bb,c,cc) \n");
1857
for (int j1 = 0; j1 <= twojmax; j1++) {
1860
for (int j2 = 0; j2 <= twojmax; j2++) {
1863
for (int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) {
1866
for (int m1 = 0; m1 <= j1; m1 += 1) {
1867
mtostr(straa, j1, m1);
1870
for (int m2 = 0; m2 <= j2; m2 += 1) {
1872
m = (aa2 + bb2 + j) / 2;
1874
if(m < 0 || m > j) continue;
1876
mtostr(strbb, j2, m2);
1877
mtostr(strcc, j, m);
1879
fprintf(file, "%s\t%s\t%s\t%s\t%s\t%s\t%g\n",
1880
stra, straa, strb, strbb, strc, strcc,
1881
cgarray[j1][j2][j][m1][m2]);
1889
/* ---------------------------------------------------------------------- */
1891
int SNA::compute_ncoeff()
1897
for (int j1 = 0; j1 <= twojmax; j1++)
1898
if(diagonalstyle == 0) {
1899
for (int j2 = 0; j2 <= j1; j2++)
1900
for (int j = abs(j1 - j2);
1901
j <= MIN(twojmax, j1 + j2); j += 2)
1903
} else if(diagonalstyle == 1) {
1906
for (int j = abs(j1 - j2);
1907
j <= MIN(twojmax, j1 + j2); j += 2)
1909
} else if(diagonalstyle == 2) {
1911
} else if(diagonalstyle == 3) {
1912
for (int j2 = 0; j2 <= j1; j2++)
1913
for (int j = abs(j1 - j2);
1914
j <= MIN(twojmax, j1 + j2); j += 2)
1915
if (j >= j1) ncount++;
1921
/* ---------------------------------------------------------------------- */
1923
double SNA::compute_sfac(double r, double rcut)
1925
if (switch_flag == 0) return 1.0;
1926
if (switch_flag == 1) {
1927
if(r <= rmin0) return 1.0;
1928
else if(r > rcut) return 0.0;
1930
double rcutfac = MY_PI / (rcut - rmin0);
1931
return 0.5 * (cos((r - rmin0) * rcutfac) + 1.0);
1937
/* ---------------------------------------------------------------------- */
1939
double SNA::compute_dsfac(double r, double rcut)
1941
if (switch_flag == 0) return 0.0;
1942
if (switch_flag == 1) {
1943
if(r <= rmin0) return 0.0;
1944
else if(r > rcut) return 0.0;
1946
double rcutfac = MY_PI / (rcut - rmin0);
1947
return -0.5 * sin((r - rmin0) * rcutfac) * rcutfac;