1
/* C version of the GA SCF code */
20
// #include "global.h"
21
// #include "tcgmsg.h"
25
//function declaration
26
void makesz(double* schwmax);
28
double s(int i,int j);
30
int nxtask(int nproc);
31
int verify_zero_chunk(double chunk[][ichunk]);
32
void damp(double* fac);
33
void prnout(int iter, double energy, double deltad, double tester);
35
double testfock(void);
36
void shiftfock(double shift);
37
void prnfin(double energy);
38
void diagon(double* tester, int iter);
42
void closearrays(void);
43
void makoverlap(void);
44
void setoverlap(double *a,int* lo,int* hi, int ld1,int ld2);
45
void print_GA_block(int g_a);
46
void print_GA_block_ij(int g_a,int tlo);
47
void dump_chunk(double *a,int ld1,int ld2);
48
// double abs_double(double x);
54
double enrep, q[maxatom], ax[maxatom], ay[maxatom], az[maxatom],
55
x[maxnbfn], y[maxnbfn], z[maxnbfn], expnt[maxnbfn], rnorm[maxnbfn];
56
long long int iky[maxnbfn], nocc, nbfn, nnbfn;
57
long long int icut1,icut2,icut3,icut4;
58
int natom; //long long int --> long
61
int g_counter, g_dens, g_fock, g_tfock, g_schwarz, g_work, g_ident, g_orbs;
62
int g_tmp, g_proc; //temporay global array for storage major transformation
64
int main(int argc, char **argv)
66
long long int nints, maxint;
68
// CAUTION: int precision requirements;
69
// nints, maxint, etc. are proportional to the number of basis functions;
70
// to the fourth power! 216^4 is greater than the largest number;
71
// that can be represented as a 32-bit signed interger, so 64-bit;
72
// arithmetic is needed to count integrals when calculating more than;
73
// 14 Be atoms with 15 basis functions each. Since integrals are counted;
74
// over all iterations, 18 iterations with 7 atoms can result in precision;
75
// problems. Note that the wave function could be calculated correctly;
76
// for much larger basis sets without 64-bit ints because the required;
77
// indexing is usually proportional to nbfn^2, which is good to 46,340;
78
// basis functions, except that the task counter runs as (nbfn/ichunk)^4,;
79
// so with ichunk = 10, 32-bit ints yield correct wavefunctions out to;
80
// 2145 basis functions (maxatom=143), or 4290 (maxatom=286) with ichunk =;
83
// This warning applies to the Global Arrays implementation as well!;
84
// functions of special concern are GA_igop and NGA_Read_inc.;
86
#define USE_TRANSFORM 1
90
double tinit=0.0, tonel=0.0, ttwoel=0.0, tdiag=0.0, tdens=0.0, tprint=0.0;
91
double eone=0.0, etwo=0.0, energy=0.0, deltad=0.0;
93
//implicit variables in fortran
100
double totsec, tester;
106
// initalize the parallel message passing environment;
108
MPI_Init(&argc,&argv);
119
if (!MA_init(MT_DBL, stack, heap))
120
GA_Error("ma_init failed",-1);
129
// initialize a bunch of stuff and initial density matrix;
133
sprintf(fname, "clog.dat.%d", me);
134
clog = fopen(fname, "w");
137
// get input from file be.inpt;
140
// create and allocate global arrays;
144
printf(" bytes of memory used by node 0: %lld\n\n", GA_Inquire_memory());
149
// create initial guess for density matrix by using single atom;
154
GA_Print_distribution(g_schwarz);
155
GA_Print_distribution(g_dens);
159
// make initial orthogonal orbital set for solution method using;
160
// similarity transform;
163
// make info for sparsity test;
168
// print preliminary data before any long compute segments start;
173
for (iter = 0; iter < mxiter; iter++) {
176
// make the one particle contribution to the fock matrix;
177
// and get the partial contribution to the energy;
178
oneel(schwmax, &eone, nproc);
179
tonel = tonel + timer();
186
printf("g_fock sum after oneel: %f\n", s);
192
printf("g_orbs sum after oneel: %f\n", s);
195
// compute the two particle contributions to the fock matrix and;
196
// get the total energy.;
197
twoel(schwmax, &etwo, nproc);
198
ttwoel = ttwoel + timer();
205
printf("g_fock sum after twoel: %f\n", s);
208
// Diagonalize the fock matrix. The diagonalizers used in this;
209
// are actually sequential, not parallel.;
210
diagon(&tester, iter);
211
tdiag = tdiag + timer();
218
printf("g_fock sum after diagon: %f\n", s);
224
printf("g_orbs sum after diagon: %f\n", s);
227
// make the new density matrix in g_work from orbitals in g_orbs,;
228
// compute the norm of the change in the density matrix and;
229
// then update the density matrix in g_dens with damping.;
246
tdens = tdens + timer();
248
// add up energy and print out convergence information;
250
energy = enrep + eone + etwo;
251
prnout(iter, energy, deltad, tester);
252
tprint = tprint + timer();
255
// if converged exit iteration loop;
259
#if GA_VERSION_MAJOR >= 5
260
GA_Llgop(&icut4, 1, "+");
262
GA_Igop(&icut4, 1, "+");
266
// something has gone wrong--print what you know and quit.;
267
printf("no two-electron integrals computed!\n");
275
printf("SCF failed to converge in %d iters\n", iter);
276
//...v....1....v....2....v....3....v....4....v....5....v....6....v....7..;
278
// finished ... print out eigenvalues and occupied orbitals;
283
#if GA_VERSION_MAJOR >= 5
284
GA_Llgop(&icut1, 1, (char*) "+");
285
GA_Llgop(&icut2, 1, (char*) "+");
286
GA_Llgop(&icut3, 1, (char*) "+");
288
GA_Igop(&icut1, 1, (char*) "+");
289
GA_Igop(&icut2, 1, (char*) "+");
290
GA_Igop(&icut3, 1, (char*) "+");
294
// print out timing information;
296
printf(" init onel twoel diag dens print ncpu \n");
297
printf(" ---- ---- ----- ---- ---- ---- ---- \n");
298
totsec = tinit + tonel + ttwoel + tdiag + tdens + tprint;
299
printf("%10.2f %10.2f %10.2f %10.2f %10.2f %10.2f %d",tinit, tonel, ttwoel, tdiag,
301
printf("\n elapsed time in seconds %10.2f\n\n",totsec);
302
// print out information on # integrals evaluated each iteration;
303
nints = icut1 + icut2 + icut3;
304
frac = (double)icut3 / (double)nints;
305
printf("No. of integrals screened or computed (all iters) \n\n");
306
printf("-------------------------------------\n");
307
printf(" failed #ij test failed #kl test #compute #total fraction\n");
308
printf(" ---------- ---------------- ------------- ------ --------\n");
309
printf("%15lld %15lld %15lld %15lld %9.6f\n", icut1, icut2, icut3, nints, frac);
312
maxint = pow(maxint, 4) * (iter + 1);
313
if(nints != maxint) {
314
printf("Inconsistent number of integrals, should be %ld\n", maxint);
315
printf("Note: largest 32-bit int is 2,147,483,647\n");
316
printf("Probably due to insufficient int precision in GA.\n");
326
fprintf(stderr, "Before GFFinalize()\n");
339
fprintf(stderr, "After GFFinalize()\n");
346
fprintf(stderr, "After GA_Terminate()\n");
358
void makesz(double *schwmax)
360
double work[ichunk][ichunk];
361
int lo[2],hi[2],i,j,iloc,jloc,ld;
364
int lo_c[2],hi_c[2]; //column-major inter-patch switch
366
//implicit declared variable in fortran
369
//schwarz(ij) = (ij|ij) for sparsity test;
377
dotask = next_chunk(lo, hi);
381
for (i = lo[0]; i <= hi[0]; i++) {
383
for (j = lo[1]; j <= hi[1]; j++) {
386
work[iloc][jloc] = sqrt(gg);
387
//work[jloc][iloc] = sqrt(gg);
388
*schwmax = MAX((*schwmax), work[iloc][jloc]);
389
// *schwmax = MAX((*schwmax), work[jloc][iloc]);
398
NGA_Put(g_schwarz, lo, hi, work, &ld);
399
// NGA_Put(g_schwarz,lo_c,hi_c,work,&ld); //column-major inter-patch switch
400
dotask = next_chunk(lo, hi);
402
GA_Dgop(schwmax, 1, "max");
409
long long int maxint;
410
//implicit declared variables in fortran
413
// write a little welcome message;
415
maxint = pow(maxint, 4);
417
if (GA_Nodeid()==0) {
418
printf(" Example Direct Self Consistent Field Program \n");
419
printf(" -------------------------------------------- \n\n");
420
printf(" no. of atoms .............. %5d\n", natom);
421
printf(" no. of occupied orbitals .. %5ld\n", nocc);
422
printf(" no. of basis functions .... %5ld\n", nbfn);
423
printf(" basis functions^4 ......... %5ld\n", maxint);
424
printf(" convergence threshold ..... %9.4f\n", tol);
425
printf(" chunk size .................%5d\n", ichunk);
428
// generate normalisation coefficients for the basis functions;
429
// and the index array iky;
430
for (i = 0; i < nbfn; i++) {
431
//iky[i] = i*(i-1)/2;
432
iky[i] = (i + 1) * i / 2;
435
for (i = 0;i < nbfn;i++) {
436
rnorm[i] = pow((expnt[i] * 2.00 / pi), 0.750);
439
// initialize common for computing f0;
443
double h(int i,int j)
447
// generate the one particle hamiltonian matrix element;
448
// over the normalized primitive 1s functions i and j;
451
//implicit declared variables in fortran
454
double facij,expij,repij;
457
double xp,yp,zp,rpc2;
460
rab2 = (x[i]-x[j])*(x[i]-x[j]) + (y[i]-y[j])*(y[i]-y[j]) + (z[i]-z[j])*(z[i]-z[j]);
461
facij = expnt[i]*expnt[j]/(expnt[i]+expnt[j]);
462
expij = exprjh(-facij*rab2);
463
repij = (2.00*pi/(expnt[i]+expnt[j])) * expij;
465
// first do the nuclear attraction integrals;
467
for (iat = 0;iat < natom;iat++)
469
xp = (x[i]*expnt[i] + x[j]*expnt[j])/(expnt[i]+expnt[j]);
470
yp = (y[i]*expnt[i] + y[j]*expnt[j])/(expnt[i]+expnt[j]);
471
zp = (z[i]*expnt[i] + z[j]*expnt[j])/(expnt[i]+expnt[j]);
472
rpc2 = (xp-ax[iat])*(xp-ax[iat]) + (yp-ay[iat])*(yp-ay[iat]) + (zp-az[iat])*(zp-az[iat]);
474
f0(&f0val, (expnt[i]+expnt[j])*rpc2);
475
sum = sum - repij * q[iat] * f0val;
477
// add on the kinetic energy term;
479
sum = sum + facij*(3.00-2.00*facij*rab2) *
480
pow((pi/(expnt[i]+expnt[j])),1.50) * expij;
482
// finally multiply by the normalization constants;
483
ret = sum * rnorm[i] * rnorm[j];
487
double s(int i, int j)
489
// generate the overlap matrix element between the normalized;
490
// primitve gaussian 1s functions i and j;
493
//implicit declared variables in fortran
496
rab2 = (x[i] - x[j]) * (x[i] - x[j]) + (y[i] - y[j]) * (y[i] - y[j]) +
497
(z[i] - z[j]) * (z[i] - z[j]);
498
facij = expnt[i] * expnt[j] / (expnt[i] + expnt[j]);
499
ret = pow((pi / (expnt[i] + expnt[j])), 1.50) * exprjh(-facij * rab2) * rnorm[i] * rnorm[j];
505
double work[ichunk][ichunk], orbsi[maxnbfn][ichunk]; //maxnbfn
506
double orbsj[maxnbfn][ichunk]; //maxnbfn
507
int lo[2], hi[2], tlo[2], thi[2], i, j, iloc, jloc, ld;
510
//implicit declared variables in fortran
514
//transform between column-major row-major
515
// double a[ichunk][ichunk], b[ichunk][ichunk];
516
// int lo_c[2],hi_c[2];
518
// generate density matrix from orbitals in g_orbs. the first;
519
// nocc orbitals are doubly occupied.;
520
// GA_Zero(g_counter);
521
// dotask = next_chunk(lo, hi);
524
//transform g_orbs from column-major to row-major
525
/* while (dotask) { */
526
/* NGA_Get(g_orbs, lo, hi, a, &ld); */
527
/* for (j = 0; j <= hi[1] - lo[1]; j++) { */
528
/* for (i = 0; i <= hi[0] - lo[0]; i++) { */
529
/* b[j][i] = a[i][j]; //intra-patch transfrom */
532
/* lo_c[0] = lo[1]; //inter-patch transform */
533
/* lo_c[1] = lo[0]; */
534
/* hi_c[0] = hi[1]; */
535
/* hi_c[1] = hi[0]; */
536
/* NGA_Put(g_tmp, lo_c, hi_c, b, &ld); */
537
/* dotask = next_chunk(lo, hi); */
539
/* GA_Copy(g_tmp, g_orbs); */
541
// GA_Transpose(g_orbs, g_tmp);
542
// GA_Copy(g_tmp, g_orbs);
545
dotask = next_chunk(lo, hi);
554
NGA_Get(g_orbs, tlo, thi, orbsi, &ld);
558
NGA_Get(g_orbs, tlo, thi, orbsj, &ld);
560
for (i = lo[0]; i <= hi[0]; i++) {
561
iloc = i - lo[0];// + 1;
562
for (j = lo[1]; j <= hi[1]; j++) {
563
jloc = j - lo[1];// + 1;
565
for (k = 0; k < nocc; k++) {
566
p = p + orbsi[k][iloc] * orbsj[k][jloc];
568
work[iloc][jloc] = 2.0 * p;
572
NGA_Put(g_work, lo, hi, work, &ld);
573
dotask = next_chunk(lo, hi);
576
// GA_Transpose(g_orbs, g_tmp);
577
// GA_Copy(g_tmp, g_orbs);
579
// GA_Transpose(g_work, g_tmp);
580
// GA_Copy(g_tmp, g_work);
582
//transform g_orbs from row-major to column major
584
/* GA_Zero(g_counter); */
585
/* dotask = next_chunk(lo, hi); */
586
/* while (dotask) { */
587
/* NGA_Get(g_orbs, lo, hi, a, &ld); */
588
/* for (j = 0; j <= hi[1] - lo[1]; j++) { */
589
/* for (i = 0; i <= hi[0] - lo[0]; i++) { */
590
/* b[j][i] = a[i][j]; //intra-patch transfrom */
593
/* lo_c[0] = lo[1]; //inter-patch transform */
594
/* lo_c[1] = lo[0]; */
595
/* hi_c[0] = hi[1]; */
596
/* hi_c[1] = hi[0]; */
597
/* NGA_Put(g_tmp, lo_c, hi_c, b, &ld); */
598
/* dotask = next_chunk(lo, hi); */
600
/* GA_Copy(g_tmp, g_orbs); */
601
// transform g_work from row-major to column major
603
/* GA_Zero(g_counter); */
604
/* dotask = next_chunk(lo, hi); */
605
/* while (dotask) { */
606
/* NGA_Get(g_work, lo, hi, a, &ld); */
607
/* for (j = 0; j <= hi[1] - lo[1]; j++) { */
608
/* for (i = 0; i <= hi[0] - lo[0]; i++) { */
609
/* b[j][i] = a[i][j]; //intra-patch transfrom */
612
/* lo_c[0] = lo[1]; //inter-patch transform */
613
/* lo_c[1] = lo[0]; */
614
/* hi_c[0] = hi[1]; */
615
/* hi_c[1] = hi[0]; */
616
/* NGA_Put(g_tmp, lo_c, hi_c, b, &ld); */
617
/* dotask = next_chunk(lo, hi); */
619
/* GA_Copy(g_tmp, g_work); */
623
int nxtask(int nproc) //not used
625
const int ichunk_local = 10; //in the header file, ichunk is defined as 20, so I rename it as ichunk_local
626
static double nleft=0, icount=0;
630
//implicit declared variables in fortran
633
// wrapper round nxtval() to increase granularity;
634
// and thus reduce no. of requests to shared counter;
640
icount = tcg_nxtval(nproc) * ichunk_local;
641
nleft = ichunk_local;
651
junk = tcg_nxtval(nproc);
654
// following does dumb static load balancing;
657
//$$$ if (nleft == 0) {
658
//$$$ icount = GA_Nodeid();
661
//$$$ nxtask = icount;
662
//$$$ icount = icount + GA_Nnodes();
671
int next_chunk(int* lo,int* hi)
677
//implicit declared variables in fortran
682
itask = NGA_Read_inc(g_counter, &one, 1);
683
imax = nbfn / ichunk;
684
if (nbfn - ichunk * imax > 0)
686
if (itask < imax * imax) {
688
jlo = (itask - ilo) / imax;
689
lo[0] = ilo * ichunk;
690
lo[1] = jlo * ichunk;
691
hi[0] = MIN((ilo + 1) * ichunk - 1, nbfn);
692
hi[1] = MIN((jlo + 1) * ichunk - 1, nbfn);
701
long int acquire_tasks(int numTasks)
706
itask = NGA_Read_inc(g_counter, &one, numTasks);
711
int translate_task(long int itask, int *lo, int *hi, int *ilo, int *jlo, int *klo, int *llo)
717
imax = nbfn / ichunk;
718
if (nbfn - ichunk * imax > 0)
721
printf("translate_task: itask negative: %ld imax: %ld nbfn: %ld ichunk: %d\n", itask, imax,
723
printf("probable GA int precision problem if imax^4 > 2^31\n");
725
printf("translate_task\n");
728
if (itask < pow(imax, 4)) {
730
itmp = (itask - (*ilo)) / imax;
732
itmp = (itmp - (*jlo)) / imax;
734
*llo = (itmp - (*klo)) / imax;
735
lo[0] = (*ilo) * ichunk;
736
lo[1] = (*jlo) * ichunk;
737
lo[2] = (*klo) * ichunk;
738
lo[3] = (*llo) * ichunk;
739
hi[0] = MIN(((*ilo) + 1) * ichunk - 1, nbfn);
740
hi[1] = MIN(((*jlo) + 1) * ichunk - 1, nbfn);
741
hi[2] = MIN(((*klo) + 1) * ichunk -1, nbfn);
742
hi[3] = MIN(((*llo) + 1) * ichunk - 1, nbfn);
751
int next_4chunk(int *lo,int *hi,int* ilo,int* jlo,int* klo,int* llo, long int *pitask)
760
itask = NGA_Read_inc(g_counter, &one, 1);
763
imax = nbfn / ichunk;
764
if (nbfn - ichunk * imax > 0)
767
printf("next_4chunk: itask negative: %ld imax: %ld nbfn: %ld ichunk: %d\n", itask, imax,
769
printf("probable GA int precision problem if imax^4 > 2^31\n");
771
printf("next_4chunk\n");
774
if (itask < pow(imax, 4)) {
776
itmp = (itask - (*ilo)) / imax;
778
itmp = (itmp - (*jlo)) / imax;
780
*llo = (itmp - (*klo)) / imax;
781
lo[0] = (*ilo) * ichunk;
782
lo[1] = (*jlo) * ichunk;
783
lo[2] = (*klo) * ichunk;
784
lo[3] = (*llo) * ichunk;
785
hi[0] = MIN(((*ilo) + 1) * ichunk - 1, nbfn);
786
hi[1] = MIN(((*jlo) + 1) * ichunk - 1, nbfn);
787
hi[2] = MIN(((*klo) + 1) * ichunk -1, nbfn);
788
hi[3] = MIN(((*llo) + 1) * ichunk - 1, nbfn);
797
void clean_chunk(double chunk[][ichunk])
801
for (i = 0; i < ichunk; i++)
802
for (j = 0; j < ichunk; j++)
806
int verify_zero_chunk(double chunk[][ichunk])
812
for (i = 0; (i < ichunk) && flag; i++)
813
for (j = 0; (j < ichunk) && flag; j++)
814
if (chunk[i][j] != 0.0) {
816
fprintf(stderr, "chunk[%d][%d] != 0.0, %.16f\n", i, j, chunk[i][j]);
823
void damp(double *fac)
825
// create damped density matrix as a linear combination of;
826
// old density matrix and density matrix formed from new orbitals;
828
//implicit declared variables in fortran
831
ofac = 1.00 - (*fac);
832
GA_Add(fac, g_dens, &ofac, g_work, g_dens);
836
void prnout(int iter, double energy, double deltad, double tester)
838
// printout results of each iteration;
839
if (GA_Nodeid() != 0)
842
printf(" iter= %3d, energy=%15.8f, deltad= %9.7f, deltaf=%9.7f\n",
843
iter, energy, deltad, tester);
850
double dens_c[ichunk][ichunk],work_c[ichunk][ichunk];
851
int lo[2], hi[2], i, j, ld;
854
// int lo_c[2],hi_c[2]; //column-major inter-patch switch
858
//implicit declared variables in fortran
860
// compute largest change in density matrix elements;
863
dotask = next_chunk(lo,hi);
866
//column-major inter-path switch
872
NGA_Get(g_dens, lo, hi, dens_c, &ld);
873
NGA_Get(g_work, lo, hi, work_c, &ld);
875
//column-major inter-patch switch
876
// NGA_Get(g_dens,lo_c,hi_c,dens_c,&ld);
877
// NGA_Get(g_work,lo_c,hi_c,work_c,&ld);
879
for (j = 0; j <= hi[1] - lo[1]; j++) {
880
for (i = 0; i <= hi[0] - lo[0]; i++) {
881
//xdiff = abs(dens_c[i][j]-work_c[i][j]);
882
//xdiff = abs_double(dens_c[j][i]-work_c[j][i]); //column-major intra-patch switch
883
xdiff = fabs(dens_c[i][j] - work_c[i][j]); //column-major intra-patch switch
888
dotask = next_chunk(lo, hi);
890
GA_Dgop(&denmax, 1, "max");
895
double testfock(void)
898
double work[ichunk][ichunk];
899
int lo[2], hi[2], i, j, iloc, jloc, ld;
902
// int lo_c[2],hi_c[2]; //column-major inter-patch switch
905
// compute largest change in density matrix elements;
908
dotask = next_chunk(lo, hi);
911
//column-major inter-patch switch
917
NGA_Get(g_fock, lo, hi, work, &ld);
918
// NGA_Get(g_fock, lo_c, hi_c, work, &ld);//column-major inter-patch switch
920
for (j = lo[1]; j <= hi[1]; j++) {
922
for (i = lo[0]; i <= hi[0]; i++) {
925
//xtmp = abs(work[iloc][jloc]);
926
// xtmp = abs_double(work[jloc][iloc]); //column-major intra-patch switch
927
xtmp = fabs(work[iloc][jloc]); //column-major intra-patch switch
933
dotask = next_chunk(lo, hi);
935
GA_Dgop(&xmax, 1, "max");
941
void shiftfock(double shift)
943
double work[ichunk][ichunk];
944
int lo[2], hi[2], i, j, iloc, jloc, ld, icnt;
947
// int lo_c[2],hi_c[2]; //column-major inter-patch switch
949
// compute largest change in density matrix elements;
951
dotask = next_chunk(lo, hi);
954
//column-major inter-path switch
960
// NGA_Get(g_fock, lo_c, hi_c, work, &ld); //column-major inter-patch switch
961
NGA_Get(g_fock, lo, hi, work, &ld);
963
for (j = lo[1]; j <= hi[1]; j++) {
965
for (i = lo[0]; i <= hi[0]; i++) {
967
if ((i == j) && (i > (nocc - 1))) {
968
work[iloc][jloc] = work[iloc][jloc] + shift;
969
// work[jloc][iloc] = work[jloc][iloc] + shift; //column-major inter-patch switch
975
//NGA_Put(g_fock, lo_c, hi_c, work, &ld);//column-major inter-patch switch
976
NGA_Put(g_fock, lo, hi, work, &ld);
977
dotask = next_chunk(lo, hi);
982
void prnfin(double energy)
984
double orbs[maxnbfn][maxnbfn];
987
// printout final results;
988
if (GA_Nodeid() != 0)
991
printf("\n\nfinal energy = %18.11f\n",energy);
992
printf("\neigenvalues\n\n");
993
output(eigv, 0, MIN(nbfn,nocc+5), 0, 1, nbfn, 1, 1);
994
//output(eigv, 1, MIN(nbfn,nocc+5), 1, 1, nbfn, 1, 1);
999
void g(double *value, int i, int j, int k, int l)
1001
//implicit declared variables in fortran
1002
double f0val, rab2, rcd2, facij,fackl,exijkl,denom, fac, xp,yp,zp,xq,yq,zq,rpq2;
1004
// compute the two electon integral (ij|kl) over normalized;
1005
// primitive 1s gaussians;
1007
rab2 = (x[i] - x[j]) * (x[i] - x[j]) + (y[i] - y[j]) * (y[i] - y[j]) +
1008
(z[i] - z[j]) * (z[i] - z[j]);
1009
rcd2 = (x[k] - x[l]) * (x[k] - x[l]) + (y[k] - y[l]) * (y[k] - y[l]) +
1010
(z[k] - z[l]) * (z[k] - z[l]);
1011
facij = expnt[i] * expnt[j] / (expnt[i] + expnt[j]);
1012
fackl = expnt[k] * expnt[l] / (expnt[k] + expnt[l]);
1013
exijkl = exprjh(-facij * rab2 - fackl * rcd2);
1014
denom = (expnt[i] + expnt[j]) * (expnt[k] + expnt[l]) *
1015
sqrt(expnt[i] + expnt[j] + expnt[k] + expnt[l]);
1016
fac = (expnt[i] + expnt[j]) * (expnt[k] + expnt[l]) /
1017
(expnt[i] + expnt[j] + expnt[k] + expnt[l]);
1019
xp = (x[i] * expnt[i] + x[j] * expnt[j]) / (expnt[i] + expnt[j]);
1020
yp = (y[i] * expnt[i] + y[j] * expnt[j]) / (expnt[i] + expnt[j]);
1021
zp = (z[i] * expnt[i] + z[j] * expnt[j]) / (expnt[i] + expnt[j]);
1022
xq = (x[k] * expnt[k] + x[l] * expnt[l]) / (expnt[k] + expnt[l]);
1023
yq = (y[k] * expnt[k] + y[l] * expnt[l]) / (expnt[k] + expnt[l]);
1024
zq = (z[k] * expnt[k] + z[l] * expnt[l]) / (expnt[k] + expnt[l]);
1025
rpq2 = (xp - xq) * (xp - xq) + (yp - yq) * (yp - yq) + (zp - zq) * (zp - zq);
1027
f0(&f0val, fac * rpq2);
1028
*value = (2.00 * pow(pi, 2.50) / denom) * exijkl * f0val *
1029
rnorm[i] * rnorm[j] * rnorm[k] * rnorm[l];
1033
void diagon(double *tester, int iter)
1035
// diagon(fock, orbs, evals, work, tester, iter);
1036
double r_zero, r_one, shift;
1038
//implicit declared variables in fortran
1041
double test = -1.0, s;
1046
// use similarity transform to solve standard eigenvalue problem;
1047
// (overlap matrix has been transformed out of the problem);
1050
GA_Dgemm('n', 'n', nbfn, nbfn, nbfn, r_one, g_fock, g_orbs, r_zero, g_tfock);
1054
GA_Dgop(&s, 1, "+");
1057
printf("g_tfock sum after first dgemm(): %f\n", s);
1060
GA_Dgemm('t', 'n', nbfn, nbfn, nbfn, r_one, g_orbs, g_tfock, r_zero, g_fock);
1064
GA_Dgop(&s, 1, "+");
1067
printf("g_fock sum after second dgemm(): %f\n", s);
1070
*tester = testfock();
1072
if ((*tester) > 0.30) {
1084
//if (iter>=2 && shift!=0.00)
1085
if (iter >= 1 && shift != 0.00) { //iter 2 in Fotran is iter 1 in C
1088
GA_Copy(g_orbs, g_tfock);
1089
GA_Diag_std_seq(g_fock, g_work, eigv);
1091
// Back transform eigenvectors;
1092
GA_Dgemm('n', 'n', nbfn, nbfn, nbfn, r_one, g_tfock, g_work, r_zero, g_orbs);
1094
if (iter>= 1 && shift != 0.00) { //>=2 --> >=1
1095
for (i = nocc; i < nbfn; i++) {
1096
eigv[i] = eigv[i] - shift;
1101
// Keep remaking overlap matrix since GA_Diag_seq does not;
1102
// guarantee that g_ident is preserved.;
1105
GA_Diag_seq(g_fock, g_ident, g_orbs, eigv);
1113
double work[ichunk][ichunk],orbs[ichunk][ichunk];
1114
double eval[maxnbfn];
1115
int lo[2],hi[2],ld,me,i,j,iloc,jloc;
1118
int lo_c[2],hi_c[2]; //for column-major switch
1119
// generate set of orthonormal vectors by creating a random;
1120
// symmetric matrix and solving associated generalized eigenvalue;
1121
// problem using the correct overlap matrix.;
1125
dotask = next_chunk(lo, hi);
1129
for (i = lo[0]; i <= hi[0]; i++) {
1131
for (j = lo[1]; j <= hi[1]; j++) {
1133
work[iloc][jloc] = s(i, j);
1134
//work[jloc][iloc] = s(i,j); //column-major intra-patch switch
1135
orbs[iloc][jloc] = 0.5;//drand48();
1136
//orbs[jloc][iloc] = 0.5; //column-major intra-patch switch
1140
NGA_Put(g_ident, lo, hi, work, &ld);
1141
NGA_Put(g_fock, lo, hi, orbs, &ld);
1146
// NGA_Put(g_ident,lo_c,hi_c,work,&ld);
1147
// NGA_Put(g_fock,lo_c,hi_c,orbs,&ld);
1149
dotask = next_chunk(lo, hi);
1151
GA_Symmetrize(g_fock);
1152
GA_Diag_seq(g_fock, g_ident, g_orbs, eval);
1158
// Form guess density from superposition of atomic densities in the AO;
1159
// basis set ... instead of doing the atomic SCF hardwire for this;
1160
// small basis set for the Be atom.;
1161
int one, itask, lo[2], hi[2], ld;
1163
double atdens[15][15] = {
1164
{0.000002,0.000027,0.000129,0.000428,0.000950,0.001180,
1165
0.000457,-0.000270,-0.000271,0.000004,0.000004,0.000004,
1166
0.000004,0.000004,0.000004},
1167
{0.000027,0.000102,0.000987,
1168
0.003269,0.007254,0.009007,0.003492,-0.002099,-0.002108,
1169
0.000035,0.000035,0.000035,0.000035,0.000035,0.000035},
1170
{0.000129,0.000987,0.002381,0.015766,0.034988,0.043433,
1171
0.016835,-0.010038,-0.010082,0.000166,0.000166,0.000166,
1172
0.000166,0.000166,0.000166},
1173
{0.000428,0.003269,0.015766,
1174
0.026100,0.115858,0.144064,0.055967,-0.035878,-0.035990,
1175
0.000584,0.000584,0.000584,0.000584,0.000584,0.000584},
1176
{0.000950,0.007254,0.034988,0.115858,0.128586,0.320120,
1177
0.124539,-0.083334,-0.083536,0.001346,0.001346,0.001346,
1178
0.001346,0.001346,0.001346},
1179
{0.001180,0.009007,0.043433,
1180
0.144064,0.320120,0.201952,0.159935,-0.162762,-0.162267,
1181
0.002471,0.002471,0.002471,0.002471,0.002471,0.002471},
1182
{0.000457,0.003492,0.016835,0.055967,0.124539,0.159935,
1183
0.032378,-0.093780,-0.093202,0.001372,0.001372,0.001372,
1184
0.001372,0.001372,0.001372},
1185
{-0.000270,-0.002099,-0.010038,
1186
-0.035878,-0.083334,-0.162762,-0.093780,0.334488,0.660918,
1187
-0.009090,-0.009090,-0.009090,-0.009090,-0.009090,-0.009090},
1188
{-0.000271,-0.002108,-0.010082,-0.035990,-0.083536,-0.162267,
1189
-0.093202,0.660918,0.326482,-0.008982,-0.008982,-0.008981,
1190
-0.008981,-0.008981,-0.008982},
1191
{0.000004,0.000035,0.000166,
1192
0.000584,0.001346,0.002471,0.001372,-0.009090,-0.008982,
1193
0.000062,0.000124,0.000124,0.000124,0.000124,0.000124},
1194
{0.000004,0.000035,0.000166,0.000584,0.001346,0.002471,
1195
0.001372,-0.009090,-0.008982,0.000124,0.000062,0.000124,
1196
0.000124,0.000124,0.000124},
1197
{0.000004,0.000035,0.000166,
1198
0.000584,0.001346,0.002471,0.001372,-0.009090,-0.008981,
1199
0.000124,0.000124,0.000062,0.000124,0.000124,0.000124},
1200
{0.000004,0.000035,0.000166,0.000584,0.001346,0.002471,
1201
0.001372,-0.009090,-0.008981,0.000124,0.000124,0.000124,
1202
0.000062,0.000124,0.000124},
1203
{0.000004,0.000035,0.000166,
1204
0.000584,0.001346,0.002471,0.001372,-0.009090,-0.008981,
1205
0.000124,0.000124,0.000124,0.000124,0.000062,0.000124},
1206
{0.000004,0.000035,0.000166,0.000584,0.001346,0.002471,
1207
0.001372,-0.009090,-0.008982,0.000124,0.000124,0.000124,
1208
0.000124,0.000124,0.000062}};
1210
//implicit declared variables in fortran
1213
double atdens_f[15][15];
1215
// Create initial guess for density matrix in global array;
1222
// Correct for a factor of two along the diagonal;
1223
for (i = 0; i < ld; i++) {
1224
atdens[i][i] = 2.00 * atdens[i][i];
1227
//to get a fortran based array by switching the indices
1228
for (i = 0; i < 15; i++) {
1229
for (j = 0; j < 15; j++) {
1230
atdens_f[j][i] = atdens[i][j];
1234
itask = NGA_Read_inc(g_counter, &one, 1);
1236
while(itask < natom) {
1238
lo[0] = ioff + 0; //1--->0
1239
lo[1] = ioff + 0; //1--->0
1240
hi[0] = ioff + 14;//15--->14
1241
hi[1] = ioff + 14;//15--->14
1242
//NGA_Put(g_dens,lo,hi,atdens,&ld);
1243
NGA_Put(g_dens, lo, hi, atdens_f, &ld);
1244
itask = NGA_Read_inc(g_counter, &one, 1);
1250
void setarrays(void)
1252
int one, two, dims[2];
1259
nproc = GA_Nnodes();
1261
g_counter = GA_Create_handle();
1262
GA_Set_data(g_counter,one,&one,MT_INT);
1263
status = GA_Allocate(g_counter);
1266
g_proc = GA_Create_handle();
1267
GA_Set_data(g_proc, one, &nproc, MT_INT);
1268
status = GA_Allocate(g_proc);
1273
g_dens = GA_Create_handle();
1274
GA_Set_data(g_dens, two, dims, MT_DBL);
1275
status = GA_Allocate(g_dens);
1278
g_schwarz = GA_Create_handle();
1279
GA_Set_data(g_schwarz, two, dims, MT_DBL);
1280
status = GA_Allocate(g_schwarz);
1283
g_fock = GA_Create_handle();
1284
GA_Set_data(g_fock, two, dims, MT_DBL);
1285
status = GA_Allocate(g_fock);
1288
g_tfock = GA_Create_handle();
1289
GA_Set_data(g_tfock, two, dims, MT_DBL);
1290
status = GA_Allocate(g_tfock);
1293
g_work = GA_Create_handle();
1294
GA_Set_data(g_work, two, dims, MT_DBL);
1295
status = GA_Allocate(g_work);
1298
g_ident = GA_Create_handle();
1299
GA_Set_data(g_ident, two, dims, MT_DBL);
1300
status = GA_Allocate(g_ident);
1303
g_orbs = GA_Create_handle();
1304
GA_Set_data(g_orbs, two, dims, MT_DBL);
1305
status = GA_Allocate(g_orbs);
1308
//temporay global array for storage major transformation
1309
g_tmp = GA_Create_handle();
1310
GA_Set_data(g_tmp, two, dims, MT_DBL);
1311
status = GA_Allocate(g_tmp);
1317
void closearrays(void)
1319
GA_Destroy(g_counter);
1322
GA_Destroy(g_schwarz);
1324
GA_Destroy(g_tfock);
1326
GA_Destroy(g_ident);
1328
//temporay global array for storage major transformation
1334
void makoverlap(void) //not used due to USE_TRANSFORM
1336
int me, lo[2], hi[2], ld[2];
1338
double *ptr; //int--->double
1341
NGA_Distribution(g_ident, me, lo, hi);
1342
NGA_Access(g_ident, lo, hi, &ptr, ld);
1343
ld1 = hi[0] - lo[0] + 1;
1344
ld2 = hi[1] - lo[1] + 1;
1346
setoverlap(ptr,lo,hi,ld1,ld2);
1347
NGA_Release(g_ident,lo,hi);
1351
void setoverlap(double *a,int* lo,int* hi, int ld1,int ld2) //not used due to USE_TRANSFORM
1355
for (i = 0;i < ld1;i++)
1358
for (j = 0;j < ld2;j++)
1369
//a[i][j] = s[ii][jj];
1370
a[ld1*i+j] = s[ii][jj];
1377
void print_GA_block(int g_a) //not used
1379
int lo[2], hi[2], ld1, ld2, ld;
1380
double *ptr; //int--->double
1382
//implicit declared variables in fortran
1386
NGA_Distribution(g_a, me, lo, hi);
1387
ld1 = hi[0] - lo[0] + 1;
1388
ld2 = hi[1] - lo[1] + 1;
1389
NGA_Access(g_a, lo, hi, &ptr, &ld);
1390
dump_chunk(ptr,ld1,ld2);
1391
NGA_Release(g_a,lo,hi);
1395
void print_GA_block_ij(int g_a,int tlo) //not used
1397
int lo[2], hi[2], ld1, ld2, ld;
1398
double *ptr; //int--->double
1400
//implicit declared variables in fortran
1404
NGA_Distribution(g_a, me, lo, hi);
1405
ld1 = hi[0] - lo[0] + 1;
1406
ld2 = hi[1] - lo[1] + 1;
1407
NGA_Access(g_a, &tlo, hi, &ptr, &ld);
1408
dump_chunk(ptr,ld1,ld2);
1409
NGA_Release(g_a,lo,hi);
1413
void dump_chunk(double *a,int ld1,int ld2) //not used
1415
//implicit declared variables in fortran
1419
for (i = 0; i < MIN(10,ld1);i++)
1421
for (j = 0; j < MIN(10,ld2); j++)
1423
//printf("%10.4f\n",a[i][j]);
1424
printf("%10.4f\n",a[MIN(10,ld2)*i+j]);
1430
//trace = trace +a[i][i];
1431
trace = trace + a[MIN(10,ld2)*i+j];
1433
printf("trace= %10.4f\n",trace);
1437
double contract_matrices(int g_a, int g_b)
1439
int lo[2], hi[2], ptr_a, ptr_b, ld, ld1, ld2;
1440
double a[ichunk][ichunk],b[ichunk][ichunk];
1444
//implicit declared variables in fortran
1449
// evalute sum_ij a_ij*b_ij;
1452
dotask = next_chunk(lo, hi);
1455
NGA_Get(g_a, lo, hi, a, &ld);
1456
NGA_Get(g_b, lo, hi, b, &ld);
1458
for (i = 0; i <= hi[0] - lo[0]; i++) {
1459
for (j = 0; j <= hi[1] - lo[1]; j++) {
1460
value += a[i][j] * b[i][j];
1464
/* for (j = 0; j <= hi[0] - lo[0]; j++) //column-major switch */
1466
/* for (i = 0;i <= hi[1]-lo[1];i++) */
1468
/* value = value + a[j][i]*b[j][i]; */
1472
dotask = next_chunk(lo, hi);
1474
GA_Dgop(&value, 1, "+");
1480
double abs_double(double x)