2
C$Id: ft-scf.F,v 1.1 2005-07-07 17:41:07 vinod Exp $
3
implicit double precision (a-h, o-z)
9
#define USE_TRANSFORM 1
11
data tinit, tonel, ttwoel, tdiag, tdens, tprint /6*0.0d0/
12
data eone, etwo, energy, deltad /4*0.0d0/
14
c initalize the parallel message passing environment
22
call ga_set_spare_procs(2);
29
if (.not.ma_init(MT_DBL, stack, heap))
30
+ call ga_error("ma_init failed",-1)
34
c initialize a bunch of stuff and initial density matrix
38
c get input from file be.inpt
42
c create and allocate global arrays
45
write(6,*) 'done with setarrays'
47
write(6,*) '1-done with setarrays'
49
c create initial guess for density matrix by using single atom
56
c make initial orthogonal orbital set for solution method using
57
c similarity transform
62
c make info for sparsity test
69
call ga_igop(6, icut1, 1, '+')
70
call ga_igop(7, icut2, 1, '+')
71
call ga_igop(8, icut3, 1, '+')
74
c print out timing information
77
write(6,1) tinit, tonel, ttwoel, tdiag, tdens, tprint,
79
1 format(/' init onel twoel diag dens print ncpu'/
80
$ ' ------ ------ ------ ------ ------ ------ ------'/
83
c print out information on # integrals evaulated each iteration
85
nints = nnbfn*(nnbfn+1)/2
87
frac = dble(icut3)/dble(nints)
88
write(6,2) icut1, icut2, icut3, nints, frac
89
2 format(/' No. of integrals screened or computed '
90
$ /' -------------------------------------'/
91
$ /1x,' #ij test #kl test #compute #total',
93
$ /1x,' --------- --------- --------- ---------',
109
subroutine iterate(niter)
110
implicit double precision (a-h, o-z)
112
#include "mafdecls.fh"
115
do 10 iter = 1, niter
116
#ifdef CHECKPOINT_SCF
117
call ga_checkpoint_arrays(arraylist,numarrays)
120
c make the one particle contribution to the fock matrix
121
c and get the partial contribution to the energy
123
call oneel(schwmax, eone)
124
tonel = tonel + timer()
126
c compute the two particle contributions to the fock matrix and
127
c get the total energy.
129
call twoel(schwmax, etwo)
130
ttwoel = ttwoel + timer()
132
c Diagonalize the fock matrix. The diagonalizers used in this
133
c subroutine are actually sequential, not parallel.
135
call diagon(tester,iter)
136
tdiag = tdiag + timer()
138
c make the new density matrix in g_work from orbitals in g_orbs,
139
c compute the norm of the change in the density matrix and
140
c then update the density matrix in g_dens with damping.
146
else if (iter .le. 5) then
147
if (nbfn .gt. 60) then
156
tdens = tdens + timer()
158
c add up energy and print out convergence information
161
energy = enrep + eone + etwo
162
call prnout(iter, energy, deltad, tester)
163
tprint = tprint + timer()
166
c if converged then exit iteration loop
168
if (deltad .lt. tol) goto 20
171
$ write(6,*) ' SCF failed to converge in ', niter, ' iters'
173
c finished ... print out eigenvalues and occupied orbitals
178
subroutine makesz(schwmax)
179
implicit double precision (a-h, o-z)
181
#include "mafdecls.fh"
184
dimension work(ichunk,ichunk)
185
integer lo(2),hi(2),i,j,iloc,jloc,ld
188
c schwarz(ij) = (ij|ij) for sparsity test
194
call ga_zero(g_schwarz)
195
call ga_zero(g_counter)
197
dotask = next_chunk(lo,hi)
205
work(iloc,jloc) = sqrt(gg)
206
schwmax = max(schwmax, work(iloc,jloc))
209
call nga_put(g_schwarz,lo,hi,work,ld)
210
dotask = next_chunk(lo,hi)
212
call ga_dgop(11,schwmax,1,'max')
218
implicit double precision (a-h, o-z)
220
#include "mafdecls.fh"
224
c write a little welcome message
226
if (ga_nodeid().eq.0) write(6,1) natom, nocc, nbfn, tol
227
1 format(/' Example Direct Self Consistent Field Program '/
228
$ ' -------------------------------------------- '//
229
$ ' no. of atoms ............... ',i3/
230
$ ' no. of occupied orbitals ... ',i3/
231
$ ' no. of basis functions ..... ',i3/
232
$ ' convergence threshold ...... ',d9.2//)
234
c generate normalisation coefficients for the basis functions
235
c and the index array iky
242
rnorm(i) = (expnt(i)*2.0d0/pi)**0.75d0
245
c initialize common for computing f0
250
double precision function h(i,j)
251
implicit double precision (a-h, o-z)
253
#include "mafdecls.fh"
259
c generate the one particle hamiltonian matrix element
260
c over the normalized primitive 1s functions i and j
264
rab2 = (x(i)-x(j))**2 + (y(i)-y(j))**2 + (z(i)-z(j))**2
265
facij = expnt(i)*expnt(j)/(expnt(i)+expnt(j))
266
expij = exprjh(-facij*rab2)
267
repij = (2.0d0*pi/(expnt(i)+expnt(j))) * expij
269
c first do the nuclear attraction integrals
272
xp = (x(i)*expnt(i) + x(j)*expnt(j))/(expnt(i)+expnt(j))
273
yp = (y(i)*expnt(i) + y(j)*expnt(j))/(expnt(i)+expnt(j))
274
zp = (z(i)*expnt(i) + z(j)*expnt(j))/(expnt(i)+expnt(j))
275
rpc2 = (xp-ax(iat))**2 + (yp-ay(iat))**2 + (zp-az(iat))**2
277
call f0(f0val, (expnt(i)+expnt(j))*rpc2)
278
sum = sum - repij * q(iat) * f0val
281
c add on the kinetic energy term
283
sum = sum + facij*(3.0d0-2.0d0*facij*rab2) *
284
$ (pi/(expnt(i)+expnt(j)))**1.5d0 * expij
286
c finally multiply by the normalization constants
288
h = sum * rnorm(i) * rnorm(j)
291
double precision function s(i,j)
292
implicit double precision (a-h, o-z)
294
#include "mafdecls.fh"
298
c generate the overlap matrix element between the normalized
299
c primitve gaussian 1s functions i and j
301
rab2 = (x(i)-x(j))**2 + (y(i)-y(j))**2 + (z(i)-z(j))**2
302
facij = expnt(i)*expnt(j)/(expnt(i)+expnt(j))
303
s = (pi/(expnt(i)+expnt(j)))**1.5d0 * exprjh(-facij*rab2) *
308
implicit double precision (a-h, o-z)
310
#include "mafdecls.fh"
313
c dimension work(maxnbfn,maxnbfn), torbs(maxnbfn,maxnbfn)
314
dimension work(ichunk,ichunk), orbsi(ichunk,maxnbfn)
315
dimension orbsj(ichunk,maxnbfn)
316
integer lo(2), hi(2), tlo(2), thi(2), i, j, iloc, jloc, ld
319
c generate density matrix from orbitals in g_orbs. the first
320
c nocc orbitals are doubly occupied.
322
call ga_zero(g_counter)
323
dotask = next_chunk(lo,hi)
330
call nga_get(g_orbs,tlo,thi,orbsi,ld)
333
call nga_get(g_orbs,tlo,thi,orbsj,ld)
340
p = p + orbsi(iloc,k)*orbsj(jloc,k)
342
work(iloc,jloc) = 2.0d00*p
345
call nga_put(g_work,lo,hi,work,ld)
346
dotask = next_chunk(lo,hi)
351
subroutine oneel(schwmax, eone)
352
implicit double precision (a-h, o-z)
354
#include "mafdecls.fh"
357
integer lo(2), hi(2), i, j, iloc, jloc, ld
358
dimension dens(nnbfn), fock(nnbfn), schwarz(nnbfn)
359
dimension work(ichunk,ichunk),tfock(ichunk,ichunk)
362
c fill in the one-electron part of the fock matrix and
363
c compute the one-electron energy contribution
368
call ga_zero(g_counter)
369
dotask = next_chunk(lo,hi)
372
call nga_get(g_schwarz,lo,hi,work,ld)
377
tfock(iloc,jloc) = 0.0d00
378
if (work(iloc,jloc)*schwmax.gt.tol2e)
379
+ tfock(iloc,jloc) = h(i,j)
382
call nga_put(g_fock,lo,hi,tfock,ld)
383
dotask = next_chunk(lo,hi)
385
eone = 0.5d00*contract_matrices(g_fock,g_dens)
388
integer function nxtask(nproc)
389
parameter (ichunk = 10)
391
data nleft, icount /0, 0/
393
c wrapper round nxtval() to increase granularity
394
c and thus reduce no. of requests to shared counter
398
icount = nxtval(nproc) * ichunk
410
c following does dumb static load balancing
412
c$$$ if(nproc.gt.0) then
413
c$$$ if (nleft .eq. 0) then
414
c$$$ icount = ga_nodeid()
418
c$$$ icount = icount + ga_nnodes()
425
logical function next_chunk(lo,hi)
429
integer imax, lo(2), hi(2), ilo, jlo
430
itask = nga_read_inc(g_counter,one,one)
432
if (itask.lt.imax*imax) then
433
if (nbfn - ichunk*imax.gt.0) imax = imax + 1
434
ilo = mod(itask,imax)
435
jlo = (itask-ilo)/imax
436
lo(1) = ilo*ichunk + 1
437
lo(2) = jlo*ichunk + 1
438
hi(1) = min((ilo+1)*ichunk,nbfn)
439
hi(2) = min((jlo+1)*ichunk,nbfn)
447
logical function next_4chunk(lo,hi,ilo,jlo,klo,llo)
451
integer imax, lo(4), hi(4), ilo, jlo, klo, llo, itmp
452
itask = nga_read_inc(g_counter,one,one)
454
if (nbfn - ichunk*imax.gt.0) imax = imax + 1
455
if (itask.lt.imax**4) then
456
ilo = mod(itask,imax)
457
itmp = (itask - ilo)/imax
459
itmp = (itmp - jlo)/imax
461
llo = (itmp - klo)/imax
462
lo(1) = ilo*ichunk + 1
463
lo(2) = jlo*ichunk + 1
464
lo(3) = klo*ichunk + 1
465
lo(4) = llo*ichunk + 1
466
hi(1) = min((ilo+1)*ichunk,nbfn)
467
hi(2) = min((jlo+1)*ichunk,nbfn)
468
hi(3) = min((klo+1)*ichunk,nbfn)
469
hi(4) = min((llo+1)*ichunk,nbfn)
472
next_4chunk = .false.
477
subroutine clean_chunk(chunk)
479
double precision chunk(ichunk,ichunk)
489
subroutine twoel(schwmax, etwo)
490
implicit double precision (a-h, o-z)
492
#include "mafdecls.fh"
495
double precision f_ij(ichunk,ichunk),d_kl(ichunk,ichunk)
496
double precision f_ik(ichunk,ichunk),d_jl(ichunk,ichunk)
497
double precision s_ij(ichunk,ichunk),s_kl(ichunk,ichunk)
498
double precision schwmax, one
499
integer nproc,lo(4),hi(4),ld,ich
500
integer lo_ik(2),hi_ik(2),lo_jl(2),hi_jl(2)
501
integer i,j,k,l,iloc,jloc,kloc,lloc,it,jt,kt,lt
502
logical dotask, next_4chunk
504
c add in the two-electron contribution to the fock matrix
509
call ga_zero(g_counter)
512
dotask = next_4chunk(lo,hi,it,jt,kt,lt)
523
call nga_get(g_schwarz,lo,hi,s_ij,ich)
524
call nga_get(g_schwarz,lo(3),hi(3),s_kl,ich)
525
call nga_get(g_dens,lo(3),hi(3),d_kl,ich)
526
call nga_get(g_dens,lo_jl,hi_jl,d_jl,ich)
528
call clean_chunk(f_ij)
529
call clean_chunk(f_ik)
534
if (s_ij(iloc,jloc)*schwmax .lt. tol2e) then
535
icut1 = icut1 + (hi(1)-lo(1)+1)*(hi(2)-lo(2)+1)
541
if (s_ij(iloc,jloc)*s_kl(kloc,lloc).lt.tol2e) then
544
call g(gg, i, j, k, l)
545
f_ij(iloc,jloc) = f_ij(iloc,jloc)
546
+ + gg*d_kl(kloc,lloc)
547
f_ik(iloc,kloc) = f_ik(iloc,kloc)
548
+ - 0.5d00*gg*d_jl(jloc,lloc)
556
call nga_acc(g_fock,lo,hi,f_ij,ich,one)
557
call nga_acc(g_fock,lo_ik,hi_ik,f_ik,ich,one)
558
dotask = next_4chunk(lo,hi,it,jt,kt,lt)
560
etwo = 0.5d00*contract_matrices(g_fock,g_dens)
565
implicit double precision (a-h, o-z)
567
#include "mafdecls.fh"
571
c create damped density matrix as a linear combination of
572
c old density matrix and density matrix formed from new orbitals
575
call ga_add(fac,g_dens,ofac,g_work,g_dens)
579
subroutine prnout(iter, energy, deltad, tester)
580
implicit double precision (a-h, o-z)
582
#include "mafdecls.fh"
586
c printout results of each iteration
588
if (ga_nodeid().ne.0) return
589
write(6,1) iter, energy, deltad, tester
591
1 format(' iter=',i3,', energy=',f13.8,', deltad=',d9.2,
596
double precision function dendif()
597
implicit double precision (a-h, o-z)
599
#include "mafdecls.fh"
602
double precision xdiff
603
dimension dens_c(ichunk,ichunk),work_c(ichunk,ichunk)
604
integer lo(2), hi(2), i, j, ld
607
c compute largest change in density matrix elements
610
call ga_zero(g_counter)
611
dotask = next_chunk(lo,hi)
614
call nga_get(g_dens,lo,hi,dens_c,ld)
615
call nga_get(g_work,lo,hi,work_c,ld)
616
do j = 1, hi(2)-lo(2)+1
617
do i = 1, hi(1)-lo(1)+1
618
xdiff = abs(dens_c(i,j)-work_c(i,j))
619
if (xdiff.gt.denmax) denmax = xdiff
622
dotask = next_chunk(lo,hi)
624
call ga_dgop(1,denmax,1,'max')
629
double precision function testfock()
630
implicit double precision (a-h, o-z)
632
#include "mafdecls.fh"
635
double precision xmax, xtmp
636
dimension work(ichunk,ichunk)
637
integer lo(2), hi(2), i, j, iloc, jloc, ld
640
c compute largest change in density matrix elements
643
call ga_zero(g_counter)
644
dotask = next_chunk(lo,hi)
647
call nga_get(g_fock,lo,hi,work,ld)
653
xtmp = abs(work(iloc,jloc))
654
if (xtmp.gt.xmax) xmax = xtmp
658
dotask = next_chunk(lo,hi)
660
call ga_dgop(1,xmax,1,'max')
665
subroutine shiftfock(shift)
666
implicit double precision (a-h, o-z)
668
#include "mafdecls.fh"
671
double precision shift
672
dimension work(ichunk,ichunk)
673
integer lo(2), hi(2), i, j, iloc, jloc, ld, icnt
676
c compute largest change in density matrix elements
678
call ga_zero(g_counter)
679
dotask = next_chunk(lo,hi)
682
call nga_get(g_fock,lo,hi,work,ld)
688
if (i.eq.j.and.i.gt.nocc) then
689
work(iloc,jloc) = work(iloc,jloc) + shift
694
if (icnt.gt.0) call nga_put(g_fock,lo,hi,work,ld)
695
dotask = next_chunk(lo,hi)
700
subroutine prnfin(energy)
701
implicit double precision (a-h, o-z)
703
#include "mafdecls.fh"
706
dimension orbs(maxnbfn, maxnbfn)
707
integer lo(2),hi(2),ld
709
c printout final results
711
if (ga_nodeid().ne.0) return
713
1 format(//' final energy = ',f16.11//' eigenvalues')
714
call output(eigv, 1, min(nbfn,nocc+5), 1, 1, nbfn, 1, 1)
716
2 format(//' eigenvectors ')
722
call nga_get(g_orbs,lo,hi,orbs,ld)
723
call output(orbs, 1, nbfn, 1, nocc, nbfn, nbfn, 1)
727
subroutine g(value,i,j,k,l)
728
implicit double precision (a-h, o-z)
730
#include "mafdecls.fh"
734
c compute the two electon integral (ij|kl) over normalized
735
c primitive 1s gaussians
738
rab2 = (x(i)-x(j))**2 + (y(i)-y(j))**2 + (z(i)-z(j))**2
739
rcd2 = (x(k)-x(l))**2 + (y(k)-y(l))**2 + (z(k)-z(l))**2
740
facij = expnt(i)*expnt(j)/(expnt(i)+expnt(j))
741
fackl = expnt(k)*expnt(l)/(expnt(k)+expnt(l))
742
exijkl = exprjh(- facij*rab2 - fackl*rcd2)
743
denom = (expnt(i)+expnt(j))*(expnt(k)+expnt(l)) *
744
$ sqrt(expnt(i)+expnt(j)+expnt(k)+expnt(l))
745
fac = (expnt(i)+expnt(j))*(expnt(k)+expnt(l)) /
746
$ (expnt(i)+expnt(j)+expnt(k)+expnt(l))
748
xp = (x(i)*expnt(i) + x(j)*expnt(j))/(expnt(i)+expnt(j))
749
yp = (y(i)*expnt(i) + y(j)*expnt(j))/(expnt(i)+expnt(j))
750
zp = (z(i)*expnt(i) + z(j)*expnt(j))/(expnt(i)+expnt(j))
751
xq = (x(k)*expnt(k) + x(l)*expnt(l))/(expnt(k)+expnt(l))
752
yq = (y(k)*expnt(k) + y(l)*expnt(l))/(expnt(k)+expnt(l))
753
zq = (z(k)*expnt(k) + z(l)*expnt(l))/(expnt(k)+expnt(l))
754
rpq2 = (xp-xq)**2 + (yp-yq)**2 + (zp-zq)**2
756
call f0(f0val, fac*rpq2)
757
value = (2.0d0 * pi**2.5d0 / denom) * exijkl * f0val *
758
$ rnorm(i)*rnorm(j)*rnorm(k)*rnorm(l)
762
subroutine diagon(tester, iter)
763
c subroutine diagon(fock, orbs, evals, work, tester, iter)
764
implicit double precision (a-h, o-z)
766
#include "mafdecls.fh"
769
double precision r_zero, r_one, shift, tester
773
c use similarity transform to solve standard eigenvalue problem
774
c (overlap matrix has been transformed out of the problem)
778
call ga_dgemm('n','n',nbfn,nbfn,nbfn,r_one,g_fock,g_orbs,
780
call ga_dgemm('t','n',nbfn,nbfn,nbfn,r_one,g_orbs,g_tfock,
784
if (tester.gt.0.3d0) then
787
if (iter.ge.2.and.shift.ne.0.0d00) then
788
call shiftfock(shift)
790
call ga_copy(g_orbs,g_tfock)
791
call ga_diag_std_seq(g_fock, g_work, eigv)
793
c Back transform eigenvectors
795
call ga_dgemm('n','n',nbfn,nbfn,nbfn,r_one,g_tfock,g_work,
797
if (iter.ge.2.and.shift.ne.0.0d00) then
798
do 50 i = nocc+1, nbfn
799
eigv(i) = eigv(i) - shift
804
c Keep remaking overlap matrix since ga_diag_seq does not
805
c guarantee that g_ident is preserved.
808
call ga_diag_seq(g_fock, g_ident, g_orbs, eigv)
815
implicit double precision (a-h, o-z)
817
#include "mafdecls.fh"
820
double precision work(ichunk,ichunk),orbs(ichunk,ichunk)
821
double precision eval(maxnbfn)
822
integer lo(2),hi(2),ld,me,i,j,iloc,jloc
825
c generate set of orthonormal vectors by creating a random
826
c symmetric matrix and solving associated generalized eigenvalue
827
c problem using the correct overlap matrix.
830
call ga_zero(g_counter)
831
dotask = next_chunk(lo,hi)
838
work(iloc,jloc) = s(i,j)
839
orbs(iloc,jloc) = drand48(0)
842
call nga_put(g_ident,lo,hi,work,ld)
843
call nga_put(g_fock,lo,hi,orbs,ld)
844
dotask = next_chunk(lo,hi)
846
call ga_symmetrize(g_fock)
847
call ga_diag_seq(g_fock, g_ident, g_orbs, eval)
853
implicit double precision (a-h, o-z)
855
#include "mafdecls.fh"
859
c Form guess density from superposition of atomic densities in the AO
860
c basis set ... instead of doing the atomic SCF hardwire for this
861
c small basis set for the Be atom.
863
integer one, itask, lo(2), hi(2), ld
864
dimension atdens(15,15)
866
$ 0.000002,0.000027,0.000129,0.000428,0.000950,0.001180,
867
$ 0.000457,-0.000270,-0.000271,0.000004,0.000004,0.000004,
868
$ 0.000004,0.000004,0.000004,0.000027,0.000102,0.000987,
869
$ 0.003269,0.007254,0.009007,0.003492,-0.002099,-0.002108,
870
$ 0.000035,0.000035,0.000035,0.000035,0.000035,0.000035,
871
$ 0.000129,0.000987,0.002381,0.015766,0.034988,0.043433,
872
$ 0.016835,-0.010038,-0.010082,0.000166,0.000166,0.000166,
873
$ 0.000166,0.000166,0.000166,0.000428,0.003269,0.015766,
874
$ 0.026100,0.115858,0.144064,0.055967,-0.035878,-0.035990,
875
$ 0.000584,0.000584,0.000584,0.000584,0.000584,0.000584,
876
$ 0.000950,0.007254,0.034988,0.115858,0.128586,0.320120,
877
$ 0.124539,-0.083334,-0.083536,0.001346,0.001346,0.001346,
878
$ 0.001346,0.001346,0.001346,0.001180,0.009007,0.043433,
879
$ 0.144064,0.320120,0.201952,0.159935,-0.162762,-0.162267,
880
$ 0.002471,0.002471,0.002471,0.002471,0.002471,0.002471,
881
$ 0.000457,0.003492,0.016835,0.055967,0.124539,0.159935,
882
$ 0.032378,-0.093780,-0.093202,0.001372,0.001372,0.001372,
883
$ 0.001372,0.001372,0.001372,-0.000270,-0.002099,-0.010038,
884
$ -0.035878,-0.083334,-0.162762,-0.093780,0.334488,0.660918,
885
$ -0.009090,-0.009090,-0.009090,-0.009090,-0.009090,-0.009090,
886
$ -0.000271,-0.002108,-0.010082,-0.035990,-0.083536,-0.162267,
887
$ -0.093202,0.660918,0.326482,-0.008982,-0.008982,-0.008981,
888
$ -0.008981,-0.008981,-0.008982,0.000004,0.000035,0.000166,
889
$ 0.000584,0.001346,0.002471,0.001372,-0.009090,-0.008982,
890
$ 0.000062,0.000124,0.000124,0.000124,0.000124,0.000124,
891
$ 0.000004,0.000035,0.000166,0.000584,0.001346,0.002471,
892
$ 0.001372,-0.009090,-0.008982,0.000124,0.000062,0.000124,
893
$ 0.000124,0.000124,0.000124,0.000004,0.000035,0.000166,
894
$ 0.000584,0.001346,0.002471,0.001372,-0.009090,-0.008981,
895
$ 0.000124,0.000124,0.000062,0.000124,0.000124,0.000124,
896
$ 0.000004,0.000035,0.000166,0.000584,0.001346,0.002471,
897
$ 0.001372,-0.009090,-0.008981,0.000124,0.000124,0.000124,
898
$ 0.000062,0.000124,0.000124,0.000004,0.000035,0.000166,
899
$ 0.000584,0.001346,0.002471,0.001372,-0.009090,-0.008981,
900
$ 0.000124,0.000124,0.000124,0.000124,0.000062,0.000124,
901
$ 0.000004,0.000035,0.000166,0.000584,0.001346,0.002471,
902
$ 0.001372,-0.009090,-0.008982,0.000124,0.000124,0.000124,
903
$ 0.000124,0.000124,0.000062/
905
c Create initial guess for density matrix in global array
908
call ga_zero(g_counter)
912
c Correct for a factor of two along the diagonal
915
atdens(i,i) = 2.0d00*atdens(i,i)
917
itask = nga_read_inc(g_counter,one,one)
918
do while(itask.lt.natom)
924
call nga_put(g_dens,lo,hi,atdens,ld)
925
itask = nga_read_inc(g_counter,one,one)
932
implicit double precision (a-h, o-z)
934
#include "mafdecls.fh"
937
integer one, two, dims(2)
941
g_counter = ga_create_handle()
942
call ga_set_data(g_counter,one,one,MT_INT)
943
status = ga_allocate(g_counter)
944
call ga_zero(g_counter)
948
g_dens = ga_create_handle()
949
call ga_set_data(g_dens, two, dims, MT_DBL)
950
status = ga_allocate(g_dens)
953
g_schwarz = ga_create_handle()
954
call ga_set_data(g_schwarz, two, dims, MT_DBL)
955
status = ga_allocate(g_schwarz)
956
call ga_zero(g_schwarz)
958
g_fock = ga_create_handle()
959
call ga_set_data(g_fock, two, dims, MT_DBL)
960
status = ga_allocate(g_fock)
963
g_tfock = ga_create_handle()
964
call ga_set_data(g_tfock, two, dims, MT_DBL)
965
status = ga_allocate(g_tfock)
966
call ga_zero(g_tfock)
968
g_work = ga_create_handle()
969
call ga_set_data(g_work, two, dims, MT_DBL)
970
status = ga_allocate(g_work)
972
#ifdef CHECKPOINT_SCF
973
arraylist(1) = g_counter
974
arraylist(2) = g_dens
975
arraylist(3) = g_schwarz
976
arraylist(4) = g_fock
977
arraylist(5) = g_work
978
write (6,*) arraylist(1),arraylist(2),arraylist(3)
979
call ga_checkpoint_arrays(arraylist,numarrays)
982
g_ident = ga_create_handle()
983
call ga_set_data(g_ident, two, dims, MT_DBL)
984
status = ga_allocate(g_ident)
985
call ga_zero(g_ident)
987
g_orbs = ga_create_handle()
988
call ga_set_data(g_orbs, two, dims, MT_DBL)
989
status = ga_allocate(g_orbs)
994
subroutine closearrays
995
implicit double precision (a-h, o-z)
997
#include "mafdecls.fh"
1002
status = ga_destroy(g_counter)
1003
status = ga_destroy(g_dens)
1004
status = ga_destroy(g_schwarz)
1005
status = ga_destroy(g_fock)
1006
status = ga_destroy(g_tfock)
1007
status = ga_destroy(g_work)
1008
status = ga_destroy(g_ident)
1009
status = ga_destroy(g_orbs)
1014
subroutine makoverlap
1015
implicit double precision (a-h, o-z)
1017
#include "mafdecls.fh"
1018
#include "global.fh"
1019
#include "tcgmsg.fh"
1020
integer me, lo(2), hi(2), ptr, ld(2)
1023
call nga_distribution(g_ident, me, lo, hi)
1024
call nga_access(g_ident, lo, hi, ptr, ld)
1025
ld1 = hi(1) - lo(1) + 1
1026
ld2 = hi(2) - lo(2) + 1
1027
call setoverlap(dbl_mb(ptr),lo,hi,ld1,ld2)
1028
call nga_release(g_ident)
1032
subroutine setoverlap(a,lo,hi,ld1,ld2)
1034
#include "mafdecls.fh"
1035
#include "global.fh"
1036
#include "tcgmsg.fh"
1037
integer lo(2), hi(2)
1038
integer ld1, ld2, ii, jj
1039
double precision a(ld1,ld2)
1058
subroutine print_ga_block(g_a)
1059
implicit double precision(a-h,o-z)
1061
#include "mafdecls.fh"
1062
#include "global.fh"
1063
#include "tcgmsg.fh"
1064
integer lo(2), hi(2), ptr, ld1, ld2
1067
call nga_distribution(g_a, me, lo, hi)
1068
ld1 = hi(1) - lo(1) + 1
1069
ld2 = hi(2) - lo(2) + 1
1070
call nga_access(g_a, lo, hi, ptr, ld)
1071
call dump_chunk(dbl_mb(ptr),ld1,ld2)
1072
call nga_release(g_a)
1077
subroutine print_ga_block_ij(g_a,tlo)
1078
implicit double precision(a-h,o-z)
1080
#include "mafdecls.fh"
1081
#include "global.fh"
1082
#include "tcgmsg.fh"
1083
integer lo(2), hi(2), ptr, ld1, ld2
1086
call nga_distribution(g_a, me, lo, hi)
1087
ld1 = hi(1) - lo(1) + 1
1088
ld2 = hi(2) - lo(2) + 1
1089
call nga_access(g_a, tlo, hi, ptr, ld)
1090
call dump_chunk(dbl_mb(ptr),ld1,ld2)
1091
call nga_release(g_a)
1096
subroutine dump_chunk(a,ld1,ld2)
1097
implicit double precision (a-h, o-z)
1099
#include "mafdecls.fh"
1100
#include "global.fh"
1101
#include "tcgmsg.fh"
1103
double precision a(ld1, ld2)
1104
do i = 1, min(10,ld1)
1105
write(6,100) (a(i,j), j = 1, min(10,ld2))
1112
double precision function contract_matrices(g_a,g_b)
1113
implicit double precision(a-h,o-z)
1115
#include "mafdecls.fh"
1116
#include "global.fh"
1117
#include "tcgmsg.fh"
1118
integer lo(2), hi(2), ptr_a, ptr_b, ld, ld1, ld2
1119
double precision a(ichunk,ichunk),b(ichunk,ichunk)
1120
double precision value
1123
c evalute sum_ij a_ij*b_ij
1126
call ga_zero(g_counter)
1127
dotask = next_chunk(lo,hi)
1130
call nga_get(g_a,lo,hi,a,ld)
1131
call nga_get(g_b,lo,hi,b,ld)
1132
do j = 1, hi(2)-lo(2)+1
1133
do i = 1, hi(1)-lo(1)+1
1134
value = value + a(i,j)*b(i,j)
1137
dotask = next_chunk(lo,hi)
1139
call ga_dgop(3,value,1,'+')
1140
contract_matrices=value