~ubuntu-branches/ubuntu/saucy/nwchem/saucy

« back to all changes in this revision

Viewing changes to src/tools/ga-4-3/global/examples/scf/ft-scf.F

  • Committer: Package Import Robot
  • Author(s): Michael Banck, Michael Banck, Daniel Leidert
  • Date: 2012-02-09 20:02:41 UTC
  • mfrom: (1.1.1)
  • Revision ID: package-import@ubuntu.com-20120209200241-jgk03qfsphal4ug2
Tags: 6.1-1
* New upstream release.

[ Michael Banck ]
* debian/patches/02_makefile_flags.patch: Updated.
* debian/patches/02_makefile_flags.patch: Use internal blas and lapack code.
* debian/patches/02_makefile_flags.patch: Define GCC4 for LINUX and LINUX64
  (Closes: #632611 and LP: #791308).
* debian/control (Build-Depends): Added openssh-client.
* debian/rules (USE_SCALAPACK, SCALAPACK): Removed variables (Closes:
  #654658).
* debian/rules (LIBDIR, USE_MPIF4, ARMCI_NETWORK): New variables.
* debian/TODO: New file.
* debian/control (Build-Depends): Removed libblas-dev, liblapack-dev and
  libscalapack-mpi-dev.
* debian/patches/04_show_testsuite_diff_output.patch: New patch, shows the
  diff output for failed tests.
* debian/patches/series: Adjusted.
* debian/testsuite: Optionally run all tests if "all" is passed as option.
* debian/rules: Run debian/testsuite with "all" if DEB_BUILD_OPTIONS
  contains "checkall".

[ Daniel Leidert ]
* debian/control: Used wrap-and-sort. Added Vcs-Svn and Vcs-Browser fields.
  (Priority): Moved to extra according to policy section 2.5.
  (Standards-Version): Bumped to 3.9.2.
  (Description): Fixed a typo.
* debian/watch: Added.
* debian/patches/03_hurd-i386_define_path_max.patch: Added.
  - Define MAX_PATH if not defines to fix FTBFS on hurd.
* debian/patches/series: Adjusted.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
      program scf
 
2
C$Id: ft-scf.F,v 1.1 2005-07-07 17:41:07 vinod Exp $
 
3
      implicit double precision (a-h, o-z)
 
4
      include 'cscf.h'
 
5
#include "mafdecls.fh"
 
6
#include "global.fh"
 
7
#include "tcgmsg.fh"
 
8
 
 
9
#define USE_TRANSFORM 1
 
10
      integer heap, stack
 
11
      data tinit, tonel, ttwoel, tdiag, tdens, tprint /6*0.0d0/
 
12
      data eone, etwo, energy, deltad /4*0.0d0/
 
13
c     
 
14
c     initalize the parallel message passing environment
 
15
c     
 
16
#ifdef MPI
 
17
      integer ierr
 
18
      call mpi_init(ierr)
 
19
#else
 
20
      call pbeginf
 
21
#endif
 
22
      call ga_set_spare_procs(2);
 
23
      call ga_initialize()
 
24
c
 
25
c   Allocate memory
 
26
c
 
27
      heap = 2000000
 
28
      stack = 2000000
 
29
      if (.not.ma_init(MT_DBL, stack, heap))
 
30
     +   call ga_error("ma_init failed",-1)
 
31
      me = ga_nodeid()
 
32
      nproc = ga_nnodes()
 
33
c     
 
34
c     initialize a bunch of stuff and initial density matrix
 
35
c     
 
36
      rjunk = timer()
 
37
c
 
38
c     get input from file be.inpt
 
39
c
 
40
      call input
 
41
c
 
42
c     create and allocate global arrays
 
43
c
 
44
      call setarrays
 
45
      write(6,*) 'done with setarrays'
 
46
      call ininrm
 
47
      write(6,*) '1-done with setarrays'
 
48
c
 
49
c     create initial guess for density matrix by using single atom
 
50
c     densities
 
51
c
 
52
      call denges
 
53
      tinit = timer()
 
54
#if USE_TRANSFORM
 
55
c
 
56
c     make initial orthogonal orbital set for solution method using
 
57
c     similarity transform
 
58
c
 
59
      call makeob
 
60
#endif
 
61
c     
 
62
c     make info for sparsity test
 
63
c     
 
64
         call makesz(schwmax)
 
65
c     
 
66
c     iterate
 
67
      call iterate(10)
 
68
c     
 
69
      call ga_igop(6, icut1, 1, '+')
 
70
      call ga_igop(7, icut2, 1, '+')
 
71
      call ga_igop(8, icut3, 1, '+')
 
72
      if (me.eq.0) then
 
73
c     
 
74
c     print out timing information
 
75
c     
 
76
         call prnfin(energy)
 
77
         write(6,1) tinit, tonel, ttwoel, tdiag, tdens, tprint,
 
78
     $        nproc
 
79
 1       format(/'   init   onel  twoel   diag   dens   print  ncpu'/
 
80
     $        '  ------ ------ ------ ------ ------ ------ ------'/
 
81
     $        1x, 6f7.2, i7/)
 
82
c     
 
83
c     print out information on # integrals evaulated each iteration
 
84
c     
 
85
         nints = nnbfn*(nnbfn+1)/2
 
86
         nints = nbfn**4
 
87
         frac  = dble(icut3)/dble(nints)
 
88
         write(6,2) icut1, icut2, icut3, nints, frac
 
89
 2       format(/'       No. of integrals screened or computed '
 
90
     $        /'       -------------------------------------'/
 
91
     $        /1x,'   #ij test   #kl test   #compute     #total',
 
92
     $        '  fraction',
 
93
     $        /1x,'  ---------  ---------  ---------  ---------',
 
94
     $        '  --------',
 
95
     $        /1x,4(2x,i9),f9.3)
 
96
         call stats
 
97
      endif
 
98
c     
 
99
      call closearrays
 
100
      call ga_terminate
 
101
#ifdef MPI
 
102
      call mpi_finalize
 
103
#else
 
104
      call pend
 
105
#endif
 
106
c     
 
107
      end
 
108
c
 
109
      subroutine iterate(niter)
 
110
      implicit double precision (a-h, o-z)
 
111
      include 'cscf.h'
 
112
#include "mafdecls.fh"
 
113
#include "global.fh"
 
114
#include "tcgmsg.fh"
 
115
      do 10 iter = 1, niter
 
116
#ifdef CHECKPOINT_SCF
 
117
         call ga_checkpoint_arrays(arraylist,numarrays)
 
118
#endif
 
119
c
 
120
c     make the one particle contribution to the fock matrix
 
121
c     and get the partial contribution to the energy
 
122
c     
 
123
         call oneel(schwmax, eone)
 
124
         tonel = tonel + timer()
 
125
c     
 
126
c     compute the two particle contributions to the fock matrix and
 
127
c     get the total energy.
 
128
c     
 
129
         call twoel(schwmax, etwo)
 
130
         ttwoel = ttwoel + timer()
 
131
c     
 
132
c     Diagonalize the fock matrix. The diagonalizers used in this
 
133
c     subroutine are actually sequential, not parallel.
 
134
c
 
135
         call diagon(tester,iter)
 
136
         tdiag = tdiag + timer()
 
137
c     
 
138
c     make the new density matrix in g_work from orbitals in g_orbs,
 
139
c     compute the norm of the change in the density matrix and
 
140
c     then update the density matrix in g_dens with damping.
 
141
c     
 
142
         call makden
 
143
         deltad = dendif()
 
144
         if (iter.eq.1) then
 
145
            scale = 0.0d0
 
146
         else if (iter .le. 5) then
 
147
            if (nbfn .gt. 60) then
 
148
               scale = 0.5d0
 
149
            else
 
150
               scale = 0.0d0
 
151
            endif
 
152
         else
 
153
            scale = 0.0d0
 
154
         endif
 
155
         call damp(scale)
 
156
         tdens = tdens + timer()
 
157
c     
 
158
c     add up energy and print out convergence information
 
159
c     
 
160
         if (me.eq.0) then
 
161
           energy = enrep + eone + etwo
 
162
           call prnout(iter, energy, deltad, tester)
 
163
           tprint = tprint + timer()
 
164
         endif
 
165
c     
 
166
c     if converged then exit iteration loop
 
167
c     
 
168
         if (deltad .lt. tol) goto 20
 
169
 10   continue
 
170
      if(me.eq.0)
 
171
     $     write(6,*) ' SCF failed to converge in ', niter, ' iters'
 
172
c     
 
173
c     finished ... print out eigenvalues and occupied orbitals
 
174
c     
 
175
 20   continue
 
176
      end
 
177
 
 
178
      subroutine makesz(schwmax)
 
179
      implicit double precision (a-h, o-z)
 
180
      include 'cscf.h'
 
181
#include "mafdecls.fh"
 
182
#include "global.fh"
 
183
#include "tcgmsg.fh"
 
184
      dimension work(ichunk,ichunk)
 
185
      integer lo(2),hi(2),i,j,iloc,jloc,ld
 
186
      logical dotask
 
187
c
 
188
c     schwarz(ij) = (ij|ij) for sparsity test
 
189
c
 
190
      icut1 = 0
 
191
      icut2 = 0
 
192
      icut3 = 0
 
193
c
 
194
      call ga_zero(g_schwarz)
 
195
      call ga_zero(g_counter)
 
196
      schwmax = 0.0d0
 
197
      dotask = next_chunk(lo,hi)
 
198
      ld = ichunk
 
199
      do while (dotask)
 
200
        do i = lo(1), hi(1)
 
201
          iloc = i - lo(1) + 1
 
202
          do j = lo(2), hi(2)
 
203
            jloc = j - lo(2) + 1
 
204
            call g(gg,i,j,i,j)
 
205
            work(iloc,jloc) = sqrt(gg)
 
206
            schwmax = max(schwmax, work(iloc,jloc))
 
207
          end do
 
208
        end do
 
209
        call nga_put(g_schwarz,lo,hi,work,ld)
 
210
        dotask = next_chunk(lo,hi)
 
211
      end do
 
212
      call ga_dgop(11,schwmax,1,'max')
 
213
c
 
214
      return
 
215
      end
 
216
c
 
217
      subroutine ininrm
 
218
      implicit double precision (a-h, o-z)
 
219
      include 'cscf.h'
 
220
#include "mafdecls.fh"
 
221
#include "global.fh"
 
222
#include "tcgmsg.fh"
 
223
c
 
224
c     write a little welcome message
 
225
c
 
226
      if (ga_nodeid().eq.0) write(6,1) natom, nocc, nbfn, tol
 
227
1     format(/' Example Direct Self Consistent Field Program '/
 
228
     $        ' -------------------------------------------- '//
 
229
     $        ' no. of atoms ............... ',i3/
 
230
     $        ' no. of occupied orbitals ... ',i3/
 
231
     $        ' no. of basis functions ..... ',i3/
 
232
     $        ' convergence threshold ...... ',d9.2//)
 
233
c
 
234
c     generate normalisation coefficients for the basis functions
 
235
c     and the index array iky
 
236
c
 
237
      do 10 i = 1, nbfn
 
238
         iky(i) = i*(i-1)/2
 
239
 10   continue
 
240
c
 
241
      do 20 i = 1, nbfn
 
242
         rnorm(i) = (expnt(i)*2.0d0/pi)**0.75d0
 
243
 20   continue
 
244
c
 
245
c     initialize common for computing f0
 
246
c
 
247
      call setfm
 
248
c
 
249
      end
 
250
      double precision function h(i,j)
 
251
      implicit double precision (a-h, o-z)
 
252
      include 'cscf.h'
 
253
#include "mafdecls.fh"
 
254
#include "global.fh"
 
255
#include "tcgmsg.fh"
 
256
cvd$r novector
 
257
cvd$r noconcur
 
258
c
 
259
c     generate the one particle hamiltonian matrix element
 
260
c     over the normalized primitive 1s functions i and j
 
261
c
 
262
      f0val = 0.0d0
 
263
      sum = 0.0d0
 
264
      rab2 = (x(i)-x(j))**2 + (y(i)-y(j))**2 + (z(i)-z(j))**2
 
265
      facij = expnt(i)*expnt(j)/(expnt(i)+expnt(j))
 
266
      expij = exprjh(-facij*rab2)
 
267
      repij = (2.0d0*pi/(expnt(i)+expnt(j))) * expij
 
268
c
 
269
c     first do the nuclear attraction integrals
 
270
c
 
271
      do 10 iat = 1, natom
 
272
         xp = (x(i)*expnt(i) + x(j)*expnt(j))/(expnt(i)+expnt(j))
 
273
         yp = (y(i)*expnt(i) + y(j)*expnt(j))/(expnt(i)+expnt(j))
 
274
         zp = (z(i)*expnt(i) + z(j)*expnt(j))/(expnt(i)+expnt(j))
 
275
         rpc2 = (xp-ax(iat))**2 + (yp-ay(iat))**2 + (zp-az(iat))**2
 
276
c
 
277
         call f0(f0val, (expnt(i)+expnt(j))*rpc2)
 
278
         sum = sum - repij * q(iat) * f0val
 
279
 10   continue
 
280
c
 
281
c     add on the kinetic energy term
 
282
c
 
283
      sum = sum + facij*(3.0d0-2.0d0*facij*rab2) *
 
284
     $     (pi/(expnt(i)+expnt(j)))**1.5d0 * expij
 
285
c
 
286
c     finally multiply by the normalization constants
 
287
c
 
288
      h = sum * rnorm(i) * rnorm(j)
 
289
c
 
290
      end
 
291
      double precision function s(i,j)
 
292
      implicit double precision (a-h, o-z)
 
293
      include 'cscf.h'
 
294
#include "mafdecls.fh"
 
295
#include "global.fh"
 
296
#include "tcgmsg.fh"
 
297
c
 
298
c     generate the overlap matrix element between the normalized
 
299
c     primitve gaussian 1s functions i and j
 
300
c
 
301
      rab2 = (x(i)-x(j))**2 + (y(i)-y(j))**2 + (z(i)-z(j))**2
 
302
      facij = expnt(i)*expnt(j)/(expnt(i)+expnt(j))
 
303
      s = (pi/(expnt(i)+expnt(j)))**1.5d0 * exprjh(-facij*rab2) *
 
304
     $     rnorm(i)*rnorm(j)
 
305
c
 
306
      end
 
307
      subroutine makden
 
308
      implicit double precision (a-h, o-z)
 
309
      include 'cscf.h'
 
310
#include "mafdecls.fh"
 
311
#include "global.fh"
 
312
#include "tcgmsg.fh"
 
313
c      dimension work(maxnbfn,maxnbfn), torbs(maxnbfn,maxnbfn)
 
314
      dimension work(ichunk,ichunk), orbsi(ichunk,maxnbfn)
 
315
      dimension orbsj(ichunk,maxnbfn)
 
316
      integer lo(2), hi(2), tlo(2), thi(2), i, j, iloc, jloc, ld
 
317
      logical dotask
 
318
c
 
319
c     generate density matrix from orbitals in g_orbs. the first
 
320
c     nocc orbitals are doubly occupied.
 
321
c
 
322
      call ga_zero(g_counter)
 
323
      dotask = next_chunk(lo,hi)
 
324
      ld = ichunk
 
325
      do while (dotask)
 
326
        tlo(1) = lo(1)
 
327
        thi(1) = hi(1)
 
328
        tlo(2) = 1
 
329
        thi(2) = nocc
 
330
        call nga_get(g_orbs,tlo,thi,orbsi,ld)
 
331
        tlo(1) = lo(2)
 
332
        thi(1) = hi(2)
 
333
        call nga_get(g_orbs,tlo,thi,orbsj,ld)
 
334
        do i = lo(1), hi(1)
 
335
          iloc = i - lo(1) + 1
 
336
          do j = lo(2), hi(2)
 
337
            jloc = j - lo(2) + 1
 
338
            p = 0.0d00
 
339
            do k = 1, nocc
 
340
              p = p + orbsi(iloc,k)*orbsj(jloc,k)
 
341
            end do
 
342
            work(iloc,jloc) = 2.0d00*p
 
343
          end do
 
344
        end do
 
345
        call nga_put(g_work,lo,hi,work,ld)
 
346
        dotask = next_chunk(lo,hi)
 
347
      end do
 
348
      return
 
349
      end
 
350
c
 
351
      subroutine oneel(schwmax, eone)
 
352
      implicit double precision (a-h, o-z)
 
353
      include 'cscf.h'
 
354
#include "mafdecls.fh"
 
355
#include "global.fh"
 
356
#include "tcgmsg.fh"
 
357
      integer lo(2), hi(2), i, j, iloc, jloc, ld
 
358
      dimension dens(nnbfn), fock(nnbfn), schwarz(nnbfn)
 
359
      dimension work(ichunk,ichunk),tfock(ichunk,ichunk)
 
360
      logical dotask
 
361
c
 
362
c     fill in the one-electron part of the fock matrix and
 
363
c     compute the one-electron energy contribution
 
364
c
 
365
      me = ga_nodeid()
 
366
      nproc = ga_nnodes()
 
367
c
 
368
      call ga_zero(g_counter)
 
369
      dotask = next_chunk(lo,hi)
 
370
      ld = ichunk
 
371
      do while (dotask)
 
372
        call nga_get(g_schwarz,lo,hi,work,ld)
 
373
        do j = lo(2), hi(2)
 
374
          jloc = j - lo(2) + 1
 
375
          do i = lo(1), hi(1)
 
376
            iloc = i - lo(1) + 1
 
377
            tfock(iloc,jloc) = 0.0d00
 
378
            if (work(iloc,jloc)*schwmax.gt.tol2e)
 
379
     +        tfock(iloc,jloc) = h(i,j)
 
380
          end do
 
381
        end do
 
382
        call nga_put(g_fock,lo,hi,tfock,ld)
 
383
        dotask = next_chunk(lo,hi)
 
384
      end do
 
385
      eone = 0.5d00*contract_matrices(g_fock,g_dens)
 
386
c
 
387
      end
 
388
      integer function nxtask(nproc)
 
389
      parameter (ichunk = 10)
 
390
      save icount, nleft
 
391
      data nleft, icount /0, 0/
 
392
c
 
393
c     wrapper round nxtval() to increase granularity
 
394
c     and thus reduce no. of requests to shared counter
 
395
c
 
396
      if(nproc.gt.0) then
 
397
         if(nleft.eq.0) then
 
398
            icount = nxtval(nproc) * ichunk
 
399
            nleft = ichunk
 
400
         endif
 
401
         nxtask = icount
 
402
         icount = icount + 1
 
403
         nleft = nleft -1
 
404
      else
 
405
          nleft = 0
 
406
          nxtask = 0
 
407
          junk = nxtval(nproc)
 
408
      endif
 
409
c
 
410
c     following does dumb static load balancing
 
411
c
 
412
c$$$      if(nproc.gt.0) then
 
413
c$$$         if (nleft .eq. 0) then
 
414
c$$$            icount = ga_nodeid()
 
415
c$$$            nleft = 1
 
416
c$$$         endif
 
417
c$$$         nxtask = icount
 
418
c$$$         icount = icount + ga_nnodes()
 
419
c$$$      else
 
420
c$$$          nleft = 0
 
421
c$$$          nxtask = 0
 
422
c$$$      endif
 
423
      end
 
424
c
 
425
      logical function next_chunk(lo,hi)
 
426
      include 'cscf.h'
 
427
      integer one
 
428
      parameter (one = 1)
 
429
      integer imax, lo(2), hi(2), ilo, jlo
 
430
      itask = nga_read_inc(g_counter,one,one)
 
431
      imax = nbfn/ichunk
 
432
      if (itask.lt.imax*imax) then
 
433
        if (nbfn - ichunk*imax.gt.0) imax = imax + 1
 
434
        ilo = mod(itask,imax)
 
435
        jlo = (itask-ilo)/imax
 
436
        lo(1) = ilo*ichunk + 1
 
437
        lo(2) = jlo*ichunk + 1
 
438
        hi(1) = min((ilo+1)*ichunk,nbfn)
 
439
        hi(2) = min((jlo+1)*ichunk,nbfn)
 
440
        next_chunk = .true.
 
441
      else
 
442
        next_chunk = .false.
 
443
      endif
 
444
      return
 
445
      end
 
446
c
 
447
      logical function next_4chunk(lo,hi,ilo,jlo,klo,llo)
 
448
      include 'cscf.h'
 
449
      integer one
 
450
      parameter (one = 1)
 
451
      integer imax, lo(4), hi(4), ilo, jlo, klo, llo, itmp
 
452
      itask = nga_read_inc(g_counter,one,one)
 
453
      imax = nbfn/ichunk
 
454
      if (nbfn - ichunk*imax.gt.0) imax = imax + 1
 
455
      if (itask.lt.imax**4) then
 
456
        ilo = mod(itask,imax)
 
457
        itmp = (itask - ilo)/imax
 
458
        jlo = mod(itmp,imax)
 
459
        itmp = (itmp - jlo)/imax
 
460
        klo = mod(itmp,imax)
 
461
        llo = (itmp - klo)/imax
 
462
        lo(1) = ilo*ichunk + 1
 
463
        lo(2) = jlo*ichunk + 1
 
464
        lo(3) = klo*ichunk + 1
 
465
        lo(4) = llo*ichunk + 1
 
466
        hi(1) = min((ilo+1)*ichunk,nbfn)
 
467
        hi(2) = min((jlo+1)*ichunk,nbfn)
 
468
        hi(3) = min((klo+1)*ichunk,nbfn)
 
469
        hi(4) = min((llo+1)*ichunk,nbfn)
 
470
        next_4chunk = .true.
 
471
      else
 
472
        next_4chunk = .false.
 
473
      endif
 
474
      return
 
475
      end
 
476
c
 
477
      subroutine clean_chunk(chunk)
 
478
      include 'cscf.h'
 
479
      double precision chunk(ichunk,ichunk)
 
480
      integer i,j
 
481
      do j = 1, ichunk
 
482
        do i = 1, ichunk
 
483
          chunk(i,j) = 0.0d00
 
484
        end do
 
485
      end do
 
486
      return
 
487
      end
 
488
c
 
489
      subroutine twoel(schwmax, etwo)
 
490
      implicit double precision (a-h, o-z)
 
491
      include 'cscf.h'
 
492
#include "mafdecls.fh"
 
493
#include "global.fh"
 
494
#include "tcgmsg.fh"
 
495
      double precision f_ij(ichunk,ichunk),d_kl(ichunk,ichunk)
 
496
      double precision f_ik(ichunk,ichunk),d_jl(ichunk,ichunk)
 
497
      double precision s_ij(ichunk,ichunk),s_kl(ichunk,ichunk)
 
498
      double precision schwmax, one
 
499
      integer nproc,lo(4),hi(4),ld,ich
 
500
      integer lo_ik(2),hi_ik(2),lo_jl(2),hi_jl(2)
 
501
      integer i,j,k,l,iloc,jloc,kloc,lloc,it,jt,kt,lt
 
502
      logical dotask, next_4chunk
 
503
c     
 
504
c     add in the two-electron contribution to the fock matrix
 
505
c     
 
506
      nproc = ga_nnodes()
 
507
      one = 1.0d00
 
508
c
 
509
      call ga_zero(g_counter)
 
510
      ld = maxnbfn
 
511
      ich = ichunk
 
512
      dotask = next_4chunk(lo,hi,it,jt,kt,lt)
 
513
      itask = 0
 
514
      do while (dotask)
 
515
        lo_ik(1) = lo(1)
 
516
        lo_ik(2) = lo(3)
 
517
        hi_ik(1) = hi(1)
 
518
        hi_ik(2) = hi(3)
 
519
        lo_jl(1) = lo(2)
 
520
        lo_jl(2) = lo(4)
 
521
        hi_jl(1) = hi(2)
 
522
        hi_jl(2) = hi(4)
 
523
        call nga_get(g_schwarz,lo,hi,s_ij,ich)
 
524
        call nga_get(g_schwarz,lo(3),hi(3),s_kl,ich)
 
525
        call nga_get(g_dens,lo(3),hi(3),d_kl,ich)
 
526
        call nga_get(g_dens,lo_jl,hi_jl,d_jl,ich)
 
527
        itask = itask + 1
 
528
        call clean_chunk(f_ij)
 
529
        call clean_chunk(f_ik)
 
530
        do i = lo(1), hi(1)
 
531
          iloc = i-lo(1) + 1
 
532
          do j = lo(2), hi(2)
 
533
            jloc = j-lo(2) + 1
 
534
            if (s_ij(iloc,jloc)*schwmax .lt. tol2e) then
 
535
              icut1 = icut1 + (hi(1)-lo(1)+1)*(hi(2)-lo(2)+1)
 
536
            else
 
537
              do k = lo(3), hi(3)
 
538
                kloc = k-lo(3) + 1
 
539
                do l = lo(4), hi(4)
 
540
                  lloc = l-lo(4) + 1
 
541
                  if (s_ij(iloc,jloc)*s_kl(kloc,lloc).lt.tol2e) then
 
542
                    icut2 = icut2 + 1
 
543
                  else
 
544
                    call g(gg, i, j, k, l)
 
545
                    f_ij(iloc,jloc) = f_ij(iloc,jloc)
 
546
     +                              + gg*d_kl(kloc,lloc)
 
547
                    f_ik(iloc,kloc) = f_ik(iloc,kloc)
 
548
     +                              - 0.5d00*gg*d_jl(jloc,lloc)
 
549
                    icut3 = icut3 + 1
 
550
                  endif
 
551
                end do
 
552
              end do
 
553
            endif
 
554
          end do
 
555
        end do
 
556
        call nga_acc(g_fock,lo,hi,f_ij,ich,one)
 
557
        call nga_acc(g_fock,lo_ik,hi_ik,f_ik,ich,one)
 
558
        dotask = next_4chunk(lo,hi,it,jt,kt,lt)
 
559
      end do
 
560
      etwo = 0.5d00*contract_matrices(g_fock,g_dens)
 
561
      return
 
562
      end
 
563
c
 
564
      subroutine damp(fac)
 
565
      implicit double precision (a-h, o-z)
 
566
      include 'cscf.h'
 
567
#include "mafdecls.fh"
 
568
#include "global.fh"
 
569
#include "tcgmsg.fh"
 
570
c
 
571
c    create damped density matrix as a linear combination of
 
572
c    old density matrix and density matrix formed from new orbitals
 
573
c
 
574
      ofac = 1.0d0 - fac
 
575
      call ga_add(fac,g_dens,ofac,g_work,g_dens)
 
576
      return
 
577
      end
 
578
c
 
579
      subroutine prnout(iter, energy, deltad, tester)
 
580
      implicit double precision (a-h, o-z)
 
581
      include 'cscf.h'
 
582
#include "mafdecls.fh"
 
583
#include "global.fh"
 
584
#include "tcgmsg.fh"
 
585
c
 
586
c     printout results of each iteration
 
587
c
 
588
      if (ga_nodeid().ne.0) return
 
589
      write(6,1) iter, energy, deltad, tester
 
590
      call flush(6)
 
591
1     format(' iter=',i3,', energy=',f13.8,', deltad=',d9.2,
 
592
     $     ', deltaf=',d9.2)
 
593
      return
 
594
      end
 
595
c
 
596
      double precision function dendif()
 
597
      implicit double precision (a-h, o-z)
 
598
      include 'cscf.h'
 
599
#include "mafdecls.fh"
 
600
#include "global.fh"
 
601
#include "tcgmsg.fh"
 
602
      double precision xdiff
 
603
      dimension dens_c(ichunk,ichunk),work_c(ichunk,ichunk)
 
604
      integer lo(2), hi(2), i, j, ld
 
605
      logical dotask
 
606
c
 
607
c     compute largest change in density matrix elements
 
608
c
 
609
      denmax = 0.0d0
 
610
      call ga_zero(g_counter)
 
611
      dotask = next_chunk(lo,hi)
 
612
      ld = ichunk
 
613
      do while(dotask)
 
614
        call nga_get(g_dens,lo,hi,dens_c,ld)
 
615
        call nga_get(g_work,lo,hi,work_c,ld)
 
616
        do j = 1, hi(2)-lo(2)+1
 
617
          do i = 1, hi(1)-lo(1)+1
 
618
            xdiff = abs(dens_c(i,j)-work_c(i,j))
 
619
            if (xdiff.gt.denmax) denmax = xdiff
 
620
          end do
 
621
        end do
 
622
        dotask = next_chunk(lo,hi)
 
623
      end do
 
624
      call ga_dgop(1,denmax,1,'max')
 
625
      dendif = denmax
 
626
      return
 
627
      end
 
628
c
 
629
      double precision function testfock()
 
630
      implicit double precision (a-h, o-z)
 
631
      include 'cscf.h'
 
632
#include "mafdecls.fh"
 
633
#include "global.fh"
 
634
#include "tcgmsg.fh"
 
635
      double precision xmax, xtmp
 
636
      dimension work(ichunk,ichunk)
 
637
      integer lo(2), hi(2), i, j, iloc, jloc, ld
 
638
      logical dotask
 
639
c
 
640
c     compute largest change in density matrix elements
 
641
c
 
642
      xmax = 0.0d0
 
643
      call ga_zero(g_counter)
 
644
      dotask = next_chunk(lo,hi)
 
645
      ld = ichunk
 
646
      do while(dotask)
 
647
        call nga_get(g_fock,lo,hi,work,ld)
 
648
        do j = lo(2), hi(2)
 
649
          jloc = j - lo(2) + 1
 
650
          do i = lo(1), hi(1)
 
651
            iloc = i - lo(1) + 1
 
652
            if (i.ne.j) then
 
653
              xtmp = abs(work(iloc,jloc))
 
654
              if (xtmp.gt.xmax) xmax = xtmp
 
655
            endif
 
656
          end do
 
657
        end do
 
658
        dotask = next_chunk(lo,hi)
 
659
      end do
 
660
      call ga_dgop(1,xmax,1,'max')
 
661
      testfock = xmax
 
662
      return
 
663
      end
 
664
c
 
665
      subroutine shiftfock(shift)
 
666
      implicit double precision (a-h, o-z)
 
667
      include 'cscf.h'
 
668
#include "mafdecls.fh"
 
669
#include "global.fh"
 
670
#include "tcgmsg.fh"
 
671
      double precision shift
 
672
      dimension work(ichunk,ichunk)
 
673
      integer lo(2), hi(2), i, j, iloc, jloc, ld, icnt
 
674
      logical dotask
 
675
c
 
676
c     compute largest change in density matrix elements
 
677
c
 
678
      call ga_zero(g_counter)
 
679
      dotask = next_chunk(lo,hi)
 
680
      ld = ichunk
 
681
      do while(dotask)
 
682
        call nga_get(g_fock,lo,hi,work,ld)
 
683
        icnt = 0
 
684
        do j = lo(2), hi(2)
 
685
          jloc = j - lo(2) + 1
 
686
          do i = lo(1), hi(1)
 
687
            iloc = i - lo(1) + 1
 
688
            if (i.eq.j.and.i.gt.nocc) then
 
689
              work(iloc,jloc) = work(iloc,jloc) + shift
 
690
              icnt = icnt + 1
 
691
            endif
 
692
          end do
 
693
        end do
 
694
        if (icnt.gt.0) call nga_put(g_fock,lo,hi,work,ld)
 
695
        dotask = next_chunk(lo,hi)
 
696
      end do
 
697
      return
 
698
      end
 
699
c
 
700
      subroutine prnfin(energy)
 
701
      implicit double precision (a-h, o-z)
 
702
      include 'cscf.h'
 
703
#include "mafdecls.fh"
 
704
#include "global.fh"
 
705
#include "tcgmsg.fh"
 
706
      dimension orbs(maxnbfn, maxnbfn)
 
707
      integer lo(2),hi(2),ld
 
708
c
 
709
c     printout final results
 
710
c
 
711
      if (ga_nodeid().ne.0) return
 
712
      write(6,1) energy
 
713
 1    format(//' final energy = ',f16.11//' eigenvalues')
 
714
      call output(eigv, 1, min(nbfn,nocc+5), 1, 1, nbfn, 1, 1)
 
715
      write(6,2)
 
716
2     format(//' eigenvectors ')
 
717
      lo(1) = 1
 
718
      lo(2) = 1
 
719
      hi(1) = nbfn
 
720
      hi(2) = nbfn
 
721
      ld = nbfn
 
722
      call nga_get(g_orbs,lo,hi,orbs,ld) 
 
723
      call output(orbs, 1, nbfn, 1, nocc, nbfn, nbfn, 1)
 
724
c
 
725
      return
 
726
      end
 
727
      subroutine g(value,i,j,k,l)
 
728
      implicit double precision (a-h, o-z)
 
729
      include 'cscf.h'
 
730
#include "mafdecls.fh"
 
731
#include "global.fh"
 
732
#include "tcgmsg.fh"
 
733
c
 
734
c     compute the two electon integral (ij|kl) over normalized
 
735
c     primitive 1s gaussians
 
736
c
 
737
      f0val = 0.0d0
 
738
      rab2 = (x(i)-x(j))**2 + (y(i)-y(j))**2 + (z(i)-z(j))**2
 
739
      rcd2 = (x(k)-x(l))**2 + (y(k)-y(l))**2 + (z(k)-z(l))**2
 
740
      facij = expnt(i)*expnt(j)/(expnt(i)+expnt(j))
 
741
      fackl = expnt(k)*expnt(l)/(expnt(k)+expnt(l))
 
742
      exijkl = exprjh(- facij*rab2 - fackl*rcd2)
 
743
      denom = (expnt(i)+expnt(j))*(expnt(k)+expnt(l)) *
 
744
     $        sqrt(expnt(i)+expnt(j)+expnt(k)+expnt(l))
 
745
      fac = (expnt(i)+expnt(j))*(expnt(k)+expnt(l)) /
 
746
     $        (expnt(i)+expnt(j)+expnt(k)+expnt(l))
 
747
c
 
748
      xp = (x(i)*expnt(i) + x(j)*expnt(j))/(expnt(i)+expnt(j))
 
749
      yp = (y(i)*expnt(i) + y(j)*expnt(j))/(expnt(i)+expnt(j))
 
750
      zp = (z(i)*expnt(i) + z(j)*expnt(j))/(expnt(i)+expnt(j))
 
751
      xq = (x(k)*expnt(k) + x(l)*expnt(l))/(expnt(k)+expnt(l))
 
752
      yq = (y(k)*expnt(k) + y(l)*expnt(l))/(expnt(k)+expnt(l))
 
753
      zq = (z(k)*expnt(k) + z(l)*expnt(l))/(expnt(k)+expnt(l))
 
754
      rpq2 = (xp-xq)**2 + (yp-yq)**2 + (zp-zq)**2
 
755
c
 
756
      call f0(f0val, fac*rpq2)
 
757
      value = (2.0d0 * pi**2.5d0 / denom) * exijkl * f0val *
 
758
     $    rnorm(i)*rnorm(j)*rnorm(k)*rnorm(l)
 
759
      return
 
760
      end
 
761
c
 
762
      subroutine diagon(tester, iter)
 
763
c      subroutine diagon(fock, orbs, evals, work, tester, iter)
 
764
      implicit double precision (a-h, o-z)
 
765
      include 'cscf.h'
 
766
#include "mafdecls.fh"
 
767
#include "global.fh"
 
768
#include "tcgmsg.fh"
 
769
      double precision r_zero, r_one, shift, tester
 
770
c
 
771
#if USE_TRANSFORM
 
772
c
 
773
c     use similarity transform to solve standard eigenvalue problem
 
774
c     (overlap matrix has been transformed out of the problem)
 
775
c
 
776
      r_one = 1.0d00
 
777
      r_zero = 0.0d00
 
778
      call ga_dgemm('n','n',nbfn,nbfn,nbfn,r_one,g_fock,g_orbs,
 
779
     +               r_zero,g_tfock)
 
780
      call ga_dgemm('t','n',nbfn,nbfn,nbfn,r_one,g_orbs,g_tfock,
 
781
     +               r_zero,g_fock)
 
782
      tester = testfock()
 
783
      shift = 0.0d00
 
784
      if (tester.gt.0.3d0) then
 
785
        shift = 0.3d0
 
786
      endif
 
787
      if (iter.ge.2.and.shift.ne.0.0d00) then
 
788
        call shiftfock(shift)
 
789
      endif
 
790
      call ga_copy(g_orbs,g_tfock)
 
791
      call ga_diag_std_seq(g_fock, g_work, eigv)
 
792
c
 
793
c     Back transform eigenvectors
 
794
c
 
795
      call ga_dgemm('n','n',nbfn,nbfn,nbfn,r_one,g_tfock,g_work,
 
796
     +               r_zero,g_orbs)
 
797
      if (iter.ge.2.and.shift.ne.0.0d00) then
 
798
         do 50 i = nocc+1, nbfn
 
799
            eigv(i) = eigv(i) - shift
 
800
 50      continue
 
801
      endif
 
802
#else
 
803
c
 
804
c     Keep remaking overlap matrix since ga_diag_seq does not
 
805
c     guarantee that g_ident is preserved.
 
806
c
 
807
         call makoverlap
 
808
         call ga_diag_seq(g_fock, g_ident, g_orbs, eigv)
 
809
         tester = 0.0d00
 
810
#endif
 
811
      return
 
812
      end
 
813
c
 
814
      subroutine makeob
 
815
      implicit double precision (a-h, o-z)
 
816
      include 'cscf.h'
 
817
#include "mafdecls.fh"
 
818
#include "global.fh"
 
819
#include "tcgmsg.fh"
 
820
      double precision work(ichunk,ichunk),orbs(ichunk,ichunk)
 
821
      double precision eval(maxnbfn)
 
822
      integer lo(2),hi(2),ld,me,i,j,iloc,jloc
 
823
      logical dotask
 
824
c
 
825
c     generate set of orthonormal vectors by creating a random
 
826
c     symmetric matrix and solving associated generalized eigenvalue
 
827
c     problem using the correct overlap matrix.
 
828
c
 
829
      me = ga_nodeid()
 
830
      call ga_zero(g_counter)
 
831
      dotask = next_chunk(lo,hi)
 
832
      ld = ichunk
 
833
      do while (dotask)
 
834
        do j = lo(2), hi(2)
 
835
          jloc = j - lo(2) + 1
 
836
          do i = lo(1), hi(1)
 
837
            iloc = i - lo(1) + 1
 
838
            work(iloc,jloc) = s(i,j)
 
839
            orbs(iloc,jloc) = drand48(0)
 
840
          end do
 
841
        end do
 
842
        call nga_put(g_ident,lo,hi,work,ld)
 
843
        call nga_put(g_fock,lo,hi,orbs,ld)
 
844
        dotask = next_chunk(lo,hi)
 
845
      end do
 
846
      call ga_symmetrize(g_fock)
 
847
      call ga_diag_seq(g_fock, g_ident, g_orbs, eval)
 
848
c
 
849
      return
 
850
      end
 
851
c
 
852
      subroutine denges
 
853
      implicit double precision (a-h, o-z)
 
854
      include 'cscf.h'
 
855
#include "mafdecls.fh"
 
856
#include "global.fh"
 
857
#include "tcgmsg.fh"
 
858
c
 
859
c     Form guess density from superposition of atomic densities in the AO
 
860
c     basis set ... instead of doing the atomic SCF hardwire for this
 
861
c     small basis set for the Be atom.
 
862
c
 
863
      integer one, itask, lo(2), hi(2), ld
 
864
      dimension atdens(15,15)
 
865
      data atdens/
 
866
     $     0.000002,0.000027,0.000129,0.000428,0.000950,0.001180,
 
867
     $     0.000457,-0.000270,-0.000271,0.000004,0.000004,0.000004,
 
868
     $     0.000004,0.000004,0.000004,0.000027,0.000102,0.000987,
 
869
     $     0.003269,0.007254,0.009007,0.003492,-0.002099,-0.002108,
 
870
     $     0.000035,0.000035,0.000035,0.000035,0.000035,0.000035,
 
871
     $     0.000129,0.000987,0.002381,0.015766,0.034988,0.043433,
 
872
     $     0.016835,-0.010038,-0.010082,0.000166,0.000166,0.000166,
 
873
     $     0.000166,0.000166,0.000166,0.000428,0.003269,0.015766,
 
874
     $     0.026100,0.115858,0.144064,0.055967,-0.035878,-0.035990,
 
875
     $     0.000584,0.000584,0.000584,0.000584,0.000584,0.000584,
 
876
     $     0.000950,0.007254,0.034988,0.115858,0.128586,0.320120,
 
877
     $     0.124539,-0.083334,-0.083536,0.001346,0.001346,0.001346,
 
878
     $     0.001346,0.001346,0.001346,0.001180,0.009007,0.043433,
 
879
     $     0.144064,0.320120,0.201952,0.159935,-0.162762,-0.162267,
 
880
     $     0.002471,0.002471,0.002471,0.002471,0.002471,0.002471,
 
881
     $     0.000457,0.003492,0.016835,0.055967,0.124539,0.159935,
 
882
     $     0.032378,-0.093780,-0.093202,0.001372,0.001372,0.001372,
 
883
     $     0.001372,0.001372,0.001372,-0.000270,-0.002099,-0.010038,
 
884
     $     -0.035878,-0.083334,-0.162762,-0.093780,0.334488,0.660918,
 
885
     $     -0.009090,-0.009090,-0.009090,-0.009090,-0.009090,-0.009090,
 
886
     $     -0.000271,-0.002108,-0.010082,-0.035990,-0.083536,-0.162267,
 
887
     $     -0.093202,0.660918,0.326482,-0.008982,-0.008982,-0.008981,
 
888
     $     -0.008981,-0.008981,-0.008982,0.000004,0.000035,0.000166,
 
889
     $     0.000584,0.001346,0.002471,0.001372,-0.009090,-0.008982,
 
890
     $     0.000062,0.000124,0.000124,0.000124,0.000124,0.000124,
 
891
     $     0.000004,0.000035,0.000166,0.000584,0.001346,0.002471,
 
892
     $     0.001372,-0.009090,-0.008982,0.000124,0.000062,0.000124,
 
893
     $     0.000124,0.000124,0.000124,0.000004,0.000035,0.000166,
 
894
     $     0.000584,0.001346,0.002471,0.001372,-0.009090,-0.008981,
 
895
     $     0.000124,0.000124,0.000062,0.000124,0.000124,0.000124,
 
896
     $     0.000004,0.000035,0.000166,0.000584,0.001346,0.002471,
 
897
     $     0.001372,-0.009090,-0.008981,0.000124,0.000124,0.000124,
 
898
     $     0.000062,0.000124,0.000124,0.000004,0.000035,0.000166,
 
899
     $     0.000584,0.001346,0.002471,0.001372,-0.009090,-0.008981,
 
900
     $     0.000124,0.000124,0.000124,0.000124,0.000062,0.000124,
 
901
     $     0.000004,0.000035,0.000166,0.000584,0.001346,0.002471,
 
902
     $     0.001372,-0.009090,-0.008982,0.000124,0.000124,0.000124,
 
903
     $     0.000124,0.000124,0.000062/
 
904
c
 
905
c   Create initial guess for density matrix in global array
 
906
c
 
907
      call ga_zero(g_dens)
 
908
      call ga_zero(g_counter)
 
909
      one = 1
 
910
      ld = 15
 
911
c
 
912
c   Correct for a factor of two along the diagonal
 
913
c
 
914
      do i = 1, ld
 
915
        atdens(i,i) = 2.0d00*atdens(i,i)
 
916
      end do
 
917
      itask = nga_read_inc(g_counter,one,one)
 
918
      do while(itask.lt.natom)
 
919
        ioff = itask*15
 
920
        lo(1) = ioff+1
 
921
        lo(2) = ioff+1
 
922
        hi(1) = ioff+15
 
923
        hi(2) = ioff+15
 
924
        call nga_put(g_dens,lo,hi,atdens,ld)
 
925
        itask = nga_read_inc(g_counter,one,one)
 
926
      end do
 
927
      call ga_sync
 
928
      return
 
929
      end
 
930
c
 
931
      subroutine setarrays
 
932
      implicit double precision (a-h, o-z)
 
933
      include 'cscf.h'
 
934
#include "mafdecls.fh"
 
935
#include "global.fh"
 
936
#include "tcgmsg.fh"
 
937
      integer one, two, dims(2)
 
938
      logical status
 
939
      one = 1
 
940
      two = 2
 
941
      g_counter = ga_create_handle()
 
942
      call ga_set_data(g_counter,one,one,MT_INT)
 
943
      status = ga_allocate(g_counter)
 
944
      call ga_zero(g_counter)
 
945
 
 
946
      dims(1) = nbfn
 
947
      dims(2) = nbfn
 
948
      g_dens = ga_create_handle()
 
949
      call ga_set_data(g_dens, two, dims, MT_DBL)
 
950
      status = ga_allocate(g_dens)
 
951
      call ga_zero(g_dens)
 
952
 
 
953
      g_schwarz = ga_create_handle()
 
954
      call ga_set_data(g_schwarz, two, dims, MT_DBL)
 
955
      status = ga_allocate(g_schwarz)
 
956
      call ga_zero(g_schwarz)
 
957
 
 
958
      g_fock = ga_create_handle()
 
959
      call ga_set_data(g_fock, two, dims, MT_DBL)
 
960
      status = ga_allocate(g_fock)
 
961
      call ga_zero(g_fock)
 
962
 
 
963
      g_tfock = ga_create_handle()
 
964
      call ga_set_data(g_tfock, two, dims, MT_DBL)
 
965
      status = ga_allocate(g_tfock)
 
966
      call ga_zero(g_tfock)
 
967
 
 
968
      g_work = ga_create_handle()
 
969
      call ga_set_data(g_work, two, dims, MT_DBL)
 
970
      status = ga_allocate(g_work)
 
971
      call ga_zero(g_work)
 
972
#ifdef CHECKPOINT_SCF
 
973
      arraylist(1) = g_counter
 
974
      arraylist(2) = g_dens
 
975
      arraylist(3) = g_schwarz
 
976
      arraylist(4) = g_fock
 
977
      arraylist(5) = g_work
 
978
      write (6,*) arraylist(1),arraylist(2),arraylist(3)
 
979
      call ga_checkpoint_arrays(arraylist,numarrays)
 
980
#endif
 
981
 
 
982
      g_ident = ga_create_handle()
 
983
      call ga_set_data(g_ident, two, dims, MT_DBL)
 
984
      status = ga_allocate(g_ident)
 
985
      call ga_zero(g_ident)
 
986
 
 
987
      g_orbs = ga_create_handle()
 
988
      call ga_set_data(g_orbs, two, dims, MT_DBL)
 
989
      status = ga_allocate(g_orbs)
 
990
      call ga_zero(g_orbs)
 
991
 
 
992
      return
 
993
      end
 
994
      subroutine closearrays
 
995
      implicit double precision (a-h, o-z)
 
996
      include 'cscf.h'
 
997
#include "mafdecls.fh"
 
998
#include "global.fh"
 
999
#include "tcgmsg.fh"
 
1000
      logical status
 
1001
c
 
1002
      status = ga_destroy(g_counter)
 
1003
      status = ga_destroy(g_dens)
 
1004
      status = ga_destroy(g_schwarz)
 
1005
      status = ga_destroy(g_fock)
 
1006
      status = ga_destroy(g_tfock)
 
1007
      status = ga_destroy(g_work)
 
1008
      status = ga_destroy(g_ident)
 
1009
      status = ga_destroy(g_orbs)
 
1010
c
 
1011
      return
 
1012
      end
 
1013
c
 
1014
      subroutine makoverlap
 
1015
      implicit double precision (a-h, o-z)
 
1016
      include 'cscf.h'
 
1017
#include "mafdecls.fh"
 
1018
#include "global.fh"
 
1019
#include "tcgmsg.fh"
 
1020
      integer me, lo(2), hi(2), ptr, ld(2)
 
1021
      integer ld1, ld2
 
1022
      me = ga_nodeid()
 
1023
      call nga_distribution(g_ident, me, lo, hi)
 
1024
      call nga_access(g_ident, lo, hi, ptr, ld)
 
1025
      ld1 = hi(1) - lo(1) + 1
 
1026
      ld2 = hi(2) - lo(2) + 1
 
1027
      call setoverlap(dbl_mb(ptr),lo,hi,ld1,ld2)
 
1028
      call nga_release(g_ident)
 
1029
      return
 
1030
      end
 
1031
c
 
1032
      subroutine setoverlap(a,lo,hi,ld1,ld2)
 
1033
      include 'cscf.h'
 
1034
#include "mafdecls.fh"
 
1035
#include "global.fh"
 
1036
#include "tcgmsg.fh"
 
1037
      integer lo(2), hi(2)
 
1038
      integer ld1, ld2, ii, jj
 
1039
      double precision a(ld1,ld2)
 
1040
      do i = 1, ld1
 
1041
        ii = i + lo(1) - 1
 
1042
        do j = 1, ld2
 
1043
          jj = j + lo(2) - 1
 
1044
#if USE_TRANSFORM
 
1045
          if (ii.eq.jj) then
 
1046
            a(i,j) = 1.0d00
 
1047
          else
 
1048
            a(i,j) = 0.0d00
 
1049
          endif
 
1050
#else
 
1051
          a(i,j) = s(ii,jj)
 
1052
#endif
 
1053
        end do
 
1054
      end do
 
1055
      return
 
1056
      end
 
1057
c
 
1058
      subroutine print_ga_block(g_a)
 
1059
      implicit double precision(a-h,o-z)
 
1060
      include 'cscf.h'
 
1061
#include "mafdecls.fh"
 
1062
#include "global.fh"
 
1063
#include "tcgmsg.fh"
 
1064
      integer lo(2), hi(2), ptr, ld1, ld2
 
1065
c
 
1066
      me = ga_nodeid()
 
1067
      call nga_distribution(g_a, me, lo, hi)
 
1068
      ld1 = hi(1) - lo(1) + 1
 
1069
      ld2 = hi(2) - lo(2) + 1
 
1070
      call nga_access(g_a, lo, hi, ptr, ld)
 
1071
      call dump_chunk(dbl_mb(ptr),ld1,ld2)
 
1072
      call nga_release(g_a)
 
1073
c
 
1074
      return
 
1075
      end
 
1076
c
 
1077
      subroutine print_ga_block_ij(g_a,tlo)
 
1078
      implicit double precision(a-h,o-z)
 
1079
      include 'cscf.h'
 
1080
#include "mafdecls.fh"
 
1081
#include "global.fh"
 
1082
#include "tcgmsg.fh"
 
1083
      integer lo(2), hi(2), ptr, ld1, ld2
 
1084
c
 
1085
      me = ga_nodeid()
 
1086
      call nga_distribution(g_a, me, lo, hi)
 
1087
      ld1 = hi(1) - lo(1) + 1
 
1088
      ld2 = hi(2) - lo(2) + 1
 
1089
      call nga_access(g_a, tlo, hi, ptr, ld)
 
1090
      call dump_chunk(dbl_mb(ptr),ld1,ld2)
 
1091
      call nga_release(g_a)
 
1092
c
 
1093
      return
 
1094
      end
 
1095
c
 
1096
      subroutine dump_chunk(a,ld1,ld2)
 
1097
      implicit double precision (a-h, o-z)
 
1098
      include 'cscf.h'
 
1099
#include "mafdecls.fh"
 
1100
#include "global.fh"
 
1101
#include "tcgmsg.fh"
 
1102
      integer ld1, ld2
 
1103
      double precision a(ld1, ld2)
 
1104
      do i = 1, min(10,ld1)
 
1105
        write(6,100) (a(i,j), j = 1, min(10,ld2))
 
1106
      end do
 
1107
      write(6,*)
 
1108
  100 format(10f10.4)
 
1109
      return
 
1110
      end
 
1111
c
 
1112
      double precision function contract_matrices(g_a,g_b)
 
1113
      implicit double precision(a-h,o-z)
 
1114
      include 'cscf.h'
 
1115
#include "mafdecls.fh"
 
1116
#include "global.fh"
 
1117
#include "tcgmsg.fh"
 
1118
      integer lo(2), hi(2), ptr_a, ptr_b, ld, ld1, ld2
 
1119
      double precision a(ichunk,ichunk),b(ichunk,ichunk)
 
1120
      double precision value
 
1121
      logical dotask
 
1122
c
 
1123
c   evalute sum_ij a_ij*b_ij
 
1124
c
 
1125
      value = 0.0d00
 
1126
      call ga_zero(g_counter)
 
1127
      dotask = next_chunk(lo,hi)
 
1128
      ld = ichunk
 
1129
      do while (dotask)
 
1130
        call nga_get(g_a,lo,hi,a,ld)
 
1131
        call nga_get(g_b,lo,hi,b,ld)
 
1132
        do j = 1, hi(2)-lo(2)+1
 
1133
          do i = 1, hi(1)-lo(1)+1
 
1134
            value = value + a(i,j)*b(i,j)
 
1135
          end do
 
1136
        end do
 
1137
        dotask = next_chunk(lo,hi)
 
1138
      end do
 
1139
      call ga_dgop(3,value,1,'+')
 
1140
      contract_matrices=value
 
1141
c
 
1142
      return
 
1143
      end