2
! Copyright (C) 1996-2016 The SIESTA group
3
! This file is distributed under the terms of the
4
! GNU General Public License: see COPYING in the top directory
5
! or http://www.gnu.org/copyleft/gpl.txt.
6
! See Docs/Contributors.txt for a list of contributors.
8
subroutine cdiag( H, S, n, nm, nml, w, Z, neigvec, iscf, ierror,
10
C ***************************************************************************
11
C Subroutine to solve all eigenvalues and eigenvectors of the
12
C real general eigenvalue problem H z = w S z, with H and S
13
C real symmetric matrices.
14
C Written by G.Fabricius and J.Soler, March 1998
15
C Rewritten by Julian Gale, August 2004
16
C ************************** INPUT ******************************************
17
C complex*16 H(nml,nm) : Symmetric H matrix
18
C complex*16 S(nml,nm) : Symmetric S matrix
19
C integer n : Order of the generalized system
20
C integer nm : Right hand dimension of H and S matrices
21
C integer nml : Left hand dimension of H and S matrices
22
C which is greater than or equal to nm
23
C integer neigvec : No. of eigenvectors to calculate
24
C integer iscf : SCF cycle
25
C integer BlockSize : Effective parallel block size
26
C ************************** OUTPUT *****************************************
27
C real*8 w(nml) : Eigenvalues
28
C complex*16 Z(nml,nm) : Eigenvectors
29
C integer ierror : Flag indicating success code for routine
31
C : -1 = repeat call as memory is increased
33
C ************************* PARALLEL ****************************************
34
C When running in parallel this routine now uses Scalapack to perform a
35
C parallel matrix diagonalisation. This requires Scalapack and Blacs to
36
C be installed first. Although globally a 1-D block cyclic data distribution
37
C is employed, locally 1 or 2-D distributions are allowed for.
38
C The blocksize is now explicitly passed to the routine (A. Garcia, 2016)
39
C The routine allows access to all the phases of diagonalisation for fuller
40
C control, and allows for parallel divide and conquer with reduced memory.
41
C The presence of eigenvalue clusters is checked for and the memory adjusted
42
C accordingly to try to guarantee convergence.
43
C Note that the blacs grid is only initialised on the first call to avoid
44
C exceeding MPI limits for split/group creation.
45
C ***************************************************************************
51
use parallel, only : Node, Nodes, ProcessorY
52
use parallel, only : ParallelOverK, ResetFirstCall
54
use m_diagon_opt,only : saveeigenvectors, maxclustersize, serial,
55
& i2d_ctxt, noexpert, divideconquer, use2d,
56
& prerotate, ictxt, allinone, doread,
59
use diagmemory, only : MemoryFactor
77
real(dp) :: H(2,nml,nm)
78
real(dp) :: S(2,nml,nm)
79
real(dp) :: Z(2,nml,nm)
82
type(allocDefaults) oldDefaults
85
integer :: clustersize
103
integer :: nhegst_lwopt
104
integer :: nhetrd_lwopt
107
integer :: oldmaxclustersize
110
integer, pointer, save :: iclustr(:)
111
real(dp), pointer, save :: gap(:)
114
C Additional variables for a 2D proc grid
115
integer :: np2d_row, np2d_col
116
integer :: my2d_row, my2d_col
117
integer, dimension(9) :: desc_h2d
120
real(dp), pointer, save, dimension(:,:,:) :: h2d, s2d, z2d
121
integer :: mat_2d_row, mat_2d_col
133
integer, pointer, save :: ifail(:)
134
integer, pointer, save :: iwork(:)
135
logical :: ChangeAlgorithm
140
real(dp), pointer, save :: rwork(:)
141
real(dp), pointer, save :: work(:)
142
real(dp), pointer, save :: Zsave(:,:,:)
144
#ifdef SCALAPACK_DEBUG
145
integer :: d_lwork, d_lrwork, d_liwork
146
logical, save :: debugged = .false.
151
complex(dp), parameter ::
152
. zero_z = (0.0_dp, 0.0_dp)
153
. , one_z = (1.0_dp, 0.0_dp)
155
call timer('cdiag',1)
157
C Check whether first call needs to be reset due to a change of algorithm
158
if (ResetFirstCall) then
160
ResetFirstCall = .false.
165
nullify( ifail, iwork, rwork, work, Zsave )
167
nullify( gap, iclustr )
168
nullify( h2d, s2d, z2d )
172
C Get old allocation defaults and set new ones
173
call alloc_default( old=oldDefaults, copy=.false., shrink=.true.,
174
. imin=1, routine='cdiag' )
176
C*******************************************************************************
178
C*******************************************************************************
180
C Initialise error flag
183
! Trap n=1 case, which is not handled correctly otherwise (JMS 2011/07/19)
186
w(1) = H(1,1,1) / S(1,1,1)
188
Z(1,1,1) = 1._dp / sqrt(S(1,1,1))
189
goto 1000 ! exit point
192
! vl and vu are not currently used, but they must be initialized
196
C Set algorithm logicals
199
AllInOne = fdf_boolean('Diag.AllInOne',.false.)
200
DivideConquer = fdf_boolean('Diag.DivideAndConquer',.true.)
201
NoExpert = fdf_boolean('Diag.NoExpert',.false.)
202
PreRotate = fdf_boolean('Diag.PreRotate',.false.)
203
Use2D = fdf_boolean('Diag.Use2D',.true.)
205
Serial = (Nodes.eq.1.or.ParallelOverK)
206
if (Serial) Use2D = .false.
207
if (DivideConquer.and..not.Serial) AllInOne = .false.
208
if (AllInOne.and.Serial) DivideConquer = .false.
209
if (PreRotate) SaveEigenvectors = .true.
210
if (SaveEigenvectors) then
212
call die('Zsave not dimensioned in cdiag')
214
call re_alloc( Zsave, 1,2, 1,n, 1,nm, name='Zsave',
219
C Is it time to switch from standard diagonaliser?
220
ChangeAlgorithm = (iscf.gt.2)
223
if (.not.Serial) then
224
C Get Blacs context and initialise Blacs grid for main grid
228
! JMS: Changed to allow mpi_comm_world to be supplanted in MPI_siesta.
229
! The change has no effect otherwise, since blacs_get returns the
230
! true value of mpi_comm_world (usually zero) as ictxt
231
! call blacs_get( -1, 0, ictxt ) ! removed
232
ictxt = mpi_comm_world ! added
233
call blacs_gridinit( ictxt, 'C', nprow, npcol )
237
C If 2D approach is being used then setup secondary grid
238
np2d_row = processorY
239
np2d_col = Nodes/processorY
241
call blacs_get(ictxt, 10, i2d_ctxt)
242
call blacs_gridinit(i2d_ctxt, 'R', np2d_row, np2d_col)
244
call blacs_gridinfo(i2d_ctxt, np2d_row, np2d_col,
245
. my2d_row, my2d_col)
248
C Set up blacs descriptors for parallel case
250
call descinit( desch, n, n, BlockSize, BlockSize, 0, 0,
252
if (info.ne.0) BlacsOK = .false.
253
if (.not.BlacsOK) then
254
call die('ERROR : Blacs setup has failed in cdiag!')
258
C Enquire size of local part of 2D matices
259
mat_2d_row = numroc(n, BlockSize, my2d_row, 0, np2d_row)
260
mat_2d_col = numroc(n, BlockSize, my2d_col, 0, np2d_col)
262
C Set up blacs descriptors for 2D case
263
call descinit(desc_h2d, n, n, Blocksize, BlockSize, 0, 0,
264
. i2d_ctxt, mat_2d_row, info)
265
if (info.ne.0) BlacsOK = .false.
266
if (.not.BlacsOK) then
267
call die('ERROR : Blacs setup has failed in cdiag!')
274
C Set general Lapack/Scalapack parameters
275
C NB: Divide and conquer doesnot have eigenvalue only case in Scalapack
276
if (neigvec.gt.0.or.(DivideConquer.and..not.Serial)) then
278
if (neigvec.eq.n) then
291
C Calculate memory requirements
293
if (DivideConquer) then
294
if (neigvec.gt.0) then
296
lrwork = 1 + 5*n + 2*n*n
304
nb = max(ilaenv(1,'ZHETRD','U',n,-1,-1,-1),
305
. ilaenv(1,'ZUNMTR','U',n,-1,-1,-1))
308
lrwork = max(1,3*n-2)
319
call blacs_gridinfo(desc_h2d(2),nprow,npcol,myrow,mycol)
320
iarow = indxg2p(1,desc_h2d(6),myrow,desc_h2d(7),nprow)
321
iacol = indxg2p(1,desc_h2d(5),mycol,desc_h2d(8),npcol)
323
call blacs_gridinfo(desch(2),nprow,npcol,myrow,mycol)
324
iarow = indxg2p(1,desch(6),myrow,desch(7),nprow)
325
iacol = indxg2p(1,desch(5),mycol,desch(8),npcol)
327
nn = max(n,BlockSize)
329
np0 = numroc(nn,BlockSize,0,0,nprow)
330
mq0 = numroc(nn,BlockSize,0,0,npcol)
331
np = numroc(n,BlockSize,myrow,iarow,nprow)
332
nq = numroc(n,BlockSize,mycol,iacol,npcol)
333
if (DivideConquer) then
334
lwork = n + BlockSize*(np0 + mq0 + BlockSize)
335
nhegst_lwopt = (2*np + nq)*BlockSize +
336
. BlockSize*BlockSize
337
lwork = max(lwork,nhegst_lwopt)
338
C Note: Scalapack documentation fibs about the size of lrwork!
339
lrwork = 1 + 10*n + 3*np*nq
340
liwork = 7*n + 8*Nodes + 2
342
if (neigvec.gt.0) then
343
lwork = n + (np0 + mq0 + BlockSize)*BlockSize
345
anb = pjlaenv(ictxt,3,'PDSYTTRD','L',0,0,0,0)
346
sqnpc = int(sqrt(dble(Nodes)))
347
nps = max(numroc(n,1,0,0,sqnpc),2*anb)
348
nhetrd_lwopt = 2*(anb + 1)*(4*nps + 2) +
350
nhegst_lwopt = (2*np + nq)*BlockSize +
351
. BlockSize*BlockSize
352
lwork2 = max(n + nhetrd_lwopt,nhegst_lwopt)
353
lwork = max(lwork,lwork2)
354
lwork2 = n + max(nn*(np0+1),3)
355
lwork = max(lwork,lwork2)
356
lrwork = 4*n + max(5*nn,np0*mq0) + iceil(neigvec,Nodes)*nn +
359
lwork = n + max(nn*(np0+1),3)
360
nhegst_lwopt = (2*np + nq)*BlockSize +
361
. BlockSize*BlockSize
362
lwork = max(lwork,nhegst_lwopt)
365
liwork = 6*max(n,Nodes+1,4)
370
C Scale memory by memory factor
371
lwork = nint(MemoryFactor*dble(lwork))
373
C Allocate workspace arrays
374
call re_alloc( work, 1,2*lwork, name='work',
376
call re_alloc( rwork, 1,lrwork, name='rwork',
378
call re_alloc( iwork, 1,liwork, name='iwork',
380
call re_alloc( ifail, 1,n, name='ifail',
383
call re_alloc( gap, 1,Nodes, name='gap',
385
call re_alloc( iclustr, 1,2*Nodes, name='iclustr',
389
C Set up workspace arrays for 2D versions of 1D arrays
390
call re_alloc(h2d, 1,2, 1,mat_2d_row, 1,mat_2d_col, name='h2d',
392
call re_alloc(s2d, 1,2, 1,mat_2d_row, 1,mat_2d_col, name='s2d',
394
call re_alloc(z2d, 1,2, 1,mat_2d_row, 1,mat_2d_col, name='z2d',
397
C For 2D case, copy arrays to new distribution
398
call pzgemr2d(n, n, H, 1, 1, desch, h2d, 1, 1, desc_h2d, ictxt)
399
call pzgemr2d(n, n, S, 1, 1, desch, s2d, 1, 1, desc_h2d, ictxt)
401
#ifdef SCALAPACK_DEBUG
402
if ( .not. (Serial .or. debugged) ) then
407
if (DivideConquer) then
409
call pzheevd(jobz,'U',n,h2d,1,1,desc_h2d,w,z2d,1,1,
410
. desc_h2d,work,d_lwork,rwork,d_lrwork,iwork,
413
call pzheevd(jobz,'U',n,H,1,1,desch,w,Z,1,1,desch,work,
414
. d_lwork,rwork,d_lrwork,iwork,d_liwork,info)
418
call pzheevx(jobz,range,'U',n,h2d,1,1,desc_h2d,vl,vu,1,
419
. neigvec,abstol,neigok,nz,w,orfac,z2d,1,1,
420
. desc_h2d,work,d_lwork,rwork,d_lrwork,iwork,
421
. d_liwork,ifail,iclustr,gap,info)
423
call pzheevx(jobz,range,'U',n,H,1,1,desch,vl,vu,1,neigvec,
424
. abstol,neigok,nz,w,orfac,Z,1,1,desch,work,
425
. d_lwork,rwork,d_lrwork,iwork,d_liwork,ifail,
429
d_lwork = nint(work(1))
430
d_lrwork = nint(rwork(1))
432
write(*,'(a,i0,6(a,i10))')'cdiag-debug: Node=',Node,
433
& ', lwork=', lwork,'>= lworkq=',d_lwork,
434
& ', lrwork=',lrwork,'>= lrworkq=',d_lrwork,
435
& ', liwork=',liwork,'>= liworkq=',d_liwork
442
C*******************************************************************************
443
C Perform diagonalisation using simple driver *
444
C*******************************************************************************
447
call zhegv(1,jobz,'U',n,H,n,S,n,w,work,lwork,rwork,info)
449
call zcopy(n*n,H,1,Z,1)
451
call zhegvx(1,jobz,range,'U',n,H,n,S,n,vl,vu,1,neigvec,
452
. abstol,neigok,w,Z,n,work,lwork,rwork,iwork,
458
call pzhegvx(1,jobz,range,'U',n,h2d,1,1,desc_h2d,s2d,1,1,
459
. desc_h2d,vl,vu,1,neigvec,abstol,neigok,nz,w,
460
. orfac,z2d,1,1,desc_h2d,work,lwork,rwork,lrwork,
461
. iwork,liwork,ifail,iclustr,gap,info)
462
call pzgemr2d(n,n,z2d,1,1,desc_h2d,Z,1,1,desch,ictxt)
464
call pzhegvx(1,jobz,range,'U',n,H,1,1,desch,S,1,1,desch,
465
. vl,vu,1,neigvec,abstol,neigok,nz,w,orfac,Z,
466
. 1,1,desch,work,lwork,rwork,lrwork,iwork,liwork,
467
. ifail,iclustr,gap,info)
470
C If there is insufficient memory due to eigenvalue clustering increase memory
472
oldmaxclustersize = maxclustersize
474
clustersize = iclustr(2*nc) - iclustr(2*nc-1)
475
maxclustersize = max(maxclustersize,clustersize)
477
if (maxclustersize.gt.oldmaxclustersize) then
478
C If the memory has increased then try again - otherwise fail
490
call die('Illegal argument to general eigensolver')
491
elseif (info.gt.0) then
492
if (mod(info/2,2).ne.0) then
494
write(6,'(/,''Clustered eigenvectors not converged - '',
495
. ''more memory required'',/)')
498
call die('Failure to converge general eigenproblem')
501
if (neigok.lt.neigvec) then
502
call die('Insufficient eigenvalues converged in cdiag')
506
C*******************************************************************************
507
C Factorise overlap matrix *
508
C*******************************************************************************
509
call timer('cdiag1',1)
511
call zpotrf('U',n,S,n,info)
515
call pzpotrf('U',n,s2d,1,1,desc_h2d,info)
517
call pzpotrf('U',n,S,1,1,desch,info)
522
call die('Error in Cholesky factorisation in cdiag')
524
call timer('cdiag1',2)
526
C*******************************************************************************
527
C Transform problem to standard eigenvalue problem *
528
C*******************************************************************************
529
call timer('cdiag2',1)
531
call zhegst(ibtype,'U',n,H,n,S,n,info)
535
call pzhengst(ibtype,'U',n,h2d,1,1,desc_h2d,s2d,1,1,
536
. desc_h2d,dscale,work,lwork,info)
538
call pzhengst(ibtype,'U',n,H,1,1,desch,S,1,1,
539
. desch,dscale,work,lwork,info)
544
call die('Error in forward transformation in cdiag')
546
call timer('cdiag2',2)
548
C*******************************************************************************
549
C Pre-rotate using trial eigenvectors *
550
C*******************************************************************************
551
if (PreRotate.and.ChangeAlgorithm) then
553
C Rotate according to eigenvectors from previous cycle
554
call zhemm('L','U',n,n,one_z,H,n,Zsave,n,zero_z,Z,n)
555
call zgemm('T','N',n,n,n,one_z,Zsave,n,Z,n,zero_z,H,n)
559
call pzhemm('L','U',n,n,one_z,h2d,1,1,desc_h2d,Zsave,1,1,
560
. desc_h2d,zero_z,z2d,1,1,desc_h2d)
561
call pzgemm('T','N',n,n,n,one_z,Zsave,1,1,desc_h2d,z2d,1,
562
. 1,desch,zero_z,h2d,1,1,desc_h2d)
564
call pzhemm('L','U',n,n,one_z,H,1,1,desch,Zsave,1,1,desch,
565
. zero_z,Z,1,1,desch)
566
call pzgemm('T','N',n,n,n,one_z,Zsave,1,1,desch,Z,1,1,
567
. desch,zero_z,H,1,1,desch)
573
C*******************************************************************************
574
C Solve standard eigenvalue problem *
575
C*******************************************************************************
576
call timer('cdiag3',1)
578
if (DivideConquer) then
579
call zheevds(jobz,'U',n,H,n,Z,n,w,work,lwork,rwork,lrwork,
583
call zheevx(jobz,range,'U',n,H,n,vl,vu,1,neigvec,abstol,
584
. neigok,w,Z,n,work,lwork,rwork,iwork,ifail,info)
588
if (DivideConquer) then
590
call pzheevd(jobz,'U',n,h2d,1,1,desc_h2d,w,z2d,1,1,
591
. desc_h2d,work,lwork,rwork,lrwork,iwork,
594
call pzheevd(jobz,'U',n,H,1,1,desch,w,Z,1,1,desch,work,
595
. lwork,rwork,lrwork,iwork,liwork,info)
600
call pzheevx(jobz,range,'U',n,h2d,1,1,desc_h2d,vl,vu,1,
601
. neigvec,abstol,neigok,nz,w,orfac,z2d,1,1,
602
. desc_h2d,work,lwork,rwork,lrwork,iwork,
603
. liwork,ifail,iclustr,gap,info)
605
call pzheevx(jobz,range,'U',n,H,1,1,desch,vl,vu,1,neigvec,
606
. abstol,neigok,nz,w,orfac,Z,1,1,desch,work,
607
. lwork,rwork,lrwork,iwork,liwork,ifail,
611
C If there is insufficient memory due to eigenvalue clustering increase memory
613
oldmaxclustersize = maxclustersize
615
clustersize = iclustr(2*nc) - iclustr(2*nc-1)
616
maxclustersize = max(maxclustersize,clustersize)
618
if (maxclustersize.gt.oldmaxclustersize) then
619
C If the memory has increased then try again - otherwise fail
622
write(6,*) "Re-trying diagonalization "//
623
$ "with more memory..."
625
call timer('cdiag3',2)
638
call die('Illegal argument to standard eigensolver')
639
elseif (info.gt.0) then
640
if (mod(info/2,2).ne.0) then
642
write(6,'(/,''Clustered eigenvectors not converged - '',
643
. ''more memory required'',/)')
646
call die('Failure to converge standard eigenproblem')
649
if (neigok.lt.neigvec) then
650
call die('Insufficient eigenvalues converged in cdiag')
653
call timer('cdiag3',2)
655
C*******************************************************************************
656
C Post-rotate using trial eigenvectors *
657
C*******************************************************************************
658
if (PreRotate.and.ChangeAlgorithm) then
660
call zgemm('N','N',n,n,n,one_z,Zsave,n,Z,n,zero_z,H,n)
661
call zcopy(n*nm,H(1,1,1),1,Z(1,1,1),1)
665
call pzgemm('N','N',n,n,n,one_z,Zsave,1,1,desc_h2d,z2d,1,
666
. 1,desc_h2d,zero_z,h2d,1,1,desc_h2d)
667
call zcopy(n*nm,h2d(1,1,1),1,z2d(1,1,1),1)
669
call pzgemm('N','N',n,n,n,one_z,Zsave,1,1,desch,Z,1,1,
670
. desch,zero_z,H,1,1,desch)
671
call zcopy(n*nm,H(1,1,1),1,Z(1,1,1),1)
677
C Save eigenvectors if required
678
if (SaveEigenvectors) then
681
call zcopy(n*nm,z2d(1,1,1),1,Zsave(1,1,1),1)
684
call zcopy(n*nm,Z(1,1,1),1,Zsave(1,1,1),1)
690
C*******************************************************************************
691
C Back transformation *
692
C*******************************************************************************
693
call timer('cdiag4',1)
695
if (neigvec.gt.0) then
696
call ztrsm('Left', 'U', 'N', 'Non-unit',
697
. n, neigvec, one_z, S, n, Z, n)
701
if (neigvec.gt.0) then
703
call pztrsm('Left','U','N','Non-unit',n,neigvec,
704
. one_z,s2d,1,1,desc_h2d,z2d,1,1,desc_h2d)
705
call pzgemr2d(n,n,z2d,1,1,desc_h2d,Z,1,1,desch,ictxt)
707
call pztrsm('Left','U','N','Non-unit',n,neigvec,
708
. one_z,S,1,1,desch,Z,1,1,desch)
711
if (dscale.ne.1.0_dp) then
712
call dscal(n,dscale,w,1)
717
call die('Error in back transformation in cdiag')
719
call timer('cdiag4',2)
723
C*******************************************************************************
725
C*******************************************************************************
730
C Deallocate workspace arrays
732
call de_alloc( iclustr, name='iclustr')
733
call de_alloc( gap, name='gap' )
735
call de_alloc( ifail, name='ifail' )
736
call de_alloc( iwork, name='iwork' )
737
call de_alloc( work, name='work' )
739
C Exit point, excluding deallocations
742
C Restore old allocation defaults
743
call alloc_default( restore=oldDefaults )
745
C Set first time call logical to false
749
call timer('cdiag',2)