2
! Copyright (C) 1996-2016 The SIESTA group
3
! This file is distributed under the terms of the
4
! GNU General Public License: see COPYING in the top directory
5
! or http://www.gnu.org/copyleft/gpl.txt.
6
! See Docs/Contributors.txt for a list of contributors.
8
subroutine rdiag(H,S,n,nm,nml,w,Z,neigvec,iscf,ierror)
9
C ***************************************************************************
10
C Subroutine to solve all eigenvalues and eigenvectors of the
11
C real general eigenvalue problem H z = w S z, with H and S
12
C real symmetric matrices.
13
C Written by G.Fabricius and J.Soler, March 1998
14
C Rewritten by Julian Gale, August 2004
15
C ************************** INPUT ******************************************
16
C real*8 H(nml,nm) : Symmetric H matrix
17
C real*8 S(nml,nm) : Symmetric S matrix
18
C integer n : Order of the generalized system
19
C integer nm : Right hand dimension of H and S matrices
20
C integer nml : Left hand dimension of H and S matrices
21
C which is greater than or equal to nm
22
C integer neigvec : No. of eigenvectors to calculate
23
C integer iscf : SCF cycle
24
C ************************** OUTPUT *****************************************
25
C real*8 w(nml) : Eigenvalues
26
C real*8 Z(nml,nm) : Eigenvectors
27
C integer ierror : Flag indicating success code for routine
29
C : -1 = repeat call as memory is increased
31
C ************************* PARALLEL ****************************************
32
C When running in parallel this routine now uses Scalapack to perform a
33
C parallel matrix diagonalisation. This requires Scalapack and Blacs to
34
C be installed first. Although globally a 1-D block cyclic data distribution
35
C is employed, locally 1 or 2-D distributions are allowed for.
36
C The routine allows access to all the phases of diagonalisation for fuller
37
C control, and allows for parallel divide and conquer with reduced memory.
38
C The presence of eigenvalue clusters is checked for and the memory adjusted
39
C accordingly to try to guarantee convergence.
40
C Note that the blacs grid is only initialised on the first call to avoid
41
C exceeding MPI limits for split/group creation.
42
C ***************************************************************************
48
use parallel, only : Node, Nodes, BlockSize, ProcessorY
49
use diagmemory, only : MemoryFactor
50
use alloc, only : re_alloc, de_alloc, allocdefaults,
53
use m_diagon_opt,only : saveeigenvectors, maxclustersize, serial,
54
& i2d_ctxt, noexpert, divideconquer, use2d,
55
& prerotate, ictxt, allinone, doread,
75
type(allocDefaults) oldDefaults
79
integer :: clustersize
98
integer :: nsygst_lwopt
99
integer :: nsytrd_lwopt
102
integer :: oldmaxclustersize
108
C Additional variables for a 2D proc grid
109
integer :: np2d_row, np2d_col
110
integer :: my2d_row, my2d_col
111
integer, dimension(9) :: desc_h2d
114
integer :: mat_2d_row, mat_2d_col
124
logical :: ChangeAlgorithm
128
#ifdef SCALAPACK_DEBUG
129
integer :: d_lwork, d_lrwork, d_liwork
130
logical, save :: debugged = .false.
134
real(dp), parameter :: abstol = 1.0e-8_dp
135
real(dp), parameter :: orfac = 1.0e-3_dp
136
real(dp), parameter :: zero = 0.0_dp
137
real(dp), parameter :: one = 1.0_dp
139
integer, pointer :: iclustr(:), ifail(:), iwork(:)
140
real(dp), pointer :: gap(:), work(:), Zsave(:,:),
141
& h2d(:,:), s2d(:,:), z2d(:,:)
144
call timer('rdiag',1)
147
nullify( iclustr, ifail, iwork, gap, work, Zsave, h2d, s2d, z2d )
149
C Get old allocation defaults and set new ones
150
call alloc_default( old=oldDefaults, copy=.false., shrink=.true.,
151
& imin=1, routine='rdiag' )
153
C*******************************************************************************
155
C*******************************************************************************
157
C Initialise error flag
160
! Trap n=1 case, which is not handled correctly otherwise (JMS 2011/07/19)
163
w(1) = H(1,1) / S(1,1)
165
Z(1,1) = one / sqrt(S(1,1))
166
goto 1000 ! exit point
169
! vl and vu are not currently used, but they must be initialized
173
C Set algorithm logicals
176
AllInOne = fdf_boolean('Diag.AllInOne',.false.)
177
DivideConquer = fdf_boolean('Diag.DivideAndConquer',.true.)
178
NoExpert = fdf_boolean('Diag.NoExpert',.false.)
179
PreRotate = fdf_boolean('Diag.PreRotate',.false.)
180
Use2D = fdf_boolean('Diag.Use2D',.true.)
185
lBtmp(2) = DivideConquer
190
call MPI_Bcast(lBtmp,5,MPI_logical,0,MPI_Comm_World,MPIerror)
192
DivideConquer = lBtmp(2)
197
Serial = (Nodes.eq.1)
198
if (Serial) Use2D = .false.
199
if (AllInOne.and.Serial) DivideConquer = .false.
200
if (neigvec.ne.n) PreRotate = .false.
201
if (PreRotate) SaveEigenvectors = .true.
202
if (Nodes.eq.1) Use2D = .false.
203
if (SaveEigenvectors) then
205
call die('Zsave not dimensioned in rdiag')
207
call re_alloc( Zsave, 1, nml, 1, nm, 'Zsave', 'rdiag' )
212
C Is it time to switch from standard diagonaliser?
213
ChangeAlgorithm = (iscf.gt.2)
216
if (.not.Serial) then
217
C Get Blacs context and initialise Blacs grid for main grid
221
! JMS: Changed to allow mpi_comm_world to be supplanted in MPI_siesta.
222
! The change has no effect otherwise, since blacs_get returns the
223
! true value of mpi_comm_world (usually zero) as ictxt
224
! call blacs_get( -1, 0, ictxt ) ! removed
225
ictxt = mpi_comm_world ! added
226
call blacs_gridinit( ictxt, 'C', nprow, npcol )
230
C If 2D approach is being used then setup secondary grid
231
np2d_row = processorY
232
np2d_col = Nodes/processorY
234
call blacs_get(ictxt, 10, i2d_ctxt)
235
call blacs_gridinit(i2d_ctxt, 'R', np2d_row, np2d_col)
237
call blacs_gridinfo(i2d_ctxt, np2d_row, np2d_col,
238
& my2d_row, my2d_col)
241
C Set up blacs descriptors for parallel case
243
call descinit( desch, n, n, BlockSize, BlockSize, 0, 0,
245
if (info.ne.0) BlacsOK = .false.
246
if (.not.BlacsOK) then
247
call die('ERROR : Blacs setup has failed in rdiag!')
251
C Enquire size of local part of 2D matrices
252
mat_2d_row = numroc(n, BlockSize, my2d_row, 0, np2d_row)
253
mat_2d_col = numroc(n, BlockSize, my2d_col, 0, np2d_col)
255
C Set up blacs descriptors for 2D case
256
call descinit(desc_h2d, n, n, Blocksize, BlockSize, 0, 0,
257
& i2d_ctxt, mat_2d_row, info)
258
if (info.ne.0) BlacsOK = .false.
259
if (.not.BlacsOK) then
260
call die('ERROR : Blacs setup has failed in rdiag!')
267
C Set general Lapack parameters
268
if (neigvec.gt.0) then
270
if (neigvec.eq.n) then
280
C Calculate memory requirements
282
if (DivideConquer) then
283
if (neigvec.gt.0) then
284
lwork = 1 + 6*n + n*n
291
nb = ilaenv(1,'DSYTRD','U',n,-1,-1,-1)
296
lwork = max(8*n,(nb+3)*n)
303
call blacs_gridinfo(desc_h2d(2),nprow,npcol,myrow,mycol)
305
call blacs_gridinfo(desch(2),nprow,npcol,myrow,mycol)
307
if (DivideConquer) then
308
np0 = numroc(n,BlockSize,0,0,nprow)
309
mq0 = numroc(n,BlockSize,0,0,npcol)
310
lwork = max(BlockSize*(np0+1),3*BlockSize)
311
lwork2 = BlockSize*(2*np0 + mq0 + BlockSize)
312
lwork = max(lwork,lwork2)
313
iarow = indxg2p(1,BlockSize,myrow,desch(7),nprow)
314
iacol = indxg2p(1,BlockSize,mycol,desch(8),npcol)
315
np = numroc(n,BlockSize,myrow,iarow,nprow)
316
nq = numroc(n,BlockSize,mycol,iacol,npcol)
317
nt = 3*n + max(BlockSize*(np+1),3*BlockSize)
318
lwork2 = max(1+6*n+2*np*nq,nt) + 2*n
319
lwork = max(lwork,lwork2)
320
liwork = 7*n + 8*Nodes + 2
322
nn = max(n,BlockSize)
324
np0 = numroc(nn,BlockSize,0,0,nprow)
325
mq0 = numroc(nn,BlockSize,0,0,npcol)
326
if (neigvec.gt.0) then
327
lwork = 5*n + max(5*nn,np0*mq0+2*BlockSize*BlockSize) +
328
& iceil(neigvec,Nodes)*nn + maxclustersize*n
329
anb = pjlaenv(ictxt,3,'PDSYTTRD','L',0,0,0,0)
330
sqnpc = int(sqrt(dble(Nodes)))
331
nps = max(numroc(n,1,0,0,sqnpc),2*anb)
332
nsytrd_lwopt = n + 2*(anb + 1)*(4*nps + 2) + (nps + 3)*nps
333
nsygst_lwopt = (2*np0 + mq0)*BlockSize + BlockSize*BlockSize
334
lwork2 = max(5*n + nsytrd_lwopt,nsygst_lwopt)
335
lwork = max(lwork,lwork2)
337
lwork = 5*n + max(5*nn,BlockSize*(np0+1))
339
liwork = 6*max(n,Nodes+1,4)
344
C Scale memory by memory factor
345
lwork = nint(MemoryFactor*dble(lwork))
347
C Allocate workspace arrays
348
call re_alloc( work, 1, lwork, 'work', 'rdiag' )
349
call re_alloc( iwork, 1, liwork, 'iwork', 'rdiag' )
350
call re_alloc( ifail, 1, n, 'ifail', 'rdiag' )
352
call re_alloc( gap, 1, Nodes, 'gap', 'rdiag' )
353
call re_alloc( iclustr, 1, 2*Nodes, 'iclustr', 'rdiag' )
356
C Set up workspace arrays for 2D versions of 1D arrays
357
call re_alloc( h2d, 1, mat_2d_row, 1, mat_2d_col, 'h2d',
359
call re_alloc( s2d, 1, mat_2d_row, 1, mat_2d_col, 's2d',
361
call re_alloc( z2d, 1, mat_2d_row, 1, mat_2d_col, 'z2d',
364
C For 2D case, copy arrays to new distribution
365
call pdgemr2d(n, n, H, 1, 1, desch, h2d, 1, 1, desc_h2d, ictxt)
366
call pdgemr2d(n, n, S, 1, 1, desch, s2d, 1, 1, desc_h2d, ictxt)
368
#ifdef SCALAPACK_DEBUG
369
if ( .not. (Serial .or. debugged) ) then
373
if (DivideConquer) then
375
call pdsyevd(jobz,'U',n,h2d,1,1,desc_h2d,w,z2d,1,1,
376
& desc_h2d,work,d_lwork,iwork,d_liwork,info)
378
call pdsyevd(jobz,'U',n,H,1,1,desch,w,Z,1,1,desch,work,
379
& d_lwork,iwork,d_liwork,info)
383
call pdsyevx(jobz,range,'U',n,h2d,1,1,desc_h2d,vl,vu,1,
384
& neigvec,abstol,neigok,nz,w,orfac,z2d,1,1,
385
& desc_h2d,work,d_lwork,iwork,d_liwork,ifail,
388
call pdsyevx(jobz,range,'U',n,H,1,1,desch,vl,vu,1,neigvec,
389
& abstol,neigok,nz,w,orfac,Z,1,1,desch,work,
390
& d_lwork,iwork,d_liwork,ifail,iclustr,gap,info)
393
d_lwork = nint(work(1))
395
write(*,'(a,i0,4(a,i10))')'rdiag-debug: Node=',Node,
396
& ', lwork=', lwork,'>= lworkq=',d_lwork,
397
& ', liwork=',liwork,'>= liworkq=',d_liwork
404
C*******************************************************************************
405
C Perform diagonalisation using simple driver *
406
C*******************************************************************************
409
call dsygv(1,jobz,'U',n,H,n,S,n,w,work,lwork,info)
411
call dcopy(n*n,H,1,Z,1)
413
call dsygvx(1,jobz,range,'U',n,H,n,S,n,vl,vu,1,neigvec,
414
& abstol,neigok,w,Z,n,work,lwork,iwork,ifail,
420
call pdsygvx(1,jobz,range,'U',n,h2d,1,1,desc_h2d,s2d,1,1,
421
& desc_h2d,vl,vu,1,neigvec,abstol,neigok,nz,w,
422
& orfac,z2d,1,1,desc_h2d,work,lwork,iwork,liwork,
423
& ifail,iclustr,gap,info)
424
call pdgemr2d(n,n,z2d,1,1,desc_h2d,Z,1,1,desch,ictxt)
426
call pdsygvx(1,jobz,range,'U',n,H,1,1,desch,S,1,1,desch,
427
& vl,vu,1,neigvec,abstol,neigok,nz,w,orfac,Z,
428
& 1,1,desch,work,lwork,iwork,liwork,ifail,
432
C If there is insufficient memory due to eigenvalue clustering increase memory
434
oldmaxclustersize = maxclustersize
436
clustersize = iclustr(2*nc) - iclustr(2*nc-1)
437
maxclustersize = max(maxclustersize,clustersize)
439
if (maxclustersize.gt.oldmaxclustersize) then
440
C If the memory has increased then try again - otherwise fail
453
call die('Illegal argument to general eigensolver')
454
elseif (info.gt.0) then
455
if (mod(info/2,2).ne.0) then
457
write(6,'(/,''Clustered eigenvectors not converged - '',
458
& ''more memory required'',/)')
461
call die('Failure to converge general eigenproblem')
464
if (neigok.lt.neigvec) then
465
call die('Insufficient eigenvalues converged in rdiag')
469
C*******************************************************************************
470
C Factorise overlap matrix *
471
C*******************************************************************************
472
call timer('rdiag1',1)
474
call dpotrf('U',n,S,n,info)
478
call pdpotrf('U',n,s2d,1,1,desc_h2d,info)
480
call pdpotrf('U',n,S,1,1,desch,info)
485
call die('Error in Cholesky factorisation in rdiag')
487
call timer('rdiag1',2)
489
C*******************************************************************************
490
C Transform problem to standard eigenvalue problem *
491
C*******************************************************************************
492
call timer('rdiag2',1)
494
call dsygst(1,'U',n,H,n,S,n,info)
498
call pdsyngst(1,'U',n,h2d,1,1,desc_h2d,s2d,1,1,
499
& desc_h2d,dscale,work,lwork,info)
501
call pdsyngst(1,'U',n,H,1,1,desch,S,1,1,
502
& desch,dscale,work,lwork,info)
507
call die('Error in forward transformation in rdiag')
509
call timer('rdiag2',2)
511
C*******************************************************************************
512
C Pre-rotate using trial eigenvectors *
513
C*******************************************************************************
514
if (PreRotate.and.ChangeAlgorithm) then
516
C Rotate according to eigenvectors from previous cycle
517
call dsymm('L','U',n,n,one,H,n,Zsave,n,zero,Z,n)
518
call dgemm('T','N',n,n,n,one,Zsave,n,Z,n,zero,H,n)
522
call pdsymm('L','U',n,n,one,h2d,1,1,desc_h2d,Zsave,1,1,
523
& desc_h2d,zero,z2d,1,1,desc_h2d)
524
call pdgemm('T','N',n,n,n,one,Zsave,1,1,desc_h2d,z2d,1,
525
& 1,desch,zero,h2d,1,1,desc_h2d)
527
call pdsymm('L','U',n,n,one,H,1,1,desch,Zsave,1,1,desch,
529
call pdgemm('T','N',n,n,n,one,Zsave,1,1,desch,Z,1,1,
530
& desch,zero,H,1,1,desch)
536
C*******************************************************************************
537
C Solve standard eigenvalue problem *
538
C*******************************************************************************
539
call timer('rdiag3',1)
541
if (DivideConquer) then
542
call dsyevds(jobz,'U',n,H,n,w,Z,n,work,lwork,iwork,liwork,
546
call dsyevx(jobz,range,'U',n,H,n,vl,vu,1,neigvec,abstol,
547
& neigok,w,Z,n,work,lwork,iwork,ifail,info)
551
if (DivideConquer) then
553
call pdsyevd(jobz,'U',n,h2d,1,1,desc_h2d,w,z2d,1,1,
554
& desc_h2d,work,lwork,iwork,liwork,info)
556
call pdsyevd(jobz,'U',n,H,1,1,desch,w,Z,1,1,desch,work,
557
& lwork,iwork,liwork,info)
562
call pdsyevx(jobz,range,'U',n,h2d,1,1,desc_h2d,vl,vu,1,
563
& neigvec,abstol,neigok,nz,w,orfac,z2d,1,1,
564
& desc_h2d,work,lwork,iwork,liwork,ifail,
567
call pdsyevx(jobz,range,'U',n,H,1,1,desch,vl,vu,1,neigvec,
568
& abstol,neigok,nz,w,orfac,Z,1,1,desch,work,
569
& lwork,iwork,liwork,ifail,iclustr,gap,info)
572
C If there is insufficient memory due to eigenvalue clustering increase memory
574
oldmaxclustersize = maxclustersize
576
clustersize = iclustr(2*nc) - iclustr(2*nc-1)
577
maxclustersize = max(maxclustersize,clustersize)
579
if (maxclustersize.gt.oldmaxclustersize) then
580
C If the memory has increased then try again - otherwise fail
582
call timer('rdiag3',2)
595
call die('Illegal argument to standard eigensolver')
596
elseif (info.gt.0) then
597
if (mod(info/2,2).ne.0) then
599
write(6,'(/,''Clustered eigenvectors not converged - '',
600
& ''more memory required'',/)')
603
call die('Failure to converge standard eigenproblem')
606
if (neigok.lt.neigvec) then
607
call die('Insufficient eigenvalues converged in rdiag')
610
call timer('rdiag3',2)
612
C*******************************************************************************
613
C Post-rotate using trial eigenvectors *
614
C*******************************************************************************
615
if (PreRotate.and.ChangeAlgorithm) then
617
call dgemm('N','N',n,n,n,one,Zsave,n,Z,n,zero,H,n)
618
call dcopy(n*nm,H(1,1),1,Z(1,1),1)
622
call pdgemm('N','N',n,n,n,one,Zsave,1,1,desc_h2d,z2d,1,
623
& 1,desc_h2d,zero,h2d,1,1,desc_h2d)
624
call dcopy(n*nm,h2d(1,1),1,z2d(1,1),1)
626
call pdgemm('N','N',n,n,n,one,Zsave,1,1,desch,Z,1,1,
627
& desch,zero,H,1,1,desch)
628
call dcopy(n*nm,H(1,1),1,Z(1,1),1)
634
C Save eigenvectors if required
635
if (SaveEigenvectors) then
638
call dcopy(n*nm,z2d(1,1),1,Zsave(1,1),1)
641
call dcopy(n*nm,Z(1,1),1,Zsave(1,1),1)
647
C*******************************************************************************
648
C Back transformation *
649
C*******************************************************************************
650
call timer('rdiag4',1)
652
if (neigvec.gt.0) then
653
call dtrsm('Left','U','N','Non-unit',n,neigvec,one,S,n,Z,n)
657
if (neigvec.gt.0) then
659
call pdtrsm('Left','U','N','Non-unit',n,neigvec,one,
660
& s2d,1,1,desc_h2d,z2d,1,1,desc_h2d)
661
call pdgemr2d(n,n,z2d,1,1,desc_h2d,Z,1,1,desch,ictxt)
663
call pdtrsm('Left','U','N','Non-unit',n,neigvec,one,
664
& S,1,1,desch,Z,1,1,desch)
666
if (dscale.ne.one) then
667
call dscal(n,dscale,w,1)
673
call die('Error in back transformation in rdiag')
675
call timer('rdiag4',2)
678
C*******************************************************************************
680
C*******************************************************************************
685
C Deallocate workspace arrays
687
if (associated(Zsave)) call de_alloc( Zsave, 'Zsave', 'rdiag' )
689
call de_alloc( h2d, 'h2d', 'rdiag' )
690
call de_alloc( s2d, 's2d', 'rdiag' )
691
call de_alloc( z2d, 'z2d', 'rdiag' )
693
call de_alloc( iclustr, 'iclustr', 'rdiag' )
694
call de_alloc( gap, 'gap', 'rdiag' )
696
call de_alloc( ifail, 'ifail', 'rdiag' )
697
call de_alloc( iwork, 'iwork', 'rdiag' )
698
call de_alloc( work, 'work', 'rdiag' )
700
C Exit point, excluding deallocations
703
C Restore old allocation defaults
704
call alloc_default( restore=oldDefaults )
706
C Set first call flag to false
710
call timer('rdiag',2)