! Copyright (C) 1996-2016 The SIESTA group
! This file is distributed under the terms of the
! GNU General Public License: see COPYING in the top directory
! or http://www.gnu.org/copyleft/gpl.txt .
! See Docs/Contributors.txt for a list of contributors.
module m_rdiag_elpa
CONTAINS
subroutine rdiag_elpa(H,S,n,nm,nml,w,Z,neigvec,ierror)
! ***************************************************************************
! Subroutine to solve all eigenvalues and eigenvectors of the
! real general eigenvalue problem H z = w S z, with H and S
! real symmetric matrices.
! This version uses the ELPA library, with the matrices in a 2D block-cyclic
! distribution, and performs the Cholesky and back-transform steps with
! ScaLAPACK.
! ************************** INPUT ******************************************
! real*8 H(nml,nm) : Symmetric H matrix
! real*8 S(nml,nm) : Symmetric S matrix
! integer n : Order of the generalized system
! integer nm : Right hand dimension of H and S matrices
! integer nml : Left hand dimension of H and S matrices,
! which is greater than or equal to nm
! integer neigvec : No. of eigenvectors to calculate
! ************************** OUTPUT *****************************************
! real*8 w(nml) : Eigenvalues
! real*8 Z(nml,nm) : Eigenvectors
! integer ierror : Flag indicating success code for routine
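! ***************************************************************************
! Illustrative call (a sketch only, with hypothetical variable names; in
! SIESTA the actual caller is a diagonalization driver such as diagg):
!
!   call rdiag_elpa(Haux, Saux, no_u, no_l, no_u, eig, psi, neigwanted, ierror)
!   if (ierror /= 0) call die('rdiag_elpa failed')
! ***************************************************************************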
use precision, only : dp
use parallel, only : Node, Nodes, BlockSize
use mpi_siesta, only : mpi_comm_world
use alloc, only : re_alloc, de_alloc
use sys, only : die
use elpa1, only: get_elpa_row_col_comms
use elpa2, only: solve_evp_real_2stage
implicit none
integer, intent(out) :: ierror
integer, intent(in) :: n
integer, intent(in) :: neigvec
integer, intent(in) :: nm
integer, intent(in) :: nml
real(dp), intent(in) :: H(nml,nm)
real(dp), intent(in) :: S(nml,nm)
real(dp), intent(out) :: w(nml)
real(dp), intent(out) :: Z(nml,nm)
integer :: MPIerror, mpi_comm_rows, mpi_comm_cols
! Context, descriptor and grid variables for the original 1D distribution
integer :: ictxt, i2d_ctxt
integer :: np_rows_1d, np_cols_1d
integer, dimension(9) :: desch
logical :: BlacsOK
! Additional variables for a 2D proc grid
integer :: np_rows, np_cols
integer :: my_prow, my_pcol
integer, dimension(9) :: desc_h2d
real(dp), pointer, dimension(:,:) :: h2d =>null()
real(dp), pointer, dimension(:,:) :: s2d =>null()
real(dp), pointer, dimension(:,:) :: z2d =>null()
integer :: nh_rows, nh_cols
integer :: np0, lwork
integer :: info, i, n_col, n_row
real(dp) :: dscale
real(dp), pointer :: work(:) => null()
real(dp), parameter :: zero = 0.0_dp
real(dp), parameter :: one = 1.0_dp
integer, external :: numroc, indxl2g
!*******************************************************************************
!*******************************************************************************
! Initialise error flag
ierror = 0
! Get Blacs context and initialise Blacs grid for main grid
! First, declare explicitly a context for the original
! distribution, which is block-cyclic over the columns,
! since our matrices are dimensioned as H(n,no_l) in diagg,
! i.e., each processor contains all the rows but just no_l columns.
np_rows_1d = 1
np_cols_1d = Nodes
call blacs_get( -1, 0, ictxt )
! The design of the BLACS is weird.
! The above call is equivalent to setting:
ictxt = mpi_comm_world ! or whatever comm we are using
! The following call will OVERWRITE ictxt with a new context.
! "C" means "column-major" ordering, which is not surprising,
! since we have np_rows_1d = 1.
call blacs_gridinit( ictxt, 'C', np_rows_1d, np_cols_1d )
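! For illustration (assumed values, not taken from the caller): with n = 8,
! Nodes = 2 and BlockSize = 2, this 1D grid gives processor 0 the global
! columns 1-2 and 5-6 and processor 1 the columns 3-4 and 7-8, each
! processor holding all n rows of its local columns.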
! Set up blacs descriptors for parallel case.
! Note that the row blocksize recorded here does not match the actual 1D
! distribution (the effective row blocksize is n); it should be:
! n, n, n, BlockSize
BlacsOK = .true.
call descinit( desch, n, n, BlockSize, BlockSize, 0, 0,
. ictxt, n, info )
if (info.ne.0) BlacsOK = .false.
if (.not.BlacsOK) then
call die('ERROR : Blacs setup has failed in rdiag!')
endif
! Setup secondary grid for 2D distribution
! Selection of number of processor rows/columns:
! We try to set up the grid square-like, i.e. start the search for possible
! divisors of Nodes with a number next to the square root of Nodes
! and decrement it until a divisor is found.
do np_rows = NINT(SQRT(REAL(Nodes))),2,-1
if(mod(Nodes,np_rows) == 0 ) exit
enddo
! At the end of the above loop, Nodes is always divisible by np_rows
! (if no divisor >= 2 is found, np_rows falls through to 1).
np_cols = Nodes/np_rows
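! Example (assumed value): for Nodes = 12 the search starts at
! NINT(SQRT(12.0)) = 3, which divides 12, so np_rows = 3 and np_cols = 4;
! for a prime Nodes the grid degenerates to 1 x Nodes.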
if (Node == 0) print *, "np_rows, col: ", np_rows, np_cols
!call blacs_get(ictxt, 10, i2d_ctxt)
i2d_ctxt = mpi_comm_world ! or whatever comm we are using
! After the above assignment, i2d_ctxt contains the communicator
! handle we used to initialize ictxt.
! Now we set up the process grid in row-major, or "natural" order.
call blacs_gridinit(i2d_ctxt, 'R', np_rows, np_cols)
! We need this call to know who we are (my_prow, my_pcol)
call blacs_gridinfo(i2d_ctxt, np_rows, np_cols,
. my_prow, my_pcol)
if (Node == 0) print *, "ictxt, i2d_ctxt:", ictxt, i2d_ctxt
! Enquire size of the local part of the 2D matrices, since the distribution
! is about to change and in principle we do not know it beforehand.
! We might want to change the value of BlockSize...
nh_rows = numroc(n, BlockSize, my_prow, 0, np_rows)
nh_cols = numroc(n, BlockSize, my_pcol, 0, np_cols)
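! numroc returns the number of rows (or columns) of the global matrix that
! the calling process owns. For example (assumed values): with n = 10,
! BlockSize = 2 and two process rows, numroc(10, 2, 0, 0, 2) = 6 (rows
! 1-2, 5-6, 9-10) and numroc(10, 2, 1, 0, 2) = 4 (rows 3-4, 7-8).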
! Set up blacs descriptors for 2D case.
! It is only necessary to give the leading (row) dimension of the array.
call descinit(desc_h2d, n, n, BlockSize, BlockSize, 0, 0,
. i2d_ctxt, nh_rows, info)
if (info.ne.0) BlacsOK = .false.
if (.not.BlacsOK) then
call die('ERROR : Blacs setup has failed in rdiag!')
endif
! Set up workspace arrays for 2D versions of 1D arrays
call re_alloc( h2d, 1, nh_rows, 1, nh_cols, name='h2d')
call re_alloc( s2d, 1, nh_rows, 1, nh_cols, name='s2d')
call re_alloc( z2d, 1, nh_rows, 1, nh_cols, name='z2d')
call mpi_barrier(mpi_comm_world, mpierror) ! for correct timings only
if (node==0) print *, "About to re-distribute arrays..."
call mpi_barrier(mpi_comm_world, mpierror) ! for correct timings only
! Copy arrays to new 2D distribution
call pdgemr2d(n, n, H, 1, 1, desch, h2d, 1, 1, desc_h2d, ictxt)
call pdgemr2d(n, n, S, 1, 1, desch, s2d, 1, 1, desc_h2d, ictxt)
np0 = numroc(n,BlockSize,0,0,np_rows)
lwork = max(BlockSize*(np0+1),3*BlockSize)
! Allocate workspace arrays
call re_alloc( work, 1,lwork, name='work' )
call mpi_barrier(mpi_comm_world, mpierror) ! for correct timings only
if (node==0) print *, "About to factorize..."
call mpi_barrier(mpi_comm_world, mpierror) ! for correct timings only
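! In outline (a summary, not taken from the original comments): pdpotrf
! factorizes S = U^T U; pdsyngst then forms the standard problem
! (U^-T H U^-1) y = w y, which ELPA solves below; the eigenvectors of the
! original problem, z = U^-1 y, are recovered afterwards with pdtrsm.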
!*******************************************************************************
! Transform problem to standard eigenvalue problem
! by first factorizing the overlap matrix by Cholesky
!*******************************************************************************
call pdpotrf('U',n,s2d,1,1,desc_h2d,info)
if (info.ne.0) then
call die('Error in Cholesky factorisation in rdiag')
endif
call mpi_barrier(mpi_comm_world, mpierror) ! for correct timings only
if (node==0) print *, "About to transform..."
call mpi_barrier(mpi_comm_world, mpierror) ! for correct timings only
call pdsyngst(1,'U',n,h2d,1,1,desc_h2d,s2d,1,1,
. desc_h2d,dscale,work,lwork,info)
if (info.ne.0) then
call die('Error in forward transformation in rdiag')
endif
! Now call ELPA solver
! All ELPA routines need MPI communicators for communicating within
! rows or columns of processes; these are set up in get_elpa_row_col_comms.
call mpi_barrier(mpi_comm_world, mpierror) ! for correct timings only
if (node==0) print *, "About to get comms..."
call mpi_barrier(mpi_comm_world, mpierror) ! for correct timings only
call get_elpa_row_col_comms(mpi_comm_world, my_prow, my_pcol,
$ mpi_comm_rows, mpi_comm_cols)
call mpi_barrier(mpi_comm_world, mpierror) ! for correct timings only
if (node==0) print *, "About to solve..."
call mpi_barrier(mpi_comm_world, mpierror) ! for correct timings only
! NOTE: Need the full matrix, not just a triangle...
! First, use z2d as scratch to hold the transpose of h2d
call pdtran(n,n,1.d0,h2d,1,1,desc_h2d,0.d0,z2d,1,1,desc_h2d)
! Now the lower triangle of z2d holds the transpose of the upper triangle
! of h2d, so we fill the (so far unset) lower triangle of h2d with those
! elements of z2d
do i = 1, nh_cols
! Get global column corresponding to i and number of local rows up to
! and including the diagonal; these are unchanged in h2d
n_col = indxl2g(i, BlockSize, my_pcol, 0, np_cols)
n_row = numroc (n_col, BlockSize, my_prow, 0, np_rows)
h2d(n_row+1:nh_rows,i) = z2d(n_row+1:nh_rows,i)
enddo
call solve_evp_real_2stage(n, neigvec, h2d, nh_rows, w,
$ z2d, nh_rows, BlockSize,
$ mpi_comm_rows, mpi_comm_cols, mpi_comm_world)
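! On return, w holds the eigenvalues and the first neigvec columns of z2d
! the corresponding eigenvectors of the standard problem, still in the 2D
! block-cyclic distribution; they are back-transformed and redistributed
! below.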
! This was the equivalent scalapack call...
! call pdsyevx(jobz,range,'U',n,h2d,1,1,desc_h2d,vl,vu,1,
! . neigvec,abstol,neigok,nz,w,orfac,z2d,1,1,
! . desc_h2d,work,lwork,iwork,liwork,ifail,
! . iclustr,gap,info)
!*******************************************************************************
! Back transformation
!*******************************************************************************
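! The standard-problem eigenvectors y relate to the original ones by y = U z,
! so z is recovered by solving the triangular system U z = y with pdtrsm,
! using the Cholesky factor still stored in s2d.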
call mpi_barrier(mpi_comm_world, mpierror) ! for correct timings only
if (node==0) print *, "About to back-transform..."
call mpi_barrier(mpi_comm_world, mpierror) ! for correct timings only
call pdtrsm('Left','U','N','Non-unit',n,neigvec,one,
. s2d,1,1,desc_h2d,z2d,1,1,desc_h2d)
call mpi_barrier(mpi_comm_world, mpierror) ! for correct timings only
if (node==0) print *, "About to convert z to 1d..."
call mpi_barrier(mpi_comm_world, mpierror) ! for correct timings only
call pdgemr2d(n,n,z2d,1,1,desc_h2d,Z,1,1,desch,ictxt)
if (dscale.ne.one) then
call dscal(n,dscale,w,1)
endif
if (info.ne.0) then
call die('Error in back transformation in rdiag')
endif
call mpi_barrier(mpi_comm_world, mpierror) ! for correct timings only
if (node==0) print *, "About to exit"
call mpi_barrier(mpi_comm_world, mpierror) ! for correct timings only
CALL BLACS_GRIDEXIT( ICTXT )
CALL BLACS_GRIDEXIT( I2D_CTXT )
! Do not call blacs_exit, as it seems to be
! shutting down mpi...
!!!!! CALL BLACS_EXIT( 0 )
! Deallocate workspace arrays
call de_alloc( work, name='work' )
call de_alloc( h2d, name='h2d' )
call de_alloc( s2d, name='s2d' )
call de_alloc( z2d, name='z2d' )
end subroutine rdiag_elpa
end module m_rdiag_elpa