334
427
value = value.and.MA_free_heap(pspspin_downions(2))
338
432
> call errquit('psp_end:error freeing heap memory',0,MA_ERR)
434
* **** deallocate prj_indx ****
435
call psp_prj_indx_end()
441
* ****************************************************
443
* * psp_prj_indx_init *
445
* ****************************************************
447
* This routine sets up the prj_indx indexes:
448
* shift_prj_indx,ii_prj_indx,ia_prj_indx,sd_function_prj_indx
450
subroutine psp_prj_indx_init()
453
#include "mafdecls.fh"
455
#include "errquit.fh"
457
* **** local variables ****
458
logical value,sd_function
459
integer k,l,ii,ia,nproj,l_prj,count1,count2,shift,nion
461
* **** external functions ****
462
integer ion_nion,ion_katm,psi_data_get_ptr
463
external ion_nion,ion_katm,psi_data_get_ptr
471
nproj = int_mb(nprj(1)+ia-1)
474
nion_prj_indx = nion_prj_indx + 1
477
n_prj_indx = n_prj_indx + 1
479
end if !** nproj>0 **
482
value = MA_alloc_get(mt_int,nion_prj_indx,'ii_prj_indx',
483
> ii_prj_indx(2),ii_prj_indx(1))
485
> MA_alloc_get(mt_int,nion_prj_indx,'ia_prj_indx',
486
> ia_prj_indx(2),ia_prj_indx(1))
488
> MA_alloc_get(mt_int,nion_prj_indx,'nproj_prj_indx',
489
> nproj_prj_indx(2),nproj_prj_indx(1))
491
> MA_alloc_get(mt_int,nion_prj_indx,'swstart_prj_indx',
492
> swstart_prj_indx(2),swstart_prj_indx(1))
494
value = MA_alloc_get(mt_int,n_prj_indx,'shift_prj_indx',
495
> shift_prj_indx(2),shift_prj_indx(1))
496
value = MA_alloc_get(mt_int,n_prj_indx,'l_prj_prj_indx',
497
> l_prj_prj_indx(2),l_prj_prj_indx(1))
499
> MA_alloc_get(mt_log,n_prj_indx,'sd_function_prj_indx',
500
> sd_function_prj_indx(2),sd_function_prj_indx(1))
502
> call errquit('psp_prj_indx_init: out of memory',0, MA_ERR)
510
nproj = int_mb(nprj(1)+ia-1)
512
int_mb(ii_prj_indx(1)+count1) = ii
513
int_mb(ia_prj_indx(1)+count1) = ia
514
int_mb(nproj_prj_indx(1)+count1) = nproj
515
int_mb(swstart_prj_indx(1)+count1) = count2
519
shift = psi_data_get_ptr(int_mb(vnl(1)+ia-1),l)
520
l_prj = int_mb(l_projector(1)+(l-1)
521
> + (ia-1)*(nmax_max*lmmax_max))
527
sd_function = (k.eq.0)
529
int_mb(shift_prj_indx(1)+count2) = shift
530
int_mb(l_prj_prj_indx(1)+count2) = l_prj
531
log_mb(sd_function_prj_indx(1)+count2) = sd_function
535
end if !** nproj>0 **
542
* ****************************************************
544
* * psp_prj_indx_alloc_sw1a_sw2a *
546
* ****************************************************
547
subroutine psp_prj_indx_alloc_sw1a_sw2a(nn)
551
#include "mafdecls.fh"
553
#include "errquit.fh"
555
* **** local variables ****
558
if (swaset.and.(nn.gt.swann)) then
559
ok = MA_free_heap(sw1a(2))
560
ok = ok.and.MA_free_heap(sw2a(2))
562
> call errquit('psp_prj_indx_alloc_sw1a_sw2a:freeing heap',
567
if (.not.swaset) then
568
ok = MA_alloc_get(mt_dbl,nn*n_prj_indx,'sw1a',sw1a(2),sw1a(1))
570
> MA_alloc_get(mt_dbl,nn*n_prj_indx,'sw2a',sw2a(2),sw2a(1))
572
> call errquit('psp_prj_indx_alloc_sw1a_sw2a: out of memory',
583
* ****************************************************
585
* * psp_prj_indx_end *
587
* ****************************************************
588
subroutine psp_prj_indx_end()
591
#include "mafdecls.fh"
593
#include "errquit.fh"
595
* **** local variables ****
598
value = MA_free_heap(ii_prj_indx(2))
599
value = value.and.MA_free_heap(ia_prj_indx(2))
600
value = value.and.MA_free_heap(nproj_prj_indx(2))
601
value = value.and.MA_free_heap(swstart_prj_indx(2))
602
value = value.and.MA_free_heap(shift_prj_indx(2))
603
value = value.and.MA_free_heap(l_prj_prj_indx(2))
604
value = value.and.MA_free_heap(sd_function_prj_indx(2))
606
value = value.and.MA_free_heap(sw1a(2))
607
value = value.and.MA_free_heap(sw2a(2))
610
> call errquit('psp_prj_indx_end:error freeing heap',0,MA_ERR)
344
617
* ***********************************
1504
1791
if (.not.value) call errquit('v_nonlocal: popping stack',3,
1506
1793
call nwpw_timing_end(6)
1799
* ***********************************
1803
* ***********************************
1805
* This routine computes the Kleinman-Bylander non-local
1806
* pseudopotential projection.
1808
* Note - This routine was restructured 12-1-2013 to handle PAW operators.
1810
* To Do - For very large numbers of atoms the code will need to be restructured
1811
* to distribute the sw1a and sw2a matrices over np_i. Basically, if the orthogonalization matrices
1812
* need to be distributed then sw1a and sw2a will need to be distributed as well. A simple algorithm
1813
* to do this will be to keep the loop structure the same but instead of the
1814
* call to D3dB_Vector_SumAll(nn*n_prj_indx,dbl_mb(sw1a(1))), this will need to be,
1815
* changed to D3dB_Vector_SumAll(nn*nproj,dbl_mb(sw1(1))) and then place sw1 --> sw1a(distributed)
1817
subroutine v_nonlocal(ispin,ne,psi1,psi2,move,fion,
1828
#include "mafdecls.fh"
1830
#include "errquit.fh"
1832
* *** local variables ***
1833
integer pcount,taskid_j,np_j
1834
integer G(3),npack0,npack1,nion,nu,mult_l,m,ms
1835
integer i,j,ii,ia,l,n,nn,l1,ip,iip,iipmax,jp,swstart
1836
integer k,shift,l_prj,nproj,Gijl_indx
1838
real*8 omega,scal,ff(3),scal1
1840
integer exi(2),xtmp(2),sum(2)
1841
integer dng_cmp(2),dng_cmp_smooth(2)
1842
integer vcmp(2),vcmp_smooth(2)
1843
logical value,sd_function,periodic
1844
real*8 vmm(50),eh_atom
1847
* **** external functions ****
1849
integer ion_nion,ion_katm,Pack_G_indx
1850
integer psi_data_get_ptr,psi_data_get_chnk
1851
real*8 lattice_omega,ddot
1853
external ion_nion,ion_katm,Pack_G_indx
1854
external psi_data_get_ptr,psi_data_get_chnk
1855
external lattice_omega,ddot
1856
integer nwpw_compcharge_mult_l,control_version
1857
external nwpw_compcharge_mult_l,control_version
1858
real*8 nwpw_compcharge_Qlm
1859
external nwpw_compcharge_Qlm
1861
call nwpw_timing_start(6)
1863
periodic = (control_version().eq.3)
1864
call Parallel2d_taskid_j(taskid_j)
1865
call Parallel2d_np_j(np_j)
1867
* **** allocate local memory ****
1870
call Pack_npack(1,npack1)
1872
call psp_prj_indx_alloc_sw1a_sw2a(nn)
1873
value = MA_push_get(mt_dcpl,npack1,'exi',exi(2), exi(1))
1875
> call errquit('v_nonlocal:out of stack',0, MA_ERR)
1878
value = value.and.MA_push_get(mt_dbl,npack1,
1879
> 'xtmp',xtmp(2),xtmp(1))
1880
value = value.and.MA_push_get(mt_dbl,3*nn,'sum',sum(2),sum(1))
1882
> call errquit('v_nonlocal:out of stack',1,MA_ERR)
1884
G(1) = Pack_G_indx(1,1)
1885
G(2) = Pack_G_indx(1,2)
1886
G(3) = Pack_G_indx(1,3)
1889
omega = lattice_omega()
1890
scal = 1.0d0/(omega)
1894
do ip=1,nion_prj_indx
1895
ii = int_mb(ii_prj_indx(1)+ip-1)
1896
ia = int_mb(ia_prj_indx(1)+ip-1)
1897
nproj = int_mb(nproj_prj_indx(1)+ip-1)
1899
* **** structure factor and local pseudopotential ****
1900
call strfac_pack(1,ii,dcpl_mb(exi(1)))
1903
shift = int_mb(shift_prj_indx(1)+jp)
1904
sd_function = log_mb(sd_function_prj_indx(1)+jp)
1907
* **** phase factor does not matter therefore ****
1908
* **** (-i)^l is the same as (i)^l in the ****
1909
* **** Rayleigh scattering formula ****
1911
* *** current function is s or d ****
1912
if (sd_function) then
1913
call Pack_tc_Mul(1,dbl_mb(shift),
1915
> dcpl_mb(prjtmp(1)))
1917
* *** current function is p or f ****
1919
call Pack_tc_iMul(1,dbl_mb(shift),
1921
> dcpl_mb(prjtmp(1)))
1924
call Pack_cc_indot(1,nn,
1926
> dcpl_mb(prjtmp(1)),
1927
> dbl_mb(sw1a(1)+(jp-1)*nn))
1930
call D3dB_Vector_SumAll(nn*n_prj_indx,dbl_mb(sw1a(1)))
1933
* **** Compute sw2 ****
1935
do ip=1,nion_prj_indx
1936
ii = int_mb(ii_prj_indx(1)+ip-1)
1937
ia = int_mb(ia_prj_indx(1)+ip-1)
1938
nproj = int_mb(nproj_prj_indx(1)+ip-1)
1939
swstart = int_mb(swstart_prj_indx(1)+ip-1)
1942
* **** sw2 = Gijl*sw1 ******
1943
Gijl_indx = psi_data_get_ptr(int_mb(Gijl(1)+ia-1),1)
1944
call Multiply_Gijl_sw1(nn,
1946
> int_mb(nmax(1)+ia-1),
1947
> int_mb(lmax(1)+ia-1),
1948
> int_mb(n_projector(1)
1949
> + (ia-1)*(nmax_max*lmmax_max)),
1950
> int_mb(l_projector(1)
1951
> + (ia-1)*(nmax_max*lmmax_max)),
1952
> int_mb(m_projector(1)
1953
> + (ia-1)*(nmax_max*lmmax_max)),
1954
> dbl_mb(Gijl_indx),
1955
> dbl_mb(sw1a(1)+swstart*nn),
1956
> dbl_mb(sw2a(1)+swstart*nn))
1959
* **** paw operations #1 - generate it's compcharge, add atomic coulomb, and add xc potential ****
1960
if ((int_mb(psp_type(1)+ia-1).eq.4)) then
1962
* **** paw atom - generate it's atomic density matrix ****
1963
call psp_gen_density_matrix(ispin,ne,nproj,
1964
> dbl_mb(sw1a(1)+swstart*nn),
1967
* **** paw atom - generate it's compcharge ***
1968
call nwpw_compcharge_gen_Qlm(ii,ia,ispin,nproj,
1971
* **** atomic coulomb matrix - sw2 = sw2 + Vhatomijl*sw1 ****
1972
call nwpw_compcharge_coulomb_atom(ii,ia,ispin,ne,nproj,
1974
> dbl_mb(sw1a(1)+swstart*nn),
1975
> dbl_mb(sw2a(1)+swstart*nn),
1977
c write(*,*) "eh_atom=",ip,ii,eh_atom
1979
* **** xc matrix - sw2 = sw2 + Vxcijl*sw1 ******
1980
call nwpw_xc_solve(ii,ia,
1981
> int_mb(n1dgrid(1)+ia-1),
1982
> int_mb(n1dbasis(1)+ia-1),
1983
> dbl_mb(psi_data_get_chnk(int_mb(phi_ae(1)+ia-1))),
1984
> dbl_mb(psi_data_get_chnk(int_mb(phi_ps(1)+ia-1))),
1985
> dbl_mb(psi_data_get_chnk(int_mb(dphi_ae(1)+ia-1))),
1986
> dbl_mb(psi_data_get_chnk(int_mb(dphi_ps(1)+ia-1))),
1987
> dbl_mb(psi_data_get_chnk(int_mb(core_ae(1)+ia-1))),
1988
> dbl_mb(psi_data_get_chnk(int_mb(core_ps(1)+ia-1))),
1989
> dbl_mb(psi_data_get_chnk(int_mb(core_ae_prime(1)+ia-1))),
1990
> dbl_mb(psi_data_get_chnk(int_mb(core_ps_prime(1)+ia-1))),
1991
> dbl_mb(psi_data_get_chnk(int_mb(rgrid(1)+ia-1))),
1992
> dbl_mb(log_amesh(1)+ia-1),
1994
> dbl_mb(sw1a(1)+swstart*nn),dbl_mb(sw2a(1)+swstart*nn))
2000
* **** paw operations #2 - generate vcmp,.... and Gaussian Multipole ****
2005
scal1 = 1.0d0/dble(nx*ny*nz)
2008
call Pack_npack(0,npack0)
2010
call D3dB_nfft3d(1,npack0)
2013
* **** zero out dE/dQlm array ****
2014
call nwpw_compcharge_zero_dE_Qlm()
2016
if (use_grid_cmp) then
2017
value = MA_push_get(mt_dcpl,npack0,'dng_cmp',
2018
> dng_cmp(2),dng_cmp(1))
2020
> MA_push_get(mt_dcpl,npack0,'vcmp',
2023
> call errquit('v_nonlocal:out of stack',4, MA_ERR)
2025
call nwpw_compcharge_gen_zv_cmp(dbl_mb(zv(1)),
2026
> dcpl_mb(dng_cmp(1)))
2027
call coulomb_v(dcpl_mb(dng_cmp(1)),dcpl_mb(vcmp_tmp(1)))
2028
call Pack_cc_dot(0,dcpl_mb(dng_cmp(1)),
2029
> dcpl_mb(vcmp_tmp(1)),ff(1))
2030
c write(*,*) "e_i-i=",0.5d0*ff(1)
2032
call nwpw_compcharge_gen_dn_cmp(ispin,dcpl_mb(dng_cmp(1)))
2034
c call nwpw_compcharge_gen_dn_cmp_zv(ispin,dbl_mb(zv(1)),
2035
c > dcpl_mb(dng_cmp(1)))
2037
call coulomb_v(dcpl_mb(dng_cmp(1)),dcpl_mb(vcmp_tmp(1)))
2039
> dcpl_mb(vc_tmp(1)),
2040
> dcpl_mb(vcmp_tmp(1)),
2044
do ip=1,nion_prj_indx
2045
ii = int_mb(ii_prj_indx(1)+ip-1)
2046
ia = int_mb(ia_prj_indx(1)+ip-1)
2047
if ((int_mb(psp_type(1)+ia-1).eq.4)) then
2048
mult_l = nwpw_compcharge_mult_l(ia)
2051
if (mod(pcount,np_j).eq.taskid_j) then
2052
call nwpw_compcharge_gen_glm(ii,l,m,
2053
> dcpl_mb(dng_cmp(1)))
2054
call Pack_cc_idot(0,dcpl_mb(dng_cmp(1)),
2055
> dcpl_mb(vcmp(1)),ff(1))
2056
c write(*,*) "ff=",ff(1)*omega
2057
call nwpw_compcharge_add_dE_Qlm(ispin,ii,l,m,
2065
value = MA_pop_stack(vcmp(2))
2066
value = value.and.MA_pop_stack(dng_cmp(2))
2068
> call errquit('v_nonlocal:popping stack',4, MA_ERR)
2072
call nwpw_compcharge_add_dEmult_Qlm(ispin)
2074
value = MA_push_get(mt_dcpl,npack0,'dng_cmp',
2075
> dng_cmp(2),dng_cmp(1))
2077
> MA_push_get(mt_dcpl,npack0,'dng_cmp_smooth',
2078
> dng_cmp_smooth(2),dng_cmp_smooth(1))
2080
> MA_push_get(mt_dcpl,npack0,'vcmp',
2083
> MA_push_get(mt_dcpl,npack0,'vcmp_smooth',
2084
> vcmp_smooth(2),vcmp_smooth(1))
2086
> call errquit('v_nonlocal:out of stack',4, MA_ERR)
2088
call nwpw_compcharge_gen_dn_cmp2_zv(ispin,dbl_mb(zv(1)),
2089
> dcpl_mb(dng_cmp(1)),
2090
> dcpl_mb(dng_cmp_smooth(1)))
2092
!*** compute hartree potential of ntilde + ncmp_tilde ***
2093
!*** compute hartree potential ncmp - ncmp_tilde ***
2095
call coulomb_v(dcpl_mb(dng_cmp(1)),dcpl_mb(vcmp(1)))
2096
call coulomb_v(dcpl_mb(dng_cmp_smooth(1)),
2097
> dcpl_mb(vcmp_smooth(1)))
2099
call Pack_c_Copy(0,dcpl_mb(vcmp(1)),dcpl_mb(vcmp_tmp(1)))
2100
call Pack_cc_Sub(0,dcpl_mb(vcmp_tmp(1)),
2101
> dcpl_mb(vcmp_smooth(1)),
2103
call Pack_cc_Sum2(0,dcpl_mb(vc_tmp(1)),
2104
> dcpl_mb(vcmp_smooth(1)))
2108
c call Pack_c_unpack(0,dcpl_mb(dng_cmp(1)))
2109
c call D3dB_cr_fft3b(1,dcpl_mb(dng_cmp(1)))
2110
c call coulomb2_v(dcpl_mb(dng_cmp(1)),dcpl_mb(vcmp(1)))
2111
c call D3dB_r_SMul1(1,scal1,dcpl_mb(vcmp(1)))
2112
c call D3dB_rc_fft3f(1,dcpl_mb(vcmp(1)))
2113
c call Pack_c_pack(0,dcpl_mb(vcmp(1)))
2115
c call Pack_c_unpack(0,dcpl_mb(dng_cmp_smooth(1)))
2116
c call D3dB_cr_fft3b(1,dcpl_mb(dng_cmp_smooth(1)))
2117
c call coulomb2_v(dcpl_mb(dng_cmp_smooth(1)),
2118
c > dcpl_mb(vcmp_smooth(1)))
2119
c call D3dB_r_SMul1(1,scal1,dcpl_mb(vcmp(1)))
2120
c call D3dB_rc_fft3f(1,dcpl_mb(vcmp_smooth(1)))
2121
c call Pack_c_pack(0,dcpl_mb(vcmp_smooth(1)))
2126
do ip=1,nion_prj_indx
2127
ii = int_mb(ii_prj_indx(1)+ip-1)
2128
ia = int_mb(ia_prj_indx(1)+ip-1)
2129
if ((int_mb(psp_type(1)+ia-1).eq.4)) then
2130
mult_l = nwpw_compcharge_mult_l(ia)
2133
if (mod(pcount,np_j).eq.taskid_j) then
2134
call nwpw_compcharge_gen_glm2(ii,l,m,
2135
> dcpl_mb(dng_cmp(1)),
2136
> dcpl_mb(dng_cmp_smooth(1)))
2137
call Pack_cc_idot(0,
2138
> dcpl_mb(dng_cmp(1)),
2139
> dcpl_mb(vcmp_smooth(1)),
2141
call Pack_cc_idot(0,
2142
> dcpl_mb(dng_cmp_smooth(1)),
2145
c write(*,*) "ff=",ff(1)*omega,ff(2)*omega
2146
call nwpw_compcharge_add_dE_Qlm(ispin,ii,l,m,
2147
> (ff(1)+ff(2))*omega)
2155
value = MA_pop_stack(vcmp_smooth(2))
2156
value = value.and.MA_pop_stack(vcmp(2))
2157
value = value.and.MA_pop_stack(dng_cmp_smooth(2))
2158
value = value.and.MA_pop_stack(dng_cmp(2))
2160
> call errquit('v_nonlocal:popping stack',5,MA_ERR)
2163
call nwpw_compcharge_reduce_dE_Qlm()
2165
do ip=1,nion_prj_indx
2166
ii = int_mb(ii_prj_indx(1)+ip-1)
2167
ia = int_mb(ia_prj_indx(1)+ip-1)
2168
nproj = int_mb(nproj_prj_indx(1)+ip-1)
2169
swstart = int_mb(swstart_prj_indx(1)+ip-1)
2170
call nwpw_compcharge_gen_sw2(ii,ia,ispin,ne,nproj,
2171
> dbl_mb(sw1a(1)+swstart*nn),
2172
> dbl_mb(sw2a(1)+swstart*nn))
2179
* **** do Kleinman-Bylander Multiplication ****
2180
call dscal(nn*n_prj_indx,scal,dbl_mb(sw2a(1)),1)
2182
* **** apply the sw2 to psi ****
2184
do iip=1,nion_prj_indx,nprj_mult
2186
swstart = int_mb(swstart_prj_indx(1)+iip-1)
2188
iipmax = (iip+nprj_mult-1)
2189
if (iipmax.gt.nion_prj_indx) iipmax = nion_prj_indx
2192
ii = int_mb(ii_prj_indx(1)+ip-1)
2193
ia = int_mb(ia_prj_indx(1)+ip-1)
2194
nproj = int_mb(nproj_prj_indx(1)+ip-1)
2196
* **** structure factor and local pseudopotential ****
2197
call strfac_pack(1,ii,dcpl_mb(exi(1)))
2200
shift = int_mb(shift_prj_indx(1)+jp)
2201
l_prj = int_mb(l_prj_prj_indx(1)+jp)
2202
sd_function = log_mb(sd_function_prj_indx(1)+jp)
2205
* **** phase factor does not matter therefore ****
2206
* **** (-i)^l is the same as (i)^l in the ****
2207
* **** Rayleigh scattering formula ****
2209
* *** current function is s or d ****
2210
if (sd_function) then
2211
call Pack_tc_Mul(1,dbl_mb(shift),
2213
> dcpl_mb(prjtmp(1)+l1*npack1))
2215
* *** current function is p or f ****
2217
call Pack_tc_iMul(1,dbl_mb(shift),
2219
> dcpl_mb(prjtmp(1)+l1*npack1))
2222
* ***** scale (sw2a) psp by factor - used for generating antiferromagnetic structures ****
2223
* **** nwchem input: pspspin up/down scale l ion_numbers ****
2225
if (log_mb(pspspin_upions(1)+ii-1).and.
2226
> (l_prj.eq.pspspin_upl))
2227
> call dscal(ne(1),pspspin_upscale,
2228
> dbl_mb(sw2a(1)+(swstart+l-1)*nn),1)
2229
if (log_mb(pspspin_downions(1)+ii-1).and.
2230
> (l_prj.eq.pspspin_downl))
2231
> call dscal(ne(2),pspspin_downscale,
2232
> dbl_mb(sw2a(1)+(swstart+l-1)*nn+ne(1)),1)
2239
call DGEMM('N','T',2*npack1,nn,l1,
2241
> dcpl_mb(prjtmp(1)), 2*npack1,
2242
> dbl_mb(sw2a(1)+swstart*nn), nn,
2250
ii = int_mb(ii_prj_indx(1)+ip-1)
2251
ia = int_mb(ia_prj_indx(1)+ip-1)
2252
nproj = int_mb(nproj_prj_indx(1)+ip-1)
2256
call Pack_cct_iconjgMul(1,
2257
> dcpl_mb(prjtmp(1)+l1*npack1),
2258
> psi1(1+(n-1)*npack1),
2260
call Pack_tt_idot(1,dbl_mb(G(1)),dbl_mb(xtmp(1)),
2261
> dbl_mb(sum(1)+3*(n-1)))
2262
call Pack_tt_idot(1,dbl_mb(G(2)),dbl_mb(xtmp(1)),
2263
> dbl_mb(sum(1)+1+3*(n-1)))
2264
call Pack_tt_idot(1,dbl_mb(G(3)),dbl_mb(xtmp(1)),
2265
> dbl_mb(sum(1)+2+3*(n-1)))
2268
call D3dB_Vector_SumAll(3*(nn),dbl_mb(sum(1)))
2270
!**** fractional weighting ****
2271
if (fractional) then
2273
call Dneall_qton(n,i)
2274
dbl_mb(sum(1)+3*(n-1))
2275
> =dbl_mb(sum(1) +3*(n-1))*occ(i)
2276
dbl_mb(sum(1)+1+3*(n-1))
2277
> =dbl_mb(sum(1)+1+3*(n-1))*occ(i)
2278
dbl_mb(sum(1)+2+3*(n-1))
2279
> =dbl_mb(sum(1)+2+3*(n-1))*occ(i)
2287
c ff(1) = ff(1) + 2.0d0*dbl_mb(sw2a(1)+swstart*nn+n-1+(l-1)*nn) !// change
2288
c > *dbl_mb(sum(1)+ 3*(n-1))
2289
c ff(2) = ff(2) + 2.0d0*dbl_mb(sw2a(1)+swstart*nn+n-1+(l-1)*nn) !// change
2290
c > *dbl_mb(sum(1)+1+3*(n-1))
2291
c ff(3) = ff(3) + 2.0d0*dbl_mb(sw2a(1)+swstart*nn+n-1+(l-1)*nn) !// change
2292
c > *dbl_mb(sum(1)+2+3*(n-1))
2294
ff(1) =2.0d0*ddot(nn,dbl_mb(sw2a(1)+(swstart+l1)*nn),1,
2296
ff(2) =2.0d0*ddot(nn,dbl_mb(sw2a(1)+(swstart+l1)*nn),1,
2297
> dbl_mb(sum(1)+1),3)
2298
ff(3) =2.0d0*ddot(nn,dbl_mb(sw2a(1)+(swstart+l1)*nn),1,
2299
> dbl_mb(sum(1)+2),3)
2300
call D1dB_Vector_SumAll(3,ff)
2301
fion(1,ii) = fion(1,ii) + ff(1)*(3-ispin)
2302
fion(2,ii) = fion(2,ii) + ff(2)*(3-ispin)
2303
fion(3,ii) = fion(3,ii) + ff(3)*(3-ispin)
2314
value = value.and.MA_pop_stack(sum(2))
2315
value = value.and.MA_pop_stack(xtmp(2))
2317
value = value.and.MA_pop_stack(exi(2))
2318
if (.not.value) call errquit('v_nonlocal: popping stack',3,
2321
call nwpw_timing_end(6)
1511
2330
subroutine Multiply_Gijl_sw1(nn,nprj,nmax,lmax,
2688
* ***********************************
2692
* ***********************************
2694
* This routine computes the Kleinman-Bylander non-local
2695
* pseudopotential projection.
2697
* Note - This routine was restructured 12-1-2013 to handle PAW operators.
2699
* To Do - For very large numbers of atoms the code will need to be restructured
2700
* to distribute the sw1a and sw2a matrices over np_i. Basically, if the orthogonalization matrices
2701
* need to be distributed then sw1a and sw2a will need to be distributed as well. A simple algorithm
2702
* to do this will be to keep the loop structure the same but instead of the
2703
* call to D3dB_Vector_SumAll(nn*n_prj_indx,dbl_mb(sw1a(1))), this will need to be,
2704
* changed to D3dB_Vector_SumAll(nn*nproj,dbl_mb(sw1(1))) and then place sw1 --> sw1a(distributed)
2706
subroutine f_vnonlocal(ispin,ne,psi1,fion,fractional,occ)
2714
#include "mafdecls.fh"
2716
#include "errquit.fh"
2718
* *** local variables ***
2719
integer G(3),npack1,nion,nu
2720
integer i,j,ii,ia,l,n,nn,l1,ip,iip,iipmax,jp,swstart
2721
integer k,shift,l_prj,nproj,Gijl_indx
2722
real*8 omega,scal,ff(3)
2724
integer exi(2),xtmp(2),sum(2)
2725
logical value,sd_function
2729
* **** external functions ****
2731
integer ion_nion,ion_katm,Pack_G_indx
2732
integer psi_data_get_ptr,psi_data_get_chnk
2733
real*8 lattice_omega,ddot
2735
external ion_nion,ion_katm,Pack_G_indx
2736
external psi_data_get_ptr,psi_data_get_chnk
2737
external lattice_omega,ddot
2739
call nwpw_timing_start(6)
2742
* **** allocate local memory ****
2745
call Pack_npack(1,npack1)
2747
call psp_prj_indx_alloc_sw1a_sw2a(nn)
2748
value = MA_push_get(mt_dcpl,npack1,'exi',exi(2), exi(1))
2749
value = value.and.MA_push_get(mt_dbl,npack1,
2750
> 'xtmp',xtmp(2),xtmp(1))
2751
value = value.and.MA_push_get(mt_dbl,3*nn,'sum',sum(2),sum(1))
2753
> call errquit('f_vnonlocal:out of stack',0, MA_ERR)
2755
G(1) = Pack_G_indx(1,1)
2756
G(2) = Pack_G_indx(1,2)
2757
G(3) = Pack_G_indx(1,3)
2759
omega = lattice_omega()
2760
scal = 1.0d0/(omega)
2763
do ip=1,nion_prj_indx
2764
ii = int_mb(ii_prj_indx(1)+ip-1)
2765
ia = int_mb(ia_prj_indx(1)+ip-1)
2766
nproj = int_mb(nproj_prj_indx(1)+ip-1)
2768
* **** structure factor and local pseudopotential ****
2769
call strfac_pack(1,ii,dcpl_mb(exi(1)))
2772
shift = int_mb(shift_prj_indx(1)+jp)
2773
sd_function = log_mb(sd_function_prj_indx(1)+jp)
2776
* **** phase factor does not matter therefore ****
2777
* **** (-i)^l is the same as (i)^l in the ****
2778
* **** Rayleigh scattering formula ****
2780
* *** current function is s or d ****
2781
if (sd_function) then
2782
call Pack_tc_Mul(1,dbl_mb(shift),
2784
> dcpl_mb(prjtmp(1)))
2786
* *** current function is p or f ****
2788
call Pack_tc_iMul(1,dbl_mb(shift),
2790
> dcpl_mb(prjtmp(1)))
2793
call Pack_cc_indot(1,nn,
2795
> dcpl_mb(prjtmp(1)),
2796
> dbl_mb(sw1a(1)+(jp-1)*nn))
2799
call D3dB_Vector_SumAll(nn*n_prj_indx,dbl_mb(sw1a(1)))
2802
* **** Compute sw2 ****
2803
do ip=1,nion_prj_indx
2804
ii = int_mb(ii_prj_indx(1)+ip-1)
2805
ia = int_mb(ia_prj_indx(1)+ip-1)
2806
nproj = int_mb(nproj_prj_indx(1)+ip-1)
2807
swstart = int_mb(swstart_prj_indx(1)+ip-1)
2809
* **** sw2 = Gijl*sw1 ******
2810
Gijl_indx = psi_data_get_ptr(int_mb(Gijl(1)+ia-1),1)
2811
call Multiply_Gijl_sw1(nn,
2813
> int_mb(nmax(1)+ia-1),
2814
> int_mb(lmax(1)+ia-1),
2815
> int_mb(n_projector(1)
2816
> + (ia-1)*(nmax_max*lmmax_max)),
2817
> int_mb(l_projector(1)
2818
> + (ia-1)*(nmax_max*lmmax_max)),
2819
> int_mb(m_projector(1)
2820
> + (ia-1)*(nmax_max*lmmax_max)),
2821
> dbl_mb(Gijl_indx),
2822
> dbl_mb(sw1a(1)+swstart*nn),
2823
> dbl_mb(sw2a(1)+swstart*nn))
2827
* **** do Kleinman-Bylander Multiplication ****
2828
call dscal(nn*n_prj_indx,scal,dbl_mb(sw2a(1)),1)
2830
* **** apply the sw2 to psi ****
2832
do iip=1,nion_prj_indx,nprj_mult
2834
swstart = int_mb(swstart_prj_indx(1)+iip-1)
2836
iipmax = (iip+nprj_mult-1)
2837
if (iipmax.gt.nion_prj_indx) iipmax = nion_prj_indx
2840
ii = int_mb(ii_prj_indx(1)+ip-1)
2841
ia = int_mb(ia_prj_indx(1)+ip-1)
2842
nproj = int_mb(nproj_prj_indx(1)+ip-1)
2844
* **** structure factor and local pseudopotential ****
2845
call strfac_pack(1,ii,dcpl_mb(exi(1)))
2848
shift = int_mb(shift_prj_indx(1)+jp)
2849
l_prj = int_mb(l_prj_prj_indx(1)+jp)
2850
sd_function = log_mb(sd_function_prj_indx(1)+jp)
2853
* **** phase factor does not matter therefore ****
2854
* **** (-i)^l is the same as (i)^l in the ****
2855
* **** Rayleigh scattering formula ****
2857
* *** current function is s or d ****
2858
if (sd_function) then
2859
call Pack_tc_Mul(1,dbl_mb(shift),
2861
> dcpl_mb(prjtmp(1)+l1*npack1))
2863
* *** current function is p or f ****
2865
call Pack_tc_iMul(1,dbl_mb(shift),
2867
> dcpl_mb(prjtmp(1)+l1*npack1))
2870
* ***** scale (sw2a) psp by factor - used for generating antiferromagnetic structures ****
2871
* **** nwchem input: pspspin up/down scale l ion_numbers ****
2873
if (log_mb(pspspin_upions(1)+ii-1).and.
2874
> (l_prj.eq.pspspin_upl))
2875
> call dscal(ne(1),pspspin_upscale,
2876
> dbl_mb(sw2a(1)+(swstart+l-1)*nn),1)
2877
if (log_mb(pspspin_downions(1)+ii-1).and.
2878
> (l_prj.eq.pspspin_downl))
2879
> call dscal(ne(2),pspspin_downscale,
2880
> dbl_mb(sw2a(1)+(swstart+l-1)*nn+ne(1)),1)
2889
ii = int_mb(ii_prj_indx(1)+ip-1)
2890
ia = int_mb(ia_prj_indx(1)+ip-1)
2891
nproj = int_mb(nproj_prj_indx(1)+ip-1)
2895
call Pack_cct_iconjgMul(1,
2896
> dcpl_mb(prjtmp(1)+l1*npack1),
2897
> psi1(1+(n-1)*npack1),
2899
call Pack_tt_idot(1,dbl_mb(G(1)),dbl_mb(xtmp(1)),
2900
> dbl_mb(sum(1)+3*(n-1)))
2901
call Pack_tt_idot(1,dbl_mb(G(2)),dbl_mb(xtmp(1)),
2902
> dbl_mb(sum(1)+1+3*(n-1)))
2903
call Pack_tt_idot(1,dbl_mb(G(3)),dbl_mb(xtmp(1)),
2904
> dbl_mb(sum(1)+2+3*(n-1)))
2907
call D3dB_Vector_SumAll(3*(nn),dbl_mb(sum(1)))
2909
!**** fractional weighting ****
2910
if (fractional) then
2912
call Dneall_qton(n,i)
2913
dbl_mb(sum(1)+3*(n-1))
2914
> =dbl_mb(sum(1) +3*(n-1))*occ(i)
2915
dbl_mb(sum(1)+1+3*(n-1))
2916
> =dbl_mb(sum(1)+1+3*(n-1))*occ(i)
2917
dbl_mb(sum(1)+2+3*(n-1))
2918
> =dbl_mb(sum(1)+2+3*(n-1))*occ(i)
2922
ff(1) =2.0d0*ddot(nn,dbl_mb(sw2a(1)+(swstart+l1)*nn),1,
2924
ff(2) =2.0d0*ddot(nn,dbl_mb(sw2a(1)+(swstart+l1)*nn),1,
2925
> dbl_mb(sum(1)+1),3)
2926
ff(3) =2.0d0*ddot(nn,dbl_mb(sw2a(1)+(swstart+l1)*nn),1,
2927
> dbl_mb(sum(1)+2),3)
2928
call D1dB_Vector_SumAll(3,ff)
2929
fion(1,ii) = fion(1,ii) + ff(1)*(3-ispin)
2930
fion(2,ii) = fion(2,ii) + ff(2)*(3-ispin)
2931
fion(3,ii) = fion(3,ii) + ff(3)*(3-ispin)
2939
value = MA_pop_stack(sum(2))
2940
value = value.and.MA_pop_stack(xtmp(2))
2941
value = value.and.MA_pop_stack(exi(2))
2942
if (.not.value) call errquit('f_vnonlocal: popping stack',3,
2945
call nwpw_timing_end(6)
1806
2951
* ********************************************