1
c $Id: ga_it2.F 19707 2010-10-29 17:59:36Z d3y133 $
1
c $Id: ga_it2.F 23368 2013-01-04 18:58:01Z niri $
3
C> \defgroup kain Krylov subspace Accelerated Inexact Newton method (KAIN)
4
C> \brief Implementation of Krylov-subspace nonlinear equation solver
6
C> This files provides an implementation of the KAIN method to solve
7
C> linear and nonlinear systems of equations [1]. The solvers
8
C> `ga_kain` and `ga_lkain` are general purpose routines. They require
9
C> a routine to calculate a matrix-vector product and a routine
10
C> to apply a preconditioner as arguments.
13
C> <i>"Krylov Subspace Accelerated Inexact Newton Method for Linear
14
C> and Nonlinear Systems",</i>
15
C> Journal of Computational Chemistry (2004) <b>25</b>, pp 328-334,
16
C> DOI: <a href="http://dx.doi.org/10.1002/jcc.10108">10.1002/jcc.10108</a>
22
C> \brief A trivial matrix-vector product routine
24
C> This routine is simply there to test the code for system solvers.
25
C> The matrix-vector product it calculates is simply a product involving
26
C> an explicitly stored matrix. This matrix is passed through a common
2
29
subroutine test_product(acc,g_x, g_Ax)
32
double precision acc !< [Input] Accuracy (not used here)
33
integer g_x !< [Input] Global array with vector
34
integer g_Ax !< [Output] Global array with matrix-vector product
8
37
common /testme/ g_A, g_B
11
call ga_inquire(g_x, type, n, nvec)
38
integer n, nvec, typex, typeax
39
double complex one, zero
41
one = cmplx(1.0d0,0.0d0)
42
zero = cmplx(0.0d0,0.0d0)
44
call ga_inquire(g_Ax, typeax, n, nvec)
45
call ga_inquire(g_x, typex, n, nvec)
47
$ call errquit("test_product: g_x not same type as g_Ax",
13
call ga_dgemm('n', 'n', n, nvec, n, 1.0d0, g_A, g_x, 0.0d0, g_Ax)
50
call ga_dgemm('n', 'n', n, nvec, n, one, g_A, g_x, zero, g_Ax)
54
C> \brief A trivial non-linear matrix-vector product routine
56
C> This routine is simply there to test the code for system solvers.
57
C> The matrix-vector product it calculates is simply a product involving
58
C> an explicitly stored matrix and it adds another vector to achieve
59
C> something non-linear. The additional matrix and vector are passed
60
C> through a common block.
16
62
subroutine test_nlproduct(acc,g_x, g_Ax)
18
65
double precision acc
22
69
common /testme/ g_A, g_B
25
call ga_inquire(g_x, type, n, nvec)
70
integer n, nvec, typex, typeax, typeb
71
double complex mone, one, zero
73
mone = cmplx(-1.0d0,0.0d0)
74
one = cmplx(1.0d0,0.0d0)
75
zero = cmplx(0.0d0,0.0d0)
77
call ga_inquire(g_b, typeb, n, nvec)
78
call ga_inquire(g_Ax, typeax, n, nvec)
79
call ga_inquire(g_x, typex, n, nvec)
81
$ call errquit("test_nlproduct: g_x not same type as g_Ax",
84
$ call errquit("test_nlproduct: g_x not same type as g_b",
27
call ga_dgemm('n', 'n', n, nvec, n, 1.0d0, g_A, g_x, 0.0d0, g_Ax)
28
call ga_add(1.0d0, g_AX, -1.0d0, g_B, g_AX)
87
call ga_dgemm('n', 'n', n, nvec, n, one, g_A, g_x, zero, g_Ax)
88
call ga_add(one, g_AX, mone, g_B, g_AX)
92
C> \brief A trivial preconditioner
94
C> This routine is simply there to test the code for system solvers.
95
C> It simply scales the elements of a vector by
97
C> x_i = \frac{x_i}{i - \epsilon}
99
C> where \f$ \epsilon \f$ is the shift.
31
101
subroutine test_precond(g_x,shift)
103
#include "mafdecls.fh"
104
#include "errquit.fh"
33
105
#include "global.fh"
35
107
double precision shift
36
108
integer n, nvec, type
38
110
double precision x
40
113
call ga_inquire(g_x, type, n, nvec)
41
114
if (ga_nodeid() .eq. 0) then
44
call ga_get(g_x, i, i, ivec, ivec, x, 1)
45
x = x / (dble(i) - shift)
46
call ga_put(g_x, i, i, ivec, ivec, x, 1)
115
if (type.eq.MT_DBL) then
118
call ga_get(g_x, i, i, ivec, ivec, x, 1)
119
x = x / (dble(i) - shift)
120
call ga_put(g_x, i, i, ivec, ivec, x, 1)
123
else if (type.eq.MT_DCPL) then
126
call ga_get(g_x, i, i, ivec, ivec, zx, 1)
127
zx = zx / (dble(i) - shift)
128
call ga_put(g_x, i, i, ivec, ivec, zx, 1)
132
call errquit('test_precond: illegal type',type,UERR)
139
C> \brief test driver for ga_lkain
141
C> This routine drives a standard test of the ga_lkain
142
C> and the ga_kain solver.
143
C> Because the test is fixed the subroutine takes no inputs
144
C> and it prints a summary on standard output.
53
146
subroutine ga_lkain_test()
55
148
#include "errquit.fh"
56
149
#include "mafdecls.fh"
57
150
#include "global.fh"
58
151
integer n, nvec, maxsub, maxiter
59
parameter (n=300, nvec=1)
152
c parameter (n=300, nvec=1)
153
parameter (n=10, nvec=1)
61
external test_precond, test_product, test_nlproduct
156
double precision util_random
157
external test_precond, test_product, test_nlproduct, util_random
159
integer g_tx, g_ta, g_tb
64
161
common /testme/ g_a, g_b
68
167
**** double precision a(n,n), b(n), w(n)
89
188
call ga_copy(g_b, g_x)
90
189
call test_precond(g_x,0.0d0)
92
**** call ga_get(g_a, 1, n, 1, n, a, n)
93
**** call ga_get(g_b, 1, n, 1, nvec, b, n)
94
**** call dgesv(n, nvec, a, n, w, b, n, info)
95
**** write(6,*) ' info ', info
96
**** call ga_put(g_x, 1, n, 1, nvec, b, n)
98
C This should have something other than zero
99
call ga_lkain(0,g_x, g_b,test_product,test_precond,1d-6,maxsub,
100
$ maxiter,.true.,.true.)
102
call ma_summarize_allocated_blocks()
107
write(6,*) ' DOING NON LINEAR '
111
call ga_copy(g_b, g_x)
112
call test_precond(g_x,0.0d0)
116
$ test_nlproduct, test_precond,
195
**** call ga_get(g_a, 1, n, 1, n, a, n)
196
**** call ga_get(g_b, 1, n, 1, nvec, b, n)
197
**** call dgesv(n, nvec, a, n, w, b, n, info)
198
**** write(6,*) ' info ', info
199
**** call ga_put(g_x, 1, n, 1, nvec, b, n)
201
C This should have something other than zero
202
call ga_lkain(0,g_x, g_b,test_product,test_precond,1d-6,maxsub,
203
$ maxiter,.true.,.true.)
208
call ma_summarize_allocated_blocks()
213
write(6,*) ' DOING NON LINEAR '
217
call ga_copy(g_b, g_x)
218
call test_precond(g_x,0.0d0)
222
$ test_nlproduct, test_precond,
232
call ma_summarize_allocated_blocks()
234
c Same but now COMPLEX
241
if (.not. ga_create(MT_DCPL, n, nvec, 'testx', 0, 0, g_x))
242
$ call errquit('test kain', 1, GA_ERR)
243
if (.not. ga_create(MT_DCPL, n, nvec, 'testb', 0, 0, g_b))
244
$ call errquit('test kain', 2, GA_ERR)
245
if (.not. ga_create(MT_DCPL, n, n, 'testA', 0, 0, g_A))
246
$ call errquit('test kain', 3, GA_ERR)
248
c call ga_ran_fill(g_A, 1, n, 1, n)
249
c call ga_ran_fill(g_b, 1, n, 1, nvec)
250
c if (ga_nodeid() .eq. 0) then
252
c call ga_put(g_a, i, i, i, i, 0.5*cmplx(dble(i)), 1)
255
if (.not. ga_copy_dz(g_tb,g_b))
256
+ call errquit('test kain: copy g_b failed',0,GA_ERR)
257
if (.not. ga_copy_dz(g_tA,g_A))
258
+ call errquit('test kain: copy g_A failed',0,GA_ERR)
260
angle = cmplx(1.0d0,util_random(0))
261
angle = angle/abs(angle)
262
call ga_scale(g_b,angle)
263
angle = cmplx(1.0d0,util_random(0))
264
angle = angle/abs(angle)
265
call ga_scale(g_A,angle)
267
if (.not. ga_destroy(g_tx))
268
$ call errquit('test kain', 1, GA_ERR)
269
if (.not. ga_destroy(g_tb))
270
$ call errquit('test kain', 2, GA_ERR)
271
if (.not. ga_destroy(g_tA))
272
$ call errquit('test kain', 3, GA_ERR)
274
call ga_copy(g_b, g_x)
275
call test_precond(g_x,0.0d0)
281
**** call ga_get(g_a, 1, n, 1, n, a, n)
282
**** call ga_get(g_b, 1, n, 1, nvec, b, n)
283
**** call dgesv(n, nvec, a, n, w, b, n, info)
284
**** write(6,*) ' info ', info
285
**** call ga_put(g_x, 1, n, 1, nvec, b, n)
287
C This should have something other than zero
288
call ga_lkain(0,g_x, g_b,test_product,test_precond,1d-6,maxsub,
289
$ maxiter,.true.,.true.)
294
call ma_summarize_allocated_blocks()
299
write(6,*) ' DOING NON LINEAR '
303
call ga_copy(g_b, g_x)
304
call test_precond(g_x,0.0d0)
308
$ test_nlproduct, test_precond,
122
317
call ga_summarize(0)
123
318
call ma_summarize_allocated_blocks()
124
319
call errquit('done',0, MEM_ERR)
321
if (.not. ga_destroy(g_x))
322
$ call errquit('test kain', 1, GA_ERR)
323
if (.not. ga_destroy(g_b))
324
$ call errquit('test kain', 2, GA_ERR)
325
if (.not. ga_destroy(g_A))
326
$ call errquit('test kain', 3, GA_ERR)
330
C> \brief The Linear System Solver
332
C> The routine solves a linear system of equations using a Krylov
338
C> The Right-Hand Side (RHS) matrix and the solution
339
C> matrix store the data pertaining to a single linear system in
340
C> columns. Using multiple columns the solver can solve multiple
341
C> equations simultaneously. Internally one Krylov subspace is used
342
C> for all systems of equations, and subspace vectors contribute to
343
C> the solutions of all equations.
345
C> An special argument `odiff` is provided that allows one to use a
346
C> difference strategy for the required matrix-vector products. I.e.
347
C> rather than computing
351
C> each iteration one uses a formulation that requires
353
C> y' &=& A(x_i-x_{i-1})
355
C> As \f$ x \f$ converges \f$ x_i - x_{i-1} \f$ approaches \f$ 0 \f$.
356
C> This may be exploited in the matrix-vector product to achieve
357
C> considerable savings.
359
C> Furthermore the routine takes two subroutines as arguments. One
360
C> provides the matrix-vector product and has the interface
362
C> subroutine product(acc,g_x,g_Ax)
370
C> - acc: the accuracy required for the matrix-vector elements
372
C> - g_x: is a global array containing the input vectors in columns
374
C> - g_Ax: is a global array that will contain the matrix-vector product
375
C> results in columns
377
C> Note that `ga_lkain` may pass global arrays with a different number
378
C> of vectors than the number of linear systems being solved. Hence
379
C> the product routine should check the actual dimension of g_x and
382
C> The second subroutine provides a preconditioner and has the
385
C> subroutine precond(g_x,shift)
389
C> - g_x: is a global array holding a set of vectors that will have
390
C> the preconditioner applied to them
392
C> - shift: is a shift to ensure the preconditioner is non-singular
394
C> Note that the number of vectors may also here be different from the
395
C> number of linear systems being solved. Hence the dimension of g_x
396
C> should be tested. Finally, the shift is not used in this linear
397
C> system solver. The argument is provided to ensure that the same
398
C> preconditioner can be used in eigenvalue solvers where the shift is
127
401
subroutine ga_lkain(rtdb,g_x, g_b, product, precond,
128
402
$ tol, mmaxsub, maxiter, odiff, oprint)
165
439
c Needs to be extended to store the sub-space vectors out-of-core
166
440
c at least while the product() routine is being executed.
168
integer iter, n, nvec, nsub, isub, type, maxsub
443
integer iter, n, nvec, nsub, isub, typex, typeb, maxsub
169
444
integer g_y, g_Ay, g_Ax, g_r, g_a, g_bb, g_c, g_xold, g_Axold
170
445
double precision rmax, acc, ga_svd_tol
446
double complex zero, one, mone
171
447
logical converged
450
zero = cmplx(0.0d0,0.0d0)
451
one = cmplx(1.0d0,0.0d0)
452
mone = cmplx(-1.0d0,0.0d0)
174
454
odebug = util_print('debug lsolve', print_never) .and.
175
455
$ ga_nodeid().eq.0
176
456
if (.not.rtdb_get(rtdb,'cphf:acc',mt_dbl,1,acc)) acc=0.01d0*tol
177
call ga_inquire(g_x, type, n, nvec)
457
call ga_inquire(g_b, typeb, dimi, dimj)
458
call ga_inquire(g_x, typex, n, nvec)
460
$ call errquit("ga_lkain: solution and right-hand-side vectors not
461
$ of same type",0,UERR)
178
462
maxsub = mmaxsub ! So don't modify input scalar arg
179
463
if (maxsub .lt. 3*nvec) maxsub = 3*nvec
180
464
maxsub = (maxsub/nvec)*nvec
202
486
call util_flush(6)
205
if (.not. ga_create(MT_DBL, n, maxsub, 'lkain: Y',
489
if (.not. ga_create(typex, n, maxsub, 'lkain: Y',
207
491
$ call errquit('lkain: failed allocating subspace', maxsub,
209
if (.not. ga_create(MT_DBL, n, maxsub, 'lkain: Ay',
493
if (.not. ga_create(typex, n, maxsub, 'lkain: Ay',
211
495
$ call errquit('lkain: failed allocating subspace2', maxsub,
213
if (.not. ga_create(MT_DBL, n, nvec, 'lkain: Ax',
497
if (.not. ga_create(typex, n, nvec, 'lkain: Ax',
215
499
$ call errquit('lkain: failed allocating subspace3', nvec,
217
if (.not. ga_create(MT_DBL, n, nvec, 'lkain: r',
501
if (.not. ga_create(typex, n, nvec, 'lkain: r',
219
503
$ call errquit('lkain: failed allocating subspace4', nvec,
222
if (.not. ga_create(MT_DBL, n, nvec, 'lkain: xold',
506
if (.not. ga_create(typex, n, nvec, 'lkain: xold',
224
508
$ call errquit('lkain: failed allocating subspace5', nvec,
226
if (.not. ga_create(MT_DBL, n, nvec, 'lkain: xold',
510
if (.not. ga_create(typex, n, nvec, 'lkain: Axold',
227
511
$ 0, 0, g_Axold))
228
512
$ call errquit('lkain: failed allocating subspace6', nvec,
287
571
c Form and solve the subspace equations using SVD in order
288
572
c to manage near linear dependence in the subspace.
290
if (.not. ga_create(MT_DBL, nsub, nsub, 'lkain: A', 0, 0, g_a))
574
if (.not. ga_create(typex, nsub, nsub, 'lkain: A', 0, 0, g_a))
291
575
$ call errquit('lkain: allocating g_a?', nsub, GA_ERR)
292
if (.not. ga_create(MT_DBL, nsub, nvec, 'lkain: B', 0, 0,g_bb))
576
if (.not. ga_create(typex, nsub, nvec, 'lkain: B', 0, 0,g_bb))
293
577
$ call errquit('lkain: allocating g_bb?', nsub, GA_ERR)
294
if (.not. ga_create(MT_DBL, nsub, nvec, 'lkain: C', 0, 0, g_c))
578
if (.not. ga_create(typex, nsub, nvec, 'lkain: C', 0, 0, g_c))
295
579
$ call errquit('lkain: allocating g_c?', nsub, GA_ERR)
296
580
call ga_zero(g_a)
297
581
call ga_zero(g_bb)
298
582
call ga_zero(g_c)
300
call ga_dgemm('t','n',nsub,nsub,n,1.0d0,g_y,g_Ay,0.0d0,g_a)
302
call ga_dgemm('t','n',nsub,nvec,n,1.0d0,g_y,g_r,0.0d0,g_bb)
304
if (odebug) call ga_print(g_a)
305
if (odebug) call ga_print(g_c)
584
call ga_dgemm('c','n',nsub,nsub,n,one,g_y,g_Ay,zero,g_a)
586
call ga_dgemm('c','n',nsub,nvec,n,one,g_y,g_r,zero,g_bb)
307
595
c The threshold used here should reflect the accuracy in the
308
596
c products. If very accurate products are used, then there is big
313
601
c screening with a realistic threshold is important.
315
603
call ga_svd_solve_seq(g_a,g_bb,g_c,ga_svd_tol)
316
if (odebug) call ga_print(g_c)
318
610
c Form and add the correction, in parts, onto the solution
321
call ga_dgemm('n','n',n,nvec,nsub,-1.0d0,g_Ay,g_c,1.0d0,g_r)
613
call ga_dgemm('n','n',n,nvec,nsub,mone,g_Ay,g_c,one,g_r)
323
616
write(6,*) ' The update in the complement '
324
618
call ga_print(g_r)
327
call ga_add(1.0d0, g_r, 1.0d0, g_x, g_x)
621
call ga_add(one, g_r, one, g_x, g_x)
329
call ga_dgemm('n','n',n,nvec,nsub,1.0d0,g_y,g_c,0.0d0,g_r)
623
call ga_dgemm('n','n',n,nvec,nsub,one,g_y,g_c,zero,g_r)
331
626
write(6,*) ' The update in the subspace '
332
628
call ga_print(g_r)
335
call ga_add(1.0d0, g_r, 1.0d0, g_x, g_x)
631
call ga_add(one, g_r, one, g_x, g_x)
338
634
if (.not. ga_destroy(g_a)) call errquit('lkain: a',0, GA_ERR)
683
C> \brief The Non-Linear System Solver
685
C> The routine solves a non-linear system of equations using a Krylov
690
C> For linear systems the vector function in the above equation would
691
C> be \f$ F(x) = Ax - b \f$ but this routine also allows for non-linear
693
C> The Right-Hand Side (RHS) matrix and the solution
694
C> matrix store the data pertaining to a single linear system in
695
C> columns. Using multiple columns the solver can solve multiple
696
C> equations simultaneously. Internally one Krylov subspace is used
697
C> for all systems of equations, and subspace vectors contribute to
698
C> the solutions of all equations.
700
C> Furthermore the routine takes two subroutines as arguments. One
701
C> provides the vector residual and has the interface
703
C> subroutine residual(acc,g_x,g_Ax)
707
C> - acc: the accuracy required for the matrix-vector elements
709
C> - g_x: is a global array containing the input vectors in columns
711
C> - g_Ax: is a global array that will contain the residual-vector
712
C> results in columns
714
C> Note that `ga_lkain` may pass global arrays with a different number
715
C> of vectors than the number of linear systems being solved. Hence
716
C> the product routine should check the actual dimension of g_x and
719
C> The second subroutine provides a preconditioner and has the
722
C> subroutine precond(g_x,shift)
726
C> - g_x: is a global array holding a set of vectors that will have
727
C> the preconditioner applied to them
729
C> - shift: is a shift to ensure the preconditioner is non-singular
731
C> Note that the number of vectors may also here be different from the
732
C> number of linear systems being solved. Hence the dimension of g_x
733
C> should be tested. Finally, the shift is not used in this linear
734
C> system solver. The argument is provided to ensure that the same
735
C> preconditioner can be used in eigenvalue solvers where the shift is
386
738
subroutine ga_kain(
390
742
$ trustmin, trustmax,
391
743
$ maxsub, maxiter,
500
860
c Not converged ... make the update
502
862
g_delta = g_Ax ! A reminder that these two are aliased
503
call ga_scale(g_delta, -1.0d0)
863
call ga_scale(g_delta, mone)
505
865
if (iter .gt. 1) then
507
867
c Form the reduced space matrix and RHS
509
call ga_local_mdot(n, nsub, nsub, a, maxmaxsub, g_y, g_Ay)
511
b(isub) = -(a(isub,nsub) - a(nsub,nsub))
515
a(isub,jsub) = a(isub,jsub)
516
$ - a(nsub,jsub) - a(isub,nsub) + a(nsub,nsub)
869
if (typex.eq.MT_DBL) then
870
call ga_local_mdot(n,nsub,nsub,a,maxmaxsub,g_y,g_Ay)
872
b(isub) = -(a(isub,nsub) - a(nsub,nsub))
876
a(isub,jsub) = a(isub,jsub)
877
$ - a(nsub,jsub) - a(isub,nsub) + a(nsub,nsub)
880
else if (typex.eq.MT_DCPL) then
881
call ga_local_zmdot(n,nsub,nsub,za,maxmaxsub,g_y,g_Ay)
883
zb(isub) = -(za(isub,nsub) - za(nsub,nsub))
887
za(isub,jsub) = za(isub,jsub)
888
$ - za(nsub,jsub) - za(isub,nsub) + za(nsub,nsub)
892
call errquit("ga_kain: illegal type",typex,UERR)
520
895
c Solve the subspace equations (lazily using existing GA routine)
522
if (.not. ga_create(MT_DBL,nsub-1,nsub-1,'kain: A',
897
if (.not. ga_create(typex,nsub-1,nsub-1,'kain: A',
523
898
$ nsub-1,nsub-1,g_a))
524
899
$ call errquit('kain: allocating g_a?', nsub, GA_ERR)
525
if (.not. ga_create(MT_DBL,nsub-1,1,'kain: B',nsub-1,1,g_b))
900
if (.not. ga_create(typex,nsub-1,1,'kain: B',nsub-1,1,g_b))
526
901
$ call errquit('kain: allocating g_bb?', nsub, GA_ERR)
527
if (.not. ga_create(MT_DBL,nsub-1,1,'kain: C',nsub-1,1,g_c))
902
if (.not. ga_create(typex,nsub-1,1,'kain: C',nsub-1,1,g_c))
528
903
$ call errquit('kain: allocating g_c?', nsub, GA_ERR)
529
904
if (ga_nodeid() .eq. 0) then
530
call ga_put(g_a, 1, nsub-1, 1, nsub-1, a, maxmaxsub)
531
call ga_put(g_b, 1, nsub-1, 1, 1, b, 1)
905
if (typex.eq.MT_DBL) then
906
call ga_put(g_a, 1, nsub-1, 1, nsub-1, a, maxmaxsub)
907
call ga_put(g_b, 1, nsub-1, 1, 1, b, 1)
908
else if (typex.eq.MT_DCPL) then
909
call ga_put(g_a, 1, nsub-1, 1, nsub-1, za, maxmaxsub)
910
call ga_put(g_b, 1, nsub-1, 1, 1, zb, 1)
912
call errquit("ga_kain: illegal type",typex,UERR)
535
917
call ga_svd_solve_seq(g_a,g_b,g_c,1d-14)
537
if (odebug) call ga_print(g_c)
538
if (ga_nodeid() .eq. 0)
924
if (typex.eq.MT_DBL) then
925
if (ga_nodeid() .eq. 0)
539
926
$ call ga_get(g_c, 1, nsub-1, 1, 1, c, 1)
540
call ga_brdcst(1, c, mdtob(nsub-1), 0)
541
write(6,*) ' KAIN SUBSPACE COEFFS'
542
call output(c, 1, nsub-1, 1, 1, nsub-1, 1, 1)
927
call ga_brdcst(1, c, mdtob(nsub-1), 0)
928
write(6,*) ' KAIN SUBSPACE COEFFS'
929
call output(c, 1, nsub-1, 1, 1, nsub-1, 1, 1)
930
else if (typex.eq.MT_DCPL) then
931
if (ga_nodeid() .eq. 0)
932
$ call ga_get(g_c, 1, nsub-1, 1, 1, zc, 1)
933
call ga_brdcst(1, zc, 2*mdtob(nsub-1), 0)
934
write(6,*) ' KAIN SUBSPACE COEFFS'
935
call zoutput(zc, 1, nsub-1, 1, 1, nsub-1, 1, 1)
544
938
if (.not. ga_destroy(g_a)) call errquit('kain: a',0, GA_ERR)
545
939
if (.not. ga_destroy(g_b)) call errquit('kain: b',0, GA_ERR)
548
942
c Form the correction
552
csum = csum + c(isub)
553
call ga_add_patch( c(isub), g_y, 1, n, isub, isub,
554
$ 1.0d0, g_delta, 1, n, 1, 1, g_delta, 1, n, 1, 1)
555
call ga_add_patch(-c(isub), g_Ay, 1, n, isub, isub,
556
$ 1.0d0, g_delta, 1, n, 1, 1, g_delta, 1, n, 1, 1)
558
call ga_add_patch(-csum, g_y, 1, n, nsub, nsub,
559
$ 1.0d0, g_delta, 1, n, 1, 1, g_delta, 1, n, 1, 1)
560
call ga_add_patch( csum, g_Ay, 1, n, nsub, nsub,
561
$ 1.0d0, g_delta, 1, n, 1, 1, g_delta, 1, n, 1, 1)
944
if (typex.eq.MT_DBL) then
947
csum = csum + c(isub)
948
call ga_add_patch( c(isub), g_y, 1, n, isub, isub,
949
$ one, g_delta, 1, n, 1, 1, g_delta, 1, n, 1, 1)
950
call ga_add_patch(-c(isub), g_Ay, 1, n, isub, isub,
951
$ one, g_delta, 1, n, 1, 1, g_delta, 1, n, 1, 1)
953
call ga_add_patch(-csum, g_y, 1, n, nsub, nsub,
954
$ one, g_delta, 1, n, 1, 1, g_delta, 1, n, 1, 1)
955
call ga_add_patch( csum, g_Ay, 1, n, nsub, nsub,
956
$ one, g_delta, 1, n, 1, 1, g_delta, 1, n, 1, 1)
957
else if (typex.eq.MT_DCPL) then
960
zcsum = zcsum + zc(isub)
961
call ga_add_patch( zc(isub), g_y, 1, n, isub, isub,
962
$ one, g_delta, 1, n, 1, 1, g_delta, 1, n, 1, 1)
963
call ga_add_patch(-zc(isub), g_Ay, 1, n, isub, isub,
964
$ one, g_delta, 1, n, 1, 1, g_delta, 1, n, 1, 1)
966
call ga_add_patch(-zcsum, g_y, 1, n, nsub, nsub,
967
$ one, g_delta, 1, n, 1, 1, g_delta, 1, n, 1, 1)
968
call ga_add_patch( zcsum, g_Ay, 1, n, nsub, nsub,
969
$ one, g_delta, 1, n, 1, 1, g_delta, 1, n, 1, 1)
564
973
c Step restriction
645
1083
nsing = min(m,n)
646
1084
if (.not. ma_push_get(MT_DBL, nsing, 'gasvdsol', l_val, k_val))
647
1085
$ call errquit('gasvdsol: val',nsing, MA_ERR)
648
if (.not. ga_create(MT_DBL,m,nsing,'gasvd',0,0,g_u))
1086
if (.not. ga_create(type,m,nsing,'gasvd',0,0,g_u))
649
1087
$ call errquit('gasvdsol: u',m*nsing, GA_ERR)
650
if (.not. ga_create(MT_DBL,nsing,n,'gasvd',0,0,g_vt))
1088
if (.not. ga_create(type,nsing,n,'gasvd',0,0,g_vt))
651
1089
$ call errquit('gasvdsol: u',nsing*n, GA_ERR)
652
if (.not. ga_create(MT_DBL,nsing,nvec,'gasvd',0,0,g_tmp))
1090
if (.not. ga_create(type,nsing,nvec,'gasvd',0,0,g_tmp))
653
1091
$ call errquit('gasvdsol: tmp',nsing*nvec, GA_ERR)
654
1092
call ga_zero(g_tmp)
656
1094
call ga_svd_seq(g_a, g_u,g_vt,dbl_mb(k_val))
658
1096
do i = 0, nsing-1
659
if (dbl_mb(k_val+i) .lt. tol) then
660
if (ga_nodeid() .eq. 0 .and. oprint) then
661
write(6,*) ' neglecting ', i+1, dbl_mb(k_val+i)
663
dbl_mb(k_val+i) = 0.0d0
665
dbl_mb(k_val+i) = 1.0d0/dbl_mb(k_val+i)
1097
if (dbl_mb(k_val+i) .lt. tol) then
1098
if (ga_nodeid() .eq. 0 .and. oprint) then
1099
write(6,*) ' neglecting ', i+1, dbl_mb(k_val+i)
1101
dbl_mb(k_val+i) = 0.0d0
1103
dbl_mb(k_val+i) = 1.0d0/dbl_mb(k_val+i)
669
call ga_dgemm('t','n',nsing,nvec,m,1.0d0,g_u,g_b,0.0d0,g_tmp)
1107
call ga_dgemm('c','n',nsing,nvec,m,one,g_u,g_b,zero,g_tmp)
670
1108
call ga_scale_lh(g_tmp,dbl_mb(k_val))
671
1109
call ga_zero(g_x)
672
call ga_dgemm('t','n',n,nvec,nsing,1.0d0,g_vt,g_tmp,0.0d0,g_x)
1110
call ga_dgemm('c','n',n,nvec,nsing,one,g_vt,g_tmp,zero,g_x)
674
1112
if (.not. ga_destroy(g_tmp)) call errquit('gasvdsol: des',1,
705
1146
integer n, m, type, l_a, k_a, l_u, k_u, l_vt, k_vt,
706
1147
$ l_work, k_work, lwork, info, nsing
1148
integer l_rwork, k_rwork
708
1150
call ga_inquire(g_a, type, m, n)
709
1151
nsing = min(m,n)
710
1152
if (ga_nodeid() .eq. 0) then
711
1153
lwork = 10*max(m,n)
712
if (.not. ma_push_get(MT_DBL, m*n, 'gasvd', l_a, k_a))
1154
if (.not. ma_push_get(type, m*n, 'gasvd', l_a, k_a))
713
1155
$ call errquit('gasvd: a',m*n, MA_ERR)
714
if (.not. ma_push_get(MT_DBL, m*nsing, 'gasvd', l_u, k_u))
1156
if (.not. ma_push_get(type, m*nsing, 'gasvd', l_u, k_u))
715
1157
$ call errquit('gasvd: u',m*nsing, MA_ERR)
716
if (.not. ma_push_get(MT_DBL, nsing*n, 'gasvd', l_vt, k_vt))
1158
if (.not. ma_push_get(type, nsing*n, 'gasvd', l_vt, k_vt))
717
1159
$ call errquit('gasvd: vt',nsing*n, MA_ERR)
718
if (.not. ma_push_get(MT_DBL, lwork, 'gasvd', l_work, k_work))
1160
if (.not. ma_push_get(type, lwork, 'gasvd', l_work, k_work))
719
1161
$ call errquit('gasvd: work',lwork, MA_ERR)
721
call ga_get(g_a, 1, m, 1, n, dbl_mb(k_a), m)
723
call dgesvd('s','s',m,n,dbl_mb(k_a),m,values,
724
$ dbl_mb(k_u),m,dbl_mb(k_vt),nsing,
725
$ dbl_mb(k_work),lwork,info)
726
if (info .ne. 0) call errquit('gasvd: failed', info, MEM_ERR)
728
call ga_put(g_u, 1, n, 1, nsing, dbl_mb(k_u), n)
729
call ga_put(g_vt, 1, nsing, 1, m, dbl_mb(k_vt), n)
1162
if (type.eq.MT_DCPL) then
1163
if (.not. ma_push_get(MT_DBL, lwork, 'gasvd',
1164
$ l_rwork, k_rwork))
1165
$ call errquit('gasvd: work',lwork, MA_ERR)
1168
if (type.eq.MT_DBL) then
1170
call ga_get(g_a, 1, m, 1, n, dbl_mb(k_a), m)
1172
call dgesvd('s','s',m,n,dbl_mb(k_a),m,values,
1173
$ dbl_mb(k_u),m,dbl_mb(k_vt),nsing,
1174
$ dbl_mb(k_work),lwork,info)
1175
if (info .ne. 0) call errquit('gasvd:d: failed',info,MEM_ERR)
1177
call ga_put(g_u, 1, n, 1, nsing, dbl_mb(k_u), n)
1178
call ga_put(g_vt, 1, nsing, 1, m, dbl_mb(k_vt), n)
1180
else if (type.eq.MT_DCPL) then
1182
call ga_get(g_a, 1, m, 1, n, dcpl_mb(k_a), m)
1184
call zgesvd('s','s',m,n,dcpl_mb(k_a),m,values,
1185
$ dcpl_mb(k_u),m,dcpl_mb(k_vt),nsing,
1186
$ dcpl_mb(k_work),lwork,dbl_mb(k_rwork),info)
1187
if (info .ne. 0) call errquit('gasvd:z: failed',info,MEM_ERR)
1189
call ga_put(g_u, 1, n, 1, nsing, dcpl_mb(k_u), n)
1190
call ga_put(g_vt, 1, nsing, 1, m, dcpl_mb(k_vt), n)
1194
call errquit('gasvd: illegal data type',type,UERR)
731
1198
if (.not. ma_chop_stack(l_a)) call errquit('gasvd ma',0,