4
c $Id: testsolve.F,v 1.11 2006/03/20 20:02:29 manoj Exp $
10
#define BLOCK_CYCLIC 0
12
c*** Intitialize a message passing library
16
c Intitialize the GA package
19
c if(ga_nodeid().eq.0)print *,ga_nnodes(),' nodes'
21
c Initialize the MA package
25
if (.not. ma_init(MT_DBL, heap, stack))
26
$ call ga_error("ma init failed",heap+stack)
31
if(ga_nodeid().eq.0) print *,'All tests successful '
43
#include "mafdecls.fh"
48
double precision a(n,n), b(n,n), c(n,n)
49
integer g_a,g_b,g_c,g_d, g_e, g_f, g_g
50
integer g_aa,g_bb,g_cc,g_dd, g_ee, g_ff, g_gg
52
integer ndim, dims(2), block(2), proc_grid(2), g1, g2
54
double precision dsin, sum
60
c a() is a local copy of what the global array should start as
65
b(i,j) = DSIN(1d0* (i+j))
79
open(unit=7,file='amat.dat',status='unknown')
81
write(7,128) (a(i,j),j=1,min(20,n))
87
c*** Create global arrays
91
write(6,*) '* Creating Block-Cyclic Arrays'
98
call factor(nproc,g1,g2)
101
g_a = ga_create_handle()
102
call ga_set_data(g_a,2,dims,MT_DBL)
103
call ga_set_block_cyclic_proc_grid(g_a,block,proc_grid)
104
status = ga_allocate(g_a)
105
g_b = ga_create_handle()
106
call ga_set_data(g_b,2,dims,MT_DBL)
107
call ga_set_block_cyclic_proc_grid(g_b,block,proc_grid)
108
status = ga_allocate(g_b)
109
g_c = ga_create_handle()
110
call ga_set_data(g_c,2,dims,MT_DBL)
111
call ga_set_block_cyclic_proc_grid(g_c,block,proc_grid)
112
status = ga_allocate(g_c)
113
g_d = ga_create_handle()
114
call ga_set_data(g_d,2,dims,MT_DBL)
115
call ga_set_block_cyclic_proc_grid(g_d,block,proc_grid)
116
status = ga_allocate(g_d)
118
g_e = ga_create_handle()
119
call ga_set_data(g_e,2,dims,MT_DBL)
120
call ga_set_block_cyclic_proc_grid(g_e,block,proc_grid)
121
status = ga_allocate(g_e)
122
g_f = ga_create_handle()
123
call ga_set_data(g_f,2,dims,MT_DBL)
124
call ga_set_block_cyclic_proc_grid(g_f,block,proc_grid)
125
status = ga_allocate(g_f)
126
g_g = ga_create_handle()
127
call ga_set_data(g_g,2,dims,MT_DBL)
128
call ga_set_block_cyclic_proc_grid(g_g,block,proc_grid)
129
status = ga_allocate(g_g)
130
g_aa = ga_create_handle()
132
call ga_set_data(g_aa,2,dims,MT_DBL)
133
status = ga_allocate(g_aa)
134
g_bb = ga_create_handle()
135
call ga_set_data(g_bb,2,dims,MT_DBL)
136
status = ga_allocate(g_bb)
137
g_cc = ga_create_handle()
138
call ga_set_data(g_cc,2,dims,MT_DBL)
139
status = ga_allocate(g_cc)
140
g_dd = ga_create_handle()
141
call ga_set_data(g_dd,2,dims,MT_DBL)
142
status = ga_allocate(g_dd)
144
g_ee = ga_create_handle()
145
call ga_set_data(g_ee,2,dims,MT_DBL)
146
status = ga_allocate(g_ee)
147
g_ff = ga_create_handle()
148
call ga_set_data(g_ff,2,dims,MT_DBL)
149
status = ga_allocate(g_ff)
150
g_gg = ga_create_handle()
151
call ga_set_data(g_gg,2,dims,MT_DBL)
152
status = ga_allocate(g_gg)
154
if (.not. ga_create(MT_DBL, n, n, 'a', 1, 1, g_a))
155
$ call ga_error(' ga_create failed ',2)
156
if (.not. ga_create(MT_DBL, n, n, 'b', 1, 1, g_b))
157
$ call ga_error(' ga_create failed ',2)
158
if (.not. ga_create(MT_DBL, n, n, 'c', 1, 1, g_c))
159
$ call ga_error(' ga_create failed ',2)
160
if (.not. ga_create(MT_DBL, n, n, 'd', 1, 1, g_d))
161
$ call ga_error(' ga_create failed ',2)
162
if (.not. ga_create(MT_DBL, n, 1, 'e', 1, 1, g_e))
163
$ call ga_error(' ga_create failed ',2)
164
if (.not. ga_create(MT_DBL, n, 1, 'f', 1, 1, g_f))
165
$ call ga_error(' ga_create failed ',2)
166
if (.not. ga_create(MT_DBL, n, 1, 'g', 1, 1, g_g))
167
$ call ga_error(' ga_create failed ',2)
171
c*** Fill in arrays A & B
173
print *, ' filling in A and B '
175
call ga_put(g_e, 1,n, 1,1, b(1,1),n)
176
call ga_put(g_f, 1,n, 1,1, b(1,1),n)
178
do j = 1+me, n, nproc
179
call ga_put(g_a, 1,n, j,j, a(1,j),n)
180
call ga_put(g_b, 1,n, j,j, b(1,j),n)
181
call ga_put(g_c, 1,n, j,j, b(1,j),n)
185
c call ga_copy(g_b,g_c)
189
print *, '>Test of the LU-based solver with nxn rhs '
193
#ifndef HAVE_SCALAPACK
194
call ga_lu_solve_seq('n', g_a, g_b)
197
call ga_copy(g_a,g_aa)
199
call ga_lu_solve('n', g_a, g_b)
201
call ga_copy(g_aa,g_a)
205
c call print_block(g_b)
206
call ga_copy(g_b,g_bb)
207
call ga_copy(g_c,g_cc)
208
call ga_copy(g_d,g_dd)
209
call ga_dgemm('n','n',n,n,n, 1d0, g_aa, g_bb, 0d0, g_dd) ! d := a*b
210
call ga_add(1d0, g_dd, -1d0, g_cc, g_cc)
211
sum = ga_ddot(g_cc,g_cc)
213
c call print_rblock(g_b)
214
call ga_dgemm('n','n',n,n,n, 1d0, g_a, g_b, 0d0, g_d) ! d := a*b
215
call ga_add(1d0, g_d, -1d0, g_c, g_c)
216
sum = ga_ddot(g_c,g_c)
220
print *, ' dsqrt(sum) = ', dsqrt(sum)
222
print *, ' norm = ', dsqrt(sum)/n
223
if(dsqrt(sum)/n.lt.1d-10) then
224
print *, ' test passed '
226
call ga_error(' test failed ',3)
234
print *,'>Test of the LU-based solver with a single vector rhs'
239
#ifndef HAVE_SCALAPACK
240
call ga_lu_solve_seq('n', g_a, g_e)
243
call ga_copy(g_a,g_aa)
245
call ga_lu_solve('n', g_a, g_e)
249
call ga_copy(g_e,g_ee)
250
call ga_copy(g_f,g_ff)
251
call ga_copy(g_g,g_gg)
252
call ga_dgemm('n','n',n,1,n, 1d0, g_aa, g_ee, 0d0, g_gg) ! g := a*e
253
call ga_add(1d0, g_gg, -1d0, g_ff, g_ff)
254
sum = ga_ddot(g_ff,g_ff)
256
call ga_dgemm('n','n',n,1,n, 1d0, g_a, g_e, 0d0, g_g) ! g := a*e
257
call ga_add(1d0, g_g, -1d0, g_f, g_f)
258
sum = ga_ddot(g_f,g_f)
262
print *, ' norm = ', dsqrt(sum)/n
263
if(dsqrt(sum)/n.lt.1d-10) then
264
print *, ' test passed '
266
call ga_error(' test failed ',4)
273
subroutine factor(p,idx,idy)
275
integer i,j,p,idx,idy,it
276
integer ip,ifac,pmax,prime(1280)
282
c factor p completely
283
c first, find all prime numbers less than or equal to p
288
if (mod(i,prime(j)).eq.0) go to 100
295
c find all prime factors of p
299
200 if (mod(ip,prime(i)).eq.0) then
307
c determine two factors of p of approximately the
322
subroutine print_block(g_a)
324
#include "mafdecls.fh"
326
integer g_a, i, j, istride, jstride
335
call nga_access_block_segment(g_a,me,idx,ld)
337
write(6,733) me,(dbl_mb(idx+(j-1)*istride+i-1),j=1,jstride)
339
733 format(i8,8f12.6)
343
subroutine print_rblock(g_a)
345
#include "mafdecls.fh"
347
integer g_a, i, j, istride, jstride
348
integer idx, ld, me, lo(2), hi(2)
352
call nga_distribution(g_a,me,lo,hi)
353
call nga_access(g_a,lo,hi,idx,ld)
355
write(6,733) me,(dbl_mb(idx+(j-1)*istride+i-1),j=1,jstride)
357
733 format(i8,8f12.6)