4
c $Id: testspd.F,v 1.8 2004-11-12 22:19:10 edo Exp $
5
c***********************************************************************
7
c* Scope : test LLT SCALAPACK routines
9
c* 04/12/96 GVT First Implementation
10
c* Giuseppe Vitillaro peppe@unipg.it
11
c***********************************************************************
16
#define ga_dnormF(g_a) sqrt(ga_ddot(g_a, g_a))
18
#define BLOCK_CYCLIC 0
21
c***********************************************************************
23
c***********************************************************************
28
#include "mafdecls.fh"
35
c*** Intitialize a message passing library
39
c**** Initialize GA package
42
c**** get number of nodes and calculate memory allocation
44
hsize = 6*SIZE*SIZE + 3*SIZE
45
ssize = 3*SIZE*SIZE + 3*SIZE
48
hsize = (hsize/nproc) + 1
49
ssize = (ssize/nproc) + 1 + 3*256*256
51
c**** Initialize MA package
52
if (.not. ma_init(MT_DBL, ssize, hsize))
53
$ call ga_error("ma init failed",ssize+hsize)
58
c**** Exit from the GA package
65
c***********************************************************************
67
c* print informations about this run
68
c***********************************************************************
69
subroutine infos(me, nproc, uplo, eps)
71
#include "mafdecls.fh"
82
print *, 'Number of nodes : ', nproc
83
print *, 'Problem size : ', SIZE
84
print *, 'Uplo : ', uplo
85
print *, 'Epsilon : ', eps
93
c***********************************************************************
95
c* emit test header output
96
c***********************************************************************
97
subroutine thead(header)
99
#include "mafdecls.fh"
109
print *, '> ', header, ' '
117
c***********************************************************************
118
c* subroutine: dthtest
119
c* test a double precision against the THRESH parameter
120
c***********************************************************************
121
subroutine dthtest(msg, dval)
123
#include "mafdecls.fh"
127
double precision dval
134
print *, ' ', msg, dval, ' '
135
if (dval.lt.TRESH) then
136
print *, '> success '
138
print *, '> failure '
147
c***********************************************************************
149
c* test solver result
150
c***********************************************************************
151
subroutine stest(irc, ierc)
153
#include "mafdecls.fh"
167
elseif (irc.gt.0) then
171
if (irc.eq.ierc .or. (irc.gt.0 .and. ierc.gt.0)) then
172
print *, '> success: expected factorization'
174
print *, '> failure: not expected factorization',irc,ierc
185
c***********************************************************************
188
c***********************************************************************
189
subroutine emsg(msg, ival)
191
#include "mafdecls.fh"
202
print *, ' >>> ', msg, ival, ' '
203
print *, '> failure '
213
c***********************************************************************
214
c* subroutine: ctests
215
c* do coretests for LLT Cholesky factorization, solver
216
c***********************************************************************
217
subroutine ctests(uplo)
221
#include "mafdecls.fh"
231
external ga_llt_f, ga_llt_s, ga_llt_i
232
integer ga_llt_f, ga_llt_i
234
double precision A(n,n), X(n,n)
236
integer g_A, g_B, g_X, g_Y
237
integer g_A1, g_X1, g_Y1, g_Y2
239
integer g_AA, g_BB, g_CC, g_YY, g_ZZ
240
integer dims(2), proc_grid(2), block(2)
248
double precision buf(n)
250
double precision dnA, dnX, dnX1, dnY1, dnS, dnF, dnI
253
double precision eps, dlamch
260
call infos(me, nproc, uplo, eps)
263
c****************************************
264
c* Initialize tests variables
265
c* Generate a Lower triangula matrix
266
c* with large positive diagonal elements
267
c* so the LU decomposition will be
269
c****************************************
270
c**** Initialize local arrays A, X
271
c**** they are local copies of the corrispondent
275
X(i,j) = dsin(1.d0 * (i * j))
278
A(i,j) = dsin(1.d0 * (i + j))
281
A(i,j) = SIZE*dabs(dsin(10.d0 * i))
286
c**** Create global arrays
290
write(6,*) '* Creating Block-Cyclic Arrays'
297
call factor(nproc,g1,g2)
301
g_A = ga_create_handle()
302
call ga_set_data(g_A,2,dims,MT_DBL)
303
call ga_set_block_cyclic_proc_grid(g_A,block,proc_grid)
304
if (.not.ga_allocate(g_A))
305
& call ga_error(' ga_create A failed ', 2)
307
g_B = ga_create_handle()
308
call ga_set_data(g_B,2,dims,MT_DBL)
309
call ga_set_block_cyclic_proc_grid(g_B,block,proc_grid)
310
if (.not.ga_allocate(g_B))
311
& call ga_error(' ga_create B failed ', 2)
313
g_A1 = ga_create_handle()
314
call ga_set_data(g_A1,2,dims,MT_DBL)
315
call ga_set_block_cyclic_proc_grid(g_A1,block,proc_grid)
316
if (.not.ga_allocate(g_A1))
317
& call ga_error(' ga_create A1 failed ', 2)
319
g_X = ga_create_handle()
320
call ga_set_data(g_X,2,dims,MT_DBL)
321
call ga_set_block_cyclic_proc_grid(g_X,block,proc_grid)
322
if (.not.ga_allocate(g_X))
323
& call ga_error(' ga_create X failed ', 2)
325
g_X1 = ga_create_handle()
326
call ga_set_data(g_X1,2,dims,MT_DBL)
327
call ga_set_block_cyclic_proc_grid(g_X1,block,proc_grid)
328
if (.not.ga_allocate(g_X1))
329
& call ga_error(' ga_create X1 failed ', 2)
331
g_AA = ga_create_handle()
332
call ga_set_data(g_AA,2,dims,MT_DBL)
333
if (.not.ga_allocate(g_AA))
334
& call ga_error(' ga_create AA failed ', 2)
336
g_BB = ga_create_handle()
337
call ga_set_data(g_BB,2,dims,MT_DBL)
338
if (.not.ga_allocate(g_BB))
339
& call ga_error(' ga_create BB failed ', 2)
341
g_CC = ga_create_handle()
342
call ga_set_data(g_CC,2,dims,MT_DBL)
343
if (.not.ga_allocate(g_CC))
344
& call ga_error(' ga_create CC failed ', 2)
348
g_Y = ga_create_handle()
349
call ga_set_data(g_Y,2,dims,MT_DBL)
350
call ga_set_block_cyclic_proc_grid(g_Y,block,proc_grid)
351
if (.not.ga_allocate(g_Y))
352
& call ga_error(' ga_create Y failed ', 2)
354
g_Y1 = ga_create_handle()
355
call ga_set_data(g_Y1,2,dims,MT_DBL)
356
call ga_set_block_cyclic_proc_grid(g_Y1,block,proc_grid)
357
if (.not.ga_allocate(g_Y1))
358
& call ga_error(' ga_create Y1 failed ', 2)
360
g_Y2 = ga_create_handle()
361
call ga_set_data(g_Y2,2,dims,MT_DBL)
362
call ga_set_block_cyclic_proc_grid(g_Y2,block,proc_grid)
363
if (.not.ga_allocate(g_Y2))
364
& call ga_error(' ga_create Y1 failed ', 2)
366
g_YY = ga_create_handle()
367
call ga_set_data(g_YY,2,dims,MT_DBL)
368
if (.not.ga_allocate(g_YY))
369
& call ga_error(' ga_create YY failed ', 2)
371
g_ZZ = ga_create_handle()
372
call ga_set_data(g_ZZ,2,dims,MT_DBL)
373
if (.not.ga_allocate(g_ZZ))
374
& call ga_error(' ga_create YY failed ', 2)
376
if (.not. ga_create(MT_DBL, n, n, 'A', 1, 1, g_A))
377
& call ga_error(' ga_create A failed ', 2)
379
if (.not. ga_create(MT_DBL, n, n, 'B', 1, 1, g_B))
380
& call ga_error(' ga_create B failed ', 2)
382
if (.not. ga_create(MT_DBL, n, n, 'A1', 1, 1, g_A1))
383
& call ga_error(' ga_create A1 failed ', 2)
385
if (.not. ga_create(MT_DBL, n, n, 'X', 1, 1, g_X))
386
& call ga_error(' ga_create X failed ', 2)
388
if (.not. ga_create(MT_DBL, n, n, 'X1', 1, 1, g_X1))
389
& call ga_error(' ga_create X1 failed ', 2)
391
if (.not. ga_create(MT_DBL, n, 1, 'Y', 1, 1, g_Y))
392
& call ga_error(' ga_create Y failed ', 2)
394
if (.not. ga_create(MT_DBL, n, 1, 'Y1', 1, 1, g_Y1))
395
& call ga_error(' ga_create Y1 failed ', 2)
397
if (.not. ga_create(MT_DBL, n, 1, 'Y2', 1, 1, g_Y2))
398
& call ga_error(' ga_create Y2 failed ', 2)
401
c**** Fill in arrays A, X
402
do j = me+1, n, nproc
403
call ga_put(g_A, 1, n, j, j, A(1,j), n)
404
call ga_put(g_X, 1, n, j, j, X(1,j), n)
408
c**** It is granted that A, B, Y will not change
409
c**** during the execution of the test LLT driver
413
call ga_copy(g_A, g_AA)
414
call ga_dgemm('N', 'T', n, n, n, 1.d0, g_AA, g_AA, 0.d0, g_BB)
415
call ga_copy(g_BB, g_B)
417
call ga_dgemm('N', 'T', n, n, n, 1.d0, g_A, g_A, 0.d0, g_B)
421
call ga_copy(g_B, g_A)
425
call ga_copy(g_A, g_AA)
426
call ga_copy(g_X, g_CC)
427
call ga_dgemm('N', 'N', n, n, n, 1.d0, g_AA, g_CC, 0.d0, g_BB)
428
call ga_copy(g_BB,g_B)
430
call ga_dgemm('N', 'N', n, n, n, 1.d0, g_A, g_X, 0.d0, g_B)
433
c**** Copy B(1:n,1:1) to Y(1:n) so Y will X column 1
434
call ga_copy_patch('N', g_B, 1, n, 1, 1, g_Y, 1, n, 1, 1)
440
c****************************************************
441
c Test Cholesky factorization external interface
442
c****************************************************
443
call thead("Test Cholesky factorization")
446
call ga_copy(g_A, g_X)
448
c**** call Cholesky factorization routine in ScaLAPACK PDPOTRF
449
c**** to obtain an LL'/U'U factorization (external interface)
450
irc = ga_cholesky(uplo, g_X)
452
c**** if return code 'zero' is OK
455
c**** A1 = X * X^ or A1 = X^ * X
456
if (uplo.eq.'L') then
458
call ga_copy(g_X, g_CC)
459
call ga_dgemm('N', 'T', n, n, n, 1.d0, g_CC, g_CC, 0.d0, g_AA)
460
call ga_copy(g_AA, g_A1)
462
call ga_dgemm('N', 'T', n, n, n, 1.d0, g_X, g_X, 0.d0, g_A1)
466
call ga_copy(g_X, g_CC)
467
call ga_dgemm('T', 'N', n, n, n, 1.d0, g_CC, g_CC, 0.d0, g_AA)
468
call ga_copy(g_AA, g_A1)
470
call ga_dgemm('T', 'N', n, n, n, 1.d0, g_X, g_X, 0.d0, g_A1)
474
c**** A1 = A - A1 = A - X * X^ or A1 = A - A1 = A - X^ * X
475
call ga_add(1.0d0, g_A1, -1.0d0, g_A, g_A1)
477
c**** dnF = ||A - A1*A1^|| / (||A|| * N * eps) : SHAPE L
478
c**** dnF = ||A - A1^*A1|| / (||A|| * N * eps) : SHAPE U
479
dnF = ga_dnormF(g_A1) / (dnA * n * eps)
481
if (uplo.eq.'L') then
482
call dthtest("||LL' - A|| / (||A|| * N * eps) =", dnF)
484
call dthtest("||U'U - A|| / (||A|| * N * eps) =", dnF)
487
c**** if return code is > 0
489
call emsg('It is not positive definite the minor of order:',
493
c****************************************************
494
c Test Cholesky factorization and solver
495
c internal interfaces with a NxN RHS array
496
c****************************************************
497
call thead("Test II Cholesky solver with a NxN RHS array")
499
c**** call Cholesky factorization routine in ScaLAPACK PDPOTRF
500
c**** to obtain an LL'/U'U factorization
501
c**** (internal interface: it will not destroy A)
503
call ga_copy(g_A, g_AA)
506
irc = ga_llt_f(uplo, g_A, hsA)
508
c**** if return code from ga_llt_f is zero
512
call ga_copy(g_B, g_X)
514
c**** call Cholesky solver routine in ScaLAPACK PDPOTRS
515
c**** internal interface with an NxN RHS GA
516
call ga_llt_s(uplo, g_A, g_X, hsA)
520
call ga_copy(g_X, g_BB)
521
call ga_dgemm('N', 'N', n, n, n, 1.d0,
522
& g_AA, g_BB, 0.d0, g_CC)
523
call ga_copy(g_CC,g_A1)
525
call ga_dgemm('N', 'N', n, n, n, 1.d0,
526
& g_A, g_X, 0.d0, g_A1)
529
c**** A1 = A1 - B = A * X - B
530
call ga_add(1.d0, g_A1, -1.d0, g_B, g_A1)
535
c**** dnS = ||AX - B|| / (||A|| * ||X|| * N * eps)
536
dnS = ga_dnormF(G_A1) / (dnA * dnX * n * eps)
538
call dthtest("||A*X - B||/(||A||*||X||*n*eps) =", dnS)
540
c**** if return code is > 0
542
call emsg('It is not positive definite the minor of order:',
547
c****************************************************
548
c Test Cholesky factorization and solver
549
c internal interfaces with a single vector RHS
550
c****************************************************
551
call thead("Test II Cholesky solver with a single vector RHS")
553
c**** call Cholesky factorization routine in ScaLAPACK PDPOTRF
554
c**** to obtain an LL'/U'U factorization
555
c**** (internal interface: it will not destroy A)
557
call ga_copy(g_AA, g_A)
560
irc = ga_llt_f(uplo, g_A, hsA)
562
c**** if return code from ga_llt_f is zero
566
call ga_copy(g_Y, g_Y1)
568
c**** call Cholesky solver routine in ScaLAPACK PDPOTRS
569
c**** internal interface with a single vector RHS
570
call ga_llt_s(uplo, g_A, g_Y1, hsA)
573
dnY1 = ga_dnormF(g_Y1)
577
call ga_copy(g_Y1, g_YY)
578
call ga_dgemm('N', 'N', n, 1, n, 1.d0,
579
& g_AA, g_YY, 0.d0, g_ZZ)
580
call ga_copy(g_ZZ,g_Y2)
582
call ga_dgemm('N', 'N', n, 1, n, 1.d0,
583
& g_A, g_Y1, 0.d0, g_Y2)
586
c**** Y1 = Y1 - Y = A * Y1 - Y
587
call ga_add(1.d0, g_Y2, -1.d0, g_Y, g_Y1)
589
c**** dnS = ||AY1 - Y|| / (||A|| * ||Y1|| * N * eps)
590
dnS = ga_dnormF(G_A1) / (dnA * dnY1 * n * eps)
592
call dthtest("||A*X - V||/(||A||*||X||*n*eps) =", dnS)
594
c**** if return code is > 0
596
call emsg('It is not positive definite the minor of order:',
600
c****************************************************
601
c Test Cholesky solver with a NxN RHS array
602
c****************************************************
603
call thead("Test EI Cholesky solver with a NxN RHS array")
606
call ga_copy(g_B, g_X)
608
c**** call Cholesky L/U solver with a NxN RHS
610
call ga_copy(g_AA,g_A)
612
irc = ga_llt_solve(g_A, g_X)
614
c**** if return code from ga_llt_solve is zero
619
call ga_copy(g_X,g_BB)
620
call ga_dgemm('N', 'N', n, n, n, 1.d0,
621
& g_AA, g_BB, 0.d0, g_CC)
622
call ga_copy(g_CC,g_A1)
624
call ga_dgemm('N', 'N', n, n, n, 1.d0,
625
& g_A, g_X, 0.d0, g_A1)
628
c**** A1 = A1 - B = A * X - B
629
call ga_add(1.d0, g_A1, -1.d0, g_B, g_A1)
634
c**** dnS = ||AX - B|| / (||A|| * ||X|| * N * eps)
635
dnS = ga_dnormF(G_A1) / (dnA * dnX * n * eps)
637
call dthtest("||A*X - B||/(||A||*||X||*n*eps) =", dnS)
639
c**** if return code is > 0
641
call emsg('It is not positive definite the minor of order:',
646
c****************************************************
647
c Test Cholesky solver with a single vector RHS
648
c****************************************************
649
call thead("Test EI Cholesky solver with a single vector RHS")
652
call ga_copy(g_Y, g_Y1)
654
c**** call Cholesky L/U solver with a single vector RHS
656
call ga_copy(g_AA,g_A)
658
irc = ga_llt_solve(g_A, g_Y1)
660
c**** if return code from ga_llt_solve is zero
664
dnY1 = ga_dnormF(g_Y1)
668
call ga_copy(g_Y1,g_YY)
669
call ga_dgemm('N', 'N', n, 1, n, 1.d0,
670
& g_AA, g_YY, 0.d0, g_ZZ)
671
call ga_copy(g_ZZ,g_Y2)
673
call ga_dgemm('N', 'N', n, 1, n, 1.d0,
674
& g_A, g_Y1, 0.d0, g_Y2)
677
c**** Y1 = Y1 - Y = A * Y1 - Y
678
call ga_add(1.d0, g_Y2, -1.d0, g_Y, g_Y1)
680
c**** dnS = ||AY1 - Y|| / (||A|| * ||Y1|| * N * eps)
681
dnS = ga_dnormF(G_A1) / (dnA * dnY1 * n * eps)
683
call dthtest("||A*X - V||/(||A||*||X||*n*eps) =", dnS)
685
c**** if return code is > 0
687
call emsg('It is not positive definite the minor of order:',
692
c****************************************************
693
c Test Cholesky factorization and inversion
694
c internal interfaces
695
c****************************************************
696
call thead("Test II inversion of an SPD matrix")
700
call ga_copy(g_AA, g_A)
702
call ga_copy(g_A, g_X)
704
c**** call Cholesky factorization routine in ScaLAPACK PDPOTRF
705
c**** to obtain an LL'/U'U factorization
706
c**** (internal interface: it will not destroy X1)
708
irc = ga_llt_f(uplo, g_X, hsA)
710
c**** if return code from ga_llt_f is zero
713
c**** call Cholesky inversion routine in ScaLAPACK PDPOTRI
714
c**** internal interface
715
irc = ga_llt_i(uplo, g_X, hsA)
717
c**** if the inversion could be done
720
c**** dnX = ||X|| = ||invA||
723
c**** A1 = A * X = A * invA
725
call ga_copy(g_X,g_BB)
726
call ga_dgemm('N', 'N', n, n, n, 1.d0,
727
& g_AA, g_BB, 0.d0, g_CC)
728
call ga_copy(g_CC,g_A1)
730
call ga_dgemm('N', 'N', n, n, n, 1.d0,
731
& g_A, g_X, 0.d0, g_A1)
734
c**** A1 = A1 - I = A * invA - I
735
do j = me+1, n, nproc
736
call ga_get(g_A1, j, j, j, j, buf, 1)
737
buf(1) = buf(1) - 1.d0
738
call ga_put(g_A1, j, j, j, j, buf, 1)
741
c**** dnI = ||A*invA - I|| / (||A| * ||invA|| * N * eps)
742
dnI = ga_dnormF(g_A1) / (dnA * dnX * n * eps)
744
call dthtest('||A*invA - I||/(||A||*||invA||*n*eps) =',
748
c**** otherwise if the ga_llt_i return code is > 0
749
call emsg('there is a zero diagonal element:', irc)
752
c**** if return code is > 0
754
call emsg('It is not positive definite the minor of order:',
759
c****************************************************
760
c Test Cholesky inversion
761
c****************************************************
762
call thead("Test inversion of an SPD matrix")
766
call ga_copy(g_AA, g_A)
768
call ga_copy(g_A, g_X)
770
c**** call Cholesky L/U inversion
771
irc = ga_spd_invert(g_X)
773
c**** if return code from ga_spd_invert is zero
776
c**** dnX = ||X|| = ||invA||
779
c**** A1 = A * X = A * invA
781
call ga_copy(g_X, g_BB)
782
call ga_dgemm('N', 'N', n, n, n, 1.d0,
783
& g_AA, g_BB, 0.d0, g_CC)
784
call ga_copy(g_CC, g_A1)
786
call ga_dgemm('N', 'N', n, n, n, 1.d0,
787
& g_A, g_X, 0.d0, g_A1)
790
c**** A1 = A1 - I = A * invA - I
791
do j = me+1, n, nproc
792
call ga_get(g_A1, j, j, j, j, buf, 1)
793
buf(1) = buf(1) - 1.d0
794
call ga_put(g_A1, j, j, j, j, buf, 1)
797
c**** dnI = ||A*invA - I|| / (||A|| * ||invA|| * N * eps)
798
dnI = ga_dnormF(g_A1) / (dnA * dnX * n * eps)
800
call dthtest('||A*invA - I||/(||A||*||invA||*n*eps) =',
803
c**** if return code is < 0
804
elseif (irc.lt.0) then
805
call emsg('there is a zero diagonal element:', irc)
806
c**** if return code is > 0
807
elseif (irc.gt.0) then
808
call emsg('It is not positive definite the minor of order:',
813
c****************************************************
814
c* Test general solver with a NxN RSH
815
c* simmetric positive definite array
816
c****************************************************
818
& 'Test solver for a symmetric P.D. matrix and NxN RHS'
822
call ga_copy(g_B, g_X)
824
c**** call general solver with a NxN RHS p.d. symmetric array
826
call ga_copy(g_AA, g_A)
828
irc = ga_solve(g_A, g_X)
832
call ga_copy(g_X, g_BB)
833
call ga_dgemm('N', 'N', n, n, n, 1.d0,
834
& g_AA, g_BB, 0.d0, g_CC)
835
call ga_copy(g_CC, g_A1)
837
call ga_dgemm('N', 'N', n, n, n, 1.d0,
838
& g_A, g_X, 0.d0, g_A1)
841
c**** A1 = A1 - B = A * X - B
842
call ga_add(1.d0, g_A1, -1.d0, g_B, g_A1)
847
c**** dnS = ||AX - B|| / (||A|| * ||X|| * N * eps)
848
dnS = ga_dnormF(G_A1) / (dnA * dnX * n * eps)
850
call dthtest("||A*X - B||/(||A||*||X||*n*eps) =", dnS)
855
c****************************************************
856
c* Test general solver with a NxN RSH
857
c* simmetric non positive definite array
858
c****************************************************
860
& 'Test solver for a symmetric NON P.D. matrix and NxN RHS')
863
call ga_copy(g_B, g_X1)
865
c**** and now symmetrize X1
866
c**** so we obtain a symmetric matrix in X1
867
call ga_symmetrize(g_X1)
870
call ga_copy(g_B, g_X)
872
c**** call general solver with a NxN RHS symmetric array
874
call ga_copy(g_X1, g_BB)
875
irc = ga_solve(g_BB, g_X)
877
irc = ga_solve(g_X1, g_X)
881
dnX1 = ga_dnormF(g_X1)
888
call ga_copy(g_X1,g_AA)
889
call ga_copy(g_X,g_BB)
890
call ga_dgemm('N', 'N', n, n, n, 1.d0,
891
& g_AA, g_BB, 0.d0, g_CC)
892
call ga_copy(g_CC,g_A1)
894
call ga_dgemm('N', 'N', n, n, n, 1.d0,
895
& g_X1, g_X, 0.d0, g_A1)
898
c**** A1 = A1 - B = X1 * X - B
899
call ga_add(1.d0, g_A1, -1.d0, g_B, g_A1)
901
c**** dnS = ||X1*X - B|| / (||X1|| * ||X|| * N * eps)
902
dnS = ga_dnormF(G_A1) / (dnX1 * dnX * n * eps)
904
call dthtest("||A*X - B||/(||A||*||X||*n*eps) =", dnS)
909
c****************************************************
911
c****************************************************
912
c**** just print a newline and return
921
subroutine factor(p,idx,idy)
923
integer i,j,p,idx,idy,it
924
integer ip,ifac,pmax,prime(1280)
930
c factor p completely
931
c first, find all prime numbers less than or equal to p
936
if (mod(i,prime(j)).eq.0) go to 100
943
c find all prime factors of p
947
200 if (mod(ip,prime(i)).eq.0) then
955
c determine two factors of p of approximately the