17
16
void F77_PDSTEQR(int *n, double *d, double *e,
18
17
double *z, int *ldz, int *nz, double *work,
19
void F77_DCOPY(int *n, double *dx, int *incx, double *dy, int *incy);
20
double F77_DNRM2(int *n, double *dx, int *incx);
21
double F77_DDOT(int *n, double *dx, int *incx, double *dy, int *incy);
22
void F77_DSCAL(int *n, double *da, double *dx, int *incx);
23
void F77_DAXPY(int *n, double *da, double *dx, int *incx, double *dy, int *incy);
24
28
static void dist_diagonalize_(int n, int m, double *a, double *d, double *e,
25
29
double *sigma, double *z, double *v, double *w,
26
30
int *ind, const Ref<MessageGrp>&);
28
pimtql2_ (double *d,double *e,int *sn,double *z,int *sm,int *info);
30
static double absol(double x);
32
32
static void pflip(int id,int n,int m,int p,double *ar,double *ac,double *w,
33
33
const Ref<MessageGrp>&);
35
static double epslon (double x);
37
static void update(double *z,int m,double c,double s);
40
36
ptred2_(double *a, int *lda, int *n, int *m, int *p, int *id,
41
37
double *d, double *e, double *z, double *work,
109
105
/* diagonalize tridiagonal matrix using implicit QL method */
112
pimtql2_(d,e,&n,z,&m,&info);
114
ExEnv::outn() << "dist_diagonalize: node "
115
<< grp->me() << ": nonzero ierr from pimtql2" << endl;
119
107
for (i=1; i<n; i++) e[i-1] = e[i];
120
108
F77_PDSTEQR(&n, d, e, z, &m, &m, w, &info);
123
110
/* rearrange the eigenvectors by transposition */
126
dcopy_(&i,&z[0],&one,&a[0],&one);
113
F77_DCOPY(&i,&z[0],&one,&a[0],&one);
127
114
pflip(id,n,m,nproc,&a[0],&v[0],&w[0],grp);
130
117
/* ******************************************************** */
131
/* Function of this subroutine : */
132
/* diagonalize a real, symmetric tridiagonal matrix */
133
/* using the QL method */
136
/* d[n] - the diagonal of the tridiagonal result */
137
/* e[n] - the offdiagonal of the result(e[1]-e[n-1]) */
138
/* sn - size of the tridiagonal matrix */
139
/* z[m][n] - m rows of transformation matrix from before */
140
/* m - number of locally held columns */
142
/* d[n] - the eigenvalues */
143
/* e[n] - non-useful information */
144
/* z[m][n] - m rows of eigenvectors */
145
/* info - if 0, results are accurate */
146
/* if nonzero, results may be inaccurate */
147
/* -------------------------------------------------------- */
150
pimtql2_ (double *d,double *e,int *sn,double *z,int *sm,int *info)
152
double c,s,t,q,u,p,h,macheps;
153
int n,m,i,j,k,im,its,maxit=30,one=1;
155
/* extract parameters */
163
for (i=1; i<n; i++) e[i-1] = e[i];
166
for (j=0; j<n; j++) {
168
while (its < maxit) {
169
for (im=j; im<=k; im++) {
170
// this is the original threshold
171
double threshold = macheps*(absol(d[im])+absol(d[im+1]));
172
// new threshold will hopefully ensure convergence
173
//if (threshold < macheps) threshold = macheps;
174
if (absol(e[im]) <= threshold) break;
176
// double dsum = absol(d[im])+absol(d[im+1]);
177
// if (dsum + absol(e[im]) == dsum) break;
184
/* form implicit Wilkinson shift */
186
q = (d[j+1] - u) / (2.0 * e[j]);
187
t = sqrt(1.0 + q * q);
188
q = d[im] - u + e[j]/((q < 0.0) ? q - t : q + t);
191
for (i=im-1; i>=j; i--) {
194
if (absol(p) >= absol(q)) {
196
t = sqrt(1.0 + c * c);
202
t = sqrt(1.0 + s * s);
208
t = (d[i] - q) * s + 2.0 * c * h;
213
/* form eigenvectors */
216
for (int ia=0; ia<m; ia++) {
218
z[(i+1)*m+ia] = s * z[i*m+ia] + c * p;
219
z[i*m+ia] = c * z[i*m+ia] - s * p;
222
update(&z[i*m],m,c,s);
236
/* order eigenvalues and eigenvectors */
238
for (j=0; j<n-1; j++) {
240
for (i=j+1; i<n; i++) if (d[i] < d[k]) k = i;
242
dswap_(&one,&d[j],&one,&d[k],&one);
243
dswap_(&m,&z[j*m],&one,&z[k*m],&one);
248
/* ******************************************************** */
259
/* ******************************************************** */
260
118
/* Function : transpose matrix */
261
119
/* -------------------------------------------------------- */
270
128
for (k=0; k<n; k++) {
273
dcopy_(&n,&ar[i],&m,&w[0],&one);
131
F77_DCOPY(&n,&ar[i],&m,&w[0],&one);
276
134
grp->raw_bcast(&w[0], n*dpsize, r);
277
dcopy_(&m,&w[id],&p,&ac[k],&n);
281
/* ******************************************************** */
282
/* Function : calculate machine epslon */
283
/* -------------------------------------------------------- */
296
if (eps < 0.0) eps = - eps;
298
if (x < 0.0) a = - x;
303
update(double *z,int m,double c,double s)
308
for (i=0; i < m; i++) {
310
z[m+i] = s * z[i] + c * p;
311
z[i] = c * z[i] - s * p;
135
F77_DCOPY(&m,&w[id],&p,&ac[k],&n);
396
alpha = dnrm2_(&q,&a[l*slda+k],&inc);
220
alpha = F77_DNRM2(&q,&a[l*slda+k],&inc);
397
221
if (a[l*slda+k] < 0.0) alpha = -alpha;
398
222
if (alpha != 0.0) {
399
223
alpha2 = 1.0 / alpha;
400
dscal_(&q,&alpha2,&a[l*slda+k],&inc);
224
F77_DSCAL(&q,&alpha2,&a[l*slda+k],&inc);
401
225
a[l*slda+k] += 1.0;
403
dcopy_(&q,&a[l*slda+k],&inc,&d[k],&inc);
227
F77_DCOPY(&q,&a[l*slda+k],&inc,&d[k],&inc);
417
dcopy_(&q,&alpha2,&j,&e[k],&inc);
241
F77_DCOPY(&q,&alpha2,&j,&e[k],&inc);
419
243
for (j=l; j<sm; j++) {
421
e[i] = e[i] + ddot_(&q,&a[j*slda+i],&inc,&d[i],&inc);
245
e[i] = e[i] + F77_DDOT(&q,&a[j*slda+i],&inc,&d[i],&inc);
423
daxpy_(&q,&d[i],&a[slda*j+i+1],&inc,&e[i+1],&inc);
247
F77_DAXPY(&q,&d[i],&a[slda*j+i+1],&inc,&e[i+1],&inc);
433
257
alpha2 = 1.0 / beta;
434
dscal_(&q,&alpha2,&e[k],&inc);
435
gamma = 0.5*ddot_(&q,&d[k],&inc,&e[k],&inc)/beta;
436
daxpy_(&q,&gamma,&d[k],&inc,&e[k],&inc);
258
F77_DSCAL(&q,&alpha2,&e[k],&inc);
259
gamma = 0.5*F77_DDOT(&q,&d[k],&inc,&e[k],&inc)/beta;
260
F77_DAXPY(&q,&gamma,&d[k],&inc,&e[k],&inc);
439
263
/* Rank two update of A, compute only lower half. */
443
267
for (j=l; j<sm; j++) {
445
daxpy_(&q,&d[i],&e[i],&inc,&a[j*slda+i],&inc);
446
daxpy_(&q,&e[i],&d[i],&inc,&a[j*slda+i],&inc);
269
F77_DAXPY(&q,&d[i],&e[i],&inc,&a[j*slda+i],&inc);
270
F77_DAXPY(&q,&e[i],&d[i],&inc,&a[j*slda+i],&inc);
451
275
for (i=0; i<sm; i++) {
452
gamma = ddot_(&q,&d[k],&inc,&z[k*sm+i],&sm)*oobeta;
453
daxpy_(&q,&gamma,&d[k],&inc,&z[k*sm+i],&sm);
276
gamma = F77_DDOT(&q,&d[k],&inc,&z[k*sm+i],&sm)*oobeta;
277
F77_DAXPY(&q,&gamma,&d[k],&inc,&z[k*sm+i],&sm);
570
394
if (beta != 0.0) {
571
395
for (i = 0; i < sm; i++) {
572
gamma = ddot_(&nmik, &d[ik], &inc, &z[ik * sm + i], &sm) / beta;
573
daxpy_(&nmik, &gamma, &d[ik], &inc, &z[ik * sm + i], &sm);
396
gamma = F77_DDOT(&nmik, &d[ik], &inc, &z[ik * sm + i], &sm) / beta;
397
F77_DAXPY(&nmik, &gamma, &d[ik], &inc, &z[ik * sm + i], &sm);
598
dcopy_(&q, &alpha2, &j, &e[k], &inc);
422
F77_DCOPY(&q, &alpha2, &j, &e[k], &inc);
600
424
for (j = l; j < sm; j++) {
603
e[i] += ddot_(&q, &a[ij], &inc, &d[i], &inc);
427
e[i] += F77_DDOT(&q, &a[ij], &inc, &d[i], &inc);
605
daxpy_(&q, &d[i], &a[ij+1], &inc, &e[i + 1], &inc);
429
F77_DAXPY(&q, &d[i], &a[ij+1], &inc, &e[i + 1], &inc);
608
432
grp->sum(&e[k], sn-k, work);
616
440
alpha2 = 1.0 / beta;
617
dscal_(&q, &alpha2, &e[k], &inc);
618
gamma = 0.5 * ddot_(&q, &d[k], &inc, &e[k], &inc) / beta;
619
daxpy_(&q, &gamma, &d[k], &inc, &e[k], &inc);
441
F77_DSCAL(&q, &alpha2, &e[k], &inc);
442
gamma = 0.5 * F77_DDOT(&q, &d[k], &inc, &e[k], &inc) / beta;
443
F77_DAXPY(&q, &gamma, &d[k], &inc, &e[k], &inc);
621
445
/* Rank two update of A, compute only lower half. */
622
446
/* A = A + u'*v + v'*u = H*A*H */
625
449
for (j = l; j < sm; j++) {
626
450
double *atmp= &a[j*slda+i];
628
daxpy_(&q, &d[i], &e[i], &inc, atmp, &inc);
629
daxpy_(&q, &e[i], &d[i], &inc, atmp, &inc);
452
F77_DAXPY(&q, &d[i], &e[i], &inc, atmp, &inc);
453
F77_DAXPY(&q, &e[i], &d[i], &inc, atmp, &inc);
641
465
oobeta = 1.0 / beta;
642
466
for (i = 0; i < sm; i++) {
643
gamma = ddot_(&q, &d[k], &inc, &z[k * sm + i], &sm) * oobeta;
644
daxpy_(&q, &gamma, &d[k], &inc, &z[k * sm + i], &sm);
467
gamma = F77_DDOT(&q, &d[k], &inc, &z[k * sm + i], &sm) * oobeta;
468
F77_DAXPY(&q, &gamma, &d[k], &inc, &z[k * sm + i], &sm);