3
#include <libciomr/libciomr.h>
12
/* Global variables necessary for pthreads */
13
pthread_mutex_t inc_Ia_mutex;
14
struct stringwr *Ia_global;
18
int form_ilist(struct stringwr *alplist, int Ja_list, int nas, int kl,
19
int *L, int *R, double *Sgn);
20
int form_ilist_rotf(int *Cnt, int **Ridx, signed char **Sn, int **Ij,
21
int nas, int kl, int *L, int *R, double *Sgn);
22
void s3_block_vdiag_pthread(void *threadarg);
23
void s3_block_v_pthread(void *threadarg);
26
#define INDEX(i,j) ((i>j) ? (ioff[(i)]+(j)) : (ioff[(j)]+(i)))
32
** Calculate a block of the sigma3 vector in equation (9c) of
33
** Olsen, Roos, et al. For diagonal blocks of sigma.
35
** currently assumes that (ij|ij)'s have not been halved
36
** Try to get the Olsen vector version working....again!!!!
38
void s3_block_vdiag(struct stringwr *alplist, struct stringwr *betlist,
39
double **C, double **S, double *tei, int nas, int nbs, int cnas,
40
int Ib_list, int Ja_list, int Jb_list, int Ib_sym, int Jb_sym,
41
double **Cprime, double *F, double *V, double *Sgn, int *L, int *R)
45
int ij, i, j, t, kl, I, J, RJ;
46
double tval, VS, *CprimeI0, *CI0;
47
int jlen, Jacnt, *Iaij, *orbsym, norbs, Ia_idx;
51
struct pthreads_s3diag **thread_info;
52
int npthreads, rc, status;
55
npthreads = Parameters.nthreads-1; /* subtract out the main thread */
57
thread = (pthread_t *) malloc(sizeof(pthread_t)*Parameters.nthreads);
59
thread_info = (struct pthreads_s3diag **)
60
malloc(sizeof(struct pthreads_s3diag *) * nas);
62
for (i=0; i<nas; i++) {
63
thread_info[i] = (struct pthreads_s3diag *)
64
malloc(sizeof(struct pthreads_s3diag));
67
norbs = CalcInfo.num_ci_orbs;
68
orbsym = CalcInfo.orbsym + CalcInfo.num_fzc_orbs;
71
for (i=0; i<norbs; i++) {
72
for (j=0; j<=i; j++) {
73
if ((orbsym[i] ^ orbsym[j] ^ Jb_sym ^ Ib_sym) != 0) continue;
75
jlen = form_ilist(betlist, Jb_list, nbs, ij, L, R, Sgn);
78
/* fprintf(outfile,"S3_BLOCK_VDIAG: ij = %d\t jlen = %d\n", ij, jlen);
80
Tptr = tei + ioff[ij];
82
/* gather operation */
83
for (I=0; I<cnas; I++) {
86
for (J=0; J<jlen; J++) {
88
CprimeI0[J] = CI0[L[J]] * tval;
94
if (Parameters.pthreads) {
95
detci_time.s3_mt_before_time = wall_time_new();
96
tpool_queue_open(thread_pool);
97
for (Ia=alplist, Ia_idx=0; Ia_idx<nas; Ia_idx++, Ia++) {
98
thread_info[Ia_idx]->nas = nas;
99
thread_info[Ia_idx]->jlen = jlen;
100
thread_info[Ia_idx]->ij = ij;
101
thread_info[Ia_idx]->Cprime = Cprime;
102
thread_info[Ia_idx]->Ja_list = Ja_list;
103
thread_info[Ia_idx]->Tptr = Tptr;
104
thread_info[Ia_idx]->S = S;
105
thread_info[Ia_idx]->R = R;
106
thread_info[Ia_idx]->Ia_local = Ia;
107
thread_info[Ia_idx]->Ia_idx_local = Ia_idx;
108
tpool_add_work(thread_pool , s3_block_vdiag_pthread, (void *) thread_info[Ia_idx]);
110
tpool_queue_close(thread_pool, 1);
111
detci_time.s3_mt_after_time = wall_time_new();
112
detci_time.s3_mt_total_time += detci_time.s3_mt_after_time - detci_time.s3_mt_before_time;
116
for (Ia=alplist, Ia_idx=0; Ia_idx<nas; Ia_idx++, Ia++) {
118
/* loop over excitations E^a_{kl} from |A(I_a)> */
119
Jacnt = Ia->cnt[Ja_list];
120
Iaridx = Ia->ridx[Ja_list];
121
Iasgn = Ia->sgn[Ja_list];
122
Iaij = Ia->ij[Ja_list];
125
/* fprintf(outfile,"Ia = %x\t Ia_idx = %d\n", Ia, Ia_idx);
129
fprintf(outfile,"S3_BLOCK_VDIAG: Jacnt = %d\n", Jacnt);
133
for (Ia_ex=0; Ia_ex < Jacnt && (kl = *Iaij++)<=ij; Ia_ex++) {
136
if (ij == kl) tval *= 0.5;
137
VS = Tptr[kl] * tval;
138
CprimeI0 = Cprime[I];
141
C_DAXPY(jlen, VS, CprimeI0, 1, V, 1);
143
for (J=0; J<jlen; J++) {
144
V[J] += VS * CprimeI0[J];
152
for (J=0; J<jlen; J++) {
154
S[Ia_idx][RJ] += V[J];
157
} /* end loop over Ia */
160
} /* end loop over j */
161
} /* end loop over i */
163
for (i=0; i<nas; i++) free(thread_info[i]);
170
** S3_BLOCK_VDIAG_PTHREAD()
172
** Multithreaded component of the S3_BLOCK_VDIAG routine
174
** Calculate a block of the sigma3 vector in equation (9c) of
175
** Olsen, Roos, et al. For diagonal blocks of sigma.
177
** currently assumes that (ij|ij)'s have not been halved
178
** Try to get the Olsen vector version working....again!!!!
180
void s3_block_vdiag_pthread(void *threadarg)
183
struct stringwr *Ia_local;
186
double tval, VS, *CprimeI0, *CI0;
188
unsigned int *Iaridx;
190
struct pthreads_s3diag *thread_info_i;
192
int nas, jlen, ij, Ja_list;
193
double **S, **Cprime, *Tptr, *V;
194
int *R, Ia_idx_local;
198
thread_info_i = (struct pthreads_s3diag *) threadarg;
199
nas = thread_info_i->nas;
200
jlen = thread_info_i->jlen;
201
ij = thread_info_i->ij;
202
Ja_list = thread_info_i->Ja_list;
203
S = thread_info_i->S;
204
Cprime = thread_info_i->Cprime;
205
Tptr = thread_info_i->Tptr;
206
R = thread_info_i->R;
207
Ia_idx_local = thread_info_i->Ia_idx_local;
208
Ia_local = thread_info_i->Ia_local;
210
V = init_array(jlen);
211
/* loop over excitations E^a_{kl} from |A(I_a)> */
212
Jacnt = Ia_local->cnt[Ja_list];
213
Iaridx = Ia_local->ridx[Ja_list];
214
Iasgn = Ia_local->sgn[Ja_list];
215
Iaij = Ia_local->ij[Ja_list];
218
fprintf(outfile,"Ia_local = %x\t Ia_idx_local = %d\n", Ia_local, Ia_idx_local);
221
fprintf(outfile,"Jacnt = %d\n", Jacnt);
226
for (Ia_ex=0; Ia_ex < Jacnt && (kl = *Iaij++)<=ij; Ia_ex++) {
229
if (ij == kl) tval *= 0.5;
230
VS = Tptr[kl] * tval;
231
CprimeI0 = Cprime[I];
234
C_DAXPY(jlen, VS, CprimeI0, 1, V, 1);
236
for (J=0; J<jlen; J++) {
237
V[J] += VS * CprimeI0[J];
243
for (J=0; J<jlen; J++) {
245
S[Ia_idx_local][RJ] += V[J];
257
** Calculate a block of the sigma3 vector in equation (9c) of
258
** Olsen, Roos, et al. For non-diagonal blocks of s3
261
void s3_block_v(struct stringwr *alplist, struct stringwr *betlist,
262
double **C, double **S, double *tei, int nas, int nbs, int cnas,
263
int Ib_list, int Ja_list, int Jb_list, int Ib_sym, int Jb_sym,
264
double **Cprime, double *F, double *V, double *Sgn, int *L, int *R)
268
int ij, i, j, kl, ijkl, I, J, RJ;
269
double tval, VS, *CprimeI0, *CI0;
270
int jlen, Ia_idx, Jacnt, *Iaij, *orbsym, norbs;
271
unsigned int *Iaridx;
274
struct pthreads_s3diag **thread_info;
275
int t, npthreads, rc, status;
278
norbs = CalcInfo.num_ci_orbs;
279
orbsym = CalcInfo.orbsym + CalcInfo.num_fzc_orbs;
281
thread = (pthread_t *) malloc(sizeof(pthread_t)*Parameters.nthreads);
282
thread_info = (struct pthreads_s3diag **)
283
malloc(sizeof(struct pthreads_s3diag *) * nas);
284
for (i=0; i<nas; i++) {
285
thread_info[i] = (struct pthreads_s3diag *)
286
malloc(sizeof(struct pthreads_s3diag));
290
for (i=0; i<norbs; i++) {
291
for (j=0; j<=i; j++) {
292
if ((orbsym[i] ^ orbsym[j] ^ Jb_sym ^ Ib_sym) != 0) continue;
294
jlen = form_ilist(betlist, Jb_list, nbs, ij, L, R, Sgn);
298
Tptr = tei + ioff[ij];
300
/* gather operation */
301
for (I=0; I<cnas; I++) {
302
CprimeI0 = Cprime[I];
304
for (J=0; J<jlen; J++) {
306
CprimeI0[J] = CI0[L[J]] * tval;
312
if (Parameters.pthreads) {
313
detci_time.s3_mt_before_time = wall_time_new();
314
tpool_queue_open(thread_pool);
315
for (Ia=alplist, Ia_idx=0; Ia_idx<nas; Ia_idx++, Ia++) {
316
thread_info[Ia_idx]->nas = nas;
317
thread_info[Ia_idx]->jlen = jlen;
318
thread_info[Ia_idx]->ij = ij;
319
thread_info[Ia_idx]->Cprime = Cprime;
320
thread_info[Ia_idx]->Ja_list = Ja_list;
321
thread_info[Ia_idx]->Tptr = tei;
322
thread_info[Ia_idx]->S = S;
323
thread_info[Ia_idx]->R = R;
324
thread_info[Ia_idx]->Ia_local = Ia;
325
thread_info[Ia_idx]->Ia_idx_local = Ia_idx;
326
tpool_add_work(thread_pool, s3_block_v_pthread, (void *) thread_info[Ia_idx]);
328
tpool_queue_close(thread_pool, 1);
329
detci_time.s3_mt_after_time = wall_time_new();
330
detci_time.s3_mt_total_time += detci_time.s3_mt_after_time - detci_time.s3_mt_before_time;
334
for (Ia=alplist, Ia_idx=0; Ia_idx<nas; Ia_idx++, Ia++) {
336
/* loop over excitations E^a_{kl} from |A(I_a)> */
337
Jacnt = Ia->cnt[Ja_list];
338
Iaridx = Ia->ridx[Ja_list];
339
Iasgn = Ia->sgn[Ja_list];
340
Iaij = Ia->ij[Ja_list];
344
for (Ia_ex=0; Ia_ex < Jacnt; Ia_ex++) {
349
VS = tval * tei[ijkl];
350
CprimeI0 = Cprime[I];
353
C_DAXPY(jlen, VS, CprimeI0, 1, V, 1);
355
for (J=0; J<jlen; J++) {
356
V[J] += VS * CprimeI0[J];
364
for (J=0; J<jlen; J++) {
366
S[Ia_idx][RJ] += V[J];
369
} /* end loop over Ia */
372
} /* end loop over j */
373
} /* end loop over i */
374
for (i=0; i<nas; i++) free(thread_info[i]);
380
** S3_BLOCK_V_PTHREAD()
382
** Multithreaded component of the S3_BLOCK_V routine
384
** Calculate a block of the sigma3 vector in equation (9c) of
385
** Olsen, Roos, et al. For non-diagonal blocks of s3
388
void s3_block_v_pthread(void *threadarg)
391
struct stringwr *Ia_local;
393
int I, J, RJ, kl, ijkl;
394
double tval, VS, *CprimeI0, *CI0;
396
unsigned int *Iaridx;
398
struct pthreads_s3diag *thread_info_i;
400
int nas, jlen, ij, Ja_list;
401
double **S, **Cprime, *tei, *V, *Tptr;
402
int *R, Ia_idx_local;
407
thread_info_i = (struct pthreads_s3diag *) threadarg;
408
nas = thread_info_i->nas;
409
jlen = thread_info_i->jlen;
410
ij = thread_info_i->ij;
411
Ja_list = thread_info_i->Ja_list;
412
S = thread_info_i->S;
413
Cprime = thread_info_i->Cprime;
414
tei = thread_info_i->Tptr;
415
R = thread_info_i->R;
416
thread_id = thread_info_i->thread_id;
417
Ia_idx_local = thread_info_i->Ia_idx_local;
418
Ia_local = thread_info_i->Ia_local;
420
V = init_array(jlen);
422
/* loop over excitations E^a_{kl} from |A(I_a)> */
423
Jacnt = Ia_local->cnt[Ja_list];
424
Iaridx = Ia_local->ridx[Ja_list];
425
Iasgn = Ia_local->sgn[Ja_list];
426
Iaij = Ia_local->ij[Ja_list];
432
for (Ia_ex=0; Ia_ex < Jacnt; Ia_ex++) {
437
VS = tval * tei[ijkl];
438
CprimeI0 = Cprime[I];
441
C_DAXPY(jlen, VS, CprimeI0, 1, V, 1);
443
for (J=0; J<jlen; J++) {
444
V[J] += VS * CprimeI0[J];
450
for (J=0; J<jlen; J++) {
452
S[Ia_idx_local][RJ] += V[J];
460
int form_ilist(struct stringwr *alplist, int Ja_list, int nas, int kl,
461
int *L, int *R, double *Sgn)
464
int inum=0, Ia_idx, Ia_ex, Iacnt, ij;
467
unsigned int *Iaridx;
471
for (Ia=alplist, Ia_idx=0; Ia_idx < nas; Ia_idx++,Ia++) {
473
/* loop over excitations E^a_{kl} from |A(I_a)> */
475
Iacnt = Ia->cnt[Ja_list];
476
if (!Iacnt) continue;
477
Iaridx = Ia->ridx[Ja_list];
478
Iasgn = Ia->sgn[Ja_list];
479
Iaij = Ia->ij[Ja_list];
481
while (Ia_ex < Iacnt && (ij = *Iaij++)<kl) Ia_ex++;
484
*L++ = Iaridx[Ia_ex];
485
*Sgn++ = (double) Iasgn[Ia_ex];
488
} /* end loop over Ia */
490
fprintf(outfile,"form_ilist: nas = %d\n", nas);
491
fprintf(outfile,"form_ilist: jlen = %d\n", inum);
500
** S3_BLOCK_VDIAG_ROTF()
502
** Calculate a block of the sigma3 vector in equation (9c) of
503
** Olsen, Roos, et al. For diagonal blocks of sigma.
505
** currently assumes that (ij|ij)'s have not been halved
506
** Try to get the Olsen vector version working....again!!!!
509
void s3_block_vdiag_rotf(int *Cnt[2], int **Ij[2], int **Ridx[2],
510
signed char **Sn[2], double **C, double **S,
511
double *tei, int nas, int nbs, int cnas,
512
int Ib_list, int Ja_list, int Jb_list, int Ib_sym, int Jb_sym,
513
double **Cprime, double *F, double *V, double *Sgn, int *L, int *R)
516
int ij, i, j, kl, I, J, RJ;
517
double tval, VS, *CprimeI0, *CI0;
518
int jlen, Ia_idx, Jacnt, *Iaij, *orbsym, norbs;
523
norbs = CalcInfo.num_ci_orbs;
524
orbsym = CalcInfo.orbsym + CalcInfo.num_fzc_orbs;
527
for (i=0; i<norbs; i++) {
528
for (j=0; j<=i; j++) {
529
if ((orbsym[i] ^ orbsym[j] ^ Jb_sym ^ Ib_sym) != 0) continue;
531
jlen = form_ilist_rotf(Cnt[1], Ridx[1], Sn[1], Ij[1],
536
Tptr = tei + ioff[ij];
538
/* gather operation */
539
for (I=0; I<cnas; I++) {
540
CprimeI0 = Cprime[I];
542
for (J=0; J<jlen; J++) {
544
CprimeI0[J] = CI0[L[J]] * tval;
550
for (Ia_idx=0; Ia_idx<nas; Ia_idx++) {
552
/* loop over excitations E^a_{kl} from |A(I_a)> */
553
Jacnt = Cnt[0][Ia_idx];
554
Iaridx = Ridx[0][Ia_idx];
555
Iasgn = Sn[0][Ia_idx];
556
Iaij = Ij[0][Ia_idx];
560
/* rotf doesn't yet ensure kl's in order */
561
for (Ia_ex=0; Ia_ex < Jacnt; Ia_ex++) {
565
if (kl > ij) continue;
566
if (ij == kl) tval *= 0.5;
567
VS = Tptr[kl] * tval;
568
CprimeI0 = Cprime[I];
571
C_DAXPY(jlen, VS, CprimeI0, 1, V, 1);
573
for (J=0; J<jlen; J++) {
574
V[J] += VS * CprimeI0[J];
581
for (J=0; J<jlen; J++) {
583
S[Ia_idx][RJ] += V[J];
586
} /* end loop over Ia */
588
} /* end loop over j */
589
} /* end loop over i */
597
** Calculate a block of the sigma3 vector in equation (9c) of
598
** Olsen, Roos, et al. For non-diagonal blocks of s3
601
void s3_block_vrotf(int *Cnt[2], int **Ij[2], int **Ridx[2],
602
signed char **Sn[2], double **C, double **S,
603
double *tei, int nas, int nbs, int cnas,
604
int Ib_list, int Ja_list, int Jb_list, int Ib_sym, int Jb_sym,
605
double **Cprime, double *F, double *V, double *Sgn, int *L, int *R)
608
int ij, i, j, kl, ijkl, I, J, RJ;
609
double tval, VS, *CprimeI0, *CI0;
610
int jlen, Ia_idx, Jacnt, *Iaij, *orbsym, norbs;
615
norbs = CalcInfo.num_ci_orbs;
616
orbsym = CalcInfo.orbsym + CalcInfo.num_fzc_orbs;
619
for (i=0; i<norbs; i++) {
620
for (j=0; j<=i; j++) {
621
if ((orbsym[i] ^ orbsym[j] ^ Jb_sym ^ Ib_sym) != 0) continue;
623
jlen = form_ilist_rotf(Cnt[1], Ridx[1], Sn[1], Ij[1],
628
Tptr = tei + ioff[ij];
630
/* gather operation */
631
for (I=0; I<cnas; I++) {
632
CprimeI0 = Cprime[I];
634
for (J=0; J<jlen; J++) {
636
CprimeI0[J] = CI0[L[J]] * tval;
642
for (Ia_idx=0; Ia_idx<nas; Ia_idx++) {
644
/* loop over excitations E^a_{kl} from |A(I_a)> */
645
Jacnt = Cnt[0][Ia_idx];
646
Iaridx = Ridx[0][Ia_idx];
647
Iasgn = Sn[0][Ia_idx];
648
Iaij = Ij[0][Ia_idx];
652
for (Ia_ex=0; Ia_ex < Jacnt; Ia_ex++) {
657
VS = tval * tei[ijkl];
658
CprimeI0 = Cprime[I];
661
C_DAXPY(jlen, VS, CprimeI0, 1, V, 1);
663
for (J=0; J<jlen; J++) {
664
V[J] += VS * CprimeI0[J];
672
for (J=0; J<jlen; J++) {
674
S[Ia_idx][RJ] += V[J];
677
} /* end loop over Ia */
679
} /* end loop over j */
680
} /* end loop over i */
685
int form_ilist_rotf(int *Cnt, int **Ridx, signed char **Sn, int **Ij,
686
int nas, int kl, int *L, int *R, double *Sgn)
689
int inum=0, Ia_idx, Ia_ex, Iacnt, ij;
695
for (Ia_idx=0; Ia_idx < nas; Ia_idx++) {
697
/* loop over excitations E^a_{kl} from |A(I_a)> */
700
if (!Iacnt) continue;
701
Iaridx = Ridx[Ia_idx];
705
for (Ia_ex=0; Ia_ex<Iacnt; Ia_ex++) {
709
*L++ = Iaridx[Ia_ex];
710
*Sgn++ = (double) Iasgn[Ia_ex];
714
} /* end loop over Ia */