3
\brief Enter brief description of file here
8
#include <libciomr/libciomr.h>
9
#include <libpsio/psio.h>
10
#include <libdpd/dpd.h>
17
namespace psi { namespace cchbar {
20
void ZFW(dpdbuf4 *Z, dpdbuf4 *F, dpdbuf4 *W, double alpha, double beta);
22
/* Wabei_RHF(): Builds the Wabei HBAR matrix elements for CCSD for
23
** spin-adapted, closed-shell cases. (Numbering of individual terms
24
** is given in Wabei.c.) This version produces a final storage of
25
** (Ei,Ab), uses only <ia|bc>-type F integrals, and avoids producing
26
** any intermediates of v^3 storage, beyond the final target. In
27
** addition, all memory bottlenecks larger than v^3.
35
dpdbuf4 F, W, T2, B, Z, Z1, Z2, D, T, C, F1, F2, W1, W2, Tau;
37
int Gef, Gei, Gab, Ge, Gi, Gf, Gmi, Gm, nrows, ncols, nlinks, EE, e, row, Gnm;
38
int Gma, ma, m, a, Ga, Gb, I, i, mi, E, ei, ab, ba, b, BB, fb, bf, fe, ef, mb, am;
39
double ***WW1, ***WW2;
40
int h, incore, core_total, rowtot, coltot, maxrows;
44
if(params.print & 2) {
45
fprintf(outfile, "\tF -> Wabei...");
48
dpd_buf4_init(&F, CC_FINTS, 0, 10, 5, 10, 5, 0, "F <ia|bc>");
49
dpd_buf4_sort(&F, CC_HBAR, qpsr, 11, 5, "WAbEi (Ei,Ab)");
51
if(params.print & 2) fprintf(outfile, "done.\n");
54
/** - F_ME t_Mi^Ab **/
55
if(params.print & 2) {
56
fprintf(outfile, "\tFME*T2 -> Wabei...");
59
dpd_buf4_init(&T2, CC_TAMPS, 0, 0, 5, 0, 5, 0, "tIjAb");
60
dpd_file2_init(&Fme, CC_OEI, 0, 0, 1, "FME");
61
dpd_file2_mat_init(&Fme);
62
dpd_file2_mat_rd(&Fme);
63
dpd_buf4_init(&W, CC_HBAR, 0, 11, 5, 11, 5, 0, "WAbEi (Ei,Ab)");
65
for(Gei=0; Gei < moinfo.nirreps; Gei++) {
66
Gmi = Gab = Gei; /* W and T2 are totally symmetric */
68
dpd_buf4_mat_irrep_init(&T2, Gmi);
69
dpd_buf4_mat_irrep_rd(&T2, Gmi);
72
for(Ge=0; Ge < moinfo.nirreps; Ge++) {
73
Gm = Ge; /* Fme is totally symmetric */
76
W.matrix[Gei] = dpd_block_matrix(moinfo.occpi[Gi], W.params->coltot[Gei]);
78
nrows = moinfo.occpi[Gm];
79
ncols = moinfo.occpi[Gi] * W.params->coltot[Gei];
82
for(E=0; E < moinfo.virtpi[Ge]; E++) {
83
e = moinfo.vir_off[Ge] + E;
85
dpd_buf4_mat_irrep_rd_block(&W, Gei, W.row_offset[Gei][e], moinfo.occpi[Gi]);
87
C_DGEMV('t',nrows,ncols,-1.0,&T2.matrix[Gmi][row][0],ncols,&Fme.matrix[Gm][0][E],
88
moinfo.virtpi[Ge],1.0,W.matrix[Gei][0],1);
90
dpd_buf4_mat_irrep_wrt_block(&W, Gei, W.row_offset[Gei][e], moinfo.occpi[Gi]);
94
row += moinfo.occpi[Gm] * moinfo.occpi[Gi];
95
dpd_free_block(W.matrix[Gei], moinfo.occpi[Gi], W.params->coltot[Gei]);
98
dpd_buf4_mat_irrep_close(&T2, Gmi);
102
dpd_file2_mat_close(&Fme);
103
dpd_file2_close(&Fme);
106
if(params.print & 2) fprintf(outfile, "done.\n");
109
/** <Ab|Ef> t_i^f **/
110
if(params.print & 2) {
111
fprintf(outfile, "\tB*T1 -> Wabei...");
114
/* Term re-written to use only (Ei,Ab) ordering on the target, TDC, 5/11/05 */
115
/* Code modified to use only symmetric and antisymmetry <ab|cd> ints, TDC, 9/25/05 */
116
dpd_buf4_init(&B, CC_BINTS, 0, 5, 8, 8, 8, 0, "B(+) <ab|cd> + <ab|dc>");
117
dpd_file2_init(&T1, CC_OEI, 0, 0, 1, "tIA");
118
dpd_file2_mat_init(&T1);
119
dpd_file2_mat_rd(&T1);
120
dpd_buf4_init(&Z1, CC_TMP0, 0, 11, 8, 11, 8, 0, "Z1(ei,a>=b)");
121
dpd_buf4_scm(&Z1, 0.0); /* this scm is necessary for cases with empty occpi or virtpi irreps */
122
for(Gef=0; Gef < moinfo.nirreps; Gef++) {
123
Gei = Gab = Gef; /* W and B are totally symmetric */
124
for(Ge=0; Ge < moinfo.nirreps; Ge++) {
125
Gf = Ge ^ Gef; Gi = Gf; /* T1 is totally symmetric */
126
B.matrix[Gef] = dpd_block_matrix(moinfo.virtpi[Gf],B.params->coltot[Gef]);
127
Z1.matrix[Gef] = dpd_block_matrix(moinfo.occpi[Gi],Z1.params->coltot[Gei]);
128
nrows = moinfo.occpi[Gi]; ncols = Z1.params->coltot[Gef]; nlinks = moinfo.virtpi[Gf];
129
if(nrows && ncols && nlinks) {
130
for(E=0; E < moinfo.virtpi[Ge]; E++) {
131
e = moinfo.vir_off[Ge] + E;
132
dpd_buf4_mat_irrep_rd_block(&B, Gef, B.row_offset[Gef][e], moinfo.virtpi[Gf]);
133
C_DGEMM('n','n',nrows,ncols,nlinks,0.5,T1.matrix[Gi][0],nlinks,B.matrix[Gef][0],ncols,
134
0.0,Z1.matrix[Gei][0],ncols);
135
dpd_buf4_mat_irrep_wrt_block(&Z1, Gei, Z1.row_offset[Gei][e], moinfo.occpi[Gi]);
138
dpd_free_block(B.matrix[Gef], moinfo.virtpi[Gf], B.params->coltot[Gef]);
139
dpd_free_block(Z1.matrix[Gef], moinfo.occpi[Gi], Z1.params->coltot[Gei]);
143
dpd_file2_mat_close(&T1);
144
dpd_file2_close(&T1);
147
dpd_buf4_init(&B, CC_BINTS, 0, 5, 9, 9, 9, 0, "B(-) <ab|cd> - <ab|dc>");
148
dpd_file2_init(&T1, CC_OEI, 0, 0, 1, "tIA");
149
dpd_file2_mat_init(&T1);
150
dpd_file2_mat_rd(&T1);
151
dpd_buf4_init(&Z2, CC_TMP0, 0, 11, 9, 11, 9, 0, "Z2(ei,a>=b)");
152
dpd_buf4_scm(&Z2, 0.0); /* this scm is necessary for cases with empty occpi or virtpi irreps */
153
for(Gef=0; Gef < moinfo.nirreps; Gef++) {
154
Gei = Gab = Gef; /* W and B are totally symmetric */
155
for(Ge=0; Ge < moinfo.nirreps; Ge++) {
156
Gf = Ge ^ Gef; Gi = Gf; /* T1 is totally symmetric */
157
B.matrix[Gef] = dpd_block_matrix(moinfo.virtpi[Gf],B.params->coltot[Gef]);
158
Z2.matrix[Gef] = dpd_block_matrix(moinfo.occpi[Gi],Z2.params->coltot[Gei]);
159
nrows = moinfo.occpi[Gi]; ncols = Z2.params->coltot[Gef]; nlinks = moinfo.virtpi[Gf];
160
if(nrows && ncols && nlinks) {
161
for(E=0; E < moinfo.virtpi[Ge]; E++) {
162
e = moinfo.vir_off[Ge] + E;
163
dpd_buf4_mat_irrep_rd_block(&B, Gef, B.row_offset[Gef][e], moinfo.virtpi[Gf]);
164
C_DGEMM('n','n',nrows,ncols,nlinks,0.5,T1.matrix[Gi][0],nlinks,B.matrix[Gef][0],ncols,
165
0.0,Z2.matrix[Gei][0],ncols);
166
dpd_buf4_mat_irrep_wrt_block(&Z2, Gei, Z2.row_offset[Gei][e], moinfo.occpi[Gi]);
169
dpd_free_block(B.matrix[Gef], moinfo.virtpi[Gf], B.params->coltot[Gef]);
170
dpd_free_block(Z2.matrix[Gef], moinfo.occpi[Gi], Z2.params->coltot[Gei]);
174
dpd_file2_mat_close(&T1);
175
dpd_file2_close(&T1);
178
dpd_buf4_init(&Z1, CC_TMP0, 0, 11, 5, 11, 8, 0, "Z1(ei,a>=b)");
179
dpd_buf4_init(&Z2, CC_TMP0, 0, 11, 5, 11, 9, 0, "Z2(ei,a>=b)");
180
dpd_buf4_init(&W, CC_HBAR, 0, 11, 5, 11, 5, 0, "WAbEi (Ei,Ab)");
181
dpd_buf4_axpy(&Z1, &W, 1);
182
dpd_buf4_axpy(&Z2, &W, 1);
187
if(params.print & 2) fprintf(outfile, "done.\n");
189
/** Terms IIIc + IIId + IV **/
190
if(params.print & 2) {
191
fprintf(outfile, "\tD*T1*T2 + E*T2 -> Wabei...");
194
dpd_buf4_init(&Z, CC_HBAR, 0, 0, 11, 0, 11, 0, "WMnIe (nM,eI)");
195
dpd_buf4_sort(&Z, CC_HBAR, rspq, 11, 0, "WMnIe (eI,nM)");
197
dpd_buf4_init(&W, CC_HBAR, 0, 11, 5, 11, 5, 0, "WAbEi (Ei,Ab)");
198
dpd_buf4_init(&Z, CC_HBAR, 0, 11, 0, 11, 0, 0, "WMnIe (eI,nM)");
199
dpd_buf4_init(&T, CC_TAMPS, 0, 0, 5, 0, 5, 0, "tauIjAb");
200
/* dpd_contract444(&Z, &T, &W, 1, 1, 1, 1); */
201
for(Gei=0; Gei < moinfo.nirreps; Gei++) {
202
Gab = Gnm = Gei; /* Everything is totally symmetric here */
203
nrows = T.params->rowtot[Gnm];
204
ncols = T.params->coltot[Gab];
206
dpd_buf4_mat_irrep_init(&Z, Gei);
207
dpd_buf4_mat_irrep_rd(&Z, Gei);
208
dpd_buf4_mat_irrep_init(&T, Gnm);
209
dpd_buf4_mat_irrep_rd(&T, Gnm);
210
dpd_buf4_mat_irrep_row_init(&W, Gei);
211
for(ei=0; ei < W.params->rowtot[Gei]; ei++) {
212
dpd_buf4_mat_irrep_row_rd(&W, Gei, ei);
213
C_DGEMV('t',nrows,ncols,1,T.matrix[Gei][0],ncols,Z.matrix[Gei][ei],1,
214
1,W.matrix[Gei][0],1);
215
dpd_buf4_mat_irrep_row_wrt(&W, Gei, ei);
217
dpd_buf4_mat_irrep_row_close(&W, Gei);
218
dpd_buf4_mat_irrep_close(&T, Gnm);
219
dpd_buf4_mat_irrep_close(&Z, Gei);
225
if(params.print & 2) fprintf(outfile, "done.\n");
227
/*** Terms IIIB + V ***/
228
/* Wabei <-- Z1(mi,fb) <ma|fe> - t(mi,fb) <ma|ef> - tau(mi,af) <mb|ef> */
229
if(params.print & 2) {
230
fprintf(outfile, "\t(T2+T1*T1) * F -> Wabei...");
233
build_Z1(); /* Z1(ib,mf) = 2 t(mi,fb) - t(mi,bf) - t(m,b) t(i,f) */
235
/* The default algorithm for terms IIIb+V requires storage of two additional
236
** ovvv quantities on disk besides the <ia|bc> integrals and the Wabei target. */
237
if(!params.wabei_lowdisk) {
239
dpd_buf4_init(&F, CC_FINTS, 0, 10, 5, 10, 5, 0, "F <ia|bc>");
240
dpd_buf4_sort(&F, CC_TMP0, prsq, 10, 5, "F <ia|bc> (ib,ca)");
243
/* can we run these contractions fully in core? */
246
for(h=0; h < moinfo.nirreps; h++) {
247
coltot = F.params->coltot[h];
249
maxrows = DPD_BIGNUM/coltot;
251
fprintf(stderr, "\nWabei_RHF Error: A single row of ovvv > 2 GW.\n");
252
exit(PSI_RETURN_FAILURE);
254
else maxrows = DPD_BIGNUM;
255
rowtot = F.params->rowtot[h];
256
for(; rowtot > maxrows; rowtot -= maxrows) {
257
if(core_total > (core_total + 2*maxrows*coltot)) incore = 0;
258
else core_total += 2*maxrows*coltot;
260
if(core_total > (core_total + 2*rowtot*coltot)) incore = 0;
261
core_total += 2*rowtot*coltot;
263
if(core_total > dpd_memfree()) incore = 0;
264
if(!incore && (params.print & 2))
265
fprintf(outfile, "\nUsing out-of-core algorithm for (T2+T1*T1)*F -> Wabei.\n");
267
dpd_buf4_init(&W, CC_TMP0, 0, 10, 5, 10, 5, 0, "W(ib,ea)");
268
dpd_buf4_init(&F, CC_TMP0, 0, 10, 5, 10, 5, 0, "F <ia|bc> (ib,ca)");
269
dpd_buf4_init(&Z, CC_TMP0, 0, 10, 10, 10, 10, 0, "Z1(ib,mf)");
270
if(incore) dpd_contract444(&Z, &F, &W, 0, 1, 1, 0);
271
else ZFW(&Z, &F, &W, 1, 0);
274
dpd_buf4_init(&F, CC_FINTS, 0, 10, 5, 10, 5, 0, "F <ia|bc>");
275
dpd_buf4_init(&Z, CC_TAMPS, 0, 10, 10, 10, 10, 0, "tIAjb");
276
if(incore) dpd_contract444(&Z, &F, &W, 0, 1, -1, 1);
277
else ZFW(&Z, &F, &W, -1, 1);
280
dpd_buf4_sort_axpy(&W, CC_HBAR, rpsq, 11, 5, "WAbEi (Ei,Ab)", 1);
282
psio_close(CC_TMP0, 0); /* delete the extra ovvv quantities on disk */
283
psio_open(CC_TMP0, PSIO_OPEN_NEW);
284
dpd_buf4_init(&W, CC_TMP0, 0, 10, 5, 10, 5, 0, "W(ia,eb)");
285
dpd_buf4_init(&F, CC_FINTS, 0, 10, 5, 10, 5, 0, "F <ia|bc>");
286
dpd_buf4_init(&Z, CC_TAMPS, 0, 10, 10, 10, 10, 0, "tauIjAb (Ib,jA)");
287
if(incore) dpd_contract444(&Z, &F, &W, 0, 1, -1, 0);
288
else ZFW(&Z, &F, &W, -1, 0);
291
dpd_buf4_sort_axpy(&W, CC_HBAR, rpqs, 11, 5, "WAbEi (Ei,Ab)", 1);
293
psio_close(CC_TMP0, 0); /* delete the extra ovvv quantity on disk */
294
psio_open(CC_TMP0, PSIO_OPEN_NEW);
296
/* This is an alternative algorithm for terms IIIb+V that stores no additional
297
** ovvv terms beyond <ia|bc> integrals and the WAbEi target, but it's very slow. */
300
fprintf(outfile, "\nUsing low-disk algorithm for (T2+T1*T1)*F -> Wabei.\n");
302
dpd_buf4_init(&W, CC_HBAR, 0, 11, 5, 11, 5, 0, "WAbEi (Ei,Ab)");
303
/* prepare rows of W in advance */
304
for(Gei=0; Gei < moinfo.nirreps; Gei++)
305
dpd_buf4_mat_irrep_row_init(&W, Gei);
307
dpd_buf4_init(&F, CC_FINTS, 0, 10, 5, 10, 5, 0, "F <ia|bc>");
308
dpd_buf4_init(&Z, CC_TMP0, 0, 10, 10, 10, 10, 0, "Z1(ib,mf)");
309
dpd_buf4_sort(&Z, CC_TMP0, rpsq, 0, 5, "Z1(mi,fb)");
311
dpd_buf4_init(&Z, CC_TMP0, 0, 0, 5, 0, 5, 0, "Z1(mi,fb)");
312
dpd_buf4_init(&T2, CC_TAMPS, 0, 0, 5, 0, 5, 0, "tIjAb");
313
dpd_buf4_init(&Tau, CC_TAMPS, 0, 0, 5, 0, 5, 0, "tauIjAb");
314
WW1 = (double ***) malloc(moinfo.nirreps * sizeof(double **));
315
WW2 = (double ***) malloc(moinfo.nirreps * sizeof(double **));
316
for(Gma=0; Gma < moinfo.nirreps; Gma++) {
317
dpd_buf4_mat_irrep_row_init(&F, Gma);
318
for(ma=0; ma < F.params->rowtot[Gma]; ma++) {
319
m = F.params->roworb[Gma][ma][0];
320
a = F.params->roworb[Gma][ma][1];
321
Gm = F.params->psym[m];
322
Ga = F.params->qsym[a];
323
dpd_buf4_mat_irrep_row_rd(&F, Gma, ma);
324
for(Gi=0; Gi < moinfo.nirreps; Gi++) {
326
dpd_buf4_mat_irrep_row_init(&Z, Gmi);
327
dpd_buf4_mat_irrep_row_init(&T2, Gmi);
328
dpd_buf4_mat_irrep_row_init(&Tau, Gmi);
329
/* allocate space for WW1 target */
330
for(Gf=0; Gf < moinfo.nirreps; Gf++) {
331
Gb = Gf ^ Gmi; /* Z is totally symmetric */
332
Ge = Gf ^ Gma; /* F is totally symmetric */
333
WW1[Gb] = dpd_block_matrix(moinfo.virtpi[Gb], moinfo.virtpi[Ge]);
334
WW2[Gb] = dpd_block_matrix(moinfo.virtpi[Gb], moinfo.virtpi[Ge]);
336
for(I=0; I < moinfo.occpi[Gi]; I++) {
337
i = moinfo.occ_off[Gi] + I;
338
mi = Z.params->rowidx[m][i];
339
dpd_buf4_mat_irrep_row_rd(&Z, Gmi, mi);
340
dpd_buf4_mat_irrep_row_rd(&T2, Gmi, mi);
341
dpd_buf4_mat_irrep_row_rd(&Tau, Gmi, mi);
342
for(Gf=0; Gf < moinfo.nirreps; Gf++) {
343
Gb = Gf ^ Gmi; /* Z is totally symmetric */
344
Ge = Gf ^ Gma; /* F is totally symmetric */
346
nrows = moinfo.virtpi[Gb];
347
ncols = moinfo.virtpi[Ge];
348
nlinks = moinfo.virtpi[Gf];
349
fb = Z.col_offset[Gmi][Gf];
350
bf = Z.col_offset[Gmi][Gb];
351
fe = F.col_offset[Gma][Gf];
352
ef = F.col_offset[Gma][Ge];
353
if(nrows && ncols && nlinks) {
354
C_DGEMM('t','n',nrows,ncols,nlinks,1,&(Z.matrix[Gmi][0][fb]),nrows,
355
&(F.matrix[Gma][0][fe]),ncols,0,WW1[Gb][0],ncols);
356
C_DGEMM('t','t',nrows,ncols,nlinks,-1,&(T2.matrix[Gmi][0][fb]),nrows,
357
&(F.matrix[Gma][0][ef]),nlinks,1,WW1[Gb][0],ncols);
358
C_DGEMM('n','t',nrows,ncols,nlinks,-1,&(Tau.matrix[Gmi][0][bf]),nlinks,
359
&(F.matrix[Gma][0][ef]),nlinks,0,WW2[Gb][0],ncols);
362
for(E=0; E < moinfo.virtpi[Ge]; E++) {
363
e = moinfo.vir_off[Ge] + E;
364
ei = W.params->rowidx[e][i];
365
dpd_buf4_mat_irrep_row_rd(&W, Gei, ei);
366
for(BB=0; BB < moinfo.virtpi[Gb]; BB++) {
367
b = moinfo.vir_off[Gb] + BB;
368
ab = W.params->colidx[a][b];
369
ba = W.params->colidx[b][a];
370
W.matrix[Gei][0][ab] += WW1[Gb][BB][E];
371
W.matrix[Gei][0][ba] += WW2[Gb][BB][E];
373
dpd_buf4_mat_irrep_row_wrt(&W, Gei, ei);
377
dpd_buf4_mat_irrep_row_close(&Z, Gmi);
378
dpd_buf4_mat_irrep_row_close(&T2, Gmi);
379
dpd_buf4_mat_irrep_row_close(&Tau, Gmi);
382
for(Gf=0; Gf < moinfo.nirreps; Gf++) {
383
Gb = Gf ^ Gmi; /* Z is totally symmetric */
384
Ge = Gf ^ Gma; /* F is totally symmetric */
385
dpd_free_block(WW1[Gb],moinfo.virtpi[Gb], moinfo.virtpi[Ge]);
386
dpd_free_block(WW2[Gb],moinfo.virtpi[Gb], moinfo.virtpi[Ge]);
389
dpd_buf4_mat_irrep_row_close(&F, Gma);
393
dpd_buf4_close(&Tau);
397
for(Gei=0; Gei < moinfo.nirreps; Gei++)
398
dpd_buf4_mat_irrep_row_close(&W, Gei);
402
if(params.print & 2) fprintf(outfile, "done.\n");
404
/** Terms VI and VII **/
406
/** t_in^bf <Mn|Ef> + t_iN^bF <MN||EF> --> Z1_MEib **/
407
if(params.print & 2) {
408
fprintf(outfile, "\tT1*(C+D*T2) -> Wabei...");
411
dpd_buf4_init(&Z, CC_TMP0, 0, 10, 10, 10, 10, 0, "Z(ME,ib)");
412
dpd_buf4_init(&D, CC_DINTS, 0, 10, 10, 10, 10, 0, "D 2<ij|ab> - <ij|ba> (ia,jb)");
413
dpd_buf4_init(&T2, CC_TAMPS, 0, 10, 10, 10, 10, 0, "tIAjb");
414
dpd_contract444(&D, &T2, &Z, 0, 0, 1, 0);
417
dpd_buf4_init(&D, CC_DINTS, 0, 10, 10, 10, 10, 0, "D <ij|ab> (ia,jb)");
418
dpd_buf4_init(&T2, CC_TAMPS, 0, 10, 10, 10, 10, 0, "tIbjA");
419
dpd_contract444(&D, &T2, &Z, 0, 0, -1, 1);
422
dpd_buf4_sort(&Z, CC_TMP0, qrps, 11, 10, "Z(Ei,Mb)");
425
/** - t_M^A ( <Ei|Mb> + Z(Ei,Mb) ) --> W(Ei,Ab) **/
426
dpd_buf4_init(&D, CC_DINTS, 0, 0, 5, 0, 5, 0, "D <ij|ab>");
427
dpd_buf4_sort_axpy(&D, CC_TMP0, spqr, 11, 10, "Z(Ei,Mb)", 1);
429
dpd_buf4_init(&Z, CC_TMP0, 0, 11, 10, 11, 10, 0, "Z(Ei,Mb)");
430
dpd_buf4_init(&W, CC_HBAR, 0, 11, 5, 11, 5, 0, "WAbEi (Ei,Ab)");
431
dpd_file2_init(&T1, CC_OEI, 0, 0, 1, "tIA");
432
/* dpd_contract244(&T1, &Z, &W, 0, 2, 1, -1, 1); */
433
dpd_file2_mat_init(&T1);
434
dpd_file2_mat_rd(&T1);
435
for(Gei=0; Gei < moinfo.nirreps; Gei++) {
436
dpd_buf4_mat_irrep_init(&Z, Gei);
437
dpd_buf4_mat_irrep_rd(&Z, Gei);
438
dpd_buf4_mat_irrep_row_init(&W, Gei);
439
for(ei=0; ei < Z.params->rowtot[Gei]; ei++) {
440
dpd_buf4_mat_irrep_row_rd(&W, Gei, ei);
441
for(Gm=0; Gm < moinfo.nirreps; Gm++) {
442
Ga = Gm; /* T1 is totally symmetric */
443
Gb = Gm ^ Gei; /* Z is totally symmetric */
444
nrows = moinfo.virtpi[Ga];
445
ncols = moinfo.virtpi[Gb];
446
nlinks = moinfo.occpi[Gm];
447
mb = Z.col_offset[Gei][Gm];
448
ab = W.col_offset[Gei][Ga];
449
if(nrows && ncols && nlinks)
450
C_DGEMM('t','n',nrows,ncols,nlinks,-1,T1.matrix[Gm][0],nrows,
451
&(Z.matrix[Gei][ei][mb]),ncols,1,&(W.matrix[Gei][0][ab]),ncols);
453
dpd_buf4_mat_irrep_row_wrt(&W, Gei, ei);
455
dpd_buf4_mat_irrep_close(&Z, Gei);
457
dpd_file2_mat_close(&T1);
458
dpd_file2_close(&T1);
462
/** - t_Ni^Af <mN|fE> --> Z2_mEiA **/
463
dpd_buf4_init(&D, CC_DINTS, 0, 10, 10, 10, 10, 0, "D <ij|ab> (ib,ja)");
464
dpd_buf4_init(&T2, CC_TAMPS, 0, 10, 10, 10, 10, 0, "tIbjA");
465
dpd_buf4_init(&Z, CC_TMP0, 0, 10, 10, 10, 10, 0, "Z(mE,iA)");
466
dpd_contract444(&D, &T2, &Z, 0, 0, 1, 0);
469
dpd_buf4_sort(&Z, CC_TMP0, qrsp, 11, 11, "Z(Ei,Am)");
472
/** t_m^b ( - <mA|iE> + Z(mA,iE) ) --> Z2(Ab,Ei) **/
473
dpd_buf4_init(&C, CC_CINTS, 0, 11, 11, 11, 11, 0, "C <ai|bj>");
474
dpd_buf4_init(&Z, CC_TMP0, 0, 11, 11, 11, 11, 0, "Z(Ei,Am)");
475
dpd_buf4_axpy(&C, &Z, -1.0);
477
dpd_buf4_init(&W, CC_HBAR, 0, 11, 5, 11, 5, 0, "WAbEi (Ei,Ab)");
478
dpd_file2_init(&T1, CC_OEI, 0, 0, 1, "tIA");
479
/* dpd_contract424(&Z, &T1, &W, 3, 0, 0, 1, 1); */
480
dpd_file2_mat_init(&T1);
481
dpd_file2_mat_rd(&T1);
482
for(Gei=0; Gei < moinfo.nirreps; Gei++) {
483
dpd_buf4_mat_irrep_init(&Z, Gei);
484
dpd_buf4_mat_irrep_rd(&Z, Gei);
485
dpd_buf4_mat_irrep_row_init(&W, Gei);
486
for(ei=0; ei < Z.params->rowtot[Gei]; ei++) {
487
dpd_buf4_mat_irrep_row_rd(&W, Gei, ei);
488
for(Gm=0; Gm < moinfo.nirreps; Gm++) {
489
Gb = Gm; /* T1 is totally symmetric */
490
Ga = Gm ^ Gei; /* Z is totally symmetric */
491
nrows = moinfo.virtpi[Ga];
492
ncols = moinfo.virtpi[Gb];
493
nlinks = moinfo.occpi[Gm];
494
am = Z.col_offset[Gei][Ga];
495
ab = W.col_offset[Gei][Ga];
496
if(nrows && ncols && nlinks)
497
C_DGEMM('n','n',nrows,ncols,nlinks,1,&(Z.matrix[Gei][ei][am]),nlinks,
498
T1.matrix[Gm][0],ncols,1,&(W.matrix[Gei][0][ab]),ncols);
500
dpd_buf4_mat_irrep_row_wrt(&W, Gei, ei);
502
dpd_buf4_mat_irrep_close(&Z, Gei);
504
dpd_file2_mat_close(&T1);
505
dpd_file2_close(&T1);
509
if(params.print & 2) fprintf(outfile, "done.\n");
513
** Generate intermediate needed for efficient evaluation of
514
** <am||ef> contributions to Wabei HBAR elements:
516
** Z1(ib,mf) = 2 t(mi,fb) - t(im,fb) - t(i,f) * t(m,b)
524
int h, row, col, p, q, r, s, P, Q, R, S, psym, qsym, rsym, ssym;
526
dpd_buf4_init(&T2, CC_TAMPS, 0, 10, 10, 10, 10, 0, "2 tIAjb - tIBja");
527
dpd_buf4_copy(&T2, CC_TMP0, "Z1(ib,mf)");
530
dpd_file2_init(&T1, CC_OEI, 0, 0, 1, "tIA");
531
dpd_file2_mat_init(&T1);
532
dpd_file2_mat_rd(&T1);
534
dpd_buf4_init(&Z1, CC_TMP0, 0, 10, 10, 10, 10, 0, "Z1(ib,mf)");
535
for(h=0; h < moinfo.nirreps; h++) {
536
dpd_buf4_mat_irrep_init(&Z1, h);
537
dpd_buf4_mat_irrep_rd(&Z1, h);
539
for(row=0; row < Z1.params->rowtot[h]; row++) {
540
p = Z1.params->roworb[h][row][0];
541
q = Z1.params->roworb[h][row][1];
542
P = T1.params->rowidx[p];
543
Q = T1.params->colidx[q];
544
psym = T1.params->psym[p];
545
qsym = T1.params->qsym[q];
546
for(col=0; col < Z1.params->coltot[h]; col++) {
547
r = Z1.params->colorb[h][col][0];
548
s = Z1.params->colorb[h][col][1];
549
R = T1.params->rowidx[r];
550
S = T1.params->colidx[s];
551
rsym = T1.params->psym[r];
552
ssym = T1.params->qsym[s];
554
if(qsym == rsym && psym == ssym)
555
Z1.matrix[h][row][col] -= T1.matrix[rsym][R][Q] * T1.matrix[psym][P][S];
558
dpd_buf4_mat_irrep_wrt(&Z1, h);
559
dpd_buf4_mat_irrep_close(&Z1, h);
561
dpd_file2_mat_close(&T1);
562
dpd_file2_close(&T1);
568
** ZFW(): Compute the product:
570
** W(ib,ea) = alpha * Z(ib,mf) * F(mf,ea) + beta * W(ib,ea)
572
** This algorithm uses all the available core by first storing all of
573
** Z(ib,mf) and splitting the remainder between F and W. It then
574
** makes a single pass through the W(ib,ea) file, but multiple passes
575
** through the F(mf,ea) file.
577
** Note that in this code, I think beta can be only 0 or 1.
582
void ZFW(dpdbuf4 *Z, dpdbuf4 *F, dpdbuf4 *W, double alpha, double beta)
586
int rows_per_bucket, nbuckets, rows_left;
587
int W_row_start, F_row_start;
588
int nrows, ncols, nlinks;
590
for(Gib=0; Gib < moinfo.nirreps; Gib++) {
591
Gea = Gmf = Gib; /* F and W are totally symmetric */
592
dpd_buf4_mat_irrep_init(Z, Gib);
593
dpd_buf4_mat_irrep_rd(Z, Gib);
595
/* how many rows of F/W can we store in half the remaining core? */
596
rows_per_bucket = dpd_memfree()/(2 * F->params->coltot[Gea]);
597
if(rows_per_bucket > F->params->rowtot[Gib])
598
rows_per_bucket = F->params->rowtot[Gib];
599
nbuckets = (int) ceil((double) F->params->rowtot[Gib]/(double) rows_per_bucket);
600
rows_left = F->params->rowtot[Gib] % rows_per_bucket;
602
/* allocate space for the full buckets */
603
dpd_buf4_mat_irrep_init_block(F, Gib, rows_per_bucket);
604
dpd_buf4_mat_irrep_init_block(W, Gib, rows_per_bucket);
606
ncols = W->params->coltot[Gea];
608
for(m=0; m < (rows_left ? nbuckets-1 : nbuckets); m++) {
609
W_row_start = m*rows_per_bucket;
610
zero_arr(W->matrix[Gib][0], rows_per_bucket*ncols);
613
dpd_buf4_mat_irrep_rd_block(W, Gib, W_row_start, rows_per_bucket);
615
for(n=0; n < (rows_left ? nbuckets-1 : nbuckets); n++) {
616
F_row_start = n*rows_per_bucket;
617
dpd_buf4_mat_irrep_rd_block(F, Gib, F_row_start, rows_per_bucket);
619
nrows = rows_per_bucket;
620
nlinks = rows_per_bucket;
622
if(nrows && ncols && nlinks)
623
C_DGEMM('n', 'n', nrows, ncols, nlinks, alpha,
624
&(Z->matrix[Gib][W_row_start][F_row_start]), Z->params->coltot[Gmf],
625
F->matrix[Gmf][0], ncols, 1, W->matrix[Gib][0], ncols);
628
F_row_start = n*rows_per_bucket;
629
dpd_buf4_mat_irrep_rd_block(F, Gib, F_row_start, rows_left);
631
nrows = rows_per_bucket;
634
if(nrows && ncols && nlinks)
635
C_DGEMM('n', 'n', nrows, ncols, nlinks, alpha,
636
&(Z->matrix[Gib][W_row_start][F_row_start]), Z->params->coltot[Gmf],
637
F->matrix[Gmf][0], ncols, 1, W->matrix[Gib][0], ncols);
640
dpd_buf4_mat_irrep_wrt_block(W, Gib, W_row_start, rows_per_bucket);
643
W_row_start = m*rows_per_bucket;
644
zero_arr(W->matrix[Gib][0], rows_per_bucket*ncols);
647
dpd_buf4_mat_irrep_rd_block(W, Gib, W_row_start, rows_left);
649
for(n=0; n < (rows_left ? nbuckets-1 : nbuckets); n++) {
650
F_row_start = n*rows_per_bucket;
651
dpd_buf4_mat_irrep_rd_block(F, Gib, F_row_start, rows_per_bucket);
654
nlinks = rows_per_bucket;
656
if(nrows && ncols && nlinks)
657
C_DGEMM('n', 'n', nrows, ncols, nlinks, alpha,
658
&(Z->matrix[Gib][W_row_start][F_row_start]), Z->params->coltot[Gmf],
659
F->matrix[Gmf][0], ncols, 1, W->matrix[Gib][0], ncols);
662
F_row_start = n*rows_per_bucket;
663
dpd_buf4_mat_irrep_rd_block(F, Gib, F_row_start, rows_left);
668
if(nrows && ncols && nlinks)
669
C_DGEMM('n', 'n', nrows, ncols, nlinks, alpha,
670
&(Z->matrix[Gib][W_row_start][F_row_start]), Z->params->coltot[Gmf],
671
F->matrix[Gmf][0], ncols, 1, W->matrix[Gib][0], ncols);
673
dpd_buf4_mat_irrep_wrt_block(W, Gib, W_row_start, rows_left);
676
dpd_buf4_mat_irrep_close_block(F, Gib, rows_per_bucket);
677
dpd_buf4_mat_irrep_close_block(W, Gib, rows_per_bucket);
679
dpd_buf4_mat_irrep_close(Z, Gib);
684
}} // namespace psi::cchbar