10
/* dpd_contract424(): Contracts four-index and two-index quantities to
11
** give a product four-index quantity.
14
** dpdbuf4 *X: A pointer to the leftmost dpd four-index
15
** buffer in the product.
16
** dpdfile2 *Y: A pointer to the rightmost dpd two-index
17
** file in the product.
18
** dpdbuf4 *Z: A pointer to the dpd four-index buffer target.
19
** int sum_X: Indicates which index (values of 0, 1, 2, and 3) of X
21
** int sum_Y: Indicates which index (values of 0 and 1) of Y is to be summed.
22
** int Ztrans: Boolean to indicate whether the final product must
23
** be bra-ket transposed in order for its indices to
24
** match those of the target, Z.
25
** double alpha: A prefactor for the product alpha * X * Y.
26
** double beta: A prefactor for the target beta * Z.
29
int dpd_contract424(dpdbuf4 *X, dpdfile2 *Y, dpdbuf4 *Z, int sum_X,
30
int sum_Y, int Ztrans, double alpha, double beta)
32
int h, nirreps, GX, GY, GZ, hxbuf, hzbuf, h0, Hx, Hy, Hz, GsX, GsZ;
35
int *numlinks, *numrows, *numcols;
37
long int core, memoryd, core_total, rowtot, coltot, maxrows;
38
int xcount, zcount, scount, Ysym;
39
int rowx, rowz, colx, colz;
40
int pq, rs, r, s, Gr, Gs;
42
double ***Xmat, ***Zmat;
44
int *xrow, *xcol, *yrow, *ycol, *zrow, *zcol;
47
nirreps = X->params->nirreps;
48
GX = X->file.my_irrep;
50
GZ = Z->file.my_irrep;
52
memoryd = dpd_main.memory;
53
incore = 1; /* default */
55
dpd_file2_mat_init(Y);
58
if(sum_Y == 0) { Ytrans = 0; numlinks = Y->params->rowtot; symlink=0; }
59
else if(sum_Y == 1) { Ytrans = 1; numlinks = Y->params->coltot; symlink=GY; }
60
else { fprintf(stderr, "Junk Y index %d\n", sum_Y); exit(PSI_RETURN_FAILURE); }
62
if((sum_X == 1) || (sum_X == 2)) dpd_trans4_init(&Xt, X);
64
if(Ztrans) dpd_trans4_init(&Zt, Z);
66
/* if(fabs(beta) > 0.0) dpd_buf4_scm(Z, beta); */
67
dpd_buf4_scm(Z, beta);
70
if(Ytrans) { yrow = Y->params->coltot; ycol = Y->params->rowtot; }
71
else { yrow = Y->params->rowtot; ycol = Y->params->coltot; }
74
for(hxbuf=0; hxbuf < nirreps; hxbuf++) {
76
incore = 1; /* default */
79
if (!Ztrans) hzbuf = hxbuf^GX; else hzbuf = hxbuf^GX^GZ;
82
if (!Ztrans) hzbuf = hxbuf; else hzbuf = hxbuf^GZ;
85
/* Compute the core requirements for the straight contraction */
88
coltot = X->params->coltot[hxbuf^GX];
90
maxrows = DPD_BIGNUM/coltot;
92
fprintf(stderr, "\nLIBDPD Error: cannot compute even the number of rows in contract424.\n");
93
dpd_error("contract424", stderr);
96
else maxrows = DPD_BIGNUM;
97
rowtot = X->params->rowtot[hxbuf];
98
for(; rowtot > maxrows; rowtot -= maxrows) {
99
if(core_total > (core_total + maxrows*coltot)) incore = 0;
100
else core_total += maxrows * coltot;
102
if(core_total > (core_total + rowtot*coltot)) incore = 0;
103
core_total += rowtot * coltot;
105
if(sum_X == 1 || sum_X == 2) core_total *= 2; /* we need room to transpose the X buffer */
108
coltot = Z->params->coltot[hzbuf^GZ];
110
maxrows = DPD_BIGNUM/coltot;
112
fprintf(stderr, "\nLIBDPD Error: cannot compute even the number of rows in contract424.\n");
113
dpd_error("contract424", stderr);
116
else maxrows = DPD_BIGNUM;
117
rowtot = Z->params->rowtot[hzbuf];
118
for(; rowtot > maxrows; rowtot -= maxrows) {
119
if(core_total > (core_total + maxrows*coltot)) incore = 0;
120
else core_total += maxrows * coltot;
122
if(core_total > (core_total + rowtot*coltot)) incore = 0;
123
core_total += rowtot * coltot;
125
if(core_total > memoryd) incore = 0;
127
/* Force incore for all but a "normal" 424 contraction for now */
128
if(Ztrans || sum_X == 0 || sum_X == 1 || sum_X == 2) incore = 1;
131
/* dpd_buf4_scm(Z, beta); */
132
dpd_buf4_mat_irrep_init(Z, hzbuf);
133
if(fabs(beta) > 0.0) dpd_buf4_mat_irrep_rd(Z, hzbuf);
135
dpd_trans4_mat_irrep_init(&Zt, hzbuf);
136
dpd_trans4_mat_irrep_rd(&Zt, hzbuf);
137
dpd_buf4_mat_irrep_close(Z, hzbuf);
138
dpd_trans4_mat_irrep_shift31(&Zt, hzbuf);
140
numrows = Zt.shift.rowtot[hzbuf];
141
numcols = Zt.shift.coltot[hzbuf];
142
Zmat = Zt.shift.matrix[hzbuf];
144
zrow = Zt.shift.rowtot[hzbuf];
145
zcol = Zt.shift.coltot[hzbuf];
149
dpd_buf4_mat_irrep_shift31(Z, hzbuf);
151
numrows = Z->shift.rowtot[hzbuf];
152
numcols = Z->shift.coltot[hzbuf];
153
Zmat = Z->shift.matrix[hzbuf];
155
zrow = Z->shift.rowtot[hzbuf];
156
zcol = Z->shift.coltot[hzbuf];
161
dpd_buf4_mat_irrep_init(X, hxbuf);
162
dpd_buf4_mat_irrep_rd(X, hxbuf);
163
dpd_buf4_mat_irrep_shift13(X, hxbuf);
164
Xmat = X->shift.matrix[hxbuf];
167
xrow = X->shift.coltot[hxbuf];
168
xcol = X->shift.rowtot[hxbuf];
171
else if(sum_X == 1) {
172
dpd_buf4_mat_irrep_init(X, hxbuf);
173
dpd_buf4_mat_irrep_rd(X, hxbuf);
174
dpd_trans4_mat_irrep_init(&Xt, hxbuf);
175
dpd_trans4_mat_irrep_rd(&Xt, hxbuf);
176
dpd_buf4_mat_irrep_close(X, hxbuf);
177
dpd_trans4_mat_irrep_shift31(&Xt, hxbuf);
179
Xmat = Xt.shift.matrix[hxbuf];
182
xrow = Xt.shift.rowtot[hxbuf];
183
xcol = Xt.shift.coltot[hxbuf];
186
else if(sum_X == 2) {
187
dpd_buf4_mat_irrep_init(X, hxbuf);
188
dpd_buf4_mat_irrep_rd(X, hxbuf);
189
dpd_trans4_mat_irrep_init(&Xt, hxbuf);
190
dpd_trans4_mat_irrep_rd(&Xt, hxbuf);
191
dpd_buf4_mat_irrep_close(X, hxbuf);
192
dpd_trans4_mat_irrep_shift13(&Xt, hxbuf);
193
Xmat = Xt.shift.matrix[hxbuf];
196
xrow = Xt.shift.coltot[hxbuf];
197
xcol = Xt.shift.rowtot[hxbuf];
200
else if(sum_X == 3) {
201
dpd_buf4_mat_irrep_init(X, hxbuf);
202
dpd_buf4_mat_irrep_rd(X, hxbuf);
203
dpd_buf4_mat_irrep_shift31(X, hxbuf);
205
Xmat = X->shift.matrix[hxbuf];
208
xrow = X->shift.rowtot[hxbuf];
209
xcol = X->shift.coltot[hxbuf];
214
for(Hz=0; Hz < nirreps; Hz++) {
215
if (!Xtrans && !Ytrans) {Hx=Hz; Hy = Hz^GX; }
216
else if (!Xtrans && Ytrans) {Hx=Hz; Hy = Hz^GX^GY; }
217
else if ( Xtrans && !Ytrans) {Hx=Hz^GX; Hy = Hz^GX; }
218
else if ( Xtrans && Ytrans) {Hx=Hz^GX; Hy = Hz^GX^GY; }
220
if((xrow[Hz] != zrow[Hz]) || (ycol[Hz] != zcol[Hz]) ||
221
(xcol[Hz] != yrow[Hz])) {
222
fprintf(stderr, "** Alignment error in contract424 **\n");
223
fprintf(stderr, "** Irrep: %d; Subirrep: %d **\n",hxbuf,Hz);
224
dpd_error("dpd_contract424", stderr);
227
/* fprintf(outfile,"Hx %d Hy %d Hz %d\n",Hx,Hy,Hz);
228
fprintf(outfile,"numrows %d numlinks %d numcols %d\n",numrows[Hz],numlinks[Hy],numcols[Hz]); */
229
newmm_rking(Xmat[Hx], Xtrans, Y->matrix[Hy], Ytrans,
230
Zmat[Hz], numrows[Hz], numlinks[Hy^symlink],
231
numcols[Hz], alpha, 1.0);
234
for(Hz=0; Hz < nirreps; Hz++) {
235
if (!Xtrans && !Ytrans) {Hx=Hz; Hy = Hz^GX; }
236
else if (!Xtrans && Ytrans) {Hx=Hz; Hy = Hz^GX^GY; }
237
else if ( Xtrans && !Ytrans) {Hx=Hz^GX; Hy = Hz^GX; }
238
else if ( Xtrans && Ytrans) {Hx=Hz^GX; Hy = Hz^GX^GY; }
240
if((xrow[Hz] != zrow[Hz]) || (ycol[Hz] != zcol[Hz]) ||
241
(xcol[Hz] != yrow[Hz])) {
242
fprintf(stderr, "** Alignment error in contract424 **\n");
243
fprintf(stderr, "** Irrep: %d; Subirrep: %d **\n",hxbuf,Hz);
244
dpd_error("dpd_contract424", stderr);
247
if(numrows[Hz] && numcols[Hz] && numlinks[Hy^symlink]) {
248
if(!Xtrans && !Ytrans) {
249
C_DGEMM('n','n',numrows[Hz], numcols[Hz], numlinks[Hy^symlink],
250
alpha, &(Xmat[Hz][0][0]), numlinks[Hy^symlink],
251
&(Y->matrix[Hy][0][0]), numcols[Hz], 1.0,
252
&(Zmat[Hz][0][0]), numcols[Hz]);
254
else if(Xtrans && !Ytrans) {
255
C_DGEMM('t','n',numrows[Hz], numcols[Hz], numlinks[Hy^symlink],
256
alpha, &(Xmat[Hz][0][0]), numrows[Hz],
257
&(Y->matrix[Hy][0][0]), numcols[Hz], 1.0,
258
&(Zmat[Hz][0][0]), numcols[Hz]);
260
else if(!Xtrans && Ytrans) {
261
C_DGEMM('n','t',numrows[Hz], numcols[Hz], numlinks[Hy^symlink],
262
alpha, &(Xmat[Hz][0][0]), numlinks[Hy^symlink],
263
&(Y->matrix[Hy][0][0]), numlinks[Hy^symlink], 1.0,
264
&(Zmat[Hz][0][0]), numcols[Hz]);
267
C_DGEMM('t','t',numrows[Hz], numcols[Hz], numlinks[Hy^symlink],
268
alpha, &(Xmat[Hz][0][0]), numrows[Hz],
269
&(Y->matrix[Hy][0][0]), numlinks[Hy^symlink], 1.0,
270
&(Zmat[Hz][0][0]), numcols[Hz]);
275
if(sum_X == 0) dpd_buf4_mat_irrep_close(X, hxbuf);
276
else if(sum_X == 1) dpd_trans4_mat_irrep_close(&Xt, hxbuf);
277
else if(sum_X == 2) dpd_trans4_mat_irrep_close(&Xt, hxbuf);
278
else if(sum_X == 3) dpd_buf4_mat_irrep_close(X, hxbuf);
281
dpd_buf4_mat_irrep_init(Z, hzbuf);
282
dpd_trans4_mat_irrep_wrt(&Zt, hzbuf);
283
dpd_trans4_mat_irrep_close(&Zt, hzbuf);
286
dpd_buf4_mat_irrep_wrt(Z, hzbuf);
287
dpd_buf4_mat_irrep_close(Z, hzbuf);
289
} /* end if(incore) */
290
else { /* out-of-core for "normal" 424 contractions */
291
/* Prepare the input buffer for the X factor and the target*/
293
fprintf(stderr, "\t424 out-of-core: %d\n", hxbuf);
295
dpd_buf4_mat_irrep_row_init(X, hxbuf);
296
dpd_buf4_mat_irrep_row_init(Z, hzbuf);
298
/* Loop over rows of the X factor and the target */
299
for(pq=0; pq < Z->params->rowtot[hzbuf]; pq++) {
301
dpd_buf4_mat_irrep_row_zero(X, hxbuf, pq);
302
dpd_buf4_mat_irrep_row_rd(X, hxbuf, pq);
304
dpd_buf4_mat_irrep_row_zero(Z, hzbuf, pq);
307
dpd_buf4_mat_irrep_row_rd(Z, hzbuf, pq);
311
for(Gr=0; Gr < nirreps; Gr++) {
315
rowx = X->params->rpi[Gr];
316
colx = X->params->spi[GsX];
317
rowz = Z->params->rpi[Gr];
318
colz = Z->params->spi[GsZ];
320
if(rowx && colx && colz) {
321
C_DGEMM('n',Ytrans?'t':'n',rowx,colz,colx,alpha,
322
&(X->matrix[hxbuf][0][xcount]),colx,
323
&(Y->matrix[Ytrans?GsZ:GsX][0][0]),Ytrans?colx:colz,1.0,
324
&(Z->matrix[hzbuf][0][zcount]),colz);
327
xcount += rowx * colx;
328
zcount += rowz * colz;
332
dpd_buf4_mat_irrep_row_wrt(Z, hzbuf, pq);
336
dpd_buf4_mat_irrep_row_close(X, hxbuf);
337
dpd_buf4_mat_irrep_row_close(Z, hzbuf);
341
if((sum_X == 1) || (sum_X == 2)) dpd_trans4_close(&Xt);
343
if(Ztrans) dpd_trans4_close(&Zt);
345
dpd_file2_mat_close(Y);