4
#include <libpsio/psio.h>
9
/* dpd_contract444(): Contracts a pair of four-index quantities to
10
** give a product four-index quantity.
13
** dpdbuf4 *X: A pointer to the leftmost dpd four-index
14
** buffer in the product.
15
** dpdbuf4 *Y: A pointer to the rightmost dpd four-index
16
** buffer in the product.
17
** int target_X: Indicates which pair of indices (0 = bra, 1 =
18
** ket) of X is the target pair.
19
** int target_Y: Indicates which pair of indices (0 = bra, 1 =
20
** ket) of Y is the target pair.
21
** double alpha: A prefactor for the product alpha * X * Y.
22
** double beta: A prefactor for the target beta * Z.
25
int dpd_contract444(dpdbuf4 *X, dpdbuf4 *Y, dpdbuf4 *Z,
26
int target_X, int target_Y, double alpha,
29
int n, Hx, Hy, Hz, GX, GY, GZ, nirreps, Xtrans, Ytrans, *numlinks, symlink;
30
long int size_Y, size_Z, size_file_X_row;
32
long int memoryd, core, rows_per_bucket, rows_left, memtotal;
34
int *xrow, *xcol, *yrow, *ycol, *zrow, *zcol;
38
nirreps = X->params->nirreps;
39
GX = X->file.my_irrep;
40
GY = Y->file.my_irrep;
41
GZ = Z->file.my_irrep;
43
if(target_X == 0) { Xtrans = 0; numlinks = X->params->coltot; symlink=GX; }
44
else if(target_X == 1) { Xtrans = 1; numlinks = X->params->rowtot; symlink=0; }
46
if(target_Y == 0) Ytrans = 1;
47
else if(target_Y == 1) Ytrans = 0;
50
if(Xtrans) { xrow = X->params->coltot; xcol = X->params->rowtot; }
51
else { xrow = X->params->rowtot; xcol = X->params->coltot; }
53
if(Ytrans) { yrow = Y->params->coltot; ycol = Y->params->rowtot; }
54
else { yrow = Y->params->rowtot; ycol = Y->params->coltot; }
56
zrow = Z->params->rowtot; zcol = Z->params->coltot;
58
if((zrow != xrow) || (zcol != ycol) || (xcol != yrow)) {
59
fprintf(stderr, "** Alignment error in contract444 **\n");
60
dpd_error("dpd_contract444",stderr);
66
for(Hx=0; Hx < nirreps; Hx++) {
68
if ((!Xtrans)&&(!Ytrans)) {Hy = Hx^GX; Hz = Hx; }
69
else if ((!Xtrans)&&( Ytrans)) {Hy = Hx^GX^GY; Hz = Hx; }
70
else if (( Xtrans)&&(!Ytrans)) {Hy = Hx; Hz = Hx^GX; }
71
else /* (( Xtrans)&&( Ytrans))*/{Hy = Hx^GY; Hz = Hx^GX; }
73
size_Y = ((long) Y->params->rowtot[Hy]) * ((long) Y->params->coltot[Hy^GY]);
74
size_Z = ((long) Z->params->rowtot[Hz]) * ((long) Z->params->coltot[Hz^GZ]);
75
size_file_X_row = ((long) X->file.params->coltot[0]); /* need room for a row of the X->file */
77
memoryd = dpd_memfree() - (size_Y + size_Z + size_file_X_row);
79
if(X->params->rowtot[Hx] && X->params->coltot[Hx^GX]) {
81
if(X->params->coltot[Hx^GX])
82
rows_per_bucket = memoryd/X->params->coltot[Hx^GX];
83
else rows_per_bucket = -1;
85
if(rows_per_bucket > X->params->rowtot[Hx])
86
rows_per_bucket = X->params->rowtot[Hx];
89
dpd_error("contract444: Not enough memory for one row", stderr);
91
nbuckets = ceil((double) X->params->rowtot[Hx]/
92
(double) rows_per_bucket);
94
rows_left = X->params->rowtot[Hx] % rows_per_bucket;
97
if(nbuckets > 1) incore = 0;
103
fprintf(stderr, "Contract444: memory information.\n");
104
fprintf(stderr, "Contract444: h = %d, row = %d, col = %d, tot = %d\n",
105
Hx, X->params->rowtot[Hx], X->params->coltot[Hx^GX],
106
X->params->rowtot[Hx] * X->params->coltot[Hx^GX]);
108
fprintf(stderr, "Contract444: nbuckets = %d\n", nbuckets);
109
fprintf(stderr, "Contract444: rows_per_bucket = %d\n",rows_per_bucket);
110
fprintf(stderr, "Contract444: rows_left = %d\n",rows_left);
111
memtotal = X->params->rowtot[Hx] * X->params->coltot[Hx^GX];
112
byte_conv = ((double) sizeof(double))/1e6;
113
fprintf(stderr, "Contract444: out of core algorithm used.\n");
114
fprintf(stderr, "Contract444: memtotal = %d.\n", memtotal);
115
fprintf(stderr, "Contract444: Need %5.2f MB to run in memory.\n",
116
((double) memtotal)*byte_conv);
117
dpd_file4_cache_print(stderr);
123
if(!incore && Xtrans) {
124
dpd_file4_cache_print(stderr);
125
dpd_error("out-of-core contract444 Xtrans=1 not coded", stderr);
130
dpd_buf4_mat_irrep_init(X, Hx);
131
dpd_buf4_mat_irrep_rd(X, Hx);
133
dpd_buf4_mat_irrep_init(Y, Hy);
134
dpd_buf4_mat_irrep_rd(Y, Hy);
135
dpd_buf4_mat_irrep_init(Z, Hz);
136
if(fabs(beta) > 0.0) dpd_buf4_mat_irrep_rd(Z, Hz);
138
if(Z->params->rowtot[Hz] &&
139
Z->params->coltot[Hz^GZ] &&
140
numlinks[Hx^symlink]) {
141
C_DGEMM(Xtrans?'t':'n', Ytrans?'t':'n',
142
Z->params->rowtot[Hz], Z->params->coltot[Hz^GZ],
143
numlinks[Hx^symlink], alpha,
144
&(X->matrix[Hx][0][0]), X->params->coltot[Hx^GX],
145
&(Y->matrix[Hy][0][0]), Y->params->coltot[Hy^GY], beta,
146
&(Z->matrix[Hz][0][0]), Z->params->coltot[Hz^GZ]);
149
dpd_buf4_mat_irrep_close(X, Hx);
151
dpd_buf4_mat_irrep_wrt(Z, Hz);
152
dpd_buf4_mat_irrep_close(Y, Hy);
153
dpd_buf4_mat_irrep_close(Z, Hz);
157
/* out-of-core algorithm coded only for NT and TN arrangements, not NN or TT */
158
if(!Ytrans && !Xtrans || Ytrans && Xtrans) {
159
fprintf(stderr, "Out-of-core algorithm not yet coded for NN or TT DGEMM.\n");
160
dpd_error("contract444", stderr);
163
dpd_buf4_mat_irrep_init_block(X, Hx, rows_per_bucket);
165
dpd_buf4_mat_irrep_init(Y, Hy);
166
dpd_buf4_mat_irrep_rd(Y, Hy);
167
dpd_buf4_mat_irrep_init(Z, Hz);
168
if(fabs(beta) > 0.0) dpd_buf4_mat_irrep_rd(Z, Hz);
170
for(n=0; n < (rows_left ? nbuckets-1 : nbuckets); n++) {
172
dpd_buf4_mat_irrep_rd_block(X, Hx, n*rows_per_bucket, rows_per_bucket);
174
if(!Xtrans && Ytrans) {
175
if(Z->params->coltot[Hz^GZ] && rows_per_bucket && numlinks[Hx^symlink])
176
C_DGEMM('n', 't', rows_per_bucket, Z->params->coltot[Hz^GZ],
177
numlinks[Hx^symlink], alpha, &(X->matrix[Hx][0][0]), numlinks[Hx^symlink],
178
&(Y->matrix[Hy][0][0]), numlinks[Hx^symlink], beta,
179
&(Z->matrix[Hz][n*rows_per_bucket][0]), Z->params->coltot[Hz^GZ]);
181
else if(Xtrans && !Ytrans) {
182
/* CAUTION: We need to accumulate the results of DGEMM for
183
each bucket in this case. So, we set beta="user value"
184
on the first bucket, but beta=1 for every bucket
186
if(Z->params->coltot[Hz^GZ] && Z->params->rowtot[Hz] && rows_per_bucket)
187
C_DGEMM('t', 'n', Z->params->rowtot[Hz], Z->params->coltot[Hz^GZ],
188
rows_per_bucket, alpha, &(X->matrix[Hx][0][0]), X->params->coltot[Hx^GX],
189
&(Y->matrix[Hy][n*rows_per_bucket][0]), Y->params->coltot[Hy^GY], (n==0 ? beta : 1.0),
190
&(Z->matrix[Hz][0][0]), Z->params->coltot[Hz^GZ]);
197
dpd_buf4_mat_irrep_rd_block(X, Hx, n*rows_per_bucket, rows_left);
199
if(!Xtrans && Ytrans) {
200
if(Z->params->coltot[Hz^GZ] && rows_left && numlinks[Hx^symlink])
201
C_DGEMM('n', 't', rows_left, Z->params->coltot[Hz^GZ],
202
numlinks[Hx^symlink], alpha, &(X->matrix[Hx][0][0]), numlinks[Hx^symlink],
203
&(Y->matrix[Hy][0][0]), numlinks[Hx^symlink], beta,
204
&(Z->matrix[Hz][n*rows_per_bucket][0]), Z->params->coltot[Hz^GZ]);
206
else if(Xtrans && !Ytrans) {
207
if(Z->params->coltot[Hz^GZ] && Z->params->rowtot[Hz] && rows_left)
208
C_DGEMM('t', 'n', Z->params->rowtot[Hz], Z->params->coltot[Hz^GZ],
209
rows_left, alpha, &(X->matrix[Hx][0][0]), X->params->coltot[Hx^GX],
210
&(Y->matrix[Hy][n*rows_per_bucket][0]), Y->params->coltot[Hy^GY], 1.0,
211
&(Z->matrix[Hz][0][0]), Z->params->coltot[Hz^GZ]);
216
dpd_buf4_mat_irrep_close_block(X, Hx, rows_per_bucket);
218
dpd_buf4_mat_irrep_close(Y, Hy);
219
dpd_buf4_mat_irrep_wrt(Z, Hz);
220
dpd_buf4_mat_irrep_close(Z, Hz);