30
30
long int size_Y, size_Z, size_file_X_row;
31
31
int incore, nbuckets;
32
32
long int memoryd, core, rows_per_bucket, rows_left, memtotal;
34
34
int *xrow, *xcol, *yrow, *ycol, *zrow, *zcol;
102
fprintf(stderr, "Contract444: memory information.\n");
103
fprintf(stderr, "Contract444: h = %d, row = %d, col = %d, tot = %d\n",
104
Hx, X->params->rowtot[Hx], X->params->coltot[Hx^GX],
105
X->params->rowtot[Hx] * X->params->coltot[Hx^GX]);
103
fprintf(stderr, "Contract444: memory information.\n");
104
fprintf(stderr, "Contract444: h = %d, row = %d, col = %d, tot = %d\n",
105
Hx, X->params->rowtot[Hx], X->params->coltot[Hx^GX],
106
X->params->rowtot[Hx] * X->params->coltot[Hx^GX]);
107
108
fprintf(stderr, "Contract444: nbuckets = %d\n", nbuckets);
108
109
fprintf(stderr, "Contract444: rows_per_bucket = %d\n",rows_per_bucket);
109
110
fprintf(stderr, "Contract444: rows_left = %d\n",rows_left);
114
115
fprintf(stderr, "Contract444: Need %5.2f MB to run in memory.\n",
115
116
((double) memtotal)*byte_conv);
116
117
dpd_file4_cache_print(stderr);
121
if(!incore && Xtrans) {
123
if(!incore && Xtrans) {
122
124
dpd_file4_cache_print(stderr);
123
125
dpd_error("out-of-core contract444 Xtrans=1 not coded", stderr);
127
130
dpd_buf4_mat_irrep_init(X, Hx);
143
146
&(Z->matrix[Hz][0][0]), Z->params->coltot[Hz^GZ]);
147
newmm(X->matrix[Hx], Xtrans, Y->matrix[Hy], Ytrans,
148
Z->matrix[Hz], Z->params->rowtot[Hz], numlinks[Hx^symlink],
149
Z->params->coltot[Hz^GZ], alpha, beta);
152
149
dpd_buf4_mat_irrep_close(X, Hx);
154
151
dpd_buf4_mat_irrep_wrt(Z, Hz);
161
fprintf(stderr, "Contract444: Problem! Out-of-core algorithm used, but Ytrans == 0!\n");
162
exit(PSI_RETURN_FAILURE);
157
/* out-of-core algorithm coded only for NT and TN arrangements, not NN or TT */
158
if(!Ytrans && !Xtrans || Ytrans && Xtrans) {
159
fprintf(stderr, "Out-of-core algorithm not yet coded for NN or TT DGEMM.\n");
160
dpd_error("contract444", stderr);
165
163
dpd_buf4_mat_irrep_init_block(X, Hx, rows_per_bucket);
174
172
dpd_buf4_mat_irrep_rd_block(X, Hx, n*rows_per_bucket, rows_per_bucket);
176
C_DGEMM('n', 't', rows_per_bucket, Z->params->coltot[Hz^GZ],
177
numlinks[Hx^symlink], alpha, &(X->matrix[Hx][0][0]), numlinks[Hx^symlink],
178
&(Y->matrix[Hy][0][0]), numlinks[Hx^symlink], beta,
179
&(Z->matrix[Hz][n*rows_per_bucket][0]), Z->params->coltot[Hz^GZ]);
174
if(!Xtrans && Ytrans) {
175
if(Z->params->coltot[Hz^GZ] && rows_per_bucket && numlinks[Hx^symlink])
176
C_DGEMM('n', 't', rows_per_bucket, Z->params->coltot[Hz^GZ],
177
numlinks[Hx^symlink], alpha, &(X->matrix[Hx][0][0]), numlinks[Hx^symlink],
178
&(Y->matrix[Hy][0][0]), numlinks[Hx^symlink], beta,
179
&(Z->matrix[Hz][n*rows_per_bucket][0]), Z->params->coltot[Hz^GZ]);
181
else if(Xtrans && !Ytrans) {
182
/* CAUTION: We need to accumulate the results of DGEMM for
183
each bucket in this case. So, we set beta="user value"
184
on the first bucket, but beta=1 for every bucket
186
if(Z->params->coltot[Hz^GZ] && Z->params->rowtot[Hz] && rows_per_bucket)
187
C_DGEMM('t', 'n', Z->params->rowtot[Hz], Z->params->coltot[Hz^GZ],
188
rows_per_bucket, alpha, &(X->matrix[Hx][0][0]), X->params->coltot[Hx^GX],
189
&(Y->matrix[Hy][n*rows_per_bucket][0]), Y->params->coltot[Hy^GY], (n==0 ? beta : 1.0),
190
&(Z->matrix[Hz][0][0]), Z->params->coltot[Hz^GZ]);
185
197
dpd_buf4_mat_irrep_rd_block(X, Hx, n*rows_per_bucket, rows_left);
187
C_DGEMM('n', 't', rows_left, Z->params->coltot[Hz^GZ],
188
numlinks[Hx^symlink], alpha, &(X->matrix[Hx][0][0]), numlinks[Hx^symlink],
189
&(Y->matrix[Hy][0][0]), numlinks[Hx^symlink], beta,
190
&(Z->matrix[Hz][n*rows_per_bucket][0]), Z->params->coltot[Hz^GZ]);
199
if(!Xtrans && Ytrans) {
200
if(Z->params->coltot[Hz^GZ] && rows_left && numlinks[Hx^symlink])
201
C_DGEMM('n', 't', rows_left, Z->params->coltot[Hz^GZ],
202
numlinks[Hx^symlink], alpha, &(X->matrix[Hx][0][0]), numlinks[Hx^symlink],
203
&(Y->matrix[Hy][0][0]), numlinks[Hx^symlink], beta,
204
&(Z->matrix[Hz][n*rows_per_bucket][0]), Z->params->coltot[Hz^GZ]);
206
else if(Xtrans && !Ytrans) {
207
if(Z->params->coltot[Hz^GZ] && Z->params->rowtot[Hz] && rows_left)
208
C_DGEMM('t', 'n', Z->params->rowtot[Hz], Z->params->coltot[Hz^GZ],
209
rows_left, alpha, &(X->matrix[Hx][0][0]), X->params->coltot[Hx^GX],
210
&(Y->matrix[Hy][n*rows_per_bucket][0]), Y->params->coltot[Hy^GY], 1.0,
211
&(Z->matrix[Hz][0][0]), Z->params->coltot[Hz^GZ]);
194
216
dpd_buf4_mat_irrep_close_block(X, Hx, rows_per_bucket);
196
218
dpd_buf4_mat_irrep_close(Y, Hy);