4
#include <libipv1/ip_lib.h>
5
#include <libciomr/libciomr.h>
6
#include <libiwl/iwl.h>
13
#include "yoshimine.h"
16
#define INDEX(i,j) ((i>j) ? (ioff[(i)]+(j)) : (ioff[(j)]+(i)))
17
#define MAX0(a,b) (((a)>(b)) ? (a) : (b))
19
/* transform_two_backtr_uhf(): Carry out the backtransformation of the
20
** AA, BB, and AB two-particle density matrices (twopdms) for
21
** UHF-based correlated wave functions:
23
** (1) Yoshimine presort the AA and AB G(pqrs) twopdms into two
24
** supermatrices, with p>=q, r>=s, but with all pq and rs (just like
25
** the usual presort for integral transformations in transform_two.c
26
** and transform_two_uhf.c).
28
** (2) With a single set of p,q,r,s loops, half-transform the AA and
29
** AB twopdms to the AO basis, forming G(pq,ij), and Yoshimine sort
30
** the results into the transposed form, G(ij,pq). Note that since
31
** both twopdms are now ordered (alpha-MO pq x AO ij), they are
32
** combined during the Yoshimine transposition.
34
** (3) Yoshimine presort the BB G(pqrs) as in step (1).
36
** (4) Half-transform the BB twopdm into G(pq,ij) and Yoshimine sort
37
** the result into G(ij,pq).
39
** (5) With a single set of i,j,p,q loops, complete the second
40
** half-transform for both the AA+AB and BB twopdms. Since the final
41
** targets are entirely in the AO basis, they are combined during the
42
** final dump to disk.
44
** (6) The total AO-basis twopdm is Yoshimine sorted into the
45
** "shell-quartet" ordering needed by the derivative integral code.
48
** NB that this structure is (necessarily) very different from the
49
** forward UHF-based transform of the two-electron integrals in
50
** transform_two_uhf.c, so I could not use the same code for forward
51
** and backwards transformations as was possible for the
52
** RHF/ROHF-based transforms in transform_two.c.
58
void transform_two_backtr_uhf(void)
61
long int maxcor, maxcord;
62
int max_buckets, first_tmp_file;
65
int src_orbs, src_ntri, *src_first, *src_last, *src_orbspi;
66
int dst_orbs, dst_ntri, *dst_first, *dst_last, *dst_orbspi, *dst_orbsym;
67
int p, q, psym, qsym, pfirst, plast, qfirst, qlast, pq, pqsym;
68
int r, s, rsym, ssym, rfirst, rlast, sfirst, slast, rs;
69
int i, j, isym, jsym, ifirst, ilast, jfirst, jlast, ij;
70
int k, l, ksym, lsym, kfirst, klast, lfirst, llast, kl, klsym;
71
int P, Q, R, S, K, L, I, J;
72
struct yoshimine YBuffP;
73
struct yoshimine YBuffJ;
74
struct iwlbuf PAA_Buff, PAB_Buff, PBB_Buff;
75
struct iwlbuf JA_Buff, JB_Buff;
76
struct iwlbuf *twopdm_out;
77
double *PAA_block, *PAB_block, *PBB_block, *J_block, *JA_block, *JB_block;
78
double **A_AA, **A_AB, **A_BB, **B, ***CA, ***CB;
79
int A_cols, B_cols, *C_colspi;
81
int ktr, ltr, *reorder, ntei;
82
struct iwlbuf MBuff, JBuff;
83
double *ints, **dens, energy;
86
double *ab_block, **ab_dens1, **ab_dens2, **ab_ints;
88
double AA_norm=0.0, AB_norm=0.0, BB_norm=0.0, AA_norm1=0.0, BB_norm1=0.0;
90
nirreps = moinfo.backtr_nirreps;
91
maxcor = params.maxcor;
92
maxcord = params.maxcord;
93
max_buckets = params.max_buckets;
94
first_tmp_file = params.first_tmp_file;
95
tolerance = params.tolerance;
96
print_lvl = params.print_lvl;
98
src_first = moinfo.backtr_mo_first;
99
src_last = moinfo.backtr_mo_lstact;
100
src_orbspi = moinfo.backtr_mo_active;
101
src_orbs = moinfo.nmo - moinfo.nfzv;
102
src_ntri = src_orbs * (src_orbs+1)/2;
104
dst_first = moinfo.backtr_ao_first;
105
dst_last = moinfo.backtr_ao_last;
106
dst_orbspi = moinfo.backtr_ao_orbspi;
107
dst_orbsym = moinfo.backtr_ao_orbsym;
108
dst_orbs = moinfo.nao;
109
dst_ntri = dst_orbs * (dst_orbs+1)/2;
111
C_colspi = src_orbspi;
113
/** Presort the AA twopdm **/
115
if(params.print_lvl) {
116
fprintf(outfile, "\n\tPre-sorting AA two-particle density...\n\n"); fflush(outfile);
119
yosh_init(&YBuffP, src_ntri, src_ntri, maxcor, maxcord, max_buckets,
120
first_tmp_file, tolerance, outfile);
121
if(print_lvl > 1) { yosh_print(&YBuffP, outfile); fflush(outfile); }
122
yosh_init_buckets(&YBuffP);
123
yosh_rdtwo_backtr_uhf("AA", &YBuffP, PSIF_MO_AA_TPDM, ioff, 1, 1, 1, 0, outfile);
124
yosh_close_buckets(&YBuffP, 0);
125
yosh_sort(&YBuffP, PSIF_AA_PRESORT, 0, ioff, NULL, src_orbs, src_ntri, 0, 1, 0, 0, 1, 0, outfile);
128
/** Presort the AB twopdm **/
130
if(params.print_lvl) {
131
fprintf(outfile, "\n\tPre-sorting AB two-particle density...\n\n"); fflush(outfile);
134
yosh_init(&YBuffP, src_ntri, src_ntri, maxcor, maxcord, max_buckets,
135
first_tmp_file, tolerance, outfile);
136
if(print_lvl > 1) { yosh_print(&YBuffP, outfile); fflush(outfile); }
137
yosh_init_buckets(&YBuffP);
138
yosh_rdtwo_backtr_uhf("AB", &YBuffP, PSIF_MO_AB_TPDM, ioff, 0, 1, 1, 0, outfile);
139
yosh_close_buckets(&YBuffP, 0);
140
yosh_sort(&YBuffP, PSIF_AB_PRESORT, 0, ioff, NULL, src_orbs, src_ntri, 0, 1, 0, 0, 1, 0, outfile);
143
A_cols = MAX0(src_orbs, dst_orbs);
144
A_AA = block_matrix(A_cols,A_cols);
145
A_AB = block_matrix(A_cols,A_cols);
146
A_BB = block_matrix(A_cols,A_cols);
148
B = block_matrix(src_orbs, dst_orbs);
150
CA = moinfo.evects_alpha;
151
CB = moinfo.evects_beta;
153
/* Try an in-core backtr of the AA density */
155
dim = MAX0(src_ntri, dst_ntri);
157
ab_dens1 = block_matrix(dim, dim);
158
ab_dens2 = block_matrix(dim, dim);
160
ab_block = init_array(dim);
161
iwl_buf_init(&JBuff, PSIF_AA_PRESORT, tolerance, 1, 1);
162
for(p=0, pq=0; p < src_orbs; p++) {
163
for(q=0; q <= p; q++, pq++) {
165
zero_arr(ab_block, src_ntri);
166
iwl_buf_rd(&JBuff, pq, ab_block, ioff, ioff, 0, 0, outfile);
168
for(r=0,rs=0; r < src_orbs; r++) {
169
for(s=0; s <= r; s++,rs++) {
170
ab_dens1[pq][rs] = ab_block[rs];
175
iwl_buf_close(&JBuff, 1);
180
fprintf(outfile, "\n\tMO-basis AA SCF Twopdm:\n");
181
print_mat(ab_dens1, dim, dim, outfile);
184
/* Check energy in the MO basis */
186
ntei = src_ntri * (src_ntri + 1)/2;
187
ints = init_array(ntei);
188
iwl_buf_init(&JBuff, PSIF_MO_AA_TEI, tolerance, 1, 0);
189
iwl_buf_rd_all(&JBuff, ints, ioff, ioff, 0, ioff, 0, outfile);
190
iwl_buf_close(&JBuff, 1);
192
for(p=0; p < src_orbs; p++)
193
for(q=0; q < src_orbs ; q++) {
195
for(r=0; r < src_orbs; r++) {
196
for(s=0; s < src_orbs; s++) {
199
energy += ab_dens1[pq][rs] * ints[pqrs];
203
fprintf(outfile, "\n\tAA energy from MO-twopdm: %20.14f\n", energy);
207
for(p=0, pq=0; p < src_orbs; p++) {
208
for(q=0; q <= p; q++, pq++) {
210
for(r=0,rs=0; r < src_orbs; r++) {
211
for(s=0; s <= r; s++,rs++) {
212
A_AB[r][s] = A_AB[s][r] = ab_dens1[pq][rs];
216
C_DGEMM('n','t', src_orbs, dst_orbs, src_orbs, 1.0, A_AB[0], A_cols,
217
CA[0][0], src_orbs, 0.0, B[0], B_cols);
218
zero_mat(A_AB, A_cols, A_cols);
219
C_DGEMM('n','n', dst_orbs, dst_orbs, src_orbs, 1.0, CA[0][0], src_orbs,
220
B[0], B_cols, 0.0, A_AB[0], A_cols);
222
for(r=0, rs=0; r < dst_orbs; r++) {
223
for(s=0; s <= r; s++,rs++) {
224
ab_dens1[pq][rs] = A_AB[r][s];
225
AA_norm += A_AB[r][s] * A_AB[r][s];
231
fprintf(outfile, "\n\tAA_norm (1st half) = %20.15f\n", AA_norm);
234
for(p=0, pq=0; p < src_orbs; p++) {
235
for(q=0; q <= p; q++, pq++) {
236
for(r=0,rs=0; r < dst_orbs; r++) {
237
for(s=0; s <= r; s++,rs++) {
238
ab_dens2[rs][pq] = ab_dens1[pq][rs];
245
for(r=0,rs=0; r < dst_orbs; r++) {
246
for(s=0; s <= r; s++,rs++) {
248
for(p=0, pq=0; p < src_orbs; p++) {
249
for(q=0; q <= p; q++,pq++) {
250
A_AB[p][q] = A_AB[q][p] = ab_dens2[rs][pq];
254
C_DGEMM('n','t',src_orbs,dst_orbs,src_orbs,1.0,A_AB[0], A_cols,
255
CA[0][0], src_orbs, 0.0, B[0], B_cols);
256
zero_mat(A_AB, A_cols, A_cols);
257
C_DGEMM('n','n',dst_orbs,dst_orbs,src_orbs,1.0, CA[0][0], src_orbs,
258
B[0], B_cols, 0.0, A_AB[0], A_cols);
260
for(p=0,pq=0; p < dst_orbs; p++) {
261
for(q=0; q <= p; q++,pq++) {
262
ab_dens2[rs][pq] = A_AB[p][q];
263
AA_norm += A_AB[p][q] * A_AB[p][q];
270
fprintf(outfile, "\n\tAA_norm (2nd half) = %20.15f\n", AA_norm);
274
/* compute the AA energy to test above backtr */
276
ntei = dst_ntri * (dst_ntri + 1)/2;
277
ints = init_array(ntei);
278
iwl_buf_init(&JBuff, PSIF_SO_TEI, tolerance, 1, 0);
279
iwl_buf_rd_all(&JBuff, ints, ioff, ioff, 0, ioff, 0, outfile);
280
iwl_buf_close(&JBuff, 1);
283
for(p=0; p < dst_orbs; p++) {
284
for(q=0; q < dst_orbs; q++) {
286
for(r=0; r < dst_orbs; r++) {
287
for(s=0; s < dst_orbs; s++) {
290
energy += ab_dens2[pq][rs] * ints[pqrs];
295
fprintf(outfile, "\n\tAA energy from AO-twopdm: %20.14f\n", energy);
298
free_block(ab_dens1);
299
free_block(ab_dens2);
303
/* Try an in-core backtr of the AB density */
305
dim = MAX0(src_ntri, dst_ntri);
307
ab_dens1 = block_matrix(dim, dim);
308
ab_dens2 = block_matrix(dim, dim);
310
ab_block = init_array(dim);
311
iwl_buf_init(&JBuff, PSIF_AB_PRESORT, tolerance, 1, 1);
312
for(p=0, pq=0; p < src_orbs; p++) {
313
for(q=0; q <= p; q++, pq++) {
315
zero_arr(ab_block, src_ntri);
316
iwl_buf_rd(&JBuff, pq, ab_block, ioff, ioff, 0, 0, outfile);
318
for(r=0,rs=0; r < src_orbs; r++) {
319
for(s=0; s <= r; s++,rs++) {
320
ab_dens1[pq][rs] = ab_block[rs];
325
iwl_buf_close(&JBuff, 1);
330
fprintf(outfile, "\n\tMO-basis AB SCF Twopdm:\n");
331
print_mat(ab_dens1, dim, dim, outfile);
334
/* Check energy in the MO basis */
336
ntei = src_ntri * (src_ntri + 1)/2;
337
ab_ints = block_matrix(dim,dim);
338
iwl_buf_init(&JBuff, PSIF_MO_AB_TEI, tolerance, 1, 0);
339
iwl_buf_rd_all2(&JBuff, ab_ints, ioff, ioff, 0, ioff, 0, outfile);
340
iwl_buf_close(&JBuff, 1);
342
for(p=0; p < src_orbs; p++) {
343
for(q=0; q < src_orbs; q++) {
345
for(r=0; r < src_orbs; r++) {
346
for(s=0; s < src_orbs; s++) {
351
energy += ab_dens1[pq][rs] * ab_ints[pq][rs];
356
fprintf(outfile, "\n\tAB energy from MO-twopdm: %20.14f\n", energy);
360
for(p=0, pq=0; p < src_orbs; p++) {
361
for(q=0; q <= p; q++, pq++) {
363
for(r=0,rs=0; r < src_orbs; r++) {
364
for(s=0; s <= r; s++,rs++) {
365
A_AB[r][s] = A_AB[s][r] = ab_dens1[pq][rs];
369
C_DGEMM('n','t', src_orbs, dst_orbs, src_orbs, 1.0, A_AB[0], A_cols,
370
CB[0][0], src_orbs, 0.0, B[0], B_cols);
371
zero_mat(A_AB, A_cols, A_cols);
372
C_DGEMM('n','n', dst_orbs, dst_orbs, src_orbs, 1.0, CB[0][0], src_orbs,
373
B[0], B_cols, 0.0, A_AB[0], A_cols);
375
for(r=0, rs=0; r < dst_orbs; r++) {
376
for(s=0; s <= r; s++,rs++) {
377
ab_dens1[pq][rs] = A_AB[r][s];
383
for(p=0, pq=0; p < src_orbs; p++) {
384
for(q=0; q <= p; q++, pq++) {
385
for(r=0,rs=0; r < dst_orbs; r++) {
386
for(s=0; s <= r; s++,rs++) {
387
ab_dens2[rs][pq] = ab_dens1[pq][rs];
394
for(r=0,rs=0; r < dst_orbs; r++) {
395
for(s=0; s <= r; s++,rs++) {
397
for(p=0, pq=0; p < src_orbs; p++) {
398
for(q=0; q <= p; q++,pq++) {
399
A_AB[p][q] = A_AB[q][p] = ab_dens2[rs][pq];
403
C_DGEMM('n','t',src_orbs,dst_orbs,src_orbs,1.0,A_AB[0], A_cols,
404
CA[0][0], src_orbs, 0.0, B[0], B_cols);
405
zero_mat(A_AB, A_cols, A_cols);
406
C_DGEMM('n','n',dst_orbs,dst_orbs,src_orbs,1.0, CA[0][0], src_orbs,
407
B[0], B_cols, 0.0, A_AB[0], A_cols);
409
for(p=0,pq=0; p < dst_orbs; p++) {
410
for(q=0; q <= p; q++,pq++) {
411
ab_dens2[rs][pq] = A_AB[p][q];
419
/* compute to the AB energy to test above backtr */
421
ntei = dst_ntri * (dst_ntri + 1)/2;
422
ints = init_array(ntei);
423
iwl_buf_init(&JBuff, PSIF_SO_TEI, tolerance, 1, 0);
424
iwl_buf_rd_all(&JBuff, ints, ioff, ioff, 0, ioff, 0, outfile);
425
iwl_buf_close(&JBuff, 1);
428
for(p=0; p < dst_orbs; p++) {
429
for(q=0; q < dst_orbs; q++) {
431
for(r=0; r < dst_orbs; r++) {
432
for(s=0; s < dst_orbs; s++) {
437
energy += ab_dens2[pq][rs] * ints[pqrs];
442
fprintf(outfile, "\n\tAB energy from AO-twopdm: %20.14f\n", energy);
445
free_block(ab_dens1);
446
free_block(ab_dens2);
449
/** AA/AB First-half transformation **/
450
fprintf(outfile, "\n\tBeginning AA/AB twopdm transform...\n");
452
PAA_block = init_array(src_ntri);
453
PAB_block = init_array(src_ntri);
454
iwl_buf_init(&PAA_Buff, PSIF_AA_PRESORT, tolerance, 1, 1);
455
iwl_buf_init(&PAB_Buff, PSIF_AB_PRESORT, tolerance, 1, 1);
457
J_block = init_array(MAX0(src_ntri,dst_ntri));
458
yosh_init(&YBuffJ, dst_ntri, src_ntri, maxcor, maxcord, max_buckets,
459
first_tmp_file, tolerance, outfile);
460
if(print_lvl > 1) { yosh_print(&YBuffJ, outfile); fflush(outfile); }
461
yosh_init_buckets(&YBuffJ);
463
for(psym=0; psym < nirreps; psym++) {
464
pfirst = src_first[psym];
465
plast = src_last[psym];
466
for (p=pfirst; p <= plast; p++) {
467
for (qsym=0; qsym <= psym; qsym++) {
468
qfirst = src_first[qsym];
469
qlast = src_last[qsym];
471
for (q=qfirst; (q<=qlast) && (q <= p); q++) {
474
zero_arr(PAA_block, src_ntri);
475
zero_arr(PAB_block, src_ntri);
477
iwl_buf_rd(&PAA_Buff, pq, PAA_block, ioff, ioff, 0, 0, outfile);
478
iwl_buf_rd(&PAB_Buff, pq, PAB_block, ioff, ioff, 0, 0, outfile);
480
for (rsym=0; rsym < nirreps; rsym++) {
481
rfirst = src_first[rsym];
482
rlast = src_last[rsym];
483
kfirst = dst_first[rsym];
484
klast = dst_last[rsym];
487
sfirst = src_first[ssym];
488
slast = src_last[ssym];
489
lfirst = dst_first[ssym];
490
llast = dst_last[ssym];
492
for(r=rfirst,R=0; r <= rlast; r++,R++) {
493
for(s=sfirst,S=0; (s <= slast) && (s <= r);
496
A_AA[R][S] = PAA_block[rs];
497
A_AA[S][R] = PAA_block[rs];
498
A_AB[R][S] = PAB_block[rs];
499
A_AB[S][R] = PAB_block[rs];
504
for (r=rfirst,R=0; r <= rlast; r++,R++) {
505
for (s=sfirst,S=0; s <= slast; s++,S++) {
507
A_AA[R][S] = PAA_block[rs];
508
A_AB[R][S] = PAB_block[rs];
513
/** AA half-transform for current pq **/
514
if(C_colspi[ssym] > 0)
515
C_DGEMM('n','t',src_orbspi[rsym],dst_orbspi[ssym],src_orbspi[ssym],1.0,
516
A_AA[0], A_cols,CA[ssym][0],C_colspi[ssym],0.0,B[0],B_cols);
517
zero_mat(A_AA, A_cols, A_cols);
518
if(C_colspi[rsym] > 0)
519
C_DGEMM('n','n',dst_orbspi[rsym],dst_orbspi[ssym],src_orbspi[rsym],1.0,
520
CA[rsym][0],C_colspi[rsym],B[0],B_cols,0.0,A_AA[0],A_cols);
522
/** AB half-transform for current pq **/
523
if(C_colspi[ssym] > 0)
524
C_DGEMM('n','t',src_orbspi[rsym],dst_orbspi[ssym],src_orbspi[ssym],1.0,
525
A_AB[0], A_cols,CB[ssym][0],C_colspi[ssym],0.0,B[0],B_cols);
526
zero_mat(A_AB, A_cols, A_cols);
527
if(C_colspi[rsym] > 0)
528
C_DGEMM('n','n',dst_orbspi[rsym],dst_orbspi[ssym],src_orbspi[rsym],1.0,
529
CB[rsym][0],C_colspi[rsym],B[0],B_cols,0.0,A_AB[0],A_cols);
531
/* collect the results and sum AA and AB contributions into J_block */
532
zero_arr(J_block, dst_ntri);
533
for(k=kfirst,K=0; k <= klast; k++,K++) {
534
for (l=lfirst,L=0; (l <= llast) && (l <= k); l++,L++) {
536
J_block[kl] = A_AA[K][L] + A_AB[K][L];
540
yosh_wrt_arr(&YBuffJ, p, q, pq, pqsym, J_block,
541
moinfo.nao, ioff, dst_orbsym, dst_first, dst_last, 1, 0, outfile);
550
iwl_buf_close(&PAA_Buff, 0);
551
iwl_buf_close(&PAB_Buff, 0);
555
if (params.print_lvl) {
556
fprintf(outfile, "\tSorting AA/AB half-transformed twopdm...\n"); fflush(outfile);
559
yosh_close_buckets(&YBuffJ, 0);
560
yosh_sort(&YBuffJ, params.jfile, 0, ioff, NULL, src_orbs, src_ntri, 0, 1, 0, 0, 1, 0, outfile);
564
fprintf(outfile, "\tFinished AA/AB half-transformation...\n"); fflush(outfile);
567
/** Presort the BB twopdm **/
569
if(params.print_lvl) {
570
fprintf(outfile, "\n\tPre-sorting BB two-particle density...\n\n"); fflush(outfile);
573
yosh_init(&YBuffP, src_ntri, src_ntri, maxcor, maxcord, max_buckets,
574
first_tmp_file, tolerance, outfile);
575
if(print_lvl > 1) { yosh_print(&YBuffP, outfile); fflush(outfile); }
576
yosh_init_buckets(&YBuffP);
577
yosh_rdtwo_backtr_uhf("BB", &YBuffP, PSIF_MO_BB_TPDM, ioff, 1, 1, 1, 0, outfile);
578
yosh_close_buckets(&YBuffP, 0);
579
yosh_sort(&YBuffP, PSIF_BB_PRESORT, 0, ioff, NULL, src_orbs, src_ntri, 0, 1, 0, 0, 1, 0, outfile);
582
/* Try an in-core backtr of the BB density */
584
dim = MAX0(src_ntri, dst_ntri);
586
ab_dens1 = block_matrix(dim, dim);
587
ab_dens2 = block_matrix(dim, dim);
589
ab_block = init_array(dim);
590
iwl_buf_init(&JBuff, PSIF_BB_PRESORT, tolerance, 1, 1);
591
for(p=0, pq=0; p < src_orbs; p++) {
592
for(q=0; q <= p; q++, pq++) {
594
zero_arr(ab_block, src_ntri);
595
iwl_buf_rd(&JBuff, pq, ab_block, ioff, ioff, 0, 0, outfile);
597
for(r=0,rs=0; r < src_orbs; r++) {
598
for(s=0; s <= r; s++,rs++) {
599
ab_dens1[pq][rs] = ab_block[rs];
604
iwl_buf_close(&JBuff, 1);
609
fprintf(outfile, "\n\tMO-basis BB SCF Twopdm:\n");
610
print_mat(ab_dens1, dim, dim, outfile);
613
/* Check energy in the MO basis */
615
ntei = src_ntri * (src_ntri + 1)/2;
616
ints = init_array(ntei);
617
iwl_buf_init(&JBuff, PSIF_MO_BB_TEI, tolerance, 1, 0);
618
iwl_buf_rd_all(&JBuff, ints, ioff, ioff, 0, ioff, 0, outfile);
619
iwl_buf_close(&JBuff, 1);
621
for(p=0; p < src_orbs; p++) {
622
for(q=0; q < src_orbs; q++) {
624
for(r=0; r < src_orbs; r++) {
625
for(s=0; s < src_orbs; s++) {
630
energy += ab_dens1[pq][rs] * ints[pqrs];
635
fprintf(outfile, "\n\tBB energy from MO-twopdm: %20.14f\n", energy);
639
for(p=0, pq=0; p < src_orbs; p++) {
640
for(q=0; q <= p; q++, pq++) {
642
for(r=0,rs=0; r < src_orbs; r++) {
643
for(s=0; s <= r; s++,rs++) {
644
A_AB[r][s] = A_AB[s][r] = ab_dens1[pq][rs];
648
C_DGEMM('n','t', src_orbs, dst_orbs, src_orbs, 1.0, A_AB[0], A_cols,
649
CB[0][0], src_orbs, 0.0, B[0], B_cols);
650
zero_mat(A_AB, A_cols, A_cols);
651
C_DGEMM('n','n', dst_orbs, dst_orbs, src_orbs, 1.0, CB[0][0], src_orbs,
652
B[0], B_cols, 0.0, A_AB[0], A_cols);
654
for(r=0, rs=0; r < dst_orbs; r++) {
655
for(s=0; s <= r; s++,rs++) {
656
ab_dens1[pq][rs] = A_AB[r][s];
657
BB_norm += A_AB[r][s] * A_AB[r][s];
663
fprintf(outfile, "\n\tBB_norm (1st half) = %20.15f\n", BB_norm);
666
for(p=0, pq=0; p < src_orbs; p++) {
667
for(q=0; q <= p; q++, pq++) {
668
for(r=0,rs=0; r < dst_orbs; r++) {
669
for(s=0; s <= r; s++,rs++) {
670
ab_dens2[rs][pq] = ab_dens1[pq][rs];
677
for(r=0,rs=0; r < dst_orbs; r++) {
678
for(s=0; s <= r; s++,rs++) {
680
for(p=0, pq=0; p < src_orbs; p++) {
681
for(q=0; q <= p; q++,pq++) {
682
A_AB[p][q] = A_AB[q][p] = ab_dens2[rs][pq];
686
C_DGEMM('n','t',src_orbs,dst_orbs,src_orbs,1.0,A_AB[0], A_cols,
687
CB[0][0], src_orbs, 0.0, B[0], B_cols);
688
zero_mat(A_AB, A_cols, A_cols);
689
C_DGEMM('n','n',dst_orbs,dst_orbs,src_orbs,1.0, CB[0][0], src_orbs,
690
B[0], B_cols, 0.0, A_AB[0], A_cols);
692
for(p=0,pq=0; p < dst_orbs; p++) {
693
for(q=0; q <= p; q++,pq++) {
694
ab_dens2[rs][pq] = A_AB[p][q];
695
BB_norm += A_AB[p][q] * A_AB[p][q];
702
fprintf(outfile, "\n\tBB_norm (2nd half) = %20.15f\n", BB_norm);
706
/* compute to the BB energy to test above backtr */
708
ntei = dst_ntri * (dst_ntri + 1)/2;
709
ints = init_array(ntei);
710
iwl_buf_init(&JBuff, PSIF_SO_TEI, tolerance, 1, 0);
711
iwl_buf_rd_all(&JBuff, ints, ioff, ioff, 0, ioff, 0, outfile);
712
iwl_buf_close(&JBuff, 1);
715
for(p=0; p < dst_orbs; p++) {
716
for(q=0; q < dst_orbs; q++) {
718
for(r=0; r < dst_orbs; r++) {
719
for(s=0; s < dst_orbs; s++) {
724
energy += ab_dens2[pq][rs] * ints[pqrs];
729
fprintf(outfile, "\n\tBB energy from AO-twopdm: %20.14f\n", energy);
732
free_block(ab_dens1);
733
free_block(ab_dens2);
736
/** BB First-half transformation **/
737
fprintf(outfile, "\n\tBeginning BB twopdm transform...\n");
739
PBB_block = init_array(src_ntri);
740
iwl_buf_init(&PBB_Buff, PSIF_BB_PRESORT, tolerance, 1, 1);
742
J_block = init_array(MAX0(src_ntri,dst_ntri));
743
yosh_init(&YBuffJ, dst_ntri, src_ntri, maxcor, maxcord, max_buckets,
744
first_tmp_file, tolerance, outfile);
745
if(print_lvl > 1) { yosh_print(&YBuffJ, outfile); fflush(outfile); }
746
yosh_init_buckets(&YBuffJ);
748
for(psym=0; psym < nirreps; psym++) {
749
pfirst = src_first[psym];
750
plast = src_last[psym];
751
for (p=pfirst; p <= plast; p++) {
752
for (qsym=0; qsym <= psym; qsym++) {
753
qfirst = src_first[qsym];
754
qlast = src_last[qsym];
756
for (q=qfirst; (q<=qlast) && (q <= p); q++) {
759
zero_arr(PBB_block, src_ntri);
761
iwl_buf_rd(&PBB_Buff, pq, PBB_block, ioff, ioff, 0, 0, outfile);
763
for (rsym=0; rsym < nirreps; rsym++) {
764
rfirst = src_first[rsym];
765
rlast = src_last[rsym];
766
kfirst = dst_first[rsym];
767
klast = dst_last[rsym];
770
sfirst = src_first[ssym];
771
slast = src_last[ssym];
772
lfirst = dst_first[ssym];
773
llast = dst_last[ssym];
775
for(r=rfirst,R=0; r <= rlast; r++,R++) {
776
for(s=sfirst,S=0; (s <= slast) && (s <= r);
779
A_BB[R][S] = PBB_block[rs];
780
A_BB[S][R] = PBB_block[rs];
785
for (r=rfirst,R=0; r <= rlast; r++,R++) {
786
for (s=sfirst,S=0; s <= slast; s++,S++) {
788
A_BB[R][S] = PBB_block[rs];
793
/** BB half-transform for current pq **/
794
if(C_colspi[ssym] > 0)
795
C_DGEMM('n','t',src_orbspi[rsym],dst_orbspi[ssym],src_orbspi[ssym],1.0,
796
A_BB[0], A_cols,CB[ssym][0],C_colspi[ssym],0.0,B[0],B_cols);
797
zero_mat(A_BB, A_cols, A_cols);
798
if(C_colspi[rsym] > 0)
799
C_DGEMM('n','n',dst_orbspi[rsym],dst_orbspi[ssym],src_orbspi[rsym],1.0,
800
CB[rsym][0],C_colspi[rsym],B[0],B_cols,0.0,A_BB[0],A_cols);
802
/* collect the results J_block */
803
zero_arr(J_block, dst_ntri);
804
for(k=kfirst,K=0; k <= klast; k++,K++) {
805
for (l=lfirst,L=0; (l <= llast) && (l <= k); l++,L++) {
807
J_block[kl] = A_BB[K][L];
811
yosh_wrt_arr(&YBuffJ, p, q, pq, pqsym, J_block,
812
moinfo.nao, ioff, dst_orbsym, dst_first, dst_last, 1, 0, outfile);
821
iwl_buf_close(&PBB_Buff, 0);
824
if (params.print_lvl) {
825
fprintf(outfile, "\tSorting BB half-transformed twopdm...\n"); fflush(outfile);
828
yosh_close_buckets(&YBuffJ, 0);
829
yosh_sort(&YBuffJ, params.jfile+1, 0, ioff, NULL, src_orbs, src_ntri, 0, 1, 0, 0, 1, 0, outfile);
833
fprintf(outfile, "\tFinished BB half-transformation...\n"); fflush(outfile);
837
fprintf(outfile, "\tStarting final half-transformation...\n"); fflush(outfile);
840
JA_block = init_array(MAX0(src_ntri,dst_ntri));
841
JB_block = init_array(MAX0(src_ntri,dst_ntri));
842
iwl_buf_init(&JA_Buff, params.jfile, tolerance, 1, 1);
843
iwl_buf_init(&JB_Buff, params.jfile+1, tolerance, 1, 1);
846
twopdm_out = (struct iwlbuf *) malloc(nbuckets * sizeof(struct iwlbuf));
847
for(p=0; p < nbuckets; p++)
848
iwl_buf_init(&twopdm_out[p], first_tmp_file+p, tolerance, 0, 0);
850
/* iwl_buf_init(&MBuff, 78, tolerance, 0, 0); */
853
reorder = init_int_array(dst_orbs);
854
for(p=0; p < dst_orbs; p++) reorder[p] = p;
857
for (ksym=0; ksym < nirreps; ksym++) {
858
kfirst = dst_first[ksym];
859
klast = dst_last[ksym];
860
for (k=kfirst; k <= klast; k++) {
861
for (lsym=0; lsym <= ksym; lsym++) {
862
lfirst = dst_first[lsym];
863
llast = dst_last[lsym];
865
for (l=lfirst; (l <= llast) && (l <= k); l++) {
868
zero_arr(JA_block, dst_ntri);
869
zero_arr(JB_block, dst_ntri);
870
iwl_buf_rd(&JA_Buff, kl, JA_block, ioff, ioff, 0, 0, outfile);
871
iwl_buf_rd(&JB_Buff, kl, JB_block, ioff, ioff, 0, 0, outfile);
873
for (psym=0; psym < nirreps; psym++) {
874
pfirst = src_first[psym];
875
plast = src_last[psym];
876
ifirst = dst_first[psym];
877
ilast = dst_last[psym];
880
qfirst = src_first[qsym];
881
qlast = src_last[qsym];
882
jfirst = dst_first[qsym];
883
jlast = dst_last[qsym];
884
for (p=pfirst,P=0; p <= plast; p++,P++) {
885
for (q=qfirst,Q=0; q <= qlast; q++,Q++) {
887
A_AA[P][Q] = JA_block[pq];
888
A_BB[P][Q] = JB_block[pq];
892
/** AA/AB second-half-transform for current pq **/
893
if (C_colspi[qsym] > 0)
894
C_DGEMM('n', 't', src_orbspi[psym],dst_orbspi[qsym], src_orbspi[qsym], 1.0,
895
A_AA[0], A_cols, CA[qsym][0], C_colspi[qsym], 0.0, B[0], B_cols);
896
zero_mat(A_AA, A_cols, A_cols);
897
if (C_colspi[psym] > 0)
898
C_DGEMM('n', 'n', dst_orbspi[psym], dst_orbspi[qsym], src_orbspi[psym], 1.0,
899
CA[psym][0], C_colspi[psym], B[0], B_cols, 0.0, A_AA[0], A_cols);
901
/** BB second-half-transform for current pq **/
902
if (C_colspi[qsym] > 0)
903
C_DGEMM('n', 't', src_orbspi[psym],dst_orbspi[qsym], src_orbspi[qsym], 1.0,
904
A_BB[0], A_cols, CB[qsym][0], C_colspi[qsym], 0.0, B[0], B_cols);
905
zero_mat(A_BB, A_cols, A_cols);
906
if (C_colspi[psym] > 0)
907
C_DGEMM('n', 'n', dst_orbspi[psym], dst_orbspi[qsym], src_orbspi[psym], 1.0,
908
CB[psym][0], C_colspi[psym], B[0], B_cols, 0.0, A_BB[0], A_cols);
910
/** combine AA/AB and BB transformed twopdm's for final sort **/
911
for (i=ifirst,I=0; i <= ilast; i++,I++) {
912
for (j=jfirst,J=0; j <= jlast; j++,J++) {
913
A_AA[I][J] += A_BB[I][J];
918
iwl_buf_wrt_mat2(&MBuff, k, l, A_AA, ifirst, ilast, jfirst, jlast, reorder, 0, 0,
922
backsort_write(k, l, A_AA, ifirst, ilast, jfirst, jlast, 0, outfile, twopdm_out, 1);
933
iwl_buf_close(&JA_Buff, 0);
934
iwl_buf_close(&JB_Buff, 0);
937
iwl_buf_flush(&MBuff, 1);
938
iwl_buf_close(&MBuff, 1);
947
for(p=0; p < nbuckets; p++) {
948
iwl_buf_flush(&twopdm_out[p], 1);
949
iwl_buf_close(&twopdm_out[p], 1);
953
fprintf(outfile, "\n\tSorting AO-basis twopdm...");
956
backsort(first_tmp_file, tolerance, 1);
958
fprintf(outfile, "\n\tdone.");
963
fprintf(outfile, "\n\tAA/AB/BB twopdm transformation finished.\n");
964
fprintf(outfile, "\tAO-basis twopdm written to file%d.\n",params.mfile);
969
ntei = dst_ntri * (dst_ntri + 1)/2;
970
ints = init_array(ntei);
971
dens = block_matrix(dst_ntri, dst_ntri);
973
iwl_buf_init(&MBuff, 78, tolerance, 1, 0);
974
iwl_buf_rd_all2(&MBuff, dens, ioff, ioff, 0, ioff, 0, outfile);
975
iwl_buf_init(&JBuff, PSIF_SO_TEI, tolerance, 1, 0);
976
iwl_buf_rd_all(&JBuff, ints, ioff, ioff, 0, ioff, 0, outfile);
978
for(p=0; p < dst_orbs; p++) {
979
for(q=0; q < dst_orbs; q++) {
981
for(r=0; r < dst_orbs; r++) {
982
for(s=0; s < dst_orbs; s++) {
985
energy += dens[pq][rs] * ints[pqrs];
990
fprintf(outfile, "\n\tTotal energy from AO-twopdm: %20.14f\n", energy);
991
iwl_buf_close(&MBuff, 1);
992
iwl_buf_close(&JBuff, 1);