6
#include<libciomr/libciomr.h>
8
#include<libpsio/psio.h>
9
#include<libint/libint.h>
10
#include<libr12/libr12.h>
17
#include"r12_quartet_data.h"
18
#include"norm_quartet.h"
20
#include"quartet_permutations.h"
21
#include"rmp2r12_energy.h"
23
namespace psi{ namespace CINTS{ namespace ump2r12_bb{
25
/*-------------------------------------------------------
28
***Split into threads. Each thread does the following:
30
Loop over I batches (batch size num_i_per_batch) of active DOCC
32
Loop over all symmetry-unique shells UR, US<=UR
33
***if this UR, US is to be handled by this thread - proceed, else skip to next one
34
Find all symmetry-distinct shell doublets resulting from (UR US|
35
Loop over all resulting shell doublets R, S
37
Loop over all symmetry-unique shells UP, UQ<=UP
38
Find all symmetry-distinct shell quartets resulting from (R S|UP UQ)
39
Loop over the resulting set of P, Q doublets
40
Evaluate (RS|PQ), (RS|r12|PQ), and (RS|[r12,T1]|PQ)
41
Loop over p in P, q in Q, r in R, s in S, i in I
42
(rs|iq) += Cpi * (rs|pq)
43
(rs|ip) += Cqi * (rs|pq)
44
same for (rs|r12|pq) and (rs|[r12,T1]|pq)
45
End p, q, r, s, i loop
49
Loop over r in R, s in S
50
Loop over q < nao, x < num_mo, i in I
51
(rs|ix) += Cxq * (rs|iq)
52
same for (rs|r12|iq) and (rs|[r12,T1]|iq)
55
***Lock (js| and (jr| (either individual or shell blocks depending on LOCK_RS_SHELL in rmp2r12_energy.h)
56
Loop over r in R, s in S
57
Loop over i in I, x < num_mo, j <= i
58
(js|ix) += Cjr * (rs|ix)
59
(jr|ix) += Cjs * (rs|ix)
60
same for (rs|r12|ix), but
61
(js|[r12,T1]|ix) += Cjr * (rs|[r12,T1]|ix)
62
(jr|[r12,T1]|ix) -= Cjs * (rs|[r12,T1]|ix) <---- Note the minus sign here!!!
65
***Unlock (js| and (jr|
70
***Barrier: threads wait until everyone is done
71
***Do the following in one thread only
72
Loop over i in I, j <= i
73
Loop over r < nao, x < num_mo, y < num_mo
74
(jy|ix) += Cys * (js|ix)
75
same for (js|r12|ix) and (js|[r12,T1]|ix)
83
-------------------------------------------------------*/
86
void *ump2r12_energy_thread_bb(void *tnum_ptr)
88
const long int thread_num = (long int) tnum_ptr;
89
const double toler = UserOptions.cutoff;
91
extern pthread_mutex_t rmp2r12_energy_mutex;
92
extern pthread_mutex_t *rmp2r12_sindex_mutex;
93
extern pthread_cond_t rmp2r12_energy_cond;
94
extern RMP2R12_Status_t RMP2R12_Status;
95
extern double *jsix_buf[NUM_TE_TYPES]; /* buffer for (js|ia) integrals, where j runs over all beta occ act MOs,
96
s runs over all AOs, i - over I-batch, x - over all MOs */
97
extern double *jyix_buf[NUM_TE_TYPES]; /* buffer contains all MP2-R12/A-type integrals (BB|BB) spin case*/
98
extern double *jsix_buf2; /* buffer for (js|ia) integrals, where j runs over all alpha occ act MOs,
99
s runs over all AOs, i - over I-batch, x - over all MOs */
100
extern double *jyix_buf2; /* buffer contains all (BB|[r12,T1]|AA) integrals*/
102
extern int *first, *last; /* first and last absolute (Pitzer) orbital indices in symblk */
103
extern int *fstocc_alpha, *lstocc_alpha; /* first and last occupied indices in Pitzer ordering for each symblk */
104
extern int *fstocc_beta, *lstocc_beta; /* first and last occupied indices in Pitzer ordering for each symblk */
105
extern int *occ_alpha, *occ_beta; /* Pitzer to "full"(no frozen core) QTS index mapping */
106
extern int *act2fullQTS_alpha; /* Maps "active"(taking frozen core into account) QTS into "frozen" QTS index */
107
extern int *act2fullQTS_beta; /* Maps "active"(taking frozen core into account) QTS into "frozen" QTS index */
108
extern int *ioff3; /* returns pointers to the beginning of rows in a rectangular matrix */
110
struct shell_pair *sp_ij, *sp_kl;
112
double_array_t fjt_table;
114
int ij, kl, ik, jl, ijkl;
115
int count, dum, status;
117
int n, num[NUM_TE_TYPES];
120
int pkblock_end_index = -1;
121
int i, j, m, p, q, r, s, x, y;
122
int isym, jsym, xsym, ysym;
123
int p_abs, q_abs, r_abs, s_abs;
125
int sii, sjj, skk, sll , slll;
126
int num_ij, swap_ij_kl;
128
int *sj_arr, *sk_arr, *sl_arr;
129
int sr_fao, ss_fao, sp_fao, sq_fao;
130
int usii,usjj,uskk,usll,usi,usj,usk,usl,usij;
131
int stab_i,stab_j,stab_k,stab_l,stab_ij,stab_kl;
132
int *R_list, *S_list, *T_list;
134
int dcr_ij, dcr_kl, dcr_ijkl;
136
int num_unique_quartets;
138
int max_num_unique_quartets;
139
int max_num_prim_comb;
141
int size, class_size;
143
int max_cart_class_size;
145
int np_i, np_j, np_k, np_l;
148
int num_ibatch, num_i_per_ibatch, ibatch, ibatch_first, ibatch_length;
149
int imin, imax, jmin;
150
int max_bf_per_shell;
151
int mo_i, mo_j, mo_x, mo_y;
154
int rs_offset, rsi_offset, rsp_offset;
157
char ij_key_string[80];
158
int iounits[NUM_TE_TYPES]; /* integrals file numbers */
159
int iounits2[NUM_TE_TYPES]; /* integrals file numbers */
161
double *raw_data[NUM_TE_TYPES]; /* pointers to the unnormalized target quartet of integrals */
162
double *data[NUM_TE_TYPES]; /* pointers to the transformed normalized target quartet of integrals */
165
double ssss, ss_r12_ss;
168
double *rsiq_buf[NUM_TE_TYPES]; /* buffer for (rs|iq) integrals, where r,s run over shell sets,
169
i runs over I-batch, q runs over all AOs */
171
double *ix_block_ptr;
172
double *rsix_buf[NUM_TE_TYPES]; /* buffer for (rs|iq) integrals, where r,s run over shell sets,
173
i runs over I-batch, x runs over all MOs */
179
double temp1,temp2,*iq_row,*ip_row;
183
/*-------------------------
184
Allocate data structures
185
-------------------------*/
186
max_bf_per_shell = ioff[BasisSet.max_am];
187
max_cart_class_size = (max_bf_per_shell)*
191
max_num_unique_quartets = Symmetry.max_stab_index*
192
Symmetry.max_stab_index*
193
Symmetry.max_stab_index;
194
sj_arr = (int *)malloc(sizeof(int)*max_num_unique_quartets);
195
sk_arr = (int *)malloc(sizeof(int)*max_num_unique_quartets);
196
sl_arr = (int *)malloc(sizeof(int)*max_num_unique_quartets);
197
if (Symmetry.nirreps > 1)
198
max_class_size = max_cart_class_size;
199
#ifdef NONDOUBLE_INTS
200
for(i=0;i<NUM_TE_TYPES;i++)
201
raw_data[i] = init_array(max_cart_class_size);
203
max_num_prim_comb = (BasisSet.max_num_prims*
204
BasisSet.max_num_prims)*
205
(BasisSet.max_num_prims*
206
BasisSet.max_num_prims);
207
init_libr12(&Libr12,BasisSet.max_am-1,max_num_prim_comb);
208
init_fjt_table(&fjt_table);
210
num_i_per_ibatch = RMP2R12_Status.num_i_per_ibatch;
211
num_ibatch = RMP2R12_Status.num_ibatch;
212
ibatch_first = RMP2R12_Status.ibatch_first;
213
for(te_type=0;te_type<NUM_TE_TYPES-1;te_type++) {
214
rsiq_buf[te_type] = init_array(num_i_per_ibatch*BasisSet.num_ao*
215
max_bf_per_shell*max_bf_per_shell);
216
rsix_buf[te_type] = init_array(num_i_per_ibatch*MOInfo.num_mo*
217
max_bf_per_shell*max_bf_per_shell);
219
jy_buf = block_matrix(MOInfo.beta_occ,MOInfo.num_mo);
220
xy_buf = init_array(MOInfo.num_mo*MOInfo.num_mo);
221
scratch_buf = init_array(MAX(MOInfo.beta_act_occ*MOInfo.num_mo*
222
num_i_per_ibatch*MOInfo.num_mo,
223
MAX(max_cart_class_size,
224
num_i_per_ibatch*BasisSet.num_ao*
225
max_bf_per_shell*max_bf_per_shell)));
227
/*-----------------------------------
228
generate all unique shell quartets
229
-----------------------------------*/
230
/*--- I-batch loop ---*/
231
for (ibatch=ibatch_first;ibatch<num_ibatch;ibatch++) {
232
imin = ibatch * num_i_per_ibatch + MOInfo.nfrdocc;
233
imax = MIN( imin+num_i_per_ibatch , MOInfo.beta_occ );
234
ibatch_length = imax - imin;
235
jmin = MOInfo.nfrdocc;
237
fprintf(outfile," Pass #%d, MO %d through MO %d\n",ibatch,imin+1,imax);
240
/*--- "unique" R,S loop ---*/
242
for (usii=0; usii<Symmetry.num_unique_shells; usii++)
243
for (usjj=0; usjj<=usii; usjj++, usij++) {
244
/*--- Decide if this thread will do this ---*/
245
if ( usij%UserOptions.num_threads != thread_num )
247
usi = usii; usj = usjj;
249
/*----------------------------------------------------------------------
250
NOTE on swapping usi, usj, etc.:
252
For 2-electron integrals of Hermitian operators it does not matter
253
if we swap si and sj, or sk and sl, or even si,sj and sk,sl. It
254
matters here though since [r12,T1] is non-Hermitian! What we want
255
in the end are the integrals of the [r12,T1] operator of this type:
256
(si sj|[r12,T1]|sk sl). If we have to swap bra and ket in the process
257
we will end up with having to compute (sk sl|[r12,T2]|si sj) instead.
258
Therefore if we have to swap bra and ket (swap_ij_kl = 1), then we
259
have to take [r12,T2] integral instead and swap it's bra and ket back.
260
----------------------------------------------------------------------*/
262
/*--- As usual, swap order usi and usj according to their angular momenta ---*/
263
if(BasisSet.shells[Symmetry.us2s[usi]].am < BasisSet.shells[Symmetry.us2s[usj]].am){
269
sii = Symmetry.us2s[usi];
270
sjj = Symmetry.us2s[usj];
271
if (Symmetry.nirreps > 1) {
272
stab_i = Symmetry.atom_positions[BasisSet.shells[sii].center-1];
273
stab_j = Symmetry.atom_positions[BasisSet.shells[sjj].center-1];
274
stab_ij = Symmetry.GnG[stab_i][stab_j];
275
R_list = Symmetry.dcr[stab_i][stab_j];
276
num_ij = Symmetry.dcr_dim[stab_i][stab_j];
282
for(dcr_ij=0;dcr_ij<num_ij;dcr_ij++) {
283
if (Symmetry.nirreps > 1)
288
sj = BasisSet.shells[sjj].trans_vec[R]-1;
290
/*--- "Unique" P,Q loop ---*/
291
for (uskk=0; uskk<Symmetry.num_unique_shells; uskk++)
292
for (usll=0; usll<=uskk; usll++){
294
/*--- For each combination of unique shells generate "petit list" of shells ---*/
295
usk = uskk; usl = usll;
296
/*--- As usual, swap order usk and usl according to their angular momenta ---*/
297
if(BasisSet.shells[Symmetry.us2s[usk]].am < BasisSet.shells[Symmetry.us2s[usl]].am){
302
/*--- DO NOT SWAP bra and ket at this time. Do it later, in the main loop ---*/
303
if(BasisSet.shells[Symmetry.us2s[usi]].am + BasisSet.shells[Symmetry.us2s[usj]].am >
304
BasisSet.shells[Symmetry.us2s[usk]].am + BasisSet.shells[Symmetry.us2s[usl]].am)
309
skk = Symmetry.us2s[usk];
310
sll = Symmetry.us2s[usl];
311
if (Symmetry.nirreps > 1) { /*--- Non-C1 symmetry case ---*/
312
/*--- Generate the petite list of shell quadruplets using DCD approach of Davidson ---*/
313
stab_k = Symmetry.atom_positions[BasisSet.shells[skk].center-1];
314
stab_l = Symmetry.atom_positions[BasisSet.shells[sll].center-1];
315
stab_kl = Symmetry.GnG[stab_k][stab_l];
316
S_list = Symmetry.dcr[stab_k][stab_l];
317
T_list = Symmetry.dcr[stab_ij][stab_kl];
318
lambda_T = Symmetry.nirreps/Symmetry.dcr_deg[stab_ij][stab_kl];
320
memset(sj_arr,0,sizeof(int)*max_num_unique_quartets);
321
memset(sk_arr,0,sizeof(int)*max_num_unique_quartets);
322
memset(sl_arr,0,sizeof(int)*max_num_unique_quartets);
325
for(dcr_ijkl=0;dcr_ijkl<Symmetry.dcr_dim[stab_ij][stab_kl];dcr_ijkl++){
326
T = T_list[dcr_ijkl];
327
sk = BasisSet.shells[skk].trans_vec[T]-1;
328
slll = BasisSet.shells[sll].trans_vec[T]-1;
329
for(dcr_kl=0;dcr_kl<Symmetry.dcr_dim[stab_k][stab_l];dcr_kl++) {
331
sl = BasisSet.shells[slll].trans_vec[S]-1;
333
total_am = BasisSet.shells[si].am +
334
BasisSet.shells[sj].am +
335
BasisSet.shells[sk].am +
336
BasisSet.shells[sl].am;
337
/*-------------------------------------------------------------
338
Obviously redundant or zero cases should be eliminated here!
339
Right now only zero case is eliminated. Redundancies arising
340
in DCD approach when usi == usj etc. may be eliminated too
341
but lambda_T will have to be replaced by an array (it won't
342
the same for every shell quartet in petite list anymore).
343
-------------------------------------------------------------*/
345
(BasisSet.shells[si].center!=BasisSet.shells[sj].center)||
346
(BasisSet.shells[sj].center!=BasisSet.shells[sk].center)||
347
(BasisSet.shells[sk].center!=BasisSet.shells[sl].center)) {
354
} /* petite list is ready to be used */
355
num_unique_quartets = count;
357
else { /*--- C1 symmetry case ---*/
358
total_am = BasisSet.shells[si].am +
359
BasisSet.shells[usj].am +
360
BasisSet.shells[usk].am +
361
BasisSet.shells[usl].am;
363
(BasisSet.shells[si].center!=BasisSet.shells[usj].center)||
364
(BasisSet.shells[usj].center!=BasisSet.shells[usk].center)||
365
(BasisSet.shells[usk].center!=BasisSet.shells[usl].center)) {
366
num_unique_quartets = 1;
372
num_unique_quartets = 0;
375
/*----------------------------------
376
Compute the nonredundant quartets
377
----------------------------------*/
378
for(plquartet=0;plquartet<num_unique_quartets;plquartet++) {
380
sj = sj_arr[plquartet];
381
sk = sk_arr[plquartet];
382
sl = sl_arr[plquartet];
383
/*--- As usual, we have to order bra-ket so that ket has the largest angular momentum */
392
np_i = BasisSet.shells[si].n_prims;
393
np_j = BasisSet.shells[sj].n_prims;
394
np_k = BasisSet.shells[sk].n_prims;
395
np_l = BasisSet.shells[sl].n_prims;
396
orig_am[0] = BasisSet.shells[si].am-1;
397
orig_am[1] = BasisSet.shells[sj].am-1;
398
orig_am[2] = BasisSet.shells[sk].am-1;
399
orig_am[3] = BasisSet.shells[sl].am-1;
400
am = orig_am[0] + orig_am[1] + orig_am[2] + orig_am[3];
402
sp_ij = &(BasisSet.shell_pairs[si][sj]);
403
sp_kl = &(BasisSet.shell_pairs[sk][sl]);
405
Libr12.ShellQuartet.AB[0] = sp_ij->AB[0];
406
Libr12.ShellQuartet.AB[1] = sp_ij->AB[1];
407
Libr12.ShellQuartet.AB[2] = sp_ij->AB[2];
408
Libr12.ShellQuartet.CD[0] = sp_kl->AB[0];
409
Libr12.ShellQuartet.CD[1] = sp_kl->AB[1];
410
Libr12.ShellQuartet.CD[2] = sp_kl->AB[2];
411
Libr12.ShellQuartet.AC[0] = Molecule.centers[BasisSet.shells[si].center-1].x-
412
Molecule.centers[BasisSet.shells[sk].center-1].x;
413
Libr12.ShellQuartet.AC[1] = Molecule.centers[BasisSet.shells[si].center-1].y-
414
Molecule.centers[BasisSet.shells[sk].center-1].y;
415
Libr12.ShellQuartet.AC[2] = Molecule.centers[BasisSet.shells[si].center-1].z-
416
Molecule.centers[BasisSet.shells[sk].center-1].z;
417
Libr12.ShellQuartet.ABdotAC = Libr12.ShellQuartet.AB[0]*Libr12.ShellQuartet.AC[0]+
418
Libr12.ShellQuartet.AB[1]*Libr12.ShellQuartet.AC[1]+
419
Libr12.ShellQuartet.AB[2]*Libr12.ShellQuartet.AC[2];
420
Libr12.ShellQuartet.CDdotCA = -1.0*(Libr12.ShellQuartet.CD[0]*Libr12.ShellQuartet.AC[0]+
421
Libr12.ShellQuartet.CD[1]*Libr12.ShellQuartet.AC[1]+
422
Libr12.ShellQuartet.CD[2]*Libr12.ShellQuartet.AC[2]);
423
AB2 = Libr12.ShellQuartet.AB[0]*Libr12.ShellQuartet.AB[0]+
424
Libr12.ShellQuartet.AB[1]*Libr12.ShellQuartet.AB[1]+
425
Libr12.ShellQuartet.AB[2]*Libr12.ShellQuartet.AB[2];
426
CD2 = Libr12.ShellQuartet.CD[0]*Libr12.ShellQuartet.CD[0]+
427
Libr12.ShellQuartet.CD[1]*Libr12.ShellQuartet.CD[1]+
428
Libr12.ShellQuartet.CD[2]*Libr12.ShellQuartet.CD[2];
430
/*--------------------------------
431
contract by primitives out here
432
--------------------------------*/
434
for (pi = 0; pi < np_i; pi++)
435
for (pj = 0; pj < np_j; pj++)
436
for (pk = 0; pk < np_k; pk++)
437
for (pl = 0; pl < np_l; pl++){
438
r12_quartet_data(&(Libr12.PrimQuartet[num_prim_comb++]), &fjt_table, AB2, CD2,
439
sp_ij, sp_kl, am, pi, pj, pk, pl, lambda_T);
443
build_r12_grt[orig_am[0]][orig_am[1]][orig_am[2]][orig_am[3]](&Libr12,num_prim_comb);
445
/*--- (usi usj|[r12,T1]|usk usl) = (usk usl|[r12,T2]|usi usj) ---*/
446
Libr12.te_ptr[2] = Libr12.te_ptr[3];
447
#ifdef NONDOUBLE_INTS
448
size = ioff[BasisSet.shells[si].am]*ioff[BasisSet.shells[sj].am]*
449
ioff[BasisSet.shells[sk].am]*ioff[BasisSet.shells[sl].am];
450
for(i=0;i<NUM_TE_TYPES-1;i++)
452
raw_data[i][j] = (double) Libr12.te_ptr[i][j];
454
for(i=0;i<NUM_TE_TYPES-1;i++)
455
raw_data[i] = Libr12.te_ptr[i];
457
/*--- Just normalize the integrals ---*/
458
for(te_type=0;te_type<NUM_TE_TYPES-1;te_type++)
459
data[te_type] = norm_quartet(raw_data[te_type], NULL, orig_am, 0);
464
for(p=0;p<num_prim_comb;p++) {
465
ssss += (double) Libr12.PrimQuartet[p].F[0];
466
ss_r12_ss += (double) Libr12.PrimQuartet[p].ss_r12_ss;
468
build_r12_grt[0][0][0][0](&Libr12,num_prim_comb);
469
#ifdef NONDOUBLE_INTS
470
raw_data[0][0] = ssss;
471
raw_data[1][0] = ss_r12_ss;
472
raw_data[2][0] = (double) Libr12.te_ptr[2][0];
473
raw_data[3][0] = (double) Libr12.te_ptr[3][0];
474
data[0] = raw_data[0];
475
data[1] = raw_data[1];
476
data[2] = raw_data[2];
477
data[3] = raw_data[3];
479
Libr12.int_stack[2] = Libr12.te_ptr[2][0];
480
Libr12.int_stack[3] = Libr12.te_ptr[3][0];
481
Libr12.int_stack[0] = ssss;
482
Libr12.int_stack[1] = ss_r12_ss;
483
data[0] = Libr12.int_stack;
484
data[1] = Libr12.int_stack+1;
485
data[2] = Libr12.int_stack+2;
486
data[3] = Libr12.int_stack+3;
491
Swap bra and ket back to the reversed order (PQ|RS) if not done yet
492
Need this to make Step 1 a (fast) vector operation!
493
NOTE: This changes only the way integrals are stored! No need to
494
worry about non-hermiticity of [r12,T1] here!!!
503
sr_fao = BasisSet.shells[si].fao - 1;
504
ss_fao = BasisSet.shells[sj].fao - 1;
505
sp_fao = BasisSet.shells[sk].fao - 1;
506
sq_fao = BasisSet.shells[sl].fao - 1;
507
nr = ioff[BasisSet.shells[si].am];
508
ns = ioff[BasisSet.shells[sj].am];
509
np = ioff[BasisSet.shells[sk].am];
510
nq = ioff[BasisSet.shells[sl].am];
512
No need to swap bra and ket since the quartet
513
was already computed in reverse order
517
sr_fao = BasisSet.shells[si].fao - 1;
518
ss_fao = BasisSet.shells[sj].fao - 1;
519
sp_fao = BasisSet.shells[sk].fao - 1;
520
sq_fao = BasisSet.shells[sl].fao - 1;
521
nr = ioff[BasisSet.shells[si].am];
522
ns = ioff[BasisSet.shells[sj].am];
523
np = ioff[BasisSet.shells[sk].am];
524
nq = ioff[BasisSet.shells[sl].am];
526
Need to swap bra and ket, but do
527
the actual permutation in te_type-loop
531
/*--- step 1 of the transformation ---*/
532
/* timer_on("Step 1");*/
533
for(te_type=0;te_type<NUM_TE_TYPES-1;te_type++) {
535
if bra and ket were not swapped before computing
536
the quartet, swap them now so that Step 1 is
539
if ((!swap_ij_kl) && am) {
540
/* timer_on("Pre1Swap");*/
541
/*--- (rs|pq) -> (pq|rs) ---*/
542
ijkl_to_klij(data[te_type],scratch_buf,nr*ns,np*nq);
543
data[te_type] = scratch_buf;
544
/* timer_off("Pre1Swap");*/
547
for(mo_i=0;mo_i<ibatch_length;mo_i++) {
548
mo_vec = MOInfo.scf_evec_occ_beta[mo_i+imin];
549
rspq_ptr = data[te_type];
550
for(p=0,p_abs=sp_fao;p<np;p++,p_abs++) {
551
for(q=0,q_abs=sq_fao;
554
ip_row = rsiq_buf[te_type] + ((mo_i*BasisSet.num_ao + p_abs) * nr * ns);
555
temp1 = mo_vec[q_abs];
557
for(rs=0;rs<nr*ns;rs++,rspq_ptr++,iq_row++,ip_row++) {
558
(*ip_row) += temp1 * (*rspq_ptr);
561
C_DAXPY(nr*ns,temp1,rspq_ptr,1,ip_row,1);
566
rspq_ptr = data[te_type];
567
for(p=0,p_abs=sp_fao;p<np;p++,p_abs++) {
568
temp = mo_vec[p_abs];
569
i_row = rsiq_buf[te_type] + ((mo_i*BasisSet.num_ao + sq_fao) * nr * ns);
571
for(qrs=0;qrs<nq*nr*ns;qrs++,i_row++,rspq_ptr++) {
572
(*i_row) += temp * (*rspq_ptr);
575
C_DAXPY(nq*nr*ns,temp,rspq_ptr,1,i_row,1);
576
rspq_ptr += nq*nr*ns;
581
for(mo_i=0;mo_i<ibatch_length;mo_i++) {
582
mo_vec = MOInfo.scf_evec_occ_beta[mo_i+imin];
583
rspq_ptr = data[te_type];
584
for(p=0,p_abs=sp_fao;p<np;p++,p_abs++) {
585
temp = mo_vec[p_abs];
586
i_row = rsiq_buf[te_type] + ((mo_i*BasisSet.num_ao + sq_fao) * nr * ns);
588
for(qrs=0;qrs<nq*nr*ns;qrs++,i_row++,rspq_ptr++) {
589
(*i_row) += temp * (*rspq_ptr);
592
C_DAXPY(nq*nr*ns,temp,rspq_ptr,1,i_row,1);
593
rspq_ptr += nq*nr*ns;
598
/* timer_off("Step 1");*/
600
} /* end of computing "petit" list - end of P,Q loop */
601
} /* end of "unique" P,Q loop */
603
/*--- step 2 of the transfromation ---*/
604
for(te_type=0;te_type<NUM_TE_TYPES-1;te_type++) {
605
/* timer_on("Post1Swap");*/
606
ijkl_to_klij(rsiq_buf[te_type],scratch_buf,ibatch_length*BasisSet.num_ao,nr*ns);
607
rsi_row = scratch_buf;
608
/* timer_off("Post1Swap");*/
609
ix_block_ptr = rsix_buf[te_type];
612
/*--- Can be done as a matrix multiply now ---*/
613
/* timer_on("Step 2");*/
616
for(mo_i=0;mo_i<ibatch_length;mo_i++,rsi_row+=BasisSet.num_ao) {
617
for(mo_x=0;mo_x<MOInfo.num_mo;mo_x++,ix++) {
618
mo_vec = MOInfo.scf_evec_beta[mo_x];
619
temp = ix_block_ptr[ix];
620
for(q_abs=0;q_abs<BasisSet.num_ao;q_abs++) {
621
temp += mo_vec[q_abs] * rsi_row[q_abs];
623
ix_block_ptr[ix] = temp;
627
C_DGEMM('n','t',ibatch_length,MOInfo.num_mo,BasisSet.num_ao,1.0,
628
rsi_row,BasisSet.num_ao,MOInfo.scf_evec_beta[0],BasisSet.num_ao,
629
0.0,ix_block_ptr,MOInfo.num_mo);
630
rsi_row += BasisSet.num_ao*ibatch_length;
632
ix_block_ptr += MOInfo.num_mo*ibatch_length;
633
/* timer_off("Step 2");*/
637
/*--- step 3 of the transformation, beta - beta spin case---*/
638
rsi_row = rsix_buf[te_type];
639
/*--- To update (JS|IA) need to lock mutex corresponding to the S and R indices ---*/
641
pthread_mutex_lock(&rmp2r12_sindex_mutex[INDEX(si,sj)]);
645
/* timer_on("Step 3");*/
648
for(mo_i=0;mo_i<ibatch_length;mo_i++,rsi_row+=MOInfo.num_mo) {
649
for(mo_j=0;mo_j<(te_type==2 ? MOInfo.beta_act_occ : mo_i);mo_j++) {
651
pthread_mutex_lock(&rmp2r12_sindex_mutex[s_abs]);
653
jsi_row = jsix_buf[te_type] + ((mo_j * BasisSet.num_ao + s_abs) * ibatch_length + mo_i) * MOInfo.num_mo;
654
temp = MOInfo.scf_evec_occ_beta[mo_j+jmin][r_abs];
655
for(mo_x=0;mo_x<MOInfo.num_mo;mo_x++) {
656
jsi_row[mo_x] += temp * rsi_row[mo_x];
659
pthread_mutex_unlock(&rmp2r12_sindex_mutex[s_abs]);
663
pthread_mutex_lock(&rmp2r12_sindex_mutex[r_abs]);
665
jsi_row = jsix_buf[te_type] + ((mo_j * BasisSet.num_ao + r_abs) * ibatch_length + mo_i) * MOInfo.num_mo;
666
temp = MOInfo.scf_evec_occ_beta[mo_j+jmin][s_abs];
667
if (te_type == 2) /*--- [r12,T1] integral - negative sign ---*/
669
for(mo_x=0;mo_x<MOInfo.num_mo;mo_x++) {
670
jsi_row[mo_x] += temp * rsi_row[mo_x];
673
pthread_mutex_unlock(&rmp2r12_sindex_mutex[r_abs]);
678
/* timer_off("Step 3");*/
682
pthread_mutex_unlock(&rmp2r12_sindex_mutex[INDEX(si,sj)]);
686
/*--- step 3 of the transformation, beta - alpha spin case
687
only for the integrals over the non-hermitian [r12,T1] operator ---*/
688
rsi_row = rsix_buf[te_type];
689
/*--- To update (JS|IA) need to lock mutex corresponding to the S and R indices ---*/
691
pthread_mutex_lock(&rmp2r12_sindex_mutex[INDEX(si,sj)]);
695
/* timer_on("Step 3");*/
698
for(mo_i=0;mo_i<ibatch_length;mo_i++,rsi_row+=MOInfo.num_mo) {
699
for(mo_j=0;mo_j<MOInfo.alpha_act_occ;mo_j++) {
701
pthread_mutex_lock(&rmp2r12_sindex_mutex[s_abs]);
703
jsi_row = jsix_buf2 + ((mo_j * BasisSet.num_ao + s_abs) * ibatch_length + mo_i) * MOInfo.num_mo;
704
temp = MOInfo.scf_evec_occ_alpha[mo_j+jmin][r_abs];
705
for(mo_x=0;mo_x<MOInfo.num_mo;mo_x++) {
706
jsi_row[mo_x] += temp * rsi_row[mo_x];
709
pthread_mutex_unlock(&rmp2r12_sindex_mutex[s_abs]);
713
pthread_mutex_lock(&rmp2r12_sindex_mutex[r_abs]);
715
jsi_row = jsix_buf2 + ((mo_j * BasisSet.num_ao + r_abs) * ibatch_length + mo_i) * MOInfo.num_mo;
716
temp = MOInfo.scf_evec_occ_alpha[mo_j+jmin][s_abs];
717
temp *= (-1.0); /*--- [r12,T1] integral - negative sign ---*/
718
for(mo_x=0;mo_x<MOInfo.num_mo;mo_x++) {
719
jsi_row[mo_x] += temp * rsi_row[mo_x];
722
pthread_mutex_unlock(&rmp2r12_sindex_mutex[r_abs]);
727
/* timer_off("Step 3");*/
731
pthread_mutex_unlock(&rmp2r12_sindex_mutex[INDEX(si,sj)]);
733
} /* end if(te_type==2)*/
734
memset(rsiq_buf[te_type],0,nr*ns*ibatch_length*BasisSet.num_ao*sizeof(double));
735
memset(rsix_buf[te_type],0,nr*ns*ibatch_length*MOInfo.num_mo*sizeof(double));
736
} /* end of loop over te_type */
737
} /* end of R,S loop */
738
} /* end of "unique" R,S loop */
740
pthread_mutex_lock(&rmp2r12_energy_mutex);
741
RMP2R12_Status.num_arrived++;
742
if (RMP2R12_Status.num_arrived != UserOptions.num_threads) { /*--- there are some threads still working - wait here --*/
743
pthread_cond_wait(&rmp2r12_energy_cond,&rmp2r12_energy_mutex);
744
pthread_mutex_unlock(&rmp2r12_energy_mutex);
746
else { /*--- this is the last thread to get here - do the 4th step and dumping integrals alone and wake everybody up ---*/
747
/*--- step 4 of the transformation ---*/
748
/* timer_on("Step 4");*/
749
for(te_type=0;te_type<NUM_TE_TYPES-1;te_type++) {
750
for(mo_i=0;mo_i<ibatch_length;mo_i++) {
751
for(mo_j=0;mo_j<(te_type==2 ? MOInfo.beta_act_occ : mo_i);mo_j++) {
752
for(mo_y=0;mo_y<MOInfo.num_mo;mo_y++) {
753
jyi_row = jyix_buf[te_type] + ((mo_j * MOInfo.num_mo + mo_y) * ibatch_length + mo_i) * MOInfo.num_mo;
754
for(s_abs=0;s_abs<BasisSet.num_ao;s_abs++) {
755
jsi_row = jsix_buf[te_type] + ((mo_j * BasisSet.num_ao + s_abs) * ibatch_length + mo_i) * MOInfo.num_mo;
756
temp = MOInfo.scf_evec_beta[mo_y][s_abs];
757
for(mo_x=0;mo_x<MOInfo.num_mo;mo_x++) {
758
jyi_row[mo_x] += temp * jsi_row[mo_x];
765
/* here we finish forming the (BB|AA) ints over the [r12,T1] operator */
766
for(mo_i=0;mo_i<ibatch_length;mo_i++) {
767
for(mo_j=0;mo_j<MOInfo.alpha_act_occ;mo_j++) {
768
for(mo_y=0;mo_y<MOInfo.num_mo;mo_y++) {
769
jyi_row = jyix_buf2 + ((mo_j * MOInfo.num_mo + mo_y) * ibatch_length + mo_i) * MOInfo.num_mo;
770
for(s_abs=0;s_abs<BasisSet.num_ao;s_abs++) {
771
jsi_row = jsix_buf2 + ((mo_j * BasisSet.num_ao + s_abs) * ibatch_length + mo_i) * MOInfo.num_mo;
772
temp = MOInfo.scf_evec_alpha[mo_y][s_abs];
773
for(mo_x=0;mo_x<MOInfo.num_mo;mo_x++) {
774
jyi_row[mo_x] += temp * jsi_row[mo_x];
780
/* timer_off("Step 4");*/
783
/*--- Print them out for now ---*/
784
for(te_type=0;te_type<NUM_TE_TYPES-1;te_type++) {
785
fprintf(outfile," Beta Beta Transformed integrals of type %d\n",te_type);
786
for(mo_i=0;mo_i<ibatch_length;mo_i++) {
787
for(mo_j=0;mo_j<(te_type==2 ? MOInfo.beta_act_occ : mo_i);mo_j++) {
788
for(mo_y=0;mo_y<MOInfo.num_mo;mo_y++) {
789
for(mo_x=0;mo_x<MOInfo.num_mo;mo_x++) {
790
temp = jyix_buf[te_type][((mo_j * MOInfo.num_mo + mo_y) * ibatch_length + mo_i) * MOInfo.num_mo + mo_x];
791
if (fabs(temp) > ZERO) {
793
fprintf(outfile,"<%d %d %d %d [%d] [%d] = %20.10lf\n",
794
mo_j+jmin,mo_y,mo_i+imin,mo_x,
795
mo_j*MOInfo.num_mo+mo_y,
796
mo_i*MOInfo.num_mo+mo_x,
799
fprintf(outfile,"<%d %d %d %d [%d] [%d] = %20.10lf\n",
800
mo_i+imin,mo_x,mo_j+jmin,mo_y,
801
mo_i*MOInfo.num_mo+mo_x,
802
mo_j*MOInfo.num_mo+mo_y,
810
fprintf(outfile," Beta Alpha Transformed integrals of type %d\n",2);
811
for(mo_i=0;mo_i<ibatch_length;mo_i++) {
812
for(mo_j=0;mo_j<MOInfo.alpha_act_occ;mo_j++) {
813
for(mo_y=0;mo_y<MOInfo.num_mo;mo_y++) {
814
for(mo_x=0;mo_x<MOInfo.num_mo;mo_x++) {
815
temp = jyix_buf2[((mo_j * MOInfo.num_mo + mo_y) * ibatch_length + mo_i) * MOInfo.num_mo + mo_x];
816
if (fabs(temp) > ZERO) {
817
fprintf(outfile,"<%d %d %d %d [%d] [%d] = %20.10lf\n",
818
mo_i+imin,mo_x,mo_j+jmin,mo_y,
819
mo_i*MOInfo.num_mo+mo_x,
820
mo_j*MOInfo.num_mo+mo_y,
828
/*--- Files are opened and closed each pass to ensure integrity of TOCs
829
if restart ever needed ---*/
830
iounits[0] = PSIF_MO_BB_TEI;
831
iounits[1] = PSIF_MO_BB_R12;
832
iounits[2] = PSIF_MO_BB_R12T1;
833
for(te_type=0;te_type<NUM_TE_TYPES-1;te_type++)
834
psio_open(iounits[te_type], (ibatch != 0) ? PSIO_OPEN_OLD : PSIO_OPEN_NEW);
836
/*--------------------------------------------------------
837
Dump all fully transformed integrals to disk. Zero out
838
non-symmetrical integrals (what's left of the Pitzer's
839
equal contribution theorem in Abelian case).
840
--------------------------------------------------------*/
841
for(te_type=0;te_type<NUM_TE_TYPES-1;te_type++) {
842
/*--------------------------------------------------------------------
843
Write integrals out in num_mo by num_mo batches corresponding to
845
--------------------------------------------------------------------*/
846
for(mo_i=0;mo_i<ibatch_length;mo_i++) {
847
i = act2fullQTS_beta[mo_i+imin]; /*--- mo_i+imin is the index in QTS "active" indexing scheme
848
we need i to be in QTS "full" indexing scheme, that's
849
what the MP2R12 code expects ---*/
850
isym = MOInfo.mo2symblk_occ_beta[mo_i+imin];
851
for(mo_j=0;mo_j<(te_type==2 ? MOInfo.beta_act_occ : mo_i);mo_j++) {
852
jsym = MOInfo.mo2symblk_occ_beta[mo_j+jmin];
853
j = act2fullQTS_beta[mo_j+jmin]; /*--- Again, get the "full" QTS index ---*/
854
sprintf(ij_key_string,"Block_%d_x_%d_y",i,j);
855
memset(xy_buf,0,MOInfo.num_mo*MOInfo.num_mo*sizeof(double));
856
/*--- Put all integrals with common i and j into a buffer ---*/
857
for(mo_x=0,xy=0;mo_x<MOInfo.num_mo;mo_x++) {
858
x = mo_x; /*--- The second index is a Pitzer index, that's fine ---*/
859
xsym = MOInfo.mo2symblk[x];
860
for(mo_y=0;mo_y<MOInfo.num_mo;mo_y++,xy++) {
861
y = mo_y; /*--- Again, the Pitzer index here is what we need ---*/
862
ysym = MOInfo.mo2symblk[y];
863
/*--- Skip this indegral if it's non-totally symmetric -
864
Pitzer's contribution theorem in Abelian case ---*/
865
if ((isym ^ jsym) ^ (xsym ^ ysym))
867
/*--- find the integral in ixjy_buf and put it in xy_buf ---*/
868
m = (((mo_j*MOInfo.num_mo + mo_y)*ibatch_length + mo_i)*MOInfo.num_mo + mo_x);
869
xy_buf[xy] = jyix_buf[te_type][m];
872
psio_write_entry(iounits[te_type], ij_key_string, (char *)xy_buf,
873
MOInfo.num_mo*MOInfo.num_mo*sizeof(double));
876
psio_close(iounits[te_type], 1);
879
/* Here we also write the beta - alpha [r12,T1] integrals */
880
iounits2[0] = PSIF_MO_BA_R12T1;
881
psio_open(iounits2[0], (ibatch != 0) ? PSIO_OPEN_OLD : PSIO_OPEN_NEW);
882
for(mo_i=0;mo_i<ibatch_length;mo_i++) {
883
i = act2fullQTS_beta[mo_i+imin]; /*--- mo_i+imin is the index in QTS "active" indexing scheme
884
we need i to be in QTS "full" indexing scheme, that's
885
what the MP2R12 code expects ---*/
886
isym = MOInfo.mo2symblk_occ_beta[mo_i+imin];
887
for(mo_j=0;mo_j<MOInfo.alpha_act_occ;mo_j++) {
888
jsym = MOInfo.mo2symblk_occ_alpha[mo_j+jmin];
889
j = act2fullQTS_alpha[mo_j+jmin]; /*--- Again, get the "full" QTS index ---*/
890
sprintf(ij_key_string,"Block_%d_x_%d_y",i,j);
891
memset(xy_buf,0,MOInfo.num_mo*MOInfo.num_mo*sizeof(double));
892
/*--- Put all integrals with common i and j into a buffer ---*/
893
for(mo_x=0,xy=0;mo_x<MOInfo.num_mo;mo_x++) {
894
x = mo_x; /*--- The second index is a Pitzer index, that's fine ---*/
895
xsym = MOInfo.mo2symblk[x];
896
for(mo_y=0;mo_y<MOInfo.num_mo;mo_y++,xy++) {
897
y = mo_y; /*--- Again, the Pitzer index here is what we need ---*/
898
ysym = MOInfo.mo2symblk[y];
899
/*--- Skip this indegral if it's non-totally symmetric -
900
Pitzer's contribution theorem in Abelian case ---*/
901
if ((isym ^ jsym) ^ (xsym ^ ysym))
903
/*--- find the integral in ixjy_buf and put it in xy_buf ---*/
904
m = (((mo_j*MOInfo.num_mo + mo_y)*ibatch_length + mo_i)*MOInfo.num_mo + mo_x);
905
xy_buf[xy] = jyix_buf2[m];
908
psio_write_entry(iounits2[0], ij_key_string, (char *)xy_buf,
909
MOInfo.num_mo*MOInfo.num_mo*sizeof(double));
912
psio_close(iounits2[0], 1);
914
if (ibatch < num_ibatch-1){
915
for(te_type=0;te_type<NUM_TE_TYPES-1;te_type++) {
916
memset(jsix_buf[te_type],0,MOInfo.beta_act_occ*BasisSet.num_ao*ibatch_length*MOInfo.num_mo*sizeof(double));
917
memset(jyix_buf[te_type],0,MOInfo.beta_act_occ*MOInfo.num_mo*ibatch_length*MOInfo.num_mo*sizeof(double));
919
memset(jsix_buf2,0,MOInfo.alpha_act_occ*BasisSet.num_ao*ibatch_length*MOInfo.num_mo*sizeof(double));
920
memset(jyix_buf2,0,MOInfo.alpha_act_occ*MOInfo.num_mo*ibatch_length*MOInfo.num_mo*sizeof(double));
922
/*--- Done with the non-threaded part - wake everybody up and prepare for the next ibatch ---*/
923
RMP2R12_Status.num_arrived = 0;
924
pthread_cond_broadcast(&rmp2r12_energy_cond);
925
pthread_mutex_unlock(&rmp2r12_energy_mutex);
927
} /* End of "I"-loop */
932
for(te_type=0;te_type<NUM_TE_TYPES-1;te_type++) {
933
free(rsiq_buf[te_type]);
934
free(rsix_buf[te_type]);
939
free_libr12(&Libr12);
940
free_fjt_table(&fjt_table);
941
#ifdef NONDOUBLE_INTS
942
for(i=0;i<NUM_TE_TYPES;i++)
952
}}} /* End namespaces */