7
#include <libipv1/ip_lib.h>
8
#include <libiwl/iwl.h>
9
#include <libciomr/libciomr.h>
10
#include <libint/libint.h>
11
#include <libderiv/libderiv.h>
17
#include"taylor_fm_eval.h"
21
#include "deriv1_quartet_data.h"
22
#include "small_fns.h"
25
void *te_deriv1_scf_thread(void *tnum_ptr)
27
int thread_num = (int) tnum_ptr;
29
extern double **grad_te;
30
extern pthread_mutex_t deriv1_mutex; /* Used to lock "global" gradient matrix grad_te during update */
32
/*--- Various data structures ---*/
33
struct shell_pair *sp_ij, *sp_kl;
34
Libderiv_t Libderiv; /* Integrals library object */
36
double_array_t fjt_table; /* table of auxiliary function F_m(u) for each primitive combination */
39
PSI_INT_LEAST64 quartet_index;
40
int ij, kl, ik, jl, ijkl;
41
int ioffset, joffset, koffset, loffset;
47
register int i, j, k, l, m, ii, jj, kk, ll;
48
register int si, sj, sk, sl ;
49
register int sii, sjj, skk, sll, slll;
50
register int pi, pj, pk, pl ;
52
register int pii, pjj, pkk, pll ;
53
int switch_ij, switch_kl, switch_ijkl;
54
int center_i, center_j, center_k, center_l;
58
int max_cart_class_size;
60
int bf_i, bf_j, bf_k, bf_l, so_i, so_j, so_k, so_l, s;
61
int np_i, np_j, np_k, np_l;
62
int ni, nj, nk, nl, quartet_size;
64
int si_fao, sj_fao, sk_fao, sl_fao;
65
int sii_fao, sjj_fao, skk_fao, sll_fao;
66
int ao_i, imax, ao_j, jmax, ao_k, kmax, ao_l, lmax;
69
int iimax, jjmax, kkmax, llmax;
70
int irrep, npi_ij, npi_kl, npi_ik, npi_jl, ind_offset;
72
int num_prim_comb, p, max_num_prim_comb;
74
int buf_offset, buf_4offset, buf_size, last_buf;
75
int quartet_done, offset;
81
double **grad_te_local;
85
double **dens_i, **dens_j;
86
double ddax, dday, ddaz, ddbx, ddby, ddbz,
87
ddcx, ddcy, ddcz, dddx, dddy, dddz;
94
init_fjt_table(&fjt_table);
97
max_cart_class_size = ioff[BasisSet.max_am]*ioff[BasisSet.max_am]*ioff[BasisSet.max_am]*ioff[BasisSet.max_am];
98
max_class_size = max_cart_class_size;
99
max_num_prim_comb = (BasisSet.max_num_prims*BasisSet.max_num_prims)*
100
(BasisSet.max_num_prims*BasisSet.max_num_prims);
101
init_libderiv1(&Libderiv,BasisSet.max_am-1,max_num_prim_comb,max_class_size);
102
FourInd = init_array(max_cart_class_size);
104
grad_te_local = block_matrix(Molecule.num_atoms,3);
106
/*-------------------------------------------------
107
generate all unique shell quartets with ordering
108
suitable for building the PK-matrix
109
-------------------------------------------------*/
110
for (sii=0; sii<BasisSet.num_shells; sii++)
111
for (sjj=0; sjj<=sii; sjj++)
112
for (skk=0; skk<=sii; skk++)
113
for (sll=0; sll<= ((sii == skk) ? sjj : skk); sll++, quartet_index++){
115
si = sii; sj = sjj; sk = skk; sl = sll;
117
/*--- Decide if this thread will do this ---*/
118
if ( quartet_index%UserOptions.num_threads != thread_num )
121
/*--- Skip this quartet if all four centers are the same ---*/
122
if (BasisSet.shells[si].center == BasisSet.shells[sj].center &&
123
BasisSet.shells[si].center == BasisSet.shells[sk].center &&
124
BasisSet.shells[si].center == BasisSet.shells[sl].center &&
131
/* place in "ascending" angular mom-
132
my simple way of optimizing PHG recursion (VRR) */
133
/* these first two are good for the HRR */
134
if(BasisSet.shells[si].am < BasisSet.shells[sj].am){
140
if(BasisSet.shells[sk].am < BasisSet.shells[sl].am){
146
/* this should be /good/ for the VRR */
147
if(BasisSet.shells[si].am + BasisSet.shells[sj].am > BasisSet.shells[sk].am + BasisSet.shells[sl].am){
157
ni = ioff[BasisSet.shells[si].am];
158
nj = ioff[BasisSet.shells[sj].am];
159
nk = ioff[BasisSet.shells[sk].am];
160
nl = ioff[BasisSet.shells[sl].am];
161
quartet_size = ni*nj*nk*nl;
163
np_i = BasisSet.shells[si].n_prims;
164
np_j = BasisSet.shells[sj].n_prims;
165
np_k = BasisSet.shells[sk].n_prims;
166
np_l = BasisSet.shells[sl].n_prims;
168
orig_am[0] = BasisSet.shells[si].am-1;
169
orig_am[1] = BasisSet.shells[sj].am-1;
170
orig_am[2] = BasisSet.shells[sk].am-1;
171
orig_am[3] = BasisSet.shells[sl].am-1;
172
am = orig_am[0] + orig_am[1] + orig_am[2] + orig_am[3];
174
sp_ij = &(BasisSet.shell_pairs[si][sj]);
175
sp_kl = &(BasisSet.shell_pairs[sk][sl]);
177
Libderiv.AB[0] = sp_ij->AB[0];
178
Libderiv.AB[1] = sp_ij->AB[1];
179
Libderiv.AB[2] = sp_ij->AB[2];
180
Libderiv.CD[0] = sp_kl->AB[0];
181
Libderiv.CD[1] = sp_kl->AB[1];
182
Libderiv.CD[2] = sp_kl->AB[2];
184
AB2 = Libderiv.AB[0]*Libderiv.AB[0]+
185
Libderiv.AB[1]*Libderiv.AB[1]+
186
Libderiv.AB[2]*Libderiv.AB[2];
187
CD2 = Libderiv.CD[0]*Libderiv.CD[0]+
188
Libderiv.CD[1]*Libderiv.CD[1]+
189
Libderiv.CD[2]*Libderiv.CD[2];
191
/*-------------------------
192
Figure out the prefactor
193
-------------------------*/
199
if (si == sk && sj == sl || si == sl && sj == sk)
202
/*--- Compute data for primitive quartets here ---*/
204
for (pi = 0; pi < np_i; pi++) {
205
max_pj = (si == sj) ? pi+1 : np_j;
206
for (pj = 0; pj < max_pj; pj++) {
207
m = (1 + (si == sj && pi != pj));
208
for (pk = 0; pk < np_k; pk++) {
209
max_pl = (sk == sl) ? pk+1 : np_l;
210
for (pl = 0; pl < max_pl; pl++){
211
n = m * (1 + (sk == sl && pk != pl));
213
deriv1_quartet_data(&(Libderiv.PrimQuartet[num_prim_comb++]),
215
sp_ij, sp_kl, am, pi, pj, pk, pl, n*pfac);
217
deriv1_quartet_data(&(Libderiv.PrimQuartet[num_prim_comb++]),
218
&fjt_table, AB2, CD2,
219
sp_ij, sp_kl, am, pi, pj, pk, pl, n*pfac);
229
si_fao = BasisSet.shells[si].fao-1;
230
sj_fao = BasisSet.shells[sj].fao-1;
231
sk_fao = BasisSet.shells[sk].fao-1;
232
sl_fao = BasisSet.shells[sl].fao-1;
233
if (UserOptions.reftype == rhf || UserOptions.reftype == uhf) { /*--- RHF or UHF ---*/
235
for (ao_i = si_fao; ao_i < si_fao+ni; ao_i++)
236
for (ao_j = sj_fao; ao_j < sj_fao+nj; ao_j++)
237
for (ao_k = sk_fao; ao_k < sk_fao+nk; ao_k++)
238
for (ao_l = sl_fao; ao_l < sl_fao+nl; ao_l++) {
239
FourInd[count] = (4.0*Dens[ao_i][ao_j]*Dens[ao_k][ao_l] -
240
Dens[ao_i][ao_k]*Dens[ao_j][ao_l] -
241
Dens[ao_i][ao_l]*Dens[ao_k][ao_j])*
242
GTOs.bf_norm[orig_am[0]][ao_i-si_fao]*
243
GTOs.bf_norm[orig_am[1]][ao_j-sj_fao]*
244
GTOs.bf_norm[orig_am[2]][ao_k-sk_fao]*
245
GTOs.bf_norm[orig_am[3]][ao_l-sl_fao];
249
else { /*--- ROHF or TCSCF ---*/
251
memset((char *) FourInd, 0, sizeof(double)*quartet_size);
254
for(mosh_i=0;mosh_i<MOInfo.num_moshells;mosh_i++)
255
for(mosh_j=0;mosh_j<MOInfo.num_moshells;mosh_j++) {
257
dens_i = ShDens[mosh_i];
258
dens_j = ShDens[mosh_j];
259
alpha = 8.0*MOInfo.Alpha[mosh_i][mosh_j];
260
beta = 4.0*MOInfo.Beta[mosh_i][mosh_j];
261
for (ao_i = si_fao; ao_i < si_fao+ni; ao_i++)
262
for (ao_j = sj_fao; ao_j < sj_fao+nj; ao_j++)
263
for (ao_k = sk_fao; ao_k < sk_fao+nk; ao_k++)
264
for (ao_l = sl_fao; ao_l < sl_fao+nl; ao_l++) {
265
FourInd[count] += (alpha*dens_i[ao_i][ao_j]*dens_j[ao_k][ao_l] +
266
beta*(dens_i[ao_i][ao_k]*dens_j[ao_j][ao_l] +
267
dens_i[ao_i][ao_l]*dens_j[ao_k][ao_j]));
271
/*--- Normalize it ---*/
273
for (ao_i = 0; ao_i < ni; ao_i++)
274
for (ao_j = 0; ao_j < nj; ao_j++)
275
for (ao_k = 0; ao_k < nk; ao_k++)
276
for (ao_l = 0; ao_l < nl; ao_l++) {
277
FourInd[count] *= GTOs.bf_norm[orig_am[0]][ao_i]*
278
GTOs.bf_norm[orig_am[1]][ao_j]*
279
GTOs.bf_norm[orig_am[2]][ao_k]*
280
GTOs.bf_norm[orig_am[3]][ao_l];
285
build_deriv1_eri[orig_am[0]][orig_am[1]][orig_am[2]][orig_am[3]](&Libderiv,num_prim_comb);
287
center_i = BasisSet.shells[si].center-1;
288
center_j = BasisSet.shells[sj].center-1;
289
center_k = BasisSet.shells[sk].center-1;
290
center_l = BasisSet.shells[sl].center-1;
293
for(k=0;k<quartet_size;k++)
294
ddax += Libderiv.ABCD[0][k]*FourInd[k];
295
grad_te_local[center_i][0] += ddax;
298
for(k=0;k<quartet_size;k++)
299
dday += Libderiv.ABCD[1][k]*FourInd[k];
300
grad_te_local[center_i][1] += dday;
303
for(k=0;k<quartet_size;k++)
304
ddaz += Libderiv.ABCD[2][k]*FourInd[k];
305
grad_te_local[center_i][2] += ddaz;
308
for(k=0;k<quartet_size;k++)
309
ddbx += Libderiv.ABCD[3][k]*FourInd[k];
310
grad_te_local[center_j][0] += ddbx;
313
for(k=0;k<quartet_size;k++)
314
ddby += Libderiv.ABCD[4][k]*FourInd[k];
315
grad_te_local[center_j][1] += ddby;
318
for(k=0;k<quartet_size;k++)
319
ddbz += Libderiv.ABCD[5][k]*FourInd[k];
320
grad_te_local[center_j][2] += ddbz;*/
323
for(k=0;k<quartet_size;k++)
324
ddcx += Libderiv.ABCD[6][k]*FourInd[k];
325
grad_te_local[center_k][0] += ddcx;
328
for(k=0;k<quartet_size;k++)
329
ddcy += Libderiv.ABCD[7][k]*FourInd[k];
330
grad_te_local[center_k][1] += ddcy;
333
for(k=0;k<quartet_size;k++)
334
ddcz += Libderiv.ABCD[8][k]*FourInd[k];
335
grad_te_local[center_k][2] += ddcz;
338
for(k=0;k<quartet_size;k++)
339
dddx += Libderiv.ABCD[9][k]*FourInd[k];
340
grad_te_local[center_l][0] += dddx;
343
for(k=0;k<quartet_size;k++)
344
dddy += Libderiv.ABCD[10][k]*FourInd[k];
345
grad_te_local[center_l][1] += dddy;
348
for(k=0;k<quartet_size;k++)
349
dddz += Libderiv.ABCD[11][k]*FourInd[k];
350
grad_te_local[center_l][2] += dddz;
352
grad_te_local[center_j][0] -= ddax + ddcx + dddx;
353
grad_te_local[center_j][1] -= dday + ddcy + dddy;
354
grad_te_local[center_j][2] -= ddaz + ddcz + dddz;
357
pthread_mutex_lock(&deriv1_mutex);
358
for(i=0;i<Molecule.num_atoms;i++) {
359
grad_te[i][0] += grad_te_local[i][0];
360
grad_te[i][1] += grad_te_local[i][1];
361
grad_te[i][2] += grad_te_local[i][2];
363
pthread_mutex_unlock(&deriv1_mutex);
369
free_block(grad_te_local);
370
free_libderiv(&Libderiv);
371
#ifndef USE_TAYLOR_FM
372
free_fjt_table(&fjt_table);