4
// Copyright (C) 2003 Edward Valeev
6
// Author: Edward Valeev <edward.valeev@chemistry.gatech.edu>
9
// This file is part of the SC Toolkit.
11
// The SC Toolkit is free software; you can redistribute it and/or modify
12
// it under the terms of the GNU Library General Public License as published by
13
// the Free Software Foundation; either version 2, or (at your option)
16
// The SC Toolkit is distributed in the hope that it will be useful,
17
// but WITHOUT ANY WARRANTY; without even the implied warranty of
18
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
// GNU Library General Public License for more details.
21
// You should have received a copy of the GNU Library General Public License
22
// along with the SC Toolkit; see the file COPYING.LIB. If not, write to
23
// the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
25
// The U.S. Government is granted a limited license as per AL 91-7.
32
#include <util/misc/formio.h>
33
#include <util/misc/timer.h>
34
#include <util/group/memory.h>
35
#include <util/group/message.h>
36
#include <util/class/class.h>
37
#include <util/state/state.h>
38
#include <util/state/state_text.h>
39
#include <util/state/state_bin.h>
40
#include <math/scmat/matrix.h>
41
#include <chemistry/molecule/molecule.h>
42
#include <chemistry/qc/basis/integral.h>
43
#include <chemistry/qc/basis/obint.h>
44
#include <chemistry/qc/basis/symmint.h>
45
#include <chemistry/qc/basis/orthog.h>
46
#include <chemistry/qc/mbpt/util.h>
47
#include <chemistry/qc/mbpt/bzerofast.h>
48
#include <chemistry/qc/mbptr12/trans123_r12a_abs.h>
49
#include <chemistry/qc/mbptr12/r12ia.h>
50
#include <chemistry/qc/mbptr12/r12ia_memgrp.h>
51
#include <chemistry/qc/mbptr12/r12ia_node0file.h>
53
#include <chemistry/qc/mbptr12/r12ia_mpiiofile.h>
55
#include <chemistry/qc/mbptr12/vxb_eval_abs_a.h>
60
#define SINGLE_THREAD_E123 0
64
#define PRINT_NUM_TE_TYPES 4
65
#define PRINT_R12_INTERMED 0
66
#define LINDEP_TOL 1.e-6
68
#define USE_GLOBAL_ORTHOG 1
70
#if PRINT_BIGGEST_INTS
71
BiggestContribs biggest_ints_1(4,40);
74
#define WRITE_DOUBLES 0
86
print_contrib(double tmpval, int num, int onum,
87
int P,int Q,int R,int S, int p,int q,int r,int s)
90
printf("noncanon: z(%d)(%d %d %d %d)(%d %d %d %d) contrib = % 6.4f\n",
91
num, P, Q, R, S, p, q, r, s, tmpval);
92
printf("noncanon: z(%d)(%d %d %d %d)(%d %d %d %d) contrib = % 6.4f\n",
93
onum, P, Q, R, S, p, q, r, s, -tmpval);
101
if (p < r || (p == r && q < s)) {
106
printf("z(%d)(%d %d %d %d)(%d %d %d %d) contrib = % 6.4f\n",
107
num, P, Q, R, S, p, q, r, s, tmpval);
108
printf("z(%d)(%d %d %d %d)(%d %d %d %d) contrib = % 6.4f\n",
109
onum, P, Q, R, S, p, q, r, s, -tmpval);
113
/*-------------------------------------
114
Based on MBPT2::compute_mp2_energy()
115
-------------------------------------*/
117
R12IntEval_abs_A::compute(RefSCMatrix& Vaa, RefSCMatrix& Xaa, RefSCMatrix& Baa,
118
RefSCMatrix& Vab, RefSCMatrix& Xab, RefSCMatrix& Bab)
123
int debug_ = r12info()->debug_level();
125
MolecularEnergy* mole = r12info()->mole();
126
Ref<Integral> integral = r12info()->integral();
127
Ref<GaussianBasisSet> bs = r12info()->basis();
128
Ref<GaussianBasisSet> bs_aux = r12info()->basis_aux();
129
bool two_basis_form = (bs != bs_aux);
131
throw std::runtime_error("R12IntEval_abs_A::compute called when basis sets are identical");
132
integral->set_basis(bs,bs,bs,bs_aux);
134
Ref<SCMatrixKit> matrixkit_aux = bs_aux->matrixkit();
135
Ref<SCMatrixKit> so_matrixkit_aux = bs_aux->so_matrixkit();
136
Ref<MessageGrp> msg = r12info()->msg();
137
Ref<MemoryGrp> mem = r12info()->mem();
138
Ref<ThreadGrp> thr = r12info()->thr();
139
const int num_te_types = 4;
140
enum te_types {eri=0, r12=1, r12t1=2, r12t2=3};
142
// log2 of the erep tolerance
143
// (erep < 2^tol => discard)
144
const int tol = (int) (-10.0/log10(2.0)); // discard ints smaller than 10^-20
146
int aoint_computed = 0;
148
BiggestContribs biggest_coefs(5,10);
150
#if PRINT_BIGGEST_INTS
151
BiggestContribs biggest_ints_2(4,40);
152
BiggestContribs biggest_ints_2s(4,40);
153
BiggestContribs biggest_ints_3a(4,40);
154
BiggestContribs biggest_ints_3(4,40);
157
tim_enter("r12a-abs-mem");
160
int nproc = msg->n();
161
const size_t mem_alloc = r12info()->memory();
163
int nbasis = bs->nbasis();
164
int nbasis_aux = bs_aux->nbasis();
165
int nfuncmax = bs->max_nfunction_in_shell();
166
int nfuncmax_aux = bs_aux->max_nfunction_in_shell();
167
int nshell = bs->nshell();
168
int nshell_aux = bs_aux->nshell();
169
int nocc = r12info()->nocc();
170
int nocc_act = r12info()->nocc_act();
171
int nfzc = r12info()->nfzc();
172
int nfzv = r12info()->nfzv();
173
int noso = r12info()->noso();
174
int nvir = noso - nocc;
176
ExEnv::out0() << endl << indent
177
<< "Entered ABS A intermediates evaluator" << endl;
178
ExEnv::out0() << indent << scprintf("nproc = %i", nproc) << endl;
181
// Do a few preliminary tests to make sure the desired calculation
182
// can be done (and appears to be meaningful!)
185
throw std::runtime_error("There are no active occupied orbitals; program exiting");
188
throw std::runtime_error("There are no active virtual orbitals; program exiting");
190
if (restart_orbital_) {
191
ExEnv::out0() << indent
192
<< scprintf("Restarting at orbital %d",
197
////////////////////////////////////////////////////////
198
// Compute batch size ni for mp2 loops;
200
// The following arrays are kept throughout (all of type double):
202
// The following objects are kept throughout:
203
// integrals evaluators
204
// memory allocated for these arrays and objects is
207
////////////////////////////////////////////////////////
208
size_t mem_static = 0;
211
mem_static = nbasis*noso + nbasis_aux*nbasis_aux; // scf vector + aux_basis orthogonalizer
212
mem_static *= sizeof(double);
213
int nthreads = thr->nthread();
214
mem_static += nthreads * integral->storage_required_grt(bs,bs,bs,bs_aux); // integral evaluators
215
ni = compute_transform_batchsize_(mem_alloc, mem_static, nocc_act-restart_orbital_, num_te_types);
218
int max_norb = nocc_act - restart_orbital_;
222
// Send value of ni and mem_static to other nodes
224
double mem_static_double = (double) mem_static;
225
msg->bcast(mem_static_double);
226
mem_static = (size_t) mem_static_double;
228
// Compute the storage remaining for the integral routines
229
size_t dyn_mem = distsize_to_size(compute_transform_dynamic_memory_(ni,nocc_act,num_te_types));
230
size_t mem_remaining = 0;
231
if (mem_alloc <= (dyn_mem + mem_static)) mem_remaining += 0;
232
else mem_remaining += mem_alloc - dyn_mem - mem_static;
233
mem_remaining += thr->nthread() * integral->storage_required_grt(bs,bs,bs,bs_aux);
235
ExEnv::out0() << indent
236
<< "Memory available per node: " << mem_alloc << " Bytes"
238
ExEnv::out0() << indent
239
<< "Static memory used per node: " << mem_static << " Bytes"
241
ExEnv::out0() << indent
242
<< "Total memory used per node: " << dyn_mem+mem_static << " Bytes"
244
ExEnv::out0() << indent
245
<< "Memory required for one pass: "
246
<< compute_transform_dynamic_memory_(nocc_act,nocc_act,num_te_types)+mem_static
249
ExEnv::out0() << indent
250
<< "Minimum memory required: "
251
<< compute_transform_dynamic_memory_(1,nocc_act,num_te_types)+mem_static
254
ExEnv::out0() << indent
255
<< "Batch size: " << ni
259
throw std::runtime_error("R12IntEval_abs_A: batch size is 0: more memory or processors are needed");
261
if (r12info()->dynamic())
262
ExEnv::out0() << indent << "Using dynamic load balancing." << endl;
266
if (ni == nocc_act-restart_orbital_) {
271
rest = (nocc_act-restart_orbital_)%ni;
272
npass = (nocc_act-restart_orbital_ - rest)/ni + 1;
273
if (rest == 0) npass--;
276
ExEnv::out0() << indent
277
<< scprintf(" npass rest nbasis nshell nfuncmax nbasis(ABS) nshell(ABS) nfuncmax(ABS)") << endl;
278
ExEnv::out0() << indent
279
<< scprintf(" %-4i %-3i %-5i %-4i %-3i %-5i %-4i %-3i",
280
npass,rest,nbasis,nshell,nfuncmax,nbasis_aux,nshell_aux,nfuncmax_aux)
282
ExEnv::out0() << indent
283
<< scprintf(" nocc nvir nfzc nfzv") << endl;
284
ExEnv::out0() << indent
285
<< scprintf(" %-4i %-4i %-4i %-4i",
292
for (int i=0; i<ni; i++) {
293
for (int j=0; j<nocc_act; j++) {
294
if (index++ % nproc == me) nijmax++;
300
////////////////////////////////////////////////
301
// The scf vector is distributed between nodes;
302
// put a copy of the scf vector on each node;
303
////////////////////////////////////////////////
305
RefSCMatrix Scf_Vec = r12info()->scf_vec();
306
RefDiagSCMatrix evalmat = r12info()->evals();
307
int *mo_irrep = r12info()->orbsym();
309
evalmat.print("eigenvalues");
310
Scf_Vec.print("eigenvectors");
313
double *scf_vector_dat = new double[nbasis*noso];
314
Scf_Vec.t()->convert(scf_vector_dat);
316
double* evals = new double[noso];
317
double** scf_vector = new double*[nbasis];
318
for (int i=0; i<nbasis; i++) {
319
scf_vector[i] = &scf_vector_dat[i*noso];
321
for (int i=0; i<noso; i++) {
322
evals[i] = evalmat(i);
328
//////////////////////////////////////////////////
329
// For the auxiliary basis form an orthogonalizer
330
// and distribute to nodes
331
// Use canonical orthogonalization
332
// ( does Wavefunction call that symmetric? )
333
////////////////////////////////////////////////
335
RefSCMatrix orthog_aux;
336
RefSCDimension osodim_aux;
339
// Compute overlap matrix
342
// Make an Integral and initialize with bs_aux
343
Ref<Integral> integral_aux = integral->clone();
344
integral_aux->set_basis(bs_aux);
345
Ref<PetiteList> pl_aux = integral_aux->petite_list();
346
Ref<OneBodyInt> ov_aux_engine = integral_aux->overlap();
348
// form skeleton s matrix
349
RefSymmSCMatrix s(bs_aux->basisdim(), matrixkit_aux);
350
Ref<SCElementOp> ov =
351
new OneBodyIntOp(new SymmOneBodyIntIter(ov_aux_engine, pl_aux));
356
if (debug_ > 1) s.print("AO skeleton overlap (auxiliary basis)");
358
// then symmetrize it
359
RefSCDimension sodim_aux = pl_aux->SO_basisdim();
360
RefSymmSCMatrix overlap_aux(sodim_aux, so_matrixkit_aux);
361
pl_aux->symmetrize(s,overlap_aux);
363
// and clean up a bit
369
// Compute orthogonalizer for the auxiliary basis
371
#if USE_GLOBAL_ORTHOG
372
OverlapOrthog orthog(OverlapOrthog::Canonical,
378
RefSCMatrix orthog_aux_so = orthog.basis_to_orthog_basis();
379
orthog_aux_so = orthog_aux_so.t();
380
osodim_aux = orthog_aux_so.coldim();
382
RefSCMatrix overlap_aux_eigvec;
383
RefDiagSCMatrix overlap_isqrt_eigval;
384
RefDiagSCMatrix overlap_sqrt_eigval;
386
// diagonalize a copy of overlap_
387
RefSymmSCMatrix M = overlap_aux.copy();
388
RefSCMatrix U(M.dim(), M.dim(), M.kit());
389
RefDiagSCMatrix m(M.dim(), M.kit());
392
Ref<SCElementMaxAbs> maxabsop = new SCElementMaxAbs;
393
m.element_op(maxabsop.pointer());
394
double maxabs = maxabsop->result();
395
double s_tol = LINDEP_TOL * maxabs;
397
double minabs = maxabs;
398
BlockedDiagSCMatrix *bm = dynamic_cast<BlockedDiagSCMatrix*>(m.pointer());
400
ExEnv::out0() << "R12A_intermed_spinorb_abs: orthog_aux: expected blocked overlap" << endl;
403
double *pm_sqrt = new double[bm->dim()->n()];
404
double *pm_isqrt = new double[bm->dim()->n()];
405
int *pm_index = new int[bm->dim()->n()];
406
int *nfunc = new int[bm->nblocks()];
409
for (int i=0; i<bm->nblocks(); i++) {
411
if (bm->block(i).null()) continue;
412
int n = bm->block(i)->dim()->n();
413
double *pm = new double[n];
414
bm->block(i)->convert(pm);
415
for (int j=0; j<n; j++) {
417
if (pm[j] < minabs) { minabs = pm[j]; }
418
pm_sqrt[nfunctot] = sqrt(pm[j]);
419
pm_isqrt[nfunctot] = 1.0/pm_sqrt[nfunctot];
420
pm_index[nfunctot] = j;
432
ExEnv::out0() << indent << "Removed " << nlindep << " linearly dependent functions from the auxiliary basis" << endl;
434
// make sure all nodes end up with exactly the same data
435
msg->bcast(nfunctot);
436
msg->bcast(nfunc, bm->nblocks());
437
msg->bcast(pm_sqrt,nfunctot);
438
msg->bcast(pm_isqrt,nfunctot);
439
msg->bcast(pm_index,nfunctot);
440
osodim_aux = new SCDimension(nfunctot, bm->nblocks(),
441
nfunc, "ortho aux SO (canonical)");
442
for (int i=0; i<bm->nblocks(); i++) {
443
osodim_aux->blocks()->set_subdim(i, new SCDimension(nfunc[i]));
446
overlap_aux_eigvec = so_matrixkit_aux->matrix(sodim_aux, osodim_aux);
448
= dynamic_cast<BlockedSCMatrix*>(overlap_aux_eigvec.pointer());
450
= dynamic_cast<BlockedSCMatrix*>(U.pointer());
453
for (int i=0; i<bev->nblocks(); i++) {
454
if (bev->block(i).null()) continue;
455
RefSCMatrix bev_block = bev->block(i);
456
RefSCMatrix bU_block = bU->block(i);
457
for (int j=0; j<nfunc[i]; j++) {
458
// For some reason this produces a bunch of temp objects which are never destroyed
459
// and default_matrixkit is not detroyed at the end
460
// bev->block(i)->assign_column(
461
// bU->block(i)->get_column(pm_index[ifunc]),j
463
int nk = bev_block->rowdim().n();
464
for (int k=0; k<nk; k++)
465
bev_block->set_element(k,j,bU_block->get_element(k,pm_index[ifunc]));
470
overlap_sqrt_eigval = so_matrixkit_aux->diagmatrix(osodim_aux);
471
overlap_sqrt_eigval->assign(pm_sqrt);
472
overlap_isqrt_eigval = so_matrixkit_aux->diagmatrix(osodim_aux);
473
overlap_isqrt_eigval->assign(pm_isqrt);
481
overlap_aux.print("S aux");
482
overlap_aux_eigvec.print("S aux eigvec");
483
overlap_isqrt_eigval.print("s^(-1/2) aux eigval");
484
overlap_sqrt_eigval.print("s^(1/2) aux eigval");
487
RefSCMatrix orthog_aux_so = overlap_isqrt_eigval * overlap_aux_eigvec.t();
488
orthog_aux_so = orthog_aux_so.t();
491
orthog_aux = pl_aux->evecs_to_AO_basis(orthog_aux_so);
495
orthog_aux.print("Aux orthogonalizer");
496
RefSCMatrix tmp = orthog_aux.t() * pl_aux->to_AO_basis(overlap_aux) * orthog_aux;
498
tmp.print("Xt * S * X (Aux)");
501
int noso_aux = orthog_aux.coldim().n();
502
double *orthog_aux_vector = new double[nbasis_aux*noso_aux];
503
orthog_aux.convert(orthog_aux_vector);
505
int *symorb_irrep_aux = new int[noso_aux];
507
for (int i=0; i<osodim_aux->blocks()->nblock(); i++) {
508
for (int j=0; j<osodim_aux->blocks()->size(i); j++, orbnum++) {
509
symorb_irrep_aux[orbnum] = i;
514
/////////////////////////////////////
516
/////////////////////////////////////
520
ExEnv::outn() << indent
521
<< scprintf("node %i, begin loop over i-batches",me) << endl;
523
// end of debug print
525
// Initialize the integrals
526
integral->set_storage(mem_remaining);
527
Ref<TwoBodyInt>* tbints = new Ref<TwoBodyInt>[thr->nthread()];
528
for (int i=0; i<thr->nthread(); i++) {
529
tbints[i] = integral->grt();
531
ExEnv::out0() << indent
532
<< scprintf("Memory used for integral storage: %i Bytes",
533
integral->storage_used())
538
ExEnv::errn() << "MBPT2: memory group not initialized" << endl;
541
mem->set_localsize(num_te_types*nijmax*nocc*nbasis_aux*sizeof(double));
542
ExEnv::out0() << indent
543
<< "Size of global distributed array: "
548
Ref<ThreadLock> lock = thr->new_lock();
549
R12A_ABS_123Qtr** e123thread = new R12A_ABS_123Qtr*[thr->nthread()];
550
for (int i=0; i<thr->nthread(); i++) {
551
e123thread[i] = new R12A_ABS_123Qtr(i, thr->nthread(), me, nproc,
552
mem, msg, lock, bs, bs_aux, tbints[i],
553
nocc, nocc_act, scf_vector, tol, debug_,
554
r12info()->dynamic());
557
///////////////////////////////////////////////////////////
558
// Figure out which integrals accumulator should be used
559
///////////////////////////////////////////////////////////
561
Ref<R12IntsAcc> r12intsacc;
562
R12IntEvalInfo::StoreMethod ints_method = r12info()->ints_method();
563
char *r12ints_file = r12info()->ints_file();
564
bool restart = (restart_orbital_ > 0);
566
switch (ints_method) {
568
case R12IntEvalInfo::mem_only:
570
throw std::runtime_error("R12IntEval_abs_A::compute -- cannot use MemoryGrp-based accumulator when restarting");
571
ExEnv::out0() << indent << "Will hold transformed integrals in memory" << endl;
572
r12intsacc = new R12IntsAcc_MemoryGrp(mem,num_te_types,nocc,nbasis_aux,nocc,nfzc);
575
case R12IntEvalInfo::mem_posix:
577
ExEnv::out0() << indent << "Will hold transformed integrals in memory" << endl;
578
r12intsacc = new R12IntsAcc_MemoryGrp(mem,num_te_types,nocc,nbasis_aux,nocc,nfzc);
581
// else use the next case
583
case R12IntEvalInfo::posix:
584
ExEnv::out0() << indent << "Will use POSIX I/O on node 0 to handle transformed integrals" << endl;
585
r12intsacc = new R12IntsAcc_Node0File(mem,r12ints_file,num_te_types,nocc,nbasis_aux,nocc,nfzc,restart);
589
case R12IntEvalInfo::mem_mpi:
591
ExEnv::out0() << indent << "Will hold transformed integrals in memory" << endl;
592
r12intsacc = new R12IntsAcc_MemoryGrp(mem,num_te_types,nocc,nbasis_aux,nocc,nfzc);
595
// else use the next case
597
case R12IntEvalInfo::mpi:
598
ExEnv::out0() << indent << "Will use MPI-IO (individual I/O) to handle transformed integrals" << endl;
599
r12intsacc = new R12IntsAcc_MPIIOFile_Ind(mem,r12ints_file,num_te_types,nocc,nbasis_aux,nocc,nfzc,restart);
604
throw std::runtime_error("R12IntEval_abs_A::compute -- invalid integrals store method");
609
/*-----------------------------------
611
Start the integrals transformation
613
-----------------------------------*/
614
tim_enter("mp2-r12/a passes");
615
if (me == 0 && mole->if_to_checkpoint() && r12intsacc->can_restart()) {
616
StateOutBin stateout(mole->checkpoint_file());
617
SavableState::save_state(mole,stateout);
618
ExEnv::out0() << indent << "Checkpointed the wave function" << endl;
621
for (int pass=0; pass<npass; pass++) {
623
ExEnv::out0() << indent << "Beginning pass " << pass+1 << endl;
625
int i_offset = restart_orbital_ + pass*ni + nfzc;
626
if ((pass == npass - 1) && (rest != 0)) ni = rest;
628
// Compute number of of i,j pairs on each node during current pass for
633
for (int i=0; i<ni; i++) {
634
for (int j=0; j<nocc_act; j++) {
635
if (index++ % nproc == me) nij++;
641
ExEnv::outn() << indent << "node " << me << ", nij = " << nij << endl;
643
// Allocate and initialize some arrays
644
// (done here to avoid having these arrays
645
// overlap with arrays allocated later)
647
// Allocate (and initialize) some arrays
648
double* integral_ijsk = (double*) mem->localdata();
649
bzerofast(integral_ijsk, (num_te_types*nij*nocc*nbasis_aux));
652
ExEnv::out0() << indent
653
<< scprintf("Begin loop over shells (grt, 1.+2. q.t.)") << endl;
655
// Do the two electron integrals and the first three quarter transformations
656
tim_enter("grt+1.qt+2.qt+3.qt");
657
for (int i=0; i<thr->nthread(); i++) {
658
e123thread[i]->set_i_offset(i_offset);
659
e123thread[i]->set_ni(ni);
660
thr->add_thread(i,e123thread[i]);
661
# if SINGLE_THREAD_E123
662
e123thread[i]->run();
665
# if !SINGLE_THREAD_E123
666
thr->start_threads();
669
tim_exit("grt+1.qt+2.qt+3.qt");
670
ExEnv::out0() << indent << "End of loop over shells" << endl;
672
mem->sync(); // Make sure ijsk is complete on each node before continuing
673
integral_ijsk = (double*) mem->localdata();
680
for (int i = 0; i<ni; i++) {
681
for (int j = 0; j<nocc_act; j++) {
682
if (index++ % nproc == me) {
683
double *ij_offset = integral_ijsk + num_te_types*nocc*nbasis_aux*ij_index;
684
for(int te_type=0; te_type<PRINT_NUM_TE_TYPES; ++te_type, ij_offset+=nocc*nbasis_aux) {
685
double *ijsk_ptr = ij_offset;
686
for (int s = 0; s<nbasis_aux; s++) {
687
for (int k = 0; k<nocc; k++) {
688
printf("3Q: type = %d (%d %d|%d %d) = %12.8f\n",
689
te_type,i+i_offset,k,j+j_offset,s,*ijsk_ptr);
701
double *kx_tmp = new double[nocc*noso_aux];
702
// in ikjx (stored as ijkx): i act; j act; k occ; x any MO.
704
// Begin fourth quarter transformation
705
ExEnv::out0() << indent << "Begin fourth q.t." << endl;
706
tim_enter("4. q.t.");
709
for (int i=0; i<ni; i++) {
710
for (int j=0; j<nocc_act; j++) {
711
if (index++ % nproc == me) {
712
double *ij_offset = integral_ijsk + num_te_types*nocc*nbasis_aux*ij_index;
714
for(int te_type=0; te_type<num_te_types; te_type++,ij_offset+=nocc*nbasis_aux) {
715
bzerofast(kx_tmp, nocc*noso_aux);
716
double *ijs_offset = ij_offset;
719
for (int s=0; s<nbasis_aux; s++, ijs_offset+=nocc) {
721
for (int x=0; x<noso_aux; x++, ++sx) {
722
double c_sx = orthog_aux_vector[sx];
723
double *kx_ptr = kx_tmp + x;
724
double *sk_ptr = ijs_offset;
726
for (int k=0; k<nocc; ++k) {
727
*kx_ptr += c_sx * *sk_ptr;
734
// Put kx_tmp into ijsk for one i,j while
735
// overwriting elements of ijsk
736
double *ijsk_ptr = ij_offset;
737
double *ijkx_ptr = kx_tmp;
738
int nkx = nocc*noso_aux;
739
for (int kx=0; kx<nkx; ++kx) {
740
*ijsk_ptr = *ijkx_ptr;
744
} // end of te_type loop
749
// end of fourth quarter transformation
751
ExEnv::out0() << indent << "End of fourth q.t." << endl;
753
// The array integral_ijsk has now been overwritten by MO integrals ijkx
754
// rename the array mo_int
755
double* mo_int = integral_ijsk;
758
// Zero out nonsymmetric integrals
763
for (int i = 0; i<ni; i++) {
764
for (int j = 0; j<nocc_act; j++) {
765
if (index++ % nproc == me) {
766
double *ij_offset = mo_int + num_te_types*nocc*nbasis_aux*ij_index;
767
for(int te_type=0; te_type<num_te_types; ++te_type,ij_offset+=nocc*nbasis_aux) {
768
double *ijkx_ptr = ij_offset;
769
for (int k=0; k<nocc; ++k) {
770
for (int x=0; x<noso_aux; ++x) {
771
if (( mo_irrep[i+i_offset] ^
772
mo_irrep[j+j_offset] ^
774
symorb_irrep_aux[x]) ) {
792
for (int i = 0; i<ni; i++) {
793
for (int j = 0; j<nocc_act; j++) {
794
if (index++ % nproc == me) {
795
double *ij_offset = mo_int + num_te_types*nocc*nbasis_aux*ij_index;
796
for(int te_type=0; te_type<PRINT_NUM_TE_TYPES; te_type++,ij_offset+=nocc*nbasis_aux) {
797
double *ijkx_ptr = ij_offset;
798
for (int k=0; k<nocc; ++k) {
799
for (int x=0; x<noso_aux; ++x) {
800
printf("4Q: type = %d (%d %d|%d %d) = %12.8f\n",
801
te_type,i+i_offset,k,j+j_offset,x,*ijkx_ptr);
814
mem->sync(); // Make sure MO integrals are complete on all nodes before continuing
816
// Push locally stored integrals to an accumulator
817
// This could involve storing the data to disk or simply remembering the pointer
818
tim_enter("MO ints store");
819
r12intsacc->store_memorygrp(mem,ni);
820
tim_exit("MO ints store");
823
if (me == 0 && mole->if_to_checkpoint() && r12intsacc->can_restart()) {
824
current_orbital_ += ni;
825
StateOutBin stateout(mole->checkpoint_file());
826
SavableState::save_state(mole,stateout);
827
ExEnv::out0() << indent << "Checkpointed the wave function" << endl;
830
} // end of loop over passes
831
tim_exit("mp2-r12/a passes");
833
ExEnv::out0() << indent << "End of mp2-r12/a transformation" << endl;
834
// Done storing integrals - commit the content
835
// WARNING: it is not safe to use mem until deactivate has been called on the accumulator
836
// After that deactivate the size of mem will be 0 [mem->set_localsize(0)]
837
r12intsacc->commit();
839
/*--------------------------------
840
Compute MP2-R12/A intermediates
842
--------------------------------*/
843
tim_enter("mp2-r12a intermeds");
844
int naa = (nocc_act*(nocc_act-1))/2; // Number of alpha-alpha pairs
845
int nab = nocc_act*nocc_act; // Number of alpha-beta pairs
847
ExEnv::out0() << indent << "naa = " << naa << endl;
848
ExEnv::out0() << indent << "nab = " << nab << endl;
850
double *Vaa_ijkl = new double[naa*naa];
851
double *Xaa_ijkl = new double[naa*naa];
852
double *Taa_ijkl = new double[naa*naa];
853
double *Vab_ijkl = new double[nab*nab];
854
double *Xab_ijkl = new double[nab*nab];
855
double *Tab_ijkl = new double[nab*nab];
857
ExEnv::out0() << indent << "Allocated intermediates V, X, and T" << endl;
858
bzerofast(Vaa_ijkl,naa*naa);
859
bzerofast(Xaa_ijkl,naa*naa);
860
bzerofast(Taa_ijkl,naa*naa);
861
bzerofast(Vab_ijkl,nab*nab);
862
bzerofast(Xab_ijkl,nab*nab);
863
bzerofast(Tab_ijkl,nab*nab);
865
// Compute intermediates
867
ExEnv::out0() << indent << "Ready to compute intermediates V, X, and T" << endl;
868
const double oosqrt2 = 1.0/sqrt(2.0);
869
// Compute the number of tasks that have full access to the integrals
870
// and split the work among them
871
int nproc_with_ints = 0;
872
for(int proc=0;proc<nproc;proc++)
873
if (r12intsacc->has_access(proc)) nproc_with_ints++;
874
int *proc_with_ints = new int[nproc];
876
for(int proc=0;proc<nproc;proc++)
877
if (r12intsacc->has_access(proc)) {
878
proc_with_ints[proc] = count;
882
proc_with_ints[proc] = -1;
884
ExEnv::out0() << indent << "Computing intermediates on " << nproc_with_ints
885
<< " processors" << endl;
888
//////////////////////////////////////////////////////////////
890
// Evaluation of the intermediates proceeds as follows:
892
// loop over batches of kl, k >= l, 0<=k,l<nocc_act
893
// load (kl|xy), (kl| [T1,r12] |xy), and (lk| [T1,r12] |xy)
894
// (aka kl-sets) into memory
896
// loop over batches of ij, i>=j, 0<=i,j<nocc_act
897
// load (ij|r12|xy) into memory
898
// (aka ij-sets) into memory
899
// compute V[ij][kl] and T[ij][kl] for all ij and kl in
900
// the "direct product" batch
904
/////////////////////////////////////////////////////////////////////////////////
906
if (r12intsacc->has_access(me)) {
908
for(int k=0;k<nocc_act;k++)
909
for(int l=0;l<=k;l++,kl++) {
910
double pfac_kl = (k==l) ? oosqrt2 : 1.0;
911
int kl_proc = kl%nproc_with_ints;
912
if (kl_proc != proc_with_ints[me])
914
int kl_aa = k*(k-1)/2 + l;
915
int kl_ab = k*nocc_act + l;
916
int lk_ab = l*nocc_act + k;
918
// Get (|r12|) and (|[r12,T1]|) integrals only
919
tim_enter("MO ints retrieve");
920
double *klox_buf_eri = r12intsacc->retrieve_pair_block(k,l,R12IntsAcc::eri);
921
double *klox_buf_r12 = r12intsacc->retrieve_pair_block(k,l,R12IntsAcc::r12);
922
double *klox_buf_r12t1 = r12intsacc->retrieve_pair_block(k,l,R12IntsAcc::r12t1);
923
double *klox_buf_r12t2 = r12intsacc->retrieve_pair_block(k,l,R12IntsAcc::r12t2);
925
double *lkox_buf_eri = r12intsacc->retrieve_pair_block(l,k,R12IntsAcc::eri);
926
double *lkox_buf_r12 = r12intsacc->retrieve_pair_block(l,k,R12IntsAcc::r12);
927
double *lkox_buf_r12t1 = r12intsacc->retrieve_pair_block(l,k,R12IntsAcc::r12t1);
928
double *lkox_buf_r12t2 = r12intsacc->retrieve_pair_block(l,k,R12IntsAcc::r12t2);
929
tim_exit("MO ints retrieve");
932
for(int i=0;i<nocc_act;i++)
933
for(int j=0;j<=i;j++,ij++) {
935
double pfac_ij = (i==j) ? oosqrt2 : 1.0;
936
int ij_aa = i*(i-1)/2 + j;
937
int ij_ab = i*nocc_act + j;
938
int ji_ab = j*nocc_act + i;
940
tim_enter("MO ints retrieve");
941
double *ijox_buf_r12 = r12intsacc->retrieve_pair_block(i,j,R12IntsAcc::r12);
942
double *jiox_buf_r12 = r12intsacc->retrieve_pair_block(j,i,R12IntsAcc::r12);
943
tim_exit("MO ints retrieve");
945
double *Vaa_ij = Vaa_ijkl + ij_aa*naa;
946
double *Vab_ij = Vab_ijkl + ij_ab*nab;
947
double *Vab_ji = Vab_ijkl + ji_ab*nab;
948
double *Xaa_ij = Xaa_ijkl + ij_aa*naa;
949
double *Xab_ij = Xab_ijkl + ij_ab*nab;
950
double *Xab_ji = Xab_ijkl + ji_ab*nab;
951
double *Taa_ij = Taa_ijkl + ij_aa*naa;
952
double *Tab_ij = Tab_ijkl + ij_ab*nab;
953
double *Tab_ji = Tab_ijkl + ji_ab*nab;
955
tim_enter("MO ints contraction");
956
double Vaa_ijkl, Vab_ijkl, Vab_jikl, Vab_ijlk, Vab_jilk;
957
double Xaa_ijkl, Xab_ijkl, Xab_jikl, Xab_ijlk, Xab_jilk;
958
double Taa_ijkl, Tab_ijkl, Tab_jikl, Tab_ijlk, Tab_jilk;
959
Vaa_ijkl = Vab_ijkl = Vab_jikl = Vab_ijlk = Vab_jilk = 0.0;
960
Xaa_ijkl = Xab_ijkl = Xab_jikl = Xab_ijlk = Xab_jilk = 0.0;
961
Taa_ijkl = Tab_ijkl = Tab_jikl = Tab_ijlk = Tab_jilk = 0.0;
962
for(int o=0;o<nocc;o++) {
963
double pfac_xy = 1.0;
964
for(int x=0;x<noso_aux;x++) {
965
int ox_offset = o*noso_aux + x;
966
double ij_r12_ox = ijox_buf_r12[ox_offset];
967
double ji_r12_ox = jiox_buf_r12[ox_offset];
968
double kl_eri_ox = klox_buf_eri[ox_offset];
969
double lk_eri_ox = lkox_buf_eri[ox_offset];
970
Vab_ijkl -= pfac_xy * (ij_r12_ox * kl_eri_ox + ji_r12_ox * lk_eri_ox);
972
Vab_jikl -= pfac_xy * (ji_r12_ox * kl_eri_ox + ij_r12_ox * lk_eri_ox);
974
Vab_ijlk -= pfac_xy * (ij_r12_ox * lk_eri_ox + ji_r12_ox * kl_eri_ox);
975
if (i != j && k != l) {
976
Vaa_ijkl -= pfac_xy * (ij_r12_ox - ji_r12_ox)*(kl_eri_ox - lk_eri_ox);
977
Vab_jilk -= pfac_xy * (ij_r12_ox * kl_eri_ox + ji_r12_ox * lk_eri_ox);
979
double kl_r12_ox = klox_buf_r12[ox_offset];
980
double lk_r12_ox = lkox_buf_r12[ox_offset];
981
Xab_ijkl -= pfac_xy * (ij_r12_ox * kl_r12_ox + ji_r12_ox * lk_r12_ox);
983
Xab_jikl -= pfac_xy * (ji_r12_ox * kl_r12_ox + ij_r12_ox * lk_r12_ox);
985
Xab_ijlk -= pfac_xy * (ij_r12_ox * lk_r12_ox + ji_r12_ox * kl_r12_ox);
986
if (i != j && k != l) {
987
Xaa_ijkl -= pfac_xy * (ij_r12_ox - ji_r12_ox)*(kl_r12_ox - lk_r12_ox);
988
Xab_jilk -= pfac_xy * (ij_r12_ox * kl_r12_ox + ji_r12_ox * lk_r12_ox);
990
double kl_r12t1_ox = klox_buf_r12t1[ox_offset];
991
double kl_r12t2_ox = klox_buf_r12t2[ox_offset];
992
double lk_r12t1_ox = lkox_buf_r12t1[ox_offset];
993
double lk_r12t2_ox = lkox_buf_r12t2[ox_offset];
994
double kl_Tr12_ox = -kl_r12t1_ox-kl_r12t2_ox;
995
double lk_Tr12_ox = -lk_r12t1_ox-lk_r12t2_ox;
996
Tab_ijkl += pfac_xy * (ij_r12_ox * kl_Tr12_ox + ji_r12_ox * lk_Tr12_ox);
998
Tab_jikl += pfac_xy * (ji_r12_ox * kl_Tr12_ox + ij_r12_ox * lk_Tr12_ox);
1000
Tab_ijlk += pfac_xy * (ij_r12_ox * lk_Tr12_ox + ji_r12_ox * kl_Tr12_ox);
1001
if (i != j && k != l) {
1002
Taa_ijkl += pfac_xy * (ij_r12_ox - ji_r12_ox)*(kl_Tr12_ox - lk_Tr12_ox);
1003
Tab_jilk += pfac_xy * (ij_r12_ox * kl_Tr12_ox + ji_r12_ox * lk_Tr12_ox);
1007
Vab_ij[kl_ab] += Vab_ijkl;
1009
Vab_ji[kl_ab] += Vab_jikl;
1011
Vab_ij[lk_ab] += Vab_ijlk;
1012
if (i != j && k != l) {
1013
Vaa_ij[kl_aa] += Vaa_ijkl;
1014
Vab_ji[lk_ab] += Vab_jilk;
1016
Xab_ij[kl_ab] += Xab_ijkl;
1018
Xab_ji[kl_ab] += Xab_jikl;
1020
Xab_ij[lk_ab] += Xab_ijlk;
1021
if (i != j && k != l) {
1022
Xaa_ij[kl_aa] += Xaa_ijkl;
1023
Xab_ji[lk_ab] += Xab_jilk;
1025
Tab_ij[kl_ab] += Tab_ijkl;
1027
Tab_ji[kl_ab] += Tab_jikl;
1029
Tab_ij[lk_ab] += Tab_ijlk;
1030
if (i != j && k != l) {
1031
Taa_ij[kl_aa] += Taa_ijkl;
1032
Tab_ji[lk_ab] += Tab_jilk;
1034
tim_exit("MO ints contraction");
1036
#if PRINT_R12_INTERMED
1037
if (i != j && k != l)
1038
printf("Vaa[%d][%d] = %lf\n",ij_aa,kl_aa,Vaa_ij[kl_aa]);
1039
printf("Vab[%d][%d] = %lf\n",ij_ab,kl_ab,Vab_ij[kl_ab]);
1041
printf("Vab[%d][%d] = %lf\n",ji_ab,kl_ab,Vab_ji[kl_ab]);
1043
printf("Vab[%d][%d] = %lf\n",ij_ab,lk_ab,Vab_ij[lk_ab]);
1044
if (i != j && k != l)
1045
printf("Vab[%d][%d] = %lf\n",ji_ab,lk_ab,Vab_ji[lk_ab]);
1046
if (i != j && k != l)
1047
printf("Xaa[%d][%d] = %lf\n",ij_aa,kl_aa,Xaa_ij[kl_aa]);
1048
printf("Xab[%d][%d] = %lf\n",ij_ab,kl_ab,Xab_ij[kl_ab]);
1050
printf("Xab[%d][%d] = %lf\n",ji_ab,kl_ab,Xab_ji[kl_ab]);
1052
printf("Xab[%d][%d] = %lf\n",ij_ab,lk_ab,Xab_ij[lk_ab]);
1053
if (i != j && k != l)
1054
printf("Xab[%d][%d] = %lf\n",ji_ab,lk_ab,Xab_ji[lk_ab]);
1055
if (i != j && k != l)
1056
printf("Taa[%d][%d] = %lf\n",ij_aa,kl_aa,Taa_ij[kl_aa]);
1057
printf("Tab[%d][%d] = %lf\n",ij_ab,kl_ab,Tab_ij[kl_ab]);
1059
printf("Tab[%d][%d] = %lf\n",ji_ab,kl_ab,Tab_ji[kl_ab]);
1061
printf("Tab[%d][%d] = %lf\n",ij_ab,lk_ab,Tab_ij[lk_ab]);
1062
if (i != j && k != l)
1063
printf("Tab[%d][%d] = %lf\n",ji_ab,lk_ab,Tab_ji[lk_ab]);
1065
r12intsacc->release_pair_block(i,j,R12IntsAcc::r12);
1066
r12intsacc->release_pair_block(j,i,R12IntsAcc::r12);
1068
r12intsacc->release_pair_block(k,l,R12IntsAcc::eri);
1069
r12intsacc->release_pair_block(k,l,R12IntsAcc::r12);
1070
r12intsacc->release_pair_block(k,l,R12IntsAcc::r12t1);
1071
r12intsacc->release_pair_block(k,l,R12IntsAcc::r12t2);
1072
r12intsacc->release_pair_block(l,k,R12IntsAcc::eri);
1073
r12intsacc->release_pair_block(l,k,R12IntsAcc::r12);
1074
r12intsacc->release_pair_block(l,k,R12IntsAcc::r12t1);
1075
r12intsacc->release_pair_block(l,k,R12IntsAcc::r12t2);
1079
// tasks that have nothing to do should still create timers
1080
tim_enter("MO ints retrieve");
1081
tim_exit("MO ints retrieve");
1082
tim_enter("MO ints contraction");
1083
tim_exit("MO ints contraction");
1085
delete[] proc_with_ints;
1086
tim_exit("mp2-r12a intermeds");
1087
r12intsacc->deactivate();
1089
ExEnv::out0() << indent << "Computed intermediates V, X, and T" << endl;
1092
// Use MemoryGrp to accumulate contributions to intermediates V, X, and T on all nodes
1093
msg->sum(Vaa_ijkl,naa*naa,0,-1);
1094
msg->sum(Vab_ijkl,nab*nab,0,-1);
1095
msg->sum(Xaa_ijkl,naa*naa,0,-1);
1096
msg->sum(Xab_ijkl,nab*nab,0,-1);
1097
msg->sum(Taa_ijkl,naa*naa,0,-1);
1098
msg->sum(Tab_ijkl,nab*nab,0,-1);
1102
ExEnv::out0() << indent << "Gathered intermediates V, X, and T" << endl;
1104
// Add intermediates contribution to their global values
1105
for(int ij=0;ij<naa;ij++)
1106
for(int kl=0;kl<=ij;kl++) {
1107
int ijkl = ij*naa+kl;
1108
int klij = kl*naa+ij;
1109
double velem = Vaa->get_element(ij,kl) + Vaa_ijkl[ijkl];
1110
Vaa->set_element(ij,kl,velem);
1112
velem = Vaa->get_element(kl,ij) + Vaa_ijkl[klij];
1113
Vaa->set_element(kl,ij,velem);
1115
double xelem = Xaa->get_element(ij,kl) + Xaa_ijkl[ijkl];
1116
Xaa->set_element(ij,kl,xelem);
1118
xelem = Xaa->get_element(kl,ij) + Xaa_ijkl[klij];
1119
Xaa->set_element(kl,ij,xelem);
1121
double belem = Baa->get_element(ij,kl) + 0.5*(Taa_ijkl[ijkl] + Taa_ijkl[klij]);
1122
Baa->set_element(ij,kl,belem);
1123
Baa->set_element(kl,ij,belem);
1126
for(int ij=0;ij<nab;ij++)
1127
for(int kl=0;kl<=ij;kl++) {
1128
int ijkl = ij*nab+kl;
1129
int klij = kl*nab+ij;
1130
double velem = Vab->get_element(ij,kl) + Vab_ijkl[ijkl];
1131
Vab->set_element(ij,kl,velem);
1133
velem = Vab->get_element(kl,ij) + Vab_ijkl[klij];
1134
Vab->set_element(kl,ij,velem);
1136
double xelem = Xab->get_element(ij,kl) + Xab_ijkl[ijkl];
1137
Xab->set_element(ij,kl,xelem);
1139
xelem = Xab->get_element(kl,ij) + Xab_ijkl[klij];
1140
Xab->set_element(kl,ij,xelem);
1142
double belem = Bab->get_element(ij,kl) + 0.5*(Tab_ijkl[ijkl] + Tab_ijkl[klij]);
1143
Bab->set_element(ij,kl,belem);
1144
Bab->set_element(kl,ij,belem);
1146
msg->sum(aoint_computed);
1148
if (me == 0 && mole->if_to_checkpoint() && r12intsacc->can_restart()) {
1149
StateOutBin stateout(mole->checkpoint_file());
1150
SavableState::save_state(mole,stateout);
1151
ExEnv::out0() << indent << "Checkpointed the wave function" << endl;
1154
#if PRINT_BIGGEST_INTS
1155
biggest_ints_1.combine(msg);
1156
biggest_ints_2.combine(msg);
1157
biggest_ints_2s.combine(msg);
1158
biggest_ints_3a.combine(msg);
1159
biggest_ints_3.combine(msg);
1162
#if PRINT_BIGGEST_INTS
1163
ExEnv::out0() << "biggest 1/4 transformed ints" << endl;
1164
for (int i=0; i<biggest_ints_1.ncontrib(); i++) {
1165
ExEnv::out0() << scprintf("%3d %3d %3d %3d %16.12f",
1166
biggest_ints_1.indices(i)[0],
1167
biggest_ints_1.indices(i)[1],
1168
biggest_ints_1.indices(i)[2],
1169
biggest_ints_1.indices(i)[3],
1170
biggest_ints_1.val(i)
1174
ExEnv::out0() << "biggest 2/4 transformed ints" << endl;
1175
for (int i=0; i<biggest_ints_2.ncontrib(); i++) {
1176
ExEnv::out0() << scprintf("%3d %3d %3d %3d %16.12f",
1177
biggest_ints_2.indices(i)[0],
1178
biggest_ints_2.indices(i)[1],
1179
biggest_ints_2.indices(i)[2],
1180
biggest_ints_2.indices(i)[3],
1181
biggest_ints_2.val(i)
1185
ExEnv::out0() << "restricted 2/4 transformed ints" << endl;
1186
for (int i=0; i<biggest_ints_2s.ncontrib(); i++) {
1187
ExEnv::out0() << scprintf("%3d %3d %3d %3d %16.12f",
1188
biggest_ints_2s.indices(i)[0],
1189
biggest_ints_2s.indices(i)[1],
1190
biggest_ints_2s.indices(i)[2],
1191
biggest_ints_2s.indices(i)[3],
1192
biggest_ints_2s.val(i)
1196
ExEnv::out0() << "biggest 3/4 transformed ints (in 3.)" << endl;
1197
for (int i=0; i<biggest_ints_3a.ncontrib(); i++) {
1198
ExEnv::out0() << scprintf("%3d %3d %3d %3d %16.12f",
1199
biggest_ints_3a.indices(i)[0],
1200
biggest_ints_3a.indices(i)[1],
1201
biggest_ints_3a.indices(i)[2],
1202
biggest_ints_3a.indices(i)[3],
1203
biggest_ints_3a.val(i)
1207
ExEnv::out0() << "biggest 3/4 transformed ints (in 4.)" << endl;
1208
for (int i=0; i<biggest_ints_3.ncontrib(); i++) {
1209
ExEnv::out0() << scprintf("%3d %3d %3d %3d %16.12f",
1210
biggest_ints_3.indices(i)[0],
1211
biggest_ints_3.indices(i)[1],
1212
biggest_ints_3.indices(i)[2],
1213
biggest_ints_3.indices(i)[3],
1214
biggest_ints_3.val(i)
1220
// Print out various energies etc.
1223
ExEnv::out0() << indent << "Number of shell quartets for which AO integrals\n"
1224
<< indent << "would have been computed without bounds checking: "
1225
<< npass*nshell*(nshell+1)*nshell*nshell_aux/2
1228
ExEnv::out0() << indent << "Number of shell quartets for which AO integrals\n"
1229
<< indent << "were computed: " << aoint_computed
1233
/*--------------------------
1235
--------------------------*/
1243
for (int i=0; i<thr->nthread(); i++) {
1244
delete e123thread[i];
1246
delete[] e123thread;
1249
delete[] tbints; tbints = 0;
1250
delete[] scf_vector;
1251
delete[] scf_vector_dat;
1253
tim_exit("r12a-abs-mem");
1257
if (me == 0 && mole->if_to_checkpoint()) {
1258
StateOutBin stateout(mole->checkpoint_file());
1259
SavableState::save_state(mole,stateout);
1260
ExEnv::out0() << indent << "Checkpointed the wave function" << endl;
1267
///////////////////////////////////////////////////////
1268
// Compute the batchsize for the transformation
1270
// Only arrays allocated before exiting the loop over
1271
// i-batches are included here - only these arrays
1272
// affect the batch size.
1273
///////////////////////////////////////////////////////
1275
R12IntEval_abs_A::compute_transform_batchsize_(size_t mem_alloc, size_t mem_static, int nocc_act, const int num_te_types)
1277
// Check is have enough for even static objects
1279
if (mem_alloc <= mem_static)
1282
mem_dyn = mem_alloc - mem_static;
1284
// Determine if calculation is possible at all (i.e., if ni=1 possible)
1286
distsize_t maxdyn = compute_transform_dynamic_memory_(ni, nocc_act, num_te_types);
1287
if (maxdyn > mem_dyn) {
1292
while (ni<=nocc_act) {
1293
maxdyn = compute_transform_dynamic_memory_(ni, nocc_act, num_te_types);
1294
if (maxdyn >= mem_dyn) {
1300
if (ni > nocc_act) ni = nocc_act;
1305
//////////////////////////////////////////////////////
1306
// Compute required (dynamic) memory
1307
// for a given batch size of the transformation
1309
// Only arrays allocated before exiting the loop over
1310
// i-batches are included here - only these arrays
1311
// affect the batch size.
1312
//////////////////////////////////////////////////////
1314
R12IntEval_abs_A::compute_transform_dynamic_memory_(int ni, int nocc_act, const int num_te_types)
1316
int nproc = r12info()->msg()->n();
1318
///////////////////////////////////////
1319
// the largest memory requirement will
1320
// occur just before
1321
// the end of the i-batch loop (mem)
1322
///////////////////////////////////////
1324
// compute nij as nij on node 0, since nij on node 0 is >= nij on other nodes
1327
for (int i=0; i<ni; i++) {
1328
for (int j=0; j<nocc_act; j++) {
1329
if (index++ % nproc == 0) nij++;
1333
int nbasis = r12info()->basis()->nbasis();
1334
int nfuncmax = r12info()->basis()->max_nfunction_in_shell();
1335
int nbasis_aux = r12info()->basis_aux()->nbasis();
1336
int nfuncmax_aux = r12info()->basis_aux()->max_nfunction_in_shell();
1337
int nthread = r12info()->thr()->nthread();
1338
int nocc = r12info()->nocc();
1340
distsize_t memsize = sizeof(double)*(num_te_types*((distsize_t)nthread * ni * nbasis * nfuncmax * nfuncmax_aux // iqrs
1341
+ (distsize_t)ni * nocc * nfuncmax_aux * nfuncmax_aux // ikrs
1342
+ (distsize_t)nij * nocc * nbasis_aux // ikjs - buffer of 3 q.t. and higher
1343
// transformed integrals
1345
+ (distsize_t)nocc * nfuncmax_aux // ks
1346
+ (distsize_t)nocc * nbasis_aux // kx
1353
////////////////////////////////////////////////////////////////////////////
1357
// c-file-style: "CLJ-CONDENSED"