4
// ***********************************************************************
7
// Copyright (2006) Sandia Corporation
9
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
10
// the U.S. Government retains certain rights in this software.
12
// This library is free software; you can redistribute it and/or modify
13
// it under the terms of the GNU Lesser General Public License as
14
// published by the Free Software Foundation; either version 2.1 of the
15
// License, or (at your option) any later version.
17
// This library is distributed in the hope that it will be useful, but
18
// WITHOUT ANY WARRANTY; without even the implied warranty of
19
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20
// Lesser General Public License for more details.
22
// You should have received a copy of the GNU Lesser General Public
23
// License along with this library; if not, write to the Free Software
24
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
26
// Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps
27
// (etphipp@sandia.gov).
29
// ***********************************************************************
33
#include "Sacado_Fad_SimpleFad.hpp"
34
#include "Sacado_CacheFad_DFad.hpp"
37
#include "TinyFadET/tfad.h"
39
#include "Teuchos_Time.hpp"
40
#include "Teuchos_CommandLineProcessor.hpp"
50
#ifdef PACKAGE_BUGREPORT
51
#undef PACKAGE_BUGREPORT
56
#ifdef PACKAGE_TARNAME
57
#undef PACKAGE_TARNAME
59
#ifdef PACKAGE_VERSION
60
#undef PACKAGE_VERSION
65
#define ADOLC_TAPELESS
66
#define NUMBER_DIRECTIONS 100
67
#include "adolc/adouble.h"
68
#include "adolc/drivers/drivers.h"
69
#include "adolc/interfaces.h"
72
// A performance test that computes a finite-element-like Jacobian using
73
// several Fad variants
76
Sacado::Fad::MemPool* Sacado::Fad::MemPoolStorage<double>::defaultPool_ = NULL;
78
void FAD::error(const char *msg) {
79
std::cout << msg << std::endl;
83
static const unsigned int nqp = 2;
84
static const unsigned int nnode = 2;
85
double w[nqp], jac[nqp], phi[nqp][nnode], dphi[nqp][nnode];
86
unsigned int gid[nnode];
88
ElemData(double mesh_spacing) {
91
xi[0] = -1.0 / std::sqrt(3.0);
92
xi[1] = 1.0 / std::sqrt(3.0);
94
for (unsigned int i=0; i<nqp; i++) {
98
// Element transformation Jacobian
99
jac[i] = 0.5*mesh_spacing;
101
// Shape functions & derivatives
102
phi[i][0] = 0.5*(1.0 - xi[i]);
103
phi[i][1] = 0.5*(1.0 + xi[i]);
110
template <class FadType>
111
void fad_init_fill(const ElemData& e,
113
const std::vector<double>& x,
114
std::vector<FadType>& x_fad) {
115
for (unsigned int node=0; node<e.nnode; node++)
116
for (unsigned int eqn=0; eqn<neqn; eqn++)
117
x_fad[node*neqn+eqn] = FadType(e.nnode*neqn, node*neqn+eqn,
118
x[e.gid[node]*neqn+eqn]);
122
#ifndef ADOLC_TAPELESS
123
void adolc_init_fill(bool retape,
127
const std::vector<double>& x,
128
std::vector<double>& x_local,
129
std::vector<adouble>& x_ad) {
132
for (unsigned int node=0; node<e.nnode; node++)
133
for (unsigned int eqn=0; eqn<neqn; eqn++) {
134
x_local[node*neqn+eqn] = x[e.gid[node]*neqn+eqn];
136
x_ad[node*neqn+eqn] <<= x_local[node*neqn+eqn];
142
void adolc_tapeless_init_fill(const ElemData& e,
144
const std::vector<double>& x,
145
std::vector<adtl::adouble>& x_ad) {
146
for (unsigned int node=0; node<e.nnode; node++)
147
for (unsigned int eqn=0; eqn<neqn; eqn++) {
148
x_ad[node*neqn+eqn] = x[e.gid[node]*neqn+eqn];
149
for (unsigned int i=0; i<neqn*e.nnode; i++)
150
x_ad[node*neqn+eqn].setADValue(i, 0.0);
151
x_ad[node*neqn+eqn].setADValue(node*neqn+eqn, 1.0);
158
void analytic_init_fill(const ElemData& e,
160
const std::vector<double>& x,
161
std::vector<double>& x_local) {
162
for (unsigned int node=0; node<e.nnode; node++)
163
for (unsigned int eqn=0; eqn<neqn; eqn++)
164
x_local[node*neqn+eqn] = x[e.gid[node]*neqn+eqn];
168
void template_element_fill(const ElemData& e,
170
const std::vector<T>& x,
174
// Construct element solution, derivative
175
for (unsigned int qp=0; qp<e.nqp; qp++) {
176
for (unsigned int eqn=0; eqn<neqn; eqn++) {
177
u[qp*neqn+eqn] = 0.0;
178
du[qp*neqn+eqn] = 0.0;
179
for (unsigned int node=0; node<e.nnode; node++) {
180
u[qp*neqn+eqn] += x[node*neqn+eqn] * e.phi[qp][node];
181
du[qp*neqn+eqn] += x[node*neqn+eqn] * e.dphi[qp][node];
186
// Compute sum of equations for coupling
187
std::vector<T> s(e.nqp*neqn);
188
for (unsigned int qp=0; qp<e.nqp; qp++) {
189
for (unsigned int eqn=0; eqn<neqn; eqn++) {
190
s[qp*neqn+eqn] = 0.0;
191
for (unsigned int j=0; j<neqn; j++) {
193
s[qp*neqn+eqn] += u[qp*neqn+j];
198
// Evaluate element residual
199
for (unsigned int node=0; node<e.nnode; node++) {
200
for (unsigned int eqn=0; eqn<neqn; eqn++) {
201
unsigned int row = node*neqn+eqn;
203
for (unsigned int qp=0; qp<e.nqp; qp++) {
205
e.w[qp]*e.jac[qp]*(-(1.0/(e.jac[qp]*e.jac[qp]))*
206
du[qp*neqn+eqn]*e.dphi[qp][node] +
207
e.phi[qp][node]*(exp(u[qp*neqn+eqn]) +
208
u[qp*neqn+eqn]*s[qp*neqn+eqn]));
214
void analytic_element_fill(const ElemData& e,
216
const std::vector<double>& x,
217
std::vector<double>& u,
218
std::vector<double>& du,
219
std::vector<double>& f,
220
std::vector< std::vector<double> >& jac) {
221
// Construct element solution, derivative
222
for (unsigned int qp=0; qp<e.nqp; qp++) {
223
for (unsigned int eqn=0; eqn<neqn; eqn++) {
224
u[qp*neqn+eqn] = 0.0;
225
du[qp*neqn+eqn] = 0.0;
226
for (unsigned int node=0; node<e.nnode; node++) {
227
u[qp*neqn+eqn] += x[node*neqn+eqn] * e.phi[qp][node];
228
du[qp*neqn+eqn] += x[node*neqn+eqn] * e.dphi[qp][node];
233
// Compute sum of equations for coupling
234
std::vector<double> s(e.nqp*neqn);
235
for (unsigned int qp=0; qp<e.nqp; qp++) {
236
for (unsigned int eqn=0; eqn<neqn; eqn++) {
237
s[qp*neqn+eqn] = 0.0;
238
for (unsigned int j=0; j<neqn; j++) {
240
s[qp*neqn+eqn] += u[qp*neqn+j];
245
// Evaluate element residual
246
for (unsigned int node_row=0; node_row<e.nnode; node_row++) {
247
for (unsigned int eqn_row=0; eqn_row<neqn; eqn_row++) {
248
unsigned int row = node_row*neqn+eqn_row;
250
for (unsigned int node_col=0; node_col<e.nnode; node_col++) {
251
for (unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) {
252
unsigned int col = node_col*neqn+eqn_col;
256
for (unsigned int qp=0; qp<e.nqp; qp++) {
257
double c1 = e.w[qp]*e.jac[qp];
258
double c2 = -(1.0/(e.jac[qp]*e.jac[qp]))*e.dphi[qp][node_row];
259
double c3 = e.phi[qp][node_row]*(exp(u[qp*neqn+eqn_row]));
260
double c4 = e.phi[qp][node_row]*u[qp*neqn+eqn_row];
261
double c5 = e.phi[qp][node_row]*s[qp*neqn+eqn_row];
264
f[row] += c1*(c2*du[qp*neqn+eqn_row] + c3 + c4*s[qp*neqn+eqn_row]);
265
for (unsigned int node_col=0; node_col<e.nnode; node_col++) {
266
jac[row][node_col*neqn+eqn_row] +=
267
c1*(c2*e.dphi[qp][node_col] + c35*e.phi[qp][node_col]);
268
for (unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) {
269
if (eqn_col != eqn_row)
270
jac[row][node_col*neqn+eqn_col] += c14*e.phi[qp][node_col];
278
template <class FadType>
279
void fad_process_fill(const ElemData& e,
281
const std::vector<FadType>& f_fad,
282
std::vector<double>& f,
283
std::vector< std::vector<double> >& jac) {
284
for (unsigned int eqn_row=0; eqn_row<neqn; eqn_row++) {
285
f[e.gid[0]*neqn+eqn_row] += f_fad[0*neqn+eqn_row].val();
286
f[e.gid[1]*neqn+eqn_row] += f_fad[1*neqn+eqn_row].val();
287
for (unsigned int node_col=0; node_col<e.nnode; node_col++) {
288
for (unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) {
289
unsigned int col = node_col*neqn+eqn_col;
290
unsigned int next_col = (node_col+1)*neqn+eqn_col;
291
jac[e.gid[0]*neqn+eqn_row][next_col] +=
292
f_fad[0*neqn+eqn_row].fastAccessDx(col);
293
jac[e.gid[1]*neqn+eqn_row][col] +=
294
f_fad[1*neqn+eqn_row].fastAccessDx(col);
301
#ifndef ADOLC_TAPELESS
302
void adolc_process_fill(bool retape,
306
std::vector<double>& x_local,
307
std::vector<adouble>& f_ad,
308
std::vector<double>& f,
309
std::vector<double>& f_local,
310
std::vector< std::vector<double> >& jac,
312
double **jac_local) {
314
for (unsigned int eqn_row=0; eqn_row<neqn; eqn_row++)
315
f_ad[0*neqn+eqn_row] >>= f_local[0*neqn+eqn_row];
316
for (unsigned int eqn_row=0; eqn_row<neqn; eqn_row++)
317
f_ad[1*neqn+eqn_row] >>= f_local[1*neqn+eqn_row];
321
//jacobian(tag, neqn*e.nnode, neqn*e.nnode, &x_local[0], jac_local);
322
forward(tag, neqn*e.nnode, neqn*e.nnode, neqn*e.nnode, &x_local[0],
323
seed, &f_local[0], jac_local);
325
for (unsigned int eqn_row=0; eqn_row<neqn; eqn_row++) {
326
f[e.gid[0]*neqn+eqn_row] += f_local[0*neqn+eqn_row];
327
f[e.gid[1]*neqn+eqn_row] += f_local[1*neqn+eqn_row];
328
for (unsigned int node_col=0; node_col<e.nnode; node_col++) {
329
for (unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) {
330
unsigned int col = node_col*neqn+eqn_col;
331
unsigned int next_col = (node_col+1)*neqn+eqn_col;
332
jac[e.gid[0]*neqn+eqn_row][next_col] += jac_local[0*neqn+eqn_row][col];
333
jac[e.gid[1]*neqn+eqn_row][col] += jac_local[1*neqn+eqn_row][col];
341
void adolc_tapeless_process_fill(const ElemData& e,
343
const std::vector<adtl::adouble>& f_ad,
344
std::vector<double>& f,
345
std::vector< std::vector<double> >& jac) {
346
for (unsigned int eqn_row=0; eqn_row<neqn; eqn_row++) {
347
f[e.gid[0]*neqn+eqn_row] += f_ad[0*neqn+eqn_row].getValue();
348
f[e.gid[1]*neqn+eqn_row] += f_ad[1*neqn+eqn_row].getValue();
349
for (unsigned int node_col=0; node_col<e.nnode; node_col++) {
350
for (unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) {
351
unsigned int col = node_col*neqn+eqn_col;
352
unsigned int next_col = (node_col+1)*neqn+eqn_col;
353
jac[e.gid[0]*neqn+eqn_row][next_col] +=
354
f_ad[0*neqn+eqn_row].getADValue(col);
355
jac[e.gid[1]*neqn+eqn_row][col] +=
356
f_ad[1*neqn+eqn_row].getADValue(col);
364
void analytic_process_fill(const ElemData& e,
366
const std::vector<double>& f_local,
367
const std::vector< std::vector<double> >& jac_local,
368
std::vector<double>& f,
369
std::vector< std::vector<double> >& jac) {
370
for (unsigned int eqn_row=0; eqn_row<neqn; eqn_row++) {
371
f[e.gid[0]*neqn+eqn_row] += f_local[0*neqn+eqn_row];
372
f[e.gid[1]*neqn+eqn_row] += f_local[1*neqn+eqn_row];
373
for (unsigned int node_col=0; node_col<e.nnode; node_col++) {
374
for (unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) {
375
unsigned int col = node_col*neqn+eqn_col;
376
unsigned int next_col = (node_col+1)*neqn+eqn_col;
377
jac[e.gid[0]*neqn+eqn_row][next_col] += jac_local[0*neqn+eqn_row][col];
378
jac[e.gid[1]*neqn+eqn_row][col] += jac_local[1*neqn+eqn_row][col];
384
void residual_process_fill(const ElemData& e,
386
const std::vector<double>& f_local,
387
std::vector<double>& f) {
388
for (unsigned int eqn_row=0; eqn_row<neqn; eqn_row++) {
389
f[e.gid[0]*neqn+eqn_row] += f_local[0*neqn+eqn_row];
390
f[e.gid[1]*neqn+eqn_row] += f_local[1*neqn+eqn_row];
394
template <class FadType>
395
double fad_jac_fill(unsigned int num_nodes, unsigned int num_eqns,
396
double mesh_spacing) {
397
ElemData e(mesh_spacing);
399
// Solution vector, residual, jacobian
400
std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
401
std::vector< std::vector<double> > jac(num_nodes*num_eqns);
402
for (unsigned int node_row=0; node_row<num_nodes; node_row++) {
403
for (unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) {
404
unsigned int row = node_row*num_eqns + eqn_row;
405
x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
407
jac[row] = std::vector<double>((e.nnode+1)*num_eqns);
408
for (unsigned int node_col=0; node_col<e.nnode+1; node_col++) {
409
for (unsigned int eqn_col=0; eqn_col<num_eqns; eqn_col++) {
410
unsigned int col = node_col*num_eqns + eqn_col;
417
Teuchos::Time timer("FE Fad Jacobian Fill", false);
419
std::vector<FadType> x_fad(e.nnode*num_eqns), f_fad(e.nnode*num_eqns);
420
std::vector<FadType> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
421
for (unsigned int i=0; i<num_nodes-1; i++) {
425
fad_init_fill(e, num_eqns, x, x_fad);
426
template_element_fill(e, num_eqns, x_fad, u, du, f_fad);
427
fad_process_fill(e, num_eqns, f_fad, f, jac);
431
// std::cout << "Fad Residual = " << std::endl;
432
// for (unsigned int i=0; i<num_nodes*num_eqns; i++)
433
// std::cout << "\t" << f[i] << std::endl;
435
// std::cout.precision(8);
436
// std::cout << "Fad Jacobian = " << std::endl;
437
// for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
438
// std::cout << "\t";
439
// for (unsigned int j=0; j<(e.nnode+1)*num_eqns; j++)
440
// std::cout << jac[i][j] << "\t";
441
// std::cout << std::endl;
444
return timer.totalElapsedTime();
448
#ifndef ADOLC_TAPELESS
449
double adolc_jac_fill(unsigned int num_nodes, unsigned int num_eqns,
450
double mesh_spacing) {
451
ElemData e(mesh_spacing);
453
// Solution vector, residual, jacobian
454
std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
455
std::vector< std::vector<double> > jac(num_nodes*num_eqns);
456
for (unsigned int node_row=0; node_row<num_nodes; node_row++) {
457
for (unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) {
458
unsigned int row = node_row*num_eqns + eqn_row;
459
x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
461
jac[row] = std::vector<double>((e.nnode+1)*num_eqns);
462
for (unsigned int node_col=0; node_col<e.nnode+1; node_col++) {
463
for (unsigned int eqn_col=0; eqn_col<num_eqns; eqn_col++) {
464
unsigned int col = node_col*num_eqns + eqn_col;
471
Teuchos::Time timer("FE ADOL-C Jacobian Fill", false);
473
std::vector<adouble> x_ad(e.nnode*num_eqns), f_ad(e.nnode*num_eqns);
474
std::vector<adouble> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
475
std::vector<double> x_local(e.nnode*num_eqns);
476
std::vector<double> f_local(e.nnode*num_eqns);
477
double **jac_local = new double*[e.nnode*num_eqns];
478
double **seed = new double*[e.nnode*num_eqns];
479
for (unsigned int i=0; i<e.nnode*num_eqns; i++) {
480
jac_local[i] = new double[e.nnode*num_eqns];
481
seed[i] = new double[e.nnode*num_eqns];
482
for (unsigned int j=0; j<e.nnode*num_eqns; j++)
487
// Tape first element
490
adolc_init_fill(true, 0, e, num_eqns, x, x_local, x_ad);
491
template_element_fill(e, num_eqns, x_ad, u, du, f_ad);
492
adolc_process_fill(true, 0, e, num_eqns, x_local, f_ad, f, f_local,
493
jac, seed, jac_local);
495
// Now do remaining fills reusing tape
496
for (unsigned int i=1; i<num_nodes-1; i++) {
500
adolc_init_fill(false, 0, e, num_eqns, x, x_local, x_ad);
501
adolc_process_fill(false, 0, e, num_eqns, x_local, f_ad, f, f_local,
502
jac, seed, jac_local);
504
for (unsigned int i=0; i<e.nnode*num_eqns; i++) {
505
delete [] jac_local[i];
512
// std::cout << "ADOL-C Residual = " << std::endl;
513
// for (unsigned int i=0; i<num_nodes*num_eqns; i++)
514
// std::cout << "\t" << f[i] << std::endl;
516
// std::cout.precision(8);
517
// std::cout << "ADOL-C Jacobian = " << std::endl;
518
// for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
519
// std::cout << "\t";
520
// for (unsigned int j=0; j<(e.nnode+1)*num_eqns; j++)
521
// std::cout << jac[i][j] << "\t";
522
// std::cout << std::endl;
525
return timer.totalElapsedTime();
528
double adolc_retape_jac_fill(unsigned int num_nodes, unsigned int num_eqns,
529
double mesh_spacing) {
530
ElemData e(mesh_spacing);
532
// Solution vector, residual, jacobian
533
std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
534
std::vector< std::vector<double> > jac(num_nodes*num_eqns);
535
for (unsigned int node_row=0; node_row<num_nodes; node_row++) {
536
for (unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) {
537
unsigned int row = node_row*num_eqns + eqn_row;
538
x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
540
jac[row] = std::vector<double>((e.nnode+1)*num_eqns);
541
for (unsigned int node_col=0; node_col<e.nnode+1; node_col++) {
542
for (unsigned int eqn_col=0; eqn_col<num_eqns; eqn_col++) {
543
unsigned int col = node_col*num_eqns + eqn_col;
550
Teuchos::Time timer("FE ADOL-C Retape Jacobian Fill", false);
552
std::vector<adouble> x_ad(e.nnode*num_eqns), f_ad(e.nnode*num_eqns);
553
std::vector<adouble> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
554
std::vector<double> x_local(e.nnode*num_eqns);
555
std::vector<double> f_local(e.nnode*num_eqns);
556
double **jac_local = new double*[e.nnode*num_eqns];
557
double **seed = new double*[e.nnode*num_eqns];
558
for (unsigned int i=0; i<e.nnode*num_eqns; i++) {
559
jac_local[i] = new double[e.nnode*num_eqns];
560
seed[i] = new double[e.nnode*num_eqns];
561
for (unsigned int j=0; j<e.nnode*num_eqns; j++)
565
for (unsigned int i=0; i<num_nodes-1; i++) {
569
adolc_init_fill(true, 1, e, num_eqns, x, x_local, x_ad);
570
template_element_fill(e, num_eqns, x_ad, u, du, f_ad);
571
adolc_process_fill(true, 1, e, num_eqns, x_local, f_ad, f, f_local,
572
jac, seed, jac_local);
574
for (unsigned int i=0; i<e.nnode*num_eqns; i++) {
575
delete [] jac_local[i];
582
// std::cout << "ADOL-C Residual (retaped) = " << std::endl;
583
// for (unsigned int i=0; i<num_nodes*num_eqns; i++)
584
// std::cout << "\t" << f[i] << std::endl;
586
// std::cout.precision(8);
587
// std::cout << "ADOL-C Jacobian (retaped) = " << std::endl;
588
// for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
589
// std::cout << "\t";
590
// for (unsigned int j=0; j<(e.nnode+1)*num_eqns; j++)
591
// std::cout << jac[i][j] << "\t";
592
// std::cout << std::endl;
595
return timer.totalElapsedTime();
600
double adolc_tapeless_jac_fill(unsigned int num_nodes, unsigned int num_eqns,
601
double mesh_spacing) {
602
ElemData e(mesh_spacing);
604
// Solution vector, residual, jacobian
605
std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
606
std::vector< std::vector<double> > jac(num_nodes*num_eqns);
607
for (unsigned int node_row=0; node_row<num_nodes; node_row++) {
608
for (unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) {
609
unsigned int row = node_row*num_eqns + eqn_row;
610
x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
612
jac[row] = std::vector<double>((e.nnode+1)*num_eqns);
613
for (unsigned int node_col=0; node_col<e.nnode+1; node_col++) {
614
for (unsigned int eqn_col=0; eqn_col<num_eqns; eqn_col++) {
615
unsigned int col = node_col*num_eqns + eqn_col;
622
Teuchos::Time timer("FE Tapeless ADOL-C Jacobian Fill", false);
624
std::vector<adtl::adouble> x_ad(e.nnode*num_eqns), f_ad(e.nnode*num_eqns);
625
std::vector<adtl::adouble> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
626
for (unsigned int i=0; i<num_nodes-1; i++) {
630
adolc_tapeless_init_fill(e, num_eqns, x, x_ad);
631
template_element_fill(e, num_eqns, x_ad, u, du, f_ad);
632
adolc_tapeless_process_fill(e, num_eqns, f_ad, f, jac);
636
// std::cout << "Tapeless ADOL-C Residual = " << std::endl;
637
// for (unsigned int i=0; i<num_nodes*num_eqns; i++)
638
// std::cout << "\t" << f[i] << std::endl;
640
// std::cout.precision(8);
641
// std::cout << "Tapeless ADOL-C Jacobian = " << std::endl;
642
// for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
643
// std::cout << "\t";
644
// for (unsigned int j=0; j<(e.nnode+1)*num_eqns; j++)
645
// std::cout << jac[i][j] << "\t";
646
// std::cout << std::endl;
649
return timer.totalElapsedTime();
654
double analytic_jac_fill(unsigned int num_nodes, unsigned int num_eqns,
655
double mesh_spacing) {
656
ElemData e(mesh_spacing);
658
// Solution vector, residual, jacobian
659
std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
660
std::vector< std::vector<double> > jac(num_nodes*num_eqns);
661
for (unsigned int node_row=0; node_row<num_nodes; node_row++) {
662
for (unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) {
663
unsigned int row = node_row*num_eqns + eqn_row;
664
x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
666
jac[row] = std::vector<double>((e.nnode+1)*num_eqns);
667
for (unsigned int node_col=0; node_col<e.nnode+1; node_col++) {
668
for (unsigned int eqn_col=0; eqn_col<num_eqns; eqn_col++) {
669
unsigned int col = node_col*num_eqns + eqn_col;
676
Teuchos::Time timer("FE Analytic Jacobian Fill", false);
678
std::vector<double> x_local(e.nnode*num_eqns), f_local(e.nnode*num_eqns);
679
std::vector< std::vector<double> > jac_local(e.nnode*num_eqns);
680
std::vector<double> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
681
for (unsigned int i=0; i<e.nnode*num_eqns; i++)
682
jac_local[i] = std::vector<double>(e.nnode*num_eqns);
683
for (unsigned int i=0; i<num_nodes-1; i++) {
687
analytic_init_fill(e, num_eqns, x, x_local);
688
analytic_element_fill(e, num_eqns, x_local, u, du, f_local, jac_local);
689
analytic_process_fill(e, num_eqns, f_local, jac_local, f, jac);
693
// std::cout.precision(8);
694
// std::cout << "Analytic Residual = " << std::endl;
695
// for (unsigned int i=0; i<num_nodes*num_eqns; i++)
696
// std::cout << "\t" << f[i] << std::endl;
698
// std::cout.precision(8);
699
// std::cout << "Analytic Jacobian = " << std::endl;
700
// for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
701
// std::cout << "\t";
702
// for (unsigned int j=0; j<(e.nnode+1)*num_eqns; j++)
703
// std::cout << jac[i][j] << "\t";
704
// std::cout << std::endl;
707
return timer.totalElapsedTime();
710
double residual_fill(unsigned int num_nodes, unsigned int num_eqns,
711
double mesh_spacing) {
712
ElemData e(mesh_spacing);
714
// Solution vector, residual, jacobian
715
std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
716
for (unsigned int node_row=0; node_row<num_nodes; node_row++) {
717
for (unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) {
718
unsigned int row = node_row*num_eqns + eqn_row;
719
x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
724
Teuchos::Time timer("FE Residual Fill", false);
726
std::vector<double> x_local(e.nnode*num_eqns), f_local(e.nnode*num_eqns);
727
std::vector<double> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
728
for (unsigned int i=0; i<num_nodes-1; i++) {
732
analytic_init_fill(e, num_eqns, x, x_local);
733
template_element_fill(e, num_eqns, x_local, u, du, f_local);
734
residual_process_fill(e, num_eqns, f_local, f);
738
// std::cout.precision(8);
739
// std::cout << "Analytic Residual = " << std::endl;
740
// for (unsigned int i=0; i<num_nodes*num_eqns; i++)
741
// std::cout << "\t" << f[i] << std::endl;
743
return timer.totalElapsedTime();
746
int main(int argc, char* argv[]) {
754
// Maximum number of derivative components for SLFad
755
const int slfad_max = 100;
757
// Set up command line options
758
Teuchos::CommandLineProcessor clp;
759
clp.setDocString("This program tests the speed of various forward mode AD implementations for a finite-element-like Jacobian fill");
760
int num_nodes = 100000;
763
clp.setOption("n", &num_nodes, "Number of nodes");
764
clp.setOption("p", &num_eqns, "Number of equations");
765
clp.setOption("rt", &rt, "Include ADOL-C retaping test");
768
Teuchos::CommandLineProcessor::EParseCommandLineReturn
769
parseReturn= clp.parse(argc, argv);
770
if(parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL)
773
double mesh_spacing = 1.0 / static_cast<double>(num_nodes - 1);
775
// Memory pool & manager
776
Sacado::Fad::MemPoolManager<double> poolManager(num_nodes*num_eqns);
777
Sacado::Fad::MemPool* pool = poolManager.getMemoryPool(num_nodes*num_eqns);
778
Sacado::Fad::DMFad<double>::setDefaultPool(pool);
780
std::cout.setf(std::ios::scientific);
781
std::cout.precision(p);
782
std::cout << "num_nodes = " << num_nodes
783
<< ", num_eqns = " << num_eqns << ": " << std::endl
784
<< " " << " Time" << "\t"<< "Time/Analytic" << "\t"
785
<< "Time/Residual" << std::endl;
790
tr = residual_fill(num_nodes, num_eqns, mesh_spacing);
792
ta = analytic_jac_fill(num_nodes, num_eqns, mesh_spacing);
793
std::cout << "Analytic: " << std::setw(w) << ta << "\t" << std::setw(w) << ta/ta << "\t" << std::setw(w) << ta/tr << std::endl;
796
#ifndef ADOLC_TAPELESS
797
t = adolc_jac_fill(num_nodes, num_eqns, mesh_spacing);
798
std::cout << "ADOL-C: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
801
t = adolc_retape_jac_fill(num_nodes, num_eqns, mesh_spacing);
802
std::cout << "ADOL-C(rt):" << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
806
t = adolc_tapeless_jac_fill(num_nodes, num_eqns, mesh_spacing);
807
std::cout << "ADOL-C(tl):" << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
811
if (num_eqns*2 == 4) {
812
t = fad_jac_fill< FAD::TFad<4,double> >(num_nodes, num_eqns, mesh_spacing);
813
std::cout << "TFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
815
else if (num_eqns*2 == 40) {
816
t = fad_jac_fill< FAD::TFad<40,double> >(num_nodes, num_eqns, mesh_spacing);
817
std::cout << "TFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
820
t = fad_jac_fill< FAD::Fad<double> >(num_nodes, num_eqns, mesh_spacing);
821
std::cout << "Fad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
823
if (num_eqns*2 == 4) {
824
t = fad_jac_fill< Sacado::Fad::SFad<double,4> >(num_nodes, num_eqns, mesh_spacing);
825
std::cout << "SFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
827
else if (num_eqns*2 == 40) {
828
t = fad_jac_fill< Sacado::Fad::SFad<double,40> >(num_nodes, num_eqns, mesh_spacing);
829
std::cout << "SFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
832
if (num_eqns*2 < slfad_max) {
833
t = fad_jac_fill< Sacado::Fad::SLFad<double,slfad_max> >(num_nodes, num_eqns, mesh_spacing);
834
std::cout << "SLFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
837
t = fad_jac_fill< Sacado::Fad::DFad<double> >(num_nodes, num_eqns, mesh_spacing);
838
std::cout << "DFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
840
t = fad_jac_fill< Sacado::Fad::SimpleFad<double> >(num_nodes, num_eqns, mesh_spacing);
841
std::cout << "SimpleFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
843
t = fad_jac_fill< Sacado::Fad::DMFad<double> >(num_nodes, num_eqns, mesh_spacing);
844
std::cout << "DMFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
846
if (num_eqns*2 == 4) {
847
t = fad_jac_fill< Sacado::ELRFad::SFad<double,4> >(num_nodes, num_eqns, mesh_spacing);
848
std::cout << "ELRSFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
850
else if (num_eqns*2 == 40) {
851
t = fad_jac_fill< Sacado::ELRFad::SFad<double,40> >(num_nodes, num_eqns, mesh_spacing);
852
std::cout << "ELRSFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
855
if (num_eqns*2 < slfad_max) {
856
t = fad_jac_fill< Sacado::ELRFad::SLFad<double,slfad_max> >(num_nodes, num_eqns, mesh_spacing);
857
std::cout << "ELRSLFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
860
t = fad_jac_fill< Sacado::ELRFad::DFad<double> >(num_nodes, num_eqns, mesh_spacing);
861
std::cout << "ELRDFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
863
t = fad_jac_fill< Sacado::CacheFad::DFad<double> >(num_nodes, num_eqns, mesh_spacing);
864
std::cout << "CacheFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
866
t = fad_jac_fill< Sacado::Fad::DVFad<double> >(num_nodes, num_eqns, mesh_spacing);
867
std::cout << "DVFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
870
catch (std::exception& e) {
871
cout << e.what() << endl;
874
catch (const char *s) {
879
cout << "Caught unknown exception!" << endl;