1
/*============================================================================
3
* This file is part of the Code_Saturne Kernel, element of the
4
* Code_Saturne CFD tool.
6
* Copyright (C) 1998-2009 EDF S.A., France
8
* contact: saturne-support@edf.fr
10
* The Code_Saturne Kernel is free software; you can redistribute it
11
* and/or modify it under the terms of the GNU General Public License
12
* as published by the Free Software Foundation; either version 2 of
13
* the License, or (at your option) any later version.
15
* The Code_Saturne Kernel is distributed in the hope that it will be
16
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty
17
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
* GNU General Public License for more details.
20
* You should have received a copy of the GNU General Public License
21
* along with the Code_Saturne Kernel; if not, write to the
22
* Free Software Foundation, Inc.,
23
* 51 Franklin St, Fifth Floor,
24
* Boston, MA 02110-1301 USA
26
*============================================================================*/
28
/*============================================================================
29
* Sparse Linear Equation Solvers
30
*============================================================================*/
32
#if defined(HAVE_CONFIG_H)
33
#include "cs_config.h"
36
/*----------------------------------------------------------------------------
37
* Standard C library headers
38
*----------------------------------------------------------------------------*/
51
/*----------------------------------------------------------------------------
53
*----------------------------------------------------------------------------*/
56
#include <bft_error.h>
57
#include <bft_printf.h>
58
#include <bft_timer.h>
60
/*----------------------------------------------------------------------------
62
*----------------------------------------------------------------------------*/
64
/*----------------------------------------------------------------------------
66
*----------------------------------------------------------------------------*/
71
#include "cs_matrix.h"
75
/*----------------------------------------------------------------------------
76
* Header for the current file
77
*----------------------------------------------------------------------------*/
81
/*----------------------------------------------------------------------------*/
85
/*=============================================================================
86
* Local Macro Definitions
87
*============================================================================*/
92
#if !defined(HUGE_VAL)
93
#define HUGE_VAL 1.E+12
96
/*=============================================================================
97
* Local Structure Definitions
98
*============================================================================*/
100
/* Basic per linear system options and logging */
101
/*---------------------------------------------*/
103
typedef struct _cs_sles_info_t {
105
char *name; /* System name */
106
cs_sles_type_t type; /* Solver type */
108
unsigned n_calls; /* Number of times system solved */
110
unsigned n_iterations_last; /* Number of iterations for last
112
unsigned n_iterations_min; /* Minimum number ot iterations
113
in system resolution history */
114
unsigned n_iterations_max; /* Maximum number ot iterations
115
in system resolution history */
116
unsigned long long n_iterations_tot; /* Total accumulated number of
119
double wt_tot; /* Total wall-clock time used */
120
double cpu_tot; /* Total (local) CPU used */
124
/* Convergence testing and tracking */
125
/*----------------------------------*/
127
typedef struct _cs_sles_convergence_t {
129
int verbosity; /* Verbosity level */
131
unsigned n_iterations; /* Current number of iterations */
132
unsigned n_iterations_max; /* Maximum number of iterations */
134
double precision; /* Precision limit */
135
double r_norm; /* Residue normalization */
136
double residue; /* Current residue */
137
double initial_residue; /* Initial residue */
139
} cs_sles_convergence_t;
141
/*============================================================================
143
*============================================================================*/
145
static int cs_glob_sles_n_systems = 0; /* Current number of systems */
146
static int cs_glob_sles_n_max_systems = 0; /* Max. number of sytems for
147
cs_glob_sles_systems. */
149
static cs_sles_info_t **cs_glob_sles_systems = NULL; /* System info array */
152
Matrix structures re-used for various resolutions.
154
These structures are kept throughout the whole run, to avoid paying the
155
CPU overhead for their construction at each system resolution
156
(at the cost of extra memory use, depending on the chosen structure).
158
Two simultaneous matrixes may be needed for some solvers: one for the
159
linear system, one for the preconditionner.
160
We always have at least one structure of "native" matrix type, as
161
this type incurs negligible memory and assignment cpu overhead;
162
we use it for Jacobi (where the number of iterations done is often
163
small, and an assignment cost equivalent to a few matrix.vector
164
products may not be amortized).
167
cs_matrix_t *cs_glob_sles_base_matrix = NULL;
168
cs_matrix_t *cs_glob_sles_native_matrix = NULL;
170
/* Sparse linear equation solver type names */
172
/* Short names for solver types */
174
const char *cs_sles_type_name[] = {N_("Conjugate gradient"),
178
/*============================================================================
179
* Private function definitions
180
*============================================================================*/
182
/*----------------------------------------------------------------------------
183
* Return pointer to new linear system info structure.
186
* name <-- system name
187
* type <-- resolution method
190
* pointer to newly created linear system info structure
191
*----------------------------------------------------------------------------*/
193
static cs_sles_info_t *
194
_sles_info_create(const char *name,
197
cs_sles_info_t *new_info = NULL;
199
BFT_MALLOC(new_info, 1, cs_sles_info_t);
200
BFT_MALLOC(new_info->name, strlen(name) + 1, char);
202
strcpy(new_info->name, name);
203
new_info->type = type;
205
new_info->n_calls = 0;
206
new_info->n_iterations_min = 0;
207
new_info->n_iterations_max = 0;
208
new_info->n_iterations_last = 0;
209
new_info->n_iterations_tot = 0;
211
new_info->wt_tot = 0.0;
212
new_info->cpu_tot = 0.0;
217
/*----------------------------------------------------------------------------
218
* Destroy linear system info structure.
221
* this_info <-> pointer to linear system info structure pointer
222
*----------------------------------------------------------------------------*/
225
_sles_info_destroy(cs_sles_info_t **this_info)
227
if (*this_info != NULL) {
228
BFT_FREE((*this_info)->name);
229
BFT_FREE(*this_info);
233
/*----------------------------------------------------------------------------
234
* Output information regarding linear system resolution.
237
* this_info <-> pointer to linear system info structure
238
*----------------------------------------------------------------------------*/
241
_sles_info_dump(cs_sles_info_t *this_info)
243
int n_calls = this_info->n_calls;
244
int n_it_min = this_info->n_iterations_min;
245
int n_it_max = this_info->n_iterations_max;
246
int n_it_mean = (int)( this_info->n_iterations_tot
247
/ ((unsigned long long)n_calls));
250
"Summary of resolutions for %s (%s):\n"
252
" Number of calls: %d\n"
253
" Minimum number of iterations: %d\n"
254
" Maximum number of iterations: %d\n"
255
" Mean number of iterations: %d\n"
256
" Total elapsed time: %12.3f\n"),
257
this_info->name, cs_sles_type_name[this_info->type],
258
n_calls, n_it_min, n_it_max, n_it_mean,
261
#if defined(HAVE_MPI)
263
if (cs_glob_n_ranks > 1) {
265
double cpu_min, cpu_max, cpu_tot;
266
double cpu_loc = this_info->cpu_tot;
268
MPI_Allreduce(&cpu_loc, &cpu_min, 1, MPI_DOUBLE, MPI_MIN,
270
MPI_Allreduce(&cpu_loc, &cpu_max, 1, MPI_DOUBLE, MPI_MAX,
272
MPI_Allreduce(&cpu_loc, &cpu_tot, 1, MPI_DOUBLE, MPI_SUM,
275
bft_printf(_(" Min local total CPU time: %12.3f\n"
276
" Max local total CPU time: %12.3f\n"
277
" Total CPU time: %12.3f\n"),
278
cpu_min, cpu_max, cpu_tot);
284
if (cs_glob_n_ranks == 1)
285
bft_printf(_(" Total CPU time: %12.3f\n"),
289
/*----------------------------------------------------------------------------
290
* Return pointer to linear system info.
292
* If this system did not previously exist, it is added to the list of
296
* name <-- system name
297
* type <-- resolution method
298
*----------------------------------------------------------------------------*/
300
static cs_sles_info_t *
301
_find_or_add_system(const char *name,
304
int ii, start_id, end_id, mid_id;
307
/* Use binary search to find system */
310
end_id = cs_glob_sles_n_systems - 1;
311
mid_id = start_id + ((end_id -start_id) / 2);
313
while (start_id <= end_id) {
314
cmp_ret = strcmp((cs_glob_sles_systems[mid_id])->name, name);
316
cmp_ret = (cs_glob_sles_systems[mid_id])->type - type;
318
start_id = mid_id + 1;
319
else if (cmp_ret > 0)
323
mid_id = start_id + ((end_id -start_id) / 2);
326
/* If found, return */
329
return cs_glob_sles_systems[mid_id];
331
/* Reallocate global array if necessary */
333
if (cs_glob_sles_n_systems >= cs_glob_sles_n_max_systems) {
335
if (cs_glob_sles_n_max_systems == 0)
336
cs_glob_sles_n_max_systems = 10;
338
cs_glob_sles_n_max_systems *= 2;
339
BFT_REALLOC(cs_glob_sles_systems,
340
cs_glob_sles_n_max_systems,
345
/* Insert in sorted list */
347
for (ii = cs_glob_sles_n_systems; ii > mid_id; ii--)
348
cs_glob_sles_systems[ii] = cs_glob_sles_systems[ii - 1];
350
cs_glob_sles_systems[mid_id] = _sles_info_create(name,
352
cs_glob_sles_n_systems += 1;
354
return cs_glob_sles_systems[mid_id];
357
/*----------------------------------------------------------------------------
358
* Initialize or reset convergence info structure.
361
* solver_name <-- solver name
362
* var_name <-- variable name
363
* convergence <-> Convergence info structure
364
* verbosity <-- Verbosity level
365
* n_iter_max <-- Maximum number of iterations
366
* precision <-- Precision limit
367
* r_norm <-- Residue normalization
368
* residue <-- Initial residue
369
*----------------------------------------------------------------------------*/
372
_convergence_init(cs_sles_convergence_t *convergence,
373
const char *solver_name,
374
const char *var_name,
381
convergence->verbosity = verbosity;
383
convergence->n_iterations = 0;
384
convergence->n_iterations_max = n_iter_max;
386
convergence->precision = precision;
387
convergence->r_norm = r_norm;
388
convergence->residue = residue;
389
convergence->initial_residue = residue;
392
bft_printf("%s [%s]:\n", solver_name, var_name);
394
bft_printf(_(" n_iter res_abs res_nor\n"));
399
/*----------------------------------------------------------------------------
403
* solver_name <-- solver name
404
* var_name <-- variable name
405
* n_iter <-- Number of iterations done
406
* residue <-- Non normalized residue
407
* convergence <-> Convergence information structure
410
* 1 if converged, 0 if not converged, -1 if not converged and maximum
411
* iteration number reached, -2 if divergence is detected.
412
*----------------------------------------------------------------------------*/
415
_convergence_test(const char *solver_name,
416
const char *var_name,
419
cs_sles_convergence_t *convergence)
421
const int verbosity = convergence->verbosity;
423
const char final_fmt[]
424
= N_(" n_iter : %5d, res_abs : %11.4e, res_nor : %11.4e\n");
426
/* Update conversion info structure */
428
convergence->n_iterations = n_iter;
429
convergence->residue = residue;
431
/* Print convergence values if high verbosity */
434
bft_printf(" %5d %11.4e %11.4e\n",
435
n_iter, residue, residue/convergence->r_norm);
437
/* If not converged */
439
if (residue > convergence->precision * convergence->r_norm) {
441
if (n_iter < convergence->n_iterations_max) {
443
if (residue > convergence->initial_residue * 10000.0 && residue > 100.)
445
#if (_CS_STDC_VERSION >= 199901L)
446
else if (isnan(residue) || isinf(residue))
451
"%s [%s]: divergence after %u iterations:\n"
452
" initial residual: %11.4e; current residual: %11.4e\n"),
453
solver_name, var_name,
454
convergence->n_iterations,
455
convergence->initial_residue, convergence->residue);
463
if (verbosity == 1) /* Already output if verbosity > 1 */
464
bft_printf("%s [%s]:\n", solver_name, var_name);
465
if (verbosity <= 2) /* Already output if verbosity > 2 */
466
bft_printf(_(final_fmt),
467
n_iter, residue, residue/convergence->r_norm);
468
bft_printf(_(" @@ Warning: non convergence\n"));
478
if (verbosity == 2) /* Already output if verbosity > 2 */
479
bft_printf(final_fmt, n_iter, residue, residue/convergence->r_norm);
485
/*----------------------------------------------------------------------------
486
* Compute dot product, summing result over all ranks.
489
* n_elts <-- Local number of elements
490
* x <-- first vector in s = x.y
491
* y <-- second vector in s = x.y
495
*----------------------------------------------------------------------------*/
498
_dot_product(cs_int_t n_elts,
502
double s = cblas_ddot(n_elts, x, 1, y, 1);
504
#if defined(HAVE_MPI)
506
if (cs_glob_n_ranks > 1) {
508
MPI_Allreduce(&s, &_sum, 1, MPI_DOUBLE, MPI_SUM, cs_glob_mpi_comm);
512
#endif /* defined(HAVE_MPI) */
517
/*----------------------------------------------------------------------------
518
* Compute 2 dot products, summing result over all ranks.
521
* n_elts <-- Local number of elements
522
* x1 <-- first vector in s1 = x1.y1
523
* y1 <-- second vector in s1 = x1.y1
524
* x2 <-- first vector in s2 = x2.y2
525
* y2 <-- second vector in s2 = x2.y2
526
* s1 --> result of s1 = x1.y1
527
* s2 --> result of s2 = x2.y2
528
*----------------------------------------------------------------------------*/
531
_dot_products_2(cs_int_t n_elts,
542
/* If a term appears in both dot products, we do not use the BLAS,
543
as grouping both dot products in a same loop allows for better
544
cache use and often better performance than separate dot products
545
with even optimized BLAS */
547
if (x1 == x2 || x1 == y2 || y1 == x2 || y1 == y2) {
549
/* Use temporary variables to help some compilers optimize */
550
double _s0 = 0.0, _s1 = 0.0;
551
for (ii = 0; ii < n_elts; ii++) {
552
_s0 += x1[ii] * y1[ii];
553
_s1 += x2[ii] * y2[ii];
555
s[0] = _s0; s[1] = _s1;
560
s[0] = cblas_ddot(n_elts, x1, 1, y1, 1);
561
s[1] = cblas_ddot(n_elts, x2, 1, y2, 1);
565
#if defined(HAVE_MPI)
567
if (cs_glob_n_ranks > 1) {
569
MPI_Allreduce(s, _sum, 2, MPI_DOUBLE, MPI_SUM, cs_glob_mpi_comm);
574
#endif /* defined(HAVE_MPI) */
580
/*----------------------------------------------------------------------------
581
* Compute y <- x + alpha.y
584
* n <-- Number of elements in vectors x, y, z
585
* alpha <-- Scalar alpha
586
* x <-- Vector x (size: n)
587
* y <-> Vector y (size: n)
588
*----------------------------------------------------------------------------*/
593
cs_real_t *restrict x,
594
cs_real_t *restrict y)
596
#if defined(HAVE_ESSL)
598
dzaxpy(n, alpha, y, 1, x, 1, y, 1);
606
#pragma disjoint(alpha, *x, *y)
609
for (ii = 0; ii < n; ii++)
610
y[ii] = x[ii] + (alpha * y[ii]);
617
/*----------------------------------------------------------------------------
618
* Residue preconditioning Gk = C.Rk
620
* To increase the polynomial order with no major overhead, we may
621
* "implicit" the resolution using a red-black mesh coloring.
625
* 1: Gk = (1/ad - (1/ad).ax.(1/ad)).Rk
626
* 2: Gk = (1/ad - (1/ad).ax.(1/ad) + (1/ad).ax.(1/ad).ax.(1/ad)).Rk
629
* n_cells <-- Local number of cells
630
* poly_degree <-- preconditioning polynomial degree (0: diagonal)
631
* rotation_mode <-- Halo update option for rotational periodicity
632
* ad_inv <-- Inverse of matrix diagonal
633
* ax <-- Non-diagonal part of linear equation matrix
634
* rk <-- Residue vector
635
* gk --> Result vector
636
* wk --- Working array
637
*----------------------------------------------------------------------------*/
640
_polynomial_preconditionning(cs_int_t n_cells,
642
cs_perio_rota_t rotation_mode,
643
const cs_real_t *ad_inv,
644
const cs_matrix_t *ax,
646
cs_real_t *restrict gk,
647
cs_real_t *restrict wk)
653
#pragma disjoint(*ad_inv, *rk, *gk, *wk)
656
/* Polynomial of degree 0 (diagonal)
657
*-----------------------------------*/
659
for (ii = 0; ii < n_cells; ii++)
660
gk[ii] = rk[ii] * ad_inv[ii];
662
/* Polynomial of degree n
663
*-----------------------
666
* gk = ((1/ad) - (1/ad).ax.(1/ad) + (1/ad).ax.(1/ad).ax.(1/ad) + ... ).rk
669
for (deg_id = 1; deg_id <= poly_degree; deg_id++) {
671
/* Compute Wk = (A-diag).Gk */
673
cs_matrix_vector_multiply(rotation_mode, ax, gk, wk);
675
for (ii = 0; ii < n_cells; ii++)
676
gk[ii] = (rk[ii] - wk[ii]) * ad_inv[ii];
682
/*----------------------------------------------------------------------------
683
* Solution of (ad+ax).vx = Rhs using preconditioned conjugate gradient.
685
* Single-processor-optimized version.
687
* On entry, vx is considered initialized.
690
* var_name <-- Variable name
692
* ax <-- Non-diagonal part of linear equation matrix
693
* (only necessary if poly_degree > 0)
694
* poly_degree <-- Preconditioning polynomial degree (0: diagonal)
695
* rotation_mode <-- Halo update option for rotational periodicity
696
* convergence <-- Convergence information structure
697
* rhs <-- Right hand side
698
* vx --> System solution
699
* aux_size <-- Number of elements in aux_vectors
700
* aux_vectors --- Optional working area (allocation otherwise)
703
* 1 if converged, 0 if not converged, -1 if not converged and maximum
704
* iteration number reached, -2 if divergence is detected.
705
*----------------------------------------------------------------------------*/
708
_conjugate_gradient_sp(const char *var_name,
709
const cs_matrix_t *a,
710
const cs_matrix_t *ax,
712
cs_perio_rota_t rotation_mode,
713
cs_sles_convergence_t *convergence,
714
const cs_real_t *rhs,
715
cs_real_t *restrict vx,
719
const char *sles_name;
721
cs_int_t n_cols, n_rows, ii;
722
double ro_0, ro_1, alpha, rk_gkm1, rk_gk, beta, residue;
723
cs_real_t *_aux_vectors;
724
cs_real_t *restrict rk, *restrict dk, *restrict gk;
725
cs_real_t *restrict zk, *restrict wk, *restrict ad_inv;
729
/* Tell IBM compiler not to alias */
731
#pragma disjoint(*rhs, *vx, *rk, *dk, *gk, *zk, *wk, *ad_inv)
734
/* Preliminary calculations */
735
/*--------------------------*/
737
sles_name = _(cs_sles_type_name[CS_SLES_PCG]);
739
n_cols = cs_matrix_get_n_columns(a);
740
n_rows = cs_matrix_get_n_rows(a);
742
/* Allocate work arrays */
746
size_t wa_size = n_cols;
751
if (aux_vectors == NULL || aux_size < (wa_size * n_wa))
752
BFT_MALLOC(_aux_vectors, wa_size * n_wa, cs_real_t);
754
_aux_vectors = aux_vectors;
756
ad_inv = _aux_vectors;
758
rk = _aux_vectors + wa_size;
759
dk = _aux_vectors + wa_size*2;
760
gk = _aux_vectors + wa_size*3;
761
zk = _aux_vectors + wa_size*4;
764
wk = _aux_vectors + wa_size*5;
769
cs_matrix_get_diagonal(a, ad_inv);
770
for (ii = 0; ii < n_rows; ii++)
771
ad_inv[ii] = 1.0 / ad_inv[ii];
773
/* Initialize work arrays (not necessary, just for debugging) */
775
for (ii = 0; ii < n_rows; ii++) {
782
/* Initialize iterative calculation */
783
/*----------------------------------*/
785
/* Residue and descent direction */
787
cs_matrix_vector_multiply(rotation_mode, a, vx, rk);
789
for (ii = 0; ii < n_rows; ii++) {
790
rk[ii] = rk[ii] - rhs[ii];
794
/* Polynomial preconditionning of order poly_degre */
796
_polynomial_preconditionning(n_rows,
805
/* Descent direction */
806
/*-------------------*/
808
memcpy(dk, gk, n_rows * sizeof(cs_real_t));
810
rk_gkm1 = _dot_product(n_rows, rk, gk);
812
cs_matrix_vector_multiply(rotation_mode, a, dk, zk);
814
/* Descent parameter */
816
_dot_products_2(n_rows, rk, dk, dk, zk, &ro_0, &ro_1);
818
alpha = - ro_0 / ro_1;
820
cblas_daxpy(n_rows, alpha, dk, 1, vx, 1);
821
cblas_daxpy(n_rows, alpha, zk, 1, rk, 1);
823
/* Convergence test */
825
residue = sqrt(_dot_product(n_rows, rk, rk));
827
cvg = _convergence_test(sles_name, var_name,
828
n_iter, residue, convergence);
830
/* Current Iteration */
831
/*-------------------*/
837
_polynomial_preconditionning(n_rows,
846
/* Descent parameter */
848
rk_gk = _dot_product(n_rows, rk, gk);
850
/* Complete descent parameter computation and matrix.vector product */
852
beta = rk_gk / rk_gkm1;
855
_y_aypx(n_rows, beta, gk, dk); /* dk <- gk + (beta.dk) */
857
cs_matrix_vector_multiply(rotation_mode, a, dk, zk);
859
_dot_products_2(n_rows, rk, dk, dk, zk, &ro_0, &ro_1);
861
alpha = - ro_0 / ro_1;
863
cblas_daxpy(n_rows, alpha, dk, 1, vx, 1);
864
cblas_daxpy(n_rows, alpha, zk, 1, rk, 1);
866
/* Convergence test */
868
residue = sqrt(_dot_product(n_rows, rk, rk));
870
cvg = _convergence_test(sles_name, var_name,
871
n_iter, residue, convergence);
875
if (_aux_vectors != aux_vectors)
876
BFT_FREE(_aux_vectors);
881
/*----------------------------------------------------------------------------
882
* Solution of (ad+ax).vx = Rhs using preconditioned conjugate gradient.
884
* Parallel-optimized version, groups dot products, at the cost of
885
* computation of the preconditionning for n+1 iterations instead of n.
887
* On entry, vx is considered initialized.
890
* var_name <-- Variable name
892
* ax <-- Non-diagonal part of linear equation matrix
893
* (only necessary if poly_degree > 0)
894
* poly_degree <-- Preconditioning polynomial degree (0: diagonal)
895
* rotation_mode <-- Halo update option for rotational periodicity
896
* convergence <-- Convergence information structure
897
* rhs <-- Right hand side
898
* vx --> System solution
899
* aux_size <-- Number of elements in aux_vectors
900
* aux_vectors --- Optional working area (allocation otherwise)
903
* 1 if converged, 0 if not converged, -1 if not converged and maximum
904
* iteration number reached, -2 if divergence is detected.
905
*----------------------------------------------------------------------------*/
908
_conjugate_gradient_mp(const char *var_name,
909
const cs_matrix_t *a,
910
const cs_matrix_t *ax,
912
cs_perio_rota_t rotation_mode,
913
cs_sles_convergence_t *convergence,
914
const cs_real_t *rhs,
915
cs_real_t *restrict vx,
919
const char *sles_name;
921
cs_int_t n_cols, n_rows, ii;
922
double ro_0, ro_1, alpha, rk_gkm1, rk_gk, beta, residue;
923
cs_real_t *_aux_vectors;
924
cs_real_t *restrict rk, *restrict dk, *restrict gk;
925
cs_real_t *restrict zk, *restrict wk, *restrict ad_inv;
929
/* Tell IBM compiler not to alias */
931
#pragma disjoint(*rhs, *vx, *rk, *dk, *gk, *zk, *wk, *ad_inv)
934
/* Preliminary calculations */
935
/*--------------------------*/
937
sles_name = _(cs_sles_type_name[CS_SLES_PCG]);
939
n_cols = cs_matrix_get_n_columns(a);
940
n_rows = cs_matrix_get_n_rows(a);
942
/* Allocate work arrays */
946
size_t wa_size = n_cols;
951
if (aux_vectors == NULL || aux_size < (wa_size * n_wa))
952
BFT_MALLOC(_aux_vectors, wa_size * n_wa, cs_real_t);
954
_aux_vectors = aux_vectors;
956
ad_inv = _aux_vectors;
958
rk = _aux_vectors + wa_size;
959
dk = _aux_vectors + wa_size*2;
960
gk = _aux_vectors + wa_size*3;
961
zk = _aux_vectors + wa_size*4;
964
wk = _aux_vectors + wa_size*5;
969
cs_matrix_get_diagonal(a, ad_inv);
970
for (ii = 0; ii < n_rows; ii++)
971
ad_inv[ii] = 1.0 / ad_inv[ii];
973
/* Initialize work arrays (not necessary, just for debugging) */
975
for (ii = 0; ii < n_rows; ii++) {
982
/* Initialize iterative calculation */
983
/*----------------------------------*/
985
/* Residue and descent direction */
987
cs_matrix_vector_multiply(rotation_mode, a, vx, rk);
989
for (ii = 0; ii < n_rows; ii++) {
990
rk[ii] = rk[ii] - rhs[ii];
994
/* Polynomial preconditionning of order poly_degre */
996
_polynomial_preconditionning(n_rows,
1005
/* Descent direction */
1006
/*-------------------*/
1008
memcpy(dk, gk, n_rows * sizeof(cs_real_t));
1010
rk_gkm1 = _dot_product(n_rows, rk, gk);
1012
cs_matrix_vector_multiply(rotation_mode, a, dk, zk);
1014
/* Descent parameter */
1016
_dot_products_2(n_rows, rk, dk, dk, zk, &ro_0, &ro_1);
1018
alpha = - ro_0 / ro_1;
1020
cblas_daxpy(n_rows, alpha, dk, 1, vx, 1);
1021
cblas_daxpy(n_rows, alpha, zk, 1, rk, 1);
1023
/* Convergence test */
1025
residue = sqrt(_dot_product(n_rows, rk, rk));
1027
cvg = _convergence_test(sles_name, var_name,
1028
n_iter, residue, convergence);
1030
/* Current Iteration */
1031
/*-------------------*/
1035
_polynomial_preconditionning(n_rows,
1044
/* compute residue and prepare descent parameter */
1046
_dot_products_2(n_rows, rk, rk, rk, gk, &residue, &rk_gk);
1048
residue = sqrt(residue);
1050
/* Convergence test for end of previous iteration */
1053
cvg = _convergence_test(sles_name, var_name,
1054
n_iter, residue, convergence);
1061
/* Complete descent parameter computation and matrix.vector product */
1063
beta = rk_gk / rk_gkm1;
1066
_y_aypx(n_rows, beta, gk, dk); /* dk <- gk + (beta.dk) */
1068
cs_matrix_vector_multiply(rotation_mode, a, dk, zk);
1070
_dot_products_2(n_rows, rk, dk, dk, zk, &ro_0, &ro_1);
1072
alpha = - ro_0 / ro_1;
1074
cblas_daxpy(n_rows, alpha, dk, 1, vx, 1);
1075
cblas_daxpy(n_rows, alpha, zk, 1, rk, 1);
1079
if (_aux_vectors != aux_vectors)
1080
BFT_FREE(_aux_vectors);
1085
/*----------------------------------------------------------------------------
1086
* Solution of (ad+ax).vx = Rhs using Jacobi.
1088
* On entry, vx is considered initialized.
1091
* var_name <-- Variable name
1092
* ad <-- Diagonal part of linear equation matrix
1093
* ax <-- Non-diagonal part of linear equation matrix
1094
* rotation_mode <-- Halo update option for rotational periodicity
1095
* convergence <-- Convergence information structure
1096
* rhs <-- Right hand side
1097
* vx --> System solution
1098
* aux_size <-- Number of elements in aux_vectors
1099
* aux_vectors --- Optional working area (allocation otherwise)
1102
* 1 if converged, 0 if not converged, -1 if not converged and maximum
1103
* iteration number reached, -2 if divergence is detected.
1104
*----------------------------------------------------------------------------*/
1107
_jacobi(const char *var_name,
1108
const cs_real_t *restrict ad,
1109
const cs_matrix_t *ax,
1110
cs_perio_rota_t rotation_mode,
1111
cs_sles_convergence_t *convergence,
1112
const cs_real_t *rhs,
1113
cs_real_t *restrict vx,
1117
const char *sles_name;
1119
cs_int_t n_cols, n_rows, ii;
1120
double res2, residue;
1121
cs_real_t *_aux_vectors;
1122
cs_real_t *restrict ad_inv, *restrict rk;
1124
unsigned n_iter = 0;
1126
/* Tell IBM compiler not to alias */
1127
#if defined(__xlc__)
1128
#pragma disjoint(*rhs, *vx, *ad, *ad_inv)
1131
/* Preliminary calculations */
1132
/*--------------------------*/
1134
sles_name = _(cs_sles_type_name[CS_SLES_JACOBI]);
1136
n_cols = cs_matrix_get_n_columns(ax);
1137
n_rows = cs_matrix_get_n_rows(ax);
1139
/* Allocate work arrays */
1142
size_t wa_size = n_cols;
1144
if (aux_vectors == NULL || aux_size < (wa_size * 2))
1145
BFT_MALLOC(_aux_vectors, wa_size * 2, cs_real_t);
1147
_aux_vectors = aux_vectors;
1149
ad_inv = _aux_vectors;
1150
rk = _aux_vectors + wa_size;
1153
for (ii = 0; ii < n_rows; ii++)
1154
ad_inv[ii] = 1.0 / ad[ii];
1158
/* Current iteration */
1159
/*-------------------*/
1165
memcpy(rk, vx, n_rows * sizeof(cs_real_t)); /* rk <- vx */
1167
memcpy(vx, rhs, n_rows * sizeof(cs_real_t)); /* vx <- rhs */
1168
for (ii = n_rows; ii < n_cols; ii++)
1171
/* Compute Vx <- Vx - (A-diag).Rk */
1173
cs_matrix_alpha_a_x_p_beta_y(rotation_mode, -1.0, 1.0, ax, rk, vx);
1175
for (ii = 0; ii < n_rows; ii++)
1176
vx[ii] = vx[ii] * ad_inv[ii];
1178
/* Compute residue */
1182
for (ii = 0; ii < n_rows; ii++) {
1183
register double r = ad[ii] * (vx[ii]-rk[ii]);
1187
#if defined(HAVE_MPI)
1189
if (cs_glob_n_ranks > 1) {
1191
MPI_Allreduce(&res2, &_sum, 1, MPI_DOUBLE, MPI_SUM,
1196
#endif /* defined(HAVE_MPI) */
1198
residue = sqrt(res2);
1200
/* Convergence test */
1202
cvg = _convergence_test(sles_name, var_name,
1203
n_iter, residue, convergence);
1207
if (_aux_vectors != aux_vectors)
1208
BFT_FREE(_aux_vectors);
1213
/*----------------------------------------------------------------------------
1214
* Solution of (ad+ax).vx = Rhs using preconditioned Bi-CGSTAB.
1216
* Parallel-optimized version, groups dot products, at the cost of
1217
* computation of the preconditionning for n+1 iterations instead of n.
1219
* On entry, vx is considered initialized.
1222
* var_name <-- Variable name
1224
* ax <-- Non-diagonal part of linear equation matrix
1225
* (only necessary if poly_degree > 0)
1226
* poly_degree <-- Preconditioning polynomial degree (0: diagonal)
1227
* rotation_mode <-- Halo update option for rotational periodicity
1228
* convergence <-- Convergence information structure
1229
* rhs <-- Right hand side
1230
* vx --> System solution
1231
* aux_size <-- Number of elements in aux_vectors
1232
* aux_vectors --- Optional working area (allocation otherwise)
1235
* 1 if converged, 0 if not converged, -1 if not converged and maximum
1236
* iteration number reached, -2 if divergence is detected.
1237
*----------------------------------------------------------------------------*/
1240
_bi_cgstab(const char *var_name,
1241
const cs_matrix_t *a,
1242
const cs_matrix_t *ax,
1244
cs_perio_rota_t rotation_mode,
1245
cs_sles_convergence_t *convergence,
1246
const cs_real_t *rhs,
1247
cs_real_t *restrict vx,
1251
const char *sles_name;
1253
cs_int_t n_cols, n_rows, ii;
1254
double _epzero = 1.e-30; /* smaller than epzero */
1255
double ro_0, ro_1, alpha, beta, betam1, gamma, omega, ukres0;
1257
cs_real_t *_aux_vectors;
1258
cs_real_t *restrict res0, *restrict rk, *restrict pk, *restrict zk;
1259
cs_real_t *restrict uk, *restrict vk, *restrict ad_inv;
1261
unsigned n_iter = 0;
1263
/* Tell IBM compiler not to alias */
1264
#if defined(__xlc__)
1265
#pragma disjoint(*rhs, *vx, *res0, *rk, *pk, *zk, *uk, *vk, *ad_inv)
1268
/* Preliminary calculations */
1269
/*--------------------------*/
1271
sles_name = _(cs_sles_type_name[CS_SLES_BICGSTAB]);
1273
n_cols = cs_matrix_get_n_columns(a);
1274
n_rows = cs_matrix_get_n_rows(a);
1276
/* Allocate work arrays */
1280
size_t wa_size = n_cols;
1282
if (aux_vectors == NULL || aux_size < (wa_size * n_wa))
1283
BFT_MALLOC(_aux_vectors, wa_size * n_wa, cs_real_t);
1285
_aux_vectors = aux_vectors;
1287
ad_inv = _aux_vectors;
1289
res0 = _aux_vectors + wa_size;
1290
rk = _aux_vectors + wa_size*2;
1291
pk = _aux_vectors + wa_size*3;
1292
zk = _aux_vectors + wa_size*4;
1293
uk = _aux_vectors + wa_size*5;
1294
vk = _aux_vectors + wa_size*6;
1298
cs_matrix_get_diagonal(a, ad_inv);
1299
for (ii = 0; ii < n_rows; ii++)
1300
ad_inv[ii] = 1.0 / ad_inv[ii];
1302
/* Initialize work arrays (not necessary, just for debugging) */
1304
for (ii = 0; ii < n_rows; ii++) {
1313
/* Initialize iterative calculation */
1314
/*----------------------------------*/
1316
cs_matrix_vector_multiply(rotation_mode, a, vx, res0);
1318
for (ii = 0; ii < n_rows; ii++) {
1319
res0[ii] = -res0[ii] + rhs[ii];
1329
/* Current Iteration */
1330
/*-------------------*/
1334
/* Compute beta and omega;
1335
group dot products for new iteration's beta
1336
and previous iteration's residue to reduce total latency */
1340
beta = _dot_product(n_rows, res0, rk);
1341
residue = sqrt(beta);
1346
_dot_products_2(n_rows, res0, rk, rk, rk, &beta, &residue);
1347
residue = sqrt(residue);
1349
/* Convergence test */
1350
cvg = _convergence_test(sles_name, var_name,
1351
n_iter, residue, convergence);
1359
if (CS_ABS(beta) < _epzero) {
1361
if (convergence->verbosity == 2)
1362
bft_printf(_(" n_iter : %5d, res_abs : %11.4e, res_nor : %11.4e\n"),
1363
n_iter, residue, residue/convergence->r_norm);
1364
else if (convergence->verbosity > 2)
1365
bft_printf(" %5d %11.4e %11.4e\n",
1366
n_iter, residue, residue/convergence->r_norm);
1372
if (CS_ABS(alpha) < _epzero) {
1376
" @@ Warning: non convergence and abort\n"
1378
" Alpha coefficient is lower than %12.4e\n"
1380
" The matrix cannot be considered as invertible anymore."),
1381
sles_name, var_name, alpha);
1386
omega = beta*gamma / (alpha*betam1);
1391
for (ii = 0; ii < n_rows; ii++)
1392
pk[ii] = rk[ii] + omega*(pk[ii] - alpha*uk[ii]);
1394
/* Compute zk = c.pk */
1396
_polynomial_preconditionning(n_rows,
1405
/* Compute uk = A.zk */
1407
cs_matrix_vector_multiply(rotation_mode, a, zk, uk);
1409
/* Compute uk.res0 and gamma */
1411
ukres0 = _dot_product(n_rows, uk, res0);
1413
gamma = beta / ukres0;
1415
/* First update of vx and rk */
1417
cblas_daxpy(n_rows, gamma, zk, 1, vx, 1);
1418
cblas_daxpy(n_rows, -gamma, uk, 1, rk, 1);
1420
/* Compute zk = C.rk (zk is overwritten, vk is a working array */
1422
_polynomial_preconditionning(n_rows,
1431
/* Compute vk = A.zk and alpha */
1433
cs_matrix_vector_multiply(rotation_mode, a, zk, vk);
1435
_dot_products_2(n_rows, vk, rk, vk, vk, &ro_0, &ro_1);
1437
if (ro_1 < _epzero) {
1441
" @@ Warning: non convergence and abort\n"
1443
" The square of the norm of the descent vector\n"
1444
" is lower than %12.4e\n"
1446
" The resolution does not progress anymore."),
1447
sles_name, var_name, _epzero);
1452
alpha = ro_0 / ro_1;
1454
/* Final update of vx and rk */
1456
cblas_daxpy(n_rows, alpha, zk, 1, vx, 1);
1457
cblas_daxpy(n_rows, -alpha, vk, 1, rk, 1);
1459
/* Convergence test at beginning of next iteration so
1460
as to group dot products for better parallel performance */
1463
if (_aux_vectors != aux_vectors)
1464
BFT_FREE(_aux_vectors);
1469
/*----------------------------------------------------------------------------
1470
* Output post-processing data for failed system convergence.
1473
* n_vals <-- Size of val and val_type array
1474
* val <-> Values to post-process (set to 0 on output if not
1475
* normal floating-point values)
1476
* val_type --> 0: normal values, 1: infinite, 2: Nan
1479
* number of non-normal values
1480
*----------------------------------------------------------------------------*/
1483
_value_type(size_t n_vals,
1490
#if (_CS_STDC_VERSION >= 199901L)
1492
for (ii = 0; ii < n_vals; ii++) {
1494
int v_type = fpclassify(val[ii]);
1496
if (v_type == FP_INFINITE) {
1502
else if (v_type == FP_NAN) {
1508
else if (val[ii] > 1.e38 || val[ii] < -1.e38) {
1520
for (ii = 0; ii < n_vals; ii++) {
1522
if (val[ii] != val[ii]) { /* Test for NaN with IEEE 754 arithmetic */
1528
else if (val[ii] > 1.e38 || val[ii] < -1.e38) {
1543
/*----------------------------------------------------------------------------
1544
* Compute per-cell residual for Ax = b.
1547
* symmetric <-- indicates if matrix values are symmetric
1548
* rotation_mode <-- Halo update option for rotational periodicity
1549
* ad <-- Diagonal part of linear equation matrix
1550
* ax <-- Non-diagonal part of linear equation matrix
1551
* rhs <-- Right hand side
1552
* vx <-> Current system solution
1554
*----------------------------------------------------------------------------*/
1557
_cell_residual(cs_bool_t symmetric,
1558
cs_perio_rota_t rotation_mode,
1559
const cs_real_t ad[],
1560
const cs_real_t ax[],
1561
const cs_real_t rhs[],
1567
const cs_int_t n_cells = cs_glob_mesh->n_cells;
1569
cs_matrix_t *a = cs_glob_sles_base_matrix;
1571
cs_matrix_set_coefficients(a, symmetric, ad, ax);
1573
cs_matrix_vector_multiply(rotation_mode, a, vx, res);
1575
for (ii = 0; ii < n_cells; ii++)
1576
res[ii] = fabs(res[ii] - rhs[ii]);
1579
/*----------------------------------------------------------------------------
1580
* Compute diagonal dominance metric.
1583
* symmetric <-- indicates if matrix values are symmetric
1584
* ad <-- Diagonal part of linear equation matrix
1585
* ax <-- Non-diagonal part of linear equation matrix
1586
* dd <-- Diagonal dominance (normalized)
1587
*----------------------------------------------------------------------------*/
1590
_diag_dominance(cs_bool_t symmetric,
1591
const cs_real_t ad[],
1592
const cs_real_t ax[],
1595
cs_int_t ii, jj, face_id;
1597
const cs_int_t n_cells = cs_glob_mesh->n_cells;
1598
const cs_int_t n_faces = cs_glob_mesh->n_i_faces;
1599
const cs_int_t *face_cel = cs_glob_mesh->i_face_cells;
1600
const cs_halo_t *halo = cs_glob_mesh->halo;
1602
/* Diagonal part of matrix.vector product */
1604
for (ii = 0; ii < n_cells; ii++)
1605
dd[ii] = fabs(ad[ii]);
1608
cs_halo_sync_var(halo, CS_HALO_STANDARD, dd);
1610
/* non-diagonal terms */
1613
for (face_id = 0; face_id < n_faces; face_id++) {
1614
ii = face_cel[2*face_id] -1;
1615
jj = face_cel[2*face_id + 1] -1;
1616
dd[ii] -= fabs(ax[face_id]);
1617
dd[jj] -= fabs(ax[face_id]);
1621
for (face_id = 0; face_id < n_faces; face_id++) {
1622
ii = face_cel[2*face_id] -1;
1623
jj = face_cel[2*face_id + 1] -1;
1624
dd[ii] -= fabs(ax[face_id]);
1625
dd[jj] -= fabs(ax[face_id + n_faces]);
1629
for (ii = 0; ii < n_cells; ii++) {
1630
if (fabs(ad[ii]) > 1.e-18)
1631
dd[ii] /= fabs(ad[ii]);
1635
/*============================================================================
1636
* Public function definitions for Fortran API
1637
*============================================================================*/
1639
/*----------------------------------------------------------------------------
1640
* General sparse linear system resolution
1641
*----------------------------------------------------------------------------*/
1643
void CS_PROCF(reslin, RESLIN)
1645
const char *cname, /* <-- variable name */
1646
const cs_int_t *lname, /* <-- variable name length */
1647
const cs_int_t *ncelet, /* <-- Number of cells, halo included */
1648
const cs_int_t *ncel, /* <-- Number of local cells */
1649
const cs_int_t *nfac, /* <-- Number of faces */
1650
const cs_int_t *isym, /* <-- Symmetry indicator:
1651
1: symmetric; 2: not symmetric */
1652
const cs_int_t *ireslp, /* <-- Resolution type:
1653
0: pcg; 1: Jacobi; 2: cg-stab */
1654
const cs_int_t *ipol, /* <-- Preconditioning polynomial degree
1656
const cs_int_t *nitmap, /* <-- Number of max iterations */
1657
const cs_int_t *iinvpe, /* <-- Indicator to cancel increments
1658
in rotational periodicty (2) or
1659
to exchange them as scalars (1) */
1660
const cs_int_t *iwarnp, /* <-- Verbosity level */
1661
cs_int_t *niterf, /* --> Number of iterations done */
1662
const cs_real_t *epsilp, /* <-- Precision for iterative resolution */
1663
const cs_real_t *rnorm, /* <-- Residue normalization */
1664
cs_real_t *residu, /* --> Final non normalized residue */
1665
const cs_int_t *ifacel, /* <-- Face -> cell connectivity */
1666
const cs_real_t *dam, /* <-- Matrix diagonal */
1667
const cs_real_t *xam, /* <-- Matrix extra-diagonal terms */
1668
const cs_real_t *rhs, /* <-- System right-hand side */
1669
cs_real_t *vx /* <-> System solution */
1673
cs_sles_type_t type;
1676
int n_iter = *niterf;
1677
cs_bool_t symmetric = (*isym == 1) ? true : false;
1678
cs_perio_rota_t rotation_mode = CS_PERIO_ROTA_COPY;
1680
assert(*ncelet >= *ncel);
1682
assert(ifacel != NULL);
1685
rotation_mode = CS_PERIO_ROTA_RESET;
1686
else if (*iinvpe == 3)
1687
rotation_mode = CS_PERIO_ROTA_IGNORE;
1689
var_name = cs_base_string_f_to_c_create(cname, *lname);
1691
switch ((int)(*ireslp)) {
1696
type = CS_SLES_JACOBI;
1699
type = CS_SLES_BICGSTAB;
1702
type = CS_SLES_N_TYPES;
1706
cvg = cs_sles_solve(var_name,
1712
cs_glob_sles_base_matrix,
1713
cs_glob_sles_native_matrix,
1729
/* If divergence is detected, try diagnostics and abort */
1733
int mesh_id = cs_post_init_error_writer_cells();
1735
cs_sles_post_error_output_def(var_name,
1746
bft_error(__FILE__, __LINE__, 0,
1747
_("%s: error (divergence) solving for %s"),
1748
_(cs_sles_type_name[type]), var_name);
1751
cs_base_string_f_to_c_free(&var_name);
1754
/*============================================================================
1755
* Public function definitions
1756
*============================================================================*/
1758
/*----------------------------------------------------------------------------
1759
* Initialize sparse linear equation solver API.
1760
*----------------------------------------------------------------------------*/
1763
cs_sles_initialize(void)
1765
cs_mesh_t *mesh = cs_glob_mesh;
1766
cs_bool_t periodic = false;
1768
assert(mesh != NULL);
1770
if (mesh->n_init_perio > 0)
1773
cs_glob_sles_base_matrix = cs_matrix_create(CS_MATRIX_NATIVE,
1778
mesh->n_cells_with_ghosts,
1780
mesh->global_cell_num,
1783
mesh->i_face_numbering);
1785
cs_glob_sles_native_matrix = cs_matrix_create(CS_MATRIX_NATIVE,
1790
mesh->n_cells_with_ghosts,
1792
mesh->global_cell_num,
1795
mesh->i_face_numbering);
1798
/*----------------------------------------------------------------------------
1799
* Finalize sparse linear equation solver API.
1800
*----------------------------------------------------------------------------*/
1803
cs_sles_finalize(void)
1807
/* Free system info */
1809
for (ii = 0; ii < cs_glob_sles_n_systems; ii++) {
1810
_sles_info_dump(cs_glob_sles_systems[ii]);
1811
_sles_info_destroy(&(cs_glob_sles_systems[ii]));
1814
BFT_FREE(cs_glob_sles_systems);
1816
cs_glob_sles_n_systems = 0;
1817
cs_glob_sles_n_max_systems = 0;
1819
/* Free matrix structures */
1821
cs_matrix_destroy(&cs_glob_sles_native_matrix);
1822
cs_matrix_destroy(&cs_glob_sles_base_matrix);
1825
/*----------------------------------------------------------------------------
1826
* Test if a general sparse linear system needs solving or if the right-hand
1827
* side is already zero within convergence criteria.
1829
* The computed residue is also updated;
1832
* var_name <-- Variable name
1833
* solver_name <-- Name of solver
1834
* n_rows <-- Number of (non ghost) rows in rhs
1835
* verbosity <-- Verbosity level
1836
* r_norm <-- Residue normalization
1837
* residue <-> Residue
1838
* rhs <-- Right hand side
1841
* 1 if solving is required, 0 if the rhs is already zero within tolerance
1842
* criteria (precision of residue normalization)
1843
*----------------------------------------------------------------------------*/
1846
cs_sles_needs_solving(const char *var_name,
1847
const char *solver_name,
1852
const cs_real_t *rhs)
1856
/* Initialize residue, check for immediate return */
1858
*residue = sqrt(_dot_product(n_rows, rhs, rhs));
1860
if (r_norm <= EPZERO || *residue <= EPZERO) {
1862
bft_printf(_("%s [%s]:\n"
1863
" immediate exit; r_norm = %11.4e, residual = %11.4e\n"),
1864
solver_name, var_name, r_norm, *residue);
1871
/*----------------------------------------------------------------------------
1872
* General sparse linear system resolution.
1874
* Note that in most cases (if the right-hand side is not already zero
1875
* within convergence criteria), coefficients are assigned to matrixes
1876
* then released by this function, so coefficients need not be assigned
1877
* prior to this call, and will have been released upon returning.
1880
* var_name <-- Variable name
1881
* solver_type <-- Type of solver (PCG, Jacobi, ...)
1882
* update_stats <-- Automatic solver statistics indicator
1883
* symmetric <-- Symmetric coefficients indicator
1884
* ad_coeffs <-- Diagonal coefficients of linear equation matrix
1885
* ax_coeffs <-- Non-diagonal coefficients of linear equation matrix
1887
* ax <-> Non-diagonal part of linear equation matrix
1888
* (only necessary if poly_degree > 0)
1889
* poly_degree <-- Preconditioning polynomial degree (0: diagonal)
1890
* rotation_mode <-- Halo update option for rotational periodicity
1891
* verbosity <-- Verbosity level
1892
* n_max_iter <-- Maximum number of iterations
1893
* precision <-- Precision limit
1894
* r_norm <-- Residue normalization
1895
* n_iter --> Number of iterations
1896
* residue <-> Residue
1897
* rhs <-- Right hand side
1898
* vx --> System solution
1899
* aux_size <-- Number of elements in aux_vectors
1900
* aux_vectors --- Optional working area (allocation otherwise)
1903
* 1 if converged, 0 if not converged, -1 if not converged and maximum
1904
* iteration number reached, -2 if divergence is detected.
1905
*----------------------------------------------------------------------------*/
1908
cs_sles_solve(const char *var_name,
1909
cs_sles_type_t solver_type,
1910
cs_bool_t update_stats,
1911
cs_bool_t symmetric,
1912
const cs_real_t *ad_coeffs,
1913
const cs_real_t *ax_coeffs,
1917
cs_perio_rota_t rotation_mode,
1924
const cs_real_t *rhs,
1930
unsigned _n_iter = 0;
1932
cs_sles_convergence_t convergence;
1934
cs_sles_info_t *sles_info = NULL;
1935
double wt_start = 0.0, wt_stop = 0.0;
1936
double cpu_start = 0.0, cpu_stop = 0.0;
1938
cs_matrix_t *_a = a;
1939
cs_matrix_t *_ax = ax;
1941
n_rows = cs_matrix_get_n_rows(a);
1943
if (update_stats == true) {
1944
wt_start =bft_timer_wtime();
1945
cpu_start =bft_timer_cpu_time();
1946
sles_info = _find_or_add_system(var_name, solver_type);
1949
/* Initialize number of iterations and residue,
1950
check for immediate return,
1951
and solve sparse linear system */
1955
if (cs_sles_needs_solving(var_name,
1956
_(cs_sles_type_name[solver_type]),
1963
/* Set matrix coefficients */
1965
if (solver_type == CS_SLES_JACOBI) {
1972
cs_matrix_set_coefficients(_ax, symmetric, NULL, ax_coeffs);
1975
else { /* if (solver_type != CS_SLES_JACOBI) */
1977
cs_matrix_set_coefficients(_a, symmetric, ad_coeffs, ax_coeffs);
1979
if (poly_degree > 0) {
1980
cs_matrix_set_coefficients(_ax, symmetric, NULL, ax_coeffs);
1984
/* Solve sparse linear system */
1986
_convergence_init(&convergence,
1987
_(cs_sles_type_name[solver_type]),
1995
switch (solver_type) {
1997
if (cs_glob_n_ranks == 1)
1998
cvg = _conjugate_gradient_sp(var_name,
2009
cvg = _conjugate_gradient_mp(var_name,
2020
case CS_SLES_JACOBI:
2021
cvg = _jacobi(var_name,
2031
case CS_SLES_BICGSTAB:
2032
cvg = _bi_cgstab(var_name,
2047
/* Release matrix coefficients */
2050
cs_matrix_release_coefficients(_a);
2052
cs_matrix_release_coefficients(_ax);
2054
/* Update return values */
2056
_n_iter = convergence.n_iterations;
2058
*n_iter = convergence.n_iterations;
2059
*residue = convergence.residue;
2062
if (update_stats == true) {
2064
wt_stop =bft_timer_wtime();
2065
cpu_stop =bft_timer_cpu_time();
2067
if (sles_info->n_calls == 0)
2068
sles_info->n_iterations_min = _n_iter;
2070
sles_info->n_calls += 1;
2072
if (sles_info->n_iterations_min > _n_iter)
2073
sles_info->n_iterations_min = _n_iter;
2074
if (sles_info->n_iterations_max < _n_iter)
2075
sles_info->n_iterations_max = _n_iter;
2077
sles_info->n_iterations_last = _n_iter;
2078
sles_info->n_iterations_tot += _n_iter;
2080
sles_info->wt_tot += (wt_stop - wt_start);
2081
sles_info->cpu_tot += (cpu_stop - cpu_start);
2087
/*----------------------------------------------------------------------------
2088
* Output default post-processing data for failed system convergence.
2091
* var_name <-- Variable name
2092
* mesh_id <-- id of error output mesh, or 0 if none
2093
* symmetric <-- indicates if matrix values are symmetric
2094
* rotation_mode <-- Halo update option for rotational periodicity
2095
* ad <-- Diagonal part of linear equation matrix
2096
* ax <-- Non-diagonal part of linear equation matrix
2097
* rhs <-- Right hand side
2098
* vx <-> Current system solution
2099
*----------------------------------------------------------------------------*/
2102
cs_sles_post_error_output_def(const char *var_name,
2104
cs_bool_t symmetric,
2105
cs_perio_rota_t rotation_mode,
2106
const cs_real_t *ad,
2107
const cs_real_t *ax,
2108
const cs_real_t *rhs,
2113
const cs_mesh_t *mesh = cs_glob_mesh;
2115
char base_name[32], val_name[32];
2118
const cs_int_t n_cells = mesh->n_cells;
2122
BFT_MALLOC(val, mesh->n_cells_with_ghosts, cs_real_t);
2124
for (val_id = 0; val_id < 5; val_id++) {
2129
strcpy(base_name, "Diag");
2130
memcpy(val, ad, n_cells*sizeof(cs_real_t));
2134
strcpy(base_name, "RHS");
2135
memcpy(val, rhs, n_cells*sizeof(cs_real_t));
2139
strcpy(base_name, "X");
2140
memcpy(val, vx, n_cells*sizeof(cs_real_t));
2144
strcpy(base_name, "Residual");
2145
_cell_residual(symmetric, rotation_mode, ad, ax, rhs, vx, val);
2149
strcpy(base_name, "Diag_Dom");
2150
_diag_dominance(symmetric, ad, ax, val);
2155
if (strlen(var_name) + strlen(base_name) < 31) {
2156
strcpy(val_name, base_name);
2157
strcat(val_name, "_");
2158
strcat(val_name, var_name);
2161
strncpy(val_name, base_name, 31);
2162
val_name[31] = '\0';
2165
cs_sles_post_error_output_var(val_name,
2175
/*----------------------------------------------------------------------------
2176
* Output post-processing variable for failed system convergence.
2179
* var_name <-- Variable name
2180
* mesh_id <-- id of error output mesh, or 0 if none
2181
* var <-- Variable values
2182
*----------------------------------------------------------------------------*/
2185
cs_sles_post_error_output_var(const char *var_name,
2191
const cs_mesh_t *mesh = cs_glob_mesh;
2194
const cs_int_t n_cells = mesh->n_cells;
2198
BFT_MALLOC(val_type, n_cells, int);
2200
n_non_norm = _value_type(n_cells, var, val_type);
2202
cs_post_write_var(mesh_id,
2205
false, /* no interlace */
2206
true, /* use parents */
2207
CS_POST_TYPE_cs_real_t,
2214
if (n_non_norm > 0) {
2217
size_t l = strlen(var_name);
2222
l -= strlen("_fp_type");
2224
strncpy(type_name, var_name, l);
2225
type_name[l] = '\0';
2227
strcat(type_name, "_fp_type");
2229
cs_post_write_var(mesh_id,
2232
false, /* no interlace */
2233
true, /* use parents */
2247
/*----------------------------------------------------------------------------*/