2351
* form_diag_manip class::
1094
* geo_domain class::
1095
* geo_domain_indirect class::
2352
1096
* space class::
2354
* form_manip class::
1097
* form_element class::
1098
* domain_indirect class::
2355
1100
* field class::
2360
1103
* tensor class::
2361
1104
* point class::
2363
* permutation class::
1107
* mpi_assembly_end class::
1108
* msg_left_permutation_apply class::
1109
* polymorphic_map class::
1110
* mpi_polymorphic_assembly_begin class::
1111
* polymorphic_array class::
1112
* mpi_polymorphic_assembly_end class::
1113
* distributor class::
1114
* msg_from_context_indices class::
1115
* msg_both_permutation_apply class::
1116
* msg_right_permutation_apply class::
1117
* mpi_assembly_begin class::
1121
* msg_local_optimize class::
1122
* msg_to_context class::
1125
* msg_from_context_pattern class::
1126
* msg_local_context class::
1127
* mpi_scatter_init class::
1129
* mpi_scatter_begin class::
1130
* mpi_scatter_end class::
2368
1131
* iorheo class::
2369
1132
* Vector class::
2370
1133
* catchmark class::
2371
1134
* rheostream class::
2374
File: rheolef.info, Node: cad class, Up: Classes
2376
5.1 `cad' - the boundary definition class
2377
=========================================
2379
(Source file: `nfem/lib/cad.h')
2384
The `cad' class defines a container for the description of the
2385
boundary of a geometry, by using Bezier patches.
2387
The aim is to handle high-level polynomial interpolation together
2388
with curved boundaries (non-polynomial). So, the description bases on
2389
Bezier curves and surfaces.
2391
Also, the adaptive mesh loop requieres interpolation on the boundary,
2392
and this class should facilitates such procedures.
2394
This class is actually under development.
2401
File format conversion
2402
----------------------
2404
Under development. Conversion to geomview and vtk for visualization.
2405
Also to and from qmg, grummp, modulef/ghs, opencascade for
2411
class cad : public smart_pointer<cad_rep> {
2415
typedef cad_rep::size_type size_type;
2421
cad (const std::string& file_name);
2425
size_type dimension() const;
2426
size_type n_face() const;
2428
const point& xmin() const;
2429
const point& xmax() const;
2431
void eval (const cad_element& S, const point& ref_coord, point & real_coord) const;
2432
point eval (const cad_element& S, const point& ref_coord) const;
2436
friend std::istream& operator >> (std::istream& is, cad& x);
2437
friend std::ostream& operator << (std::ostream& os, const cad& x);
2441
1137
File: rheolef.info, Node: geo class, Up: Classes
2443
5.2 `geo' - the finite element mesh class
2444
=========================================
1139
5.1 geo - finite element mesh
1140
=============================
2446
(Source file: `nfem/lib/geo.h')
1142
(Source file: `nfem/plib/geo.h')
2451
The `geo' class defines a container for a finite element mesh. This
2452
describes the nodes coordinates and the connectivity. `geo' can
2453
contains domains, usefull for boundary condition setting.
2458
A sample usage of the geo class writes
2461
cout << mayavi << full << omega;
2466
The empty constructor makes an empty geo. A string argument, as
2468
will recursively look for a `square.geo[.gz]' file in the directory
2469
mentionned by the RHEOPATH environment variable, while `gzip'
2470
decompression is assumed. If the file starts with `.' as `./square' or
2471
with a `/' as in `/home/oscar/square', no search occurs. Also, if the
2472
environment variable RHEOPATH is not set, the default value is the
2475
Input and output on streams are available, and manipulators works for
2476
text or graphic output (*note geo command::).
2478
Finally, an STL-like interface provides efficient accesses to nodes,
2479
elements and domains.
2484
The `geo_adapt' functions performs a mesh adaptation to improve the
2485
`P1'-Lagrange interpolation of the `field' given in argument (*note
2488
Axisymetric geometry
2489
--------------------
2491
The `coordinate_system' and `set_coordinate_system' members supports
2492
both `cartesian', `rz' and `zr' (axisymmetric) coordinate systems. This
2493
information is used by the `form' class (*note form class::).
2495
Access to connectivity
2496
----------------------
2498
The folowing code prints triangle vertex numbers
2499
geo omega ("circle");
2500
for (geo::const_iterator i = g.begin(); i != i.end(); i++) {
2501
const geo_element& K = *i;
2502
if (K.name() != 't') continue;
2503
for (geo::size_type j = 0; j < 3; j++)
2504
cout << K [j] << " ";
2507
*Note geo_element internal::.
2509
Access to vertice coordinates
2510
-----------------------------
2512
The folowing code prints vertices coordinate
2513
for (geo::const_iterator_vertex i = g.begin_vertex(); i != g.end_node(); i++) {
2514
const point& xi = *i;
2515
for (geo::size_type j = 0; j < g.dimension(); j++)
2516
cout << xi [j] << " ";
2523
The following code prints edges on domain:
2524
for (geo::const_iterator_domain i = g.begin_domain(); i != i.end_domain(); i++) {
2525
const domain& dom = *i;
2526
if (dom.dimension() != 2) continue;
2527
for (domain::const_iterator j = dom.begin(); j < dom.end(); j++) {
2528
const geo_element& E = *j;
2529
cout << E [0] << " " << E[1] << endl;
2533
*Note domain class::.
2538
RHEOPATH: search path for geo data file. Also *note form class::.
2543
class geo : public smart_pointer<georep> {
2548
typedef georep::plot_options plot_options;
2549
void write_gnuplot_postscript_options (std::ostream& plot, const plot_options& opt) const;
2551
typedef georep::iterator iterator;
2552
typedef georep::const_iterator const_iterator;
2553
typedef georep::elemlist_type elemlist_type;
2554
typedef georep::nodelist_type nodelist_type;
2555
typedef georep::size_type size_type;
2556
typedef georep::domlist_type domlist_type;
2558
typedef georep::const_iterator_node const_iterator_node;
2559
typedef georep::const_iterator_vertex const_iterator_vertex;
2560
typedef georep::const_iterator_domain const_iterator_domain;
2561
typedef georep::iterator_node iterator_node;
2562
typedef georep::iterator_vertex iterator_vertex;
2563
typedef georep::iterator_domain iterator_domain;
2565
// allocators/deallocators:
2567
explicit geo (const std::string& filename, const std::string& coord_sys = "cartesian");
2568
geo(const geo& g, const domain& d);
2569
geo(const nodelist_type& p, const geo& g);
2572
friend geo geo_adapt (const class field& criteria, const Float& hcoef,
2573
bool reinterpolate_criteria = false);
2575
friend geo geo_adapt (const class field& criteria,
2576
const adapt_option_type& = adapt_option_type(),
2577
bool reinterpolate_criteria = false);
2579
friend geo geo_metric_adapt (const field& mh,
2580
const adapt_option_type& = adapt_option_type());
2584
friend std::istream& operator >> (std::istream&, geo&);
2585
friend std::ostream& operator << (std::ostream&, const geo&);
2587
void use_double_precision_in_saving();
2588
std::ostream& dump (std::ostream& s = std::cerr) const;
2589
std::ostream& put_mtv_domains (std::ostream&, size_type=0) const;
2593
const point& vertex (size_type i) const;
2594
const geo_element& element (size_type K_idx) const;
2595
Float measure (const geo_element& K) const;
2597
std::string name() const;
2598
// Family name plus number
2599
std::string basename() const;
2600
// For moving boundary problems
2601
std::string familyname() const;
2602
// Number of moving boundary mesh
2603
size_type number() const;
2604
// Refinment iteration for the current mesh number
2605
size_type version() const;
2606
size_type dimension() const;
2607
size_type map_dimension() const;
2608
std::string coordinate_system () const; // "cartesian", "rz", "zr"
2609
fem_helper::coordinate_type coordinate_system_type() const;
2611
size_type serial_number() const;
2613
const domain& boundary() const;
2614
void build_subregion(const domain& start_from, const domain& dont_cross,
2615
std::string name, std::string name_of_complement="");
2616
// Builds a new domain on the side of domain `dont_cross' on which `start_from'
2617
// lies. These must have no intersection.
2619
size_type size() const;
2620
size_type n_element() const; // same as size()
2621
size_type n_vertex() const;
2622
size_type n_vertice() const;
2623
size_type n_node() const; // same as n_vertex()
2624
size_type n_edge() const ;
2625
size_type n_face() const ;
2626
size_type n_triangle() const ;
2627
size_type n_quadrangle() const ;
2628
size_type n_volume() const ;
2629
size_type n_tetraedra() const ;
2630
size_type n_prism() const ;
2631
size_type n_hexaedra() const ;
2632
size_type n_subgeo(size_type d) const ;
2633
size_type n_element(reference_element::enum_type t) const;
2637
const point& xmin() const;
2638
const point& xmax() const;
2640
meshpoint hatter (const point& x, size_type K) const;
2641
point dehatter (const meshpoint& S) const;
2642
point dehatter (const point& x_hat, size_type e) const;
2644
const_iterator begin() const;
2645
const_iterator end() const;
2646
const_iterator_node begin_node() const;
2647
const_iterator_node end_node() const;
2648
const_iterator_vertex begin_vertex() const;
2649
const_iterator_vertex end_vertex() const;
2652
bool localize (const point& x, geo_element::size_type& element) const;
2653
void localize_nearest (const point& x, point& y, geo_element::size_type& element) const;
2654
bool trace (const point& x0, const point& v, point& x, Float& t, size_type& element) const;
2656
// access to domains:
2657
const domain& get_domain(size_type i) const;
2658
size_type n_domain() const;
2659
bool has_domain (const std::string& domname) const;
2660
const domain& operator[] (const std::string& domname) const;
2661
const_iterator_domain begin_domain() const;
2662
const_iterator_domain end_domain() const;
2664
point normal (const geo_element& S) const;
2665
point normal (const geo_element& K, georep::size_type side_idx) const;
2667
sort_interface(const domain&, const interface&) const;
2669
normal (const class space& Nh, const domain& d,
2670
const std::string& region="") const;
2672
normal (const class space& Nh, const domain& d, const interface& bc) const;
2674
tangent (const class space& Nh, const domain& d,
2675
const std::string& region="") const;
2677
tangent (const class space& Nh, const domain& d, const interface& bc) const;
2678
// Gives normal to d in discontinuous space Nh (P0 or P1d) and can
2679
// initialize orientation of domain through `bc' data structure.
2680
// Currently limited to straight-sided elements.
2682
tangent_spline (const space& Nh, const domain& d, const interface& bc) const;
2684
plane_curvature (const space& Nh, const domain& d,
2685
const interface& bc) const;
2686
// Gives curvature of d in the (x,y) or (r,z) plane.
2687
// Currently limited to straight-sided elements.
2689
plane_curvature_spline (const space& Nh, const domain& d,
2690
const interface& bc) const;
2691
// Gives curvature of d in the (x,y) or (r,z) plane based on a spline interpolation.
2693
plane_curvature_quadratic (const space& Nh, const domain& d,
2694
const interface& bc) const;
2695
// Gives curvature of d in the (x,y) or (r,z) plane based on a local quadratic interpolation.
2698
axisymmetric_curvature (const class space& Nh, const domain& d) const;
2700
axisymmetric_curvature (const class space& Nh, const domain& d,
2701
const interface& bc) const;
2702
// Specific to "rz" and "zr" coordinate systems:
2703
// Gives curvature of d in a plane orthogonal to the (r,z) plane.
2704
// Can also initialize orientation of domain through `bc' data structure.
2705
// Currently limited to straight-sided elements.
2707
interface_process (const domain& d, const interface& bc,
2708
geometric_event& event) const;
2709
// Detects event along domain d sorted according to bc's.
2711
// construction of jump-interface domain data:
2712
void jump_interface(const domain& interface, const domain& subgeo,
2713
std::map<size_type, tiny_vector<size_type> >& special_elements,
2714
std::map<size_type,size_type>& node_global_to_interface) const;
2718
bool operator == (const geo&) const;
2719
bool operator != (const geo&) const;
2723
// build from a list of vertex and elements:
2724
template <class ElementContainer, class VertexContainer>
2725
void build (const ElementContainer&, const VertexContainer&);
2727
void set_name (const std::string&);
2728
void set_coordinate_system (const std::string&); // "cartesian", "rz", "zr"
2730
void label_interface(const domain& dom1, const domain& dom2, const std::string& name);
2733
void erase_domain (const std::string& name);
2734
void insert_domain (const domain&);
2736
// accessors to internals:
2737
bool localizer_initialized () const;
2738
void init_localizer (const domain& boundary, Float tolerance=-1, int list_size=0) const;
2739
const size_type* get_geo_counter() const;
2740
const size_type* get_element_counter() const;
2743
int gnuplot2d (const std::string& basename,
2744
plot_options& opt) const;
2746
// check consistency
2752
friend class spacerep;
2755
void may_have_version_2() const; // fatal if not !
2757
// modifiers to internals:
2759
void set_dimension (size_type d);
2762
iterator_node begin_node();
2763
iterator_node end_node();
2764
iterator_vertex begin_vertex();
2765
iterator_vertex end_vertex();
2769
File: rheolef.info, Node: form_diag_manip class, Up: Classes
2771
5.3 `form_diag_manip' - concatenation on a product space
2772
========================================================
2774
(Source file: `nfem/lib/form_diag_manip.h')
2779
build a form_diag on a product space.
2784
The following example builds a 3-blocks diagonal form:
2791
I_manip << size(3) << Iv << Iv << Iv;
2797
class form_diag_manip {
2802
typedef form_diag::size_type size_type;
2804
// constructors/destructors:
2810
size_type nbloc() const;
2811
size_type nform() const;
2812
form_diag& bloc(size_type i);
2813
const form_diag& bloc(size_type i) const;
2817
form_diag_manip& operator << (const form_diag& a);
2818
form_diag_manip& operator >> (form_diag& a);
2819
friend form_diag_manip& verbose (form_diag_manip &fm);
2820
friend form_diag_manip& noverbose (form_diag_manip &fm);
2821
form_diag_manip& operator<< (form_diag_manip& (*pf)(form_diag_manip&));
2826
friend form_diag_manip& operator << (form_diag_manip &fm, const class sized& s);
2832
std::vector<form_diag> _a;
1147
Distributed finite element mesh.
1153
class geo_basic<T,sequential> : public smart_pointer<geo_abstract_rep<T,sequential> > {
1158
typedef sequential memory_type;
1159
typedef geo_abstract_rep<T,sequential> rep;
1160
typedef smart_pointer<rep> base;
1161
typedef typename rep::size_type size_type;
1162
typedef typename rep::node_type node_type;
1163
typedef typename rep::variant_type variant_type;
1164
typedef typename rep::reference reference;
1165
typedef typename rep::const_reference const_reference;
1166
typedef typename rep::iterator iterator;
1167
typedef typename rep::const_iterator const_iterator;
1168
typedef typename rep::iterator_by_variant iterator_by_variant;
1169
typedef typename rep::const_iterator_by_variant const_iterator_by_variant;
1174
geo_basic (std::string name, const communicator& comm = communicator());
1175
void load (std::string name, const communicator& comm = communicator());
1176
geo_basic (const domain_indirect_basic<sequential>& dom, const geo_basic<T,sequential>& omega);
1180
std::string name() const { return base::data().name(); }
1181
size_type dimension() const { return base::data().dimension(); }
1182
size_type map_dimension() const { return base::data().map_dimension(); }
1183
size_type order() const { return base::data().order(); }
1184
const node_type& xmin() const { return base::data().xmin(); }
1185
const node_type& xmax() const { return base::data().xmax(); }
1186
const distributor& geo_element_ownership(size_type dim) const { return base::data().geo_element_ownership(dim); }
1187
const geo_size& sizes() const { return base::data().sizes(); }
1188
const geo_size& ios_sizes() const { return base::data().ios_sizes(); }
1189
const_reference get_geo_element (size_type dim, size_type ige) const { return base::data().get_geo_element (dim, ige); }
1190
reference get_geo_element (size_type dim, size_type ige) { return base::data().get_geo_element (dim, ige); }
1191
const node_type& node (size_type inod) const { return base::data().node(inod); }
1192
const node_type& dis_node (size_type inod) const { return node(inod); }
1193
void dis_inod (const geo_element& K, std::vector<size_type>& dis_inod) const {
1194
return base::data().dis_inod(K,dis_inod); }
1196
size_type n_domain_indirect () const { return base::data().n_domain_indirect (); }
1197
const domain_indirect_basic<sequential>& get_domain_indirect (size_type i) const {
1198
return base::data().get_domain_indirect (i); }
1199
const domain_indirect_basic<sequential>& get_domain_indirect (const std::string& name) const {
1200
return base::data().get_domain_indirect (name); }
1202
size_type n_domain () const { return base::data().n_domain_indirect (); }
1203
geo_basic<T,sequential> get_domain (size_type i) const;
1204
geo_basic<T,sequential> operator[] (const std::string& name) const;
1206
// extended accessors:
1208
const communicator& comm() const { return geo_element_ownership (0).comm(); }
1209
size_type size(size_type dim) const { return base::data().geo_element_ownership(dim).size(); }
1210
size_type dis_size(size_type dim) const { return base::data().geo_element_ownership(dim).dis_size(); }
1211
size_type size() const { return size (map_dimension()); }
1212
size_type dis_size() const { return dis_size (map_dimension()); }
1213
size_type n_vertex() const { return size (0); }
1214
size_type dis_n_vertex() const { return dis_size (0); }
1215
const_reference operator[] (size_type ie) const { return get_geo_element (map_dimension(), ie); }
1216
reference operator[] (size_type ie) { return get_geo_element (map_dimension(), ie); }
1217
const_iterator begin (size_type dim) const { return base::data().begin(dim); }
1218
const_iterator end (size_type dim) const { return base::data().end (dim); }
1219
const_iterator begin () const { return begin(map_dimension()); }
1220
const_iterator end () const { return end (map_dimension()); }
1222
const_iterator_by_variant begin_by_variant (variant_type variant) const
1223
{ return base::data().begin_by_variant (variant); }
1224
const_iterator_by_variant end_by_variant (variant_type variant) const
1225
{ return base::data(). end_by_variant (variant); }
1227
iterator_by_variant begin_by_variant (variant_type variant)
1228
{ return base::data().begin_by_variant (variant); }
1229
iterator_by_variant end_by_variant (variant_type variant)
1230
{ return base::data(). end_by_variant (variant); }
1233
size_type dis_ige2ios_dis_ige (size_type dim, size_type dis_ige) const { return dis_ige; }
1235
const geo_basic<T,sequential>& get_background_geo() const; // code in geo_domain.h
1236
geo_basic<T,sequential> get_background_domain() const;
1240
bool operator== (const geo_basic<T,sequential>& omega2) const { return base::data().operator== (omega2.data()); }
1244
idiststream& get (idiststream& ips);
1245
odiststream& put (odiststream& ops) const { return base::data().put (ops); }
1246
void dump (std::string name) const { base::data().dump (name); }
1253
class geo_basic<T,distributed> : public smart_pointer<geo_abstract_rep<T,distributed> > {
1258
typedef distributed memory_type;
1259
typedef geo_abstract_rep<T,distributed> rep;
1260
typedef smart_pointer<rep> base;
1261
typedef typename rep::size_type size_type;
1262
typedef typename rep::node_type node_type;
1263
typedef typename rep::variant_type variant_type;
1264
typedef typename rep::node_map_type node_map_type;
1265
typedef typename rep::reference reference;
1266
typedef typename rep::const_reference const_reference;
1267
typedef typename rep::iterator iterator;
1268
typedef typename rep::const_iterator const_iterator;
1269
typedef typename rep::iterator_by_variant iterator_by_variant;
1270
typedef typename rep::const_iterator_by_variant const_iterator_by_variant;
1275
geo_basic (std::string name, const communicator& comm = communicator());
1276
void load (std::string name, const communicator& comm = communicator());
1277
geo_basic (const domain_indirect_basic<distributed>& dom, const geo_basic<T,distributed>& omega);
1281
std::string name() const { return base::data().name(); }
1282
size_type dimension() const { return base::data().dimension(); }
1283
size_type map_dimension() const { return base::data().map_dimension(); }
1284
size_type order() const { return base::data().order(); }
1285
const node_type& xmin() const { return base::data().xmin(); }
1286
const node_type& xmax() const { return base::data().xmax(); }
1287
const distributor& geo_element_ownership(size_type dim) const
1288
{ return base::data().geo_element_ownership (dim); }
1289
const geo_size& sizes() const { return base::data().sizes(); }
1290
const geo_size& ios_sizes() const { return base::data().ios_sizes(); }
1291
const_reference get_geo_element (size_type dim, size_type ige) const
1292
{ return base::data().get_geo_element (dim, ige); }
1293
const_reference dis_get_geo_element (size_type dim, size_type dis_ige) const
1294
{ return base::data().dis_get_geo_element (dim, dis_ige); }
1295
distributor geo_element_ios_ownership (size_type dim) const {
1296
return base::data().geo_element_ios_ownership (dim); }
1297
size_type ige2ios_dis_ige (size_type dim, size_type ige) const {
1298
return base::data().ige2ios_dis_ige (dim,ige); }
1299
size_type dis_ige2ios_dis_ige (size_type dim, size_type dis_ige) const {
1300
return base::data().dis_ige2ios_dis_ige (dim,dis_ige); }
1301
size_type ios_ige2dis_ige (size_type dim, size_type ios_ige) const {
1302
return base::data().ios_ige2dis_ige (dim, ios_ige); }
1303
const node_type& node(size_type inod) const { return base::data().node(inod); }
1304
const node_type& dis_node(size_type dis_inod) const { return base::data().dis_node(dis_inod); }
1305
void dis_inod (const geo_element& K, std::vector<size_type>& dis_inod) const {
1306
return base::data().dis_inod(K,dis_inod); }
1308
size_type n_domain_indirect () const { return base::data().n_domain_indirect (); }
1309
const domain_indirect_basic<distributed>& get_domain_indirect (size_type i) const {
1310
return base::data().get_domain_indirect (i); }
1311
const domain_indirect_basic<distributed>& get_domain_indirect (const std::string& name) const {
1312
return base::data().get_domain_indirect (name); }
1314
size_type n_domain () const { return base::data().n_domain_indirect (); }
1315
geo_basic<T,distributed> get_domain (size_type i) const;
1316
geo_basic<T,distributed> operator[] (const std::string& name) const;
1318
// extended accessors:
1320
size_type size(size_type dim) const { return base::data().geo_element_ownership(dim).size(); }
1321
size_type dis_size(size_type dim) const { return base::data().geo_element_ownership(dim).dis_size(); }
1322
const communicator& comm() const { return geo_element_ownership (0).comm(); }
1323
size_type size() const { return size (map_dimension()); }
1324
size_type dis_size() const { return dis_size (map_dimension()); }
1325
const_reference operator[] (size_type ie) const
1326
{ return get_geo_element (map_dimension(), ie); }
1328
const_iterator begin (size_type dim) const { return base::data().begin(dim); }
1329
const_iterator end (size_type dim) const { return base::data().end (dim); }
1330
const_iterator begin () const { return begin(map_dimension()); }
1331
const_iterator end () const { return end (map_dimension()); }
1333
const_iterator_by_variant begin_by_variant (variant_type variant) const
1334
{ return base::data().begin_by_variant (variant); }
1335
const_iterator_by_variant end_by_variant (variant_type variant) const
1336
{ return base::data(). end_by_variant (variant); }
1338
iterator_by_variant begin_by_variant (variant_type variant)
1339
{ return base::data().begin_by_variant (variant); }
1340
iterator_by_variant end_by_variant (variant_type variant)
1341
{ return base::data(). end_by_variant (variant); }
1344
const geo_basic<T,distributed>& get_background_geo() const; // code in geo_domain.h
1345
geo_basic<T,distributed> get_background_domain() const;
1349
bool operator== (const geo_basic<T,distributed>& omega2) const { return base::data().operator== (omega2.data()); }
1353
odiststream& put (odiststream& ops) const { return base::data().put (ops); }
1354
idiststream& get (idiststream& ips);
1358
File: rheolef.info, Node: geo_domain class, Up: Classes
1360
5.2 `geo_domain' - a named part of a finite element mesh that behaves as a mesh
1361
===============================================================================
1363
(Source file: `nfem/plib/geo_domain.h')
1368
The `geo_domain' class defines a container for a part of a finite
1369
element mesh. This class re-describes the vertices, edges or faces in
1370
a compact way, i.e. by skipping unused elements from the surrounding
1376
The `geo_domain' class conserves the link to the original mesh such
1377
that fields defined on a `geo_domain' can inter-operate with fields
1378
defined on the surrounding mesh.
1381
File: rheolef.info, Node: geo_domain_indirect class, Up: Classes
1383
5.3 `geo_domain_indirect_rep' - a named part of a finite element mesh
1384
=====================================================================
1386
(Source file: `nfem/plib/geo_domain_indirect.h')
1391
The `geo_domain_indirect_rep' class defines a container for a part of a
1392
finite element mesh. This describes the connectivity of edges or
1393
faces. This class is usefull for boundary condition setting.
1398
The `geo_domain_indirect_rep' class is splitted into two parts. The
1399
first one is the `domain_indirect' class, that contains the main
1400
renumbering features: it acts as an indirection on a `geo' class(*note
1401
geo class::). The second one is the `geo' class itself, named here
1402
the background geo. Thus, the `geo_domain_indirect' class develops a
1403
complete `geo'-like interface, via the `geo_abstract_rep' pure
1404
virtual class derivation, and can be used by the `space' class (*note
1407
The split between `domain_indirect' and `geo_domain_indirect' is
1408
necessary, because the `geo' class contains a list of domain_indirect.
1409
The `geo' class cannot contains a list of `geo_domain_indirect'
1410
classes, that refers to the `geo' class itself: a loop in reference
1411
counting leads to a blocking situation in the automatic deallocation.
2836
1414
File: rheolef.info, Node: space class, Up: Classes
2838
1416
5.4 `space' - piecewise polynomial finite element space
2839
1417
=======================================================
2841
(Source file: `nfem/lib/space.h')
1419
(Source file: `nfem/plib/space.h')
2846
1424
The `space' class contains some numbering for unknowns and blocked
2847
_degrees of freedoms_ related to a given mesh and polynomial
1425
degrees of freedoms related to a given mesh and polynomial
2850
Degree of freedom numbering
2851
---------------------------
2853
Numbering of degrees of freedom (or shortly, "dof") depends upon the
2854
mesh and the piecewise polynomial approximation used. This numbering
2855
is then used by the `field' and `form' classes. See also *note field
2856
class:: and *note form class::.
2858
The degree-of-freedom (or shortly, "dof") follows some simple rules,
2859
that depends of course of the approximation. These rules are suitable
2860
for easy `.field' file retrieval (*note field class::).
2864
dof numbers follow element numbers in mesh.
2867
dof numbers follow vertice numbers in mesh.
2870
dof numbers related to vertices follow vertice numbers in mesh.
2871
Follow dof numbers related to edges, in the edge numbering
2875
dof numbers follow element numbers in mesh. In each element, we
2876
loop on vertices following the local vertice order provided in
2879
Unknown and blocked degree of freedom
2880
-------------------------------------
2882
A second numbering is related to the "unknown" and "blocked" degrees of
2884
geo omega("square");
2885
space V(omega,"P1");
2886
V.block ("boundary");
2887
Here, all degrees of freedom located on the domain `"boundary"' are
2893
File output format for `space' is not of practical interest. This file
2894
format is provided for completness. Here is a small example
2905
where the `B' denotes blocked degres of freedom. The method
2906
`set_dof(K, dof_array)' gives the numbering in `dof_array', supplying
2907
an element `K'. The method `index(dof)' gives the unknown or blocked
2908
numbering from the initial numbering, suppling a given `dof'
2913
The method `xdof(K, i)' gives the geometrical location of the i-th
2914
degree of freedom in the element `K'.
2916
Product spaces, for non-scalar fields, such as vector-valued or
2917
tensor-valued (symmetric) fields,
2922
space U (omega, "P1", "vector");
2923
space T (omega, "P1", "tensor");
2924
Then, the number of component depends upon the geometrical dimension of
2927
Arbitrarily product spaces can also be constructed
2929
Spaces for fields defined on a boundary domain can also be constructed
2930
space W (omega, omega["top"], "P1");
2931
The correspondance between the degree-of-freedom numbering for the
2932
space `W' and the global degree-of-freedom numbering for the space `V'
2933
is supported. The method `set_global_dof(S, dof_array)' gives the
2934
global numbering in `dof_array', supplying a boundary element `S'. The
2935
method `domain_index(global_dof)' gives the unknown or blocked
2936
numbering from the initial numbering, suppling a given `global_dof',
2937
related to the space `V'.
2942
class space : public smart_pointer<spacerep> {
2946
typedef spacerep::size_type size_type;
2948
// allocator/deallocator:
2951
space (const geo& g, const std::string& approx_name, const std::string& valued = "scalar");
2952
space (const geo& g, const std::string& approx_name,
2953
const domain& interface, const domain& subgeo, const std::string& valued = "scalar");
2954
// For spaces with a discontinuity through domain `interface', but otherwise continuous
2955
space (const geo& g, const domain& d, const std::string& approx_name);
2956
space(const const_space_component&);
2958
space operator = (const const_space_component&);
2959
friend space operator * (const space&, const space&);
2960
friend space pow (const space&, size_type);
2964
void block () const;
2965
void block_Lagrange () const;
2966
void block (size_type i_comp) const;
2967
void block_Lagrange (size_type i_comp) const;
2968
void block (const std::string& d_name) const;
2969
void block (const domain& d) const;
2970
void block (const std::string& d_name, size_type i_comp) const;
2971
void block (const domain& d, size_type i_comp) const;
2972
void block_dof (size_type dof_idx, size_type i_comp) const;
2973
void block_dof (size_type dof_idx) const;
2974
void unblock_dof (size_type dof_idx, size_type i_comp) const;
2975
void unblock_dof (size_type dof_idx) const;
2976
void set_periodic (const domain& d1, const domain& d2) const;
2977
void set_periodic (const std::string& d1_name, const std::string& d2_name) const;
2978
void lock_components (const domain& d, point locked_dir) const;
2979
void lock_components (const std::string& d_name, point locked_dir) const;
2980
template <class Function>
2981
void lock_components (const std::string& d_name, Function locked_dir) const;
2982
// Allows to enforce vector-boundary conditions.
2983
/* Allows to enforce vector-boundary conditions.
2985
*! Current limitation: only 2D vector fields with components having same FE approximation.
2986
*! The space has a dof for the direction normal to locked_dir, while the value in the
2987
*! direction locked_dir is blocked.
2988
*! The field::at function implements the reconstruction of the original cartesian components.
2990
template <class Function>
2991
void lock_components (const domain& d, Function locked_dir) const;
2995
size_type size() const;
2996
size_type degree () const;
2997
size_type n_blocked() const;
2998
size_type n_unknown() const;
2999
size_type dimension() const;
3000
std::string coordinate_system() const;
3001
fem_helper::coordinate_type coordinate_system_type() const;
3004
const geo& get_geo () const;
3005
const basis& get_basis (size_type i_component = 0) const;
3006
const numbering& get_numbering (size_type i_component = 0) const;
3007
std::string get_approx(size_type i_component = 0) const;
3008
const basis& get_p1_transformation () const;
3010
std::string get_valued() const;
3011
fem_helper::valued_field_type get_valued_type() const;
3013
size_type n_component() const;
3014
space_component operator [] (size_type i_comp);
3015
const_space_component operator [] (size_type i_comp) const;
3017
size_type size_component (size_type i_component = 0) const;
3018
size_type n_unknown_component (size_type i_component = 0) const;
3019
size_type n_blocked_component (size_type i_component = 0) const;
3020
size_type start (size_type i_component = 0) const;
3021
size_type start_unknown (size_type i_component = 0) const;
3022
size_type start_blocked (size_type i_component = 0) const;
3024
size_type index (size_type degree_of_freedom) const;
3025
size_type period_association (size_type degree_of_freedom) const;
3026
bool is_blocked (size_type degree_of_freedom) const;
3027
bool is_periodic (size_type degree_of_freedom) const;
3029
bool has_locked () const;
3030
// for (2D) vector spaces only, field is only blocked along locked_dir
3031
bool is_locked (size_type degree_of_freedom) const;
3032
// for tangential-normal representation
3033
size_type locked_with (size_type degree_of_freedom) const;
3034
size_type index_locked_with (size_type degree_of_freedom) const;
3035
// dof with the other component (thru space::index() as well)
3036
Float locked_component (size_type dof, size_type i_comp) const;
3037
// i-th component of locked direction
3038
Float unlocked_component (size_type dof, size_type i_comp) const;
3039
// i-th component of unlocked direction
3041
void freeze () const;
3042
bool is_frozen() const;
3045
// informations related to boundary spaces
3046
bool is_on_boundary_domain() const;
3047
const domain& get_boundary_domain() const;
3048
const geo& get_global_geo () const;
3049
size_type global_size () const;
3050
size_type domain_dof (size_type global_dof) const;
3051
size_type domain_index (size_type global_dof) const;
3053
// get indices of degrees of freedom associated to an element
3054
// the tiny_vector is a "local index" -> "global index" table
3055
// "global" variants differ only for boundary spaces (non-global uses domain-indices, global use
3056
// mesh-wide indices)
3057
void set_dof (const geo_element& K, tiny_vector<size_type>& idx) const;
3058
void set_global_dof (const geo_element& K, tiny_vector<size_type>& idx) const;
3059
void set_dof (const geo_element& K, tiny_vector<size_type>& idx, size_type i_comp) const;
3060
void set_global_dof (const geo_element& K, tiny_vector<size_type>& idx, size_type i_comp) const;
3061
void set_dof (const geo_element& K, tiny_vector<size_type>& idx, size_type i_comp, reference_element::dof_family_type family) const;
3062
point x_dof (const geo_element& K, size_type i_local) const;
3063
// Hermite/Argyris: returns 1 if the global dof contains the derivative along outward normal of K, -1 else
3064
void dof_orientation(const geo_element& K, tiny_vector<int>& orientation) const;
3066
// piola transformation
3067
meshpoint hatter (const point& x, size_type K) const;
3068
point dehatter (const meshpoint& S) const;
3069
point dehatter (const point& x_hat, size_type e) const;
3073
bool operator == (const space&) const;
3074
bool operator != (const space&) const;
3078
friend std::ostream& operator << (std::ostream&, const space&);
3079
friend std::istream& operator >> (std::istream&, space&);
3080
std::ostream& dump(std::ostream& s = std::cerr) const;
3085
static size_type inquire_size(const geo&, const numbering&);
3089
space(smart_pointer<spacerep>);
3090
space (const space&, const space&); // X*Y
3091
space (const space&, size_type i_comp); // X[i_comp]
3096
File: rheolef.info, Node: form_diag class, Up: Classes
3098
5.5 `form_diag' - diagional form class
3099
======================================
3101
(Source file: `nfem/lib/form_diag.h')
3106
Implement a form using two diagonal matrix.
3111
Here is the computation of the mass matrix for the P1 finite element:
3114
form_diag m(V,"mass");
3126
typedef basic_diag<Float>::size_type size_type;
3128
// allocator/deallocator:
3131
form_diag (const space& X);
3132
form_diag (const space& X, const char* op_name);
3133
form_diag (const class field& dh);
3137
const space& get_space() const;
3138
const geo& get_geo() const;
3139
size_type size() const;
3140
size_type nrow() const;
3141
size_type ncol() const;
3143
// linear algebra (partial):
3145
friend form_diag operator * (const form_diag&, const form_diag&);
3146
friend class field operator * (const form_diag& a, const class field& x);
3147
friend form_diag operator / (const Float& lambda, const form_diag& m);
3149
friend form_diag operator * (const Float& lambda, const form_diag&);
3150
friend form_diag operator + (const form_diag&, const form_diag&);
3151
friend form_diag operator - (const form_diag&);
3152
friend form_diag operator - (const form_diag&, const form_diag&);
3157
friend std::ostream& operator << (std::ostream& s, const form_diag& a);
3163
basic_diag<Float> uu;
3164
basic_diag<Float> bb;
3168
File: rheolef.info, Node: form_manip class, Up: Classes
3170
5.6 `form_manip' - form concatenation on a product space
3171
========================================================
3173
(Source file: `nfem/lib/form_manip.h')
3178
build a form on a product space
3183
a general implementation without switch and with a loop.
3192
typedef form::size_type size_type;
3194
// constructors/destructor:
3200
size_type nform() const;
3201
size_type nrowbloc() const;
3202
size_type ncolbloc() const;
3203
form& bloc(size_type i, size_type j);
3204
const form& bloc(size_type i, size_type j) const;
3208
form_manip& operator << (const form&);
3209
form_manip& operator >> (form&);
3210
form_manip& operator << (form_manip& (*)(form_manip&));
3212
friend form_manip& verbose (form_manip&);
3213
friend form_manip& noverbose (form_manip&);
3218
friend form_manip& operator << (form_manip&, const class size&);
3223
size_type _nrowbloc;
3224
size_type _ncolbloc;
3225
std::vector<form> _a;
1432
class space_basic<T,sequential> : public smart_pointer<space_rep<T,sequential> > {
1437
typedef space_rep<T,sequential> rep;
1438
typedef smart_pointer<rep> base;
1439
typedef typename rep::size_type size_type;
1443
space_basic (const geo_basic<T,sequential>& omega = (geo_basic<T,sequential>()), std::string approx = "");
1447
void block (std::string dom_name);
1448
void unblock(std::string dom_name);
1449
void block (const domain_indirect_basic<sequential>& dom);
1450
void unblock(const domain_indirect_basic<sequential>& dom);
1452
const distributor& ownership() const;
1453
const communicator& comm() const;
1454
size_type size() const;
1455
size_type dis_size() const;
1457
const geo_basic<T,sequential>& get_geo() const;
1458
const element<T,sequential>& get_element() const;
1459
std::string get_stamp() const;
1461
void get_dis_idof (const geo_element& K, std::vector<size_type>& dis_idof) const;
1463
const distributor& iu_ownership() const;
1464
const distributor& ib_ownership() const;
1466
bool is_blocked (size_type idof) const;
1467
size_type iub (size_type idof) const;
1468
bool dis_is_blocked (size_type dis_idof) const;
1469
size_type dis_iub (size_type dis_idof) const;
1471
const distributor& ios_ownership() const;
1472
size_type idof2ios_dis_idof (size_type idof) const;
1473
size_type ios_idof2dis_idof (size_type ios_idof) const;
1475
basic_point<T> x_dof (size_type idof) const { return base::data().x_dof (idof); }
1477
template <class Function>
1478
T momentum (Function f, size_type idof) const;
1480
array<size_type, sequential> build_indirect_array (
1481
const space_basic<T,sequential>& Wh, const std::string& dom_name) const;
1485
bool operator== (const space_basic<T,sequential>& V2) const { return base::data().operator==(V2.data()); }
1486
bool operator!= (const space_basic<T,sequential>& V2) const { return ! operator== (V2); }
1487
friend bool are_compatible (const space_basic<T,sequential>& V1, const space_basic<T,sequential>& V2) {
1488
return are_compatible (V1.data(), V2.data()); }
1495
class space_basic<T,distributed> : public smart_pointer<space_rep<T,distributed> > {
1500
typedef space_rep<T,distributed> rep;
1501
typedef smart_pointer<rep> base;
1502
typedef typename rep::size_type size_type;
1506
space_basic (const geo_basic<T,distributed>& omega = (geo_basic<T,distributed>()), std::string approx = "");
1510
void block (std::string dom_name);
1511
void unblock(std::string dom_name);
1512
void block (const domain_indirect_basic<distributed>& dom);
1513
void unblock(const domain_indirect_basic<distributed>& dom);
1515
const distributor& ownership() const;
1516
const communicator& comm() const;
1517
size_type size() const;
1518
size_type dis_size() const;
1520
const geo_basic<T,distributed>& get_geo() const;
1521
const element<T,distributed>& get_element() const;
1522
std::string get_stamp() const;
1524
void get_dis_idof (const geo_element& K, std::vector<size_type>& dis_idof) const;
1526
const distributor& iu_ownership() const;
1527
const distributor& ib_ownership() const;
1529
bool is_blocked (size_type idof) const;
1530
size_type iub (size_type idof) const;
1532
bool dis_is_blocked (size_type dis_idof) const;
1533
size_type dis_iub (size_type dis_idof) const;
1535
const distributor& ios_ownership() const;
1536
size_type idof2ios_dis_idof (size_type idof) const;
1537
size_type ios_idof2dis_idof (size_type ios_idof) const;
1539
basic_point<T> x_dof (size_type idof) const { return base::data().x_dof (idof); }
1541
template <class Function>
1542
T momentum (Function f, size_type idof) const;
1544
array<size_type, distributed> build_indirect_array (
1545
const space_basic<T,distributed>& Wh, const std::string& dom_name) const;
1549
bool operator== (const space_basic<T,distributed>& V2) const { return base::data().operator==(V2.data()); }
1550
bool operator!= (const space_basic<T,distributed>& V2) const { return ! operator== (V2); }
1551
friend bool are_compatible (const space_basic<T,distributed>& V1, const space_basic<T,distributed>& V2) {
1552
return are_compatible (V1.data(), V2.data()); }
1556
File: rheolef.info, Node: form_element class, Up: Classes
1558
5.5 `form_element' - bilinear form on a single element
1559
======================================================
1561
(Source file: `nfem/plib/form_element.h')
1566
The `form_element' class defines functions that compute a bilinear
1567
form defined between two polynomial basis on a single geometrical
1568
element. This bilinear form is represented by a matrix.
1570
The bilinear form is designated by a string, e.g. "mass", "grad_grad",
1571
... indicating the form. The form depends also of the geometrical
1572
element: triangle, square, tetrahedron (*note geo_element internal::).
1577
The `form_element' class is managed by (*note smart_pointer
1578
internal::). This class uses a pointer on a pure virtual class
1579
`form_element_rep' while the effective code refers to the specific
1580
concrete derived classes: mass, grad_grad, etc.
1585
template <class T, class M>
1586
class form_element : public smart_pointer<form_element_rep<T,M> > {
1591
typedef form_element_rep<T,M> rep;
1592
typedef smart_pointer<rep> base;
1593
typedef typename rep::size_type size_type;
1594
typedef typename rep::vertex_type vertex_type;
1595
typedef typename rep::element_type element_type;
1596
typedef typename rep::geo_type geo_type;
1603
const element_type& X,
1604
const element_type& Y,
1605
const geo_type& omega);
1607
// accessors & modifier:
1609
void operator() (const geo_element& K, ublas::matrix<T>& m) const;
1610
virtual bool is_symmetric () const;
1613
void set_weight (const field& wh) const;
1614
const field& get_weight() const;
1615
void set_use_coordinate_system_weight(bool use) const;
1620
File: rheolef.info, Node: domain_indirect class, Up: Classes
1622
5.6 `domain_indirect' - a named part of a finite element mesh
1623
=============================================================
1625
(Source file: `nfem/plib/domain_indirect.h')
1630
The `domain_indirect' class defines a container for a part of a
1631
finite element mesh. This describes the connectivity of edges or
1632
faces. This class is usefull for boundary condition setting.
1637
The `domain' class is splitted into two parts. The first one is the
1638
`domain_indirect' class, that contains the main renumbering features:
1639
it acts as a indirect on a `geo' class(*note geo class::). The
1640
second one is the `domain' class, that simply contains two
1641
smart_pointers: one on a `domain_indirect' and the second on the
1642
`geo' where renumbering is acting. Thus, the domain class develops a
1643
complete `geo'-like interface, via the `geo_abstract_rep' pure
1644
virtual class derivation, and can be used by the `space' class (*note
1645
space class::). The split between domain_indirect and domain is
1646
necessary, because the `geo' class contains a list of domain_indirect.
1647
It cannot contains a list of `domain' classes, that refers to the
1648
geo class itself: a loop in reference counting leads to a blocking
1649
situation in the automatic deallocation.
1655
class domain_indirect_basic<sequential> : public smart_pointer<domain_indirect_rep<sequential> > {
1660
typedef domain_indirect_rep<sequential> rep;
1661
typedef smart_pointer<rep> base;
1662
typedef rep::size_type size_type;
1663
typedef rep::iterator_ioige iterator_ioige;
1664
typedef rep::const_iterator_ioige const_iterator_ioige;
1668
domain_indirect_basic ();
1669
void resize (size_type n);
1673
size_type size() const;
1674
size_type dis_size() const;
1675
const distributor& ownership() const;
1677
const_iterator_ioige ioige_begin() const;
1678
const_iterator_ioige ioige_end() const;
1679
iterator_ioige ioige_begin();
1680
iterator_ioige ioige_end();
1682
const geo_element_indirect& oige (size_type ioige) const;
1684
void set_name (std::string name);
1685
void set_map_dimension (size_type map_dim);
1686
std::string name () const;
1687
size_type map_dimension () const;
1691
odiststream& put (odiststream&) const;
1693
idiststream& get (idiststream& ips, const geo_rep<T,sequential>& omega, std::vector<index_set> *ball);
1700
class domain_indirect_basic<distributed> : public smart_pointer<domain_indirect_rep<distributed> > {
1705
typedef domain_indirect_rep<distributed> rep;
1706
typedef smart_pointer<rep> base;
1707
typedef rep::size_type size_type;
1711
domain_indirect_basic ();
1713
// accessors/modifiers:
1715
size_type size() const;
1716
size_type dis_size() const;
1717
const distributor& ownership() const;
1719
const geo_element_indirect& oige (size_type ioige) const;
1721
void set_name (std::string name);
1722
void set_map_dimension (size_type map_dim);
1723
std::string name () const;
1724
size_type map_dimension () const;
1726
// distributed specific acessors:
1728
const distributor& ini_ownership() const;
1729
size_type ioige2ini_dis_ioige (size_type ioige) const;
1730
size_type ini_ioige2dis_ioige (size_type ini_ioige) const;
1735
idiststream& get (idiststream& ips, const geo_rep<T,distributed>& omega);
1737
odiststream& put (odiststream& ops, const geo_rep<T,distributed>& omega) const;
1741
File: rheolef.info, Node: rheolef class, Up: Classes
1743
5.7 `rheolef' - The finite element library
1744
==========================================
1746
(Source file: `nfem/plib/rheolef.h')
1751
The `Rheolef' is a computer environment that serves as a convenient
1752
laboratory for computations involving finite element methods. It
1753
provides a set of unix commands and C++ algorithms and containers.
1754
In particular, this environment allows the user to express a partial
1755
derivative problem in terms of finite element `space', discrete
1756
`field', bilinear `form', and `geo', describing geometries and meshes.
1758
This set of data structure is completed by the most up-to-date
1759
algorithms: preconditioned solvers for incompressible elasticity,
1760
Stokes and Navier-Stokes flows, characteristic method for convection
1761
dominated heat problems,...
1763
The pdf file of the user's manual, with many examples, is located on
1764
your distribution in the documentation directory. This directory is
1765
given by the following unix command:
1766
rheolef-config --docdir
1767
All examples presented along the user's manual are available in the
1768
`example/' directory of the `Rheolef' source distribution: the
1769
directory, where all source code of examples are available, is given
1770
by the following unix command:
1771
rheolef-config --exampledir
3229
1774
File: rheolef.info, Node: field class, Up: Classes
3231
5.7 `field' - piecewise polynomial finite element field
1776
5.8 `field' - piecewise polynomial finite element field
3232
1777
=======================================================
3234
(Source file: `nfem/lib/field.h')
1779
(Source file: `nfem/plib/field.h')
3862
2384
const basic_point<T>& c, const basic_point<T>& x = basic_point<T>());
3865
File: rheolef.info, Node: ic0 class, Up: Classes
3867
5.14 `ic0' - incomplete Choleski factorization
3868
==============================================
3870
(Source file: `skit/lib/ic0.h')
3875
The class implements the icomplete Choleski factorization IC0(A) when
3876
$A$ is a square symmetric matrix stored in CSR format.
3877
csr<double> a = ...;
3878
ic0<double> fact(a);
3884
class basic_ic0 : public csr<T> {
3886
typedef typename csr<T>::size_type size_type;
3887
typedef T element_type;
3890
explicit basic_ic0 (const csr<T>& a);
3892
void inplace_solve (vec<T>& x) const;
3893
void solve (const vec<T>& b, vec<T>& x) const;
3894
vec<T> solve (const vec<T>& b) const;
2387
File: rheolef.info, Node: pcg class, Up: Classes
2389
5.13 `pcg' - conjugate gradient algorithm.
2390
==========================================
2392
(Source file: `skit/plib2/pcg.h')
2397
template <class Matrix, class Vector, class Preconditioner, class Real>
2398
int pcg (const Matrix &A, Vector &x, const Vector &b,
2399
const Preconditioner &M, int &max_iter, Real &tol, odiststream *p_derr=0);
2404
The simplest call to 'pcg' has the folling form:
2405
size_t max_iter = 100;
2407
int status = pcg(a, x, b, EYE, max_iter, tol, &cerr);
2412
`pcg' solves the symmetric positive definite linear system Ax=b using
2413
the Conjugate Gradient method.
2415
The return value indicates convergence within max_iter (input)
2416
iterations (0), or no convergence within max_iter iterations (1).
2417
Upon successful return, output arguments have the following values:
2419
approximate solution to Ax = b
2422
the number of iterations performed before the tolerance was reached
2425
the residual after the final iteration
2430
`pcg' is an iterative template routine.
2432
`pcg' follows the algorithm described on p. 15 in
2434
Templates for the Solution of Linear Systems: Building
2435
Blocks for Iterative Methods, 2nd Edition, R.
2436
Barrett, M. Berry, T. F. Chan, J. Demmel, J. Donato, J. Dongarra,
2437
V. Eijkhout, R. Pozo, C. Romine, H. Van der Vorst,
2438
SIAM, 1994, `ftp.netlib.org/templates/templates.ps'.
2440
The present implementation is inspired from `IML++ 1.2' iterative
2441
method library, `http://math.nist.gov/iml++'.
2446
template <class Matrix, class Vector, class Vector2, class Preconditioner, class Real, class Size>
2447
int pcg(const Matrix &A, Vector &x, const Vector2 &Mb, const Preconditioner &M,
2448
Size &max_iter, Real &tol, odiststream *p_derr = 0, std::string label = "cg")
2450
Vector b = M.solve(Mb);
2451
Real norm2_b = dot(Mb,b);
2452
if (norm2_b == Real(0)) norm2_b = 1;
2453
Vector Mr = Mb - A*x;
2455
if (p_derr) (*p_derr) << "[" << label << "] #iteration residue" << std::endl;
2457
for (Size n = 0; n <= max_iter; n++) {
2458
Vector r = M.solve(Mr);
2459
Real prev_norm2_r = norm2_r;
2460
norm2_r = dot(Mr, r);
2461
if (p_derr) (*p_derr) << "[" << label << "] " << n << " " << ::sqrt(norm2_r/norm2_b) << std::endl;
2462
if (norm2_r <= sqr(tol)*norm2_b) {
2463
tol = ::sqrt(norm2_r/norm2_b);
2470
Real beta = norm2_r/prev_norm2_r;
2474
Real alpha = norm2_r/dot(Mq, p);
2478
tol = ::sqrt(norm2_r/norm2_b);
2483
File: rheolef.info, Node: index_set class, Up: Classes
2485
5.14 index_set - a set of indexes
2486
=================================
2488
(Source file: `skit/plib2/index_set.h')
2493
A class for: l = {1,3,...9} i.e. a wrapper for STL `set<size_t>' with
2494
some assignment operators, such as l1 += l2. This class is
2495
suitable for use with the `array<T>' class, as `array<index_set>'
2496
(*note array class::).
2501
class index_set : public std::set<std::size_t> {
2506
typedef std::set<std::size_t> base;
2507
typedef std::size_t value_type;
2508
typedef std::size_t size_type;
2513
index_set (const index_set& x);
2514
index_set& operator= (const index_set& x);
2516
index_set& operator= (size_type x[N]);
2521
void insert (size_type dis_i); // a := a union {dis_i}
2522
index_set& operator+= (size_type dis_i); // idem
2523
index_set& operator+= (const index_set& b); // a := a union b
2526
void inplace_union (const index_set& b);
2527
void inplace_intersection (const index_set& b);
2530
friend void set_union (const index_set& a, const index_set& b, index_set& c);
2531
friend void set_intersection (const index_set& a, const index_set& b, index_set& c);
2535
friend std::istream& operator>> (std::istream& is, index_set& x);
2536
friend std::ostream& operator<< (std::ostream& os, const index_set& x);
2540
template <class Archive>
2541
void serialize (Archive& ar, const unsigned int version);
2545
File: rheolef.info, Node: mpi_assembly_end class, Up: Classes
2547
5.15 msgé_assembly_end - array or matrix assembly
2548
==================================================
2550
(Source file: `skit/plib2/mpi_assembly_end.h')
2555
Finish a dense array or sparse matrix assembly.
2574
Size receive_max_size,
2578
typedef Size size_type;
2579
typedef typename Container::data_type data_type;
2580
// -----------------------------------------------------------------
2581
// 1) receive data and store it in container
2582
// -----------------------------------------------------------------
2584
// note: for wait_any, build an iterator adapter that scan the pair.second in [index,request]
2585
// and then get an iterator in the pair using iter.base(): retrive the corresponding index
2586
// for computing the position in the receive.data buffer
2587
typedef boost::transform_iterator<select2nd<size_type,mpi::request>,
2588
typename std::list<std::pair<size_type,mpi::request> >::iterator>
2591
#define _RHEOLEF_BUG_FOR_NON_MPI_DATA_TYPE
2592
#ifdef _RHEOLEF_BUG_FOR_NON_MPI_DATA_TYPE
2593
while (receive.waits.size() != 0) {
2594
request_iterator iter_r_waits (receive.waits.begin(), select2nd<size_type,mpi::request>()),
2595
last_r_waits (receive.waits.end(), select2nd<size_type,mpi::request>());
2596
// waits on any receive...
2597
std::pair<mpi::status,request_iterator> pair_status = mpi::wait_any (iter_r_waits, last_r_waits);
2599
boost::optional<int> i_msg_size_opt = pair_status.first.template count<data_type>();
2600
check_macro (i_msg_size_opt, "receive wait failed");
2601
int iproc = pair_status.first.source();
2602
check_macro (iproc >= 0, "receive: source iproc = "<<iproc<<" < 0 !");
2603
// get size of receive and number in data
2604
size_type i_msg_size = (size_type)i_msg_size_opt.get();
2605
typename std::list<std::pair<size_type,mpi::request> >::iterator i_pair_ptr = pair_status.second.base();
2606
size_type i_receive = (*i_pair_ptr).first;
2607
size_type i_start = i_receive*receive_max_size;
2608
for (size_type j = i_start; j < i_start + i_msg_size; j++) {
2609
x (receive.data[j]);
2611
receive.waits.erase (i_pair_ptr);
2613
#else // _RHEOLEF_BUG_FOR_NON_MPI_DATA_TYPE
2614
// wait_all works better when using an array of non mpi_data_type, as an array of set<size_t>
2615
request_iterator iter_r_waits (receive.waits.begin(), select2nd<size_type,mpi::request>()),
2616
last_r_waits (receive.waits.end(), select2nd<size_type,mpi::request>());
2617
std::vector<mpi::status> recv_status (receive.waits.size());
2618
mpi::wait_all (iter_r_waits, last_r_waits, recv_status.begin());
2619
for (size_type i_recv = 0, n_recv = recv_status.size(); i_recv < n_recv; i_recv++) {
2620
boost::optional<int> i_msg_size_opt = recv_status[i_recv].template count<data_type>();
2621
check_macro (i_msg_size_opt, "receive wait failed");
2622
int iproc = recv_status[i_recv].source();
2623
check_macro (iproc >= 0, "receive: source iproc = "<<iproc<<" < 0 !");
2624
// get size of receive and number in data
2625
size_type i_msg_size = (size_type)i_msg_size_opt.get();
2626
size_type i_start = i_recv*receive_max_size;
2627
for (size_type j = i_start; j < i_start + i_msg_size; j++) {
2628
x (receive.data[j]);
2631
#endif // _RHEOLEF_BUG_FOR_NON_MPI_DATA_TYPE
2632
// -----------------------------------------------------------------
2634
// -----------------------------------------------------------------
2635
request_iterator iter_s_waits (send.waits.begin(), select2nd<size_type,mpi::request>()),
2636
last_s_waits (send.waits.end(), select2nd<size_type,mpi::request>());
2637
Size send_nproc = send.waits.size();
2638
std::vector<mpi::status> send_status(send_nproc);
2639
mpi::wait_all (iter_s_waits, last_s_waits, send_status.begin());
2640
// -----------------------------------------------------------------
2641
// 3) clear send & receive messages [waits,data]
2642
// -----------------------------------------------------------------
2645
receive.waits.clear();
2646
receive.data.clear();
2647
return x.n_new_entry();
2651
File: rheolef.info, Node: msg_left_permutation_apply class, Up: Classes
2653
5.16 msg_left_permutation_apply - sequentail apply
2654
==================================================
2656
(Source file: `skit/plib2/msg_left_permutation_apply.h')
2661
Applies a left permutation to an array.
2666
msg_left_permutation_apply
2668
"input": the length array | x(0:nx-1), py(0:n-1) "output": the
2669
pointer array and the total size | y(0:n) begin | for i := 0
2670
to n-1 do | y(py(i)) := x(i) | endfor end
2675
Time and memory complexity is O(n).
2681
class InputIterator1,
2682
class InputIterator2,
2684
class OutputRandomIterator>
2686
msg_left_permutation_apply (
2690
InputIterator2 last_py,
2691
OutputRandomIterator y)
2693
while (py != last_py)
2698
File: rheolef.info, Node: polymorphic_map class, Up: Classes
2700
5.17 polymorphic_map - a map<size_t,T> like polymorphic container
2701
=================================================================
2703
(Source file: `skit/plib2/polymorphic_map.h')
2708
template <class T> class polymorphic_map;
2713
template <class T, class V = typename polymorphic_traits<T>::derived_type>
2714
class polymorphic_map : public smart_pointer<polymorphic_map_rep<T,V,mpl::size<V>::value> > {
2719
static const size_t _n_variant = mpl::size<V>::value;
2721
typedef sequential memory_type;
2722
typedef polymorphic_map_rep<T,V,_n_variant> rep;
2723
typedef smart_pointer<rep> base;
2724
typedef typename rep::size_type size_type;
2725
typedef typename rep::reference reference;
2726
typedef typename rep::iterator iterator;
2727
typedef typename rep::const_reference const_reference;
2728
typedef typename rep::const_iterator const_iterator;
2734
// accessors & modifiers:
2736
size_type size () const;
2739
iterator find (size_type dis_i);
2740
const_iterator begin () const;
2741
const_iterator end () const;
2742
const_iterator find (size_type dis_i) const;
2746
odiststream& put (odiststream& ops) const;
2750
File: rheolef.info, Node: mpi_polymorphic_assembly_begin class, Up: Classes
2752
5.18 mpi_polymorph_assembly_begin - for polymorph_array
2753
=======================================================
2755
(Source file: `skit/plib2/mpi_polymorphic_assembly_begin.h')
2760
Start a dense polymorph array matrix assembly.
2765
Assume that stash has indexes in increasing order. cpu complexity :
2766
O(stash.size + nproc) memory complexity : O(stash.size + nproc) msg
2767
size complexity : O(stash.size + nproc)
2769
When assembling a finite element matrix, the stash.size is at boundaries
2770
of the mesh partition, and stash.size = O(nrow^alpha), where
2771
alpha=(d-1)/d and d=2,3. Using nproc <= O(nrow^alpha) is performant.
2776
The stash may be sorted by increasing nows and column.
2778
Inspirated from Petsc (petsc/src/vec/vec/impls/mpi/pdvec.c), here with
2779
a pre-sorted stash, thanks to the stl::map data structure.
2782
File: rheolef.info, Node: polymorphic_array class, Up: Classes
2784
5.19 polymorphic_array - an array of derived classes
2785
====================================================
2787
(Source file: `skit/plib2/polymorphic_array.h')
2792
template <class T, class V> class polymorphic_array_seq_rep;
2797
Geometric entities of finite elements are implemented by using a
2798
polymorphic hierarchy of classes: the base class `geo_element' is a
2799
pure virtual methods. Then, derived classes, such as `geo_element_t'
2800
for a triangle, are introduced with a storage zone for the indexes of
2801
its vertices in the mesh (See "geo"(3)).
2803
Each element has a fixed size storage zone: a buffer of a particular
2804
element type can be efficiently exchanged with MPI.
2806
Here is an atempt to generalize the distributed array<T>, with
2807
reference counting, to the polymorphic case. The base class is denoted
2808
as T and the derived ones are included as a vector of type V, e.g.
2809
V=mpl::vctor<T0,T1,T2>.
2814
Here, class Ti are suposed to have a method : size_type
2815
variant() const; that returns the class index i=0..n_variant-1. A
2816
more general approch should us run-time type identification.
2818
Finally, the container should honor an optional Allocator template
2824
template <class T, class V>
2825
class polymorphic_array<T,sequential,V> : public smart_pointer<polymorphic_array_seq_rep<T,V,mpl::size<V>::value> > {
2830
static const size_t _n_variant = mpl::size<V>::value;
2832
typedef sequential memory_type;
2833
typedef polymorphic_array_seq_rep<T,V,_n_variant> rep;
2834
typedef smart_pointer<rep> base;
2835
typedef typename rep::size_type size_type;
2836
typedef typename rep::reference reference;
2837
typedef typename rep::const_reference const_reference;
2838
typedef typename rep::iterator iterator;
2839
typedef typename rep::const_iterator const_iterator;
2843
polymorphic_array (size_type n = 0);
2844
void resize (size_type n);
2845
polymorphic_array (const distributor& ownership);
2846
void resize (const distributor& ownership);
2848
// accessors & modifiers:
2850
size_type size () const;
2851
size_type dis_size() const;
2852
reference operator[] (size_type i);
2853
const_reference operator[] (size_type i) const;
2854
const_iterator begin () const;
2855
const_iterator end () const;
2859
const distributor& ownership() const;
2860
const communicator& comm() const;
2864
idiststream& get_values (idiststream& ips);
2865
odiststream& put_values (odiststream& ops) const;
2866
template <class PutFunction> odiststream& put_values (odiststream& ops, PutFunction put_element) const;
2867
void dump (std::string name) const;
2869
template <class T, class V>
2870
odiststream& operator<< (odiststream& ops, const polymorphic_array<T,sequential,V>& x);
2872
template <class T, class V>
2873
idiststream& operator>> (idiststream& ips, polymorphic_array<T,sequential,V>& x);
2878
template <class T, class V>
2879
class polymorphic_array<T,distributed,V> : public smart_pointer<polymorphic_array_mpi_rep<T,V,mpl::size<V>::value> > {
2884
static const size_t _n_variant = mpl::size<V>::value;
2886
typedef distributed memory_type;
2887
typedef polymorphic_array_mpi_rep<T,V,_n_variant> rep;
2888
typedef smart_pointer<rep> base;
2889
typedef typename rep::size_type size_type;
2890
typedef typename rep::reference reference;
2891
typedef typename rep::const_reference const_reference;
2892
typedef typename rep::iterator iterator;
2893
typedef typename rep::const_iterator const_iterator;
2898
polymorphic_array (size_type n = 0);
2899
void resize (size_type n);
2901
polymorphic_array (const distributor& ownership = distributor());
2902
void resize (const distributor& ownership);
2904
// accessors & modifiers:
2906
size_type size () const;
2907
size_type dis_size () const;
2908
reference operator[] (size_type i);
2909
const_reference operator[] (size_type i) const;
2910
const_iterator begin () const;
2911
const_iterator end () const;
2915
const distributor& ownership() const;
2916
const communicator& comm() const;
2918
void dis_entry_assembly_begin ();
2919
void dis_entry_assembly_end ();
2921
void repartition ( // old_numbering for *this
2922
const array<size_type,distributed>& partition, // old_ownership
2923
polymorphic_array<T,distributed,V>& new_array, // new_ownership (created)
2924
array<size_type,distributed>& old_numbering, // new_ownership
2925
array<size_type,distributed>& new_numbering) const; // old_ownership
2928
void append_dis_entry (const Set& ext_idx_set, polymorphic_map<T,V>& ext_idx_map) const
2929
{ base::data().append_dis_entry (ext_idx_set, ext_idx_map.data()); }
2932
void get_dis_entry (const Set& ext_idx_set, polymorphic_map<T,V>& ext_idx_map) const
2933
{ base::data().get_dis_entry (ext_idx_set, ext_idx_map.data()); }
2936
void append_dis_indexes (const Set& ext_idx_set) { base::data().append_dis_indexes (ext_idx_set); }
2939
void set_dis_indexes (const Set& ext_idx_set) { base::data().set_dis_indexes (ext_idx_set); }
2941
T& dis_at (size_type dis_i) { return base::data().dis_at (dis_i); }
2942
const T& dis_at (size_type dis_i) const { return base::data().dis_at (dis_i); }
2946
idiststream& get_values (idiststream& ips);
2947
odiststream& put_values (odiststream& ops) const;
2948
template <class Permutation>
2949
odiststream& permuted_put_values (odiststream& ops, const Permutation& perm) const;
2950
template <class Permutation, class PutFunction>
2951
odiststream& permuted_put_values (odiststream& ops, const Permutation& perm,
2952
PutFunction put_element) const;
2953
void dump (std::string name) const;
2955
template <class T, class V>
2956
odiststream& operator<< (odiststream& ops, const polymorphic_array<T,distributed,V>& x);
2958
template <class T, class V>
2959
idiststream& operator>> (idiststream& ips, polymorphic_array<T,distributed,V>& x);
2962
File: rheolef.info, Node: mpi_polymorphic_assembly_end class, Up: Classes
2964
5.20 mpi_polymorphic_assembly_end - polymorphic array assembly
2965
==============================================================
2967
(Source file: `skit/plib2/mpi_polymorphic_assembly_end.h')
2972
Finish a dense polymorphic array assembly.
2982
template <class Container, class Message>
2983
struct mpi_polymorphic_assembly_end_t<Container,Message,N> {
2986
const distributor& ownership,
2987
Message& receive, // buffer
2988
Message& send, // buffer
2989
boost::array<typename Container::size_type,N>& receive_max_size,
2991
Container& x) const;
2995
template <class Container, class Message>
2997
mpi_polymorphic_assembly_end_t<Container,Message,N>::operator() (
2999
const distributor& ownership,
3000
Message& receive, // buffer
3001
Message& send, // buffer
3002
boost::array<typename Container::size_type,N>& receive_max_size,
3006
typedef typename Container::size_type size_type;
3007
const size_type _n_variant = Container::_n_variant; // should be = N
3008
// -----------------------------------------------------------------
3009
// 1) receive data and store it in container
3010
// -----------------------------------------------------------------
3011
// note: for wait_any, build an iterator adapter that scan the pair.second in [index,request]
3012
// and then get an iterator in the pair using iter.base(): retrive the corresponding index
3013
// for computing the position in the receive.data buffer
3014
typedef boost::transform_iterator<
3015
select2nd<size_type,mpi::request>,
3016
typename std::list<std::pair<size_type,mpi::request> >::iterator>
3019
#define _RHEOLEF_receive_data(z,k,unused) \
3020
while (receive.waits[k].size() != 0) { \
3021
typedef typename Container::T##k T##k; \
3022
typedef typename std::pair<size_type,T##k> data##k##_type; \
3023
request_iterator iter_r_waits (receive.waits[k].begin(), select2nd<size_type,mpi::request>()), \
3024
last_r_waits (receive.waits[k].end(), select2nd<size_type,mpi::request>()); \
3025
/* waits on any receive... */ \
3026
std::pair<mpi::status,request_iterator> pair_status = mpi::wait_any (iter_r_waits, last_r_waits); \
3027
/* check status */ \
3028
boost::optional<int> i_msg_size_opt = pair_status.first.template count<data##k##_type>(); \
3029
check_macro (i_msg_size_opt, "receive wait failed"); \
3030
int iproc = pair_status.first.source(); \
3031
check_macro (iproc >= 0, "receive: source iproc = "<<iproc<<" < 0 !"); \
3032
/* get size of receive and number in data */ \
3033
size_type i_msg_size = (size_type)i_msg_size_opt.get(); \
3034
typename std::list<std::pair<size_type,mpi::request> >::iterator i_pair_ptr \
3035
= pair_status.second.base(); \
3036
size_type i_receive = (*i_pair_ptr).first; \
3037
size_type i_start = i_receive*receive_max_size[k]; \
3038
for (size_type j = i_start; j < i_start + i_msg_size; j++) { \
3039
size_type first_index = ownership.first_index(); \
3040
size_type index = receive.data._stack_##k[j].first; \
3041
T##k value = receive.data._stack_##k[j].second; \
3042
x.assign (index - first_index, value); \
3044
receive.waits[k].erase (i_pair_ptr); \
3046
BOOST_PP_REPEAT(N, _RHEOLEF_receive_data, ~)
3047
#undef _RHEOLEF_receive_data
3050
size_type start = ownership().first_index(); \
3051
size_type last = ownership().last_index(); \
3052
if (i_global >= start && i_global < last) \
3053
polymorphic_array_seq_rep<T,V,N>::assign (i_global - start, val); \
3056
// -----------------------------------------------------------------
3058
// -----------------------------------------------------------------
3059
for (size_type k = 0; k < _n_variant; k++) {
3060
request_iterator iter_s_waits (send.waits[k].begin(), select2nd<size_type,mpi::request>()),
3061
last_s_waits (send.waits[k].end(), select2nd<size_type,mpi::request>());
3062
size_type send_nproc = send.waits[k].size();
3063
std::vector<mpi::status> send_status (send_nproc);
3064
mpi::wait_all (iter_s_waits, last_s_waits, send_status.begin());
3066
// -----------------------------------------------------------------
3067
// 3) clear send & receive messages [waits,data]
3068
// -----------------------------------------------------------------
3072
receive.waits.clear();
3073
receive.data.clear();
3078
File: rheolef.info, Node: distributor class, Up: Classes
3080
5.21 distributor - data distribution table
3081
==========================================
3083
(Source file: `skit/plib2/distributor.h')
3088
Used by "array"(1), "asr"(1) and "csr"(1). and such classes that
3089
distribute data as chunk.
3094
class distributor : public Vector<std::allocator<int>::size_type> {
3099
typedef std::allocator<int>::size_type size_type;
3100
typedef Vector<size_type> _base;
3101
typedef _base::iterator iterator;
3102
typedef _base::const_iterator const_iterator;
3103
typedef int tag_type;
3104
typedef communicator communicator_type;
3108
static const size_type decide = size_type(-1);
3110
// allocators/deallocators:
3113
size_type dis_size = 0,
3114
const communicator_type& c = communicator_type(),
3115
size_type loc_size = decide);
3117
distributor(const distributor&);
3121
size_type dis_size = 0,
3122
const communicator_type& c = communicator_type(),
3123
size_type loc_size = decide);
3896
size_type nrow () const { return csr<T>::nrow(); }
3897
size_type ncol () const { return csr<T>::ncol(); }
3898
size_type nnz () const { return csr<T>::nnz(); }
3901
basic_ic0<T> ic0 (const csr<T>& a);
3904
File: rheolef.info, Node: permutation class, Up: Classes
3906
5.15 `permutation' - permutation matrix
3907
=======================================
3909
(Source file: `skit/lib/permutation.h')
3914
This class implements a permutation matrix. Permutation matrix are
3915
used in factorization implementation for optimizing the skyline format,
3916
as used by the implementation of the `ssk' class (*note ssk class::).
3918
Let a be a square invertible matrix in `csr' format (*note csr
3922
We get the optimal Gibbs permutation matrix using Gibbs, Pooles and
3923
Stockmeyer renumbering algorithm by:
3924
permutation p = gibbs(a);
3929
class permutation : public Array<std::vector<int>::size_type> {
3934
typedef Array<int>::size_type size_type;
3935
typedef Array<int>::difference_type difference_type;
3937
// allocators/deallocators:
3939
explicit permutation (size_type n = 0);
3942
permutation gibbs (const csr<T>& a);
3945
File: rheolef.info, Node: vec class, Up: Classes
3947
5.16 `vec' - dense vector
3948
=========================
3950
(Source file: `skit/lib/vec.h')
3955
The class implements an array. A declaration whithout any parametrers
3956
correspond to an array with a null size:
3958
Here, a `Float' array is specified. Note that the `Float' depends
3959
upon the configuration (*note Installing::). The constructor can be
3960
invocated whith a size parameter:
3962
Notes that the constructor can be invocated with an initializer:
3966
vec<Float> y = lambda;
3971
Linear combinations are `x+y', `x-y',
3972
`x*lambda', `lambda*x', `x/lambda'.
3974
Others combinations are `x*y', `x/y',
3977
The euclidian norm is `norm(x)' while `dot(x,y)'
3978
denotes the euclidian scalar product,
3980
Input/output routines overload "<<" and ">>" operators:
3984
*Note iorheo class::.
3127
const communicator_type& comm() const;
3129
/// global and local sizes
3130
size_type dis_size () const;
3132
/// current process id
3133
size_type process () const;
3135
/// number of processes
3136
size_type n_process () const;
3138
/// find iproc associated to a global index dis_i: CPU=log(nproc)
3139
size_type find_owner (size_type dis_i) const;
3141
/// global index range and local size owned by ip-th process
3142
size_type first_index (size_type ip) const;
3143
size_type last_index (size_type ip) const;
3144
size_type size (size_type ip) const;
3146
/// global index range and local size owned by current process
3147
size_type first_index () const;
3148
size_type last_index () const;
3149
size_type size () const;
3151
/// true when dis_i in [first_index(ip):last_index(ip)[
3152
bool is_owned (size_type dis_i, size_type ip) const;
3154
// the same with ip=current process
3155
bool is_owned (size_type dis_i) const;
3157
/// returns a new tag
3158
static tag_type get_new_tag();
3162
communicator_type _comm;
3166
File: rheolef.info, Node: msg_from_context_indices class, Up: Classes
3168
5.22 msg_from_context_indices - gather
3169
======================================
3171
(Source file: `skit/plib2/msg_from_context_indices.h')
3176
Computes the receive compressed message pattern for gather and
3177
scatter. Suppose indexes are sorted by increasing order.
3182
msg_from_context_indices
3184
"input": the owner and indice arrays, the current process |
3185
owner(0:nidx-1), idy(0:nidx-1), | proc2from_proc(0:nproc-1), my_proc
3186
"input-output": the pointer array, used for accumulation |
3187
ptr(0:nidx) "output": the receive context (from) indice array |
3188
from_idx(0:nidx-1) begin | for k := 0 to nidx-1 do | iproc :=
3189
owner(k) | if iproc <> my_proc then | i :=
3190
proc2from_proc(iproc) | p := ptr(i) | ptr(i) := p + 1 |
3191
from_idx(p) := idy(k) | endif | endfor end
3196
Complexity is O(nidx).
3201
Uses input iterators.
3207
class InputIterator1,
3208
class InputIterator2,
3209
class InputRandomIterator,
3212
class MutableRandomIterator,
3213
class OutputIterator>
3215
msg_from_context_indices (
3216
InputIterator1 owner, // nidx
3217
InputIterator1 last_owner,
3218
InputIterator2 idy, // nidx
3219
InputRandomIterator proc2from_proc, // nproc
3222
MutableRandomIterator ptr, // send_nproc+1
3223
OutputIterator from_idx) // nidx
3225
Size nidx = distance(owner,last_owner);
3226
for (Size i = 0; i < nidx; i++) {
3227
if (owner[i] != my_proc) {
3228
assert_macro (idy[i] < idy_maxval, "Scattering past end of TO vector: idy="
3229
<< idy[i] << " out of range 0.." << idy_maxval-1);
3230
Size p = ptr[proc2from_proc[owner[i]]]++;
3231
from_idx[p] = idy[i];
3237
File: rheolef.info, Node: msg_both_permutation_apply class, Up: Classes
3239
5.23 msg_both_permutation_apply - sequentail apply
3240
==================================================
3242
(Source file: `skit/plib2/msg_both_permutation_apply.h')
3247
Applies permutations when copying an array.
3252
msg_both_permutation_apply
3254
"input": the length array | px(0:n-1), x(0:nx-1), py(0:n-1)
3255
"output": the pointer array and the total size | y(0:ny) begin
3256
| for i := 0 to n-1 do | y(py(i)) := x(px(i)) | endfor end
3261
Time and memory complexity is O(n).
3267
class InputIterator1,
3268
class InputIterator2,
3269
class InputRandomIterator,
3271
class OutputRandomIterator>
3273
msg_both_permutation_apply (
3275
InputIterator1 last_px,
3276
InputRandomIterator x,
3279
OutputRandomIterator y)
3281
while (px != last_px)
3282
set_op(y[*py++], x[*px++]);
3284
} // namespace rheolef
3287
File: rheolef.info, Node: msg_right_permutation_apply class, Up: Classes
3289
5.24 msg_right_permutation_apply - sequentail apply
3290
===================================================
3292
(Source file: `skit/plib2/msg_right_permutation_apply.h')
3297
Applies a permutation to an array.
3302
msg_right_permutation_apply
3304
"input": the length array | perm(0:n-1), x(0:nx-1) "output": the
3305
pointer array and the total size | y(0:n) begin | for i := 0
3306
to n-1 do | y(i) := x(perm(i)) | endfor end
3311
Time and memory complexity is O(n).
3317
class InputIterator,
3318
class InputRandomIterator,
3319
class OutputIterator,
3322
msg_right_permutation_apply (
3324
InputIterator last_perm,
3325
const InputRandomIterator& x,
3329
for (; perm != last_perm; y++, perm++) {
3330
// something like: (*y++) = x[(*perm++)];
3331
set_op (y, x, *perm);
3337
File: rheolef.info, Node: mpi_assembly_begin class, Up: Classes
3339
5.25 mpi_assembly_begin - for array or matrix
3340
=============================================
3342
(Source file: `skit/plib2/mpi_assembly_begin.h')
3347
Start a dense array or a sparse matrix assembly.
3352
Assume that stash has indexes in increasing order. cpu complexity :
3353
O(stash.size + nproc) memory complexity : O(stash.size + nproc) msg
3354
size complexity : O(stash.size + nproc)
3356
When assembling a finite element matrix, the stash.size is at boundaries
3357
of the mesh partition, and stash.size = O(nrow^alpha), where
3358
alpha=(d-1)/d and d=2,3. Using nproc <= O(nrow^alpha) is performant.
3989
Since the `vec<T>' class derives from the `Array<T>' class, the `vec'
3990
class present also a STL-like container interface.
3363
The stash may be sorted by increasing nows and column.
3992
Actually, the `vec<T>' implementation uses a STL `vector<T>' class.
3365
Inspirated from Petsc (petsc/src/vec/vec/impls/mpi/pdvec.c), here with
3366
a pre-sorted stash, thanks to the stl::map data structure.
3998
class vec : public Array<T>
3374
class InputIterator>
3375
typename Stash::size_type
3376
mpi_assembly_begin (
3379
InputIterator first_stash_idx, // wrapper in shash
3380
InputIterator last_stash_idx,
3381
const distributor& ownership,
3383
Message& receive, // buffer
3384
Message& send) // buffer
4002
typedef T element_type;
4003
typedef typename Array<T>::size_type size_type;
4005
// cstor, assignment
4006
explicit vec (unsigned int n = 0, const T& init_value = std::numeric_limits<T>::max())
4007
: Array<T>(n, init_value) {}
4008
vec<T> operator = (T lambda);
4011
unsigned int size () const { return Array<T>::size(); }
4012
unsigned int n () const { return size(); }
4014
#ifdef _RHEOLEF_HAVE_EXPRESSION_TEMPLATE
4017
vec(const VExpr<X>& x) : Array<T>(x.size()) { assign (*this, x); }
4020
vec<T>& operator = (const VExpr<X>& x)
4021
{ logmodif(*this); return assign (*this, x); }
4022
#endif // _RHEOLEF_HAVE_EXPRESSION_TEMPLATE
4025
template <class T> std::istream& operator >> (std::istream&, vec<T>&);
4026
template <class T> std::ostream& operator << (std::ostream&, const vec<T>&);
3386
typedef typename Stash::size_type size_type;
3388
mpi::communicator comm = ownership.comm();
3389
size_type my_proc = ownership.process();
3390
size_type nproc = ownership.n_process();
3391
distributor::tag_type tag = distributor::get_new_tag();
3393
// ----------------------------------------------------------------
3394
// 1) count the messages contained in stash by process id
3395
// ----------------------------------------------------------------
3396
// assume that stash elements are sorted by increasing stash_idx (e.g. stash = stl::map)
3397
// cpu complexity = O(stash.size + nproc)
3398
// mem complexity = O(stash.size + nproc)
3399
std::vector<size_type> msg_size(nproc, 0);
3400
std::vector<size_type> msg_mark(nproc, 0);
3401
size_type send_nproc = 0;
3403
size_type iproc = 0;
3405
for (InputIterator iter_idx = first_stash_idx; iter_idx != last_stash_idx; iter_idx++, i++) {
3406
for (; iproc < nproc; iproc++) {
3407
if (*iter_idx >= ownership[iproc] && *iter_idx < ownership[iproc+1]) {
3409
if (!msg_mark[iproc]) {
3410
msg_mark[iproc] = 1;
3416
assert_macro (iproc != nproc, "bad stash data: index "<<*iter_idx<<" out of range [0:"<<ownership[nproc]<<"[");
3419
// ----------------------------------------------------------------
3420
// 2) avoid to send message to my-proc in counting
3421
// ----------------------------------------------------------------
3422
if (msg_size [my_proc] != 0) {
3423
msg_size [my_proc] = 0;
3424
msg_mark [my_proc] = 0;
3427
// ----------------------------------------------------------------
3428
// 3) compute number of messages to be send to my_proc
3429
// ----------------------------------------------------------------
3430
// msg complexity : O(nproc) or O(log(nproc)), depending on reduce implementation
3431
std::vector<size_type> work (nproc);
3434
msg_mark.begin().operator->(),
3436
work.begin().operator->(),
3437
std::plus<size_type>());
3438
size_type receive_nproc = work [my_proc];
3439
// ----------------------------------------------------------------
3440
// 4) compute messages max size to be send to my_proc
3441
// ----------------------------------------------------------------
3442
// msg complexity : O(nproc) or O(log(nproc)), depending on reduce implementation
3445
msg_size.begin().operator->(),
3447
work.begin().operator->(),
3448
mpi::maximum<size_type>());
3449
size_type receive_max_size = work [my_proc];
3450
// ----------------------------------------------------------------
3451
// 5) post receive: exchange the buffer adresses between processes
3452
// ----------------------------------------------------------------
3453
// Each message will consist of ordered pairs (global index,value).
3454
// since we don't know how long each indiidual message is,
3455
// we allocate the largest : receive_nproc*receive_max_size
3456
// potentially, this is a lot of wasted space
3457
// TODO: how to optimize the receive.data buffer ?
3458
// cpu complexity : O(nproc)
3459
// mem complexity : O(nproc*(stash.size/nproc)) = O(stash.size), worst case ?
3460
// msg complexity : O(nproc)
3461
receive.data.resize (receive_nproc*receive_max_size);
3462
for (size_t i_receive = 0; i_receive < receive_nproc; i_receive++) {
3463
mpi::request i_req = comm.irecv (
3466
receive.data.begin().operator->() + i_receive*receive_max_size,
3468
receive.waits.push_back (std::make_pair(i_receive, i_req));
3470
// ----------------------------------------------------------------
3471
// 6) copy stash in send buffer
3472
// ----------------------------------------------------------------
3473
// since the stash is sorted by increasing order => simple copy
3474
// cpu complexity : O(stash.size)
3475
// mem complexity : O(stash.size)
3476
send.data.resize (stash.size());
3477
copy (stash.begin(), stash.end(), send.data.begin());
3478
// ---------------------------------------------------------------------------
3480
// ---------------------------------------------------------------------------
3481
// cpu complexity : O(nproc)
3482
// mem complexity : O(send_nproc) \approx O(nproc), worst case
3483
// msg complexity : O(stash.size)
3484
send.waits.resize(send_nproc);
3486
size_type i_send = 0;
3487
size_type i_start = 0;
3488
for (size_type iproc = 0; iproc < nproc; iproc++) {
3489
size_type i_msg_size = msg_size[iproc];
3490
if (i_msg_size == 0) continue;
3491
mpi::request i_req = comm.isend (
3494
send.data.begin().operator->() + i_start,
3496
send.waits.push_back(std::make_pair(i_send,i_req));
3498
i_start += i_msg_size;
3501
return receive_max_size;
4029
File: rheolef.info, Node: ssk class, Up: Classes
4031
5.17 `ssk' - symmetric skyline matrix format
4032
============================================
4034
(Source file: `skit/lib/ssk.h')
3505
File: rheolef.info, Node: solver class, Up: Classes
3507
5.26 `solver' - direct or interative solver interface
3508
=====================================================
3510
(Source file: `skit/plib2/solver.h')
4039
The class implements a symmetric matrix Choleski factorization. Let A
4040
be a square invertible matrix in `csr' format (*note csr class::).
3515
The class implements a matrix factorization: LU factorization for an
3516
unsymmetric matrix and Choleski fatorisation for a symmetric one.
3518
Let A be a square invertible matrix in `csr' format (*note csr
4042
3521
We get the factorization by:
4043
ssk<Float> m = ldlt(a);
3522
solver<Float> sa (a);
4044
3523
Each call to the direct solver for a*x = b writes either:
4045
vec<Float> x = m.solve(b);
4052
The storage is either skyline, multi-file or chevron. This
4053
alternative depends upon the configuration (*note Installing::). The
4054
chevron data structure is related to the multifrontal algorithm and is
4055
implemented by using the spooles library. The multi-file data
4056
structure refers to the out-of-core algorithm and is implemented by
4057
using the taucs library. If such a library is not available, the
4058
`ssk' class implements a skyline storage scheme and the profil storage
4059
of the `ssk' matrix is optimized by optimizing the renumbering, by
4060
using the Gibbs, Pooles and Stockmeyer algorithm available via the
4061
`permutation' class (*note permutation class::).
4063
Algorithm 582, collected Algorithms from ACM. Algorithm
4064
appeared in ACM-Trans. Math. Software, vol.8, no. 2, Jun.,
4066
When implementing the skyline data structure, we can go back to the
4067
`csr' format, for the pupose of pretty-printing:
4068
cout << ps << color << logscale << csr<Float>(m);
4074
class ssk : smart_pointer<spooles_rep<T> > {
4078
typedef T element_type;
4079
typedef typename spooles_rep<T>::size_type size_type;
4081
// allocators/deallocators:
4084
explicit ssk<T> (const csr<T>&);
4088
size_type nrow () const;
4089
size_type ncol () const;
4090
size_type nnz () const;
4092
// factorisation and solver:
4094
void solve(const vec<T>& b, vec<T>& x) const;
4095
vec<T> solve(const vec<T>& b) const;
4096
void factorize_ldlt();
4098
const spooles_rep<T>& data() const {
4099
return smart_pointer<spooles_rep<T> >::data();
4101
spooles_rep<T>& data() {
4102
return smart_pointer<spooles_rep<T> >::data();
4106
ssk<T> ldlt (const csr<T>& m);
4112
class ssk : smart_pointer<umfpack_rep<T> > {
4116
typedef T element_type;
4117
typedef typename umfpack_rep<T>::size_type size_type;
4119
// allocators/deallocators:
4122
explicit ssk<T> (const csr<T>&);
4126
size_type nrow () const;
4127
size_type ncol () const;
4128
size_type nnz () const;
4130
// factorisation and solver:
4132
void solve(const vec<T>& b, vec<T>& x) const;
4133
vec<T> solve(const vec<T>& b) const;
4134
void factorize_ldlt();
4135
void factorize_lu();
4137
const umfpack_rep<T>& data() const {
4138
return smart_pointer<umfpack_rep<T> >::data();
4140
umfpack_rep<T>& data() {
4141
return smart_pointer<umfpack_rep<T> >::data();
4145
ssk<T> ldlt (const csr<T>& m);
3524
vec<Float> x = sa.solve(b);
3525
When the matrix is modified in a computation loop but conserves its
3526
sparsity pattern, an efficient re-factorization writes:
3527
sa.update_values (new_a);
3529
This approach skip the long step of the symbolic factization step.
3534
The factorization can also be incomplete, i.e. a pseudo-inverse,
3535
suitable for preconditionning iterative methods. In that case, the
3536
sa.solve(b) call runs a conjugate gradient when the matrix is
3537
symmetric, or a generalized minimum residual algorithm when the matrix
3540
Automatic choice and customization
3541
----------------------------------
3543
The symmetry of the matrix is tested via the a.is_symmetric() property
3544
(*note csr class::) while the choice between direct or iterative solver
3545
is switched from the a.pattern_dimension() value. When the pattern is
3546
3D, an iterative method is faster and less memory consuming.
3547
Otherwhise, for 1D or 2D problems, the direct method is prefered.
3549
These default choices can be supersetted by using explicit options:
3550
solver_option_type opt;
3551
opt.incomplete = true;
3552
solver<Float> sa (a, opt);
3553
See the solver.h header for the complete list of available options.
3558
The implementation bases on the pastix library.
3563
template <class T, class M = rheo_default_memory_model>
3564
class solver_basic : public smart_pointer<solver_rep<T,M> > {
3568
explicit solver_basic (const csr<T,M>& a, const solver_option_type& opt = solver_option_type());
3569
void update_values (const csr<T,M>& a);
3573
vec<T,M> trans_solve (const vec<T,M>& b) /* TODO const */;
3574
vec<T,M> solve (const vec<T,M>& b) /* TODO const */;
3578
typedef solver_rep<T,M> rep;
3579
typedef smart_pointer<rep> base;
3581
typedef solver_basic<Float> solver;
3584
File: rheolef.info, Node: array class, Up: Classes
3586
5.27 array - container in distributed environment
3587
=================================================
3589
(Source file: `skit/plib2/array.h')
3594
STL-like vector container for a distributed memory machine model.
3599
A sample usage of the class is:
3600
int main(int argc, char**argv) {
3601
environment distributed(argc, argv);
3602
array<double> x(distributor(100), 3.14);
3605
The array<T> interface is similar to those of the std::vector<T>
3606
with the addition of some communication features in the distributed
3607
case: write accesses with entry/assembly and read access with dis_at.
3609
Distributed write access
3610
------------------------
3612
Loop on any `dis_i' that is not managed by the current processor:
3613
x.dis_entry (dis_i) = value;
3614
and then, after loop, perform all communication:
3615
x.dis_entry_assembly();
3616
After this command, each value is stored in the array, available the
3617
processor associated to `dis_i'.
3619
Distributed read access
3620
-----------------------
3622
First, define the set of indexes:
3623
std::set<size_t> ext_idx_set;
3624
Then, loop on `dis_i' indexes that are not managed by the current
3626
ext_idx_set.insert (dis_i);
3627
After the loop, performs the communications:
3628
x.set_dis_indexes (ext_idx_set);
3629
After this command, each values associated to the `dis_i' index,
3630
and that belongs to the index set, is now available also on the
3631
current processor as:
3632
value = x.dis_at (dis_i);
3633
For convenience, if `dis_i' is managed by the current processor, this
3634
function returns also the value.
3639
The class takes two template parameters: one for the type T and the
3640
second for the memory model M, that could be either M=distributed or
3641
M=sequential. The two cases are associated to two diferent
3642
implementations, but proposes exactly the same interface. The
3643
sequential interface propose also a supplementary constructor:
3644
array<double,sequential> x(local_size, init_val);
3645
This constructor is a STL-like one but could be consufused in the
3646
distributed case, since there are two sizes: a local one and a
3647
global one. In that case, the use of the distributor, as a
3648
generalization of the size concept, clarify the situation (*note
3649
distributor class::).
3654
"scatter" via "get_dis_entry".
3656
"gather" via "dis_entry(dis_i) = value" or "dis_entry(dis_i) +=
3657
value". Note that += applies when T=idx_set where idx_set is a
3658
wrapper class of std::set<size_t> ; the += operator represents the
3659
union of a set. The operator= is used when T=double or others simple T
3660
types without algebra. If there is a conflict, i.e. several processes
3661
set the dis_i index, then the result of operator+= depends upon the
3662
order of the process at each run and is not deterministic. Such
3663
ambiguous behavior is not detected yet at run time.
3668
template <class T, class A>
3669
class array<T,sequential,A> : public smart_pointer<array_seq_rep<T,A> > {
3674
typedef array_seq_rep<T,A> rep;
3675
typedef smart_pointer<rep> base;
3677
typedef sequential memory_type;
3678
typedef typename rep::size_type size_type;
3679
typedef typename rep::value_type value_type;
3680
typedef typename rep::reference reference;
3681
typedef typename rep::dis_reference dis_reference;
3682
typedef typename rep::iterator iterator;
3683
typedef typename rep::const_reference const_reference;
3684
typedef typename rep::const_iterator const_iterator;
3689
array (size_type loc_size = 0, const T& init_val = T(), const A& alloc = A());
3690
void resize (size_type loc_size = 0, const T& init_val = T());
3691
array (const distributor& ownership, const T& init_val = T(), const A& alloc = A());
3692
void resize (const distributor& ownership, const T& init_val = T());
3694
// local accessors & modifiers:
3696
A get_allocator() const { return base::data().get_allocator(); }
3697
size_type size () const { return base::data().size(); }
3698
size_type dis_size () const { return base::data().dis_size(); }
3699
const distributor& ownership() const { return base::data().ownership(); }
3700
const communicator& comm() const { return ownership().comm(); }
3702
reference operator[] (size_type i) { return base::data().operator[] (i); }
3703
const_reference operator[] (size_type i) const { return base::data().operator[] (i); }
3705
iterator begin() { return base::data().begin(); }
3706
const_iterator begin() const { return base::data().begin(); }
3707
iterator end() { return base::data().end(); }
3708
const_iterator end() const { return base::data().end(); }
3710
// global modifiers (for compatibility with distributed interface):
3712
dis_reference dis_entry (size_type dis_i) { return base::data().dis_entry(dis_i); }
3713
void dis_entry_assembly() {}
3714
template<class SetOp>
3715
void dis_entry_assembly(SetOp my_set_op) {}
3716
template<class SetOp>
3717
void dis_entry_assembly_begin (SetOp my_set_op) {}
3718
template<class SetOp>
3719
void dis_entry_assembly_end (SetOp my_set_op) {}
3721
// apply a partition:
3723
template<class RepSize>
3724
void repartition ( // old_numbering for *this
3725
const RepSize& partition, // old_ownership
3726
array<T,sequential,A>& new_array, // new_ownership (created)
3727
RepSize& old_numbering, // new_ownership
3728
RepSize& new_numbering) const // old_ownership
3729
{ return base::data().repartition (partition, new_array, old_numbering, new_numbering); }
3731
template<class RepSize>
3732
void permutation_apply ( // old_numbering for *this
3733
const RepSize& new_numbering, // old_ownership
3734
array<T,sequential,A>& new_array) const // new_ownership (already allocated)
3735
{ return base::data().permutation_apply (new_numbering, new_array); }
3739
odiststream& put_values (odiststream& ops) const { return base::data().put_values(ops); }
3740
idiststream& get_values (idiststream& ips) { return base::data().get_values(ips); }
3741
template <class GetFunction>
3742
idiststream& get_values (idiststream& ips, GetFunction get_element) { return base::data().get_values(ips, get_element); }
3743
template <class PutFunction>
3744
odiststream& put_values (odiststream& ops, PutFunction put_element) const { return base::data().put_values(ops, put_element); }
3745
void dump (std::string name) const { return base::data().dump(name); }
3751
template <class T, class A>
3752
class array<T,distributed,A> : public smart_pointer<array_mpi_rep<T,A> > {
3757
typedef array_mpi_rep<T,A> rep;
3758
typedef smart_pointer<rep> base;
3760
typedef distributed memory_type;
3761
typedef typename rep::size_type size_type;
3762
typedef typename rep::value_type value_type;
3763
typedef typename rep::reference reference;
3764
typedef typename rep::dis_reference dis_reference;
3765
typedef typename rep::iterator iterator;
3766
typedef typename rep::const_reference const_reference;
3767
typedef typename rep::const_iterator const_iterator;
3768
typedef typename rep::scatter_map_type scatter_map_type;
3772
array (const distributor& ownership = distributor(), const T& init_val = T(), const A& alloc = A());
3773
void resize (const distributor& ownership = distributor(), const T& init_val = T());
3775
// local accessors & modifiers:
3777
A get_allocator() const { return base::data().get_allocator(); }
3778
size_type size () const { return base::data().size(); }
3779
size_type dis_size () const { return base::data().dis_size(); }
3780
const distributor& ownership() const { return base::data().ownership(); }
3781
const communicator& comm() const { return base::data().comm(); }
3783
reference operator[] (size_type i) { return base::data().operator[] (i); }
3784
const_reference operator[] (size_type i) const { return base::data().operator[] (i); }
3786
iterator begin() { return base::data().begin(); }
3787
const_iterator begin() const { return base::data().begin(); }
3788
iterator end() { return base::data().end(); }
3789
const_iterator end() const { return base::data().end(); }
3793
template<class Set, class Map>
3794
void append_dis_entry (const Set& ext_idx_set, Map& ext_idx_map) const { base::data().append_dis_entry (ext_idx_set, ext_idx_map); }
3796
template<class Set, class Map>
3797
void get_dis_entry (const Set& ext_idx_set, Map& ext_idx_map) const { base::data().get_dis_entry (ext_idx_set, ext_idx_map); }
3800
void append_dis_indexes (const Set& ext_idx_set) { base::data().append_dis_indexes (ext_idx_set); }
3803
void set_dis_indexes (const Set& ext_idx_set) { base::data().set_dis_indexes (ext_idx_set); }
3805
const T& dis_at (size_type dis_i) const { return base::data().dis_at (dis_i); }
3807
// get all external pairs (dis_i, values):
3808
const scatter_map_type& get_dis_map_entries() const { return base::data().get_dis_map_entries(); }
3810
// global modifiers (for compatibility with distributed interface):
3812
dis_reference dis_entry (size_type dis_i) { return base::data().dis_entry(dis_i); }
3814
void dis_entry_assembly() { return base::data().dis_entry_assembly(); }
3816
template<class SetOp>
3817
void dis_entry_assembly (SetOp my_set_op) { return base::data().dis_entry_assembly (my_set_op); }
3818
template<class SetOp>
3819
void dis_entry_assembly_begin (SetOp my_set_op) { return base::data().dis_entry_assembly_begin (my_set_op); }
3820
template<class SetOp>
3821
void dis_entry_assembly_end (SetOp my_set_op) { return base::data().dis_entry_assembly_end (my_set_op); }
3823
// apply a partition:
3825
template<class RepSize>
3826
void repartition ( // old_numbering for *this
3827
const RepSize& partition, // old_ownership
3828
array<T,distributed>& new_array, // new_ownership (created)
3829
RepSize& old_numbering, // new_ownership
3830
RepSize& new_numbering) const // old_ownership
3831
{ return base::data().repartition (partition.data(), new_array.data(), old_numbering.data(), new_numbering.data()); }
3833
template<class RepSize>
3834
void permutation_apply ( // old_numbering for *this
3835
const RepSize& new_numbering, // old_ownership
3836
array<T,distributed,A>& new_array) const // new_ownership (already allocated)
3837
{ base::data().permutation_apply (new_numbering.data(), new_array.data()); }
3839
void reverse_permutation ( // old_ownership for *this=iold2dis_inew
3840
array<size_type,distributed,A>& inew2dis_iold) const // new_ownership
3841
{ base::data().reverse_permutation (inew2dis_iold.data()); }
3845
odiststream& put_values (odiststream& ops) const { return base::data().put_values(ops); }
3846
idiststream& get_values (idiststream& ips) { return base::data().get_values(ips); }
3847
void dump (std::string name) const { return base::data().dump(name); }
3849
template <class GetFunction>
3850
idiststream& get_values (idiststream& ips, GetFunction get_element) { return base::data().get_values(ips, get_element); }
3851
template <class PutFunction>
3852
odiststream& put_values (odiststream& ops, PutFunction put_element) const { return base::data().put_values(ops, put_element); }
3853
template <class PutFunction, class A2> odiststream& permuted_put_values (
3854
odiststream& ops, const array<size_type,distributed,A2>& perm, PutFunction put_element) const
3855
{ return base::data().permuted_put_values (ops, perm.data(), put_element); }
3859
File: rheolef.info, Node: vec class, Up: Classes
3861
5.28 vec - vector in distributed environment
3862
============================================
3864
(Source file: `skit/plib2/vec.h')
3869
STL-like vector container for a sequential or distributed memory
3870
machine model. Additional operation fom classical algebra.
3875
A sample usage of the class is:
3876
int main(int argc, char**argv) {
3877
environment distributed(argc, argv);
3878
vec<double> x(100, 3.14);
3885
Implementation use array<T,M>.
3890
template <class T, class M = rheo_default_memory_model>
3891
class vec : public array<T, M> {
3896
typedef array<T, M> base;
3897
typedef typename base::size_type size_type;
3898
typedef typename base::iterator iterator;
3899
typedef typename base::const_iterator const_iterator;
3901
// allocator/deallocator:
3903
vec (const distributor& ownership,
3904
const T& init_val = std::numeric_limits<T>::max());
3906
vec(size_type dis_size = 0,
3907
const T& init_val = std::numeric_limits<T>::max());
3910
const distributor& ownership,
3911
const T& init_val = std::numeric_limits<T>::max());
3915
const T& init_val = std::numeric_limits<T>::max());
3917
// expression template:
3919
template<typename Expr>
3920
vec (const Expr& expr);
3922
template<typename Expr>
3923
vec<T,M>& operator= (const Expr& expr);
3925
template<typename Expr>
3926
vec<T,M>& operator+= (const Expr& expr);
3928
template<typename Expr>
3929
vec<T,M>& operator-= (const Expr& expr);
3933
File: rheolef.info, Node: msg_local_optimize class, Up: Classes
3935
5.29 msg_local_optimize - local scatter optimize
3936
================================================
3938
(Source file: `skit/plib2/msg_local_optimize.h')
3943
Optimize local exchanges during gatter/scatter.
3950
"input": the send and receive local contexts (to, from) |
3951
to_loc_idx(0:n_local-1), from_loc_idy(0:n_local-1) "output": the
3952
boolean, true when optimization is possible | has_opt begin |
3953
if n_local = 0 then | has_opt := false | else |
3954
to_start := to_loc_idx(0) | from_start := from_loc_idy(0) |
3955
has_opt := true | i := 1 | while i < n_local and
3956
has_opt do | to_start := to_start + 1 |
3957
from_start := from_start + 1 | if to_loc_idx(i) <> to_start
3958
| or from_loc_idy(i) <> from_start then | has_opt
3959
:= false | endif | i := i + 1 | endwhile
3965
Memory and time complexity is O(receive_total_size).
3971
class InputIterator1,
3972
class InputIterator2>
3974
msg_local_optimize (
3975
InputIterator1 to_loc_idx, // n_local
3976
InputIterator1 last_to_loc_idx,
3977
InputIterator2 from_loc_idy) // n_local
3979
typedef typename std::iterator_traits<InputIterator1>::value_type Size;
3980
if (to_loc_idx == last_to_loc_idx) {
3983
Size to_start = *to_loc_idx++;
3984
Size from_start = *from_loc_idy++;
3985
bool has_opt = true;
3986
while (to_loc_idx != last_to_loc_idx && has_opt) {
3989
if ((*to_loc_idx++) != to_start ||
3990
(*from_loc_idy++) != from_start) {
3998
File: rheolef.info, Node: msg_to_context class, Up: Classes
4000
5.30 msg_to_context - receive pattern
4001
=====================================
4003
(Source file: `skit/plib2/msg_to_context.h')
4008
Computes the receive compresed message pattern for gather and scatter.
4009
The local message part is computed by a separate algorithm (see
4010
"msg_to_local_context"(5)).
4017
"input": the receive pattern and the permutation |
4018
perm(0:receive_nproc-1) | r_iproc(0:receive_nproc-1),
4019
r_size(0:receive_nproc-1), |
4020
r_idx(0:receive_nproc*receive_max_size-1) "output": the receive
4021
context (to) | to_proc(0:receive_nproc-1), to_ptr(0:receive_nproc),
4022
| to_idx(0:receive_total_size-1) begin | to_ptr(0) := 0 |
4023
for j := 0 to receive_nproc-1 do | j1 := perm(j) |
4024
to_proc(j) := r_iproc(j1) | to_ptr(j+1) := r_ptr(j) + rsize(j1)
4025
| for q := to_ptr(j) to to_tr(j+1)-1 do | to_idx(q) :=
4026
r_idx(j1, q-to_ptr(j)) - istart | endfor | endfor end
4031
Memory and time complexity is O(receive_total_size).
4037
class InputIterator1,
4038
class InputRandomIterator2,
4039
class InputRandomIterator3,
4040
class InputRandomIterator4,
4042
class OutputIterator1,
4043
class OutputIterator2,
4044
class OutputIterator3>
4047
InputIterator1 perm, // receive_nproc
4048
InputIterator1 last_perm,
4049
InputRandomIterator2 r_iproc, // receive_nproc
4050
InputRandomIterator3 r_size, // receive_nproc
4051
InputRandomIterator4 r_idx, // receive_nproc*receive_max_size
4052
Size receive_max_size,
4054
OutputIterator1 to_proc, // receive_nproc
4055
OutputIterator2 to_ptr, // receive_nproc+1
4056
OutputIterator3 to_idx) // receive_total_size
4058
OutputIterator2 prec_ptr = to_ptr;
4060
while (perm != last_perm) {
4061
Size j1 = (*perm++);
4062
(*to_proc++) = r_iproc[j1];
4063
Size size = r_size[j1];
4064
(*to_ptr++) = (*prec_ptr++) + size;
4065
InputRandomIterator4 iter_idx = r_idx + j1*receive_max_size;
4066
InputRandomIterator4 last_idx = iter_idx + size;
4067
while (iter_idx != last_idx)
4068
(*to_idx++) = (*iter_idx++) - istart;
4073
File: rheolef.info, Node: asr class, Up: Classes
4075
5.31 asr - associative sparse matrix
4076
====================================
4078
(Source file: `skit/plib2/asr.h')
4083
Associative sparse matrix container stored row by row using the STL
4089
Implementation use MPI-1.1 and is inspired from Mat_MPI in
4095
For efficiency purpose, the assembly phase may access directly to
4096
the asr representation, without crossing the reference counting and
4097
pointer handler. Something like the iterator for dense vectors.
4103
class basic_asr : public smart_pointer<R> {
4108
typedef typename R::size_type size_type;
4109
typedef typename R::element_type element_type;
4110
typedef typename R::memory_type memory_type;
4111
typedef distributor::communicator_type communicator_type;
4113
// allocators/deallocators:
4115
basic_asr (size_type dis_nrow = 0, size_type dis_ncol = 0);
4116
basic_asr (const distributor& row_ownership, const distributor& col_ownership);
4117
explicit basic_asr (const csr<element_type,memory_type>&);
4121
const communicator_type& comm() const;
4124
size_type nrow () const;
4125
size_type ncol () const;
4126
size_type nnz () const;
4129
size_type dis_nrow () const;
4130
size_type dis_ncol () const;
4131
size_type dis_nnz () const;
4132
const distributor& row_ownership() const;
4133
const distributor& col_ownership() const;
4135
// range on local memory
4136
size_type row_first_index () const;
4137
size_type row_last_index () const;
4138
size_type col_first_index () const;
4139
size_type col_last_index () const;
4141
// global modifiers:
4143
element_type& dis_entry (size_type dis_i, size_type dis_j);
4144
void dis_entry_assembly();
4145
void dis_entry_assembly_begin ();
4146
void dis_entry_assembly_end ();
4147
void resize (size_type dis_nrow = 0, size_type dis_ncol = 0);
4151
void dump (const std::string& name) const;
4153
template <class T, class M = rheo_default_memory_model>
4156
typedef M memory_type;
4159
class asr<T,sequential> : public basic_asr<asr_seq_rep<T> > {
4161
typedef typename basic_asr<asr_seq_rep<T> >::size_type size_type;
4162
typedef sequential memory_type;
4163
asr (size_type dis_nrow = 0, size_type dis_ncol = 0);
4164
asr (const distributor& row_ownership, const distributor& col_ownertship);
4165
explicit asr(const csr<T,memory_type>&);
4167
#ifdef _RHEOLEF_HAVE_MPI
4169
class asr<T,distributed> : public basic_asr<asr_mpi_rep<T> > {
4171
typedef distributed memory_type;
4172
typedef typename basic_asr<asr_mpi_rep<T> >::size_type size_type;
4173
asr (size_type dis_nrow = 0, size_type dis_ncol = 0);
4174
asr (const distributor& row_ownership, const distributor& col_ownertship);
4175
explicit asr(const csr<T,memory_type>&);
4177
#endif // _RHEOLEF_HAVE_MPI
4180
template <class T, class M>
4181
idiststream& operator >> (idiststream& s, asr<T,M>& x);
4183
template <class T, class M>
4184
odiststream& operator << (odiststream& s, const asr<T,M>& x);
4148
4187
File: rheolef.info, Node: csr class, Up: Classes
4150
5.18 `csr' - compressed sparse row matrix format
4151
================================================
4153
(Source file: `skit/lib/csr.h')
4158
The class implements a matrix in compressed sparse row format. A
4159
declaration whithout any parametrers correspond to a matrix with null
4162
Notes that the constructor can be invocated with an initializer:
4165
Input and output, as usual (*note iorheo class::):
4169
Default is the Harwell-Boeing format. Various others formated options
4170
are available: matlab and postscript plots.
4172
Affectation from a scalar
4175
The matrix/vector multiply:
4177
and the transposed matrix/ vector multiply:
4180
The binary operators are:
4181
a*b, a+b, a-b, lambda*a, a*lambda, a/lambda
4182
The unary operators are sign inversion and transposition:
4184
The combination with a diagonal matrix is not yet completely
4185
available. The interface would be something like:
4186
basic_diag<double> d;
4190
left_div(a,d) // aij/dii
4191
When applied to the matrix directly: this feature is not yet
4192
completely available. The interface would be something like:
4196
a.left_mult(d); // a := d*a
4197
a /= d; // aij := aij/djj
4198
a.left_div(d); // aij := aij/dii
4199
The combination with a constant-identity matrix: this feature is not
4200
yet completely available. The interface would be something like:
4202
a + k*EYE, k*EYE + a, a - k*EYE, k*EYE - a,
4206
Get the lower triangular part:
4207
csr<double> l = tril(a);
4208
Conversely, `triu' get the upper triangular part.
4210
For optimizing the profile storage, I could be convenient to use a
4211
renumbering algorithm (see also *note ssk class:: and *note
4212
permutation class::).
4213
permutation p = gibbs(a);
4214
csr<double> b = perm(a, p, q); // B := P*A*trans(Q)
4215
Horizontal matrix concatenation:
4217
Vertical matrix concatenation:
4219
Explicit conversion from an associative `asr e' matrix writes:
4221
from a dense `dns m' matrix writes:
4228
The `csr' class is currently under reconstruction for the distributed
4229
memory support by using a MPI-based implementation.
4232
File: rheolef.info, Node: diag class, Up: Classes
4234
5.19 `basic_diag' - diagonal matrix
4235
===================================
4237
(Source file: `skit/lib/diag.h')
4242
The class implements a diagonal matrix. A declaration whithout any
4243
parametrers correspond to a null size matrix:
4244
basic_diag<Float> d;
4245
The constructor can be invocated whith a size parameter:
4246
basic_diag<Float> d(n);
4247
or an initialiser, either a vector (*note vec class::):
4248
basic_diag<Float> d = basic_diag(v);
4249
or a csr matrix *note csr class:::
4250
basic_diag<Float> d = basic_diag(a);
4251
The conversion from `basic_diag' to `vec' or `csr' is explicit.
4253
When a diagonal matrix is constructed from a `csr' matrix, the
4254
definition of the diagonal of matrix is _always_ a vector of size NROW
4255
which contains the elements in rows 1 to NROW of the matrix that are
4256
contained in the diagonal. If the diagonal element falls outside the
4257
matrix, i.e. NCOL < NROW then it is defined as a zero entry.
4262
Since the `basic_diag' class derives from the `vec', the `basic_diag'
4263
class present also a STL-like interface.
4269
class basic_diag : public vec<T> {
4274
typedef typename vec<T>::element_type element_type;
4275
typedef typename vec<T>::size_type size_type;
4276
typedef typename vec<T>::iterator iterator;
4278
// allocators/deallocators:
4280
explicit basic_diag (size_type sz = 0);
4281
explicit basic_diag (const vec<T>& u);
4282
explicit basic_diag (const csr<T>& a);
4286
basic_diag<T> operator = (const T& lambda);
4290
size_type nrow () const { return vec<T>::size(); }
4291
size_type ncol () const { return vec<T>::size(); }
4293
// basic_diag as a preconditionner: solves D.x=b
4295
vec<T> solve (const vec<T>& b) const;
4296
vec<T> trans_solve (const vec<T>& b) const;
4299
basic_diag<T> dcat (const basic_diag<T>& a1, const basic_diag<T>& a2);
4302
basic_diag<T> operator / (const T& lambda, const basic_diag<T>& d);
4306
operator * (const basic_diag<T>& d, const vec<T>& x);
4309
vec<T> left_div (const vec<T>& x, const basic_diag<T>& d);
4189
5.32 csr - compressed sparse row matrix
4190
=======================================
4192
(Source file: `skit/plib2/csr.h')
4197
Distributed compressed sparse matrix container stored row by row.
4202
For efficiency purpose, the assembly phase may access directly to
4203
the csr representation, without crossing the reference counting and
4204
pointer handler. Something like the iterator for dense vectors.
4210
class csr<T,sequential> : public smart_pointer<csr_seq_rep<T> > {
4215
typedef csr_seq_rep<T> rep;
4216
typedef smart_pointer<rep> base;
4217
typedef typename rep::memory_type memory_type;
4218
typedef typename rep::size_type size_type;
4219
typedef typename rep::element_type element_type;
4220
typedef typename rep::iterator iterator;
4221
typedef typename rep::const_iterator const_iterator;
4222
typedef typename rep::const_data_iterator const_data_iterator;
4224
// allocators/deallocators:
4226
csr() : base(new_macro(rep())) {}
4227
explicit csr(const asr<T,sequential>& a) : base(new_macro(rep(a.data()))) {}
4232
const distributor& row_ownership() const { return base::data().row_ownership(); }
4233
const distributor& col_ownership() const { return base::data().col_ownership(); }
4234
size_type dis_nrow () const { return row_ownership().dis_size(); }
4235
size_type dis_ncol () const { return col_ownership().dis_size(); }
4236
size_type dis_nnz () const { return base::data().nnz(); }
4237
bool is_symmetric() const { return base::data().is_symmetric(); }
4238
void set_symmetry (bool is_symm) { base::data().set_symmetry(is_symm); }
4239
size_type pattern_dimension() const { return base::data().pattern_dimension(); }
4240
void set_pattern_dimension(size_type dim){ base::data().set_pattern_dimension(dim); }
4243
size_type nrow () const { return base::data().nrow(); }
4244
size_type ncol () const { return base::data().ncol(); }
4245
size_type nnz () const { return base::data().nnz(); }
4247
// range on local memory
4248
size_type row_first_index () const { return base::data().row_first_index(); }
4249
size_type row_last_index () const { return base::data().row_last_index(); }
4250
size_type col_first_index () const { return base::data().col_first_index(); }
4251
size_type col_last_index () const { return base::data().col_last_index(); }
4253
const_iterator begin() const { return base::data().begin(); }
4254
const_iterator end() const { return base::data().end(); }
4257
// accessors, only for distributed
4258
size_type ext_nnz() const { return base::data().ext_nnz(); }
4259
const_iterator ext_begin() const { return base::data().ext_begin(); }
4260
const_iterator ext_end() const { return base::data().ext_end(); }
4261
size_type jext2dis_j (size_type jext) const { return base::data().jext2dis_j(); }
4266
csr<T,sequential> operator+ (const csr<T,sequential>& b) const;
4267
csr<T,sequential> operator- (const csr<T,sequential>& b) const;
4269
void mult (const vec<element_type,sequential>& x, vec<element_type,sequential>& y) const
4270
{ base::data().mult (x,y); }
4271
vec<element_type,sequential> operator* (const vec<element_type,sequential>& x) const {
4272
vec<element_type,sequential> y (row_ownership(), element_type());
4279
void dump (const std::string& name) const { base::data().dump(name); }
4286
class csr<T,distributed> : public smart_pointer<csr_mpi_rep<T> > {
4291
typedef csr_mpi_rep<T> rep;
4292
typedef smart_pointer<rep> base;
4293
typedef typename rep::memory_type memory_type;
4294
typedef typename rep::size_type size_type;
4295
typedef typename rep::element_type element_type;
4296
typedef typename rep::iterator iterator;
4297
typedef typename rep::const_iterator const_iterator;
4298
typedef typename rep::const_data_iterator const_data_iterator;
4300
// allocators/deallocators:
4302
csr() : base(new_macro(rep())) {}
4303
explicit csr(const asr<T,memory_type>& a) : base(new_macro(rep(a.data()))) {}
4308
const distributor& row_ownership() const { return base::data().row_ownership(); }
4309
const distributor& col_ownership() const { return base::data().col_ownership(); }
4310
size_type dis_nrow () const { return row_ownership().dis_size(); }
4311
size_type dis_ncol () const { return col_ownership().dis_size(); }
4312
size_type dis_nnz () const { return base::data().dis_nnz(); }
4313
bool is_symmetric() const { return base::data().is_symmetric(); }
4314
void set_symmetry (bool is_symm) { base::data().set_symmetry(is_symm); }
4315
size_type pattern_dimension() const { return base::data().pattern_dimension(); }
4316
void set_pattern_dimension(size_type dim){ base::data().set_pattern_dimension(dim); }
4319
size_type nrow () const { return base::data().nrow(); }
4320
size_type ncol () const { return base::data().ncol(); }
4321
size_type nnz () const { return base::data().nnz(); }
4323
// range on local memory
4324
size_type row_first_index () const { return base::data().row_first_index(); }
4325
size_type row_last_index () const { return base::data().row_last_index(); }
4326
size_type col_first_index () const { return base::data().col_first_index(); }
4327
size_type col_last_index () const { return base::data().col_last_index(); }
4329
const_iterator begin() const { return base::data().begin(); }
4330
const_iterator end() const { return base::data().end(); }
4332
// accessors, only for distributed
4333
size_type ext_nnz() const { return base::data().ext_nnz(); }
4334
const_iterator ext_begin() const { return base::data().ext_begin(); }
4335
const_iterator ext_end() const { return base::data().ext_end(); }
4336
size_type jext2dis_j (size_type jext) const { return base::data().jext2dis_j(jext); }
4340
csr<T,distributed> operator+ (const csr<T,distributed>& b) const;
4341
csr<T,distributed> operator- (const csr<T,distributed>& b) const;
4343
void mult (const vec<element_type,distributed>& x, vec<element_type,distributed>& y) const
4344
{ base::data().mult (x,y); }
4345
vec<element_type,distributed> operator* (const vec<element_type,distributed>& x) const {
4346
vec<element_type,distributed> y (row_ownership(), element_type());
4353
void dump (const std::string& name) const { base::data().dump(name); }
4357
File: rheolef.info, Node: msg_from_context_pattern class, Up: Classes
4359
5.33 msg_from_context - gather
4360
==============================
4362
(Source file: `skit/plib2/msg_from_context_pattern.h')
4367
Computes the receive compressed message pattern for gather and
4373
msg_from_context_pattern
4375
"input": the ownership distribution and the indexes arrays |
4376
msg_size(0:nproc) "output": the receive context (from) and an
4377
auxilliary array proc2from_proc | from_proc(0:send_nproc-1),
4378
from_ptr(0:send_nproc), | proc2from_proc(0:nproc-1) begin |
4379
i := 0 | from_ptr(0) := 0 | for iproc := 0 to nproc-1 do
4380
| if msg_size(iproc) <> 0 then | from_proc(i) := iproc
4381
| from_ptr(i+1) := from_ptr(i) + msg_size(i) |
4382
proc2from_proc(iproc) := i | i := i+1 | endif |
4388
At the end of the algorithm, we have i = send_nproc.
4393
Complexity is O(nproc).
4399
class InputIterator1,
4400
class OutputIterator1,
4401
class OutputIterator2,
4402
class OutputIterator3>
4404
msg_from_context_pattern (
4405
InputIterator1 msg_size, // nproc
4406
InputIterator1 last_msg_size,
4407
OutputIterator1 from_proc, // send_nproc
4408
OutputIterator2 from_ptr, // send_nproc+1
4409
OutputIterator3 proc2from_proc) // nproc
4411
typedef typename std::iterator_traits<InputIterator1>::value_type Size;
4415
(*from_ptr++) = ptr_val;
4416
while (msg_size != last_msg_size) {
4417
Size sz = (*msg_size++);
4419
(*from_proc++) = iproc;
4421
(*from_ptr++) = ptr_val;
4422
(*proc2from_proc) = i;
4431
File: rheolef.info, Node: msg_local_context class, Up: Classes
4433
5.34 msg_local_context - receive pattern
4434
========================================
4436
(Source file: `skit/plib2/msg_local_context.h')
4441
Computes the receive compresed message local pattern, i.e. the only
4442
part that does not requires communication. (see "msg_to_context"(5)).
4449
"input": the index sets and the local processor index range |
4450
idx(0:nidx-1), idy(0:nidx-1), istart, ilast "output": the send and
4451
receive local contexts (to, from) | to_loc_idx(0:n_local-1),
4452
from_loc_idy(0:n_local-1) begin | if n_local <> 0 then |
4453
iloc := 0 | for k := 0 to nidx-1 do | if idy(k) in
4454
(istart, ilast( then | to_loc_idx(iloc) := idy(k) - istart
4455
| from_loc_idy(iloc) := idy(k) | iloc := iloc + 1
4456
| endif | endfor | endif end
4461
Memory and time complexity is O(receive_total_size).
4467
class InputIterator1,
4468
class InputIterator2,
4470
class OutputIterator1,
4471
class OutputIterator2>
4474
InputIterator1 idx, // nidx
4475
InputIterator1 last_idx,
4476
InputIterator2 idy, // nidx
4480
OutputIterator1 to_loc_idx, // n_local
4481
OutputIterator1 last_to_loc_idx,
4482
OutputIterator2 from_loc_idy) // n_local
4484
if (to_loc_idx == last_to_loc_idx) {
4487
while (idx != last_idx) {
4489
if (idx_i >= istart && idx_i < ilast) {
4491
assert_macro (idy_i < idy_maxval, "Scattering past end of TO vector");
4492
(*to_loc_idx++) = idx_i - istart;
4493
(*from_loc_idy++) = idy_i;
4501
File: rheolef.info, Node: mpi_scatter_init class, Up: Classes
4503
5.35 mpi_scatter_init - gather/scatter initialize
4504
=================================================
4506
(Source file: `skit/plib2/mpi_scatter_init.h')
4511
Initialize communication for distributed to sequential scatter
4517
Time and memory complexity is O(nidx+nproc). For finite-element
4518
problems in d dimenion
4520
| nidx ~ N^((d-1)/d)
4522
where N is the number of degrees of freedom.
4524
IMPLEMENTATION Inspirated from petsc-2.0/vpscat.c:
4525
VecScatterCreate_PtoS()
4530
template <class Message, class Size, class SizeRandomIterator1, class SizeRandomIterator2, class Tag>
4535
SizeRandomIterator1 idx,
4537
SizeRandomIterator1 idy,
4539
SizeRandomIterator2 ownership,
4541
const distributor::communicator_type& comm,
4546
typedef Size size_type;
4547
size_type my_proc = comm.rank();
4548
size_type nproc = comm.size();
4550
// -------------------------------------------------------
4551
// 1) first count number of contributors to each processor
4552
// -------------------------------------------------------
4553
std::vector<size_type> msg_size(nproc, 0);
4554
std::vector<size_type> msg_mark(nproc, 0);
4555
std::vector<size_type> owner (nidx);
4556
size_type send_nproc = 0;
4558
size_type iproc = 0;
4559
for (size_type i = 0; i < nidx; i++) {
4560
for (; iproc < nproc; iproc++) {
4561
if (idx[i] >= ownership[iproc] && idx[i] < ownership[iproc+1]) {
4564
if (!msg_mark[iproc]) {
4565
msg_mark[iproc] = 1;
4571
check_macro (iproc != nproc, "bad stash data: idx["<<i<<"]="<<idx[i]<<" out of range [0:"<<ownership[nproc]<<"[");
4574
// -------------------------------------------------------
4575
// 2) avoid to send message to my-proc in counting
4576
// -------------------------------------------------------
4577
size_type n_local = msg_size[my_proc];
4579
msg_size [my_proc] = 0;
4580
msg_mark [my_proc] = 0;
4583
// ----------------------------------------------------------------
4584
// 3) compute number of messages to be send to my_proc
4585
// ----------------------------------------------------------------
4586
std::vector<size_type> work(nproc);
4589
msg_mark.begin().operator->(),
4591
work.begin().operator->(),
4592
std::plus<size_type>());
4593
size_type receive_nproc = work [my_proc];
4594
// ----------------------------------------------------------------
4595
// 4) compute messages max size to be send to my_proc
4596
// ----------------------------------------------------------------
4599
msg_size.begin().operator->(),
4601
work.begin().operator->(),
4602
mpi::maximum<size_type>());
4603
size_type receive_max_size = work [my_proc];
4604
// ----------------------------------------------------------------
4605
// 5) post receive: exchange the buffer adresses between processes
4606
// ----------------------------------------------------------------
4607
std::list<std::pair<size_type,mpi::request> > receive_waits;
4608
std::vector<size_type> receive_data (receive_nproc*receive_max_size);
4609
for (size_type i_receive = 0; i_receive < receive_nproc; i_receive++) {
4610
mpi::request i_req = comm.irecv (
4613
receive_data.begin().operator->() + i_receive*receive_max_size,
4615
receive_waits.push_back (std::make_pair(i_receive, i_req));
4617
// ---------------------------------------------------------------------------
4618
// 6) compute the send indexes
4619
// ---------------------------------------------------------------------------
4620
// comme idx est trie, on peut faire une copie de idx dans send_data
4621
// et du coup owner et send_data_ownership sont inutiles
4622
std::vector<size_type> send_data (nidx);
4623
std::copy (idx, idx+nidx, send_data.begin());
4624
// ---------------------------------------------------------------------------
4626
// ---------------------------------------------------------------------------
4627
std::list<std::pair<size_type,mpi::request> > send_waits;
4629
size_type i_send = 0;
4630
size_type i_start = 0;
4631
for (size_type iproc = 0; iproc < nproc; iproc++) {
4632
size_type i_msg_size = msg_size[iproc];
4633
if (i_msg_size == 0) continue;
4634
mpi::request i_req = comm.isend (
4637
send_data.begin().operator->() + i_start,
4639
send_waits.push_back(std::make_pair(i_send,i_req));
4641
i_start += i_msg_size;
4644
// ---------------------------------------------------------------------------
4645
// 8) wait on receives
4646
// ---------------------------------------------------------------------------
4647
// note: for wait_all, build an iterator adapter that scan the pair.second in [index,request]
4648
// and then get an iterator in the pair using iter.base(): retrive the corresponding index
4649
// for computing the position in the receive.data buffer
4650
typedef boost::transform_iterator<select2nd<size_t,mpi::request>, std::list<std::pair<size_t,mpi::request> >::iterator>
4652
std::vector<size_type> receive_size (receive_nproc);
4653
std::vector<size_type> receive_proc (receive_nproc);
4654
size_type receive_total_size = 0;
4655
while (receive_waits.size() != 0) {
4656
typedef size_type data_type; // exchanged data is of "size_type"
4657
request_iterator iter_r_waits (receive_waits.begin(), select2nd<size_t,mpi::request>()),
4658
last_r_waits (receive_waits.end(), select2nd<size_t,mpi::request>());
4659
// waits on any receive...
4660
std::pair<mpi::status,request_iterator> pair_status = mpi::wait_any (iter_r_waits, last_r_waits);
4662
boost::optional<int> i_msg_size_opt = pair_status.first.count<data_type>();
4663
check_macro (i_msg_size_opt, "receive wait failed");
4664
int iproc = pair_status.first.source();
4665
check_macro (iproc >= 0, "receive: source iproc = "<<iproc<<" < 0 !");
4666
// get size of receive and number in data
4667
size_type i_msg_size = (size_t)i_msg_size_opt.get();
4668
std::list<std::pair<size_t,mpi::request> >::iterator i_pair_ptr = pair_status.second.base();
4669
size_type i_receive = (*i_pair_ptr).first;
4670
receive_proc [i_receive] = iproc;
4671
receive_size [i_receive] = i_msg_size;
4672
receive_total_size += i_msg_size;
4673
receive_waits.erase (i_pair_ptr);
4675
// ---------------------------------------------------------------------------
4676
// 9) allocate the entire send(to) scatter context
4677
// ---------------------------------------------------------------------------
4678
to.resize (receive_total_size, receive_nproc);
4680
// ---------------------------------------------------------------------------
4681
// 10) compute the permutation of values that gives the sorted source[] sequence
4682
// ---------------------------------------------------------------------------
4683
// init: perm[i] = i
4684
std::vector<size_type> perm(receive_nproc);
4685
copy(index_iterator<size_type>(), index_iterator<size_type>(receive_nproc), perm.begin());
4686
sort_with_permutation (
4688
receive_proc.begin().operator->(),
4689
perm.begin().operator->());
4690
// ---------------------------------------------------------------------------
4691
// 11) Computes the receive compresed message pattern for send(to)
4692
// ---------------------------------------------------------------------------
4693
size_type istart = ownership[my_proc]; // = ownership.first_index()
4697
receive_proc.begin(),
4698
receive_size.begin(),
4699
receive_data.begin(),
4703
to.starts().begin(),
4704
to.indices().begin());
4705
// ---------------------------------------------------------------------------
4706
// 12) allocate the entire receive(from) scatter context
4707
// ---------------------------------------------------------------------------
4708
from.resize(nidy, send_nproc);
4709
// ---------------------------------------------------------------------------
4710
// 13) Computes the receive compresed message pattern for receive(from)
4711
// ---------------------------------------------------------------------------
4712
std::vector<size_type> proc2from_proc(nproc);
4713
msg_from_context_pattern (
4716
from.procs().begin(),
4717
from.starts().begin(),
4718
proc2from_proc.begin());
4719
// ---------------------------------------------------------------------------
4720
// 14) Computes the receive compresed message indices for receive(from)
4721
// ---------------------------------------------------------------------------
4722
// assume that indices are sorted by increasing order
4723
std::vector<size_type> start(send_nproc+1);
4724
copy (from.starts().begin(), from.starts().end(), start.begin());
4725
msg_from_context_indices (
4729
proc2from_proc.begin(),
4733
from.indices().begin());
4734
// ---------------------------------------------------------------------------
4735
// 15) wait on sends
4736
// ---------------------------------------------------------------------------
4737
request_iterator iter_s_waits (send_waits.begin(), select2nd<size_type,mpi::request>()),
4738
last_s_waits (send_waits.end(), select2nd<size_type,mpi::request>());
4739
mpi::wait_all (iter_s_waits, last_s_waits);
4740
// ---------------------------------------------------------------------------
4741
// 16) Computes the receive compresed message local pattern,
4742
// i.e. the only part that does not requires communication.
4743
// ---------------------------------------------------------------------------
4744
from.local_slots.resize(n_local);
4745
to.local_slots.resize(n_local);
4746
size_type ilast = ownership[my_proc+1]; // = ownership.last_index()
4754
to.local_slots.begin(),
4755
to.local_slots.end(),
4756
from.local_slots.begin());
4757
// ---------------------------------------------------------------------------
4758
// 17) Optimize local exchanges during gatter/scatter
4759
// ---------------------------------------------------------------------------
4760
bool has_opt = msg_local_optimize (
4761
to.local_slots.begin(),
4762
to.local_slots.end(),
4763
from.local_slots.begin());
4765
if (has_opt && n_local != 0) {
4766
to.local_is_copy = true;
4767
to.local_copy_start = to.local_slots[0];
4768
to.local_copy_length = n_local;
4769
from.local_is_copy = true;
4770
from.local_copy_start = from.local_slots[0];
4771
from.local_copy_length = n_local;
4776
File: rheolef.info, Node: eye class, Up: Classes
4778
5.36 `eye' - identity matrix
4779
============================
4781
(Source file: `skit/plib2/eye.h')
4786
Following matlab, the name EYE is used in place of I to denote identity
4787
matrices because I is often used as a subscript or as sqrt(-1). The
4788
dimensions of EYE are determined by context. For example,
4790
adds 3 to the diagonal elements of A. Small caps 'eye' is the class
4791
name, and upper case 'EYE' is the const variable "eye<double>()", i.e.
4792
the identity matrix.
4794
The preconditioner interface is usefull when calling algorithms without
4795
any preconditioners, e.g.
4796
int status = cg (a, x, b, EYE, 100, 1e-7);
4805
const vec<T>& operator * (const vec<T>& x) const { return x; }
4806
const vec<T>& solve (const vec<T>& x) const { return x; }
4807
const vec<T>& trans_solve (const vec<T>& x) const { x; }
4809
#define EYE eye<Float>()
4812
File: rheolef.info, Node: mpi_scatter_begin class, Up: Classes
4814
5.37 mpi_scatter_begin - gather/scatter initialize
4815
==================================================
4817
(Source file: `skit/plib2/mpi_scatter_begin.h')
4822
Begin communication for distributed to sequential scatter context.
4830
Even though the next routines are written with distributed vectors,
4831
either x or y (but not both) may be sequential vectors, one for each
4834
from indices indicate where arriving stuff is stashed to indices
4835
indicate where departing stuff came from. the naming can be a
4838
Reverse scatter is obtained by swapping (to,from) in calls to
4839
scatter_begin and scatter_end. Reverse scatter is usefull for
4840
transpose matrix-vector multiply.
4842
Scatter general operation is handled by a template "SetOp" type. The
4843
special case "SetOp=set_op" of an affectation is treated separatly,
4844
tacking care of local scatter affectation. Thus, the
4845
"mpi_scatter_begin" routine is splitted into
4846
"msg_scatter_begin_global" and "msg_scatter_begin_local" parts.
4852
class InputIterator,
4857
mpi_scatter_begin_global (
4864
typedef typename Message::size_type size_type;
4865
// ----------------------------------------------------------
4867
// ----------------------------------------------------------
4868
from.requests.clear();
4870
size_type n_receive = from.starts().size() - 1;
4871
size_type i_start = 0;
4872
for (size_type i = 0; i < n_receive; i++) {
4873
size_type i_size = from.starts() [i+1] - from.starts() [i];
4874
mpi::request i_req = comm.irecv(
4877
from.values().begin().operator->() + i_start,
4880
from.requests.push_back (std::make_pair(i, i_req));
4883
// ----------------------------------------------------------
4884
// 2) apply right permutation
4885
// ----------------------------------------------------------
4888
// ----------------------------------------------------------
4890
// ----------------------------------------------------------
4891
to.requests.clear();
4893
size_type n_send = to.starts().size() - 1;
4894
size_type i_start = 0;
4895
for (size_type i = 0; i < n_send; i++) {
4896
size_type i_size = to.starts() [i+1] - to.starts() [i];
4897
mpi::request i_req = comm.isend(
4900
to.values().begin().operator->() + i_start,
4903
to.requests.push_back (std::make_pair(i, i_req));
4908
class InputIterator,
4909
class OutputIterator,
4913
mpi_scatter_begin_local (
4921
msg_both_permutation_apply (
4922
to.local_slots.begin(),
4923
to.local_slots.end(),
4926
from.local_slots.begin(),
4930
// take care of local insert: template specialisation
4932
class InputIterator,
4933
class OutputIterator,
4936
mpi_scatter_begin_local (
4941
set_op<typename Message::value_type, typename Message::value_type> op)
4944
// used when x & y have distinct pointer types (multi-valued)
4945
if (y == x && ! to.local_nonmatching_computed) {
4946
// scatter_local_optimize(to,from);
4947
fatal_macro ("y == x: adress matches in scatter: not yet -- sorry");
4949
if (to.local_is_copy) {
4951
std::copy(x + to.local_copy_start,
4952
x + to.local_copy_start + to.local_copy_length,
4953
y + from.local_copy_start);
4955
} else if (y != x || ! to.local_nonmatching_computed) {
4957
msg_both_permutation_apply (
4958
to.local_slots.begin(),
4959
to.local_slots.end(),
4962
from.local_slots.begin(),
4965
} else { // !to.local_is_copy && y == x && to.local_nonmatching_computed
4967
msg_both_permutation_apply (
4968
to.local_slots_nonmatching.begin(),
4969
to.local_slots_nonmatching.end(),
4972
from.local_slots_nonmatching.begin(),
4978
class InputIterator,
4979
class OutputIterator,
4995
mpi_scatter_begin_global (x, from, to, tag, comm);
4996
if (to.n_local() == 0) {
4999
error_macro ("local messages: no more supported");
5000
mpi_scatter_begin_local (x, y, from, to, op);
5004
File: rheolef.info, Node: mpi_scatter_end class, Up: Classes
5006
5.38 mpi_scatter_end - gather/scatter finalize
5007
==============================================
5009
(Source file: `skit/plib2/mpi_scatter_end.h')
5014
Finishes communication for distributed to sequential scatter context.
5020
class InputIterator,
5021
class OutputIterator,
5036
typedef typename Message::base_value_type data_type; // the data type to be received by mpi
5037
typedef boost::transform_iterator<select2nd<size_t,mpi::request>, std::list<std::pair<size_t,mpi::request> >::iterator>
5040
// -----------------------------------------------------------
5041
// 1) wait on receives and unpack receives into local space
5042
// -----------------------------------------------------------
5043
while (from.requests.size() != 0) {
5044
request_iterator iter_r_waits (from.requests.begin(), select2nd<size_t,mpi::request>()),
5045
last_r_waits (from.requests.end(), select2nd<size_t,mpi::request>());
5046
// waits on any receive...
5047
std::pair<mpi::status,request_iterator> pair_status = mpi::wait_any (iter_r_waits, last_r_waits);
5049
boost::optional<int> i_msg_size_opt = pair_status.first.count<data_type>();
5050
check_macro (i_msg_size_opt, "receive wait failed");
5051
int iproc = pair_status.first.source();
5052
check_macro (iproc >= 0, "receive: source iproc = "<<iproc<<" < 0 !");
5053
// get size of receive and number in data
5054
size_t i_msg_size = (size_t)i_msg_size_opt.get();
5055
std::list<std::pair<size_t,mpi::request> >::iterator i_pair_ptr = pair_status.second.base();
5056
size_t i_receive = (*i_pair_ptr).first;
5057
check_macro (i_msg_size == from.starts()[i_receive+1] - from.starts()[i_receive], "unexpected size");
5059
// unpack receives into our local space
5060
from.store_values (y, i_receive, op);
5061
from.requests.erase (i_pair_ptr);
5063
// -----------------------------------------------------------
5065
// -----------------------------------------------------------
5066
request_iterator iter_s_waits (to.requests.begin(), select2nd<size_t,mpi::request>()),
5067
last_s_waits (to.requests.end(), select2nd<size_t,mpi::request>());
5068
mpi::wait_all (iter_s_waits, last_s_waits);
4312
5072
File: rheolef.info, Node: iorheo class, Up: Classes
4314
5.20 `iorheo' - input and output functions and manipulation
5074
5.39 `iorheo' - input and output functions and manipulation
4315
5075
===========================================================
4317
5077
(Source file: `util/lib/iorheo.h')
4806
* newton algorithm::
4807
* damped-newton algorithm::
4808
* riesz_representer algorithm::
4809
* mixed_solver algorithm::
4810
* pminres algorithm::
4813
* bicgstab algorithm::
4814
* puzawa algorithm::
4818
File: rheolef.info, Node: newton algorithm, Up: Algorithms
4820
6.1 `newton' - Newton nonlinear algorithm
4821
=========================================
4823
(Source file: `nfem/lib/newton.h')
4828
Nonlinear Newton algorithm for the resolution of the following problem:
4830
A simple call to the algorithm writes:
4833
newton (P, uh, tol, max_iter);
4834
The `my_problem' class may contains methods for the evaluation of F
4835
(aka residue) and its derivative:
4839
field residue (const field& uh) const;
4840
void update_derivative (const field& uh) const;
4841
field derivative_solve (const field& mrh) const;
4842
Float norm (const field& uh) const;
4843
Float dual_norm (const field& Muh) const;
4845
See the example `p-laplacian.h' in the user's documentation for
4851
template <class Problem, class Field>
4852
int newton (Problem P, Field& uh, Float& tol, size_t& max_iter, std::ostream *p_cerr = 0) {
4853
if (p_cerr) *p_cerr << "# Newton: n r" << std::endl;
4854
for (size_t n = 0; true; n++) {
4855
Field rh = P.residue(uh);
4856
Float r = P.dual_norm(rh);
4857
if (p_cerr) *p_cerr << n << " " << r << std::endl;
4858
if (r <= tol) { tol = r; max_iter = n; return 0; }
4859
if (n == max_iter) { tol = r; return 1; }
4860
P.update_derivative (uh);
4861
Field delta_uh = P.derivative_solve (-rh);
4867
File: rheolef.info, Node: damped-newton algorithm, Up: Algorithms
4869
6.2 `damped_newton' - damped Newton nonlinear algorithm
4870
=======================================================
4872
(Source file: `nfem/lib/damped-newton.h')
4877
Nonlinear damped Newton algorithm for the resolution of the following
4880
A simple call to the algorithm writes:
4883
damped_newton (P, uh, tol, max_iter);
4884
The `my_problem' class may contains methods for the evaluation of F
4885
(aka residue) and its derivative:
4889
field residue (const field& uh) const;
4890
void update_derivative (const field& uh) const;
4891
field derivative_trans_mult (const field& mrh) const;
4892
field derivative_solve (const field& mrh) const;
4893
Float norm (const field& uh) const;
4894
Float dual_norm (const field& Muh) const;
4896
See the example `p-laplacian.h' in the user's documentation for
4902
template <class Problem, class Field, class Real, class Size>
4903
int damped_newton (Problem P, Field& u, Real& tol, Size& max_iter, std::ostream* p_cerr=0) {
4904
return damped_newton(P, newton_identity_preconditioner(), u, tol, max_iter, p_cerr);
4908
File: rheolef.info, Node: riesz_representer algorithm, Up: Algorithms
4910
6.3 `riesz_representer' - integrate a function by using quadrature formulae
4911
===========================================================================
4913
(Source file: `nfem/lib/riesz_representer.h')
4918
The function `riesz_representer' implements the approximation of an
4919
integral by using quadrature formulae. This is an expreimental
4920
implementation: please do not use yet for practical usage.
4925
template <class Function> field riesz_representer (const space& Vh,
4928
template <class Function> field riesz_representer (const space& Vh,
4929
const Function& f, quadrature_option_type qopt);
4934
The following code compute the Riesz representant, denoted by `mfh' of
4935
f(x), and the integral of f over the domain omega:
4936
Float f(const point& x);
4938
space Vh (omega_h, "P1");
4939
field mfh = riesz_representer(Vh, f);
4940
Float int_f = dot(mfh, field(Vh,1.0));
4941
The Riesz representer is the `mfh' vector of values:
4942
mfh(i) = integrate f(x) phi_i(x) dx
4943
where phi_i is the i-th basis function in Vh and the integral is
4944
evaluated by using a quadrature formulae. By default the quadrature
4945
formule is the Gauss one with the order equal to the polynomial order
4946
of Vh. Alternative quadrature formulae and order is available by
4947
passing an optional variable to riesz_representer.
4952
template <class Function>
4957
quadrature_option_type qopt
4958
= quadrature_option_type(quadrature_option_type::max_family,0))
4961
File: rheolef.info, Node: mixed_solver algorithm, Up: Algorithms
4963
6.4 `pcg_abtb', `pcg_abtbc', `pminres_abtb', `pminres_abtbc' - solvers for mixed linear problems
4964
================================================================================================
4966
(Source file: `skit/lib/mixed_solver.h')
4971
template <class Matrix, class Vector, class Solver, class Preconditioner, class Size, class Real>
4972
int pcg_abtb (const Matrix& A, const Matrix& B, Vector& u, Vector& p,
4973
const Vector& Mf, const Vector& Mg, const Preconditioner& S1,
4974
const Solver& inner_solver_A, Size& max_iter, Real& tol,
4975
std::ostream *p_cerr = 0, std::string label = "pcg_abtb");
4977
template <class Matrix, class Vector, class Solver, class Preconditioner, class Size, class Real>
4978
int pcg_abtbc (const Matrix& A, const Matrix& B, const Matrix& C, Vector& u, Vector& p,
4979
const Vector& Mf, const Vector& Mg, const Preconditioner& S1,
4980
const Solver& inner_solver_A, Size& max_iter, Real& tol,
4981
std::ostream *p_cerr = 0, std::string label = "pcg_abtbc");
4982
The synopsis is the same with the pminres algorithm.
4987
See the user's manual for practical examples for the nearly
4988
incompressible elasticity, the Stokes and the Navier-Stokes problems.
4993
Preconditioned conjugate gradient algorithm on the pressure p applied to
4994
the stabilized stokes problem:
4995
[ A B^T ] [ u ] [ Mf ]
4997
[ B -C ] [ p ] [ Mg ]
4998
where A is symmetric positive definite and C is symmetric positive
4999
and semi-definite. Such mixed linear problems appears for instance
5000
with the discretization of Stokes problems with stabilized P1-P1
5001
element, or with nearly incompressible elasticity. Formaly u =
5002
inv(A)*(Mf - B^T*p) and the reduced system writes for all
5003
non-singular matrix S1:
5004
inv(S1)*(B*inv(A)*B^T)*p = inv(S1)*(B*inv(A)*Mf - Mg)
5005
Uzawa or conjugate gradient algorithms are considered on the
5006
reduced problem. Here, S1 is some preconditioner for the Schur
5007
complement S=B*inv(A)*B^T. Both direct or iterative solvers for S1*q
5008
= t are supported. Application of inv(A) is performed via a call to
5009
a solver for systems such as A*v = b. This last system may be
5010
solved either by direct or iterative algorithms, thus, a general
5011
matrix solver class is submitted to the algorithm. For most
5012
applications, such as the Stokes problem, the mass matrix for the p
5013
variable is a good S1 preconditioner for the Schur complement. The
5014
stoping criteria is expressed using the S1 matrix, i.e. in L2 norm
5015
when this choice is considered. It is scaled by the L2 norm of the
5016
right-hand side of the reduced system, also in S1 norm.
5019
File: rheolef.info, Node: pminres algorithm, Up: Algorithms
5021
6.5 `pminres' - conjugate gradient algorithm.
5573
* msg_local_context algorithm::
5574
* msg_left_permutation_apply algorithm::
5575
* msg_local_optimize algorithm::
5576
* mpi_polymorphic_assembly_end algorithm::
5577
* mpi_polymorphic_assembly_begin algorithm::
5578
* msg_right_permutation_apply algorithm::
5579
* mpi_assembly_end algorithm::
5580
* mpi_scatter_begin algorithm::
5581
* msg_from_context_pattern algorithm::
5582
* mpi_scatter_init algorithm::
5583
* msg_both_permutation_apply algorithm::
5584
* mpi_assembly_begin algorithm::
5585
* msg_to_context algorithm::
5586
* msg_from_context_indices algorithm::
5587
* mpi_scatter_end algorithm::
5590
File: rheolef.info, Node: msg_local_context algorithm, Up: Algorithms
5592
6.1 msg_local_context - receive pattern
5593
=======================================
5595
(Source file: `skit/plib2/msg_local_context.h')
5600
Computes the receive compresed message local pattern, i.e. the only
5601
part that does not requires communication. (see "msg_to_context"(5)).
5608
"input": the index sets and the local processor index range |
5609
idx(0:nidx-1), idy(0:nidx-1), istart, ilast "output": the send and
5610
receive local contexts (to, from) | to_loc_idx(0:n_local-1),
5611
from_loc_idy(0:n_local-1) begin | if n_local <> 0 then |
5612
iloc := 0 | for k := 0 to nidx-1 do | if idy(k) in
5613
(istart, ilast( then | to_loc_idx(iloc) := idy(k) - istart
5614
| from_loc_idy(iloc) := idy(k) | iloc := iloc + 1
5615
| endif | endfor | endif end
5620
Memory and time complexity is O(receive_total_size).
5626
class InputIterator1,
5627
class InputIterator2,
5629
class OutputIterator1,
5630
class OutputIterator2>
5633
InputIterator1 idx, // nidx
5634
InputIterator1 last_idx,
5635
InputIterator2 idy, // nidx
5639
OutputIterator1 to_loc_idx, // n_local
5640
OutputIterator1 last_to_loc_idx,
5641
OutputIterator2 from_loc_idy) // n_local
5643
if (to_loc_idx == last_to_loc_idx) {
5646
while (idx != last_idx) {
5648
if (idx_i >= istart && idx_i < ilast) {
5650
assert_macro (idy_i < idy_maxval, "Scattering past end of TO vector");
5651
(*to_loc_idx++) = idx_i - istart;
5652
(*from_loc_idy++) = idy_i;
5660
File: rheolef.info, Node: msg_left_permutation_apply algorithm, Up: Algorithms
5662
6.2 msg_left_permutation_apply - sequentail apply
5663
=================================================
5665
(Source file: `skit/plib2/msg_left_permutation_apply.h')
5670
Applies a left permutation to an array.
5675
msg_left_permutation_apply
5677
"input": the length array | x(0:nx-1), py(0:n-1) "output": the
5678
pointer array and the total size | y(0:n) begin | for i := 0
5679
to n-1 do | y(py(i)) := x(i) | endfor end
5684
Time and memory complexity is O(n).
5690
class InputIterator1,
5691
class InputIterator2,
5693
class OutputRandomIterator>
5695
msg_left_permutation_apply (
5699
InputIterator2 last_py,
5700
OutputRandomIterator y)
5702
while (py != last_py)
5707
File: rheolef.info, Node: msg_local_optimize algorithm, Up: Algorithms
5709
6.3 msg_local_optimize - local scatter optimize
5710
===============================================
5712
(Source file: `skit/plib2/msg_local_optimize.h')
5717
Optimize local exchanges during gatter/scatter.
5724
"input": the send and receive local contexts (to, from) |
5725
to_loc_idx(0:n_local-1), from_loc_idy(0:n_local-1) "output": the
5726
boolean, true when optimization is possible | has_opt begin |
5727
if n_local = 0 then | has_opt := false | else |
5728
to_start := to_loc_idx(0) | from_start := from_loc_idy(0) |
5729
has_opt := true | i := 1 | while i < n_local and
5730
has_opt do | to_start := to_start + 1 |
5731
from_start := from_start + 1 | if to_loc_idx(i) <> to_start
5732
| or from_loc_idy(i) <> from_start then | has_opt
5733
:= false | endif | i := i + 1 | endwhile
5739
Memory and time complexity is O(receive_total_size).
5745
class InputIterator1,
5746
class InputIterator2>
5748
msg_local_optimize (
5749
InputIterator1 to_loc_idx, // n_local
5750
InputIterator1 last_to_loc_idx,
5751
InputIterator2 from_loc_idy) // n_local
5753
typedef typename std::iterator_traits<InputIterator1>::value_type Size;
5754
if (to_loc_idx == last_to_loc_idx) {
5757
Size to_start = *to_loc_idx++;
5758
Size from_start = *from_loc_idy++;
5759
bool has_opt = true;
5760
while (to_loc_idx != last_to_loc_idx && has_opt) {
5763
if ((*to_loc_idx++) != to_start ||
5764
(*from_loc_idy++) != from_start) {
5772
File: rheolef.info, Node: mpi_polymorphic_assembly_end algorithm, Up: Algorithms
5774
6.4 mpi_polymorphic_assembly_end - polymorphic array assembly
5775
=============================================================
5777
(Source file: `skit/plib2/mpi_polymorphic_assembly_end.h')
5782
Finish a dense polymorphic array assembly.
5792
template <class Container, class Message>
5793
struct mpi_polymorphic_assembly_end_t<Container,Message,N> {
5796
const distributor& ownership,
5797
Message& receive, // buffer
5798
Message& send, // buffer
5799
boost::array<typename Container::size_type,N>& receive_max_size,
5801
Container& x) const;
5805
template <class Container, class Message>
5807
mpi_polymorphic_assembly_end_t<Container,Message,N>::operator() (
5809
const distributor& ownership,
5810
Message& receive, // buffer
5811
Message& send, // buffer
5812
boost::array<typename Container::size_type,N>& receive_max_size,
5816
typedef typename Container::size_type size_type;
5817
const size_type _n_variant = Container::_n_variant; // should be = N
5818
// -----------------------------------------------------------------
5819
// 1) receive data and store it in container
5820
// -----------------------------------------------------------------
5821
// note: for wait_any, build an iterator adapter that scan the pair.second in [index,request]
5822
// and then get an iterator in the pair using iter.base(): retrive the corresponding index
5823
// for computing the position in the receive.data buffer
5824
typedef boost::transform_iterator<
5825
select2nd<size_type,mpi::request>,
5826
typename std::list<std::pair<size_type,mpi::request> >::iterator>
5829
#define _RHEOLEF_receive_data(z,k,unused) \
5830
while (receive.waits[k].size() != 0) { \
5831
typedef typename Container::T##k T##k; \
5832
typedef typename std::pair<size_type,T##k> data##k##_type; \
5833
request_iterator iter_r_waits (receive.waits[k].begin(), select2nd<size_type,mpi::request>()), \
5834
last_r_waits (receive.waits[k].end(), select2nd<size_type,mpi::request>()); \
5835
/* waits on any receive... */ \
5836
std::pair<mpi::status,request_iterator> pair_status = mpi::wait_any (iter_r_waits, last_r_waits); \
5837
/* check status */ \
5838
boost::optional<int> i_msg_size_opt = pair_status.first.template count<data##k##_type>(); \
5839
check_macro (i_msg_size_opt, "receive wait failed"); \
5840
int iproc = pair_status.first.source(); \
5841
check_macro (iproc >= 0, "receive: source iproc = "<<iproc<<" < 0 !"); \
5842
/* get size of receive and number in data */ \
5843
size_type i_msg_size = (size_type)i_msg_size_opt.get(); \
5844
typename std::list<std::pair<size_type,mpi::request> >::iterator i_pair_ptr \
5845
= pair_status.second.base(); \
5846
size_type i_receive = (*i_pair_ptr).first; \
5847
size_type i_start = i_receive*receive_max_size[k]; \
5848
for (size_type j = i_start; j < i_start + i_msg_size; j++) { \
5849
size_type first_index = ownership.first_index(); \
5850
size_type index = receive.data._stack_##k[j].first; \
5851
T##k value = receive.data._stack_##k[j].second; \
5852
x.assign (index - first_index, value); \
5854
receive.waits[k].erase (i_pair_ptr); \
5856
BOOST_PP_REPEAT(N, _RHEOLEF_receive_data, ~)
5857
#undef _RHEOLEF_receive_data
5860
size_type start = ownership().first_index(); \
5861
size_type last = ownership().last_index(); \
5862
if (i_global >= start && i_global < last) \
5863
polymorphic_array_seq_rep<T,V,N>::assign (i_global - start, val); \
5866
// -----------------------------------------------------------------
5868
// -----------------------------------------------------------------
5869
for (size_type k = 0; k < _n_variant; k++) {
5870
request_iterator iter_s_waits (send.waits[k].begin(), select2nd<size_type,mpi::request>()),
5871
last_s_waits (send.waits[k].end(), select2nd<size_type,mpi::request>());
5872
size_type send_nproc = send.waits[k].size();
5873
std::vector<mpi::status> send_status (send_nproc);
5874
mpi::wait_all (iter_s_waits, last_s_waits, send_status.begin());
5876
// -----------------------------------------------------------------
5877
// 3) clear send & receive messages [waits,data]
5878
// -----------------------------------------------------------------
5882
receive.waits.clear();
5883
receive.data.clear();
5888
File: rheolef.info, Node: mpi_polymorphic_assembly_begin algorithm, Up: Algorithms
5890
6.5 mpi_polymorph_assembly_begin - for polymorph_array
5891
======================================================
5893
(Source file: `skit/plib2/mpi_polymorphic_assembly_begin.h')
5898
Start a dense polymorph array matrix assembly.
5903
Assume that stash has indexes in increasing order. cpu complexity :
5904
O(stash.size + nproc) memory complexity : O(stash.size + nproc) msg
5905
size complexity : O(stash.size + nproc)
5907
When assembling a finite element matrix, the stash.size is at boundaries
5908
of the mesh partition, and stash.size = O(nrow^alpha), where
5909
alpha=(d-1)/d and d=2,3. Using nproc <= O(nrow^alpha) is performant.
5914
The stash may be sorted by increasing nows and column.
5916
Inspirated from Petsc (petsc/src/vec/vec/impls/mpi/pdvec.c), here with
5917
a pre-sorted stash, thanks to the stl::map data structure.
5920
File: rheolef.info, Node: msg_right_permutation_apply algorithm, Up: Algorithms
5922
6.6 msg_right_permutation_apply - sequentail apply
5923
==================================================
5925
(Source file: `skit/plib2/msg_right_permutation_apply.h')
5930
Applies a permutation to an array.
5935
msg_right_permutation_apply
5937
"input": the length array | perm(0:n-1), x(0:nx-1) "output": the
5938
pointer array and the total size | y(0:n) begin | for i := 0
5939
to n-1 do | y(i) := x(perm(i)) | endfor end
5944
Time and memory complexity is O(n).
5950
class InputIterator,
5951
class InputRandomIterator,
5952
class OutputIterator,
5955
msg_right_permutation_apply (
5957
InputIterator last_perm,
5958
const InputRandomIterator& x,
5962
for (; perm != last_perm; y++, perm++) {
5963
// something like: (*y++) = x[(*perm++)];
5964
set_op (y, x, *perm);
5970
File: rheolef.info, Node: mpi_assembly_end algorithm, Up: Algorithms
5972
6.7 msgé_assembly_end - array or matrix assembly
5973
=================================================
5975
(Source file: `skit/plib2/mpi_assembly_end.h')
5980
Finish a dense array or sparse matrix assembly.
5999
Size receive_max_size,
6003
typedef Size size_type;
6004
typedef typename Container::data_type data_type;
6005
// -----------------------------------------------------------------
6006
// 1) receive data and store it in container
6007
// -----------------------------------------------------------------
6009
// note: for wait_any, build an iterator adapter that scan the pair.second in [index,request]
6010
// and then get an iterator in the pair using iter.base(): retrive the corresponding index
6011
// for computing the position in the receive.data buffer
6012
typedef boost::transform_iterator<select2nd<size_type,mpi::request>,
6013
typename std::list<std::pair<size_type,mpi::request> >::iterator>
6016
#define _RHEOLEF_BUG_FOR_NON_MPI_DATA_TYPE
6017
#ifdef _RHEOLEF_BUG_FOR_NON_MPI_DATA_TYPE
6018
while (receive.waits.size() != 0) {
6019
request_iterator iter_r_waits (receive.waits.begin(), select2nd<size_type,mpi::request>()),
6020
last_r_waits (receive.waits.end(), select2nd<size_type,mpi::request>());
6021
// waits on any receive...
6022
std::pair<mpi::status,request_iterator> pair_status = mpi::wait_any (iter_r_waits, last_r_waits);
6024
boost::optional<int> i_msg_size_opt = pair_status.first.template count<data_type>();
6025
check_macro (i_msg_size_opt, "receive wait failed");
6026
int iproc = pair_status.first.source();
6027
check_macro (iproc >= 0, "receive: source iproc = "<<iproc<<" < 0 !");
6028
// get size of receive and number in data
6029
size_type i_msg_size = (size_type)i_msg_size_opt.get();
6030
typename std::list<std::pair<size_type,mpi::request> >::iterator i_pair_ptr = pair_status.second.base();
6031
size_type i_receive = (*i_pair_ptr).first;
6032
size_type i_start = i_receive*receive_max_size;
6033
for (size_type j = i_start; j < i_start + i_msg_size; j++) {
6034
x (receive.data[j]);
6036
receive.waits.erase (i_pair_ptr);
6038
#else // _RHEOLEF_BUG_FOR_NON_MPI_DATA_TYPE
6039
// wait_all works better when using an array of non mpi_data_type, as an array of set<size_t>
6040
request_iterator iter_r_waits (receive.waits.begin(), select2nd<size_type,mpi::request>()),
6041
last_r_waits (receive.waits.end(), select2nd<size_type,mpi::request>());
6042
std::vector<mpi::status> recv_status (receive.waits.size());
6043
mpi::wait_all (iter_r_waits, last_r_waits, recv_status.begin());
6044
for (size_type i_recv = 0, n_recv = recv_status.size(); i_recv < n_recv; i_recv++) {
6045
boost::optional<int> i_msg_size_opt = recv_status[i_recv].template count<data_type>();
6046
check_macro (i_msg_size_opt, "receive wait failed");
6047
int iproc = recv_status[i_recv].source();
6048
check_macro (iproc >= 0, "receive: source iproc = "<<iproc<<" < 0 !");
6049
// get size of receive and number in data
6050
size_type i_msg_size = (size_type)i_msg_size_opt.get();
6051
size_type i_start = i_recv*receive_max_size;
6052
for (size_type j = i_start; j < i_start + i_msg_size; j++) {
6053
x (receive.data[j]);
6056
#endif // _RHEOLEF_BUG_FOR_NON_MPI_DATA_TYPE
6057
// -----------------------------------------------------------------
6059
// -----------------------------------------------------------------
6060
request_iterator iter_s_waits (send.waits.begin(), select2nd<size_type,mpi::request>()),
6061
last_s_waits (send.waits.end(), select2nd<size_type,mpi::request>());
6062
Size send_nproc = send.waits.size();
6063
std::vector<mpi::status> send_status(send_nproc);
6064
mpi::wait_all (iter_s_waits, last_s_waits, send_status.begin());
6065
// -----------------------------------------------------------------
6066
// 3) clear send & receive messages [waits,data]
6067
// -----------------------------------------------------------------
6070
receive.waits.clear();
6071
receive.data.clear();
6072
return x.n_new_entry();
6076
File: rheolef.info, Node: mpi_scatter_begin algorithm, Up: Algorithms
6078
6.8 mpi_scatter_begin - gather/scatter initialize
6079
=================================================
6081
(Source file: `skit/plib2/mpi_scatter_begin.h')
6086
Begin communication for distributed to sequential scatter context.
6094
Even though the next routines are written with distributed vectors,
6095
either x or y (but not both) may be sequential vectors, one for each
6098
from indices indicate where arriving stuff is stashed to indices
6099
indicate where departing stuff came from. the naming can be a
6102
Reverse scatter is obtained by swapping (to,from) in calls to
6103
scatter_begin and scatter_end. Reverse scatter is usefull for
6104
transpose matrix-vector multiply.
6106
Scatter general operation is handled by a template "SetOp" type. The
6107
special case "SetOp=set_op" of an affectation is treated separatly,
6108
tacking care of local scatter affectation. Thus, the
6109
"mpi_scatter_begin" routine is splitted into
6110
"msg_scatter_begin_global" and "msg_scatter_begin_local" parts.
6116
class InputIterator,
6121
mpi_scatter_begin_global (
6128
typedef typename Message::size_type size_type;
6129
// ----------------------------------------------------------
6131
// ----------------------------------------------------------
6132
from.requests.clear();
6134
size_type n_receive = from.starts().size() - 1;
6135
size_type i_start = 0;
6136
for (size_type i = 0; i < n_receive; i++) {
6137
size_type i_size = from.starts() [i+1] - from.starts() [i];
6138
mpi::request i_req = comm.irecv(
6141
from.values().begin().operator->() + i_start,
6144
from.requests.push_back (std::make_pair(i, i_req));
6147
// ----------------------------------------------------------
6148
// 2) apply right permutation
6149
// ----------------------------------------------------------
6152
// ----------------------------------------------------------
6154
// ----------------------------------------------------------
6155
to.requests.clear();
6157
size_type n_send = to.starts().size() - 1;
6158
size_type i_start = 0;
6159
for (size_type i = 0; i < n_send; i++) {
6160
size_type i_size = to.starts() [i+1] - to.starts() [i];
6161
mpi::request i_req = comm.isend(
6164
to.values().begin().operator->() + i_start,
6167
to.requests.push_back (std::make_pair(i, i_req));
6172
class InputIterator,
6173
class OutputIterator,
6177
mpi_scatter_begin_local (
6185
msg_both_permutation_apply (
6186
to.local_slots.begin(),
6187
to.local_slots.end(),
6190
from.local_slots.begin(),
6194
// take care of local insert: template specialisation
6196
class InputIterator,
6197
class OutputIterator,
6200
mpi_scatter_begin_local (
6205
set_op<typename Message::value_type, typename Message::value_type> op)
6208
// used when x & y have distinct pointer types (multi-valued)
6209
if (y == x && ! to.local_nonmatching_computed) {
6210
// scatter_local_optimize(to,from);
6211
fatal_macro ("y == x: adress matches in scatter: not yet -- sorry");
6213
if (to.local_is_copy) {
6215
std::copy(x + to.local_copy_start,
6216
x + to.local_copy_start + to.local_copy_length,
6217
y + from.local_copy_start);
6219
} else if (y != x || ! to.local_nonmatching_computed) {
6221
msg_both_permutation_apply (
6222
to.local_slots.begin(),
6223
to.local_slots.end(),
6226
from.local_slots.begin(),
6229
} else { // !to.local_is_copy && y == x && to.local_nonmatching_computed
6231
msg_both_permutation_apply (
6232
to.local_slots_nonmatching.begin(),
6233
to.local_slots_nonmatching.end(),
6236
from.local_slots_nonmatching.begin(),
6242
class InputIterator,
6243
class OutputIterator,
6259
mpi_scatter_begin_global (x, from, to, tag, comm);
6260
if (to.n_local() == 0) {
6263
error_macro ("local messages: no more supported");
6264
mpi_scatter_begin_local (x, y, from, to, op);
6268
File: rheolef.info, Node: msg_from_context_pattern algorithm, Up: Algorithms
6270
6.9 msg_from_context - gather
6271
=============================
6273
(Source file: `skit/plib2/msg_from_context_pattern.h')
6278
Computes the receive compressed message pattern for gather and
6284
msg_from_context_pattern
6286
"input": the ownership distribution and the indexes arrays |
6287
msg_size(0:nproc) "output": the receive context (from) and an
6288
auxilliary array proc2from_proc | from_proc(0:send_nproc-1),
6289
from_ptr(0:send_nproc), | proc2from_proc(0:nproc-1) begin |
6290
i := 0 | from_ptr(0) := 0 | for iproc := 0 to nproc-1 do
6291
| if msg_size(iproc) <> 0 then | from_proc(i) := iproc
6292
| from_ptr(i+1) := from_ptr(i) + msg_size(i) |
6293
proc2from_proc(iproc) := i | i := i+1 | endif |
6299
At the end of the algorithm, we have i = send_nproc.
6304
Complexity is O(nproc).
6310
class InputIterator1,
6311
class OutputIterator1,
6312
class OutputIterator2,
6313
class OutputIterator3>
6315
msg_from_context_pattern (
6316
InputIterator1 msg_size, // nproc
6317
InputIterator1 last_msg_size,
6318
OutputIterator1 from_proc, // send_nproc
6319
OutputIterator2 from_ptr, // send_nproc+1
6320
OutputIterator3 proc2from_proc) // nproc
6322
typedef typename std::iterator_traits<InputIterator1>::value_type Size;
6326
(*from_ptr++) = ptr_val;
6327
while (msg_size != last_msg_size) {
6328
Size sz = (*msg_size++);
6330
(*from_proc++) = iproc;
6332
(*from_ptr++) = ptr_val;
6333
(*proc2from_proc) = i;
6342
File: rheolef.info, Node: mpi_scatter_init algorithm, Up: Algorithms
6344
6.10 mpi_scatter_init - gather/scatter initialize
6345
=================================================
6347
(Source file: `skit/plib2/mpi_scatter_init.h')
6352
Initialize communication for distributed to sequential scatter
6358
Time and memory complexity is O(nidx+nproc). For finite-element
6359
problems in d dimenion
6361
| nidx ~ N^((d-1)/d)
6363
where N is the number of degrees of freedom.
6365
IMPLEMENTATION Inspirated from petsc-2.0/vpscat.c:
6366
VecScatterCreate_PtoS()
6371
template <class Message, class Size, class SizeRandomIterator1, class SizeRandomIterator2, class Tag>
6376
SizeRandomIterator1 idx,
6378
SizeRandomIterator1 idy,
6380
SizeRandomIterator2 ownership,
6382
const distributor::communicator_type& comm,
6387
typedef Size size_type;
6388
size_type my_proc = comm.rank();
6389
size_type nproc = comm.size();
6391
// -------------------------------------------------------
6392
// 1) first count number of contributors to each processor
6393
// -------------------------------------------------------
6394
std::vector<size_type> msg_size(nproc, 0);
6395
std::vector<size_type> msg_mark(nproc, 0);
6396
std::vector<size_type> owner (nidx);
6397
size_type send_nproc = 0;
6399
size_type iproc = 0;
6400
for (size_type i = 0; i < nidx; i++) {
6401
for (; iproc < nproc; iproc++) {
6402
if (idx[i] >= ownership[iproc] && idx[i] < ownership[iproc+1]) {
6405
if (!msg_mark[iproc]) {
6406
msg_mark[iproc] = 1;
6412
check_macro (iproc != nproc, "bad stash data: idx["<<i<<"]="<<idx[i]<<" out of range [0:"<<ownership[nproc]<<"[");
6415
// -------------------------------------------------------
6416
// 2) avoid to send message to my-proc in counting
6417
// -------------------------------------------------------
6418
size_type n_local = msg_size[my_proc];
6420
msg_size [my_proc] = 0;
6421
msg_mark [my_proc] = 0;
6424
// ----------------------------------------------------------------
6425
// 3) compute number of messages to be send to my_proc
6426
// ----------------------------------------------------------------
6427
std::vector<size_type> work(nproc);
6430
msg_mark.begin().operator->(),
6432
work.begin().operator->(),
6433
std::plus<size_type>());
6434
size_type receive_nproc = work [my_proc];
6435
// ----------------------------------------------------------------
6436
// 4) compute messages max size to be send to my_proc
6437
// ----------------------------------------------------------------
6440
msg_size.begin().operator->(),
6442
work.begin().operator->(),
6443
mpi::maximum<size_type>());
6444
size_type receive_max_size = work [my_proc];
6445
// ----------------------------------------------------------------
6446
// 5) post receive: exchange the buffer adresses between processes
6447
// ----------------------------------------------------------------
6448
std::list<std::pair<size_type,mpi::request> > receive_waits;
6449
std::vector<size_type> receive_data (receive_nproc*receive_max_size);
6450
for (size_type i_receive = 0; i_receive < receive_nproc; i_receive++) {
6451
mpi::request i_req = comm.irecv (
6454
receive_data.begin().operator->() + i_receive*receive_max_size,
6456
receive_waits.push_back (std::make_pair(i_receive, i_req));
6458
// ---------------------------------------------------------------------------
6459
// 6) compute the send indexes
6460
// ---------------------------------------------------------------------------
6461
// comme idx est trie, on peut faire une copie de idx dans send_data
6462
// et du coup owner et send_data_ownership sont inutiles
6463
std::vector<size_type> send_data (nidx);
6464
std::copy (idx, idx+nidx, send_data.begin());
6465
// ---------------------------------------------------------------------------
6467
// ---------------------------------------------------------------------------
6468
std::list<std::pair<size_type,mpi::request> > send_waits;
6470
size_type i_send = 0;
6471
size_type i_start = 0;
6472
for (size_type iproc = 0; iproc < nproc; iproc++) {
6473
size_type i_msg_size = msg_size[iproc];
6474
if (i_msg_size == 0) continue;
6475
mpi::request i_req = comm.isend (
6478
send_data.begin().operator->() + i_start,
6480
send_waits.push_back(std::make_pair(i_send,i_req));
6482
i_start += i_msg_size;
6485
// ---------------------------------------------------------------------------
6486
// 8) wait on receives
6487
// ---------------------------------------------------------------------------
6488
// note: for wait_all, build an iterator adapter that scan the pair.second in [index,request]
6489
// and then get an iterator in the pair using iter.base(): retrive the corresponding index
6490
// for computing the position in the receive.data buffer
6491
typedef boost::transform_iterator<select2nd<size_t,mpi::request>, std::list<std::pair<size_t,mpi::request> >::iterator>
6493
std::vector<size_type> receive_size (receive_nproc);
6494
std::vector<size_type> receive_proc (receive_nproc);
6495
size_type receive_total_size = 0;
6496
while (receive_waits.size() != 0) {
6497
typedef size_type data_type; // exchanged data is of "size_type"
6498
request_iterator iter_r_waits (receive_waits.begin(), select2nd<size_t,mpi::request>()),
6499
last_r_waits (receive_waits.end(), select2nd<size_t,mpi::request>());
6500
// waits on any receive...
6501
std::pair<mpi::status,request_iterator> pair_status = mpi::wait_any (iter_r_waits, last_r_waits);
6503
boost::optional<int> i_msg_size_opt = pair_status.first.count<data_type>();
6504
check_macro (i_msg_size_opt, "receive wait failed");
6505
int iproc = pair_status.first.source();
6506
check_macro (iproc >= 0, "receive: source iproc = "<<iproc<<" < 0 !");
6507
// get size of receive and number in data
6508
size_type i_msg_size = (size_t)i_msg_size_opt.get();
6509
std::list<std::pair<size_t,mpi::request> >::iterator i_pair_ptr = pair_status.second.base();
6510
size_type i_receive = (*i_pair_ptr).first;
6511
receive_proc [i_receive] = iproc;
6512
receive_size [i_receive] = i_msg_size;
6513
receive_total_size += i_msg_size;
6514
receive_waits.erase (i_pair_ptr);
6516
// ---------------------------------------------------------------------------
6517
// 9) allocate the entire send(to) scatter context
6518
// ---------------------------------------------------------------------------
6519
to.resize (receive_total_size, receive_nproc);
6521
// ---------------------------------------------------------------------------
6522
// 10) compute the permutation of values that gives the sorted source[] sequence
6523
// ---------------------------------------------------------------------------
6524
// init: perm[i] = i
6525
std::vector<size_type> perm(receive_nproc);
6526
copy(index_iterator<size_type>(), index_iterator<size_type>(receive_nproc), perm.begin());
6527
sort_with_permutation (
6529
receive_proc.begin().operator->(),
6530
perm.begin().operator->());
6531
// ---------------------------------------------------------------------------
6532
// 11) Computes the receive compresed message pattern for send(to)
6533
// ---------------------------------------------------------------------------
6534
size_type istart = ownership[my_proc]; // = ownership.first_index()
6538
receive_proc.begin(),
6539
receive_size.begin(),
6540
receive_data.begin(),
6544
to.starts().begin(),
6545
to.indices().begin());
6546
// ---------------------------------------------------------------------------
6547
// 12) allocate the entire receive(from) scatter context
6548
// ---------------------------------------------------------------------------
6549
from.resize(nidy, send_nproc);
6550
// ---------------------------------------------------------------------------
6551
// 13) Computes the receive compresed message pattern for receive(from)
6552
// ---------------------------------------------------------------------------
6553
std::vector<size_type> proc2from_proc(nproc);
6554
msg_from_context_pattern (
6557
from.procs().begin(),
6558
from.starts().begin(),
6559
proc2from_proc.begin());
6560
// ---------------------------------------------------------------------------
6561
// 14) Computes the receive compresed message indices for receive(from)
6562
// ---------------------------------------------------------------------------
6563
// assume that indices are sorted by increasing order
6564
std::vector<size_type> start(send_nproc+1);
6565
copy (from.starts().begin(), from.starts().end(), start.begin());
6566
msg_from_context_indices (
6570
proc2from_proc.begin(),
6574
from.indices().begin());
6575
// ---------------------------------------------------------------------------
6576
// 15) wait on sends
6577
// ---------------------------------------------------------------------------
6578
request_iterator iter_s_waits (send_waits.begin(), select2nd<size_type,mpi::request>()),
6579
last_s_waits (send_waits.end(), select2nd<size_type,mpi::request>());
6580
mpi::wait_all (iter_s_waits, last_s_waits);
6581
// ---------------------------------------------------------------------------
6582
// 16) Computes the receive compresed message local pattern,
6583
// i.e. the only part that does not requires communication.
6584
// ---------------------------------------------------------------------------
6585
from.local_slots.resize(n_local);
6586
to.local_slots.resize(n_local);
6587
size_type ilast = ownership[my_proc+1]; // = ownership.last_index()
6595
to.local_slots.begin(),
6596
to.local_slots.end(),
6597
from.local_slots.begin());
6598
// ---------------------------------------------------------------------------
6599
// 17) Optimize local exchanges during gatter/scatter
6600
// ---------------------------------------------------------------------------
6601
bool has_opt = msg_local_optimize (
6602
to.local_slots.begin(),
6603
to.local_slots.end(),
6604
from.local_slots.begin());
6606
if (has_opt && n_local != 0) {
6607
to.local_is_copy = true;
6608
to.local_copy_start = to.local_slots[0];
6609
to.local_copy_length = n_local;
6610
from.local_is_copy = true;
6611
from.local_copy_start = from.local_slots[0];
6612
from.local_copy_length = n_local;
6617
File: rheolef.info, Node: msg_both_permutation_apply algorithm, Up: Algorithms
6619
6.11 msg_both_permutation_apply - sequentail apply
6620
==================================================
6622
(Source file: `skit/plib2/msg_both_permutation_apply.h')
6627
Applies permutations when copying an array.
6632
msg_both_permutation_apply
6634
"input": the length array | px(0:n-1), x(0:nx-1), py(0:n-1)
6635
"output": the pointer array and the total size | y(0:ny) begin
6636
| for i := 0 to n-1 do | y(py(i)) := x(px(i)) | endfor end
6641
Time and memory complexity is O(n).
6647
class InputIterator1,
6648
class InputIterator2,
6649
class InputRandomIterator,
6651
class OutputRandomIterator>
6653
msg_both_permutation_apply (
6655
InputIterator1 last_px,
6656
InputRandomIterator x,
6659
OutputRandomIterator y)
6661
while (px != last_px)
6662
set_op(y[*py++], x[*px++]);
6664
} // namespace rheolef
6667
File: rheolef.info, Node: mpi_assembly_begin algorithm, Up: Algorithms
6669
6.12 mpi_assembly_begin - for array or matrix
5022
6670
=============================================
5024
(Source file: `skit/lib/pminres.h')
5029
template <class Matrix, class Vector, class Preconditioner, class Real>
5030
int pminres (const Matrix &A, Vector &x, const Vector &b, const Preconditioner &M,
5031
int &max_iter, Real &tol, std::ostream *p_cerr=0);
5036
The simplest call to 'pminres' has the folling form:
5037
size_t max_iter = 100;
5039
int status = pminres(a, x, b, EYE, max_iter, tol, &cerr);
5044
`pminres' solves the symmetric positive definite linear system Ax=b
5045
using the Conjugate Gradient method.
5047
The return value indicates convergence within max_iter (input)
5048
iterations (0), or no convergence within max_iter iterations (1).
5049
Upon successful return, output arguments have the following values:
5051
approximate solution to Ax = b
5054
the number of iterations performed before the tolerance was reached
5057
the residual after the final iteration
5062
`pminres' follows the algorithm described in "Solution of sparse
5063
indefinite systems of linear equations", C. C. Paige and M. A.
5064
Saunders, SIAM J. Numer. Anal., 12(4), 1975. For more, see
5065
http://www.stanford.edu/group/SOL/software.html and also the PhD
5066
"Iterative methods for singular linear equations and least-squares
5067
problems", S.-C. T. Choi, Stanford University, 2006,
5068
http://www.stanford.edu/group/SOL/dissertations/sou-cheng-choi-thesis.pdf
5069
at page 60. The present implementation style is inspired from `IML++
5070
1.2' iterative method library, `http://math.nist.gov/iml++'.
5075
template <class Matrix, class Vector, class Preconditioner, class Real, class Size>
5076
int pminres(const Matrix &A, Vector &x, const Vector &Mb, const Preconditioner &M,
5077
Size &max_iter, Real &tol, std::ostream *p_cerr = 0, std::string label = "minres")
5079
Vector b = M.solve(Mb);
5080
Real norm_b = sqrt(fabs(dot(Mb,b)));
5081
if (norm_b == Real(0.)) norm_b = 1;
5083
Vector Mr = Mb - A*x;
5084
Vector z = M.solve(Mr);
5085
Real beta2 = dot(Mr, z);
5086
Real norm_r = sqrt(fabs(beta2));
5087
if (p_cerr) (*p_cerr) << "[" << label << "] #iteration residue" << std::endl;
5088
if (p_cerr) (*p_cerr) << "[" << label << "] 0 " << norm_r/norm_b << std::endl;
5089
if (beta2 < 0 || norm_r <= tol*norm_b) {
5090
tol = norm_r/norm_b;
5092
warning_macro ("beta2 = " << beta2 << " < 0: stop");
5095
Real beta = sqrt(beta2);
5097
Vector Mv = Mr/beta;
5103
Vector u_old (x.size(), 0.);
5104
Vector Mv_old (x.size(), 0.);
5105
Vector w (x.size(), 0.);
5106
Vector w_old (x.size(), 0.);
5107
Vector w_old2 (x.size(), 0.);
5108
for (Size n = 1; n <= max_iter; n++) {
5112
Real alpha = dot(Mr, u);
5113
Mr = Mr - alpha*Mv - beta*Mv_old;
5114
z = z - alpha*u - beta*u_old;
5117
warning_macro ("pminres: machine precision problem");
5118
tol = norm_r/norm_b;
5122
Real beta_old = beta;
5125
Real c_old2 = c_old;
5126
Real s_old2 = s_old;
5129
Real rho0 = c_old*alpha - c_old2*s_old*beta_old;
5130
Real rho2 = s_old*alpha + c_old2*c_old*beta_old;
5131
Real rho1 = sqrt(sqr(rho0) + sqr(beta));
5132
Real rho3 = s_old2 * beta_old;
5139
w = (u - rho2*w_old - rho3*w_old2)/rho1;
5148
if (p_cerr) (*p_cerr) << "[" << label << "] " << n << " " << norm_r/norm_b << std::endl;
5149
if (norm_r <= tol*norm_b) {
5150
tol = norm_r/norm_b;
5155
tol = norm_r/norm_b;
5160
File: rheolef.info, Node: qmr algorithm, Up: Algorithms
5162
6.6 `qmr' - quasi-minimal residual algoritm
5163
===========================================
5165
(Source file: `skit/lib/qmr.h')
5170
template <class Matrix, class Vector, class Preconditioner1,
5171
class Preconditioner2, class Real>
5172
int qmr (const Matrix &A, Vector &x, const Vector &b,
5173
const Preconditioner1 &M1, const Preconditioner2 &M2,
5174
int &max_iter, Real &tol);
5179
The simplest call to 'qmr' has the folling form:
5180
int status = qmr(a, x, b, EYE, EYE, 100, 1e-7);
5185
`qmr' solves the unsymmetric linear system Ax = b using the the
5186
quasi-minimal residual method.
5188
The return value indicates convergence within max_iter (input)
5189
iterations (0), or no convergence within max_iter iterations (1).
5190
Upon successful return, output arguments have the following values:
5192
approximate solution to Ax = b
5195
the number of iterations performed before the tolerance was reached
5198
the residual after the final iteration
5200
A return value of 1 indicates that the method did not reach the
5201
specified convergence tolerance in the maximum numbefr of iterations.
5202
A return value of 2 indicates that a breackdown associated with `rho'
5203
occurred. A return value of 3 indicates that a breackdown associated
5204
with `beta' occurred. A return value of 4 indicates that a
5205
breackdown associated with `gamma' occurred. A return value of 5
5206
indicates that a breackdown associated with `delta' occurred. A
5207
return value of 6 indicates that a breackdown associated with `epsilon'
5208
occurred. A return value of 7 indicates that a breackdown associated
5214
`qmr' is an iterative template routine.
5216
`qmr' follows the algorithm described on p. 24 in
5218
Templates for the Solution of Linear Systems: Building
5219
Blocks for Iterative Methods, 2nd Edition, R.
5220
Barrett, M. Berry, T. F. Chan, J. Demmel, J. Donato, J. Dongarra,
5221
V. Eijkhout, R. Pozo, C. Romine, H. Van der Vorst,
5222
SIAM, 1994, `ftp.netlib.org/templates/templates.ps'.
5224
The present implementation is inspired from `IML++ 1.2' iterative
5225
method library, `http://math.nist.gov/iml++'.
5228
File: rheolef.info, Node: pcg algorithm, Up: Algorithms
5230
6.7 `pcg' - conjugate gradient algorithm.
5231
=========================================
5233
(Source file: `skit/lib/pcg.h')
5238
template <class Matrix, class Vector, class Preconditioner, class Real>
5239
int pcg (const Matrix &A, Vector &x, const Vector &b,
5240
const Preconditioner &M, int &max_iter, Real &tol, std::ostream *p_cerr=0);
5245
The simplest call to 'pcg' has the folling form:
5246
size_t max_iter = 100;
5248
int status = pcg(a, x, b, EYE, max_iter, tol, &cerr);
5253
`pcg' solves the symmetric positive definite linear system Ax=b using
5254
the Conjugate Gradient method.
5256
The return value indicates convergence within max_iter (input)
5257
iterations (0), or no convergence within max_iter iterations (1).
5258
Upon successful return, output arguments have the following values:
5260
approximate solution to Ax = b
5263
the number of iterations performed before the tolerance was reached
5266
the residual after the final iteration
5271
`pcg' is an iterative template routine.
5273
`pcg' follows the algorithm described on p. 15 in
5275
Templates for the Solution of Linear Systems: Building
5276
Blocks for Iterative Methods, 2nd Edition, R.
5277
Barrett, M. Berry, T. F. Chan, J. Demmel, J. Donato, J. Dongarra,
5278
V. Eijkhout, R. Pozo, C. Romine, H. Van der Vorst,
5279
SIAM, 1994, `ftp.netlib.org/templates/templates.ps'.
5281
The present implementation is inspired from `IML++ 1.2' iterative
5282
method library, `http://math.nist.gov/iml++'.
5287
template < class Matrix, class Vector, class Preconditioner, class Real, class Size>
5288
int pcg(const Matrix &A, Vector &x, const Vector &Mb, const Preconditioner &M,
5289
Size &max_iter, Real &tol, std::ostream *p_cerr = 0, std::string label = "cg")
5291
Vector b = M.solve(Mb);
5292
Real norm2_b = dot(Mb,b);
5293
if (norm2_b == Real(0)) norm2_b = 1;
5294
Vector Mr = Mb - A*x;
5296
if (p_cerr) (*p_cerr) << "[" << label << "] #iteration residue" << std::endl;
5298
for (Size n = 0; n <= max_iter; n++) {
5299
Vector r = M.solve(Mr);
5300
Real prev_norm2_r = norm2_r;
5301
norm2_r = dot(Mr, r);
5302
if (p_cerr) (*p_cerr) << "[" << label << "] " << n << " " << ::sqrt(norm2_r/norm2_b) << std::endl;
5303
if (norm2_r <= sqr(tol)*norm2_b) {
5304
tol = ::sqrt(norm2_r/norm2_b);
5311
Real beta = norm2_r/prev_norm2_r;
5315
Real alpha = norm2_r/dot(Mq, p);
5319
tol = ::sqrt(norm2_r/norm2_b);
5324
File: rheolef.info, Node: bicgstab algorithm, Up: Algorithms
5326
6.8 `bicgstab' - bi-conjugate gradient stabilized method
5327
========================================================
5329
(Source file: `skit/lib/bicgstab.h')
5334
int bicgstab (const Matrix &A, Vector &x, const Vector &b,
5335
const Preconditioner &M, int &max_iter, Real &tol);
5340
The simplest call to 'bicgstab' has the folling form:
5341
int status = bicgstab(a, x, b, EYE, 100, 1e-7);
5346
`bicgstab' solves the unsymmetric linear system Ax = b using the
5347
preconditioned bi-conjugate gradient stabilized method
5349
The return value indicates convergence within max_iter (input)
5350
iterations (0), or no convergence within max_iter iterations (1).
5351
Upon successful return, output arguments have the following values:
5353
approximate solution to Ax = b
5356
the number of iterations performed before the tolerance was reached
5359
the residual after the final iteration
5364
`bicgstab' is an iterative template routine.
5366
`bicgstab' follows the algorithm described on p. 24 in
5368
Templates for the Solution of Linear Systems: Building
5369
Blocks for Iterative Methods, 2nd Edition, R.
5370
Barrett, M. Berry, T. F. Chan, J. Demmel, J. Donato, J. Dongarra,
5371
V. Eijkhout, R. Pozo, C. Romine, H. Van der Vorst,
5372
SIAM, 1994, `ftp.netlib.org/templates/templates.ps'.
5374
The present implementation is inspired from `IML++ 1.2' iterative
5375
method library, `http://math.nist.gov/iml++'.
5378
File: rheolef.info, Node: puzawa algorithm, Up: Algorithms
5380
6.9 `puzawa' - Uzawa algorithm.
5381
===============================
5383
(Source file: `skit/lib/puzawa.h')
5388
template <class Matrix, class Vector, class Preconditioner, class Real>
5389
int puzawa (const Matrix &A, Vector &x, const Vector &b, const Preconditioner &M,
5390
int &max_iter, Real &tol, const Real& rho, std::ostream *p_cerr=0);
5395
The simplest call to 'puzawa' has the folling form:
5396
size_t max_iter = 100;
5398
int status = puzawa(A, x, b, EYE, max_iter, tol, 1.0, &cerr);
5403
`puzawa' solves the linear system A*x=b using the Uzawa method. The
5404
Uzawa method is a descent method in the direction opposite to the
5405
gradient, with a constant step length 'rho'. The convergence is
5406
assured when the step length 'rho' is small enough. If matrix A is
5407
symmetric positive definite, please uses 'pcg' that computes
5408
automatically the optimal descdnt step length at each iteration.
5410
The return value indicates convergence within max_iter (input)
5411
iterations (0), or no convergence within max_iter iterations (1).
5412
Upon successful return, output arguments have the following values:
5414
approximate solution to Ax = b
5417
the number of iterations performed before the tolerance was reached
5420
the residual after the final iteration
5425
template < class Matrix, class Vector, class Preconditioner, class Real, class Size>
5426
int puzawa(const Matrix &A, Vector &x, const Vector &Mb, const Preconditioner &M,
5427
Size &max_iter, Real &tol, const Real& rho,
5428
std::ostream *p_cerr, std::string label)
5430
Vector b = M.solve(Mb);
5431
Real norm2_b = dot(Mb,b);
5432
Real norm2_r = norm2_b;
5433
if (norm2_b == Real(0)) norm2_b = 1;
5434
if (p_cerr) (*p_cerr) << "[" << label << "] #iteration residue" << std::endl;
5435
for (Size n = 0; n <= max_iter; n++) {
5436
Vector Mr = A*x - Mb;
5437
Vector r = M.solve(Mr);
5438
norm2_r = dot(Mr, r);
5439
if (p_cerr) (*p_cerr) << "[" << label << "] " << n << " " << sqrt(norm2_r/norm2_b) << std::endl;
5440
if (norm2_r <= sqr(tol)*norm2_b) {
5441
tol = sqrt(norm2_r/norm2_b);
5447
tol = sqrt(norm2_r/norm2_b);
5452
File: rheolef.info, Node: gmres algorithm, Up: Algorithms
5454
6.10 `gmres' - generalized minimum residual method
5455
==================================================
5457
(Source file: `skit/lib/gmres.h')
5462
template <class Operator, class Vector, class Preconditioner,
5463
class Matrix, class Real, class Int>
5464
int gmres (const Operator &A, Vector &x, const Vector &b,
5465
const Preconditioner &M, Matrix &H, Int m, Int &max_iter, Real &tol);
5470
The simplest call to `gmres' has the folling form:
5473
int status = gmres(a, x, b, EYE, H, m, 100, 1e-7);
5478
`gmres' solves the unsymmetric linear system Ax = b using the
5479
generalized minimum residual method.
5481
The return value indicates convergence within max_iter (input)
5482
iterations (0), or no convergence within max_iter iterations (1).
5483
Upon successful return, output arguments have the following values:
5485
approximate solution to Ax = b
5488
the number of iterations performed before the tolerance was reached
5491
the residual after the final iteration
5492
In addition, M specifies a preconditioner, H specifies a matrix to
5493
hold the coefficients of the upper Hessenberg matrix constructed by
5494
the `gmres' iterations, `m' specifies the number of iterations for
5497
`gmres' requires two matrices as input, A and H. The matrix A, which
5498
will typically be a sparse matrix) corresponds to the matrix in the
5499
linear system Ax=b. The matrix H, which will be typically a dense
5500
matrix, corresponds to the upper Hessenberg matrix H that is
5501
constructed during the `gmres' iterations. Within `gmres', H is used
5502
in a different way than A, so its class must supply different
5503
functionality. That is, A is only accessed though its matrix-vector
5504
and transpose-matrix-vector multiplication functions. On the other
5505
hand, `gmres' solves a dense upper triangular linear system of
5506
equations on H. Therefore, the class to which H belongs must provide
5507
H(i,j) operator for element acess.
5512
It is important to remember that we use the convention that indices
5513
are 0-based. That is H(0,0) is the first component of the matrix H.
5514
Also, the type of the matrix must be compatible with the type of
5515
single vector entry. That is, operations such as H(i,j)*x(j) must be
5516
able to be carried out.
5518
`gmres' is an iterative template routine.
5520
`gmres' follows the algorithm described on p. 20 in
5522
Templates for the Solution of Linear Systems: Building
5523
Blocks for Iterative Methods, 2nd Edition, R.
5524
Barrett, M. Berry, T. F. Chan, J. Demmel, J. Donato, J. Dongarra,
5525
V. Eijkhout, R. Pozo, C. Romine, H. Van der Vorst,
5526
SIAM, 1994, `ftp.netlib.org/templates/templates.ps'.
5528
The present implementation is inspired from `IML++ 1.2' iterative
5529
method library, `http://math.nist.gov/iml++'.
5534
template < class Matrix, class Vector, class Int >
5536
Update(Vector &x, Int k, Matrix &h, Vector &s, Vector v[])
5541
for (Int i = k; i >= 0; i--) {
5543
for (Int j = i - 1; j >= 0; j--)
5544
y(j) -= h(j,i) * y(i);
5547
for (Int j = 0; j <= k; j++)
5552
template < class Real >
5556
return (x > Real(0) ? x : -x);
5561
template < class Operator, class Vector, class Preconditioner,
5562
class Matrix, class Real, class Int >
5564
gmres(const Operator &A, Vector &x, const Vector &b,
5565
const Preconditioner &M, Matrix &H, const Int &m, Int &max_iter,
5570
Vector s(m+1), cs(m+1), sn(m+1), w;
5572
Real normb = norm(M.solve(b));
5573
Vector r = M.solve(b - A * x);
5574
Real beta = norm(r);
5576
if (normb == Real(0))
5579
if ((resid = norm(r) / normb) <= tol) {
5585
Vector *v = new Vector[m+1];
5587
while (j <= max_iter) {
5588
v[0] = r * (1.0 / beta); // ??? r / beta
5592
for (i = 0; i < m && j <= max_iter; i++, j++) {
5593
w = M.solve(A * v[i]);
5594
for (k = 0; k <= i; k++) {
5595
H(k, i) = dot(w, v[k]);
5596
w -= H(k, i) * v[k];
5598
H(i+1, i) = norm(w);
5599
v[i+1] = w * (1.0 / H(i+1, i)); // ??? w / H(i+1, i)
5601
for (k = 0; k < i; k++)
5602
ApplyPlaneRotation(H(k,i), H(k+1,i), cs(k), sn(k));
5604
GeneratePlaneRotation(H(i,i), H(i+1,i), cs(i), sn(i));
5605
ApplyPlaneRotation(H(i,i), H(i+1,i), cs(i), sn(i));
5606
ApplyPlaneRotation(s(i), s(i+1), cs(i), sn(i));
5608
if ((resid = abs(s(i+1)) / normb) < tol) {
5609
Update(x, i, H, s, v);
5616
Update(x, m - 1, H, s, v);
5617
r = M.solve(b - A * x);
5619
if ((resid = beta / normb) < tol) {
5631
template<class Real>
5632
void GeneratePlaneRotation(Real &dx, Real &dy, Real &cs, Real &sn)
5634
if (dy == Real(0)) {
5637
} else if (abs(dy) > abs(dx)) {
5638
Real temp = dx / dy;
5639
sn = 1.0 / ::sqrt( 1.0 + temp*temp );
5642
Real temp = dy / dx;
5643
cs = 1.0 / ::sqrt( 1.0 + temp*temp );
5647
template<class Real>
5648
void ApplyPlaneRotation(Real &dx, Real &dy, Real &cs, Real &sn)
5650
Real temp = cs * dx + sn * dy;
5651
dy = -sn * dx + cs * dy;
6672
(Source file: `skit/plib2/mpi_assembly_begin.h')
6677
Start a dense array or a sparse matrix assembly.
6682
Assume that stash has indexes in increasing order. cpu complexity :
6683
O(stash.size + nproc) memory complexity : O(stash.size + nproc) msg
6684
size complexity : O(stash.size + nproc)
6686
When assembling a finite element matrix, the stash.size is at boundaries
6687
of the mesh partition, and stash.size = O(nrow^alpha), where
6688
alpha=(d-1)/d and d=2,3. Using nproc <= O(nrow^alpha) is performant.
6693
The stash may be sorted by increasing nows and column.
6695
Inspirated from Petsc (petsc/src/vec/vec/impls/mpi/pdvec.c), here with
6696
a pre-sorted stash, thanks to the stl::map data structure.
6704
class InputIterator>
6705
typename Stash::size_type
6706
mpi_assembly_begin (
6709
InputIterator first_stash_idx, // wrapper in shash
6710
InputIterator last_stash_idx,
6711
const distributor& ownership,
6713
Message& receive, // buffer
6714
Message& send) // buffer
6716
typedef typename Stash::size_type size_type;
6718
mpi::communicator comm = ownership.comm();
6719
size_type my_proc = ownership.process();
6720
size_type nproc = ownership.n_process();
6721
distributor::tag_type tag = distributor::get_new_tag();
6723
// ----------------------------------------------------------------
6724
// 1) count the messages contained in stash by process id
6725
// ----------------------------------------------------------------
6726
// assume that stash elements are sorted by increasing stash_idx (e.g. stash = stl::map)
6727
// cpu complexity = O(stash.size + nproc)
6728
// mem complexity = O(stash.size + nproc)
6729
std::vector<size_type> msg_size(nproc, 0);
6730
std::vector<size_type> msg_mark(nproc, 0);
6731
size_type send_nproc = 0;
6733
size_type iproc = 0;
6735
for (InputIterator iter_idx = first_stash_idx; iter_idx != last_stash_idx; iter_idx++, i++) {
6736
for (; iproc < nproc; iproc++) {
6737
if (*iter_idx >= ownership[iproc] && *iter_idx < ownership[iproc+1]) {
6739
if (!msg_mark[iproc]) {
6740
msg_mark[iproc] = 1;
6746
assert_macro (iproc != nproc, "bad stash data: index "<<*iter_idx<<" out of range [0:"<<ownership[nproc]<<"[");
6749
// ----------------------------------------------------------------
6750
// 2) avoid to send message to my-proc in counting
6751
// ----------------------------------------------------------------
6752
if (msg_size [my_proc] != 0) {
6753
msg_size [my_proc] = 0;
6754
msg_mark [my_proc] = 0;
6757
// ----------------------------------------------------------------
6758
// 3) compute number of messages to be send to my_proc
6759
// ----------------------------------------------------------------
6760
// msg complexity : O(nproc) or O(log(nproc)), depending on reduce implementation
6761
std::vector<size_type> work (nproc);
6764
msg_mark.begin().operator->(),
6766
work.begin().operator->(),
6767
std::plus<size_type>());
6768
size_type receive_nproc = work [my_proc];
6769
// ----------------------------------------------------------------
6770
// 4) compute messages max size to be send to my_proc
6771
// ----------------------------------------------------------------
6772
// msg complexity : O(nproc) or O(log(nproc)), depending on reduce implementation
6775
msg_size.begin().operator->(),
6777
work.begin().operator->(),
6778
mpi::maximum<size_type>());
6779
size_type receive_max_size = work [my_proc];
6780
// ----------------------------------------------------------------
6781
// 5) post receive: exchange the buffer adresses between processes
6782
// ----------------------------------------------------------------
6783
// Each message will consist of ordered pairs (global index,value).
6784
// since we don't know how long each indiidual message is,
6785
// we allocate the largest : receive_nproc*receive_max_size
6786
// potentially, this is a lot of wasted space
6787
// TODO: how to optimize the receive.data buffer ?
6788
// cpu complexity : O(nproc)
6789
// mem complexity : O(nproc*(stash.size/nproc)) = O(stash.size), worst case ?
6790
// msg complexity : O(nproc)
6791
receive.data.resize (receive_nproc*receive_max_size);
6792
for (size_t i_receive = 0; i_receive < receive_nproc; i_receive++) {
6793
mpi::request i_req = comm.irecv (
6796
receive.data.begin().operator->() + i_receive*receive_max_size,
6798
receive.waits.push_back (std::make_pair(i_receive, i_req));
6800
// ----------------------------------------------------------------
6801
// 6) copy stash in send buffer
6802
// ----------------------------------------------------------------
6803
// since the stash is sorted by increasing order => simple copy
6804
// cpu complexity : O(stash.size)
6805
// mem complexity : O(stash.size)
6806
send.data.resize (stash.size());
6807
copy (stash.begin(), stash.end(), send.data.begin());
6808
// ---------------------------------------------------------------------------
6810
// ---------------------------------------------------------------------------
6811
// cpu complexity : O(nproc)
6812
// mem complexity : O(send_nproc) \approx O(nproc), worst case
6813
// msg complexity : O(stash.size)
6814
send.waits.resize(send_nproc);
6816
size_type i_send = 0;
6817
size_type i_start = 0;
6818
for (size_type iproc = 0; iproc < nproc; iproc++) {
6819
size_type i_msg_size = msg_size[iproc];
6820
if (i_msg_size == 0) continue;
6821
mpi::request i_req = comm.isend (
6824
send.data.begin().operator->() + i_start,
6826
send.waits.push_back(std::make_pair(i_send,i_req));
6828
i_start += i_msg_size;
6831
return receive_max_size;
6835
File: rheolef.info, Node: msg_to_context algorithm, Up: Algorithms
6837
6.13 msg_to_context - receive pattern
6838
=====================================
6840
(Source file: `skit/plib2/msg_to_context.h')
6845
Computes the receive compresed message pattern for gather and scatter.
6846
The local message part is computed by a separate algorithm (see
6847
"msg_to_local_context"(5)).
6854
"input": the receive pattern and the permutation |
6855
perm(0:receive_nproc-1) | r_iproc(0:receive_nproc-1),
6856
r_size(0:receive_nproc-1), |
6857
r_idx(0:receive_nproc*receive_max_size-1) "output": the receive
6858
context (to) | to_proc(0:receive_nproc-1), to_ptr(0:receive_nproc),
6859
| to_idx(0:receive_total_size-1) begin | to_ptr(0) := 0 |
6860
for j := 0 to receive_nproc-1 do | j1 := perm(j) |
6861
to_proc(j) := r_iproc(j1) | to_ptr(j+1) := r_ptr(j) + rsize(j1)
6862
| for q := to_ptr(j) to to_tr(j+1)-1 do | to_idx(q) :=
6863
r_idx(j1, q-to_ptr(j)) - istart | endfor | endfor end
6868
Memory and time complexity is O(receive_total_size).
6874
class InputIterator1,
6875
class InputRandomIterator2,
6876
class InputRandomIterator3,
6877
class InputRandomIterator4,
6879
class OutputIterator1,
6880
class OutputIterator2,
6881
class OutputIterator3>
6884
InputIterator1 perm, // receive_nproc
6885
InputIterator1 last_perm,
6886
InputRandomIterator2 r_iproc, // receive_nproc
6887
InputRandomIterator3 r_size, // receive_nproc
6888
InputRandomIterator4 r_idx, // receive_nproc*receive_max_size
6889
Size receive_max_size,
6891
OutputIterator1 to_proc, // receive_nproc
6892
OutputIterator2 to_ptr, // receive_nproc+1
6893
OutputIterator3 to_idx) // receive_total_size
6895
OutputIterator2 prec_ptr = to_ptr;
6897
while (perm != last_perm) {
6898
Size j1 = (*perm++);
6899
(*to_proc++) = r_iproc[j1];
6900
Size size = r_size[j1];
6901
(*to_ptr++) = (*prec_ptr++) + size;
6902
InputRandomIterator4 iter_idx = r_idx + j1*receive_max_size;
6903
InputRandomIterator4 last_idx = iter_idx + size;
6904
while (iter_idx != last_idx)
6905
(*to_idx++) = (*iter_idx++) - istart;
6910
File: rheolef.info, Node: msg_from_context_indices algorithm, Up: Algorithms
6912
6.14 msg_from_context_indices - gather
6913
======================================
6915
(Source file: `skit/plib2/msg_from_context_indices.h')
6920
Computes the receive compressed message pattern for gather and
6921
scatter. Suppose indexes are sorted by increasing order.
6926
msg_from_context_indices
6928
"input": the owner and indice arrays, the current process |
6929
owner(0:nidx-1), idy(0:nidx-1), | proc2from_proc(0:nproc-1), my_proc
6930
"input-output": the pointer array, used for accumulation |
6931
ptr(0:nidx) "output": the receive context (from) indice array |
6932
from_idx(0:nidx-1) begin | for k := 0 to nidx-1 do | iproc :=
6933
owner(k) | if iproc <> my_proc then | i :=
6934
proc2from_proc(iproc) | p := ptr(i) | ptr(i) := p + 1 |
6935
from_idx(p) := idy(k) | endif | endfor end
6940
Complexity is O(nidx).
6945
Uses input iterators.
6951
class InputIterator1,
6952
class InputIterator2,
6953
class InputRandomIterator,
6956
class MutableRandomIterator,
6957
class OutputIterator>
6959
msg_from_context_indices (
6960
InputIterator1 owner, // nidx
6961
InputIterator1 last_owner,
6962
InputIterator2 idy, // nidx
6963
InputRandomIterator proc2from_proc, // nproc
6966
MutableRandomIterator ptr, // send_nproc+1
6967
OutputIterator from_idx) // nidx
6969
Size nidx = distance(owner,last_owner);
6970
for (Size i = 0; i < nidx; i++) {
6971
if (owner[i] != my_proc) {
6972
assert_macro (idy[i] < idy_maxval, "Scattering past end of TO vector: idy="
6973
<< idy[i] << " out of range 0.." << idy_maxval-1);
6974
Size p = ptr[proc2from_proc[owner[i]]]++;
6975
from_idx[p] = idy[i];
6981
File: rheolef.info, Node: mpi_scatter_end algorithm, Up: Algorithms
6983
6.15 mpi_scatter_end - gather/scatter finalize
6984
==============================================
6986
(Source file: `skit/plib2/mpi_scatter_end.h')
6991
Finishes communication for distributed to sequential scatter context.
6997
class InputIterator,
6998
class OutputIterator,
7013
typedef typename Message::base_value_type data_type; // the data type to be received by mpi
7014
typedef boost::transform_iterator<select2nd<size_t,mpi::request>, std::list<std::pair<size_t,mpi::request> >::iterator>
7017
// -----------------------------------------------------------
7018
// 1) wait on receives and unpack receives into local space
7019
// -----------------------------------------------------------
7020
while (from.requests.size() != 0) {
7021
request_iterator iter_r_waits (from.requests.begin(), select2nd<size_t,mpi::request>()),
7022
last_r_waits (from.requests.end(), select2nd<size_t,mpi::request>());
7023
// waits on any receive...
7024
std::pair<mpi::status,request_iterator> pair_status = mpi::wait_any (iter_r_waits, last_r_waits);
7026
boost::optional<int> i_msg_size_opt = pair_status.first.count<data_type>();
7027
check_macro (i_msg_size_opt, "receive wait failed");
7028
int iproc = pair_status.first.source();
7029
check_macro (iproc >= 0, "receive: source iproc = "<<iproc<<" < 0 !");
7030
// get size of receive and number in data
7031
size_t i_msg_size = (size_t)i_msg_size_opt.get();
7032
std::list<std::pair<size_t,mpi::request> >::iterator i_pair_ptr = pair_status.second.base();
7033
size_t i_receive = (*i_pair_ptr).first;
7034
check_macro (i_msg_size == from.starts()[i_receive+1] - from.starts()[i_receive], "unexpected size");
7036
// unpack receives into our local space
7037
from.store_values (y, i_receive, op);
7038
from.requests.erase (i_pair_ptr);
7040
// -----------------------------------------------------------
7042
// -----------------------------------------------------------
7043
request_iterator iter_s_waits (to.requests.begin(), select2nd<size_t,mpi::request>()),
7044
last_s_waits (to.requests.end(), select2nd<size_t,mpi::request>());
7045
mpi::wait_all (iter_s_waits, last_s_waits);
7334
File: rheolef.info, Node: numbering internal, Up: Internals
7336
8.2 `numbering' - global degree of freedom numbering
7337
====================================================
7339
(Source file: `nfem/plib/numbering.h')
7344
The `numbering' class defines methods that furnish global numbering
7345
of degrees of freedom. This numbering depends upon the degrees of
7346
polynoms on elements and upon the continuity requirement at
7347
inter-element boundary. For instance the "P1" continuous finite
7348
element approximation has one degree of freedom per vertice of the
7349
mesh, while its discontinuous counterpart has dim(basis) times the
7350
number of elements of the mesh, where dim(basis) is the size of the
7351
local finite element basis.
7356
template <class T, class M = rheo_default_memory_model>
7357
class numbering : public smart_pointer<numbering_rep<T,M> > {
7362
typedef numbering_rep<T,M> rep;
7363
typedef smart_pointer<rep> base;
7364
typedef size_t size_type;
7368
numbering (std::string name = "");
7369
numbering (numbering_rep<T,M>* ptr);
7372
// accessors & modifiers:
7374
std::string name() const;
7375
size_type degree () const;
7376
void set_degree (size_type degree) const;
7377
bool is_continuous() const;
7378
bool is_discontinuous() const { return !is_continuous(); }
7380
size_type ndof (const geo_size& gs, size_type map_dim) const;
7381
size_type dis_ndof (const geo_size& gs, size_type map_dim) const;
7382
void dis_idof (const geo_size& gs, const geo_element& K, std::vector<size_type>& dis_idof) const;
7383
basic_point<T> x_dof (const class geo_basic<T,M>& omega, size_type idof) const;
7384
void set_ios_permutations (const class geo_basic<T,M>& omega,
7385
array<size_type,M>& idof2ios_dis_idof,
7386
array<size_type,M>& ios_idof2dis_idof) const;
7390
void dump(std::ostream& out = std::cerr) const;
7394
File: rheolef.info, Node: tetrahedron internal, Up: Internals
7396
8.3 `tetrahedron' - Tetraedron reference element
7397
================================================
7399
(Source file: `nfem/geo_element/tetrahedron.icc')
7404
The tetrahedron reference element is
7405
K = { 0 < x < 1 and 0 < y < 1-x and 0 < z < 1-x-y }
7417
0-----------'.--------2 --> y
7425
Curved high order Pk tetrahedra (k >= 1) in 3d geometries are supported.
7426
These tetrahedra have additional edge-nodes, face-nodes and internal
7429
These nodes are numbered as
7430
---------------------------
7432
first vertex, then edge-node, following the edge numbering order and
7433
orientation, then face-nodes following the face numbering order and
7434
orientation, and finally the face internal nodes, following the
7435
tetrahedron lattice. See below for edges and faces numbering and
7443
0--------6--'.--------2
7454
The orientation is such that triedra (01, 02, 03) is direct, and all
7455
faces, see from exterior, are in the direct sens. References: P. L.
7456
Georges, "Generation automatique de maillages", page 24-, coll RMA, 16,
7457
Masson, 1994. Notice that the edge-nodes and face-nodes numbering
7458
slighly differ from those used in the `gmsh' mesh generator when using
7459
high-order elements. This difference is handled by the `msh2geo' mesh
7460
file converter (*note msh2geo command::).
7465
const size_t dimension = 3;
7466
const Float measure = Float(1.)/Float(6.);
7467
const size_t n_vertex = 4;
7468
const Float vertex [n_vertex][dimension] = {
7473
const size_t n_face = 4;
7474
const size_t face [n_face][3] = {
7479
const size_t n_edge = 6;
7480
const size_t edge [n_edge][2] = {
7489
File: rheolef.info, Node: point internal, Up: Internals
7491
8.4 `point' - Point reference element
7492
=====================================
7494
(Source file: `nfem/geo_element/point.icc')
7499
The point reference element is defined for convenience. It is a
7500
0-dimensional element with measure equal to 1.
7505
const size_t dimension = 0;
7506
const size_t measure = 1;
7509
File: rheolef.info, Node: scratch_array internal, Up: Internals
7511
8.5 scratch_array - container in distributed environment
7512
========================================================
7514
(Source file: `nfem/geo_element/scratch_array.h')
7519
STL-like vector container for a distributed memory machine model.
7520
Contrarily to array<T>, here T can have a size only known at compile
7521
time. This class is used when T is a geo_element raw class, i.e.
7522
T=geo_element_e_raw. The size of the geo_element depends upon the oder
7523
and is known only at run-time. For efficiency purpose, the
7524
scratch_array allocate all geo_elements of the same variant (e.g. edge)
7525
and order in a contiguous area, since the coreesponding element size is
7531
A sample usage of the class is:
7532
std::pair<size_t,size_t> param (reference_element::t, 3); // triangle, order=3
7533
scratch_array<geo_element_raw> x (distributor(100), param);
7534
The scratch_array<T> interface is similar to those of the array<T> one.
7539
There are many pre-requises for the template objet type T:
7540
class T : public T::generic_type {
7541
typedef variant_type;
7543
typedef genetic_type;
7544
typedef automatic_type;
7545
static const variant_type _variant;
7546
static size_t _data_size(const parameter_type& param);
7547
static size_t _value_size(const parameter_type& param);
7549
class T::automatic_type : public T::generic_type {
7550
automatic_type (const parameter_type& param);
7552
class T::generic_type {
7555
typedef const_iterator;
7556
iterator _data_begin();
7557
const_iterator _data_begin() const;
7559
ostream& operator<< (ostream&, const T::generic_type&);
7564
template <class T, class A>
7565
class scratch_array<T,sequential,A> : public smart_pointer<scratch_array_seq_rep<T,A> > {
7570
typedef scratch_array_seq_rep<T,A> rep;
7571
typedef smart_pointer<rep> base;
7573
typedef sequential memory_type;
7574
typedef typename rep::size_type size_type;
7575
typedef typename rep::value_type value_type;
7576
typedef typename rep::reference reference;
7577
typedef typename rep::dis_reference dis_reference;
7578
typedef typename rep::iterator iterator;
7579
typedef typename rep::const_reference const_reference;
7580
typedef typename rep::const_iterator const_iterator;
7581
typedef typename rep::parameter_type parameter_type;
7585
scratch_array (const A& alloc = A());
7586
scratch_array (size_type loc_size, const parameter_type& param, const A& alloc = A());
7587
void resize (const distributor& ownership, const parameter_type& param);
7588
scratch_array (const distributor& ownership, const parameter_type& param, const A& alloc = A());
7589
void resize (size_type loc_size, const parameter_type& param);
7591
// local accessors & modifiers:
7593
A get_allocator() const { return base::data().get_allocator(); }
7594
size_type size () const { return base::data().size(); }
7595
size_type dis_size () const { return base::data().dis_size(); }
7596
const distributor& ownership() const { return base::data().ownership(); }
7597
const communicator& comm() const { return ownership().comm(); }
7599
reference operator[] (size_type i) { return base::data().operator[] (i); }
7600
const_reference operator[] (size_type i) const { return base::data().operator[] (i); }
7602
iterator begin() { return base::data().begin(); }
7603
const_iterator begin() const { return base::data().begin(); }
7604
iterator end() { return base::data().end(); }
7605
const_iterator end() const { return base::data().end(); }
7607
// global modifiers (for compatibility with distributed interface):
7609
dis_reference dis_entry (size_type dis_i) { return operator[] (dis_i); }
7610
void dis_entry_assembly() {}
7611
template<class SetOp>
7612
void dis_entry_assembly(SetOp my_set_op) {}
7613
template<class SetOp>
7614
void dis_entry_assembly_begin (SetOp my_set_op) {}
7615
template<class SetOp>
7616
void dis_entry_assembly_end (SetOp my_set_op) {}
7618
// apply a partition:
7621
template<class RepSize>
7622
void repartition ( // old_numbering for *this
7623
const RepSize& partition, // old_ownership
7624
scratch_array<T,sequential,A>& new_array, // new_ownership (created)
7625
RepSize& old_numbering, // new_ownership
7626
RepSize& new_numbering) const // old_ownership
7627
{ return base::data().repartition (partition, new_array, old_numbering, new_numbering); }
7629
template<class RepSize>
7630
void permutation_apply ( // old_numbering for *this
7631
const RepSize& new_numbering, // old_ownership
7632
scratch_array<T,sequential,A>& new_array) const // new_ownership (already allocated)
7633
{ return base::data().permutation_apply (new_numbering, new_array); }
7638
odiststream& put_values (odiststream& ops) const { return base::data().put_values(ops); }
7639
idiststream& get_values (idiststream& ips) { return base::data().get_values(ips); }
7640
template <class GetFunction>
7641
idiststream& get_values (idiststream& ips, GetFunction get_element) { return base::data().get_values(ips, get_element); }
7642
template <class PutFunction>
7643
odiststream& put_values (odiststream& ops, PutFunction put_element) const { return base::data().put_values(ops, put_element); }
7645
void dump (std::string name) const { return base::data().dump(name); }
7652
template <class T, class A>
7653
class scratch_array<T,distributed,A> : public smart_pointer<scratch_array_mpi_rep<T,A> > {
7658
typedef scratch_array_mpi_rep<T,A> rep;
7659
typedef smart_pointer<rep> base;
7661
typedef distributed memory_type;
7662
typedef typename rep::size_type size_type;
7663
typedef typename rep::value_type value_type;
7664
typedef typename rep::reference reference;
7665
typedef typename rep::dis_reference dis_reference;
7666
typedef typename rep::iterator iterator;
7667
typedef typename rep::parameter_type parameter_type;
7668
typedef typename rep::const_reference const_reference;
7669
typedef typename rep::const_iterator const_iterator;
7670
typedef typename rep::scatter_map_type scatter_map_type;
7674
scratch_array (const A& alloc = A());
7675
scratch_array (const distributor& ownership, const parameter_type& param, const A& alloc = A());
7676
void resize (const distributor& ownership, const parameter_type& param);
7678
// local accessors & modifiers:
7680
A get_allocator() const { return base::data().get_allocator(); }
7681
size_type size () const { return base::data().size(); }
7682
size_type dis_size () const { return base::data().dis_size(); }
7683
const distributor& ownership() const { return base::data().ownership(); }
7684
const communicator& comm() const { return base::data().comm(); }
7686
reference operator[] (size_type i) { return base::data().operator[] (i); }
7687
const_reference operator[] (size_type i) const { return base::data().operator[] (i); }
7689
iterator begin() { return base::data().begin(); }
7690
const_iterator begin() const { return base::data().begin(); }
7691
iterator end() { return base::data().end(); }
7692
const_iterator end() const { return base::data().end(); }
7696
template<class Set, class Map>
7697
void append_dis_entry (const Set& ext_idx_set, Map& ext_idx_map) const { base::data().append_dis_entry (ext_idx_set, ext_idx_map); }
7699
template<class Set, class Map>
7700
void get_dis_entry (const Set& ext_idx_set, Map& ext_idx_map) const { base::data().get_dis_entry (ext_idx_set, ext_idx_map); }
7703
void append_dis_indexes (const Set& ext_idx_set) { base::data().append_dis_indexes (ext_idx_set); }
7706
void set_dis_indexes (const Set& ext_idx_set) { base::data().set_dis_indexes (ext_idx_set); }
7708
const_reference dis_at (size_type dis_i) const { return base::data().dis_at (dis_i); }
7710
// get all external pairs (dis_i, values):
7711
const scatter_map_type& get_dis_map_entries() const { return base::data().get_dis_map_entries(); }
7713
// global modifiers (for compatibility with distributed interface):
7715
dis_reference dis_entry (size_type dis_i) { return base::data().dis_entry(dis_i); }
7717
void dis_entry_assembly() { return base::data().dis_entry_assembly(); }
7719
template<class SetOp>
7720
void dis_entry_assembly (SetOp my_set_op) { return base::data().dis_entry_assembly (my_set_op); }
7721
template<class SetOp>
7722
void dis_entry_assembly_begin (SetOp my_set_op) { return base::data().dis_entry_assembly_begin (my_set_op); }
7723
template<class SetOp>
7724
void dis_entry_assembly_end (SetOp my_set_op) { return base::data().dis_entry_assembly_end (my_set_op); }
7726
// apply a partition:
7728
template<class RepSize>
7729
void repartition ( // old_numbering for *this
7730
const RepSize& partition, // old_ownership
7731
scratch_array<T,distributed>& new_array, // new_ownership (created)
7732
RepSize& old_numbering, // new_ownership
7733
RepSize& new_numbering) const // old_ownership
7734
{ return base::data().repartition (partition.data(), new_array.data(), old_numbering.data(), new_numbering.data()); }
7737
template<class RepSize>
7738
void permutation_apply ( // old_numbering for *this
7739
const RepSize& new_numbering, // old_ownership
7740
scratch_array<T,distributed,A>& new_array) const // new_ownership (already allocated)
7741
{ base::data().permutation_apply (new_numbering.data(), new_array.data()); }
7743
void reverse_permutation ( // old_ownership for *this=iold2dis_inew
7744
scratch_array<size_type,distributed,A>& inew2dis_iold) const // new_ownership
7745
{ base::data().reverse_permutation (inew2dis_iold.data()); }
7750
odiststream& put_values (odiststream& ops) const { return base::data().put_values(ops); }
7751
idiststream& get_values (idiststream& ips) { return base::data().get_values(ips); }
7753
void dump (std::string name) const { return base::data().dump(name); }
7756
template <class GetFunction>
7757
idiststream& get_values (idiststream& ips, GetFunction get_element)
7758
{ return base::data().get_values(ips, get_element); }
7759
template <class PutFunction>
7760
odiststream& put_values (odiststream& ops, PutFunction put_element) const
7761
{ return base::data().put_values(ops, put_element); }
7763
template <class PutFunction, class Permutation>
7764
odiststream& permuted_put_values (
7766
const Permutation& perm,
7767
PutFunction put_element) const
7768
{ return base::data().permuted_put_values (ops, perm.data(), put_element); }
7772
File: rheolef.info, Node: hexa internal, Up: Internals
7774
8.6 `hexa' - Hexaedra reference element
7775
=======================================
7777
(Source file: `nfem/geo_element/hexa.icc')
7782
The hexa reference element is [-1,1]^3.
7791
0---+------3 - | ---> y
7798
Curved high order Pk hexaedrons (k >= 1) in 3d geometries are supported.
7799
These hexaedrons have additional edge-nodes, face-nodes and internal
7802
These nodes are numbered as
7803
---------------------------
7805
first vertex, then edge-node, following the edge numbering order and
7806
orientation, then face-nodes following the face numbering order and
7807
orientation, and finally the face internal nodes, following the
7808
hexaedra lattice. See below for edges and faces numbering and
7826
The orientation is such that triedra (01, 03, 04) is direct and all
7827
faces, see from exterior, are in the direct sens. References: P. L.
7828
Georges, "Generation automatique de maillages", page 24-, coll RMA, 16,
7829
Masson, 1994. Notice that the edge-nodes and face-nodes numbering
7830
slighly differ from those used in the `gmsh' mesh generator when using
7831
high-order elements. This difference is handled by the `msh2geo' mesh
7832
file converter (*note msh2geo command::).
7837
const size_t dimension = 3;
7838
const Float measure = 8;
7839
const size_t n_vertex = 8;
7840
const Float vertex [n_vertex][dimension] = {
7849
const size_t n_face = 6;
7850
const size_t face [n_face][4] = {
7857
const size_t n_edge = 12;
7858
const size_t edge [n_edge][2] = {
7873
File: rheolef.info, Node: tetra internal, Up: Internals
7875
8.7 `tetra' - Tetraedra reference element
7876
=========================================
7878
(Source file: `nfem/geo_element/tetra.icc')
7883
The tetrahedron reference element is
7884
K = { 0 < x < 1 and 0 < y < 1-x and 0 < z < 1-x-y }
7896
0-----------'.--------2 --> y
7904
Curved high order Pk tetrahedra (k >= 1) in 3d geometries are supported.
7905
These tetrahedra have additional edge-nodes, face-nodes and internal
7908
These nodes are numbered as
7909
---------------------------
7911
first vertex, then edge-node, following the edge numbering order and
7912
orientation, then face-nodes following the face numbering order and
7913
orientation, and finally the face internal nodes, following the
7914
tetrahedron lattice. See below for edges and faces numbering and
7922
0--------6--'.--------2
7933
The orientation is such that triedra (01, 02, 03) is direct, and all
7934
faces, see from exterior, are in the direct sens. References: P. L.
7935
Georges, "Generation automatique de maillages", page 24-, coll RMA, 16,
7936
Masson, 1994. Notice that the edge-nodes and face-nodes numbering
7937
slighly differ from those used in the `gmsh' mesh generator when using
7938
high-order elements. This difference is handled by the `msh2geo' mesh
7939
file converter (*note msh2geo command::).
7944
const size_t dimension = 3;
7945
const Float measure = Float(1.)/Float(6.);
7946
const size_t n_vertex = 4;
7947
const Float vertex [n_vertex][dimension] = {
7952
const size_t n_face = 4;
7953
const size_t face [n_face][3] = {
7958
const size_t n_edge = 6;
7959
const size_t edge [n_edge][2] = {
7968
File: rheolef.info, Node: hexahedron internal, Up: Internals
7970
8.8 `hexahedron' - Hexaedron reference element
7971
==============================================
7973
(Source file: `nfem/geo_element/hexahedron.icc')
7978
The hexahedron reference element is [-1,1]^3.
7987
0---+------3 - | ---> y
7994
Curved high order Pk hexaedra (k >= 1) in 3d geometries are supported.
7995
These hexaedra have additional edge-nodes, face-nodes and internal
7998
These nodes are numbered as
7999
---------------------------
8001
first vertex, then edge-node, following the edge numbering order and
8002
orientation, then face-nodes following the face numbering order and
8003
orientation, and finally the face internal nodes, following the
8004
hexaedron lattice. See below for edges and faces numbering and
8022
The orientation is such that triedra (01, 03, 04) is direct and all
8023
faces, see from exterior, are in the direct sens. References: P. L.
8024
Georges, "Generation automatique de maillages", page 24-, coll RMA, 16,
8025
Masson, 1994. Notice that the edge-nodes and face-nodes numbering
8026
slighly differ from those used in the `gmsh' mesh generator when using
8027
high-order elements. This difference is handled by the `msh2geo' mesh
8028
file converter (*note msh2geo command::).
8033
const size_t dimension = 3;
8034
const Float measure = 8;
8035
const size_t n_vertex = 8;
8036
const Float vertex [n_vertex][dimension] = {
8045
const size_t n_face = 6;
8046
const size_t face [n_face][4] = {
8053
const size_t n_edge = 12;
8054
const size_t edge [n_edge][2] = {
8069
File: rheolef.info, Node: hack_array internal, Up: Internals
8071
8.9 hack_array - container in distributed environment
8072
=====================================================
8074
(Source file: `nfem/geo_element/hack_array.h')
8079
STL-like vector container for a distributed memory machine model.
8080
Contrarily to array<T>, here T can have a size only known at compile
8081
time. This class is used when T is a geo_element raw class, i.e.
8082
T=geo_element_e_raw. The size of the geo_element depends upon the oder
8083
and is known only at run-time. For efficiency purpose, the hack_array
8084
allocate all geo_elements of the same variant (e.g. edge) and order in
8085
a contiguous area, since the coreesponding element size is constant.
8090
A sample usage of the class is:
8091
std::pair<size_t,size_t> param (reference_element::t, 3); // triangle, order=3
8092
hack_array<geo_element_raw> x (distributor(100), param);
8093
The hack_array<T> interface is similar to those of the array<T> one.
8098
There are many pre-requises for the template objet type T:
8099
class T : public T::generic_type {
8100
typedef variant_type;
8102
typedef genetic_type;
8103
typedef automatic_type;
8104
static const variant_type _variant;
8105
static size_t _data_size(const parameter_type& param);
8106
static size_t _value_size(const parameter_type& param);
8108
class T::automatic_type : public T::generic_type {
8109
automatic_type (const parameter_type& param);
8111
class T::generic_type {
8114
typedef const_iterator;
8115
iterator _data_begin();
8116
const_iterator _data_begin() const;
8118
ostream& operator<< (ostream&, const T::generic_type&);
8123
template <class T, class A>
8124
class hack_array<T,sequential,A> : public smart_pointer<hack_array_seq_rep<T,A> > {
8129
typedef hack_array_seq_rep<T,A> rep;
8130
typedef smart_pointer<rep> base;
8132
typedef sequential memory_type;
8133
typedef typename rep::size_type size_type;
8134
typedef typename rep::value_type value_type;
8135
typedef typename rep::reference reference;
8136
typedef typename rep::dis_reference dis_reference;
8137
typedef typename rep::iterator iterator;
8138
typedef typename rep::const_reference const_reference;
8139
typedef typename rep::const_iterator const_iterator;
8140
typedef typename rep::parameter_type parameter_type;
8144
hack_array (const A& alloc = A());
8145
hack_array (size_type loc_size, const parameter_type& param, const A& alloc = A());
8146
void resize (const distributor& ownership, const parameter_type& param);
8147
hack_array (const distributor& ownership, const parameter_type& param, const A& alloc = A());
8148
void resize (size_type loc_size, const parameter_type& param);
8150
// local accessors & modifiers:
8152
A get_allocator() const { return base::data().get_allocator(); }
8153
size_type size () const { return base::data().size(); }
8154
size_type dis_size () const { return base::data().dis_size(); }
8155
const distributor& ownership() const { return base::data().ownership(); }
8156
const communicator& comm() const { return ownership().comm(); }
8158
reference operator[] (size_type i) { return base::data().operator[] (i); }
8159
const_reference operator[] (size_type i) const { return base::data().operator[] (i); }
8161
iterator begin() { return base::data().begin(); }
8162
const_iterator begin() const { return base::data().begin(); }
8163
iterator end() { return base::data().end(); }
8164
const_iterator end() const { return base::data().end(); }
8166
// global modifiers (for compatibility with distributed interface):
8168
dis_reference dis_entry (size_type dis_i) { return operator[] (dis_i); }
8169
void dis_entry_assembly() {}
8170
template<class SetOp>
8171
void dis_entry_assembly(SetOp my_set_op) {}
8172
template<class SetOp>
8173
void dis_entry_assembly_begin (SetOp my_set_op) {}
8174
template<class SetOp>
8175
void dis_entry_assembly_end (SetOp my_set_op) {}
8177
// apply a partition:
8180
template<class RepSize>
8181
void repartition ( // old_numbering for *this
8182
const RepSize& partition, // old_ownership
8183
hack_array<T,sequential,A>& new_array, // new_ownership (created)
8184
RepSize& old_numbering, // new_ownership
8185
RepSize& new_numbering) const // old_ownership
8186
{ return base::data().repartition (partition, new_array, old_numbering, new_numbering); }
8188
template<class RepSize>
8189
void permutation_apply ( // old_numbering for *this
8190
const RepSize& new_numbering, // old_ownership
8191
hack_array<T,sequential,A>& new_array) const // new_ownership (already allocated)
8192
{ return base::data().permutation_apply (new_numbering, new_array); }
8197
odiststream& put_values (odiststream& ops) const { return base::data().put_values(ops); }
8198
idiststream& get_values (idiststream& ips) { return base::data().get_values(ips); }
8199
template <class GetFunction>
8200
idiststream& get_values (idiststream& ips, GetFunction get_element) { return base::data().get_values(ips, get_element); }
8201
template <class PutFunction>
8202
odiststream& put_values (odiststream& ops, PutFunction put_element) const { return base::data().put_values(ops, put_element); }
8204
void dump (std::string name) const { return base::data().dump(name); }
8211
template <class T, class A>
8212
class hack_array<T,distributed,A> : public smart_pointer<hack_array_mpi_rep<T,A> > {
8217
typedef hack_array_mpi_rep<T,A> rep;
8218
typedef smart_pointer<rep> base;
8220
typedef distributed memory_type;
8221
typedef typename rep::size_type size_type;
8222
typedef typename rep::value_type value_type;
8223
typedef typename rep::reference reference;
8224
typedef typename rep::dis_reference dis_reference;
8225
typedef typename rep::iterator iterator;
8226
typedef typename rep::parameter_type parameter_type;
8227
typedef typename rep::const_reference const_reference;
8228
typedef typename rep::const_iterator const_iterator;
8229
typedef typename rep::scatter_map_type scatter_map_type;
8233
hack_array (const A& alloc = A());
8234
hack_array (const distributor& ownership, const parameter_type& param, const A& alloc = A());
8235
void resize (const distributor& ownership, const parameter_type& param);
8237
// local accessors & modifiers:
8239
A get_allocator() const { return base::data().get_allocator(); }
8240
size_type size () const { return base::data().size(); }
8241
size_type dis_size () const { return base::data().dis_size(); }
8242
const distributor& ownership() const { return base::data().ownership(); }
8243
const communicator& comm() const { return base::data().comm(); }
8245
reference operator[] (size_type i) { return base::data().operator[] (i); }
8246
const_reference operator[] (size_type i) const { return base::data().operator[] (i); }
8248
iterator begin() { return base::data().begin(); }
8249
const_iterator begin() const { return base::data().begin(); }
8250
iterator end() { return base::data().end(); }
8251
const_iterator end() const { return base::data().end(); }
8255
template<class Set, class Map>
8256
void append_dis_entry (const Set& ext_idx_set, Map& ext_idx_map) const { base::data().append_dis_entry (ext_idx_set, ext_idx_map); }
8258
template<class Set, class Map>
8259
void get_dis_entry (const Set& ext_idx_set, Map& ext_idx_map) const { base::data().get_dis_entry (ext_idx_set, ext_idx_map); }
8262
void append_dis_indexes (const Set& ext_idx_set) { base::data().append_dis_indexes (ext_idx_set); }
8265
void set_dis_indexes (const Set& ext_idx_set) { base::data().set_dis_indexes (ext_idx_set); }
8267
const_reference dis_at (size_type dis_i) const { return base::data().dis_at (dis_i); }
8269
// get all external pairs (dis_i, values):
8270
const scatter_map_type& get_dis_map_entries() const { return base::data().get_dis_map_entries(); }
8272
// global modifiers (for compatibility with distributed interface):
8274
dis_reference dis_entry (size_type dis_i) { return base::data().dis_entry(dis_i); }
8276
void dis_entry_assembly() { return base::data().dis_entry_assembly(); }
8278
template<class SetOp>
8279
void dis_entry_assembly (SetOp my_set_op) { return base::data().dis_entry_assembly (my_set_op); }
8280
template<class SetOp>
8281
void dis_entry_assembly_begin (SetOp my_set_op) { return base::data().dis_entry_assembly_begin (my_set_op); }
8282
template<class SetOp>
8283
void dis_entry_assembly_end (SetOp my_set_op) { return base::data().dis_entry_assembly_end (my_set_op); }
8285
// apply a partition:
8287
template<class RepSize>
8288
void repartition ( // old_numbering for *this
8289
const RepSize& partition, // old_ownership
8290
hack_array<T,distributed>& new_array, // new_ownership (created)
8291
RepSize& old_numbering, // new_ownership
8292
RepSize& new_numbering) const // old_ownership
8293
{ return base::data().repartition (partition.data(), new_array.data(), old_numbering.data(), new_numbering.data()); }
8296
template<class RepSize>
8297
void permutation_apply ( // old_numbering for *this
8298
const RepSize& new_numbering, // old_ownership
8299
hack_array<T,distributed,A>& new_array) const // new_ownership (already allocated)
8300
{ base::data().permutation_apply (new_numbering.data(), new_array.data()); }
8302
void reverse_permutation ( // old_ownership for *this=iold2dis_inew
8303
hack_array<size_type,distributed,A>& inew2dis_iold) const // new_ownership
8304
{ base::data().reverse_permutation (inew2dis_iold.data()); }
8309
odiststream& put_values (odiststream& ops) const { return base::data().put_values(ops); }
8310
idiststream& get_values (idiststream& ips) { return base::data().get_values(ips); }
8312
void dump (std::string name) const { return base::data().dump(name); }
8315
template <class GetFunction>
8316
idiststream& get_values (idiststream& ips, GetFunction get_element)
8317
{ return base::data().get_values(ips, get_element); }
8318
template <class PutFunction>
8319
odiststream& put_values (odiststream& ops, PutFunction put_element) const
8320
{ return base::data().put_values(ops, put_element); }
8322
template <class PutFunction, class Permutation>
8323
odiststream& permuted_put_values (
8325
const Permutation& perm,
8326
PutFunction put_element) const
8327
{ return base::data().permuted_put_values (ops, perm.data(), put_element); }
6807
8331
File: rheolef.info, Node: reference_element internal, Up: Internals
6809
8.7 `reference_element' - reference element
6810
===========================================
8333
8.10 `reference_element' - reference element
8334
============================================
6812
(Source file: `nfem/basis/reference_element.h')
8336
(Source file: `nfem/geo_element/reference_element.h')