1
/*============================================================================
2
* Manipulation of global indexed list
3
*===========================================================================*/
6
This file is part of Code_Saturne, a general-purpose CFD tool.
8
Copyright (C) 1998-2011 EDF S.A.
10
This program is free software; you can redistribute it and/or modify it under
11
the terms of the GNU General Public License as published by the Free Software
12
Foundation; either version 2 of the License, or (at your option) any later
15
This program is distributed in the hope that it will be useful, but WITHOUT
16
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
20
You should have received a copy of the GNU General Public License along with
21
this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
22
Street, Fifth Floor, Boston, MA 02110-1301, USA.
25
/*----------------------------------------------------------------------------*/
27
#if defined(HAVE_CONFIG_H)
28
#include "cs_config.h"
31
/*----------------------------------------------------------------------------
32
* Standard C library headers
33
*---------------------------------------------------------------------------*/
39
/*----------------------------------------------------------------------------
41
*---------------------------------------------------------------------------*/
45
/*----------------------------------------------------------------------------
47
*---------------------------------------------------------------------------*/
49
#include <fvm_io_num.h>
50
#include <fvm_order.h>
51
#include <fvm_parall.h>
53
/*----------------------------------------------------------------------------
55
*---------------------------------------------------------------------------*/
57
#include "cs_join_util.h"
58
#include "cs_search.h"
61
/*----------------------------------------------------------------------------
62
* Header for the current file
63
*---------------------------------------------------------------------------*/
65
#include "cs_join_set.h"
67
/*---------------------------------------------------------------------------*/
71
/*============================================================================
72
* Macro and type definitions
73
*===========================================================================*/
75
/*============================================================================
76
* Private function definitions
77
*===========================================================================*/
79
/*----------------------------------------------------------------------------
80
* Sort an array "a" and apply the sort to its associated array "b"
83
* Sort is realized using a shell sort (Knuth algorithm).
89
* b <-> associated array
90
*---------------------------------------------------------------------------*/
93
_coupled_adapted_gnum_shellsort(int l,
106
cs_sort_coupled_gnum_shell(l, r, a, b);
108
/* Order b array for each sub-list where a[] is constant */
114
for (i = start; i < r && ref == a[i]; i++);
116
cs_sort_gnum_shell(start, i, b);
123
/*----------------------------------------------------------------------------
124
* Test if an array of local numbers with stride 2 is lexicographically
128
* number <-- array of all entity numbers (number of entity i
129
* given by number[i] or number[list[i] - 1])
130
* nb_ent <-- number of entities considered
133
* 1 if ordered, 0 otherwise.
134
*----------------------------------------------------------------------------*/
137
_order_local_test_s2(const fvm_lnum_t number[],
142
assert (number != NULL || n_elts == 0);
144
for (i = 1 ; i < n_elts ; i++) {
146
cs_bool_t unordered = false;
148
for (k = 0; k < 2; k++) {
149
if (number[i_prev*2 + k] < number[i*2 + k])
151
else if (number[i_prev*2 + k] > number[i*2 + k])
154
if (unordered == true)
158
if (i == n_elts || n_elts == 0)
164
/*----------------------------------------------------------------------------
165
* Descend binary tree for the lexicographical ordering of an local numbering
169
* number <-- pointer to numbers of entities that should be ordered.
170
* level <-- level of the binary tree to descend
171
* n_elts <-- number of entities in the binary tree to descend
172
* order <-> ordering array
173
*----------------------------------------------------------------------------*/
176
_order_descend_tree_s2(const fvm_lnum_t number[],
181
size_t i_save, i1, i2, j, lv_cur;
183
i_save = (size_t)(order[level]);
185
while (level <= (n_elts/2)) {
187
lv_cur = (2*level) + 1;
189
if (lv_cur < n_elts - 1) {
191
i1 = (size_t)(order[lv_cur+1]);
192
i2 = (size_t)(order[lv_cur]);
194
for (j = 0; j < 2; j++) {
195
if (number[i1*2 + j] != number[i2*2 + j])
200
if (number[i1*2 + j] > number[i2*2 + j])
206
if (lv_cur >= n_elts) break;
209
i2 = (size_t)(order[lv_cur]);
211
for (j = 0; j < 2; j++) {
212
if (number[i1*2 + j] != number[i2*2 + j])
217
if (number[i1*2 + j] >= number[i2*2 + j]) break;
219
order[level] = order[lv_cur];
224
order[level] = i_save;
227
/*----------------------------------------------------------------------------
228
* Order array of local numbers with stride 2 lexicographically.
231
* number <-- array of entity numbers
232
* order --> pre-allocated ordering table
233
* n_elts <-- number of entities considered
234
*----------------------------------------------------------------------------*/
237
_order_local_s2(const fvm_lnum_t number[],
244
assert (number != NULL || n_elts == 0);
246
/* Initialize ordering array */
248
for (i = 0 ; i < n_elts ; i++)
254
/* Create binary tree */
259
_order_descend_tree_s2(number, i, n_elts, order);
262
/* Sort binary tree */
264
for (i = n_elts - 1 ; i > 0 ; i--) {
268
_order_descend_tree_s2(number, 0, i, order);
272
/*============================================================================
273
* Public function definitions
274
*===========================================================================*/
276
/*----------------------------------------------------------------------------
277
* Allocate a resizable array.
280
* max_size <-- initial number of elements to allocate
283
* pointer to a new alloacted resizable array
284
*---------------------------------------------------------------------------*/
287
cs_join_rset_create(cs_int_t max_size)
289
cs_join_rset_t *new_set = NULL;
293
BFT_MALLOC(new_set, 1, cs_join_rset_t);
295
new_set->n_max_elts = max_size;
298
BFT_MALLOC(new_set->array, max_size, cs_int_t);
305
/*----------------------------------------------------------------------------
306
* Destroy a cs_join_rset_t structure.
309
* set <-- pointer to pointer to the cs_join_rset_t structure to destroy
310
*---------------------------------------------------------------------------*/
313
cs_join_rset_destroy(cs_join_rset_t **set)
316
BFT_FREE((*set)->array);
321
/*----------------------------------------------------------------------------
322
* Check if we need to resize the current cs_join_rset_t structure and do
326
* set <-- pointer to pointer to the cs_join_rset_t structure to test
327
* test_size <-- target size
328
*---------------------------------------------------------------------------*/
331
cs_join_rset_resize(cs_join_rset_t **set,
338
cs_join_rset_t *_set = *set;
340
if (_set->n_max_elts == 0)
341
_set->n_max_elts = test_size;
342
else if (test_size >= _set->n_max_elts) {
343
while (test_size >= _set->n_max_elts)
344
_set->n_max_elts *= 2; /* Double the list size */
347
BFT_REALLOC(_set->array, _set->n_max_elts, cs_int_t);
348
assert(test_size <= _set->n_max_elts);
354
*set = cs_join_rset_create(test_size);
357
/*----------------------------------------------------------------------------
358
* Create a new cs_join_eset_t structure.
361
* init_size <-- number of initial equivalences to allocate
364
* a pointer to a new cs_join_eset_t structure
365
*---------------------------------------------------------------------------*/
368
cs_join_eset_create(cs_int_t init_size)
370
cs_join_eset_t *new_set = NULL;
372
BFT_MALLOC(new_set, 1, cs_join_eset_t);
374
new_set->n_max_equiv = init_size; /* default value */
375
new_set->n_equiv = 0;
377
BFT_MALLOC(new_set->equiv_couple, 2*new_set->n_max_equiv, cs_int_t);
382
/*----------------------------------------------------------------------------
383
* Check if the requested size if allocated in the structure.
385
* Reallocate cs_join_eset_t structure if necessary.
388
* request_size <-- necessary size
389
* equiv_set <-> pointer to pointer to the cs_join_eset_t struct.
390
*---------------------------------------------------------------------------*/
393
cs_join_eset_check_size(cs_int_t request_size,
394
cs_join_eset_t **equiv_set)
396
cs_join_eset_t *eset = *equiv_set;
399
eset = cs_join_eset_create(request_size);
401
if (request_size + 1 > eset->n_max_equiv) {
403
if (eset->n_max_equiv == 0)
404
eset->n_max_equiv = 2;
406
eset->n_max_equiv *= 2;
408
BFT_REALLOC(eset->equiv_couple, 2*eset->n_max_equiv, cs_int_t);
412
/* Ensure return value is set */
417
/*----------------------------------------------------------------------------
418
* Destroy a cs_join_eset_t structure.
421
* equiv_set <-- pointer to pointer to the structure to destroy
422
*---------------------------------------------------------------------------*/
425
cs_join_eset_destroy(cs_join_eset_t **equiv_set)
427
if (*equiv_set != NULL) {
428
BFT_FREE((*equiv_set)->equiv_couple);
429
BFT_FREE(*equiv_set);
433
/*----------------------------------------------------------------------------
434
* Clean a cs_join_eset_t structure.
436
* If necessary, create a new cs_join_eset_t structure with no redundancy.
439
* eset <-- pointer to pointer to the cs_join_eset_t structure to clean
440
*---------------------------------------------------------------------------*/
443
cs_join_eset_clean(cs_join_eset_t **eset)
446
cs_int_t prev, current;
449
fvm_lnum_t *order = NULL;
450
cs_join_eset_t *new_eset = NULL;
451
cs_join_eset_t *_eset = *eset;
456
if (_eset->n_equiv == 1)
459
BFT_MALLOC(order, _eset->n_equiv, fvm_lnum_t);
461
if (_order_local_test_s2(_eset->equiv_couple,
462
_eset->n_equiv) == false) {
464
/* Order equiv_lst */
466
_order_local_s2(_eset->equiv_couple,
472
for (i = 0; i < _eset->n_equiv; i++)
475
/* Count the number of redundancies */
479
for (i = 1; i < _eset->n_equiv; i++) {
484
if (_eset->equiv_couple[2*prev] == _eset->equiv_couple[2*current])
485
if (_eset->equiv_couple[2*prev+1] == _eset->equiv_couple[2*current+1])
488
} /* End of loop on equivalences */
490
new_eset = cs_join_eset_create(_eset->n_equiv - count);
492
new_eset->n_equiv = _eset->n_equiv - count;
494
if (new_eset->n_equiv > new_eset->n_max_equiv) {
495
new_eset->n_max_equiv = new_eset->n_equiv;
496
BFT_REALLOC(new_eset->equiv_couple, 2*new_eset->n_max_equiv, cs_int_t);
499
if (new_eset->n_equiv > 0) {
501
new_eset->equiv_couple[0] = _eset->equiv_couple[2*order[0]];
502
new_eset->equiv_couple[1] = _eset->equiv_couple[2*order[0]+1];
505
for (i = 1; i < _eset->n_equiv; i++) {
510
if (_eset->equiv_couple[2*prev] != _eset->equiv_couple[2*current]) {
511
new_eset->equiv_couple[2*count] = _eset->equiv_couple[2*current];
512
new_eset->equiv_couple[2*count+1] = _eset->equiv_couple[2*current+1];
517
if (_eset->equiv_couple[2*prev+1] != _eset->equiv_couple[2*current+1]) {
518
new_eset->equiv_couple[2*count] = _eset->equiv_couple[2*current];
519
new_eset->equiv_couple[2*count+1] = _eset->equiv_couple[2*current+1];
525
} /* End of loop on equivalences */
527
assert(count == new_eset->n_equiv);
535
cs_join_eset_destroy(&_eset);
540
/*----------------------------------------------------------------------------
541
* Create a cs_join_gset_t structure (indexed list on global numbering)
544
* n_elts <-- number of elements composing the list
547
* a new allocated pointer to a cs_join_gset_t structure.
548
*---------------------------------------------------------------------------*/
551
cs_join_gset_create(cs_int_t n_elts)
555
cs_join_gset_t *new_set = NULL;
557
BFT_MALLOC(new_set, 1, cs_join_gset_t);
558
BFT_MALLOC(new_set->g_elts, n_elts, fvm_gnum_t);
560
new_set->n_elts = n_elts;
561
new_set->index = NULL;
563
BFT_MALLOC(new_set->index, n_elts + 1, cs_int_t);
565
for (i = 0; i < n_elts + 1; i++)
566
new_set->index[i] = 0;
568
new_set->g_list = NULL;
573
/*----------------------------------------------------------------------------
574
* Build a cs_join_gset_t structure to store all the potential groups
577
* Values in g_elts are the tag values and values in g_list
578
* are position in tag array.
581
* n_elts <-- number of elements in tag array
582
* tag <-- tag array used to define a new cs_join_gset_t
585
* a new allocated cs_join_gset_t structure
586
*---------------------------------------------------------------------------*/
589
cs_join_gset_create_from_tag(cs_int_t n_elts,
590
const fvm_gnum_t tag[])
592
cs_int_t i, n_list_elts;
595
fvm_lnum_t *order = NULL;
596
cs_join_gset_t *set = NULL;
605
BFT_MALLOC(order, n_elts, fvm_lnum_t);
607
fvm_order_local_allocated(NULL, tag, order, n_elts);
609
/* Create a cs_join_gset_t structure to store the initial position of equiv.
612
prev = tag[order[0]];
615
/* Count the number of elements which will compose the set->g_elts */
617
for (i = 1; i < n_elts; i++) {
619
fvm_gnum_t cur = tag[order[i]];
628
set = cs_join_gset_create(n_list_elts);
630
if (n_list_elts > 0) {
635
/* Define the list of elements in set->g_elts and count the number of
636
associated entities */
638
prev = tag[order[0]];
639
set->g_elts[0] = prev;
643
for (i = 1; i < n_elts; i++) {
645
fvm_gnum_t cur = tag[order[i]];
649
set->g_elts[n_list_elts] = cur;
651
set->index[n_list_elts] += 1;
654
set->index[n_list_elts] += 1;
658
/* Build index for the set */
660
for (i = 0; i < set->n_elts; i++)
661
set->index[i+1] += set->index[i];
665
BFT_MALLOC(set->g_list, set->index[set->n_elts], fvm_gnum_t);
668
prev = tag[order[0]];
669
set->g_list[0] = order[0];
671
for (i = 1; i < n_elts; i++) {
673
fvm_lnum_t o_id = order[i];
674
fvm_gnum_t cur = tag[o_id];
680
shift = set->index[n_list_elts];
681
set->g_list[shift] = o_id;
685
shift = count + set->index[n_list_elts];
686
set->g_list[shift] = o_id;
691
} /* End if n_elts > 0 */
697
/* Returns pointers */
702
/*----------------------------------------------------------------------------
703
* Create a new cs_join_gset_t which holds equivalences between elements of
704
* g_list in cs_join_gset_t.
706
* For a subset of equivalences, we store their initial value in the return
707
* cs_join_gset_t structure. A subset is defined if at least two elements
710
* The behavior of this function is near from cs_join_gset_create_from_tag
711
* but we don't store the position in init_array but its value in init_array.
714
* set <-- pointer to a cs_join_gset_t structure
715
* init_array <-- initial values of set->g_list
718
* a new allocated cs_join_gset_t structure
719
*---------------------------------------------------------------------------*/
722
cs_join_gset_create_by_equiv(const cs_join_gset_t *set,
723
const fvm_gnum_t init_array[])
725
cs_int_t i, list_size, n_equiv_grp, count, shift, o_id;
726
fvm_gnum_t prev, cur;
728
cs_int_t save_i = -1;
729
fvm_lnum_t *order = NULL;
730
fvm_gnum_t *couple_list = NULL;
731
cs_join_gset_t *equiv = NULL;
733
if (init_array == NULL)
738
list_size = set->index[set->n_elts];
740
/* Order set->g_list */
742
BFT_MALLOC(order, list_size, fvm_lnum_t);
743
BFT_MALLOC(couple_list, 2*list_size, fvm_gnum_t);
745
for (i = 0; i < list_size; i++) {
746
couple_list[2*i] = set->g_list[i];
747
couple_list[2*i+1] = init_array[i];
750
fvm_order_local_allocated_s(NULL, couple_list, 2, order, list_size);
752
/* Create a cs_join_gset_t structure to store the initial value of equiv.
753
element in set->g_list */
755
/* Count the number of elements which will compose the equiv->g_elts */
758
prev = set->g_list[order[0]];
761
for (i = 1; i < list_size; i++) {
763
cur = set->g_list[order[i]];
777
equiv = cs_join_gset_create(n_equiv_grp);
779
if (n_equiv_grp > 0) {
781
/* Define the list of elements in equiv->g_list and count the number of
782
associated elements */
785
prev = set->g_list[order[0]];
788
for (i = 1; i < list_size; i++) {
790
cur = set->g_list[order[i]];
798
if (count == 1) { /* Add this group */
799
equiv->g_elts[n_equiv_grp] = cur;
801
equiv->index[n_equiv_grp] = 1;
805
equiv->index[n_equiv_grp] += 1;
810
} /* End of loop on list_size */
812
/* Build index for the set */
814
for (i = 0; i < equiv->n_elts; i++)
815
equiv->index[i+1] += equiv->index[i];
819
BFT_MALLOC(equiv->g_list, equiv->index[equiv->n_elts], fvm_gnum_t);
822
prev = set->g_list[order[0]] + 1;
824
for (i = 0; i < list_size; i++) {
827
cur = set->g_list[o_id];
839
shift = count + equiv->index[n_equiv_grp-1];
840
if (cur != init_array[o_id])
841
equiv->g_list[shift] = init_array[o_id];
843
equiv->g_list[shift] = init_array[save_i];
848
} /* End of loop on list_size */
850
} /* End if n_elts > 0 */
854
BFT_FREE(couple_list);
857
/* Returns pointer */
862
/*----------------------------------------------------------------------------
863
* Copy a cs_join_gset_t structure.
866
* src <-- pointer to the cs_join_gset_t structure to copy
869
* a new allocated cs_join_gset_t structure.
870
*---------------------------------------------------------------------------*/
873
cs_join_gset_copy(const cs_join_gset_t *src)
877
cs_join_gset_t *copy = NULL;
882
copy = cs_join_gset_create(src->n_elts);
884
for (i = 0; i < src->n_elts; i++)
885
copy->g_elts[i] = src->g_elts[i];
887
for (i = 0; i < src->n_elts + 1; i++)
888
copy->index[i] = src->index[i];
890
BFT_MALLOC(copy->g_list, copy->index[copy->n_elts], fvm_gnum_t);
892
for (i = 0; i < src->index[src->n_elts]; i++)
893
copy->g_list[i] = src->g_list[i];
898
/*----------------------------------------------------------------------------
899
* Destroy a cs_join_gset_t structure.
902
* set <-- pointer to pointer to the cs_join_gset_t structure to destroy
903
*---------------------------------------------------------------------------*/
906
cs_join_gset_destroy(cs_join_gset_t **set)
909
BFT_FREE((*set)->index);
910
BFT_FREE((*set)->g_elts);
911
BFT_FREE((*set)->g_list);
916
/*----------------------------------------------------------------------------
917
* Sort a cs_join_gset_t structure according to the global numbering of
918
* the g_elts in cs_join_gset_t structure.
921
* set <-> pointer to the structure to order
922
*---------------------------------------------------------------------------*/
925
cs_join_gset_sort_elts(cs_join_gset_t *set)
927
int i, j, k, o_id, shift;
930
cs_int_t *new_index = NULL;
931
fvm_lnum_t *order = NULL;
932
fvm_gnum_t *tmp = NULL, *g_elts = NULL, *g_list = NULL;
937
g_elts = set->g_elts;
938
g_list = set->g_list;
939
n_elts = set->n_elts;
941
BFT_MALLOC(order, n_elts, fvm_lnum_t);
942
BFT_MALLOC(tmp, n_elts, fvm_gnum_t);
943
BFT_MALLOC(new_index, n_elts + 1, cs_int_t);
945
for (i = 0; i < n_elts; i++)
950
fvm_order_local_allocated(NULL, g_elts, order, n_elts);
952
/* Reshape cs_join_gset_t according to the new ordering */
956
for (i = 0; i < n_elts; i++) {
959
g_elts[i] = tmp[o_id];
960
new_index[i+1] = new_index[i] + set->index[o_id+1] - set->index[o_id];
962
} /* End of loop on elements */
964
assert(new_index[n_elts] == set->index[n_elts]);
966
/* Define new g_list */
968
BFT_REALLOC(tmp, set->index[n_elts], fvm_gnum_t);
970
for (i = 0; i < set->index[n_elts]; i++)
973
for (i = 0; i < n_elts; i++) {
976
shift = new_index[i];
978
for (k = 0, j = set->index[o_id]; j < set->index[o_id+1]; j++, k++)
979
g_list[shift + k] = tmp[j];
981
} /* End of loop on elements */
985
BFT_FREE(set->index);
989
/* Return structure */
991
set->index = new_index;
992
set->g_elts = g_elts;
993
set->g_list = g_list;
996
/*----------------------------------------------------------------------------
997
* Sort each sub-list of the g_list array in a cs_join_gset_t structure.
1000
* p_set <-> pointer to the structure to sort
1001
*---------------------------------------------------------------------------*/
1004
cs_join_gset_sort_sublist(cs_join_gset_t *set)
1011
/* Sort each sub-list */
1013
for (i = 0; i < set->n_elts; i++)
1014
cs_sort_gnum_shell(set->index[i], set->index[i+1], set->g_list);
1017
/*----------------------------------------------------------------------------
1018
* Invert a cs_join_gset_t structure.
1021
* set <-- pointer to the cs_join_gset_t structure to work with
1024
* the new allocated and inverted set structure
1025
*---------------------------------------------------------------------------*/
1028
cs_join_gset_invert(const cs_join_gset_t *set)
1030
int i, j, o_id, shift, elt_id;
1031
fvm_gnum_t prev, cur;
1033
cs_int_t list_size = 0, n_elts = 0;
1034
cs_int_t *count = NULL;
1035
fvm_lnum_t *order = NULL;
1036
cs_join_gset_t *invert_set = NULL;
1041
/* Order g_list to count the number of different entities */
1043
list_size = set->index[set->n_elts];
1046
return cs_join_gset_create(list_size);
1048
BFT_MALLOC(order, list_size, fvm_lnum_t);
1050
fvm_order_local_allocated(NULL, set->g_list, order, list_size);
1052
/* Count the number of elements */
1054
prev = set->g_list[order[0]] + 1;
1056
for (i = 0; i < list_size; i++) {
1059
cur = set->g_list[o_id];
1068
invert_set = cs_join_gset_create(n_elts);
1070
/* Fill g_elts for the inverted set */
1073
prev = set->g_list[order[0]] + 1;
1075
for (i = 0; i < list_size; i++) {
1078
cur = set->g_list[o_id];
1082
invert_set->g_elts[n_elts] = cur;
1090
/* Define an index for the inverted set */
1092
for (i = 0; i < set->n_elts; i++) {
1093
for (j = set->index[i]; j < set->index[i+1]; j++) {
1095
elt_id = cs_search_g_binary(invert_set->n_elts,
1097
invert_set->g_elts);
1100
bft_error(__FILE__, __LINE__, 0,
1101
_(" Fail to build an inverted cs_join_gset_t structure.\n"
1102
" Cannot find %u in element list.\n"), set->g_list[j]);
1104
invert_set->index[elt_id+1] += 1;
1107
} /* End of loop on elements of list */
1109
for (i = 0; i < invert_set->n_elts; i++)
1110
invert_set->index[i+1] += invert_set->index[i];
1112
BFT_MALLOC(invert_set->g_list,
1113
invert_set->index[invert_set->n_elts],
1116
/* Define invert_set->g_list */
1118
BFT_MALLOC(count, invert_set->n_elts, cs_int_t);
1120
for (i = 0; i < invert_set->n_elts; i++)
1123
for (i = 0; i < set->n_elts; i++) {
1124
for (j = set->index[i]; j < set->index[i+1]; j++) {
1126
elt_id = cs_search_g_binary(invert_set->n_elts,
1128
invert_set->g_elts);
1130
shift = count[elt_id] + invert_set->index[elt_id];
1131
invert_set->g_list[shift] = set->g_elts[i];
1136
} /* End of loop on elements of list */
1143
/*----------------------------------------------------------------------------
1144
* Delete redudancies in a cs_join_gset_t structure.
1146
* Output set has an ordered sub-list for each element in set.
1149
* set <-> pointer to the structure to clean
1150
*---------------------------------------------------------------------------*/
1153
cs_join_gset_clean(cs_join_gset_t *set)
1155
int i, j, l, r, save, n_elts;
1158
fvm_gnum_t *g_list = NULL;
1163
g_list = set->g_list;
1164
n_elts = set->n_elts;
1166
/* Sort g_list for each element in index */
1168
cs_join_gset_sort_sublist(set);
1170
/* Define a new index without redundant elements */
1172
save = set->index[0];
1174
for (i = 0; i < n_elts; i++) {
1177
r = set->index[i+1];
1181
g_list[shift++] = g_list[l];
1183
for (j = l + 1; j < r; j++)
1184
if (g_list[j] != g_list[j-1])
1185
g_list[shift++] = g_list[j];
1190
set->index[i+1] = shift;
1192
} /* End of loop on elements */
1195
/*----------------------------------------------------------------------------
1196
* Delete redudancies in g_list array of a cs_join_gset_t structure.
1199
* set <-> pointer to the structure to clean
1200
* linked_array <-> array for which redundancies are scanned
1201
*---------------------------------------------------------------------------*/
1204
cs_join_gset_clean_from_array(cs_join_gset_t *set,
1205
fvm_gnum_t linked_array[])
1211
cs_int_t *new_index = NULL;
1212
fvm_gnum_t *g_list = NULL;
1217
if (linked_array == NULL)
1220
g_list = set->g_list;
1221
n_elts = set->n_elts;
1223
/* Sort linked_array and apply change to g_list for each element in index */
1225
for (i = 0; i < n_elts; i++)
1226
_coupled_adapted_gnum_shellsort(set->index[i],
1231
/* Define a new index without redundant elements */
1233
BFT_MALLOC(new_index, n_elts + 1, cs_int_t);
1236
for (i = 0; i < n_elts; i++) {
1239
r = set->index[i+1];
1243
g_list[shift] = g_list[l];
1246
for (j = l + 1; j < r; j++) {
1248
if (linked_array[j] != linked_array[j-1]) {
1249
g_list[shift] = g_list[j];
1252
assert(g_list[shift-1] <= g_list[j]);
1255
new_index[i+1] = shift;
1258
else { /* No match for this element */
1260
new_index[i+1] = new_index[i];
1264
} /* End of loop on elements */
1266
/* Reshape cs_join_gset_t structure */
1268
BFT_REALLOC(g_list, new_index[n_elts], fvm_gnum_t);
1269
BFT_FREE(set->index);
1271
set->index = new_index;
1272
set->g_list = g_list;
1275
/*----------------------------------------------------------------------------
1276
* Concatenate the two g_elts and g_list arrays.
1278
* Order the new concatenated array and delete redundant elements.
1279
* We get a single ordered array.
1282
* set <-- pointer to the structure to work with
1283
* n_elts --> number of elements in the new set
1284
* new_array --> pointer to the new created array
1285
*---------------------------------------------------------------------------*/
1288
cs_join_gset_single_order(const cs_join_gset_t *set,
1290
fvm_gnum_t *new_array[])
1292
cs_int_t _n_elts = 0;
1293
fvm_gnum_t *_new_array = NULL;
1296
*new_array = _new_array;
1298
if (set == NULL) /* Nothing to do */
1301
_n_elts = set->n_elts;
1308
fvm_lnum_t *order = NULL;
1309
fvm_gnum_t *elt_list = NULL;
1311
_n_elts += set->index[_n_elts]; /* Add number of elements in g_list */
1313
BFT_MALLOC(elt_list, _n_elts, fvm_gnum_t);
1315
for (i = 0; i < set->n_elts; i++)
1316
elt_list[i] = set->g_elts[i];
1318
shift = set->n_elts;
1319
for (i = 0; i < set->index[set->n_elts]; i++)
1320
elt_list[shift + i] = set->g_list[i];
1322
/* Define an ordered list of elements */
1324
BFT_MALLOC(_new_array, _n_elts, fvm_gnum_t);
1325
BFT_MALLOC(order, _n_elts, fvm_lnum_t);
1327
fvm_order_local_allocated(NULL, elt_list, order, _n_elts);
1329
for (i = 0; i < _n_elts; i++)
1330
_new_array[i] = elt_list[order[i]];
1332
/* Delete redundant elements */
1335
prev = _new_array[0] + 1;
1337
for (i = 0; i < _n_elts; i++) {
1339
if (prev != _new_array[i]) {
1341
_new_array[shift] = _new_array[i];
1342
prev = _new_array[i];
1348
_n_elts = shift; /* Real number of elements without redundancy */
1350
/* Memory management */
1354
BFT_REALLOC(_new_array, _n_elts, fvm_gnum_t);
1356
} /* End if n_elts > 0 */
1358
/* Set return pointers */
1361
*new_array = _new_array;
1364
/*----------------------------------------------------------------------------
1365
* Compress a g_list such as for each element "e" in g_elts:
1366
* - there is no redundancy for the linked elements of set->g_list
1367
* - there is no element in set->g_list < e except if this element is not
1370
* g_list and g_elts must be ordered before calling this function
1373
* set <-> pointer to the structure to work with
1374
*---------------------------------------------------------------------------*/
1377
cs_join_gset_compress(cs_join_gset_t *set)
1379
cs_int_t i, j, start, end, save, shift;
1385
if (set->n_elts == 0)
1389
save = set->index[0];
1391
for (i = 0; i < set->n_elts; i++) {
1393
cur = set->g_elts[i];
1395
end = set->index[i+1];
1397
if (end - start > 0) {
1399
/* Sub-lists must be ordered */
1401
if (cur < set->g_list[start])
1402
set->g_list[shift++] = set->g_list[start];
1403
else if (cur > set->g_list[start]) {
1405
int id = cs_search_g_binary(i+1,
1409
if (id == -1) /* Not found. Keep it. */
1410
set->g_list[shift++] = set->g_list[start];
1414
for (j = start + 1; j < end; j++) {
1416
if (cur < set->g_list[j]) {
1417
if (set->g_list[j-1] != set->g_list[j])
1418
set->g_list[shift++] = set->g_list[j];
1420
else if (cur > set->g_list[j]) {
1422
int id = cs_search_g_binary(i+1,
1426
if (id == -1) /* Not found. Keep it. */
1427
set->g_list[shift++] = set->g_list[j];
1431
} /* End of loop on sub-elements */
1433
} /* end - start > 0 */
1436
set->index[i+1] = shift;
1438
} /* End of loop on elements in g_elts */
1440
/* Reshape cs_join_gset_t structure if necessary */
1442
if (save != set->index[set->n_elts]) {
1443
assert(save > set->index[set->n_elts]);
1444
BFT_REALLOC(set->g_list, set->index[set->n_elts], fvm_gnum_t);
1447
#if 0 && defined(DEBUG) && !defined(NDEBUG)
1448
cs_join_gset_dump(NULL, set);
1452
/*----------------------------------------------------------------------------
1453
* Delete redundancies in set->g_elts.
1455
* Merge sub-arrays associated to a common set->g_elts[i].
1458
* set <-- pointer to the structure to work with
1459
* order_tag <-- 0: set->g_elts is not ordered, 1: ordered
1460
*---------------------------------------------------------------------------*/
1463
cs_join_gset_merge_elts(cs_join_gset_t *set,
1466
cs_int_t i, save, start, end, n_init_elts, n_sub_elts;
1467
fvm_gnum_t prev, cur;
1472
n_init_elts = set->n_elts;
1474
if (n_init_elts < 2)
1477
assert(order_tag == 0 || order_tag == 1);
1479
/* Delete redundancies in g_elts. Merge sub-lists associated to common
1483
cs_join_gset_sort_elts(set);
1485
set->n_elts = 0; /* Reset and will be redefinied */
1486
prev = set->g_elts[0] + 1; /* Force prev to be different from g_elts[0] */
1487
save = set->index[0];
1489
for (i = 0; i < n_init_elts; i++) {
1491
cur = set->g_elts[i];
1493
end = set->index[i+1];
1495
n_sub_elts = end - start;
1499
set->g_elts[set->n_elts] = cur;
1501
set->index[set->n_elts] = n_sub_elts;
1507
set->index[set->n_elts] += n_sub_elts;
1509
} /* prev != next */
1511
} /* Loop on elements of set->g_elts */
1513
/* Get the new index */
1515
for (i = 0; i < set->n_elts; i++)
1516
set->index[i+1] += set->index[i];
1518
/* Reshape cs_join_gset_t structure if necessary */
1520
if (n_init_elts != set->n_elts) {
1522
assert(n_init_elts > set->n_elts);
1524
BFT_REALLOC(set->g_elts, set->n_elts, fvm_gnum_t);
1525
BFT_REALLOC(set->index, set->n_elts + 1, cs_int_t);
1526
BFT_REALLOC(set->g_list, set->index[set->n_elts], fvm_gnum_t);
1530
#if 0 && defined(DEBUG) && !defined(NDEBUG)
1531
cs_join_gset_dump(NULL, set);
1535
#if defined(HAVE_MPI)
1537
/*----------------------------------------------------------------------------
1538
* Synchronize a cs_join_gset_t structure and distribute the resulting set
1539
* over the rank using a round-robin distribution. Elements in sync_set
1540
* are ordered and there is no redundancy but list may have redundancies.
1541
* Use cs_join_gset_clean() to remove redundancies in g_list.
1544
* loc_set <-> pointer to the local structure to work with
1545
* comm <-- mpi_comm on which synchro. and distribution take place
1548
* a synchronized and distributed cs_join_gset_t structure.
1549
*---------------------------------------------------------------------------*/
1552
cs_join_gset_robin_sync(cs_join_gset_t *loc_set,
1555
int i, j, shift, elt_id;
1556
int rank, local_rank, n_ranks, n_recv_elts, n_sub_elts;
1559
cs_int_t *send_count = NULL, *recv_count = NULL;
1560
cs_int_t *send_shift = NULL, *recv_shift = NULL;
1561
fvm_gnum_t *send_buffer = NULL, *recv_buffer = NULL;
1562
cs_join_gset_t *sync_set = NULL;
1564
MPI_Comm_rank(comm, &local_rank);
1565
MPI_Comm_size(comm, &n_ranks);
1567
/* Allocate parameters for MPI functions */
1569
BFT_MALLOC(send_count, n_ranks, cs_int_t);
1570
BFT_MALLOC(recv_count, n_ranks, cs_int_t);
1571
BFT_MALLOC(send_shift, n_ranks + 1, cs_int_t);
1572
BFT_MALLOC(recv_shift, n_ranks + 1, cs_int_t);
1574
/* Initialization */
1576
for (i = 0; i < n_ranks; i++)
1579
/* Synchronize list definition for each global element */
1581
for (i = 0; i < loc_set->n_elts; i++) {
1582
rank = (loc_set->g_elts[i] - 1) % n_ranks;
1583
send_count[rank] += 1;
1586
MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
1591
for (rank = 0; rank < n_ranks; rank++) {
1592
send_shift[rank + 1] = send_shift[rank] + send_count[rank];
1593
recv_shift[rank + 1] = recv_shift[rank] + recv_count[rank];
1596
n_recv_elts = recv_shift[n_ranks];
1598
/* Define sync_set: a distributed cs_join_gset_t structure which
1599
synchronize data over the ranks */
1601
sync_set = cs_join_gset_create(n_recv_elts);
1603
/* Synchronize list definition for each global element */
1605
for (i = 0; i < n_ranks; i++)
1608
for (i = 0; i < loc_set->n_elts; i++) {
1610
rank = (loc_set->g_elts[i] - 1) % n_ranks;
1611
n_sub_elts = loc_set->index[i+1] - loc_set->index[i];
1612
send_count[rank] += 2 + n_sub_elts;
1616
MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
1621
for (rank = 0; rank < n_ranks; rank++) {
1622
send_shift[rank + 1] = send_shift[rank] + send_count[rank];
1623
recv_shift[rank + 1] = recv_shift[rank] + recv_count[rank];
1626
/* Fill send_buffer: global number and number of elements in index */
1628
BFT_MALLOC(send_buffer, send_shift[n_ranks], fvm_gnum_t);
1629
BFT_MALLOC(recv_buffer, recv_shift[n_ranks], fvm_gnum_t);
1631
for (i = 0; i < n_ranks; i++)
1634
for (i = 0; i < loc_set->n_elts; i++) {
1636
gnum = loc_set->g_elts[i];
1637
rank = (gnum - 1) % n_ranks;
1638
shift = send_shift[rank] + send_count[rank];
1639
n_sub_elts = loc_set->index[i+1] - loc_set->index[i];
1641
send_buffer[shift++] = gnum;
1642
send_buffer[shift++] = n_sub_elts;
1644
for (j = 0; j < n_sub_elts; j++)
1645
send_buffer[shift + j] = loc_set->g_list[loc_set->index[i] + j];
1647
send_count[rank] += 2 + n_sub_elts;
1651
MPI_Alltoallv(send_buffer, send_count, send_shift, FVM_MPI_GNUM,
1652
recv_buffer, recv_count, recv_shift, FVM_MPI_GNUM,
1655
n_recv_elts = recv_shift[n_ranks];
1657
/* Partial free memory */
1659
BFT_FREE(send_buffer);
1660
BFT_FREE(send_count);
1661
BFT_FREE(send_shift);
1662
BFT_FREE(recv_count);
1663
BFT_FREE(recv_shift);
1665
/* Fill sync_set->index and define a new recv_count for the next comm. */
1667
i = 0; /* position in recv_buffer */
1668
elt_id = 0; /* position of the element in sync_set */
1670
while (i < n_recv_elts) {
1672
sync_set->g_elts[elt_id] = recv_buffer[i++];
1673
n_sub_elts = recv_buffer[i++];
1674
sync_set->index[elt_id+1] = n_sub_elts;
1678
} /* End of loop on ranks */
1680
/* Build index on elements of sync_set */
1682
for (i = 0; i < sync_set->n_elts; i++)
1683
sync_set->index[i+1] += sync_set->index[i];
1685
BFT_MALLOC(sync_set->g_list,
1686
sync_set->index[sync_set->n_elts],
1689
/* Fill g_list of sync_set */
1691
i = 0; /* position in recv_buffer */
1692
elt_id = 0; /* position of the element in sync_set */
1694
while (i < n_recv_elts) {
1696
i++; /* element numbering */
1697
shift = sync_set->index[elt_id];
1698
n_sub_elts = recv_buffer[i++];
1700
for (j = 0; j < n_sub_elts; j++)
1701
sync_set->g_list[j + shift] = recv_buffer[i++];
1705
} /* End of loop on ranks */
1707
BFT_FREE(recv_buffer);
1709
/* Return pointer */
1711
cs_join_gset_merge_elts(sync_set, 0); /* sync_set elts are not ordered */
1716
/*----------------------------------------------------------------------------
1717
* Update a local cs_join_gset_t structure from a distributed and
1718
* synchronized cs_join_gset_t structure. Round-robin distribution is used
1719
* to store synchronized elements.
1721
* loc_set should not have redundant elements.
1724
* sync_set <-- pointer to the structure which holds a synchronized block
1725
* loc_set <-> pointer to a local structure holding elements to update
1726
* comm <-- comm on which synchronization and distribution take place
1727
*---------------------------------------------------------------------------*/
1730
cs_join_gset_robin_update(const cs_join_gset_t *sync_set,
1731
cs_join_gset_t *loc_set,
1734
int i, j, k, shift, elt_id, start, end;
1735
int rank, local_rank, n_ranks, n_sub_elts, n_recv_elts;
1738
cs_int_t *send_count = NULL, *recv_count = NULL;
1739
cs_int_t *send_shift = NULL, *recv_shift = NULL, *wanted_rank_index = NULL;
1740
fvm_gnum_t *send_buffer = NULL, *recv_buffer = NULL, *wanted_elts = NULL;
1744
assert(sync_set != NULL);
1746
/* Build a cs_join_block_info_t structure */
1748
MPI_Comm_rank(comm, &local_rank);
1749
MPI_Comm_size(comm, &n_ranks);
1751
/* Allocate parameters for MPI functions */
1753
BFT_MALLOC(send_count, n_ranks, cs_int_t);
1754
BFT_MALLOC(recv_count, n_ranks, cs_int_t);
1755
BFT_MALLOC(send_shift, n_ranks + 1, cs_int_t);
1756
BFT_MALLOC(recv_shift, n_ranks + 1, cs_int_t);
1757
BFT_MALLOC(wanted_rank_index, n_ranks + 1, cs_int_t);
1759
/* Initialization */
1761
for (i = 0; i < n_ranks; i++)
1764
/* Get a synchronized list definition for each global element */
1766
for (i = 0; i < loc_set->n_elts; i++) {
1767
rank = (loc_set->g_elts[i] - 1) % n_ranks;
1768
send_count[rank] += 1;
1771
MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
1774
wanted_rank_index[0] = 0;
1776
for (rank = 0; rank < n_ranks; rank++) {
1777
send_shift[rank + 1] = send_shift[rank] + send_count[rank];
1778
wanted_rank_index[rank + 1] = wanted_rank_index[rank] + recv_count[rank];
1781
/* Fill send_buffer: global number */
1783
BFT_MALLOC(send_buffer, send_shift[n_ranks], fvm_gnum_t);
1784
BFT_MALLOC(wanted_elts, wanted_rank_index[n_ranks], fvm_gnum_t);
1786
for (i = 0; i < n_ranks; i++)
1789
for (i = 0; i < loc_set->n_elts; i++) {
1791
gnum = loc_set->g_elts[i];
1792
rank = (gnum - 1) % n_ranks;
1793
shift = send_shift[rank] + send_count[rank];
1795
send_buffer[shift] = gnum;
1796
send_count[rank] += 1;
1800
MPI_Alltoallv(send_buffer, send_count, send_shift, FVM_MPI_GNUM,
1801
wanted_elts, recv_count, wanted_rank_index, FVM_MPI_GNUM,
1804
/* Send new list definition holding by sync_set to ranks which have
1807
for (i = 0; i < n_ranks; i++)
1810
for (rank = 0; rank < n_ranks; rank++) {
1812
for (i = wanted_rank_index[rank]; i < wanted_rank_index[rank+1]; i++) {
1814
elt_id = cs_search_g_binary(sync_set->n_elts,
1818
assert(elt_id != -1);
1820
wanted_elts[i] = elt_id;
1821
n_sub_elts = sync_set->index[elt_id+1] - sync_set->index[elt_id];
1822
send_count[rank] += 2 + n_sub_elts; /* glob. num,
1828
} /* End of loop on ranks */
1830
MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
1835
for (rank = 0; rank < n_ranks; rank++) {
1836
send_shift[rank + 1] = send_shift[rank] + send_count[rank];
1837
recv_shift[rank + 1] = recv_shift[rank] + recv_count[rank];
1840
BFT_REALLOC(send_buffer, send_shift[n_ranks], fvm_gnum_t);
1841
BFT_MALLOC(recv_buffer, recv_shift[n_ranks], fvm_gnum_t);
1843
for (i = 0; i < n_ranks; i++)
1846
for (rank = 0; rank < n_ranks; rank++) {
1848
for (i = wanted_rank_index[rank]; i < wanted_rank_index[rank+1]; i++) {
1850
shift = send_shift[rank] + send_count[rank];
1851
elt_id = wanted_elts[i];
1853
start = sync_set->index[elt_id];
1854
end = sync_set->index[elt_id+1];
1855
n_sub_elts = end - start;
1857
send_buffer[shift++] = sync_set->g_elts[elt_id];
1858
send_buffer[shift++] = n_sub_elts;
1860
for (j = start, k = 0; j < end; j++, k++)
1861
send_buffer[shift+k] = sync_set->g_list[j];
1863
send_count[rank] += 2 + n_sub_elts; /* glob. num, n_sub_elts, g_list */
1867
} /* End of loop on ranks */
1869
MPI_Alltoallv(send_buffer, send_count, send_shift, FVM_MPI_GNUM,
1870
recv_buffer, recv_count, recv_shift, FVM_MPI_GNUM,
1873
n_recv_elts = recv_shift[n_ranks];
1875
/* Partial memory free */
1877
BFT_FREE(send_buffer);
1878
BFT_FREE(send_count);
1879
BFT_FREE(send_shift);
1880
BFT_FREE(recv_count);
1881
BFT_FREE(recv_shift);
1883
/* Re-initialize loc_set
1884
As loc_set->g_elts and sync_set->g_elts are ordered, it's easier.
1885
We can take values as they come */
1887
/* First redefine index */
1889
assert(loc_set->index[0] == 0);
1891
for (i = 0; i < loc_set->n_elts; i++)
1892
loc_set->index[i+1] = 0;
1894
i = 0; /* id in recv_buffer */
1895
j = 0; /* id in g_elts */
1897
while (i < n_recv_elts) {
1899
gnum = recv_buffer[i++];
1901
assert(loc_set->g_elts[j] = gnum);
1903
n_sub_elts = recv_buffer[i++];
1904
loc_set->index[j+1] = n_sub_elts;
1907
j++; /* go to the next elements */
1909
} /* End of loop on elements of recv_buffer */
1911
/* Define the new index */
1913
for (i = 0; i < loc_set->n_elts; i++)
1914
loc_set->index[i+1] += loc_set->index[i];
1916
BFT_REALLOC(loc_set->g_list, loc_set->index[loc_set->n_elts], fvm_gnum_t);
1918
i = 0; /* id in recv_buffer */
1919
j = 0; /* id in g_elts */
1921
while (i < n_recv_elts) {
1923
gnum = recv_buffer[i++];
1924
n_sub_elts = recv_buffer[i++];
1926
assert(loc_set->g_elts[j] = gnum);
1928
for (k = 0; k < n_sub_elts; k++)
1929
loc_set->g_list[loc_set->index[j] + k] = recv_buffer[i + k];
1932
j++; /* go to the next elements */
1934
} /* End of loop on elements of recv_buffer */
1936
BFT_FREE(recv_buffer);
1937
BFT_FREE(wanted_elts);
1938
BFT_FREE(wanted_rank_index);
1942
/*----------------------------------------------------------------------------
1943
* Synchronize a cs_join_gset_t structure and distribute the resulting set
1944
* over the rank by block
1947
* max_gnum <-- max global number in global element numbering
1948
* loc_set <-> pointer to the local structure to work with
1949
* comm <-- mpi_comm on which synchro. and distribution take place
1952
* a synchronized and distributed cs_join_gset_t structure.
1953
*---------------------------------------------------------------------------*/
1956
cs_join_gset_block_sync(fvm_gnum_t max_gnum,
1957
cs_join_gset_t *loc_set,
1960
int i, j, shift, block_id;
1961
int rank, local_rank, n_ranks, n_recv_elts, n_sub_elts;
1963
cs_join_block_info_t block_info;
1965
cs_int_t *send_count = NULL, *recv_count = NULL, *counter = NULL;
1966
cs_int_t *send_shift = NULL, *recv_shift = NULL;
1967
fvm_gnum_t *send_buffer = NULL, *recv_buffer = NULL;
1968
cs_join_gset_t *sync_set = NULL;
1973
MPI_Comm_rank(comm, &local_rank);
1974
MPI_Comm_size(comm, &n_ranks);
1976
block_info = cs_join_get_block_info(max_gnum, n_ranks, local_rank);
1978
/* Allocate parameters for MPI functions */
1980
BFT_MALLOC(send_count, n_ranks, cs_int_t);
1981
BFT_MALLOC(recv_count, n_ranks, cs_int_t);
1982
BFT_MALLOC(send_shift, n_ranks + 1, cs_int_t);
1983
BFT_MALLOC(recv_shift, n_ranks + 1, cs_int_t);
1985
/* Initialization */
1987
for (i = 0; i < n_ranks; i++)
1990
/* Synchronize list definition for each global element */
1992
for (i = 0; i < loc_set->n_elts; i++) {
1994
rank = (loc_set->g_elts[i] - 1)/block_info.size;
1995
n_sub_elts = loc_set->index[i+1] - loc_set->index[i];
1996
send_count[rank] += 2 + n_sub_elts;
2000
MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
2005
for (rank = 0; rank < n_ranks; rank++) {
2006
send_shift[rank + 1] = send_shift[rank] + send_count[rank];
2007
recv_shift[rank + 1] = recv_shift[rank] + recv_count[rank];
2010
/* Fill send_buffer: global number and number of elements in index */
2012
BFT_MALLOC(send_buffer, send_shift[n_ranks], fvm_gnum_t);
2013
BFT_MALLOC(recv_buffer, recv_shift[n_ranks], fvm_gnum_t);
2015
for (i = 0; i < n_ranks; i++)
2018
for (i = 0; i < loc_set->n_elts; i++) {
2020
gnum = loc_set->g_elts[i];
2021
rank = (gnum - 1)/block_info.size;
2022
shift = send_shift[rank] + send_count[rank];
2023
n_sub_elts = loc_set->index[i+1] - loc_set->index[i];
2025
send_buffer[shift++] = gnum;
2026
send_buffer[shift++] = n_sub_elts;
2028
for (j = 0; j < n_sub_elts; j++)
2029
send_buffer[shift + j] = loc_set->g_list[loc_set->index[i] + j];
2031
send_count[rank] += 2 + n_sub_elts;
2035
MPI_Alltoallv(send_buffer, send_count, send_shift, FVM_MPI_GNUM,
2036
recv_buffer, recv_count, recv_shift, FVM_MPI_GNUM,
2039
n_recv_elts = recv_shift[n_ranks];
2041
/* Partial free memory */
2043
BFT_FREE(send_buffer);
2044
BFT_FREE(send_count);
2045
BFT_FREE(send_shift);
2046
BFT_FREE(recv_count);
2047
BFT_FREE(recv_shift);
2049
/* Define sync_set: a distributed cs_join_gset_t structure which
2050
synchronize data over the ranks */
2052
sync_set = cs_join_gset_create(block_info.local_size);
2054
for (i = 0; i < sync_set->n_elts; i++)
2055
sync_set->g_elts[i] = block_info.first_gnum + i;
2057
/* Fill sync_set->index and define a new recv_count for the next comm. */
2059
i = 0; /* position in recv_buffer */
2061
while (i < n_recv_elts) {
2063
block_id = recv_buffer[i++] - block_info.first_gnum;
2065
assert(sync_set->g_elts[block_id] == recv_buffer[i-1]);
2067
n_sub_elts = recv_buffer[i++];
2068
sync_set->index[block_id+1] += n_sub_elts;
2071
} /* End of loop on ranks */
2073
/* Build index on elements of sync_set */
2075
for (i = 0; i < sync_set->n_elts; i++)
2076
sync_set->index[i+1] += sync_set->index[i];
2078
BFT_MALLOC(sync_set->g_list,
2079
sync_set->index[sync_set->n_elts],
2082
/* Fill g_list of sync_set */
2084
BFT_MALLOC(counter, sync_set->n_elts, cs_int_t);
2086
for (i = 0; i < sync_set->n_elts; i++)
2089
i = 0; /* position in recv_buffer */
2091
while (i < n_recv_elts) {
2093
block_id = recv_buffer[i++] - block_info.first_gnum;
2094
shift = sync_set->index[block_id] + counter[block_id];
2096
n_sub_elts = recv_buffer[i++];
2098
for (j = 0; j < n_sub_elts; j++)
2099
sync_set->g_list[j + shift] = recv_buffer[i++];
2101
counter[block_id] += n_sub_elts;
2103
} /* End of loop on ranks */
2105
BFT_FREE(recv_buffer);
2108
/* Return pointer */
2110
cs_join_gset_clean(sync_set);
2115
/*----------------------------------------------------------------------------
2116
* Update a local cs_join_gset_t structure from a distributed and
2117
* synchronized cs_join_gset_t structure.
2119
* Loc_set should not have redundant elements.
2122
* max_gnum <-- max global number in global element numbering
2123
* sync_set <-- pointer to the structure which holds a synchronized block
2124
* loc_set <-> pointer to a local structure holding elements to update
2125
* comm <-- comm on which synchronization and distribution take place
2126
*---------------------------------------------------------------------------*/
2129
cs_join_gset_block_update(fvm_gnum_t max_gnum,
2130
const cs_join_gset_t *sync_set,
2131
cs_join_gset_t *loc_set,
2134
int i, j, k, shift, block_id, start, end;
2135
int rank, local_rank, n_ranks, n_sub_elts, n_recv_elts;
2137
cs_join_block_info_t block_info;
2139
cs_int_t *send_count = NULL, *recv_count = NULL;
2140
cs_int_t *send_shift = NULL, *recv_shift = NULL, *wanted_rank_index = NULL;
2141
fvm_gnum_t *send_buffer = NULL, *recv_buffer = NULL, *wanted_elts = NULL;
2148
assert(sync_set != NULL);
2150
/* Build a cs_join_block_info_t structure */
2152
MPI_Comm_rank(comm, &local_rank);
2153
MPI_Comm_size(comm, &n_ranks);
2155
block_info = cs_join_get_block_info(max_gnum, n_ranks, local_rank);
2157
/* Allocate parameters for MPI functions */
2159
BFT_MALLOC(send_count, n_ranks, cs_int_t);
2160
BFT_MALLOC(recv_count, n_ranks, cs_int_t);
2161
BFT_MALLOC(send_shift, n_ranks + 1, cs_int_t);
2162
BFT_MALLOC(recv_shift, n_ranks + 1, cs_int_t);
2163
BFT_MALLOC(wanted_rank_index, n_ranks + 1, cs_int_t);
2165
/* Initialization */
2167
for (i = 0; i < n_ranks; i++)
2170
/* Get a synchronized list definition for each global element */
2172
for (i = 0; i < loc_set->n_elts; i++) {
2173
rank = (loc_set->g_elts[i] - 1)/block_info.size;
2174
send_count[rank] += 1;
2177
MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
2180
wanted_rank_index[0] = 0;
2182
for (rank = 0; rank < n_ranks; rank++) {
2183
send_shift[rank + 1] = send_shift[rank] + send_count[rank];
2184
wanted_rank_index[rank + 1] = wanted_rank_index[rank] + recv_count[rank];
2187
/* Fill send_buffer: global number */
2189
BFT_MALLOC(send_buffer, send_shift[n_ranks], fvm_gnum_t);
2190
BFT_MALLOC(wanted_elts, wanted_rank_index[n_ranks], fvm_gnum_t);
2192
for (i = 0; i < n_ranks; i++)
2195
for (i = 0; i < loc_set->n_elts; i++) {
2197
gnum = loc_set->g_elts[i];
2198
rank = (gnum - 1)/block_info.size;
2199
shift = send_shift[rank] + send_count[rank];
2201
send_buffer[shift] = gnum;
2202
send_count[rank] += 1;
2206
MPI_Alltoallv(send_buffer, send_count, send_shift, FVM_MPI_GNUM,
2207
wanted_elts, recv_count, wanted_rank_index, FVM_MPI_GNUM,
2210
/* Send new list definition holding by sync_set to ranks which have
2213
for (i = 0; i < n_ranks; i++)
2216
for (rank = 0; rank < n_ranks; rank++) {
2218
for (i = wanted_rank_index[rank]; i < wanted_rank_index[rank+1]; i++) {
2220
block_id = wanted_elts[i] - block_info.first_gnum;
2221
n_sub_elts = sync_set->index[block_id+1] - sync_set->index[block_id];
2223
send_count[rank] += 2 + n_sub_elts; /* glob. num,
2229
} /* End of loop on ranks */
2231
MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
2236
for (rank = 0; rank < n_ranks; rank++) {
2237
send_shift[rank + 1] = send_shift[rank] + send_count[rank];
2238
recv_shift[rank + 1] = recv_shift[rank] + recv_count[rank];
2241
BFT_REALLOC(send_buffer, send_shift[n_ranks], fvm_gnum_t);
2242
BFT_MALLOC(recv_buffer, recv_shift[n_ranks], fvm_gnum_t);
2244
for (i = 0; i < n_ranks; i++)
2247
for (rank = 0; rank < n_ranks; rank++) {
2249
for (i = wanted_rank_index[rank]; i < wanted_rank_index[rank+1]; i++) {
2251
shift = send_shift[rank] + send_count[rank];
2252
block_id = wanted_elts[i] - block_info.first_gnum;
2254
start = sync_set->index[block_id];
2255
end = sync_set->index[block_id+1];
2256
n_sub_elts = end - start;
2258
send_buffer[shift++] = wanted_elts[i];
2259
send_buffer[shift++] = n_sub_elts;
2261
for (j = start, k = 0; j < end; j++, k++)
2262
send_buffer[shift+k] = sync_set->g_list[j];
2264
send_count[rank] += 2 + n_sub_elts; /* glob. num, n_sub_elts, g_list */
2268
} /* End of loop on ranks */
2270
MPI_Alltoallv(send_buffer, send_count, send_shift, FVM_MPI_GNUM,
2271
recv_buffer, recv_count, recv_shift, FVM_MPI_GNUM,
2274
n_recv_elts = recv_shift[n_ranks];
2276
/* Partial free memory */
2278
BFT_FREE(send_buffer);
2279
BFT_FREE(send_count);
2280
BFT_FREE(send_shift);
2281
BFT_FREE(recv_count);
2282
BFT_FREE(recv_shift);
2284
/* Re-initialize loc_set
2285
As loc_set->g_elts and sync_set->g_elts are ordered, it's easier.
2286
We can take values as they come */
2288
/* First redefine index */
2290
assert(loc_set->index[0] == 0);
2292
for (i = 0; i < loc_set->n_elts; i++)
2293
loc_set->index[i+1] = 0;
2295
i = 0; /* id in recv_buffer */
2296
j = 0; /* id in g_elts */
2298
while (i < n_recv_elts) {
2300
gnum = recv_buffer[i++];
2302
assert(loc_set->g_elts[j] = gnum);
2304
n_sub_elts = recv_buffer[i++];
2305
loc_set->index[j+1] = n_sub_elts;
2308
j++; /* go to the next elements */
2310
} /* End of loop on elements of recv_buffer */
2312
/* Define the new index */
2314
for (i = 0; i < loc_set->n_elts; i++)
2315
loc_set->index[i+1] += loc_set->index[i];
2317
BFT_REALLOC(loc_set->g_list, loc_set->index[loc_set->n_elts], fvm_gnum_t);
2319
i = 0; /* id in recv_buffer */
2320
j = 0; /* id in g_elts */
2322
while (i < n_recv_elts) {
2324
gnum = recv_buffer[i++];
2325
n_sub_elts = recv_buffer[i++];
2327
assert(loc_set->g_elts[j] = gnum);
2329
for (k = 0; k < n_sub_elts; k++)
2330
loc_set->g_list[loc_set->index[j] + k] = recv_buffer[i + k];
2333
j++; /* go to the next elements */
2335
} /* End of loop on elements of recv_buffer */
2337
BFT_FREE(recv_buffer);
2338
BFT_FREE(wanted_elts);
2339
BFT_FREE(wanted_rank_index);
2342
#endif /* HAVE_MPI */
2344
/*----------------------------------------------------------------------------
2345
* Dump an array (int or double).
2347
* This function is called according to the verbosity.
2350
* f <-- handle to output file
2351
* type <-- type of the array to display
2352
* header <-- header to display in front of the array
2353
* n_elts <-- number of elements to display
2354
* array <-- array to display
2355
*---------------------------------------------------------------------------*/
2358
cs_join_dump_array(FILE *f,
2366
fprintf(f, " %s: ", header);
2368
if (!strncmp(type, "int", strlen("int"))) { /* "int" array */
2370
const int *i_array = array;
2372
for (i = 0; i < n_elts; i++)
2373
fprintf(f, " %8d", i_array[i]);
2376
else if (!strncmp(type, "bool", strlen("bool"))) { /* "boolean" array */
2378
const cs_bool_t *b_array = array;
2380
for (i = 0; i < n_elts; i++)
2381
if (b_array[i] == true)
2384
assert(b_array[i] == false);
2388
else if (!strncmp(type, "double", strlen("double"))) { /* "double" array */
2390
const double *d_array = array;
2392
for (i = 0; i < n_elts; i++)
2393
fprintf(f, " %10.8e", d_array[i]);
2396
else if (!strncmp(type, "gnum", strlen("gnum"))) { /* "gnum" array */
2398
const fvm_gnum_t *u_array = array;
2400
for (i = 0; i < n_elts; i++)
2401
fprintf(f, " %9llu", (unsigned long long)u_array[i]);
2405
bft_error(__FILE__, __LINE__, 0,
2406
_(" Unexpected type (%s) to display in the current dump.\n"),
2412
/*----------------------------------------------------------------------------
2413
* Dump a cs_join_gset_t structure.
2416
* f <-- handle to output file
2417
* set <-- pointer to the cs_join_gset_t structure to dump
2418
*---------------------------------------------------------------------------*/
2421
cs_join_gset_dump(FILE *f,
2422
const cs_join_gset_t *set)
2432
fprintf(f, "\nDump cs_join_gset_t structure: %p\n", (const void *)set);
2433
fprintf(f, "number of elements: %10d\n", set->n_elts);
2434
fprintf(f, "size of the list : %10d\n\n", set->index[set->n_elts]);
2436
for (i = 0; i < set->n_elts; i++) {
2438
int s = set->index[i], e = set->index[i+1];
2439
int n_matches = e-s;
2440
int n_loops = n_matches/10;
2442
fprintf(f, "Global num: %8llu | subsize: %3d |",
2443
(unsigned long long)set->g_elts[i], n_matches);
2445
for (j = 0; j < n_loops; j++) {
2448
"%8llu %8llu %8llu %8llu %8llu "
2449
"%8llu %8llu %8llu %8llu %8llu\n",
2450
(unsigned long long)set->g_list[s+ 10*j],
2451
(unsigned long long)set->g_list[s+ 10*j + 1],
2452
(unsigned long long)set->g_list[s+ 10*j + 2],
2453
(unsigned long long)set->g_list[s+ 10*j + 3],
2454
(unsigned long long)set->g_list[s+ 10*j + 4],
2455
(unsigned long long)set->g_list[s+ 10*j + 5],
2456
(unsigned long long)set->g_list[s+ 10*j + 6],
2457
(unsigned long long)set->g_list[s+ 10*j + 7],
2458
(unsigned long long)set->g_list[s+ 10*j + 8],
2459
(unsigned long long)set->g_list[s+ 10*j + 9]);
2462
"%8llu %8llu %8llu %8llu %8llu "
2463
"%8llu %8llu %8llu %8llu %8llu\n",
2464
(unsigned long long)set->g_list[s+ 10*j],
2465
(unsigned long long)set->g_list[s+ 10*j + 1],
2466
(unsigned long long)set->g_list[s+ 10*j + 2],
2467
(unsigned long long)set->g_list[s+ 10*j + 3],
2468
(unsigned long long)set->g_list[s+ 10*j + 4],
2469
(unsigned long long)set->g_list[s+ 10*j + 5],
2470
(unsigned long long)set->g_list[s+ 10*j + 6],
2471
(unsigned long long)set->g_list[s+ 10*j + 7],
2472
(unsigned long long)set->g_list[s+ 10*j + 8],
2473
(unsigned long long)set->g_list[s+ 10*j + 9]);
2476
if (e - s+10*n_loops > 0) {
2477
for (j = s + 10*n_loops; j < e; j++) {
2478
if (j == s + 10*n_loops && n_loops > 0)
2480
fprintf(f, "%8llu ", (unsigned long long)set->g_list[j]);
2488
} /* End of loop on boxes */
2493
/*---------------------------------------------------------------------------*/