2
/*******************************************************
4
* Copyright (c) 2003-2010 by University of Queensland
5
* Earth Systems Science Computational Center (ESSCC)
6
* http://www.uq.edu.au/esscc
8
* Primary Business: Queensland, Australia
9
* Licensed under the Open Software License version 3.0
10
* http://www.opensource.org/licenses/osl-3.0.php
12
*******************************************************/
14
/**************************************************************/
16
/* Dudley: NodeFile : creates the mappings using the indexReducedNodes */
17
/* no distribution is happening */
19
/**************************************************************/
24
/**************************************************************/
26
void Dudley_Mesh_createDOFMappingAndCoupling(Dudley_Mesh * in, bool_t use_reduced_elements)
28
index_t min_DOF, max_DOF, *shared = NULL, *offsetInShared = NULL, *locDOFMask =
29
NULL, i, k, myFirstDOF, myLastDOF, *nodeMask = NULL, firstDOF, lastDOF, *globalDOFIndex, *wanted_DOFs = NULL;
30
dim_t mpiSize, len_loc_dof, numNeighbors, n, lastn, numNodes, *rcv_len = NULL, *snd_len = NULL, count;
31
Esys_MPI_rank myRank, p, p_min, p_max, *neighbor = NULL;
32
Paso_SharedComponents *rcv_shcomp = NULL, *snd_shcomp = NULL;
33
Dudley_NodeMapping *this_mapping = NULL;
34
Paso_Connector *this_connector = NULL;
35
Paso_Distribution *dof_distribution;
36
Esys_MPIInfo *mpi_info = in->MPIInfo;
38
MPI_Request *mpi_requests = NULL;
39
MPI_Status *mpi_stati = NULL;
41
int *mpi_requests = NULL, *mpi_stati = NULL;
44
numNodes = in->Nodes->numNodes;
45
if (use_reduced_elements)
47
dof_distribution = in->Nodes->reducedDegreesOfFreedomDistribution;
48
globalDOFIndex = in->Nodes->globalReducedDOFIndex;
52
dof_distribution = in->Nodes->degreesOfFreedomDistribution;
53
globalDOFIndex = in->Nodes->globalDegreesOfFreedom;
55
myFirstDOF = Paso_Distribution_getFirstComponent(dof_distribution);
56
myLastDOF = Paso_Distribution_getLastComponent(dof_distribution);
58
mpiSize = mpi_info->size;
59
myRank = mpi_info->rank;
61
min_DOF = Dudley_Util_getFlaggedMinInt(1, numNodes, globalDOFIndex, -1);
62
max_DOF = Dudley_Util_getFlaggedMaxInt(1, numNodes, globalDOFIndex, -1);
64
if (max_DOF < min_DOF)
67
max_DOF = myLastDOF - 1;
72
if (max_DOF >= min_DOF)
74
for (p = 0; p < mpiSize; ++p)
76
if (dof_distribution->first_component[p] <= min_DOF)
78
if (dof_distribution->first_component[p] <= max_DOF)
83
len_loc_dof = max_DOF - min_DOF + 1;
84
if (!((min_DOF <= myFirstDOF) && (myLastDOF - 1 <= max_DOF)))
86
Dudley_setError(SYSTEM_ERROR, "Local elements do not span local degrees of freedom.");
89
rcv_len = TMPMEMALLOC(mpiSize, dim_t);
90
snd_len = TMPMEMALLOC(mpiSize, dim_t);
92
mpi_requests = MEMALLOC(mpiSize * 2, MPI_Request);
93
mpi_stati = MEMALLOC(mpiSize * 2, MPI_Status);
95
mpi_requests = MEMALLOC(mpiSize * 2, int);
96
mpi_stati = MEMALLOC(mpiSize * 2, int);
98
wanted_DOFs = TMPMEMALLOC(numNodes, index_t);
99
nodeMask = TMPMEMALLOC(numNodes, index_t);
100
neighbor = TMPMEMALLOC(mpiSize, Esys_MPI_rank);
101
shared = TMPMEMALLOC(numNodes * (p_max - p_min + 1), index_t);
102
offsetInShared = TMPMEMALLOC(mpiSize + 1, index_t);
103
locDOFMask = TMPMEMALLOC(len_loc_dof, index_t);
105
(Dudley_checkPtr(neighbor) || Dudley_checkPtr(shared) || Dudley_checkPtr(offsetInShared)
106
|| Dudley_checkPtr(locDOFMask) || Dudley_checkPtr(nodeMask) || Dudley_checkPtr(rcv_len)
107
|| Dudley_checkPtr(snd_len) || Dudley_checkPtr(mpi_requests) || Dudley_checkPtr(mpi_stati)
108
|| Dudley_checkPtr(mpi_stati)))
111
memset(rcv_len, 0, sizeof(dim_t) * mpiSize);
114
#pragma omp for private(i) schedule(static)
115
for (i = 0; i < len_loc_dof; ++i)
116
locDOFMask[i] = UNUSED;
117
#pragma omp for private(i) schedule(static)
118
for (i = 0; i < numNodes; ++i)
119
nodeMask[i] = UNUSED;
120
#pragma omp for private(i,k) schedule(static)
121
for (i = 0; i < numNodes; ++i)
123
k = globalDOFIndex[i];
126
locDOFMask[k - min_DOF] = UNUSED - 1;
128
if ((k - min_DOF) >= len_loc_dof)
130
printf("BOUNDS_CHECK %s %d i=%d k=%d min_DOF=%d\n", __FILE__, __LINE__, i, k, min_DOF);
137
#pragma omp for private(i) schedule(static)
138
for (i = myFirstDOF - min_DOF; i < myLastDOF - min_DOF; ++i)
140
locDOFMask[i] = i - myFirstDOF + min_DOF;
142
if (i < 0 || i >= len_loc_dof)
144
printf("BOUNDS_CHECK %s %d i=%d\n", __FILE__, __LINE__, i);
154
for (p = p_min; p <= p_max; ++p)
156
firstDOF = MAX(min_DOF, dof_distribution->first_component[p]);
157
lastDOF = MIN(max_DOF + 1, dof_distribution->first_component[p + 1]);
160
for (i = firstDOF - min_DOF; i < lastDOF - min_DOF; ++i)
163
if (i < 0 || i >= len_loc_dof)
165
printf("BOUNDS_CHECK %s %d p=%d i=%d\n", __FILE__, __LINE__, p, i);
169
if (locDOFMask[i] == UNUSED - 1)
171
locDOFMask[i] = myLastDOF - myFirstDOF + n;
172
wanted_DOFs[n] = i + min_DOF;
178
rcv_len[p] = n - lastn;
179
neighbor[numNeighbors] = p;
181
if (numNeighbors < 0 || numNeighbors >= mpiSize + 1)
183
printf("BOUNDS_CHECK %s %d p=%d numNeighbors=%d n=%d\n", __FILE__, __LINE__, p, numNeighbors,
188
offsetInShared[numNeighbors] = lastn;
195
if (numNeighbors < 0 || numNeighbors >= mpiSize + 1)
197
printf("BOUNDS_CHECK %s %d numNeighbors=%d\n", __FILE__, __LINE__, numNeighbors);
201
offsetInShared[numNeighbors] = lastn;
203
/* assign new DOF labels to nodes */
204
#pragma omp parallel for private(i,k) schedule(static)
205
for (i = 0; i < numNodes; ++i)
207
k = globalDOFIndex[i];
209
nodeMask[i] = locDOFMask[k - min_DOF];
212
/* now we can set the mapping from nodes to local DOFs */
213
this_mapping = Dudley_NodeMapping_alloc(numNodes, nodeMask, UNUSED);
214
/* define how to get DOF values for controlled bu other processors */
216
for (i = 0; i < offsetInShared[numNeighbors]; ++i)
218
if (i < 0 || i >= numNodes * (p_max - p_min + 1))
220
printf("BOUNDS_CHECK %s %d i=%d\n", __FILE__, __LINE__, i);
225
#pragma omp parallel for private(i) schedule(static)
226
for (i = 0; i < offsetInShared[numNeighbors]; ++i)
227
shared[i] = myLastDOF - myFirstDOF + i;
230
Paso_SharedComponents_alloc(myLastDOF - myFirstDOF, numNeighbors, neighbor, shared, offsetInShared, 1, 0,
234
* now we build the sender
237
MPI_Alltoall(rcv_len, 1, MPI_INT, snd_len, 1, MPI_INT, mpi_info->comm);
239
for (p = 0; p < mpiSize; ++p)
240
snd_len[p] = rcv_len[p];
243
for (p = 0; p < rcv_shcomp->numNeighbors; p++)
246
MPI_Isend(&(wanted_DOFs[rcv_shcomp->offsetInShared[p]]),
247
rcv_shcomp->offsetInShared[p + 1] - rcv_shcomp->offsetInShared[p], MPI_INT,
248
rcv_shcomp->neighbor[p], mpi_info->msg_tag_counter + myRank, mpi_info->comm,
249
&mpi_requests[count]);
255
for (p = 0; p < mpiSize; p++)
260
MPI_Irecv(&(shared[n]), snd_len[p],
261
MPI_INT, p, mpi_info->msg_tag_counter + p, mpi_info->comm, &mpi_requests[count]);
264
neighbor[numNeighbors] = p;
265
offsetInShared[numNeighbors] = n;
270
mpi_info->msg_tag_counter += mpi_info->size;
271
offsetInShared[numNeighbors] = n;
273
MPI_Waitall(count, mpi_requests, mpi_stati);
275
/* map global ids to local id's */
276
#pragma omp parallel for private(i) schedule(static)
277
for (i = 0; i < offsetInShared[numNeighbors]; ++i)
279
shared[i] = locDOFMask[shared[i] - min_DOF];
283
Paso_SharedComponents_alloc(myLastDOF - myFirstDOF, numNeighbors, neighbor, shared, offsetInShared, 1, 0,
284
dof_distribution->mpi_info);
286
if (Dudley_noError())
287
this_connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
288
/* assign new DOF labels to nodes */
289
Paso_SharedComponents_free(rcv_shcomp);
290
Paso_SharedComponents_free(snd_shcomp);
294
TMPMEMFREE(mpi_requests);
295
TMPMEMFREE(mpi_stati);
296
TMPMEMFREE(wanted_DOFs);
297
TMPMEMFREE(nodeMask);
298
TMPMEMFREE(neighbor);
300
TMPMEMFREE(offsetInShared);
301
TMPMEMFREE(locDOFMask);
302
if (Dudley_noError())
304
if (use_reduced_elements)
306
in->Nodes->reducedDegreesOfFreedomMapping = this_mapping;
307
in->Nodes->reducedDegreesOfFreedomConnector = this_connector;
311
in->Nodes->degreesOfFreedomMapping = this_mapping;
312
in->Nodes->degreesOfFreedomConnector = this_connector;
317
Dudley_NodeMapping_free(this_mapping);
318
Paso_Connector_free(this_connector);
323
void Dudley_Mesh_createMappings(Dudley_Mesh * mesh, index_t * dof_distribution, index_t * node_distribution)
326
index_t *maskReducedNodes = NULL, *indexReducedNodes = NULL;
327
dim_t numReducedNodes;
329
maskReducedNodes = TMPMEMALLOC(mesh->Nodes->numNodes, index_t);
330
indexReducedNodes = TMPMEMALLOC(mesh->Nodes->numNodes, index_t);
332
if (!(Dudley_checkPtr(maskReducedNodes) || Dudley_checkPtr(indexReducedNodes)))
334
#pragma omp parallel for private(i) schedule(static)
335
for (i = 0; i < mesh->Nodes->numNodes; ++i)
336
maskReducedNodes[i] = -1;
337
Dudley_Mesh_markNodes(maskReducedNodes, 0, mesh, TRUE);
339
numReducedNodes = Dudley_Util_packMask(mesh->Nodes->numNodes, maskReducedNodes, indexReducedNodes);
340
if (Dudley_noError())
341
Dudley_Mesh_createNodeFileMappings(mesh, numReducedNodes, indexReducedNodes, dof_distribution,
345
TMPMEMFREE(maskReducedNodes);
346
TMPMEMFREE(indexReducedNodes);
349
void Dudley_Mesh_createNodeFileMappings(Dudley_Mesh * in, dim_t numReducedNodes, index_t * indexReducedNodes,
350
index_t * dof_first_component, index_t * nodes_first_component)
353
index_t myFirstDOF, myLastDOF, myFirstNode, myLastNode, *reduced_dof_first_component = NULL, *nodeMask = NULL,
354
*reduced_nodes_first_component = NULL, k, *maskMyReducedDOF = NULL, *indexMyReducedDOF =
355
NULL, *maskMyReducedNodes = NULL, *indexMyReducedNodes = NULL;
356
dim_t myNumDOF, myNumNodes, myNumReducedNodes, myNumReducedDOF, globalNumReducedNodes, globalNumReducedDOF, i,
358
Esys_MPI_rank myRank;
360
mpiSize = in->Nodes->MPIInfo->size;
361
myRank = in->Nodes->MPIInfo->rank;
363
/* mark the nodes used by the reduced mesh */
365
reduced_dof_first_component = TMPMEMALLOC(mpiSize + 1, index_t);
366
reduced_nodes_first_component = TMPMEMALLOC(mpiSize + 1, index_t);
368
if (!(Dudley_checkPtr(reduced_dof_first_component) || Dudley_checkPtr(reduced_nodes_first_component)))
371
myFirstDOF = dof_first_component[myRank];
372
myLastDOF = dof_first_component[myRank + 1];
373
myNumDOF = myLastDOF - myFirstDOF;
375
myFirstNode = nodes_first_component[myRank];
376
myLastNode = nodes_first_component[myRank + 1];
377
myNumNodes = myLastNode - myFirstNode;
379
maskMyReducedDOF = TMPMEMALLOC(myNumDOF, index_t);
380
indexMyReducedDOF = TMPMEMALLOC(myNumDOF, index_t);
381
maskMyReducedNodes = TMPMEMALLOC(myNumNodes, index_t);
382
indexMyReducedNodes = TMPMEMALLOC(myNumNodes, index_t);
385
(Dudley_checkPtr(maskMyReducedDOF) || Dudley_checkPtr(indexMyReducedDOF)
386
|| Dudley_checkPtr(maskMyReducedNodes) || Dudley_checkPtr(indexMyReducedNodes)))
389
#pragma omp parallel private(i)
391
#pragma omp for schedule(static)
392
for (i = 0; i < myNumNodes; ++i)
393
maskMyReducedNodes[i] = -1;
394
#pragma omp for schedule(static)
395
for (i = 0; i < myNumDOF; ++i)
396
maskMyReducedDOF[i] = -1;
397
#pragma omp for private(k) schedule(static)
398
for (i = 0; i < numReducedNodes; ++i)
400
k = in->Nodes->globalNodesIndex[indexReducedNodes[i]];
401
if ((k >= myFirstNode) && (myLastNode > k))
402
maskMyReducedNodes[k - myFirstNode] = i;
403
k = in->Nodes->globalDegreesOfFreedom[indexReducedNodes[i]];
404
if ((k >= myFirstDOF) && (myLastDOF > k))
406
maskMyReducedDOF[k - myFirstDOF] = i;
410
myNumReducedNodes = Dudley_Util_packMask(myNumNodes, maskMyReducedNodes, indexMyReducedNodes);
411
myNumReducedDOF = Dudley_Util_packMask(myNumDOF, maskMyReducedDOF, indexMyReducedDOF);
414
MPI_Allgather(&myNumReducedNodes, 1, MPI_INT, reduced_nodes_first_component, 1, MPI_INT,
415
in->Nodes->MPIInfo->comm);
416
MPI_Allgather(&myNumReducedDOF, 1, MPI_INT, reduced_dof_first_component, 1, MPI_INT,
417
in->Nodes->MPIInfo->comm);
419
reduced_nodes_first_component[0] = myNumReducedNodes;
420
reduced_dof_first_component[0] = myNumReducedDOF;
422
globalNumReducedNodes = 0;
423
globalNumReducedDOF = 0;
424
for (i = 0; i < mpiSize; ++i)
426
k = reduced_nodes_first_component[i];
427
reduced_nodes_first_component[i] = globalNumReducedNodes;
428
globalNumReducedNodes += k;
430
k = reduced_dof_first_component[i];
431
reduced_dof_first_component[i] = globalNumReducedDOF;
432
globalNumReducedDOF += k;
434
reduced_nodes_first_component[mpiSize] = globalNumReducedNodes;
435
reduced_dof_first_component[mpiSize] = globalNumReducedDOF;
436
/* ==== distribution of Nodes =============================== */
437
in->Nodes->nodesDistribution = Paso_Distribution_alloc(in->Nodes->MPIInfo, nodes_first_component, 1, 0);
439
/* ==== distribution of DOFs =============================== */
440
in->Nodes->degreesOfFreedomDistribution =
441
Paso_Distribution_alloc(in->Nodes->MPIInfo, dof_first_component, 1, 0);
443
/* ==== distribution of reduced Nodes =============================== */
444
in->Nodes->reducedNodesDistribution =
445
Paso_Distribution_alloc(in->Nodes->MPIInfo, reduced_nodes_first_component, 1, 0);
447
/* ==== distribution of reduced DOF =============================== */
448
in->Nodes->reducedDegreesOfFreedomDistribution =
449
Paso_Distribution_alloc(in->Nodes->MPIInfo, reduced_dof_first_component, 1, 0);
451
TMPMEMFREE(maskMyReducedDOF);
452
TMPMEMFREE(indexMyReducedDOF);
453
TMPMEMFREE(maskMyReducedNodes);
454
TMPMEMFREE(indexMyReducedNodes);
456
TMPMEMFREE(reduced_dof_first_component);
457
TMPMEMFREE(reduced_nodes_first_component);
459
nodeMask = TMPMEMALLOC(in->Nodes->numNodes, index_t);
460
if (!Dudley_checkPtr(nodeMask) && Dudley_noError())
463
/* ==== nodes mapping which is a dummy structure ======== */
464
#pragma omp parallel for private(i) schedule(static)
465
for (i = 0; i < in->Nodes->numNodes; ++i)
467
in->Nodes->nodesMapping = Dudley_NodeMapping_alloc(in->Nodes->numNodes, nodeMask, UNUSED);
469
/* ==== mapping between nodes and reduced nodes ========== */
470
#pragma omp parallel for private(i) schedule(static)
471
for (i = 0; i < in->Nodes->numNodes; ++i)
472
nodeMask[i] = UNUSED;
473
#pragma omp parallel for private(i) schedule(static)
474
for (i = 0; i < numReducedNodes; ++i)
475
nodeMask[indexReducedNodes[i]] = i;
476
in->Nodes->reducedNodesMapping = Dudley_NodeMapping_alloc(in->Nodes->numNodes, nodeMask, UNUSED);
478
TMPMEMFREE(nodeMask);
479
/* ==== mapping between nodes and DOFs + DOF connector ========== */
480
if (Dudley_noError())
481
Dudley_Mesh_createDOFMappingAndCoupling(in, FALSE);
482
/* ==== mapping between nodes and reduced DOFs + reduced DOF connector ========== */
483
if (Dudley_noError())
484
Dudley_Mesh_createDOFMappingAndCoupling(in, TRUE);
486
/* get the Ids for DOFs and reduced nodes */
487
if (Dudley_noError())
489
#pragma omp parallel private(i)
492
for (i = 0; i < in->Nodes->reducedNodesMapping->numTargets; ++i)
493
in->Nodes->reducedNodesId[i] = in->Nodes->Id[in->Nodes->reducedNodesMapping->map[i]];
495
for (i = 0; i < in->Nodes->degreesOfFreedomMapping->numTargets; ++i)
496
in->Nodes->degreesOfFreedomId[i] = in->Nodes->Id[in->Nodes->degreesOfFreedomMapping->map[i]];
498
for (i = 0; i < in->Nodes->reducedDegreesOfFreedomMapping->numTargets; ++i)
499
in->Nodes->reducedDegreesOfFreedomId[i] =
500
in->Nodes->Id[in->Nodes->reducedDegreesOfFreedomMapping->map[i]];
505
Dudley_NodeMapping_free(in->Nodes->nodesMapping);
506
Dudley_NodeMapping_free(in->Nodes->reducedNodesMapping);
507
Dudley_NodeMapping_free(in->Nodes->degreesOfFreedomMapping);
508
Dudley_NodeMapping_free(in->Nodes->reducedDegreesOfFreedomMapping);
509
Paso_Distribution_free(in->Nodes->nodesDistribution);
510
Paso_Distribution_free(in->Nodes->reducedNodesDistribution);
511
Paso_Distribution_free(in->Nodes->degreesOfFreedomDistribution);
512
Paso_Distribution_free(in->Nodes->reducedDegreesOfFreedomDistribution);
513
Paso_Connector_free(in->Nodes->degreesOfFreedomConnector);
514
Paso_Connector_free(in->Nodes->reducedDegreesOfFreedomConnector);
515
in->Nodes->nodesMapping = NULL;
516
in->Nodes->reducedNodesMapping = NULL;
517
in->Nodes->degreesOfFreedomMapping = NULL;
518
in->Nodes->reducedDegreesOfFreedomMapping = NULL;
519
in->Nodes->nodesDistribution = NULL;
520
in->Nodes->reducedNodesDistribution = NULL;
521
in->Nodes->degreesOfFreedomDistribution = NULL;
522
in->Nodes->reducedDegreesOfFreedomDistribution = NULL;
523
in->Nodes->degreesOfFreedomConnector = NULL;
524
in->Nodes->reducedDegreesOfFreedomConnector = NULL;