/*******************************************************
*
* Copyright (c) 2003-2010 by University of Queensland
* Earth Systems Science Computational Center (ESSCC)
* http://www.uq.edu.au/esscc
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
*******************************************************/

/**************************************************************/

/* Dudley: ElementFile: this will redistribute the Elements, including overlap, by the rank of their DOFs */

/**************************************************************/

#include "ElementFile.h"

/**************************************************************/
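/* Note (added comment): as inferred from the code below, mpiRankOfDOF maps a degree-of-freedom
   (node) index to the MPI rank that owns it, and Id maps a local node index to the id it should
   carry after redistribution. Each rank packs every element it owns once per destination rank
   and exchanges the buffers; the new owner of an element is the rank holding the largest number
   of its DOFs. */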
void Dudley_ElementFile_distributeByRankOfDOF(Dudley_ElementFile * self, Esys_MPI_rank * mpiRankOfDOF, index_t * Id)
{
    size_t size_size;
    Esys_MPI_rank myRank, p, *Owner_buffer = NULL, loc_proc_mask_max;
    dim_t e, j, i, size, *send_count = NULL, *recv_count = NULL, *newOwner = NULL, *loc_proc_mask = NULL,
        *loc_send_count = NULL, newNumElements, numElementsInBuffer, numNodes, numRequests, NN;
    index_t *send_offset = NULL, *recv_offset = NULL, *Id_buffer = NULL, *Tag_buffer = NULL, *Nodes_buffer = NULL, k;
    bool_t *proc_mask = NULL;
#ifdef ESYS_MPI                 /* assumes the MPI build guard is ESYS_MPI, matching the Esys_MPI_rank type used here */
    MPI_Request *mpi_requests = NULL;
    MPI_Status *mpi_stati = NULL;
#endif

    if (self == NULL)
        return;
    myRank = self->MPIInfo->rank;
    size = self->MPIInfo->size;
    size_size = size * sizeof(dim_t);
    numNodes = self->numNodes;
    NN = self->numNodes;
    if (size > 1)
    {
#ifdef ESYS_MPI
        mpi_requests = TMPMEMALLOC(8 * size, MPI_Request);
        mpi_stati = TMPMEMALLOC(8 * size, MPI_Status);
        Dudley_checkPtr(mpi_requests);
        Dudley_checkPtr(mpi_stati);
#endif
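        /* Note (added comment): 8 * size requests is the worst case below:
           up to 4 MPI_Irecv plus 4 MPI_Issend per peer rank, one of each for
           the Id, Tag, Owner and Nodes arrays. */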

        /* count the number of elements that have to be sent to each processor (send_count)
           and define a new element owner as the processor with the largest number of DOFs and the smallest id */
        send_count = TMPMEMALLOC(size, dim_t);
        recv_count = TMPMEMALLOC(size, dim_t);
        newOwner = TMPMEMALLOC(self->numElements, Esys_MPI_rank);
        if (!(Dudley_checkPtr(send_count) || Dudley_checkPtr(recv_count) || Dudley_checkPtr(newOwner)))
        {
            memset(send_count, 0, size_size);
#pragma omp parallel private(p,loc_proc_mask,loc_send_count)
            {
                loc_proc_mask = THREAD_MEMALLOC(size, dim_t);
                loc_send_count = THREAD_MEMALLOC(size, dim_t);
                memset(loc_send_count, 0, size_size);
#pragma omp for private(e,j,loc_proc_mask_max) schedule(static)
                for (e = 0; e < self->numElements; e++)
                {
                    if (self->Owner[e] == myRank)
                    {
                        newOwner[e] = myRank;
                        memset(loc_proc_mask, 0, size_size);
                        for (j = 0; j < numNodes; j++)
                        {
                            p = mpiRankOfDOF[self->Nodes[INDEX2(j, e, NN)]];
                            loc_proc_mask[p]++; /* count the element's DOFs held by rank p */
                        }
                        loc_proc_mask_max = 0;
                        for (p = 0; p < size; ++p)
                        {
                            if (loc_proc_mask[p] > 0)
                                loc_send_count[p]++;    /* the element has to be sent to rank p */
                            if (loc_proc_mask[p] > loc_proc_mask_max)
                            {
                                newOwner[e] = p;
                                loc_proc_mask_max = loc_proc_mask[p];
                            }
                        }
                    }
                    else
                    {
                        newOwner[e] = -1;
                    }
                }
                /* merge the thread-local counts into the shared send_count */
#pragma omp critical
                {
                    for (p = 0; p < size; ++p)
                        send_count[p] += loc_send_count[p];
                }
                THREAD_MEMFREE(loc_proc_mask);
                THREAD_MEMFREE(loc_send_count);
            }
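            /* exchange the send counts so that every rank knows how many elements it will
               receive from each peer; without MPI this reduces to a local copy */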
#ifdef ESYS_MPI
            MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, self->MPIInfo->comm);
#else
            for (p = 0; p < size; ++p)
                recv_count[p] = send_count[p];
#endif
            /* get the new number of elements for this processor */
            newNumElements = 0;
            for (p = 0; p < size; ++p)
                newNumElements += recv_count[p];

            /* get the number of elements this processor has to pack into the send buffers */
            numElementsInBuffer = 0;
            for (p = 0; p < size; ++p)
                numElementsInBuffer += send_count[p];
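            /* Note (added comment): numElementsInBuffer counts an element once per destination
               rank, so it is the total number of element copies packed below rather than the
               number of distinct elements owned by this rank. */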
            /* allocate buffers */
            Id_buffer = TMPMEMALLOC(numElementsInBuffer, index_t);
            Tag_buffer = TMPMEMALLOC(numElementsInBuffer, index_t);
            Owner_buffer = TMPMEMALLOC(numElementsInBuffer, Esys_MPI_rank);
            Nodes_buffer = TMPMEMALLOC(numElementsInBuffer * NN, index_t);
            send_offset = TMPMEMALLOC(size, index_t);
            recv_offset = TMPMEMALLOC(size, index_t);
            proc_mask = TMPMEMALLOC(size, bool_t);
            if (!(Dudley_checkPtr(Id_buffer) || Dudley_checkPtr(Tag_buffer) || Dudley_checkPtr(Owner_buffer) ||
                  Dudley_checkPtr(Nodes_buffer) || Dudley_checkPtr(send_offset) || Dudley_checkPtr(recv_offset) ||
                  Dudley_checkPtr(proc_mask)))
            {
                /* calculate the offsets for the processor buffers (exclusive prefix sums of the counts) */
                recv_offset[0] = 0;
                for (p = 0; p < size - 1; ++p)
                    recv_offset[p + 1] = recv_offset[p] + recv_count[p];
                send_offset[0] = 0;
                for (p = 0; p < size - 1; ++p)
                    send_offset[p + 1] = send_offset[p] + send_count[p];

                /* reset send_count: it is rebuilt while packing the buffers */
                memset(send_count, 0, size_size);
                /* copy elements into the buffers; proc_mask makes sure that an element is copied only once for each destination processor */
                for (e = 0; e < self->numElements; e++)
                {
                    if (self->Owner[e] == myRank)
                    {
                        memset(proc_mask, TRUE, size * sizeof(bool_t)); /* use the proc_mask element size here rather than size_size to stay inside the allocation */
                        for (j = 0; j < numNodes; j++)
                        {
                            p = mpiRankOfDOF[self->Nodes[INDEX2(j, e, NN)]];
                            if (proc_mask[p])
                            {
                                k = send_offset[p] + send_count[p];
                                Id_buffer[k] = self->Id[e];
                                Tag_buffer[k] = self->Tag[e];
                                Owner_buffer[k] = newOwner[e];
                                for (i = 0; i < numNodes; i++)
                                    Nodes_buffer[INDEX2(i, k, NN)] = Id[self->Nodes[INDEX2(i, e, NN)]];
                                send_count[p]++;
                                proc_mask[p] = FALSE;
                            }
                        }
                    }
                }
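                /* Note (added comment): after packing, send_count[p] again holds the number of
                   elements stored in rank p's section of the buffers, which is exactly the
                   message length passed to MPI_Issend below. */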
                /* allocate new tables */
                Dudley_ElementFile_allocTable(self, newNumElements);

                /* start to receive new elements */
                numRequests = 0;
                for (p = 0; p < size; ++p)
                {
                    if (recv_count[p] > 0)
                    {
#ifdef ESYS_MPI
                        MPI_Irecv(&(self->Id[recv_offset[p]]), recv_count[p],
                                  MPI_INT, p, self->MPIInfo->msg_tag_counter + myRank,
                                  self->MPIInfo->comm, &mpi_requests[numRequests]);
                        numRequests++;
                        MPI_Irecv(&(self->Tag[recv_offset[p]]), recv_count[p],
                                  MPI_INT, p, self->MPIInfo->msg_tag_counter + size + myRank,
                                  self->MPIInfo->comm, &mpi_requests[numRequests]);
                        numRequests++;
                        MPI_Irecv(&(self->Owner[recv_offset[p]]), recv_count[p],
                                  MPI_INT, p, self->MPIInfo->msg_tag_counter + 2 * size + myRank,
                                  self->MPIInfo->comm, &mpi_requests[numRequests]);
                        numRequests++;
                        MPI_Irecv(&(self->Nodes[recv_offset[p] * NN]), recv_count[p] * NN,
                                  MPI_INT, p, self->MPIInfo->msg_tag_counter + 3 * size + myRank,
                                  self->MPIInfo->comm, &mpi_requests[numRequests]);
                        numRequests++;
#endif
                    }
                }
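                /* Note (added comment): the tags pair up because a receive from rank p is posted
                   with msg_tag_counter + <block> + myRank while the matching send to rank p uses
                   msg_tag_counter + <block> + p, with one tag block per array (Id, Tag, Owner, Nodes). */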
                /* now the buffers can be sent away */
                for (p = 0; p < size; ++p)
                {
                    if (send_count[p] > 0)
                    {
#ifdef ESYS_MPI
                        MPI_Issend(&(Id_buffer[send_offset[p]]), send_count[p],
                                   MPI_INT, p, self->MPIInfo->msg_tag_counter + p,
                                   self->MPIInfo->comm, &mpi_requests[numRequests]);
                        numRequests++;
                        MPI_Issend(&(Tag_buffer[send_offset[p]]), send_count[p],
                                   MPI_INT, p, self->MPIInfo->msg_tag_counter + size + p,
                                   self->MPIInfo->comm, &mpi_requests[numRequests]);
                        numRequests++;
                        MPI_Issend(&(Owner_buffer[send_offset[p]]), send_count[p],
                                   MPI_INT, p, self->MPIInfo->msg_tag_counter + 2 * size + p,
                                   self->MPIInfo->comm, &mpi_requests[numRequests]);
                        numRequests++;
                        MPI_Issend(&(Nodes_buffer[send_offset[p] * NN]), send_count[p] * NN,
                                   MPI_INT, p, self->MPIInfo->msg_tag_counter + 3 * size + p,
                                   self->MPIInfo->comm, &mpi_requests[numRequests]);
                        numRequests++;
#endif
                    }
                }
                self->MPIInfo->msg_tag_counter += 4 * size;
                /* wait for the requests to be finalized */
#ifdef ESYS_MPI
                MPI_Waitall(numRequests, mpi_requests, mpi_stati);
#endif
            }
            /* clear the buffers */
            TMPMEMFREE(Id_buffer);
            TMPMEMFREE(Tag_buffer);
            TMPMEMFREE(Owner_buffer);
            TMPMEMFREE(Nodes_buffer);
            TMPMEMFREE(send_offset);
            TMPMEMFREE(recv_offset);
            TMPMEMFREE(proc_mask);
        }
#ifdef ESYS_MPI
        TMPMEMFREE(mpi_requests);
        TMPMEMFREE(mpi_stati);
#endif
        TMPMEMFREE(send_count);
        TMPMEMFREE(recv_count);
        TMPMEMFREE(newOwner);
    }
    else
    {
        /* only one rank: just set the ownership and translate the node indices in place */
#pragma omp for private(e,i) schedule(static)
        for (e = 0; e < self->numElements; e++)
        {
            self->Owner[e] = myRank;
            for (i = 0; i < numNodes; i++)
                self->Nodes[INDEX2(i, e, NN)] = Id[self->Nodes[INDEX2(i, e, NN)]];
        }
    }
    return;
}