~vcs-imports/escript-finley/trunk


Viewing changes to dudley/src/ElementFile_distributeByRankOfDOF.c

  • Committer: jfenwick
  • Date: 2010-10-11 01:48:14 UTC
  • Revision ID: svn-v4:77569008-7704-0410-b7a0-a92fef0b09fd:trunk:3259
Merging dudley and scons updates from branches

 
/*******************************************************
*
* Copyright (c) 2003-2010 by University of Queensland
* Earth Systems Science Computational Center (ESSCC)
* http://www.uq.edu.au/esscc
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
*******************************************************/

/**************************************************************/

/*   Dudley: ElementFile: this will redistribute the Elements, including overlap, by the rank of their DOFs */

/**************************************************************/

#include "ElementFile.h"
#ifdef _OPENMP
#include <omp.h>
#endif

/**************************************************************/

void Dudley_ElementFile_distributeByRankOfDOF(Dudley_ElementFile * self, Esys_MPI_rank * mpiRankOfDOF, index_t * Id)
{
    size_t size_size;
    Esys_MPI_rank myRank, p, *Owner_buffer = NULL, loc_proc_mask_max;
    dim_t e, j, i, size, *send_count = NULL, *recv_count = NULL, *newOwner = NULL, *loc_proc_mask =
        NULL, *loc_send_count = NULL, newNumElements, numElementsInBuffer, numNodes, numRequests, NN;
    index_t *send_offset = NULL, *recv_offset = NULL, *Id_buffer = NULL, *Tag_buffer = NULL, *Nodes_buffer = NULL, k;
    bool_t *proc_mask = NULL;
#ifdef ESYS_MPI
    MPI_Request *mpi_requests = NULL;
    MPI_Status *mpi_stati = NULL;
#endif
    if (self == NULL)
        return;
    myRank = self->MPIInfo->rank;
    size = self->MPIInfo->size;
    size_size = size * sizeof(dim_t);
    numNodes = self->numNodes;
    NN = self->numNodes;
    if (size > 1)
    {
#ifdef ESYS_MPI
        mpi_requests = TMPMEMALLOC(8 * size, MPI_Request);
        mpi_stati = TMPMEMALLOC(8 * size, MPI_Status);
        Dudley_checkPtr(mpi_requests);
        Dudley_checkPtr(mpi_stati);
#endif

        /* count the number of elements that have to be sent to each processor (send_count)
           and define the new element owner as the processor with the largest number of DOFs
           and, in the case of a tie, the smallest rank */
        send_count = TMPMEMALLOC(size, dim_t);
        recv_count = TMPMEMALLOC(size, dim_t);
        newOwner = TMPMEMALLOC(self->numElements, Esys_MPI_rank);
        if (!(Dudley_checkPtr(send_count) || Dudley_checkPtr(recv_count) || Dudley_checkPtr(newOwner)))
        {
            memset(send_count, 0, size_size);
#pragma omp parallel private(p,loc_proc_mask,loc_send_count)
            {
                loc_proc_mask = THREAD_MEMALLOC(size, dim_t);
                loc_send_count = THREAD_MEMALLOC(size, dim_t);
                memset(loc_send_count, 0, size_size);
#pragma omp for private(e,j,loc_proc_mask_max) schedule(static)
                for (e = 0; e < self->numElements; e++)
                {
                    if (self->Owner[e] == myRank)
                    {
                        newOwner[e] = myRank;
                        memset(loc_proc_mask, 0, size_size);
                        for (j = 0; j < numNodes; j++)
                        {
                            p = mpiRankOfDOF[self->Nodes[INDEX2(j, e, NN)]];
                            loc_proc_mask[p]++;
                        }
                        loc_proc_mask_max = 0;
                        for (p = 0; p < size; ++p)
                        {
                            if (loc_proc_mask[p] > 0)
                                loc_send_count[p]++;
                            if (loc_proc_mask[p] > loc_proc_mask_max)
                            {
                                newOwner[e] = p;
                                loc_proc_mask_max = loc_proc_mask[p];
                            }
                        }
                    }
                    else
                    {
                        newOwner[e] = -1;
                    }
                }
#pragma omp critical
                {
                    for (p = 0; p < size; ++p)
                        send_count[p] += loc_send_count[p];
                }
                THREAD_MEMFREE(loc_proc_mask);
                THREAD_MEMFREE(loc_send_count);
            }
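            /* At this point newOwner[e] holds, for each locally owned element, the rank that owns the
               largest share of its DOFs (ties broken towards the smaller rank, -1 for elements owned
               elsewhere), and send_count[p] is the number of locally owned elements that touch at
               least one DOF assigned to rank p. */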
 
#ifdef ESYS_MPI
            MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, self->MPIInfo->comm);
#else
            for (p = 0; p < size; ++p)
                recv_count[p] = send_count[p];
#endif
            /* get the new number of elements for this processor */
            newNumElements = 0;
            for (p = 0; p < size; ++p)
                newNumElements += recv_count[p];

            /* get the number of elements to be copied into the send buffer */
            numElementsInBuffer = 0;
            for (p = 0; p < size; ++p)
                numElementsInBuffer += send_count[p];
            /* allocate buffers */
            Id_buffer = TMPMEMALLOC(numElementsInBuffer, index_t);
            Tag_buffer = TMPMEMALLOC(numElementsInBuffer, index_t);
            Owner_buffer = TMPMEMALLOC(numElementsInBuffer, Esys_MPI_rank);
            Nodes_buffer = TMPMEMALLOC(numElementsInBuffer * NN, index_t);
            send_offset = TMPMEMALLOC(size, index_t);
            recv_offset = TMPMEMALLOC(size, index_t);
            proc_mask = TMPMEMALLOC(size, bool_t);
            if (!(Dudley_checkPtr(Id_buffer) || Dudley_checkPtr(Tag_buffer) || Dudley_checkPtr(Owner_buffer) ||
                  Dudley_checkPtr(Nodes_buffer) || Dudley_checkPtr(send_offset) || Dudley_checkPtr(recv_offset) ||
                  Dudley_checkPtr(proc_mask)))
            {

                /* calculate the offsets for the processor buffers */
                recv_offset[0] = 0;
                for (p = 0; p < size - 1; ++p)
                    recv_offset[p + 1] = recv_offset[p] + recv_count[p];
                send_offset[0] = 0;
                for (p = 0; p < size - 1; ++p)
                    send_offset[p + 1] = send_offset[p] + send_count[p];

                memset(send_count, 0, size_size);
                /* copy elements into the buffers. proc_mask makes sure that an element is copied only once for each processor */
                for (e = 0; e < self->numElements; e++)
                {
                    if (self->Owner[e] == myRank)
                    {
                        memset(proc_mask, TRUE, size_size);
                        for (j = 0; j < numNodes; j++)
                        {
                            p = mpiRankOfDOF[self->Nodes[INDEX2(j, e, NN)]];
                            if (proc_mask[p])
                            {
                                k = send_offset[p] + send_count[p];
                                Id_buffer[k] = self->Id[e];
                                Tag_buffer[k] = self->Tag[e];
                                Owner_buffer[k] = newOwner[e];
                                for (i = 0; i < numNodes; i++)
                                    Nodes_buffer[INDEX2(i, k, NN)] = Id[self->Nodes[INDEX2(i, e, NN)]];
                                send_count[p]++;
                                proc_mask[p] = FALSE;
                            }
                        }
                    }
                }
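                /* The send buffers are now packed contiguously by destination rank; send_count[p] has been
                   rebuilt as the number of elements actually copied into the section starting at send_offset[p]. */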
 
                /* allocate new tables */
                Dudley_ElementFile_allocTable(self, newNumElements);

                /* start to receive the new elements */
                numRequests = 0;
                for (p = 0; p < size; ++p)
                {
                    if (recv_count[p] > 0)
                    {
#ifdef ESYS_MPI
                        MPI_Irecv(&(self->Id[recv_offset[p]]), recv_count[p],
                                  MPI_INT, p, self->MPIInfo->msg_tag_counter + myRank,
                                  self->MPIInfo->comm, &mpi_requests[numRequests]);
                        numRequests++;
                        MPI_Irecv(&(self->Tag[recv_offset[p]]), recv_count[p],
                                  MPI_INT, p, self->MPIInfo->msg_tag_counter + size + myRank,
                                  self->MPIInfo->comm, &mpi_requests[numRequests]);
                        numRequests++;
                        MPI_Irecv(&(self->Owner[recv_offset[p]]), recv_count[p],
                                  MPI_INT, p, self->MPIInfo->msg_tag_counter + 2 * size + myRank,
                                  self->MPIInfo->comm, &mpi_requests[numRequests]);
                        numRequests++;
                        MPI_Irecv(&(self->Nodes[recv_offset[p] * NN]), recv_count[p] * NN,
                                  MPI_INT, p, self->MPIInfo->msg_tag_counter + 3 * size + myRank,
                                  self->MPIInfo->comm, &mpi_requests[numRequests]);
                        numRequests++;
#endif
                    }
                }
                /* now the buffers can be sent away */
                for (p = 0; p < size; ++p)
                {
                    if (send_count[p] > 0)
                    {
#ifdef ESYS_MPI
                        MPI_Issend(&(Id_buffer[send_offset[p]]), send_count[p],
                                   MPI_INT, p, self->MPIInfo->msg_tag_counter + p,
                                   self->MPIInfo->comm, &mpi_requests[numRequests]);
                        numRequests++;
                        MPI_Issend(&(Tag_buffer[send_offset[p]]), send_count[p],
                                   MPI_INT, p, self->MPIInfo->msg_tag_counter + size + p,
                                   self->MPIInfo->comm, &mpi_requests[numRequests]);
                        numRequests++;
                        MPI_Issend(&(Owner_buffer[send_offset[p]]), send_count[p],
                                   MPI_INT, p, self->MPIInfo->msg_tag_counter + 2 * size + p,
                                   self->MPIInfo->comm, &mpi_requests[numRequests]);
                        numRequests++;
                        MPI_Issend(&(Nodes_buffer[send_offset[p] * NN]), send_count[p] * NN,
                                   MPI_INT, p, self->MPIInfo->msg_tag_counter + 3 * size + p,
                                   self->MPIInfo->comm, &mpi_requests[numRequests]);
                        numRequests++;
#endif
                    }
                }
                self->MPIInfo->msg_tag_counter += 4 * size;
                /* wait for the requests to be finalized */
#ifdef ESYS_MPI
                MPI_Waitall(numRequests, mpi_requests, mpi_stati);
#endif
            }
            /* clear the buffers */
            TMPMEMFREE(Id_buffer);
            TMPMEMFREE(Tag_buffer);
            TMPMEMFREE(Owner_buffer);
            TMPMEMFREE(Nodes_buffer);
            TMPMEMFREE(send_offset);
            TMPMEMFREE(recv_offset);
            TMPMEMFREE(proc_mask);
        }
#ifdef ESYS_MPI
        TMPMEMFREE(mpi_requests);
        TMPMEMFREE(mpi_stati);
#endif
        TMPMEMFREE(send_count);
        TMPMEMFREE(recv_count);
        TMPMEMFREE(newOwner);
    }
    else
    {
#pragma omp for private(e,i) schedule(static)
        for (e = 0; e < self->numElements; e++)
        {
            self->Owner[e] = myRank;
            for (i = 0; i < numNodes; i++)
                self->Nodes[INDEX2(i, e, NN)] = Id[self->Nodes[INDEX2(i, e, NN)]];
        }
    }
    return;
}
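
For reference, the file above follows a common MPI redistribution pattern: count items per destination, exchange the counts with MPI_Alltoall, turn them into buffer offsets by a prefix sum, then pair MPI_Irecv with MPI_Issend and finish with MPI_Waitall. The following is a minimal, self-contained sketch of that pattern for a flat array of ints, not part of the dudley sources; the names (redistribute_ints, items, dest) are made up for illustration, and it uses a single tag because each rank pair exchanges only one message here.

/* illustrative sketch only: redistribute items[i] to rank dest[i]; received items are
   returned in *out (caller frees), their number in *n_out */
#include <mpi.h>
#include <stdlib.h>

void redistribute_ints(const int *items, const int *dest, int n, int **out, int *n_out, MPI_Comm comm)
{
    int size, p, i, tag = 0;
    MPI_Comm_size(comm, &size);

    int *send_count = calloc(size, sizeof(int));
    int *recv_count = calloc(size, sizeof(int));
    for (i = 0; i < n; i++)
        send_count[dest[i]]++;

    /* every rank learns how much it will receive from every other rank */
    MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);

    /* prefix sums give the start of each rank's section in the send/receive buffers */
    int *send_offset = calloc(size, sizeof(int));
    int *recv_offset = calloc(size, sizeof(int));
    for (p = 1; p < size; p++) {
        send_offset[p] = send_offset[p - 1] + send_count[p - 1];
        recv_offset[p] = recv_offset[p - 1] + recv_count[p - 1];
    }
    *n_out = recv_offset[size - 1] + recv_count[size - 1];

    /* pack the send buffer contiguously by destination rank */
    int *send_buf = malloc((n > 0 ? n : 1) * sizeof(int));
    int *pos = calloc(size, sizeof(int));
    for (i = 0; i < n; i++)
        send_buf[send_offset[dest[i]] + pos[dest[i]]++] = items[i];

    *out = malloc((*n_out > 0 ? *n_out : 1) * sizeof(int));
    MPI_Request *req = malloc(2 * size * sizeof(MPI_Request));
    int nreq = 0;

    /* post all receives first, then the matching sends; skip empty messages */
    for (p = 0; p < size; p++)
        if (recv_count[p] > 0)
            MPI_Irecv(*out + recv_offset[p], recv_count[p], MPI_INT, p, tag, comm, &req[nreq++]);
    for (p = 0; p < size; p++)
        if (send_count[p] > 0)
            MPI_Issend(send_buf + send_offset[p], send_count[p], MPI_INT, p, tag, comm, &req[nreq++]);

    MPI_Waitall(nreq, req, MPI_STATUSES_IGNORE);

    free(req); free(pos); free(send_buf);
    free(recv_offset); free(send_offset);
    free(recv_count); free(send_count);
}

The design choice mirrors the code above: receives are posted before the synchronous-mode sends so that no rank blocks waiting for a matching receive, and the per-rank offsets let each incoming message land directly in its final position.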