~vcs-imports/escript-finley/trunk

ParMETIS_V3_PartGeomKway(distribution, pattern->ptr, pattern->index, NULL, NULL, &wgtflag, &numflag, &dim, xyz, &ncon, &mpiSize, tpwgts, ubvec, options, &edgecut, partition, /* new CPU ownership of elements */

188

&(in->MPIInfo->comm));

189

/* printf("ParMETIS number of edges cut by partitioning per processor: %d\n", edgecut/MAX(in->MPIInfo->size,1)); */

190

TMPMEMFREE(ubvec);

191

TMPMEMFREE(tpwgts);

192

}

193

else

194

{

195

for (i = 0; i < myNumVertices; ++i)

196

partition[i] = 0; /* CPU 0 owns it */

197

}

198

#else

199

for (i = 0; i < myNumVertices; ++i)

200

partition[i] = myRank; /* CPU 0 owns it */

201

#endif

202

203

}

204

205

Paso_Pattern_free(pattern);

206

207

/* create a new distribution and labeling of the DOF */

208

memset(new_distribution, 0, mpiSize_size);

209

#pragma omp parallel private(loc_partition_count)

210

{

211

loc_partition_count = THREAD_MEMALLOC(mpiSize, dim_t);

212

memset(loc_partition_count, 0, mpiSize_size);

213

#pragma omp for private(i)

214

for (i = 0; i < myNumVertices; ++i)

215

loc_partition_count[partition[i]]++;

216

#pragma omp critical

217

{

218

for (i = 0; i < mpiSize; ++i)

219

new_distribution[i] += loc_partition_count[i];

220

}

221

THREAD_MEMFREE(loc_partition_count);

222

}

223

#ifdef ESYS_MPI

224

/* recvbuf will be the concatenation of each CPU's contribution to new_distribution */

225

MPI_Allgather(new_distribution, mpiSize, MPI_INT, recvbuf, mpiSize, MPI_INT, in->MPIInfo->comm);

226

#else

227

for (i = 0; i < mpiSize; ++i)

228

recvbuf[i] = new_distribution[i];

229

#endif

230

new_distribution[0] = 0;

231

for (rank = 0; rank < mpiSize; rank++)

232

{

233

c = 0;

234

for (i = 0; i < myRank; ++i)

235

c += recvbuf[rank + mpiSize * i];

236

for (i = 0; i < myNumVertices; ++i)

237

{

238

if (rank == partition[i])

239

{

240

newGlobalDOFID[i] = new_distribution[rank] + c;

241

c++;

242

}

243

}

244

for (i = myRank + 1; i < mpiSize; ++i)

245

c += recvbuf[rank + mpiSize * i];

246

new_distribution[rank + 1] = new_distribution[rank] + c;

247

}

248

TMPMEMFREE(recvbuf);

249

250

/* now the overlap needs to be created by sending the partition around */

251

252

dest = Esys_MPIInfo_mod(mpiSize, myRank + 1);

253

source = Esys_MPIInfo_mod(mpiSize, myRank - 1);

254

current_rank = myRank;

255

#pragma omp parallel for private(i)

256

for (i = 0; i < in->Nodes->numNodes; ++i)

257

setNewDOFId[i] = TRUE;

258

259

for (p = 0; p < mpiSize; ++p)

260

{

261

262

firstVertex = distribution[current_rank];

263

lastVertex = distribution[current_rank + 1];

264

#pragma omp parallel for private(i,j,k)

265

for (i = 0; i < in->Nodes->numNodes; ++i)

266

{

267

k = in->Nodes->globalDegreesOfFreedom[i];

268

if (setNewDOFId[i] && (firstVertex <= k) && (k < lastVertex))

269

{

270

in->Nodes->globalDegreesOfFreedom[i] = newGlobalDOFID[k - firstVertex];

271

setNewDOFId[i] = FALSE;

272

}

273

}

274

275

if (p < mpiSize - 1)

276

{ /* the final send can be skipped */

277

#ifdef ESYS_MPI

278

MPI_Sendrecv_replace(newGlobalDOFID, len, MPI_INT,

279

dest, in->MPIInfo->msg_tag_counter,

280

source, in->MPIInfo->msg_tag_counter, in->MPIInfo->comm, &status);

281

#endif

282

in->MPIInfo->msg_tag_counter++;

283

current_rank = Esys_MPIInfo_mod(mpiSize, current_rank - 1);

284

}

285

}

286

for (i = 0; i < mpiSize + 1; ++i)

287

distribution[i] = new_distribution[i];

288

}

289

TMPMEMFREE(index_list);

290

}

291

TMPMEMFREE(newGlobalDOFID);

292

TMPMEMFREE(setNewDOFId);

293

TMPMEMFREE(new_distribution);

294

TMPMEMFREE(partition_count);

295

TMPMEMFREE(partition);

296

TMPMEMFREE(xyz);

297

return;

298

}

Older »