~ubuntu-branches/ubuntu/vivid/emscripten/vivid

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.

2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.

3. This notice may not be removed or altered from any source distribution.

#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"

#include "vectormath/vmInclude.h"

#include <stdio.h> //@todo: remove the debugging printf at some stage

#include "btSoftBodySolver_OpenCLSIMDAware.h"

#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"

#include "BulletSoftBody/btSoftBody.h"

#include "BulletCollision/CollisionShapes/btCapsuleShape.h"

#include <limits.h>

#define WAVEFRONT_SIZE 32

#define WAVEFRONT_BLOCK_MULTIPLIER 2

#define GROUP_SIZE (WAVEFRONT_SIZE*WAVEFRONT_BLOCK_MULTIPLIER)

#define LINKS_PER_SIMD_LANE 16

static const size_t workGroupSize = GROUP_SIZE;

//CL_VERSION_1_1 seems broken on NVidia SDK so just disable it

#if (0)//CL_VERSION_1_1 == 1)

//OpenCL 1.1 kernels use float3

#define MSTRINGIFY(A) #A

static const char* UpdatePositionsFromVelocitiesCLString =

#include "OpenCLC/UpdatePositionsFromVelocities.cl"

static const char* SolvePositionsCLString =

#include "OpenCLC/SolvePositionsSIMDBatched.cl"

static const char* UpdateNodesCLString =

#include "OpenCLC/UpdateNodes.cl"

static const char* UpdatePositionsCLString =

#include "OpenCLC/UpdatePositions.cl"

static const char* UpdateConstantsCLString =

#include "OpenCLC/UpdateConstants.cl"

static const char* IntegrateCLString =

#include "OpenCLC/Integrate.cl"

static const char* ApplyForcesCLString =

#include "OpenCLC/ApplyForces.cl"

static const char* UpdateNormalsCLString =

#include "OpenCLC/UpdateNormals.cl"

static const char* VSolveLinksCLString =

#include "OpenCLC/VSolveLinks.cl"

static const char* SolveCollisionsAndUpdateVelocitiesCLString =

#include "OpenCLC/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl"

static const char* OutputToVertexArrayCLString =

#include "OpenCLC/OutputToVertexArray.cl"

#else

////OpenCL 1.0 kernels don't use float3

#define MSTRINGIFY(A) #A

static const char* UpdatePositionsFromVelocitiesCLString =

#include "OpenCLC10/UpdatePositionsFromVelocities.cl"

static const char* SolvePositionsCLString =

#include "OpenCLC10/SolvePositionsSIMDBatched.cl"

static const char* UpdateNodesCLString =

#include "OpenCLC10/UpdateNodes.cl"

static const char* UpdatePositionsCLString =

#include "OpenCLC10/UpdatePositions.cl"

static const char* UpdateConstantsCLString =

#include "OpenCLC10/UpdateConstants.cl"

static const char* IntegrateCLString =

#include "OpenCLC10/Integrate.cl"

static const char* ApplyForcesCLString =

#include "OpenCLC10/ApplyForces.cl"

static const char* UpdateNormalsCLString =

#include "OpenCLC10/UpdateNormals.cl"

static const char* VSolveLinksCLString =

#include "OpenCLC10/VSolveLinks.cl"

static const char* SolveCollisionsAndUpdateVelocitiesCLString =

#include "OpenCLC10/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl"

static const char* OutputToVertexArrayCLString =

#include "OpenCLC10/OutputToVertexArray.cl"

#endif //CL_VERSION_1_1

btSoftBodyLinkDataOpenCLSIMDAware::btSoftBodyLinkDataOpenCLSIMDAware(cl_command_queue queue, cl_context ctx) :

m_cqCommandQue(queue),

m_wavefrontSize( WAVEFRONT_SIZE ),

m_linksPerWorkItem( LINKS_PER_SIMD_LANE ),

m_maxBatchesWithinWave( 0 ),

m_maxLinksPerWavefront( m_wavefrontSize * m_linksPerWorkItem ),

m_numWavefronts( 0 ),

m_maxVertex( 0 ),

m_clNumBatchesAndVerticesWithinWaves( queue, ctx, &m_numBatchesAndVerticesWithinWaves, true ),

m_clWavefrontVerticesGlobalAddresses( queue, ctx, &m_wavefrontVerticesGlobalAddresses, true ),

100

m_clLinkVerticesLocalAddresses( queue, ctx, &m_linkVerticesLocalAddresses, true ),

101

m_clLinkStrength( queue, ctx, &m_linkStrength, false ),

102

m_clLinksMassLSC( queue, ctx, &m_linksMassLSC, false ),

103

m_clLinksRestLengthSquared( queue, ctx, &m_linksRestLengthSquared, false ),

104

m_clLinksRestLength( queue, ctx, &m_linksRestLength, false ),

105

m_clLinksMaterialLinearStiffnessCoefficient( queue, ctx, &m_linksMaterialLinearStiffnessCoefficient, false )

106

{

107

}

108

109

btSoftBodyLinkDataOpenCLSIMDAware::~btSoftBodyLinkDataOpenCLSIMDAware()

110

{

111

}

112

113

static Vectormath::Aos::Vector3 toVector3( const btVector3 &vec )

114

{

115

Vectormath::Aos::Vector3 outVec( vec.getX(), vec.getY(), vec.getZ() );

116

return outVec;

117

}

118

119

/** Allocate enough space in all link-related arrays to fit numLinks links */

120

void btSoftBodyLinkDataOpenCLSIMDAware::createLinks( int numLinks )

121

{

122

int previousSize = m_links.size();

123

int newSize = previousSize + numLinks;

124

125

btSoftBodyLinkData::createLinks( numLinks );

126

127

// Resize the link addresses array as well

128

m_linkAddresses.resize( newSize );

129

}

130

131

/** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */

132

void btSoftBodyLinkDataOpenCLSIMDAware::setLinkAt(

133

const LinkDescription &link,

134

int linkIndex )

135

{

136

btSoftBodyLinkData::setLinkAt( link, linkIndex );

137

138

if( link.getVertex0() > m_maxVertex )

139

m_maxVertex = link.getVertex0();

140

if( link.getVertex1() > m_maxVertex )

141

m_maxVertex = link.getVertex1();

142

143

// Set the link index correctly for initialisation

144

m_linkAddresses[linkIndex] = linkIndex;

145

}

146

147

bool btSoftBodyLinkDataOpenCLSIMDAware::onAccelerator()

148

{

149

return m_onGPU;

150

}

151

152

bool btSoftBodyLinkDataOpenCLSIMDAware::moveToAccelerator()

153

{

154

bool success = true;

155

success = success && m_clNumBatchesAndVerticesWithinWaves.moveToGPU();

156

success = success && m_clWavefrontVerticesGlobalAddresses.moveToGPU();

157

success = success && m_clLinkVerticesLocalAddresses.moveToGPU();

158

success = success && m_clLinkStrength.moveToGPU();

159

success = success && m_clLinksMassLSC.moveToGPU();

160

success = success && m_clLinksRestLengthSquared.moveToGPU();

161

success = success && m_clLinksRestLength.moveToGPU();

162

success = success && m_clLinksMaterialLinearStiffnessCoefficient.moveToGPU();

163

164

if( success ) {

165

m_onGPU = true;

166

}

167

168

return success;

169

}

170

171

bool btSoftBodyLinkDataOpenCLSIMDAware::moveFromAccelerator()

172

{

173

bool success = true;

174

success = success && m_clNumBatchesAndVerticesWithinWaves.moveToGPU();

175

success = success && m_clWavefrontVerticesGlobalAddresses.moveToGPU();

176

success = success && m_clLinkVerticesLocalAddresses.moveToGPU();

177

success = success && m_clLinkStrength.moveFromGPU();

178

success = success && m_clLinksMassLSC.moveFromGPU();

179

success = success && m_clLinksRestLengthSquared.moveFromGPU();

180

success = success && m_clLinksRestLength.moveFromGPU();

181

success = success && m_clLinksMaterialLinearStiffnessCoefficient.moveFromGPU();

182

183

if( success ) {

184

m_onGPU = false;

185

}

186

187

return success;

188

}

189

190

191

192

193

194

195

196

197

btOpenCLSoftBodySolverSIMDAware::btOpenCLSoftBodySolverSIMDAware(cl_command_queue queue, cl_context ctx) :

198

btOpenCLSoftBodySolver( queue, ctx ),

199

m_linkData(queue, ctx)

200

{

201

// Initial we will clearly need to update solver constants

202

// For now this is global for the cloths linked with this solver - we should probably make this body specific

203

// for performance in future once we understand more clearly when constants need to be updated

204

m_updateSolverConstants = true;

205

206

m_shadersInitialized = false;

207

}

208

209

btOpenCLSoftBodySolverSIMDAware::~btOpenCLSoftBodySolverSIMDAware()

210

{

211

releaseKernels();

212

}

213

214

void btOpenCLSoftBodySolverSIMDAware::optimize( btAlignedObjectArray< btSoftBody * > &softBodies ,bool forceUpdate)

215

{

216

if( forceUpdate|| m_softBodySet.size() != softBodies.size() )

217

{

218

// Have a change in the soft body set so update, reloading all the data

219

getVertexData().clear();

220

getTriangleData().clear();

221

getLinkData().clear();

222

m_softBodySet.resize(0);

223

224

225

for( int softBodyIndex = 0; softBodyIndex < softBodies.size(); ++softBodyIndex )

226

{

227

btSoftBody *softBody = softBodies[ softBodyIndex ];

228

using Vectormath::Aos::Matrix3;

229

using Vectormath::Aos::Point3;

230

231

// Create SoftBody that will store the information within the solver

232

btOpenCLAcceleratedSoftBodyInterface* newSoftBody = new btOpenCLAcceleratedSoftBodyInterface( softBody );

233

m_softBodySet.push_back( newSoftBody );

234

235

m_perClothAcceleration.push_back( toVector3(softBody->getWorldInfo()->m_gravity) );

236

m_perClothDampingFactor.push_back(softBody->m_cfg.kDP);

237

m_perClothVelocityCorrectionCoefficient.push_back( softBody->m_cfg.kVCF );

238

m_perClothLiftFactor.push_back( softBody->m_cfg.kLF );

239

m_perClothDragFactor.push_back( softBody->m_cfg.kDG );

240

m_perClothMediumDensity.push_back(softBody->getWorldInfo()->air_density);

241

242

243

m_perClothFriction.push_back( softBody->getFriction() );

244

m_perClothCollisionObjects.push_back( CollisionObjectIndices(-1, -1) );

245

246

// Add space for new vertices and triangles in the default solver for now

247

// TODO: Include space here for tearing too later

248

int firstVertex = getVertexData().getNumVertices();

249

int numVertices = softBody->m_nodes.size();

250

// Round maxVertices to a multiple of the workgroup size so we know we're safe to run over in a given group

251

// maxVertices can be increased to allow tearing, but should be used sparingly because these extra verts will always be processed

252

int maxVertices = GROUP_SIZE*((numVertices+GROUP_SIZE)/GROUP_SIZE);

253

// Allocate space for new vertices in all the vertex arrays

254

getVertexData().createVertices( numVertices, softBodyIndex, maxVertices );

255

256

int firstTriangle = getTriangleData().getNumTriangles();

257

int numTriangles = softBody->m_faces.size();

258

int maxTriangles = numTriangles;

259

getTriangleData().createTriangles( maxTriangles );

260

261

// Copy vertices from softbody into the solver

262

for( int vertex = 0; vertex < numVertices; ++vertex )

263

{

264

Point3 multPoint(softBody->m_nodes[vertex].m_x.getX(), softBody->m_nodes[vertex].m_x.getY(), softBody->m_nodes[vertex].m_x.getZ());

265

btSoftBodyVertexData::VertexDescription desc;

266

267

// TODO: Position in the softbody might be pre-transformed

268

// or we may need to adapt for the pose.

269

//desc.setPosition( cloth.getMeshTransform()*multPoint );

270

desc.setPosition( multPoint );

271

272

float vertexInverseMass = softBody->m_nodes[vertex].m_im;

273

desc.setInverseMass(vertexInverseMass);

274

getVertexData().setVertexAt( desc, firstVertex + vertex );

275

}

276

277

// Copy triangles similarly

278

// We're assuming here that vertex indices are based on the firstVertex rather than the entire scene

279

for( int triangle = 0; triangle < numTriangles; ++triangle )

280

{

281

// Note that large array storage is relative to the array not to the cloth

282

// So we need to add firstVertex to each value

283

int vertexIndex0 = (softBody->m_faces[triangle].m_n[0] - &(softBody->m_nodes[0]));

284

int vertexIndex1 = (softBody->m_faces[triangle].m_n[1] - &(softBody->m_nodes[0]));

285

int vertexIndex2 = (softBody->m_faces[triangle].m_n[2] - &(softBody->m_nodes[0]));

286

btSoftBodyTriangleData::TriangleDescription newTriangle(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, vertexIndex2 + firstVertex);

287

getTriangleData().setTriangleAt( newTriangle, firstTriangle + triangle );

288

289

// Increase vertex triangle counts for this triangle

290

getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex0)++;

291

getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex1)++;

292

getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex2)++;

293

}

294

295

int firstLink = getLinkData().getNumLinks();

296

int numLinks = softBody->m_links.size();

297

int maxLinks = numLinks;

298

299

// Allocate space for the links

300

getLinkData().createLinks( numLinks );

301

302

// Add the links

303

for( int link = 0; link < numLinks; ++link )

304

{

305

int vertexIndex0 = softBody->m_links[link].m_n[0] - &(softBody->m_nodes[0]);

306

int vertexIndex1 = softBody->m_links[link].m_n[1] - &(softBody->m_nodes[0]);

307

308

btSoftBodyLinkData::LinkDescription newLink(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, softBody->m_links[link].m_material->m_kLST);

309

newLink.setLinkStrength(1.f);

310

getLinkData().setLinkAt(newLink, firstLink + link);

311

}

312

313

newSoftBody->setFirstVertex( firstVertex );

314

newSoftBody->setFirstTriangle( firstTriangle );

315

newSoftBody->setNumVertices( numVertices );

316

newSoftBody->setMaxVertices( maxVertices );

317

newSoftBody->setNumTriangles( numTriangles );

318

newSoftBody->setMaxTriangles( maxTriangles );

319

newSoftBody->setFirstLink( firstLink );

320

newSoftBody->setNumLinks( numLinks );

321

}

322

323

324

325

updateConstants(0.f);

326

327

328

m_linkData.generateBatches();

329

m_triangleData.generateBatches();

330

331

332

// Build the shaders to match the batching parameters

333

buildShaders();

334

}

335

}

336

337

338

btSoftBodyLinkData &btOpenCLSoftBodySolverSIMDAware::getLinkData()

339

{

340

// TODO: Consider setting link data to "changed" here

341

return m_linkData;

342

}

343

344

345

346

347

void btOpenCLSoftBodySolverSIMDAware::updateConstants( float timeStep )

348

{

349

350

using namespace Vectormath::Aos;

351

352

if( m_updateSolverConstants )

353

{

354

m_updateSolverConstants = false;

355

356

// Will have to redo this if we change the structure (tear, maybe) or various other possible changes

357

358

// Initialise link constants

359

const int numLinks = m_linkData.getNumLinks();

360

for( int linkIndex = 0; linkIndex < numLinks; ++linkIndex )

361

{

362

btSoftBodyLinkData::LinkNodePair &vertices( m_linkData.getVertexPair(linkIndex) );

363

m_linkData.getRestLength(linkIndex) = length((m_vertexData.getPosition( vertices.vertex0 ) - m_vertexData.getPosition( vertices.vertex1 )));

364

float invMass0 = m_vertexData.getInverseMass(vertices.vertex0);

365

float invMass1 = m_vertexData.getInverseMass(vertices.vertex1);

366

float linearStiffness = m_linkData.getLinearStiffnessCoefficient(linkIndex);

367

float massLSC = (invMass0 + invMass1)/linearStiffness;

368

m_linkData.getMassLSC(linkIndex) = massLSC;

369

float restLength = m_linkData.getRestLength(linkIndex);

370

float restLengthSquared = restLength*restLength;

371

m_linkData.getRestLengthSquared(linkIndex) = restLengthSquared;

372

}

373

}

374

375

}

376

377

378

379

void btOpenCLSoftBodySolverSIMDAware::solveConstraints( float solverdt )

380

{

381

382

using Vectormath::Aos::Vector3;

383

using Vectormath::Aos::Point3;

384

using Vectormath::Aos::lengthSqr;

385

using Vectormath::Aos::dot;

386

387

// Prepare links

388

int numLinks = m_linkData.getNumLinks();

389

int numVertices = m_vertexData.getNumVertices();

390

391

float kst = 1.f;

392

float ti = 0.f;

393

394

395

m_clPerClothDampingFactor.moveToGPU();

396

m_clPerClothVelocityCorrectionCoefficient.moveToGPU();

397

398

399

// Ensure data is on accelerator

400

m_linkData.moveToAccelerator();

401

m_vertexData.moveToAccelerator();

402

403

404

//prepareLinks();

405

406

prepareCollisionConstraints();

407

408

// Solve drift

409

for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )

410

{

411

412

for( int i = 0; i < m_linkData.m_wavefrontBatchStartLengths.size(); ++i )

413

{

414

int startWave = m_linkData.m_wavefrontBatchStartLengths[i].start;

415

int numWaves = m_linkData.m_wavefrontBatchStartLengths[i].length;

416

solveLinksForPosition( startWave, numWaves, kst, ti );

417

}

418

} // for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )

419

420

421

// At this point assume that the force array is blank - we will overwrite it

422

solveCollisionsAndUpdateVelocities( 1.f/solverdt );

423

}

424

425

426

//////////////////////////////////////

427

// Kernel dispatches

428

429

430

void btOpenCLSoftBodySolverSIMDAware::solveLinksForPosition( int startWave, int numWaves, float kst, float ti )

431

{

432

cl_int ciErrNum;

433

ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,0, sizeof(int), &startWave);

434

ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,1, sizeof(int), &numWaves);

435

ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,2, sizeof(float), &kst);

436

ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,3, sizeof(float), &ti);

437

438

439

ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,4, sizeof(cl_mem), &m_linkData.m_clNumBatchesAndVerticesWithinWaves.m_buffer);

440

ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,5, sizeof(cl_mem), &m_linkData.m_clWavefrontVerticesGlobalAddresses.m_buffer);

441

ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,6, sizeof(cl_mem), &m_linkData.m_clLinkVerticesLocalAddresses.m_buffer);

442

ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,7, sizeof(cl_mem), &m_linkData.m_clLinksMassLSC.m_buffer);

443

444

ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,8, sizeof(cl_mem), &m_linkData.m_clLinksRestLengthSquared.m_buffer);

445

ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,9, sizeof(cl_mem), &m_vertexData.m_clVertexInverseMass.m_buffer);

446

ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,10, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer);

447

448

ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,11, WAVEFRONT_BLOCK_MULTIPLIER*sizeof(cl_int2), 0);

449

ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,12, m_linkData.getMaxVerticesPerWavefront()*WAVEFRONT_BLOCK_MULTIPLIER*sizeof(cl_float4), 0);

450

ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,13, m_linkData.getMaxVerticesPerWavefront()*WAVEFRONT_BLOCK_MULTIPLIER*sizeof(cl_float), 0);

451

452

size_t numWorkItems = workGroupSize*((numWaves*WAVEFRONT_SIZE + (workGroupSize-1)) / workGroupSize);

453

454

ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,solvePositionsFromLinksKernel,1,NULL,&numWorkItems,&workGroupSize,0,0,0);

455

456

if( ciErrNum!= CL_SUCCESS )

457

{

458

btAssert( 0 && "enqueueNDRangeKernel(solvePositionsFromLinksKernel)");

459

}

460

461

} // solveLinksForPosition

462

463

void btOpenCLSoftBodySolverSIMDAware::solveCollisionsAndUpdateVelocities( float isolverdt )

464

{

465

// Copy kernel parameters to GPU

466

m_vertexData.moveToAccelerator();

467

m_clPerClothFriction.moveToGPU();

468

m_clPerClothDampingFactor.moveToGPU();

469

m_clPerClothCollisionObjects.moveToGPU();

470

m_clCollisionObjectDetails.moveToGPU();

471

472

cl_int ciErrNum;

473

int numVerts = m_vertexData.getNumVertices();

474

ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 0, sizeof(int), &numVerts);

475

ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 1, sizeof(int), &isolverdt);

476

ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 2, sizeof(cl_mem),&m_vertexData.m_clClothIdentifier.m_buffer);

477

ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 3, sizeof(cl_mem),&m_vertexData.m_clVertexPreviousPosition.m_buffer);

478

ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 4, sizeof(cl_mem),&m_clPerClothFriction.m_buffer);

479

ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 5, sizeof(cl_mem),&m_clPerClothDampingFactor.m_buffer);

480

ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 6, sizeof(cl_mem),&m_clPerClothCollisionObjects.m_buffer);

481

ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 7, sizeof(cl_mem),&m_clCollisionObjectDetails.m_buffer);

482

ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 8, sizeof(cl_mem),&m_vertexData.m_clVertexForceAccumulator.m_buffer);

483

ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 9, sizeof(cl_mem),&m_vertexData.m_clVertexVelocity.m_buffer);

484

ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 10, sizeof(cl_mem),&m_vertexData.m_clVertexPosition.m_buffer);

485

ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 11, sizeof(CollisionShapeDescription)*16,0);

486

size_t numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize);

487

488

if (numWorkItems)

489

{

490

ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,solveCollisionsAndUpdateVelocitiesKernel, 1, NULL, &numWorkItems, &workGroupSize,0,0,0);

491

492

if( ciErrNum != CL_SUCCESS )

493

{

494

btAssert( 0 && "enqueueNDRangeKernel(solveCollisionsAndUpdateVelocitiesKernel)");

495

}

496

}

497

498

} // btOpenCLSoftBodySolverSIMDAware::updateVelocitiesFromPositionsWithoutVelocities

499

500

// End kernel dispatches

501

/////////////////////////////////////

502

503

504

505

bool btOpenCLSoftBodySolverSIMDAware::buildShaders()

506

{

507

bool returnVal = true;

508

509

if( m_shadersInitialized )

510

return true;

511

512

char *wavefrontMacros = new char[256];

513

514

sprintf(

515

wavefrontMacros,

516

"-DMAX_NUM_VERTICES_PER_WAVE=%d -DMAX_BATCHES_PER_WAVE=%d -DWAVEFRONT_SIZE=%d -DWAVEFRONT_BLOCK_MULTIPLIER=%d -DBLOCK_SIZE=%d",

517

m_linkData.getMaxVerticesPerWavefront(),

518

m_linkData.getMaxBatchesPerWavefront(),

519

m_linkData.getWavefrontSize(),

520

WAVEFRONT_BLOCK_MULTIPLIER,

521

WAVEFRONT_BLOCK_MULTIPLIER*m_linkData.getWavefrontSize());

522

523

updatePositionsFromVelocitiesKernel = clFunctions.compileCLKernelFromString( UpdatePositionsFromVelocitiesCLString, "UpdatePositionsFromVelocitiesKernel", "" );

524

solvePositionsFromLinksKernel = clFunctions.compileCLKernelFromString( SolvePositionsCLString, "SolvePositionsFromLinksKernel", wavefrontMacros );

525

updateVelocitiesFromPositionsWithVelocitiesKernel = clFunctions.compileCLKernelFromString( UpdateNodesCLString, "updateVelocitiesFromPositionsWithVelocitiesKernel", "" );

526

updateVelocitiesFromPositionsWithoutVelocitiesKernel = clFunctions.compileCLKernelFromString( UpdatePositionsCLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel", "" );

527

integrateKernel = clFunctions.compileCLKernelFromString( IntegrateCLString, "IntegrateKernel", "" );

528

applyForcesKernel = clFunctions.compileCLKernelFromString( ApplyForcesCLString, "ApplyForcesKernel", "" );

529

solveCollisionsAndUpdateVelocitiesKernel = clFunctions.compileCLKernelFromString( SolveCollisionsAndUpdateVelocitiesCLString, "SolveCollisionsAndUpdateVelocitiesKernel", "" );

530

531

// TODO: Rename to UpdateSoftBodies

532

resetNormalsAndAreasKernel = clFunctions.compileCLKernelFromString( UpdateNormalsCLString, "ResetNormalsAndAreasKernel", "" );

533

normalizeNormalsAndAreasKernel = clFunctions.compileCLKernelFromString( UpdateNormalsCLString, "NormalizeNormalsAndAreasKernel", "" );

534

updateSoftBodiesKernel = clFunctions.compileCLKernelFromString( UpdateNormalsCLString, "UpdateSoftBodiesKernel", "" );

535

536

delete [] wavefrontMacros;

537

538

if( returnVal )

539

m_shadersInitialized = true;

540

541

return returnVal;

542

}

543

544

545

546

547

static Vectormath::Aos::Transform3 toTransform3( const btTransform &transform )

548

{

549

Vectormath::Aos::Transform3 outTransform;

550

outTransform.setCol(0, toVector3(transform.getBasis().getColumn(0)));

551

outTransform.setCol(1, toVector3(transform.getBasis().getColumn(1)));

552

outTransform.setCol(2, toVector3(transform.getBasis().getColumn(2)));

553

outTransform.setCol(3, toVector3(transform.getOrigin()));

554

return outTransform;

555

}

556

557

558

static void generateBatchesOfWavefronts( btAlignedObjectArray < btAlignedObjectArray <int> > &linksForWavefronts, btSoftBodyLinkData &linkData, int numVertices, btAlignedObjectArray < btAlignedObjectArray <int> > &wavefrontBatches )

559

{

560

// A per-batch map of truth values stating whether a given vertex is in that batch

561

// This allows us to significantly optimize the batching

562

btAlignedObjectArray <btAlignedObjectArray<bool> > mapOfVerticesInBatches;

563

564

for( int waveIndex = 0; waveIndex < linksForWavefronts.size(); ++waveIndex )

565

{

566

btAlignedObjectArray <int> &wavefront( linksForWavefronts[waveIndex] );

567

568

int batch = 0;

569

bool placed = false;

570

while( batch < wavefrontBatches.size() && !placed )

571

{

572

// Test the current batch, see if this wave shares any vertex with the waves in the batch

573

bool foundSharedVertex = false;

574

for( int link = 0; link < wavefront.size(); ++link )

575

{

576

btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );

577

if( (mapOfVerticesInBatches[batch])[vertices.vertex0] || (mapOfVerticesInBatches[batch])[vertices.vertex1] )

578

{

579

foundSharedVertex = true;

580

}

581

}

582

583

if( !foundSharedVertex )

584

{

585

wavefrontBatches[batch].push_back( waveIndex );

586

// Insert vertices into this batch too

587

for( int link = 0; link < wavefront.size(); ++link )

588

{

589

btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );

590

(mapOfVerticesInBatches[batch])[vertices.vertex0] = true;

591

(mapOfVerticesInBatches[batch])[vertices.vertex1] = true;

592

}

593

placed = true;

594

}

595

batch++;

596

}

597

if( batch == wavefrontBatches.size() && !placed )

598

{

599

wavefrontBatches.resize( batch + 1 );

600

wavefrontBatches[batch].push_back( waveIndex );

601

602

// And resize map as well

603

mapOfVerticesInBatches.resize( batch + 1 );

604

605

// Resize maps with total number of vertices

606

mapOfVerticesInBatches[batch].resize( numVertices, false );

607

608

// Insert vertices into this batch too

609

for( int link = 0; link < wavefront.size(); ++link )

610

{

611

btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );

612

(mapOfVerticesInBatches[batch])[vertices.vertex0] = true;

613

(mapOfVerticesInBatches[batch])[vertices.vertex1] = true;

614

}

615

}

616

}

617

mapOfVerticesInBatches.clear();

618

}

619

620

// Function to remove an object from a vector maintaining correct ordering of the vector

621

template< typename T > static void removeFromVector( btAlignedObjectArray< T > &vectorToUpdate, int indexToRemove )

622

{

623

int currentSize = vectorToUpdate.size();

624

for( int i = indexToRemove; i < (currentSize-1); ++i )

625

{

626

vectorToUpdate[i] = vectorToUpdate[i+1];

627

}

628

if( currentSize > 0 )

629

vectorToUpdate.resize( currentSize - 1 );

630

}

631

632

/**

633

* Insert element into vectorToUpdate at index index.

634

635

template< typename T > static void insertAtIndex( btAlignedObjectArray< T > &vectorToUpdate, int index, T element )

636

{

637

vectorToUpdate.resize( vectorToUpdate.size() + 1 );

638

for( int i = (vectorToUpdate.size() - 1); i > index; --i )

639

{

640

vectorToUpdate[i] = vectorToUpdate[i-1];

641

}

642

vectorToUpdate[index] = element;

643

}

644

645

/**

646

* Insert into btAlignedObjectArray assuming the array is ordered and maintaining both ordering and uniqueness.

647

* ie it treats vectorToUpdate as an ordered set.

648

649

template< typename T > static void insertUniqueAndOrderedIntoVector( btAlignedObjectArray<T> &vectorToUpdate, T element )

650

{

651

int index = 0;

652

while( index < vectorToUpdate.size() && vectorToUpdate[index] < element )

653

{

654

index++;

655

}

656

if( index == vectorToUpdate.size() || vectorToUpdate[index] != element )

657

insertAtIndex( vectorToUpdate, index, element );

658

}

659

660

static void generateLinksPerVertex( int numVertices, btSoftBodyLinkData &linkData, btAlignedObjectArray< int > &listOfLinksPerVertex, btAlignedObjectArray <int> &numLinksPerVertex, int &maxLinks )

661

{

662

for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex )

663

{

664

btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) );

665

numLinksPerVertex[nodes.vertex0]++;

666

numLinksPerVertex[nodes.vertex1]++;

667

}

668

int maxLinksPerVertex = 0;

669

for( int vertexIndex = 0; vertexIndex < numVertices; ++vertexIndex )

670

{

671

maxLinksPerVertex = btMax(numLinksPerVertex[vertexIndex], maxLinksPerVertex);

672

}

673

maxLinks = maxLinksPerVertex;

674

675

btAlignedObjectArray< int > linksFoundPerVertex;

676

linksFoundPerVertex.resize( numVertices, 0 );

677

678

listOfLinksPerVertex.resize( maxLinksPerVertex * numVertices );

679

680

for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex )

681

{

682

btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) );

683

{

684

// Do vertex 0

685

int vertexIndex = nodes.vertex0;

686

int linkForVertex = linksFoundPerVertex[nodes.vertex0];

687

int linkAddress = vertexIndex * maxLinksPerVertex + linkForVertex;

688

689

listOfLinksPerVertex[linkAddress] = linkIndex;

690

691

linksFoundPerVertex[nodes.vertex0] = linkForVertex + 1;

692

}

693

{

694

// Do vertex 1

695

int vertexIndex = nodes.vertex1;

696

int linkForVertex = linksFoundPerVertex[nodes.vertex1];

697

int linkAddress = vertexIndex * maxLinksPerVertex + linkForVertex;

698

699

listOfLinksPerVertex[linkAddress] = linkIndex;

700

701

linksFoundPerVertex[nodes.vertex1] = linkForVertex + 1;

702

}

703

}

704

}

705

706

static void computeBatchingIntoWavefronts(

707

btSoftBodyLinkData &linkData,

708

int wavefrontSize,

709

int linksPerWorkItem,

710

int maxLinksPerWavefront,

711

btAlignedObjectArray < btAlignedObjectArray <int> > &linksForWavefronts,

712

btAlignedObjectArray< btAlignedObjectArray < btAlignedObjectArray <int> > > &batchesWithinWaves, /* wave, batch, links in batch */

713

btAlignedObjectArray< btAlignedObjectArray< int > > &verticesForWavefronts /* wavefront, vertex */

714

)

715

{

716

717

718

// Attempt generation of larger batches of links.

719

btAlignedObjectArray< bool > processedLink;

720

processedLink.resize( linkData.getNumLinks() );

721

btAlignedObjectArray< int > listOfLinksPerVertex;

722

int maxLinksPerVertex = 0;

723

724

// Count num vertices

725

int numVertices = 0;

726

for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex )

727

{

728

btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) );

729

numVertices = btMax( numVertices, nodes.vertex0 + 1 );

730

numVertices = btMax( numVertices, nodes.vertex1 + 1 );

731

}

732

733

// Need list of links per vertex

734

// Compute valence of each vertex

735

btAlignedObjectArray <int> numLinksPerVertex;

736

numLinksPerVertex.resize(0);

737

numLinksPerVertex.resize( numVertices, 0 );

738

739

generateLinksPerVertex( numVertices, linkData, listOfLinksPerVertex, numLinksPerVertex, maxLinksPerVertex );

740

741

if (!numVertices)

742

return;

743

744

for( int vertex = 0; vertex < 10; ++vertex )

745

{

746

for( int link = 0; link < numLinksPerVertex[vertex]; ++link )

747

{

748

int linkAddress = vertex * maxLinksPerVertex + link;

749

}

750

}

751

752

753

// At this point we know what links we have for each vertex so we can start batching

754

755

// We want a vertex to start with, let's go with 0

756

int currentVertex = 0;

757

int linksProcessed = 0;

758

759

btAlignedObjectArray <int> verticesToProcess;

760

761

while( linksProcessed < linkData.getNumLinks() )

762

{

763

// Next wavefront

764

int nextWavefront = linksForWavefronts.size();

765

linksForWavefronts.resize( nextWavefront + 1 );

766

btAlignedObjectArray <int> &linksForWavefront(linksForWavefronts[nextWavefront]);

767

verticesForWavefronts.resize( nextWavefront + 1 );

768

btAlignedObjectArray<int> &vertexSet( verticesForWavefronts[nextWavefront] );

769

770

linksForWavefront.resize(0);

771

772

// Loop to find enough links to fill the wavefront

773

// Stopping if we either run out of links, or fill it

774

while( linksProcessed < linkData.getNumLinks() && linksForWavefront.size() < maxLinksPerWavefront )

775

{

776

// Go through the links for the current vertex

777

for( int link = 0; link < numLinksPerVertex[currentVertex] && linksForWavefront.size() < maxLinksPerWavefront; ++link )

778

{

779

int linkAddress = currentVertex * maxLinksPerVertex + link;

780

int linkIndex = listOfLinksPerVertex[linkAddress];

781

782

// If we have not already processed this link, add it to the wavefront

783

// Claim it as another processed link

784

// Add the vertex at the far end to the list of vertices to process.

785

if( !processedLink[linkIndex] )

786

{

787

linksForWavefront.push_back( linkIndex );

788

linksProcessed++;

789

processedLink[linkIndex] = true;

790

int v0 = linkData.getVertexPair(linkIndex).vertex0;

791

int v1 = linkData.getVertexPair(linkIndex).vertex1;

792

if( v0 == currentVertex )

793

verticesToProcess.push_back( v1 );

794

else

795

verticesToProcess.push_back( v0 );

796

}

797

}

798

if( verticesToProcess.size() > 0 )

799

{

800

// Get the element on the front of the queue and remove it

801

currentVertex = verticesToProcess[0];

802

removeFromVector( verticesToProcess, 0 );

803

} else {

804

// If we've not yet processed all the links, find the first unprocessed one

805

// and select one of its vertices as the current vertex

806

if( linksProcessed < linkData.getNumLinks() )

807

{

808

int searchLink = 0;

809

while( processedLink[searchLink] )

810

searchLink++;

811

currentVertex = linkData.getVertexPair(searchLink).vertex0;

812

}

813

}

814

}

815

816

// We have either finished or filled a wavefront

817

for( int link = 0; link < linksForWavefront.size(); ++link )

818

{

819

int v0 = linkData.getVertexPair( linksForWavefront[link] ).vertex0;

820

int v1 = linkData.getVertexPair( linksForWavefront[link] ).vertex1;

821

insertUniqueAndOrderedIntoVector( vertexSet, v0 );

822

insertUniqueAndOrderedIntoVector( vertexSet, v1 );

823

}

824

// Iterate over links mapped to the wave and batch those

825

// We can run a batch on each cycle trivially

826

827

batchesWithinWaves.resize( batchesWithinWaves.size() + 1 );

828

btAlignedObjectArray < btAlignedObjectArray <int> > &batchesWithinWave( batchesWithinWaves[batchesWithinWaves.size()-1] );

829

830

831

for( int link = 0; link < linksForWavefront.size(); ++link )

832

{

833

int linkIndex = linksForWavefront[link];

834

btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( linkIndex );

835

836

int batch = 0;

837

bool placed = false;

838

while( batch < batchesWithinWave.size() && !placed )

839

{

840

bool foundSharedVertex = false;

841

if( batchesWithinWave[batch].size() >= wavefrontSize )

842

{

843

// If we have already filled this batch, move on to another

844

foundSharedVertex = true;

845

} else {

846

for( int link2 = 0; link2 < batchesWithinWave[batch].size(); ++link2 )

847

{

848

btSoftBodyLinkData::LinkNodePair vertices2 = linkData.getVertexPair( (batchesWithinWave[batch])[link2] );

849

850

if( vertices.vertex0 == vertices2.vertex0 ||

851

vertices.vertex1 == vertices2.vertex0 ||

852

vertices.vertex0 == vertices2.vertex1 ||

853

vertices.vertex1 == vertices2.vertex1 )

854

{

855

foundSharedVertex = true;

856

break;

857

}

858

}

859

}

860

if( !foundSharedVertex )

861

{

862

batchesWithinWave[batch].push_back( linkIndex );

863

placed = true;

864

} else {

865

++batch;

866

}

867

}

868

if( batch == batchesWithinWave.size() && !placed )

869

{

870

batchesWithinWave.resize( batch + 1 );

871

batchesWithinWave[batch].push_back( linkIndex );

872

}

873

}

874

875

}

876

877

}

878

879

void btSoftBodyLinkDataOpenCLSIMDAware::generateBatches()

880

{

881

btAlignedObjectArray < btAlignedObjectArray <int> > linksForWavefronts;

882

btAlignedObjectArray < btAlignedObjectArray <int> > wavefrontBatches;

883

btAlignedObjectArray< btAlignedObjectArray < btAlignedObjectArray <int> > > batchesWithinWaves;

884

btAlignedObjectArray< btAlignedObjectArray< int > > verticesForWavefronts; // wavefronts, vertices in wavefront as an ordered set

885

886

// Group the links into wavefronts

887

computeBatchingIntoWavefronts( *this, m_wavefrontSize, m_linksPerWorkItem, m_maxLinksPerWavefront, linksForWavefronts, batchesWithinWaves, verticesForWavefronts );

888

889

890

// Batch the wavefronts

891

generateBatchesOfWavefronts( linksForWavefronts, *this, m_maxVertex, wavefrontBatches );

892

893

m_numWavefronts = linksForWavefronts.size();

894

895

// At this point we have a description of which links we need to process in each wavefront

896

897

// First correctly fill the batch ranges vector

898

int numBatches = wavefrontBatches.size();

899

m_wavefrontBatchStartLengths.resize(0);

900

int prefixSum = 0;

901

for( int batchIndex = 0; batchIndex < numBatches; ++batchIndex )

902

{

903

int wavesInBatch = wavefrontBatches[batchIndex].size();

904

int nextPrefixSum = prefixSum + wavesInBatch;

905

m_wavefrontBatchStartLengths.push_back( BatchPair( prefixSum, nextPrefixSum - prefixSum ) );

906

907

prefixSum += wavesInBatch;

908

}

909

910

// Also find max number of batches within a wave

911

m_maxBatchesWithinWave = 0;

912

m_maxVerticesWithinWave = 0;

913

m_numBatchesAndVerticesWithinWaves.resize( m_numWavefronts );

914

for( int waveIndex = 0; waveIndex < m_numWavefronts; ++waveIndex )

915

{

916

// See if the number of batches in this wave is greater than the current maxium

917

int batchesInCurrentWave = batchesWithinWaves[waveIndex].size();

918

int verticesInCurrentWave = verticesForWavefronts[waveIndex].size();

919

m_maxBatchesWithinWave = btMax( batchesInCurrentWave, m_maxBatchesWithinWave );

920

m_maxVerticesWithinWave = btMax( verticesInCurrentWave, m_maxVerticesWithinWave );

921

}

922

923

// Add padding values both for alignment and as dudd addresses within LDS to compute junk rather than branch around

924

m_maxVerticesWithinWave = 16*((m_maxVerticesWithinWave/16)+2);

925

926

// Now we know the maximum number of vertices per-wave we can resize the global vertices array

927

m_wavefrontVerticesGlobalAddresses.resize( m_maxVerticesWithinWave * m_numWavefronts );

928

929

// Grab backup copies of all the link data arrays for the sorting process

930

btAlignedObjectArray<btSoftBodyLinkData::LinkNodePair> m_links_Backup(m_links);

931

btAlignedObjectArray<float> m_linkStrength_Backup(m_linkStrength);

932

btAlignedObjectArray<float> m_linksMassLSC_Backup(m_linksMassLSC);

933

btAlignedObjectArray<float> m_linksRestLengthSquared_Backup(m_linksRestLengthSquared);

934

//btAlignedObjectArray<Vectormath::Aos::Vector3> m_linksCLength_Backup(m_linksCLength);

935

//btAlignedObjectArray<float> m_linksLengthRatio_Backup(m_linksLengthRatio);

936

btAlignedObjectArray<float> m_linksRestLength_Backup(m_linksRestLength);

937

btAlignedObjectArray<float> m_linksMaterialLinearStiffnessCoefficient_Backup(m_linksMaterialLinearStiffnessCoefficient);

938

939

// Resize to a wavefront sized batch per batch per wave so we get perfectly coherent memory accesses.

940

m_links.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );

941

m_linkVerticesLocalAddresses.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );

942

m_linkStrength.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );

943

m_linksMassLSC.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );

944

m_linksRestLengthSquared.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );

945

m_linksRestLength.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );

946

m_linksMaterialLinearStiffnessCoefficient.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );

947

948

// Then re-order links into wavefront blocks

949

950

// Total number of wavefronts moved. This will decide the ordering of sorted wavefronts.

951

int wavefrontCount = 0;

952

953

// Iterate over batches of wavefronts, then wavefronts in the batch

954

for( int batchIndex = 0; batchIndex < numBatches; ++batchIndex )

955

{

956

btAlignedObjectArray <int> &batch( wavefrontBatches[batchIndex] );

957

int wavefrontsInBatch = batch.size();

958

959

960

for( int wavefrontIndex = 0; wavefrontIndex < wavefrontsInBatch; ++wavefrontIndex )

961

{

962

963

int originalWavefrontIndex = batch[wavefrontIndex];

964

btAlignedObjectArray< int > &wavefrontVertices( verticesForWavefronts[originalWavefrontIndex] );

965

int verticesUsedByWavefront = wavefrontVertices.size();

966

967

// Copy the set of vertices into the correctly structured array for use on the device

968

// Fill the non-vertices with -1s

969

// so we can mask out those reads

970

for( int vertex = 0; vertex < verticesUsedByWavefront; ++vertex )

971

{

972

m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = wavefrontVertices[vertex];

973

}

974

for( int vertex = verticesUsedByWavefront; vertex < m_maxVerticesWithinWave; ++vertex )

975

{

976

m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = -1;

977

}

978

979

// Obtain the set of batches within the current wavefront

980

btAlignedObjectArray < btAlignedObjectArray <int> > &batchesWithinWavefront( batchesWithinWaves[originalWavefrontIndex] );

981

// Set the size of the batches for use in the solver, correctly ordered

982

NumBatchesVerticesPair batchesAndVertices;

983

batchesAndVertices.numBatches = batchesWithinWavefront.size();

984

batchesAndVertices.numVertices = verticesUsedByWavefront;

985

m_numBatchesAndVerticesWithinWaves[wavefrontCount] = batchesAndVertices;

986

987

988

// Now iterate over batches within the wavefront to structure the links correctly

989

for( int wavefrontBatch = 0; wavefrontBatch < batchesWithinWavefront.size(); ++wavefrontBatch )

990

{

991

btAlignedObjectArray <int> &linksInBatch( batchesWithinWavefront[wavefrontBatch] );

992

int wavefrontBatchSize = linksInBatch.size();

993

994

int batchAddressInTarget = m_maxBatchesWithinWave * m_wavefrontSize * wavefrontCount + m_wavefrontSize * wavefrontBatch;

995

996

for( int linkIndex = 0; linkIndex < wavefrontBatchSize; ++linkIndex )

997

{

998

int originalLinkAddress = linksInBatch[linkIndex];

999

// Reorder simple arrays trivially

1000

m_links[batchAddressInTarget + linkIndex] = m_links_Backup[originalLinkAddress];

1001

m_linkStrength[batchAddressInTarget + linkIndex] = m_linkStrength_Backup[originalLinkAddress];

1002

m_linksMassLSC[batchAddressInTarget + linkIndex] = m_linksMassLSC_Backup[originalLinkAddress];

1003

m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = m_linksRestLengthSquared_Backup[originalLinkAddress];

1004

m_linksRestLength[batchAddressInTarget + linkIndex] = m_linksRestLength_Backup[originalLinkAddress];

1005

m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget + linkIndex] = m_linksMaterialLinearStiffnessCoefficient_Backup[originalLinkAddress];

1006

1007

// The local address is more complicated. We need to work out where a given vertex will end up

1008

// by searching the set of vertices for this link and using the index as the local address

1009

btSoftBodyLinkData::LinkNodePair localPair;

1010

btSoftBodyLinkData::LinkNodePair globalPair = m_links[batchAddressInTarget + linkIndex];

1011

localPair.vertex0 = wavefrontVertices.findLinearSearch( globalPair.vertex0 );

1012

localPair.vertex1 = wavefrontVertices.findLinearSearch( globalPair.vertex1 );

1013

m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair;

1014

}

1015

for( int linkIndex = wavefrontBatchSize; linkIndex < m_wavefrontSize; ++linkIndex )

1016

{

1017

// Put 0s into these arrays for padding for cleanliness

1018

m_links[batchAddressInTarget + linkIndex] = btSoftBodyLinkData::LinkNodePair(0, 0);

1019

m_linkStrength[batchAddressInTarget + linkIndex] = 0.f;

1020

m_linksMassLSC[batchAddressInTarget + linkIndex] = 0.f;

1021

m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = 0.f;

1022

m_linksRestLength[batchAddressInTarget + linkIndex] = 0.f;

1023

m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget + linkIndex] = 0.f;

1024

1025

1026

// For local addresses of junk data choose a set of addresses just above the range of valid ones

1027

// and cycling tyhrough % 16 so that we don't have bank conficts between all dud addresses

1028

// The valid addresses will do scatter and gather in the valid range, the junk ones should happily work

1029

// off the end of that range so we need no control

1030

btSoftBodyLinkData::LinkNodePair localPair;

1031

localPair.vertex0 = verticesUsedByWavefront + (linkIndex % 16);

1032

localPair.vertex1 = verticesUsedByWavefront + (linkIndex % 16);

1033

m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair;

1034

}

1035

1036

}

1037

1038

1039

wavefrontCount++;

1040

}

1041

1042

1043

}

1044

1045

} // void btSoftBodyLinkDataDX11SIMDAware::generateBatches()

1046

1047

1048

Older »