2
Bullet Continuous Collision Detection and Physics Library
3
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
5
This software is provided 'as-is', without any express or implied warranty.
6
In no event will the authors be held liable for any damages arising from the use of this software.
7
Permission is granted to anyone to use this software for any purpose,
8
including commercial applications, and to alter it and redistribute it freely,
9
subject to the following restrictions:
11
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
12
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
13
3. This notice may not be removed or altered from any source distribution.
19
// Batching parameters for the SIMD-aware solver.
// WAVEFRONT_SIZE seeds m_wavefrontSize in the link data constructor.
#define WAVEFRONT_SIZE 32
20
// Number of wavefronts grouped into one dispatch block; used to turn a wave count into a block count when dispatching.
#define WAVEFRONT_BLOCK_MULTIPLIER 2
21
// Work-group size in lanes; optimize() rounds per-cloth vertex capacity using this value.
#define GROUP_SIZE (WAVEFRONT_SIZE*WAVEFRONT_BLOCK_MULTIPLIER)
22
// Seeds m_linksPerWorkItem in the link data constructor.
#define LINKS_PER_SIMD_LANE 16
24
// Two-level stringification: STRINGIFY expands its argument as a macro first, then STRINGIFY2 turns the result into a string literal.
#define STRINGIFY( S ) STRINGIFY2( S )
25
#define STRINGIFY2( S ) #S
27
#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
28
#include "vectormath/vmInclude.h"
30
#include "btSoftBodySolverLinkData_DX11SIMDAware.h"
31
#include "btSoftBodySolver_DX11SIMDAware.h"
32
#include "btSoftBodySolverVertexBuffer_DX11.h"
33
#include "BulletSoftBody/btSoftBody.h"
34
#include "BulletCollision/CollisionShapes/btCapsuleShape.h"
36
// Single-level stringification macro; defined immediately before the HLSL includes below.
// NOTE(review): presumably each .hlsl file wraps its text in MSTRINGIFY(...) and ends the statement — confirm against the shader files.
#define MSTRINGIFY(A) #A
37
// Each pointer below is initialised with whatever expression the following #include expands to,
// giving the solver the shader source as an in-memory string for buildShaders().
// NOTE(review): these are non-const char* pointers; if they bind to string literals, const char* would be the safer type — confirm what compileComputeShaderFromString accepts.
static char* UpdatePositionsFromVelocitiesHLSLString =
38
#include "HLSL/UpdatePositionsFromVelocities.hlsl"
39
static char* SolvePositionsSIMDBatchedHLSLString =
40
#include "HLSL/SolvePositionsSIMDBatched.hlsl"
41
static char* UpdateNodesHLSLString =
42
#include "HLSL/UpdateNodes.hlsl"
43
static char* UpdatePositionsHLSLString =
44
#include "HLSL/UpdatePositions.hlsl"
45
static char* UpdateConstantsHLSLString =
46
#include "HLSL/UpdateConstants.hlsl"
47
static char* IntegrateHLSLString =
48
#include "HLSL/Integrate.hlsl"
49
static char* ApplyForcesHLSLString =
50
#include "HLSL/ApplyForces.hlsl"
51
static char* UpdateNormalsHLSLString =
52
#include "HLSL/UpdateNormals.hlsl"
53
static char* OutputToVertexArrayHLSLString =
54
#include "HLSL/OutputToVertexArray.hlsl"
55
static char* VSolveLinksHLSLString =
56
#include "HLSL/VSolveLinks.hlsl"
57
static char* ComputeBoundsHLSLString =
58
#include "HLSL/ComputeBounds.hlsl"
59
static char* SolveCollisionsAndUpdateVelocitiesHLSLString =
60
// Note the lower-case leading letter in this file name, unlike the other shader files.
#include "HLSL/solveCollisionsAndUpdateVelocitiesSIMDBatched.hlsl"
64
// Constructs the SIMD-aware link data.
// Stores the D3D11 device and context, seeds the wavefront batching parameters from the file-level macros,
// and constructs one DX11 buffer wrapper per host-side link array.
//   d3dDevice        - D3D11 device handed to every buffer wrapper.
//   d3dDeviceContext - D3D11 device context handed to every buffer wrapper.
btSoftBodyLinkDataDX11SIMDAware::btSoftBodyLinkDataDX11SIMDAware( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext ) :
65
m_d3dDevice( d3dDevice ),
66
m_d3dDeviceContext( d3dDeviceContext ),
67
m_wavefrontSize( WAVEFRONT_SIZE ),
68
m_linksPerWorkItem( LINKS_PER_SIMD_LANE ),
69
m_maxBatchesWithinWave( 0 ),
70
// Capacity of one wavefront in links: lanes per wavefront times links handled per lane.
// This reads m_wavefrontSize and m_linksPerWorkItem, so it relies on those members being declared (and thus initialised) first — TODO confirm declaration order in the header.
m_maxLinksPerWavefront( m_wavefrontSize * m_linksPerWorkItem ),
73
// Each wrapper receives the device, the context, a pointer to the matching host array and a flag.
// NOTE(review): the meaning of the trailing 'true' is not visible here — confirm against the buffer wrapper's constructor.
m_dx11NumBatchesAndVerticesWithinWaves( d3dDevice, d3dDeviceContext, &m_numBatchesAndVerticesWithinWaves, true ),
74
m_dx11WavefrontVerticesGlobalAddresses( d3dDevice, d3dDeviceContext, &m_wavefrontVerticesGlobalAddresses, true ),
75
m_dx11LinkVerticesLocalAddresses( d3dDevice, d3dDeviceContext, &m_linkVerticesLocalAddresses, true ),
76
m_dx11LinkStrength( d3dDevice, d3dDeviceContext, &m_linkStrength, true ),
77
m_dx11LinksMassLSC( d3dDevice, d3dDeviceContext, &m_linksMassLSC, true ),
78
m_dx11LinksRestLengthSquared( d3dDevice, d3dDeviceContext, &m_linksRestLengthSquared, true ),
79
m_dx11LinksRestLength( d3dDevice, d3dDeviceContext, &m_linksRestLength, true ),
80
m_dx11LinksMaterialLinearStiffnessCoefficient( d3dDevice, d3dDeviceContext, &m_linksMaterialLinearStiffnessCoefficient, true )
82
// These two assignments repeat what the initialiser list above already did.
m_d3dDevice = d3dDevice;
83
m_d3dDeviceContext = d3dDeviceContext;
86
// Destructor of the SIMD-aware DX11 link data.
btSoftBodyLinkDataDX11SIMDAware::~btSoftBodyLinkDataDX11SIMDAware()
90
// File-local helper: builds a Vectormath::Aos::Vector3 from the x, y and z components of a btVector3.
static Vectormath::Aos::Vector3 toVector3( const btVector3 &vec )
92
Vectormath::Aos::Vector3 outVec( vec.getX(), vec.getY(), vec.getZ() );
96
// Adds storage for numLinks further links.
// Delegates to the base class and then grows m_linkAddresses so that it has one entry per link.
//   numLinks - number of links to append to the current set.
void btSoftBodyLinkDataDX11SIMDAware::createLinks( int numLinks )
98
// Capture the size before the base class call so the new total can be computed.
int previousSize = m_links.size();
99
int newSize = previousSize + numLinks;
101
btSoftBodyLinkData::createLinks( numLinks );
103
// Resize the link addresses array as well
104
m_linkAddresses.resize( newSize );
107
// Stores a link description at linkIndex.
// Besides delegating to the base class, this tracks the largest vertex index referenced by any link
// (m_maxVertex) and initialises the link's address to its own index.
//   link      - description of the link (vertex pair and material data).
//   linkIndex - slot in the link arrays to write.
void btSoftBodyLinkDataDX11SIMDAware::setLinkAt( const btSoftBodyLinkData::LinkDescription &link, int linkIndex )
109
btSoftBodyLinkData::setLinkAt( link, linkIndex );
111
// Keep m_maxVertex equal to the highest vertex index seen so far (an index, not a count).
if( link.getVertex0() > m_maxVertex )
112
m_maxVertex = link.getVertex0();
113
if( link.getVertex1() > m_maxVertex )
114
m_maxVertex = link.getVertex1();
116
// Set the link index correctly for initialisation
117
m_linkAddresses[linkIndex] = linkIndex;
120
// NOTE(review): presumably reports whether the link data currently resides on the accelerator — confirm against the body.
bool btSoftBodyLinkDataDX11SIMDAware::onAccelerator()
125
// Calls moveToGPU() on each of the link data's DX11 buffer wrappers and accumulates the result in 'success'.
// Because of the short-circuiting '&&', once 'success' becomes false the remaining moveToGPU() calls are not made.
bool btSoftBodyLinkDataDX11SIMDAware::moveToAccelerator()
129
success = success && m_dx11NumBatchesAndVerticesWithinWaves.moveToGPU();
130
success = success && m_dx11WavefrontVerticesGlobalAddresses.moveToGPU();
131
success = success && m_dx11LinkVerticesLocalAddresses.moveToGPU();
132
success = success && m_dx11LinkStrength.moveToGPU();
133
success = success && m_dx11LinksMassLSC.moveToGPU();
134
success = success && m_dx11LinksRestLengthSquared.moveToGPU();
135
success = success && m_dx11LinksRestLength.moveToGPU();
136
success = success && m_dx11LinksMaterialLinearStiffnessCoefficient.moveToGPU();
144
// Mirror of moveToAccelerator(): calls moveFromGPU() on each DX11 buffer wrapper and accumulates the result in 'success'.
// Because of the short-circuiting '&&', once 'success' becomes false the remaining moveFromGPU() calls are not made.
bool btSoftBodyLinkDataDX11SIMDAware::moveFromAccelerator()
147
success = success && m_dx11NumBatchesAndVerticesWithinWaves.moveFromGPU();
148
success = success && m_dx11WavefrontVerticesGlobalAddresses.moveFromGPU();
149
success = success && m_dx11LinkVerticesLocalAddresses.moveFromGPU();
150
success = success && m_dx11LinkStrength.moveFromGPU();
151
success = success && m_dx11LinksMassLSC.moveFromGPU();
152
success = success && m_dx11LinksRestLengthSquared.moveFromGPU();
153
success = success && m_dx11LinksRestLength.moveFromGPU();
154
success = success && m_dx11LinksMaterialLinearStiffnessCoefficient.moveFromGPU();
176
// Constructs the SIMD-aware solver on top of the base DX11 solver.
//   dx11Device            - D3D11 device, forwarded to the base solver.
//   dx11Context           - D3D11 device context, forwarded to the base solver.
//   dx11CompileFromMemory - shader compile callback, forwarded to the base solver.
btDX11SIMDAwareSoftBodySolver::btDX11SIMDAwareSoftBodySolver(ID3D11Device * dx11Device, ID3D11DeviceContext* dx11Context, DXFunctions::CompileFromMemoryFunc dx11CompileFromMemory) :
177
btDX11SoftBodySolver( dx11Device, dx11Context, dx11CompileFromMemory ),
178
// The link data is built from the base-class members m_dx11Device / m_dx11Context rather than from the constructor arguments.
m_linkData(m_dx11Device, m_dx11Context)
180
// Initially we will clearly need to update solver constants
181
// For now this is global for the cloths linked with this solver - we should probably make this body specific
182
// for performance in future once we understand more clearly when constants need to be updated
183
m_updateSolverConstants = true;
185
// Shaders are compiled later; buildShaders() sets this flag to true.
m_shadersInitialized = false;
188
// Destructor of the SIMD-aware DX11 soft body solver.
btDX11SIMDAwareSoftBodySolver::~btDX11SIMDAwareSoftBodySolver()
194
// Accessor returning a reference to link data typed as the base class btSoftBodyLinkData.
// NOTE(review): presumably this is the solver's m_linkData member — confirm against the body.
btSoftBodyLinkData &btDX11SIMDAwareSoftBodySolver::getLinkData()
196
// TODO: Consider setting link data to "changed" here
202
// Loads the given soft bodies into the solver's flat vertex, triangle and link arrays.
// When forceUpdate is set or the number of soft bodies differs from the number already held, the existing
// vertex/triangle/link data is cleared and every body's nodes, faces and links are copied in.
// Solver constants are then refreshed and the link and triangle batches regenerated.
//   softBodies  - the soft bodies to be simulated by this solver.
//   forceUpdate - reload even if the soft body count is unchanged.
void btDX11SIMDAwareSoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate)
204
// Only the count is compared here: a different set of bodies with the same size is not detected unless forceUpdate is set.
if(forceUpdate || m_softBodySet.size() != softBodies.size() )
206
// Have a change in the soft body set so update, reloading all the data
207
getVertexData().clear();
208
getTriangleData().clear();
209
getLinkData().clear();
210
// NOTE(review): entries of m_softBodySet are allocated with 'new' below; no matching delete is visible before this resize — confirm ownership.
m_softBodySet.resize(0);
213
for( int softBodyIndex = 0; softBodyIndex < softBodies.size(); ++softBodyIndex )
215
btSoftBody *softBody = softBodies[ softBodyIndex ];
216
using Vectormath::Aos::Matrix3;
217
using Vectormath::Aos::Point3;
219
// Create SoftBody that will store the information within the solver
220
btAcceleratedSoftBodyInterface *newSoftBody = new btAcceleratedSoftBodyInterface( softBody );
221
m_softBodySet.push_back( newSoftBody );
223
// Append one entry per cloth to each per-cloth parameter array, taken from the body's world info and config.
// NOTE(review): these arrays are only appended to in the visible code; confirm they are reset when the set is reloaded.
m_perClothAcceleration.push_back( toVector3(softBody->getWorldInfo()->m_gravity) );
224
m_perClothDampingFactor.push_back(softBody->m_cfg.kDP);
225
m_perClothVelocityCorrectionCoefficient.push_back( softBody->m_cfg.kVCF );
226
m_perClothLiftFactor.push_back( softBody->m_cfg.kLF );
227
m_perClothDragFactor.push_back( softBody->m_cfg.kDG );
228
m_perClothMediumDensity.push_back(softBody->getWorldInfo()->air_density);
229
// Simple init values. Actually we'll put 0 and -1 into them at the appropriate time
230
m_perClothMinBounds.push_back( UIntVector3( 0, 0, 0 ) );
231
m_perClothMaxBounds.push_back( UIntVector3( UINT_MAX, UINT_MAX, UINT_MAX ) );
232
m_perClothFriction.push_back( softBody->getFriction() );
233
m_perClothCollisionObjects.push_back( CollisionObjectIndices(-1, -1) );
235
// Add space for new vertices and triangles in the default solver for now
236
// TODO: Include space here for tearing too later
237
// firstVertex is this body's offset into the shared vertex arrays.
int firstVertex = getVertexData().getNumVertices();
238
int numVertices = softBody->m_nodes.size();
239
// Round maxVertices to a multiple of the workgroup size so we know we're safe to run over in a given group
240
// maxVertices can be increased to allow tearing, but should be used sparingly because these extra verts will always be processed
241
// With integer division this yields the next multiple of GROUP_SIZE strictly greater than numVertices,
// so a body whose vertex count is already an exact multiple still gets one extra group of padding.
int maxVertices = GROUP_SIZE*((numVertices+GROUP_SIZE)/GROUP_SIZE);
242
// Allocate space for new vertices in all the vertex arrays
243
getVertexData().createVertices( numVertices, softBodyIndex, maxVertices );
245
int firstTriangle = getTriangleData().getNumTriangles();
246
int numTriangles = softBody->m_faces.size();
247
int maxTriangles = numTriangles;
248
getTriangleData().createTriangles( maxTriangles );
250
// Copy vertices from softbody into the solver
251
for( int vertex = 0; vertex < numVertices; ++vertex )
253
Point3 multPoint(softBody->m_nodes[vertex].m_x.getX(), softBody->m_nodes[vertex].m_x.getY(), softBody->m_nodes[vertex].m_x.getZ());
254
btSoftBodyVertexData::VertexDescription desc;
256
// TODO: Position in the softbody might be pre-transformed
257
// or we may need to adapt for the pose.
258
//desc.setPosition( cloth.getMeshTransform()*multPoint );
259
desc.setPosition( multPoint );
261
float vertexInverseMass = softBody->m_nodes[vertex].m_im;
262
desc.setInverseMass(vertexInverseMass);
263
getVertexData().setVertexAt( desc, firstVertex + vertex );
266
// Copy triangles similarly
267
// We're assuming here that vertex indices are based on the firstVertex rather than the entire scene
268
for( int triangle = 0; triangle < numTriangles; ++triangle )
270
// Note that large array storage is relative to the array not to the cloth
271
// So we need to add firstVertex to each value
272
// Pointer subtraction against the first node gives each face node's index within softBody->m_nodes.
int vertexIndex0 = (softBody->m_faces[triangle].m_n[0] - &(softBody->m_nodes[0]));
273
int vertexIndex1 = (softBody->m_faces[triangle].m_n[1] - &(softBody->m_nodes[0]));
274
int vertexIndex2 = (softBody->m_faces[triangle].m_n[2] - &(softBody->m_nodes[0]));
275
btSoftBodyTriangleData::TriangleDescription newTriangle(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, vertexIndex2 + firstVertex);
276
getTriangleData().setTriangleAt( newTriangle, firstTriangle + triangle );
278
// Increase vertex triangle counts for this triangle
279
getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex0)++;
280
getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex1)++;
281
getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex2)++;
284
int firstLink = getLinkData().getNumLinks();
285
int numLinks = softBody->m_links.size();
286
// maxLinks is not read again in the visible code.
int maxLinks = numLinks;
288
// Allocate space for the links
289
getLinkData().createLinks( numLinks );
292
for( int link = 0; link < numLinks; ++link )
294
// Same pointer-difference technique as for the faces: index of each link end within softBody->m_nodes.
int vertexIndex0 = softBody->m_links[link].m_n[0] - &(softBody->m_nodes[0]);
295
int vertexIndex1 = softBody->m_links[link].m_n[1] - &(softBody->m_nodes[0]);
297
// Links carry global vertex indices (offset by firstVertex) and the material's m_kLST value.
btSoftBodyLinkData::LinkDescription newLink(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, softBody->m_links[link].m_material->m_kLST);
298
newLink.setLinkStrength(1.f);
299
getLinkData().setLinkAt(newLink, firstLink + link);
302
// Record this body's ranges within the shared arrays on its solver-side interface object.
newSoftBody->setFirstVertex( firstVertex );
303
newSoftBody->setFirstTriangle( firstTriangle );
304
newSoftBody->setNumVertices( numVertices );
305
newSoftBody->setMaxVertices( maxVertices );
306
newSoftBody->setNumTriangles( numTriangles );
307
newSoftBody->setMaxTriangles( maxTriangles );
308
newSoftBody->setFirstLink( firstLink );
309
newSoftBody->setNumLinks( numLinks );
314
// The timestep argument is 0 here; the visible part of updateConstants() does not read it.
updateConstants(0.f);
317
m_linkData.generateBatches();
318
m_triangleData.generateBatches();
321
// Build the shaders to match the batching parameters
329
// Runs the constraint solve for one step.
// Uploads per-cloth parameters and the link/vertex data, prepares collision constraints, then for each position
// iteration dispatches the link solve once per batch of wavefronts, and finally solves collisions and updates velocities.
//   solverdt - solver timestep; its reciprocal is passed to the collision/velocity update.
void btDX11SIMDAwareSoftBodySolver::solveConstraints( float solverdt )
332
//std::cerr << "'GPU' solve constraints\n";
333
using Vectormath::Aos::Vector3;
334
using Vectormath::Aos::Point3;
335
using Vectormath::Aos::lengthSqr;
336
using Vectormath::Aos::dot;
339
int numLinks = m_linkData.getNumLinks();
340
int numVertices = m_vertexData.getNumVertices();
346
m_dx11PerClothDampingFactor.moveToGPU();
347
m_dx11PerClothVelocityCorrectionCoefficient.moveToGPU();
351
// Ensure data is on accelerator
352
m_linkData.moveToAccelerator();
353
m_vertexData.moveToAccelerator();
357
prepareCollisionConstraints();
361
for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
364
// One dispatch per batch: each entry gives the first wave and the number of waves in that batch.
for( int i = 0; i < m_linkData.m_wavefrontBatchStartLengths.size(); ++i )
366
int startWave = m_linkData.m_wavefrontBatchStartLengths[i].start;
367
int numWaves = m_linkData.m_wavefrontBatchStartLengths[i].length;
369
solveLinksForPosition( startWave, numWaves, kst, ti );
372
} // for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
377
// At this point assume that the force array is blank - we will overwrite it
378
// NOTE(review): 1.f/solverdt divides by zero when solverdt is 0 — confirm callers never pass 0.
solveCollisionsAndUpdateVelocities( 1.f/solverdt );
380
} // btDX11SIMDAwareSoftBodySolver::solveConstraints
383
// Recomputes per-link constants when m_updateSolverConstants is set, then clears the flag.
// For every link this stores the rest length (current distance between its two vertices), the rest length
// squared, and massLSC = (invMass0 + invMass1) / linearStiffness.
//   timeStep - not read by the visible code.
void btDX11SIMDAwareSoftBodySolver::updateConstants( float timeStep )
385
using namespace Vectormath::Aos;
387
if( m_updateSolverConstants )
389
m_updateSolverConstants = false;
391
// Will have to redo this if we change the structure (tear, maybe) or various other possible changes
393
// Initialise link constants
394
const int numLinks = m_linkData.getNumLinks();
395
for( int linkIndex = 0; linkIndex < numLinks; ++linkIndex )
397
btSoftBodyLinkData::LinkNodePair &vertices( m_linkData.getVertexPair(linkIndex) );
398
// Rest length is taken from the vertex positions as they are at the time of this call.
m_linkData.getRestLength(linkIndex) = length((m_vertexData.getPosition( vertices.vertex0 ) - m_vertexData.getPosition( vertices.vertex1 )));
399
float invMass0 = m_vertexData.getInverseMass(vertices.vertex0);
400
float invMass1 = m_vertexData.getInverseMass(vertices.vertex1);
401
float linearStiffness = m_linkData.getLinearStiffnessCoefficient(linkIndex);
402
// NOTE(review): a linearStiffness of 0 makes this a floating-point division by zero — confirm materials never use 0.
float massLSC = (invMass0 + invMass1)/linearStiffness;
403
m_linkData.getMassLSC(linkIndex) = massLSC;
404
float restLength = m_linkData.getRestLength(linkIndex);
405
float restLengthSquared = restLength*restLength;
406
m_linkData.getRestLengthSquared(linkIndex) = restLengthSquared;
409
} // btDX11SIMDAwareSoftBodySolver::updateConstants
411
//////////////////////////////////////
415
// Dispatches the SolvePositionsFromLinks compute kernel for one batch of wavefronts.
// Fills and uploads the kernel's constant buffer, binds the link and vertex buffers, dispatches,
// and then unbinds every slot it used.
//   startWave - index of the first wavefront in the batch.
//   numWaves  - number of wavefronts in the batch.
//   kst       - value copied into the constant buffer's kst field.
//   ti        - not read by the visible code.
void btDX11SIMDAwareSoftBodySolver::solveLinksForPosition( int startWave, int numWaves, float kst, float ti )
419
m_vertexData.moveToAccelerator();
420
m_linkData.moveToAccelerator();
422
// Copy kernel parameters to GPU
423
SolvePositionsFromLinksKernelCB constBuffer;
425
// Set the first wave of the batch and the number of waves
426
constBuffer.startWave = startWave;
427
constBuffer.numWaves = numWaves;
429
constBuffer.kst = kst;
432
D3D11_MAPPED_SUBRESOURCE MappedResource = {0};
433
// NOTE(review): the result of Map() is not checked; if it fails, pData stays null (zero-initialised above) and the memcpy below would write through a null pointer.
m_dx11Context->Map( solvePositionsFromLinksKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource );
434
memcpy( MappedResource.pData, &constBuffer, sizeof(SolvePositionsFromLinksKernelCB) );
435
m_dx11Context->Unmap( solvePositionsFromLinksKernel.constBuffer, 0 );
436
m_dx11Context->CSSetConstantBuffers( 0, 1, &solvePositionsFromLinksKernel.constBuffer );
438
// Set resources and dispatch
439
// Read-only inputs go to shader resource slots 0-5; vertex positions are bound as the single unordered access view.
m_dx11Context->CSSetShaderResources( 0, 1, &(m_linkData.m_dx11NumBatchesAndVerticesWithinWaves.getSRV()) );
440
m_dx11Context->CSSetShaderResources( 1, 1, &(m_linkData.m_dx11WavefrontVerticesGlobalAddresses.getSRV()) );
441
m_dx11Context->CSSetShaderResources( 2, 1, &(m_vertexData.m_dx11VertexInverseMass.getSRV()) );
442
m_dx11Context->CSSetShaderResources( 3, 1, &(m_linkData.m_dx11LinkVerticesLocalAddresses.getSRV()) );
443
m_dx11Context->CSSetShaderResources( 4, 1, &(m_linkData.m_dx11LinksMassLSC.getSRV()) );
444
m_dx11Context->CSSetShaderResources( 5, 1, &(m_linkData.m_dx11LinksRestLengthSquared.getSRV()) );
446
m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexPosition.getUAV()), NULL );
448
// Execute the kernel
449
m_dx11Context->CSSetShader( solvePositionsFromLinksKernel.kernel, NULL, 0 );
451
// Round up: one dispatch block covers WAVEFRONT_BLOCK_MULTIPLIER waves.
int numBlocks = ((constBuffer.numWaves + WAVEFRONT_BLOCK_MULTIPLIER - 1) / WAVEFRONT_BLOCK_MULTIPLIER );
452
m_dx11Context->Dispatch(numBlocks , 1, 1 );
456
// Unbind everything bound above by writing null into each slot.
ID3D11ShaderResourceView* pViewNULL = NULL;
457
m_dx11Context->CSSetShaderResources( 0, 1, &pViewNULL );
458
m_dx11Context->CSSetShaderResources( 1, 1, &pViewNULL );
459
m_dx11Context->CSSetShaderResources( 2, 1, &pViewNULL );
460
m_dx11Context->CSSetShaderResources( 3, 1, &pViewNULL );
461
m_dx11Context->CSSetShaderResources( 4, 1, &pViewNULL );
462
m_dx11Context->CSSetShaderResources( 5, 1, &pViewNULL );
464
ID3D11UnorderedAccessView* pUAViewNULL = NULL;
465
m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &pUAViewNULL, NULL );
467
ID3D11Buffer *pBufferNull = NULL;
468
m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull );
470
} // btDX11SIMDAwareSoftBodySolver::solveLinksForPosition
474
// End kernel dispatches
475
/////////////////////////////////////
485
// Compiles every compute kernel used by the solver from the embedded HLSL strings.
// Each kernel is checked by testing its constBuffer member after compilation, and m_shadersInitialized
// is set to true afterwards. The link-solve kernel is compiled with preprocessor macros derived from the
// current batching parameters.
bool btDX11SIMDAwareSoftBodySolver::buildShaders()
487
// Ensure current kernels are released first
490
bool returnVal = true;
493
if( m_shadersInitialized )
497
updatePositionsFromVelocitiesKernel = dxFunctions.compileComputeShaderFromString( UpdatePositionsFromVelocitiesHLSLString, "UpdatePositionsFromVelocitiesKernel", sizeof(UpdatePositionsFromVelocitiesCB) );
498
if( !updatePositionsFromVelocitiesKernel.constBuffer )
501
// Text buffers for the integer macro values handed to the shader compiler below.
char maxVerticesPerWavefront[20];
502
char maxBatchesPerWavefront[20];
503
char waveFrontSize[20];
504
char waveFrontBlockMultiplier[20];
507
sprintf(maxVerticesPerWavefront, "%d", m_linkData.getMaxVerticesPerWavefront());
508
sprintf(maxBatchesPerWavefront, "%d", m_linkData.getMaxBatchesPerWavefront());
509
sprintf(waveFrontSize, "%d", m_linkData.getWavefrontSize());
510
sprintf(waveFrontBlockMultiplier, "%d", WAVEFRONT_BLOCK_MULTIPLIER);
511
sprintf(blockSize, "%d", WAVEFRONT_BLOCK_MULTIPLIER*m_linkData.getWavefrontSize());
513
// Five name/value pairs followed by a {0, 0} terminator entry.
D3D10_SHADER_MACRO solvePositionsMacros[6] = { "MAX_NUM_VERTICES_PER_WAVE", maxVerticesPerWavefront, "MAX_BATCHES_PER_WAVE", maxBatchesPerWavefront, "WAVEFRONT_SIZE", waveFrontSize, "WAVEFRONT_BLOCK_MULTIPLIER", waveFrontBlockMultiplier, "BLOCK_SIZE", blockSize, 0, 0 };
515
solvePositionsFromLinksKernel = dxFunctions.compileComputeShaderFromString( SolvePositionsSIMDBatchedHLSLString, "SolvePositionsFromLinksKernel", sizeof(SolvePositionsFromLinksKernelCB), solvePositionsMacros );
516
if( !solvePositionsFromLinksKernel.constBuffer )
519
updateVelocitiesFromPositionsWithVelocitiesKernel = dxFunctions.compileComputeShaderFromString( UpdateNodesHLSLString, "updateVelocitiesFromPositionsWithVelocitiesKernel", sizeof(UpdateVelocitiesFromPositionsWithVelocitiesCB) );
520
if( !updateVelocitiesFromPositionsWithVelocitiesKernel.constBuffer )
522
updateVelocitiesFromPositionsWithoutVelocitiesKernel = dxFunctions.compileComputeShaderFromString( UpdatePositionsHLSLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel", sizeof(UpdateVelocitiesFromPositionsWithoutVelocitiesCB));
523
if( !updateVelocitiesFromPositionsWithoutVelocitiesKernel.constBuffer )
525
integrateKernel = dxFunctions.compileComputeShaderFromString( IntegrateHLSLString, "IntegrateKernel", sizeof(IntegrateCB) );
526
if( !integrateKernel.constBuffer )
528
applyForcesKernel = dxFunctions.compileComputeShaderFromString( ApplyForcesHLSLString, "ApplyForcesKernel", sizeof(ApplyForcesCB) );
529
if( !applyForcesKernel.constBuffer )
531
solveCollisionsAndUpdateVelocitiesKernel = dxFunctions.compileComputeShaderFromString( SolveCollisionsAndUpdateVelocitiesHLSLString, "SolveCollisionsAndUpdateVelocitiesKernel", sizeof(SolveCollisionsAndUpdateVelocitiesCB) );
532
if( !solveCollisionsAndUpdateVelocitiesKernel.constBuffer )
534
// The next three kernels are different entry points compiled from the same UpdateNormals source string.
resetNormalsAndAreasKernel = dxFunctions.compileComputeShaderFromString( UpdateNormalsHLSLString, "ResetNormalsAndAreasKernel", sizeof(UpdateSoftBodiesCB) );
535
if( !resetNormalsAndAreasKernel.constBuffer )
537
normalizeNormalsAndAreasKernel = dxFunctions.compileComputeShaderFromString( UpdateNormalsHLSLString, "NormalizeNormalsAndAreasKernel", sizeof(UpdateSoftBodiesCB) );
538
if( !normalizeNormalsAndAreasKernel.constBuffer )
540
updateSoftBodiesKernel = dxFunctions.compileComputeShaderFromString( UpdateNormalsHLSLString, "UpdateSoftBodiesKernel", sizeof(UpdateSoftBodiesCB) );
541
if( !updateSoftBodiesKernel.constBuffer )
544
computeBoundsKernel = dxFunctions.compileComputeShaderFromString( ComputeBoundsHLSLString, "ComputeBoundsKernel", sizeof(ComputeBoundsCB) );
545
if( !computeBoundsKernel.constBuffer )
549
m_shadersInitialized = true;
552
} // btDX11SIMDAwareSoftBodySolver::buildShaders
554
// File-local helper: builds a Vectormath::Aos::Transform3 from a btTransform.
// Columns 0-2 are taken from the columns of the rotation basis and column 3 from the origin.
static Vectormath::Aos::Transform3 toTransform3( const btTransform &transform )
556
Vectormath::Aos::Transform3 outTransform;
557
outTransform.setCol(0, toVector3(transform.getBasis().getColumn(0)));
558
outTransform.setCol(1, toVector3(transform.getBasis().getColumn(1)));
559
outTransform.setCol(2, toVector3(transform.getBasis().getColumn(2)));
560
outTransform.setCol(3, toVector3(transform.getOrigin()));
575
// Groups wavefronts into batches such that no two wavefronts in the same batch touch the same vertex.
// Each wavefront is placed in the first existing batch with which it shares no vertex; if none exists,
// a new batch is appended for it.
//   linksForWavefronts - for each wavefront, the indices of the links it processes.
//   linkData           - source of the vertex pair of each link.
//   numVertices        - size of each per-batch vertex map; every vertex index used by a link must be below it.
//   wavefrontBatches   - output: for each batch, the indices of the wavefronts placed in it.
// NOTE(review): generateBatches() passes m_maxVertex here, which setLinkAt() maintains as the largest vertex
// index rather than a count; a map of that size cannot be indexed by the largest index — confirm whether
// m_maxVertex + 1 is intended.
static void generateBatchesOfWavefronts( btAlignedObjectArray < btAlignedObjectArray <int> > &linksForWavefronts, btSoftBodyLinkData &linkData, int numVertices, btAlignedObjectArray < btAlignedObjectArray <int> > &wavefrontBatches )
577
// A per-batch map of truth values stating whether a given vertex is in that batch
578
// This allows us to significantly optimize the batching
579
btAlignedObjectArray <btAlignedObjectArray<bool> > mapOfVerticesInBatches;
581
for( int waveIndex = 0; waveIndex < linksForWavefronts.size(); ++waveIndex )
583
btAlignedObjectArray <int> &wavefront( linksForWavefronts[waveIndex] );
587
// Try the existing batches in order until the wavefront has been placed.
while( batch < wavefrontBatches.size() && !placed )
589
// Test the current batch, see if this wave shares any vertex with the waves in the batch
590
bool foundSharedVertex = false;
591
for( int link = 0; link < wavefront.size(); ++link )
593
btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );
594
if( (mapOfVerticesInBatches[batch])[vertices.vertex0] || (mapOfVerticesInBatches[batch])[vertices.vertex1] )
596
foundSharedVertex = true;
600
if( !foundSharedVertex )
602
wavefrontBatches[batch].push_back( waveIndex );
603
// Insert vertices into this batch too
604
for( int link = 0; link < wavefront.size(); ++link )
606
btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );
607
(mapOfVerticesInBatches[batch])[vertices.vertex0] = true;
608
(mapOfVerticesInBatches[batch])[vertices.vertex1] = true;
614
// No existing batch could take this wavefront: open a new batch and a new vertex map for it.
if( batch == wavefrontBatches.size() && !placed )
616
wavefrontBatches.resize( batch + 1 );
617
wavefrontBatches[batch].push_back( waveIndex );
619
// And resize map as well
620
mapOfVerticesInBatches.resize( batch + 1 );
622
// Resize maps with total number of vertices
623
mapOfVerticesInBatches[batch].resize( numVertices, false );
625
// Insert vertices into this batch too
626
for( int link = 0; link < wavefront.size(); ++link )
628
btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );
629
(mapOfVerticesInBatches[batch])[vertices.vertex0] = true;
630
(mapOfVerticesInBatches[batch])[vertices.vertex1] = true;
634
mapOfVerticesInBatches.clear();
637
// Removes the element at indexToRemove while keeping the remaining elements in their original order.
// Every later element is shifted down by one slot and the array is then shrunk by one, so the cost is
// linear in the number of elements after indexToRemove.
//   vectorToUpdate - array to modify in place.
//   indexToRemove  - index of the element to drop; not range-checked here.
638
template< typename T > static void removeFromVector( btAlignedObjectArray< T > &vectorToUpdate, int indexToRemove )
640
int currentSize = vectorToUpdate.size();
641
for( int i = indexToRemove; i < (currentSize-1); ++i )
643
vectorToUpdate[i] = vectorToUpdate[i+1];
645
// Guard so that an empty array is never resized to -1.
if( currentSize > 0 )
646
vectorToUpdate.resize( currentSize - 1 );
650
* Insert element into vectorToUpdate at index index.
652
// Grows the array by one, shifts the elements from 'index' onwards up by one slot (working from the back
// so nothing is overwritten), then stores 'element' at 'index'.
//   vectorToUpdate - array to modify in place.
//   index          - position the new element will occupy; not range-checked here.
//   element        - value to insert (taken by value).
template< typename T > static void insertAtIndex( btAlignedObjectArray< T > &vectorToUpdate, int index, T element )
654
vectorToUpdate.resize( vectorToUpdate.size() + 1 );
655
for( int i = (vectorToUpdate.size() - 1); i > index; --i )
657
vectorToUpdate[i] = vectorToUpdate[i-1];
659
vectorToUpdate[index] = element;
663
* Insert into btAlignedObjectArray assuming the array is ordered and maintaining both ordering and uniqueness.
664
* ie it treats vectorToUpdate as an ordered set.
666
// Inserts 'element' into an ascending-ordered array unless an equal element is already present.
// The scan condition skips entries that compare less than 'element'; the element is then inserted at that
// position only if the scan reached the end or stopped on a different value.
//   vectorToUpdate - ordered array treated as a set; modified in place.
//   element        - value to insert; T must support operator< and operator!=.
template< typename T > static void insertUniqueAndOrderedIntoVector( btAlignedObjectArray<T> &vectorToUpdate, T element )
669
while( index < vectorToUpdate.size() && vectorToUpdate[index] < element )
673
if( index == vectorToUpdate.size() || vectorToUpdate[index] != element )
674
insertAtIndex( vectorToUpdate, index, element );
677
// Builds a per-vertex table of the links that touch each vertex.
// First counts the links per vertex, then takes the maximum count as the row width of a flat
// numVertices x maxLinksPerVertex table and fills each vertex's row with its link indices.
//   numVertices          - number of vertices; must exceed every vertex index used by the links.
//   linkData             - source of link count and vertex pairs.
//   listOfLinksPerVertex - output table; the entry for (vertex v, slot s) is at v * maxLinks + s.
//   numLinksPerVertex    - in/out: incremented once per link end, so the caller must pass it sized to numVertices and zeroed.
//   maxLinks             - output: the largest number of links on any single vertex.
static void generateLinksPerVertex( int numVertices, btSoftBodyLinkData &linkData, btAlignedObjectArray< int > &listOfLinksPerVertex, btAlignedObjectArray <int> &numLinksPerVertex, int &maxLinks )
679
// Pass 1: count how many links touch each vertex.
for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex )
681
btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) );
682
numLinksPerVertex[nodes.vertex0]++;
683
numLinksPerVertex[nodes.vertex1]++;
685
int maxLinksPerVertex = 0;
686
for( int vertexIndex = 0; vertexIndex < numVertices; ++vertexIndex )
688
maxLinksPerVertex = btMax(numLinksPerVertex[vertexIndex], maxLinksPerVertex);
690
maxLinks = maxLinksPerVertex;
692
// Running count of how many slots of each vertex's row have been filled so far.
btAlignedObjectArray< int > linksFoundPerVertex;
693
linksFoundPerVertex.resize( numVertices, 0 );
695
listOfLinksPerVertex.resize( maxLinksPerVertex * numVertices );
697
// Pass 2: record every link in the row of both of its end vertices.
for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex )
699
btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) );
702
int vertexIndex = nodes.vertex0;
703
int linkForVertex = linksFoundPerVertex[nodes.vertex0];
704
int linkAddress = vertexIndex * maxLinksPerVertex + linkForVertex;
706
listOfLinksPerVertex[linkAddress] = linkIndex;
708
linksFoundPerVertex[nodes.vertex0] = linkForVertex + 1;
712
int vertexIndex = nodes.vertex1;
713
int linkForVertex = linksFoundPerVertex[nodes.vertex1];
714
int linkAddress = vertexIndex * maxLinksPerVertex + linkForVertex;
716
listOfLinksPerVertex[linkAddress] = linkIndex;
718
linksFoundPerVertex[nodes.vertex1] = linkForVertex + 1;
723
// Partitions all links into wavefronts and, within each wavefront, into batches of links that share no vertex.
// Links are gathered by walking outwards from a starting vertex through its links, queueing the vertex at the
// far end of each newly claimed link, until the wavefront holds maxLinksPerWavefront links or no links remain.
// For each finished wavefront the ordered set of vertices it touches is recorded and its links are batched.
//   linkData              - source of link count and vertex pairs.
//   linksPerWorkItem      - not read by the visible code.
//   maxLinksPerWavefront  - maximum number of links assigned to one wavefront.
//   linksForWavefronts    - output: for each wavefront, its link indices.
//   batchesWithinWaves    - output: for each wavefront, its batches, each a list of link indices.
//   verticesForWavefronts - output: for each wavefront, the ordered unique vertex indices its links touch.
static void computeBatchingIntoWavefronts(
724
btSoftBodyLinkData &linkData,
726
int linksPerWorkItem,
727
int maxLinksPerWavefront,
728
btAlignedObjectArray < btAlignedObjectArray <int> > &linksForWavefronts,
729
btAlignedObjectArray< btAlignedObjectArray < btAlignedObjectArray <int> > > &batchesWithinWaves, /* wave, batch, links in batch */
730
btAlignedObjectArray< btAlignedObjectArray< int > > &verticesForWavefronts /* wavefront, vertex */
735
// Attempt generation of larger batches of links.
736
// NOTE(review): no fill value is passed to this resize; the loop below relies on every entry starting out false — confirm the array's default fill.
btAlignedObjectArray< bool > processedLink;
737
processedLink.resize( linkData.getNumLinks() );
738
btAlignedObjectArray< int > listOfLinksPerVertex;
739
int maxLinksPerVertex = 0;
741
// Count num vertices
743
// numVertices becomes one more than the largest vertex index referenced by any link.
for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex )
745
btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) );
746
numVertices = btMax( numVertices, nodes.vertex0 + 1 );
747
numVertices = btMax( numVertices, nodes.vertex1 + 1 );
750
// Need list of links per vertex
751
// Compute valence of each vertex
752
btAlignedObjectArray <int> numLinksPerVertex;
753
numLinksPerVertex.resize(0);
754
numLinksPerVertex.resize( numVertices, 0 );
756
generateLinksPerVertex( numVertices, linkData, listOfLinksPerVertex, numLinksPerVertex, maxLinksPerVertex );
759
// At this point we know what links we have for each vertex so we can start batching
761
// We want a vertex to start with, let's go with 0
762
int currentVertex = 0;
763
int linksProcessed = 0;
765
// Queue of vertices still to be visited; consumed from the front in the order they were appended.
btAlignedObjectArray <int> verticesToProcess;
767
while( linksProcessed < linkData.getNumLinks() )
770
// Open a new wavefront: one new entry in both the link list and the vertex set outputs.
int nextWavefront = linksForWavefronts.size();
771
linksForWavefronts.resize( nextWavefront + 1 );
772
btAlignedObjectArray <int> &linksForWavefront(linksForWavefronts[nextWavefront]);
773
verticesForWavefronts.resize( nextWavefront + 1 );
774
btAlignedObjectArray<int> &vertexSet( verticesForWavefronts[nextWavefront] );
776
linksForWavefront.resize(0);
778
// Loop to find enough links to fill the wavefront
779
// Stopping if we either run out of links, or fill it
780
while( linksProcessed < linkData.getNumLinks() && linksForWavefront.size() < maxLinksPerWavefront )
782
// Go through the links for the current vertex
783
for( int link = 0; link < numLinksPerVertex[currentVertex] && linksForWavefront.size() < maxLinksPerWavefront; ++link )
785
int linkAddress = currentVertex * maxLinksPerVertex + link;
786
int linkIndex = listOfLinksPerVertex[linkAddress];
788
// If we have not already processed this link, add it to the wavefront
789
// Claim it as another processed link
790
// Add the vertex at the far end to the list of vertices to process.
791
if( !processedLink[linkIndex] )
793
linksForWavefront.push_back( linkIndex );
795
processedLink[linkIndex] = true;
796
int v0 = linkData.getVertexPair(linkIndex).vertex0;
797
int v1 = linkData.getVertexPair(linkIndex).vertex1;
798
if( v0 == currentVertex )
799
verticesToProcess.push_back( v1 );
801
verticesToProcess.push_back( v0 );
804
if( verticesToProcess.size() > 0 )
806
// Get the element on the front of the queue and remove it
807
currentVertex = verticesToProcess[0];
808
// Removing from the front shifts every remaining queue entry down by one.
removeFromVector( verticesToProcess, 0 );
810
// If we've not yet processed all the links, find the first unprocessed one
811
// and select one of its vertices as the current vertex
812
if( linksProcessed < linkData.getNumLinks() )
815
while( processedLink[searchLink] )
817
currentVertex = linkData.getVertexPair(searchLink).vertex0;
822
// We have either finished or filled a wavefront
823
// Record the ordered, duplicate-free set of vertices touched by this wavefront's links.
for( int link = 0; link < linksForWavefront.size(); ++link )
825
int v0 = linkData.getVertexPair( linksForWavefront[link] ).vertex0;
826
int v1 = linkData.getVertexPair( linksForWavefront[link] ).vertex1;
827
insertUniqueAndOrderedIntoVector( vertexSet, v0 );
828
insertUniqueAndOrderedIntoVector( vertexSet, v1 );
830
// Iterate over links mapped to the wave and batch those
831
// We can run a batch on each cycle trivially
833
batchesWithinWaves.resize( batchesWithinWaves.size() + 1 );
834
btAlignedObjectArray < btAlignedObjectArray <int> > &batchesWithinWave( batchesWithinWaves[batchesWithinWaves.size()-1] );
837
// Place each link in the first batch that is not full and contains no link sharing a vertex with it.
for( int link = 0; link < linksForWavefront.size(); ++link )
839
int linkIndex = linksForWavefront[link];
840
btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( linkIndex );
844
while( batch < batchesWithinWave.size() && !placed )
846
bool foundSharedVertex = false;
847
if( batchesWithinWave[batch].size() >= wavefrontSize )
849
// If we have already filled this batch, move on to another
850
// A full batch is treated the same as a conflict so that the search moves on.
foundSharedVertex = true;
852
for( int link2 = 0; link2 < batchesWithinWave[batch].size(); ++link2 )
854
btSoftBodyLinkData::LinkNodePair vertices2 = linkData.getVertexPair( (batchesWithinWave[batch])[link2] );
856
if( vertices.vertex0 == vertices2.vertex0 ||
857
vertices.vertex1 == vertices2.vertex0 ||
858
vertices.vertex0 == vertices2.vertex1 ||
859
vertices.vertex1 == vertices2.vertex1 )
861
foundSharedVertex = true;
866
if( !foundSharedVertex )
868
batchesWithinWave[batch].push_back( linkIndex );
874
// No existing batch could take the link: append a new batch holding just this link.
if( batch == batchesWithinWave.size() && !placed )
876
batchesWithinWave.resize( batch + 1 );
877
batchesWithinWave[batch].push_back( linkIndex );
885
// Re-orders the link data of this object into a padded, wavefront-structured layout.
//
// Steps, in order:
//  1. computeBatchingIntoWavefronts() groups links into wavefronts and, per wavefront,
//     produces its batches of links and its ordered set of vertices.
//  2. generateBatchesOfWavefronts() groups the wavefronts themselves into batches.
//  3. m_wavefrontBatchStartLengths receives one (start, length) pair per batch of wavefronts.
//  4. The maximum batch count and (padded) vertex count over all wavefronts are computed.
//  5. Every per-link array is resized to
//     m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts entries and refilled from
//     backup copies, so each batch of each wavefront occupies a full m_wavefrontSize slot
//     range; unused slots are zero-filled.
//  6. m_wavefrontVerticesGlobalAddresses and m_linkVerticesLocalAddresses are filled so that
//     each link refers to its vertices by index into its wavefront's vertex set.
void btSoftBodyLinkDataDX11SIMDAware::generateBatches()
887
btAlignedObjectArray < btAlignedObjectArray <int> > linksForWavefronts;
888
btAlignedObjectArray < btAlignedObjectArray <int> > wavefrontBatches;
889
btAlignedObjectArray< btAlignedObjectArray < btAlignedObjectArray <int> > > batchesWithinWaves;
890
btAlignedObjectArray< btAlignedObjectArray< int > > verticesForWavefronts; // wavefronts, vertices in wavefront as an ordered set
892
// Group the links into wavefronts
893
computeBatchingIntoWavefronts( *this, m_wavefrontSize, m_linksPerWorkItem, m_maxLinksPerWavefront, linksForWavefronts, batchesWithinWaves, verticesForWavefronts );
896
// Batch the wavefronts
897
generateBatchesOfWavefronts( linksForWavefronts, *this, m_maxVertex, wavefrontBatches );
899
m_numWavefronts = linksForWavefronts.size();
901
// At this point we have a description of which links we need to process in each wavefront
903
// First correctly fill the batch ranges vector
// Each entry is BatchPair( start, length ): start is the running total of wavefronts in
// all earlier batches (prefixSum) and length is the number of wavefronts in this batch.
904
int numBatches = wavefrontBatches.size();
905
m_wavefrontBatchStartLengths.resize(0);
907
for( int batchIndex = 0; batchIndex < numBatches; ++batchIndex )
909
int wavesInBatch = wavefrontBatches[batchIndex].size();
910
int nextPrefixSum = prefixSum + wavesInBatch;
911
m_wavefrontBatchStartLengths.push_back( BatchPair( prefixSum, nextPrefixSum - prefixSum ) );
913
prefixSum += wavesInBatch;
916
// Also find max number of batches within a wave
917
m_maxBatchesWithinWave = 0;
918
m_maxVerticesWithinWave = 0;
919
m_numBatchesAndVerticesWithinWaves.resize( m_numWavefronts );
920
for( int waveIndex = 0; waveIndex < m_numWavefronts; ++waveIndex )
922
// See if the number of batches in this wave is greater than the current maximum
923
int batchesInCurrentWave = batchesWithinWaves[waveIndex].size();
924
int verticesInCurrentWave = verticesForWavefronts[waveIndex].size();
925
m_maxBatchesWithinWave = btMax( batchesInCurrentWave, m_maxBatchesWithinWave );
926
m_maxVerticesWithinWave = btMax( verticesInCurrentWave, m_maxVerticesWithinWave );
929
// Add padding values both for alignment and as dud addresses within LDS to compute junk rather than branch around
// The expression rounds down to a multiple of 16 and then adds 32, which leaves between
// 17 and 32 spare slots above the largest real vertex count. That covers the 16 dud
// local addresses (verticesUsedByWavefront + 0..15) handed to padding links further down.
930
m_maxVerticesWithinWave = 16*((m_maxVerticesWithinWave/16)+2);
932
// Now we know the maximum number of vertices per-wave we can resize the global vertices array
933
m_wavefrontVerticesGlobalAddresses.resize( m_maxVerticesWithinWave * m_numWavefronts );
935
// Grab backup copies of all the link data arrays for the sorting process
// The originals are resized and overwritten below, so the re-ordering reads from these copies.
936
btAlignedObjectArray<btSoftBodyLinkData::LinkNodePair> m_links_Backup(m_links);
937
btAlignedObjectArray<float> m_linkStrength_Backup(m_linkStrength);
938
btAlignedObjectArray<float> m_linksMassLSC_Backup(m_linksMassLSC);
939
btAlignedObjectArray<float> m_linksRestLengthSquared_Backup(m_linksRestLengthSquared);
940
//btAlignedObjectArray<Vectormath::Aos::Vector3> m_linksCLength_Backup(m_linksCLength);
941
//btAlignedObjectArray<float> m_linksLengthRatio_Backup(m_linksLengthRatio);
942
btAlignedObjectArray<float> m_linksRestLength_Backup(m_linksRestLength);
943
btAlignedObjectArray<float> m_linksMaterialLinearStiffnessCoefficient_Backup(m_linksMaterialLinearStiffnessCoefficient);
945
// Resize to a wavefront sized batch per batch per wave so we get perfectly coherent memory accesses.
// Every wavefront gets m_maxBatchesWithinWave batches of m_wavefrontSize link slots,
// regardless of how many links it really holds.
946
m_links.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
947
m_linkVerticesLocalAddresses.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
948
m_linkStrength.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
949
m_linksMassLSC.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
950
m_linksRestLengthSquared.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
951
m_linksRestLength.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
952
m_linksMaterialLinearStiffnessCoefficient.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
954
// Then re-order links into wavefront blocks
956
// Total number of wavefronts moved. This will decide the ordering of sorted wavefronts.
957
int wavefrontCount = 0;
959
// Iterate over batches of wavefronts, then wavefronts in the batch
960
for( int batchIndex = 0; batchIndex < numBatches; ++batchIndex )
962
btAlignedObjectArray <int> &batch( wavefrontBatches[batchIndex] );
963
int wavefrontsInBatch = batch.size();
966
for( int wavefrontIndex = 0; wavefrontIndex < wavefrontsInBatch; ++wavefrontIndex )
969
// batch[] holds wavefront indices as produced by the grouping step; wavefrontCount is the
// wavefront's position in the re-ordered output.
int originalWavefrontIndex = batch[wavefrontIndex];
970
btAlignedObjectArray< int > &wavefrontVertices( verticesForWavefronts[originalWavefrontIndex] );
971
int verticesUsedByWavefront = wavefrontVertices.size();
973
// Copy the set of vertices into the correctly structured array for use on the device
974
// Fill the non-vertices with -1s
975
// so we can mask out those reads
976
for( int vertex = 0; vertex < verticesUsedByWavefront; ++vertex )
978
m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = wavefrontVertices[vertex];
980
for( int vertex = verticesUsedByWavefront; vertex < m_maxVerticesWithinWave; ++vertex )
982
m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = -1;
985
// Obtain the set of batches within the current wavefront
986
btAlignedObjectArray < btAlignedObjectArray <int> > &batchesWithinWavefront( batchesWithinWaves[originalWavefrontIndex] );
987
// Set the size of the batches for use in the solver, correctly ordered
988
NumBatchesVerticesPair batchesAndVertices;
989
batchesAndVertices.numBatches = batchesWithinWavefront.size();
990
batchesAndVertices.numVertices = verticesUsedByWavefront;
991
m_numBatchesAndVerticesWithinWaves[wavefrontCount] = batchesAndVertices;
994
// Now iterate over batches within the wavefront to structure the links correctly
995
for( int wavefrontBatch = 0; wavefrontBatch < batchesWithinWavefront.size(); ++wavefrontBatch )
997
btAlignedObjectArray <int> &linksInBatch( batchesWithinWavefront[wavefrontBatch] );
998
int wavefrontBatchSize = linksInBatch.size();
1000
// First slot of this batch in the re-ordered arrays: skip the slots of all previously
// placed wavefronts, then the earlier batches of this wavefront.
int batchAddressInTarget = m_maxBatchesWithinWave * m_wavefrontSize * wavefrontCount + m_wavefrontSize * wavefrontBatch;
1002
for( int linkIndex = 0; linkIndex < wavefrontBatchSize; ++linkIndex )
1004
int originalLinkAddress = linksInBatch[linkIndex];
1005
// Reorder simple arrays trivially
1006
m_links[batchAddressInTarget + linkIndex] = m_links_Backup[originalLinkAddress];
1007
m_linkStrength[batchAddressInTarget + linkIndex] = m_linkStrength_Backup[originalLinkAddress];
1008
m_linksMassLSC[batchAddressInTarget + linkIndex] = m_linksMassLSC_Backup[originalLinkAddress];
1009
m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = m_linksRestLengthSquared_Backup[originalLinkAddress];
1010
m_linksRestLength[batchAddressInTarget + linkIndex] = m_linksRestLength_Backup[originalLinkAddress];
1011
m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget + linkIndex] = m_linksMaterialLinearStiffnessCoefficient_Backup[originalLinkAddress];
1013
// The local address is more complicated. We need to work out where a given vertex will end up
1014
// by searching the set of vertices for this link and using the index as the local address
// NOTE(review): this relies on findLinearSearch returning the position of the value within
// wavefrontVertices, and on both vertices of the link being present in that set - confirm.
1015
btSoftBodyLinkData::LinkNodePair localPair;
1016
btSoftBodyLinkData::LinkNodePair globalPair = m_links[batchAddressInTarget + linkIndex];
1017
localPair.vertex0 = wavefrontVertices.findLinearSearch( globalPair.vertex0 );
1018
localPair.vertex1 = wavefrontVertices.findLinearSearch( globalPair.vertex1 );
1019
m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair;
1021
// Pad the remainder of this batch's slot range up to a full wavefront.
for( int linkIndex = wavefrontBatchSize; linkIndex < m_wavefrontSize; ++linkIndex )
1023
// Put 0s into these arrays for padding for cleanliness
1024
m_links[batchAddressInTarget + linkIndex] = btSoftBodyLinkData::LinkNodePair(0, 0);
1025
m_linkStrength[batchAddressInTarget + linkIndex] = 0.f;
1026
m_linksMassLSC[batchAddressInTarget + linkIndex] = 0.f;
1027
m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = 0.f;
1028
m_linksRestLength[batchAddressInTarget + linkIndex] = 0.f;
1029
m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget + linkIndex] = 0.f;
1032
// For local addresses of junk data choose a set of addresses just above the range of valid ones
1033
// and cycling through % 16 so that we don't have bank conflicts between all dud addresses
1034
// The valid addresses will do scatter and gather in the valid range, the junk ones should happily work
1035
// off the end of that range so we need no control
1036
btSoftBodyLinkData::LinkNodePair localPair;
1037
localPair.vertex0 = verticesUsedByWavefront + (linkIndex % 16);
1038
localPair.vertex1 = verticesUsedByWavefront + (linkIndex % 16);
1039
m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair;
1051
} // void btSoftBodyLinkDataDX11SIMDAware::generateBatches()