~ubuntu-branches/ubuntu/vivid/emscripten/vivid

« back to all changes in this revision

Viewing changes to tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.cpp

  • Committer: Package Import Robot
  • Author(s): Sylvestre Ledru
  • Date: 2013-05-02 13:11:51 UTC
  • Revision ID: package-import@ubuntu.com-20130502131151-q8dvteqr1ef2x7xz
Tags: upstream-1.4.1~20130504~adb56cb
Import upstream version 1.4.1~20130504~adb56cb

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*
 
2
Bullet Continuous Collision Detection and Physics Library
 
3
Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
 
4
 
 
5
This software is provided 'as-is', without any express or implied warranty.
 
6
In no event will the authors be held liable for any damages arising from the use of this software.
 
7
Permission is granted to anyone to use this software for any purpose, 
 
8
including commercial applications, and to alter it and redistribute it freely, 
 
9
subject to the following restrictions:
 
10
 
 
11
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
 
12
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
 
13
3. This notice may not be removed or altered from any source distribution.
 
14
*/
 
15
 
 
16
#include <cstdio>
 
17
 
 
18
 
 
19
#define WAVEFRONT_SIZE 32
 
20
#define WAVEFRONT_BLOCK_MULTIPLIER 2
 
21
#define GROUP_SIZE (WAVEFRONT_SIZE*WAVEFRONT_BLOCK_MULTIPLIER)
 
22
#define LINKS_PER_SIMD_LANE 16
 
23
 
 
24
#define STRINGIFY( S ) STRINGIFY2( S )
 
25
#define STRINGIFY2( S ) #S
 
26
 
 
27
#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
 
28
#include "vectormath/vmInclude.h"
 
29
 
 
30
#include "btSoftBodySolverLinkData_DX11SIMDAware.h"
 
31
#include "btSoftBodySolver_DX11SIMDAware.h"
 
32
#include "btSoftBodySolverVertexBuffer_DX11.h"
 
33
#include "BulletSoftBody/btSoftBody.h"
 
34
#include "BulletCollision/CollisionShapes/btCapsuleShape.h"
 
35
 
 
36
#define MSTRINGIFY(A) #A
 
37
static char* UpdatePositionsFromVelocitiesHLSLString = 
 
38
#include "HLSL/UpdatePositionsFromVelocities.hlsl"
 
39
static char* SolvePositionsSIMDBatchedHLSLString = 
 
40
#include "HLSL/SolvePositionsSIMDBatched.hlsl"
 
41
static char* UpdateNodesHLSLString = 
 
42
#include "HLSL/UpdateNodes.hlsl"
 
43
static char* UpdatePositionsHLSLString = 
 
44
#include "HLSL/UpdatePositions.hlsl"
 
45
static char* UpdateConstantsHLSLString = 
 
46
#include "HLSL/UpdateConstants.hlsl"
 
47
static char* IntegrateHLSLString = 
 
48
#include "HLSL/Integrate.hlsl"
 
49
static char* ApplyForcesHLSLString = 
 
50
#include "HLSL/ApplyForces.hlsl"
 
51
static char* UpdateNormalsHLSLString = 
 
52
#include "HLSL/UpdateNormals.hlsl"
 
53
static char* OutputToVertexArrayHLSLString = 
 
54
#include "HLSL/OutputToVertexArray.hlsl"
 
55
static char* VSolveLinksHLSLString = 
 
56
#include "HLSL/VSolveLinks.hlsl"
 
57
static char* ComputeBoundsHLSLString = 
 
58
#include "HLSL/ComputeBounds.hlsl"
 
59
static char* SolveCollisionsAndUpdateVelocitiesHLSLString =
 
60
#include "HLSL/solveCollisionsAndUpdateVelocitiesSIMDBatched.hlsl"
 
61
 
 
62
 
 
63
 
 
64
btSoftBodyLinkDataDX11SIMDAware::btSoftBodyLinkDataDX11SIMDAware( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext ) : 
 
65
                m_d3dDevice( d3dDevice ),
 
66
                m_d3dDeviceContext( d3dDeviceContext ),
 
67
                m_wavefrontSize( WAVEFRONT_SIZE ),
 
68
                m_linksPerWorkItem( LINKS_PER_SIMD_LANE ),
 
69
                m_maxBatchesWithinWave( 0 ),
 
70
                m_maxLinksPerWavefront( m_wavefrontSize * m_linksPerWorkItem ),
 
71
                m_numWavefronts( 0 ),
 
72
                m_maxVertex( 0 ),
 
73
                m_dx11NumBatchesAndVerticesWithinWaves( d3dDevice, d3dDeviceContext, &m_numBatchesAndVerticesWithinWaves, true ),
 
74
                m_dx11WavefrontVerticesGlobalAddresses( d3dDevice, d3dDeviceContext, &m_wavefrontVerticesGlobalAddresses, true ),
 
75
                m_dx11LinkVerticesLocalAddresses( d3dDevice, d3dDeviceContext, &m_linkVerticesLocalAddresses, true ),
 
76
                m_dx11LinkStrength( d3dDevice, d3dDeviceContext, &m_linkStrength, true ),
 
77
                m_dx11LinksMassLSC( d3dDevice, d3dDeviceContext, &m_linksMassLSC, true ),
 
78
                m_dx11LinksRestLengthSquared( d3dDevice, d3dDeviceContext, &m_linksRestLengthSquared, true ),
 
79
                m_dx11LinksRestLength( d3dDevice, d3dDeviceContext, &m_linksRestLength, true ),
 
80
                m_dx11LinksMaterialLinearStiffnessCoefficient( d3dDevice, d3dDeviceContext, &m_linksMaterialLinearStiffnessCoefficient, true )
 
81
{
 
82
        m_d3dDevice = d3dDevice;
 
83
        m_d3dDeviceContext = d3dDeviceContext;
 
84
}
 
85
 
 
86
/** Destructor; no explicit cleanup is performed in its body. */
btSoftBodyLinkDataDX11SIMDAware::~btSoftBodyLinkDataDX11SIMDAware() {}
 
89
 
 
90
static Vectormath::Aos::Vector3 toVector3( const btVector3 &vec )
 
91
{
 
92
        Vectormath::Aos::Vector3 outVec( vec.getX(), vec.getY(), vec.getZ() );
 
93
        return outVec;
 
94
}
 
95
 
 
96
void btSoftBodyLinkDataDX11SIMDAware::createLinks( int numLinks )
 
97
{
 
98
        int previousSize = m_links.size();
 
99
        int newSize = previousSize + numLinks;
 
100
 
 
101
        btSoftBodyLinkData::createLinks( numLinks );
 
102
 
 
103
        // Resize the link addresses array as well
 
104
        m_linkAddresses.resize( newSize );
 
105
}
 
106
 
 
107
void btSoftBodyLinkDataDX11SIMDAware::setLinkAt( const btSoftBodyLinkData::LinkDescription &link, int linkIndex )
 
108
{
 
109
        btSoftBodyLinkData::setLinkAt( link, linkIndex );
 
110
 
 
111
        if( link.getVertex0() > m_maxVertex )
 
112
                m_maxVertex = link.getVertex0();
 
113
        if( link.getVertex1() > m_maxVertex )
 
114
                m_maxVertex = link.getVertex1();
 
115
 
 
116
        // Set the link index correctly for initialisation
 
117
        m_linkAddresses[linkIndex] = linkIndex;
 
118
}
 
119
 
 
120
bool btSoftBodyLinkDataDX11SIMDAware::onAccelerator()
 
121
{
 
122
        return m_onGPU;
 
123
}
 
124
 
 
125
bool btSoftBodyLinkDataDX11SIMDAware::moveToAccelerator()
 
126
{
 
127
        bool success = true;
 
128
 
 
129
        success = success && m_dx11NumBatchesAndVerticesWithinWaves.moveToGPU();
 
130
        success = success && m_dx11WavefrontVerticesGlobalAddresses.moveToGPU();
 
131
        success = success && m_dx11LinkVerticesLocalAddresses.moveToGPU();
 
132
        success = success && m_dx11LinkStrength.moveToGPU();
 
133
        success = success && m_dx11LinksMassLSC.moveToGPU();
 
134
        success = success && m_dx11LinksRestLengthSquared.moveToGPU();
 
135
        success = success && m_dx11LinksRestLength.moveToGPU();
 
136
        success = success && m_dx11LinksMaterialLinearStiffnessCoefficient.moveToGPU();
 
137
 
 
138
        if( success )
 
139
                m_onGPU = true;
 
140
 
 
141
        return success;
 
142
}
 
143
 
 
144
bool btSoftBodyLinkDataDX11SIMDAware::moveFromAccelerator()
 
145
{
 
146
        bool success = true;
 
147
        success = success && m_dx11NumBatchesAndVerticesWithinWaves.moveFromGPU();
 
148
        success = success && m_dx11WavefrontVerticesGlobalAddresses.moveFromGPU();
 
149
        success = success && m_dx11LinkVerticesLocalAddresses.moveFromGPU();
 
150
        success = success && m_dx11LinkStrength.moveFromGPU();
 
151
        success = success && m_dx11LinksMassLSC.moveFromGPU();
 
152
        success = success && m_dx11LinksRestLengthSquared.moveFromGPU();
 
153
        success = success && m_dx11LinksRestLength.moveFromGPU();
 
154
        success = success && m_dx11LinksMaterialLinearStiffnessCoefficient.moveFromGPU();
 
155
 
 
156
        if( success )
 
157
                m_onGPU = false;
 
158
 
 
159
        return success;
 
160
}
 
161
 
 
162
 
 
163
 
 
164
 
 
165
 
 
166
 
 
167
 
 
168
 
 
169
 
 
170
 
 
171
 
 
172
 
 
173
 
 
174
 
 
175
 
 
176
btDX11SIMDAwareSoftBodySolver::btDX11SIMDAwareSoftBodySolver(ID3D11Device * dx11Device, ID3D11DeviceContext* dx11Context, DXFunctions::CompileFromMemoryFunc dx11CompileFromMemory) :
 
177
        btDX11SoftBodySolver( dx11Device, dx11Context, dx11CompileFromMemory ),
 
178
        m_linkData(m_dx11Device, m_dx11Context)
 
179
{
 
180
        // Initial we will clearly need to update solver constants
 
181
        // For now this is global for the cloths linked with this solver - we should probably make this body specific 
 
182
        // for performance in future once we understand more clearly when constants need to be updated
 
183
        m_updateSolverConstants = true;
 
184
 
 
185
        m_shadersInitialized = false;
 
186
}
 
187
 
 
188
/** Destructor; calls releaseKernels(). */
btDX11SIMDAwareSoftBodySolver::~btDX11SIMDAwareSoftBodySolver()
{
    this->releaseKernels();
}
 
192
 
 
193
 
 
194
btSoftBodyLinkData &btDX11SIMDAwareSoftBodySolver::getLinkData()
 
195
{
 
196
        // TODO: Consider setting link data to "changed" here
 
197
        return m_linkData;
 
198
}
 
199
 
 
200
 
 
201
 
 
202
void btDX11SIMDAwareSoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate)
 
203
{
 
204
        if(forceUpdate || m_softBodySet.size() != softBodies.size() )
 
205
        {
 
206
                // Have a change in the soft body set so update, reloading all the data
 
207
                getVertexData().clear();
 
208
                getTriangleData().clear();
 
209
                getLinkData().clear();
 
210
                m_softBodySet.resize(0);
 
211
 
 
212
 
 
213
                for( int softBodyIndex = 0; softBodyIndex < softBodies.size(); ++softBodyIndex )
 
214
                {
 
215
                        btSoftBody *softBody = softBodies[ softBodyIndex ];
 
216
                        using Vectormath::Aos::Matrix3;
 
217
                        using Vectormath::Aos::Point3;
 
218
 
 
219
                        // Create SoftBody that will store the information within the solver
 
220
                        btAcceleratedSoftBodyInterface *newSoftBody = new btAcceleratedSoftBodyInterface( softBody );
 
221
                        m_softBodySet.push_back( newSoftBody );
 
222
 
 
223
                        m_perClothAcceleration.push_back( toVector3(softBody->getWorldInfo()->m_gravity) );
 
224
                        m_perClothDampingFactor.push_back(softBody->m_cfg.kDP);
 
225
                        m_perClothVelocityCorrectionCoefficient.push_back( softBody->m_cfg.kVCF );
 
226
                        m_perClothLiftFactor.push_back( softBody->m_cfg.kLF );
 
227
                        m_perClothDragFactor.push_back( softBody->m_cfg.kDG );
 
228
                        m_perClothMediumDensity.push_back(softBody->getWorldInfo()->air_density);
 
229
                        // Simple init values. Actually we'll put 0 and -1 into them at the appropriate time
 
230
                        m_perClothMinBounds.push_back( UIntVector3( 0, 0, 0 ) );
 
231
                        m_perClothMaxBounds.push_back( UIntVector3( UINT_MAX, UINT_MAX, UINT_MAX ) );
 
232
                        m_perClothFriction.push_back( softBody->getFriction() );
 
233
                        m_perClothCollisionObjects.push_back( CollisionObjectIndices(-1, -1) );
 
234
 
 
235
                        // Add space for new vertices and triangles in the default solver for now
 
236
                        // TODO: Include space here for tearing too later
 
237
                        int firstVertex = getVertexData().getNumVertices();
 
238
                        int numVertices = softBody->m_nodes.size();
 
239
                        // Round maxVertices to a multiple of the workgroup size so we know we're safe to run over in a given group
 
240
                        // maxVertices can be increased to allow tearing, but should be used sparingly because these extra verts will always be processed
 
241
                        int maxVertices = GROUP_SIZE*((numVertices+GROUP_SIZE)/GROUP_SIZE);
 
242
                        // Allocate space for new vertices in all the vertex arrays
 
243
                        getVertexData().createVertices( numVertices, softBodyIndex, maxVertices );
 
244
 
 
245
                        int firstTriangle = getTriangleData().getNumTriangles();
 
246
                        int numTriangles = softBody->m_faces.size();
 
247
                        int maxTriangles = numTriangles;
 
248
                        getTriangleData().createTriangles( maxTriangles );
 
249
 
 
250
                        // Copy vertices from softbody into the solver
 
251
                        for( int vertex = 0; vertex < numVertices; ++vertex )
 
252
                        {
 
253
                                Point3 multPoint(softBody->m_nodes[vertex].m_x.getX(), softBody->m_nodes[vertex].m_x.getY(), softBody->m_nodes[vertex].m_x.getZ());
 
254
                                btSoftBodyVertexData::VertexDescription desc;
 
255
 
 
256
                                // TODO: Position in the softbody might be pre-transformed
 
257
                                // or we may need to adapt for the pose.
 
258
                                //desc.setPosition( cloth.getMeshTransform()*multPoint );
 
259
                                desc.setPosition( multPoint );
 
260
 
 
261
                                float vertexInverseMass = softBody->m_nodes[vertex].m_im;
 
262
                                desc.setInverseMass(vertexInverseMass);
 
263
                                getVertexData().setVertexAt( desc, firstVertex + vertex );
 
264
                        }
 
265
 
 
266
                        // Copy triangles similarly
 
267
                        // We're assuming here that vertex indices are based on the firstVertex rather than the entire scene
 
268
                        for( int triangle = 0; triangle < numTriangles; ++triangle )
 
269
                        {
 
270
                                // Note that large array storage is relative to the array not to the cloth
 
271
                                // So we need to add firstVertex to each value
 
272
                                int vertexIndex0 = (softBody->m_faces[triangle].m_n[0] - &(softBody->m_nodes[0]));
 
273
                                int vertexIndex1 = (softBody->m_faces[triangle].m_n[1] - &(softBody->m_nodes[0]));
 
274
                                int vertexIndex2 = (softBody->m_faces[triangle].m_n[2] - &(softBody->m_nodes[0]));
 
275
                                btSoftBodyTriangleData::TriangleDescription newTriangle(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, vertexIndex2 + firstVertex);
 
276
                                getTriangleData().setTriangleAt( newTriangle, firstTriangle + triangle );
 
277
                                
 
278
                                // Increase vertex triangle counts for this triangle            
 
279
                                getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex0)++;
 
280
                                getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex1)++;
 
281
                                getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex2)++;
 
282
                        }
 
283
 
 
284
                        int firstLink = getLinkData().getNumLinks();
 
285
                        int numLinks = softBody->m_links.size();
 
286
                        int maxLinks = numLinks;
 
287
                        
 
288
                        // Allocate space for the links
 
289
                        getLinkData().createLinks( numLinks );
 
290
 
 
291
                        // Add the links
 
292
                        for( int link = 0; link < numLinks; ++link )
 
293
                        {
 
294
                                int vertexIndex0 = softBody->m_links[link].m_n[0] - &(softBody->m_nodes[0]);
 
295
                                int vertexIndex1 = softBody->m_links[link].m_n[1] - &(softBody->m_nodes[0]);
 
296
 
 
297
                                btSoftBodyLinkData::LinkDescription newLink(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, softBody->m_links[link].m_material->m_kLST);
 
298
                                newLink.setLinkStrength(1.f);
 
299
                                getLinkData().setLinkAt(newLink, firstLink + link);
 
300
                        }
 
301
                        
 
302
                        newSoftBody->setFirstVertex( firstVertex );
 
303
                        newSoftBody->setFirstTriangle( firstTriangle );
 
304
                        newSoftBody->setNumVertices( numVertices );
 
305
                        newSoftBody->setMaxVertices( maxVertices );
 
306
                        newSoftBody->setNumTriangles( numTriangles );
 
307
                        newSoftBody->setMaxTriangles( maxTriangles );
 
308
                        newSoftBody->setFirstLink( firstLink );
 
309
                        newSoftBody->setNumLinks( numLinks );
 
310
                }
 
311
 
 
312
 
 
313
 
 
314
                updateConstants(0.f);
 
315
 
 
316
 
 
317
                m_linkData.generateBatches();           
 
318
                m_triangleData.generateBatches();
 
319
 
 
320
                
 
321
                // Build the shaders to match the batching parameters
 
322
                buildShaders();
 
323
        }
 
324
 
 
325
}
 
326
 
 
327
 
 
328
 
 
329
void btDX11SIMDAwareSoftBodySolver::solveConstraints( float solverdt )
 
330
{
 
331
 
 
332
        //std::cerr << "'GPU' solve constraints\n";
 
333
        using Vectormath::Aos::Vector3;
 
334
        using Vectormath::Aos::Point3;
 
335
        using Vectormath::Aos::lengthSqr;
 
336
        using Vectormath::Aos::dot;
 
337
 
 
338
        // Prepare links
 
339
        int numLinks = m_linkData.getNumLinks();
 
340
        int numVertices = m_vertexData.getNumVertices();
 
341
 
 
342
        float kst = 1.f;
 
343
        float ti = 0.f;
 
344
 
 
345
 
 
346
        m_dx11PerClothDampingFactor.moveToGPU();
 
347
        m_dx11PerClothVelocityCorrectionCoefficient.moveToGPU();
 
348
 
 
349
        
 
350
 
 
351
        // Ensure data is on accelerator
 
352
        m_linkData.moveToAccelerator();
 
353
        m_vertexData.moveToAccelerator();
 
354
 
 
355
 
 
356
        
 
357
        prepareCollisionConstraints();
 
358
 
 
359
 
 
360
        // Solve drift
 
361
        for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
 
362
        {
 
363
 
 
364
                for( int i = 0; i < m_linkData.m_wavefrontBatchStartLengths.size(); ++i )
 
365
                {
 
366
                        int startWave = m_linkData.m_wavefrontBatchStartLengths[i].start;
 
367
                        int numWaves = m_linkData.m_wavefrontBatchStartLengths[i].length;
 
368
 
 
369
                        solveLinksForPosition( startWave, numWaves, kst, ti );
 
370
                }       
 
371
 
 
372
        } // for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
 
373
 
 
374
 
 
375
 
 
376
        
 
377
        // At this point assume that the force array is blank - we will overwrite it
 
378
        solveCollisionsAndUpdateVelocities( 1.f/solverdt );
 
379
 
 
380
} // btDX11SIMDAwareSoftBodySolver::solveConstraints
 
381
 
 
382
 
 
383
void btDX11SIMDAwareSoftBodySolver::updateConstants( float timeStep )
 
384
{
 
385
        using namespace Vectormath::Aos;
 
386
 
 
387
        if( m_updateSolverConstants )
 
388
        {
 
389
                m_updateSolverConstants = false;
 
390
 
 
391
                // Will have to redo this if we change the structure (tear, maybe) or various other possible changes
 
392
 
 
393
                // Initialise link constants
 
394
                const int numLinks = m_linkData.getNumLinks();
 
395
                for( int linkIndex = 0; linkIndex < numLinks; ++linkIndex )
 
396
                {
 
397
                        btSoftBodyLinkData::LinkNodePair &vertices( m_linkData.getVertexPair(linkIndex) );
 
398
                        m_linkData.getRestLength(linkIndex) = length((m_vertexData.getPosition( vertices.vertex0 ) - m_vertexData.getPosition( vertices.vertex1 )));
 
399
                        float invMass0 = m_vertexData.getInverseMass(vertices.vertex0);
 
400
                        float invMass1 = m_vertexData.getInverseMass(vertices.vertex1);
 
401
                        float linearStiffness = m_linkData.getLinearStiffnessCoefficient(linkIndex);
 
402
                        float massLSC = (invMass0 + invMass1)/linearStiffness;
 
403
                        m_linkData.getMassLSC(linkIndex) = massLSC;
 
404
                        float restLength = m_linkData.getRestLength(linkIndex);
 
405
                        float restLengthSquared = restLength*restLength;
 
406
                        m_linkData.getRestLengthSquared(linkIndex) = restLengthSquared;
 
407
                }
 
408
        }
 
409
} // btDX11SIMDAwareSoftBodySolver::updateConstants
 
410
 
 
411
//////////////////////////////////////
 
412
// Kernel dispatches
 
413
 
 
414
 
 
415
void btDX11SIMDAwareSoftBodySolver::solveLinksForPosition( int startWave, int numWaves, float kst, float ti )
 
416
{
 
417
 
 
418
 
 
419
        m_vertexData.moveToAccelerator();
 
420
        m_linkData.moveToAccelerator();
 
421
 
 
422
        // Copy kernel parameters to GPU
 
423
        SolvePositionsFromLinksKernelCB constBuffer;
 
424
 
 
425
        // Set the first wave of the batch and the number of waves
 
426
        constBuffer.startWave = startWave;
 
427
        constBuffer.numWaves = numWaves;
 
428
 
 
429
        constBuffer.kst = kst;
 
430
        constBuffer.ti = ti;
 
431
        
 
432
        D3D11_MAPPED_SUBRESOURCE MappedResource = {0};
 
433
        m_dx11Context->Map( solvePositionsFromLinksKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource );
 
434
        memcpy( MappedResource.pData, &constBuffer, sizeof(SolvePositionsFromLinksKernelCB) );  
 
435
        m_dx11Context->Unmap( solvePositionsFromLinksKernel.constBuffer, 0 );
 
436
        m_dx11Context->CSSetConstantBuffers( 0, 1, &solvePositionsFromLinksKernel.constBuffer );
 
437
 
 
438
        // Set resources and dispatch
 
439
        m_dx11Context->CSSetShaderResources( 0, 1, &(m_linkData.m_dx11NumBatchesAndVerticesWithinWaves.getSRV()) );
 
440
        m_dx11Context->CSSetShaderResources( 1, 1, &(m_linkData.m_dx11WavefrontVerticesGlobalAddresses.getSRV()) );
 
441
        m_dx11Context->CSSetShaderResources( 2, 1, &(m_vertexData.m_dx11VertexInverseMass.getSRV()) );
 
442
        m_dx11Context->CSSetShaderResources( 3, 1, &(m_linkData.m_dx11LinkVerticesLocalAddresses.getSRV()) );
 
443
        m_dx11Context->CSSetShaderResources( 4, 1, &(m_linkData.m_dx11LinksMassLSC.getSRV()) );
 
444
        m_dx11Context->CSSetShaderResources( 5, 1, &(m_linkData.m_dx11LinksRestLengthSquared.getSRV()) );
 
445
        
 
446
        m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexPosition.getUAV()), NULL );
 
447
 
 
448
        // Execute the kernel
 
449
        m_dx11Context->CSSetShader( solvePositionsFromLinksKernel.kernel, NULL, 0 );
 
450
 
 
451
        int     numBlocks = ((constBuffer.numWaves + WAVEFRONT_BLOCK_MULTIPLIER - 1) / WAVEFRONT_BLOCK_MULTIPLIER );
 
452
        m_dx11Context->Dispatch(numBlocks , 1, 1 );
 
453
 
 
454
        {
 
455
                // Tidy up 
 
456
                ID3D11ShaderResourceView* pViewNULL = NULL;
 
457
                m_dx11Context->CSSetShaderResources( 0, 1, &pViewNULL );
 
458
                m_dx11Context->CSSetShaderResources( 1, 1, &pViewNULL );
 
459
                m_dx11Context->CSSetShaderResources( 2, 1, &pViewNULL );
 
460
                m_dx11Context->CSSetShaderResources( 3, 1, &pViewNULL );
 
461
                m_dx11Context->CSSetShaderResources( 4, 1, &pViewNULL );
 
462
                m_dx11Context->CSSetShaderResources( 5, 1, &pViewNULL );
 
463
 
 
464
                ID3D11UnorderedAccessView* pUAViewNULL = NULL;
 
465
                m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &pUAViewNULL, NULL );
 
466
 
 
467
                ID3D11Buffer *pBufferNull = NULL;
 
468
                m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull );
 
469
        }       
 
470
} // btDX11SIMDAwareSoftBodySolver::solveLinksForPosition
 
471
 
 
472
 
 
473
 
 
474
// End kernel dispatches
 
475
/////////////////////////////////////
 
476
 
 
477
 
 
478
 
 
479
 
 
480
 
 
481
 
 
482
 
 
483
 
 
484
 
 
485
bool btDX11SIMDAwareSoftBodySolver::buildShaders()
 
486
{
 
487
        // Ensure current kernels are released first
 
488
        releaseKernels();
 
489
 
 
490
        bool returnVal = true;
 
491
 
 
492
 
 
493
        if( m_shadersInitialized )
 
494
                return true;
 
495
 
 
496
        
 
497
        updatePositionsFromVelocitiesKernel = dxFunctions.compileComputeShaderFromString( UpdatePositionsFromVelocitiesHLSLString, "UpdatePositionsFromVelocitiesKernel", sizeof(UpdatePositionsFromVelocitiesCB) );
 
498
        if( !updatePositionsFromVelocitiesKernel.constBuffer )
 
499
                returnVal = false;
 
500
        
 
501
        char maxVerticesPerWavefront[20];
 
502
        char maxBatchesPerWavefront[20];
 
503
        char waveFrontSize[20];
 
504
        char waveFrontBlockMultiplier[20];
 
505
        char blockSize[20];
 
506
 
 
507
        sprintf(maxVerticesPerWavefront, "%d", m_linkData.getMaxVerticesPerWavefront());
 
508
        sprintf(maxBatchesPerWavefront, "%d", m_linkData.getMaxBatchesPerWavefront());
 
509
        sprintf(waveFrontSize, "%d", m_linkData.getWavefrontSize());    
 
510
        sprintf(waveFrontBlockMultiplier, "%d", WAVEFRONT_BLOCK_MULTIPLIER);
 
511
        sprintf(blockSize, "%d", WAVEFRONT_BLOCK_MULTIPLIER*m_linkData.getWavefrontSize());
 
512
        
 
513
        D3D10_SHADER_MACRO solvePositionsMacros[6] = { "MAX_NUM_VERTICES_PER_WAVE", maxVerticesPerWavefront, "MAX_BATCHES_PER_WAVE", maxBatchesPerWavefront, "WAVEFRONT_SIZE", waveFrontSize, "WAVEFRONT_BLOCK_MULTIPLIER", waveFrontBlockMultiplier, "BLOCK_SIZE", blockSize, 0, 0 };
 
514
 
 
515
        solvePositionsFromLinksKernel = dxFunctions.compileComputeShaderFromString( SolvePositionsSIMDBatchedHLSLString, "SolvePositionsFromLinksKernel", sizeof(SolvePositionsFromLinksKernelCB), solvePositionsMacros );
 
516
        if( !solvePositionsFromLinksKernel.constBuffer )
 
517
                returnVal = false;
 
518
 
 
519
        updateVelocitiesFromPositionsWithVelocitiesKernel = dxFunctions.compileComputeShaderFromString( UpdateNodesHLSLString, "updateVelocitiesFromPositionsWithVelocitiesKernel", sizeof(UpdateVelocitiesFromPositionsWithVelocitiesCB) );
 
520
        if( !updateVelocitiesFromPositionsWithVelocitiesKernel.constBuffer )
 
521
                returnVal = false;
 
522
        updateVelocitiesFromPositionsWithoutVelocitiesKernel = dxFunctions.compileComputeShaderFromString( UpdatePositionsHLSLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel", sizeof(UpdateVelocitiesFromPositionsWithoutVelocitiesCB));
 
523
        if( !updateVelocitiesFromPositionsWithoutVelocitiesKernel.constBuffer )
 
524
                returnVal = false;
 
525
        integrateKernel = dxFunctions.compileComputeShaderFromString( IntegrateHLSLString, "IntegrateKernel", sizeof(IntegrateCB) );
 
526
        if( !integrateKernel.constBuffer )
 
527
                returnVal = false;
 
528
        applyForcesKernel = dxFunctions.compileComputeShaderFromString( ApplyForcesHLSLString, "ApplyForcesKernel", sizeof(ApplyForcesCB) );
 
529
        if( !applyForcesKernel.constBuffer )
 
530
                returnVal = false;
 
531
        solveCollisionsAndUpdateVelocitiesKernel = dxFunctions.compileComputeShaderFromString( SolveCollisionsAndUpdateVelocitiesHLSLString, "SolveCollisionsAndUpdateVelocitiesKernel", sizeof(SolveCollisionsAndUpdateVelocitiesCB) );
 
532
        if( !solveCollisionsAndUpdateVelocitiesKernel.constBuffer )
 
533
                returnVal = false;
 
534
        resetNormalsAndAreasKernel = dxFunctions.compileComputeShaderFromString( UpdateNormalsHLSLString, "ResetNormalsAndAreasKernel", sizeof(UpdateSoftBodiesCB) );
 
535
        if( !resetNormalsAndAreasKernel.constBuffer )
 
536
                returnVal = false;
 
537
        normalizeNormalsAndAreasKernel = dxFunctions.compileComputeShaderFromString( UpdateNormalsHLSLString, "NormalizeNormalsAndAreasKernel", sizeof(UpdateSoftBodiesCB) );
 
538
        if( !normalizeNormalsAndAreasKernel.constBuffer )
 
539
                returnVal = false;
 
540
        updateSoftBodiesKernel = dxFunctions.compileComputeShaderFromString( UpdateNormalsHLSLString, "UpdateSoftBodiesKernel", sizeof(UpdateSoftBodiesCB) );
 
541
        if( !updateSoftBodiesKernel.constBuffer )
 
542
                returnVal = false;
 
543
        
 
544
        computeBoundsKernel = dxFunctions.compileComputeShaderFromString( ComputeBoundsHLSLString, "ComputeBoundsKernel", sizeof(ComputeBoundsCB) );
 
545
        if( !computeBoundsKernel.constBuffer )
 
546
                returnVal = false;
 
547
 
 
548
        if( returnVal )
 
549
                m_shadersInitialized = true;
 
550
 
 
551
        return returnVal;
 
552
} // btDX11SIMDAwareSoftBodySolver::buildShaders
 
553
 
 
554
static Vectormath::Aos::Transform3 toTransform3( const btTransform &transform )
 
555
{
 
556
        Vectormath::Aos::Transform3 outTransform;
 
557
        outTransform.setCol(0, toVector3(transform.getBasis().getColumn(0)));
 
558
        outTransform.setCol(1, toVector3(transform.getBasis().getColumn(1)));
 
559
        outTransform.setCol(2, toVector3(transform.getBasis().getColumn(2)));
 
560
        outTransform.setCol(3, toVector3(transform.getOrigin()));
 
561
        return outTransform;    
 
562
}
 
563
 
 
564
 
 
565
 
 
566
 
 
567
 
 
568
 
 
569
 
 
570
 
 
571
 
 
572
 
 
573
 
 
574
 
 
575
static void generateBatchesOfWavefronts( btAlignedObjectArray < btAlignedObjectArray <int> > &linksForWavefronts, btSoftBodyLinkData &linkData, int numVertices, btAlignedObjectArray < btAlignedObjectArray <int> > &wavefrontBatches )
 
576
{
 
577
        // A per-batch map of truth values stating whether a given vertex is in that batch
 
578
        // This allows us to significantly optimize the batching
 
579
        btAlignedObjectArray <btAlignedObjectArray<bool> > mapOfVerticesInBatches;
 
580
 
 
581
        for( int waveIndex = 0; waveIndex < linksForWavefronts.size(); ++waveIndex )
 
582
        {
 
583
                btAlignedObjectArray <int> &wavefront( linksForWavefronts[waveIndex] );
 
584
 
 
585
                int batch = 0;
 
586
                bool placed = false;
 
587
                while( batch < wavefrontBatches.size() && !placed )
 
588
                {
 
589
                        // Test the current batch, see if this wave shares any vertex with the waves in the batch
 
590
                        bool foundSharedVertex = false;
 
591
                        for( int link = 0; link < wavefront.size(); ++link )
 
592
                        {
 
593
                                btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );
 
594
                                if( (mapOfVerticesInBatches[batch])[vertices.vertex0] || (mapOfVerticesInBatches[batch])[vertices.vertex1] )
 
595
                                {
 
596
                                        foundSharedVertex = true;
 
597
                                }
 
598
                        }
 
599
 
 
600
                        if( !foundSharedVertex )
 
601
                        {
 
602
                                wavefrontBatches[batch].push_back( waveIndex ); 
 
603
                                // Insert vertices into this batch too
 
604
                                for( int link = 0; link < wavefront.size(); ++link )
 
605
                                {
 
606
                                        btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );
 
607
                                        (mapOfVerticesInBatches[batch])[vertices.vertex0] = true;
 
608
                                        (mapOfVerticesInBatches[batch])[vertices.vertex1] = true;
 
609
                                }
 
610
                                placed = true;
 
611
                        }
 
612
                        batch++;
 
613
                }
 
614
                if( batch == wavefrontBatches.size() && !placed )
 
615
                {
 
616
                        wavefrontBatches.resize( batch + 1 );
 
617
                        wavefrontBatches[batch].push_back( waveIndex );
 
618
 
 
619
                        // And resize map as well
 
620
                        mapOfVerticesInBatches.resize( batch + 1 );
 
621
                        
 
622
                        // Resize maps with total number of vertices
 
623
                        mapOfVerticesInBatches[batch].resize( numVertices, false );
 
624
 
 
625
                        // Insert vertices into this batch too
 
626
                        for( int link = 0; link < wavefront.size(); ++link )
 
627
                        {
 
628
                                btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );
 
629
                                (mapOfVerticesInBatches[batch])[vertices.vertex0] = true;
 
630
                                (mapOfVerticesInBatches[batch])[vertices.vertex1] = true;
 
631
                        }
 
632
                }
 
633
        }
 
634
        mapOfVerticesInBatches.clear();
 
635
}
 
636
 
 
637
// Function to remove an object from a vector maintaining correct ordering of the vector
 
638
template< typename T > static void removeFromVector( btAlignedObjectArray< T > &vectorToUpdate, int indexToRemove )
 
639
{
 
640
        int currentSize = vectorToUpdate.size();
 
641
        for( int i = indexToRemove; i < (currentSize-1); ++i )
 
642
        {
 
643
                vectorToUpdate[i] = vectorToUpdate[i+1];
 
644
        }
 
645
        if( currentSize > 0 )
 
646
                vectorToUpdate.resize( currentSize - 1 );
 
647
}
 
648
 
 
649
/**
 
650
 * Insert element into vectorToUpdate at index index.
 
651
 */
 
652
template< typename T > static void insertAtIndex( btAlignedObjectArray< T > &vectorToUpdate, int index, T element )
 
653
{
 
654
        vectorToUpdate.resize( vectorToUpdate.size() + 1 );
 
655
        for( int i = (vectorToUpdate.size() - 1); i > index; --i )
 
656
        {
 
657
                vectorToUpdate[i] = vectorToUpdate[i-1];
 
658
        }
 
659
        vectorToUpdate[index] = element;
 
660
}
 
661
 
 
662
/** 
 
663
 * Insert into btAlignedObjectArray assuming the array is ordered and maintaining both ordering and uniqueness.
 
664
 * ie it treats vectorToUpdate as an ordered set.
 
665
 */
 
666
template< typename T > static void insertUniqueAndOrderedIntoVector( btAlignedObjectArray<T> &vectorToUpdate, T element )
 
667
{
 
668
        int index = 0;
 
669
        while( index < vectorToUpdate.size() && vectorToUpdate[index] < element )
 
670
        {
 
671
                index++;
 
672
        }
 
673
        if( index == vectorToUpdate.size() || vectorToUpdate[index] != element )
 
674
                insertAtIndex( vectorToUpdate, index, element );
 
675
}
 
676
 
 
677
static void generateLinksPerVertex( int numVertices, btSoftBodyLinkData &linkData, btAlignedObjectArray< int > &listOfLinksPerVertex, btAlignedObjectArray <int> &numLinksPerVertex, int &maxLinks )
 
678
{
 
679
        for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex )
 
680
        {
 
681
                btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) );
 
682
                numLinksPerVertex[nodes.vertex0]++;
 
683
                numLinksPerVertex[nodes.vertex1]++;
 
684
        }
 
685
        int maxLinksPerVertex = 0;
 
686
        for( int vertexIndex = 0; vertexIndex < numVertices; ++vertexIndex )
 
687
        {
 
688
                maxLinksPerVertex = btMax(numLinksPerVertex[vertexIndex], maxLinksPerVertex);
 
689
        }
 
690
        maxLinks = maxLinksPerVertex;
 
691
 
 
692
        btAlignedObjectArray< int > linksFoundPerVertex;
 
693
        linksFoundPerVertex.resize( numVertices, 0 );
 
694
 
 
695
        listOfLinksPerVertex.resize( maxLinksPerVertex * numVertices );
 
696
 
 
697
        for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex )
 
698
        {
 
699
                btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) );
 
700
                {
 
701
                        // Do vertex 0
 
702
                        int vertexIndex = nodes.vertex0;
 
703
                        int linkForVertex = linksFoundPerVertex[nodes.vertex0];
 
704
                        int linkAddress = vertexIndex * maxLinksPerVertex + linkForVertex;
 
705
 
 
706
                        listOfLinksPerVertex[linkAddress] = linkIndex;
 
707
 
 
708
                        linksFoundPerVertex[nodes.vertex0] = linkForVertex + 1;
 
709
                }
 
710
                {
 
711
                        // Do vertex 1
 
712
                        int vertexIndex = nodes.vertex1;
 
713
                        int linkForVertex = linksFoundPerVertex[nodes.vertex1];
 
714
                        int linkAddress = vertexIndex * maxLinksPerVertex + linkForVertex;
 
715
 
 
716
                        listOfLinksPerVertex[linkAddress] = linkIndex;
 
717
 
 
718
                        linksFoundPerVertex[nodes.vertex1] = linkForVertex + 1;
 
719
                }
 
720
        }
 
721
}
 
722
 
 
723
static void computeBatchingIntoWavefronts( 
 
724
        btSoftBodyLinkData &linkData, 
 
725
        int wavefrontSize, 
 
726
        int linksPerWorkItem, 
 
727
        int maxLinksPerWavefront, 
 
728
        btAlignedObjectArray < btAlignedObjectArray <int> > &linksForWavefronts, 
 
729
        btAlignedObjectArray< btAlignedObjectArray < btAlignedObjectArray <int> > > &batchesWithinWaves, /* wave, batch, links in batch */
 
730
        btAlignedObjectArray< btAlignedObjectArray< int > > &verticesForWavefronts /* wavefront, vertex */
 
731
        )
 
732
{
 
733
        
 
734
 
 
735
        // Attempt generation of larger batches of links.
 
736
        btAlignedObjectArray< bool > processedLink;
 
737
        processedLink.resize( linkData.getNumLinks() );
 
738
        btAlignedObjectArray< int > listOfLinksPerVertex;
 
739
        int maxLinksPerVertex = 0;
 
740
 
 
741
        // Count num vertices
 
742
        int numVertices = 0;
 
743
        for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex )
 
744
        {
 
745
                btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) );
 
746
                numVertices = btMax( numVertices, nodes.vertex0 + 1 );
 
747
                numVertices = btMax( numVertices, nodes.vertex1 + 1 );
 
748
        }
 
749
 
 
750
        // Need list of links per vertex
 
751
        // Compute valence of each vertex
 
752
        btAlignedObjectArray <int> numLinksPerVertex;
 
753
        numLinksPerVertex.resize(0);
 
754
        numLinksPerVertex.resize( numVertices, 0 );
 
755
 
 
756
        generateLinksPerVertex( numVertices, linkData, listOfLinksPerVertex, numLinksPerVertex, maxLinksPerVertex );
 
757
 
 
758
 
 
759
        // At this point we know what links we have for each vertex so we can start batching
 
760
        
 
761
        // We want a vertex to start with, let's go with 0
 
762
        int currentVertex = 0;
 
763
        int linksProcessed = 0;
 
764
 
 
765
        btAlignedObjectArray <int> verticesToProcess;
 
766
 
 
767
        while( linksProcessed < linkData.getNumLinks() )
 
768
        {
 
769
                // Next wavefront
 
770
                int nextWavefront = linksForWavefronts.size();
 
771
                linksForWavefronts.resize( nextWavefront + 1 );
 
772
                btAlignedObjectArray <int> &linksForWavefront(linksForWavefronts[nextWavefront]);
 
773
                verticesForWavefronts.resize( nextWavefront + 1 );
 
774
                btAlignedObjectArray<int> &vertexSet( verticesForWavefronts[nextWavefront] );
 
775
 
 
776
                linksForWavefront.resize(0);
 
777
 
 
778
                // Loop to find enough links to fill the wavefront
 
779
                // Stopping if we either run out of links, or fill it
 
780
                while( linksProcessed < linkData.getNumLinks() && linksForWavefront.size() < maxLinksPerWavefront )
 
781
                {
 
782
                        // Go through the links for the current vertex
 
783
                        for( int link = 0; link < numLinksPerVertex[currentVertex] && linksForWavefront.size() < maxLinksPerWavefront; ++link )
 
784
                        {
 
785
                                int linkAddress = currentVertex * maxLinksPerVertex + link;
 
786
                                int linkIndex = listOfLinksPerVertex[linkAddress];
 
787
                                
 
788
                                // If we have not already processed this link, add it to the wavefront
 
789
                                // Claim it as another processed link
 
790
                                // Add the vertex at the far end to the list of vertices to process.
 
791
                                if( !processedLink[linkIndex] )
 
792
                                {
 
793
                                        linksForWavefront.push_back( linkIndex );
 
794
                                        linksProcessed++;
 
795
                                        processedLink[linkIndex] = true;
 
796
                                        int v0 = linkData.getVertexPair(linkIndex).vertex0;
 
797
                                        int v1 = linkData.getVertexPair(linkIndex).vertex1;
 
798
                                        if( v0 == currentVertex )
 
799
                                                verticesToProcess.push_back( v1 );
 
800
                                        else
 
801
                                                verticesToProcess.push_back( v0 );
 
802
                                }
 
803
                        }
 
804
                        if( verticesToProcess.size() > 0 )
 
805
                        {
 
806
                                // Get the element on the front of the queue and remove it
 
807
                                currentVertex = verticesToProcess[0];
 
808
                                removeFromVector( verticesToProcess, 0 );
 
809
                        } else {                
 
810
                                // If we've not yet processed all the links, find the first unprocessed one
 
811
                                // and select one of its vertices as the current vertex
 
812
                                if( linksProcessed < linkData.getNumLinks() )
 
813
                                {
 
814
                                        int searchLink = 0;
 
815
                                        while( processedLink[searchLink] )
 
816
                                                searchLink++;
 
817
                                        currentVertex = linkData.getVertexPair(searchLink).vertex0;
 
818
                                }       
 
819
                        }
 
820
                }
 
821
 
 
822
                // We have either finished or filled a wavefront
 
823
                for( int link = 0; link < linksForWavefront.size(); ++link )
 
824
                {
 
825
                        int v0 = linkData.getVertexPair( linksForWavefront[link] ).vertex0;
 
826
                        int v1 = linkData.getVertexPair( linksForWavefront[link] ).vertex1;
 
827
                        insertUniqueAndOrderedIntoVector( vertexSet, v0 );
 
828
                        insertUniqueAndOrderedIntoVector( vertexSet, v1 );
 
829
                }
 
830
                // Iterate over links mapped to the wave and batch those
 
831
                // We can run a batch on each cycle trivially
 
832
                
 
833
                batchesWithinWaves.resize( batchesWithinWaves.size() + 1 );
 
834
                btAlignedObjectArray < btAlignedObjectArray <int> > &batchesWithinWave( batchesWithinWaves[batchesWithinWaves.size()-1] );
 
835
                
 
836
 
 
837
                for( int link = 0; link < linksForWavefront.size(); ++link )
 
838
                {
 
839
                        int linkIndex = linksForWavefront[link];
 
840
                        btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( linkIndex );
 
841
                        
 
842
                        int batch = 0;
 
843
                        bool placed = false;
 
844
                        while( batch < batchesWithinWave.size() && !placed )
 
845
                        {
 
846
                                bool foundSharedVertex = false;
 
847
                                if( batchesWithinWave[batch].size() >= wavefrontSize )
 
848
                                {
 
849
                                        // If we have already filled this batch, move on to another
 
850
                                        foundSharedVertex = true;
 
851
                                } else {
 
852
                                        for( int link2 = 0; link2 < batchesWithinWave[batch].size(); ++link2 )
 
853
                                        {
 
854
                                                btSoftBodyLinkData::LinkNodePair vertices2 = linkData.getVertexPair( (batchesWithinWave[batch])[link2] );
 
855
 
 
856
                                                if( vertices.vertex0 == vertices2.vertex0 ||
 
857
                                                        vertices.vertex1 == vertices2.vertex0 ||
 
858
                                                        vertices.vertex0 == vertices2.vertex1 ||
 
859
                                                        vertices.vertex1 == vertices2.vertex1 )
 
860
                                                {
 
861
                                                        foundSharedVertex = true;
 
862
                                                        break;
 
863
                                                }
 
864
                                        }
 
865
                                }
 
866
                                if( !foundSharedVertex )
 
867
                                {
 
868
                                        batchesWithinWave[batch].push_back( linkIndex );
 
869
                                        placed = true;
 
870
                                } else {
 
871
                                        ++batch;
 
872
                                }
 
873
                        }
 
874
                        if( batch == batchesWithinWave.size() && !placed )
 
875
                        {
 
876
                                batchesWithinWave.resize( batch + 1 );
 
877
                                batchesWithinWave[batch].push_back( linkIndex );
 
878
                        }
 
879
                }
 
880
                
 
881
        }
 
882
 
 
883
}
 
884
 
 
885
void btSoftBodyLinkDataDX11SIMDAware::generateBatches()
 
886
{
 
887
        btAlignedObjectArray < btAlignedObjectArray <int> > linksForWavefronts;
 
888
        btAlignedObjectArray < btAlignedObjectArray <int> > wavefrontBatches;
 
889
        btAlignedObjectArray< btAlignedObjectArray < btAlignedObjectArray <int> > > batchesWithinWaves;
 
890
        btAlignedObjectArray< btAlignedObjectArray< int > > verticesForWavefronts; // wavefronts, vertices in wavefront as an ordered set
 
891
 
 
892
        // Group the links into wavefronts
 
893
        computeBatchingIntoWavefronts( *this, m_wavefrontSize, m_linksPerWorkItem, m_maxLinksPerWavefront, linksForWavefronts, batchesWithinWaves, verticesForWavefronts );
 
894
 
 
895
 
 
896
        // Batch the wavefronts
 
897
        generateBatchesOfWavefronts( linksForWavefronts, *this, m_maxVertex, wavefrontBatches );
 
898
 
 
899
        m_numWavefronts = linksForWavefronts.size();
 
900
 
 
901
        // At this point we have a description of which links we need to process in each wavefront
 
902
 
 
903
        // First correctly fill the batch ranges vector
 
904
        int numBatches = wavefrontBatches.size();
 
905
        m_wavefrontBatchStartLengths.resize(0);
 
906
        int prefixSum = 0;
 
907
        for( int batchIndex = 0; batchIndex < numBatches; ++batchIndex )
 
908
        {
 
909
                int wavesInBatch = wavefrontBatches[batchIndex].size();
 
910
                int nextPrefixSum = prefixSum + wavesInBatch;
 
911
                m_wavefrontBatchStartLengths.push_back( BatchPair( prefixSum, nextPrefixSum - prefixSum ) );
 
912
 
 
913
                prefixSum += wavesInBatch;
 
914
        }
 
915
        
 
916
        // Also find max number of batches within a wave
 
917
        m_maxBatchesWithinWave = 0;
 
918
        m_maxVerticesWithinWave = 0;
 
919
        m_numBatchesAndVerticesWithinWaves.resize( m_numWavefronts );
 
920
        for( int waveIndex = 0; waveIndex < m_numWavefronts; ++waveIndex )
 
921
        {
 
922
                // See if the number of batches in this wave is greater than the current maxium
 
923
                int batchesInCurrentWave = batchesWithinWaves[waveIndex].size();
 
924
                int verticesInCurrentWave = verticesForWavefronts[waveIndex].size();
 
925
                m_maxBatchesWithinWave = btMax( batchesInCurrentWave, m_maxBatchesWithinWave );
 
926
                m_maxVerticesWithinWave = btMax( verticesInCurrentWave, m_maxVerticesWithinWave );
 
927
        }
 
928
        
 
929
        // Add padding values both for alignment and as dudd addresses within LDS to compute junk rather than branch around
 
930
        m_maxVerticesWithinWave = 16*((m_maxVerticesWithinWave/16)+2);
 
931
 
 
932
        // Now we know the maximum number of vertices per-wave we can resize the global vertices array
 
933
        m_wavefrontVerticesGlobalAddresses.resize( m_maxVerticesWithinWave * m_numWavefronts );
 
934
 
 
935
        // Grab backup copies of all the link data arrays for the sorting process
 
936
        btAlignedObjectArray<btSoftBodyLinkData::LinkNodePair>                          m_links_Backup(m_links);
 
937
        btAlignedObjectArray<float>                                                                                     m_linkStrength_Backup(m_linkStrength);
 
938
        btAlignedObjectArray<float>                                                                                     m_linksMassLSC_Backup(m_linksMassLSC);
 
939
        btAlignedObjectArray<float>                                                                                     m_linksRestLengthSquared_Backup(m_linksRestLengthSquared);
 
940
        //btAlignedObjectArray<Vectormath::Aos::Vector3>                                                m_linksCLength_Backup(m_linksCLength);
 
941
        //btAlignedObjectArray<float>                                                                                   m_linksLengthRatio_Backup(m_linksLengthRatio);
 
942
        btAlignedObjectArray<float>                                                                                     m_linksRestLength_Backup(m_linksRestLength);
 
943
        btAlignedObjectArray<float>                                                                                     m_linksMaterialLinearStiffnessCoefficient_Backup(m_linksMaterialLinearStiffnessCoefficient);
 
944
 
 
945
        // Resize to a wavefront sized batch per batch per wave so we get perfectly coherent memory accesses.
 
946
        m_links.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
 
947
        m_linkVerticesLocalAddresses.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
 
948
        m_linkStrength.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
 
949
        m_linksMassLSC.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
 
950
        m_linksRestLengthSquared.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
 
951
        m_linksRestLength.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
 
952
        m_linksMaterialLinearStiffnessCoefficient.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts ); 
 
953
                
 
954
        // Then re-order links into wavefront blocks
 
955
 
 
956
        // Total number of wavefronts moved. This will decide the ordering of sorted wavefronts.
 
957
        int wavefrontCount = 0;
 
958
 
 
959
        // Iterate over batches of wavefronts, then wavefronts in the batch
 
960
        for( int batchIndex = 0; batchIndex < numBatches; ++batchIndex )
 
961
        {
 
962
                btAlignedObjectArray <int> &batch( wavefrontBatches[batchIndex] );
 
963
                int wavefrontsInBatch = batch.size();
 
964
 
 
965
                
 
966
                for( int wavefrontIndex = 0; wavefrontIndex < wavefrontsInBatch; ++wavefrontIndex )
 
967
                {       
 
968
 
 
969
                        int originalWavefrontIndex = batch[wavefrontIndex];
 
970
                        btAlignedObjectArray< int > &wavefrontVertices( verticesForWavefronts[originalWavefrontIndex] );
 
971
                        int verticesUsedByWavefront = wavefrontVertices.size();
 
972
 
 
973
                        // Copy the set of vertices into the correctly structured array for use on the device
 
974
                        // Fill the non-vertices with -1s
 
975
                        // so we can mask out those reads
 
976
                        for( int vertex = 0; vertex < verticesUsedByWavefront; ++vertex )
 
977
                        {
 
978
                                m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = wavefrontVertices[vertex];
 
979
                        }
 
980
                        for( int vertex = verticesUsedByWavefront; vertex < m_maxVerticesWithinWave; ++vertex )
 
981
                        {
 
982
                                m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = -1;
 
983
                        }
 
984
 
 
985
                        // Obtain the set of batches within the current wavefront
 
986
                        btAlignedObjectArray < btAlignedObjectArray <int> > &batchesWithinWavefront( batchesWithinWaves[originalWavefrontIndex] );
 
987
                        // Set the size of the batches for use in the solver, correctly ordered
 
988
                        NumBatchesVerticesPair batchesAndVertices;
 
989
                        batchesAndVertices.numBatches = batchesWithinWavefront.size();
 
990
                        batchesAndVertices.numVertices = verticesUsedByWavefront;
 
991
                        m_numBatchesAndVerticesWithinWaves[wavefrontCount] = batchesAndVertices;
 
992
                        
 
993
 
 
994
                        // Now iterate over batches within the wavefront to structure the links correctly
 
995
                        for( int wavefrontBatch = 0; wavefrontBatch < batchesWithinWavefront.size(); ++wavefrontBatch )
 
996
                        {
 
997
                                btAlignedObjectArray <int> &linksInBatch( batchesWithinWavefront[wavefrontBatch] );
 
998
                                int wavefrontBatchSize = linksInBatch.size();
 
999
 
 
1000
                                int batchAddressInTarget = m_maxBatchesWithinWave * m_wavefrontSize * wavefrontCount + m_wavefrontSize * wavefrontBatch;
 
1001
 
 
1002
                                for( int linkIndex = 0; linkIndex < wavefrontBatchSize; ++linkIndex )
 
1003
                                {
 
1004
                                        int originalLinkAddress = linksInBatch[linkIndex];
 
1005
                                        // Reorder simple arrays trivially
 
1006
                                        m_links[batchAddressInTarget + linkIndex] = m_links_Backup[originalLinkAddress];
 
1007
                                        m_linkStrength[batchAddressInTarget + linkIndex] = m_linkStrength_Backup[originalLinkAddress];
 
1008
                                        m_linksMassLSC[batchAddressInTarget + linkIndex] = m_linksMassLSC_Backup[originalLinkAddress];
 
1009
                                        m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = m_linksRestLengthSquared_Backup[originalLinkAddress];
 
1010
                                        m_linksRestLength[batchAddressInTarget + linkIndex] = m_linksRestLength_Backup[originalLinkAddress];
 
1011
                                        m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget + linkIndex] = m_linksMaterialLinearStiffnessCoefficient_Backup[originalLinkAddress];
 
1012
 
 
1013
                                        // The local address is more complicated. We need to work out where a given vertex will end up
 
1014
                                        // by searching the set of vertices for this link and using the index as the local address
 
1015
                                        btSoftBodyLinkData::LinkNodePair localPair;
 
1016
                                        btSoftBodyLinkData::LinkNodePair globalPair = m_links[batchAddressInTarget + linkIndex];
 
1017
                                        localPair.vertex0 = wavefrontVertices.findLinearSearch( globalPair.vertex0 );
 
1018
                                        localPair.vertex1 = wavefrontVertices.findLinearSearch( globalPair.vertex1 );
 
1019
                                        m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair;
 
1020
                                }
 
1021
                                for( int linkIndex = wavefrontBatchSize; linkIndex < m_wavefrontSize; ++linkIndex )
 
1022
                                {
 
1023
                                        // Put 0s into these arrays for padding for cleanliness
 
1024
                                        m_links[batchAddressInTarget + linkIndex] = btSoftBodyLinkData::LinkNodePair(0, 0);
 
1025
                                        m_linkStrength[batchAddressInTarget + linkIndex] = 0.f;
 
1026
                                        m_linksMassLSC[batchAddressInTarget + linkIndex] = 0.f;
 
1027
                                        m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = 0.f;
 
1028
                                        m_linksRestLength[batchAddressInTarget + linkIndex] = 0.f;
 
1029
                                        m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget + linkIndex] = 0.f;
 
1030
 
 
1031
 
 
1032
                                        // For local addresses of junk data choose a set of addresses just above the range of valid ones 
 
1033
                                        // and cycling tyhrough % 16 so that we don't have bank conficts between all dud addresses
 
1034
                                        // The valid addresses will do scatter and gather in the valid range, the junk ones should happily work
 
1035
                                        // off the end of that range so we need no control
 
1036
                                        btSoftBodyLinkData::LinkNodePair localPair;
 
1037
                                        localPair.vertex0 = verticesUsedByWavefront + (linkIndex % 16);
 
1038
                                        localPair.vertex1 = verticesUsedByWavefront + (linkIndex % 16);
 
1039
                                        m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair;
 
1040
                                }
 
1041
 
 
1042
                        }
 
1043
 
 
1044
                        
 
1045
                        wavefrontCount++;
 
1046
                }
 
1047
 
 
1048
        
 
1049
        }
 
1050
 
 
1051
} // void btSoftBodyLinkDataDX11SIMDAware::generateBatches()