~ubuntu-branches/ubuntu/vivid/emscripten/vivid

« back to all changes in this revision

Viewing changes to tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCLSIMDAware.cpp

  • Committer: Package Import Robot
  • Author(s): Sylvestre Ledru
  • Date: 2013-05-02 13:11:51 UTC
  • Revision ID: package-import@ubuntu.com-20130502131151-q8dvteqr1ef2x7xz
Tags: upstream-1.4.1~20130504~adb56cb
ImportĀ upstreamĀ versionĀ 1.4.1~20130504~adb56cb

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*
 
2
Bullet Continuous Collision Detection and Physics Library
 
3
Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
 
4
 
 
5
This software is provided 'as-is', without any express or implied warranty.
 
6
In no event will the authors be held liable for any damages arising from the use of this software.
 
7
Permission is granted to anyone to use this software for any purpose, 
 
8
including commercial applications, and to alter it and redistribute it freely, 
 
9
subject to the following restrictions:
 
10
 
 
11
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
 
12
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
 
13
3. This notice may not be removed or altered from any source distribution.
 
14
*/
 
15
 
 
16
 
 
17
#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
 
18
#include "vectormath/vmInclude.h"
 
19
#include <stdio.h> //@todo: remove the debugging printf at some stage
 
20
#include "btSoftBodySolver_OpenCLSIMDAware.h"
 
21
#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"
 
22
#include "BulletSoftBody/btSoftBody.h"
 
23
#include "BulletCollision/CollisionShapes/btCapsuleShape.h"
 
24
#include <limits.h>
 
25
 
 
26
#define WAVEFRONT_SIZE 32
 
27
#define WAVEFRONT_BLOCK_MULTIPLIER 2
 
28
#define GROUP_SIZE (WAVEFRONT_SIZE*WAVEFRONT_BLOCK_MULTIPLIER)
 
29
#define LINKS_PER_SIMD_LANE 16
 
30
 
 
31
static const size_t workGroupSize = GROUP_SIZE;
 
32
 
 
33
 
 
34
//CL_VERSION_1_1 seems broken on NVidia SDK so just disable it
 
35
 
 
36
#if (0)//CL_VERSION_1_1 == 1)
 
37
 //OpenCL 1.1 kernels use float3
 
38
#define MSTRINGIFY(A) #A
 
39
static const char* UpdatePositionsFromVelocitiesCLString = 
 
40
#include "OpenCLC/UpdatePositionsFromVelocities.cl"
 
41
static const char* SolvePositionsCLString = 
 
42
#include "OpenCLC/SolvePositionsSIMDBatched.cl"
 
43
static const char* UpdateNodesCLString = 
 
44
#include "OpenCLC/UpdateNodes.cl"
 
45
static const char* UpdatePositionsCLString = 
 
46
#include "OpenCLC/UpdatePositions.cl"
 
47
static const char* UpdateConstantsCLString = 
 
48
#include "OpenCLC/UpdateConstants.cl"
 
49
static const char* IntegrateCLString = 
 
50
#include "OpenCLC/Integrate.cl"
 
51
static const char* ApplyForcesCLString = 
 
52
#include "OpenCLC/ApplyForces.cl"
 
53
static const char* UpdateNormalsCLString = 
 
54
#include "OpenCLC/UpdateNormals.cl"
 
55
static const char* VSolveLinksCLString = 
 
56
#include "OpenCLC/VSolveLinks.cl"
 
57
static const char* SolveCollisionsAndUpdateVelocitiesCLString =
 
58
#include "OpenCLC/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl"
 
59
static const char* OutputToVertexArrayCLString =
 
60
#include "OpenCLC/OutputToVertexArray.cl"
 
61
#else
 
62
////OpenCL 1.0 kernels don't use float3
 
63
#define MSTRINGIFY(A) #A
 
64
static const char* UpdatePositionsFromVelocitiesCLString = 
 
65
#include "OpenCLC10/UpdatePositionsFromVelocities.cl"
 
66
static const char* SolvePositionsCLString = 
 
67
#include "OpenCLC10/SolvePositionsSIMDBatched.cl"
 
68
static const char* UpdateNodesCLString = 
 
69
#include "OpenCLC10/UpdateNodes.cl"
 
70
static const char* UpdatePositionsCLString = 
 
71
#include "OpenCLC10/UpdatePositions.cl"
 
72
static const char* UpdateConstantsCLString = 
 
73
#include "OpenCLC10/UpdateConstants.cl"
 
74
static const char* IntegrateCLString = 
 
75
#include "OpenCLC10/Integrate.cl"
 
76
static const char* ApplyForcesCLString = 
 
77
#include "OpenCLC10/ApplyForces.cl"
 
78
static const char* UpdateNormalsCLString = 
 
79
#include "OpenCLC10/UpdateNormals.cl"
 
80
static const char* VSolveLinksCLString = 
 
81
#include "OpenCLC10/VSolveLinks.cl"
 
82
static const char* SolveCollisionsAndUpdateVelocitiesCLString =
 
83
#include "OpenCLC10/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl"
 
84
static const char* OutputToVertexArrayCLString =
 
85
#include "OpenCLC10/OutputToVertexArray.cl"
 
86
#endif //CL_VERSION_1_1
 
87
 
 
88
 
 
89
 
 
90
btSoftBodyLinkDataOpenCLSIMDAware::btSoftBodyLinkDataOpenCLSIMDAware(cl_command_queue queue,  cl_context ctx) :
 
91
        m_cqCommandQue(queue),
 
92
        m_wavefrontSize( WAVEFRONT_SIZE ),
 
93
        m_linksPerWorkItem( LINKS_PER_SIMD_LANE ),
 
94
        m_maxBatchesWithinWave( 0 ),
 
95
        m_maxLinksPerWavefront( m_wavefrontSize * m_linksPerWorkItem ),
 
96
        m_numWavefronts( 0 ),
 
97
        m_maxVertex( 0 ),
 
98
        m_clNumBatchesAndVerticesWithinWaves( queue, ctx, &m_numBatchesAndVerticesWithinWaves, true ),
 
99
        m_clWavefrontVerticesGlobalAddresses( queue, ctx, &m_wavefrontVerticesGlobalAddresses, true ),
 
100
        m_clLinkVerticesLocalAddresses( queue, ctx, &m_linkVerticesLocalAddresses, true ),
 
101
        m_clLinkStrength( queue, ctx, &m_linkStrength, false ),
 
102
        m_clLinksMassLSC( queue, ctx, &m_linksMassLSC, false ),
 
103
        m_clLinksRestLengthSquared( queue, ctx, &m_linksRestLengthSquared, false ),
 
104
        m_clLinksRestLength( queue, ctx, &m_linksRestLength, false ),
 
105
        m_clLinksMaterialLinearStiffnessCoefficient( queue, ctx, &m_linksMaterialLinearStiffnessCoefficient, false )
 
106
{
 
107
}
 
108
 
 
109
btSoftBodyLinkDataOpenCLSIMDAware::~btSoftBodyLinkDataOpenCLSIMDAware()
 
110
{
 
111
}
 
112
 
 
113
static Vectormath::Aos::Vector3 toVector3( const btVector3 &vec )
 
114
{
 
115
        Vectormath::Aos::Vector3 outVec( vec.getX(), vec.getY(), vec.getZ() );
 
116
        return outVec;
 
117
}
 
118
 
 
119
/** Allocate enough space in all link-related arrays to fit numLinks links */
 
120
void btSoftBodyLinkDataOpenCLSIMDAware::createLinks( int numLinks )
 
121
{
 
122
        int previousSize = m_links.size();
 
123
        int newSize = previousSize + numLinks;
 
124
 
 
125
        btSoftBodyLinkData::createLinks( numLinks );
 
126
 
 
127
        // Resize the link addresses array as well
 
128
        m_linkAddresses.resize( newSize );
 
129
}
 
130
 
 
131
/** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */
 
132
void btSoftBodyLinkDataOpenCLSIMDAware::setLinkAt( 
 
133
        const LinkDescription &link, 
 
134
        int linkIndex )
 
135
{
 
136
        btSoftBodyLinkData::setLinkAt( link, linkIndex );
 
137
 
 
138
        if( link.getVertex0() > m_maxVertex )
 
139
                m_maxVertex = link.getVertex0();
 
140
        if( link.getVertex1() > m_maxVertex )
 
141
                m_maxVertex = link.getVertex1();
 
142
 
 
143
        // Set the link index correctly for initialisation
 
144
        m_linkAddresses[linkIndex] = linkIndex;
 
145
}
 
146
 
 
147
bool btSoftBodyLinkDataOpenCLSIMDAware::onAccelerator()
 
148
{
 
149
        return m_onGPU;
 
150
}
 
151
 
 
152
bool btSoftBodyLinkDataOpenCLSIMDAware::moveToAccelerator()
 
153
{
 
154
        bool success = true;
 
155
        success = success && m_clNumBatchesAndVerticesWithinWaves.moveToGPU();
 
156
        success = success && m_clWavefrontVerticesGlobalAddresses.moveToGPU();
 
157
        success = success && m_clLinkVerticesLocalAddresses.moveToGPU();
 
158
        success = success && m_clLinkStrength.moveToGPU();
 
159
        success = success && m_clLinksMassLSC.moveToGPU();
 
160
        success = success && m_clLinksRestLengthSquared.moveToGPU();
 
161
        success = success && m_clLinksRestLength.moveToGPU();
 
162
        success = success && m_clLinksMaterialLinearStiffnessCoefficient.moveToGPU();
 
163
 
 
164
        if( success ) {
 
165
                m_onGPU = true;
 
166
        }
 
167
 
 
168
        return success;
 
169
}
 
170
 
 
171
bool btSoftBodyLinkDataOpenCLSIMDAware::moveFromAccelerator()
 
172
{
 
173
        bool success = true;
 
174
        success = success && m_clNumBatchesAndVerticesWithinWaves.moveToGPU();
 
175
        success = success && m_clWavefrontVerticesGlobalAddresses.moveToGPU();
 
176
        success = success && m_clLinkVerticesLocalAddresses.moveToGPU();
 
177
        success = success && m_clLinkStrength.moveFromGPU();
 
178
        success = success && m_clLinksMassLSC.moveFromGPU();
 
179
        success = success && m_clLinksRestLengthSquared.moveFromGPU();
 
180
        success = success && m_clLinksRestLength.moveFromGPU();
 
181
        success = success && m_clLinksMaterialLinearStiffnessCoefficient.moveFromGPU();
 
182
 
 
183
        if( success ) {
 
184
                m_onGPU = false;
 
185
        }
 
186
 
 
187
        return success;
 
188
}
 
189
 
 
190
 
 
191
 
 
192
 
 
193
 
 
194
 
 
195
 
 
196
 
 
197
btOpenCLSoftBodySolverSIMDAware::btOpenCLSoftBodySolverSIMDAware(cl_command_queue queue, cl_context ctx) :
 
198
        btOpenCLSoftBodySolver( queue, ctx ),
 
199
        m_linkData(queue, ctx)
 
200
{
 
201
        // Initial we will clearly need to update solver constants
 
202
        // For now this is global for the cloths linked with this solver - we should probably make this body specific 
 
203
        // for performance in future once we understand more clearly when constants need to be updated
 
204
        m_updateSolverConstants = true;
 
205
 
 
206
        m_shadersInitialized = false;
 
207
}
 
208
 
 
209
btOpenCLSoftBodySolverSIMDAware::~btOpenCLSoftBodySolverSIMDAware()
 
210
{
 
211
        releaseKernels();
 
212
}
 
213
 
 
214
void btOpenCLSoftBodySolverSIMDAware::optimize( btAlignedObjectArray< btSoftBody * > &softBodies ,bool forceUpdate)
 
215
{
 
216
        if( forceUpdate|| m_softBodySet.size() != softBodies.size() )
 
217
        {
 
218
                // Have a change in the soft body set so update, reloading all the data
 
219
                getVertexData().clear();
 
220
                getTriangleData().clear();
 
221
                getLinkData().clear();
 
222
                m_softBodySet.resize(0);
 
223
 
 
224
 
 
225
                for( int softBodyIndex = 0; softBodyIndex < softBodies.size(); ++softBodyIndex )
 
226
                {
 
227
                        btSoftBody *softBody = softBodies[ softBodyIndex ];
 
228
                        using Vectormath::Aos::Matrix3;
 
229
                        using Vectormath::Aos::Point3;
 
230
 
 
231
                        // Create SoftBody that will store the information within the solver
 
232
                        btOpenCLAcceleratedSoftBodyInterface* newSoftBody = new btOpenCLAcceleratedSoftBodyInterface( softBody );
 
233
                        m_softBodySet.push_back( newSoftBody );
 
234
 
 
235
                        m_perClothAcceleration.push_back( toVector3(softBody->getWorldInfo()->m_gravity) );
 
236
                        m_perClothDampingFactor.push_back(softBody->m_cfg.kDP);
 
237
                        m_perClothVelocityCorrectionCoefficient.push_back( softBody->m_cfg.kVCF );
 
238
                        m_perClothLiftFactor.push_back( softBody->m_cfg.kLF );
 
239
                        m_perClothDragFactor.push_back( softBody->m_cfg.kDG );
 
240
                        m_perClothMediumDensity.push_back(softBody->getWorldInfo()->air_density);
 
241
 
 
242
 
 
243
                        m_perClothFriction.push_back( softBody->getFriction() );
 
244
                        m_perClothCollisionObjects.push_back( CollisionObjectIndices(-1, -1) );
 
245
 
 
246
                        // Add space for new vertices and triangles in the default solver for now
 
247
                        // TODO: Include space here for tearing too later
 
248
                        int firstVertex = getVertexData().getNumVertices();
 
249
                        int numVertices = softBody->m_nodes.size();
 
250
                        // Round maxVertices to a multiple of the workgroup size so we know we're safe to run over in a given group
 
251
                        // maxVertices can be increased to allow tearing, but should be used sparingly because these extra verts will always be processed
 
252
                        int maxVertices = GROUP_SIZE*((numVertices+GROUP_SIZE)/GROUP_SIZE);
 
253
                        // Allocate space for new vertices in all the vertex arrays
 
254
                        getVertexData().createVertices( numVertices, softBodyIndex, maxVertices );
 
255
 
 
256
                        int firstTriangle = getTriangleData().getNumTriangles();
 
257
                        int numTriangles = softBody->m_faces.size();
 
258
                        int maxTriangles = numTriangles;
 
259
                        getTriangleData().createTriangles( maxTriangles );
 
260
 
 
261
                        // Copy vertices from softbody into the solver
 
262
                        for( int vertex = 0; vertex < numVertices; ++vertex )
 
263
                        {
 
264
                                Point3 multPoint(softBody->m_nodes[vertex].m_x.getX(), softBody->m_nodes[vertex].m_x.getY(), softBody->m_nodes[vertex].m_x.getZ());
 
265
                                btSoftBodyVertexData::VertexDescription desc;
 
266
 
 
267
                                // TODO: Position in the softbody might be pre-transformed
 
268
                                // or we may need to adapt for the pose.
 
269
                                //desc.setPosition( cloth.getMeshTransform()*multPoint );
 
270
                                desc.setPosition( multPoint );
 
271
 
 
272
                                float vertexInverseMass = softBody->m_nodes[vertex].m_im;
 
273
                                desc.setInverseMass(vertexInverseMass);
 
274
                                getVertexData().setVertexAt( desc, firstVertex + vertex );
 
275
                        }
 
276
 
 
277
                        // Copy triangles similarly
 
278
                        // We're assuming here that vertex indices are based on the firstVertex rather than the entire scene
 
279
                        for( int triangle = 0; triangle < numTriangles; ++triangle )
 
280
                        {
 
281
                                // Note that large array storage is relative to the array not to the cloth
 
282
                                // So we need to add firstVertex to each value
 
283
                                int vertexIndex0 = (softBody->m_faces[triangle].m_n[0] - &(softBody->m_nodes[0]));
 
284
                                int vertexIndex1 = (softBody->m_faces[triangle].m_n[1] - &(softBody->m_nodes[0]));
 
285
                                int vertexIndex2 = (softBody->m_faces[triangle].m_n[2] - &(softBody->m_nodes[0]));
 
286
                                btSoftBodyTriangleData::TriangleDescription newTriangle(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, vertexIndex2 + firstVertex);
 
287
                                getTriangleData().setTriangleAt( newTriangle, firstTriangle + triangle );
 
288
                                
 
289
                                // Increase vertex triangle counts for this triangle            
 
290
                                getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex0)++;
 
291
                                getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex1)++;
 
292
                                getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex2)++;
 
293
                        }
 
294
 
 
295
                        int firstLink = getLinkData().getNumLinks();
 
296
                        int numLinks = softBody->m_links.size();
 
297
                        int maxLinks = numLinks;
 
298
                        
 
299
                        // Allocate space for the links
 
300
                        getLinkData().createLinks( numLinks );
 
301
 
 
302
                        // Add the links
 
303
                        for( int link = 0; link < numLinks; ++link )
 
304
                        {
 
305
                                int vertexIndex0 = softBody->m_links[link].m_n[0] - &(softBody->m_nodes[0]);
 
306
                                int vertexIndex1 = softBody->m_links[link].m_n[1] - &(softBody->m_nodes[0]);
 
307
 
 
308
                                btSoftBodyLinkData::LinkDescription newLink(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, softBody->m_links[link].m_material->m_kLST);
 
309
                                newLink.setLinkStrength(1.f);
 
310
                                getLinkData().setLinkAt(newLink, firstLink + link);
 
311
                        }
 
312
                        
 
313
                        newSoftBody->setFirstVertex( firstVertex );
 
314
                        newSoftBody->setFirstTriangle( firstTriangle );
 
315
                        newSoftBody->setNumVertices( numVertices );
 
316
                        newSoftBody->setMaxVertices( maxVertices );
 
317
                        newSoftBody->setNumTriangles( numTriangles );
 
318
                        newSoftBody->setMaxTriangles( maxTriangles );
 
319
                        newSoftBody->setFirstLink( firstLink );
 
320
                        newSoftBody->setNumLinks( numLinks );
 
321
                }
 
322
 
 
323
 
 
324
 
 
325
                updateConstants(0.f);
 
326
 
 
327
 
 
328
                m_linkData.generateBatches();           
 
329
                m_triangleData.generateBatches();
 
330
 
 
331
                
 
332
                // Build the shaders to match the batching parameters
 
333
                buildShaders();
 
334
        }
 
335
}
 
336
 
 
337
 
 
338
btSoftBodyLinkData &btOpenCLSoftBodySolverSIMDAware::getLinkData()
 
339
{
 
340
        // TODO: Consider setting link data to "changed" here
 
341
        return m_linkData;
 
342
}
 
343
 
 
344
 
 
345
 
 
346
 
 
347
void btOpenCLSoftBodySolverSIMDAware::updateConstants( float timeStep )
 
348
{                       
 
349
 
 
350
        using namespace Vectormath::Aos;
 
351
 
 
352
        if( m_updateSolverConstants )
 
353
        {
 
354
                m_updateSolverConstants = false;
 
355
 
 
356
                // Will have to redo this if we change the structure (tear, maybe) or various other possible changes
 
357
 
 
358
                // Initialise link constants
 
359
                const int numLinks = m_linkData.getNumLinks();
 
360
                for( int linkIndex = 0; linkIndex < numLinks; ++linkIndex )
 
361
                {
 
362
                        btSoftBodyLinkData::LinkNodePair &vertices( m_linkData.getVertexPair(linkIndex) );
 
363
                        m_linkData.getRestLength(linkIndex) = length((m_vertexData.getPosition( vertices.vertex0 ) - m_vertexData.getPosition( vertices.vertex1 )));
 
364
                        float invMass0 = m_vertexData.getInverseMass(vertices.vertex0);
 
365
                        float invMass1 = m_vertexData.getInverseMass(vertices.vertex1);
 
366
                        float linearStiffness = m_linkData.getLinearStiffnessCoefficient(linkIndex);
 
367
                        float massLSC = (invMass0 + invMass1)/linearStiffness;
 
368
                        m_linkData.getMassLSC(linkIndex) = massLSC;
 
369
                        float restLength = m_linkData.getRestLength(linkIndex);
 
370
                        float restLengthSquared = restLength*restLength;
 
371
                        m_linkData.getRestLengthSquared(linkIndex) = restLengthSquared;
 
372
                }
 
373
        }
 
374
 
 
375
}
 
376
 
 
377
 
 
378
 
 
379
void btOpenCLSoftBodySolverSIMDAware::solveConstraints( float solverdt )
 
380
{
 
381
 
 
382
        using Vectormath::Aos::Vector3;
 
383
        using Vectormath::Aos::Point3;
 
384
        using Vectormath::Aos::lengthSqr;
 
385
        using Vectormath::Aos::dot;
 
386
 
 
387
        // Prepare links
 
388
        int numLinks = m_linkData.getNumLinks();
 
389
        int numVertices = m_vertexData.getNumVertices();
 
390
 
 
391
        float kst = 1.f;
 
392
        float ti = 0.f;
 
393
 
 
394
 
 
395
        m_clPerClothDampingFactor.moveToGPU();
 
396
        m_clPerClothVelocityCorrectionCoefficient.moveToGPU();
 
397
 
 
398
 
 
399
        // Ensure data is on accelerator
 
400
        m_linkData.moveToAccelerator();
 
401
        m_vertexData.moveToAccelerator();
 
402
 
 
403
        
 
404
        //prepareLinks();       
 
405
 
 
406
        prepareCollisionConstraints();
 
407
 
 
408
        // Solve drift
 
409
        for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
 
410
        {
 
411
 
 
412
                for( int i = 0; i < m_linkData.m_wavefrontBatchStartLengths.size(); ++i )
 
413
                {
 
414
                        int startWave = m_linkData.m_wavefrontBatchStartLengths[i].start;
 
415
                        int numWaves = m_linkData.m_wavefrontBatchStartLengths[i].length;
 
416
                        solveLinksForPosition( startWave, numWaves, kst, ti );
 
417
                }
 
418
        } // for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
 
419
 
 
420
        
 
421
        // At this point assume that the force array is blank - we will overwrite it
 
422
        solveCollisionsAndUpdateVelocities( 1.f/solverdt );
 
423
}
 
424
 
 
425
 
 
426
//////////////////////////////////////
 
427
// Kernel dispatches
 
428
 
 
429
 
 
430
void btOpenCLSoftBodySolverSIMDAware::solveLinksForPosition( int startWave, int numWaves, float kst, float ti )
 
431
{
 
432
        cl_int ciErrNum;
 
433
        ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,0, sizeof(int), &startWave);
 
434
        ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,1, sizeof(int), &numWaves);
 
435
        ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,2, sizeof(float), &kst);
 
436
        ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,3, sizeof(float), &ti);
 
437
        
 
438
        
 
439
        ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,4, sizeof(cl_mem), &m_linkData.m_clNumBatchesAndVerticesWithinWaves.m_buffer);
 
440
        ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,5, sizeof(cl_mem), &m_linkData.m_clWavefrontVerticesGlobalAddresses.m_buffer);
 
441
        ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,6, sizeof(cl_mem), &m_linkData.m_clLinkVerticesLocalAddresses.m_buffer);
 
442
        ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,7, sizeof(cl_mem), &m_linkData.m_clLinksMassLSC.m_buffer);
 
443
 
 
444
        ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,8, sizeof(cl_mem), &m_linkData.m_clLinksRestLengthSquared.m_buffer);
 
445
        ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,9, sizeof(cl_mem), &m_vertexData.m_clVertexInverseMass.m_buffer);
 
446
        ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,10, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer);
 
447
 
 
448
        ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,11, WAVEFRONT_BLOCK_MULTIPLIER*sizeof(cl_int2), 0);
 
449
        ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,12, m_linkData.getMaxVerticesPerWavefront()*WAVEFRONT_BLOCK_MULTIPLIER*sizeof(cl_float4), 0);
 
450
        ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,13, m_linkData.getMaxVerticesPerWavefront()*WAVEFRONT_BLOCK_MULTIPLIER*sizeof(cl_float), 0);
 
451
 
 
452
        size_t  numWorkItems = workGroupSize*((numWaves*WAVEFRONT_SIZE + (workGroupSize-1)) / workGroupSize);
 
453
        
 
454
        ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,solvePositionsFromLinksKernel,1,NULL,&numWorkItems,&workGroupSize,0,0,0);
 
455
        
 
456
        if( ciErrNum!= CL_SUCCESS ) 
 
457
        {
 
458
                btAssert( 0 &&  "enqueueNDRangeKernel(solvePositionsFromLinksKernel)");
 
459
        }
 
460
 
 
461
} // solveLinksForPosition
 
462
 
 
463
void btOpenCLSoftBodySolverSIMDAware::solveCollisionsAndUpdateVelocities( float isolverdt )
 
464
{
 
465
        // Copy kernel parameters to GPU
 
466
        m_vertexData.moveToAccelerator();
 
467
        m_clPerClothFriction.moveToGPU();
 
468
        m_clPerClothDampingFactor.moveToGPU();
 
469
        m_clPerClothCollisionObjects.moveToGPU();
 
470
        m_clCollisionObjectDetails.moveToGPU();
 
471
        
 
472
        cl_int ciErrNum;
 
473
        int numVerts = m_vertexData.getNumVertices();
 
474
        ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 0, sizeof(int), &numVerts);
 
475
        ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 1, sizeof(int), &isolverdt);
 
476
        ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 2, sizeof(cl_mem),&m_vertexData.m_clClothIdentifier.m_buffer);
 
477
        ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 3, sizeof(cl_mem),&m_vertexData.m_clVertexPreviousPosition.m_buffer);
 
478
        ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 4, sizeof(cl_mem),&m_clPerClothFriction.m_buffer);
 
479
        ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 5, sizeof(cl_mem),&m_clPerClothDampingFactor.m_buffer);
 
480
        ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 6, sizeof(cl_mem),&m_clPerClothCollisionObjects.m_buffer);
 
481
        ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 7, sizeof(cl_mem),&m_clCollisionObjectDetails.m_buffer);
 
482
        ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 8, sizeof(cl_mem),&m_vertexData.m_clVertexForceAccumulator.m_buffer);
 
483
        ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 9, sizeof(cl_mem),&m_vertexData.m_clVertexVelocity.m_buffer);
 
484
        ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 10, sizeof(cl_mem),&m_vertexData.m_clVertexPosition.m_buffer);
 
485
        ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 11, sizeof(CollisionShapeDescription)*16,0);
 
486
        size_t  numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize);
 
487
        
 
488
        if (numWorkItems)
 
489
        {
 
490
                ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,solveCollisionsAndUpdateVelocitiesKernel, 1, NULL, &numWorkItems, &workGroupSize,0,0,0);
 
491
                
 
492
                if( ciErrNum != CL_SUCCESS ) 
 
493
                {
 
494
                        btAssert( 0 &&  "enqueueNDRangeKernel(solveCollisionsAndUpdateVelocitiesKernel)");
 
495
                }
 
496
        }
 
497
 
 
498
} // btOpenCLSoftBodySolverSIMDAware::updateVelocitiesFromPositionsWithoutVelocities
 
499
 
 
500
// End kernel dispatches
 
501
/////////////////////////////////////
 
502
 
 
503
 
 
504
 
 
505
bool btOpenCLSoftBodySolverSIMDAware::buildShaders()
 
506
{
 
507
        bool returnVal = true;
 
508
 
 
509
        if( m_shadersInitialized )
 
510
                return true;
 
511
 
 
512
        char *wavefrontMacros = new char[256];
 
513
 
 
514
        sprintf(
 
515
                wavefrontMacros, 
 
516
                "-DMAX_NUM_VERTICES_PER_WAVE=%d -DMAX_BATCHES_PER_WAVE=%d -DWAVEFRONT_SIZE=%d -DWAVEFRONT_BLOCK_MULTIPLIER=%d -DBLOCK_SIZE=%d", 
 
517
                m_linkData.getMaxVerticesPerWavefront(),
 
518
                m_linkData.getMaxBatchesPerWavefront(),
 
519
                m_linkData.getWavefrontSize(),
 
520
                WAVEFRONT_BLOCK_MULTIPLIER,
 
521
                WAVEFRONT_BLOCK_MULTIPLIER*m_linkData.getWavefrontSize());
 
522
        
 
523
        updatePositionsFromVelocitiesKernel = clFunctions.compileCLKernelFromString( UpdatePositionsFromVelocitiesCLString, "UpdatePositionsFromVelocitiesKernel", "" );
 
524
        solvePositionsFromLinksKernel = clFunctions.compileCLKernelFromString( SolvePositionsCLString, "SolvePositionsFromLinksKernel", wavefrontMacros );
 
525
        updateVelocitiesFromPositionsWithVelocitiesKernel = clFunctions.compileCLKernelFromString( UpdateNodesCLString, "updateVelocitiesFromPositionsWithVelocitiesKernel", "" );
 
526
        updateVelocitiesFromPositionsWithoutVelocitiesKernel = clFunctions.compileCLKernelFromString( UpdatePositionsCLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel", "" );
 
527
        integrateKernel = clFunctions.compileCLKernelFromString( IntegrateCLString, "IntegrateKernel", "" );
 
528
        applyForcesKernel = clFunctions.compileCLKernelFromString( ApplyForcesCLString, "ApplyForcesKernel", "" );
 
529
        solveCollisionsAndUpdateVelocitiesKernel = clFunctions.compileCLKernelFromString( SolveCollisionsAndUpdateVelocitiesCLString, "SolveCollisionsAndUpdateVelocitiesKernel", "" );
 
530
 
 
531
        // TODO: Rename to UpdateSoftBodies
 
532
        resetNormalsAndAreasKernel = clFunctions.compileCLKernelFromString( UpdateNormalsCLString, "ResetNormalsAndAreasKernel", "" );
 
533
        normalizeNormalsAndAreasKernel = clFunctions.compileCLKernelFromString( UpdateNormalsCLString, "NormalizeNormalsAndAreasKernel", "" );
 
534
        updateSoftBodiesKernel = clFunctions.compileCLKernelFromString( UpdateNormalsCLString, "UpdateSoftBodiesKernel", "" );
 
535
 
 
536
        delete [] wavefrontMacros;
 
537
 
 
538
        if( returnVal )
 
539
                m_shadersInitialized = true;
 
540
 
 
541
        return returnVal;
 
542
}
 
543
 
 
544
 
 
545
 
 
546
 
 
547
static Vectormath::Aos::Transform3 toTransform3( const btTransform &transform )
 
548
{
 
549
        Vectormath::Aos::Transform3 outTransform;
 
550
        outTransform.setCol(0, toVector3(transform.getBasis().getColumn(0)));
 
551
        outTransform.setCol(1, toVector3(transform.getBasis().getColumn(1)));
 
552
        outTransform.setCol(2, toVector3(transform.getBasis().getColumn(2)));
 
553
        outTransform.setCol(3, toVector3(transform.getOrigin()));
 
554
        return outTransform;    
 
555
}
 
556
 
 
557
 
 
558
static void generateBatchesOfWavefronts( btAlignedObjectArray < btAlignedObjectArray <int> > &linksForWavefronts, btSoftBodyLinkData &linkData, int numVertices, btAlignedObjectArray < btAlignedObjectArray <int> > &wavefrontBatches )
 
559
{
 
560
        // A per-batch map of truth values stating whether a given vertex is in that batch
 
561
        // This allows us to significantly optimize the batching
 
562
        btAlignedObjectArray <btAlignedObjectArray<bool> > mapOfVerticesInBatches;
 
563
 
 
564
        for( int waveIndex = 0; waveIndex < linksForWavefronts.size(); ++waveIndex )
 
565
        {
 
566
                btAlignedObjectArray <int> &wavefront( linksForWavefronts[waveIndex] );
 
567
 
 
568
                int batch = 0;
 
569
                bool placed = false;
 
570
                while( batch < wavefrontBatches.size() && !placed )
 
571
                {
 
572
                        // Test the current batch, see if this wave shares any vertex with the waves in the batch
 
573
                        bool foundSharedVertex = false;
 
574
                        for( int link = 0; link < wavefront.size(); ++link )
 
575
                        {
 
576
                                btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );
 
577
                                if( (mapOfVerticesInBatches[batch])[vertices.vertex0] || (mapOfVerticesInBatches[batch])[vertices.vertex1] )
 
578
                                {
 
579
                                        foundSharedVertex = true;
 
580
                                }
 
581
                        }
 
582
 
 
583
                        if( !foundSharedVertex )
 
584
                        {
 
585
                                wavefrontBatches[batch].push_back( waveIndex ); 
 
586
                                // Insert vertices into this batch too
 
587
                                for( int link = 0; link < wavefront.size(); ++link )
 
588
                                {
 
589
                                        btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );
 
590
                                        (mapOfVerticesInBatches[batch])[vertices.vertex0] = true;
 
591
                                        (mapOfVerticesInBatches[batch])[vertices.vertex1] = true;
 
592
                                }
 
593
                                placed = true;
 
594
                        }
 
595
                        batch++;
 
596
                }
 
597
                if( batch == wavefrontBatches.size() && !placed )
 
598
                {
 
599
                        wavefrontBatches.resize( batch + 1 );
 
600
                        wavefrontBatches[batch].push_back( waveIndex );
 
601
 
 
602
                        // And resize map as well
 
603
                        mapOfVerticesInBatches.resize( batch + 1 );
 
604
                        
 
605
                        // Resize maps with total number of vertices
 
606
                        mapOfVerticesInBatches[batch].resize( numVertices, false );
 
607
 
 
608
                        // Insert vertices into this batch too
 
609
                        for( int link = 0; link < wavefront.size(); ++link )
 
610
                        {
 
611
                                btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );
 
612
                                (mapOfVerticesInBatches[batch])[vertices.vertex0] = true;
 
613
                                (mapOfVerticesInBatches[batch])[vertices.vertex1] = true;
 
614
                        }
 
615
                }
 
616
        }
 
617
        mapOfVerticesInBatches.clear();
 
618
}
 
619
 
 
620
// Function to remove an object from a vector maintaining correct ordering of the vector
 
621
template< typename T > static void removeFromVector( btAlignedObjectArray< T > &vectorToUpdate, int indexToRemove )
 
622
{
 
623
        int currentSize = vectorToUpdate.size();
 
624
        for( int i = indexToRemove; i < (currentSize-1); ++i )
 
625
        {
 
626
                vectorToUpdate[i] = vectorToUpdate[i+1];
 
627
        }
 
628
        if( currentSize > 0 )
 
629
                vectorToUpdate.resize( currentSize - 1 );
 
630
}
 
631
 
 
632
/**
 
633
 * Insert element into vectorToUpdate at index index.
 
634
 */
 
635
template< typename T > static void insertAtIndex( btAlignedObjectArray< T > &vectorToUpdate, int index, T element )
 
636
{
 
637
        vectorToUpdate.resize( vectorToUpdate.size() + 1 );
 
638
        for( int i = (vectorToUpdate.size() - 1); i > index; --i )
 
639
        {
 
640
                vectorToUpdate[i] = vectorToUpdate[i-1];
 
641
        }
 
642
        vectorToUpdate[index] = element;
 
643
}
 
644
 
 
645
/** 
 
646
 * Insert into btAlignedObjectArray assuming the array is ordered and maintaining both ordering and uniqueness.
 
647
 * ie it treats vectorToUpdate as an ordered set.
 
648
 */
 
649
template< typename T > static void insertUniqueAndOrderedIntoVector( btAlignedObjectArray<T> &vectorToUpdate, T element )
 
650
{
 
651
        int index = 0;
 
652
        while( index < vectorToUpdate.size() && vectorToUpdate[index] < element )
 
653
        {
 
654
                index++;
 
655
        }
 
656
        if( index == vectorToUpdate.size() || vectorToUpdate[index] != element )
 
657
                insertAtIndex( vectorToUpdate, index, element );
 
658
}
 
659
 
 
660
static void generateLinksPerVertex( int numVertices, btSoftBodyLinkData &linkData, btAlignedObjectArray< int > &listOfLinksPerVertex, btAlignedObjectArray <int> &numLinksPerVertex, int &maxLinks )
 
661
{
 
662
        for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex )
 
663
        {
 
664
                btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) );
 
665
                numLinksPerVertex[nodes.vertex0]++;
 
666
                numLinksPerVertex[nodes.vertex1]++;
 
667
        }
 
668
        int maxLinksPerVertex = 0;
 
669
        for( int vertexIndex = 0; vertexIndex < numVertices; ++vertexIndex )
 
670
        {
 
671
                maxLinksPerVertex = btMax(numLinksPerVertex[vertexIndex], maxLinksPerVertex);
 
672
        }
 
673
        maxLinks = maxLinksPerVertex;
 
674
 
 
675
        btAlignedObjectArray< int > linksFoundPerVertex;
 
676
        linksFoundPerVertex.resize( numVertices, 0 );
 
677
 
 
678
        listOfLinksPerVertex.resize( maxLinksPerVertex * numVertices );
 
679
 
 
680
        for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex )
 
681
        {
 
682
                btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) );
 
683
                {
 
684
                        // Do vertex 0
 
685
                        int vertexIndex = nodes.vertex0;
 
686
                        int linkForVertex = linksFoundPerVertex[nodes.vertex0];
 
687
                        int linkAddress = vertexIndex * maxLinksPerVertex + linkForVertex;
 
688
 
 
689
                        listOfLinksPerVertex[linkAddress] = linkIndex;
 
690
 
 
691
                        linksFoundPerVertex[nodes.vertex0] = linkForVertex + 1;
 
692
                }
 
693
                {
 
694
                        // Do vertex 1
 
695
                        int vertexIndex = nodes.vertex1;
 
696
                        int linkForVertex = linksFoundPerVertex[nodes.vertex1];
 
697
                        int linkAddress = vertexIndex * maxLinksPerVertex + linkForVertex;
 
698
 
 
699
                        listOfLinksPerVertex[linkAddress] = linkIndex;
 
700
 
 
701
                        linksFoundPerVertex[nodes.vertex1] = linkForVertex + 1;
 
702
                }
 
703
        }
 
704
}
 
705
 
 
706
static void computeBatchingIntoWavefronts( 
 
707
        btSoftBodyLinkData &linkData, 
 
708
        int wavefrontSize, 
 
709
        int linksPerWorkItem, 
 
710
        int maxLinksPerWavefront, 
 
711
        btAlignedObjectArray < btAlignedObjectArray <int> > &linksForWavefronts, 
 
712
        btAlignedObjectArray< btAlignedObjectArray < btAlignedObjectArray <int> > > &batchesWithinWaves, /* wave, batch, links in batch */
 
713
        btAlignedObjectArray< btAlignedObjectArray< int > > &verticesForWavefronts /* wavefront, vertex */
 
714
        )
 
715
{
 
716
        
 
717
 
 
718
        // Attempt generation of larger batches of links.
 
719
        btAlignedObjectArray< bool > processedLink;
 
720
        processedLink.resize( linkData.getNumLinks() );
 
721
        btAlignedObjectArray< int > listOfLinksPerVertex;
 
722
        int maxLinksPerVertex = 0;
 
723
 
 
724
        // Count num vertices
 
725
        int numVertices = 0;
 
726
        for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex )
 
727
        {
 
728
                btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) );
 
729
                numVertices = btMax( numVertices, nodes.vertex0 + 1 );
 
730
                numVertices = btMax( numVertices, nodes.vertex1 + 1 );
 
731
        }
 
732
 
 
733
        // Need list of links per vertex
 
734
        // Compute valence of each vertex
 
735
        btAlignedObjectArray <int> numLinksPerVertex;
 
736
        numLinksPerVertex.resize(0);
 
737
        numLinksPerVertex.resize( numVertices, 0 );
 
738
 
 
739
        generateLinksPerVertex( numVertices, linkData, listOfLinksPerVertex, numLinksPerVertex, maxLinksPerVertex );
 
740
 
 
741
        if (!numVertices)
 
742
                return;
 
743
 
 
744
        for( int vertex = 0; vertex < 10; ++vertex )
 
745
        {
 
746
                for( int link = 0; link < numLinksPerVertex[vertex]; ++link )
 
747
                {
 
748
                        int linkAddress = vertex * maxLinksPerVertex + link;
 
749
                }
 
750
        }
 
751
 
 
752
 
 
753
        // At this point we know what links we have for each vertex so we can start batching
 
754
        
 
755
        // We want a vertex to start with, let's go with 0
 
756
        int currentVertex = 0;
 
757
        int linksProcessed = 0;
 
758
 
 
759
        btAlignedObjectArray <int> verticesToProcess;
 
760
 
 
761
        while( linksProcessed < linkData.getNumLinks() )
 
762
        {
 
763
                // Next wavefront
 
764
                int nextWavefront = linksForWavefronts.size();
 
765
                linksForWavefronts.resize( nextWavefront + 1 );
 
766
                btAlignedObjectArray <int> &linksForWavefront(linksForWavefronts[nextWavefront]);
 
767
                verticesForWavefronts.resize( nextWavefront + 1 );
 
768
                btAlignedObjectArray<int> &vertexSet( verticesForWavefronts[nextWavefront] );
 
769
 
 
770
                linksForWavefront.resize(0);
 
771
 
 
772
                // Loop to find enough links to fill the wavefront
 
773
                // Stopping if we either run out of links, or fill it
 
774
                while( linksProcessed < linkData.getNumLinks() && linksForWavefront.size() < maxLinksPerWavefront )
 
775
                {
 
776
                        // Go through the links for the current vertex
 
777
                        for( int link = 0; link < numLinksPerVertex[currentVertex] && linksForWavefront.size() < maxLinksPerWavefront; ++link )
 
778
                        {
 
779
                                int linkAddress = currentVertex * maxLinksPerVertex + link;
 
780
                                int linkIndex = listOfLinksPerVertex[linkAddress];
 
781
                                
 
782
                                // If we have not already processed this link, add it to the wavefront
 
783
                                // Claim it as another processed link
 
784
                                // Add the vertex at the far end to the list of vertices to process.
 
785
                                if( !processedLink[linkIndex] )
 
786
                                {
 
787
                                        linksForWavefront.push_back( linkIndex );
 
788
                                        linksProcessed++;
 
789
                                        processedLink[linkIndex] = true;
 
790
                                        int v0 = linkData.getVertexPair(linkIndex).vertex0;
 
791
                                        int v1 = linkData.getVertexPair(linkIndex).vertex1;
 
792
                                        if( v0 == currentVertex )
 
793
                                                verticesToProcess.push_back( v1 );
 
794
                                        else
 
795
                                                verticesToProcess.push_back( v0 );
 
796
                                }
 
797
                        }
 
798
                        if( verticesToProcess.size() > 0 )
 
799
                        {
 
800
                                // Get the element on the front of the queue and remove it
 
801
                                currentVertex = verticesToProcess[0];
 
802
                                removeFromVector( verticesToProcess, 0 );
 
803
                        } else {                
 
804
                                // If we've not yet processed all the links, find the first unprocessed one
 
805
                                // and select one of its vertices as the current vertex
 
806
                                if( linksProcessed < linkData.getNumLinks() )
 
807
                                {
 
808
                                        int searchLink = 0;
 
809
                                        while( processedLink[searchLink] )
 
810
                                                searchLink++;
 
811
                                        currentVertex = linkData.getVertexPair(searchLink).vertex0;
 
812
                                }       
 
813
                        }
 
814
                }
 
815
 
 
816
                // We have either finished or filled a wavefront
 
817
                for( int link = 0; link < linksForWavefront.size(); ++link )
 
818
                {
 
819
                        int v0 = linkData.getVertexPair( linksForWavefront[link] ).vertex0;
 
820
                        int v1 = linkData.getVertexPair( linksForWavefront[link] ).vertex1;
 
821
                        insertUniqueAndOrderedIntoVector( vertexSet, v0 );
 
822
                        insertUniqueAndOrderedIntoVector( vertexSet, v1 );
 
823
                }
 
824
                // Iterate over links mapped to the wave and batch those
 
825
                // We can run a batch on each cycle trivially
 
826
                
 
827
                batchesWithinWaves.resize( batchesWithinWaves.size() + 1 );
 
828
                btAlignedObjectArray < btAlignedObjectArray <int> > &batchesWithinWave( batchesWithinWaves[batchesWithinWaves.size()-1] );
 
829
                
 
830
 
 
831
                for( int link = 0; link < linksForWavefront.size(); ++link )
 
832
                {
 
833
                        int linkIndex = linksForWavefront[link];
 
834
                        btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( linkIndex );
 
835
                        
 
836
                        int batch = 0;
 
837
                        bool placed = false;
 
838
                        while( batch < batchesWithinWave.size() && !placed )
 
839
                        {
 
840
                                bool foundSharedVertex = false;
 
841
                                if( batchesWithinWave[batch].size() >= wavefrontSize )
 
842
                                {
 
843
                                        // If we have already filled this batch, move on to another
 
844
                                        foundSharedVertex = true;
 
845
                                } else {
 
846
                                        for( int link2 = 0; link2 < batchesWithinWave[batch].size(); ++link2 )
 
847
                                        {
 
848
                                                btSoftBodyLinkData::LinkNodePair vertices2 = linkData.getVertexPair( (batchesWithinWave[batch])[link2] );
 
849
 
 
850
                                                if( vertices.vertex0 == vertices2.vertex0 ||
 
851
                                                        vertices.vertex1 == vertices2.vertex0 ||
 
852
                                                        vertices.vertex0 == vertices2.vertex1 ||
 
853
                                                        vertices.vertex1 == vertices2.vertex1 )
 
854
                                                {
 
855
                                                        foundSharedVertex = true;
 
856
                                                        break;
 
857
                                                }
 
858
                                        }
 
859
                                }
 
860
                                if( !foundSharedVertex )
 
861
                                {
 
862
                                        batchesWithinWave[batch].push_back( linkIndex );
 
863
                                        placed = true;
 
864
                                } else {
 
865
                                        ++batch;
 
866
                                }
 
867
                        }
 
868
                        if( batch == batchesWithinWave.size() && !placed )
 
869
                        {
 
870
                                batchesWithinWave.resize( batch + 1 );
 
871
                                batchesWithinWave[batch].push_back( linkIndex );
 
872
                        }
 
873
                }
 
874
                
 
875
        }
 
876
 
 
877
}
 
878
 
 
879
void btSoftBodyLinkDataOpenCLSIMDAware::generateBatches()
 
880
{
 
881
        btAlignedObjectArray < btAlignedObjectArray <int> > linksForWavefronts;
 
882
        btAlignedObjectArray < btAlignedObjectArray <int> > wavefrontBatches;
 
883
        btAlignedObjectArray< btAlignedObjectArray < btAlignedObjectArray <int> > > batchesWithinWaves;
 
884
        btAlignedObjectArray< btAlignedObjectArray< int > > verticesForWavefronts; // wavefronts, vertices in wavefront as an ordered set
 
885
 
 
886
        // Group the links into wavefronts
 
887
        computeBatchingIntoWavefronts( *this, m_wavefrontSize, m_linksPerWorkItem, m_maxLinksPerWavefront, linksForWavefronts, batchesWithinWaves, verticesForWavefronts );
 
888
 
 
889
 
 
890
        // Batch the wavefronts
 
891
        generateBatchesOfWavefronts( linksForWavefronts, *this, m_maxVertex, wavefrontBatches );
 
892
 
 
893
        m_numWavefronts = linksForWavefronts.size();
 
894
 
 
895
        // At this point we have a description of which links we need to process in each wavefront
 
896
 
 
897
        // First correctly fill the batch ranges vector
 
898
        int numBatches = wavefrontBatches.size();
 
899
        m_wavefrontBatchStartLengths.resize(0);
 
900
        int prefixSum = 0;
 
901
        for( int batchIndex = 0; batchIndex < numBatches; ++batchIndex )
 
902
        {
 
903
                int wavesInBatch = wavefrontBatches[batchIndex].size();
 
904
                int nextPrefixSum = prefixSum + wavesInBatch;
 
905
                m_wavefrontBatchStartLengths.push_back( BatchPair( prefixSum, nextPrefixSum - prefixSum ) );
 
906
 
 
907
                prefixSum += wavesInBatch;
 
908
        }
 
909
        
 
910
        // Also find max number of batches within a wave
 
911
        m_maxBatchesWithinWave = 0;
 
912
        m_maxVerticesWithinWave = 0;
 
913
        m_numBatchesAndVerticesWithinWaves.resize( m_numWavefronts );
 
914
        for( int waveIndex = 0; waveIndex < m_numWavefronts; ++waveIndex )
 
915
        {
 
916
                // See if the number of batches in this wave is greater than the current maxium
 
917
                int batchesInCurrentWave = batchesWithinWaves[waveIndex].size();
 
918
                int verticesInCurrentWave = verticesForWavefronts[waveIndex].size();
 
919
                m_maxBatchesWithinWave = btMax( batchesInCurrentWave, m_maxBatchesWithinWave );
 
920
                m_maxVerticesWithinWave = btMax( verticesInCurrentWave, m_maxVerticesWithinWave );
 
921
        }
 
922
        
 
923
        // Add padding values both for alignment and as dudd addresses within LDS to compute junk rather than branch around
 
924
        m_maxVerticesWithinWave = 16*((m_maxVerticesWithinWave/16)+2);
 
925
 
 
926
        // Now we know the maximum number of vertices per-wave we can resize the global vertices array
 
927
        m_wavefrontVerticesGlobalAddresses.resize( m_maxVerticesWithinWave * m_numWavefronts );
 
928
 
 
929
        // Grab backup copies of all the link data arrays for the sorting process
 
930
        btAlignedObjectArray<btSoftBodyLinkData::LinkNodePair>                          m_links_Backup(m_links);
 
931
        btAlignedObjectArray<float>                                                                                     m_linkStrength_Backup(m_linkStrength);
 
932
        btAlignedObjectArray<float>                                                                                     m_linksMassLSC_Backup(m_linksMassLSC);
 
933
        btAlignedObjectArray<float>                                                                                     m_linksRestLengthSquared_Backup(m_linksRestLengthSquared);
 
934
        //btAlignedObjectArray<Vectormath::Aos::Vector3>                                                m_linksCLength_Backup(m_linksCLength);
 
935
        //btAlignedObjectArray<float>                                                                                   m_linksLengthRatio_Backup(m_linksLengthRatio);
 
936
        btAlignedObjectArray<float>                                                                                     m_linksRestLength_Backup(m_linksRestLength);
 
937
        btAlignedObjectArray<float>                                                                                     m_linksMaterialLinearStiffnessCoefficient_Backup(m_linksMaterialLinearStiffnessCoefficient);
 
938
 
 
939
        // Resize to a wavefront sized batch per batch per wave so we get perfectly coherent memory accesses.
 
940
        m_links.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
 
941
        m_linkVerticesLocalAddresses.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
 
942
        m_linkStrength.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
 
943
        m_linksMassLSC.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
 
944
        m_linksRestLengthSquared.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
 
945
        m_linksRestLength.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
 
946
        m_linksMaterialLinearStiffnessCoefficient.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts ); 
 
947
                
 
948
        // Then re-order links into wavefront blocks
 
949
 
 
950
        // Total number of wavefronts moved. This will decide the ordering of sorted wavefronts.
 
951
        int wavefrontCount = 0;
 
952
 
 
953
        // Iterate over batches of wavefronts, then wavefronts in the batch
 
954
        for( int batchIndex = 0; batchIndex < numBatches; ++batchIndex )
 
955
        {
 
956
                btAlignedObjectArray <int> &batch( wavefrontBatches[batchIndex] );
 
957
                int wavefrontsInBatch = batch.size();
 
958
 
 
959
                
 
960
                for( int wavefrontIndex = 0; wavefrontIndex < wavefrontsInBatch; ++wavefrontIndex )
 
961
                {       
 
962
 
 
963
                        int originalWavefrontIndex = batch[wavefrontIndex];
 
964
                        btAlignedObjectArray< int > &wavefrontVertices( verticesForWavefronts[originalWavefrontIndex] );
 
965
                        int verticesUsedByWavefront = wavefrontVertices.size();
 
966
 
 
967
                        // Copy the set of vertices into the correctly structured array for use on the device
 
968
                        // Fill the non-vertices with -1s
 
969
                        // so we can mask out those reads
 
970
                        for( int vertex = 0; vertex < verticesUsedByWavefront; ++vertex )
 
971
                        {
 
972
                                m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = wavefrontVertices[vertex];
 
973
                        }
 
974
                        for( int vertex = verticesUsedByWavefront; vertex < m_maxVerticesWithinWave; ++vertex )
 
975
                        {
 
976
                                m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = -1;
 
977
                        }
 
978
 
 
979
                        // Obtain the set of batches within the current wavefront
 
980
                        btAlignedObjectArray < btAlignedObjectArray <int> > &batchesWithinWavefront( batchesWithinWaves[originalWavefrontIndex] );
 
981
                        // Set the size of the batches for use in the solver, correctly ordered
 
982
                        NumBatchesVerticesPair batchesAndVertices;
 
983
                        batchesAndVertices.numBatches = batchesWithinWavefront.size();
 
984
                        batchesAndVertices.numVertices = verticesUsedByWavefront;
 
985
                        m_numBatchesAndVerticesWithinWaves[wavefrontCount] = batchesAndVertices;
 
986
                        
 
987
 
 
988
                        // Now iterate over batches within the wavefront to structure the links correctly
 
989
                        for( int wavefrontBatch = 0; wavefrontBatch < batchesWithinWavefront.size(); ++wavefrontBatch )
 
990
                        {
 
991
                                btAlignedObjectArray <int> &linksInBatch( batchesWithinWavefront[wavefrontBatch] );
 
992
                                int wavefrontBatchSize = linksInBatch.size();
 
993
 
 
994
                                int batchAddressInTarget = m_maxBatchesWithinWave * m_wavefrontSize * wavefrontCount + m_wavefrontSize * wavefrontBatch;
 
995
 
 
996
                                for( int linkIndex = 0; linkIndex < wavefrontBatchSize; ++linkIndex )
 
997
                                {
 
998
                                        int originalLinkAddress = linksInBatch[linkIndex];
 
999
                                        // Reorder simple arrays trivially
 
1000
                                        m_links[batchAddressInTarget + linkIndex] = m_links_Backup[originalLinkAddress];
 
1001
                                        m_linkStrength[batchAddressInTarget + linkIndex] = m_linkStrength_Backup[originalLinkAddress];
 
1002
                                        m_linksMassLSC[batchAddressInTarget + linkIndex] = m_linksMassLSC_Backup[originalLinkAddress];
 
1003
                                        m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = m_linksRestLengthSquared_Backup[originalLinkAddress];
 
1004
                                        m_linksRestLength[batchAddressInTarget + linkIndex] = m_linksRestLength_Backup[originalLinkAddress];
 
1005
                                        m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget + linkIndex] = m_linksMaterialLinearStiffnessCoefficient_Backup[originalLinkAddress];
 
1006
 
 
1007
                                        // The local address is more complicated. We need to work out where a given vertex will end up
 
1008
                                        // by searching the set of vertices for this link and using the index as the local address
 
1009
                                        btSoftBodyLinkData::LinkNodePair localPair;
 
1010
                                        btSoftBodyLinkData::LinkNodePair globalPair = m_links[batchAddressInTarget + linkIndex];
 
1011
                                        localPair.vertex0 = wavefrontVertices.findLinearSearch( globalPair.vertex0 );
 
1012
                                        localPair.vertex1 = wavefrontVertices.findLinearSearch( globalPair.vertex1 );
 
1013
                                        m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair;
 
1014
                                }
 
1015
                                for( int linkIndex = wavefrontBatchSize; linkIndex < m_wavefrontSize; ++linkIndex )
 
1016
                                {
 
1017
                                        // Put 0s into these arrays for padding for cleanliness
 
1018
                                        m_links[batchAddressInTarget + linkIndex] = btSoftBodyLinkData::LinkNodePair(0, 0);
 
1019
                                        m_linkStrength[batchAddressInTarget + linkIndex] = 0.f;
 
1020
                                        m_linksMassLSC[batchAddressInTarget + linkIndex] = 0.f;
 
1021
                                        m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = 0.f;
 
1022
                                        m_linksRestLength[batchAddressInTarget + linkIndex] = 0.f;
 
1023
                                        m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget + linkIndex] = 0.f;
 
1024
 
 
1025
 
 
1026
                                        // For local addresses of junk data choose a set of addresses just above the range of valid ones 
 
1027
                                        // and cycling tyhrough % 16 so that we don't have bank conficts between all dud addresses
 
1028
                                        // The valid addresses will do scatter and gather in the valid range, the junk ones should happily work
 
1029
                                        // off the end of that range so we need no control
 
1030
                                        btSoftBodyLinkData::LinkNodePair localPair;
 
1031
                                        localPair.vertex0 = verticesUsedByWavefront + (linkIndex % 16);
 
1032
                                        localPair.vertex1 = verticesUsedByWavefront + (linkIndex % 16);
 
1033
                                        m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair;
 
1034
                                }
 
1035
 
 
1036
                        }
 
1037
 
 
1038
                        
 
1039
                        wavefrontCount++;
 
1040
                }
 
1041
 
 
1042
        
 
1043
        }
 
1044
 
 
1045
} // void btSoftBodyLinkDataDX11SIMDAware::generateBatches()
 
1046
 
 
1047
 
 
1048