2
#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n
3
#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n
8
const int numSoftBodies,
9
__global int * g_vertexClothIdentifier,
10
__global float4 * g_vertexPositions,
11
/* Unfortunately, to get the atomics below to work these arrays cannot be */
12
/* uint4, though that is the layout of the data */
13
/* Therefore this is little-endian-only code */
14
volatile __global uint * g_clothMinBounds,
15
volatile __global uint * g_clothMaxBounds,
16
volatile __local uint * clothMinBounds,
17
volatile __local uint * clothMaxBounds)
19
// Init min and max bounds arrays
20
if( get_local_id(0) < numSoftBodies )
23
clothMinBounds[get_local_id(0)*4] = UINT_MAX;
24
clothMinBounds[get_local_id(0)*4+1] = UINT_MAX;
25
clothMinBounds[get_local_id(0)*4+2] = UINT_MAX;
26
clothMinBounds[get_local_id(0)*4+3] = UINT_MAX;
27
clothMaxBounds[get_local_id(0)*4] = 0;
28
clothMaxBounds[get_local_id(0)*4+1] = 0;
29
clothMaxBounds[get_local_id(0)*4+2] = 0;
30
clothMaxBounds[get_local_id(0)*4+3] = 0;
34
barrier(CLK_LOCAL_MEM_FENCE);
36
int nodeID = get_global_id(0);
37
if( nodeID < numNodes )
39
int clothIdentifier = g_vertexClothIdentifier[nodeID];
40
if( clothIdentifier >= 0 )
43
float4 position = (float4)(g_vertexPositions[nodeID].xyz, 0.f);
45
/* Reinterpret position as uint */
46
uint4 positionUInt = (uint4)(as_uint(position.x), as_uint(position.y), as_uint(position.z), 0);
48
/* Invert sign bit of positives and whole of negatives to allow comparison as unsigned ints */
49
positionUInt.x ^= (1+~(positionUInt.x >> 31) | 0x80000000);
50
positionUInt.y ^= (1+~(positionUInt.y >> 31) | 0x80000000);
51
positionUInt.z ^= (1+~(positionUInt.z >> 31) | 0x80000000);
53
// Min/max with the LDS values
54
atom_min(&(clothMinBounds[clothIdentifier*4]), positionUInt.x);
55
atom_min(&(clothMinBounds[clothIdentifier*4+1]), positionUInt.y);
56
atom_min(&(clothMinBounds[clothIdentifier*4+2]), positionUInt.z);
58
atom_max(&(clothMaxBounds[clothIdentifier*4]), positionUInt.x);
59
atom_max(&(clothMaxBounds[clothIdentifier*4+1]), positionUInt.y);
60
atom_max(&(clothMaxBounds[clothIdentifier*4+2]), positionUInt.z);
64
barrier(CLK_LOCAL_MEM_FENCE);
67
/* Use global atomics to update the global versions of the data */
68
if( get_local_id(0) < numSoftBodies )
70
/*atom_min(&(g_clothMinBounds[get_local_id(0)].x), clothMinBounds[get_local_id(0)].x);*/
71
atom_min(&(g_clothMinBounds[get_local_id(0)*4]), clothMinBounds[get_local_id(0)*4]);
72
atom_min(&(g_clothMinBounds[get_local_id(0)*4+1]), clothMinBounds[get_local_id(0)*4+1]);
73
atom_min(&(g_clothMinBounds[get_local_id(0)*4+2]), clothMinBounds[get_local_id(0)*4+2]);
75
atom_max(&(g_clothMaxBounds[get_local_id(0)*4]), clothMaxBounds[get_local_id(0)*4]);
76
atom_max(&(g_clothMaxBounds[get_local_id(0)*4+1]), clothMaxBounds[get_local_id(0)*4+1]);
77
atom_max(&(g_clothMaxBounds[get_local_id(0)*4+2]), clothMaxBounds[get_local_id(0)*4+2]);