59
59
copy_v3_v3(start, isec->start);
60
60
copy_v3_v3(idot_axis, isec->idot_axis);
62
const __m128 tmin1 = _mm_max_ps(tmin0, _mm_mul_ps( _mm_sub_ps( bb_group[isec->bv_index[0]], _mm_set_ps1(start[0]) ), _mm_set_ps1(idot_axis[0])) );
63
const __m128 tmax1 = _mm_min_ps(tmax0, _mm_mul_ps( _mm_sub_ps( bb_group[isec->bv_index[1]], _mm_set_ps1(start[0]) ), _mm_set_ps1(idot_axis[0])) );
64
const __m128 tmin2 = _mm_max_ps(tmin1, _mm_mul_ps( _mm_sub_ps( bb_group[isec->bv_index[2]], _mm_set_ps1(start[1]) ), _mm_set_ps1(idot_axis[1])) );
65
const __m128 tmax2 = _mm_min_ps(tmax1, _mm_mul_ps( _mm_sub_ps( bb_group[isec->bv_index[3]], _mm_set_ps1(start[1]) ), _mm_set_ps1(idot_axis[1])) );
66
const __m128 tmin3 = _mm_max_ps(tmin2, _mm_mul_ps( _mm_sub_ps( bb_group[isec->bv_index[4]], _mm_set_ps1(start[2]) ), _mm_set_ps1(idot_axis[2])) );
67
const __m128 tmax3 = _mm_min_ps(tmax2, _mm_mul_ps( _mm_sub_ps( bb_group[isec->bv_index[5]], _mm_set_ps1(start[2]) ), _mm_set_ps1(idot_axis[2])) );
62
const __m128 tmin1 = _mm_max_ps(tmin0, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[0]], _mm_set_ps1(start[0]) ), _mm_set_ps1(idot_axis[0])) );
63
const __m128 tmax1 = _mm_min_ps(tmax0, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[1]], _mm_set_ps1(start[0]) ), _mm_set_ps1(idot_axis[0])) );
64
const __m128 tmin2 = _mm_max_ps(tmin1, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[2]], _mm_set_ps1(start[1]) ), _mm_set_ps1(idot_axis[1])) );
65
const __m128 tmax2 = _mm_min_ps(tmax1, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[3]], _mm_set_ps1(start[1]) ), _mm_set_ps1(idot_axis[1])) );
66
const __m128 tmin3 = _mm_max_ps(tmin2, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[4]], _mm_set_ps1(start[2]) ), _mm_set_ps1(idot_axis[2])) );
67
const __m128 tmax3 = _mm_min_ps(tmax2, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[5]], _mm_set_ps1(start[2]) ), _mm_set_ps1(idot_axis[2])) );
69
69
return _mm_movemask_ps(_mm_cmpge_ps(tmax3, tmin3));
168
166
template<class Node> static inline void bvh_node_push_childs(Node *node, Isect *isec, Node **stack, int &stack_pos);
170
template<class Node,int MAX_STACK_SIZE,bool TEST_ROOT,bool SHADOW>
168
template<class Node, int MAX_STACK_SIZE, bool TEST_ROOT, bool SHADOW>
171
169
static int bvh_node_stack_raycast(Node *root, Isect *isec)
173
171
Node *stack[MAX_STACK_SIZE];
174
172
int hit = 0, stack_pos = 0;
176
if(!TEST_ROOT && !is_leaf(root))
174
if (!TEST_ROOT && !is_leaf(root))
177
175
bvh_node_push_childs(root, isec, stack, stack_pos);
179
177
stack[stack_pos++] = root;
183
180
Node *node = stack[--stack_pos];
186
if(bvh_node_hit_test(node,isec))
181
if (!is_leaf(node)) {
182
if (bvh_node_hit_test(node, isec)) {
188
183
bvh_node_push_childs(node, isec, stack, stack_pos);
189
184
assert(stack_pos <= MAX_STACK_SIZE);
194
hit |= RE_rayobject_intersect( (RayObject*)node, isec);
195
if(SHADOW && hit) return hit;
188
hit |= RE_rayobject_intersect( (RayObject *)node, isec);
189
if (SHADOW && hit) return hit;
205
199
* this was created to be able to use any SIMD (at the cost of some memmoves)
206
200
* it can take advantage of any SIMD width and doesn't need any special tree care
208
template<class Node,int MAX_STACK_SIZE,bool TEST_ROOT>
202
template<class Node, int MAX_STACK_SIZE, bool TEST_ROOT>
209
203
static int bvh_node_stack_raycast_simd(Node *root, Isect *isec)
211
205
Node *stack[MAX_STACK_SIZE];
213
207
int hit = 0, stack_pos = 0;
219
if(!is_leaf(root->child))
210
if (!is_leaf(root)) {
211
if (!is_leaf(root->child))
220
212
bvh_node_push_childs(root, isec, stack, stack_pos);
222
return RE_rayobject_intersect( (RayObject*)root->child, isec);
214
return RE_rayobject_intersect( (RayObject *)root->child, isec);
225
return RE_rayobject_intersect( (RayObject*)root, isec);
217
return RE_rayobject_intersect( (RayObject *)root, isec);
230
221
stack[stack_pos++] = root;
232
return RE_rayobject_intersect( (RayObject*)root, isec);
223
return RE_rayobject_intersect( (RayObject *)root, isec);
228
if (stack_pos >= 4) {
245
234
/* prepare the 4BB for SIMD */
246
t_node[0] = stack[stack_pos+0]->child;
247
t_node[1] = stack[stack_pos+1]->child;
248
t_node[2] = stack[stack_pos+2]->child;
249
t_node[3] = stack[stack_pos+3]->child;
251
const float *bb0 = stack[stack_pos+0]->bb;
252
const float *bb1 = stack[stack_pos+1]->bb;
253
const float *bb2 = stack[stack_pos+2]->bb;
254
const float *bb3 = stack[stack_pos+3]->bb;
256
const __m128 x0y0x1y1 = _mm_shuffle_ps( _mm_load_ps(bb0), _mm_load_ps(bb1), _MM_SHUFFLE(1,0,1,0) );
257
const __m128 x2y2x3y3 = _mm_shuffle_ps( _mm_load_ps(bb2), _mm_load_ps(bb3), _MM_SHUFFLE(1,0,1,0) );
258
t_bb[0] = _mm_shuffle_ps( x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(2,0,2,0) );
259
t_bb[1] = _mm_shuffle_ps( x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(3,1,3,1) );
261
const __m128 z0X0z1X1 = _mm_shuffle_ps( _mm_load_ps(bb0), _mm_load_ps(bb1), _MM_SHUFFLE(3,2,3,2) );
262
const __m128 z2X2z3X3 = _mm_shuffle_ps( _mm_load_ps(bb2), _mm_load_ps(bb3), _MM_SHUFFLE(3,2,3,2) );
263
t_bb[2] = _mm_shuffle_ps( z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(2,0,2,0) );
264
t_bb[3] = _mm_shuffle_ps( z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(3,1,3,1) );
266
const __m128 Y0Z0Y1Z1 = _mm_shuffle_ps( _mm_load_ps(bb0+4), _mm_load_ps(bb1+4), _MM_SHUFFLE(1,0,1,0) );
267
const __m128 Y2Z2Y3Z3 = _mm_shuffle_ps( _mm_load_ps(bb2+4), _mm_load_ps(bb3+4), _MM_SHUFFLE(1,0,1,0) );
268
t_bb[4] = _mm_shuffle_ps( Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(2,0,2,0) );
269
t_bb[5] = _mm_shuffle_ps( Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(3,1,3,1) );
235
t_node[0] = stack[stack_pos + 0]->child;
236
t_node[1] = stack[stack_pos + 1]->child;
237
t_node[2] = stack[stack_pos + 2]->child;
238
t_node[3] = stack[stack_pos + 3]->child;
240
const float *bb0 = stack[stack_pos + 0]->bb;
241
const float *bb1 = stack[stack_pos + 1]->bb;
242
const float *bb2 = stack[stack_pos + 2]->bb;
243
const float *bb3 = stack[stack_pos + 3]->bb;
245
const __m128 x0y0x1y1 = _mm_shuffle_ps(_mm_load_ps(bb0), _mm_load_ps(bb1), _MM_SHUFFLE(1, 0, 1, 0) );
246
const __m128 x2y2x3y3 = _mm_shuffle_ps(_mm_load_ps(bb2), _mm_load_ps(bb3), _MM_SHUFFLE(1, 0, 1, 0) );
247
t_bb[0] = _mm_shuffle_ps(x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(2, 0, 2, 0) );
248
t_bb[1] = _mm_shuffle_ps(x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(3, 1, 3, 1) );
250
const __m128 z0X0z1X1 = _mm_shuffle_ps(_mm_load_ps(bb0), _mm_load_ps(bb1), _MM_SHUFFLE(3, 2, 3, 2) );
251
const __m128 z2X2z3X3 = _mm_shuffle_ps(_mm_load_ps(bb2), _mm_load_ps(bb3), _MM_SHUFFLE(3, 2, 3, 2) );
252
t_bb[2] = _mm_shuffle_ps(z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(2, 0, 2, 0) );
253
t_bb[3] = _mm_shuffle_ps(z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(3, 1, 3, 1) );
255
const __m128 Y0Z0Y1Z1 = _mm_shuffle_ps(_mm_load_ps(bb0 + 4), _mm_load_ps(bb1 + 4), _MM_SHUFFLE(1, 0, 1, 0) );
256
const __m128 Y2Z2Y3Z3 = _mm_shuffle_ps(_mm_load_ps(bb2 + 4), _mm_load_ps(bb3 + 4), _MM_SHUFFLE(1, 0, 1, 0) );
257
t_bb[4] = _mm_shuffle_ps(Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(2, 0, 2, 0) );
258
t_bb[5] = _mm_shuffle_ps(Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(3, 1, 3, 1) );
271
for(int i=0; i<4; i++)
260
for (int i = 0; i < 4; i++)
273
Node *t = stack[stack_pos+i];
262
Node *t = stack[stack_pos + i];
274
263
assert(!is_leaf(t));
276
float *bb = ((float*)t_bb)+i;
265
float *bb = ((float *)t_bb) + i;
266
bb[4 * 0] = t->bb[0];
267
bb[4 * 1] = t->bb[1];
268
bb[4 * 2] = t->bb[2];
269
bb[4 * 3] = t->bb[3];
270
bb[4 * 4] = t->bb[4];
271
bb[4 * 5] = t->bb[5];
283
272
t_node[i] = t->child;
286
275
RE_RC_COUNT(isec->raycounter->simd_bb.test);
287
int res = test_bb_group4( t_bb, isec );
276
int res = test_bb_group4(t_bb, isec);
289
for(int i=0; i<4; i++)
292
RE_RC_COUNT(isec->raycounter->simd_bb.hit);
293
if(!is_leaf(t_node[i]))
295
for(Node *t=t_node[i]; t; t=t->sibling)
297
assert(stack_pos < MAX_STACK_SIZE);
298
stack[stack_pos++] = t;
278
for (int i = 0; i < 4; i++)
279
if (res & (1 << i)) {
280
RE_RC_COUNT(isec->raycounter->simd_bb.hit);
281
if (!is_leaf(t_node[i])) {
282
for (Node *t = t_node[i]; t; t = t->sibling) {
283
assert(stack_pos < MAX_STACK_SIZE);
284
stack[stack_pos++] = t;
288
hit |= RE_rayobject_intersect( (RayObject *)t_node[i], isec);
289
if (hit && isec->mode == RE_RAY_SHADOW) return hit;
303
hit |= RE_rayobject_intersect( (RayObject*)t_node[i], isec);
304
if(hit && isec->mode == RE_RAY_SHADOW) return hit;
308
else if(stack_pos > 0)
293
else if (stack_pos > 0) {
310
294
Node *node = stack[--stack_pos];
311
295
assert(!is_leaf(node));
313
if(bvh_node_hit_test(node,isec))
315
if(!is_leaf(node->child))
297
if (bvh_node_hit_test(node, isec)) {
298
if (!is_leaf(node->child)) {
317
299
bvh_node_push_childs(node, isec, stack, stack_pos);
318
300
assert(stack_pos <= MAX_STACK_SIZE);
322
hit |= RE_rayobject_intersect( (RayObject*)node->child, isec);
323
if(hit && isec->mode == RE_RAY_SHADOW) return hit;
303
hit |= RE_rayobject_intersect( (RayObject *)node->child, isec);
304
if (hit && isec->mode == RE_RAY_SHADOW) return hit;
338
319
static int bvh_node_raycast(Node *node, Isect *isec)
341
if(bvh_test_node(node, isec))
322
if (bvh_test_node(node, isec))
343
if(isec->idot_axis[node->split_axis] > 0.0f)
324
if (isec->idot_axis[node->split_axis] > 0.0f)
346
for(i=0; i<BVH_NCHILDS; i++)
347
if(!is_leaf(node->child[i]))
327
for (i = 0; i < BVH_NCHILDS; i++)
328
if (!is_leaf(node->child[i]))
349
if(node->child[i] == 0) break;
330
if (node->child[i] == 0) break;
351
332
hit |= bvh_node_raycast(node->child[i], isec);
352
if(hit && isec->mode == RE_RAY_SHADOW) return hit;
333
if (hit && isec->mode == RE_RAY_SHADOW) return hit;
356
hit |= RE_rayobject_intersect( (RayObject*)node->child[i], isec);
357
if(hit && isec->mode == RE_RAY_SHADOW) return hit;
336
hit |= RE_rayobject_intersect( (RayObject *)node->child[i], isec);
337
if (hit && isec->mode == RE_RAY_SHADOW) return hit;
363
for(i=BVH_NCHILDS-1; i>=0; i--)
364
if(!is_leaf(node->child[i]))
342
for (i = BVH_NCHILDS - 1; i >= 0; i--)
343
if (!is_leaf(node->child[i]))
368
347
hit |= dfs_raycast(node->child[i], isec);
369
if(hit && isec->mode == RE_RAY_SHADOW) return hit;
348
if (hit && isec->mode == RE_RAY_SHADOW) return hit;
374
hit |= RE_rayobject_intersect( (RayObject*)node->child[i], isec);
375
if(hit && isec->mode == RE_RAY_SHADOW) return hit;
352
hit |= RE_rayobject_intersect( (RayObject *)node->child[i], isec);
353
if (hit && isec->mode == RE_RAY_SHADOW) return hit;
383
template<class Node,class HintObject>
361
template<class Node, class HintObject>
384
362
void bvh_dfs_make_hint(Node *node, LCTSHint *hint, int reserve_space, HintObject *hintObject)
386
assert( hint->size + reserve_space + 1 <= RE_RAY_LCTS_MAX_SIZE );
364
assert(hint->size + reserve_space + 1 <= RE_RAY_LCTS_MAX_SIZE);
390
hint->stack[hint->size++] = (RayObject*)node;
367
hint->stack[hint->size++] = (RayObject *)node;
394
370
int childs = count_childs(node);
395
if(hint->size + reserve_space + childs <= RE_RAY_LCTS_MAX_SIZE)
397
int result = hint_test_bb(hintObject, node->bb, node->bb+3);
398
if(result == HINT_RECURSE)
371
if (hint->size + reserve_space + childs <= RE_RAY_LCTS_MAX_SIZE) {
372
int result = hint_test_bb(hintObject, node->bb, node->bb + 3);
373
if (result == HINT_RECURSE) {
400
374
/* We are 100% sure the ray will pass inside this node */
401
375
bvh_dfs_make_hint_push_siblings(node->child, hint, reserve_space, hintObject);
403
else if(result == HINT_ACCEPT)
405
hint->stack[hint->size++] = (RayObject*)node;
377
else if (result == HINT_ACCEPT) {
378
hint->stack[hint->size++] = (RayObject *)node;
410
hint->stack[hint->size++] = (RayObject*)node;
382
hint->stack[hint->size++] = (RayObject *)node;
416
388
template<class Tree>
417
static RayObjectAPI* bvh_get_api(int maxstacksize);
389
static RayObjectAPI *bvh_get_api(int maxstacksize);
420
392
template<class Tree, int DFS_STACK_SIZE>
421
393
static inline RayObject *bvh_create_tree(int size)
423
Tree *obj= (Tree*)MEM_callocN(sizeof(Tree), "BVHTree" );
424
assert( RE_rayobject_isAligned(obj) ); /* RayObject API assumes real data to be 4-byte aligned */
395
Tree *obj = (Tree *)MEM_callocN(sizeof(Tree), "BVHTree");
396
assert(RE_rayobject_isAligned(obj)); /* RayObject API assumes real data to be 4-byte aligned */
426
398
obj->rayobj.api = bvh_get_api<Tree>(DFS_STACK_SIZE);
427
399
obj->root = NULL;
429
401
obj->node_arena = NULL;
430
obj->builder = rtbuild_create( size );
402
obj->builder = rtbuild_create(size);
432
return RE_rayobject_unalignRayAPI((RayObject*) obj);
404
return RE_rayobject_unalignRayAPI((RayObject *) obj);