~siretart/ubuntu/utopic/blender/libav10

« back to all changes in this revision

Viewing changes to intern/cycles/kernel/kernel_mbvh.h

  • Committer: Package Import Robot
  • Author(s): Matteo F. Vescovi
  • Date: 2012-07-23 08:54:18 UTC
  • mfrom: (14.2.16 sid)
  • mto: (14.2.19 sid)
  • mto: This revision was merged to the branch mainline in revision 42.
  • Revision ID: package-import@ubuntu.com-20120723085418-9foz30v6afaf5ffs
Tags: 2.63a-2
* debian/: Cycles support added (Closes: #658075)
  For now, this top feature has been enabled only
  on [any-amd64 any-i386] architectures because
  of OpenImageIO failing on all others
* debian/: scripts installation path changed
  from /usr/lib to /usr/share:
  + debian/patches/: patchset re-worked for path changing
  + debian/control: "Breaks" field added on yafaray-exporter

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*
 
2
 * Copyright 2011, Blender Foundation.
 
3
 *
 
4
 * This program is free software; you can redistribute it and/or
 
5
 * modify it under the terms of the GNU General Public License
 
6
 * as published by the Free Software Foundation; either version 2
 
7
 * of the License, or (at your option) any later version.
 
8
 *
 
9
 * This program is distributed in the hope that it will be useful,
 
10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
12
 * GNU General Public License for more details.
 
13
 *
 
14
 * You should have received a copy of the GNU General Public License
 
15
 * along with this program; if not, write to the Free Software Foundation,
 
16
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
17
 */
 
18
 
 
19
CCL_NAMESPACE_BEGIN
 
20
 
 
21
/* Value stored in MBVHTask::node to mark an "instance pop" task. */
#define MBVH_OBJECT_SENTINEL 0x76543210
/* Stride multiplied with node and triangle addresses when fetching data. */
#define MBVH_NODE_SIZE 8
/* Capacity of the traversal task stack in scene_intersect(). */
#define MBVH_STACK_SIZE 1024
/* Capacity of each of the four active ray stacks in scene_intersect(). */
#define MBVH_RAY_STACK_SIZE 10000
 
25
 
 
26
/* One entry of the traversal task stack used by scene_intersect(). */
typedef struct MBVHTask {
	/* node address: >= 0 is handled as an inner node, < 0 as a leaf,
	 * MBVH_OBJECT_SENTINEL as an instance pop */
	int node;
	/* which of the four active ray stacks holds the rays of this task */
	int index;
	/* number of rays on that stack that belong to this task */
	int num;
	/* object id handed to triangle intersection and instance transforms */
	int object;
} MBVHTask;
 
32
 
 
33
typedef struct MVBHRay {
 
34
        float3 P;
 
35
        float u;
 
36
        float3 idir;
 
37
        float v;
 
38
        float t;
 
39
        int index;
 
40
        int object;
 
41
 
 
42
        float3 origP;
 
43
        float3 origD;
 
44
        float tmax;
 
45
} MBVHRay;
 
46
 
 
47
__device float3 mbvh_inverse_direction(float3 dir)
 
48
{
 
49
        // Avoid divide by zero (ooeps = exp2f(-80.0f))
 
50
        float ooeps = 0.00000000000000000000000082718061255302767487140869206996285356581211090087890625f;
 
51
        float3 idir;
 
52
 
 
53
        idir.x = 1.0f / (fabsf(dir.x) > ooeps ? dir.x : copysignf(ooeps, dir.x));
 
54
        idir.y = 1.0f / (fabsf(dir.y) > ooeps ? dir.y : copysignf(ooeps, dir.y));
 
55
        idir.z = 1.0f / (fabsf(dir.z) > ooeps ? dir.z : copysignf(ooeps, dir.z));
 
56
 
 
57
        return idir;
 
58
}
 
59
 
 
60
__device void mbvh_instance_push(KernelGlobals *kg, int object, MBVHRay *ray)
 
61
{
 
62
        Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
 
63
 
 
64
        ray->P = transform_point(&tfm, ray->origP);
 
65
 
 
66
        float3 dir = ray->origD;
 
67
 
 
68
        if(ray->t != ray->tmax) dir *= ray->t;
 
69
 
 
70
        dir = transform_direction(&tfm, dir);
 
71
        ray->idir = mbvh_inverse_direction(normalize(dir));
 
72
 
 
73
        if(ray->t != ray->tmax) ray->t = len(dir);
 
74
}
 
75
 
 
76
__device void mbvh_instance_pop(KernelGlobals *kg, int object, MBVHRay *ray)
 
77
{
 
78
        Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
 
79
 
 
80
        if(ray->t != ray->tmax)
 
81
                ray->t = len(transform_direction(&tfm, (1.0f/(ray->idir)) * (ray->t)));
 
82
 
 
83
        ray->P = ray->origP;
 
84
        ray->idir = mbvh_inverse_direction(ray->origD);
 
85
}
 
86
 
 
87
/* Sven Woop's algorithm */
 
88
__device void mbvh_triangle_intersect(KernelGlobals *kg, MBVHRay *ray, int object, int triAddr)
 
89
{
 
90
        float3 P = ray->P;
 
91
        float3 idir = ray->idir;
 
92
 
 
93
        /* compute and check intersection t-value */
 
94
        float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*MBVH_NODE_SIZE+0);
 
95
        float4 v11 = kernel_tex_fetch(__tri_woop, triAddr*MBVH_NODE_SIZE+1);
 
96
        float3 dir = 1.0f/idir;
 
97
 
 
98
        float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z;
 
99
        float invDz = 1.0f/(dir.x*v00.x + dir.y*v00.y + dir.z*v00.z);
 
100
        float t = Oz * invDz;
 
101
 
 
102
        if(t > 0.0f && t < ray->t) {
 
103
                /* compute and check barycentric u */
 
104
                float Ox = v11.w + P.x*v11.x + P.y*v11.y + P.z*v11.z;
 
105
                float Dx = dir.x*v11.x + dir.y*v11.y + dir.z*v11.z;
 
106
                float u = Ox + t*Dx;
 
107
 
 
108
                if(u >= 0.0f) {
 
109
                        /* compute and check barycentric v */
 
110
                        float4 v22 = kernel_tex_fetch(__tri_woop, triAddr*MBVH_NODE_SIZE+2);
 
111
                        float Oy = v22.w + P.x*v22.x + P.y*v22.y + P.z*v22.z;
 
112
                        float Dy = dir.x*v22.x + dir.y*v22.y + dir.z*v22.z;
 
113
                        float v = Oy + t*Dy;
 
114
 
 
115
                        if(v >= 0.0f && u + v <= 1.0f) {
 
116
                                /* record intersection */
 
117
                                ray->index = triAddr;
 
118
                                ray->object = object;
 
119
                                ray->u = u;
 
120
                                ray->v = v;
 
121
                                ray->t = t;
 
122
                        }
 
123
                }
 
124
        }
 
125
}
 
126
 
 
127
__device void mbvh_node_intersect(KernelGlobals *kg, __m128 *traverseChild,
 
128
        __m128 *tHit, float3 P, float3 idir, float t, int nodeAddr)
 
129
{
 
130
        /* X axis */
 
131
        const __m128 bminx = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*MBVH_NODE_SIZE+0);
 
132
        const __m128 t0x = _mm_mul_ps(_mm_sub_ps(bminx, _mm_set_ps1(P.x)), _mm_set_ps1(idir.x));
 
133
        const __m128 bmaxx = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*MBVH_NODE_SIZE+1);
 
134
        const __m128 t1x = _mm_mul_ps(_mm_sub_ps(bmaxx, _mm_set_ps1(P.x)), _mm_set_ps1(idir.x));
 
135
 
 
136
        __m128 tmin = _mm_max_ps(_mm_min_ps(t0x, t1x), _mm_setzero_ps());
 
137
        __m128 tmax = _mm_min_ps(_mm_max_ps(t0x, t1x), _mm_set_ps1(t));
 
138
 
 
139
        /* Y axis */
 
140
        const __m128 bminy = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*MBVH_NODE_SIZE+2);
 
141
        const __m128 t0y = _mm_mul_ps(_mm_sub_ps(bminy, _mm_set_ps1(P.y)), _mm_set_ps1(idir.y));
 
142
        const __m128 bmaxy = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*MBVH_NODE_SIZE+3);
 
143
        const __m128 t1y = _mm_mul_ps(_mm_sub_ps(bmaxy, _mm_set_ps1(P.y)), _mm_set_ps1(idir.y));
 
144
 
 
145
        tmin = _mm_max_ps(_mm_min_ps(t0y, t1y), tmin);
 
146
        tmax = _mm_min_ps(_mm_max_ps(t0y, t1y), tmax);
 
147
 
 
148
        /* Z axis */
 
149
        const __m128 bminz = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*MBVH_NODE_SIZE+4);
 
150
        const __m128 t0z = _mm_mul_ps(_mm_sub_ps(bminz, _mm_set_ps1(P.z)), _mm_set_ps1(idir.z));
 
151
        const __m128 bmaxz = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*MBVH_NODE_SIZE+5);
 
152
        const __m128 t1z = _mm_mul_ps(_mm_sub_ps(bmaxz, _mm_set_ps1(P.z)), _mm_set_ps1(idir.z));
 
153
 
 
154
        tmin = _mm_max_ps(_mm_min_ps(t0z, t1z), tmin);
 
155
        tmax = _mm_min_ps(_mm_max_ps(t0z, t1z), tmax);
 
156
 
 
157
        /* compare and get mask */
 
158
        *traverseChild = _mm_cmple_ps(tmin, tmax);
 
159
 
 
160
        /* get distance XXX probably wrong */
 
161
        *tHit = tmin;
 
162
}
 
163
 
 
164
static void mbvh_sort_by_length(int id[4], float len[4])
 
165
{
 
166
        for(int i = 1; i < 4; i++) {
 
167
                int j = i - 1;
 
168
 
 
169
                while(j >= 0 && len[j] > len[j+1]) {
 
170
                        swap(len[j], len[j+1]);
 
171
                        swap(id[j], id[j+1]);
 
172
                        j--;
 
173
                }
 
174
        }
 
175
}
 
176
 
 
177
__device void scene_intersect(KernelGlobals *kg, MBVHRay *rays, int numrays)
 
178
{
 
179
        /* traversal stacks */
 
180
        MBVHTask task_stack[MBVH_STACK_SIZE];
 
181
        int active_ray_stacks[4][MBVH_RAY_STACK_SIZE];
 
182
        int num_task, num_active[4] = {0, 0, 0, 0};
 
183
        __m128i one_mm = _mm_set1_epi32(1);
 
184
 
 
185
        /* push root node task on stack */
 
186
        task_stack[0].node = kernel_data.bvh.root;
 
187
        task_stack[0].index = 0;
 
188
        task_stack[0].num = numrays;
 
189
        task_stack[0].object = ~0;
 
190
        num_task = 1;
 
191
 
 
192
        /* push all rays in first SIMD lane */
 
193
        for(int i = 0; i < numrays; i++)
 
194
                active_ray_stacks[0][i] = i;
 
195
        num_active[0] = numrays;
 
196
        
 
197
        while(num_task >= 1) {
 
198
                /* pop task */
 
199
                MBVHTask task = task_stack[--num_task];
 
200
 
 
201
                if(task.node == MBVH_OBJECT_SENTINEL) {
 
202
                        /* instance pop */
 
203
 
 
204
                        /* pop rays from stack */
 
205
                        num_active[task.index] -= task.num;
 
206
                        int ray_offset = num_active[task.index];
 
207
 
 
208
                        /* transform rays */
 
209
                        for(int i = 0; i < task.num; i++) {
 
210
                                MBVHRay *ray = &rays[active_ray_stacks[task.index][ray_offset + i]];
 
211
                                mbvh_instance_pop(kg, task.object, ray);
 
212
                        }
 
213
                }
 
214
                else if(task.node >= 0) {
 
215
                        /* inner node? */
 
216
 
 
217
                        /* pop rays from stack*/
 
218
                        num_active[task.index] -= task.num;
 
219
                        int ray_offset = num_active[task.index];
 
220
 
 
221
                        /* initialze simd values */
 
222
                        __m128i num_active_mm = _mm_load_si128((__m128i*)num_active);
 
223
                        __m128 len_mm = _mm_set_ps1(0.0f);
 
224
 
 
225
                        for(int i = 0; i < task.num; i++) {
 
226
                                int rayid = active_ray_stacks[task.index][ray_offset + i];
 
227
                                MVBHRay *ray = rays + rayid;
 
228
 
 
229
                                /* intersect 4 QBVH node children */
 
230
                                __m128 result;
 
231
                                __m128 thit;
 
232
 
 
233
                                mbvh_node_intersect(kg, &result, &thit, ray->P, ray->idir, ray->t, task.node);
 
234
 
 
235
                                /* update length for sorting */
 
236
                                len_mm = _mm_add_ps(len_mm, _mm_and_ps(thit, result));
 
237
 
 
238
                                /* push rays on stack */
 
239
                                for(int j = 0; j < 4; j++)
 
240
                                        active_ray_stacks[j][num_active[j]] = rayid;
 
241
 
 
242
                                /* update num active */
 
243
                                __m128i resulti = _mm_and_si128(*((__m128i*)&result), one_mm);
 
244
                                num_active_mm = _mm_add_epi32(resulti, num_active_mm);
 
245
                                _mm_store_si128((__m128i*)num_active, num_active_mm);
 
246
                        }
 
247
 
 
248
                        if(num_active[0] || num_active[1] || num_active[2] || num_active[3]) {
 
249
                                /* load child node addresses */
 
250
                                float4 cnodes = kernel_tex_fetch(__bvh_nodes, task.node);
 
251
                                int child[4] = {
 
252
                                        __float_as_int(cnodes.x),
 
253
                                        __float_as_int(cnodes.y),
 
254
                                        __float_as_int(cnodes.z),
 
255
                                        __float_as_int(cnodes.w)};
 
256
 
 
257
                                /* sort nodes by average intersection distance */
 
258
                                int ids[4] = {0, 1, 2, 3};
 
259
                                float len[4];
 
260
 
 
261
                                _mm_store_ps(len, len_mm);
 
262
                                mbvh_sort_by_length(ids, len);
 
263
 
 
264
                                /* push new tasks on stack */
 
265
                                for(int j = 0; j < 4; j++) {
 
266
                                        if(num_active[j]) {
 
267
                                                int id = ids[j];
 
268
 
 
269
                                                task_stack[num_task].node = child[id];
 
270
                                                task_stack[num_task].index = id;
 
271
                                                task_stack[num_task].num = num_active[id];
 
272
                                                task_stack[num_task].object = task.object;
 
273
                                                num_task++;
 
274
                                        }
 
275
                                }
 
276
                        }
 
277
                }
 
278
                else {
 
279
                        /* fetch leaf node data */
 
280
                        float4 leaf = kernel_tex_fetch(__bvh_nodes, (-task.node-1)*MBVH_NODE_SIZE+(MBVH_NODE_SIZE-2));
 
281
                        int triAddr = __float_as_int(leaf.x);
 
282
                        int triAddr2 = __float_as_int(leaf.y);
 
283
 
 
284
                        /* pop rays from stack*/
 
285
                        num_active[task.index] -= task.num;
 
286
                        int ray_offset = num_active[task.index];
 
287
 
 
288
                        /* triangles */
 
289
                        if(triAddr >= 0) {
 
290
                                int i, numq = (task.num >> 2) << 2;
 
291
 
 
292
                                /* SIMD ray leaf intersection */
 
293
                                for(i = 0; i < numq; i += 4) {
 
294
                                        MBVHRay *ray4[4] = {
 
295
                                                &rays[active_ray_stacks[task.index][ray_offset + i + 0]],
 
296
                                                &rays[active_ray_stacks[task.index][ray_offset + i + 1]],
 
297
                                                &rays[active_ray_stacks[task.index][ray_offset + i + 2]],
 
298
                                                &rays[active_ray_stacks[task.index][ray_offset + i + 3]]};
 
299
 
 
300
                                        /* load SoA */
 
301
 
 
302
                                        while(triAddr < triAddr2) {
 
303
                                                mbvh_triangle_intersect(ray4[0], task.object, task.node);
 
304
                                                mbvh_triangle_intersect(ray4[1], task.object, task.node);
 
305
                                                mbvh_triangle_intersect(ray4[2], task.object, task.node);
 
306
                                                mbvh_triangle_intersect(ray4[3], task.object, task.node);
 
307
                                                triAddr++;
 
308
 
 
309
                                                /* some shadow ray optim could be done by setting t=0 */
 
310
                                        }
 
311
 
 
312
                                        /* store AoS */
 
313
                                }
 
314
 
 
315
                                /* mono ray leaf intersection */
 
316
                                for(; i < task.num; i++) {
 
317
                                        MBVHRay *ray = &rays[active_ray_stacks[task.index][ray_offset + i]];
 
318
 
 
319
                                        while(triAddr < triAddr2) {
 
320
                                                mbvh_triangle_intersect(kg, ray, task.object, task.node);
 
321
                                                triAddr++;
 
322
                                        }
 
323
                                }
 
324
                        }
 
325
                        else {
 
326
                                /* instance push */
 
327
                                int object = -triAddr-1;
 
328
                                int node = triAddr;
 
329
 
 
330
                                /* push instance pop task */
 
331
                                task_stack[num_task].node = MBVH_OBJECT_SENTINEL;
 
332
                                task_stack[num_task].index = task.index;
 
333
                                task_stack[num_task].num = task.num;
 
334
                                task_stack[num_task].object = object;
 
335
                                num_task++;
 
336
 
 
337
                                num_active[task.index] += task.num;
 
338
 
 
339
                                /* push node task */
 
340
                                task_stack[num_task].node = node;
 
341
                                task_stack[num_task].index = task.index;
 
342
                                task_stack[num_task].num = task.num;
 
343
                                task_stack[num_task].object = object;
 
344
                                num_task++;
 
345
 
 
346
                                for(int i = 0; i < task.num; i++) {
 
347
                                        int rayid = active_ray_stacks[task.index][ray_offset + i];
 
348
 
 
349
                                        /* push on stack for last task */
 
350
                                        active_ray_stacks[task.index][num_active[task.index]] = rayid;
 
351
                                        num_active[task.index]++;
 
352
 
 
353
                                        /* transform ray */
 
354
                                        MBVHRay *ray = &rays[rayid];
 
355
                                        mbvh_instance_push(kg, object, ray);
 
356
                                }
 
357
                        }
 
358
                }
 
359
        }
 
360
}
 
361
 
 
362
__device void mbvh_set_ray(MBVHRay *rays, int i, Ray *ray, float tmax)
 
363
{
 
364
        MBVHRay *mray = &rays[i];
 
365
 
 
366
        /* ray parameters in registers */
 
367
        mray->P = ray->P;
 
368
        mray->idir = mbvh_inverse_direction(ray->D);
 
369
        mray->t = tmax;
 
370
}
 
371
 
 
372
__device bool mbvh_get_intersection(MVBHRay *rays, int i, Intersection *isect, float tmax)
 
373
{
 
374
        MBVHRay *mray = &rays[i];
 
375
 
 
376
        if(mray->t == tmax)
 
377
                return false;
 
378
        
 
379
        isect->t = mray->t;
 
380
        isect->u = mray->u;
 
381
        isect->v = mray->v;
 
382
        isect->index = mray->index;
 
383
        isect->object = mray->object;
 
384
 
 
385
        return true;
 
386
}
 
387
 
 
388
__device bool mbvh_get_shadow(MBVHRay *rays, int i, float tmax)
 
389
{
 
390
        return (rays[i].t == tmax);
 
391
}
 
392
 
 
393
CCL_NAMESPACE_END
 
394