~ubuntu-branches/ubuntu/trusty/blender/trusty

« back to all changes in this revision

Viewing changes to intern/cycles/device/device_cpu.cpp

  • Committer: Package Import Robot
  • Author(s): Jeremy Bicha
  • Date: 2013-03-06 12:08:47 UTC
  • mfrom: (1.5.1) (14.1.8 experimental)
  • Revision ID: package-import@ubuntu.com-20130306120847-frjfaryb2zrotwcg
Tags: 2.66a-1ubuntu1
* Resynchronize with Debian (LP: #1076930, #1089256, #1052743, #999024,
  #1122888, #1147084)
* debian/control:
  - Lower build-depends on libavcodec-dev since we're not
    doing the libav9 transition in Ubuntu yet

Show diffs side-by-side

added

removed

Lines of Context:
23
23
#include "device_intern.h"
24
24
 
25
25
#include "kernel.h"
 
26
#include "kernel_compat_cpu.h"
26
27
#include "kernel_types.h"
 
28
#include "kernel_globals.h"
27
29
 
28
30
#include "osl_shader.h"
 
31
#include "osl_globals.h"
 
32
 
 
33
#include "buffers.h"
29
34
 
30
35
#include "util_debug.h"
31
36
#include "util_foreach.h"
40
45
class CPUDevice : public Device
41
46
{
42
47
public:
43
 
        vector<thread*> threads;
44
 
        ThreadQueue<DeviceTask> tasks;
45
 
        KernelGlobals *kg;
 
48
        TaskPool task_pool;
 
49
        KernelGlobals kernel_globals;
 
50
#ifdef WITH_OSL
 
51
        OSLGlobals osl_globals;
 
52
#endif
46
53
        
47
 
        CPUDevice(int threads_num)
 
54
        CPUDevice(Stats &stats) : Device(stats)
48
55
        {
49
 
                kg = kernel_globals_create();
 
56
#ifdef WITH_OSL
 
57
                kernel_globals.osl = &osl_globals;
 
58
#endif
50
59
 
51
60
                /* do now to avoid thread issues */
52
 
                system_cpu_support_optimized();
53
 
 
54
 
                if(threads_num == 0)
55
 
                        threads_num = system_cpu_thread_count();
56
 
 
57
 
                threads.resize(threads_num);
58
 
 
59
 
                for(size_t i = 0; i < threads.size(); i++)
60
 
                        threads[i] = new thread(function_bind(&CPUDevice::thread_run, this, i));
 
61
                system_cpu_support_sse2();
 
62
                system_cpu_support_sse3();
61
63
        }
62
64
 
63
65
        ~CPUDevice()
64
66
        {
65
 
                tasks.stop();
66
 
 
67
 
                foreach(thread *t, threads) {
68
 
                        t->join();
69
 
                        delete t;
70
 
                }
71
 
 
72
 
                kernel_globals_free(kg);
 
67
                task_pool.stop();
73
68
        }
74
69
 
75
70
        bool support_advanced_shading()
80
75
        void mem_alloc(device_memory& mem, MemoryType type)
81
76
        {
82
77
                mem.device_pointer = mem.data_pointer;
 
78
 
 
79
                stats.mem_alloc(mem.memory_size());
83
80
        }
84
81
 
85
82
        void mem_copy_to(device_memory& mem)
100
97
        void mem_free(device_memory& mem)
101
98
        {
102
99
                mem.device_pointer = 0;
 
100
 
 
101
                stats.mem_free(mem.memory_size());
103
102
        }
104
103
 
105
104
        void const_copy_to(const char *name, void *host, size_t size)
106
105
        {
107
 
                kernel_const_copy(kg, name, host, size);
 
106
                kernel_const_copy(&kernel_globals, name, host, size);
108
107
        }
109
108
 
110
109
        void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
111
110
        {
112
 
                kernel_tex_copy(kg, name, mem.data_pointer, mem.data_width, mem.data_height);
 
111
                kernel_tex_copy(&kernel_globals, name, mem.data_pointer, mem.data_width, mem.data_height);
113
112
                mem.device_pointer = mem.data_pointer;
 
113
 
 
114
                stats.mem_alloc(mem.memory_size());
114
115
        }
115
116
 
116
117
        void tex_free(device_memory& mem)
117
118
        {
118
119
                mem.device_pointer = 0;
 
120
 
 
121
                stats.mem_free(mem.memory_size());
119
122
        }
120
123
 
121
124
        void *osl_memory()
122
125
        {
123
126
#ifdef WITH_OSL
124
 
                return kernel_osl_memory(kg);
 
127
                return &osl_globals;
125
128
#else
126
129
                return NULL;
127
130
#endif
128
131
        }
129
132
 
130
 
        void thread_run(int t)
 
133
        void thread_run(DeviceTask *task)
131
134
        {
132
 
                DeviceTask task;
133
 
 
134
 
                while(tasks.worker_wait_pop(task)) {
135
 
                        if(task.type == DeviceTask::PATH_TRACE)
136
 
                                thread_path_trace(task);
137
 
                        else if(task.type == DeviceTask::TONEMAP)
138
 
                                thread_tonemap(task);
139
 
                        else if(task.type == DeviceTask::SHADER)
140
 
                                thread_shader(task);
141
 
 
142
 
                        tasks.worker_done();
 
135
                if(task->type == DeviceTask::PATH_TRACE)
 
136
                        thread_path_trace(*task);
 
137
                else if(task->type == DeviceTask::TONEMAP)
 
138
                        thread_tonemap(*task);
 
139
                else if(task->type == DeviceTask::SHADER)
 
140
                        thread_shader(*task);
 
141
        }
 
142
 
 
143
        class CPUDeviceTask : public DeviceTask {
 
144
        public:
 
145
                CPUDeviceTask(CPUDevice *device, DeviceTask& task)
 
146
                : DeviceTask(task)
 
147
                {
 
148
                        run = function_bind(&CPUDevice::thread_run, device, this);
143
149
                }
144
 
        }
 
150
        };
145
151
 
146
152
        void thread_path_trace(DeviceTask& task)
147
153
        {
148
 
                if(tasks.worker_cancel())
149
 
                        return;
 
154
                if(task_pool.cancelled()) {
 
155
                        if(task.need_finish_queue == false)
 
156
                                return;
 
157
                }
 
158
 
 
159
                KernelGlobals kg = kernel_globals;
150
160
 
151
161
#ifdef WITH_OSL
152
 
                if(kernel_osl_use(kg))
153
 
                        OSLShader::thread_init(kg);
 
162
                OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
154
163
#endif
155
164
 
 
165
                RenderTile tile;
 
166
                
 
167
                while(task.acquire_tile(this, tile)) {
 
168
                        float *render_buffer = (float*)tile.buffer;
 
169
                        uint *rng_state = (uint*)tile.rng_state;
 
170
                        int start_sample = tile.start_sample;
 
171
                        int end_sample = tile.start_sample + tile.num_samples;
 
172
 
156
173
#ifdef WITH_OPTIMIZED_KERNEL
157
 
                if(system_cpu_support_optimized()) {
158
 
                        for(int y = task.y; y < task.y + task.h; y++) {
159
 
                                for(int x = task.x; x < task.x + task.w; x++)
160
 
                                        kernel_cpu_optimized_path_trace(kg, (float*)task.buffer, (unsigned int*)task.rng_state,
161
 
                                                task.sample, x, y, task.offset, task.stride);
162
 
 
163
 
                                if(tasks.worker_cancel())
164
 
                                        break;
165
 
                        }
166
 
                }
167
 
                else
 
174
                        if(system_cpu_support_sse3()) {
 
175
                                for(int sample = start_sample; sample < end_sample; sample++) {
 
176
                                        if (task.get_cancel() || task_pool.cancelled()) {
 
177
                                                if(task.need_finish_queue == false)
 
178
                                                        break;
 
179
                                        }
 
180
 
 
181
                                        for(int y = tile.y; y < tile.y + tile.h; y++) {
 
182
                                                for(int x = tile.x; x < tile.x + tile.w; x++) {
 
183
                                                        kernel_cpu_sse3_path_trace(&kg, render_buffer, rng_state,
 
184
                                                                sample, x, y, tile.offset, tile.stride);
 
185
                                                }
 
186
                                        }
 
187
 
 
188
                                        tile.sample = sample + 1;
 
189
 
 
190
                                        task.update_progress(tile);
 
191
                                }
 
192
                        }
 
193
                        else if(system_cpu_support_sse2()) {
 
194
                                for(int sample = start_sample; sample < end_sample; sample++) {
 
195
                                        if (task.get_cancel() || task_pool.cancelled()) {
 
196
                                                if(task.need_finish_queue == false)
 
197
                                                        break;
 
198
                                        }
 
199
 
 
200
                                        for(int y = tile.y; y < tile.y + tile.h; y++) {
 
201
                                                for(int x = tile.x; x < tile.x + tile.w; x++) {
 
202
                                                        kernel_cpu_sse2_path_trace(&kg, render_buffer, rng_state,
 
203
                                                                sample, x, y, tile.offset, tile.stride);
 
204
                                                }
 
205
                                        }
 
206
 
 
207
                                        tile.sample = sample + 1;
 
208
 
 
209
                                        task.update_progress(tile);
 
210
                                }
 
211
                        }
 
212
                        else
168
213
#endif
169
 
                {
170
 
                        for(int y = task.y; y < task.y + task.h; y++) {
171
 
                                for(int x = task.x; x < task.x + task.w; x++)
172
 
                                        kernel_cpu_path_trace(kg, (float*)task.buffer, (unsigned int*)task.rng_state,
173
 
                                                task.sample, x, y, task.offset, task.stride);
174
 
 
175
 
                                if(tasks.worker_cancel())
 
214
                        {
 
215
                                for(int sample = start_sample; sample < end_sample; sample++) {
 
216
                                        if (task.get_cancel() || task_pool.cancelled()) {
 
217
                                                if(task.need_finish_queue == false)
 
218
                                                        break;
 
219
                                        }
 
220
 
 
221
                                        for(int y = tile.y; y < tile.y + tile.h; y++) {
 
222
                                                for(int x = tile.x; x < tile.x + tile.w; x++) {
 
223
                                                        kernel_cpu_path_trace(&kg, render_buffer, rng_state,
 
224
                                                                sample, x, y, tile.offset, tile.stride);
 
225
                                                }
 
226
                                        }
 
227
 
 
228
                                        tile.sample = sample + 1;
 
229
 
 
230
                                        task.update_progress(tile);
 
231
                                }
 
232
                        }
 
233
 
 
234
                        task.release_tile(tile);
 
235
 
 
236
                        if(task_pool.cancelled()) {
 
237
                                if(task.need_finish_queue == false)
176
238
                                        break;
177
239
                        }
178
240
                }
179
241
 
180
242
#ifdef WITH_OSL
181
 
                if(kernel_osl_use(kg))
182
 
                        OSLShader::thread_free(kg);
 
243
                OSLShader::thread_free(&kg);
183
244
#endif
184
245
        }
185
246
 
186
247
        void thread_tonemap(DeviceTask& task)
187
248
        {
188
249
#ifdef WITH_OPTIMIZED_KERNEL
189
 
                if(system_cpu_support_optimized()) {
190
 
                        for(int y = task.y; y < task.y + task.h; y++)
191
 
                                for(int x = task.x; x < task.x + task.w; x++)
192
 
                                        kernel_cpu_optimized_tonemap(kg, (uchar4*)task.rgba, (float*)task.buffer,
 
250
                if(system_cpu_support_sse3()) {
 
251
                        for(int y = task.y; y < task.y + task.h; y++)
 
252
                                for(int x = task.x; x < task.x + task.w; x++)
 
253
                                        kernel_cpu_sse3_tonemap(&kernel_globals, (uchar4*)task.rgba, (float*)task.buffer,
 
254
                                                task.sample, task.resolution, x, y, task.offset, task.stride);
 
255
                }
 
256
                else if(system_cpu_support_sse2()) {
 
257
                        for(int y = task.y; y < task.y + task.h; y++)
 
258
                                for(int x = task.x; x < task.x + task.w; x++)
 
259
                                        kernel_cpu_sse2_tonemap(&kernel_globals, (uchar4*)task.rgba, (float*)task.buffer,
193
260
                                                task.sample, task.resolution, x, y, task.offset, task.stride);
194
261
                }
195
262
                else
197
264
                {
198
265
                        for(int y = task.y; y < task.y + task.h; y++)
199
266
                                for(int x = task.x; x < task.x + task.w; x++)
200
 
                                        kernel_cpu_tonemap(kg, (uchar4*)task.rgba, (float*)task.buffer,
 
267
                                        kernel_cpu_tonemap(&kernel_globals, (uchar4*)task.rgba, (float*)task.buffer,
201
268
                                                task.sample, task.resolution, x, y, task.offset, task.stride);
202
269
                }
203
270
        }
204
271
 
205
272
        void thread_shader(DeviceTask& task)
206
273
        {
 
274
                KernelGlobals kg = kernel_globals;
 
275
 
207
276
#ifdef WITH_OSL
208
 
                if(kernel_osl_use(kg))
209
 
                        OSLShader::thread_init(kg);
 
277
                OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
210
278
#endif
211
279
 
212
280
#ifdef WITH_OPTIMIZED_KERNEL
213
 
                if(system_cpu_support_optimized()) {
214
 
                        for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
215
 
                                kernel_cpu_optimized_shader(kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
216
 
 
217
 
                                if(tasks.worker_cancel())
 
281
                if(system_cpu_support_sse3()) {
 
282
                        for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
 
283
                                kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
 
284
 
 
285
                                if(task_pool.cancelled())
 
286
                                        break;
 
287
                        }
 
288
                }
 
289
                else if(system_cpu_support_sse2()) {
 
290
                        for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
 
291
                                kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
 
292
 
 
293
                                if(task_pool.cancelled())
218
294
                                        break;
219
295
                        }
220
296
                }
222
298
#endif
223
299
                {
224
300
                        for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
225
 
                                kernel_cpu_shader(kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
 
301
                                kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
226
302
 
227
 
                                if(tasks.worker_cancel())
 
303
                                if(task_pool.cancelled())
228
304
                                        break;
229
305
                        }
230
306
                }
231
307
 
232
308
#ifdef WITH_OSL
233
 
                if(kernel_osl_use(kg))
234
 
                        OSLShader::thread_free(kg);
 
309
                OSLShader::thread_free(&kg);
235
310
#endif
236
311
        }
237
312
 
238
313
        void task_add(DeviceTask& task)
239
314
        {
240
315
                /* split task into smaller ones, more than number of threads for uneven
241
 
                   workloads where some parts of the image render slower than others */
242
 
                task.split(tasks, threads.size()*10);
 
316
                 * workloads where some parts of the image render slower than others */
 
317
                list<DeviceTask> tasks;
 
318
                task.split(tasks, TaskScheduler::num_threads());
 
319
 
 
320
                foreach(DeviceTask& task, tasks)
 
321
                        task_pool.push(new CPUDeviceTask(this, task));
243
322
        }
244
323
 
245
324
        void task_wait()
246
325
        {
247
 
                tasks.wait_done();
 
326
                task_pool.wait_work();
248
327
        }
249
328
 
250
329
        void task_cancel()
251
330
        {
252
 
                tasks.cancel();
 
331
                task_pool.cancel();
253
332
        }
254
333
};
255
334
 
256
 
Device *device_cpu_create(DeviceInfo& info, int threads)
 
335
Device *device_cpu_create(DeviceInfo& info, Stats &stats)
257
336
{
258
 
        return new CPUDevice(threads);
 
337
        return new CPUDevice(stats);
259
338
}
260
339
 
261
340
void device_cpu_info(vector<DeviceInfo>& devices)
267
346
        info.id = "CPU";
268
347
        info.num = 0;
269
348
        info.advanced_shading = true;
 
349
        info.pack_images = false;
270
350
 
271
351
        devices.insert(devices.begin(), info);
272
352
}