2
This file is part of darktable,
3
copyright (c) 2011 johannes hanika.
5
darktable is free software: you can redistribute it and/or modify
6
it under the terms of the GNU General Public License as published by
7
the Free Software Foundation, either version 3 of the License, or
8
(at your option) any later version.
10
darktable is distributed in the hope that it will be useful,
11
but WITHOUT ANY WARRANTY; without even the implied warranty of
12
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
GNU General Public License for more details.
15
You should have received a copy of the GNU General Public License
16
along with darktable. If not, see <http://www.gnu.org/licenses/>.
21
#include "develop/imageop.h"
22
#include "dtgtk/slider.h"
24
#include "common/opencl.h"
28
// this is the version of the modules parameters,
29
// and includes version information about compile-time dt
32
typedef struct dt_iop_nlmeans_params_t
34
// these are stored in db.
38
dt_iop_nlmeans_params_t;
40
typedef struct dt_iop_nlmeans_gui_data_t
42
GtkDarktableSlider *luma;
43
GtkDarktableSlider *chroma;
45
dt_iop_nlmeans_gui_data_t;
47
typedef dt_iop_nlmeans_params_t dt_iop_nlmeans_data_t;
49
typedef struct dt_iop_nlmeans_global_data_t
53
dt_iop_nlmeans_global_data_t;
57
return _("denoising (extra slow)");
63
return IOP_GROUP_CORRECT;
66
/** modify regions of interest (optional, per pixel ops don't need this) */
67
// void modify_roi_out(struct dt_iop_module_t *self, struct dt_dev_pixelpipe_iop_t *piece, dt_iop_roi_t *roi_out, const dt_iop_roi_t *roi_in);
68
// void modify_roi_in(struct dt_iop_module_t *self, struct dt_dev_pixelpipe_iop_t *piece, const dt_iop_roi_t *roi_out, dt_iop_roi_t *roi_in);
70
static float gh(const float f)
72
// return 0.0001f + dt_fast_expf(-fabsf(f)*800.0f);
73
// return 1.0f/(1.0f + f*f);
74
// make spread bigger: less smoothing
75
const float spread = 100.f;
76
return 1.0f/(1.0f + fabsf(f)*spread);
79
// temporarily disabled, because it is really quite unbearably slow the way it is implemented now..
80
#if 0//def HAVE_OPENCL
82
process_cl (struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out, const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
84
dt_iop_nlmeans_params_t *d = (dt_iop_nlmeans_params_t *)piece->data;
85
dt_iop_nlmeans_global_data_t *gd = (dt_iop_nlmeans_global_data_t *)self->data;
86
const int devid = piece->pipe->devid;
88
const int P = ceilf(3 * roi_in->scale / piece->iscale); // pixel filter size
89
const int K = ceilf(7 * roi_in->scale / piece->iscale); // nbhood
93
size_t origin[] = {0, 0, 0};
94
size_t region[] = {roi_in->width, roi_in->height, 1};
95
err = dt_opencl_enqueue_copy_image(darktable.opencl->dev[devid].cmd_queue, dev_in, dev_out, origin, origin, region, 0, NULL, NULL);
96
if (err != CL_SUCCESS) goto error;
99
float max_L = 100.0f, max_C = 256.0f;
100
float nL = 1.0f/(d->luma*max_L), nC = 1.0f/(d->chroma*max_C);
102
size_t sizes[] = {roi_in->width, roi_in->height, 1};
103
dt_opencl_set_kernel_arg(darktable.opencl, devid, gd->kernel_nlmeans, 0, sizeof(cl_mem), (void *)&dev_in);
104
dt_opencl_set_kernel_arg(darktable.opencl, devid, gd->kernel_nlmeans, 1, sizeof(cl_mem), (void *)&dev_out);
105
dt_opencl_set_kernel_arg(darktable.opencl, devid, gd->kernel_nlmeans, 2, sizeof(int32_t), (void *)&P);
106
dt_opencl_set_kernel_arg(darktable.opencl, devid, gd->kernel_nlmeans, 3, sizeof(int32_t), (void *)&K);
107
dt_opencl_set_kernel_arg(darktable.opencl, devid, gd->kernel_nlmeans, 4, sizeof(float), (void *)&nL);
108
dt_opencl_set_kernel_arg(darktable.opencl, devid, gd->kernel_nlmeans, 5, sizeof(float), (void *)&nC);
109
err = dt_opencl_enqueue_kernel_2d(darktable.opencl, devid, gd->kernel_nlmeans, sizes);
110
if(err != CL_SUCCESS) goto error;
114
dt_print(DT_DEBUG_OPENCL, "[opencl_nlmeans] couldn't enqueue kernel! %d\n", err);
119
/** process, all real work is done here. */
120
void process (struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, void *i, void *o, const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
122
// this is called for preview and full pipe separately, each with its own pixelpipe piece.
123
// get our data struct:
124
dt_iop_nlmeans_params_t *d = (dt_iop_nlmeans_params_t *)piece->data;
126
// adjust to zoom size:
127
const int P = ceilf(3 * roi_in->scale / piece->iscale); // pixel filter size
128
const int K = ceilf(7 * roi_in->scale / piece->iscale); // nbhood
131
// nothing to do from this distance:
132
memcpy (o, i, sizeof(float)*4*roi_out->width*roi_out->height);
136
// adjust to Lab, make L more important
137
float max_L = 100.0f, max_C = 256.0f;
138
float nL = 1.0f/(d->luma*max_L), nC = 1.0f/(d->chroma*max_C);
139
const float norm2[4] = { nL*nL, nC*nC, nC*nC, 1.0f };
141
#define SLIDING_WINDOW // brings down time from 15 secs to 3 secs on a core2 duo
142
#ifdef SLIDING_WINDOW
143
float *Sa = dt_alloc_align(64, sizeof(float)*roi_out->width*dt_get_num_threads());
145
float *S = dt_alloc_align(64, sizeof(float)*roi_out->width*roi_out->height);
147
// we want to sum up weights in col[3], so need to init to 0:
148
memset(o, 0x0, sizeof(float)*roi_out->width*roi_out->height*4);
150
// for each shift vector
151
for(int kj=-K;kj<=K;kj++)
153
for(int ki=-K;ki<=K;ki++)
155
#ifdef SLIDING_WINDOW
156
int inited_slide = 0;
157
// don't construct summed area tables but use sliding window! (applies to cpu version res < 1k only, or else we will add up errors)
158
// do this in parallel with a little threading overhead. could parallelize the outer loops with a bit more memory
160
# pragma omp parallel for schedule(static) default(none) firstprivate(inited_slide) shared(kj, ki, roi_out, roi_in, i, o, Sa)
162
for(int j=0; j<roi_out->height; j++)
164
if(j+kj < 0 || j+kj >= roi_out->height) continue;
165
float *S = Sa + dt_get_thread_num() * roi_out->width;
166
const float *ins = ((float *)i) + 4*(roi_in->width *(j+kj) + ki);
167
float *out = ((float *)o) + 4*roi_out->width*j;
169
const int Pm = MIN(MIN(P, j+kj), j);
170
const int PM = MIN(MIN(P, roi_out->height-1-j-kj), roi_out->height-1-j);
171
// first line of every thread
172
// TODO: also every once in a while to assert numerical precision!
176
memset(S, 0x0, sizeof(float)*roi_out->width);
177
for(int jj=-Pm;jj<=PM;jj++)
180
const float *inp = ((float *)i) + 4* roi_in->width *(j+jj);
181
const float *inps = ((float *)i) + 4*(roi_in->width *(j+jj+kj) + ki);
182
for(int i=0; i<roi_out->width; i++)
184
if(i+ki >= 0 && i+ki < roi_out->width)
187
s[0] += (inp[4*i + k] - inps[4*i + k])*(inp[4*i + k] - inps[4*i + k]) * norm2[k];
192
// only reuse this if we had a full stripe
193
if(Pm == P && PM == P) inited_slide = 1;
196
// sliding window for this line:
199
// sum up the first -P..P
200
for(int i=0;i<2*P+1;i++) slide += s[i];
201
for(int i=0; i<roi_out->width; i++)
203
if(i-P > 0 && i+P<roi_out->width)
204
slide += s[P] - s[-P-1];
205
if(i+ki >= 0 && i+ki < roi_out->width)
207
const float w = gh(slide);
208
for(int k=0;k<3;k++) out[k] += ins[k] * w;
215
if(inited_slide && j+P+1+MAX(0,kj) < roi_out->height)
217
// sliding window in j direction:
219
const float *inp = ((float *)i) + 4* roi_in->width *(j+P+1);
220
const float *inps = ((float *)i) + 4*(roi_in->width *(j+P+1+kj) + ki);
221
const float *inm = ((float *)i) + 4* roi_in->width *(j-P);
222
const float *inms = ((float *)i) + 4*(roi_in->width *(j-P+kj) + ki);
223
for(int i=0; i<roi_out->width; i++)
225
if(i+ki >= 0 && i+ki < roi_out->width) for(int k=0;k<3;k++)
226
s[0] += ((inp[4*i + k] - inps[4*i + k])*(inp[4*i + k] - inps[4*i + k])
227
- (inm[4*i + k] - inms[4*i + k])*(inm[4*i + k] - inms[4*i + k])) * norm2[k];
231
else inited_slide = 0;
234
// construct summed area table of weights:
236
#pragma omp parallel for default(none) schedule(static) shared(i,S,roi_in,roi_out,kj,ki)
238
for(int j=0; j<roi_out->height; j++)
240
const float *in = ((float *)i) + 4* roi_in->width * j;
241
const float *ins = ((float *)i) + 4*(roi_in->width *(j+kj) + ki);
242
float *out = ((float *)S) + roi_out->width*j;
243
if(j+kj < 0 || j+kj >= roi_out->height) memset(out, 0x0, sizeof(float)*roi_out->width);
244
else for(int i=0; i<roi_out->width; i++)
246
if(i+ki < 0 || i+ki >= roi_out->width) out[0] = 0.0f;
249
out[0] = (in[0] - ins[0])*(in[0] - ins[0]) * norm2[0];
250
out[0] += (in[1] - ins[1])*(in[1] - ins[1]) * norm2[1];
251
out[0] += (in[2] - ins[2])*(in[2] - ins[2]) * norm2[2];
261
#pragma omp parallel for default(none) schedule(static) shared(S,roi_out)
263
for(int j=0; j<roi_out->height; j++)
266
while(stride < roi_out->width)
268
float *out = ((float *)S) + roi_out->width*j;
269
for(int i=0;i<roi_out->width-stride;i++)
271
out[0] += out[stride];
279
#pragma omp parallel for default(none) schedule(static) shared(S,roi_out)
281
for(int i=0; i<roi_out->width; i++)
284
while(stride < roi_out->height)
287
for(int j=0;j<roi_out->height-stride;j++)
289
out[0] += out[roi_out->width*stride];
290
out += roi_out->width;
295
// now the denoising loop:
297
#pragma omp parallel for default(none) schedule(static) shared(i,o,S,kj,ki,roi_in,roi_out)
299
for(int j=0; j<roi_out->height; j++)
301
if(j+kj < 0 || j+kj >= roi_out->height) continue;
302
const float *in = ((float *)i) + 4*(roi_in->width *(j+kj) + ki);
303
float *out = ((float *)o) + 4*roi_out->width*j;
304
const float *s = S + roi_out->width*j;
305
const int offy = MIN(j, MIN(roi_out->height - j - 1, P)) * roi_out->width;
306
for(int i=0; i<roi_out->width; i++)
308
if(i+ki >= 0 && i+ki < roi_out->width)
310
const int offx = MIN(i, MIN(roi_out->width - i - 1, P));
311
const float m1 = s[offx - offy], m2 = s[- offx + offy], p1 = s[offx + offy], p2 = s[- offx - offy];
312
const float w = gh(p1 + p2 - m1 - m2);
313
for(int k=0;k<3;k++) out[k] += in[k] * w;
326
#pragma omp parallel for default(none) schedule(static) shared(o,roi_out)
328
for(int j=0; j<roi_out->height; j++)
330
float *out = ((float *)o) + 4*roi_out->width*j;
331
for(int i=0; i<roi_out->width; i++)
333
for(int k=0;k<3;k++) out[k] *= 1.0f/out[3];
337
#ifdef SLIDING_WINDOW
338
// free shared tmp memory:
341
// free the summed area table:
346
/** this will be called to init new defaults if a new image is loaded from film strip mode. */
347
void reload_defaults(dt_iop_module_t *module)
349
// our module is disabled by default
350
module->default_enabled = 0;
352
dt_iop_nlmeans_params_t tmp = (dt_iop_nlmeans_params_t)
356
memcpy(module->params, &tmp, sizeof(dt_iop_nlmeans_params_t));
357
memcpy(module->default_params, &tmp, sizeof(dt_iop_nlmeans_params_t));
360
/** init, cleanup, commit to pipeline */
361
void init(dt_iop_module_t *module)
363
module->params = malloc(sizeof(dt_iop_nlmeans_params_t));
364
module->default_params = malloc(sizeof(dt_iop_nlmeans_params_t));
365
// about the first thing to do in Lab space:
366
module->priority = 444; // module order created by iop_dependencies.py, do not edit!
367
module->params_size = sizeof(dt_iop_nlmeans_params_t);
368
module->gui_data = NULL;
372
void cleanup(dt_iop_module_t *module)
374
free(module->gui_data);
375
module->gui_data = NULL; // just to be sure
376
free(module->params);
377
module->params = NULL;
380
void init_global(dt_iop_module_so_t *module)
382
const int program = 5; // nlmeans.cl, from programs.conf
383
dt_iop_nlmeans_global_data_t *gd = (dt_iop_nlmeans_global_data_t *)malloc(sizeof(dt_iop_nlmeans_global_data_t));
385
gd->kernel_nlmeans = dt_opencl_create_kernel(darktable.opencl, program, "nlmeans");
388
void cleanup_global(dt_iop_module_so_t *module)
390
dt_iop_nlmeans_global_data_t *gd = (dt_iop_nlmeans_global_data_t *)module->data;
391
dt_opencl_free_kernel(darktable.opencl, gd->kernel_nlmeans);
396
/** commit is the synch point between core and gui, so it copies params to pipe data. */
397
void commit_params (struct dt_iop_module_t *self, dt_iop_params_t *params, dt_dev_pixelpipe_t *pipe, dt_dev_pixelpipe_iop_t *piece)
399
dt_iop_nlmeans_params_t *p = (dt_iop_nlmeans_params_t *)params;
400
dt_iop_nlmeans_data_t *d = (dt_iop_nlmeans_data_t *)piece->data;
401
d->luma = MAX(0.0001f, p->luma);
402
d->chroma = MAX(0.0001f, p->chroma);
405
void init_pipe (struct dt_iop_module_t *self, dt_dev_pixelpipe_t *pipe, dt_dev_pixelpipe_iop_t *piece)
407
piece->data = malloc(sizeof(dt_iop_nlmeans_data_t));
408
self->commit_params(self, self->default_params, pipe, piece);
411
void cleanup_pipe (struct dt_iop_module_t *self, dt_dev_pixelpipe_t *pipe, dt_dev_pixelpipe_iop_t *piece)
417
luma_callback(GtkRange *range, dt_iop_module_t *self)
419
// this is important to avoid cycles!
420
if(darktable.gui->reset) return;
421
dt_iop_nlmeans_gui_data_t *g = (dt_iop_nlmeans_gui_data_t *)self->gui_data;
422
dt_iop_nlmeans_params_t *p = (dt_iop_nlmeans_params_t *)self->params;
423
p->luma = dtgtk_slider_get_value(g->luma)*(1.0f/100.0f);
424
dt_dev_add_history_item(darktable.develop, self, TRUE);
428
chroma_callback(GtkRange *range, dt_iop_module_t *self)
430
// this is important to avoid cycles!
431
if(darktable.gui->reset) return;
432
dt_iop_nlmeans_gui_data_t *g = (dt_iop_nlmeans_gui_data_t *)self->gui_data;
433
dt_iop_nlmeans_params_t *p = (dt_iop_nlmeans_params_t *)self->params;
434
p->chroma = dtgtk_slider_get_value(g->chroma)*(1.0f/100.0f);
435
dt_dev_add_history_item(darktable.develop, self, TRUE);
438
/** gui callbacks, these are needed. */
439
void gui_update (dt_iop_module_t *self)
441
// let gui slider match current parameters:
442
dt_iop_nlmeans_gui_data_t *g = (dt_iop_nlmeans_gui_data_t *)self->gui_data;
443
dt_iop_nlmeans_params_t *p = (dt_iop_nlmeans_params_t *)self->params;
444
dtgtk_slider_set_value(g->luma, p->luma * 100.f);
445
dtgtk_slider_set_value(g->chroma, p->chroma * 100.f);
448
void gui_init (dt_iop_module_t *self)
450
// init the slider (more sophisticated layouts are possible with gtk tables and boxes):
451
self->gui_data = malloc(sizeof(dt_iop_nlmeans_gui_data_t));
452
dt_iop_nlmeans_gui_data_t *g = (dt_iop_nlmeans_gui_data_t *)self->gui_data;
453
self->widget = gtk_vbox_new(TRUE, DT_GUI_IOP_MODULE_CONTROL_SPACING);
454
// TODO: adjust defaults:
455
g->luma = DTGTK_SLIDER(dtgtk_slider_new_with_range(DARKTABLE_SLIDER_BAR, 0.0f, 100.0f, 1., 50.f, 0));
456
g->chroma = DTGTK_SLIDER(dtgtk_slider_new_with_range(DARKTABLE_SLIDER_BAR, 0.0f, 100.0f, 1., 50.f, 0));
457
dtgtk_slider_set_default_value(g->luma, 50.f);
458
dtgtk_slider_set_default_value(g->chroma, 50.f);
459
gtk_box_pack_start(GTK_BOX(self->widget), GTK_WIDGET(g->luma), TRUE, TRUE, 0);
460
gtk_box_pack_start(GTK_BOX(self->widget), GTK_WIDGET(g->chroma), TRUE, TRUE, 0);
461
dtgtk_slider_set_label(g->luma, _("luma"));
462
dtgtk_slider_set_unit (g->luma, "%");
463
dtgtk_slider_set_label(g->chroma, _("chroma"));
464
dtgtk_slider_set_unit (g->chroma, "%");
465
g_object_set (GTK_OBJECT(g->luma), "tooltip-text", _("how much to smooth brightness"), (char *)NULL);
466
g_object_set (GTK_OBJECT(g->chroma), "tooltip-text", _("how much to smooth colors"), (char *)NULL);
467
g_signal_connect (G_OBJECT (g->luma), "value-changed", G_CALLBACK (luma_callback), self);
468
g_signal_connect (G_OBJECT (g->chroma), "value-changed", G_CALLBACK (chroma_callback), self);
471
void gui_cleanup (dt_iop_module_t *self)
473
// nothing else necessary, gtk will clean up the slider.
474
free(self->gui_data);
475
self->gui_data = NULL;
478
// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-space on;