~ubuntu-branches/ubuntu/jaunty/xvidcap/jaunty-proposed

« back to all changes in this revision

Viewing changes to ffmpeg/libavcodec/libpostproc/postprocess.c

  • Committer: Bazaar Package Importer
  • Author(s): John Dong
  • Date: 2008-02-25 15:47:12 UTC
  • mfrom: (1.1.1 upstream)
  • Revision ID: james.westby@ubuntu.com-20080225154712-qvr11ekcea4c9ry8
Tags: 1.1.6-0.1ubuntu1
* Merge from debian-multimedia (LP: #120003), Ubuntu Changes:
 - For ffmpeg-related build-deps, remove cvs from package names.
 - Standards-Version 3.7.3
 - Maintainer Spec

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/*
2
 
    Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3
 
 
4
 
    This program is free software; you can redistribute it and/or modify
5
 
    it under the terms of the GNU General Public License as published by
6
 
    the Free Software Foundation; either version 2 of the License, or
7
 
    (at your option) any later version.
8
 
 
9
 
    This program is distributed in the hope that it will be useful,
10
 
    but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
 
    GNU General Public License for more details.
13
 
 
14
 
    You should have received a copy of the GNU General Public License
15
 
    along with this program; if not, write to the Free Software
16
 
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
 
*/
18
 
 
19
 
/**
20
 
 * @file postprocess.c
21
 
 * postprocessing.
22
 
 */
23
 
 
24
 
/*
25
 
                        C       MMX     MMX2    3DNow
26
 
isVertDC                Ec      Ec
27
 
isVertMinMaxOk          Ec      Ec
28
 
doVertLowPass           E               e       e
29
 
doVertDefFilter         Ec      Ec      e       e
30
 
isHorizDC               Ec      Ec
31
 
isHorizMinMaxOk         a       E
32
 
doHorizLowPass          E               e       e
33
 
doHorizDefFilter        Ec      Ec      e       e
34
 
deRing                  E               e       e*
35
 
Vertical RKAlgo1        E               a       a
36
 
Horizontal RKAlgo1                      a       a
37
 
Vertical X1#            a               E       E
38
 
Horizontal X1#          a               E       E
39
 
LinIpolDeinterlace      e               E       E*
40
 
CubicIpolDeinterlace    a               e       e*
41
 
LinBlendDeinterlace     e               E       E*
42
 
MedianDeinterlace#      E       Ec      Ec
43
 
TempDeNoiser#           E               e       e
44
 
 
45
 
* I don't have a 3DNow! CPU -> it's untested, but no one said it doesn't work, so it seems to work
46
 
# more or less self-invented filters, so the exactness isn't too meaningful
47
 
E = Exact implementation
48
 
e = almost exact implementation (slightly different rounding, ...)
49
 
a = alternative / approximate impl
50
 
c = checked against the other implementations (-vo md5)
51
 
*/
52
 
 
53
 
/*
54
 
TODO:
55
 
reduce the time wasted on the mem transfer
56
 
unroll stuff if instructions depend too much on the prior one
57
 
move YScale thing to the end instead of fixing QP
58
 
write a faster and higher quality deblocking filter :)
59
 
make the mainloop more flexible (variable number of blocks at once
60
 
        (the if/else stuff per block is slowing things down)
61
 
compare the quality & speed of all filters
62
 
split this huge file
63
 
optimize c versions
64
 
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
65
 
...
66
 
*/
67
 
 
68
 
//Changelog: use the CVS log
69
 
 
70
 
#include "config.h"
71
 
#include <inttypes.h>
72
 
#include <stdio.h>
73
 
#include <stdlib.h>
74
 
#include <string.h>
75
 
#ifdef HAVE_MALLOC_H
76
 
#include <malloc.h>
77
 
#endif
78
 
//#undef HAVE_MMX2
79
 
//#define HAVE_3DNOW
80
 
//#undef HAVE_MMX
81
 
//#undef ARCH_X86
82
 
//#define DEBUG_BRIGHTNESS
83
 
#ifdef USE_FASTMEMCPY
84
 
#include "../fastmemcpy.h"
85
 
#endif
86
 
#include "postprocess.h"
87
 
#include "postprocess_internal.h"
88
 
 
89
 
#include "mangle.h" //FIXME should be supressed
90
 
 
91
 
#ifndef HAVE_MEMALIGN
92
 
#define memalign(a,b) malloc(b)
93
 
#endif
94
 
 
95
 
/*
 * Small arithmetic helper macros.
 * Every argument may be evaluated more than once, so never pass an
 * expression with side effects (e.g. i++).
 */
#define MIN(a,b) ((a) > (b) ? (b) : (a))
#define MAX(a,b) ((a) < (b) ? (b) : (a))
#define ABS(a) ((a) > 0 ? (a) : (-(a)))
#define SIGN(a) ((a) > 0 ? 1 : -1) /* note: SIGN(0) yields -1, never 0 */
99
 
 
100
 
#define GET_MODE_BUFFER_SIZE 500
101
 
#define OPTIONS_ARRAY_SIZE 10
102
 
#define BLOCK_SIZE 8
103
 
#define TEMP_STRIDE 8
104
 
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
105
 
 
106
 
#ifdef ARCH_X86
/*
 * 64-bit constants holding one value replicated across all lanes:
 *   wNN = four 16-bit words of 0x00NN, bNN = eight bytes of 0xNN.
 * Presumably consumed by the inline assembly in the included template
 * (not visible here) -- TODO confirm.
 */
static uint64_t __attribute__((aligned(8))) w05=                0x0005000500050005LL;
static uint64_t __attribute__((aligned(8))) w20=                0x0020002000200020LL;
static uint64_t __attribute__((aligned(8))) b00=                0x0000000000000000LL;
static uint64_t __attribute__((aligned(8))) b01=                0x0101010101010101LL;
static uint64_t __attribute__((aligned(8))) b02=                0x0202020202020202LL;
static uint64_t __attribute__((aligned(8))) b08=                0x0808080808080808LL;
static uint64_t __attribute__((aligned(8))) b80=                0x8080808080808080LL;
#endif
115
 
 
116
 
 
117
 
/*
 * Byte lookup table with 256 entries of headroom on either side:
 * clip_tab points at entry 256, so clip_tab[-256 .. 511] stays inside
 * clip_table. No initializer is given here, so the table starts zeroed;
 * presumably it is filled elsewhere before use -- TODO confirm.
 */
static uint8_t clip_table[3*256];
static uint8_t * const clip_tab= clip_table + 256;

/* Diagnostic verbosity; values above 1 make the mode parser print the tokens it reads. */
static int verbose= 0;

/* NOTE(review): not referenced in this part of the file; presumably the
 * threshold of the dering filter in the template -- confirm. */
static const int deringThreshold= 20;
123
 
 
124
 
 
125
 
/*
 * Table of all selectable filters, terminated by an entry whose short name
 * is NULL (the mode parser stops on filters[i].shortName == NULL).
 * The parser matches a user token against both the short and the long name
 * and then uses chromDefault, minLumQuality, minChromQuality and mask.
 * NOTE(review): the initializers are positional; the column order is
 * presumably {shortName, longName, chromDefault, minLumQuality,
 * minChromQuality, mask} -- confirm against struct PPFilter.
 */
static struct PPFilter filters[]=
{
        {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
        {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
/*      {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
        {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
        {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
        {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
        {"dr", "dering",                1, 5, 6, DERING},
        {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
        {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
        {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
        {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
        {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
        {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
        {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
        {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
        {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
        {NULL, NULL,0,0,0,0} //End Marker
};
145
 
 
146
 
/*
 * Alias table used by the mode parser: entries come in pairs,
 * replaceTable[2*i] is the alias name and replaceTable[2*i + 1] is the
 * filter string that gets spliced into the mode string in its place.
 * A single NULL in an alias slot terminates the table.
 */
static char *replaceTable[]=
{
        "default",      "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
        "de",           "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
        "fast",         "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
        "fa",           "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
        NULL //End Marker
};
154
 
 
155
 
#ifdef ARCH_X86
156
 
static inline void unusedVariableWarningFixer()
157
 
{
158
 
        if(w05 + w20 + b00 + b01 + b02 + b08 + b80 == 0) b00=0;
159
 
}
160
 
#endif
161
 
 
162
 
 
163
 
#ifdef ARCH_X86
164
 
/* Issue the x86 PREFETCHNTA hint for the address p. */
static inline void prefetchnta(void *p)
{
        __asm__ volatile("prefetchnta (%0)\n\t" : : "r" (p));
}
170
 
 
171
 
/* Issue the x86 PREFETCHT0 hint for the address p. */
static inline void prefetcht0(void *p)
{
        __asm__ volatile("prefetcht0 (%0)\n\t" : : "r" (p));
}
177
 
 
178
 
/* Issue the x86 PREFETCHT1 hint for the address p. */
static inline void prefetcht1(void *p)
{
        __asm__ volatile("prefetcht1 (%0)\n\t" : : "r" (p));
}
184
 
 
185
 
/* Issue the x86 PREFETCHT2 hint for the address p. */
static inline void prefetcht2(void *p)
{
        __asm__ volatile("prefetcht2 (%0)\n\t" : : "r" (p));
}
191
 
#endif
192
 
 
193
 
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
194
 
 
195
 
/**
196
 
 * Check if the given 8x8 Block is mostly "flat"
197
 
 */
198
 
static inline int isHorizDC(uint8_t src[], int stride, PPContext *c)
199
 
{
200
 
        int numEq= 0;
201
 
        int y;
202
 
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
203
 
        const int dcThreshold= dcOffset*2 + 1;
204
 
 
205
 
        for(y=0; y<BLOCK_SIZE; y++)
206
 
        {
207
 
                if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
208
 
                if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
209
 
                if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
210
 
                if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
211
 
                if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
212
 
                if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
213
 
                if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
214
 
                src+= stride;
215
 
        }
216
 
        return numEq > c->ppMode.flatnessThreshold;
217
 
}
218
 
 
219
 
/**
220
 
 * Check if the middle 8x8 Block in the given 8x16 block is flat
221
 
 */
222
 
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
223
 
        int numEq= 0;
224
 
        int y;
225
 
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
226
 
        const int dcThreshold= dcOffset*2 + 1;
227
 
 
228
 
        src+= stride*4; // src points to begin of the 8x8 Block
229
 
        for(y=0; y<BLOCK_SIZE-1; y++)
230
 
        {
231
 
                if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
232
 
                if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
233
 
                if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
234
 
                if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
235
 
                if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
236
 
                if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
237
 
                if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
238
 
                if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
239
 
                src+= stride;
240
 
        }
241
 
        return numEq > c->ppMode.flatnessThreshold;
242
 
}
243
 
 
244
 
/**
 * Cheap horizontal range check on an 8x8 block.
 * One pair of pixels is sampled per row (columns 0/5, 2/7, 4/1, 6/3,
 * repeating every four rows); for QP >= 0 the check fails as soon as a
 * sampled pair differs by more than 2*QP in either direction.
 * @return 1 if every sampled pair is within range, 0 otherwise
 */
static inline int isHorizMinMaxOk(uint8_t src[], int stride, int QP)
{
        static const int firstCol[4]=  {0, 2, 4, 6};
        static const int secondCol[4]= {5, 7, 1, 3};
        int row;

        for(row=0; row<8; row++)
        {
                const int k= row & 3;
                const int diff= src[firstCol[k]] - src[secondCol[k]];

                // unsigned compare rejects both diff < -2*QP and diff > 2*QP at once
                if((unsigned)(diff + 2*QP) > 4*QP) return 0;
                src += stride;
        }
        return 1;
}
266
 
 
267
 
static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
268
 
{
269
 
#if 1
270
 
#if 1
271
 
        int x;
272
 
        src+= stride*4;
273
 
        for(x=0; x<BLOCK_SIZE; x+=4)
274
 
        {
275
 
                if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
276
 
                if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
277
 
                if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
278
 
                if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
279
 
        }
280
 
#else
281
 
        int x;
282
 
        src+= stride*3;
283
 
        for(x=0; x<BLOCK_SIZE; x++)
284
 
        {
285
 
                if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
286
 
        }
287
 
#endif
288
 
        return 1;
289
 
#else
290
 
        int x;
291
 
        src+= stride*4;
292
 
        for(x=0; x<BLOCK_SIZE; x++)
293
 
        {
294
 
                int min=255;
295
 
                int max=0;
296
 
                int y;
297
 
                for(y=0; y<8; y++){
298
 
                        int v= src[x + y*stride];
299
 
                        if(v>max) max=v;
300
 
                        if(v<min) min=v;
301
 
                }
302
 
                if(max-min > 2*QP) return 0;
303
 
        }
304
 
        return 1;
305
 
#endif
306
 
}
307
 
 
308
 
/**
 * Classify a block for the vertical deblocking stage.
 * @return 2 if isVertDC_C() reports the block as not flat,
 *         1 if it is flat and isVertMinMaxOk_C() accepts it,
 *         0 if it is flat but isVertMinMaxOk_C() rejects it
 */
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
        if( !isVertDC_C(src, stride, c) )
                return 2;

        return isVertMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
318
 
 
319
 
static inline void doHorizDefFilter(uint8_t dst[], int stride, int QP)
320
 
{
321
 
        int y;
322
 
        for(y=0; y<BLOCK_SIZE; y++)
323
 
        {
324
 
                const int middleEnergy= 5*(dst[4] - dst[5]) + 2*(dst[2] - dst[5]);
325
 
 
326
 
                if(ABS(middleEnergy) < 8*QP)
327
 
                {
328
 
                        const int q=(dst[3] - dst[4])/2;
329
 
                        const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
330
 
                        const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
331
 
 
332
 
                        int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
333
 
                        d= MAX(d, 0);
334
 
 
335
 
                        d= (5*d + 32) >> 6;
336
 
                        d*= SIGN(-middleEnergy);
337
 
 
338
 
                        if(q>0)
339
 
                        {
340
 
                                d= d<0 ? 0 : d;
341
 
                                d= d>q ? q : d;
342
 
                        }
343
 
                        else
344
 
                        {
345
 
                                d= d>0 ? 0 : d;
346
 
                                d= d<q ? q : d;
347
 
                        }
348
 
 
349
 
                        dst[3]-= d;
350
 
                        dst[4]+= d;
351
 
                }
352
 
                dst+= stride;
353
 
        }
354
 
}
355
 
 
356
 
/**
357
 
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
358
 
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
359
 
 */
360
 
static inline void doHorizLowPass(uint8_t dst[], int stride, int QP)
361
 
{
362
 
 
363
 
        int y;
364
 
        for(y=0; y<BLOCK_SIZE; y++)
365
 
        {
366
 
                const int first= ABS(dst[-1] - dst[0]) < QP ? dst[-1] : dst[0];
367
 
                const int last= ABS(dst[8] - dst[7]) < QP ? dst[8] : dst[7];
368
 
 
369
 
                int sums[9];
370
 
                sums[0] = first + dst[0];
371
 
                sums[1] = dst[0] + dst[1];
372
 
                sums[2] = dst[1] + dst[2];
373
 
                sums[3] = dst[2] + dst[3];
374
 
                sums[4] = dst[3] + dst[4];
375
 
                sums[5] = dst[4] + dst[5];
376
 
                sums[6] = dst[5] + dst[6];
377
 
                sums[7] = dst[6] + dst[7];
378
 
                sums[8] = dst[7] + last;
379
 
 
380
 
                dst[0]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4;
381
 
                dst[1]= ((dst[1]<<2) + ((first + sums[0] + sums[3])<<1) + sums[5] + 8)>>4;
382
 
                dst[2]= ((dst[2]<<2) + ((first + sums[1] + sums[4])<<1) + sums[6] + 8)>>4;
383
 
                dst[3]= ((dst[3]<<2) + ((sums[2] + sums[5])<<1) + sums[0] + sums[7] + 8)>>4;
384
 
                dst[4]= ((dst[4]<<2) + ((sums[3] + sums[6])<<1) + sums[1] + sums[8] + 8)>>4;
385
 
                dst[5]= ((dst[5]<<2) + ((last + sums[7] + sums[4])<<1) + sums[2] + 8)>>4;
386
 
                dst[6]= (((last + dst[6])<<2) + ((dst[7] + sums[5])<<1) + sums[3] + 8)>>4;
387
 
                dst[7]= ((sums[8]<<2) + ((last + sums[6])<<1) + sums[4] + 8)>>4;
388
 
 
389
 
                dst+= stride;
390
 
        }
391
 
}
392
 
 
393
 
/**
394
 
 * Experimental Filter 1 (Horizontal)
395
 
 * will not damage linear gradients
396
 
 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
397
 
 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
398
 
 * MMX2 version does correct clipping C version doesnt
399
 
 * not identical with the vertical one
400
 
 */
401
 
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
402
 
{
403
 
        int y;
404
 
        static uint64_t *lut= NULL;
405
 
        if(lut==NULL)
406
 
        {
407
 
                int i;
408
 
                lut= (uint64_t*)memalign(8, 256*8);
409
 
                for(i=0; i<256; i++)
410
 
                {
411
 
                        int v= i < 128 ? 2*i : 2*(i-256);
412
 
/*
413
 
//Simulate 112242211 9-Tap filter
414
 
                        uint64_t a= (v/16) & 0xFF;
415
 
                        uint64_t b= (v/8) & 0xFF;
416
 
                        uint64_t c= (v/4) & 0xFF;
417
 
                        uint64_t d= (3*v/8) & 0xFF;
418
 
*/
419
 
//Simulate piecewise linear interpolation
420
 
                        uint64_t a= (v/16) & 0xFF;
421
 
                        uint64_t b= (v*3/16) & 0xFF;
422
 
                        uint64_t c= (v*5/16) & 0xFF;
423
 
                        uint64_t d= (7*v/16) & 0xFF;
424
 
                        uint64_t A= (0x100 - a)&0xFF;
425
 
                        uint64_t B= (0x100 - b)&0xFF;
426
 
                        uint64_t C= (0x100 - c)&0xFF;
427
 
                        uint64_t D= (0x100 - c)&0xFF;
428
 
 
429
 
                        lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
430
 
                                (D<<24) | (C<<16) | (B<<8) | (A);
431
 
                        //lut[i] = (v<<32) | (v<<24);
432
 
                }
433
 
        }
434
 
 
435
 
        for(y=0; y<BLOCK_SIZE; y++)
436
 
        {
437
 
                int a= src[1] - src[2];
438
 
                int b= src[3] - src[4];
439
 
                int c= src[5] - src[6];
440
 
 
441
 
                int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
442
 
 
443
 
                if(d < QP)
444
 
                {
445
 
                        int v = d * SIGN(-b);
446
 
 
447
 
                        src[1] +=v/8;
448
 
                        src[2] +=v/4;
449
 
                        src[3] +=3*v/8;
450
 
                        src[4] -=3*v/8;
451
 
                        src[5] -=v/4;
452
 
                        src[6] -=v/8;
453
 
 
454
 
                }
455
 
                src+=stride;
456
 
        }
457
 
}
458
 
 
459
 
 
460
 
//Note: we have C, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one
461
 
//Plain C versions
462
 
// Select which variants of postprocess_template.c get compiled.
// With RUNTIME_CPUDETECT every variant available for the architecture is built;
// otherwise exactly one is chosen from the HAVE_xyz configuration macros.
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
#define COMPILE_C
#endif

#ifdef ARCH_X86

#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX
#endif

#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX2
#endif

#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_3DNOW
#endif
#endif //ARCH_X86

// Drop the configuration macros; each section below redefines exactly the
// ones its variant needs before including the template again.
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef ARCH_X86

// Plain C variant: all functions get the _C suffix via RENAME().
#ifdef COMPILE_C
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef ARCH_X86
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif
527
 
 
528
 
// minor note: the HAVE_xyz macros are messed up after this line, so don't use them
529
 
 
530
 
static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
531
 
        QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
532
 
{
533
 
        PPContext *c= (PPContext *)vc;
534
 
        PPMode *ppMode= (PPMode *)vm;
535
 
        c->ppMode= *ppMode; //FIXME
536
 
 
537
 
        // useing ifs here as they are faster than function pointers allthough the
538
 
        // difference wouldnt be messureable here but its much better because
539
 
        // someone might exchange the cpu whithout restarting mplayer ;)
540
 
#ifdef RUNTIME_CPUDETECT
541
 
#ifdef ARCH_X86
542
 
        // ordered per speed fasterst first
543
 
        if(c->cpuCaps & PP_CPU_CAPS_MMX2)
544
 
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
545
 
        else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
546
 
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
547
 
        else if(c->cpuCaps & PP_CPU_CAPS_MMX)
548
 
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
549
 
        else
550
 
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
551
 
#else
552
 
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
553
 
#endif
554
 
#else //RUNTIME_CPUDETECT
555
 
#ifdef HAVE_MMX2
556
 
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
557
 
#elif defined (HAVE_3DNOW)
558
 
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
559
 
#elif defined (HAVE_MMX)
560
 
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
561
 
#else
562
 
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
563
 
#endif
564
 
#endif //!RUNTIME_CPUDETECT
565
 
}
566
 
 
567
 
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
568
 
//      QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
569
 
 
570
 
/* -pp command line help.
 * Names listed here must be spelled exactly as in filters[] / replaceTable[],
 * because the mode parser compares them with case-sensitive strcmp().
 * Fixed against those tables: "forcequant" (was "forceQuant", which the
 * parser would not match), the missing l5/lowpass5 entry, and the
 * expansions shown for the "de" and "fa" aliases.
 */
char *pp_help=
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f       fullyrange      stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a,al,tn:a:150:200:400\n"
"fa     fast                                    h1:a,v1:a,dr:a,al,tn:a:150:200:400\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                       1. <= 2. <= 3.          larger -> stronger filtering\n"
"fq     forcequant      <quantizer>             force quantizer\n"
;
607
 
 
608
 
pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
609
 
{
610
 
        char temp[GET_MODE_BUFFER_SIZE];
611
 
        char *p= temp;
612
 
        char *filterDelimiters= ",/";
613
 
        char *optionDelimiters= ":";
614
 
        struct PPMode *ppMode;
615
 
        char *filterToken;
616
 
 
617
 
        ppMode= memalign(8, sizeof(PPMode));
618
 
        
619
 
        ppMode->lumMode= 0;
620
 
        ppMode->chromMode= 0;
621
 
        ppMode->maxTmpNoise[0]= 700;
622
 
        ppMode->maxTmpNoise[1]= 1500;
623
 
        ppMode->maxTmpNoise[2]= 3000;
624
 
        ppMode->maxAllowedY= 234;
625
 
        ppMode->minAllowedY= 16;
626
 
        ppMode->baseDcDiff= 256/8;
627
 
        ppMode->flatnessThreshold= 56-16-1;
628
 
        ppMode->maxClippedThreshold= 0.01;
629
 
        ppMode->error=0;
630
 
 
631
 
        strncpy(temp, name, GET_MODE_BUFFER_SIZE);
632
 
 
633
 
        if(verbose>1) printf("pp: %s\n", name);
634
 
 
635
 
        for(;;){
636
 
                char *filterName;
637
 
                int q= 1000000; //PP_QUALITY_MAX;
638
 
                int chrom=-1;
639
 
                char *option;
640
 
                char *options[OPTIONS_ARRAY_SIZE];
641
 
                int i;
642
 
                int filterNameOk=0;
643
 
                int numOfUnknownOptions=0;
644
 
                int enable=1; //does the user want us to enabled or disabled the filter
645
 
 
646
 
                filterToken= strtok(p, filterDelimiters);
647
 
                if(filterToken == NULL) break;
648
 
                p+= strlen(filterToken) + 1; // p points to next filterToken
649
 
                filterName= strtok(filterToken, optionDelimiters);
650
 
                if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
651
 
 
652
 
                if(*filterName == '-')
653
 
                {
654
 
                        enable=0;
655
 
                        filterName++;
656
 
                }
657
 
 
658
 
                for(;;){ //for all options
659
 
                        option= strtok(NULL, optionDelimiters);
660
 
                        if(option == NULL) break;
661
 
 
662
 
                        if(verbose>1) printf("pp: option: %s\n", option);
663
 
                        if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
664
 
                        else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
665
 
                        else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
666
 
                        else
667
 
                        {
668
 
                                options[numOfUnknownOptions] = option;
669
 
                                numOfUnknownOptions++;
670
 
                        }
671
 
                        if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
672
 
                }
673
 
                options[numOfUnknownOptions] = NULL;
674
 
 
675
 
                /* replace stuff from the replace Table */
676
 
                for(i=0; replaceTable[2*i]!=NULL; i++)
677
 
                {
678
 
                        if(!strcmp(replaceTable[2*i], filterName))
679
 
                        {
680
 
                                int newlen= strlen(replaceTable[2*i + 1]);
681
 
                                int plen;
682
 
                                int spaceLeft;
683
 
 
684
 
                                if(p==NULL) p= temp, *p=0;      //last filter
685
 
                                else p--, *p=',';               //not last filter
686
 
 
687
 
                                plen= strlen(p);
688
 
                                spaceLeft= p - temp + plen;
689
 
                                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE)
690
 
                                {
691
 
                                        ppMode->error++;
692
 
                                        break;
693
 
                                }
694
 
                                memmove(p + newlen, p, plen+1);
695
 
                                memcpy(p, replaceTable[2*i + 1], newlen);
696
 
                                filterNameOk=1;
697
 
                        }
698
 
                }
699
 
 
700
 
                for(i=0; filters[i].shortName!=NULL; i++)
701
 
                {
702
 
//                      printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
703
 
                        if(   !strcmp(filters[i].longName, filterName)
704
 
                           || !strcmp(filters[i].shortName, filterName))
705
 
                        {
706
 
                                ppMode->lumMode &= ~filters[i].mask;
707
 
                                ppMode->chromMode &= ~filters[i].mask;
708
 
 
709
 
                                filterNameOk=1;
710
 
                                if(!enable) break; // user wants to disable it
711
 
 
712
 
                                if(q >= filters[i].minLumQuality)
713
 
                                        ppMode->lumMode|= filters[i].mask;
714
 
                                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
715
 
                                        if(q >= filters[i].minChromQuality)
716
 
                                                ppMode->chromMode|= filters[i].mask;
717
 
 
718
 
                                if(filters[i].mask == LEVEL_FIX)
719
 
                                {
720
 
                                        int o;
721
 
                                        ppMode->minAllowedY= 16;
722
 
                                        ppMode->maxAllowedY= 234;
723
 
                                        for(o=0; options[o]!=NULL; o++)
724
 
                                        {
725
 
                                                if(  !strcmp(options[o],"fullyrange")
726
 
                                                   ||!strcmp(options[o],"f"))
727
 
                                                {
728
 
                                                        ppMode->minAllowedY= 0;
729
 
                                                        ppMode->maxAllowedY= 255;
730
 
                                                        numOfUnknownOptions--;
731
 
                                                }
732
 
                                        }
733
 
                                }
734
 
                                else if(filters[i].mask == TEMP_NOISE_FILTER)
735
 
                                {
736
 
                                        int o;
737
 
                                        int numOfNoises=0;
738
 
 
739
 
                                        for(o=0; options[o]!=NULL; o++)
740
 
                                        {
741
 
                                                char *tail;
742
 
                                                ppMode->maxTmpNoise[numOfNoises]=
743
 
                                                        strtol(options[o], &tail, 0);
744
 
                                                if(tail!=options[o])
745
 
                                                {
746
 
                                                        numOfNoises++;
747
 
                                                        numOfUnknownOptions--;
748
 
                                                        if(numOfNoises >= 3) break;
749
 
                                                }
750
 
                                        }
751
 
                                }
752
 
                                else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK)
753
 
                                {
754
 
                                        int o;
755
 
 
756
 
                                        for(o=0; options[o]!=NULL && o<2; o++)
757
 
                                        {
758
 
                                                char *tail;
759
 
                                                int val= strtol(options[o], &tail, 0);
760
 
                                                if(tail==options[o]) break;
761
 
 
762
 
                                                numOfUnknownOptions--;
763
 
                                                if(o==0) ppMode->baseDcDiff= val;
764
 
                                                else ppMode->flatnessThreshold= val;
765
 
                                        }
766
 
                                }
767
 
                                else if(filters[i].mask == FORCE_QUANT)
768
 
                                {
769
 
                                        int o;
770
 
                                        ppMode->forcedQuant= 15;
771
 
 
772
 
                                        for(o=0; options[o]!=NULL && o<1; o++)
773
 
                                        {
774
 
                                                char *tail;
775
 
                                                int val= strtol(options[o], &tail, 0);
776
 
                                                if(tail==options[o]) break;
777
 
 
778
 
                                                numOfUnknownOptions--;
779
 
                                                ppMode->forcedQuant= val;
780
 
                                        }
781
 
                                }
782
 
                        }
783
 
                }
784
 
                if(!filterNameOk) ppMode->error++;
785
 
                ppMode->error += numOfUnknownOptions;
786
 
        }
787
 
 
788
 
        if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
789
 
        if(ppMode->error)
790
 
        {
791
 
                fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
792
 
                free(ppMode);
793
 
                return NULL;
794
 
        }
795
 
        return ppMode;
796
 
}
797
 
 
798
 
/**
 * Releases the heap memory behind a mode handle with free().
 * A NULL handle is accepted and ignored.
 */
void pp_free_mode(pp_mode_t *mode){
        /* free() is a no-op for NULL, so no explicit guard is required */
        free(mode);
}
801
 
 
802
 
/**
 * Replaces the buffer referenced by *p with a freshly allocated, zero-filled
 * one. The old contents are NOT carried over.
 *
 * @param p         address of the buffer pointer; the old buffer (may be NULL)
 *                  is freed and the pointer is overwritten
 * @param alignment alignment in bytes handed to memalign()
 * @param size      size in bytes of the new buffer
 *
 * If the allocation fails, *p is left as NULL and nothing is cleared, so the
 * caller can detect the failure instead of crashing inside memset().
 */
static void reallocAlign(void **p, int alignment, int size){
        free(*p);       /* free(NULL) is harmless */
        *p= memalign(alignment, size);
        if(*p == NULL) return;  /* out of memory: never memset a NULL pointer */
        memset(*p, 0, size);
}
807
 
 
808
 
/**
 * Records the new strides in the context and (re)allocates all of its work
 * buffers through reallocAlign(), i.e. every buffer comes back zero-filled.
 *
 * @param c        context whose buffers are replaced
 * @param width    picture width in pixels
 * @param height   picture height in pixels
 * @param stride   line size used to dimension the temporary picture buffers
 * @param qpStride line size used to dimension the QP tables
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
        /* picture size in 16x16 macroblocks, rounded up */
        const int mbWidth = (width+15)>>4;
        const int mbHeight= (height+15)>>4;
        /* every histogram bin starts with the same value */
        const int histogramSeed= width*height/64*15/256;
        /* the extra 17*1024 bytes are slack so that reads/writes slightly
           past the end of the picture stay inside the allocation */
        const int bluredSize= stride*mbHeight*16 + 17*1024;
        const int bluredPastSize= 256*((height+7)&(~7))/2 + 17*1024; //FIXME size
        const int qpTableSize= qpStride*mbHeight*sizeof(QP_STORE_T);
        int n;

        c->stride= stride;
        c->qpStride= qpStride;

        reallocAlign((void **)&c->tempDst, 8, stride*24);
        reallocAlign((void **)&c->tempSrc, 8, stride*24);
        reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
        reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));

        for(n=0; n<256; n++)
        {
                c->yHistogram[n]= histogramSeed;
        }

        /* one pair of buffers per plane */
        for(n=0; n<3; n++)
        {
                reallocAlign((void **)&c->tempBlured[n], 8, bluredSize);
                reallocAlign((void **)&c->tempBluredPast[n], 8, bluredPastSize);
        }

        reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
        reallocAlign((void **)&c->nonBQPTable, 8, qpTableSize);
        reallocAlign((void **)&c->stdQPTable, 8, qpTableSize);
        reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
}
835
 
 
836
 
static void global_init(void){
837
 
        int i;
838
 
        memset(clip_table, 0, 256);
839
 
        for(i=256; i<512; i++)
840
 
                clip_table[i]= i;
841
 
        memset(clip_table+512, 0, 256);
842
 
}
843
 
 
844
 
pp_context_t *pp_get_context(int width, int height, int cpuCaps){
845
 
        PPContext *c= memalign(32, sizeof(PPContext));
846
 
        int stride= (width+15)&(~15); //assumed / will realloc if needed
847
 
        int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
848
 
        
849
 
        global_init();
850
 
 
851
 
        memset(c, 0, sizeof(PPContext));
852
 
        c->cpuCaps= cpuCaps;
853
 
        if(cpuCaps&PP_FORMAT){
854
 
                c->hChromaSubSample= cpuCaps&0x3;
855
 
                c->vChromaSubSample= (cpuCaps>>4)&0x3;
856
 
        }else{
857
 
                c->hChromaSubSample= 1;
858
 
                c->vChromaSubSample= 1;
859
 
        }
860
 
 
861
 
        reallocBuffers(c, width, height, stride, qpStride);
862
 
        
863
 
        c->frameNum=-1;
864
 
 
865
 
        return c;
866
 
}
867
 
 
868
 
void pp_free_context(void *vc){
869
 
        PPContext *c = (PPContext*)vc;
870
 
        int i;
871
 
        
872
 
        for(i=0; i<3; i++) free(c->tempBlured[i]);
873
 
        for(i=0; i<3; i++) free(c->tempBluredPast[i]);
874
 
        
875
 
        free(c->tempBlocks);
876
 
        free(c->yHistogram);
877
 
        free(c->tempDst);
878
 
        free(c->tempSrc);
879
 
        free(c->deintTemp);
880
 
        free(c->stdQPTable);
881
 
        free(c->nonBQPTable);
882
 
        free(c->forcedQPTable);
883
 
        
884
 
        memset(c, 0, sizeof(PPContext));
885
 
 
886
 
        free(c);
887
 
}
888
 
 
889
 
void  pp_postprocess(uint8_t * src[3], int srcStride[3],
890
 
                 uint8_t * dst[3], int dstStride[3],
891
 
                 int width, int height,
892
 
                 QP_STORE_T *QP_store,  int QPStride,
893
 
                 pp_mode_t *vm,  void *vc, int pict_type)
894
 
{
895
 
        int mbWidth = (width+15)>>4;
896
 
        int mbHeight= (height+15)>>4;
897
 
        PPMode *mode = (PPMode*)vm;
898
 
        PPContext *c = (PPContext*)vc;
899
 
        int minStride= MAX(srcStride[0], dstStride[0]);
900
 
 
901
 
        if(c->stride < minStride || c->qpStride < QPStride)
902
 
                reallocBuffers(c, width, height, 
903
 
                                MAX(minStride, c->stride), 
904
 
                                MAX(c->qpStride, QPStride));
905
 
 
906
 
        if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) 
907
 
        {
908
 
                int i;
909
 
                QP_store= c->forcedQPTable;
910
 
                QPStride= 0;
911
 
                if(mode->lumMode & FORCE_QUANT)
912
 
                        for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
913
 
                else
914
 
                        for(i=0; i<mbWidth; i++) QP_store[i]= 1;
915
 
        }
916
 
//printf("pict_type:%d\n", pict_type);
917
 
 
918
 
        if(pict_type & PP_PICT_TYPE_QP2){
919
 
                int i;
920
 
                const int count= mbHeight * QPStride;
921
 
                for(i=0; i<(count>>2); i++){
922
 
                        ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
923
 
                }
924
 
                for(i<<=2; i<count; i++){
925
 
                        c->stdQPTable[i] = QP_store[i]>>1;
926
 
                }
927
 
                QP_store= c->stdQPTable;
928
 
        }
929
 
 
930
 
if(0){
931
 
int x,y;
932
 
for(y=0; y<mbHeight; y++){
933
 
        for(x=0; x<mbWidth; x++){
934
 
                printf("%2d ", QP_store[x + y*QPStride]);
935
 
        }
936
 
        printf("\n");
937
 
}
938
 
        printf("\n");
939
 
}
940
 
 
941
 
        if((pict_type&7)!=3)
942
 
        {
943
 
                int i;
944
 
                const int count= mbHeight * QPStride;
945
 
                for(i=0; i<(count>>2); i++){
946
 
                        ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x1F1F1F1F;
947
 
                }
948
 
                for(i<<=2; i<count; i++){
949
 
                        c->nonBQPTable[i] = QP_store[i] & 0x1F;
950
 
                }
951
 
        }
952
 
 
953
 
        if(verbose>2)
954
 
        {
955
 
                printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
956
 
        }
957
 
 
958
 
        postProcess(src[0], srcStride[0], dst[0], dstStride[0],
959
 
                width, height, QP_store, QPStride, 0, mode, c);
960
 
 
961
 
        width  = (width )>>c->hChromaSubSample;
962
 
        height = (height)>>c->vChromaSubSample;
963
 
 
964
 
        if(mode->chromMode)
965
 
        {
966
 
                postProcess(src[1], srcStride[1], dst[1], dstStride[1],
967
 
                        width, height, QP_store, QPStride, 1, mode, c);
968
 
                postProcess(src[2], srcStride[2], dst[2], dstStride[2],
969
 
                        width, height, QP_store, QPStride, 2, mode, c);
970
 
        }
971
 
        else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
972
 
        {
973
 
                memcpy(dst[1], src[1], srcStride[1]*height);
974
 
                memcpy(dst[2], src[2], srcStride[2]*height);
975
 
        }
976
 
        else
977
 
        {
978
 
                int y;
979
 
                for(y=0; y<height; y++)
980
 
                {
981
 
                        memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
982
 
                        memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
983
 
                }
984
 
        }
985
 
}
986