~ubuntu-branches/ubuntu/jaunty/xvidcap/jaunty-proposed

« back to all changes in this revision

Viewing changes to ffmpeg/libpostproc/postprocess.c

  • Committer: Bazaar Package Importer
  • Author(s): John Dong
  • Date: 2008-02-25 15:47:12 UTC
  • mfrom: (1.1.1 upstream)
  • Revision ID: james.westby@ubuntu.com-20080225154712-qvr11ekcea4c9ry8
Tags: 1.1.6-0.1ubuntu1
* Merge from debian-multimedia (LP: #120003), Ubuntu Changes:
 - For ffmpeg-related build-deps, remove cvs from package names.
 - Standards-Version 3.7.3
 - Maintainer Spec

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*
 
2
 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
 
3
 *
 
4
 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
 
5
 *
 
6
 * This file is part of FFmpeg.
 
7
 *
 
8
 * FFmpeg is free software; you can redistribute it and/or modify
 
9
 * it under the terms of the GNU General Public License as published by
 
10
 * the Free Software Foundation; either version 2 of the License, or
 
11
 * (at your option) any later version.
 
12
 *
 
13
 * FFmpeg is distributed in the hope that it will be useful,
 
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
16
 * GNU General Public License for more details.
 
17
 *
 
18
 * You should have received a copy of the GNU General Public License
 
19
 * along with FFmpeg; if not, write to the Free Software
 
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 
21
 */
 
22
 
 
23
/**
 
24
 * @file postprocess.c
 
25
 * postprocessing.
 
26
 */
 
27
 
 
28
/*
 
29
                        C       MMX     MMX2    3DNow   AltiVec
 
30
isVertDC                Ec      Ec                      Ec
 
31
isVertMinMaxOk          Ec      Ec                      Ec
 
32
doVertLowPass           E               e       e       Ec
 
33
doVertDefFilter         Ec      Ec      e       e       Ec
 
34
isHorizDC               Ec      Ec                      Ec
 
35
isHorizMinMaxOk         a       E                       Ec
 
36
doHorizLowPass          E               e       e       Ec
 
37
doHorizDefFilter        Ec      Ec      e       e       Ec
 
38
do_a_deblock            Ec      E       Ec      E
 
39
deRing                  E               e       e*      Ecp
 
40
Vertical RKAlgo1        E               a       a
 
41
Horizontal RKAlgo1                      a       a
 
42
Vertical X1#            a               E       E
 
43
Horizontal X1#          a               E       E
 
44
LinIpolDeinterlace      e               E       E*
 
45
CubicIpolDeinterlace    a               e       e*
 
46
LinBlendDeinterlace     e               E       E*
 
47
MedianDeinterlace#      E       Ec      Ec
 
48
TempDeNoiser#           E               e       e       Ec
 
49
 
 
50
* I don't have a 3DNow CPU -> it's untested, but no one said it doesn't work, so it seems to work
 
51
# more or less self-invented filters, so the exactness isn't too meaningful
 
52
E = Exact implementation
 
53
e = almost exact implementation (slightly different rounding, ...)
 
54
a = alternative / approximate impl
 
55
c = checked against the other implementations (-vo md5)
 
56
p = partially optimized, still some work to do
 
57
*/
 
58
 
 
59
/*
 
60
TODO:
 
61
reduce the time wasted on the mem transfer
 
62
unroll stuff if instructions depend too much on the prior one
 
63
move YScale thing to the end instead of fixing QP
 
64
write a faster and higher quality deblocking filter :)
 
65
make the mainloop more flexible (variable number of blocks at once
 
66
        (the if/else stuff per block is slowing things down)
 
67
compare the quality & speed of all filters
 
68
split this huge file
 
69
optimize c versions
 
70
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
 
71
...
 
72
*/
 
73
 
 
74
//Changelog: use the Subversion log
 
75
 
 
76
#include "config.h"
 
77
#include "avutil.h"
 
78
#include <inttypes.h>
 
79
#include <stdio.h>
 
80
#include <stdlib.h>
 
81
#include <string.h>
 
82
#ifdef HAVE_MALLOC_H
 
83
#include <malloc.h>
 
84
#endif
 
85
//#undef HAVE_MMX2
 
86
//#define HAVE_3DNOW
 
87
//#undef HAVE_MMX
 
88
//#undef ARCH_X86
 
89
//#define DEBUG_BRIGHTNESS
 
90
#ifdef USE_FASTMEMCPY
 
91
#include "libvo/fastmemcpy.h"
 
92
#endif
 
93
#include "postprocess.h"
 
94
#include "postprocess_internal.h"
 
95
 
 
96
#include "mangle.h" //FIXME should be supressed
 
97
 
 
98
#ifdef HAVE_ALTIVEC_H
 
99
#include <altivec.h>
 
100
#endif
 
101
 
 
102
#define GET_MODE_BUFFER_SIZE 500
 
103
#define OPTIONS_ARRAY_SIZE 10
 
104
#define BLOCK_SIZE 8
 
105
#define TEMP_STRIDE 8
 
106
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
 
107
 
 
108
#if defined(ARCH_X86)
 
109
static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005LL;
 
110
static uint64_t __attribute__((aligned(8))) attribute_used w04= 0x0004000400040004LL;
 
111
static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020LL;
 
112
static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000LL;
 
113
static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101LL;
 
114
static uint64_t __attribute__((aligned(8))) attribute_used b02= 0x0202020202020202LL;
 
115
static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x0808080808080808LL;
 
116
static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080LL;
 
117
#endif
 
118
 
 
119
static uint8_t clip_table[3*256];
 
120
static uint8_t * const clip_tab= clip_table + 256;
 
121
 
 
122
static const int attribute_used deringThreshold= 20;
 
123
 
 
124
 
 
125
/*
 * Table of the known postprocessing filters, terminated by an entry whose
 * name pointers are NULL. Each initializer supplies a short name, a long
 * name, three integers and a filter constant, in that order; what the three
 * integers mean is declared with struct PPFilter, which is not visible in
 * this part of the file.
 */
static struct PPFilter filters[]=
 
126
{
 
127
        {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
 
128
        {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
 
129
/*      {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
 
130
        {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
 
131
        {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
 
132
        {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
 
133
        {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
 
134
        {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
 
135
        {"dr", "dering",                1, 5, 6, DERING},
 
136
        {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
 
137
        {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
 
138
        {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
 
139
        {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
 
140
        {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
 
141
        {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
 
142
        {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
 
143
        {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
 
144
        {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
 
145
        {NULL, NULL,0,0,0,0} //End Marker
 
146
};
 
147
 
 
148
/*
 * Flat, NULL-terminated list of string pairs: an alias name followed by the
 * filter string it stands for. The short and long spellings of an alias
 * ("default"/"de", "fast"/"fa") are listed as separate pairs with the same
 * expansion.
 * NOTE(review): the code that consumes this table is not visible here;
 * presumably the mode-string parser substitutes the second string for the
 * first - confirm against the caller.
 */
static const char *replaceTable[]=
 
149
{
 
150
        "default",      "hdeblock:a,vdeblock:a,dering:a",
 
151
        "de",           "hdeblock:a,vdeblock:a,dering:a",
 
152
        "fast",         "x1hdeblock:a,x1vdeblock:a,dering:a",
 
153
        "fa",           "x1hdeblock:a,x1vdeblock:a,dering:a",
 
154
        "ac",           "ha:a:128:7,va:a,dering:a",
 
155
        NULL //End Marker
 
156
};
 
157
 
 
158
 
 
159
#if defined(ARCH_X86)
 
160
/**
 * Emit the x86 "prefetchnta" instruction for the address p.
 * The instruction takes p as its only operand; no C object is written.
 * @param p address handed to the prefetch instruction
 */
static inline void prefetchnta(void *p)
{
    /* __asm__/__volatile__ are also accepted in strict ISO C modes,
     * where the plain "asm" keyword is not available */
    __asm__ __volatile__(   "prefetchnta (%0)\n\t"
        : : "r" (p)
    );
}
 
166
 
 
167
/**
 * Emit the x86 "prefetcht0" instruction for the address p.
 * The instruction takes p as its only operand; no C object is written.
 * @param p address handed to the prefetch instruction
 */
static inline void prefetcht0(void *p)
{
    /* __asm__/__volatile__ are also accepted in strict ISO C modes,
     * where the plain "asm" keyword is not available */
    __asm__ __volatile__(   "prefetcht0 (%0)\n\t"
        : : "r" (p)
    );
}
 
173
 
 
174
/**
 * Emit the x86 "prefetcht1" instruction for the address p.
 * The instruction takes p as its only operand; no C object is written.
 * @param p address handed to the prefetch instruction
 */
static inline void prefetcht1(void *p)
{
    /* __asm__/__volatile__ are also accepted in strict ISO C modes,
     * where the plain "asm" keyword is not available */
    __asm__ __volatile__(   "prefetcht1 (%0)\n\t"
        : : "r" (p)
    );
}
 
180
 
 
181
/**
 * Emit the x86 "prefetcht2" instruction for the address p.
 * The instruction takes p as its only operand; no C object is written.
 * @param p address handed to the prefetch instruction
 */
static inline void prefetcht2(void *p)
{
    /* __asm__/__volatile__ are also accepted in strict ISO C modes,
     * where the plain "asm" keyword is not available */
    __asm__ __volatile__(   "prefetcht2 (%0)\n\t"
        : : "r" (p)
    );
}
 
187
#endif
 
188
 
 
189
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
 
190
 
 
191
/**
 
192
 * Check if the given 8x8 Block is mostly "flat"
 
193
 */
 
194
static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
 
195
{
 
196
        int numEq= 0;
 
197
        int y;
 
198
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
 
199
        const int dcThreshold= dcOffset*2 + 1;
 
200
 
 
201
        for(y=0; y<BLOCK_SIZE; y++)
 
202
        {
 
203
                if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
 
204
                if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
 
205
                if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
 
206
                if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
 
207
                if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
 
208
                if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
 
209
                if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
 
210
                src+= stride;
 
211
        }
 
212
        return numEq > c->ppMode.flatnessThreshold;
 
213
}
 
214
 
 
215
/**
 
216
 * Check if the middle 8x8 Block in the given 8x16 block is flat
 
217
 */
 
218
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
 
219
        int numEq= 0;
 
220
        int y;
 
221
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
 
222
        const int dcThreshold= dcOffset*2 + 1;
 
223
 
 
224
        src+= stride*4; // src points to begin of the 8x8 Block
 
225
        for(y=0; y<BLOCK_SIZE-1; y++)
 
226
        {
 
227
                if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
 
228
                if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
 
229
                if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
 
230
                if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
 
231
                if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
 
232
                if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
 
233
                if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
 
234
                if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
 
235
                src+= stride;
 
236
        }
 
237
        return numEq > c->ppMode.flatnessThreshold;
 
238
}
 
239
 
 
240
/**
 * Sampled range check over 8 rows of a block.
 *
 * Only one pixel pair is examined per row; the columns of the pair rotate
 * with a period of four rows: (0,5), (2,7), (4,1), (6,3). The check fails as
 * soon as a sampled pair differs by more than 2*QP in either direction.
 * This is a sampled test, not a min/max scan of all pixels.
 *
 * @param src    first pixel of the first row
 * @param stride distance between two rows
 * @param QP     quantizer; 2*QP is the largest accepted difference
 * @return 1 if every sampled pair is within the limit, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    /* columns compared on row r are samplePairs[r & 3] */
    static const uint8_t samplePairs[4][2]= { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    int row;

    for(row=0; row<8; row++, src+= stride){
        const uint8_t *pair= samplePairs[row & 3];

        /* the unsigned compare rejects differences below -2*QP and above 2*QP at once */
        if((unsigned)(src[pair[0]] - src[pair[1]] + 2*QP) > 4*QP) return 0;
    }
    return 1;
}
 
262
 
 
263
static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
 
264
{
 
265
#if 1
 
266
#if 1
 
267
        int x;
 
268
        src+= stride*4;
 
269
        for(x=0; x<BLOCK_SIZE; x+=4)
 
270
        {
 
271
                if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
 
272
                if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
 
273
                if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
 
274
                if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
 
275
        }
 
276
#else
 
277
        int x;
 
278
        src+= stride*3;
 
279
        for(x=0; x<BLOCK_SIZE; x++)
 
280
        {
 
281
                if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
 
282
        }
 
283
#endif
 
284
        return 1;
 
285
#else
 
286
        int x;
 
287
        src+= stride*4;
 
288
        for(x=0; x<BLOCK_SIZE; x++)
 
289
        {
 
290
                int min=255;
 
291
                int max=0;
 
292
                int y;
 
293
                for(y=0; y<8; y++){
 
294
                        int v= src[x + y*stride];
 
295
                        if(v>max) max=v;
 
296
                        if(v<min) min=v;
 
297
                }
 
298
                if(max-min > 2*QP) return 0;
 
299
        }
 
300
        return 1;
 
301
#endif
 
302
}
 
303
 
 
304
/**
 * Classify a block for horizontal filtering.
 * @return 2 if isHorizDC_C() does not report the block as flat; otherwise
 *         1 if isHorizMinMaxOk_C() accepts it for c->QP and 0 if it does not
 */
static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
    if( !isHorizDC_C(src, stride, c) )
        return 2;

    return isHorizMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
 
314
 
 
315
/**
 * Classify a block for vertical filtering.
 * @return 2 if isVertDC_C() does not report the block as flat; otherwise
 *         1 if isVertMinMaxOk_C() accepts it for c->QP and 0 if it does not
 */
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
    if( !isVertDC_C(src, stride, c) )
        return 2;

    return isVertMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
 
325
 
 
326
/**
 * Default horizontal filter (C version).
 *
 * For each of BLOCK_SIZE rows, three "energies" are computed from dst[0..7].
 * If the middle one is below 8*QP in magnitude, a correction derived from
 * the energies is subtracted from dst[3] and added to dst[4]. The correction
 * is limited to the range between 0 and (dst[3]-dst[4])/2. No other pixel is
 * written.
 *
 * @param dst    first pixel of the first row, modified in place
 * @param stride distance between two rows
 * @param c      context; only QP is read
 */
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
{
    int row;

    for(row=0; row<BLOCK_SIZE; row++, dst+= stride){
        const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
        int limit, leftEnergy, rightEnergy, corr;

        if(FFABS(middleEnergy) >= 8*c->QP)
            continue; // step too large, leave this row untouched

        limit=       (dst[3] - dst[4])/2;
        leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
        rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);

        corr= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
        corr= FFMAX(corr, 0);
        corr= (5*corr + 32) >> 6;
        corr*= FFSIGN(-middleEnergy);

        // keep the correction between 0 and limit, whatever the sign of limit
        if(limit>0)
            corr= FFMIN(FFMAX(corr, 0), limit);
        else
            corr= FFMAX(FFMIN(corr, 0), limit);

        dst[3]-= corr;
        dst[4]+= corr;
    }
}
 
362
 
 
363
/**
 
364
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
 
365
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
 
366
 */
 
367
static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
 
368
{
 
369
        int y;
 
370
        for(y=0; y<BLOCK_SIZE; y++)
 
371
        {
 
372
                const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
 
373
                const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
 
374
 
 
375
                int sums[10];
 
376
                sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
 
377
                sums[1] = sums[0] - first  + dst[3];
 
378
                sums[2] = sums[1] - first  + dst[4];
 
379
                sums[3] = sums[2] - first  + dst[5];
 
380
                sums[4] = sums[3] - first  + dst[6];
 
381
                sums[5] = sums[4] - dst[0] + dst[7];
 
382
                sums[6] = sums[5] - dst[1] + last;
 
383
                sums[7] = sums[6] - dst[2] + last;
 
384
                sums[8] = sums[7] - dst[3] + last;
 
385
                sums[9] = sums[8] - dst[4] + last;
 
386
 
 
387
                dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
 
388
                dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
 
389
                dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
 
390
                dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
 
391
                dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
 
392
                dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
 
393
                dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
 
394
                dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
 
395
 
 
396
                dst+= stride;
 
397
        }
 
398
}
 
399
 
 
400
/**
 
401
 * Experimental Filter 1 (Horizontal)
 
402
 * will not damage linear gradients
 
403
 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
 
404
 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
 
405
 * MMX2 version does correct clipping C version doesnt
 
406
 * not identical with the vertical one
 
407
 */
 
408
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
 
409
{
 
410
        int y;
 
411
        static uint64_t *lut= NULL;
 
412
        if(lut==NULL)
 
413
        {
 
414
                int i;
 
415
                lut = av_malloc(256*8);
 
416
                for(i=0; i<256; i++)
 
417
                {
 
418
                        int v= i < 128 ? 2*i : 2*(i-256);
 
419
/*
 
420
//Simulate 112242211 9-Tap filter
 
421
                        uint64_t a= (v/16) & 0xFF;
 
422
                        uint64_t b= (v/8) & 0xFF;
 
423
                        uint64_t c= (v/4) & 0xFF;
 
424
                        uint64_t d= (3*v/8) & 0xFF;
 
425
*/
 
426
//Simulate piecewise linear interpolation
 
427
                        uint64_t a= (v/16) & 0xFF;
 
428
                        uint64_t b= (v*3/16) & 0xFF;
 
429
                        uint64_t c= (v*5/16) & 0xFF;
 
430
                        uint64_t d= (7*v/16) & 0xFF;
 
431
                        uint64_t A= (0x100 - a)&0xFF;
 
432
                        uint64_t B= (0x100 - b)&0xFF;
 
433
                        uint64_t C= (0x100 - c)&0xFF;
 
434
                        uint64_t D= (0x100 - c)&0xFF;
 
435
 
 
436
                        lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
 
437
                                (D<<24) | (C<<16) | (B<<8) | (A);
 
438
                        //lut[i] = (v<<32) | (v<<24);
 
439
                }
 
440
        }
 
441
 
 
442
        for(y=0; y<BLOCK_SIZE; y++)
 
443
        {
 
444
                int a= src[1] - src[2];
 
445
                int b= src[3] - src[4];
 
446
                int c= src[5] - src[6];
 
447
 
 
448
                int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
 
449
 
 
450
                if(d < QP)
 
451
                {
 
452
                        int v = d * FFSIGN(-b);
 
453
 
 
454
                        src[1] +=v/8;
 
455
                        src[2] +=v/4;
 
456
                        src[3] +=3*v/8;
 
457
                        src[4] -=3*v/8;
 
458
                        src[5] -=v/4;
 
459
                        src[6] -=v/8;
 
460
 
 
461
                }
 
462
                src+=stride;
 
463
        }
 
464
}
 
465
 
 
466
/**
 
467
 * accurate deblock filter
 
468
 */
 
469
static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
 
470
        int y;
 
471
        const int QP= c->QP;
 
472
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
 
473
        const int dcThreshold= dcOffset*2 + 1;
 
474
//START_TIMER
 
475
        src+= step*4; // src points to begin of the 8x8 Block
 
476
        for(y=0; y<8; y++){
 
477
                int numEq= 0;
 
478
 
 
479
                if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
 
480
                if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
 
481
                if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
 
482
                if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
 
483
                if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
 
484
                if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
 
485
                if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
 
486
                if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
 
487
                if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
 
488
                if(numEq > c->ppMode.flatnessThreshold){
 
489
                        int min, max, x;
 
490
 
 
491
                        if(src[0] > src[step]){
 
492
                            max= src[0];
 
493
                            min= src[step];
 
494
                        }else{
 
495
                            max= src[step];
 
496
                            min= src[0];
 
497
                        }
 
498
                        for(x=2; x<8; x+=2){
 
499
                                if(src[x*step] > src[(x+1)*step]){
 
500
                                        if(src[x    *step] > max) max= src[ x   *step];
 
501
                                        if(src[(x+1)*step] < min) min= src[(x+1)*step];
 
502
                                }else{
 
503
                                        if(src[(x+1)*step] > max) max= src[(x+1)*step];
 
504
                                        if(src[ x   *step] < min) min= src[ x   *step];
 
505
                                }
 
506
                        }
 
507
                        if(max-min < 2*QP){
 
508
                                const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
 
509
                                const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
 
510
 
 
511
                                int sums[10];
 
512
                                sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
 
513
                                sums[1] = sums[0] - first       + src[3*step];
 
514
                                sums[2] = sums[1] - first       + src[4*step];
 
515
                                sums[3] = sums[2] - first       + src[5*step];
 
516
                                sums[4] = sums[3] - first       + src[6*step];
 
517
                                sums[5] = sums[4] - src[0*step] + src[7*step];
 
518
                                sums[6] = sums[5] - src[1*step] + last;
 
519
                                sums[7] = sums[6] - src[2*step] + last;
 
520
                                sums[8] = sums[7] - src[3*step] + last;
 
521
                                sums[9] = sums[8] - src[4*step] + last;
 
522
 
 
523
                                src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
 
524
                                src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
 
525
                                src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
 
526
                                src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
 
527
                                src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
 
528
                                src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
 
529
                                src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
 
530
                                src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
 
531
                        }
 
532
                }else{
 
533
                        const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
 
534
 
 
535
                        if(FFABS(middleEnergy) < 8*QP)
 
536
                        {
 
537
                                const int q=(src[3*step] - src[4*step])/2;
 
538
                                const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
 
539
                                const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
 
540
 
 
541
                                int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
 
542
                                d= FFMAX(d, 0);
 
543
 
 
544
                                d= (5*d + 32) >> 6;
 
545
                                d*= FFSIGN(-middleEnergy);
 
546
 
 
547
                                if(q>0)
 
548
                                {
 
549
                                        d= d<0 ? 0 : d;
 
550
                                        d= d>q ? q : d;
 
551
                                }
 
552
                                else
 
553
                                {
 
554
                                        d= d>0 ? 0 : d;
 
555
                                        d= d<q ? q : d;
 
556
                                }
 
557
 
 
558
                                src[3*step]-= d;
 
559
                                src[4*step]+= d;
 
560
                        }
 
561
                }
 
562
 
 
563
                src += stride;
 
564
        }
 
565
/*if(step==16){
 
566
    STOP_TIMER("step16")
 
567
}else{
 
568
    STOP_TIMER("stepX")
 
569
}*/
 
570
}
 
571
 
 
572
//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
 
573
//Plain C versions
 
574
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
 
575
#define COMPILE_C
 
576
#endif
 
577
 
 
578
#ifdef ARCH_POWERPC
 
579
#ifdef HAVE_ALTIVEC
 
580
#define COMPILE_ALTIVEC
 
581
#endif //HAVE_ALTIVEC
 
582
#endif //ARCH_POWERPC
 
583
 
 
584
#if defined(ARCH_X86)
 
585
 
 
586
#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
 
587
#define COMPILE_MMX
 
588
#endif
 
589
 
 
590
#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
 
591
#define COMPILE_MMX2
 
592
#endif
 
593
 
 
594
#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
 
595
#define COMPILE_3DNOW
 
596
#endif
 
597
#endif /* defined(ARCH_X86) */
 
598
 
 
599
#undef HAVE_MMX
 
600
#undef HAVE_MMX2
 
601
#undef HAVE_3DNOW
 
602
#undef HAVE_ALTIVEC
 
603
 
 
604
#ifdef COMPILE_C
 
605
#undef HAVE_MMX
 
606
#undef HAVE_MMX2
 
607
#undef HAVE_3DNOW
 
608
#define RENAME(a) a ## _C
 
609
#include "postprocess_template.c"
 
610
#endif
 
611
 
 
612
#ifdef ARCH_POWERPC
 
613
#ifdef COMPILE_ALTIVEC
 
614
#undef RENAME
 
615
#define HAVE_ALTIVEC
 
616
#define RENAME(a) a ## _altivec
 
617
#include "postprocess_altivec_template.c"
 
618
#include "postprocess_template.c"
 
619
#endif
 
620
#endif //ARCH_POWERPC
 
621
 
 
622
//MMX versions
 
623
#ifdef COMPILE_MMX
 
624
#undef RENAME
 
625
#define HAVE_MMX
 
626
#undef HAVE_MMX2
 
627
#undef HAVE_3DNOW
 
628
#define RENAME(a) a ## _MMX
 
629
#include "postprocess_template.c"
 
630
#endif
 
631
 
 
632
//MMX2 versions
 
633
#ifdef COMPILE_MMX2
 
634
#undef RENAME
 
635
#define HAVE_MMX
 
636
#define HAVE_MMX2
 
637
#undef HAVE_3DNOW
 
638
#define RENAME(a) a ## _MMX2
 
639
#include "postprocess_template.c"
 
640
#endif
 
641
 
 
642
//3DNOW versions
 
643
#ifdef COMPILE_3DNOW
 
644
#undef RENAME
 
645
#define HAVE_MMX
 
646
#undef HAVE_MMX2
 
647
#define HAVE_3DNOW
 
648
#define RENAME(a) a ## _3DNow
 
649
#include "postprocess_template.c"
 
650
#endif
 
651
 
 
652
// minor note: the HAVE_xyz macros are messed up after this line, so don't use them
 
653
 
 
654
static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
 
655
        QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
 
656
{
 
657
        PPContext *c= (PPContext *)vc;
 
658
        PPMode *ppMode= (PPMode *)vm;
 
659
        c->ppMode= *ppMode; //FIXME
 
660
 
 
661
        // useing ifs here as they are faster than function pointers allthough the
 
662
        // difference wouldnt be messureable here but its much better because
 
663
        // someone might exchange the cpu whithout restarting mplayer ;)
 
664
#ifdef RUNTIME_CPUDETECT
 
665
#if defined(ARCH_X86)
 
666
        // ordered per speed fasterst first
 
667
        if(c->cpuCaps & PP_CPU_CAPS_MMX2)
 
668
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 
669
        else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
 
670
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 
671
        else if(c->cpuCaps & PP_CPU_CAPS_MMX)
 
672
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 
673
        else
 
674
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 
675
#else
 
676
#ifdef ARCH_POWERPC
 
677
#ifdef HAVE_ALTIVEC
 
678
        if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
 
679
                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 
680
        else
 
681
#endif
 
682
#endif
 
683
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 
684
#endif
 
685
#else //RUNTIME_CPUDETECT
 
686
#ifdef HAVE_MMX2
 
687
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 
688
#elif defined (HAVE_3DNOW)
 
689
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 
690
#elif defined (HAVE_MMX)
 
691
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 
692
#elif defined (HAVE_ALTIVEC)
 
693
                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 
694
#else
 
695
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 
696
#endif
 
697
#endif //!RUNTIME_CPUDETECT
 
698
}
 
699
 
 
700
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
 
701
//        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
 
702
 
 
703
/**
 * Help text for the -pp command line option: lists the available filters
 * with their short/long names and options, followed by usage examples.
 */
char *pp_help=
"Available postprocessing filters:\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"                       n       noluma          luma filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"ha     hadeblock       (2 threshold)           horizontal deblocking filter\n"
"va     vadeblock       (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f        fullyrange     stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a\n"
"fa     fast                                    h1:a,v1:a,dr:a\n"
"ac                                             ha:a:128:7,va:a,dr:a\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
"fq     forceQuant      <quantizer>             force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n";
 
748
 
 
749
pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
 
750
{
 
751
        char temp[GET_MODE_BUFFER_SIZE];
 
752
        char *p= temp;
 
753
        const char *filterDelimiters= ",/";
 
754
        const char *optionDelimiters= ":";
 
755
        struct PPMode *ppMode;
 
756
        char *filterToken;
 
757
 
 
758
        ppMode= av_malloc(sizeof(PPMode));
 
759
 
 
760
        ppMode->lumMode= 0;
 
761
        ppMode->chromMode= 0;
 
762
        ppMode->maxTmpNoise[0]= 700;
 
763
        ppMode->maxTmpNoise[1]= 1500;
 
764
        ppMode->maxTmpNoise[2]= 3000;
 
765
        ppMode->maxAllowedY= 234;
 
766
        ppMode->minAllowedY= 16;
 
767
        ppMode->baseDcDiff= 256/8;
 
768
        ppMode->flatnessThreshold= 56-16-1;
 
769
        ppMode->maxClippedThreshold= 0.01;
 
770
        ppMode->error=0;
 
771
 
 
772
        strncpy(temp, name, GET_MODE_BUFFER_SIZE);
 
773
 
 
774
        av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
 
775
 
 
776
        for(;;){
 
777
                char *filterName;
 
778
                int q= 1000000; //PP_QUALITY_MAX;
 
779
                int chrom=-1;
 
780
                int luma=-1;
 
781
                char *option;
 
782
                char *options[OPTIONS_ARRAY_SIZE];
 
783
                int i;
 
784
                int filterNameOk=0;
 
785
                int numOfUnknownOptions=0;
 
786
                int enable=1; //does the user want us to enabled or disabled the filter
 
787
 
 
788
                filterToken= strtok(p, filterDelimiters);
 
789
                if(filterToken == NULL) break;
 
790
                p+= strlen(filterToken) + 1; // p points to next filterToken
 
791
                filterName= strtok(filterToken, optionDelimiters);
 
792
                av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
 
793
 
 
794
                if(*filterName == '-')
 
795
                {
 
796
                        enable=0;
 
797
                        filterName++;
 
798
                }
 
799
 
 
800
                for(;;){ //for all options
 
801
                        option= strtok(NULL, optionDelimiters);
 
802
                        if(option == NULL) break;
 
803
 
 
804
                        av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
 
805
                        if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
 
806
                        else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
 
807
                        else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
 
808
                        else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
 
809
                        else
 
810
                        {
 
811
                                options[numOfUnknownOptions] = option;
 
812
                                numOfUnknownOptions++;
 
813
                        }
 
814
                        if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
 
815
                }
 
816
                options[numOfUnknownOptions] = NULL;
 
817
 
 
818
                /* replace stuff from the replace Table */
 
819
                for(i=0; replaceTable[2*i]!=NULL; i++)
 
820
                {
 
821
                        if(!strcmp(replaceTable[2*i], filterName))
 
822
                        {
 
823
                                int newlen= strlen(replaceTable[2*i + 1]);
 
824
                                int plen;
 
825
                                int spaceLeft;
 
826
 
 
827
                                if(p==NULL) p= temp, *p=0;      //last filter
 
828
                                else p--, *p=',';               //not last filter
 
829
 
 
830
                                plen= strlen(p);
 
831
                                spaceLeft= p - temp + plen;
 
832
                                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE)
 
833
                                {
 
834
                                        ppMode->error++;
 
835
                                        break;
 
836
                                }
 
837
                                memmove(p + newlen, p, plen+1);
 
838
                                memcpy(p, replaceTable[2*i + 1], newlen);
 
839
                                filterNameOk=1;
 
840
                        }
 
841
                }
 
842
 
 
843
                for(i=0; filters[i].shortName!=NULL; i++)
 
844
                {
 
845
                        if(   !strcmp(filters[i].longName, filterName)
 
846
                           || !strcmp(filters[i].shortName, filterName))
 
847
                        {
 
848
                                ppMode->lumMode &= ~filters[i].mask;
 
849
                                ppMode->chromMode &= ~filters[i].mask;
 
850
 
 
851
                                filterNameOk=1;
 
852
                                if(!enable) break; // user wants to disable it
 
853
 
 
854
                                if(q >= filters[i].minLumQuality && luma)
 
855
                                        ppMode->lumMode|= filters[i].mask;
 
856
                                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
 
857
                                        if(q >= filters[i].minChromQuality)
 
858
                                                ppMode->chromMode|= filters[i].mask;
 
859
 
 
860
                                if(filters[i].mask == LEVEL_FIX)
 
861
                                {
 
862
                                        int o;
 
863
                                        ppMode->minAllowedY= 16;
 
864
                                        ppMode->maxAllowedY= 234;
 
865
                                        for(o=0; options[o]!=NULL; o++)
 
866
                                        {
 
867
                                                if(  !strcmp(options[o],"fullyrange")
 
868
                                                   ||!strcmp(options[o],"f"))
 
869
                                                {
 
870
                                                        ppMode->minAllowedY= 0;
 
871
                                                        ppMode->maxAllowedY= 255;
 
872
                                                        numOfUnknownOptions--;
 
873
                                                }
 
874
                                        }
 
875
                                }
 
876
                                else if(filters[i].mask == TEMP_NOISE_FILTER)
 
877
                                {
 
878
                                        int o;
 
879
                                        int numOfNoises=0;
 
880
 
 
881
                                        for(o=0; options[o]!=NULL; o++)
 
882
                                        {
 
883
                                                char *tail;
 
884
                                                ppMode->maxTmpNoise[numOfNoises]=
 
885
                                                        strtol(options[o], &tail, 0);
 
886
                                                if(tail!=options[o])
 
887
                                                {
 
888
                                                        numOfNoises++;
 
889
                                                        numOfUnknownOptions--;
 
890
                                                        if(numOfNoises >= 3) break;
 
891
                                                }
 
892
                                        }
 
893
                                }
 
894
                                else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
 
895
                                     || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
 
896
                                {
 
897
                                        int o;
 
898
 
 
899
                                        for(o=0; options[o]!=NULL && o<2; o++)
 
900
                                        {
 
901
                                                char *tail;
 
902
                                                int val= strtol(options[o], &tail, 0);
 
903
                                                if(tail==options[o]) break;
 
904
 
 
905
                                                numOfUnknownOptions--;
 
906
                                                if(o==0) ppMode->baseDcDiff= val;
 
907
                                                else ppMode->flatnessThreshold= val;
 
908
                                        }
 
909
                                }
 
910
                                else if(filters[i].mask == FORCE_QUANT)
 
911
                                {
 
912
                                        int o;
 
913
                                        ppMode->forcedQuant= 15;
 
914
 
 
915
                                        for(o=0; options[o]!=NULL && o<1; o++)
 
916
                                        {
 
917
                                                char *tail;
 
918
                                                int val= strtol(options[o], &tail, 0);
 
919
                                                if(tail==options[o]) break;
 
920
 
 
921
                                                numOfUnknownOptions--;
 
922
                                                ppMode->forcedQuant= val;
 
923
                                        }
 
924
                                }
 
925
                        }
 
926
                }
 
927
                if(!filterNameOk) ppMode->error++;
 
928
                ppMode->error += numOfUnknownOptions;
 
929
        }
 
930
 
 
931
        av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
 
932
        if(ppMode->error)
 
933
        {
 
934
                av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
 
935
                av_free(ppMode);
 
936
                return NULL;
 
937
        }
 
938
        return ppMode;
 
939
}
 
940
 
 
941
/**
 * Release a mode previously returned by pp_get_mode_by_name_and_quality().
 *
 * @param mode mode to free; handed straight to av_free()
 */
void pp_free_mode(pp_mode_t *mode)
{
    av_free(mode);
}
 
944
 
 
945
/**
 * Replace the buffer referenced by *p with a fresh zero-filled one.
 *
 * The old contents are not preserved: the previous buffer is freed before
 * the new one is allocated.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment requested alignment; not used by this implementation
 * @param size      size in bytes of the new buffer
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    av_free(*p);
    fresh= av_mallocz(size);
    *p= fresh;
}
 
949
 
 
950
/**
 * (Re)allocate every work buffer of a context for the given geometry.
 *
 * All buffers are replaced by zero-filled ones (previous contents are
 * dropped) and the strides stored in the context are updated.
 *
 * @param c        context whose buffers are replaced
 * @param width    picture width in pixels
 * @param height   picture height in pixels
 * @param stride   line size the temporary picture buffers are sized for
 * @param qpStride line size the QP tables are sized for
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
    int mbWidth = (width+15)>>4;   // dimensions in 16x16 blocks, rounded up
    int mbHeight= (height+15)>>4;
    int i;

    c->stride= stride;
    c->qpStride= qpStride;

    reallocAlign((void **)&c->tempDst, 8, stride*24);
    reallocAlign((void **)&c->tempSrc, 8, stride*24);
    reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
    reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
    // Seed every histogram bin with the same value; skip this if the
    // allocation failed instead of writing through a NULL pointer.
    if(c->yHistogram != NULL)
    {
        const int histStart= width*height/64*15/256;
        for(i=0; i<256; i++)
            c->yHistogram[i]= histStart;
    }

    for(i=0; i<3; i++)
    {
        //Note: the +17*1024 is just slack so reads/writes past the end are harmless
        reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
        reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
    }

    reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
    reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
}
 
977
 
 
978
static void global_init(void){
 
979
        int i;
 
980
        memset(clip_table, 0, 256);
 
981
        for(i=256; i<512; i++)
 
982
                clip_table[i]= i;
 
983
        memset(clip_table+512, 0, 256);
 
984
}
 
985
 
 
986
/**
 * Name callback stored in av_codec_context_class.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char * context_to_name(void * ptr)
{
    static const char *const label= "postproc";

    return label;
}
 
989
 
 
990
/* Class descriptor installed in every PPContext by pp_get_context(). */
static AVClass av_codec_context_class = {
    "Postproc",
    context_to_name,
    NULL
};
 
991
 
 
992
pp_context_t *pp_get_context(int width, int height, int cpuCaps){
 
993
        PPContext *c= av_malloc(sizeof(PPContext));
 
994
        int stride= (width+15)&(~15);    //assumed / will realloc if needed
 
995
        int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
 
996
 
 
997
        global_init();
 
998
 
 
999
        memset(c, 0, sizeof(PPContext));
 
1000
        c->av_class = &av_codec_context_class;
 
1001
        c->cpuCaps= cpuCaps;
 
1002
        if(cpuCaps&PP_FORMAT){
 
1003
                c->hChromaSubSample= cpuCaps&0x3;
 
1004
                c->vChromaSubSample= (cpuCaps>>4)&0x3;
 
1005
        }else{
 
1006
                c->hChromaSubSample= 1;
 
1007
                c->vChromaSubSample= 1;
 
1008
        }
 
1009
 
 
1010
        reallocBuffers(c, width, height, stride, qpStride);
 
1011
 
 
1012
        c->frameNum=-1;
 
1013
 
 
1014
        return c;
 
1015
}
 
1016
 
 
1017
void pp_free_context(void *vc){
 
1018
        PPContext *c = (PPContext*)vc;
 
1019
        int i;
 
1020
 
 
1021
        for(i=0; i<3; i++) av_free(c->tempBlured[i]);
 
1022
        for(i=0; i<3; i++) av_free(c->tempBluredPast[i]);
 
1023
 
 
1024
        av_free(c->tempBlocks);
 
1025
        av_free(c->yHistogram);
 
1026
        av_free(c->tempDst);
 
1027
        av_free(c->tempSrc);
 
1028
        av_free(c->deintTemp);
 
1029
        av_free(c->stdQPTable);
 
1030
        av_free(c->nonBQPTable);
 
1031
        av_free(c->forcedQPTable);
 
1032
 
 
1033
        memset(c, 0, sizeof(PPContext));
 
1034
 
 
1035
        av_free(c);
 
1036
}
 
1037
 
 
1038
void  pp_postprocess(uint8_t * src[3], int srcStride[3],
 
1039
                 uint8_t * dst[3], int dstStride[3],
 
1040
                 int width, int height,
 
1041
                 QP_STORE_T *QP_store,  int QPStride,
 
1042
                 pp_mode_t *vm,  void *vc, int pict_type)
 
1043
{
 
1044
        int mbWidth = (width+15)>>4;
 
1045
        int mbHeight= (height+15)>>4;
 
1046
        PPMode *mode = (PPMode*)vm;
 
1047
        PPContext *c = (PPContext*)vc;
 
1048
        int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
 
1049
        int absQPStride = FFABS(QPStride);
 
1050
 
 
1051
        // c->stride and c->QPStride are always positive
 
1052
        if(c->stride < minStride || c->qpStride < absQPStride)
 
1053
                reallocBuffers(c, width, height,
 
1054
                                FFMAX(minStride, c->stride),
 
1055
                                FFMAX(c->qpStride, absQPStride));
 
1056
 
 
1057
        if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
 
1058
        {
 
1059
                int i;
 
1060
                QP_store= c->forcedQPTable;
 
1061
                absQPStride = QPStride = 0;
 
1062
                if(mode->lumMode & FORCE_QUANT)
 
1063
                        for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
 
1064
                else
 
1065
                        for(i=0; i<mbWidth; i++) QP_store[i]= 1;
 
1066
        }
 
1067
 
 
1068
        if(pict_type & PP_PICT_TYPE_QP2){
 
1069
                int i;
 
1070
                const int count= mbHeight * absQPStride;
 
1071
                for(i=0; i<(count>>2); i++){
 
1072
                        ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
 
1073
                }
 
1074
                for(i<<=2; i<count; i++){
 
1075
                        c->stdQPTable[i] = QP_store[i]>>1;
 
1076
                }
 
1077
                QP_store= c->stdQPTable;
 
1078
                QPStride= absQPStride;
 
1079
        }
 
1080
 
 
1081
if(0){
 
1082
int x,y;
 
1083
for(y=0; y<mbHeight; y++){
 
1084
        for(x=0; x<mbWidth; x++){
 
1085
                av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
 
1086
        }
 
1087
        av_log(c, AV_LOG_INFO, "\n");
 
1088
}
 
1089
        av_log(c, AV_LOG_INFO, "\n");
 
1090
}
 
1091
 
 
1092
        if((pict_type&7)!=3)
 
1093
        {
 
1094
                if (QPStride >= 0) {
 
1095
                        int i;
 
1096
                        const int count= mbHeight * QPStride;
 
1097
                        for(i=0; i<(count>>2); i++){
 
1098
                                ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
 
1099
                        }
 
1100
                        for(i<<=2; i<count; i++){
 
1101
                                c->nonBQPTable[i] = QP_store[i] & 0x3F;
 
1102
                        }
 
1103
                } else {
 
1104
                        int i,j;
 
1105
                        for(i=0; i<mbHeight; i++) {
 
1106
                                    for(j=0; j<absQPStride; j++) {
 
1107
                                        c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
 
1108
                                }
 
1109
                        }
 
1110
                }
 
1111
        }
 
1112
 
 
1113
        av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
 
1114
               mode->lumMode, mode->chromMode);
 
1115
 
 
1116
        postProcess(src[0], srcStride[0], dst[0], dstStride[0],
 
1117
                width, height, QP_store, QPStride, 0, mode, c);
 
1118
 
 
1119
        width  = (width )>>c->hChromaSubSample;
 
1120
        height = (height)>>c->vChromaSubSample;
 
1121
 
 
1122
        if(mode->chromMode)
 
1123
        {
 
1124
                postProcess(src[1], srcStride[1], dst[1], dstStride[1],
 
1125
                        width, height, QP_store, QPStride, 1, mode, c);
 
1126
                postProcess(src[2], srcStride[2], dst[2], dstStride[2],
 
1127
                        width, height, QP_store, QPStride, 2, mode, c);
 
1128
        }
 
1129
        else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
 
1130
        {
 
1131
                linecpy(dst[1], src[1], height, srcStride[1]);
 
1132
                linecpy(dst[2], src[2], height, srcStride[2]);
 
1133
        }
 
1134
        else
 
1135
        {
 
1136
                int y;
 
1137
                for(y=0; y<height; y++)
 
1138
                {
 
1139
                        memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
 
1140
                        memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
 
1141
                }
 
1142
        }
 
1143
}
 
1144