~ubuntu-branches/ubuntu/hardy/avidemux/hardy

« back to all changes in this revision

Viewing changes to adm_lavcodec/libpostproc/postprocess.c

  • Committer: Bazaar Package Importer
  • Author(s): Daniel T Chen
  • Date: 2006-12-15 17:13:20 UTC
  • mfrom: (1.1.6 upstream)
  • Revision ID: james.westby@ubuntu.com-20061215171320-w79pvpehxx2fr217
Tags: 1:2.3.0-0.0ubuntu1
* Merge from debian-multimedia.org, remaining Ubuntu change:
  - desktop file,
  - no support for ccache and make -j.
* Closes Ubuntu: #69614.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/*
2
 
    Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3
 
 
4
 
    AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5
 
 
6
 
    This program is free software; you can redistribute it and/or modify
7
 
    it under the terms of the GNU General Public License as published by
8
 
    the Free Software Foundation; either version 2 of the License, or
9
 
    (at your option) any later version.
10
 
 
11
 
    This program is distributed in the hope that it will be useful,
12
 
    but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 
    GNU General Public License for more details.
15
 
 
16
 
    You should have received a copy of the GNU General Public License
17
 
    along with this program; if not, write to the Free Software
18
 
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19
 
*/
20
 
 
21
 
/**
22
 
 * @file postprocess.c
23
 
 * postprocessing.
24
 
 */
25
 
 
26
 
/*
27
 
                        C       MMX     MMX2    3DNow   AltiVec
28
 
isVertDC                Ec      Ec                      Ec
29
 
isVertMinMaxOk          Ec      Ec                      Ec
30
 
doVertLowPass           E               e       e       Ec
31
 
doVertDefFilter         Ec      Ec      e       e       Ec
32
 
isHorizDC               Ec      Ec                      Ec
33
 
isHorizMinMaxOk         a       E                       Ec
34
 
doHorizLowPass          E               e       e       Ec
35
 
doHorizDefFilter        Ec      Ec      e       e       Ec
36
 
do_a_deblock            Ec      E       Ec      E
37
 
deRing                  E               e       e*      Ecp
38
 
Vertical RKAlgo1        E               a       a
39
 
Horizontal RKAlgo1                      a       a
40
 
Vertical X1#            a               E       E
41
 
Horizontal X1#          a               E       E
42
 
LinIpolDeinterlace      e               E       E*
43
 
CubicIpolDeinterlace    a               e       e*
44
 
LinBlendDeinterlace     e               E       E*
45
 
MedianDeinterlace#      E       Ec      Ec
46
 
TempDeNoiser#           E               e       e       Ec
47
 
 
48
 
* I don't have a 3DNow CPU -> it's untested, but no one said it doesn't work, so it seems to work
49
 
# more or less self-invented filters, so the exactness isn't too meaningful
50
 
E = Exact implementation
51
 
e = almost exact implementation (slightly different rounding, ...)
52
 
a = alternative / approximate impl
53
 
c = checked against the other implementations (-vo md5)
54
 
p = partially optimized, still some work to do
55
 
*/
56
 
 
57
 
/*
58
 
TODO:
59
 
reduce the time wasted on the mem transfer
60
 
unroll stuff if instructions depend too much on the prior one
61
 
move YScale thing to the end instead of fixing QP
62
 
write a faster and higher quality deblocking filter :)
63
 
make the mainloop more flexible (variable number of blocks at once
64
 
        (the if/else stuff per block is slowing things down)
65
 
compare the quality & speed of all filters
66
 
split this huge file
67
 
optimize c versions
68
 
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
69
 
...
70
 
*/
71
 
 
72
 
//Changelog: use the CVS log
73
 
 
74
 
#include "config.h"
75
 
#include <inttypes.h>
76
 
#include <stdio.h>
77
 
#include <stdlib.h>
78
 
#include <string.h>
79
 
#ifdef HAVE_MALLOC_H
80
 
#include <malloc.h>
81
 
#endif
82
 
//#undef HAVE_MMX2
83
 
//#define HAVE_3DNOW
84
 
//#undef HAVE_MMX
85
 
//#undef ARCH_X86
86
 
//#define DEBUG_BRIGHTNESS
87
 
#ifdef USE_FASTMEMCPY
88
 
#include "fastmemcpy.h"
89
 
#endif
90
 
#include "postprocess.h"
91
 
#include "postprocess_internal.h"
92
 
 
93
 
#include "mangle.h" //FIXME should be supressed
94
 
 
95
 
#ifdef HAVE_ALTIVEC_H
96
 
#include <altivec.h>
97
 
#endif
98
 
 
99
 
#ifndef HAVE_MEMALIGN
100
 
#define memalign(a,b) malloc(b)
101
 
#endif
102
 
 
103
 
#define MIN(a,b) ((a) > (b) ? (b) : (a))
104
 
#define MAX(a,b) ((a) < (b) ? (b) : (a))
105
 
#define ABS(a) ((a) > 0 ? (a) : (-(a)))
106
 
#define SIGN(a) ((a) > 0 ? 1 : -1)
107
 
 
108
 
#define GET_MODE_BUFFER_SIZE 500
109
 
#define OPTIONS_ARRAY_SIZE 10
110
 
#define BLOCK_SIZE 8
111
 
#define TEMP_STRIDE 8
112
 
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
113
 
 
114
 
#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
115
 
#    define attribute_used __attribute__((used))
116
 
#    define always_inline __attribute__((always_inline)) inline
117
 
#else
118
 
#    define attribute_used
119
 
#    define always_inline inline
120
 
#endif
121
 
 
122
 
#if defined(ARCH_X86) || defined(ARCH_X86_64)
123
 
static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005LL;
124
 
static uint64_t __attribute__((aligned(8))) attribute_used w04= 0x0004000400040004LL;
125
 
static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020LL;
126
 
static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000LL;
127
 
static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101LL;
128
 
static uint64_t __attribute__((aligned(8))) attribute_used b02= 0x0202020202020202LL;
129
 
static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x0808080808080808LL;
130
 
static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080LL;
131
 
#endif
132
 
 
133
 
static uint8_t clip_table[3*256];
134
 
static uint8_t * const clip_tab= clip_table + 256;
135
 
 
136
 
static const int verbose= 0;
137
 
 
138
 
static const int attribute_used deringThreshold= 20;
139
 
 
140
 
 
141
 
static struct PPFilter filters[]=
142
 
{
143
 
        {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
144
 
        {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
145
 
/*      {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
146
 
        {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
147
 
        {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
148
 
        {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
149
 
        {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
150
 
        {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
151
 
        {"dr", "dering",                1, 5, 6, DERING},
152
 
        {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
153
 
        {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
154
 
        {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
155
 
        {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
156
 
        {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
157
 
        {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
158
 
        {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
159
 
        {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
160
 
        {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
161
 
        {NULL, NULL,0,0,0,0} //End Marker
162
 
};
163
 
 
164
 
static char *replaceTable[]=
165
 
{
166
 
        "default",      "hdeblock:a,vdeblock:a,dering:a",
167
 
        "de",           "hdeblock:a,vdeblock:a,dering:a",
168
 
        "fast",         "x1hdeblock:a,x1vdeblock:a,dering:a",
169
 
        "fa",           "x1hdeblock:a,x1vdeblock:a,dering:a",
170
 
        "ac",           "ha:a:128:7,va:a,dering:a",
171
 
        NULL //End Marker
172
 
};
173
 
 
174
 
 
175
 
#if defined(ARCH_X86) || defined(ARCH_X86_64)
176
 
/**
 * Execute the x86 "prefetchnta" instruction on the address held in p.
 *
 * The asm statement declares no outputs and no clobbers; p is only passed in
 * a register as the address operand.
 *
 * @param p address handed to the prefetch instruction
 */
static inline void prefetchnta(void *p)
{
        __asm__ __volatile__(
                "prefetchnta (%0)\n\t"
                : /* no outputs */
                : "r" (p)
        );
}
182
 
 
183
 
/**
 * Execute the x86 "prefetcht0" instruction on the address held in p.
 *
 * The asm statement declares no outputs and no clobbers; p is only passed in
 * a register as the address operand.
 *
 * @param p address handed to the prefetch instruction
 */
static inline void prefetcht0(void *p)
{
        __asm__ __volatile__(
                "prefetcht0 (%0)\n\t"
                : /* no outputs */
                : "r" (p)
        );
}
189
 
 
190
 
/**
 * Execute the x86 "prefetcht1" instruction on the address held in p.
 *
 * The asm statement declares no outputs and no clobbers; p is only passed in
 * a register as the address operand.
 *
 * @param p address handed to the prefetch instruction
 */
static inline void prefetcht1(void *p)
{
        __asm__ __volatile__(
                "prefetcht1 (%0)\n\t"
                : /* no outputs */
                : "r" (p)
        );
}
196
 
 
197
 
/**
 * Execute the x86 "prefetcht2" instruction on the address held in p.
 *
 * The asm statement declares no outputs and no clobbers; p is only passed in
 * a register as the address operand.
 *
 * @param p address handed to the prefetch instruction
 */
static inline void prefetcht2(void *p)
{
        __asm__ __volatile__(
                "prefetcht2 (%0)\n\t"
                : /* no outputs */
                : "r" (p)
        );
}
203
 
#endif
204
 
 
205
 
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
206
 
 
207
 
/**
208
 
 * Check if the given 8x8 Block is mostly "flat"
209
 
 */
210
 
static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
211
 
{
212
 
        int numEq= 0;
213
 
        int y;
214
 
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
215
 
        const int dcThreshold= dcOffset*2 + 1;
216
 
 
217
 
        for(y=0; y<BLOCK_SIZE; y++)
218
 
        {
219
 
                if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
220
 
                if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
221
 
                if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
222
 
                if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
223
 
                if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
224
 
                if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
225
 
                if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
226
 
                src+= stride;
227
 
        }
228
 
        return numEq > c->ppMode.flatnessThreshold;
229
 
}
230
 
 
231
 
/**
232
 
 * Check if the middle 8x8 Block in the given 8x16 block is flat
233
 
 */
234
 
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
235
 
        int numEq= 0;
236
 
        int y;
237
 
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
238
 
        const int dcThreshold= dcOffset*2 + 1;
239
 
 
240
 
        src+= stride*4; // src points to begin of the 8x8 Block
241
 
        for(y=0; y<BLOCK_SIZE-1; y++)
242
 
        {
243
 
                if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
244
 
                if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
245
 
                if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
246
 
                if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
247
 
                if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
248
 
                if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
249
 
                if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
250
 
                if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
251
 
                src+= stride;
252
 
        }
253
 
        return numEq > c->ppMode.flatnessThreshold;
254
 
}
255
 
 
256
 
/**
 * Check that sampled horizontal pixel differences of an 8x8 block stay
 * within a QP dependent range.
 *
 * One pixel pair is tested per row; the pair rotates through the columns
 * (0,5), (2,7), (4,1), (6,3) and repeats for the lower four rows. A pair
 * fails when (a - b + 2*QP), reinterpreted as unsigned, is greater than 4*QP.
 *
 * @param src    first pixel of the block
 * @param stride distance between vertically adjacent pixels
 * @param QP     quantizer used to scale the allowed range
 * @return 1 if every tested pair is within range, 0 as soon as one is not
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
        const int bias= 2*QP;
        const unsigned limit= 4*QP;
        int row;
#if 1
        for(row=0; row<8; row++){
                const int a= 2*(row&3); // 0, 2, 4, 6, 0, 2, 4, 6
                const int b= (a+5)&7;   // 5, 7, 1, 3, 5, 7, 1, 3

                if((unsigned)(src[a] - src[b] + bias) > limit) return 0;
                src+= stride;
        }
#else
        // disabled alternative: compare first and last pixel of every row
        for(row=0; row<8; row++){
                if((unsigned)(src[0] - src[7] + bias) > limit) return 0;
                src+= stride;
        }
#endif
        return 1;
}
278
 
 
279
 
static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
280
 
{
281
 
#if 1
282
 
#if 1
283
 
        int x;
284
 
        src+= stride*4;
285
 
        for(x=0; x<BLOCK_SIZE; x+=4)
286
 
        {
287
 
                if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
288
 
                if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
289
 
                if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
290
 
                if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
291
 
        }
292
 
#else
293
 
        int x;
294
 
        src+= stride*3;
295
 
        for(x=0; x<BLOCK_SIZE; x++)
296
 
        {
297
 
                if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
298
 
        }
299
 
#endif
300
 
        return 1;
301
 
#else
302
 
        int x;
303
 
        src+= stride*4;
304
 
        for(x=0; x<BLOCK_SIZE; x++)
305
 
        {
306
 
                int min=255;
307
 
                int max=0;
308
 
                int y;
309
 
                for(y=0; y<8; y++){
310
 
                        int v= src[x + y*stride];
311
 
                        if(v>max) max=v;
312
 
                        if(v<min) min=v;
313
 
                }
314
 
                if(max-min > 2*QP) return 0;
315
 
        }
316
 
        return 1;
317
 
#endif
318
 
}
319
 
 
320
 
/**
 * Classify a block for horizontal filtering.
 *
 * @param src    first pixel of the block
 * @param stride distance between vertically adjacent pixels
 * @param c      context, passed on to isHorizDC_C(); c->QP is passed to
 *               isHorizMinMaxOk_C()
 * @return 2 if isHorizDC_C() reports the block as not flat,
 *         1 if it is flat and isHorizMinMaxOk_C() accepts it,
 *         0 if it is flat but isHorizMinMaxOk_C() rejects it
 */
static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
        if( !isHorizDC_C(src, stride, c) )
                return 2;

        return isHorizMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
330
 
 
331
 
/**
 * Classify a block for vertical filtering.
 *
 * @param src    first pixel of the area
 * @param stride distance between vertically adjacent pixels
 * @param c      context, passed on to isVertDC_C(); c->QP is passed to
 *               isVertMinMaxOk_C()
 * @return 2 if isVertDC_C() reports the block as not flat,
 *         1 if it is flat and isVertMinMaxOk_C() accepts it,
 *         0 if it is flat but isVertMinMaxOk_C() rejects it
 */
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
        if( !isVertDC_C(src, stride, c) )
                return 2;

        return isVertMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
341
 
 
342
 
/**
 * Default horizontal deblocking filter (C version).
 *
 * For each of the BLOCK_SIZE rows an "energy" value is computed around the
 * pixels dst[3] and dst[4]. If its magnitude is below 8*c->QP, a correction is
 * derived from it and from the energies of the left and right neighbourhood,
 * limited to the range between 0 and (dst[3] - dst[4])/2, and then subtracted
 * from dst[3] and added to dst[4]. No other pixels are written.
 *
 * @param dst    first pixel of the 8 pixel wide row group; modified in place
 * @param stride distance between vertically adjacent pixels
 * @param c      context; only c->QP is read
 */
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
{
        int row;

        for(row=0; row<BLOCK_SIZE; row++, dst+= stride)
        {
                const int midEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
                int halfStep, leftE, rightE, corr;

                if(ABS(midEnergy) >= 8*c->QP)
                        continue;

                halfStep= (dst[3] - dst[4])/2;
                leftE=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
                rightE= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);

                corr= ABS(midEnergy) - MIN( ABS(leftE), ABS(rightE) );
                corr= MAX(corr, 0);

                corr= (5*corr + 32) >> 6;
                corr*= SIGN(-midEnergy);

                // keep the correction between 0 and halfStep, whatever the sign of halfStep
                if(halfStep > 0)
                {
                        if(corr < 0)        corr= 0;
                        if(corr > halfStep) corr= halfStep;
                }
                else
                {
                        if(corr > 0)        corr= 0;
                        if(corr < halfStep) corr= halfStep;
                }

                dst[3]-= corr;
                dst[4]+= corr;
        }
}
378
 
 
379
 
/**
380
 
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
381
 
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
382
 
 */
383
 
static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
384
 
{
385
 
        int y;
386
 
        for(y=0; y<BLOCK_SIZE; y++)
387
 
        {
388
 
                const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
389
 
                const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
390
 
 
391
 
                int sums[10];
392
 
                sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
393
 
                sums[1] = sums[0] - first  + dst[3];
394
 
                sums[2] = sums[1] - first  + dst[4];
395
 
                sums[3] = sums[2] - first  + dst[5];
396
 
                sums[4] = sums[3] - first  + dst[6];
397
 
                sums[5] = sums[4] - dst[0] + dst[7];
398
 
                sums[6] = sums[5] - dst[1] + last;
399
 
                sums[7] = sums[6] - dst[2] + last;
400
 
                sums[8] = sums[7] - dst[3] + last;
401
 
                sums[9] = sums[8] - dst[4] + last;
402
 
 
403
 
                dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
404
 
                dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
405
 
                dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
406
 
                dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
407
 
                dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
408
 
                dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
409
 
                dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
410
 
                dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
411
 
 
412
 
                dst+= stride;
413
 
        }
414
 
}
415
 
 
416
 
/**
417
 
 * Experimental Filter 1 (Horizontal)
418
 
 * will not damage linear gradients
419
 
 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
420
 
 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
421
 
 * MMX2 version does correct clipping C version doesnt
422
 
 * not identical with the vertical one
423
 
 */
424
 
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
425
 
{
426
 
        int y;
427
 
        static uint64_t *lut= NULL;
428
 
        if(lut==NULL)
429
 
        {
430
 
                int i;
431
 
                lut= (uint64_t*)memalign(8, 256*8);
432
 
                for(i=0; i<256; i++)
433
 
                {
434
 
                        int v= i < 128 ? 2*i : 2*(i-256);
435
 
/*
436
 
//Simulate 112242211 9-Tap filter
437
 
                        uint64_t a= (v/16) & 0xFF;
438
 
                        uint64_t b= (v/8) & 0xFF;
439
 
                        uint64_t c= (v/4) & 0xFF;
440
 
                        uint64_t d= (3*v/8) & 0xFF;
441
 
*/
442
 
//Simulate piecewise linear interpolation
443
 
                        uint64_t a= (v/16) & 0xFF;
444
 
                        uint64_t b= (v*3/16) & 0xFF;
445
 
                        uint64_t c= (v*5/16) & 0xFF;
446
 
                        uint64_t d= (7*v/16) & 0xFF;
447
 
                        uint64_t A= (0x100 - a)&0xFF;
448
 
                        uint64_t B= (0x100 - b)&0xFF;
449
 
                        uint64_t C= (0x100 - c)&0xFF;
450
 
                        uint64_t D= (0x100 - c)&0xFF;
451
 
 
452
 
                        lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
453
 
                                (D<<24) | (C<<16) | (B<<8) | (A);
454
 
                        //lut[i] = (v<<32) | (v<<24);
455
 
                }
456
 
        }
457
 
 
458
 
        for(y=0; y<BLOCK_SIZE; y++)
459
 
        {
460
 
                int a= src[1] - src[2];
461
 
                int b= src[3] - src[4];
462
 
                int c= src[5] - src[6];
463
 
 
464
 
                int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
465
 
 
466
 
                if(d < QP)
467
 
                {
468
 
                        int v = d * SIGN(-b);
469
 
 
470
 
                        src[1] +=v/8;
471
 
                        src[2] +=v/4;
472
 
                        src[3] +=3*v/8;
473
 
                        src[4] -=3*v/8;
474
 
                        src[5] -=v/4;
475
 
                        src[6] -=v/8;
476
 
 
477
 
                }
478
 
                src+=stride;
479
 
        }
480
 
}
481
 
 
482
 
/**
483
 
 * accurate deblock filter
484
 
 */
485
 
static always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
486
 
        int y;
487
 
        const int QP= c->QP;
488
 
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
489
 
        const int dcThreshold= dcOffset*2 + 1;
490
 
//START_TIMER
491
 
        src+= step*4; // src points to begin of the 8x8 Block
492
 
        for(y=0; y<8; y++){
493
 
                int numEq= 0;
494
 
 
495
 
                if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
496
 
                if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
497
 
                if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
498
 
                if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
499
 
                if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
500
 
                if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
501
 
                if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
502
 
                if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
503
 
                if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
504
 
                if(numEq > c->ppMode.flatnessThreshold){
505
 
                        int min, max, x;
506
 
 
507
 
                        if(src[0] > src[step]){
508
 
                            max= src[0];
509
 
                            min= src[step];
510
 
                        }else{
511
 
                            max= src[step];
512
 
                            min= src[0];
513
 
                        }
514
 
                        for(x=2; x<8; x+=2){
515
 
                                if(src[x*step] > src[(x+1)*step]){
516
 
                                        if(src[x    *step] > max) max= src[ x   *step];
517
 
                                        if(src[(x+1)*step] < min) min= src[(x+1)*step];
518
 
                                }else{
519
 
                                        if(src[(x+1)*step] > max) max= src[(x+1)*step];
520
 
                                        if(src[ x   *step] < min) min= src[ x   *step];
521
 
                                }
522
 
                        }
523
 
                        if(max-min < 2*QP){
524
 
                                const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
525
 
                                const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
526
 
 
527
 
                                int sums[10];
528
 
                                sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
529
 
                                sums[1] = sums[0] - first       + src[3*step];
530
 
                                sums[2] = sums[1] - first       + src[4*step];
531
 
                                sums[3] = sums[2] - first       + src[5*step];
532
 
                                sums[4] = sums[3] - first       + src[6*step];
533
 
                                sums[5] = sums[4] - src[0*step] + src[7*step];
534
 
                                sums[6] = sums[5] - src[1*step] + last;
535
 
                                sums[7] = sums[6] - src[2*step] + last;
536
 
                                sums[8] = sums[7] - src[3*step] + last;
537
 
                                sums[9] = sums[8] - src[4*step] + last;
538
 
 
539
 
                                src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
540
 
                                src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
541
 
                                src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
542
 
                                src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
543
 
                                src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
544
 
                                src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
545
 
                                src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
546
 
                                src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
547
 
                        }
548
 
                }else{
549
 
                        const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
550
 
 
551
 
                        if(ABS(middleEnergy) < 8*QP)
552
 
                        {
553
 
                                const int q=(src[3*step] - src[4*step])/2;
554
 
                                const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
555
 
                                const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
556
 
 
557
 
                                int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
558
 
                                d= MAX(d, 0);
559
 
 
560
 
                                d= (5*d + 32) >> 6;
561
 
                                d*= SIGN(-middleEnergy);
562
 
 
563
 
                                if(q>0)
564
 
                                {
565
 
                                        d= d<0 ? 0 : d;
566
 
                                        d= d>q ? q : d;
567
 
                                }
568
 
                                else
569
 
                                {
570
 
                                        d= d>0 ? 0 : d;
571
 
                                        d= d<q ? q : d;
572
 
                                }
573
 
 
574
 
                                src[3*step]-= d;
575
 
                                src[4*step]+= d;
576
 
                        }
577
 
                }
578
 
 
579
 
                src += stride;
580
 
        }
581
 
/*if(step==16){
582
 
    STOP_TIMER("step16")
583
 
}else{
584
 
    STOP_TIMER("stepX")
585
 
}*/
586
 
}
587
 
 
588
 
//Note: we have C, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one
589
 
//Plain C versions
590
 
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
591
 
#define COMPILE_C
592
 
#endif
593
 
 
594
 
#ifdef ARCH_POWERPC
595
 
#ifdef HAVE_ALTIVEC
596
 
#define COMPILE_ALTIVEC
597
 
#endif //HAVE_ALTIVEC
598
 
#endif //ARCH_POWERPC
599
 
 
600
 
#if defined(ARCH_X86) || defined(ARCH_X86_64)
601
 
 
602
 
#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
603
 
#define COMPILE_MMX
604
 
#endif
605
 
 
606
 
#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
607
 
#define COMPILE_MMX2
608
 
#endif
609
 
 
610
 
#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
611
 
#define COMPILE_3DNOW
612
 
#endif
613
 
#endif //ARCH_X86
614
 
 
615
 
#undef HAVE_MMX
616
 
#undef HAVE_MMX2
617
 
#undef HAVE_3DNOW
618
 
#undef HAVE_ALTIVEC
619
 
 
620
 
#ifdef COMPILE_C
621
 
#undef HAVE_MMX
622
 
#undef HAVE_MMX2
623
 
#undef HAVE_3DNOW
624
 
#define RENAME(a) a ## _C
625
 
#include "postprocess_template.c"
626
 
#endif
627
 
 
628
 
#ifdef ARCH_POWERPC
629
 
#ifdef COMPILE_ALTIVEC
630
 
#undef RENAME
631
 
#define HAVE_ALTIVEC
632
 
#define RENAME(a) a ## _altivec
633
 
#include "postprocess_altivec_template.c"
634
 
#include "postprocess_template.c"
635
 
#endif
636
 
#endif //ARCH_POWERPC
637
 
 
638
 
//MMX versions
639
 
#ifdef COMPILE_MMX
640
 
#undef RENAME
641
 
#define HAVE_MMX
642
 
#undef HAVE_MMX2
643
 
#undef HAVE_3DNOW
644
 
#define RENAME(a) a ## _MMX
645
 
#include "postprocess_template.c"
646
 
#endif
647
 
 
648
 
//MMX2 versions
649
 
#ifdef COMPILE_MMX2
650
 
#undef RENAME
651
 
#define HAVE_MMX
652
 
#define HAVE_MMX2
653
 
#undef HAVE_3DNOW
654
 
#define RENAME(a) a ## _MMX2
655
 
#include "postprocess_template.c"
656
 
#endif
657
 
 
658
 
//3DNOW versions
659
 
#ifdef COMPILE_3DNOW
660
 
#undef RENAME
661
 
#define HAVE_MMX
662
 
#undef HAVE_MMX2
663
 
#define HAVE_3DNOW
664
 
#define RENAME(a) a ## _3DNow
665
 
#include "postprocess_template.c"
666
 
#endif
667
 
 
668
 
// minor note: the HAVE_xyz macros are messed up after this line, so don't use them
669
 
 
670
 
static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
671
 
        QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
672
 
{
673
 
        PPContext *c= (PPContext *)vc;
674
 
        PPMode *ppMode= (PPMode *)vm;
675
 
        c->ppMode= *ppMode; //FIXME
676
 
 
677
 
        // useing ifs here as they are faster than function pointers allthough the
678
 
        // difference wouldnt be messureable here but its much better because
679
 
        // someone might exchange the cpu whithout restarting mplayer ;)
680
 
#ifdef RUNTIME_CPUDETECT
681
 
#if defined(ARCH_X86) || defined(ARCH_X86_64)
682
 
        // ordered per speed fasterst first
683
 
        if(c->cpuCaps & PP_CPU_CAPS_MMX2)
684
 
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
685
 
        else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
686
 
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
687
 
        else if(c->cpuCaps & PP_CPU_CAPS_MMX)
688
 
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
689
 
        else
690
 
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
691
 
#else
692
 
#ifdef ARCH_POWERPC
693
 
#ifdef HAVE_ALTIVEC
694
 
        if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
695
 
                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
696
 
        else
697
 
#endif
698
 
#endif
699
 
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
700
 
#endif
701
 
#else //RUNTIME_CPUDETECT
702
 
#ifdef HAVE_MMX2
703
 
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
704
 
#elif defined (HAVE_3DNOW)
705
 
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
706
 
#elif defined (HAVE_MMX)
707
 
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
708
 
#elif defined (HAVE_ALTIVEC)
709
 
                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
710
 
#else
711
 
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
712
 
#endif
713
 
#endif //!RUNTIME_CPUDETECT
714
 
}
715
 
 
716
 
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
717
 
//        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
718
 
 
719
 
/**
 * Help text for the -pp command line option.
 * Lists the available filters (short and long names), their options,
 * the predefined filter combinations and the syntax of a filter string.
 */
char *pp_help =
        "Available postprocessing filters:\n"
        "Filters                        Options\n"
        "short  long name       short   long option     Description\n"
        "*      *               a       autoq           CPU power dependent enabler\n"
        "                       c       chrom           chrominance filtering enabled\n"
        "                       y       nochrom         chrominance filtering disabled\n"
        "                       n       noluma          luma filtering disabled\n"
        "hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
        "       1. difference factor: default=32, higher -> more deblocking\n"
        "       2. flatness threshold: default=39, lower -> more deblocking\n"
        "                       the h & v deblocking filters share these\n"
        "                       so you can't set different thresholds for h / v\n"
        "vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
        "ha     hadeblock       (2 threshold)           horizontal deblocking filter\n"
        "va     vadeblock       (2 threshold)           vertical deblocking filter\n"
        "h1     x1hdeblock                              experimental h deblock filter 1\n"
        "v1     x1vdeblock                              experimental v deblock filter 1\n"
        "dr     dering                                  deringing filter\n"
        "al     autolevels                              automatic brightness / contrast\n"
        "                       f        fullyrange     stretch luminance to (0..255)\n"
        "lb     linblenddeint                           linear blend deinterlacer\n"
        "li     linipoldeint                            linear interpolating deinterlace\n"
        "ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
        "md     mediandeint                             median deinterlacer\n"
        "fd     ffmpegdeint                             ffmpeg deinterlacer\n"
        "l5     lowpass5                                FIR lowpass deinterlacer\n"
        "de     default                                 hb:a,vb:a,dr:a\n"
        "fa     fast                                    h1:a,v1:a,dr:a\n"
        "ac                                             ha:a:128:7,va:a,dr:a\n"
        "tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
        "                     1. <= 2. <= 3.            larger -> stronger filtering\n"
        "fq     forceQuant      <quantizer>             force quantizer\n"
        "Usage:\n"
        "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
        "long form example:\n"
        "vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
        "short form example:\n"
        "vb:a/hb:a/lb                                   de,-vb\n"
        "more examples:\n"
        "tn:64:128:256\n";
763
 
 
764
 
pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
765
 
{
766
 
        char temp[GET_MODE_BUFFER_SIZE];
767
 
        char *p= temp;
768
 
        char *filterDelimiters= ",/";
769
 
        char *optionDelimiters= ":";
770
 
        struct PPMode *ppMode;
771
 
        char *filterToken;
772
 
 
773
 
        ppMode= memalign(8, sizeof(PPMode));
774
 
 
775
 
        ppMode->lumMode= 0;
776
 
        ppMode->chromMode= 0;
777
 
        ppMode->maxTmpNoise[0]= 700;
778
 
        ppMode->maxTmpNoise[1]= 1500;
779
 
        ppMode->maxTmpNoise[2]= 3000;
780
 
        ppMode->maxAllowedY= 234;
781
 
        ppMode->minAllowedY= 16;
782
 
        ppMode->baseDcDiff= 256/8;
783
 
        ppMode->flatnessThreshold= 56-16-1;
784
 
        ppMode->maxClippedThreshold= 0.01;
785
 
        ppMode->error=0;
786
 
 
787
 
        strncpy(temp, name, GET_MODE_BUFFER_SIZE);
788
 
 
789
 
        if(verbose>1) printf("pp: %s\n", name);
790
 
 
791
 
        for(;;){
792
 
                char *filterName;
793
 
                int q= 1000000; //PP_QUALITY_MAX;
794
 
                int chrom=-1;
795
 
                int luma=-1;
796
 
                char *option;
797
 
                char *options[OPTIONS_ARRAY_SIZE];
798
 
                int i;
799
 
                int filterNameOk=0;
800
 
                int numOfUnknownOptions=0;
801
 
                int enable=1; //does the user want us to enabled or disabled the filter
802
 
 
803
 
                filterToken= strtok(p, filterDelimiters);
804
 
                if(filterToken == NULL) break;
805
 
                p+= strlen(filterToken) + 1; // p points to next filterToken
806
 
                filterName= strtok(filterToken, optionDelimiters);
807
 
                if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
808
 
 
809
 
                if(*filterName == '-')
810
 
                {
811
 
                        enable=0;
812
 
                        filterName++;
813
 
                }
814
 
 
815
 
                for(;;){ //for all options
816
 
                        option= strtok(NULL, optionDelimiters);
817
 
                        if(option == NULL) break;
818
 
 
819
 
                        if(verbose>1) printf("pp: option: %s\n", option);
820
 
                        if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
821
 
                        else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
822
 
                        else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
823
 
                        else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
824
 
                        else
825
 
                        {
826
 
                                options[numOfUnknownOptions] = option;
827
 
                                numOfUnknownOptions++;
828
 
                        }
829
 
                        if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
830
 
                }
831
 
                options[numOfUnknownOptions] = NULL;
832
 
 
833
 
                /* replace stuff from the replace Table */
834
 
                for(i=0; replaceTable[2*i]!=NULL; i++)
835
 
                {
836
 
                        if(!strcmp(replaceTable[2*i], filterName))
837
 
                        {
838
 
                                int newlen= strlen(replaceTable[2*i + 1]);
839
 
                                int plen;
840
 
                                int spaceLeft;
841
 
 
842
 
                                if(p==NULL) p= temp, *p=0;      //last filter
843
 
                                else p--, *p=',';               //not last filter
844
 
 
845
 
                                plen= strlen(p);
846
 
                                spaceLeft= p - temp + plen;
847
 
                                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE)
848
 
                                {
849
 
                                        ppMode->error++;
850
 
                                        break;
851
 
                                }
852
 
                                memmove(p + newlen, p, plen+1);
853
 
                                memcpy(p, replaceTable[2*i + 1], newlen);
854
 
                                filterNameOk=1;
855
 
                        }
856
 
                }
857
 
 
858
 
                for(i=0; filters[i].shortName!=NULL; i++)
859
 
                {
860
 
//                        printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
861
 
                        if(   !strcmp(filters[i].longName, filterName)
862
 
                           || !strcmp(filters[i].shortName, filterName))
863
 
                        {
864
 
                                ppMode->lumMode &= ~filters[i].mask;
865
 
                                ppMode->chromMode &= ~filters[i].mask;
866
 
 
867
 
                                filterNameOk=1;
868
 
                                if(!enable) break; // user wants to disable it
869
 
 
870
 
                                if(q >= filters[i].minLumQuality && luma)
871
 
                                        ppMode->lumMode|= filters[i].mask;
872
 
                                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
873
 
                                        if(q >= filters[i].minChromQuality)
874
 
                                                ppMode->chromMode|= filters[i].mask;
875
 
 
876
 
                                if(filters[i].mask == LEVEL_FIX)
877
 
                                {
878
 
                                        int o;
879
 
                                        ppMode->minAllowedY= 16;
880
 
                                        ppMode->maxAllowedY= 234;
881
 
                                        for(o=0; options[o]!=NULL; o++)
882
 
                                        {
883
 
                                                if(  !strcmp(options[o],"fullyrange")
884
 
                                                   ||!strcmp(options[o],"f"))
885
 
                                                {
886
 
                                                        ppMode->minAllowedY= 0;
887
 
                                                        ppMode->maxAllowedY= 255;
888
 
                                                        numOfUnknownOptions--;
889
 
                                                }
890
 
                                        }
891
 
                                }
892
 
                                else if(filters[i].mask == TEMP_NOISE_FILTER)
893
 
                                {
894
 
                                        int o;
895
 
                                        int numOfNoises=0;
896
 
 
897
 
                                        for(o=0; options[o]!=NULL; o++)
898
 
                                        {
899
 
                                                char *tail;
900
 
                                                ppMode->maxTmpNoise[numOfNoises]=
901
 
                                                        strtol(options[o], &tail, 0);
902
 
                                                if(tail!=options[o])
903
 
                                                {
904
 
                                                        numOfNoises++;
905
 
                                                        numOfUnknownOptions--;
906
 
                                                        if(numOfNoises >= 3) break;
907
 
                                                }
908
 
                                        }
909
 
                                }
910
 
                                else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
911
 
                                     || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
912
 
                                {
913
 
                                        int o;
914
 
 
915
 
                                        for(o=0; options[o]!=NULL && o<2; o++)
916
 
                                        {
917
 
                                                char *tail;
918
 
                                                int val= strtol(options[o], &tail, 0);
919
 
                                                if(tail==options[o]) break;
920
 
 
921
 
                                                numOfUnknownOptions--;
922
 
                                                if(o==0) ppMode->baseDcDiff= val;
923
 
                                                else ppMode->flatnessThreshold= val;
924
 
                                        }
925
 
                                }
926
 
                                else if(filters[i].mask == FORCE_QUANT)
927
 
                                {
928
 
                                        int o;
929
 
                                        ppMode->forcedQuant= 15;
930
 
 
931
 
                                        for(o=0; options[o]!=NULL && o<1; o++)
932
 
                                        {
933
 
                                                char *tail;
934
 
                                                int val= strtol(options[o], &tail, 0);
935
 
                                                if(tail==options[o]) break;
936
 
 
937
 
                                                numOfUnknownOptions--;
938
 
                                                ppMode->forcedQuant= val;
939
 
                                        }
940
 
                                }
941
 
                        }
942
 
                }
943
 
                if(!filterNameOk) ppMode->error++;
944
 
                ppMode->error += numOfUnknownOptions;
945
 
        }
946
 
 
947
 
        if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
948
 
        if(ppMode->error)
949
 
        {
950
 
                fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
951
 
                free(ppMode);
952
 
                return NULL;
953
 
        }
954
 
        return ppMode;
955
 
}
956
 
 
957
 
/**
 * Releases a mode.
 * @param mode mode to release; NULL is accepted and ignored
 */
void pp_free_mode(pp_mode_t *mode)
{
        /* free() already ignores NULL, no explicit check is needed */
        free(mode);
}
960
 
 
961
 
/**
 * Replaces *p with a freshly allocated, zero filled buffer.
 * The previous buffer (if any) is freed; its contents are NOT preserved.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment alignment passed to memalign()
 * @param size      size of the new buffer in bytes
 *
 * On allocation failure *p is set to NULL.
 */
static void reallocAlign(void **p, int alignment, int size){
        free(*p);                       // free(NULL) is a no-op
        *p= memalign(alignment, size);
        if(*p) memset(*p, 0, size);     // never clear through a failed allocation
}
966
 
 
967
 
/**
 * (Re)allocates every work buffer of the context for the given geometry and
 * records the strides in the context. All buffers come back zero filled
 * (via reallocAlign()), except yHistogram which gets a uniform start value.
 *
 * @param c        context whose buffers are replaced
 * @param width    picture width
 * @param height   picture height
 * @param stride   line size used to dimension the line based buffers
 * @param qpStride line size used to dimension the QP tables
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
        const int mbW= (width+15)>>4;   // size in 16 pixel units, rounded up
        const int mbH= (height+15)>>4;
        const int histStart= width*height/64*15/256;
        int n;

        c->stride= stride;
        c->qpStride= qpStride;

        reallocAlign((void **)&c->tempDst, 8, stride*24);
        reallocAlign((void **)&c->tempSrc, 8, stride*24);
        reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
        reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
        for(n=0; n<256; n++){
                c->yHistogram[n]= histStart;
        }

        for(n=0; n<3; n++){
                // Note: the extra 17*1024 bytes are only there so that reads /
                // writes past the end do not have to be worried about
                reallocAlign((void **)&c->tempBlured[n], 8, stride*mbH*16 + 17*1024);
                reallocAlign((void **)&c->tempBluredPast[n], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
        }

        reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
        reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbH*sizeof(QP_STORE_T));
        reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbH*sizeof(QP_STORE_T));
        reallocAlign((void **)&c->forcedQPTable, 8, mbW*sizeof(QP_STORE_T));
}
994
 
 
995
 
static void global_init(void){
996
 
        int i;
997
 
        memset(clip_table, 0, 256);
998
 
        for(i=256; i<512; i++)
999
 
                clip_table[i]= i;
1000
 
        memset(clip_table+512, 0, 256);
1001
 
}
1002
 
 
1003
 
pp_context_t *pp_get_context(int width, int height, int cpuCaps){
1004
 
        PPContext *c= memalign(32, sizeof(PPContext));
1005
 
        int stride= (width+15)&(~15);    //assumed / will realloc if needed
1006
 
        int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
1007
 
 
1008
 
        global_init();
1009
 
 
1010
 
        memset(c, 0, sizeof(PPContext));
1011
 
        c->cpuCaps= cpuCaps;
1012
 
        if(cpuCaps&PP_FORMAT){
1013
 
                c->hChromaSubSample= cpuCaps&0x3;
1014
 
                c->vChromaSubSample= (cpuCaps>>4)&0x3;
1015
 
        }else{
1016
 
                c->hChromaSubSample= 1;
1017
 
                c->vChromaSubSample= 1;
1018
 
        }
1019
 
 
1020
 
        reallocBuffers(c, width, height, stride, qpStride);
1021
 
 
1022
 
        c->frameNum=-1;
1023
 
 
1024
 
        return c;
1025
 
}
1026
 
 
1027
 
void pp_free_context(void *vc){
1028
 
        PPContext *c = (PPContext*)vc;
1029
 
        int i;
1030
 
 
1031
 
        for(i=0; i<3; i++) free(c->tempBlured[i]);
1032
 
        for(i=0; i<3; i++) free(c->tempBluredPast[i]);
1033
 
 
1034
 
        free(c->tempBlocks);
1035
 
        free(c->yHistogram);
1036
 
        free(c->tempDst);
1037
 
        free(c->tempSrc);
1038
 
        free(c->deintTemp);
1039
 
        free(c->stdQPTable);
1040
 
        free(c->nonBQPTable);
1041
 
        free(c->forcedQPTable);
1042
 
 
1043
 
        memset(c, 0, sizeof(PPContext));
1044
 
 
1045
 
        free(c);
1046
 
}
1047
 
 
1048
 
void  pp_postprocess(uint8_t * src[3], int srcStride[3],
1049
 
                 uint8_t * dst[3], int dstStride[3],
1050
 
                 int width, int height,
1051
 
                 QP_STORE_T *QP_store,  int QPStride,
1052
 
                 pp_mode_t *vm,  void *vc, int pict_type)
1053
 
{
1054
 
        int mbWidth = (width+15)>>4;
1055
 
        int mbHeight= (height+15)>>4;
1056
 
        PPMode *mode = (PPMode*)vm;
1057
 
        PPContext *c = (PPContext*)vc;
1058
 
        int minStride= MAX(ABS(srcStride[0]), ABS(dstStride[0]));
1059
 
        int absQPStride = ABS(QPStride);
1060
 
 
1061
 
        // c->stride and c->QPStride are always positive
1062
 
        if(c->stride < minStride || c->qpStride < absQPStride)
1063
 
                reallocBuffers(c, width, height,
1064
 
                                MAX(minStride, c->stride),
1065
 
                                MAX(c->qpStride, absQPStride));
1066
 
 
1067
 
        if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
1068
 
        {
1069
 
                int i;
1070
 
                QP_store= c->forcedQPTable;
1071
 
                absQPStride = QPStride = 0;
1072
 
                if(mode->lumMode & FORCE_QUANT)
1073
 
                        for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
1074
 
                else
1075
 
                        for(i=0; i<mbWidth; i++) QP_store[i]= 1;
1076
 
        }
1077
 
//printf("pict_type:%d\n", pict_type);
1078
 
 
1079
 
        if(pict_type & PP_PICT_TYPE_QP2){
1080
 
                int i;
1081
 
                const int count= mbHeight * absQPStride;
1082
 
                for(i=0; i<(count>>2); i++){
1083
 
                        ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1084
 
                }
1085
 
                for(i<<=2; i<count; i++){
1086
 
                        c->stdQPTable[i] = QP_store[i]>>1;
1087
 
                }
1088
 
                QP_store= c->stdQPTable;
1089
 
                QPStride= absQPStride;
1090
 
        }
1091
 
 
1092
 
if(0){
1093
 
int x,y;
1094
 
for(y=0; y<mbHeight; y++){
1095
 
        for(x=0; x<mbWidth; x++){
1096
 
                printf("%2d ", QP_store[x + y*QPStride]);
1097
 
        }
1098
 
        printf("\n");
1099
 
}
1100
 
        printf("\n");
1101
 
}
1102
 
 
1103
 
        if((pict_type&7)!=3)
1104
 
        {
1105
 
                if (QPStride >= 0) {
1106
 
                        int i;
1107
 
                        const int count= mbHeight * QPStride;
1108
 
                        for(i=0; i<(count>>2); i++){
1109
 
                                ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1110
 
                        }
1111
 
                        for(i<<=2; i<count; i++){
1112
 
                                c->nonBQPTable[i] = QP_store[i] & 0x3F;
1113
 
                        }
1114
 
                } else {
1115
 
                        int i,j;
1116
 
                        for(i=0; i<mbHeight; i++) {
1117
 
                                    for(j=0; j<absQPStride; j++) {
1118
 
                                        c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1119
 
                                }
1120
 
                        }
1121
 
                }
1122
 
        }
1123
 
 
1124
 
        if(verbose>2)
1125
 
        {
1126
 
                printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
1127
 
        }
1128
 
 
1129
 
        postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1130
 
                width, height, QP_store, QPStride, 0, mode, c);
1131
 
 
1132
 
        width  = (width )>>c->hChromaSubSample;
1133
 
        height = (height)>>c->vChromaSubSample;
1134
 
 
1135
 
        if(mode->chromMode)
1136
 
        {
1137
 
                postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1138
 
                        width, height, QP_store, QPStride, 1, mode, c);
1139
 
                postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1140
 
                        width, height, QP_store, QPStride, 2, mode, c);
1141
 
        }
1142
 
        else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
1143
 
        {
1144
 
                linecpy(dst[1], src[1], height, srcStride[1]);
1145
 
                linecpy(dst[2], src[2], height, srcStride[2]);
1146
 
        }
1147
 
        else
1148
 
        {
1149
 
                int y;
1150
 
                for(y=0; y<height; y++)
1151
 
                {
1152
 
                        memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1153
 
                        memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1154
 
                }
1155
 
        }
1156
 
}
1157