2
* Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
4
* AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
6
* This file is part of FFmpeg.
8
* FFmpeg is free software; you can redistribute it and/or modify
9
* it under the terms of the GNU General Public License as published by
10
* the Free Software Foundation; either version 2 of the License, or
11
* (at your option) any later version.
13
* FFmpeg is distributed in the hope that it will be useful,
14
* but WITHOUT ANY WARRANTY; without even the implied warranty of
15
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
* GNU General Public License for more details.
18
* You should have received a copy of the GNU General Public License
19
* along with FFmpeg; if not, write to the Free Software
20
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
29
C MMX MMX2 3DNow AltiVec
31
isVertMinMaxOk Ec Ec Ec
32
doVertLowPass E e e Ec
33
doVertDefFilter Ec Ec e e Ec
35
isHorizMinMaxOk a E Ec
36
doHorizLowPass E e e Ec
37
doHorizDefFilter Ec Ec e e Ec
38
do_a_deblock Ec E Ec E
40
Vertical RKAlgo1 E a a
41
Horizontal RKAlgo1 a a
44
LinIpolDeinterlace e E E*
45
CubicIpolDeinterlace a e e*
46
LinBlendDeinterlace e E E*
47
MedianDeinterlace# E Ec Ec
48
TempDeNoiser# E e e Ec
50
* I don't have a 3DNow! CPU -> it's untested, but no one said it doesn't work, so it seems to work
51
# more or less self-invented filters, so the exactness isn't too meaningful
52
E = Exact implementation
53
e = almost exact implementation (slightly different rounding, ...)
54
a = alternative / approximate impl
55
c = checked against the other implementations (-vo md5)
56
p = partially optimized, still some work to do
61
reduce the time wasted on the mem transfer
62
unroll stuff if instructions depend too much on the prior one
63
move YScale thing to the end instead of fixing QP
64
write a faster and higher quality deblocking filter :)
65
make the mainloop more flexible (variable number of blocks at once
66
(the if/else stuff per block is slowing things down)
67
compare the quality & speed of all filters
70
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
74
//Changelog: use the Subversion log
89
//#define DEBUG_BRIGHTNESS
91
#include "libvo/fastmemcpy.h"
93
#include "postprocess.h"
94
#include "postprocess_internal.h"
96
#include "mangle.h" //FIXME should be supressed
102
#define GET_MODE_BUFFER_SIZE 500
103
#define OPTIONS_ARRAY_SIZE 10
105
#define TEMP_STRIDE 8
106
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
108
#if defined(ARCH_X86)
109
static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005LL;
110
static uint64_t __attribute__((aligned(8))) attribute_used w04= 0x0004000400040004LL;
111
static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020LL;
112
static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000LL;
113
static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101LL;
114
static uint64_t __attribute__((aligned(8))) attribute_used b02= 0x0202020202020202LL;
115
static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x0808080808080808LL;
116
static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080LL;
119
// Three 256-byte segments; global_init() fills this table.
static uint8_t clip_table[3*256];
120
// Points 256 bytes into clip_table, so indices -256..511 stay inside the table.
static uint8_t * const clip_tab= clip_table + 256;
122
static const int attribute_used deringThreshold= 20;
125
// Filters that pp_get_mode_by_name_and_quality() looks up by short or long
// name; the entry whose names are NULL ends the table.
// NOTE(review): the three integers are presumably chromDefault, minLumQuality
// and minChromQuality (the fields the parser reads) -- confirm against struct PPFilter.
// NOTE(review): the long names "ahdeblock"/"avdeblock" below differ from the
// "hadeblock"/"vadeblock" spelling printed by the help text in this file, so the
// documented long names would not match any entry here -- confirm which is intended.
static struct PPFilter filters[]=
127
{"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
128
{"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
129
/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
130
{"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
131
{"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
132
{"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
133
{"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
134
{"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
135
{"dr", "dering", 1, 5, 6, DERING},
136
{"al", "autolevels", 0, 1, 2, LEVEL_FIX},
137
{"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
138
{"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
139
{"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
140
{"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
141
{"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
142
{"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
143
{"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
144
{"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
145
{NULL, NULL,0,0,0,0} //End Marker
148
// Alias table stored as consecutive (name, replacement) string pairs: the
// parser compares replaceTable[2*i] with the filter name and splices
// replaceTable[2*i + 1] into the mode string in its place.
// NOTE(review): the parser stops at a NULL name, so the list must end with a NULL entry.
static const char *replaceTable[]=
150
"default", "hdeblock:a,vdeblock:a,dering:a",
151
"de", "hdeblock:a,vdeblock:a,dering:a",
152
"fast", "x1hdeblock:a,x1vdeblock:a,dering:a",
153
"fa", "x1hdeblock:a,x1vdeblock:a,dering:a",
154
"ac", "ha:a:128:7,va:a,dering:a",
159
#if defined(ARCH_X86)
160
static inline void prefetchnta(void *p)
162
asm volatile( "prefetchnta (%0)\n\t"
167
static inline void prefetcht0(void *p)
169
asm volatile( "prefetcht0 (%0)\n\t"
174
static inline void prefetcht1(void *p)
176
asm volatile( "prefetcht1 (%0)\n\t"
181
static inline void prefetcht2(void *p)
183
asm volatile( "prefetcht2 (%0)\n\t"
189
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
192
* Check if the given 8x8 Block is mostly "flat"
194
// Horizontal flatness test: for each row, counts the seven horizontally
// adjacent pixel pairs src[k]/src[k+1] whose difference lies within +-dcOffset
// and reports whether the count exceeds c->ppMode.flatnessThreshold.
static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
198
// tolerance derived from the quantizer and the configured base difference
const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
199
// (unsigned)(diff + dcOffset) < dcThreshold holds exactly when -dcOffset <= diff <= dcOffset
const int dcThreshold= dcOffset*2 + 1;
201
for(y=0; y<BLOCK_SIZE; y++)
203
if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
204
if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
205
if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
206
if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
207
if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
208
if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
209
if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
212
// non-zero when enough neighbouring pairs are nearly equal
return numEq > c->ppMode.flatnessThreshold;
216
* Check if the middle 8x8 Block in the given 8x16 block is flat
218
// Vertical flatness test: after skipping 4 rows, compares each of the 8 pixels
// in a row with the pixel one stride below it, counts the pairs whose
// difference lies within +-dcOffset and reports whether the count exceeds
// c->ppMode.flatnessThreshold.
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
221
// tolerance derived from the quantizer and the configured base difference
const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
222
// (unsigned)(diff + dcOffset) < dcThreshold holds exactly when -dcOffset <= diff <= dcOffset
const int dcThreshold= dcOffset*2 + 1;
224
src+= stride*4; // src points to begin of the 8x8 Block
225
// BLOCK_SIZE-1 iterations: each one compares a row with the row below it
for(y=0; y<BLOCK_SIZE-1; y++)
227
if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
228
if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
229
if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
230
if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
231
if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
232
if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
233
if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
234
if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
237
// non-zero when enough neighbouring pairs are nearly equal
return numEq > c->ppMode.flatnessThreshold;
240
// Range check against the quantizer: returns 0 as soon as one of the sampled
// pixel pairs differs by more than 2*QP in either direction.
// (unsigned)(diff + 2*QP) > 4*QP holds exactly when diff < -2*QP or diff > 2*QP.
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
245
if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
247
if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
249
if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
251
if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
256
// NOTE(review): a second form of the test (first pixel against last pixel);
// presumably an alternative to the four checks above -- confirm which is compiled.
if((unsigned)(src[0] - src[7] + 2*QP) > 4*QP) return 0;
263
// Vertical counterpart of the range check: returns 0 when sampled pixels of a
// column differ by more than 2*QP.
// NOTE(review): three differently shaped loops follow; presumably they are
// alternative implementations selected at compile time -- confirm which is active.
static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
269
// variant 1: four columns per iteration, each comparing two rows five apart (modulo 8)
for(x=0; x<BLOCK_SIZE; x+=4)
271
if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
272
if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
273
if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
274
if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
279
// variant 2: every column, row 1 against row 8
for(x=0; x<BLOCK_SIZE; x++)
281
if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
288
// variant 3: tracks the minimum and maximum over the sampled pixels
for(x=0; x<BLOCK_SIZE; x++)
294
int v= src[x + y*stride];
298
if(max-min > 2*QP) return 0;
304
// Classifies a block for horizontal filtering: first the flatness test
// (isHorizDC_C), then, for flat blocks, the range test against c->QP
// (isHorizMinMaxOk_C).
static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
305
if( isHorizDC_C(src, stride, c) ){
306
if( isHorizMinMaxOk_C(src, stride, c->QP) )
315
// Classifies a block for vertical filtering: first the flatness test
// (isVertDC_C), then, for flat blocks, the range test against c->QP
// (isVertMinMaxOk_C).
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
316
if( isVertDC_C(src, stride, c) ){
317
if( isVertMinMaxOk_C(src, stride, c->QP) )
326
// Default horizontal deblocking filter (C version); works row by row on the
// pixels dst[0]..dst[7] around the edge between dst[3] and dst[4].
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
329
for(y=0; y<BLOCK_SIZE; y++)
331
// weighted step across the block edge (between dst[3] and dst[4])
const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
333
// only edges weaker than 8*QP are treated
if(FFABS(middleEnergy) < 8*c->QP)
335
// half of the pixel step across the edge
const int q=(dst[3] - dst[4])/2;
336
// the same weighted measure taken inside the left and the right neighbourhood
const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
337
const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
339
// how much the edge step exceeds the smaller of the two neighbouring steps
int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
343
// gives the correction the sign opposite to middleEnergy
d*= FFSIGN(-middleEnergy);
364
* Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
365
* using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
367
// Row-wise horizontal low pass over dst[0]..dst[7]. Each output is
// (sums[k] + sums[k+2] + 2*dst[k]) >> 4, which expands to the
// (1,1,2,2,4,2,2,1,1)/16 kernel with rounding; positions left of dst[0] use
// `first` and positions right of dst[7] use `last`.
static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
370
for(y=0; y<BLOCK_SIZE; y++)
372
// edge values: the outside neighbour is used only if it is within QP of the border pixel
const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
373
const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
376
// sliding sums, each including a rounding term of 4; every step drops the
// oldest sample and adds the next one
sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
377
sums[1] = sums[0] - first + dst[3];
378
sums[2] = sums[1] - first + dst[4];
379
sums[3] = sums[2] - first + dst[5];
380
sums[4] = sums[3] - first + dst[6];
381
sums[5] = sums[4] - dst[0] + dst[7];
382
sums[6] = sums[5] - dst[1] + last;
383
sums[7] = sums[6] - dst[2] + last;
384
sums[8] = sums[7] - dst[3] + last;
385
sums[9] = sums[8] - dst[4] + last;
387
// all sums were taken from the unfiltered row, so writing in place is safe;
// each statement reads only its own, not yet overwritten, dst[k]
dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
388
dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
389
dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
390
dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
391
dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
392
dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
393
dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
394
dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
401
* Experimental Filter 1 (Horizontal)
402
* will not damage linear gradients
403
* Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
404
* can only smooth blocks at the expected locations (it can't smooth them if they did move)
405
* MMX2 version does correct clipping, C version doesn't
406
* not identical with the vertical one
408
// Experimental horizontal deblocking filter 1 (C version).
// NOTE(review): lut is a function-static table built on first use; the
// av_malloc() result is written to without a visible NULL check, and the
// one-time initialisation is presumably not safe against concurrent first
// calls -- confirm.
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
411
static uint64_t *lut= NULL;
415
// 256 entries of 8 bytes each
lut = av_malloc(256*8);
418
// maps the table index to a signed, doubled value: 0..127 -> 0..254, 128..255 -> -256..-2
int v= i < 128 ? 2*i : 2*(i-256);
420
//Simulate 112242211 9-Tap filter
// NOTE(review): a..d are defined twice (here and below); presumably this
// first set is disabled -- confirm.
421
uint64_t a= (v/16) & 0xFF;
422
uint64_t b= (v/8) & 0xFF;
423
uint64_t c= (v/4) & 0xFF;
424
uint64_t d= (3*v/8) & 0xFF;
426
//Simulate piecewise linear interpolation
427
uint64_t a= (v/16) & 0xFF;
428
uint64_t b= (v*3/16) & 0xFF;
429
uint64_t c= (v*5/16) & 0xFF;
430
uint64_t d= (7*v/16) & 0xFF;
431
// A..D are the negated values, again reduced to 8 bits
uint64_t A= (0x100 - a)&0xFF;
432
uint64_t B= (0x100 - b)&0xFF;
433
uint64_t C= (0x100 - c)&0xFF;
434
// NOTE(review): D is derived from c, while A, B and C mirror a, b and c;
// (0x100 - d) looks intended -- confirm before changing.
uint64_t D= (0x100 - c)&0xFF;
436
// packs the eight byte values into one 64-bit entry, a in the top byte and A in the bottom byte
lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
437
(D<<24) | (C<<16) | (B<<8) | (A);
438
//lut[i] = (v<<32) | (v<<24);
442
for(y=0; y<BLOCK_SIZE; y++)
444
// pixel steps left of, across and right of the block edge (src[3]/src[4])
int a= src[1] - src[2];
445
int b= src[3] - src[4];
446
int c= src[5] - src[6];
448
// amount by which the edge step exceeds the average of the two outer steps, never negative
int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
452
// correction carrying the sign opposite to b
int v = d * FFSIGN(-b);
467
* accurate deblock filter
469
// "Accurate" deblocking filter working along an arbitrary direction: pixels
// are addressed as src[k*step], so the same code serves both orientations.
// It first measures flatness over the nine neighbouring pairs from
// src[-1*step] to src[8*step]; flat lines take the low pass branch, the
// others the default-filter branch further down.
static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
472
// tolerance derived from the quantizer and the configured base difference
const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
473
// (unsigned)(diff + dcOffset) < dcThreshold holds exactly when -dcOffset <= diff <= dcOffset
const int dcThreshold= dcOffset*2 + 1;
475
src+= step*4; // src points to begin of the 8x8 Block
479
if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
480
if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
481
if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
482
if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
483
if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
484
if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
485
if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
486
if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
487
if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
488
// flat case
if(numEq > c->ppMode.flatnessThreshold){
491
// min/max search: each pair is ordered first so that only one comparison
// against max and one against min is needed per pixel
if(src[0] > src[step]){
499
if(src[x*step] > src[(x+1)*step]){
500
if(src[x *step] > max) max= src[ x *step];
501
if(src[(x+1)*step] < min) min= src[(x+1)*step];
503
if(src[(x+1)*step] > max) max= src[(x+1)*step];
504
if(src[ x *step] < min) min= src[ x *step];
508
// edge values: the outside neighbour is used only if it is within QP of the border pixel
const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
509
const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
512
// sliding sums with a rounding term of 4, same scheme as doHorizLowPass_C
sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
513
sums[1] = sums[0] - first + src[3*step];
514
sums[2] = sums[1] - first + src[4*step];
515
sums[3] = sums[2] - first + src[5*step];
516
sums[4] = sums[3] - first + src[6*step];
517
sums[5] = sums[4] - src[0*step] + src[7*step];
518
sums[6] = sums[5] - src[1*step] + last;
519
sums[7] = sums[6] - src[2*step] + last;
520
sums[8] = sums[7] - src[3*step] + last;
521
sums[9] = sums[8] - src[4*step] + last;
523
// (sums[k] + sums[k+2] + 2*src[k*step]) >> 4 is the (1,1,2,2,4,2,2,1,1)/16 kernel
src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
524
src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
525
src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
526
src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
527
src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
528
src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
529
src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
530
src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
533
// non-flat case: weighted step across the edge between src[3*step] and src[4*step]
const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
535
// only edges weaker than 8*QP are treated
if(FFABS(middleEnergy) < 8*QP)
537
const int q=(src[3*step] - src[4*step])/2;
538
const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
539
const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
541
// how much the edge step exceeds the smaller of the two neighbouring steps
int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
545
// gives the correction the sign opposite to middleEnergy
d*= FFSIGN(-middleEnergy);
572
//Note: we have C, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one
574
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
580
#define COMPILE_ALTIVEC
581
#endif //HAVE_ALTIVEC
582
#endif //ARCH_POWERPC
584
#if defined(ARCH_X86)
586
#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
590
#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
594
#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
595
#define COMPILE_3DNOW
597
#endif /* defined(ARCH_X86) */
608
#define RENAME(a) a ## _C
609
#include "postprocess_template.c"
613
#ifdef COMPILE_ALTIVEC
616
#define RENAME(a) a ## _altivec
617
#include "postprocess_altivec_template.c"
618
#include "postprocess_template.c"
620
#endif //ARCH_POWERPC
628
#define RENAME(a) a ## _MMX
629
#include "postprocess_template.c"
638
#define RENAME(a) a ## _MMX2
639
#include "postprocess_template.c"
648
#define RENAME(a) a ## _3DNow
649
#include "postprocess_template.c"
652
// minor note: the HAVE_xyz macros are messed up after that line, so don't use them
654
// Dispatcher for one image plane: copies the requested mode into the context
// and forwards all arguments to one of the postProcess_<variant>() functions.
// With RUNTIME_CPUDETECT the variant is chosen from c->cpuCaps at run time,
// otherwise from the HAVE_* macros at compile time.
static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
655
QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
657
PPContext *c= (PPContext *)vc;
658
PPMode *ppMode= (PPMode *)vm;
659
// the mode is copied by value into the context on every call
c->ppMode= *ppMode; //FIXME
661
// Using ifs here as they are faster than function pointers, although the
662
// difference wouldn't be measurable here; but it's much better because
663
// someone might exchange the CPU without restarting mplayer ;)
664
#ifdef RUNTIME_CPUDETECT
665
#if defined(ARCH_X86)
666
// ordered by speed, fastest first
667
if(c->cpuCaps & PP_CPU_CAPS_MMX2)
668
postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
669
else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
670
postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
671
else if(c->cpuCaps & PP_CPU_CAPS_MMX)
672
postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
674
postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
678
if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
679
postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
683
postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
685
#else //RUNTIME_CPUDETECT
687
postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
688
#elif defined (HAVE_3DNOW)
689
postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
690
#elif defined (HAVE_MMX)
691
postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
692
#elif defined (HAVE_ALTIVEC)
693
postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
695
postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
697
#endif //!RUNTIME_CPUDETECT
700
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
701
// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
703
/* -pp Command line Help
706
"Available postprocessing filters:\n"
708
"short long name short long option Description\n"
709
"* * a autoq CPU power dependent enabler\n"
710
" c chrom chrominance filtering enabled\n"
711
" y nochrom chrominance filtering disabled\n"
712
" n noluma luma filtering disabled\n"
713
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
714
" 1. difference factor: default=32, higher -> more deblocking\n"
715
" 2. flatness threshold: default=39, lower -> more deblocking\n"
716
" the h & v deblocking filters share these\n"
717
" so you can't set different thresholds for h / v\n"
718
"vb vdeblock (2 threshold) vertical deblocking filter\n"
719
"ha hadeblock (2 threshold) horizontal deblocking filter\n"
720
"va vadeblock (2 threshold) vertical deblocking filter\n"
721
"h1 x1hdeblock experimental h deblock filter 1\n"
722
"v1 x1vdeblock experimental v deblock filter 1\n"
723
"dr dering deringing filter\n"
724
"al autolevels automatic brightness / contrast\n"
725
" f fullyrange stretch luminance to (0..255)\n"
726
"lb linblenddeint linear blend deinterlacer\n"
727
"li linipoldeint linear interpolating deinterlace\n"
728
"ci cubicipoldeint cubic interpolating deinterlacer\n"
729
"md mediandeint median deinterlacer\n"
730
"fd ffmpegdeint ffmpeg deinterlacer\n"
731
"l5 lowpass5 FIR lowpass deinterlacer\n"
732
"de default hb:a,vb:a,dr:a\n"
733
"fa fast h1:a,v1:a,dr:a\n"
734
"ac ha:a:128:7,va:a,dr:a\n"
735
"tn tmpnoise (3 threshold) temporal noise reducer\n"
736
" 1. <= 2. <= 3. larger -> stronger filtering\n"
737
"fq forceQuant <quantizer> force quantizer\n"
739
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
740
"long form example:\n"
741
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
742
"short form example:\n"
743
"vb:a/hb:a/lb de,-vb\n"
749
// Parses a postprocessing mode string (filters separated by ',' or '/',
// options separated by ':') into a newly allocated PPMode.
// `quality` is applied to filters that carry the "autoq"/"a" option; all
// others use a fixed, very high quality value. Unknown filter names and
// unconsumed options are counted in ppMode->error.
// NOTE(review): parsing relies on strtok(), which keeps hidden static state,
// so this function is presumably not reentrant or thread-safe -- confirm.
pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
751
// working copy of the mode string; alias expansion edits it in place
char temp[GET_MODE_BUFFER_SIZE];
753
const char *filterDelimiters= ",/";
754
const char *optionDelimiters= ":";
755
struct PPMode *ppMode;
758
// NOTE(review): the result of av_malloc() is dereferenced below without a
// visible NULL check.
ppMode= av_malloc(sizeof(PPMode));
761
// defaults, used unless a filter option overrides them
ppMode->chromMode= 0;
762
ppMode->maxTmpNoise[0]= 700;
763
ppMode->maxTmpNoise[1]= 1500;
764
ppMode->maxTmpNoise[2]= 3000;
765
ppMode->maxAllowedY= 234;
766
ppMode->minAllowedY= 16;
767
ppMode->baseDcDiff= 256/8;
768
ppMode->flatnessThreshold= 56-16-1;
769
ppMode->maxClippedThreshold= 0.01;
772
// NOTE(review): strncpy() does not write a terminating NUL when name has
// GET_MODE_BUFFER_SIZE or more characters, so temp may end up unterminated
// for over-long mode strings -- confirm and bound/terminate explicitly.
strncpy(temp, name, GET_MODE_BUFFER_SIZE);
774
av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
778
// per-filter state; q stays at this large value unless "autoq" is given
int q= 1000000; //PP_QUALITY_MAX;
782
// options not recognised as generic ones, NULL-terminated, handed to the filter-specific code
char *options[OPTIONS_ARRAY_SIZE];
785
int numOfUnknownOptions=0;
786
int enable=1; //does the user want us to enable or disable the filter
788
filterToken= strtok(p, filterDelimiters);
789
if(filterToken == NULL) break;
790
p+= strlen(filterToken) + 1; // p points to next filterToken
791
// re-tokenises the filter token itself: its first part is the filter name
filterName= strtok(filterToken, optionDelimiters);
792
av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
794
// a leading '-' marks a filter the user wants removed
if(*filterName == '-')
800
for(;;){ //for all options
801
option= strtok(NULL, optionDelimiters);
802
if(option == NULL) break;
804
av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
805
if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
806
else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
807
else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
808
else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
811
// anything else is kept for the filter-specific handling below
options[numOfUnknownOptions] = option;
812
numOfUnknownOptions++;
814
// leaves room for the NULL terminator stored after the loop
if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
816
options[numOfUnknownOptions] = NULL;
818
/* replace stuff from the replace Table */
819
for(i=0; replaceTable[2*i]!=NULL; i++)
821
if(!strcmp(replaceTable[2*i], filterName))
823
int newlen= strlen(replaceTable[2*i + 1]);
827
if(p==NULL) p= temp, *p=0; //last filter
828
else p--, *p=','; //not last filter
831
// total length the string would have before inserting the replacement
spaceLeft= p - temp + plen;
832
// refuses the expansion when it would not fit into temp
if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE)
837
// shifts the unparsed remainder (including its NUL) right and inserts the replacement before it
memmove(p + newlen, p, plen+1);
838
memcpy(p, replaceTable[2*i + 1], newlen);
843
for(i=0; filters[i].shortName!=NULL; i++)
845
if( !strcmp(filters[i].longName, filterName)
846
|| !strcmp(filters[i].shortName, filterName))
848
// a matching filter is always cleared first, then re-enabled below if requested
ppMode->lumMode &= ~filters[i].mask;
849
ppMode->chromMode &= ~filters[i].mask;
852
if(!enable) break; // user wants to disable it
854
if(q >= filters[i].minLumQuality && luma)
855
ppMode->lumMode|= filters[i].mask;
856
// chrom==-1 means "not specified": fall back to the filter's default
if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
857
if(q >= filters[i].minChromQuality)
858
ppMode->chromMode|= filters[i].mask;
860
// autolevels: optional "fullyrange"/"f" widens the allowed luma range to 0..255
if(filters[i].mask == LEVEL_FIX)
863
ppMode->minAllowedY= 16;
864
ppMode->maxAllowedY= 234;
865
for(o=0; options[o]!=NULL; o++)
867
if( !strcmp(options[o],"fullyrange")
868
||!strcmp(options[o],"f"))
870
ppMode->minAllowedY= 0;
871
ppMode->maxAllowedY= 255;
872
numOfUnknownOptions--;
876
// temporal noise reducer: up to three numeric thresholds
else if(filters[i].mask == TEMP_NOISE_FILTER)
881
for(o=0; options[o]!=NULL; o++)
884
ppMode->maxTmpNoise[numOfNoises]=
885
strtol(options[o], &tail, 0);
889
numOfUnknownOptions--;
890
if(numOfNoises >= 3) break;
894
// deblocking filters: first number sets baseDcDiff, second sets flatnessThreshold
else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
895
|| filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
899
for(o=0; options[o]!=NULL && o<2; o++)
902
int val= strtol(options[o], &tail, 0);
903
// stops at the first option that does not start with a number
if(tail==options[o]) break;
905
numOfUnknownOptions--;
906
if(o==0) ppMode->baseDcDiff= val;
907
else ppMode->flatnessThreshold= val;
910
// forced quantizer: defaults to 15, optionally overridden by one number
else if(filters[i].mask == FORCE_QUANT)
913
ppMode->forcedQuant= 15;
915
for(o=0; options[o]!=NULL && o<1; o++)
918
int val= strtol(options[o], &tail, 0);
919
if(tail==options[o]) break;
921
numOfUnknownOptions--;
922
ppMode->forcedQuant= val;
927
if(!filterNameOk) ppMode->error++;
928
// options nobody consumed count as errors
ppMode->error += numOfUnknownOptions;
931
av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
934
av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
941
void pp_free_mode(pp_mode_t *mode){
945
// Stores a zero-initialised buffer of `size` bytes in *p.
// NOTE(review): `alignment` is not used by the visible statement; presumably
// the alignment guarantee comes from av_mallocz() itself -- confirm.
static void reallocAlign(void **p, int alignment, int size){
947
*p= av_mallocz(size);
950
// (Re)allocates the context's work buffers for the given picture size and
// strides, using reallocAlign() for each of them.
// NOTE(review): reallocAlign() returns nothing, so allocation failures are not
// detected here; yHistogram is written right after its allocation -- confirm
// how out-of-memory is meant to be handled.
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
951
// picture size in 16x16 macroblocks, rounded up
int mbWidth = (width+15)>>4;
952
int mbHeight= (height+15)>>4;
956
c->qpStride= qpStride;
958
reallocAlign((void **)&c->tempDst, 8, stride*24);
959
reallocAlign((void **)&c->tempSrc, 8, stride*24);
960
reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
961
reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
963
// every histogram bin starts with the same value derived from the picture area
c->yHistogram[i]= width*height/64*15/256;
967
//Note: the +17*1024 is just there so I don't have to worry about r/w over the end
968
reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
969
reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
972
reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
973
// QP tables: one row of qpStride entries per macroblock row
reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
974
reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
975
// a single row, reused for every macroblock row (pp_postprocess uses it with stride 0)
reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
978
// Initialises clip_table: the first and the last 256 bytes are set to 0, the
// loop covers the middle 256 entries.
// NOTE(review): if the table is meant to clamp values to 0..255, the upper
// third would be expected to hold 255 rather than 0 -- confirm the intent.
static void global_init(void){
980
memset(clip_table, 0, 256);
981
for(i=256; i<512; i++)
983
memset(clip_table+512, 0, 256);
986
static const char * context_to_name(void * ptr) {
990
static AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
992
// Creates a postprocessing context for pictures of the given size.
// cpuCaps carries the CPU capability flags and, when PP_FORMAT is set, the
// chroma subsampling shifts (bits 0-1 horizontal, bits 4-5 vertical);
// without PP_FORMAT both shifts default to 1.
pp_context_t *pp_get_context(int width, int height, int cpuCaps){
// NOTE(review): the av_malloc() result is passed to memset() below without a
// visible NULL check.
PPContext *c= av_malloc(sizeof(PPContext));
994
// initial guesses, rounded up to whole macroblocks; pp_postprocess()
// reallocates when the real strides turn out larger
int stride= (width+15)&(~15); //assumed / will realloc if needed
995
int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
999
memset(c, 0, sizeof(PPContext));
1000
c->av_class = &av_codec_context_class;
1001
c->cpuCaps= cpuCaps;
1002
if(cpuCaps&PP_FORMAT){
1003
c->hChromaSubSample= cpuCaps&0x3;
1004
c->vChromaSubSample= (cpuCaps>>4)&0x3;
1006
c->hChromaSubSample= 1;
1007
c->vChromaSubSample= 1;
1010
reallocBuffers(c, width, height, stride, qpStride);
1017
// Releases the work buffers owned by a context created with pp_get_context()
// and then clears the context structure.
void pp_free_context(void *vc){
1018
PPContext *c = (PPContext*)vc;
1021
for(i=0; i<3; i++) av_free(c->tempBlured[i]);
1022
for(i=0; i<3; i++) av_free(c->tempBluredPast[i]);
1024
av_free(c->tempBlocks);
1025
av_free(c->yHistogram);
1026
av_free(c->tempDst);
1027
av_free(c->tempSrc);
1028
av_free(c->deintTemp);
1029
av_free(c->stdQPTable);
1030
av_free(c->nonBQPTable);
1031
av_free(c->forcedQPTable);
1033
// wipes all pointers and settings so stale values cannot be reused
memset(c, 0, sizeof(PPContext));
1038
// Public entry point: postprocesses one picture given as three planes.
// Prepares the quantizer tables, filters plane 0 at full size, and then
// either filters planes 1 and 2 at the subsampled size or copies them
// unchanged from src to dst.
void pp_postprocess(uint8_t * src[3], int srcStride[3],
1039
uint8_t * dst[3], int dstStride[3],
1040
int width, int height,
1041
QP_STORE_T *QP_store, int QPStride,
1042
pp_mode_t *vm, void *vc, int pict_type)
1044
// picture size in 16x16 macroblocks, rounded up
int mbWidth = (width+15)>>4;
1045
int mbHeight= (height+15)>>4;
1046
PPMode *mode = (PPMode*)vm;
1047
PPContext *c = (PPContext*)vc;
1048
// strides may be negative, so their magnitudes are compared
int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1049
int absQPStride = FFABS(QPStride);
1051
// c->stride and c->qpStride are always positive
1052
// grows the work buffers when the caller's strides exceed what was allocated
if(c->stride < minStride || c->qpStride < absQPStride)
1053
reallocBuffers(c, width, height,
1054
FFMAX(minStride, c->stride),
1055
FFMAX(c->qpStride, absQPStride));
1057
// no QP table supplied, or a forced quantizer requested: use the context's
// single-row table with stride 0 so every macroblock row reads the same values
if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
1060
QP_store= c->forcedQPTable;
1061
absQPStride = QPStride = 0;
1062
if(mode->lumMode & FORCE_QUANT)
1063
for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
1065
for(i=0; i<mbWidth; i++) QP_store[i]= 1;
1068
// PP_PICT_TYPE_QP2: the supplied QP values are halved into stdQPTable
if(pict_type & PP_PICT_TYPE_QP2){
1070
// NOTE(review): this copy indexes QP_store forward from 0 using absQPStride
// even when QPStride is negative, unlike the nonBQPTable copy below, which
// has a separate branch for that case -- confirm.
const int count= mbHeight * absQPStride;
1071
// four entries at a time; the mask removes the bit shifted in from the neighbouring byte
// NOTE(review): assumes QP_STORE_T is one byte wide and that both tables may
// be accessed through uint32_t pointers (alignment/aliasing) -- confirm.
for(i=0; i<(count>>2); i++){
1072
((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1074
// remaining entries one by one
for(i<<=2; i<count; i++){
1075
c->stdQPTable[i] = QP_store[i]>>1;
1077
QP_store= c->stdQPTable;
1078
QPStride= absQPStride;
1083
// prints the QP table, one line per macroblock row
for(y=0; y<mbHeight; y++){
1084
for(x=0; x<mbWidth; x++){
1085
av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1087
av_log(c, AV_LOG_INFO, "\n");
1089
av_log(c, AV_LOG_INFO, "\n");
1092
// nonBQPTable is refreshed for every picture type except 3
// NOTE(review): presumably type 3 denotes B-frames, as the table name suggests -- confirm.
if((pict_type&7)!=3)
1094
if (QPStride >= 0) {
1096
const int count= mbHeight * QPStride;
1097
// four entries at a time, keeping the low 6 bits of each
for(i=0; i<(count>>2); i++){
1098
((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1100
for(i<<=2; i<count; i++){
1101
c->nonBQPTable[i] = QP_store[i] & 0x3F;
1105
// negative stride: rows are read with the signed stride and stored with the positive one
for(i=0; i<mbHeight; i++) {
1106
for(j=0; j<absQPStride; j++) {
1107
c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1113
av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1114
mode->lumMode, mode->chromMode);
1116
// plane 0 at full resolution
postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1117
width, height, QP_store, QPStride, 0, mode, c);
1119
// from here on width/height describe the subsampled planes 1 and 2
width = (width )>>c->hChromaSubSample;
1120
height = (height)>>c->vChromaSubSample;
1124
postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1125
width, height, QP_store, QPStride, 1, mode, c);
1126
postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1127
width, height, QP_store, QPStride, 2, mode, c);
1129
// planes 1 and 2 are not filtered: copy them, in one call per plane when
// source and destination strides match
else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
1131
linecpy(dst[1], src[1], height, srcStride[1]);
1132
linecpy(dst[2], src[2], height, srcStride[2]);
1137
// differing strides: copy row by row, `width` bytes each
for(y=0; y<height; y++)
1139
memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1140
memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);