2
* Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
4
* AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
6
* This file is part of Libav.
8
* Libav is free software; you can redistribute it and/or modify
9
* it under the terms of the GNU General Public License as published by
10
* the Free Software Foundation; either version 2 of the License, or
11
* (at your option) any later version.
13
* Libav is distributed in the hope that it will be useful,
14
* but WITHOUT ANY WARRANTY; without even the implied warranty of
15
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
* GNU General Public License for more details.
18
* You should have received a copy of the GNU General Public License
19
* along with Libav; if not, write to the Free Software
20
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
29
C MMX MMX2 3DNow AltiVec
31
isVertMinMaxOk Ec Ec Ec
32
doVertLowPass E e e Ec
33
doVertDefFilter Ec Ec e e Ec
35
isHorizMinMaxOk a E Ec
36
doHorizLowPass E e e Ec
37
doHorizDefFilter Ec Ec e e Ec
38
do_a_deblock Ec E Ec E
40
Vertical RKAlgo1 E a a
41
Horizontal RKAlgo1 a a
44
LinIpolDeinterlace e E E*
45
CubicIpolDeinterlace a e e*
46
LinBlendDeinterlace e E E*
47
MedianDeinterlace# E Ec Ec
48
TempDeNoiser# E e e Ec
50
* I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51
# more or less self-invented filters, so the exactness is not too meaningful
52
E = Exact implementation
53
e = almost exact implementation (slightly different rounding,...)
54
a = alternative / approximate impl
55
c = checked against the other implementations (-vo md5)
56
p = partially optimized, still some work to do
61
reduce the time wasted on the mem transfer
62
unroll stuff if instructions depend too much on the prior one
63
move YScale thing to the end instead of fixing QP
64
write a faster and higher quality deblocking filter :)
65
make the mainloop more flexible (variable number of blocks at once
66
(the if/else stuff per block is slowing things down)
67
compare the quality & speed of all filters
70
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
74
//Changelog: use git log
77
#include "libavutil/avutil.h"
83
//#define HAVE_AMD3DNOW
86
//#define DEBUG_BRIGHTNESS
87
#include "postprocess.h"
88
#include "postprocess_internal.h"
89
#include "libavutil/avstring.h"
91
/**
 * Return the libpostproc version, i.e. the value of LIBPOSTPROC_VERSION_INT.
 */
unsigned postproc_version(void)
93
return LIBPOSTPROC_VERSION_INT;
96
/**
 * Return the LIBAV_CONFIGURATION string constant.
 */
const char *postproc_configuration(void)
98
return LIBAV_CONFIGURATION;
101
/**
 * Return a pointer to the LIBAV_LICENSE text.
 */
const char *postproc_license(void)
103
#define LICENSE_PREFIX "libpostproc license: "
104
// The prefix is joined to LIBAV_LICENSE by string-literal concatenation and
// then stepped over (sizeof - 1 excludes the terminating NUL), so the
// returned pointer starts at the license text while the stored literal
// still begins with the prefix.
return LICENSE_PREFIX LIBAV_LICENSE + sizeof(LICENSE_PREFIX) - 1;
111
// Size of the scratch buffer that pp_get_mode_by_name_and_quality() copies
// the mode string into before tokenizing it.
#define GET_MODE_BUFFER_SIZE 500
112
// Capacity of the per-filter options[] array in
// pp_get_mode_by_name_and_quality(), including its NULL terminator slot.
#define OPTIONS_ARRAY_SIZE 10
114
#define TEMP_STRIDE 8
115
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
118
// 64-bit constants holding one value replicated across lanes: the wNN ones
// repeat a 16-bit word four times, the bNN ones repeat a byte eight times.
// (The leading 8 is presumably the requested alignment; confirm against the
// DECLARE_ASM_CONST definition, which is outside this file.)
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
119
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
120
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
121
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
122
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
123
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
124
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
125
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
128
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
131
/*
 * Table of the filters that can be named in a mode string. The parser in
 * pp_get_mode_by_name_and_quality() matches a token against both name
 * strings with strcmp() and then uses chromDefault, minLumQuality,
 * minChromQuality and mask from the matching entry. Iteration stops at the
 * entry whose shortName is NULL.
 * NOTE(review): the initializer order is presumably
 * {shortName, longName, chromDefault, minLumQuality, minChromQuality, mask};
 * struct PPFilter is declared outside this file, confirm there.
 */
static struct PPFilter filters[]=
133
{"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
134
{"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
135
/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
136
{"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
137
{"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
138
{"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
139
{"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
140
{"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
141
{"dr", "dering", 1, 5, 6, DERING},
142
{"al", "autolevels", 0, 1, 2, LEVEL_FIX},
143
{"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
144
{"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
145
{"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
146
{"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
147
{"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
148
{"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
149
{"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
150
{"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
151
{NULL, NULL,0,0,0,0} //End Marker
154
/*
 * Alias table laid out as consecutive (name, expansion) string pairs.
 * pp_get_mode_by_name_and_quality() compares a filter name against the even
 * entries and, on a match, splices the following odd entry into the string
 * being parsed. The lookup loop stops at a NULL even entry; that terminator
 * is on a line not visible in this excerpt.
 */
static const char *replaceTable[]=
156
"default", "hb:a,vb:a,dr:a",
157
"de", "hb:a,vb:a,dr:a",
158
"fast", "h1:a,v1:a,dr:a",
159
"fa", "h1:a,v1:a,dr:a",
160
"ac", "ha:a:128:7,va:a,dr:a",
166
/* Emits the `prefetchnta` instruction through inline asm. The operand list
 * that binds %0 is on lines not visible here (presumably p; confirm). */
static inline void prefetchnta(void *p)
168
__asm__ volatile( "prefetchnta (%0)\n\t"
173
/* Emits the `prefetcht0` instruction through inline asm. The operand list
 * that binds %0 is on lines not visible here (presumably p; confirm). */
static inline void prefetcht0(void *p)
175
__asm__ volatile( "prefetcht0 (%0)\n\t"
180
/* Emits the `prefetcht1` instruction through inline asm. The operand list
 * that binds %0 is on lines not visible here (presumably p; confirm). */
static inline void prefetcht1(void *p)
182
__asm__ volatile( "prefetcht1 (%0)\n\t"
187
/* Emits the `prefetcht2` instruction through inline asm. The operand list
 * that binds %0 is on lines not visible here (presumably p; confirm). */
static inline void prefetcht2(void *p)
189
__asm__ volatile( "prefetcht2 (%0)\n\t"
195
/* The horizontal functions exist only in C because the MMX
196
* code is faster with vertical filters and transposing. */
199
* Check if the given 8x8 Block is mostly "flat"
201
/*
 * Counts horizontally adjacent pixel pairs whose difference is within
 * +-dcOffset and returns nonzero when that count exceeds
 * c->ppMode.flatnessThreshold.
 * The declarations of y and numEq and the per-row advance of src are on
 * lines not visible in this excerpt.
 */
static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
205
// Tolerance derived from the non-B-frame QP and the configured DC factor.
const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
206
const int dcThreshold= dcOffset*2 + 1;
208
for(y=0; y<BLOCK_SIZE; y++){
209
// The unsigned cast turns the two-sided test -dcOffset <= diff <= dcOffset
// into a single comparison: a negative (diff + dcOffset) wraps to a huge
// unsigned value and therefore fails the "<" test.
if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
210
if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
211
if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
212
if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
213
if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
214
if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
215
if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
218
return numEq > c->ppMode.flatnessThreshold;
222
* Check if the middle 8x8 Block in the given 8x16 block is flat
224
/*
 * Vertical counterpart of the flatness test: counts vertically adjacent
 * pixel pairs (same column, one stride apart) whose difference is within
 * +-dcOffset and returns nonzero when the count exceeds
 * c->ppMode.flatnessThreshold.
 * The declarations of y and numEq and the per-row advance of src are on
 * lines not visible in this excerpt.
 */
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
228
const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
229
const int dcThreshold= dcOffset*2 + 1;
231
src+= stride*4; // src points to begin of the 8x8 Block
232
// BLOCK_SIZE-1 iterations: each one compares a row with the row below it.
for(y=0; y<BLOCK_SIZE-1; y++){
233
// Single unsigned comparison implementing |diff| <= dcOffset.
if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
234
if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
235
if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
236
if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
237
if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
238
if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
239
if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
240
if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
243
return numEq > c->ppMode.flatnessThreshold;
246
/*
 * Rejects (returns 0) as soon as a sampled pair of pixels in a row differs
 * by more than 2*QP in either direction. The loop structure, the row
 * advance and the success return are on lines not visible in this excerpt.
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
251
// (unsigned)(diff + 2*QP) > 4*QP is true exactly when diff lies outside
// [-2*QP, 2*QP].
if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
253
if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
255
if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
257
if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
262
// NOTE(review): the lines between the checks above and this one are missing
// here; this may belong to an alternative code path. Confirm in the full file.
if((unsigned)(src[0] - src[7] + 2*QP) > 4*QP) return 0;
269
/*
 * Vertical counterpart of the min/max plausibility check: returns 0 when
 * sampled pixel pairs differ by more than 2*QP.
 * NOTE(review): three separate loop fragments are visible below and the
 * lines connecting them are missing; they may be alternative
 * implementations selected by a preprocessor conditional. Confirm in the
 * full file.
 */
static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
275
// Handles four columns per iteration, each column sampled at a different
// pair of rows.
for(x=0; x<BLOCK_SIZE; x+=4){
276
if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
277
if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
278
if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
279
if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
284
// Variant comparing row 1 with row 8 (stride<<3) in every column.
for(x=0; x<BLOCK_SIZE; x++){
285
if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
292
// Variant tracking an explicit per-column min and max; their updates are on
// lines not visible here.
for(x=0; x<BLOCK_SIZE; x++){
297
int v= src[x + y*stride];
301
if(max-min > 2*QP) return 0;
307
/*
 * Classifies a block for horizontal deblocking: first tests flatness with
 * isHorizDC_C(), and for flat blocks additionally runs isHorizMinMaxOk_C()
 * with c->QP. The values returned for each outcome are on lines not
 * visible in this excerpt.
 */
static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
309
if( isHorizDC_C(src, stride, c) ){
310
if( isHorizMinMaxOk_C(src, stride, c->QP) )
319
/*
 * Classifies a block for vertical deblocking: first tests flatness with
 * isVertDC_C(), and for flat blocks additionally runs isVertMinMaxOk_C()
 * with c->QP. The values returned for each outcome are on lines not
 * visible in this excerpt.
 */
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
321
if( isVertDC_C(src, stride, c) ){
322
if( isVertMinMaxOk_C(src, stride, c->QP) )
331
/*
 * Default horizontal deblocking filter, applied per row across the edge
 * between dst[3] and dst[4]. Only rows whose "middle energy" is below
 * 8*c->QP are considered. The clamping of d, the write-back to dst[3] and
 * dst[4], and the row advance are on lines not visible in this excerpt.
 */
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
334
for(y=0; y<BLOCK_SIZE; y++){
335
// Weighted step across the edge (pixels 2..5).
const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
337
if(FFABS(middleEnergy) < 8*c->QP){
338
const int q=(dst[3] - dst[4])/2;
339
// The same measure taken on each side of the edge (pixels 0..3 and 4..7).
const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
340
const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
342
// Amount by which the edge step exceeds the smaller neighbouring step.
int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
346
// Give the correction the opposite sign to the edge step.
d*= FFSIGN(-middleEnergy);
367
* Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
368
* using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
370
/*
 * Horizontal low-pass over the 8 pixels dst[0..7] of each row, built from
 * 7-sample sliding window sums. Positions to the left of dst[0] are
 * represented by `first` and positions to the right of dst[7] by `last`.
 * The declaration of sums[] and the row advance are on lines not visible
 * in this excerpt.
 */
static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
373
for(y=0; y<BLOCK_SIZE; y++){
374
// Use the outside neighbour as the border value only if it is within QP of
// the edge pixel; otherwise repeat the edge pixel itself.
const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
375
const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
378
// sums[k] is the sum of positions k-4 .. k+2. The +4 rides along in every
// sum, so adding two sums below contributes +8, i.e. rounding for the >>4.
sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
379
sums[1] = sums[0] - first + dst[3];
380
sums[2] = sums[1] - first + dst[4];
381
sums[3] = sums[2] - first + dst[5];
382
sums[4] = sums[3] - first + dst[6];
383
sums[5] = sums[4] - dst[0] + dst[7];
384
sums[6] = sums[5] - dst[1] + last;
385
sums[7] = sums[6] - dst[2] + last;
386
sums[8] = sums[7] - dst[3] + last;
387
sums[9] = sums[8] - dst[4] + last;
389
// Each output combines two overlapping window sums plus twice the centre
// pixel, then divides by 16. All sums were computed from the unmodified
// row before any pixel is overwritten.
dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
390
dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
391
dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
392
dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
393
dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
394
dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
395
dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
396
dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
403
* Experimental Filter 1 (Horizontal)
404
* will not damage linear gradients
405
* Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
406
* can only smooth blocks at the expected locations (it cannot smooth them if they did move)
407
* MMX2 version does correct clipping, C version does not
408
* not identical with the vertical one
410
/*
 * Experimental horizontal deblocking filter. The visible code has two
 * parts: construction of a 256-entry lookup table of packed byte
 * corrections, and a per-row loop that measures the step at the block edge.
 * The guard around the table construction, the loop over i, and the pixel
 * write-back in the row loop are on lines not visible in this excerpt.
 */
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
413
// Function-local static: the table pointer persists across calls.
static uint64_t *lut= NULL;
417
// NOTE(review): no check of the av_malloc() result is visible here.
lut = av_malloc(256*8);
420
// Map the index to a signed, doubled value: 0..127 -> 0..254 and
// 128..255 -> -256..-2.
int v= i < 128 ? 2*i : 2*(i-256);
422
// NOTE(review): a, b, c, d are defined twice below; the two groups are
// presumably alternatives under a preprocessor conditional whose directive
// lines are not visible here. Confirm in the full file.
//Simulate 112242211 9-Tap filter
423
uint64_t a= (v/16) & 0xFF;
424
uint64_t b= (v/8) & 0xFF;
425
uint64_t c= (v/4) & 0xFF;
426
uint64_t d= (3*v/8) & 0xFF;
428
//Simulate piecewise linear interpolation
429
uint64_t a= (v/16) & 0xFF;
430
uint64_t b= (v*3/16) & 0xFF;
431
uint64_t c= (v*5/16) & 0xFF;
432
uint64_t d= (7*v/16) & 0xFF;
433
// A..D are the byte-wise negations (two's complement modulo 256).
uint64_t A= (0x100 - a)&0xFF;
434
uint64_t B= (0x100 - b)&0xFF;
435
uint64_t C= (0x100 - c)&0xFF;
436
// NOTE(review): D is derived from c, whereas A, B and C mirror a, b and c.
// This looks like a typo for d; confirm before relying on the table.
uint64_t D= (0x100 - c)&0xFF;
438
// Pack the four corrections and their negations, one per byte.
lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
439
(D<<24) | (C<<16) | (B<<8) | (A);
440
//lut[i] = (v<<32) | (v<<24);
444
for(y=0; y<BLOCK_SIZE; y++){
445
// Steps between neighbouring pixel pairs: b straddles the block edge
// (src[3]/src[4]); a and c lie on either side of it.
int a= src[1] - src[2];
446
int b= src[3] - src[4];
447
int c= src[5] - src[6];
449
// Edge step in excess of the average neighbouring step, floored at 0.
int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
452
// Correction with the opposite sign to the edge step.
int v = d * FFSIGN(-b);
466
* accurate deblock filter
468
/*
 * "Accurate" deblocking filter working along an arbitrary direction: `step`
 * is the distance between consecutive samples of one filtered line.
 * For each line it counts near-equal neighbouring samples; lines judged
 * flat take the low-pass path, and the code after that path applies the
 * edge-energy filter.
 * NOTE(review): the declarations of QP, numEq, min, max, sums and the loop
 * variables, the enclosing loop over lines, the else branches, and the
 * final write-back of the default filter are on lines not visible in this
 * excerpt. `stride` is not referenced by any visible line.
 */
static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
471
const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
472
const int dcThreshold= dcOffset*2 + 1;
474
src+= step*4; // src points to begin of the 8x8 Block
478
// Flatness measure over the 9 neighbouring pairs from src[-1] to src[8];
// each unsigned comparison implements |diff| <= dcOffset.
if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
479
if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
480
if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
481
if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
482
if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
483
if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
484
if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
485
if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
486
if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
487
if(numEq > c->ppMode.flatnessThreshold){
490
// Min/max search that handles samples in pairs: ordering the pair first
// means only the larger is tested against max and the smaller against min.
if(src[0] > src[step]){
498
if(src[x*step] > src[(x+1)*step]){
499
if(src[x *step] > max) max= src[ x *step];
500
if(src[(x+1)*step] < min) min= src[(x+1)*step];
502
if(src[(x+1)*step] > max) max= src[(x+1)*step];
503
if(src[ x *step] < min) min= src[ x *step];
507
// Border values: use the outside neighbour only when it is within QP of
// the edge sample, otherwise repeat the edge sample.
const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
508
const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
511
// 7-sample sliding window sums; the +4 carried in every sum becomes +8
// (rounding for >>4) once two sums are added below.
sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
512
sums[1] = sums[0] - first + src[3*step];
513
sums[2] = sums[1] - first + src[4*step];
514
sums[3] = sums[2] - first + src[5*step];
515
sums[4] = sums[3] - first + src[6*step];
516
sums[5] = sums[4] - src[0*step] + src[7*step];
517
sums[6] = sums[5] - src[1*step] + last;
518
sums[7] = sums[6] - src[2*step] + last;
519
sums[8] = sums[7] - src[3*step] + last;
520
sums[9] = sums[8] - src[4*step] + last;
522
src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
523
src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
524
src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
525
src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
526
src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
527
src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
528
src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
529
src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
532
// Edge-energy filter across the src[3]/src[4] boundary (presumably the
// non-flat path; the branch line itself is not visible here).
const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
534
if(FFABS(middleEnergy) < 8*QP){
535
const int q=(src[3*step] - src[4*step])/2;
536
const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
537
const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
539
// Amount by which the edge step exceeds the smaller neighbouring step.
int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
543
d*= FFSIGN(-middleEnergy);
567
//Note: we have C, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one
569
// Template instantiation: postprocess_template.c is included once per CPU
// variant. Before each inclusion RENAME() is redefined to append a
// variant-specific suffix (_C, _altivec, _MMX, _MMX2, _3DNow) and the
// HAVE_* macros are set for that variant. Each variant is compiled when it
// is the build's static choice or when CONFIG_RUNTIME_CPUDETECT is set.
// NOTE(review): many directive lines of this section (#undef, #define
// COMPILE_*, #ifdef, #endif) are not visible in this excerpt.
#if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
574
#define COMPILE_ALTIVEC
575
#endif //HAVE_ALTIVEC
579
#if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
583
#if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
587
#if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
588
#define COMPILE_3DNOW
590
#endif /* ARCH_X86 */
597
#define HAVE_AMD3DNOW 0
599
#define HAVE_ALTIVEC 0
602
// Plain C variant.
#define RENAME(a) a ## _C
603
#include "postprocess_template.c"
606
#ifdef COMPILE_ALTIVEC
609
#define HAVE_ALTIVEC 1
610
#define RENAME(a) a ## _altivec
611
#include "postprocess_altivec_template.c"
612
#include "postprocess_template.c"
620
#define RENAME(a) a ## _MMX
621
#include "postprocess_template.c"
631
#define RENAME(a) a ## _MMX2
632
#include "postprocess_template.c"
643
#define HAVE_AMD3DNOW 1
644
#define RENAME(a) a ## _3DNow
645
#include "postprocess_template.c"
648
// minor note: the HAVE_xyz is messed up after that line so do not use it.
650
/*
 * Dispatcher for one image plane: copies the caller's mode into the
 * context and forwards all arguments to one of the postProcess_<variant>()
 * functions generated from the template.
 * With CONFIG_RUNTIME_CPUDETECT the variant is chosen from c->cpuCaps on
 * every call; otherwise the choice is made at compile time.
 * NOTE(review): the preprocessor conditions and `else` lines separating
 * the calls below are not visible in this excerpt.
 */
static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
651
const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
653
PPContext *c= (PPContext *)vc;
654
PPMode *ppMode= (PPMode *)vm;
655
// The mode is copied by value into the context on every call.
c->ppMode= *ppMode; //FIXME
657
// Using ifs here as they are faster than function pointers although the
658
// difference would not be measurable here but it is much better because
659
// someone might exchange the CPU without restarting MPlayer ;)
660
#if CONFIG_RUNTIME_CPUDETECT
662
// ordered per speed fastest first
663
if(c->cpuCaps & PP_CPU_CAPS_MMX2)
664
postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
665
else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
666
postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
667
else if(c->cpuCaps & PP_CPU_CAPS_MMX)
668
postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
670
postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
673
if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
674
postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
677
postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
679
#else //CONFIG_RUNTIME_CPUDETECT
681
postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
683
postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
685
postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
687
postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
689
postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
691
#endif //!CONFIG_RUNTIME_CPUDETECT
694
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
695
// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
697
/* -pp Command line Help
699
/*
 * User-facing help text describing the filter names and options understood
 * by pp_get_mode_by_name_and_quality().
 * NOTE(review): three long names printed here do not match the `filters`
 * table, which the parser compares with a case-sensitive strcmp():
 *   help "hadeblock"  vs table "ahdeblock"
 *   help "vadeblock"  vs table "avdeblock"
 *   help "forceQuant" vs table "forcequant"
 * A user typing the long names shown here would get an unknown-filter
 * error; the short names (ha, va, fq) do match.
 */
const char pp_help[] =
700
"Available postprocessing filters:\n"
702
"short long name short long option Description\n"
703
"* * a autoq CPU power dependent enabler\n"
704
" c chrom chrominance filtering enabled\n"
705
" y nochrom chrominance filtering disabled\n"
706
" n noluma luma filtering disabled\n"
707
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
708
" 1. difference factor: default=32, higher -> more deblocking\n"
709
" 2. flatness threshold: default=39, lower -> more deblocking\n"
710
" the h & v deblocking filters share these\n"
711
" so you can't set different thresholds for h / v\n"
712
"vb vdeblock (2 threshold) vertical deblocking filter\n"
713
"ha hadeblock (2 threshold) horizontal deblocking filter\n"
714
"va vadeblock (2 threshold) vertical deblocking filter\n"
715
"h1 x1hdeblock experimental h deblock filter 1\n"
716
"v1 x1vdeblock experimental v deblock filter 1\n"
717
"dr dering deringing filter\n"
718
"al autolevels automatic brightness / contrast\n"
719
" f fullyrange stretch luminance to (0..255)\n"
720
"lb linblenddeint linear blend deinterlacer\n"
721
"li linipoldeint linear interpolating deinterlace\n"
722
"ci cubicipoldeint cubic interpolating deinterlacer\n"
723
"md mediandeint median deinterlacer\n"
724
"fd ffmpegdeint ffmpeg deinterlacer\n"
725
"l5 lowpass5 FIR lowpass deinterlacer\n"
726
"de default hb:a,vb:a,dr:a\n"
727
"fa fast h1:a,v1:a,dr:a\n"
728
"ac ha:a:128:7,va:a,dr:a\n"
729
"tn tmpnoise (3 threshold) temporal noise reducer\n"
730
" 1. <= 2. <= 3. larger -> stronger filtering\n"
731
"fq forceQuant <quantizer> force quantizer\n"
733
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
734
"long form example:\n"
735
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
736
"short form example:\n"
737
"vb:a/hb:a/lb de,-vb\n"
743
/*
 * Parses a filter description string into a freshly allocated PPMode.
 *
 * name    - filter list; filters are separated by ',' or '/', and the
 *           options of one filter by ':'.
 * quality - quality level that replaces the default for any filter given
 *           the "autoq"/"a" option.
 *
 * The string is copied into a local buffer, aliases from replaceTable are
 * expanded in place, and each filter name is looked up in `filters` to set
 * or clear bits in ppMode->lumMode and ppMode->chromMode. Unknown filter
 * names and unrecognised options are counted in ppMode->error.
 * NOTE(review): the return statements and several declarations (p,
 * filterToken, filterName, option, chrom, luma, i, o, tail, ...) are on
 * lines not visible in this excerpt.
 */
pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
745
char temp[GET_MODE_BUFFER_SIZE];
747
static const char filterDelimiters[] = ",/";
748
static const char optionDelimiters[] = ":";
749
struct PPMode *ppMode;
752
// NOTE(review): no NULL check of this allocation is visible before the
// stores below.
ppMode= av_malloc(sizeof(PPMode));
755
// Defaults, in effect unless a filter option overrides them.
ppMode->chromMode= 0;
756
ppMode->maxTmpNoise[0]= 700;
757
ppMode->maxTmpNoise[1]= 1500;
758
ppMode->maxTmpNoise[2]= 3000;
759
ppMode->maxAllowedY= 234;
760
ppMode->minAllowedY= 16;
761
ppMode->baseDcDiff= 256/8;
762
ppMode->flatnessThreshold= 56-16-1;
763
ppMode->maxClippedThreshold= 0.01;
766
// Work on a private, size-bounded copy: the tokenizer and the alias
// expansion below both modify the buffer.
av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE);
768
av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
772
// Very large default so that every minLumQuality/minChromQuality test
// passes unless "autoq" replaces q with the caller's quality.
int q= 1000000; //PP_QUALITY_MAX;
776
char *options[OPTIONS_ARRAY_SIZE];
779
int numOfUnknownOptions=0;
780
int enable=1; //does the user want us to enable or disable the filter
782
// NOTE(review): strtok() keeps hidden static state and writes NULs into
// temp, so this parser is not reentrant.
filterToken= strtok(p, filterDelimiters);
783
if(filterToken == NULL) break;
784
p+= strlen(filterToken) + 1; // p points to next filterToken
785
// Re-tokenize the filter token itself; the first piece is the filter name.
filterName= strtok(filterToken, optionDelimiters);
786
av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
788
// A leading '-' marks the filter as one to disable (handling not visible).
if(*filterName == '-'){
793
for(;;){ //for all options
794
option= strtok(NULL, optionDelimiters);
795
if(option == NULL) break;
797
av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
798
// Generic options are consumed here; anything else is stored for the
// filter-specific handling further down.
if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
799
else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
800
else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
801
else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
803
options[numOfUnknownOptions] = option;
804
numOfUnknownOptions++;
806
// Stop one short of the array size so the NULL terminator below fits.
if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
808
options[numOfUnknownOptions] = NULL;
810
/* replace stuff from the replace Table */
811
for(i=0; replaceTable[2*i]!=NULL; i++){
812
if(!strcmp(replaceTable[2*i], filterName)){
813
int newlen= strlen(replaceTable[2*i + 1]);
817
if(p==NULL) p= temp, *p=0; //last filter
818
else p--, *p=','; //not last filter
821
// Reject the expansion if it would not fit in temp (the handling of that
// case is not visible here).
spaceLeft= p - temp + plen;
822
if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE){
826
// Shift the unparsed remainder right and drop the expansion in front of
// it, so the expanded filters are parsed by the following iterations.
memmove(p + newlen, p, plen+1);
827
memcpy(p, replaceTable[2*i + 1], newlen);
832
for(i=0; filters[i].shortName!=NULL; i++){
833
if( !strcmp(filters[i].longName, filterName)
834
|| !strcmp(filters[i].shortName, filterName)){
835
// Always clear the filter's bits first; they are set again below only if
// the filter is enabled and the quality thresholds are met.
ppMode->lumMode &= ~filters[i].mask;
836
ppMode->chromMode &= ~filters[i].mask;
839
if(!enable) break; // user wants to disable it
841
if(q >= filters[i].minLumQuality && luma)
842
ppMode->lumMode|= filters[i].mask;
843
// chrom==-1 is treated as "not specified": fall back to the filter default.
if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
844
if(q >= filters[i].minChromQuality)
845
ppMode->chromMode|= filters[i].mask;
847
// Filter-specific options. Every recognised option decrements
// numOfUnknownOptions, so what is left counts as errors at the end.
if(filters[i].mask == LEVEL_FIX){
849
ppMode->minAllowedY= 16;
850
ppMode->maxAllowedY= 234;
851
for(o=0; options[o]!=NULL; o++){
852
if( !strcmp(options[o],"fullyrange")
853
||!strcmp(options[o],"f")){
854
ppMode->minAllowedY= 0;
855
ppMode->maxAllowedY= 255;
856
numOfUnknownOptions--;
860
else if(filters[i].mask == TEMP_NOISE_FILTER)
865
for(o=0; options[o]!=NULL; o++){
867
// Base 0 lets strtol() accept decimal, octal (0...) and hex (0x...) input.
ppMode->maxTmpNoise[numOfNoises]=
868
strtol(options[o], &tail, 0);
869
if(tail!=options[o]){
871
numOfUnknownOptions--;
872
if(numOfNoises >= 3) break;
876
else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
877
|| filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
880
// Up to two numeric options: difference factor, then flatness threshold.
for(o=0; options[o]!=NULL && o<2; o++){
882
int val= strtol(options[o], &tail, 0);
883
if(tail==options[o]) break;
885
numOfUnknownOptions--;
886
if(o==0) ppMode->baseDcDiff= val;
887
else ppMode->flatnessThreshold= val;
890
else if(filters[i].mask == FORCE_QUANT){
892
// Default forced quantizer, used when no numeric option follows.
ppMode->forcedQuant= 15;
894
for(o=0; options[o]!=NULL && o<1; o++){
896
int val= strtol(options[o], &tail, 0);
897
if(tail==options[o]) break;
899
numOfUnknownOptions--;
900
ppMode->forcedQuant= val;
905
if(!filterNameOk) ppMode->error++;
906
ppMode->error += numOfUnknownOptions;
909
av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
911
av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
918
void pp_free_mode(pp_mode *mode){
922
/*
 * Stores a new zero-initialised allocation of `size` bytes in *p.
 * NOTE(review): `alignment` is not used by any visible line, the result
 * of av_mallocz() is not checked here, and whether the previous *p is
 * released first cannot be seen in this excerpt.
 */
static void reallocAlign(void **p, int alignment, int size){
924
*p= av_mallocz(size);
927
/*
 * (Re)allocates every working buffer of the context for the given picture
 * size and strides, through reallocAlign(). Records qpStride in the
 * context.
 * NOTE(review): the loop headers around the indexed statements below (the
 * yHistogram fill and the tempBlurred / tempBlurredPast allocations) and
 * the declaration of i are on lines not visible in this excerpt.
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
928
// Picture size in 16x16 macroblocks, rounded up.
int mbWidth = (width+15)>>4;
929
int mbHeight= (height+15)>>4;
933
c->qpStride= qpStride;
935
reallocAlign((void **)&c->tempDst, 8, stride*24);
936
reallocAlign((void **)&c->tempSrc, 8, stride*24);
937
reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
938
reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
940
// Seeds each histogram bin with the same picture-size-dependent value.
c->yHistogram[i]= width*height/64*15/256;
943
//Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
944
reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
945
reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
948
reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
949
// Per-macroblock QP tables: full pictures for nonBQP/stdQP, a single row
// of mbWidth entries for the forced-QP table.
reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
950
reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
951
reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
954
static const char * context_to_name(void * ptr) {
958
/* AVClass installed as c->av_class by pp_get_context(): class name
 * "Postproc", context_to_name as the second member (presumably the
 * item-name callback; confirm against the AVClass declaration), and a
 * NULL third member. */
static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
960
/*
 * Allocates and initialises a postprocessing context for pictures of the
 * given size.
 *
 * width, height - picture dimensions used to size the working buffers.
 * cpuCaps       - capability/format flags; when PP_FORMAT is set, bits
 *                 0-1 and 4-5 carry the horizontal and vertical chroma
 *                 subsampling shifts.
 *
 * NOTE(review): the tail of the function, including its return statement,
 * is on lines not visible in this excerpt.
 */
pp_context *pp_get_context(int width, int height, int cpuCaps){
961
// NOTE(review): the result is passed to memset() below with no NULL check
// visible in between.
PPContext *c= av_malloc(sizeof(PPContext));
962
// Initial guesses; pp_postprocess() reallocates if the real strides are larger.
int stride= FFALIGN(width, 16); //assumed / will realloc if needed
963
int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
965
memset(c, 0, sizeof(PPContext));
966
c->av_class = &av_codec_context_class;
968
if(cpuCaps&PP_FORMAT){
969
c->hChromaSubSample= cpuCaps&0x3;
970
c->vChromaSubSample= (cpuCaps>>4)&0x3;
972
// Fallback of 1 for both shifts (presumably the else branch; that line is
// not visible here).
c->hChromaSubSample= 1;
973
c->vChromaSubSample= 1;
976
reallocBuffers(c, width, height, stride, qpStride);
983
/*
 * Releases the working buffers owned by a context and then zeroes the
 * context structure.
 * NOTE(review): the declaration of i and any further frees (including
 * that of the context itself) are on lines not visible in this excerpt.
 */
void pp_free_context(void *vc){
984
PPContext *c = (PPContext*)vc;
987
for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
988
for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
990
av_free(c->tempBlocks);
991
av_free(c->yHistogram);
994
av_free(c->deintTemp);
995
av_free(c->stdQPTable);
996
av_free(c->nonBQPTable);
997
av_free(c->forcedQPTable);
999
// Clear all the now-dangling pointers in the structure.
memset(c, 0, sizeof(PPContext));
1004
/*
 * Public entry point: postprocesses one picture of three planes.
 *
 * src/srcStride, dst/dstStride - plane pointers and line sizes; index 0 is
 *                    processed at full size, indices 1 and 2 at the
 *                    chroma-subsampled size.
 * width, height    - size of plane 0.
 * QP_store/QPStride - per-macroblock quantizer table; may be NULL.
 * vm, vc           - mode and context from pp_get_mode_by_name_and_quality()
 *                    and pp_get_context().
 * pict_type        - picture type, optionally with PP_PICT_TYPE_QP2.
 *
 * NOTE(review): the declarations of i, j, x and y, several `else` lines
 * and all closing braces are on lines not visible in this excerpt.
 */
void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
1005
uint8_t * dst[3], const int dstStride[3],
1006
int width, int height,
1007
const QP_STORE_T *QP_store, int QPStride,
1008
pp_mode *vm, void *vc, int pict_type)
1010
int mbWidth = (width+15)>>4;
1011
int mbHeight= (height+15)>>4;
1012
PPMode *mode = (PPMode*)vm;
1013
PPContext *c = (PPContext*)vc;
1014
// Strides may be negative, so compare magnitudes.
int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1015
int absQPStride = FFABS(QPStride);
1017
// c->stride and c->QPStride are always positive
1018
// Grow the working buffers when this picture needs larger strides than the
// context was sized for; neither stride ever shrinks.
if(c->stride < minStride || c->qpStride < absQPStride)
1019
reallocBuffers(c, width, height,
1020
FFMAX(minStride, c->stride),
1021
FFMAX(c->qpStride, absQPStride));
1023
// No QP table supplied, or a quantizer is forced: substitute a single row
// of constant QPs. A stride of 0 makes every macroblock row reuse it.
if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
1025
QP_store= c->forcedQPTable;
1026
absQPStride = QPStride = 0;
1027
if(mode->lumMode & FORCE_QUANT)
1028
for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
1030
for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
1033
// With PP_PICT_TYPE_QP2 the incoming QPs are halved into stdQPTable.
if(pict_type & PP_PICT_TYPE_QP2){
1035
const int count= mbHeight * absQPStride;
1036
// Word-at-a-time halving; the 0x7F mask per byte keeps a bit shifted out
// of one entry from leaking into its neighbour.
// NOTE(review): this packs four entries per 32-bit word, so it assumes
// QP_STORE_T is one byte wide and that both tables are suitably aligned
// for uint32_t access. Confirm.
for(i=0; i<(count>>2); i++){
1037
((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1039
// Remaining entries that did not fill a whole word.
for(i<<=2; i<count; i++){
1040
c->stdQPTable[i] = QP_store[i]>>1;
1042
QP_store= c->stdQPTable;
1043
QPStride= absQPStride;
1048
// Dump of the QP table to the log.
for(y=0; y<mbHeight; y++){
1049
for(x=0; x<mbWidth; x++){
1050
av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1052
av_log(c, AV_LOG_INFO, "\n");
1054
av_log(c, AV_LOG_INFO, "\n");
1057
// Refresh nonBQPTable (QPs masked to 6 bits) unless the low three bits of
// pict_type equal 3 (presumably a B picture; confirm against the API header).
if((pict_type&7)!=3){
1060
const int count= mbHeight * QPStride;
1061
// Word-at-a-time copy, with the same one-byte-entry assumption as above.
for(i=0; i<(count>>2); i++){
1062
((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1064
for(i<<=2; i<count; i++){
1065
c->nonBQPTable[i] = QP_store[i] & 0x3F;
1069
// Element-wise variant that indexes the source with the signed QPStride
// and the destination with its magnitude.
for(i=0; i<mbHeight; i++) {
1070
for(j=0; j<absQPStride; j++) {
1071
c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1077
av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1078
mode->lumMode, mode->chromMode);
1080
// Plane 0 at full size (isColor = 0).
postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1081
width, height, QP_store, QPStride, 0, mode, c);
1083
// Switch to the subsampled dimensions for planes 1 and 2.
width = (width )>>c->hChromaSubSample;
1084
height = (height)>>c->vChromaSubSample;
1086
if(mode->chromMode){
1087
postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1088
width, height, QP_store, QPStride, 1, mode, c);
1089
postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1090
width, height, QP_store, QPStride, 2, mode, c);
1092
// No chroma filtering requested: copy planes 1 and 2 unchanged, in one
// call per plane when source and destination strides agree, else row by row.
else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1093
linecpy(dst[1], src[1], height, srcStride[1]);
1094
linecpy(dst[2], src[2], height, srcStride[2]);
1097
for(y=0; y<height; y++){
1098
memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1099
memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);