2
* Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
4
* AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
6
* This file is part of Libav.
8
* Libav is free software; you can redistribute it and/or modify
9
* it under the terms of the GNU General Public License as published by
10
* the Free Software Foundation; either version 2 of the License, or
11
* (at your option) any later version.
13
* Libav is distributed in the hope that it will be useful,
14
* but WITHOUT ANY WARRANTY; without even the implied warranty of
15
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
* GNU General Public License for more details.
18
* You should have received a copy of the GNU General Public License
19
* along with Libav; if not, write to the Free Software
20
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
29
C MMX MMX2 3DNow AltiVec
31
isVertMinMaxOk Ec Ec Ec
32
doVertLowPass E e e Ec
33
doVertDefFilter Ec Ec e e Ec
35
isHorizMinMaxOk a E Ec
36
doHorizLowPass E e e Ec
37
doHorizDefFilter Ec Ec e e Ec
38
do_a_deblock Ec E Ec E
40
Vertical RKAlgo1 E a a
41
Horizontal RKAlgo1 a a
44
LinIpolDeinterlace e E E*
45
CubicIpolDeinterlace a e e*
46
LinBlendDeinterlace e E E*
47
MedianDeinterlace# E Ec Ec
48
TempDeNoiser# E e e Ec
50
* I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51
# more or less self-invented filters, so the exactness is not too meaningful
52
E = Exact implementation
53
e = almost exact implementation (slightly different rounding,...)
54
a = alternative / approximate impl
55
c = checked against the other implementations (-vo md5)
56
p = partially optimized, still some work to do
61
reduce the time wasted on the mem transfer
62
unroll stuff if instructions depend too much on the prior one
63
move YScale thing to the end instead of fixing QP
64
write a faster and higher quality deblocking filter :)
65
make the mainloop more flexible (variable number of blocks at once
66
(the if/else stuff per block is slowing things down)
67
compare the quality & speed of all filters
70
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
74
//Changelog: use git log
77
#include "libavutil/avutil.h"
83
//#define HAVE_AMD3DNOW
86
//#define DEBUG_BRIGHTNESS
87
#include "postprocess.h"
88
#include "postprocess_internal.h"
89
#include "libavutil/avstring.h"
91
// Returns the library version, i.e. the value of LIBPOSTPROC_VERSION_INT.
unsigned postproc_version(void)
93
return LIBPOSTPROC_VERSION_INT;
96
// Returns LIBAV_CONFIGURATION (its definition is not visible in this part of the file).
const char *postproc_configuration(void)
98
return LIBAV_CONFIGURATION;
101
// Returns the license string.
// LICENSE_PREFIX is pasted in front of LIBAV_LICENSE and then skipped again by
// adding sizeof(LICENSE_PREFIX) - 1 to the pointer, so the returned pointer
// starts right after the prefix while the prefix still ends up in the binary.
// NOTE(review): this relies on LIBAV_LICENSE expanding to a string literal — confirm.
const char *postproc_license(void)
103
#define LICENSE_PREFIX "libpostproc license: "
104
return LICENSE_PREFIX LIBAV_LICENSE + sizeof(LICENSE_PREFIX) - 1;
111
// Size in bytes of the temp[] working copy of the mode string in
// pp_get_mode_by_name_and_quality().
#define GET_MODE_BUFFER_SIZE 500
112
// Number of slots in the options[] array that collects the option tokens
// not recognized as generic options (one slot is used for the NULL terminator).
#define OPTIONS_ARRAY_SIZE 10
114
#define TEMP_STRIDE 8
115
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
118
// 64-bit constants declared through DECLARE_ASM_CONST with a first argument of 8
// (presumably the alignment — confirm against the macro definition).
// wNN repeats the 16-bit value 0x00NN in all four words, bNN repeats the byte
// 0xNN in all eight bytes. None of them is referenced in the visible C code.
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
119
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
120
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
121
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
122
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
123
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
124
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
125
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
128
// Integer threshold (20); its consumer is not visible in this part of the file.
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
131
// Table of the filters that can be named in a mode string.
// pp_get_mode_by_name_and_quality() walks it until the entry whose shortName is
// NULL and reads the fields shortName, longName, chromDefault, minLumQuality,
// minChromQuality and mask.
// NOTE(review): the initializers presumably list the fields in exactly that
// order — confirm against the struct PPFilter declaration (not visible here).
// NOTE(review): pp_help advertises the long names "hadeblock", "vadeblock" and
// "forceQuant", but the parser compares with strcmp() against the spellings in
// this table ("ahdeblock", "avdeblock", "forcequant"), so the advertised long
// names are not accepted.
static struct PPFilter filters[]=
133
{"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
134
{"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
135
/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
136
{"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
137
{"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
138
{"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
139
{"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
140
{"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
141
{"dr", "dering", 1, 5, 6, DERING},
142
{"al", "autolevels", 0, 1, 2, LEVEL_FIX},
143
{"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
144
{"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
145
{"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
146
{"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
147
{"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
148
{"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
149
{"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
150
{"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
151
{NULL, NULL,0,0,0,0} //End Marker
154
// Alias table stored as a flat list of (alias, expansion) string pairs:
// entry 2*i is the alias name, entry 2*i+1 the filter string spliced into the
// mode string in its place by pp_get_mode_by_name_and_quality().
// The lookup loop stops at a NULL alias, so the list has to end with a NULL
// entry (the terminator line is not visible in this view).
static const char *replaceTable[]=
156
"default", "hb:a,vb:a,dr:a",
157
"de", "hb:a,vb:a,dr:a",
158
"fast", "h1:a,v1:a,dr:a",
159
"fa", "h1:a,v1:a,dr:a",
160
"ac", "ha:a:128:7,va:a,dr:a",
166
// Emits the x86 `prefetchnta` instruction through a volatile inline-asm statement.
// NOTE(review): the operand list of the asm statement is cut off in this view;
// (%0) presumably refers to p — confirm.
static inline void prefetchnta(void *p)
168
__asm__ volatile( "prefetchnta (%0)\n\t"
173
// Emits the x86 `prefetcht0` instruction through a volatile inline-asm statement.
// NOTE(review): the operand list of the asm statement is cut off in this view;
// (%0) presumably refers to p — confirm.
static inline void prefetcht0(void *p)
175
__asm__ volatile( "prefetcht0 (%0)\n\t"
180
// Emits the x86 `prefetcht1` instruction through a volatile inline-asm statement.
// NOTE(review): the operand list of the asm statement is cut off in this view;
// (%0) presumably refers to p — confirm.
static inline void prefetcht1(void *p)
182
__asm__ volatile( "prefetcht1 (%0)\n\t"
187
// Emits the x86 `prefetcht2` instruction through a volatile inline-asm statement.
// NOTE(review): the operand list of the asm statement is cut off in this view;
// (%0) presumably refers to p — confirm.
static inline void prefetcht2(void *p)
189
__asm__ volatile( "prefetcht2 (%0)\n\t"
195
/* The horizontal functions exist only in C because the MMX
196
* code is faster with vertical filters and transposing. */
199
* Check if the given 8x8 Block is mostly "flat"
201
// Flatness test along rows. In each of the BLOCK_SIZE loop iterations the seven
// neighbouring pairs src[k], src[k+1] (k = 0..6) are compared and numEq counts
// the pairs whose difference lies within [-dcOffset, +dcOffset].
// Returns non-zero when the count exceeds c->ppMode.flatnessThreshold.
// NOTE(review): the declarations of numEq and y and the per-row advance of src
// are not visible in this view; presumably numEq starts at 0 and src moves on
// by stride after each row — confirm.
static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
205
// Allowed deviation: c->nonBQP * baseDcDiff / 256, plus one.
const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
206
const int dcThreshold= dcOffset*2 + 1;
208
for(y=0; y<BLOCK_SIZE; y++){
209
// The unsigned cast turns the two-sided range check into a single compare:
// a negative (diff + dcOffset) wraps around to a huge value and fails the test.
if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
210
if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
211
if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
212
if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
213
if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
214
if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
215
if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
218
return numEq > c->ppMode.flatnessThreshold;
222
* Check if the middle 8x8 Block in the given 8x16 block is flat
224
// Flatness test along columns. After skipping four lines, each of the
// BLOCK_SIZE-1 loop iterations compares the eight pixels src[0..7] with the
// pixels one line below (src[k+stride]); numEq counts the pairs whose
// difference lies within [-dcOffset, +dcOffset].
// Returns non-zero when the count exceeds c->ppMode.flatnessThreshold.
// NOTE(review): the declarations of numEq and y and the per-line advance of src
// are not visible in this view; presumably numEq starts at 0 and src moves on
// by stride after each line — confirm.
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
228
// Allowed deviation: c->nonBQP * baseDcDiff / 256, plus one.
const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
229
const int dcThreshold= dcOffset*2 + 1;
231
src+= stride*4; // src points to begin of the 8x8 Block
232
for(y=0; y<BLOCK_SIZE-1; y++){
233
// The unsigned cast folds -dcOffset <= diff <= dcOffset into one compare.
if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
234
if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
235
if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
236
if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
237
if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
238
if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
239
if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
240
if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
243
return numEq > c->ppMode.flatnessThreshold;
246
// Range check within a row: returns 0 as soon as one of the sampled pixel pairs
// (0,5), (2,7), (4,1), (6,3) differs by more than 2*QP in either direction.
// NOTE(review): the loop over rows, the advance of src and the success return
// value are not visible in this view.
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
250
// (unsigned)(diff + 2*QP) > 4*QP is true exactly when diff is outside [-2*QP, 2*QP].
if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
252
if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
254
if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
256
if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
262
// Range check within columns: walks the block four columns at a time and
// returns 0 as soon as one of the sampled line pairs (0,5), (2,7), (4,1), (6,3)
// — each taken from a different column of the group — differs by more than
// 2*QP in either direction.
// NOTE(review): the declaration of x, any initial adjustment of src and the
// success return value are not visible in this view.
static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
266
for(x=0; x<BLOCK_SIZE; x+=4){
267
// (unsigned)(diff + 2*QP) > 4*QP is true exactly when diff is outside [-2*QP, 2*QP].
if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
268
if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
269
if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
270
if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
275
// Classifies a block for horizontal filtering: first runs the flatness test
// isHorizDC_C() and, only when that succeeds, the range test
// isHorizMinMaxOk_C() with c->QP.
// NOTE(review): the values returned for the individual outcomes are not
// visible in this view.
static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
277
if( isHorizDC_C(src, stride, c) ){
278
if( isHorizMinMaxOk_C(src, stride, c->QP) )
287
// Classifies a block for vertical filtering: first runs the flatness test
// isVertDC_C() and, only when that succeeds, the range test
// isVertMinMaxOk_C() with c->QP.
// NOTE(review): the values returned for the individual outcomes are not
// visible in this view.
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
289
if( isVertDC_C(src, stride, c) ){
290
if( isVertMinMaxOk_C(src, stride, c->QP) )
299
// Default horizontal deblocking step, one row per loop iteration.
// The edge is between dst[3] and dst[4]; a correction is only considered when
// the weighted step across the edge (middleEnergy) is below 8*c->QP.
// NOTE(review): the clipping of d against q, the write-back to dst[3]/dst[4]
// and the per-row advance of dst are not visible in this view.
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
302
for(y=0; y<BLOCK_SIZE; y++){
303
const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
305
if(FFABS(middleEnergy) < 8*c->QP){
306
// Half of the step across the edge.
const int q=(dst[3] - dst[4])/2;
307
// Same weighted-difference measure, taken on either side of the edge.
const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
308
const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
310
// How much the edge measure exceeds the smaller of the two neighbouring measures.
int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
314
// Give d the sign opposite to middleEnergy.
d*= FFSIGN(-middleEnergy);
335
* Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
336
* using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
338
// Horizontal low-pass over the eight pixels dst[0..7] of each row.
// sums[k] are sliding sums over seven consecutive samples (with the rounding
// constant 4 folded in); positions left of dst[0] are filled with `first` and
// positions right of dst[7] with `last`. Each output pixel combines two of
// these sums and twice its own value, which yields the weights
// (1,1,2,2,4,2,2,1,1)/16.
// NOTE(review): the declaration of sums[] and y and the per-row advance of dst
// are not visible in this view.
static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
341
for(y=0; y<BLOCK_SIZE; y++){
342
// Use the pixel outside the block as padding only if it is within QP of the border pixel.
const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
343
const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
346
// Every following sum drops the oldest sample of the window and adds the next one.
sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
347
sums[1] = sums[0] - first + dst[3];
348
sums[2] = sums[1] - first + dst[4];
349
sums[3] = sums[2] - first + dst[5];
350
sums[4] = sums[3] - first + dst[6];
351
sums[5] = sums[4] - dst[0] + dst[7];
352
sums[6] = sums[5] - dst[1] + last;
353
sums[7] = sums[6] - dst[2] + last;
354
sums[8] = sums[7] - dst[3] + last;
355
sums[9] = sums[8] - dst[4] + last;
357
// All sums were computed from the unfiltered row before any pixel is overwritten.
dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
358
dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
359
dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
360
dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
361
dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
362
dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
363
dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
364
dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
371
* Experimental Filter 1 (Horizontal)
372
* will not damage linear gradients
373
* Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
374
* can only smooth blocks at the expected locations (it cannot smooth them if they did move)
375
* The MMX2 version does correct clipping, the C version does not
376
* not identical with the vertical one
378
// Experimental horizontal deblocking filter, one row per loop iteration.
// The first part builds a 256-entry table of packed 64-bit correction patterns
// in a function-static pointer; the second part derives a correction value v
// from the pixel steps a, b and c of each row.
// NOTE(review): two sets of a/b/c/d definitions appear below; the first
// ("9-Tap filter") set is presumably inside a comment whose delimiters are not
// visible in this view — confirm.
// NOTE(review): the av_malloc() result is written to without a visible NULL
// check, and the table lives in a function-static pointer that is filled on
// first use (not safe against concurrent first calls, never freed in the
// visible code).
// NOTE(review): no read of lut[] is visible in the per-row part; check whether
// the table is used at all.
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
381
static uint64_t *lut= NULL;
385
lut = av_malloc(256*8);
388
// Map the table index to a signed, doubled value in [-256, 254].
int v= i < 128 ? 2*i : 2*(i-256);
390
//Simulate 112242211 9-Tap filter
391
uint64_t a= (v/16) & 0xFF;
392
uint64_t b= (v/8) & 0xFF;
393
uint64_t c= (v/4) & 0xFF;
394
uint64_t d= (3*v/8) & 0xFF;
396
//Simulate piecewise linear interpolation
397
uint64_t a= (v/16) & 0xFF;
398
uint64_t b= (v*3/16) & 0xFF;
399
uint64_t c= (v*5/16) & 0xFF;
400
uint64_t d= (7*v/16) & 0xFF;
401
// A..D are the byte-wise negations of a..d.
uint64_t A= (0x100 - a)&0xFF;
402
uint64_t B= (0x100 - b)&0xFF;
403
uint64_t C= (0x100 - c)&0xFF;
404
// NOTE(review): D is computed from c, not from d, which makes it equal to C;
// by analogy with A, B and C this looks like a typo for (0x100 - d) — confirm.
uint64_t D= (0x100 - c)&0xFF;
406
lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
407
(D<<24) | (C<<16) | (B<<8) | (A);
408
//lut[i] = (v<<32) | (v<<24);
412
for(y=0; y<BLOCK_SIZE; y++){
413
// Steps between neighbouring pixel pairs; b is the step across the block edge (src[3]/src[4]).
int a= src[1] - src[2];
414
int b= src[3] - src[4];
415
int c= src[5] - src[6];
417
// Amount by which the edge step exceeds the average of the two inner steps, never negative.
int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
420
// Correction magnitude d with the sign opposite to b.
int v = d * FFSIGN(-b);
434
* accurate deblock filter
436
// "Accurate" deblocking filter working along an arbitrary direction: `step` is
// the distance between successive samples of one line across the edge.
// For each line it first counts the neighbouring sample pairs that are within
// +-dcOffset of each other. If the count exceeds the flatness threshold, the
// line is treated as flat and (subject to a min/max check whose condition is
// not visible here) smoothed with the sliding-sum low-pass; otherwise the
// default edge correction based on middleEnergy/leftEnergy/rightEnergy is used.
// NOTE(review): the outer loop over lines, the declarations of numEq, min, max,
// sums[] and QP, the condition guarding the low-pass, the write-back of the
// default filter and the advance by stride are not visible in this view.
static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
439
// Allowed deviation: c->nonBQP * baseDcDiff / 256, plus one.
const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
440
const int dcThreshold= dcOffset*2 + 1;
442
src+= step*4; // src points to begin of the 8x8 Block
446
// Nine neighbouring pairs, from the sample before the block to the one after it.
if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
447
if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
448
if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
449
if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
450
if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
451
if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
452
if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
453
if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
454
if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
455
if(numEq > c->ppMode.flatnessThreshold){
458
// Track the smallest and largest sample of the line, handling two samples per iteration.
if(src[0] > src[step]){
466
if(src[x*step] > src[(x+1)*step]){
467
if(src[x *step] > max) max= src[ x *step];
468
if(src[(x+1)*step] < min) min= src[(x+1)*step];
470
if(src[(x+1)*step] > max) max= src[(x+1)*step];
471
if(src[ x *step] < min) min= src[ x *step];
475
// Use the sample outside the block as padding only if it is within QP of the border sample.
const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
476
const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
479
// Sliding sums over seven samples with the rounding constant 4 folded in.
sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
480
sums[1] = sums[0] - first + src[3*step];
481
sums[2] = sums[1] - first + src[4*step];
482
sums[3] = sums[2] - first + src[5*step];
483
sums[4] = sums[3] - first + src[6*step];
484
sums[5] = sums[4] - src[0*step] + src[7*step];
485
sums[6] = sums[5] - src[1*step] + last;
486
sums[7] = sums[6] - src[2*step] + last;
487
sums[8] = sums[7] - src[3*step] + last;
488
sums[9] = sums[8] - src[4*step] + last;
490
// Two sums plus twice the sample itself give the weights (1,1,2,2,4,2,2,1,1)/16.
src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
491
src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
492
src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
493
src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
494
src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
495
src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
496
src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
497
src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
500
// Non-flat line: same edge measure as doHorizDefFilter_C(), with samples `step` apart.
const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
502
if(FFABS(middleEnergy) < 8*QP){
503
const int q=(src[3*step] - src[4*step])/2;
504
const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
505
const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
507
int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
511
d*= FFSIGN(-middleEnergy);
535
//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
537
// Template instantiation section.
// The first group of conditionals decides which variants get compiled
// (COMPILE_* macros); with CONFIG_RUNTIME_CPUDETECT every condition that
// mentions it is true. Afterwards postprocess_template.c is included once per
// variant, each time with RENAME() appending a different suffix (_C, _altivec,
// _MMX, _MMX2, _3DNow) and with the HAVE_* macros redefined for that variant.
// NOTE(review): several #if/#else/#endif/#undef/#define lines of this section
// are not visible in this view, so the pairing of the conditionals below
// cannot be checked here.
#if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
542
#define COMPILE_ALTIVEC
543
#endif //HAVE_ALTIVEC
547
#if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
551
#if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
555
#if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
556
#define COMPILE_3DNOW
558
#endif /* ARCH_X86 */
565
#define HAVE_AMD3DNOW 0
567
#define HAVE_ALTIVEC 0
570
// Plain C variant: every templated function gets the suffix _C.
#define RENAME(a) a ## _C
571
#include "postprocess_template.c"
574
// AltiVec variant: the AltiVec-specific template is included before the generic one.
#ifdef COMPILE_ALTIVEC
577
#define HAVE_ALTIVEC 1
578
#define RENAME(a) a ## _altivec
579
#include "postprocess_altivec_template.c"
580
#include "postprocess_template.c"
588
// MMX variant.
#define RENAME(a) a ## _MMX
589
#include "postprocess_template.c"
599
// MMX2 variant.
#define RENAME(a) a ## _MMX2
600
#include "postprocess_template.c"
611
// 3DNow! variant.
#define HAVE_AMD3DNOW 1
612
#define RENAME(a) a ## _3DNow
613
#include "postprocess_template.c"
616
// minor note: the HAVE_xyz is messed up after that line so do not use it.
618
// Dispatches one plane to the postProcess_<variant>() instantiation of the template.
// vm and vc are the opaque mode and context handles; the mode is copied into
// the context before dispatching.
// With CONFIG_RUNTIME_CPUDETECT the variant is picked from the bits of
// c->cpuCaps (MMX2 first, then 3DNow, then MMX, otherwise C; on the AltiVec
// path AltiVec, otherwise C). Without it exactly one call is compiled in.
// NOTE(review): the #if/#else lines that separate the individual calls are not
// visible in this view.
static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
619
const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
621
PPContext *c= (PPContext *)vc;
622
PPMode *ppMode= (PPMode *)vm;
623
// The whole mode struct is copied by value into the context.
c->ppMode= *ppMode; //FIXME
625
// Using ifs here as they are faster than function pointers although the
626
// difference would not be measurable here but it is much better because
627
// someone might exchange the CPU without restarting MPlayer ;)
628
#if CONFIG_RUNTIME_CPUDETECT
630
// ordered per speed fastest first
631
if(c->cpuCaps & PP_CPU_CAPS_MMX2)
632
postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
633
else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
634
postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
635
else if(c->cpuCaps & PP_CPU_CAPS_MMX)
636
postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
638
postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
641
if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
642
postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
645
postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
647
#else /* CONFIG_RUNTIME_CPUDETECT */
649
postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
651
postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
653
postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
655
postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
657
postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
659
#endif /* !CONFIG_RUNTIME_CPUDETECT */
662
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
663
// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
665
/* -pp Command line Help
667
// Help text describing the mode-string syntax: one line per filter with its
// short name, long name and options, followed by the grammar and two examples.
// The defaults quoted for the deblocking thresholds (32 and 39) match the
// values pp_get_mode_by_name_and_quality() assigns to baseDcDiff (256/8) and
// flatnessThreshold (56-16-1).
// NOTE(review): the long names "hadeblock", "vadeblock" and "forceQuant" shown
// here differ from the spellings in the filters[] table ("ahdeblock",
// "avdeblock", "forcequant"); the parser matches with strcmp(), so the names
// as printed here are rejected. Decide which side to change.
const char pp_help[] =
668
"Available postprocessing filters:\n"
670
"short long name short long option Description\n"
671
"* * a autoq CPU power dependent enabler\n"
672
" c chrom chrominance filtering enabled\n"
673
" y nochrom chrominance filtering disabled\n"
674
" n noluma luma filtering disabled\n"
675
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
676
" 1. difference factor: default=32, higher -> more deblocking\n"
677
" 2. flatness threshold: default=39, lower -> more deblocking\n"
678
" the h & v deblocking filters share these\n"
679
" so you can't set different thresholds for h / v\n"
680
"vb vdeblock (2 threshold) vertical deblocking filter\n"
681
"ha hadeblock (2 threshold) horizontal deblocking filter\n"
682
"va vadeblock (2 threshold) vertical deblocking filter\n"
683
"h1 x1hdeblock experimental h deblock filter 1\n"
684
"v1 x1vdeblock experimental v deblock filter 1\n"
685
"dr dering deringing filter\n"
686
"al autolevels automatic brightness / contrast\n"
687
" f fullyrange stretch luminance to (0..255)\n"
688
"lb linblenddeint linear blend deinterlacer\n"
689
"li linipoldeint linear interpolating deinterlace\n"
690
"ci cubicipoldeint cubic interpolating deinterlacer\n"
691
"md mediandeint median deinterlacer\n"
692
"fd ffmpegdeint ffmpeg deinterlacer\n"
693
"l5 lowpass5 FIR lowpass deinterlacer\n"
694
"de default hb:a,vb:a,dr:a\n"
695
"fa fast h1:a,v1:a,dr:a\n"
696
"ac ha:a:128:7,va:a,dr:a\n"
697
"tn tmpnoise (3 threshold) temporal noise reducer\n"
698
" 1. <= 2. <= 3. larger -> stronger filtering\n"
699
"fq forceQuant <quantizer> force quantizer\n"
701
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
702
"long form example:\n"
703
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
704
"short form example:\n"
705
"vb:a/hb:a/lb de,-vb\n"
711
// Parses a mode string (see pp_help for the syntax) into a newly allocated PPMode.
//   name    - mode string; it is copied into a local buffer of
//             GET_MODE_BUFFER_SIZE bytes and tokenized there with strtok().
//   quality - value substituted for a filter's quality when that filter carries
//             the "autoq"/"a" option; without it a filter uses q = 1000000.
// Filters are separated by ',' or '/', options by ':'. A leading '-' on a
// filter name is tested for below (presumably to disable the filter — the
// branch body is not visible). Unknown filter names and unconsumed options are
// counted in ppMode->error, which is reported with an error log at the end.
// NOTE(review): the declarations of p, filterToken, filterName, option, chrom,
// luma, i, o, tail, plen, spaceLeft, numOfNoises and filterNameOk, the outer
// loop header, several else/closing lines and the return statements are not
// visible in this view.
// NOTE(review): strtok() keeps hidden static state, so this function is not
// reentrant or thread-safe.
pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
713
char temp[GET_MODE_BUFFER_SIZE];
715
static const char filterDelimiters[] = ",/";
716
static const char optionDelimiters[] = ":";
717
struct PPMode *ppMode;
720
// NOTE(review): the result of av_malloc() is dereferenced right below without
// a visible NULL check.
ppMode= av_malloc(sizeof(PPMode));
723
// Defaults that the filter options parsed below may override.
ppMode->chromMode= 0;
724
ppMode->maxTmpNoise[0]= 700;
725
ppMode->maxTmpNoise[1]= 1500;
726
ppMode->maxTmpNoise[2]= 3000;
727
ppMode->maxAllowedY= 234;
728
ppMode->minAllowedY= 16;
729
ppMode->baseDcDiff= 256/8;
730
ppMode->flatnessThreshold= 56-16-1;
731
ppMode->maxClippedThreshold= 0.01;
734
// Work on a zero-filled private copy because strtok() writes into its input.
// NOTE(review): name is passed on unchecked; a NULL name is not handled in the
// visible code.
memset(temp, 0, GET_MODE_BUFFER_SIZE);
735
av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
737
av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
741
int q= 1000000; //PP_QUALITY_MAX;
745
char *options[OPTIONS_ARRAY_SIZE];
748
int numOfUnknownOptions=0;
749
int enable=1; //does the user want us to enable or disable the filter
751
filterToken= strtok(p, filterDelimiters);
752
if(filterToken == NULL) break;
753
p+= strlen(filterToken) + 1; // p points to next filterToken
754
// NOTE(review): strtok() returns NULL when filterToken consists only of ':'
// characters; filterName is then logged with %s and dereferenced below.
filterName= strtok(filterToken, optionDelimiters);
755
av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
757
if(*filterName == '-'){
762
for(;;){ //for all options
763
option= strtok(NULL, optionDelimiters);
764
if(option == NULL) break;
766
av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
767
// Generic options are consumed here; everything else is kept for the
// filter-specific handling further down.
if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
768
else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
769
else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
770
else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
772
options[numOfUnknownOptions] = option;
773
numOfUnknownOptions++;
775
// Stop one slot early so the NULL terminator written below always fits.
if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
777
options[numOfUnknownOptions] = NULL;
779
/* replace stuff from the replace Table */
780
for(i=0; replaceTable[2*i]!=NULL; i++){
781
if(!strcmp(replaceTable[2*i], filterName)){
782
int newlen= strlen(replaceTable[2*i + 1]);
786
if(p==NULL) p= temp, *p=0; //last filter
787
else p--, *p=','; //not last filter
790
// Despite its name, spaceLeft is the number of bytes already in use up to the
// end of the remaining string.
spaceLeft= p - temp + plen;
791
if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE - 1){
795
// Make room at p and splice the expansion in front of the unparsed remainder,
// so the expansion is tokenized by the following iterations.
memmove(p + newlen, p, plen+1);
796
memcpy(p, replaceTable[2*i + 1], newlen);
801
for(i=0; filters[i].shortName!=NULL; i++){
802
if( !strcmp(filters[i].longName, filterName)
803
|| !strcmp(filters[i].shortName, filterName)){
804
// Clear the filter's bit first; it is set again below only if enabled and
// the quality is sufficient.
ppMode->lumMode &= ~filters[i].mask;
805
ppMode->chromMode &= ~filters[i].mask;
808
if(!enable) break; // user wants to disable it
810
if(q >= filters[i].minLumQuality && luma)
811
ppMode->lumMode|= filters[i].mask;
812
if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
813
if(q >= filters[i].minChromQuality)
814
ppMode->chromMode|= filters[i].mask;
816
// autolevels: "fullyrange"/"f" widens the allowed luma range to 0..255.
if(filters[i].mask == LEVEL_FIX){
818
ppMode->minAllowedY= 16;
819
ppMode->maxAllowedY= 234;
820
for(o=0; options[o]!=NULL; o++){
821
if( !strcmp(options[o],"fullyrange")
822
||!strcmp(options[o],"f")){
823
ppMode->minAllowedY= 0;
824
ppMode->maxAllowedY= 255;
825
numOfUnknownOptions--;
829
else if(filters[i].mask == TEMP_NOISE_FILTER)
834
// tmpnoise: up to three numeric thresholds.
// NOTE(review): the strtol() result is stored before it is known whether the
// option was numeric; a non-numeric option therefore stores 0 in the current slot.
for(o=0; options[o]!=NULL; o++){
836
ppMode->maxTmpNoise[numOfNoises]=
837
strtol(options[o], &tail, 0);
838
if(tail!=options[o]){
840
numOfUnknownOptions--;
841
if(numOfNoises >= 3) break;
845
else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
846
|| filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
849
// Deblocking filters: first numeric option is the difference factor, second
// the flatness threshold; both are stored in the shared ppMode fields.
for(o=0; options[o]!=NULL && o<2; o++){
851
int val= strtol(options[o], &tail, 0);
852
if(tail==options[o]) break;
854
numOfUnknownOptions--;
855
if(o==0) ppMode->baseDcDiff= val;
856
else ppMode->flatnessThreshold= val;
859
else if(filters[i].mask == FORCE_QUANT){
861
// forcequant: quantizer defaults to 15 unless a numeric option is given.
ppMode->forcedQuant= 15;
863
for(o=0; options[o]!=NULL && o<1; o++){
865
int val= strtol(options[o], &tail, 0);
866
if(tail==options[o]) break;
868
numOfUnknownOptions--;
869
ppMode->forcedQuant= val;
874
if(!filterNameOk) ppMode->error++;
875
// Every option that no handler consumed counts as one error.
ppMode->error += numOfUnknownOptions;
878
av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
880
av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
887
// Releases a mode handle.
// NOTE(review): the body is not visible in this view; presumably it frees the
// PPMode allocated by pp_get_mode_by_name_and_quality() — confirm.
void pp_free_mode(pp_mode *mode){
891
// Stores a fresh zero-initialized buffer of `size` bytes in *p.
// NOTE(review): the `alignment` argument is not used in the visible code, and
// no release of the previous *p is visible in this view (a line may be
// missing) — confirm that the old buffer is freed before it is overwritten.
// NOTE(review): the av_mallocz() result is not checked here and the function
// returns void, so callers cannot detect allocation failure directly.
static void reallocAlign(void **p, int alignment, int size){
893
*p= av_mallocz(size);
896
// (Re)allocates all per-context work buffers for the given picture size and strides.
// Buffer sizes are derived from stride, height, width, qpStride and the picture
// size in 16x16 macroblocks (rounded up).
// NOTE(review): the loop headers around the yHistogram[i] and tempBlurred[i]
// statements, the declaration of i and any assignment of c->stride are not
// visible in this view.
// NOTE(review): none of the allocations is checked; c->yHistogram is written
// right after it is allocated.
// NOTE(review): all size expressions are computed in int and could overflow
// for very large dimensions — the valid range of width/height/stride is not
// established here.
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
897
// Picture size in 16x16 macroblocks, rounded up.
int mbWidth = (width+15)>>4;
898
int mbHeight= (height+15)>>4;
902
c->qpStride= qpStride;
904
reallocAlign((void **)&c->tempDst, 8, stride*24);
905
reallocAlign((void **)&c->tempSrc, 8, stride*24);
906
reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
907
reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
909
// Initial histogram fill, proportional to the picture area.
c->yHistogram[i]= width*height/64*15/256;
912
//Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
913
reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
914
reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
917
reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
918
// QP tables: one row of qpStride entries per macroblock row; the forced-QP
// table holds a single row of mbWidth entries.
reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
919
reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
920
reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
923
// Name callback placed in the av_codec_context_class initializer.
// NOTE(review): the body (the returned string) is not visible in this view.
static const char * context_to_name(void * ptr) {
927
// AVClass instance whose address pp_get_context() stores in the context's
// av_class field; initialized with the name "Postproc", the context_to_name
// callback and NULL.
static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
929
// Creates a postprocessing context for pictures of the given size.
//   width, height - picture dimensions used to size the work buffers.
//   cpuCaps       - flag word; when the PP_FORMAT bit is set, bits 0-1 give the
//                   horizontal and bits 4-5 the vertical chroma subsampling
//                   shift, otherwise both shifts are set to 1.
// The initial stride and qpStride are only estimates derived from width.
// NOTE(review): the av_malloc() result is passed to memset() and dereferenced
// without a visible NULL check.
// NOTE(review): the else line, any remaining field initializations and the
// return statement are not visible in this view.
pp_context *pp_get_context(int width, int height, int cpuCaps){
930
PPContext *c= av_malloc(sizeof(PPContext));
931
int stride= FFALIGN(width, 16); //assumed / will realloc if needed
932
int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
934
memset(c, 0, sizeof(PPContext));
935
c->av_class = &av_codec_context_class;
937
if(cpuCaps&PP_FORMAT){
938
c->hChromaSubSample= cpuCaps&0x3;
939
c->vChromaSubSample= (cpuCaps>>4)&0x3;
941
c->hChromaSubSample= 1;
942
c->vChromaSubSample= 1;
945
reallocBuffers(c, width, height, stride, qpStride);
952
// Releases the work buffers owned by a context and clears the context struct.
// NOTE(review): reallocBuffers() also allocates c->tempDst and c->tempSrc, but
// no av_free() for them is visible here, nor is the release of the context
// itself or the declaration of i — lines may be missing from this view; confirm
// that nothing leaks.
// NOTE(review): vc is dereferenced unconditionally; a NULL context is not
// handled in the visible code.
void pp_free_context(void *vc){
953
PPContext *c = (PPContext*)vc;
956
for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
957
for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
959
av_free(c->tempBlocks);
960
av_free(c->yHistogram);
963
av_free(c->deintTemp);
964
av_free(c->stdQPTable);
965
av_free(c->nonBQPTable);
966
av_free(c->forcedQPTable);
968
// Wipe the struct so stale buffer pointers cannot be reused by accident.
memset(c, 0, sizeof(PPContext));
973
void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
974
uint8_t * dst[3], const int dstStride[3],
975
int width, int height,
976
const QP_STORE_T *QP_store, int QPStride,
977
pp_mode *vm, void *vc, int pict_type)
979
int mbWidth = (width+15)>>4;
980
int mbHeight= (height+15)>>4;
981
PPMode *mode = (PPMode*)vm;
982
PPContext *c = (PPContext*)vc;
983
int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
984
int absQPStride = FFABS(QPStride);
986
// c->stride and c->QPStride are always positive
987
if(c->stride < minStride || c->qpStride < absQPStride)
988
reallocBuffers(c, width, height,
989
FFMAX(minStride, c->stride),
990
FFMAX(c->qpStride, absQPStride));
992
if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
994
QP_store= c->forcedQPTable;
995
absQPStride = QPStride = 0;
996
if(mode->lumMode & FORCE_QUANT)
997
for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
999
for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
1002
if(pict_type & PP_PICT_TYPE_QP2){
1004
const int count= mbHeight * absQPStride;
1005
for(i=0; i<(count>>2); i++){
1006
((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1008
for(i<<=2; i<count; i++){
1009
c->stdQPTable[i] = QP_store[i]>>1;
1011
QP_store= c->stdQPTable;
1012
QPStride= absQPStride;
1017
for(y=0; y<mbHeight; y++){
1018
for(x=0; x<mbWidth; x++){
1019
av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1021
av_log(c, AV_LOG_INFO, "\n");
1023
av_log(c, AV_LOG_INFO, "\n");
1026
if((pict_type&7)!=3){
1029
const int count= mbHeight * QPStride;
1030
for(i=0; i<(count>>2); i++){
1031
((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1033
for(i<<=2; i<count; i++){
1034
c->nonBQPTable[i] = QP_store[i] & 0x3F;
1038
for(i=0; i<mbHeight; i++) {
1039
for(j=0; j<absQPStride; j++) {
1040
c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1046
av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1047
mode->lumMode, mode->chromMode);
1049
postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1050
width, height, QP_store, QPStride, 0, mode, c);
1052
width = (width )>>c->hChromaSubSample;
1053
height = (height)>>c->vChromaSubSample;
1055
if(mode->chromMode){
1056
postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1057
width, height, QP_store, QPStride, 1, mode, c);
1058
postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1059
width, height, QP_store, QPStride, 2, mode, c);
1061
else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1062
linecpy(dst[1], src[1], height, srcStride[1]);
1063
linecpy(dst[2], src[2], height, srcStride[2]);
1066
for(y=0; y<height; y++){
1067
memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1068
memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);