2
Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
4
This program is free software; you can redistribute it and/or modify
5
it under the terms of the GNU General Public License as published by
6
the Free Software Foundation; either version 2 of the License, or
7
(at your option) any later version.
9
This program is distributed in the hope that it will be useful,
10
but WITHOUT ANY WARRANTY; without even the implied warranty of
11
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
GNU General Public License for more details.
14
You should have received a copy of the GNU General Public License
15
along with this program; if not, write to the Free Software
16
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29
doVertDefFilter Ec Ec e e
33
doHorizDefFilter Ec Ec e e
35
Vertical RKAlgo1 E a a
36
Horizontal RKAlgo1 a a
39
LinIpolDeinterlace e E E*
40
CubicIpolDeinterlace a e e*
41
LinBlendDeinterlace e E E*
42
MedianDeinterlace# E Ec Ec
45
* I don't have a 3DNow CPU -> it's untested, but no one said it doesn't work, so it seems to work
46
# more or less self-invented filters, so the exactness isn't too meaningful
47
E = Exact implementation
48
e = almost exact implementation (slightly different rounding, ...)
49
a = alternative / approximate impl
50
c = checked against the other implementations (-vo md5)
55
reduce the time wasted on the mem transfer
56
unroll stuff if instructions depend too much on the prior one
57
move YScale thing to the end instead of fixing QP
58
write a faster and higher quality deblocking filter :)
59
make the mainloop more flexible (variable number of blocks at once
60
(the if/else stuff per block is slowing things down)
61
compare the quality & speed of all filters
64
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
68
//Changelog: use the CVS log
82
//#define DEBUG_BRIGHTNESS
84
#include "../fastmemcpy.h"
86
#include "postprocess.h"
87
#include "postprocess_internal.h"
89
#include "mangle.h" //FIXME should be supressed
92
// Maps memalign(a,b) onto plain malloc(b); the alignment argument a is
// discarded, so callers only get whatever alignment malloc() provides.
// NOTE(review): presumably wrapped in a guard for platforms without a real
// memalign(); that guard is not visible in this excerpt - confirm.
#define memalign(a,b) malloc(b)
95
// Function-like helpers. Each one evaluates its argument(s) more than once,
// so never pass expressions with side effects (e.g. i++).
#define MIN(a,b) ((a) > (b) ? (b) : (a))
96
#define MAX(a,b) ((a) < (b) ? (b) : (a))
97
#define ABS(a) ((a) > 0 ? (a) : (-(a)))
98
// Yields 1 for positive values and -1 otherwise; note that SIGN(0) is -1.
#define SIGN(a) ((a) > 0 ? 1 : -1)
100
// Size of the scratch buffer holding the working copy of the mode string
// in pp_get_mode_by_name_and_quality().
#define GET_MODE_BUFFER_SIZE 500
101
// Capacity of the per-filter array of option tokens that were not recognised
// as generic options (autoq / chrom / nochrom).
#define OPTIONS_ARRAY_SIZE 10
103
#define TEMP_STRIDE 8
104
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
107
// 8-byte aligned 64-bit constants. Each holds one small value replicated
// across the whole quadword: the w* constants repeat a 16-bit word four times,
// the b* constants repeat a byte eight times.
// NOTE(review): presumably operands for the SIMD code pulled in through
// postprocess_template.c - confirm against the template.
static uint64_t __attribute__((aligned(8))) w05= 0x0005000500050005LL;
108
static uint64_t __attribute__((aligned(8))) w20= 0x0020002000200020LL;
109
static uint64_t __attribute__((aligned(8))) b00= 0x0000000000000000LL;
110
static uint64_t __attribute__((aligned(8))) b01= 0x0101010101010101LL;
111
static uint64_t __attribute__((aligned(8))) b02= 0x0202020202020202LL;
112
static uint64_t __attribute__((aligned(8))) b08= 0x0808080808080808LL;
113
static uint64_t __attribute__((aligned(8))) b80= 0x8080808080808080LL;
117
// Three consecutive 256-entry ranges. global_init() zeroes the first and the
// last range; clip_tab points at the start of the middle range, so it can be
// indexed with values from -256 to 511.
static uint8_t clip_table[3*256];
118
static uint8_t * const clip_tab= clip_table + 256;
120
// Diagnostic level; the visible code prints parser traces when it is > 1.
static int verbose= 0;
122
static const int deringThreshold= 20;
125
// Table of all filters that can be named in a mode string. It is scanned
// linearly by pp_get_mode_by_name_and_quality(), which matches the user token
// against both names and stops at the entry whose shortName is NULL.
// NOTE(review): the initializers are positional; judging from the member names
// used by the parser the order is presumably
//   shortName, longName, chromDefault, minLumQuality, minChromQuality, mask
// - confirm against the struct PPFilter declaration.
static struct PPFilter filters[]=
127
{"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
128
{"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
129
/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
130
{"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
131
{"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
132
{"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
133
{"dr", "dering", 1, 5, 6, DERING},
134
{"al", "autolevels", 0, 1, 2, LEVEL_FIX},
135
{"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
136
{"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
137
{"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
138
{"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
139
{"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
140
{"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
141
{"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
142
{"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
143
{NULL, NULL,0,0,0,0} //End Marker
146
// Alias table stored as flat pairs: entry 2*i is the alias the user may type,
// entry 2*i+1 is the filter list spliced into the mode string in its place.
// The parser walks the pairs until it finds a NULL alias (the terminating
// entries are not visible in this excerpt).
// The long and the short spelling of each alias expand to the same text.
// NOTE(review): the help text advertises default as hb:a,vb:a,dr:a,al and
// fast as h1:a,v1:a,dr:a,al, whereas the expansions here also append tmpnoise
// - confirm which of the two is intended.
static char *replaceTable[]=
148
"default", "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
149
"de", "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
150
"fast", "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
151
"fa", "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
156
// Exists only to reference every packed constant once so that the compiler
// does not warn about unused static variables. The condition cannot alter any
// value in a meaningful way: when it holds, b00 is overwritten with the 0 it
// already contains by definition.
static inline void unusedVariableWarningFixer()
158
if(w05 + w20 + b00 + b01 + b02 + b08 + b80 == 0) b00=0;
164
// Emits a single prefetchnta instruction through inline asm, with %0 as the
// address operand. The operand list itself is not visible in this excerpt;
// presumably it binds %0 to p - confirm.
static inline void prefetchnta(void *p)
166
asm volatile( "prefetchnta (%0)\n\t"
171
// Emits a single prefetcht0 instruction through inline asm, with %0 as the
// address operand. The operand list itself is not visible in this excerpt;
// presumably it binds %0 to p - confirm.
static inline void prefetcht0(void *p)
173
asm volatile( "prefetcht0 (%0)\n\t"
178
// Emits a single prefetcht1 instruction through inline asm, with %0 as the
// address operand. The operand list itself is not visible in this excerpt;
// presumably it binds %0 to p - confirm.
static inline void prefetcht1(void *p)
180
asm volatile( "prefetcht1 (%0)\n\t"
185
// Emits a single prefetcht2 instruction through inline asm, with %0 as the
// address operand. The operand list itself is not visible in this excerpt;
// presumably it binds %0 to p - confirm.
static inline void prefetcht2(void *p)
187
asm volatile( "prefetcht2 (%0)\n\t"
193
// The horizontal functions exist only in C because the MMX code is faster with vertical filters plus transposing
196
* Check if the given 8x8 Block is mostly "flat"
198
// Flatness test along rows.
// For each of BLOCK_SIZE iterations the seven horizontally adjacent pixel
// pairs src[0..7] are compared; every pair whose difference lies within
// +-dcOffset increments numEq.
// Returns nonzero when numEq exceeds c->ppMode.flatnessThreshold.
//   src    - first pixel of the row being examined
//   stride - not used by any visible line; presumably src is advanced by it
//            once per iteration on a line missing from this excerpt - confirm
//   c      - supplies nonBQP, ppMode.baseDcDiff and ppMode.flatnessThreshold
static inline int isHorizDC(uint8_t src[], int stride, PPContext *c)
202
// Tolerance: nonBQP scaled by baseDcDiff/256, plus one.
const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
203
const int dcThreshold= dcOffset*2 + 1;
205
for(y=0; y<BLOCK_SIZE; y++)
207
// Single-compare range check: adding dcOffset and casting to unsigned makes
// every difference below -dcOffset wrap to a huge value, so the test is
// equivalent to -dcOffset <= diff <= dcOffset.
if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
208
if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
209
if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
210
if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
211
if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
212
if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
213
if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
216
return numEq > c->ppMode.flatnessThreshold;
220
* Check if the middle 8x8 Block in the given 8x16 block is flat
222
// Flatness test along columns (plain C version).
// Starting four lines below src, each of the eight pixels of a line is
// compared with the pixel one stride further down, for BLOCK_SIZE-1 line
// pairs; every pair whose difference lies within +-dcOffset increments numEq.
// Returns nonzero when numEq exceeds c->ppMode.flatnessThreshold.
//   src    - top of the surrounding area; the examined block begins 4 lines in
//   stride - distance in bytes between vertically adjacent pixels
//   c      - supplies nonBQP, ppMode.baseDcDiff and ppMode.flatnessThreshold
// NOTE(review): the per-iteration advance of src is not visible in this
// excerpt - presumably src+= stride inside the loop; confirm.
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
225
// Tolerance: nonBQP scaled by baseDcDiff/256, plus one.
const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
226
const int dcThreshold= dcOffset*2 + 1;
228
src+= stride*4; // src points to begin of the 8x8 Block
229
for(y=0; y<BLOCK_SIZE-1; y++)
231
// Unsigned-cast range check, equivalent to -dcOffset <= diff <= dcOffset.
if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
232
if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
233
if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
234
if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
235
if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
236
if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
237
if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
238
if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
241
return numEq > c->ppMode.flatnessThreshold;
244
// Cheap range test on a row: returns 0 as soon as a sampled pair of pixels
// differs by more than 2*QP in either direction.
// Each test uses the unsigned-cast trick: (unsigned)(diff + 2*QP) > 4*QP is
// true exactly when diff < -2*QP or diff > 2*QP.
// NOTE(review): the first four tests sample the pairs (0,5) (2,7) (4,1) (6,3)
// while the last one samples (0,7); these look like alternative variants
// selected by preprocessor lines that are missing from this excerpt, and the
// loop over rows plus the success return are not visible either - confirm.
static inline int isHorizMinMaxOk(uint8_t src[], int stride, int QP)
249
if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
251
if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
253
if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
255
if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
260
if((unsigned)(src[0] - src[7] + 2*QP) > 4*QP) return 0;
267
// Column counterpart of the horizontal range test (plain C version): returns 0
// when sampled pixels of a column are further apart than the QP-derived limit.
// NOTE(review): three different strategies are visible below and are
// presumably alternatives selected by preprocessor lines missing from this
// excerpt; the success return is not visible either - confirm.
static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
273
// Variant 1: four columns per iteration, each column sampling one pair of
// lines five apart (0/5, 2/7, 4/1, 6/3); fails when |diff| > 2*QP.
for(x=0; x<BLOCK_SIZE; x+=4)
275
if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
276
if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
277
if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
278
if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
283
// Variant 2: every column, comparing line 1 with line 8.
for(x=0; x<BLOCK_SIZE; x++)
285
if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
292
// Variant 3: tracks min and max over the sampled pixels (the bookkeeping
// lines are not visible) and fails when their spread exceeds 2*QP.
for(x=0; x<BLOCK_SIZE; x++)
298
int v= src[x + y*stride];
302
if(max-min > 2*QP) return 0;
308
// Classifies a block for the vertical deblocking path: first runs the
// flatness test isVertDC_C(); only if the block is flat does it run the range
// test isVertMinMaxOk_C() with the current c->QP.
// NOTE(review): the values returned for the individual outcomes are on lines
// that are missing from this excerpt - confirm against the full source.
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
309
if( isVertDC_C(src, stride, c) ){
310
if( isVertMinMaxOk_C(src, stride, c->QP) )
319
// Default horizontal deblocking filter (C version), run over BLOCK_SIZE rows.
// Per row it measures an energy term across the block edge (middleEnergy)
// and, when that is below 8*QP, the same kind of term on either side of the
// edge. The correction d starts as the amount by which the middle energy
// exceeds the smaller neighbouring energy and is given the sign opposite to
// middleEnergy.
// NOTE(review): clamping of d, its relation to q and the write-back into dst
// are on lines missing from this excerpt - confirm.
//   dst    - row of pixels straddling the edge, modified in place
//   stride - presumably used to advance dst per row (not visible) - confirm
//   QP     - quantiser; scales the activation threshold
static inline void doHorizDefFilter(uint8_t dst[], int stride, int QP)
322
for(y=0; y<BLOCK_SIZE; y++)
324
// NOTE(review): leftEnergy and rightEnergy both follow the pattern
// 5*(x[k+2] - x[k+1]) + 2*(x[k] - x[k+3]); applied to the middle taps that
// pattern would read 5*(dst[4] - dst[3]), not 5*(dst[4] - dst[5]) - confirm
// whether dst[5] in the first term is intended.
const int middleEnergy= 5*(dst[4] - dst[5]) + 2*(dst[2] - dst[5]);
326
if(ABS(middleEnergy) < 8*QP)
328
// Half of the step between the two pixels adjacent to the edge.
const int q=(dst[3] - dst[4])/2;
329
const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
330
const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
332
int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
336
d*= SIGN(-middleEnergy);
357
* Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
358
* using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
360
// Horizontal 9-tap low pass over eight pixels of a row, repeated for
// BLOCK_SIZE rows, with weights (1,1,2,2,4,2,2,1,1)/16 and rounding (+8).
//   dst    - first of the eight pixels; dst[-1] and dst[8] are also read
//   stride - presumably used to advance dst per row (not visible) - confirm
//   QP     - decides whether the outside neighbours may take part
static inline void doHorizLowPass(uint8_t dst[], int stride, int QP)
364
for(y=0; y<BLOCK_SIZE; y++)
366
// Edge extension: the pixel just outside the range is used only if it is
// within QP of the boundary pixel; otherwise the boundary pixel is repeated.
const int first= ABS(dst[-1] - dst[0]) < QP ? dst[-1] : dst[0];
367
const int last= ABS(dst[8] - dst[7]) < QP ? dst[8] : dst[7];
370
// Pairwise sums of the unfiltered neighbours. They are taken before any
// pixel is written, so the in-place stores below never feed back.
sums[0] = first + dst[0];
371
sums[1] = dst[0] + dst[1];
372
sums[2] = dst[1] + dst[2];
373
sums[3] = dst[2] + dst[3];
374
sums[4] = dst[3] + dst[4];
375
sums[5] = dst[4] + dst[5];
376
sums[6] = dst[5] + dst[6];
377
sums[7] = dst[6] + dst[7];
378
sums[8] = dst[7] + last;
380
// Each output combines the sums so that the nine taps add up to 16; taps
// falling outside the range are folded onto first or last. Every statement
// reads only dst[] entries that have not been overwritten yet.
dst[0]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4;
381
dst[1]= ((dst[1]<<2) + ((first + sums[0] + sums[3])<<1) + sums[5] + 8)>>4;
382
dst[2]= ((dst[2]<<2) + ((first + sums[1] + sums[4])<<1) + sums[6] + 8)>>4;
383
dst[3]= ((dst[3]<<2) + ((sums[2] + sums[5])<<1) + sums[0] + sums[7] + 8)>>4;
384
dst[4]= ((dst[4]<<2) + ((sums[3] + sums[6])<<1) + sums[1] + sums[8] + 8)>>4;
385
dst[5]= ((dst[5]<<2) + ((last + sums[7] + sums[4])<<1) + sums[2] + 8)>>4;
386
dst[6]= (((last + dst[6])<<2) + ((dst[7] + sums[5])<<1) + sums[3] + 8)>>4;
387
dst[7]= ((sums[8]<<2) + ((last + sums[6])<<1) + sums[4] + 8)>>4;
394
* Experimental Filter 1 (Horizontal)
395
* will not damage linear gradients
396
* Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
397
* can only smooth blocks at the expected locations (it cant smooth them if they did move)
398
* The MMX2 version does correct clipping; the C version doesn't
399
* not identical with the vertical one
401
// Experimental horizontal deblocking filter.
// Two parts are visible:
//  1. construction of a 256-entry table of 64-bit words in a function-local
//     static pointer (lut), one packed set of eight byte-sized corrections
//     per possible step value;
//  2. the per-row loop, which derives the step strength d from three pixel
//     differences and gives it the sign opposite to the centre difference.
// NOTE(review): the guard around the table construction, the loop over i and
// the code that applies the correction to src are on lines missing from this
// excerpt. If initialisation is a plain first-use check on the static
// pointer, concurrent first calls would race, and the allocation result is
// not visibly checked - confirm.
//   src    - row of pixels straddling the block edge
//   stride - presumably used to advance src per row (not visible) - confirm
//   QP     - not referenced by any visible line
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
404
static uint64_t *lut= NULL;
408
lut= (uint64_t*)memalign(8, 256*8);
411
// Interprets the table index as a signed byte and doubles it.
int v= i < 128 ? 2*i : 2*(i-256);
413
// NOTE(review): a, b, c and d are declared twice below; the two groups are
// presumably alternatives chosen by preprocessor lines that are not visible.
//Simulate 112242211 9-Tap filter
414
uint64_t a= (v/16) & 0xFF;
415
uint64_t b= (v/8) & 0xFF;
416
uint64_t c= (v/4) & 0xFF;
417
uint64_t d= (3*v/8) & 0xFF;
419
//Simulate piecewise linear interpolation
420
uint64_t a= (v/16) & 0xFF;
421
uint64_t b= (v*3/16) & 0xFF;
422
uint64_t c= (v*5/16) & 0xFF;
423
uint64_t d= (7*v/16) & 0xFF;
424
// Byte-wise negations (two's complement within 8 bits) of the values above.
uint64_t A= (0x100 - a)&0xFF;
425
uint64_t B= (0x100 - b)&0xFF;
426
uint64_t C= (0x100 - c)&0xFF;
427
// NOTE(review): D is derived from c, which makes it identical to C; by
// analogy with A, B and C it was presumably meant to use d - confirm.
uint64_t D= (0x100 - c)&0xFF;
429
// Packs the eight bytes: a, b, c, d in the upper half, D, C, B, A below.
lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
430
(D<<24) | (C<<16) | (B<<8) | (A);
431
//lut[i] = (v<<32) | (v<<24);
435
for(y=0; y<BLOCK_SIZE; y++)
437
// Differences next to (a, c) and across (b) the centre of the row.
int a= src[1] - src[2];
438
int b= src[3] - src[4];
439
int c= src[5] - src[6];
441
// Amount by which the centre step exceeds the average of its neighbours,
// floored at zero.
int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
445
int v = d * SIGN(-b);
460
//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
462
// Selection of the template variants to build. With RUNTIME_CPUDETECT every
// condition below is true, so all variants are compiled; without it exactly
// one condition holds, chosen by the HAVE_* macros:
//   no MMX -> first, MMX only -> second, MMX2 -> third, 3DNow w/o MMX2 -> fourth.
// NOTE(review): only the COMPILE_3DNOW define is visible; the defines for the
// other three conditions and the closing endif lines are missing from this
// excerpt.
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
468
#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
472
#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
476
#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
477
#define COMPILE_3DNOW
491
// The template source is included once per variant. RENAME() appends a
// variant suffix to every identifier the template wraps in it, which is how
// names such as postProcess_C and postProcess_MMX2 come into existence.
// NOTE(review): the undef/define lines that adjust RENAME and the HAVE_*
// macros between the includes are not visible here.
#define RENAME(a) a ## _C
492
#include "postprocess_template.c"
502
#define RENAME(a) a ## _MMX
503
#include "postprocess_template.c"
513
#define RENAME(a) a ## _MMX2
514
#include "postprocess_template.c"
524
#define RENAME(a) a ## _3DNow
525
#include "postprocess_template.c"
528
// minor note: the HAVE_xyz macros are messed up after that line, so don't use them
530
// Dispatches one plane to the postProcess_* variant generated from the
// template. The mode is first copied by value into the context
// (c->ppMode= *ppMode), then the call is forwarded with unchanged arguments.
//   src/srcStride, dst/dstStride - source and destination plane with strides
//   width, height                - plane dimensions
//   QPs, QPStride                - quantiser table and its stride
//   isColor                      - callers pass 0 for plane 0 and 1 or 2 for
//                                  the other two planes
//   vm, vc                       - opaque mode and context handles, cast to
//                                  PPMode and PPContext
static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
531
QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
533
PPContext *c= (PPContext *)vc;
534
PPMode *ppMode= (PPMode *)vm;
535
c->ppMode= *ppMode; //FIXME
537
// using ifs here as they are faster than function pointers, although the
538
// difference wouldn't be measurable here, but it's much better because
539
// someone might exchange the CPU without restarting mplayer ;)
540
// Runtime detection: pick the variant from the capability bits in the context.
#ifdef RUNTIME_CPUDETECT
542
// ordered by speed, fastest first
543
if(c->cpuCaps & PP_CPU_CAPS_MMX2)
544
postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
545
else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
546
postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
547
else if(c->cpuCaps & PP_CPU_CAPS_MMX)
548
postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
550
// NOTE(review): the two postProcess_C calls below presumably belong to
// different branches (a final else and a build without MMX support); the
// lines that separate them are missing from this excerpt.
postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
552
postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
554
// Compile-time selection: exactly one variant was built, call it directly.
#else //RUNTIME_CPUDETECT
556
postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
557
#elif defined (HAVE_3DNOW)
558
postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
559
#elif defined (HAVE_MMX)
560
postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
562
postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
564
#endif //!RUNTIME_CPUDETECT
567
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
568
// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
570
/* -pp Command line Help
573
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
574
"long form example:\n"
575
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
576
"short form example:\n"
577
"vb:a/hb:a/lb de,-vb\n"
581
"short long name short long option Description\n"
582
"* * a autoq CPU power dependent enabler\n"
583
" c chrom chrominance filtering enabled\n"
584
" y nochrom chrominance filtering disabled\n"
585
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
586
" 1. difference factor: default=32, higher -> more deblocking\n"
587
" 2. flatness threshold: default=39, lower -> more deblocking\n"
588
" the h & v deblocking filters share these\n"
589
" so you can't set different thresholds for h / v\n"
590
"vb vdeblock (2 threshold) vertical deblocking filter\n"
591
"h1 x1hdeblock experimental h deblock filter 1\n"
592
"v1 x1vdeblock experimental v deblock filter 1\n"
593
"dr dering deringing filter\n"
594
"al autolevels automatic brightness / contrast\n"
595
" f fullyrange stretch luminance to (0..255)\n"
596
"lb linblenddeint linear blend deinterlacer\n"
597
"li linipoldeint linear interpolating deinterlace\n"
598
"ci cubicipoldeint cubic interpolating deinterlacer\n"
599
"md mediandeint median deinterlacer\n"
600
"fd ffmpegdeint ffmpeg deinterlacer\n"
601
"de default hb:a,vb:a,dr:a,al\n"
602
"fa fast h1:a,v1:a,dr:a,al\n"
603
"tn tmpnoise (3 threshold) temporal noise reducer\n"
604
" 1. <= 2. <= 3. larger -> stronger filtering\n"
605
"fq forceQuant <quantizer> force quantizer\n"
608
// Parses a textual filter description into a newly allocated PPMode.
//   name    - list of filters separated by , or / ; each filter may carry
//             options separated by : and may be prefixed with - to disable it
//   quality - quality level that applies to filters carrying the autoq option
// Returns the new mode. Problems are counted in ppMode->error and reported on
// stderr.
// NOTE(review): what is returned when errors were found is decided on lines
// missing from this excerpt, as are several declarations and the outer loop
// header - confirm against the full source.
pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
610
char temp[GET_MODE_BUFFER_SIZE];
612
char *filterDelimiters= ",/";
613
char *optionDelimiters= ":";
614
struct PPMode *ppMode;
617
ppMode= memalign(8, sizeof(PPMode));
620
// Defaults, used unless a filter option overrides them further down.
ppMode->chromMode= 0;
621
ppMode->maxTmpNoise[0]= 700;
622
ppMode->maxTmpNoise[1]= 1500;
623
ppMode->maxTmpNoise[2]= 3000;
624
ppMode->maxAllowedY= 234;
625
ppMode->minAllowedY= 16;
626
ppMode->baseDcDiff= 256/8;
627
ppMode->flatnessThreshold= 56-16-1;
628
ppMode->maxClippedThreshold= 0.01;
631
// Work on a private copy because strtok() writes into the string it splits.
// NOTE(review): strncpy() leaves temp without a terminating NUL when name is
// GET_MODE_BUFFER_SIZE characters or longer, and no explicit termination is
// visible in this excerpt - confirm.
strncpy(temp, name, GET_MODE_BUFFER_SIZE);
633
if(verbose>1) printf("pp: %s\n", name);
637
// Per-filter state. q starts far above every minimum quality in the filter
// table, so a filter is enabled unconditionally unless autoq replaces q with
// the caller supplied quality.
int q= 1000000; //PP_QUALITY_MAX;
640
char *options[OPTIONS_ARRAY_SIZE];
643
int numOfUnknownOptions=0;
644
int enable=1; //does the user want us to enable or disable the filter
646
// strtok() keeps a single internal position, and it is reused below for
// splitting the options. The outer scan therefore tracks its own position in
// p and restarts strtok() from there for each filter.
filterToken= strtok(p, filterDelimiters);
647
if(filterToken == NULL) break;
648
p+= strlen(filterToken) + 1; // p points to next filterToken
649
filterName= strtok(filterToken, optionDelimiters);
650
if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
652
if(*filterName == '-')
658
for(;;){ //for all options
659
option= strtok(NULL, optionDelimiters);
660
if(option == NULL) break;
662
if(verbose>1) printf("pp: option: %s\n", option);
663
// Generic options understood for every filter.
if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
664
else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
665
else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
668
// Anything else is stored for the filter specific handling below; one slot
// is kept free for the terminating NULL.
options[numOfUnknownOptions] = option;
669
numOfUnknownOptions++;
671
if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
673
options[numOfUnknownOptions] = NULL;
675
/* replace stuff from the replace Table */
676
for(i=0; replaceTable[2*i]!=NULL; i++)
678
if(!strcmp(replaceTable[2*i], filterName))
680
// The alias expansion is inserted into temp at p, in front of the part of
// the string that has not been parsed yet, so that the following iterations
// parse the expansion like user input.
int newlen= strlen(replaceTable[2*i + 1]);
684
// NOTE(review): in the visible code p is only ever moved with pointer
// arithmetic, so the NULL comparison looks unreachable - confirm.
if(p==NULL) p= temp, *p=0; //last filter
685
else p--, *p=','; //not last filter
688
spaceLeft= p - temp + plen;
689
// Guards against the expansion overflowing temp (the handling of that case
// is on lines not visible here).
if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE)
694
memmove(p + newlen, p, plen+1);
695
memcpy(p, replaceTable[2*i + 1], newlen);
700
for(i=0; filters[i].shortName!=NULL; i++)
702
// printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
703
if( !strcmp(filters[i].longName, filterName)
704
|| !strcmp(filters[i].shortName, filterName))
706
// Start from a clean state for this filter: clear its bit in both modes,
// then set it again only where the checks below allow it.
ppMode->lumMode &= ~filters[i].mask;
707
ppMode->chromMode &= ~filters[i].mask;
710
if(!enable) break; // user wants to disable it
712
if(q >= filters[i].minLumQuality)
713
ppMode->lumMode|= filters[i].mask;
714
// chrom is 1 or 0 when the user said so explicitly and -1 otherwise, in
// which case the per-filter default decides.
if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
715
if(q >= filters[i].minChromQuality)
716
ppMode->chromMode|= filters[i].mask;
718
// Filter specific options. Every option consumed here is subtracted from
// numOfUnknownOptions, so whatever remains counts as an error.
if(filters[i].mask == LEVEL_FIX)
721
ppMode->minAllowedY= 16;
722
ppMode->maxAllowedY= 234;
723
for(o=0; options[o]!=NULL; o++)
725
if( !strcmp(options[o],"fullyrange")
726
||!strcmp(options[o],"f"))
728
ppMode->minAllowedY= 0;
729
ppMode->maxAllowedY= 255;
730
numOfUnknownOptions--;
734
else if(filters[i].mask == TEMP_NOISE_FILTER)
739
// Up to three numeric thresholds, stored in the order given.
for(o=0; options[o]!=NULL; o++)
742
ppMode->maxTmpNoise[numOfNoises]=
743
strtol(options[o], &tail, 0);
747
numOfUnknownOptions--;
748
if(numOfNoises >= 3) break;
752
else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK)
756
// First numeric option: baseDcDiff, second: flatnessThreshold. Parsing
// stops at the first option that does not start with a number.
for(o=0; options[o]!=NULL && o<2; o++)
759
int val= strtol(options[o], &tail, 0);
760
if(tail==options[o]) break;
762
numOfUnknownOptions--;
763
if(o==0) ppMode->baseDcDiff= val;
764
else ppMode->flatnessThreshold= val;
767
else if(filters[i].mask == FORCE_QUANT)
770
// The forced quantiser defaults to 15 unless a numeric option follows.
ppMode->forcedQuant= 15;
772
for(o=0; options[o]!=NULL && o<1; o++)
775
int val= strtol(options[o], &tail, 0);
776
if(tail==options[o]) break;
778
numOfUnknownOptions--;
779
ppMode->forcedQuant= val;
784
// An unknown filter name and every option nobody consumed count as errors.
if(!filterNameOk) ppMode->error++;
785
ppMode->error += numOfUnknownOptions;
788
if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
791
fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
798
// Disposes of a mode object.
// NOTE(review): the body is not visible in this excerpt; presumably it
// releases the PPMode allocated by pp_get_mode_by_name_and_quality() - confirm.
void pp_free_mode(pp_mode_t *mode){
802
// Points *p at a new block of size bytes obtained through memalign() with the
// requested alignment.
// NOTE(review): only the allocation is visible. Releasing the previous block,
// clearing the new one and handling a failed allocation would have to happen
// on lines missing from this excerpt - confirm. Nothing visible copies the old
// contents, so unlike realloc() this should not be assumed to preserve data.
static void reallocAlign(void **p, int alignment, int size){
804
*p= memalign(alignment, size);
808
// (Re)allocates every working buffer of the context for the given geometry.
//   width, height - picture size in pixels
//   stride        - line size used for the pixel buffers
//   qpStride      - line size of the quantiser tables; stored in the context
// All sizes are computed in plain int arithmetic from the arguments.
// NOTE(review): storing stride in the context and the loop headers around the
// indexed statements are on lines missing from this excerpt.
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
809
// Picture size in 16x16 units, rounded up.
int mbWidth = (width+15)>>4;
810
int mbHeight= (height+15)>>4;
814
c->qpStride= qpStride;
816
reallocAlign((void **)&c->tempDst, 8, stride*24);
817
reallocAlign((void **)&c->tempSrc, 8, stride*24);
818
reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
819
reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
821
// Every histogram bin starts with the same value derived from the picture
// area, i.e. a flat distribution.
c->yHistogram[i]= width*height/64*15/256;
825
//Note: the +17*1024 is just there so I don't have to worry about r/w over the end
826
reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
827
reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
830
reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
831
// Quantiser tables: two full tables of qpStride*mbHeight entries plus a
// single row of mbWidth entries for the forced-quantiser case.
reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
832
reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
833
reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
836
// Fills the clipping lookup table: the first 256 entries and the last 256
// entries are zeroed here, and a loop runs over the middle range 256..511.
// NOTE(review): the statement executed by that loop is missing from this
// excerpt, so the values stored in the middle range cannot be stated from
// here; zeroing the top range (instead of saturating) should also be
// confirmed as intended.
static void global_init(void){
838
memset(clip_table, 0, 256);
839
for(i=256; i<512; i++)
841
memset(clip_table+512, 0, 256);
844
// Creates a zero-initialised context and allocates its buffers for a picture
// of the given size.
//   width, height - picture size in pixels
//   cpuCaps       - capability flags; when the PP_FORMAT bit is set the value
//                   also carries the chroma subsampling shifts (bits 0-1
//                   horizontal, bits 4-5 vertical), otherwise both default to 1
// NOTE(review): no check of the allocation result is visible before the
// memset, and storing cpuCaps, the call to global_init() and the return
// statement are on lines missing from this excerpt - confirm.
pp_context_t *pp_get_context(int width, int height, int cpuCaps){
845
PPContext *c= memalign(32, sizeof(PPContext));
846
// Initial guesses; pp_postprocess() reallocates when the real strides turn
// out to be larger.
int stride= (width+15)&(~15); //assumed / will realloc if needed
847
int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
851
memset(c, 0, sizeof(PPContext));
853
if(cpuCaps&PP_FORMAT){
854
c->hChromaSubSample= cpuCaps&0x3;
855
c->vChromaSubSample= (cpuCaps>>4)&0x3;
857
c->hChromaSubSample= 1;
858
c->vChromaSubSample= 1;
861
reallocBuffers(c, width, height, stride, qpStride);
868
// Releases the buffers owned by a context and wipes the context structure.
//   vc - opaque handle, cast to PPContext
// NOTE(review): only part of the free() calls is visible. Buffers allocated
// in reallocBuffers() but not listed below (tempDst, tempSrc, tempBlocks,
// yHistogram, deintTemp, stdQPTable) and the release of the context itself
// would have to be on lines missing from this excerpt - confirm that none of
// them is leaked.
void pp_free_context(void *vc){
869
PPContext *c = (PPContext*)vc;
872
for(i=0; i<3; i++) free(c->tempBlured[i]);
873
for(i=0; i<3; i++) free(c->tempBluredPast[i]);
881
free(c->nonBQPTable);
882
free(c->forcedQPTable);
884
// Clears all pointers so that stale values cannot be reused by accident.
memset(c, 0, sizeof(PPContext));
889
// Public entry point: postprocesses one picture made of three planes.
//   src, srcStride - source planes and their line sizes
//   dst, dstStride - destination planes and their line sizes
//   width, height  - size of plane 0 in pixels
//   QP_store       - quantiser table, may be NULL
//   QPStride       - line size of QP_store
//   vm, vc         - opaque mode and context handles
//   pict_type      - picture type plus flags such as PP_PICT_TYPE_QP2
// NOTE(review): several conditions, else branches and closing braces are
// missing from this excerpt, and the function may continue past the last
// visible line; comments below only describe what the visible lines do.
void pp_postprocess(uint8_t * src[3], int srcStride[3],
890
uint8_t * dst[3], int dstStride[3],
891
int width, int height,
892
QP_STORE_T *QP_store, int QPStride,
893
pp_mode_t *vm, void *vc, int pict_type)
895
// Picture size in 16x16 units, rounded up.
int mbWidth = (width+15)>>4;
896
int mbHeight= (height+15)>>4;
897
PPMode *mode = (PPMode*)vm;
898
PPContext *c = (PPContext*)vc;
899
int minStride= MAX(srcStride[0], dstStride[0]);
901
// Grow the context buffers when this picture needs larger strides than the
// ones the context was sized for; sizes never shrink.
if(c->stride < minStride || c->qpStride < QPStride)
902
reallocBuffers(c, width, height,
903
MAX(minStride, c->stride),
904
MAX(c->qpStride, QPStride));
906
// Without a caller supplied table, or with a forced quantiser, a single
// synthetic row is used: filled with the forced value, or with 1.
// NOTE(review): presumably QPStride is set to 0 on a line not visible here
// so that this one row is reused for every line - confirm.
if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
909
QP_store= c->forcedQPTable;
911
if(mode->lumMode & FORCE_QUANT)
912
for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
914
for(i=0; i<mbWidth; i++) QP_store[i]= 1;
916
//printf("pict_type:%d\n", pict_type);
918
// PP_PICT_TYPE_QP2: the supplied values are halved into stdQPTable, which
// then replaces QP_store.
if(pict_type & PP_PICT_TYPE_QP2){
920
const int count= mbHeight * QPStride;
921
// Four entries per iteration through a 32-bit access; the mask removes the
// bit that the shift carries over from the neighbouring byte.
// NOTE(review): this assumes QP_STORE_T is one byte wide and relies on
// accessing the tables through uint32_t pointers - confirm.
for(i=0; i<(count>>2); i++){
922
((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
924
// Remaining entries that do not fill a whole 32-bit word.
for(i<<=2; i<count; i++){
925
c->stdQPTable[i] = QP_store[i]>>1;
927
QP_store= c->stdQPTable;
932
// Dump of the table in use (presumably debug-only code - confirm).
for(y=0; y<mbHeight; y++){
933
for(x=0; x<mbWidth; x++){
934
printf("%2d ", QP_store[x + y*QPStride]);
944
// Copy of the table with every entry reduced to its low five bits, kept in
// nonBQPTable (the condition selecting this path is not visible).
const int count= mbHeight * QPStride;
945
for(i=0; i<(count>>2); i++){
946
((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x1F1F1F1F;
948
for(i<<=2; i<count; i++){
949
c->nonBQPTable[i] = QP_store[i] & 0x1F;
955
printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
958
// Plane 0 at full size.
postProcess(src[0], srcStride[0], dst[0], dstStride[0],
959
width, height, QP_store, QPStride, 0, mode, c);
961
// Planes 1 and 2 use the size reduced by the subsampling shifts.
width = (width )>>c->hChromaSubSample;
962
height = (height)>>c->vChromaSubSample;
966
postProcess(src[1], srcStride[1], dst[1], dstStride[1],
967
width, height, QP_store, QPStride, 1, mode, c);
968
postProcess(src[2], srcStride[2], dst[2], dstStride[2],
969
width, height, QP_store, QPStride, 2, mode, c);
971
// When planes 1 and 2 are not filtered they are copied unchanged: in one
// call per plane if source and destination strides match, line by line
// otherwise.
else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
973
memcpy(dst[1], src[1], srcStride[1]*height);
974
memcpy(dst[2], src[2], srcStride[2]*height);
979
for(y=0; y<height; y++)
981
memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
982
memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);