void vorbis_inverse_coupling(float *mag, float *ang, int blocksize);
void ff_flac_compute_autocorr(const int32_t *data, int len, int lag, double *autoc);
void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);

uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
uint32_t ff_squareTbl[512] = {0, };

// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
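
/* Illustrative sketch (added commentary, not from the original file): pb_7f
 * and pb_80 enable byte-parallel "SWAR" arithmetic on one native word.
 * Masking every byte to its low 7 bits lets the addition proceed with no
 * carry crossing a byte lane; the final XOR restores each byte's MSB.
 * add_bytes_c() below uses exactly this expression. */
static inline unsigned long add_bytes_swar_sketch(unsigned long a, unsigned long b)
{
    /* per byte: (a + b) & 0xFF, computed for all bytes of the word at once */
    return ((a & pb_7f) + (b & pb_7f)) ^ ((a ^ b) & pb_80);
}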
const uint8_t ff_zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,

    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,

static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};

void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
    st->scantable= src_scantable;
        j = src_scantable[i];
        st->permutated[i] = permutation[j];
        j = st->permutated[i];
        st->raster_end[i]= end;
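
/* Added note (hedged): ff_init_scantable() composes the codec's scan order
 * with the IDCT's coefficient permutation, so st->permutated[i] ==
 * permutation[src_scantable[i]]; with the SSE2 row permutation above, for
 * example, zigzag position i is redirected to the interleaved slot the SSE2
 * IDCT reads from.  st->raster_end[i] caches the largest permuted (raster)
 * index reached at scan positions 0..i. */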
static int pix_sum_c(uint8_t * pix, int line_size)

/* draw the edges of width 'w' of an image of size width, height */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
    uint8_t *ptr, *last_line;
    last_line = buf + (height - 1) * wrap;
        memcpy(buf - (i + 1) * wrap, buf, width);
        memcpy(last_line + (i + 1) * wrap, last_line, width);
    for(i=0;i<height;i++) {
        memset(ptr - w, ptr[0], w);
        memset(ptr + width, ptr[width-1], w);
        memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
        memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
        memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* bottom left */
        memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* bottom right */
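
/* Added note (hedged): callers along the lines of the MPEG-style codecs
 * typically run this over each reconstructed plane with w = EDGE_WIDTH, so
 * that later motion compensation may read up to 'w' samples outside the
 * picture without per-access bounds checks; the loops above replicate the
 * first and last row/column outward and fill the four corners. */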
 * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
 * @param buf destination buffer
 * @param src source buffer
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                         int src_x, int src_y, int w, int h){
    int start_y, start_x, end_y, end_x;

        src+= (h-1-src_y)*linesize;
    }else if(src_y<=-block_h){
        src+= (1-block_h-src_y)*linesize;
    }else if(src_x<=-block_w){
        src+= (1-block_w-src_x);

    start_y= FFMAX(0, -src_y);
    start_x= FFMAX(0, -src_x);
    end_y= FFMIN(block_h, h-src_y);
    end_x= FFMIN(block_w, w-src_x);

    // copy existing part
    for(y=start_y; y<end_y; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= src[x + y*linesize];

    for(y=0; y<start_y; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= buf[x + start_y*linesize];

    for(y=end_y; y<block_h; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= buf[x + (end_y-1)*linesize];

    for(y=0; y<block_h; y++){
        for(x=0; x<start_x; x++){
            buf[x + y*linesize]= buf[start_x + y*linesize];

        for(x=end_x; x<block_w; x++){
            buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
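
/* Usage sketch (hedged; the field names follow mpegvideo-style callers and
 * are illustrative): when a motion vector points partly outside the padded
 * picture, the block is first copied into an emulation buffer with
 * replicated borders and the source pointer is redirected there:
 *
 *     if(src_x < 0 || src_y < 0 ||
 *        src_x + block_w > s->h_edge_pos || src_y + block_h > s->v_edge_pos){
 *         ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize,
 *                             block_w, block_h, src_x, src_y,
 *                             s->h_edge_pos, s->v_edge_pos);
 *         src = s->edge_emu_buffer;
 *     }
 */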
static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)

    for(i=0; i<h; i++){\
        a= AV_RN32(&src1[i*src_stride1 ]);\
        b= AV_RN32(&src2[i*src_stride2 ]);\
        OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\

    for(i=0; i<h; i++){\
        a= AV_RN32(&src1[i*src_stride1 ]);\
        b= AV_RN32(&src2[i*src_stride2 ]);\
        OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
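
/* Added reference (hedged; check dsputil.h for the authoritative versions):
 * the byte-parallel averages used above are conventionally defined as
 *
 *     static inline uint32_t rnd_avg32(uint32_t a, uint32_t b){
 *         return (a | b) - (((a ^ b) & ~0x01010101UL) >> 1);
 *     }
 *     static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b){
 *         return (a & b) + (((a ^ b) & ~0x01010101UL) >> 1);
 *     }
 *
 * i.e. (a+b+1)>>1 resp. (a+b)>>1 on each of the four byte lanes, without the
 * 9-bit intermediate a naive per-byte add would need. */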
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x02020202UL;\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x02020202UL;\

    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x01010101UL;\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x01010101UL;\
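
/* Added note (hedged): the four-input average splits each byte into its low
 * two bits (l0, l1) and pre-shifted high six bits (h0, h1).  The high parts
 * are >>2 before summing, so four of them cannot overflow a byte lane; the
 * low parts are summed exactly together with the rounding constant, and
 * (l0+l1)>>2 recovers their carry.  Per byte, the first variant computes
 * (a+b+c+d+2)>>2 (rounding constant 0x02020202UL) and the second variant
 * (a+b+c+d+1)>>2 (the no-rounding form, constant 0x01010101UL). */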
    assert(x<8 && y<8 && x>=0 && y>=0);\

    for(i=0; i<h; i++){\
        OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
        OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\

    const int step= C ? stride : 1;\
    for(i=0; i<h; i++){\
        OP(dst[0], (A*src[0] + E*src[step+0]));\
        OP(dst[1], (A*src[1] + E*src[step+1]));\

    assert(x<8 && y<8 && x>=0 && y>=0);\

    for(i=0; i<h; i++){\
        OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
        OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
        OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
        OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\

    const int step= C ? stride : 1;\
    for(i=0; i<h; i++){\
        OP(dst[0], (A*src[0] + E*src[step+0]));\
        OP(dst[1], (A*src[1] + E*src[step+1]));\
        OP(dst[2], (A*src[2] + E*src[step+2]));\
        OP(dst[3], (A*src[3] + E*src[step+3]));\

    assert(x<8 && y<8 && x>=0 && y>=0);\

    for(i=0; i<h; i++){\
        OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
        OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
        OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
        OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
        OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
        OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
        OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
        OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\

    const int step= C ? stride : 1;\
    for(i=0; i<h; i++){\
        OP(dst[0], (A*src[0] + E*src[step+0]));\
        OP(dst[1], (A*src[1] + E*src[step+1]));\
        OP(dst[2], (A*src[2] + E*src[step+2]));\
        OP(dst[3], (A*src[3] + E*src[step+3]));\
        OP(dst[4], (A*src[4] + E*src[step+4]));\
        OP(dst[5], (A*src[5] + E*src[step+5]));\
        OP(dst[6], (A*src[6] + E*src[step+6]));\
        OP(dst[7], (A*src[7] + E*src[step+7]));\
#endif /* CONFIG_VC1_DECODER||CONFIG_WMV3_DECODER */

void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);

/* H264 specific */
void ff_h264dspenc_init(DSPContext* c, AVCodecContext *avctx);

static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
        long a = *(long*)(src+i);
        long b = *(long*)(dst+i);
        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
        dst[i+0] += src[i+0];

static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
        long a = *(long*)(src1+i);
        long b = *(long*)(src2+i);
        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
        dst[i] = src1[i]+src2[i];

static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
#ifndef HAVE_FAST_UNALIGNED
    if((long)src2 & (sizeof(long)-1)){
        for(i=0; i+7<w; i+=8){
            dst[i+0] = src1[i+0]-src2[i+0];
            dst[i+1] = src1[i+1]-src2[i+1];
            dst[i+2] = src1[i+2]-src2[i+2];
            dst[i+3] = src1[i+3]-src2[i+3];
            dst[i+4] = src1[i+4]-src2[i+4];
            dst[i+5] = src1[i+5]-src2[i+5];
            dst[i+6] = src1[i+6]-src2[i+6];
            dst[i+7] = src1[i+7]-src2[i+7];
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
        long a = *(long*)(src1+i);
        long b = *(long*)(src2+i);
        *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
    for(; i<w; i++)
        dst[i+0] = src1[i+0]-src2[i+0];
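
/* Added note (hedged): the word-wide subtraction mirrors the add_bytes trick.
 * OR-ing pb_80 into 'a' guarantees every byte lane has a borrow bit to spend,
 * so (a|pb_80) - (b&pb_7f) runs with no borrow crossing lanes; the closing
 * XOR with (a^b^pb_80)&pb_80 restores the true MSB of each lane, giving
 * (a - b) & 0xFF per byte. */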
static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DECLARE_ALIGNED_16(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
    DCTELEM * const temp= (DCTELEM*)aligned_temp;

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);
    return s->dsp.sum_abs_dctelem(temp);
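
/* Added note (hedged): dct_sad8x8_c scores a candidate block by the absolute
 * sum of the DCT of the pixel difference; it is wired into the comparison
 * function tables and is typically reached by selecting FF_CMP_DCT on the
 * encoder context. */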
#ifdef CONFIG_GPL

    DECLARE_ALIGNED_8 (uint64_t, aligned_bak[stride]);
    DCTELEM * const temp= (DCTELEM*)aligned_temp;
    uint8_t * const bak= (uint8_t*)aligned_bak;
    int i, last, run, bits, level, distortion, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    s->dsp.idct_add(bak, stride, temp);

    distortion= s->dsp.sse[1](NULL, bak, src1, stride, 8);

    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){

static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
    for(i=0; i<size; i++)
        score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);

WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
#ifdef CONFIG_GPL
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)

static void vector_fmul_c(float *dst, const float *src, int len){
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */
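
/* Added check (illustrative only): the table is the 8-point DCT basis scaled
 * by 2048*sqrt(2), i.e. Wk == round(2048 * sqrt(2) * cos(k*M_PI/16)); for
 * instance round(2896.31 * 0.980785) == 2841 (W1) and
 * round(2896.31 * 0.195090) == 565 (W7). */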
static void wmv2_idct_row(short * b)
    int a0,a1,a2,a3,a4,a5,a6,a7;

    a1 = W1*b[1]+W7*b[7];
    a7 = W7*b[1]-W1*b[7];
    a5 = W5*b[5]+W3*b[3];
    a3 = W3*b[5]-W5*b[3];
    a2 = W2*b[2]+W6*b[6];
    a6 = W6*b[2]-W2*b[6];
    a0 = W0*b[0]+W0*b[4];
    a4 = W0*b[0]-W0*b[4];

    s1 = (181*(a1-a5+a7-a3)+128)>>8;//1,3,5,7,
    s2 = (181*(a1-a5-a7+a3)+128)>>8;
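    /* added note: 181/256 approximates 1/sqrt(2) (round(256/sqrt(2)) == 181),
       so s1 and s2 apply the odd-part butterfly's sqrt(1/2) scaling in fixed
       point */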
    b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
    b[1] = (a4+a6 +s1 + (1<<7))>>8;
    b[2] = (a4-a6 +s2 + (1<<7))>>8;
    b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
    b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
    b[5] = (a4-a6 -s2 + (1<<7))>>8;
    b[6] = (a4+a6 -s1 + (1<<7))>>8;
    b[7] = (a0+a2-a1-a5 + (1<<7))>>8;

static void wmv2_idct_col(short * b)
    int a0,a1,a2,a3,a4,a5,a6,a7;
    /*step 1, with extended precision*/
    a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
    a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
    a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
    a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
    a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
    a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
    a0 = (W0*b[8*0]+W0*b[8*4] )>>3;
    a4 = (W0*b[8*0]-W0*b[8*4] )>>3;

    s1 = (181*(a1-a5+a7-a3)+128)>>8;
    s2 = (181*(a1-a5-a7+a3)+128)>>8;

    b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
    b[8*1] = (a4+a6 +s1 + (1<<13))>>14;
    b[8*2] = (a4-a6 +s2 + (1<<13))>>14;
    b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;
    b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
    b[8*5] = (a4-a6 -s2 + (1<<13))>>14;
    b[8*6] = (a4+a6 -s1 + (1<<13))>>14;
    b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;

void ff_wmv2_idct_c(short * block){
        wmv2_idct_row(block+i);
        wmv2_idct_col(block+i);
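
/* Added note: the 2-D transform is separable; ff_wmv2_idct_c() applies
 * wmv2_idct_row() to each of the eight rows (block+0, +8, ..., +56) and then
 * wmv2_idct_col() to each of the eight columns (block+0 .. block+7). */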
/* XXX: those functions should be suppressed ASAP when all IDCTs are
static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
    ff_wmv2_idct_c(block);
    put_pixels_clamped_c(block, dest, line_size);

static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
    ff_wmv2_idct_c(block);
    add_pixels_clamped_c(block, dest, line_size);

static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
    j_rev_dct (block);

    static int did_fail=0;
    DECLARE_ALIGNED_16(int, aligned);

    if((long)&aligned & 15){
#if defined(HAVE_MMX) || defined(HAVE_ALTIVEC)
        av_log(NULL, AV_LOG_ERROR,
            "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
            "and may be very slow or crash. This is not a bug in libavcodec,\n"
            "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
            "Do not report crashes to FFmpeg developers.\n");
        c->idct_add= ff_jref_idct_add;
        c->idct = j_rev_dct;
        c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
    }else if((ENABLE_VP3_DECODER || ENABLE_VP5_DECODER || ENABLE_VP6_DECODER || ENABLE_THEORA_DECODER ) &&
            avctx->idct_algo==FF_IDCT_VP3){
        c->idct_put= ff_vp3_idct_put_c;
        c->idct_add= ff_vp3_idct_add_c;
        c->idct = ff_vp3_idct_c;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else if(avctx->idct_algo==FF_IDCT_WMV2){
        c->idct_put= ff_wmv2_idct_put_c;
        c->idct_add= ff_wmv2_idct_add_c;
        c->idct = ff_wmv2_idct_c;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else if(avctx->idct_algo==FF_IDCT_FAAN){
        c->idct_put= ff_faanidct_put;
        c->idct_add= ff_faanidct_add;
        c->idct = ff_faanidct;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else{ //accurate/default
        c->idct_put= ff_simple_idct_put;
        c->idct_add= ff_simple_idct_add;
        c->idct = ff_simple_idct;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
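
    /* Added usage sketch (hedged): an application opts into a specific IDCT
     * before opening the codec, e.g.
     *
     *     avctx->idct_algo = FF_IDCT_WMV2;
     *
     * and the chain above then routes c->idct/idct_put/idct_add to the
     * matching implementation, recording its coefficient order in
     * c->idct_permutation_type, from which the permutation later handed to
     * ff_init_scantable() is built. */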
    if (ENABLE_H264_DECODER) {
        c->h264_idct_add= ff_h264_idct_add_c;
        c->h264_idct8_add= ff_h264_idct8_add_c;
        c->h264_idct_dc_add= ff_h264_idct_dc_add_c;
        c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c;

    c->get_pixels = get_pixels_c;
    c->diff_pixels = diff_pixels_c;

    c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c;
    c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c;

    c->draw_edges = draw_edges_c;
#ifdef CONFIG_CAVS_DECODER
    ff_cavsdsp_init(c,avctx);
#if defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER)
    ff_vc1dsp_init(c,avctx);
#if defined(CONFIG_WMV2_DECODER) || defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER)
    ff_intrax8dsp_init(c,avctx);
#if defined(CONFIG_H264_ENCODER)
    ff_h264dspenc_init(c,avctx);

    c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;

    memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
    memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
    if (ENABLE_MMX)      dsputil_init_mmx   (c, avctx);
    if (ENABLE_ARMV4L)   dsputil_init_armv4l(c, avctx);
    if (ENABLE_MLIB)     dsputil_init_mlib  (c, avctx);
    if (ENABLE_VIS)      dsputil_init_vis   (c, avctx);
    if (ENABLE_ALPHA)    dsputil_init_alpha (c, avctx);
    if (ENABLE_POWERPC)  dsputil_init_ppc   (c, avctx);
    if (ENABLE_MMI)      dsputil_init_mmi   (c, avctx);
    if (ENABLE_SH4)      dsputil_init_sh4   (c, avctx);
    if (ENABLE_BFIN)     dsputil_init_bfin  (c, avctx);
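
    /* Added note: the if (ENABLE_*) form (0/1 constants from config.h)
     * replaces the older #ifdef blocks; disabled calls are dead-code
     * eliminated by the compiler while every branch still gets parsed and
     * type-checked on all configurations. */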
    for(i=0; i<64; i++){
        if(!c->put_2tap_qpel_pixels_tab[0][i])