/*
 * MMX optimized DSP utils
 * Copyright (c) 2007 Aurelien Jacobs <aurel@gnuage.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#ifndef AVCODEC_X86_DSPUTIL_MMX_H
#define AVCODEC_X86_DSPUTIL_MMX_H

#include <stddef.h>
#include <stdint.h>

#include "libavcodec/dsputil.h"
#include "libavutil/x86/asm.h"
31
extern const uint64_t ff_wtwo;
33
extern const xmm_reg ff_pw_3;
34
extern const xmm_reg ff_pw_4;
35
extern const xmm_reg ff_pw_5;
36
extern const xmm_reg ff_pw_8;
37
extern const uint64_t ff_pw_15;
38
extern const xmm_reg ff_pw_16;
39
extern const xmm_reg ff_pw_18;
40
extern const uint64_t ff_pw_20;
41
extern const xmm_reg ff_pw_32;
42
extern const uint64_t ff_pw_42;
43
extern const uint64_t ff_pw_53;
44
extern const xmm_reg ff_pw_64;
45
extern const uint64_t ff_pw_96;
46
extern const uint64_t ff_pw_128;
47
extern const uint64_t ff_pw_255;
49
extern const xmm_reg ff_pb_1;
50
extern const xmm_reg ff_pb_3;
51
extern const xmm_reg ff_pb_F8;
52
extern const uint64_t ff_pb_FC;
54
extern const double ff_pd_1[2];
55
extern const double ff_pd_2[2];
/*
 * SBUTTERFLY expands to an inline-asm template fragment (a string literal)
 * that interleaves two registers:
 *   t = a                 (mov<m>)
 *   a = low  halves of a and b interleaved (punpckl<n>)
 *   t = high halves of a and b interleaved (punpckh<n>)
 * a, b and t are pasted verbatim, so the caller supplies the full operand
 * spelling. n is the unpack suffix (e.g. wd, dq) and m the mov suffix (e.g. q).
 * b is left unmodified.
 */
#define SBUTTERFLY(a,b,t,n,m)\
    "mov" #m " " #a ", " #t " \n\t" /* abcd */\
    "punpckl" #n " " #b ", " #a " \n\t" /* aebf */\
    "punpckh" #n " " #b ", " #t " \n\t" /* cgdh */

/*
 * TRANSPOSE4 chains four SBUTTERFLY steps (two word-level, then two
 * dword-level). Per the lane comments below, the transposed rows end up in
 * a, d, t and c, in that order; b is used as scratch.
 */
#define TRANSPOSE4(a,b,c,d,t)\
    SBUTTERFLY(a,b,t,wd,q) /* a=aebf t=cgdh */\
    SBUTTERFLY(c,d,b,wd,q) /* c=imjn b=kolp */\
    SBUTTERFLY(a,c,d,dq,q) /* a=aeim d=bfjn */\
    SBUTTERFLY(t,b,c,dq,q) /* t=cgko c=dhlp */
/*
 * Set every 16-bit lane of register regd to 1 without a memory load:
 * pcmpeqd of a register with itself sets all bits, and psrlw $15 then leaves
 * only the low bit of each word. regd is the bare register name; the "%%"
 * prefix is added by the macro.
 */
#define MOVQ_WONE(regd)                                 \
    __asm__ volatile (                                  \
        "pcmpeqd %%" #regd ", %%" #regd " \n\t"         \
        "psrlw $15, %%" #regd ::)
/* Emit an assembler directive that aligns the following code to 2^3 = 8 bytes. */
#define JUMPALIGN()     __asm__ volatile (".p2align 3"::)
/* Clear register regd (bare name, "%%" is added here) by XOR-ing it with itself. */
#define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%"#regd", %%"#regd ::)
/*
 * Set every byte of register regd to 0xFE without a memory load:
 * pcmpeqd of a register with itself sets all bits (0xFF per byte), and the
 * wrapping byte add 0xFF + 0xFF yields 0xFE. regd is the bare register name;
 * the "%%" prefix is added by the macro.
 */
#define MOVQ_BFE(regd)                                  \
    __asm__ volatile (                                  \
        "pcmpeqd %%"#regd", %%"#regd" \n\t"             \
        "paddb %%"#regd", %%"#regd" \n\t" ::)
/*
 * MOVQ_WTWO(regd): set every 16-bit lane of regd to 2. regd is the bare
 * register name; the "%%" prefix is added by the macro.
 *
 * NOTE(review): the two definitions below cannot coexist unguarded. The
 * selecting conditional is restored here as PIC, based on the "shared library"
 * remark on the second variant -- confirm against the build system.
 */
#ifndef PIC
/* Load the value from the ff_wtwo constant in memory. */
#define MOVQ_WTWO(regd) __asm__ volatile ("movq %0, %%"#regd" \n\t" :: "m"(ff_wtwo))
#else
// for shared library it's better to use this way for accessing constants
// all ones -> 1 in each word (psrlw $15) -> 2 in each word (psllw $1)
#define MOVQ_WTWO(regd)                                 \
    __asm__ volatile (                                  \
        "pcmpeqd %%"#regd", %%"#regd" \n\t"             \
        "psrlw $15, %%"#regd" \n\t"                     \
        "psllw $1, %%"#regd" \n\t"::)
#endif
// Byte-wise average of rega and regb, rounding down, as an asm template
// fragment: regr = (rega & regb) + (((rega ^ regb) & regfe) >> 1).
// using regr as temporary and for the output result
// first argument is unmodified and second is trashed
// regfe is supposed to contain 0xfefefefefefefefe (it drops the low bit of
// each byte so the 64-bit shift cannot leak bits into the neighbouring byte)
// Operands are pasted verbatim, so the caller supplies the full spelling.
#define PAVGB_MMX_NO_RND(rega, regb, regr, regfe)       \
    "movq "#rega", "#regr" \n\t"                        \
    "pand "#regb", "#regr" \n\t"                        \
    "pxor "#rega", "#regb" \n\t"                        \
    "pand "#regfe", "#regb" \n\t"                       \
    "psrlq $1, "#regb" \n\t"                            \
    "paddb "#regb", "#regr" \n\t"
// Byte-wise average of rega and regb, rounding up, as an asm template
// fragment: regr = (rega | regb) - (((rega ^ regb) & regfe) >> 1).
// regr receives the result, rega is left unmodified and regb is trashed.
// The masking step only isolates bytes correctly if regfe holds 0xfe in every
// byte. Operands are pasted verbatim, so the caller supplies the full spelling.
#define PAVGB_MMX(rega, regb, regr, regfe)              \
    "movq "#rega", "#regr" \n\t"                        \
    "por "#regb", "#regr" \n\t"                         \
    "pxor "#rega", "#regb" \n\t"                        \
    "pand "#regfe", "#regb" \n\t"                       \
    "psrlq $1, "#regb" \n\t"                            \
    "psubb "#regb", "#regr" \n\t"
// Two interleaved round-down byte averages as one asm template fragment:
//   regr = avg(rega, regb)   and   regp = avg(regc, regd)
// using (x & y) + (((x ^ y) & mm6) >> 1) for each pair.
// rega and regc are left unmodified; regb and regd are trashed.
// mm6 is supposed to contain 0xfefefefefefefefe
// Operands are pasted verbatim, so the caller supplies the full spelling.
#define PAVGBP_MMX_NO_RND(rega, regb, regr, regc, regd, regp) \
    "movq "#rega", "#regr" \n\t"                        \
    "movq "#regc", "#regp" \n\t"                        \
    "pand "#regb", "#regr" \n\t"                        \
    "pand "#regd", "#regp" \n\t"                        \
    "pxor "#rega", "#regb" \n\t"                        \
    "pxor "#regc", "#regd" \n\t"                        \
    "pand %%mm6, "#regb" \n\t"                          \
    "pand %%mm6, "#regd" \n\t"                          \
    "psrlq $1, "#regb" \n\t"                            \
    "psrlq $1, "#regd" \n\t"                            \
    "paddb "#regb", "#regr" \n\t"                       \
    "paddb "#regd", "#regp" \n\t"
// Two interleaved round-up byte averages as one asm template fragment:
//   regr = avg(rega, regb)   and   regp = avg(regc, regd)
// using (x | y) - (((x ^ y) & mm6) >> 1) for each pair.
// rega and regc are left unmodified; regb and regd are trashed.
// The mask register is hard-coded as mm6; the masking step only isolates
// bytes correctly if it holds 0xfe in every byte.
// Operands are pasted verbatim, so the caller supplies the full spelling.
#define PAVGBP_MMX(rega, regb, regr, regc, regd, regp)  \
    "movq "#rega", "#regr" \n\t"                        \
    "movq "#regc", "#regp" \n\t"                        \
    "por "#regb", "#regr" \n\t"                         \
    "por "#regd", "#regp" \n\t"                         \
    "pxor "#rega", "#regb" \n\t"                        \
    "pxor "#regc", "#regd" \n\t"                        \
    "pand %%mm6, "#regb" \n\t"                          \
    "pand %%mm6, "#regd" \n\t"                          \
    "psrlq $1, "#regd" \n\t"                            \
    "psrlq $1, "#regb" \n\t"                            \
    "psubb "#regb", "#regr" \n\t"                       \
    "psubb "#regd", "#regp" \n\t"
142
void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx);
143
void ff_dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx);
/*
 * Clamped pixel routines: each reads 16-bit values from block and writes
 * 8-bit pixels, with line_size as the third argument. Declared here, defined
 * elsewhere.
 * NOTE(review): line_size is presumably the destination stride in bytes --
 * confirm against the implementations.
 */
void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);
void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);
void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);
/*
 * Block copy ("put") and averaging ("avg") routines. All share one signature:
 * block is the writable buffer, pixels the read-only source, followed by
 * line_size and h. Declared here, defined elsewhere.
 * NOTE(review): the 8/16 in the names is presumably the block width in pixels,
 * line_size the stride in bytes and h the row count -- confirm against the
 * implementations.
 */
void ff_avg_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
                        ptrdiff_t line_size, int h);
void ff_avg_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
                         ptrdiff_t line_size, int h);
void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
                        ptrdiff_t line_size, int h);
void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
                         ptrdiff_t line_size, int h);
void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
                           ptrdiff_t line_size, int h);
void ff_put_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
                           ptrdiff_t line_size, int h);
void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
                          ptrdiff_t line_size, int h);
void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
                          ptrdiff_t line_size, int h);
/*
 * "x2" / "xy2" variants of the put/avg routines, with the same signature
 * (writable block, read-only pixels, line_size, h). Declared here, defined
 * elsewhere.
 * NOTE(review): x2 / xy2 presumably denote half-pixel interpolation in x, and
 * in both x and y -- confirm against the implementations.
 */
void ff_avg_pixels8_x2_mmx(uint8_t *block, const uint8_t *pixels,
                           ptrdiff_t line_size, int h);

void ff_avg_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels,
                            ptrdiff_t line_size, int h);
void ff_avg_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels,
                             ptrdiff_t line_size, int h);

void ff_put_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels,
                            ptrdiff_t line_size, int h);
void ff_put_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels,
                             ptrdiff_t line_size, int h);
/*
 * IDCT routines operating on a writable buffer of 16-bit coefficients.
 * Declared here, defined elsewhere.
 * NOTE(review): presumably an in-place transform of an 8x8 block -- confirm
 * against the implementations.
 */
void ff_mmx_idct(int16_t *block);
void ff_mmxext_idct(int16_t *block);
/*
 * Deinterlacing routine that writes to dst and reads from several read-only
 * input lines. Declared here, defined elsewhere.
 * NOTE(review): the visible declaration was cut off after lum_m1. The trailing
 * `lum` and `size` parameters were restored to match
 * ff_deinterlace_line_inplace_mmx -- confirm against the implementation.
 */
void ff_deinterlace_line_mmx(uint8_t *dst,
                             const uint8_t *lum_m4, const uint8_t *lum_m3,
                             const uint8_t *lum_m2, const uint8_t *lum_m1,
                             const uint8_t *lum,
                             int size);
/*
 * Deinterlacing routine, "inplace" variant: it takes five line pointers and a
 * size, with no separate destination. Declared here, defined elsewhere.
 * NOTE(review): every pointer is const-qualified although the name implies
 * that one of the lines is overwritten -- confirm which one against the
 * implementation.
 */
void ff_deinterlace_line_inplace_mmx(const uint8_t *lum_m4,
                                     const uint8_t *lum_m3,
                                     const uint8_t *lum_m2,
                                     const uint8_t *lum_m1,
                                     const uint8_t *lum, int size);
#endif /* AVCODEC_X86_DSPUTIL_MMX_H */