2
* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI support
4
* Copyright (c) 2005 Fabrice Bellard
6
* This library is free software; you can redistribute it and/or
7
* modify it under the terms of the GNU Lesser General Public
8
* License as published by the Free Software Foundation; either
9
* version 2 of the License, or (at your option) any later version.
11
* This library is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
* Lesser General Public License for more details.
16
* You should have received a copy of the GNU Lesser General Public
17
* License along with this library; if not, write to the Free Software
18
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA
28
/*
 * Helper-signature type names for the register operand types used below.
 * Each of Reg, XMMReg and MMXReg is aliased to `ptr` and given a pointer
 * C type (`Reg *`, `XMMReg *`, `MMXReg *`).
 * NOTE(review): presumably consumed by the DEF_HELPER_n machinery when it
 * expands the declarations in this file -- confirm against its definition.
 */
#define dh_alias_Reg ptr
29
#define dh_alias_XMMReg ptr
30
#define dh_alias_MMXReg ptr
31
#define dh_ctype_Reg Reg *
32
#define dh_ctype_XMMReg XMMReg *
33
#define dh_ctype_MMXReg MMXReg *
35
/*
 * Shift helper declarations (psrl*, psra*, psll*). Each helper name is built
 * by glue() from the mnemonic and SUFFIX, and each is declared with the type
 * list (void, Reg, Reg).
 * NOTE(review): assumes the DEF_HELPER_n argument order is
 * (name, return type, argument types...) -- confirm.
 */
DEF_HELPER_2(glue(psrlw, SUFFIX), void, Reg, Reg)
36
DEF_HELPER_2(glue(psraw, SUFFIX), void, Reg, Reg)
37
DEF_HELPER_2(glue(psllw, SUFFIX), void, Reg, Reg)
38
DEF_HELPER_2(glue(psrld, SUFFIX), void, Reg, Reg)
39
DEF_HELPER_2(glue(psrad, SUFFIX), void, Reg, Reg)
40
DEF_HELPER_2(glue(pslld, SUFFIX), void, Reg, Reg)
41
DEF_HELPER_2(glue(psrlq, SUFFIX), void, Reg, Reg)
42
DEF_HELPER_2(glue(psllq, SUFFIX), void, Reg, Reg)
/* psrldq / pslldq: same (void, Reg, Reg) signature as the shifts above. */
45
DEF_HELPER_2(glue(psrldq, SUFFIX), void, Reg, Reg)
46
DEF_HELPER_2(glue(pslldq, SUFFIX), void, Reg, Reg)
49
/*
 * SSE_HELPER_B(name, F): declare the helper glue(name, SUFFIX) with the type
 * list (void, Reg, Reg). The F argument is accepted but does not appear in
 * this expansion.
 */
#define SSE_HELPER_B(name, F)\
50
DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg)
52
/*
 * SSE_HELPER_W(name, F): declare the helper glue(name, SUFFIX) with the type
 * list (void, Reg, Reg). The expansion is identical to SSE_HELPER_B; F is
 * accepted but not used here.
 */
#define SSE_HELPER_W(name, F)\
53
DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg)
55
/*
 * SSE_HELPER_L(name, F): declare the helper glue(name, SUFFIX) with the type
 * list (void, Reg, Reg). The expansion is identical to SSE_HELPER_B; F is
 * accepted but not used here.
 */
#define SSE_HELPER_L(name, F)\
56
DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg)
58
/*
 * SSE_HELPER_Q(name, F): declare the helper glue(name, SUFFIX) with the type
 * list (void, Reg, Reg). The expansion is identical to SSE_HELPER_B; F is
 * accepted but not used here.
 */
#define SSE_HELPER_Q(name, F)\
59
DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg)
61
/*
 * Integer helper declarations generated through the SSE_HELPER_B/W/L/Q
 * macros. Every entry yields a (void, Reg, Reg) helper named after the first
 * argument plus SUFFIX. The second argument (FADD, FSUB, ...) is not used by
 * the macro expansions visible in this file.
 * NOTE(review): the second argument presumably names the per-element
 * operation used where the helpers are implemented -- confirm.
 */
/* padd* / psub* */
SSE_HELPER_B(paddb, FADD)
62
SSE_HELPER_W(paddw, FADD)
63
SSE_HELPER_L(paddl, FADD)
64
SSE_HELPER_Q(paddq, FADD)
66
SSE_HELPER_B(psubb, FSUB)
67
SSE_HELPER_W(psubw, FSUB)
68
SSE_HELPER_L(psubl, FSUB)
69
SSE_HELPER_Q(psubq, FSUB)
/* paddus* / padds* / psubus* / psubs*, B and W variants */
71
SSE_HELPER_B(paddusb, FADDUB)
72
SSE_HELPER_B(paddsb, FADDSB)
73
SSE_HELPER_B(psubusb, FSUBUB)
74
SSE_HELPER_B(psubsb, FSUBSB)
76
SSE_HELPER_W(paddusw, FADDUW)
77
SSE_HELPER_W(paddsw, FADDSW)
78
SSE_HELPER_W(psubusw, FSUBUW)
79
SSE_HELPER_W(psubsw, FSUBSW)
/* pmin* / pmax* */
81
SSE_HELPER_B(pminub, FMINUB)
82
SSE_HELPER_B(pmaxub, FMAXUB)
84
SSE_HELPER_W(pminsw, FMINSW)
85
SSE_HELPER_W(pmaxsw, FMAXSW)
/* pand / pandn / por / pxor, all declared through the Q macro */
87
SSE_HELPER_Q(pand, FAND)
88
SSE_HELPER_Q(pandn, FANDN)
89
SSE_HELPER_Q(por, FOR)
90
SSE_HELPER_Q(pxor, FXOR)
/* pcmpgt* / pcmpeq* */
92
SSE_HELPER_B(pcmpgtb, FCMPGTB)
93
SSE_HELPER_W(pcmpgtw, FCMPGTW)
94
SSE_HELPER_L(pcmpgtl, FCMPGTL)
96
SSE_HELPER_B(pcmpeqb, FCMPEQ)
97
SSE_HELPER_W(pcmpeqw, FCMPEQ)
98
SSE_HELPER_L(pcmpeql, FCMPEQ)
/* pmul* (W variants) */
100
SSE_HELPER_W(pmullw, FMULLW)
102
SSE_HELPER_W(pmulhrw, FMULHRW)
104
SSE_HELPER_W(pmulhuw, FMULHUW)
105
SSE_HELPER_W(pmulhw, FMULHW)
/* pavgb / pavgw share the same F token (FAVG) */
107
SSE_HELPER_B(pavgb, FAVG)
108
SSE_HELPER_W(pavgw, FAVG)
110
/*
 * Helpers declared directly instead of through the SSE_HELPER_* macros.
 * pmuludq, pmaddwd and psadbw use the usual (void, Reg, Reg) type list.
 * maskmov is a three-argument helper whose last argument is `tl`.
 * movl_mm_T0 / movq_mm_T0 take an i32 / i64 as their second argument in
 * place of a second Reg.
 */
DEF_HELPER_2(glue(pmuludq, SUFFIX), void, Reg, Reg)
111
DEF_HELPER_2(glue(pmaddwd, SUFFIX), void, Reg, Reg)
113
DEF_HELPER_2(glue(psadbw, SUFFIX), void, Reg, Reg)
114
DEF_HELPER_3(glue(maskmov, SUFFIX), void, Reg, Reg, tl)
115
DEF_HELPER_2(glue(movl_mm_T0, SUFFIX), void, Reg, i32)
117
DEF_HELPER_2(glue(movq_mm_T0, SUFFIX), void, Reg, i64)
121
/*
 * Shuffle helpers: three-argument helpers with the type list
 * (void, Reg, Reg, int). shufps and shufpd are declared under their plain
 * names; the others get SUFFIX appended via glue().
 * NOTE(review): the third argument is presumably the instruction's immediate
 * selector -- confirm against the implementation.
 */
DEF_HELPER_3(glue(pshufw, SUFFIX), void, Reg, Reg, int)
123
DEF_HELPER_3(shufps, void, Reg, Reg, int)
124
DEF_HELPER_3(shufpd, void, Reg, Reg, int)
125
DEF_HELPER_3(glue(pshufd, SUFFIX), void, Reg, Reg, int)
126
DEF_HELPER_3(glue(pshuflw, SUFFIX), void, Reg, Reg, int)
127
DEF_HELPER_3(glue(pshufhw, SUFFIX), void, Reg, Reg, int)
132
/* XXX: not accurate */
134
/*
 * SSE_HELPER_S(name, F): declare four helpers for one operation by pasting
 * the suffixes ps, ss, pd and sd onto `name` (e.g. add -> addps, addss,
 * addpd, addsd). Each uses the type list (void, Reg, Reg). No SUFFIX is
 * appended, and F does not appear in this expansion.
 */
#define SSE_HELPER_S(name, F)\
135
DEF_HELPER_2(name ## ps , void, Reg, Reg) \
136
DEF_HELPER_2(name ## ss , void, Reg, Reg) \
137
DEF_HELPER_2(name ## pd , void, Reg, Reg) \
138
DEF_HELPER_2(name ## sd , void, Reg, Reg)
140
/*
 * Helper declarations generated through SSE_HELPER_S: one ps/ss/pd/sd
 * quartet each for add, sub, mul, div, min, max and sqrt. The FPU_* argument
 * is not used by the SSE_HELPER_S expansion in this file.
 */
SSE_HELPER_S(add, FPU_ADD)
141
SSE_HELPER_S(sub, FPU_SUB)
142
SSE_HELPER_S(mul, FPU_MUL)
143
SSE_HELPER_S(div, FPU_DIV)
144
SSE_HELPER_S(min, FPU_MIN)
145
SSE_HELPER_S(max, FPU_MAX)
146
SSE_HELPER_S(sqrt, FPU_SQRT)
149
/*
 * Conversion helpers (cvt*), declared under their plain names without SUFFIX.
 * The first six use the generic Reg operand type; the rest spell out XMMReg /
 * MMXReg explicitly.
 * NOTE(review): assumes the DEF_HELPER_n argument order is
 * (name, return type, argument types...) -- confirm.
 */
DEF_HELPER_2(cvtps2pd, void, Reg, Reg)
150
DEF_HELPER_2(cvtpd2ps, void, Reg, Reg)
151
DEF_HELPER_2(cvtss2sd, void, Reg, Reg)
152
DEF_HELPER_2(cvtsd2ss, void, Reg, Reg)
153
DEF_HELPER_2(cvtdq2ps, void, Reg, Reg)
154
DEF_HELPER_2(cvtdq2pd, void, Reg, Reg)
155
DEF_HELPER_2(cvtpi2ps, void, XMMReg, MMXReg)
156
DEF_HELPER_2(cvtpi2pd, void, XMMReg, MMXReg)
157
DEF_HELPER_2(cvtsi2ss, void, XMMReg, i32)
158
DEF_HELPER_2(cvtsi2sd, void, XMMReg, i32)
/* cvtsq2*: same as cvtsi2* but with an i64 second argument. */
161
DEF_HELPER_2(cvtsq2ss, void, XMMReg, i64)
162
DEF_HELPER_2(cvtsq2sd, void, XMMReg, i64)
/* cvt*2dq / cvt*2pi, plus one-argument cvt*2si (s32) and cvt*2sq (s64). */
165
DEF_HELPER_2(cvtps2dq, void, XMMReg, XMMReg)
166
DEF_HELPER_2(cvtpd2dq, void, XMMReg, XMMReg)
167
DEF_HELPER_2(cvtps2pi, void, MMXReg, XMMReg)
168
DEF_HELPER_2(cvtpd2pi, void, MMXReg, XMMReg)
169
DEF_HELPER_1(cvtss2si, s32, XMMReg)
170
DEF_HELPER_1(cvtsd2si, s32, XMMReg)
172
DEF_HELPER_1(cvtss2sq, s64, XMMReg)
173
DEF_HELPER_1(cvtsd2sq, s64, XMMReg)
/* cvtt* variants: type lists mirror the cvt* group directly above. */
176
DEF_HELPER_2(cvttps2dq, void, XMMReg, XMMReg)
177
DEF_HELPER_2(cvttpd2dq, void, XMMReg, XMMReg)
178
DEF_HELPER_2(cvttps2pi, void, MMXReg, XMMReg)
179
DEF_HELPER_2(cvttpd2pi, void, MMXReg, XMMReg)
180
DEF_HELPER_1(cvttss2si, s32, XMMReg)
181
DEF_HELPER_1(cvttsd2si, s32, XMMReg)
183
DEF_HELPER_1(cvttss2sq, s64, XMMReg)
184
DEF_HELPER_1(cvttsd2sq, s64, XMMReg)
/* rsqrt* / rcp* / hadd* / hsub* / addsub*: all (void, XMMReg, XMMReg). */
187
DEF_HELPER_2(rsqrtps, void, XMMReg, XMMReg)
188
DEF_HELPER_2(rsqrtss, void, XMMReg, XMMReg)
189
DEF_HELPER_2(rcpps, void, XMMReg, XMMReg)
190
DEF_HELPER_2(rcpss, void, XMMReg, XMMReg)
191
DEF_HELPER_2(haddps, void, XMMReg, XMMReg)
192
DEF_HELPER_2(haddpd, void, XMMReg, XMMReg)
193
DEF_HELPER_2(hsubps, void, XMMReg, XMMReg)
194
DEF_HELPER_2(hsubpd, void, XMMReg, XMMReg)
195
DEF_HELPER_2(addsubps, void, XMMReg, XMMReg)
196
DEF_HELPER_2(addsubpd, void, XMMReg, XMMReg)
198
/*
 * SSE_HELPER_CMP(name, F): declare four comparison helpers by pasting the
 * suffixes ps, ss, pd and sd onto `name` (e.g. cmpeq -> cmpeqps, cmpeqss,
 * cmpeqpd, cmpeqsd). Each uses the type list (void, Reg, Reg). The expansion
 * has the same shape as SSE_HELPER_S; F does not appear in it.
 */
#define SSE_HELPER_CMP(name, F)\
199
DEF_HELPER_2( name ## ps , void, Reg, Reg) \
200
DEF_HELPER_2( name ## ss , void, Reg, Reg) \
201
DEF_HELPER_2( name ## pd , void, Reg, Reg) \
202
DEF_HELPER_2( name ## sd , void, Reg, Reg)
204
/*
 * Helper declarations generated through SSE_HELPER_CMP: one ps/ss/pd/sd
 * quartet for each of the eight predicates eq, lt, le, unord, neq, nlt, nle
 * and ord. The FPU_CMP* argument is not used by the SSE_HELPER_CMP expansion
 * in this file.
 */
SSE_HELPER_CMP(cmpeq, FPU_CMPEQ)
205
SSE_HELPER_CMP(cmplt, FPU_CMPLT)
206
SSE_HELPER_CMP(cmple, FPU_CMPLE)
207
SSE_HELPER_CMP(cmpunord, FPU_CMPUNORD)
208
SSE_HELPER_CMP(cmpneq, FPU_CMPNEQ)
209
SSE_HELPER_CMP(cmpnlt, FPU_CMPNLT)
210
SSE_HELPER_CMP(cmpnle, FPU_CMPNLE)
211
SSE_HELPER_CMP(cmpord, FPU_CMPORD)
213
/*
 * ucomis* / comis*: declared with the type list (void, Reg, Reg), so nothing
 * is handed back through the helper's declared return type.
 * NOTE(review): the comparison outcome is presumably reported through CPU
 * state instead -- confirm against the implementation.
 */
DEF_HELPER_2(ucomiss, void, Reg, Reg)
214
DEF_HELPER_2(comiss, void, Reg, Reg)
215
DEF_HELPER_2(ucomisd, void, Reg, Reg)
216
DEF_HELPER_2(comisd, void, Reg, Reg)
/* movmsk* / pmovmskb: one Reg argument, declared with i32 as the other type. */
217
DEF_HELPER_1(movmskps, i32, Reg)
218
DEF_HELPER_1(movmskpd, i32, Reg)
221
DEF_HELPER_1(glue(pmovmskb, SUFFIX), i32, Reg)
/* pack*: (void, Reg, Reg), named with SUFFIX. */
222
DEF_HELPER_2(glue(packsswb, SUFFIX), void, Reg, Reg)
223
DEF_HELPER_2(glue(packuswb, SUFFIX), void, Reg, Reg)
224
DEF_HELPER_2(glue(packssdw, SUFFIX), void, Reg, Reg)
225
/*
 * UNPCK_OP(base_name, base): declare three helpers named
 * punpck<base_name>bw, punpck<base_name>wd and punpck<base_name>dq (each with
 * SUFFIX appended), all with the type list (void, Reg, Reg). `base` does not
 * appear in this expansion.
 * NOTE(review): no UNPCK_OP(...) invocation is visible in this part of the
 * file -- confirm that the punpckl and punpckh bw/wd/dq declarations are
 * actually emitted somewhere.
 */
#define UNPCK_OP(base_name, base) \
226
DEF_HELPER_2(glue(punpck ## base_name ## bw, SUFFIX) , void, Reg, Reg) \
227
DEF_HELPER_2(glue(punpck ## base_name ## wd, SUFFIX) , void, Reg, Reg) \
228
DEF_HELPER_2(glue(punpck ## base_name ## dq, SUFFIX) , void, Reg, Reg)
234
/*
 * punpcklqdq / punpckhqdq: declared directly with the type list
 * (void, Reg, Reg) rather than through UNPCK_OP, which only generates the
 * bw, wd and dq name variants.
 */
DEF_HELPER_2(glue(punpcklqdq, SUFFIX), void, Reg, Reg)
235
DEF_HELPER_2(glue(punpckhqdq, SUFFIX), void, Reg, Reg)
238
/* 3DNow! float ops */
240
/*
 * Every helper in this group is declared with the type list
 * (void, MMXReg, MMXReg) under its plain name; no SUFFIX is appended and the
 * generic Reg type is not used.
 */
DEF_HELPER_2(pi2fd, void, MMXReg, MMXReg)
241
DEF_HELPER_2(pi2fw, void, MMXReg, MMXReg)
242
DEF_HELPER_2(pf2id, void, MMXReg, MMXReg)
243
DEF_HELPER_2(pf2iw, void, MMXReg, MMXReg)
244
DEF_HELPER_2(pfacc, void, MMXReg, MMXReg)
245
DEF_HELPER_2(pfadd, void, MMXReg, MMXReg)
246
DEF_HELPER_2(pfcmpeq, void, MMXReg, MMXReg)
247
DEF_HELPER_2(pfcmpge, void, MMXReg, MMXReg)
248
DEF_HELPER_2(pfcmpgt, void, MMXReg, MMXReg)
249
DEF_HELPER_2(pfmax, void, MMXReg, MMXReg)
250
DEF_HELPER_2(pfmin, void, MMXReg, MMXReg)
251
DEF_HELPER_2(pfmul, void, MMXReg, MMXReg)
252
DEF_HELPER_2(pfnacc, void, MMXReg, MMXReg)
253
DEF_HELPER_2(pfpnacc, void, MMXReg, MMXReg)
254
DEF_HELPER_2(pfrcp, void, MMXReg, MMXReg)
255
DEF_HELPER_2(pfrsqrt, void, MMXReg, MMXReg)
256
DEF_HELPER_2(pfsub, void, MMXReg, MMXReg)
257
DEF_HELPER_2(pfsubr, void, MMXReg, MMXReg)
258
DEF_HELPER_2(pswapd, void, MMXReg, MMXReg)
261
/* SSSE3 op helpers */
262
/*
 * All helpers in this group are named with SUFFIX and use the type list
 * (void, Reg, Reg), except palignr at the end, which is a three-argument
 * helper with an extra s32.
 */
DEF_HELPER_2(glue(phaddw, SUFFIX), void, Reg, Reg)
263
DEF_HELPER_2(glue(phaddd, SUFFIX), void, Reg, Reg)
264
DEF_HELPER_2(glue(phaddsw, SUFFIX), void, Reg, Reg)
265
DEF_HELPER_2(glue(phsubw, SUFFIX), void, Reg, Reg)
266
DEF_HELPER_2(glue(phsubd, SUFFIX), void, Reg, Reg)
267
DEF_HELPER_2(glue(phsubsw, SUFFIX), void, Reg, Reg)
268
DEF_HELPER_2(glue(pabsb, SUFFIX), void, Reg, Reg)
269
DEF_HELPER_2(glue(pabsw, SUFFIX), void, Reg, Reg)
270
DEF_HELPER_2(glue(pabsd, SUFFIX), void, Reg, Reg)
271
DEF_HELPER_2(glue(pmaddubsw, SUFFIX), void, Reg, Reg)
272
DEF_HELPER_2(glue(pmulhrsw, SUFFIX), void, Reg, Reg)
273
DEF_HELPER_2(glue(pshufb, SUFFIX), void, Reg, Reg)
274
DEF_HELPER_2(glue(psignb, SUFFIX), void, Reg, Reg)
275
DEF_HELPER_2(glue(psignw, SUFFIX), void, Reg, Reg)
276
DEF_HELPER_2(glue(psignd, SUFFIX), void, Reg, Reg)
277
DEF_HELPER_3(glue(palignr, SUFFIX), void, Reg, Reg, s32)
279
/* SSE4.1 op helpers */
281
/*
 * Two-argument helpers, all named with SUFFIX and declared with the type
 * list (void, Reg, Reg). ptest is among them, so it too has a void first
 * type.
 */
DEF_HELPER_2(glue(pblendvb, SUFFIX), void, Reg, Reg)
282
DEF_HELPER_2(glue(blendvps, SUFFIX), void, Reg, Reg)
283
DEF_HELPER_2(glue(blendvpd, SUFFIX), void, Reg, Reg)
284
DEF_HELPER_2(glue(ptest, SUFFIX), void, Reg, Reg)
285
DEF_HELPER_2(glue(pmovsxbw, SUFFIX), void, Reg, Reg)
286
DEF_HELPER_2(glue(pmovsxbd, SUFFIX), void, Reg, Reg)
287
DEF_HELPER_2(glue(pmovsxbq, SUFFIX), void, Reg, Reg)
288
DEF_HELPER_2(glue(pmovsxwd, SUFFIX), void, Reg, Reg)
289
DEF_HELPER_2(glue(pmovsxwq, SUFFIX), void, Reg, Reg)
290
DEF_HELPER_2(glue(pmovsxdq, SUFFIX), void, Reg, Reg)
291
DEF_HELPER_2(glue(pmovzxbw, SUFFIX), void, Reg, Reg)
292
DEF_HELPER_2(glue(pmovzxbd, SUFFIX), void, Reg, Reg)
293
DEF_HELPER_2(glue(pmovzxbq, SUFFIX), void, Reg, Reg)
294
DEF_HELPER_2(glue(pmovzxwd, SUFFIX), void, Reg, Reg)
295
DEF_HELPER_2(glue(pmovzxwq, SUFFIX), void, Reg, Reg)
296
DEF_HELPER_2(glue(pmovzxdq, SUFFIX), void, Reg, Reg)
297
DEF_HELPER_2(glue(pmuldq, SUFFIX), void, Reg, Reg)
298
DEF_HELPER_2(glue(pcmpeqq, SUFFIX), void, Reg, Reg)
299
DEF_HELPER_2(glue(packusdw, SUFFIX), void, Reg, Reg)
300
DEF_HELPER_2(glue(pminsb, SUFFIX), void, Reg, Reg)
301
DEF_HELPER_2(glue(pminsd, SUFFIX), void, Reg, Reg)
302
DEF_HELPER_2(glue(pminuw, SUFFIX), void, Reg, Reg)
303
DEF_HELPER_2(glue(pminud, SUFFIX), void, Reg, Reg)
304
DEF_HELPER_2(glue(pmaxsb, SUFFIX), void, Reg, Reg)
305
DEF_HELPER_2(glue(pmaxsd, SUFFIX), void, Reg, Reg)
306
DEF_HELPER_2(glue(pmaxuw, SUFFIX), void, Reg, Reg)
307
DEF_HELPER_2(glue(pmaxud, SUFFIX), void, Reg, Reg)
308
DEF_HELPER_2(glue(pmulld, SUFFIX), void, Reg, Reg)
309
DEF_HELPER_2(glue(phminposuw, SUFFIX), void, Reg, Reg)
/*
 * Three-argument helpers with an extra i32.
 * NOTE(review): the i32 is presumably the instruction's immediate operand --
 * confirm against the implementation.
 */
310
DEF_HELPER_3(glue(roundps, SUFFIX), void, Reg, Reg, i32)
311
DEF_HELPER_3(glue(roundpd, SUFFIX), void, Reg, Reg, i32)
312
DEF_HELPER_3(glue(roundss, SUFFIX), void, Reg, Reg, i32)
313
DEF_HELPER_3(glue(roundsd, SUFFIX), void, Reg, Reg, i32)
314
DEF_HELPER_3(glue(blendps, SUFFIX), void, Reg, Reg, i32)
315
DEF_HELPER_3(glue(blendpd, SUFFIX), void, Reg, Reg, i32)
316
DEF_HELPER_3(glue(pblendw, SUFFIX), void, Reg, Reg, i32)
317
DEF_HELPER_3(glue(dpps, SUFFIX), void, Reg, Reg, i32)
318
DEF_HELPER_3(glue(dppd, SUFFIX), void, Reg, Reg, i32)
319
DEF_HELPER_3(glue(mpsadbw, SUFFIX), void, Reg, Reg, i32)
322
/* SSE4.2 op helpers */
324
/*
 * pcmpgtq uses the usual (void, Reg, Reg) type list. The four
 * pcmpestri/pcmpestrm/pcmpistri/pcmpistrm helpers add an i32 and are also
 * declared void.
 * NOTE(review): their results are presumably delivered through CPU state
 * rather than a return value -- confirm against the implementation.
 */
DEF_HELPER_2(glue(pcmpgtq, SUFFIX), void, Reg, Reg)
325
DEF_HELPER_3(glue(pcmpestri, SUFFIX), void, Reg, Reg, i32)
326
DEF_HELPER_3(glue(pcmpestrm, SUFFIX), void, Reg, Reg, i32)
327
DEF_HELPER_3(glue(pcmpistri, SUFFIX), void, Reg, Reg, i32)
328
DEF_HELPER_3(glue(pcmpistrm, SUFFIX), void, Reg, Reg, i32)
/*
 * crc32 and popcnt take no Reg operands and carry no SUFFIX: crc32 is declared
 * with types (tl, i32, tl, i32) and popcnt with (tl, tl, i32).
 */
329
DEF_HELPER_3(crc32, tl, i32, tl, i32)
330
DEF_HELPER_2(popcnt, tl, tl, i32)
342
/*
 * Drop the SSE_HELPER_CMP generator macro once its declarations are emitted.
 * NOTE(review): no matching #undef for SSE_HELPER_B/W/L/Q, SSE_HELPER_S or
 * UNPCK_OP is visible in this part of the file -- confirm they are undefined
 * as well if this header is meant to be included more than once.
 */
#undef SSE_HELPER_CMP