2
/ dct36_3dnow.s - 3DNow! optimized dct36()
4
/ This code is based on 'dct36_3dnow.s' by Syuuhei Kashiyama
5
/ <squash@mb.kcom.ne.jp>; only two types of changes have been made:
7
/ - removed the PREFETCH instruction for speed
8
/ - changed the function name to support automatic 3DNow! detection
10
/ You can find Kashiyama's original 3dnow! support patch
11
/ (for mpg123-0.59o) at
12
/ http://user.ecc.u-tokyo.ac.jp/~g810370/linux-simd/ (Japanese).
14
/ by KIMURA Takuhiro <kim@hannah.ipc.miyakyo-u.ac.jp> - until 31.Mar.1999
15
/ <kim@comtec.co.jp> - after 1.Apr.1999
19
/// Replacement of dct36() with AMD's 3DNow! SIMD operations support
21
/// Syuuhei Kashiyama <squash@mb.kcom.ne.jp>
23
/// The author of this program disclaim whole expressed or implied
24
/// warranties with regard to this program, and in no event shall the
25
/// author of this program liable to whatever resulted from the use of
26
/// this program. Use it at your own risk.
30
/// dct36_3dnow -- the directive below marks this symbol as an ELF function.
///
/// NOTE(review): only the punpckldq instructions of this routine appear in
/// this listing. The entry label, the loads that fill the low halves of the
/// MMX registers, the arithmetic and the return are not visible here, so the
/// comments below describe the operand pattern only.
///
/// punpckldq mem,%mmN (AT&T order: source, destination) keeps the low dword
/// of %mmN and writes the 32-bit value read from mem into its high dword.
.type dct36_3dnow,@function
/// High halves of %mm1..%mm4 are filled from byte offsets 20, 36, 52 and 68
/// relative to %eax (what %eax points to is not shown here -- TODO confirm).
91
punpckldq 20(%eax),%mm1
99
punpckldq 36(%eax),%mm2
107
punpckldq 52(%eax),%mm3
115
punpckldq 68(%eax),%mm4
/// The remaining lines repeat one four-instruction pattern nine times:
///   1. tfcos36+K  -> high dword of %mm5, with K = 0, 32, 4, 28, 8, 24, 12,
///      20, 16 in that order (4-byte steps; presumably a table of 32-bit
///      values -- TODO confirm against the definition of tfcos36);
///   2. A(%edx)    -> high dword of %mm6;
///   3. B(%edx)    -> high dword of %mm6;
///   4. B(%esi)    -> high dword of %mm6 (same offset B as step 3).
/// In every group A + B = 140. The meaning of %edx and %esi is not established
/// by the visible lines -- verify against the full routine.
169
punpckldq tfcos36+0,%mm5
174
punpckldq 104(%edx),%mm6
184
punpckldq 36(%edx),%mm6
187
punpckldq 36(%esi),%mm6
195
punpckldq tfcos36+32,%mm5
200
punpckldq 72(%edx),%mm6
210
punpckldq 68(%edx),%mm6
213
punpckldq 68(%esi),%mm6
241
punpckldq tfcos36+4,%mm5
246
punpckldq 100(%edx),%mm6
256
punpckldq 40(%edx),%mm6
259
punpckldq 40(%esi),%mm6
267
punpckldq tfcos36+28,%mm5
272
punpckldq 76(%edx),%mm6
282
punpckldq 64(%edx),%mm6
285
punpckldq 64(%esi),%mm6
326
punpckldq tfcos36+8,%mm5
331
punpckldq 96(%edx),%mm6
341
punpckldq 44(%edx),%mm6
344
punpckldq 44(%esi),%mm6
352
punpckldq tfcos36+24,%mm5
357
punpckldq 80(%edx),%mm6
367
punpckldq 60(%edx),%mm6
370
punpckldq 60(%esi),%mm6
410
punpckldq tfcos36+12,%mm5
415
punpckldq 92(%edx),%mm6
425
punpckldq 48(%edx),%mm6
428
punpckldq 48(%esi),%mm6
436
punpckldq tfcos36+20,%mm5
441
punpckldq 84(%edx),%mm6
451
punpckldq 56(%edx),%mm6
454
punpckldq 56(%esi),%mm6
470
punpckldq tfcos36+16,%mm5
475
punpckldq 88(%edx),%mm6
485
punpckldq 52(%edx),%mm6
488
punpckldq 52(%esi),%mm6