2
dct36_3dnowext: extended 3DNow optimized DCT36
4
copyright ?-2007 by the mpg123 project - free software under the terms of the LGPL 2.1
5
see COPYING and AUTHORS files in distribution or http://mpg123.org
7
Transformed back into standalone asm, with help of
8
gcc -S -DHAVE_CONFIG_H -I. -march=k6-3 -O3 -Wall -pedantic -fno-strict-aliasing -DREAL_IS_FLOAT -c -o dct36_3dnowext.{S,c}
10
MPlayer comment follows.
14
* dct36_3dnow.c - 3DNow! optimized dct36()
16
* This code is based on 'dct36_3dnow.s' by Syuuhei Kashiyama
17
* <squash@mb.kcom.ne.jp>, only two types of changes have been made:
19
* - removed PREFETCH instruction for speedup
20
* - changed function name to support 3DNow! automatic detection
22
* You can find Kashiyama's original 3dnow! support patch
23
* (for mpg123-0.59o) at
24
* http://user.ecc.u-tokyo.ac.jp/~g810370/linux-simd/ (Japanese).
26
* by KIMURA Takuhiro <kim@hannah.ipc.miyakyo-u.ac.jp> - until 31.Mar.1999
27
* <kim@comtec.co.jp> - after 1.Apr.1999
29
* Modified for use with MPlayer, for details see the changelog at
30
* http://svn.mplayerhq.hu/mplayer/trunk/
31
* $Id: dct36_3dnow.c 18786 2006-06-22 13:34:00Z diego $
33
* Original disclaimer:
34
* The author of this program disclaim whole expressed or implied
35
* warranties with regard to this program, and in no event shall the
36
* author of this program liable to whatever resulted from the use of
37
* this program. Use it at your own risk.
39
* 2003/06/21: Moved to GCC inline assembly - Alex Beregszaszi
46
/*
 * dct36_3dnowext -- exported entry point (AT&T syntax, 32-bit x86, MMX registers).
 *
 * The symbol name is wrapped in ASM_NAME(), a preprocessor macro defined
 * outside this listing (presumably platform symbol decoration -- confirm).
 *
 * NOTE(review): the bare numbers between the instructions look like line
 * numbers of the original file. They jump (48 -> 105 -> 113 ...), so only a
 * subset of the routine is visible here: the instructions that read memory
 * through punpckldq, or that reference the COS9 / tfcos36 tables. The
 * prologue, arithmetic, stores and epilogue are not shown.
 *
 * NOTE(review): %eax, %edx and %esi are used as base pointers below; they are
 * presumably loaded from the stack arguments in the missing prologue --
 * verify against the complete file.
 *
 * Instruction semantics relied on by the comments below:
 *   movd      mem,%mmN  - loads 32 bits into the low dword of %mmN and
 *                         clears the high dword.
 *   punpckldq mem,%mmN  - keeps the low dword of %mmN and places the dword
 *                         read from memory into the high dword.
 */
.globl ASM_NAME(dct36_3dnowext)
47
/* .type ASM_NAME(dct36_3dnowext), @function */
48
ASM_NAME(dct36_3dnowext):
/* Merge the dwords at %eax + 20, 36, 52, 68 (16 bytes apart) into the high
   halves of mm1..mm4. */
105
punpckldq 20(%eax),%mm1
113
punpckldq 36(%eax),%mm2
121
punpckldq 52(%eax),%mm3
129
punpckldq 68(%eax),%mm4
/* Scalar loads from the COS9 table. Byte offsets 4..32 are used throughout
   this routine; with 4-byte entries these would be COS9[1]..COS9[8]
   (entry size assumed from -DREAL_IS_FLOAT in the file header -- confirm). */
136
movd ASM_NAME(COS9)+12,%mm2
138
movd ASM_NAME(COS9)+24,%mm3
148
movd ASM_NAME(COS9)+4,%mm3
153
movd ASM_NAME(COS9)+20,%mm4
158
movd ASM_NAME(COS9)+28,%mm4
164
movd ASM_NAME(COS9)+8,%mm5
169
movd ASM_NAME(COS9)+16,%mm5
175
movd ASM_NAME(COS9)+32,%mm5
/* Paired tfcos36 accesses: byte offset +0 here, its mirror +32 further down.
   Each tfcos36 access is followed by three punpckldq into mm6: one from a
   high %edx offset, then the same low offset from %edx and from %esi. */
182
punpckldq ASM_NAME(tfcos36)+0,%mm5
187
punpckldq 104(%edx),%mm6
196
punpckldq 36(%edx),%mm6
199
punpckldq 36(%esi),%mm6
/* Mirror of the previous group: tfcos36+32, %edx+72, then offset 68. */
207
punpckldq ASM_NAME(tfcos36)+32,%mm5
212
punpckldq 72(%edx),%mm6
222
punpckldq 68(%edx),%mm6
225
punpckldq 68(%esi),%mm6
/* Next pass: COS9 loads, then the tfcos36 +4 / +28 pair. */
235
movd ASM_NAME(COS9)+12,%mm3
243
movd ASM_NAME(COS9)+24,%mm4
253
punpckldq ASM_NAME(tfcos36)+4,%mm5
258
punpckldq 100(%edx),%mm6
268
punpckldq 40(%edx),%mm6
271
punpckldq 40(%esi),%mm6
/* Mirror: tfcos36+28, %edx+76, then offset 64. */
279
punpckldq ASM_NAME(tfcos36)+28,%mm5
284
punpckldq 76(%edx),%mm6
294
punpckldq 64(%edx),%mm6
297
punpckldq 64(%esi),%mm6
/* Next pass: COS9 loads, then the tfcos36 +8 / +24 pair. */
303
movd ASM_NAME(COS9)+20,%mm3
308
movd ASM_NAME(COS9)+28,%mm4
313
movd ASM_NAME(COS9)+4,%mm4
319
movd ASM_NAME(COS9)+32,%mm5
324
movd ASM_NAME(COS9)+8,%mm5
330
movd ASM_NAME(COS9)+16,%mm5
337
punpckldq ASM_NAME(tfcos36)+8,%mm5
342
punpckldq 96(%edx),%mm6
352
punpckldq 44(%edx),%mm6
355
punpckldq 44(%esi),%mm6
/* Mirror: tfcos36+24, %edx+80, then offset 60. */
363
punpckldq ASM_NAME(tfcos36)+24,%mm5
368
punpckldq 80(%edx),%mm6
378
punpckldq 60(%edx),%mm6
381
punpckldq 60(%esi),%mm6
/* Next pass: COS9 loads, then the tfcos36 +12 / +20 pair. */
387
movd ASM_NAME(COS9)+28,%mm3
392
movd ASM_NAME(COS9)+4,%mm4
397
movd ASM_NAME(COS9)+20,%mm4
403
movd ASM_NAME(COS9)+16,%mm5
408
movd ASM_NAME(COS9)+32,%mm5
414
movd ASM_NAME(COS9)+8,%mm5
421
punpckldq ASM_NAME(tfcos36)+12,%mm5
426
punpckldq 92(%edx),%mm6
436
punpckldq 48(%edx),%mm6
439
punpckldq 48(%esi),%mm6
/* Mirror: tfcos36+20, %edx+84, then offset 56. */
447
punpckldq ASM_NAME(tfcos36)+20,%mm5
452
punpckldq 84(%edx),%mm6
462
punpckldq 56(%edx),%mm6
465
punpckldq 56(%esi),%mm6
/* Centre of the symmetric pattern: tfcos36+16 has no mirror partner; it is
   used with %edx+88 and offset 52. */
480
punpckldq ASM_NAME(tfcos36)+16,%mm5
485
punpckldq 88(%edx),%mm6
495
punpckldq 52(%edx),%mm6
498
punpckldq 52(%esi),%mm6
510
/* .size ASM_NAME(dct36_3dnowext), .-ASM_NAME(dct36_3dnowext) */