2
dct64_3dnowext: extended 3DNow optimized DCT64
4
copyright ?-2007 by the mpg123 project - free software under the terms of the LGPL 2.1
5
see COPYING and AUTHORS files in distribution or http://mpg123.org
7
Transformed back into standalone asm, with help of
8
gcc -S -DHAVE_CONFIG_H -I. -march=k6-3 -O3 -Wall -pedantic -fno-strict-aliasing -DREAL_IS_FLOAT -c -o dct64_3dnowext.{S,c}
10
MPlayer comment follows.
14
* This code was taken from http://www.mpg123.org
15
* See ChangeLog of mpg123-0.59s-pre.1 for details
16
* Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
17
* Partial 3dnowex-DSP! optimization by Nick Kurshev
19
* TODO: optimize scalar 3dnow! code
20
* Warning: Phases 7 & 8 are not tested
27
/* .type plus_1f, @object
32
/* .type x_plus_minus_3dnow, @object
33
.size x_plus_minus_3dnow, 8 */
40
/*
 * dct64_3dnowext -- global entry point, exported through the ASM_NAME()
 * macro.  The file header describes it as the extended-3DNow! optimized DCT64.
 *
 * Syntax: GNU as, AT&T operand order (source first, destination last).
 *
 * NOTE(review): the bare integers that sit on their own lines between the
 * instructions look like line numbers of the original source left behind by
 * an extraction step, and they are not consecutive -- so the instructions
 * shown here are only a subset of the routine.  The prologue, the setup of
 * %eax/%ecx/%esi, most of the arithmetic and the return are not visible;
 * confirm anything that depends on them against the complete file.
 *
 * Registers as used by the visible lines:
 *   %ebp  base for the address computed into %edx below
 *   %edx  %ebp-140 after the leal (may be reloaded in elided code)
 *   %ebx  address of costab_mmxsse after the movl (may be reloaded likewise)
 *   %eax, %ecx  further base pointers for 8-byte loads
 *   %esi  base of the single visible store
 *   %mm0-%mm7  MMX registers holding packed single-precision pairs
 */
.globl ASM_NAME(dct64_3dnowext)
41
/* .type ASM_NAME(dct64_3dnowext), @function */
42
ASM_NAME(dct64_3dnowext):
/* edx = ebp + (128 - 268) = ebp - 140.  Presumably the middle of a work
   buffer in the stack frame -- the frame setup is not visible, TODO confirm. */
51
leal 128+-268(%ebp),%edx
/* ebx = absolute address of the costab_mmxsse symbol (immediate operand,
   so this is not position-independent). */
54
movl $ASM_NAME(costab_mmxsse),%ebx
/* pswapd (extended 3DNow!): load 8 bytes from memory and place them in the
   destination with the two 32-bit halves exchanged.  Here: eax+120 / eax+112. */
60
pswapd 120(%eax), %mm1
61
pswapd 112(%eax), %mm5
78
pswapd 104(%eax), %mm1
/* Same half-swapped loads for eax+72 and eax+64. */
114
pswapd 72(%eax), %mm1
115
pswapd 64(%eax), %mm5
/* From here the half-swapped loads use %edx as base (offsets 56..32). */
132
pswapd 56(%edx), %mm1
133
pswapd 48(%edx), %mm5
150
pswapd 40(%edx), %mm1
151
pswapd 32(%edx), %mm5
/* %edx-based half-swapped loads, offsets 120..96. */
168
pswapd 120(%edx), %mm1
169
pswapd 112(%edx), %mm5
186
pswapd 104(%edx), %mm1
187
pswapd 96(%edx), %mm5
/* The half-swapped loads now use %ecx as base: pairs at 24/16, 56/48,
   88/80 and 120/112. */
206
pswapd 24(%ecx), %mm1
207
pswapd 16(%ecx), %mm5
224
pswapd 56(%ecx), %mm1
225
pswapd 48(%ecx), %mm5
242
pswapd 88(%ecx), %mm1
243
pswapd 80(%ecx), %mm5
260
pswapd 120(%ecx), %mm1
261
pswapd 112(%ecx), %mm5
/* %edx is the base again; the visible offsets step by 16 (24, 40, ... 120). */
280
pswapd 24(%edx), %mm5
297
pswapd 40(%edx), %mm1
298
pswapd 56(%edx), %mm5
315
pswapd 72(%edx), %mm1
316
pswapd 88(%edx), %mm5
333
pswapd 104(%edx), %mm1
334
pswapd 120(%edx), %mm5
/* punpckldq mem, mm: keep the low dword of mm and put the dword read from
   memory into its high half.  Here the dword at ebx+120 goes into mm6's
   high half. */
348
punpckldq 120(%ebx), %mm6
/* mm7 = the 8-byte constant x_plus_minus_3dnow (absolute address).  Its
   contents are defined outside this view; the name suggests a +/- sign
   pattern -- TODO confirm. */
349
movq x_plus_minus_3dnow, %mm7
/* High halves of mm0 / mm2 are filled with the dwords at ecx+76 / ecx+72. */
365
punpckldq 76(%ecx), %mm0
366
punpckldq 72(%ecx), %mm2
/* pfmul mem, mm: packed single-precision multiply, mm *= 8 bytes at mem.
   All visible multiplies read ebx+120, i.e. costab_mmxsse+120 provided %ebx
   still holds the value loaded above. */
384
pfmul 120(%ebx), %mm3
385
pfmul 120(%ebx), %mm2
400
pfmul 120(%ebx), %mm1
410
pfmul 120(%ebx), %mm0
/* pfsub mem, mm computes mm = mm - mem; pfadd mem, mm computes mm = mm + mem
   (both on packed single-precision pairs).  The offsets below advance in
   4-byte steps, so consecutive 8-byte operands overlap.
   NOTE(review): presumably only the low lane matters here (the file header
   has a TODO about scalar 3dnow! code) -- confirm. */
422
pfsub 104(%ecx), %mm0
423
pfmul 120(%ebx), %mm0
425
pfadd 104(%ecx), %mm0
426
pfadd 108(%ecx), %mm0
429
pfsub 120(%ecx), %mm1
430
pfmul 120(%ebx), %mm1
432
pfadd 120(%ecx), %mm1
433
pfadd 124(%ecx), %mm1
435
pfadd 112(%ecx), %mm0
436
pfadd 116(%ecx), %mm0
439
pfsub 116(%ecx), %mm0
440
pfmul 120(%ebx), %mm0
442
pfadd 124(%edx), %mm0
/* Store the low 32 bits of mm0 at esi+1024.  What %esi points to is set up
   in code that is not visible here. */
448
movd %mm0, 1024(%esi)
451
pfmul 120(%ebx), %mm0
456
pfmul 120(%ebx), %mm0
463
pfmul 120(%ebx), %mm0
467
pfmul 120(%ebx), %mm0
/* Packed adds of three %edx-based operands into mm0, mm2 and mm4. */
503
pfadd 112(%edx), %mm0
504
pfadd 104(%edx), %mm2
505
pfadd 120(%edx), %mm4
534
pfadd 100(%edx), %mm0
559
pfmul 120(%ebx), %mm0
570
pfmul 120(%ebx), %mm3
574
pfmul 120(%ebx), %mm2
/* High halves of mm0, mm4 and mm2 are filled from edx+124, edx+36 and
   edx+92; mm5 gets a packed add of the 8 bytes at edx+100. */
617
punpckldq 124(%edx), %mm0
618
pfadd 100(%edx), %mm5
619
punpckldq 36(%edx), %mm4
620
punpckldq 92(%edx), %mm2
655
pfadd 120(%edx), %mm4
/* Commented-out ELF size directive that would mark the end of the function. */
712
/* .size ASM_NAME(dct64_3dnowext), .-ASM_NAME(dct64_3dnowext) */