2
/ decode_3dnow.s - 3DNow! optimized synth_1to1()
4
/ copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
5
/ see COPYING and AUTHORS files in distribution or http://mpg123.de
6
/ initially written by Syuuhei Kashiyama
8
/ This code is based on 'decode_3dnow.s' by Syuuhei Kashiyama
9
/ <squash@mb.kcom.ne.jp>; only the following changes have been made:
11
/ - remove PREFETCH instruction for speedup
12
/ - change function name to support 3DNow! automatic detection
13
/ - femms moved to before 'call dct64_3dnow'
15
/ You can find Kashiyama's original 3dnow! support patch
16
/ (for mpg123-0.59o) at
17
/ http://user.ecc.u-tokyo.ac.jp/~g810370/linux-simd/ (Japanese).
19
/ by KIMURA Takuhiro <kim@hannah.ipc.miyakyo-u.ac.jp> - until 31.Mar.1999
20
/ <kim@comtec.co.jp> - after 1.Apr.1999
24
/// Replacement of synth_1to1() with AMD's 3DNow! SIMD operations support
26
/// Syuuhei Kashiyama <squash@mb.kcom.ne.jp>
28
/// The author of this program disclaims all expressed or implied
29
/// warranties with regard to this program, and in no event shall the
30
/// author of this program be liable for whatever results from the use of
31
/// this program. Use it at your own risk.
35
.comm buffs.40,4352,32
43
.globl synth_1to1_3dnow
44
.type synth_1to1_3dnow,@function
65
call do_equalizer_3dnow
76
movl $buffs.40+2176,%ecx
94
leal 1088(,%eax,4),%eax
104
leal 1092(%ecx,%edx,4),%eax
109
leal (%ecx,%edx,4),%eax
171
punpckldq 8(%ebx),%mm0
172
punpckldq 8(%edx),%mm1
176
punpckldq 24(%ebx),%mm3
177
punpckldq 24(%edx),%mm4
181
punpckldq 40(%ebx),%mm5
182
punpckldq 40(%edx),%mm6
187
punpckldq 56(%ebx),%mm1
188
punpckldq 56(%edx),%mm2
202
leal -128(%edx,%esi,8),%edx
207
punpckldq 4(%ebx),%mm0
208
punpckldq -8(%edx),%mm1
214
punpckldq 12(%ebx),%mm3
215
punpckldq -16(%edx),%mm4
219
punpckldq 20(%ebx),%mm5
220
punpckldq -24(%edx),%mm6
225
punpckldq 28(%ebx),%mm1
226
punpckldq -32(%edx),%mm2
231
punpckldq 36(%ebx),%mm3
232
punpckldq -40(%edx),%mm4
237
punpckldq 44(%ebx),%mm5
238
punpckldq -48(%edx),%mm6
243
punpckldq 52(%ebx),%mm1
244
punpckldq -56(%edx),%mm2
249
punpckldq 60(%ebx),%mm3
250
punpckldq (%edx),%mm4
257
punpckldq 4(%ebx),%mm0
258
punpckldq -8(%edx),%mm1