2
# FreeAmp - The Free MP3 Player
4
# Based on MP3 decoder originally Copyright (C) 1995-1997
5
# Xing Technology Corp. http://www.xingtech.com
7
# Copyright (C) 1999 Mark H. Weaver <mhw@netris.org>
9
# This program is free software; you can redistribute it and/or modify
10
# it under the terms of the GNU General Public License as published by
11
# the Free Software Foundation; either version 2 of the License, or
12
# (at your option) any later version.
14
# This program is distributed in the hope that it will be useful,
15
# but WITHOUT ANY WARRANTY; without even the implied warranty of
16
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
# GNU General Public License for more details.
19
# You should have received a copy of the GNU General Public License
20
# along with this program; if not, write to the Free Software
21
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23
# $Id: x86gas.s,v 1.1 1999/12/07 05:46:02 markster Exp $
26
#%% extern wincoef,dword
27
#%% extern coef32,dword
28
#%% ! extern float wincoef[264];
29
#%% ! extern float coef32[31];
41
# window_dual: per the C prototype below, takes a float buffer (vbuf), an
# integer index into it (vb_ptr) and a pointer to 16-bit output samples (pcm).
# Register roles established here:
#   edi = vbuf base address (elements are addressed later as (%edi,idx,4))
#   esi = first element index into vbuf, ebx = second element index
#   ebp = index mask (511), so masked indices always stay within 0..511
#   ecx = pointer into the wincoef table
#%% ! void window_dual(float *vbuf, int vb_ptr, short *pcm)
51
# Fetch the vb_ptr argument from the stack into esi.
movl L_vb_ptr(%esp),%esi
52
# Fetch the vbuf argument from the stack into edi.
movl L_vbuf(%esp),%edi
60
#%! movl %ecx,L_pcm(%esp)
62
movl $511,%ebp # ebp = 511
63
leal wincoef,%ecx # coef = wincoef
64
addl $16,%esi # si = vb_ptr + 16
67
andl %ebp,%ebx # bx = (si + 32) & 511
77
# First windowing loop (.FirstOuter / .FirstInner).
# Each unrolled step processes two taps: *coef times vbuf[si], then the next
# coefficient times vbuf[bx]. After each tap the index advances by 64 elements
# and is wrapped with the mask in ebp, and coef advances by one 4-byte float.
.rept 4 # Unrolled loop
78
flds (%ecx) # Push *coef
79
fmuls (%edi,%esi,4) # Multiply by vbuf[si]
80
addl $64,%esi # si += 64
81
addl $4,%ecx # Advance coef pointer
82
andl %ebp,%esi # si &= 511
83
# Fold the product into the running sum held just below it and pop.
faddp %st,%st(1) # Add to sum
85
flds (%ecx) # Push *coef
86
fmuls (%edi,%ebx,4) # Multiply by vbuf[bx]
87
addl $64,%ebx # bx += 64
88
addl $4,%ecx # Advance coef pointer
89
andl %ebp,%ebx # bx &= 511
90
fsubrp %st,%st(1) # Subtract from sum
94
jg .FirstInner # Jump back if j > 0
96
# Convert the accumulated sum to a 32-bit integer in the L_tmp stack slot;
# fistpl also pops the value off the FPU stack.
fistpl L_tmp(%esp) # tmp = (long) round (sum)
104
jz .FirstInRange # Jump if in range
106
sarl $16,%eax # Out of range
110
# Store the finished sample. Only the low 16 bits of ebp are written, and pcm
# then moves on by 4 bytes (two 16-bit slots), so every other slot is filled
# -- presumably the other channel of an interleaved stream; confirm with caller.
movl L_pcm(%esp),%eax
111
movw %bp,(%eax) # Store sample in *pcm
112
addl $4,%eax # Increment pcm
113
# ebp was borrowed to hold the sample, so the index mask must be restored.
movl $511,%ebp # Reload ebp with 511
114
# Write the advanced pcm pointer back to its stack slot.
movl %eax,L_pcm(%esp)
117
jg .FirstOuter # Jump back if i > 0
125
# Special sample (.SpecialInner) between the first and last loops.
# Only vbuf[bx] taps are accumulated here, each one added to the sum; bx steps
# by 64 elements with wrap-around and coef advances one float per tap.
.rept 2 # Unrolled loop
126
flds (%ecx) # Push *coef
127
fmuls (%edi,%ebx,4) # Multiply by vbuf[bx]
128
addl $64,%ebx # bx += 64
129
addl $4,%ecx # Increment coef pointer
130
andl %ebp,%ebx # bx &= 511
131
faddp %st,%st(1) # Add to sum
135
jg .SpecialInner # Jump back if j > 0
137
# Convert the sum to a 32-bit integer in L_tmp (popping the FPU stack), then
# bring it into eax for the range check.
fistpl L_tmp(%esp) # tmp = (long) round (sum)
139
movl L_tmp(%esp),%eax
145
jz .SpecialInRange # Jump if within range
147
sarl $16,%eax # Out of range
151
# Store the sample: the low 16 bits of ebp go to *pcm and pcm advances by
# 4 bytes (two 16-bit slots).
movl L_pcm(%esp),%eax
152
# The coef pointer is moved back by 36 bytes, i.e. nine 4-byte floats.
subl $36,%ecx # Readjust coef pointer for last round
153
movw %bp,(%eax) # Store sample in *pcm
154
addl $4,%eax # Increment pcm
155
# ebp held the sample above; restore it as the index mask.
movl $511,%ebp # Reload ebp with 511
156
movl %eax,L_pcm(%esp)
160
# Last windowing loop (.LastOuter / .LastInner).
# Same two-tap structure as the first loop, with two differences visible here:
# the coef pointer walks backwards (subl instead of addl) and both taps are
# added to the sum. dh serves as the outer loop counter i.
movb $15,%dh # i = 15
167
.rept 4 # Unrolled loop
168
flds (%ecx) # Push *coef
169
fmuls (%edi,%esi,4) # Multiply by vbuf[si]
170
addl $64,%esi # si += 64
171
subl $4,%ecx # Back up coef pointer
172
andl %ebp,%esi # si &= 511
173
faddp %st,%st(1) # Add to sum
175
flds (%ecx) # Push *coef
176
fmuls (%edi,%ebx,4) # Multiply by vbuf[bx]
177
addl $64,%ebx # bx += 64
178
subl $4,%ecx # Back up coef pointer
179
andl %ebp,%ebx # bx &= 511
180
faddp %st,%st(1) # Add to sum
184
jg .LastInner # Jump back if j > 0
186
# Convert the sum to a 32-bit integer in L_tmp (popping the FPU stack), then
# bring it into eax for the range check.
fistpl L_tmp(%esp) # tmp = (long) round (sum)
188
movl L_tmp(%esp),%eax
194
jz .LastInRange # Jump if in range
196
sarl $16,%eax # Out of range
200
# Store the sample: the low 16 bits of ebp go to *pcm and pcm advances by
# 4 bytes (two 16-bit slots).
movl L_pcm(%esp),%eax
201
movw %bp,(%eax) # Store sample in *pcm
202
addl $4,%eax # Increment pcm
203
# ebp held the sample above; restore it as the index mask.
movl $511,%ebp # Reload ebp with 511
204
movl %eax,L_pcm(%esp)
207
jg .LastOuter # Jump back if i > 0
213
# Restore regs and return
224
#---------------------------------------------------------------------------
231
# Offsets from esp used by the fdct32 code below. Two sets of values are kept:
# those on the `#%!` lines are each 8 bytes larger than the active `.equ`
# values. In both sets L_locals - L_buf is 128 bytes, which is the size of
# thirty-two 4-byte floats (assuming nothing else follows L_buf -- TODO confirm).
#%!.equ L_buf, 20 # Temporary buffer
232
#%!.equ L_locals, 148 # Bytes used for locals
234
.equ L_buf, 12 # Temporary buffer
237
.equ L_locals, 140 # Bytes used for locals
242
# asm_fdct32: per the C prototype below, takes an input float array and an
# output float array. Register roles established here:
#   edi = x (current input), esi = f (current output)
#   ecx = coef pointer, ebp = n, ebx = scratch
#%% ! void asm_fdct32(float in[], float out[])
252
movl L_in(%esp),%edi # edi = x
253
movl L_out(%esp),%esi # esi = f
256
#%! movl in,%edi # edi = x
257
#%! movl out,%esi # esi = f
259
#%! subl $L_locals,%esp
261
# coef starts 128 bytes before coef32 -- presumably so that the first
# "coef += n * 8" in the forward loop (n = 16) lands exactly on coef32.
leal coef32-128,%ecx # coef = coef32 - (32 * 4)
262
# NOTE(review): m is written through the literal offset 4 here but read as
# L_m(%esp) in the forward loop; presumably L_m == 4 -- confirm and use L_m.
movl $1,4(%esp) # m = 1
263
movl $16,%ebp # n = 32 / 2
265
# ebx = address of the temporary buffer on the stack; it replaces the pointer
# kept in the L_out slot (esi already holds the caller's out pointer).
leal L_buf(%esp),%ebx
266
movl %ebx,L_out(%esp) # From now on, use temp buf instead of orig x
267
jmp .ForwardLoopStart
271
# Forward pass (.ForwardOuterLoop / .ForwardMiddleLoop / .ForwardInnerLoop).
# Each outer iteration reloads x and f from their stack slots, doubles m, and
# leaves n halved: n is scaled by 8 into a byte count for the inner loops and
# shifted right by 4 at the bottom of the loop.
movl L_in(%esp),%edi # edi = x
272
movl L_out(%esp),%esi # esi = f
273
movl %edi,L_out(%esp) # Exchange mem versions of f/x for next iter
276
movl L_m(%esp),%ebx # ebx = m (temporarily)
277
movl %ebx,L_mi(%esp) # mi = m
278
sall $1,%ebx # Double m for next iter
279
leal (%ecx,%ebp,8),%ecx # coef += n * 8
280
movl %ebx,L_m(%esp) # Store doubled m
281
# f2 points n floats (n * 4 bytes) past f.
leal (%esi,%ebp,4),%ebx # ebx = f2 = f + n * 4
282
# From here on n is a byte count covering 2n floats.
sall $3,%ebp # n *= 8
286
# Middle loop: p and q are byte offsets; p starts at the low end of the
# current group and q at the high end, and they move towards each other.
movl %ebp,%eax # q = n
287
xorl %edx,%edx # p = 0
289
jnz .ForwardInnerLoop1
293
# Inner loop body, written out twice. Each copy steps q down and p up by one
# float (4 bytes) and writes one element of f and one element of f2.
subl $4,%eax # q -= 4
294
flds (%edi,%eax) # push x[q]
295
flds (%edi,%edx) # push x[p]
296
fld %st(1) # Duplicate top two stack entries
299
fstps (%esi,%edx) # f[p] = x[p] + x[q]
302
fstps (%ebx,%edx) # f2[p] = coef[p] * (x[p] - x[q])
303
addl $4,%edx # p += 4
306
subl $4,%eax # q -= 4
307
flds (%edi,%eax) # push x[q]
308
flds (%edi,%edx) # push x[p]
309
fld %st(1) # Duplicate top two stack entries
312
fstps (%esi,%edx) # f[p] = x[p] + x[q]
315
fstps (%ebx,%edx) # f2[p] = coef[p] * (x[p] - x[q])
316
addl $4,%edx # p += 4
319
# jb tests the unsigned "below" condition.
jb .ForwardInnerLoop # Jump back if (p < q)
321
# Advance all three pointers by n bytes to the next group.
addl %ebp,%esi # f += n
322
addl %ebp,%ebx # f2 += n
323
addl %ebp,%edi # x += n
324
decl L_mi(%esp) # mi--
325
jg .ForwardMiddleLoop # Jump back if mi > 0
327
# Undo the earlier scaling by 8 and halve n in a single shift; the jg below
# uses the flags left by this sarl.
sarl $4,%ebp # n /= 16
328
jg .ForwardOuterLoop # Jump back if n > 0
332
# Backward pass (.BackOuterLoop / .BackMiddleLoop / .BackInnerLoop).
# Starts with m = 8 and n = 8; each outer iteration doubles n and halves m,
# and ends once m reaches zero. ecx is used as a scratch register for 32-bit
# copies in this pass rather than as the coef pointer.
movl $8,%ebx # ebx = m = 8 (temporarily)
333
movl %ebx,%ebp # n = 4 * 2
337
movl L_out(%esp),%esi # esi = f
338
movl %ebx,L_mi(%esp) # mi = m
339
movl L_in(%esp),%edi # edi = x
340
movl %ebx,L_m(%esp) # Store m
341
movl %esi,L_in(%esp) # Exchange mem versions of f/x for next iter
343
movl %edi,L_out(%esp)
344
subl %ebp,%ebx # ebx = x2 = x - n
345
sall $1,%ebp # n *= 2
349
# Middle loop: handle the top two elements of the group, then set up the byte
# offsets q (into x / x2) and p (into f) for the inner loop. x[n - 4] is left
# on the FPU stack by the non-popping fsts.
movl -4(%ebx,%ebp),%ecx
350
movl %ecx,-8(%esi,%ebp) # f[n - 8] = x2[n - 4]
351
flds -4(%edi,%ebp) # push x[n - 4]
352
fsts -4(%esi,%ebp) # f[n - 4] = x[n - 4], without popping
353
leal -8(%ebp),%eax # q = n - 8
354
leal -16(%ebp),%edx # p = n - 16
358
# Inner loop: each pass writes two floats of f (at p and p + 4) while q steps
# down by one float and p by two.
movl (%ebx,%eax),%ecx
359
movl %ecx,(%esi,%edx) # f[p] = x2[q]
360
flds (%edi,%eax) # push x[q]
363
fstps 4(%esi,%edx) # f[p + 4] = x[q] + x[q + 4]
364
subl $4,%eax # q -= 4
365
# The jge below uses the flags left by this subtraction.
subl $8,%edx # p -= 8
366
jge .BackInnerLoop # Jump back if p >= 0
368
# NOTE(review): on the XXX below -- "fstp %st(0)" pops the FPU stack without
# writing to memory, which would make the L_dummy store unnecessary.
fstps L_dummy(%esp) # Pop (XXX is there a better way to do this?)
369
# Advance all three pointers by n bytes to the next group.
addl %ebp,%esi # f += n
370
addl %ebp,%ebx # x2 += n
371
addl %ebp,%edi # x += n
372
decl L_mi(%esp) # mi--
373
jg .BackMiddleLoop # Jump back if mi > 0
375
movl L_m(%esp),%ebx # ebx = m (temporarily)
376
# The jg below uses the flags left by this shift.
sarl $1,%ebx # Halve m for next iter
377
jg .BackOuterLoop # Jump back if m > 0
379
#%! addl $L_locals,%esp
383
# Restore regs and return