1
dnl Intel P5 mpn_mod_34lsub1 -- mpn remainder modulo 2**24-1.
3
dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
5
dnl This file is part of the GNU MP Library.
7
dnl The GNU MP Library is free software; you can redistribute it and/or
8
dnl modify it under the terms of the GNU Lesser General Public License as
9
dnl published by the Free Software Foundation; either version 2.1 of the
10
dnl License, or (at your option) any later version.
12
dnl The GNU MP Library is distributed in the hope that it will be useful,
13
dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
14
dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
dnl Lesser General Public License for more details.
17
dnl You should have received a copy of the GNU Lesser General Public
18
dnl License along with the GNU MP Library; see the file COPYING.LIB. If
19
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
20
dnl Suite 330, Boston, MA 02111-1307, USA.
22
include(`../config.m4')
25
C P5: 1.66 cycles/limb
28
C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
31
C Argument frame slots for the two parameters in the prototype above:
C src is declared at offset 4 and size at offset 8.
C NOTE(review): defframe is defined in the included m4 files, not here.
C Presumably the offsets are relative to the stack pointer at function entry
C and the FRAME_pushl() calls after each pushl keep them valid -- TODO confirm
C against the macro definitions.
defframe(PARAM_SIZE, 8)
32
defframe(PARAM_SRC, 4)
36
PROLOGUE(mpn_mod_34lsub1)
79
pushl %ebx FRAME_pushl()
80
pushl %esi FRAME_pushl()
82
pushl %edi FRAME_pushl()
83
pushl %ebp FRAME_pushl()
85
xorl %esi, %esi C 0mod3
86
xorl %edi, %edi C 1mod3
88
xorl %ebp, %ebp C 2mod3, and clear carry
115
C ecx is -2, -1 or 0, representing 0, 1 or 2 more limbs, respectively
117
movl $0xFFFFFFFF, %ebx C mask
120
js L(combine) C 0 more
123
movl $0xFFFFFF00, %ebx
128
js L(combine) C 1 more
131
movl $0xFFFF0000, %ebx
146
sbbl %ecx, %ecx C carry
147
movl %esi, %eax C 0mod3
149
andl %ebx, %ecx C masked for position
150
andl $0xFFFFFF, %eax C 0mod3 low
152
shrl $24, %esi C 0mod3 high
153
subl %ecx, %eax C apply carry
155
addl %esi, %eax C apply 0mod3
156
movl %edi, %ebx C 1mod3
158
shrl $16, %edi C 1mod3 high
159
andl $0x0000FFFF, %ebx
161
shll $8, %ebx C 1mod3 low
162
addl %edi, %eax C apply 1mod3 high
164
addl %ebx, %eax C apply 1mod3 low
165
movl %ebp, %ebx C 2mod3
167
shrl $8, %ebp C 2mod3 high
170
shll $16, %ebx C 2mod3 low
171
addl %ebp, %eax C apply 2mod3 high
173
addl %ebx, %eax C apply 2mod3 low