1
dnl x86 mpn_divexact_1 -- mpn by limb exact division.
3
dnl Copyright 2001, 2002 Free Software Foundation, Inc.
5
dnl This file is part of the GNU MP Library.
7
dnl The GNU MP Library is free software; you can redistribute it and/or
8
dnl modify it under the terms of the GNU Lesser General Public License as
9
dnl published by the Free Software Foundation; either version 2.1 of the
10
dnl License, or (at your option) any later version.
12
dnl The GNU MP Library is distributed in the hope that it will be useful,
13
dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
14
dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
dnl Lesser General Public License for more details.
17
dnl You should have received a copy of the GNU Lesser General Public
18
dnl License along with the GNU MP Library; see the file COPYING.LIB. If
19
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
20
dnl Suite 330, Boston, MA 02111-1307, USA.
22
include(`../config.m4')
28
C P6: 13.0 cycles/limb for an odd divisor, 12.0 for an even one (strangely)
34
C mp_limb_t mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C                           mp_limb_t divisor);
38
dnl Names for the four incoming arguments: dst, src, size and divisor.
dnl The second argument to defframe looks like a byte offset from the
dnl stack pointer at function entry, with the first argument at 4 and
dnl each later one 4 bytes higher.  defframe itself is defined outside
dnl this file, so confirm that reading against its definition.
defframe(PARAM_DIVISOR,16)
39
defframe(PARAM_SIZE, 12)
40
defframe(PARAM_SRC, 8)
41
defframe(PARAM_DST, 4)
43
dnl Re-use parameter space: VAR_INVERSE is an alias for the PARAM_SRC
dnl slot, not a new stack location.  The code below stores the computed
dnl inverse of the divisor there, which overwrites the saved src argument.
44
define(VAR_INVERSE,`PARAM_SRC')
49
PROLOGUE(mpn_divexact_1)
52
movl PARAM_DIVISOR, %eax
53
pushl %ebp FRAME_pushl()
56
pushl %edi FRAME_pushl()
58
pushl %ebx FRAME_pushl()
59
movl $-1, %ecx C shift count
61
pushl %esi FRAME_pushl()
69
leal 1(%eax,%eax), %ebx C d without twos
70
andl $127, %eax C d/2, 7 bits
74
addl $_GLOBAL_OFFSET_TABLE_, %edx
75
movl modlimb_invert_table@GOT(%edx), %edx
76
movzbl (%eax,%edx), %eax C inv 8 bits
79
movzbl modlimb_invert_table(%eax), %eax C inv 8 bits
82
leal (%eax,%eax), %edx C 2*inv
83
movl %ebx, PARAM_DIVISOR C d without twos
85
imull %eax, %eax C inv*inv
90
imull %ebx, %eax C inv*inv*d
92
subl %eax, %edx C inv = 2*inv - inv*inv*d
93
leal (%edx,%edx), %eax C 2*inv
95
imull %edx, %edx C inv*inv
97
leal (%esi,%ebp,4), %esi C src end
98
leal (%edi,%ebp,4), %edi C dst end
101
imull %ebx, %edx C inv*inv*d
103
subl %edx, %eax C inv = 2*inv - inv*inv*d
105
ASSERT(e,` C expect d*inv == 1 mod 2^BITS_PER_MP_LIMB
106
pushl %eax FRAME_pushl()
107
imull PARAM_DIVISOR, %eax
109
popl %eax FRAME_popl()')
111
movl %eax, VAR_INVERSE
112
movl (%esi,%ebp,4), %eax C src[0]
120
movl (%esi,%ebp,4), %edx C src[1]
122
shrdl( %cl, %edx, %eax)
124
movl VAR_INVERSE, %edx
129
nop C k6 code alignment
133
C ebx carry bit, 0 or -1
138
C ebp counter, limbs, negative
140
movl -4(%esi,%ebp,4), %eax
141
subl %ebx, %edx C accumulate carry bit
143
movl (%esi,%ebp,4), %ebx
145
shrdl( %cl, %ebx, %eax)
147
subl %edx, %eax C apply carry limb
148
movl VAR_INVERSE, %edx
155
movl %eax, -4(%edi,%ebp,4)
156
movl PARAM_DIVISOR, %edx
164
movl -4(%esi), %eax C src high limb
167
popl %esi FRAME_popl()
169
addl %ebx, %eax C apply carry bit
170
popl %ebx FRAME_popl()
172
subl %edx, %eax C apply carry limb
174
imull VAR_INVERSE, %eax