144
146
the longbool for checkoverflow avoids a misaligned stack
146
148
function fpc_mul_qword(f1,f2 : qword;checkoverflow : longbool) : qword;[public,alias: 'FPC_MUL_QWORD']; {$ifdef hascompilerproc} compilerproc; {$endif}
149
assembler; nostackframe;
149
151
// (r3:r4) = (r3:r4) * (r5:r6), checkoverflow is in r7
152
or. r8,r3,r5 // are both msw's 0?
154
or. r10,r3,r5 // are both msw's 0?
153
155
mulhwu r8,r4,r6 // msw of product of lsw's
156
subi r0,r7,1 // if no overflowcheck, r0 := $ffffffff, else r0 := 0;
157
beq .LDone // if both msw's are zero, skip cross products
158
mullw r9,r4,r5 // lsw of first cross-product
154
159
cntlzw r11,r3 // count leading zeroes of msw1
155
160
cntlzw r12,r5 // count leading zeroes of msw2
156
subi r0,r7,1 // if no overflowcheck, r0 := $ffffffff, else r0 := 0;
161
mullw r7,r3,r6 // lsw of second cross-product
162
add r12,r11,r12 // sum of leading zeroes
158
add r9,r11,r12 // sum of leading zeroes
159
or r0,r9,r0 // maximise sum if no overflow checking, otherwise it remains
164
or r0,r12,r0 // maximise sum if no overflow checking, otherwise it remains
160
166
cmplwi cr1,r0,64 // >= 64 leading zero bits in total? If so, no overflow
161
beq .Lmsw_zero // if both msw's are zero, skip cross products
162
mullw r7,r3,r6 // lsw of first cross-product
163
167
add r8,r8,r7 // add
164
mullw r5,r4,r5 // lsw of second cross-product
167
168
bge+ cr1,.LDone // if the sum of leading zero's >= 64 (or checkoverflow was 0)
168
169
// there's no overflow, otherwise more thorough check
172
addc r7,r7,r10 // add the msw of the product of the lsw's, record carry
174
cntlzw r10,r4 // get leading zeroes count of lsw f1
169
177
subfic r0,r11,31 // if msw f1 = 0, then r0 := -1, else r0 >= 0
170
cntlzw r3,r4 // get leading zeroes count of lsw f1
179
subfic r11,r9,31 // same for f2
171
180
srawi r0,r0,31 // if msw f1 = 0, then r0 := 1, else r0 := 0
172
subfic r11,r12,31 // same for f2
175
and r3,r3,r0 // if msw f1 <> 0, the leading zero count lsw f1 := 0
176
and r12,r12,r11 // same for f2
177
add r9,r9,r3 // add leading zero counts of lsw's to sum if appropriate
182
and r10,r10,r0 // if msw f1 <> 0, the leading zero count lsw f1 := 0
183
and r9,r7,r11 // same for f2
185
add r9,r9,r10 // add leading zero counts of lsw's to sum if appropriate
179
cmplwi r9,64 // is the sum now >= 64?
187
cmplwi cr7,r9,64 // is the sum now >= 64?
180
188
cmplwi cr1,r9,62 // or <= 62?
181
bge+ .LDone // >= 64 leading zeroes -> no overflow
189
bge+ cr7,.LDone // >= 64 leading zeroes -> no overflow
182
190
ble+ cr1,.LOverflow // <= 62 leading zeroes -> overflow
183
191
// for 63 zeroes, we need additional checks
184
add r9,r7,r5 // sum of lsw's cross products can't produce a carry,
192
// sum of lsw's cross products can't have produced a carry,
185
193
// because the sum of leading zeroes is 63 -> at least
186
194
// one of these cross products is 0
188
addc r9,r9,r10 // add the msw of the product of the lsw's