~ubuntu-branches/ubuntu/dapper/fpc/dapper

function fpc_mul_qword(f1,f2 : qword;checkoverflow : longbool) : qword;[public,alias: 'FPC_MUL_QWORD']; {$ifdef hascompilerproc} compilerproc; {$endif}

147

assembler;

149

assembler; nostackframe;

148

150

asm

149

151

// (r3:r4) = (r3:r4) * (r5:r6), checkoverflow is in r7

150

152

// res f1 f2

151

152

or. r8,r3,r5 // are both msw's 0?

153

154

or. r10,r3,r5 // are both msw's 0?

153

155

mulhwu r8,r4,r6 // msw of product of lsw's

156

subi r0,r7,1 // if no overflowcheck, r0 := $ffffffff, else r0 := 0;

157

beq .LDone // if both msw's are zero, skip cross products

158

mullw r9,r4,r5 // lsw of first cross-product

154

159

cntlzw r11,r3 // count leading zeroes of msw1

155

160

cntlzw r12,r5 // count leading zeroes of msw2

156

subi r0,r7,1 // if no overflowcheck, r0 := $ffffffff, else r0 := 0;

161

mullw r7,r3,r6 // lsw of second cross-product

162

add r12,r11,r12 // sum of leading zeroes

157

163

mr r10,r8

158

add r9,r11,r12 // sum of leading zeroes

159

or r0,r9,r0 // maximise sum if no overflow checking, otherwise it remains unchanged

164

or r0,r12,r0 // maximise sum if no overflow checking, otherwise it remains unchanged

165

add r8,r8,r9 // add

160

166

cmplwi cr1,r0,64 // >= 64 leading zero bits in total? If so, no overflow

161

beq .Lmsw_zero // if both msw's are zero, skip cross products

162

mullw r7,r3,r6 // lsw of first cross-product

163

167

add r8,r8,r7 // add

164

mullw r5,r4,r5 // lsw of second cross-product

165

add r8,r8,r5 // add

166

.Lmsw_zero:

167

168

bge+ cr1,.LDone // if the sum of leading zeroes >= 64 (or checkoverflow was 0)

168

169

// there's no overflow, otherwise more thorough check

170

add r7,r7,r9

171

mulhwu r3,r6,r3

172

addc r7,r7,r10 // add the msw of the product of the lsw's, record carry

173

cntlzw r9,r5

174

cntlzw r10,r4 // get leading zeroes count of lsw f1

175

mulhwu r5,r4,r5

176

addze r3,r3

169

177

subfic r0,r11,31 // if msw f1 = 0, then r0 := -1, else r0 >= 0

170

cntlzw r3,r4 // get leading zeroes count of lsw f1

178

cntlzw r7,r6

179

subfic r11,r9,31 // same for f2

171

180

srawi r0,r0,31 // if msw f1 = 0, then r0 := -1 ($ffffffff mask), else r0 := 0

172

subfic r11,r12,31 // same for f2

173

cntlzw r12,r6

174

181

srawi r11,r11,31

175

and r3,r3,r0 // if msw f1 <> 0, the leading zero count lsw f1 := 0

176

and r12,r12,r11 // same for f2

177

add r9,r9,r3 // add leading zero counts of lsw's to sum if appropriate

182

and r10,r10,r0 // if msw f1 <> 0, the leading zero count lsw f1 := 0

183

and r9,r7,r11 // same for f2

184

or. r5,r5,r3

185

add r9,r9,r10 // add leading zero counts of lsw's to sum if appropriate

178

186

add r9,r9,r12

179

cmplwi r9,64 // is the sum now >= 64?

187

cmplwi cr7,r9,64 // is the sum now >= 64?

180

188

cmplwi cr1,r9,62 // or <= 62?

181

bge+ .LDone // >= 64 leading zeroes -> no overflow

189

bge+ cr7,.LDone // >= 64 leading zeroes -> no overflow

182

190

ble+ cr1,.LOverflow // <= 62 leading zeroes -> overflow

183

191

// for 63 zeroes, we need additional checks

184

add r9,r7,r5 // sum of lsw's cross products can't produce a carry,

192

// sum of lsw's cross products can't have produced a carry,

185

193

// because the sum of leading zeroes is 63 -> at least

186

194

// one of these cross products is 0

187

li r0, 0

188

addc r9,r9,r10 // add the msw of the product of the lsw's

189

addze. r0,r0

190

195

beq+ .LDone

191

196

.LOverflow:

192

197

b FPC_OVERFLOW

198

203

199

204

{

200

205

$Log: int64p.inc,v $

201

Revision 1.4 2004/05/29 21:35:54 jonas

202

* fixed overflow checking for qword multiplication

203

204

Revision 1.3 2004/01/12 21:35:51 jonas

205

+ assembler FPC_MUL_QWORD routine

206

207

Revision 1.2 2004/01/12 18:03:30 jonas

208

+ ppc implementation of fpc_mod/div_qword (from ppc compiler writers guide)

209

210

Revision 1.1 2003/09/14 11:34:13 peter

211

* moved int64 asm code to int64p.inc

212

* save ebx,esi

206

Revision 1.9 2005/03/11 12:41:41 jonas

207

* mini scheduling optimization

208

209

Revision 1.8 2005/02/19 14:16:02 jonas

210

* fixed overflow detection, + some small optimizations

211

212

Revision 1.7 2005/02/14 17:13:31 peter

213

* truncate log

213

214

215

}

Older »