1
/* -*- Mode: Asm -*- */
3
sqrt.S is part of FPlib V 0.3.0 ported to avr-as
4
for copyright and details see readme.fplib
6
*----------------------------------------------------------------------------------------
20
RJMP _U(__fp_nanEDOM) ; sign bit is set -> argument range error
22
/* A = sign(=0):(exp+7F):[1].mant = 2^(exp)*1.mant
23
* = (LSB(exp)+7F) + (exp>>1) + (exp>>1):[1].mant = (2^(exp>>1))^2 * 2^(LSB(exp))*1.mant
25
* A' = [2^0*1.0... 2^1*1.1111111] = [1.0...4.0[
27
* sqrt(A) = 2^(exp>>1) * sqrt( A' )
28
* sqrt(A') = [1.0...2.0[ -> result of srqt(A') has definetely exponent 7F! -> exp(X0) = 7F
30
* the matissa of X0 is taken as ( 0.mant >> 1 ) 0.0mant
31
* plus if LSB(exp)==1 [LSB(exp-7F)==0] 0.100000
32
* plus the implicit one 1.000000
35
MOV rB2,rA2 ; needed later on
36
RCALL _U(__fp_split1) ; does not return on NaN
38
BREQ _sqrt_100 ; sqrt(0) = 0
42
ASR rTI0 ; this is dExp!
45
PUSH rTI0 ; exponent offset
46
SUBI rB2,0x80 ; == EOR 0x80
48
CLR rB1 ; a slightly smaller X(0) than the calculated is better
49
CLR rB0 ; and saves 2 right shifts
50
ORI rB2,0x80 ; implicit one
51
LDI rB3,0x7F ; load exponent 7F
58
PUSH rS3 ; rS3::rS0 = Abak
63
PUSH rS7 ; rS7::rS4 = Bbak
68
MOV rS0,rA0 ; Abak = A
73
MOV rA3,rS3 ; A = Abak
78
MOV rS4,rB0 ; Bbak = B
80
; divsf3x does not need preset rAE,rBE
81
RCALL _U(__divsf3x) ; FP1X = arg/xn
82
; now rAE = rBE = 0 or 80 or 0xFF ,ok for addsf3x
87
MOV rB0,rS4 ; B = Bbak
89
CLR rT1c ; addx uses R1.7 as sign
90
RCALL _U(__addsf3x) ; FP1X = arg/xn + xn
91
DEC rA3 ; div by 2 := Xn+1
101
CPC rS7,rB3 ; cmp B to Bbak
116
CLR rT0 ; no rounding beyond rAE