1
/*---------------------------------------------------------------------------+
4
| Computation of an approximation of the sin function and the cosine |
5
| function by a polynomial. |
7
| Copyright (C) 1992,1993,1994,1997,1999 |
8
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
9
| E-mail billm@melbpc.org.au |
12
+---------------------------------------------------------------------------*/
14
#include "exception.h"
15
#include "reg_constant.h"
17
#include "fpu_system.h"
18
#include "control_w.h"
24
static const unsigned long long pos_terms_l[N_COEFF_P] = {
31
static const unsigned long long neg_terms_l[N_COEFF_N] = {
40
static const unsigned long long pos_terms_h[N_COEFF_PH] = {
47
static const unsigned long long neg_terms_h[N_COEFF_NH] = {
54
/*--- poly_sine() -----------------------------------------------------------+
56
+---------------------------------------------------------------------------*/
57
void poly_sine(FPU_REG *st0_ptr)
59
int exponent, echange;
60
Xsig accumulator, argSqrd, argTo4;
61
unsigned long fix_up, adj;
62
unsigned long long fixed_arg;
65
exponent = exponent(st0_ptr);
67
accumulator.lsw = accumulator.midw = accumulator.msw = 0;
69
/* Split into two ranges, for arguments below and above 1.0 */
70
/* The boundary between upper and lower is approx 0.88309101259 */
72
|| ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa))) {
73
/* The argument is <= 0.88309101259 */
75
argSqrd.msw = st0_ptr->sigh;
76
argSqrd.midw = st0_ptr->sigl;
78
mul64_Xsig(&argSqrd, &significand(st0_ptr));
79
shr_Xsig(&argSqrd, 2 * (-1 - exponent));
80
argTo4.msw = argSqrd.msw;
81
argTo4.midw = argSqrd.midw;
82
argTo4.lsw = argSqrd.lsw;
83
mul_Xsig_Xsig(&argTo4, &argTo4);
85
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
87
mul_Xsig_Xsig(&accumulator, &argSqrd);
88
negate_Xsig(&accumulator);
90
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
93
shr_Xsig(&accumulator, 2); /* Divide by four */
94
accumulator.msw |= 0x80000000; /* Add 1.0 */
96
mul64_Xsig(&accumulator, &significand(st0_ptr));
97
mul64_Xsig(&accumulator, &significand(st0_ptr));
98
mul64_Xsig(&accumulator, &significand(st0_ptr));
100
/* Divide by four, FPU_REG compatible, etc */
101
exponent = 3 * exponent;
103
/* The minimum exponent difference is 3 */
104
shr_Xsig(&accumulator, exponent(st0_ptr) - exponent);
106
negate_Xsig(&accumulator);
107
XSIG_LL(accumulator) += significand(st0_ptr);
109
echange = round_Xsig(&accumulator);
111
setexponentpos(&result, exponent(st0_ptr) + echange);
113
/* The argument is > 0.88309101259 */
114
/* We use sin(st(0)) = cos(pi/2-st(0)) */
116
fixed_arg = significand(st0_ptr);
119
/* The argument is >= 1.0 */
121
/* Put the binary point at the left. */
124
/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
125
fixed_arg = 0x921fb54442d18469LL - fixed_arg;
126
/* There is a special case which arises due to rounding, to fix here. */
127
if (fixed_arg == 0xffffffffffffffffLL)
130
XSIG_LL(argSqrd) = fixed_arg;
132
mul64_Xsig(&argSqrd, &fixed_arg);
134
XSIG_LL(argTo4) = XSIG_LL(argSqrd);
135
argTo4.lsw = argSqrd.lsw;
136
mul_Xsig_Xsig(&argTo4, &argTo4);
138
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
140
mul_Xsig_Xsig(&accumulator, &argSqrd);
141
negate_Xsig(&accumulator);
143
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
145
negate_Xsig(&accumulator);
147
mul64_Xsig(&accumulator, &fixed_arg);
148
mul64_Xsig(&accumulator, &fixed_arg);
150
shr_Xsig(&accumulator, 3);
151
negate_Xsig(&accumulator);
153
add_Xsig_Xsig(&accumulator, &argSqrd);
155
shr_Xsig(&accumulator, 1);
157
accumulator.lsw |= 1; /* A zero accumulator here would cause problems */
158
negate_Xsig(&accumulator);
160
/* The basic computation is complete. Now fix the answer to
161
compensate for the error due to the approximation used for
165
/* This has an exponent of -65 */
167
/* The fix-up needs to be improved for larger args */
168
if (argSqrd.msw & 0xffc00000) {
169
/* Get about 32 bit precision in these: */
170
fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6;
172
fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg));
174
adj = accumulator.lsw; /* temp save */
175
accumulator.lsw -= fix_up;
176
if (accumulator.lsw > adj)
177
XSIG_LL(accumulator)--;
179
echange = round_Xsig(&accumulator);
181
setexponentpos(&result, echange - 1);
184
significand(&result) = XSIG_LL(accumulator);
185
setsign(&result, getsign(st0_ptr));
186
FPU_copy_to_reg0(&result, TAG_Valid);
189
if ((exponent(&result) >= 0)
190
&& (significand(&result) > 0x8000000000000000LL)) {
191
EXCEPTION(EX_INTERNAL | 0x150);
193
#endif /* PARANOID */
197
/*--- poly_cos() ------------------------------------------------------------+
199
+---------------------------------------------------------------------------*/
200
void poly_cos(FPU_REG *st0_ptr)
203
long int exponent, exp2, echange;
204
Xsig accumulator, argSqrd, fix_up, argTo4;
205
unsigned long long fixed_arg;
208
if ((exponent(st0_ptr) > 0)
209
|| ((exponent(st0_ptr) == 0)
210
&& (significand(st0_ptr) > 0xc90fdaa22168c234LL))) {
211
EXCEPTION(EX_Invalid);
212
FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
215
#endif /* PARANOID */
217
exponent = exponent(st0_ptr);
219
accumulator.lsw = accumulator.midw = accumulator.msw = 0;
222
|| ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54))) {
223
/* arg is < 0.687705 */
225
argSqrd.msw = st0_ptr->sigh;
226
argSqrd.midw = st0_ptr->sigl;
228
mul64_Xsig(&argSqrd, &significand(st0_ptr));
231
/* shift the argument right by the required places */
232
shr_Xsig(&argSqrd, 2 * (-1 - exponent));
235
argTo4.msw = argSqrd.msw;
236
argTo4.midw = argSqrd.midw;
237
argTo4.lsw = argSqrd.lsw;
238
mul_Xsig_Xsig(&argTo4, &argTo4);
240
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
242
mul_Xsig_Xsig(&accumulator, &argSqrd);
243
negate_Xsig(&accumulator);
245
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
247
negate_Xsig(&accumulator);
249
mul64_Xsig(&accumulator, &significand(st0_ptr));
250
mul64_Xsig(&accumulator, &significand(st0_ptr));
251
shr_Xsig(&accumulator, -2 * (1 + exponent));
253
shr_Xsig(&accumulator, 3);
254
negate_Xsig(&accumulator);
256
add_Xsig_Xsig(&accumulator, &argSqrd);
258
shr_Xsig(&accumulator, 1);
260
/* It doesn't matter if accumulator is all zero here, the
261
following code will work ok */
262
negate_Xsig(&accumulator);
264
if (accumulator.lsw & 0x80000000)
265
XSIG_LL(accumulator)++;
266
if (accumulator.msw == 0) {
267
/* The result is 1.0 */
268
FPU_copy_to_reg0(&CONST_1, TAG_Valid);
271
significand(&result) = XSIG_LL(accumulator);
273
/* will be a valid positive nr with expon = -1 */
274
setexponentpos(&result, -1);
277
fixed_arg = significand(st0_ptr);
280
/* The argument is >= 1.0 */
282
/* Put the binary point at the left. */
285
/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
286
fixed_arg = 0x921fb54442d18469LL - fixed_arg;
287
/* There is a special case which arises due to rounding, to fix here. */
288
if (fixed_arg == 0xffffffffffffffffLL)
294
/* A shift is needed here only for a narrow range of arguments,
295
i.e. for fixed_arg approx 2^-32, but we pick up more... */
296
if (!(LL_MSW(fixed_arg) & 0xffff0000)) {
302
XSIG_LL(argSqrd) = fixed_arg;
304
mul64_Xsig(&argSqrd, &fixed_arg);
307
/* shift the argument right by the required places */
308
shr_Xsig(&argSqrd, 2 * (-1 - exponent));
311
argTo4.msw = argSqrd.msw;
312
argTo4.midw = argSqrd.midw;
313
argTo4.lsw = argSqrd.lsw;
314
mul_Xsig_Xsig(&argTo4, &argTo4);
316
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
318
mul_Xsig_Xsig(&accumulator, &argSqrd);
319
negate_Xsig(&accumulator);
321
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
324
shr_Xsig(&accumulator, 2); /* Divide by four */
325
accumulator.msw |= 0x80000000; /* Add 1.0 */
327
mul64_Xsig(&accumulator, &fixed_arg);
328
mul64_Xsig(&accumulator, &fixed_arg);
329
mul64_Xsig(&accumulator, &fixed_arg);
331
/* Divide by four, FPU_REG compatible, etc */
332
exponent = 3 * exponent;
334
/* The minimum exponent difference is 3 */
335
shr_Xsig(&accumulator, exp2 - exponent);
337
negate_Xsig(&accumulator);
338
XSIG_LL(accumulator) += fixed_arg;
340
/* The basic computation is complete. Now fix the answer to
341
compensate for the error due to the approximation used for
345
/* This has an exponent of -65 */
346
XSIG_LL(fix_up) = 0x898cc51701b839a2ll;
349
/* The fix-up needs to be improved for larger args */
350
if (argSqrd.msw & 0xffc00000) {
351
/* Get about 32 bit precision in these: */
352
fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2;
353
fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24;
356
exp2 += norm_Xsig(&accumulator);
357
shr_Xsig(&accumulator, 1); /* Prevent overflow */
359
shr_Xsig(&fix_up, 65 + exp2);
361
add_Xsig_Xsig(&accumulator, &fix_up);
363
echange = round_Xsig(&accumulator);
365
setexponentpos(&result, exp2 + echange);
366
significand(&result) = XSIG_LL(accumulator);
369
FPU_copy_to_reg0(&result, TAG_Valid);
372
if ((exponent(&result) >= 0)
373
&& (significand(&result) > 0x8000000000000000LL)) {
374
EXCEPTION(EX_INTERNAL | 0x151);
376
#endif /* PARANOID */