47
101
__asm__ __volatile__("fldcw %0" : : "m"(cw));
50
void erts_restore_x87(void)
104
static void unmask_sse2(void)
107
__asm__ __volatile__("stmxcsr %0" : "=m"(mxcsr));
108
mxcsr &= ~(0x003F|0x0680); /* clear exn flags, unmask OM, ZM, IM (not PM, UM, DM) */
109
__asm__ __volatile__("ldmxcsr %0" : : "m"(mxcsr));
112
#if defined(__x86_64__) || defined(__DARWIN__)
113
/* In this preprocessor branch SSE2 is reported as always available,
   so no runtime probing is performed. */
static inline int cpu_has_sse2(void)
{
    return 1;
}
114
#else /* !__x86_64__ */
116
* Check if an x86-32 processor has SSE2.
118
static unsigned int xor_eflags(unsigned int mask)
120
unsigned int eax, edx;
122
eax = mask; /* eax = mask */
124
"popl %0\n\t" /* edx = original EFLAGS */
125
"xorl %0, %1\n\t" /* eax = mask ^ EFLAGS */
127
"popfl\n\t" /* new EFLAGS = mask ^ original EFLAGS */
129
"popl %1\n\t" /* eax = new EFLAGS */
130
"xorl %0, %1\n\t" /* eax = new EFLAGS ^ old EFLAGS */
132
"popfl" /* restore original EFLAGS */
133
: "=d"(edx), "=a"(eax)
138
static __inline__ unsigned int cpuid_eax(unsigned int op)
148
static __inline__ unsigned int cpuid_edx(unsigned int op)
150
unsigned int eax, edx;
152
: "=a"(eax), "=d"(edx)
158
/* The AC bit, bit #18, is a new bit introduced in the EFLAGS
159
* register on the Intel486 processor to generate alignment
160
* faults. This bit cannot be set on the Intel386 processor.
162
static __inline__ int is_386(void)
164
return ((xor_eflags(1<<18) >> 18) & 1) == 0;
167
/* Newer x86 processors have a CPUID instruction, as indicated by
168
* the ID bit (#21) in EFLAGS being modifiable.
170
static __inline__ int has_CPUID(void)
172
return (xor_eflags(1<<21) >> 21) & 1;
175
static int cpu_has_sse2(void)
177
unsigned int maxlev, features;
178
static int has_sse2 = -1;
188
maxlev = cpuid_eax(0);
189
/* Intel A-step Pentium had a preliminary version of CPUID.
190
It also didn't have SSE2. */
191
if ((maxlev & 0xFFFFFF00) == 0x0500)
193
/* If max level is zero then CPUID cannot report any features. */
196
features = cpuid_edx(1);
197
has_sse2 = (features & (1 << 26)) != 0;
201
#endif /* !__x86_64__ */
203
static void unmask_fpe(void)
210
void erts_restore_fpu(void)
52
212
__asm__ __volatile__("fninit");
56
#else /* !(__i386__ && __GNUC__) */
216
#elif defined(__sparc__) && defined(__linux__)
218
static void unmask_fpe(void)
222
__asm__("st %%fsr, %0" : "=m"(fsr));
223
fsr &= ~(0x1FUL << 23); /* clear FSR[TEM] field */
224
fsr |= (0x1AUL << 23); /* enable NV, OF, DZ exceptions */
225
__asm__ __volatile__("ld %0, %%fsr" : : "m"(fsr));
228
#elif (defined(__powerpc__) && defined(__linux__)) || (defined(__ppc__) && defined(__DARWIN__))
230
#if defined(__linux__)
231
#include <sys/prctl.h>
233
static void set_fpexc_precise(void)
235
if (prctl(PR_SET_FPEXC, PR_FP_EXC_PRECISE) < 0) {
236
perror("PR_SET_FPEXC");
241
#elif defined(__DARWIN__)
243
#include <mach/mach.h>
248
* 0 0 floating-point exceptions disabled
249
* 0 1 floating-point imprecise nonrecoverable
250
* 1 0 floating-point imprecise recoverable
251
* 1 1 floating-point precise mode
254
* - Darwin 5.5 (MacOS X <= 10.1) starts with FE0 == FE1 == 0,
255
* and resets FE0 and FE1 to 0 after each SIGFPE.
256
* - Darwin 6.0 (MacOS X 10.2) starts with FE0 == FE1 == 1,
257
* and does not reset FE0 or FE1 after a SIGFPE.
259
/* Bit mask for FE0 (bit 11), as tested and set in state.srr1. */
#define FE0_MASK (1 << 11)
260
/* Bit mask for FE1 (bit 8), as tested and set in state.srr1. */
#define FE1_MASK (1 << 8)
262
/* a thread cannot get or set its own MSR bits */
263
static void *fpu_fpe_enable(void *arg)
265
thread_t t = *(thread_t*)arg;
266
struct ppc_thread_state state;
267
unsigned int state_size = PPC_THREAD_STATE_COUNT;
269
if (thread_get_state(t, PPC_THREAD_STATE, (natural_t*)&state, &state_size) != KERN_SUCCESS) {
270
perror("thread_get_state");
273
if ((state.srr1 & (FE1_MASK|FE0_MASK)) != (FE1_MASK|FE0_MASK)) {
275
/* This would also have to be performed in the SIGFPE handler
276
to work around the MSR reset older Darwin releases do. */
277
state.srr1 |= (FE1_MASK|FE0_MASK);
278
thread_set_state(t, PPC_THREAD_STATE, (natural_t*)&state, state_size);
280
fprintf(stderr, "srr1 == 0x%08x, your Darwin is too old\n", state.srr1);
284
return NULL; /* Ok, we appear to be on Darwin 6.0 or later */
287
static void set_fpexc_precise(void)
289
thread_t self = mach_thread_self();
292
if (pthread_create(&enabler, NULL, fpu_fpe_enable, &self)) {
293
perror("pthread_create");
294
} else if (pthread_join(enabler, NULL)) {
295
perror("pthread_join");
301
static void set_fpscr(unsigned int fpscr)
305
unsigned int fpscr[2];
307
u.fpscr[0] = 0xFFF80000;
309
__asm__ __volatile__("mtfsf 255,%0" : : "f"(u.d));
312
static void unmask_fpe(void)
315
set_fpscr(0x80|0x40|0x10); /* VE, OE, ZE; not UE or XE */
58
320
/* Macro form of unmask_fpe(): expands to one fpsetmask() call passing the
   FP_X_INV, FP_X_OFL and FP_X_DZ flags.
   NOTE(review): presumably the fallback for platforms that lack one of the
   hand-written unmask_fpe() functions in this file, and presumably enables
   traps for invalid-operation, overflow and divide-by-zero -- confirm against
   the enclosing #if chain and the platform's fpsetmask() documentation. */
#define unmask_fpe() fpsetmask(FP_X_INV | FP_X_OFL | FP_X_DZ)
60
#endif /* __i386__ && __GNUC__ */
324
#if (defined(__linux__) && (defined(__x86_64__) || defined(__i386__))) || (defined(__DARWIN__) && defined(__i386__)) || (defined(__FreeBSD__) && defined(__x86_64__)) || (defined(__sun__) && defined(__x86_64__))
325
#include <ucontext.h>
327
#if defined(__linux__) && defined(__x86_64__)
328
#define mc_pc(mc) ((mc)->gregs[REG_RIP])
329
typedef mcontext_t *erts_mcontext_ptr_t;
330
#elif defined(__linux__) && defined(__i386__)
331
#define mc_pc(mc) ((mc)->gregs[REG_EIP])
332
typedef mcontext_t *erts_mcontext_ptr_t;
333
#elif defined(__DARWIN__) && defined(__i386__)
334
#define mc_pc(mc) ((mc)->ss.eip)
335
typedef mcontext_t erts_mcontext_ptr_t;
336
#elif defined(__FreeBSD__) && defined(__x86_64__)
337
#define mc_pc(mc) ((mc)->mc_rip)
338
typedef mcontext_t *erts_mcontext_ptr_t;
339
#elif defined(__sun__) && defined(__x86_64__)
340
#define mc_pc(mc) ((mc)->gregs[REG_RIP])
341
typedef mcontext_t *erts_mcontext_ptr_t;
344
static void skip_sse2_insn(erts_mcontext_ptr_t mc)
346
unsigned char *pc0 = (unsigned char*)mc_pc(mc);
347
unsigned char *pc = pc0;
349
unsigned int nr_skip_bytes;
353
case 0x66: case 0xF2: case 0xF3:
356
#if defined(__x86_64__)
357
if ((opcode & 0xF0) == 0x40)
365
case 0x2A: /* cvtpi2ps,cvtsi2sd,cvtsi2ss /r */
366
case 0x2C: /* cvttpd2pi,cvttps2pi,cvttsd2si,cvtss2si /r */
367
case 0x2D: /* cvtpd2pi,cvtps2pi,cvtsd2si,cvtss2si /r */
368
case 0x2E: /* ucomisd,ucomiss /r */
369
case 0x2F: /* comisd,comiss /r */
370
case 0x51: /* sqrtpd,sqrtps,sqrtsd,sqrtss /r */
371
case 0x58: /* addpd,addps,addsd,addss /r */
372
case 0x59: /* mulpd,mulps,mulsd,mulss /r */
373
case 0x5A: /* cvtpd2ps,cvtps2pd,cvtsd2ss,cvtss2sd /r */
374
case 0x5B: /* cvtdq2ps,cvtps2dq,cvttps2dq /r */
375
case 0x5C: /* subpd,subps,subsd,subss /r */
376
case 0x5D: /* minpd,minps,minsd,minss /r */
377
case 0x5E: /* divpd,divps,divsd,divss /r */
378
case 0x5F: /* maxpd,maxps,maxsd,maxss /r */
379
case 0xE6: /* cvtpd2dq,cvttpd2dq /r */
382
case 0xC2: /* cmppd,cmpps,cmpsd,cmpss /r /ib */
387
fprintf(stderr, "%s: unexpected code at %p:", __FUNCTION__, pc0);
389
fprintf(stderr, " %02X", *pc0++);
391
fprintf(stderr, "\r\n");
395
/* Past the opcode. Parse and skip the mod/rm and sib bytes. */
397
switch ((opcode >> 6) & 3) { /* inspect mod */
399
switch (opcode & 7) { /* inspect r/m */
401
opcode = *pc++; /* sib */
402
switch (opcode & 7) { /* inspect base */
404
nr_skip_bytes += 4; /* disp32 */
409
nr_skip_bytes += 4; /* disp32 */
414
nr_skip_bytes += 1; /* disp8 */
415
switch (opcode & 7) { /* inspect r/m */
422
nr_skip_bytes += 4; /* disp32 */
423
switch (opcode & 7) { /* inspect r/m */
433
/* Past mod/rm and sib. Skip any disp, and /ib for cmp{pd,ps,sd,ss}. */
436
/* The longest instruction handled above is 11 bytes. So there is
437
no need to check the 15-byte instruction length limit here. */
440
mc_pc(mc) = (long)pc;
442
#endif /* (__linux__ && (__x86_64__ || __i386__)) || (__DARWIN__ && __i386__) || (__FreeBSD__ && __x86_64__) || (__sun__ && __x86_64__) */
444
#if (defined(__linux__) && (defined(__i386__) || defined(__x86_64__) || defined(__sparc__) || defined(__powerpc__))) || (defined(__DARWIN__) && (defined(__i386__) || defined(__ppc__))) || (defined(__FreeBSD__) && defined(__x86_64__)) || (defined(__sun__) && defined(__x86_64__))
62
446
#if defined(__linux__) && defined(__i386__)
447
#include <asm/sigcontext.h>
448
#elif defined(__FreeBSD__) && defined(__x86_64__)
449
#include <sys/types.h>
450
#include <machine/fpu.h>
64
452
#include <ucontext.h>
66
455
static void fpe_sig_action(int sig, siginfo_t *si, void *puc)
68
457
ucontext_t *uc = puc;
69
mcontext_t *mc = &uc->uc_mcontext;
70
fpregset_t fpstate = mc->fpregs;
458
#if defined(__linux__)
459
#if defined(__x86_64__)
460
mcontext_t *mc = &uc->uc_mcontext;
461
fpregset_t fpstate = mc->fpregs;
462
/* A failed SSE2 instruction will restart. To avoid
463
looping, we must update RIP to skip the instruction
464
(leaving garbage in the destination).
465
The alternative is to mask SSE2 exceptions now and
466
unmask them again later in erts_check_fpe(), but that
467
relies too much on other code being cooperative. */
468
if (fpstate->mxcsr & 0x000D) { /* OE|ZE|IE; see unmask_sse2() */
469
fpstate->mxcsr &= ~(0x003F|0x0680);
472
fpstate->swd &= ~0xFF;
473
#elif defined(__i386__)
474
mcontext_t *mc = &uc->uc_mcontext;
475
fpregset_t fpstate = mc->fpregs;
476
if ((fpstate->status >> 16) == X86_FXSR_MAGIC &&
477
((struct _fpstate*)fpstate)->mxcsr & 0x000D) {
478
((struct _fpstate*)fpstate)->mxcsr &= ~(0x003F|0x0680);
481
fpstate->sw &= ~0xFF;
482
#elif defined(__sparc__)
483
/* on SPARC the 3rd parameter points to a sigcontext not a ucontext */
484
struct sigcontext *sc = (struct sigcontext*)puc;
485
sc->si_regs.pc = sc->si_regs.npc;
486
sc->si_regs.npc = (unsigned long)sc->si_regs.npc + 4;
487
#elif defined(__powerpc__)
488
#if defined(__powerpc64__)
489
mcontext_t *mc = &uc->uc_mcontext;
490
unsigned long *regs = &mc->gp_regs[0];
492
mcontext_t *mc = uc->uc_mcontext.uc_regs;
493
unsigned long *regs = &mc->gregs[0];
496
regs[PT_FPSCR] = 0x80|0x40|0x10; /* VE, OE, ZE; not UE or XE */
498
#elif defined(__DARWIN__) && defined(__i386__)
499
mcontext_t mc = uc->uc_mcontext;
500
if (mc->fs.fpu_mxcsr & 0x000D) {
501
mc->fs.fpu_mxcsr &= ~(0x003F|0x0680);
504
*(unsigned short *)&mc->fs.fpu_fsw &= ~0xFF;
505
#elif defined(__DARWIN__) && defined(__ppc__)
506
mcontext_t mc = uc->uc_mcontext;
508
mc->fs.fpscr = 0x80|0x40|0x10;
509
#elif defined(__FreeBSD__) && defined(__x86_64__)
510
mcontext_t *mc = &uc->uc_mcontext;
511
struct savefpu *savefpu = (struct savefpu*)&mc->mc_fpstate;
512
struct envxmm *envxmm = &savefpu->sv_env;
513
if (envxmm->en_mxcsr & 0x000D) {
514
envxmm->en_mxcsr &= ~(0x003F|0x0680);
517
envxmm->en_sw &= ~0xFF;
518
#elif defined(__sun__) && defined(__x86_64__)
519
mcontext_t *mc = &uc->uc_mcontext;
520
struct fpchip_state *fpstate = &mc->fpregs.fp_reg_set.fpchip_state;
521
if (fpstate->mxcsr & 0x000D) {
522
fpstate->mxcsr &= ~(0x003F|0x0680);
525
fpstate->sw &= ~0xFF;
527
set_current_fp_exception();
75
void erts_sys_init_float(void)
530
static void erts_thread_catch_fp_exceptions(void)
77
532
struct sigaction act;
78
533
memset(&act, 0, sizeof act);
85
#else /* !(__linux__ && __i386__) */
540
#else /* !((__linux__ && (__i386__ || __x86_64__ || __powerpc__)) || (__DARWIN__ && (__i386__ || __ppc__))) */
87
542
static void fpe_sig_handler(int sig)
544
set_current_fp_exception();
93
erts_sys_init_float(void)
547
static void erts_thread_catch_fp_exceptions(void)
95
549
sys_sigset(SIGFPE, fpe_sig_handler);
99
#endif /* __linux__ && __i386__ */
553
#endif /* (__linux__ && (__i386__ || __x86_64__ || __powerpc__)) || (__DARWIN__ && (__i386__ || __ppc__))) */
555
/* once-only initialisation early in the main thread */
556
void erts_sys_init_float(void)
558
erts_init_fp_exception();
559
erts_thread_catch_fp_exceptions();
101
562
#endif /* NO_FPE_SIGNALS */
564
void erts_thread_init_float(void)
567
/* This allows Erlang schedulers to leave Erlang-process context
568
and still have working FP exceptions. XXX: is this needed? */
569
erts_thread_init_fp_exception();
572
#if !defined(NO_FPE_SIGNALS) && (defined(__DARWIN__) || defined(__FreeBSD__))
573
/* Darwin (7.9.0) does not appear to propagate FP exception settings
574
to a new thread from its parent. So if we want FP exceptions, we
575
must manually re-enable them in each new thread.
576
FreeBSD 6.1 appears to suffer from a similar issue. */
577
erts_thread_catch_fp_exceptions();
581
/* The following check is incorporated from the Vee machine */
583
/*
 * Nonzero when d is one of the decimal digit characters '0'..'9'.
 *
 * The argument is evaluated exactly once, so expressions with side
 * effects (e.g. ISDIGIT(*p++)) behave as expected.  Values below '0'
 * wrap to a large unsigned number and are therefore rejected by the
 * single comparison.  Intended for char/int arguments.
 */
#define ISDIGIT(d) ((unsigned int)((d) - '0') <= 9u)
104
586
** Convert a double to ascii format 0.dddde[+|-]ddd
105
587
** return number of characters converted
589
** These two functions should maybe use localeconv() to pick up
590
** the current radix character, but since it is uncertain how
591
** expensive such a system call is, and since no-one has heard
592
** of other radix characters than '.' and ',' an ad-hoc
593
** low execution time solution is used instead.
109
sys_double_to_chars(double fp, char* buf)
597
sys_double_to_chars(double fp, char *buf)
111
601
(void) sprintf(buf, "%.20e", fp);
602
/* Search up to the decimal point */
603
if (*s == '+' || *s == '-') s++;
604
while (ISDIGIT(*s)) s++;
605
if (*s == ',') *s++ = '.'; /* Replace ',' with '.' */
606
/* Scan to end of string */
608
return s-buf; /* i.e strlen(buf) */
115
611
/* Float conversion */