3
#include <stddef.h> /* offsetof() */
11
#include "big.h" /* term_to_Sint() */
13
#include "hipe_arch.h"
14
#include "hipe_bif0.h"
15
#include "hipe_native_bif.h" /* nbif_callemu() */
20
#include "hipe_literals.h"
22
/* Constant operand for SSE2 float negation: element 0 holds only bit 63
   (the IEEE-754 double sign bit), element 1 is zero.  Presumably XORed
   (xorpd) against a double to flip its sign -- the use site is in
   generated native code, not in this file; confirm against the compiler
   backend. */
const Uint sse2_fnegate_mask[2] = {0x8000000000000000,0};
24
/*
 * Patch the imm64 operand of a native-code instruction with a new
 * value, then flush the icache word at that address.
 *
 * NOTE(review): this extract is incomplete -- the function braces and
 * the store through `address` are not visible here, and the bare
 * numeric lines below are line-number residue from the extraction,
 * left untouched.
 */
void hipe_patch_load_fe(Uint64 *address, Uint64 value)
26
/* address points to an imm64 operand */
28
hipe_flush_icache_word(address);
31
/*
 * Patch an instruction immediate at `address` with `value`.  `type`
 * presumably selects between a full 64-bit immediate and a 32-bit
 * immediate (the dispatch and return statements are missing from this
 * extract -- TODO confirm).  A value stored as imm32 is range-checked
 * first, and the icache word is flushed after the store.  Bare numeric
 * lines are line-number residue from the extraction.
 */
int hipe_patch_insn(void *address, Uint64 value, Eterm type)
36
*(Uint64*)address = value;
40
/* check that value fits in an unsigned imm32 */
41
/* XXX: are we sure it's not really a signed imm32? */
42
if ((Uint)(Uint32)value != value)
44
*(Uint32*)address = (Uint32)value;
49
hipe_flush_icache_word(address);
53
/*
 * Patch a call (rel32) at `callAddress` to target `destAddress`.  The
 * displacement is relative to the end of the 4-byte rel32 field, hence
 * the `- 4`.  The range check rejects targets outside the signed 32-bit
 * displacement range; the failure path and return statements are
 * missing from this extract.  `trampoline` appears unused here --
 * presumably AMD64's small code model needs no trampolines; confirm
 * against the missing lines.  Bare numeric lines are extraction residue.
 */
int hipe_patch_call(void *callAddress, void *destAddress, void *trampoline)
59
rel32 = (Sint)destAddress - (Sint)callAddress - 4;
60
if ((Sint)(Sint32)rel32 != rel32)
62
*(Uint32*)callAddress = (Uint32)rel32;
63
hipe_flush_icache_word(callAddress);
68
/*
 * Memory allocator for executable code.
 *
 * This is required on AMD64 because some Linux kernels
 * (including 2.6.10-rc1 and newer www.kernel.org ones)
 * default to non-executable memory mappings, causing
 * ordinary malloc() memory to be non-executable.
 *
 * Implementing this properly also allows us to ensure that
 * executable code ends up in the low 2GB of the address space,
 * as required by HiPE/AMD64's small code model.
 */
79
/* Bump-allocator state for executable code: `code_next` is the next
   free address in the current mapping and `code_bytes` is the number of
   bytes remaining in it (maintained by morecore()/alloc_code() below). */
static unsigned int code_bytes;
80
static char *code_next;
82
/* Optional allocation statistics, compiled out by default.  When the
   #if below is enabled, counters are accumulated through the
   ALLOC_CODE_STATS() hooks and dumped at process exit via atexit().
   Bare numeric lines throughout are line-number residue from the
   extraction, left untouched. */
#if 0 /* change to non-zero to get allocation statistics at exit() */
83
static unsigned int total_mapped, nr_joins, nr_splits, total_alloc, nr_allocs, nr_large, total_lost;
84
static unsigned int atexit_done;
86
/* Print the accumulated counters; average alloc guards against div-by-0. */
static void alloc_code_stats(void)
88
printf("\r\nalloc_code_stats: %u bytes mapped, %u joins, %u splits, %u bytes allocated, %u average alloc, %u large allocs, %u bytes lost\r\n",
89
total_mapped, nr_joins, nr_splits, total_alloc, nr_allocs ? total_alloc/nr_allocs : 0, nr_large, total_lost);
92
/* Register alloc_code_stats() with atexit(); presumably guarded by
   atexit_done so registration happens only once (guard not visible in
   this extract -- TODO confirm). */
static void atexit_alloc_code_stats(void)
96
(void)atexit(alloc_code_stats);
100
/* Stats enabled: the hook executes its argument expression. */
#define ALLOC_CODE_STATS(X) do{X;}while(0)
102
/* Stats disabled: hooks compile to nothing. */
#define ALLOC_CODE_STATS(X) do{}while(0)
105
/* FreeBSD 6.1 breakage */
106
#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
107
#define MAP_ANONYMOUS MAP_ANON
110
/*
 * Map a new arena of executable memory of at least `alloc_bytes` bytes
 * and install it as the current allocation arena (code_next/code_bytes).
 * The mapping must be executable and must lie in the low 2GB of the
 * address space (HiPE/AMD64's small code model); the mmap hint asks for
 * it adjacent to the previous arena so consecutive mappings can be
 * merged into one contiguous region.
 *
 * NOTE(review): many interior lines are missing from this extract (the
 * mmap flag tails behind the #if/#elif arms, error-handling bodies,
 * else-branches, and closing braces), and the bare numeric lines are
 * line-number residue from the extraction -- all left untouched.
 */
static void morecore(unsigned int alloc_bytes)
112
unsigned int map_bytes;
113
char *map_hint, *map_start;
115
/* Page-align the amount to allocate. */
116
map_bytes = (alloc_bytes + 4095) & ~4095;
118
/* Round up small allocations. */
119
if (map_bytes < 1024*1024)
120
map_bytes = 1024*1024;
122
ALLOC_CODE_STATS(++nr_large);
124
/* Create a new memory mapping, ensuring it is executable
125
and in the low 2GB of the address space. Also attempt
126
to make it adjacent to the previous mapping. */
127
map_hint = code_next + code_bytes;
128
#if !defined(MAP_32BIT)
129
/* FreeBSD doesn't have MAP_32BIT, and it doesn't respect
130
a plain map_hint (returns high mappings even though the
131
hint refers to a free area), so we have to use both map_hint
132
and MAP_FIXED to get addresses below the 2GB boundary.
133
This is even worse than the Linux/ppc64 case.
134
Similarly, Solaris 10 doesn't have MAP_32BIT,
135
and it doesn't respect a plain map_hint. */
136
if (!map_hint) /* first call */
137
map_hint = (char*)(512*1024*1024); /* 0.5GB */
139
if ((unsigned long)map_hint & 4095)
141
map_start = mmap(map_hint, map_bytes,
142
PROT_EXEC|PROT_READ|PROT_WRITE,
143
MAP_PRIVATE|MAP_ANONYMOUS
144
#if defined(MAP_32BIT)
146
#elif defined(__FreeBSD__) || defined(__sun__)
151
ALLOC_CODE_STATS(fprintf(stderr, "%s: mmap(%p,%u,...) == %p\r\n", __FUNCTION__, map_hint, map_bytes, map_start));
152
#if !defined(MAP_32BIT)
153
/* Without MAP_32BIT we must verify manually that the whole mapping
   landed below the 2GB boundary. */
if (map_start != MAP_FAILED &&
154
(((unsigned long)map_start + (map_bytes-1)) & ~0x7FFFFFFFUL)) {
155
fprintf(stderr, "mmap with hint %p returned code memory %p\r\n", map_hint, map_start);
159
if (map_start == MAP_FAILED) {
163
ALLOC_CODE_STATS(total_mapped += map_bytes);
165
/* Merge adjacent mappings, so the trailing portion of the previous
166
mapping isn't lost. In practice this is quite successful. */
167
if (map_start == map_hint) {
168
ALLOC_CODE_STATS(++nr_joins);
169
code_bytes += map_bytes;
170
#if !defined(MAP_32BIT)
171
if (!code_next) /* first call */
172
code_next = map_start;
175
ALLOC_CODE_STATS(++nr_splits);
176
ALLOC_CODE_STATS(total_lost += code_bytes);
177
code_next = map_start;
178
code_bytes = map_bytes;
181
ALLOC_CODE_STATS(atexit_alloc_code_stats());
184
/*
 * Bump-allocate `alloc_bytes` (rounded up to a 4-byte multiple, so
 * function entries stay aligned) from the current executable arena,
 * refilling via morecore() when the arena is too small.  Presumably
 * returns the pre-increment code_next -- the return statement is
 * missing from this extract; TODO confirm.  Bare numeric lines are
 * line-number residue from the extraction.
 */
static void *alloc_code(unsigned int alloc_bytes)
188
/* Align function entries. */
189
alloc_bytes = (alloc_bytes + 3) & ~3;
191
if (code_bytes < alloc_bytes)
192
morecore(alloc_bytes);
193
ALLOC_CODE_STATS(++nr_allocs);
194
ALLOC_CODE_STATS(total_alloc += alloc_bytes);
196
code_next += alloc_bytes;
197
code_bytes -= alloc_bytes;
201
/*
 * Public entry point for allocating executable code memory.  Delegates
 * to the bump allocator above.  The branch taken when `callees` is a
 * non-empty list is missing from this extract -- presumably it fills or
 * clears the `trampolines` array (unneeded on AMD64's small code
 * model); TODO confirm.  `p` is unused in the visible lines.
 */
void *hipe_alloc_code(Uint nrbytes, Eterm callees, Eterm *trampolines, Process *p)
203
if (is_not_nil(callees))
206
return alloc_code(nrbytes);
209
/* called from hipe_bif0.c:hipe_bifs_make_native_stub_2()
210
and hipe_bif0.c:hipe_make_stub() */
211
/*
 * Emit a small native stub that stores the BEAM address and arity into
 * the PCB (addressed via %ebp) and then jumps to nbif_callemu.  The
 * stub size depends on whether the P_BEAM_IP and P_ARITY offsets fit in
 * disp8 or need disp32 encodings (ModRM 0x45 vs 0x85).
 *
 * NOTE(review): the #if/#else directives selecting disp8 vs disp32, the
 * opcode-byte stores, codep advances, and the final return are partly
 * missing from this extract; the bare numeric lines are line-number
 * residue from the extraction -- all left untouched.
 */
void *hipe_make_native_stub(void *beamAddress, unsigned int beamArity)
214
* This creates a native code stub with the following contents:
216
* movq $Address, P_BEAM_IP(%ebp) %% Actually two movl
217
* movb $Arity, P_ARITY(%ebp)
220
* The stub has variable size, depending on whether the P_BEAM_IP
221
* and P_ARITY offsets fit in 8-bit signed displacements or not.
222
* The rel32 offset in the final jmp depends on its actual location,
223
* which also depends on the size of the previous instructions.
224
* Arity is stored with a movb because (a) Björn tells me arities
225
* are <= 255, and (b) a movb is smaller and faster than a movl.
227
unsigned int codeSize;
228
unsigned char *code, *codep;
229
unsigned int callEmuOffset;
231
codeSize = /* 23, 26, 29, or 32 bytes */
232
23 + /* 23 when all offsets are 8-bit */
233
(P_BEAM_IP >= 128 ? 3 : 0) +
234
((P_BEAM_IP + 4) >= 128 ? 3 : 0) +
235
(P_ARITY >= 128 ? 3 : 0);
236
codep = code = alloc_code(codeSize);
238
/* movl $beamAddress, P_BEAM_IP(%ebp); 3 or 6 bytes, plus 4 */
241
codep[1] = 0x85; /* disp32[EBP] */
242
codep[2] = P_BEAM_IP & 0xFF;
243
codep[3] = (P_BEAM_IP >> 8) & 0xFF;
244
codep[4] = (P_BEAM_IP >> 16) & 0xFF;
245
codep[5] = (P_BEAM_IP >> 24) & 0xFF;
248
codep[1] = 0x45; /* disp8[EBP] */
249
codep[2] = P_BEAM_IP;
252
/* Low 32 bits of the BEAM address, little-endian. */
codep[0] = ((unsigned long)beamAddress ) & 0xFF;
253
codep[1] = ((unsigned long)beamAddress >> 8) & 0xFF;
254
codep[2] = ((unsigned long)beamAddress >> 16) & 0xFF;
255
codep[3] = ((unsigned long)beamAddress >> 24) & 0xFF;
258
/* movl (shl 32 $beamAddress), P_BEAM_IP+4(%ebp); 3 or 6 bytes, plus 4 */
260
#if P_BEAM_IP+4 >= 128
261
codep[1] = 0x85; /* disp32[EBP] */
262
codep[2] = (P_BEAM_IP+4) & 0xFF;
263
codep[3] = ((P_BEAM_IP+4) >> 8) & 0xFF;
264
codep[4] = ((P_BEAM_IP+4) >> 16) & 0xFF;
265
codep[5] = ((P_BEAM_IP+4) >> 24) & 0xFF;
268
codep[1] = 0x45; /* disp8[EBP] */
269
codep[2] = (P_BEAM_IP+4);
272
/* High 32 bits of the BEAM address, little-endian. */
codep[0] = ((unsigned long)beamAddress >> 32) & 0xFF;
273
codep[1] = ((unsigned long)beamAddress >> 40) & 0xFF;
274
codep[2] = ((unsigned long)beamAddress >> 48) & 0xFF;
275
codep[3] = ((unsigned long)beamAddress >> 56) & 0xFF;
278
/* movb $beamArity, P_ARITY(%ebp); 3 or 6 bytes */
281
codep[1] = 0x85; /* disp32[EBP] */
282
codep[2] = P_ARITY & 0xFF;
283
codep[3] = (P_ARITY >> 8) & 0xFF;
284
codep[4] = (P_ARITY >> 16) & 0xFF;
285
codep[5] = (P_ARITY >> 24) & 0xFF;
288
codep[1] = 0x45; /* disp8[EBP] */
292
codep[0] = beamArity;
295
/* jmp callemu; 5 bytes */
296
/* rel32 is relative to the end of the stub (code + codeSize). */
callEmuOffset = (unsigned char*)nbif_callemu - (code + codeSize);
298
codep[1] = callEmuOffset & 0xFF;
299
codep[2] = (callEmuOffset >> 8) & 0xFF;
300
codep[3] = (callEmuOffset >> 16) & 0xFF;
301
codep[4] = (callEmuOffset >> 24) & 0xFF;
304
ASSERT(codep == code + codeSize);
311
/*
 * Debug dump of selected hipe_process_state fields.  The printf line
 * below is presumably the body of a local U(name, field) helper macro
 * printing one table row (offset, name, value) -- the #define line is
 * missing from this extract; TODO confirm.  Bare numeric lines are
 * line-number residue from the extraction.
 */
void hipe_arch_print_pcb(struct hipe_process_state *p)
314
printf(" % 4d | %s | 0x%0*lx | %*s |\r\n", (int)offsetof(struct hipe_process_state,x), n, 2*(int)sizeof(long), (unsigned long)p->x, 2+2*(int)sizeof(long), "")
316
U("narity ", narity);
321
/* XXX: The following should really be moved to a generic hipe_bifs_64 file. */
325
/* On a 64-bit word, a Sint64 is just a Sint, so conversion delegates to
   term_to_Sint() from big.h.  Presumably returns nonzero on success --
   confirm against term_to_Sint's contract. */
static int term_to_Sint64(Eterm term, Sint64 *sp)
327
return term_to_Sint(term, sp);
330
/*
 * hipe_bifs:write_s64/2 -- store a signed 64-bit value at a native
 * address.  Arg 1 must convert to a valid, properly aligned 64-bit
 * address; arg 2 must convert to a Sint64; otherwise BADARG.  The
 * declarations of `address`/`value`, the actual store, the icache
 * flush, and the BIF_RET are missing from this extract.  Bare numeric
 * lines are line-number residue from the extraction.
 */
BIF_RETTYPE hipe_bifs_write_s64_2(BIF_ALIST_2)
335
address = term_to_address(BIF_ARG_1);
336
if( !address || !hipe_word64_address_ok(address) )
337
BIF_ERROR(BIF_P, BADARG);
338
if( !term_to_Sint64(BIF_ARG_2, &value) )
339
BIF_ERROR(BIF_P, BADARG);
345
/*
 * hipe_bifs:write_u64/2 -- store an unsigned 64-bit value at a native
 * address, then flush the icache word.  Same validation pattern as the
 * signed variant, but converts arg 2 with term_to_Uint().  The
 * declarations, the store itself, and the BIF_RET are missing from this
 * extract.  Bare numeric lines are line-number residue from the
 * extraction.
 */
BIF_RETTYPE hipe_bifs_write_u64_2(BIF_ALIST_2)
350
address = term_to_address(BIF_ARG_1);
351
if( !address || !hipe_word64_address_ok(address) )
352
BIF_ERROR(BIF_P, BADARG);
353
if( !term_to_Uint(BIF_ARG_2, &value) )
354
BIF_ERROR(BIF_P, BADARG);
356
hipe_flush_icache_word(address);