1
/* -----------------------------------------------------------------------
2
ffi.c - Copyright (c) 2002 Bo Thorsen <bo@suse.de>
4
x86-64 Foreign Function Interface
6
Permission is hereby granted, free of charge, to any person obtaining
7
a copy of this software and associated documentation files (the
8
``Software''), to deal in the Software without restriction, including
9
without limitation the rights to use, copy, modify, merge, publish,
10
distribute, sublicense, and/or sell copies of the Software, and to
11
permit persons to whom the Software is furnished to do so, subject to
12
the following conditions:
14
The above copyright notice and this permission notice shall be included
15
in all copies or substantial portions of the Software.
17
THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
18
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20
IN NO EVENT SHALL CYGNUS SOLUTIONS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23
OTHER DEALINGS IN THE SOFTWARE.
24
----------------------------------------------------------------------- */
27
#include <ffi_common.h>
34
#define MAX_GPR_REGS 6
35
#define MAX_SSE_REGS 8
39
/* Registers for argument passing. */
40
UINT64 gpr[MAX_GPR_REGS];
41
__int128_t sse[MAX_SSE_REGS];
44
extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
45
void *raddr, void (*fnaddr)(), unsigned ssecount);
47
/* All references to register classes here are identical to the code in
48
gcc/config/i386/i386.c. Do *not* change one without the other. */
50
/* Register class used for passing a given 64-bit part of the argument.
51
These represent classes as documented by the PS ABI, with the exception
52
of SSESF, SSEDF classes, that are basically SSE class, just gcc will
53
use SF or DFmode move instead of DImode to avoid reformatting penalties.
55
Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
56
whenever possible (upper half does contain padding). */
61
X86_64_INTEGERSI_CLASS,
68
X86_64_COMPLEX_X87_CLASS,
74
/* Nonzero iff X lies in the inclusive range
   X86_64_SSE_CLASS .. X86_64_SSEUP_CLASS.  Every use of the argument is
   parenthesized so that operator expressions passed as X expand with the
   intended precedence.  X is evaluated twice, so it must not have side
   effects.  */
#define SSE_CLASS_P(X) \
  ((X) >= X86_64_SSE_CLASS && (X) <= X86_64_SSEUP_CLASS)
76
/* x86-64 register passing implementation. See x86-64 ABI for details. Goal
77
of this code is to classify each 8 bytes of an incoming argument by register
78
class and assign registers accordingly. */
80
/* Return the union class of CLASS1 and CLASS2.
81
See the x86-64 PS ABI for details. */
83
static enum x86_64_reg_class
84
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
86
/* Rule #1: If both classes are equal, this is the resulting class. */
90
/* Rule #2: If one of the classes is NO_CLASS, the resulting class is
92
if (class1 == X86_64_NO_CLASS)
94
if (class2 == X86_64_NO_CLASS)
97
/* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
98
if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
99
return X86_64_MEMORY_CLASS;
101
/* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
102
if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
103
|| (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
104
return X86_64_INTEGERSI_CLASS;
105
if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
106
|| class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
107
return X86_64_INTEGER_CLASS;
109
/* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
111
if (class1 == X86_64_X87_CLASS
112
|| class1 == X86_64_X87UP_CLASS
113
|| class1 == X86_64_COMPLEX_X87_CLASS
114
|| class2 == X86_64_X87_CLASS
115
|| class2 == X86_64_X87UP_CLASS
116
|| class2 == X86_64_COMPLEX_X87_CLASS)
117
return X86_64_MEMORY_CLASS;
119
/* Rule #6: Otherwise class SSE is used. */
120
return X86_64_SSE_CLASS;
123
/* Classify the argument of type TYPE and mode MODE.
124
CLASSES will be filled by the register class used to pass each word
125
of the operand. The number of words is returned. In case the parameter
126
should be passed in memory, 0 is returned. As a special case for zero
127
sized containers, classes[0] will be NO_CLASS and 1 is returned.
129
See the x86-64 PS ABI for details.
132
classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
139
case FFI_TYPE_UINT16:
140
case FFI_TYPE_SINT16:
141
case FFI_TYPE_UINT32:
142
case FFI_TYPE_SINT32:
143
case FFI_TYPE_UINT64:
144
case FFI_TYPE_SINT64:
145
case FFI_TYPE_POINTER:
146
if (byte_offset + type->size <= 4)
147
classes[0] = X86_64_INTEGERSI_CLASS;
149
classes[0] = X86_64_INTEGER_CLASS;
152
if (byte_offset == 0)
153
classes[0] = X86_64_SSESF_CLASS;
155
classes[0] = X86_64_SSE_CLASS;
157
case FFI_TYPE_DOUBLE:
158
classes[0] = X86_64_SSEDF_CLASS;
160
case FFI_TYPE_LONGDOUBLE:
161
classes[0] = X86_64_X87_CLASS;
162
classes[1] = X86_64_X87UP_CLASS;
164
case FFI_TYPE_STRUCT:
166
const int UNITS_PER_WORD = 8;
167
int words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
170
enum x86_64_reg_class subclasses[MAX_CLASSES];
172
/* If the struct is larger than 16 bytes, pass it on the stack. */
176
for (i = 0; i < words; i++)
177
classes[i] = X86_64_NO_CLASS;
179
/* Merge the fields of structure. */
180
for (ptr = type->elements; *ptr != NULL; ptr++)
184
byte_offset = ALIGN (byte_offset, (*ptr)->alignment);
186
num = classify_argument (*ptr, subclasses, byte_offset % 8);
189
for (i = 0; i < num; i++)
191
int pos = byte_offset / 8;
193
merge_classes (subclasses[i], classes[i + pos]);
196
byte_offset += (*ptr)->size;
199
/* Final merger cleanup. */
200
for (i = 0; i < words; i++)
202
/* If one class is MEMORY, everything should be passed in
204
if (classes[i] == X86_64_MEMORY_CLASS)
207
/* The X86_64_SSEUP_CLASS should always be preceded by
209
if (classes[i] == X86_64_SSEUP_CLASS
210
&& (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
211
classes[i] = X86_64_SSE_CLASS;
213
/* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
214
if (classes[i] == X86_64_X87UP_CLASS
215
&& (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
216
classes[i] = X86_64_SSE_CLASS;
224
return 0; /* Never reached. */
227
/* Examine the argument and set the number of registers required in each
228
class. Return zero iff parameter should be passed in memory, otherwise
229
the number of registers. */
232
examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
233
_Bool in_return, int *pngpr, int *pnsse)
235
int i, n, ngpr, nsse;
237
n = classify_argument (type, classes, 0);
242
for (i = 0; i < n; ++i)
245
case X86_64_INTEGER_CLASS:
246
case X86_64_INTEGERSI_CLASS:
249
case X86_64_SSE_CLASS:
250
case X86_64_SSESF_CLASS:
251
case X86_64_SSEDF_CLASS:
254
case X86_64_NO_CLASS:
255
case X86_64_SSEUP_CLASS:
257
case X86_64_X87_CLASS:
258
case X86_64_X87UP_CLASS:
259
case X86_64_COMPLEX_X87_CLASS:
260
return in_return != 0;
271
/* Perform machine dependent cif processing. */
274
ffi_prep_cif_machdep (ffi_cif *cif)
276
int gprcount, ssecount, i, avn, n, ngpr, nsse, flags;
277
enum x86_64_reg_class classes[MAX_CLASSES];
280
gprcount = ssecount = 0;
282
flags = cif->rtype->type;
283
if (flags != FFI_TYPE_VOID)
285
n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
288
/* The return value is passed in memory. A pointer to that
289
memory is the first argument. Allocate a register for it. */
291
/* We don't have to do anything in asm for the return. */
292
flags = FFI_TYPE_VOID;
294
else if (flags == FFI_TYPE_STRUCT)
296
/* Mark which registers the result appears in. */
297
_Bool sse0 = SSE_CLASS_P (classes[0]);
298
_Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
301
else if (!sse0 && sse1)
303
else if (sse0 && sse1)
305
/* Mark the true size of the structure. */
306
flags |= cif->rtype->size << 12;
310
/* Go over all arguments and determine the way they should be passed.
311
If it's in a register and there is space for it, let that be so. If
312
not, add its size to the stack byte count. */
313
for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++)
315
if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0
316
|| gprcount + ngpr > MAX_GPR_REGS
317
|| ssecount + nsse > MAX_SSE_REGS)
319
long align = cif->arg_types[i]->alignment;
324
bytes = ALIGN(bytes, align);
325
bytes += cif->arg_types[i]->size;
342
ffi_call (ffi_cif *cif, void (*fn)(), void *rvalue, void **avalue)
344
enum x86_64_reg_class classes[MAX_CLASSES];
346
ffi_type **arg_types;
347
int gprcount, ssecount, ngpr, nsse, i, avn;
349
struct register_args *reg_args;
351
/* Can't call 32-bit mode from 64-bit mode. */
352
FFI_ASSERT (cif->abi == FFI_UNIX64);
354
/* If the return value is a struct and we don't have a return value
355
address then we need to make one. Note the setting of flags to
356
VOID above in ffi_prep_cif_machdep. */
357
ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
358
&& (cif->flags & 0xff) == FFI_TYPE_VOID);
359
if (rvalue == NULL && ret_in_memory)
360
rvalue = alloca (cif->rtype->size);
362
/* Allocate the space for the arguments, plus 4 words of temp space. */
363
stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
364
reg_args = (struct register_args *) stack;
365
argp = stack + sizeof (struct register_args);
367
gprcount = ssecount = 0;
369
/* If the return value is passed in memory, add the pointer as the
370
first integer argument. */
372
reg_args->gpr[gprcount++] = (long) rvalue;
375
arg_types = cif->arg_types;
377
for (i = 0; i < avn; ++i)
379
size_t size = arg_types[i]->size;
382
n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
384
|| gprcount + ngpr > MAX_GPR_REGS
385
|| ssecount + nsse > MAX_SSE_REGS)
387
long align = arg_types[i]->alignment;
389
/* Stack arguments are *always* at least 8 byte aligned. */
393
/* Pass this argument in memory. */
394
argp = (void *) ALIGN (argp, align);
395
memcpy (argp, avalue[i], size);
400
/* The argument is passed entirely in registers. */
401
char *a = (char *) avalue[i];
404
for (j = 0; j < n; j++, a += 8, size -= 8)
408
case X86_64_INTEGER_CLASS:
409
case X86_64_INTEGERSI_CLASS:
410
reg_args->gpr[gprcount] = 0;
411
memcpy (®_args->gpr[gprcount], a, size < 8 ? size : 8);
414
case X86_64_SSE_CLASS:
415
case X86_64_SSEDF_CLASS:
416
reg_args->sse[ssecount++] = *(UINT64 *) a;
418
case X86_64_SSESF_CLASS:
419
reg_args->sse[ssecount++] = *(UINT32 *) a;
428
ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
429
cif->flags, rvalue, fn, ssecount);
433
extern void ffi_closure_unix64(void);
436
ffi_prep_closure (ffi_closure* closure,
438
void (*fun)(ffi_cif*, void*, void**, void*),
441
volatile unsigned short *tramp;
443
tramp = (volatile unsigned short *) &closure->tramp[0];
445
tramp[0] = 0xbb49; /* mov <code>, %r11 */
446
*(void * volatile *) &tramp[1] = ffi_closure_unix64;
447
tramp[5] = 0xba49; /* mov <data>, %r10 */
448
*(void * volatile *) &tramp[6] = closure;
450
/* Set the carry bit iff the function uses any sse registers.
451
This is clc or stc, together with the first byte of the jmp. */
452
tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8;
454
tramp[11] = 0xe3ff; /* jmp *%r11 */
458
closure->user_data = user_data;
464
ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue,
465
struct register_args *reg_args, char *argp)
469
ffi_type **arg_types;
471
int gprcount, ssecount, ngpr, nsse;
475
avalue = alloca(cif->nargs * sizeof(void *));
476
gprcount = ssecount = 0;
478
ret = cif->rtype->type;
479
if (ret != FFI_TYPE_VOID)
481
enum x86_64_reg_class classes[MAX_CLASSES];
482
int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
485
/* The return value goes in memory. Arrange for the closure
486
return value to go directly back to the original caller. */
487
rvalue = (void *) reg_args->gpr[gprcount++];
488
/* We don't have to do anything in asm for the return. */
491
else if (ret == FFI_TYPE_STRUCT && n == 2)
493
/* Mark which register the second word of the structure goes in. */
494
_Bool sse0 = SSE_CLASS_P (classes[0]);
495
_Bool sse1 = SSE_CLASS_P (classes[1]);
498
else if (sse0 && !sse1)
504
arg_types = cif->arg_types;
506
for (i = 0; i < avn; ++i)
508
enum x86_64_reg_class classes[MAX_CLASSES];
511
n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
513
|| gprcount + ngpr > MAX_GPR_REGS
514
|| ssecount + nsse > MAX_SSE_REGS)
516
long align = arg_types[i]->alignment;
518
/* Stack arguments are *always* at least 8 byte aligned. */
522
/* Pass this argument in memory. */
523
argp = (void *) ALIGN (argp, align);
525
argp += arg_types[i]->size;
527
/* If the argument is in a single register, or two consecutive
528
registers, then we can use that address directly. */
531
&& SSE_CLASS_P (classes[0]) == SSE_CLASS_P (classes[1])))
533
/* The argument is in a single register. */
534
if (SSE_CLASS_P (classes[0]))
536
avalue[i] = ®_args->sse[ssecount];
541
avalue[i] = ®_args->gpr[gprcount];
545
/* Otherwise, allocate space to make them consecutive. */
548
char *a = alloca (16);
552
for (j = 0; j < n; j++, a += 8)
554
if (SSE_CLASS_P (classes[j]))
555
memcpy (a, ®_args->sse[ssecount++], 8);
557
memcpy (a, ®_args->gpr[gprcount++], 8);
562
/* Invoke the closure. */
563
closure->fun (cif, rvalue, avalue, closure->user_data);
565
/* Tell assembly how to perform return type promotions. */
569
#endif /* __x86_64__ */