2
** Definitions for x86 and x64 CPUs.
3
** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
6
#ifndef _LJ_TARGET_X86_H
7
#define _LJ_TARGET_X86_H
9
/* -- Registers IDs ------------------------------------------------------- */
13
_(EAX) _(ECX) _(EDX) _(EBX) _(ESP) _(EBP) _(ESI) _(EDI) \
14
_(R8D) _(R9D) _(R10D) _(R11D) _(R12D) _(R13D) _(R14D) _(R15D)
16
_(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) \
17
_(XMM8) _(XMM9) _(XMM10) _(XMM11) _(XMM12) _(XMM13) _(XMM14) _(XMM15)
20
_(EAX) _(ECX) _(EDX) _(EBX) _(ESP) _(EBP) _(ESI) _(EDI)
22
_(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7)
27
/* Callback handed to GPRDEF/FPRDEF below: pastes RID_ in front of a register
** name and appends a comma, yielding one `RID_<name>,` list entry per register.
*/
#define RIDENUM(name) RID_##name,
30
GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */
31
FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
33
RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */
35
/* Calling conventions. */
44
/* These definitions must match with the *.dasc file(s): */
45
RID_BASE = RID_EDX, /* Interpreter BASE. */
46
#if LJ_64 && !LJ_ABI_WIN
47
RID_LPC = RID_EBX, /* Interpreter PC. */
48
RID_DISPATCH = RID_R14D, /* Interpreter DISPATCH table. */
50
RID_LPC = RID_ESI, /* Interpreter PC. */
51
RID_DISPATCH = RID_EBX, /* Interpreter DISPATCH table. */
54
/* Register ranges [min, max) and number of registers. */
55
RID_MIN_GPR = RID_EAX,
56
RID_MIN_FPR = RID_XMM0,
57
RID_MAX_GPR = RID_MIN_FPR,
58
RID_MAX_FPR = RID_MAX,
59
RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
60
RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR,
63
/* -- Register sets ------------------------------------------------------- */
65
/* Make use of all registers, except the stack pointer. */
66
/* GPR set: the [RID_MIN_GPR, RID_MAX_GPR) range minus the member for RID_ESP. */
#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR)-RID2RSET(RID_ESP))
67
/* FPR set: the whole [RID_MIN_FPR, RID_MAX_FPR) range, nothing excluded. */
#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))
68
/* Union of RSET_GPR and RSET_FPR. */
#define RSET_ALL (RSET_GPR|RSET_FPR)
69
/* RSET_INIT is a plain alias of RSET_ALL on this target. */
#define RSET_INIT RSET_ALL
72
/* Note: this requires the use of FORCE_REX! */
73
/* This variant of RSET_GPR8 contains every register of RSET_GPR.
** NOTE(review): RSET_GPR8 is defined twice in this file; the conditional that
** selects between the two definitions is not visible here -- confirm.
*/
#define RSET_GPR8 RSET_GPR
75
/* This variant of RSET_GPR8 is limited to RID_EAX through RID_EBX inclusive
** (the +1 turns the inclusive upper bound into the exclusive one of a range).
** NOTE(review): RSET_GPR8 is defined twice in this file; the conditional that
** selects between the two definitions is not visible here -- confirm.
*/
#define RSET_GPR8 (RSET_RANGE(RID_EAX, RID_EBX+1))
78
/* ABI-specific register sets. */
79
/* The three registers EAX, ECX and EDX combined into one set. */
#define RSET_ACD (RID2RSET(RID_EAX)|RID2RSET(RID_ECX)|RID2RSET(RID_EDX))
82
/* Windows x64 ABI. */
83
/* Windows x64 variant of RSET_SCRATCH: RSET_ACD (EAX/ECX/EDX) plus
** R8D..R11D and XMM0..XMM5 (upper bounds are inclusive, hence the +1).
** The expression must directly follow the backslash continuation; nothing
** else may sit between the two lines.
*/
#define RSET_SCRATCH \
  (RSET_ACD|RSET_RANGE(RID_R8D, RID_R11D+1)|RSET_RANGE(RID_XMM0, RID_XMM5+1))
86
(RID_ECX|((RID_EDX|((RID_R8D|(RID_R9D<<5))<<5))<<5))
87
#define REGARG_NUMGPR 4
88
#define REGARG_NUMFPR 4
89
#define REGARG_FIRSTFPR RID_XMM0
90
#define REGARG_LASTFPR RID_XMM3
91
#define STACKARG_OFS (4*8)
93
/* The rest of the civilized x64 world has a common ABI. */
94
/* Non-Windows x64 variant of RSET_SCRATCH: RSET_ACD (EAX/ECX/EDX) plus
** ESI..R11D (inclusive, hence the +1) and every FPR.
** The expression must directly follow the backslash continuation; nothing
** else may sit between the two lines.
*/
#define RSET_SCRATCH \
  (RSET_ACD|RSET_RANGE(RID_ESI, RID_R11D+1)|RSET_FPR)
97
(RID_EDI|((RID_ESI|((RID_EDX|((RID_ECX|((RID_R8D|(RID_R9D \
98
<<5))<<5))<<5))<<5))<<5))
99
#define REGARG_NUMGPR 6
100
#define REGARG_NUMFPR 8
101
#define REGARG_FIRSTFPR RID_XMM0
102
#define REGARG_LASTFPR RID_XMM7
103
#define STACKARG_OFS 0
106
/* Common x86 ABI. */
107
/* x86 variant of RSET_SCRATCH: RSET_ACD (EAX/ECX/EDX) plus every FPR. */
#define RSET_SCRATCH (RSET_ACD|RSET_FPR)
108
/* Fastcall only: two register ids packed 5 bits apiece, RID_ECX in the low
** field and RID_EDX in the field above it.
*/
#define REGARG_GPRS ((RID_EDX<<5)|RID_ECX)
109
#define REGARG_NUMGPR 2 /* Fastcall only. */
110
#define REGARG_NUMFPR 0
111
#define STACKARG_OFS 0
115
/* Prefer the low 8 regs of each type to reduce REX prefixes. */
117
/* Pick a register from set rs: byte-swap the set, apply lj_fls to the result
** and XOR the outcome with 0x18 (flips bits 3 and 4 of the index).
** NOTE(review): presumably lj_bswap reverses the four bytes of a 32 bit value
** and lj_fls returns the index of the highest set bit, in which case the XOR
** maps the index back across the byte swap -- both helpers live elsewhere.
*/
#define rset_picktop(rs) (0x18 ^ lj_fls(lj_bswap(rs)))
120
/* -- Spill slots --------------------------------------------------------- */
122
/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
124
** SPS_FIXED: Available fixed spill slots in interpreter frame.
125
** This definition must match with the *.dasc file(s).
127
** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots.
*/
131
#define SPS_FIXED (4*2)
132
#define SPS_FIRST (4*2) /* Don't use callee register save area. */
144
/* Byte offset of a spill slot: slot index (as int32_t) times the 4 byte slot width. */
#define sps_scale(slot) ((int32_t)(slot) * 4)
145
/* Subtract SPS_FIXED from slot and round the remainder up to a multiple of 4. */
#define sps_align(slot) (~3 & ((slot) - SPS_FIXED + 3))
147
/* -- Exit state ---------------------------------------------------------- */
149
/* This definition must match with the *.dasc file(s). */
151
lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
152
intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
153
int32_t spill[256]; /* Spill slots. */
156
/* Limited by the range of a short fwd jump (127): (2+2)*(32-1)-2 = 122. */
157
#define EXITSTUB_SPACING (2+2)
158
#define EXITSTUBS_PER_GROUP 32
160
/* -- x86 ModRM operand encoding ------------------------------------------ */
163
XM_OFS0 = 0x00, XM_OFS8 = 0x40, XM_OFS32 = 0x80, XM_REG = 0xc0,
164
XM_SCALE1 = 0x00, XM_SCALE2 = 0x40, XM_SCALE4 = 0x80, XM_SCALE8 = 0xc0,
168
/* Structure to hold variable ModRM operand. */
170
int32_t ofs; /* Offset. */
171
uint8_t base; /* Base register or RID_NONE. */
172
uint8_t idx; /* Index register or RID_NONE. */
173
uint8_t scale; /* Index scale (XM_SCALE1 .. XM_SCALE8). */
176
/* -- Opcodes ------------------------------------------------------------- */
178
/* Macros to construct variable-length x86 opcodes. -(len+1) is in LSB. */
179
/* Opcode of length 1: low byte is -(1+1) = 0xfe, opcode byte o is the top byte.
** o is widened to uint32_t before the shift so that values >= 0x80 do not
** shift into the sign bit of an int.
*/
#define XO_(o) ((uint32_t)(0x0000fe + ((uint32_t)0x##o<<24)))
180
/* Two-byte opcode a b: low byte is -(2+1) = 0xfd, a lands in byte 2 and b in
** the top byte. Both operands are widened to uint32_t before shifting so that
** b >= 0x80 does not shift into the sign bit of an int.
*/
#define XO_FPU(a,b) ((uint32_t)(0x00fd + ((uint32_t)0x##a<<16)+((uint32_t)0x##b<<24)))
181
/* Opcode 0f o: low byte is -(2+1) = 0xfd, 0x0f sits in byte 2 and o in the top
** byte. o is widened to uint32_t before the shift so that values >= 0x80 do
** not shift into the sign bit of an int.
*/
#define XO_0f(o) ((uint32_t)(0x0f00fd + ((uint32_t)0x##o<<24)))
182
/* Opcode 66 o: low byte is -(2+1) = 0xfd, 0x66 sits in byte 2 and o in the top
** byte. o is widened to uint32_t before the shift so that values >= 0x80 do
** not shift into the sign bit of an int.
*/
#define XO_66(o) ((uint32_t)(0x6600fd + ((uint32_t)0x##o<<24)))
183
/* Opcode 66 0f o: low byte is -(3+1) = 0xfc, then 0x66, 0x0f and o in the top
** byte. o is widened to uint32_t before the shift so that values >= 0x80 do
** not shift into the sign bit of an int.
*/
#define XO_660f(o) ((uint32_t)(0x0f66fc + ((uint32_t)0x##o<<24)))
184
/* Opcode f2 0f o: low byte is -(3+1) = 0xfc, then 0xf2, 0x0f and o in the top
** byte. o is widened to uint32_t before the shift so that values >= 0x80 do
** not shift into the sign bit of an int.
*/
#define XO_f20f(o) ((uint32_t)(0x0ff2fc + ((uint32_t)0x##o<<24)))
185
/* Opcode f3 0f o: low byte is -(3+1) = 0xfc, then 0xf3, 0x0f and o in the top
** byte. o is widened to uint32_t before the shift so that values >= 0x80 do
** not shift into the sign bit of an int.
*/
#define XO_f30f(o) ((uint32_t)(0x0ff3fc + ((uint32_t)0x##o<<24)))
187
/* This list of x86 opcodes is not intended to be complete. Opcodes are only
188
** included when needed. Take a look at DynASM or jit.dis_x86 to see the
** complete set.
*/
192
/* Fixed length opcodes. XI_* prefix. */
198
XI_PUSH = 0x50, /* Really 50+r. */
199
XI_JCCs = 0x70, /* Really 7x. */
200
XI_JCCn = 0x80, /* Really 0f8x. */
202
XI_MOVrib = 0xb0, /* Really b0+r. */
203
XI_MOVri = 0xb8, /* Really b8+r. */
213
/* Note: little-endian byte-order! */
218
XI_FDUP = 0xc0d9, /* Really fld st0. */
219
XI_FPOP = 0xd8dd, /* Really fstp st0. */
220
XI_FPOP1 = 0xd9dd, /* Really fstp st1. */
229
/* Variable-length opcodes. XO_* prefix. */
232
XO_MOVtow = XO_66(89),
237
XO_ARITHib = XO_(80),
239
XO_ARITHi8 = XO_(83),
240
XO_ARITHiw8 = XO_66(83),
243
XO_SHIFTcl = XO_(d3),
250
XO_GROUP3b = XO_(f6),
252
XO_GROUP5b = XO_(fe),
254
XO_MOVZXb = XO_0f(b6),
255
XO_MOVZXw = XO_0f(b7),
256
XO_MOVSXb = XO_0f(be),
257
XO_MOVSXw = XO_0f(bf),
259
XO_BSWAP = XO_0f(c8),
262
XO_MOVSD = XO_f20f(10),
263
XO_MOVSDto = XO_f20f(11),
264
XO_MOVSS = XO_f30f(10),
265
XO_MOVSSto = XO_f30f(11),
266
XO_MOVLPD = XO_660f(12),
267
XO_MOVAPS = XO_0f(28),
268
XO_XORPS = XO_0f(57),
269
XO_ANDPS = XO_0f(54),
270
XO_ADDSD = XO_f20f(58),
271
XO_SUBSD = XO_f20f(5c),
272
XO_MULSD = XO_f20f(59),
273
XO_DIVSD = XO_f20f(5e),
274
XO_SQRTSD = XO_f20f(51),
275
XO_MINSD = XO_f20f(5d),
276
XO_MAXSD = XO_f20f(5f),
277
XO_ROUNDSD = 0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */
278
XO_UCOMISD = XO_660f(2e),
279
XO_CVTSI2SD = XO_f20f(2a),
280
XO_CVTSD2SI = XO_f20f(2d),
281
XO_CVTTSD2SI= XO_f20f(2c),
282
XO_CVTSI2SS = XO_f30f(2a),
283
XO_CVTSS2SI = XO_f30f(2d),
284
XO_CVTTSS2SI= XO_f30f(2c),
285
XO_CVTSS2SD = XO_f30f(5a),
286
XO_CVTSD2SS = XO_f20f(5a),
287
XO_ADDSS = XO_f30f(58),
288
XO_MOVD = XO_660f(6e),
289
XO_MOVDto = XO_660f(7e),
291
XO_FLDd = XO_(d9), XOg_FLDd = 0,
292
XO_FLDq = XO_(dd), XOg_FLDq = 0,
293
XO_FILDd = XO_(db), XOg_FILDd = 0,
294
XO_FILDq = XO_(df), XOg_FILDq = 5,
295
XO_FSTPd = XO_(d9), XOg_FSTPd = 3,
296
XO_FSTPq = XO_(dd), XOg_FSTPq = 3,
297
XO_FISTPq = XO_(df), XOg_FISTPq = 7,
298
XO_FISTTPq = XO_(dd), XOg_FISTTPq = 1,
299
XO_FADDq = XO_(dc), XOg_FADDq = 0,
300
XO_FLDCW = XO_(d9), XOg_FLDCW = 5,
301
XO_FNSTCW = XO_(d9), XOg_FNSTCW = 7
304
/* x86 opcode groups. */
305
/* Packed opcode group value as assembled by XG_(): two opcode fields at bit 16
** and bit 8 plus a group field in the low bits.
*/
typedef uint32_t x86Group;
307
/* Build an x86Group: i8 goes to bit 16 and up, i to bit 8 and up, g stays in
** the low bits; the three parts are summed.
*/
#define XG_(i8, i, g) \
  ((x86Group)(((i8) << 16) + ((i) << 8) + (g)))
308
/* Shorthand for XG_() with XI_ARITHi8 and XI_ARITHi as the two opcode fields
** and g as the group field.
*/
#define XG_ARITHi(g) XG_(XI_ARITHi8, XI_ARITHi, g)
309
/* Turn a group value into a length-1 opcode: bits 8..15 of xg move to the top
** byte, and 0xfe (= -(1+1)) goes into the low byte.
*/
#define XG_TOXOi(xg) ((x86Op)((((xg)<<16) & 0xff000000) + 0x000000fe))
310
/* Turn a group value into a length-1 opcode: bits 16..23 of xg move to the top
** byte, and 0xfe (= -(1+1)) goes into the low byte.
*/
#define XG_TOXOi8(xg) ((x86Op)((((xg)<<8) & 0xff000000) + 0x000000fe))
312
/* Length-1 opcode whose opcode byte is 0x03 + 8*a: a is shifted to bit 27 and
** added to the 0x030000fe template (0xfe = -(1+1) in the low byte).
*/
#define XO_ARITH(a) ((x86Op)(((a)<<27) + 0x030000fe))
313
/* Length-2 opcode 66 xx whose second byte is 0x03 + 8*a: a is shifted to bit 27
** and added to the 0x036600fd template (0xfd = -(2+1) in the low byte).
*/
#define XO_ARITHw(a) ((x86Op)(((a)<<27) + 0x036600fd))
316
XOg_ADD, XOg_OR, XOg_ADC, XOg_SBB, XOg_AND, XOg_SUB, XOg_XOR, XOg_CMP,
321
XOg_ROL, XOg_ROR, XOg_RCL, XOg_RCR, XOg_SHL, XOg_SHR, XOg_SAL, XOg_SAR
325
XOg_TEST, XOg_TEST_, XOg_NOT, XOg_NEG, XOg_MUL, XOg_IMUL, XOg_DIV, XOg_IDIV
329
XOg_INC, XOg_DEC, XOg_CALL, XOg_CALLfar, XOg_JMP, XOg_JMPfar, XOg_PUSH
332
/* x86 condition codes. */
334
CC_O, CC_NO, CC_B, CC_NB, CC_E, CC_NE, CC_BE, CC_NBE,
335
CC_S, CC_NS, CC_P, CC_NP, CC_L, CC_NL, CC_LE, CC_NLE,
336
CC_C = CC_B, CC_NAE = CC_C, CC_NC = CC_NB, CC_AE = CC_NB,
337
CC_Z = CC_E, CC_NZ = CC_NE, CC_NA = CC_BE, CC_A = CC_NBE,
338
CC_PE = CC_P, CC_PO = CC_NP, CC_NGE = CC_L, CC_GE = CC_NL,
339
CC_NG = CC_LE, CC_G = CC_NLE