1
// Copyright 2013 Dolphin Emulator Project
2
// Licensed under GPLv2
3
// Refer to the license.txt file included.
14
#include "base/basictypes.h"
16
#include "Arm64Emitter.h"
18
#include "CommonTypes.h"
19
#include "CPUDetect.h"
24
const int kWRegSizeInBits = 32;
25
const int kXRegSizeInBits = 64;
27
// The below few functions are taken from V8.
28
// Counts the consecutive zero bits at the top of the low `width` bits of
// `value`, scanning from bit (width - 1) downwards.
//
// value: word to inspect; bits at or above `width` are ignored.
// width: number of low-order bits considered (the callers visible in this
//        file pass kXRegSizeInBits).
// Returns a count in [0, width]: `width` when none of the considered bits is
// set, and 0 when width <= 0.
int CountLeadingZeros(uint64_t value, int width) {
	// TODO(jbramley): Optimize this for ARM64 hosts.
	int count = 0;
	for (int bit = width - 1; bit >= 0; --bit) {
		// Positions >= 64 do not exist in a uint64_t; treat them as zero rather
		// than shifting by an out-of-range amount, which is undefined behaviour.
		if (bit < 64 && ((value >> bit) & 1) != 0)
			break;
		++count;
	}
	return count;
}
39
// Returns the largest power of two that divides `value`, i.e. `value` with
// only its lowest set bit kept. Returns 0 when `value` is 0.
uint64_t LargestPowerOf2Divisor(uint64_t value) {
	// Two's-complement negation carried out in unsigned arithmetic. This is
	// well defined for every input, whereas negating (int64_t)value overflows
	// when only the top bit is set.
	return value & (~value + 1);
}
43
// Returns true when exactly one bit of `x` is set, i.e. x == 2^k for some
// k in [0, 63]. Zero is not a power of two.
bool IsPowerOfTwo(uint64_t x) {
	// x & (x - 1) clears the lowest set bit; the result is zero only when that
	// bit was the sole one.
	return (x != 0) && ((x & (x - 1)) == 0);
}
47
#define V8_UINT64_C(x) ((uint64_t)(x))
49
bool IsImmArithmetic(uint64_t input, u32 *val, bool *shift) {
51
if (val) *val = input;
52
if (shift) *shift = false;
54
} else if ((input & 0xFFF000) == input) {
55
if (val) *val = input >> 12;
56
if (shift) *shift = true;
62
bool IsImmLogical(uint64_t value, unsigned int width, unsigned int *n, unsigned int *imm_s, unsigned int *imm_r) {
63
//DCHECK((n != NULL) && (imm_s != NULL) && (imm_r != NULL));
64
// DCHECK((width == kWRegSizeInBits) || (width == kXRegSizeInBits));
68
// Logical immediates are encoded using parameters n, imm_s and imm_r using
69
// the following table:
71
// N imms immr size S R
72
// 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr)
73
// 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr)
74
// 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr)
75
// 0 110sss xxxrrr 8 UInt(sss) UInt(rrr)
76
// 0 1110ss xxxxrr 4 UInt(ss) UInt(rr)
77
// 0 11110s xxxxxr 2 UInt(s) UInt(r)
78
// (s bits must not be all set)
80
// A pattern is constructed of size bits, where the least significant S+1 bits
81
// are set. The pattern is rotated right by R, and repeated across a 32 or
82
// 64-bit value, depending on destination register width.
84
// Put another way: the basic format of a logical immediate is a single
85
// contiguous stretch of 1 bits, repeated across the whole word at intervals
86
// given by a power of 2. To identify them quickly, we first locate the
87
// lowest stretch of 1 bits, then the next 1 bit above that; that combination
88
// is different for every logical immediate, so it gives us all the
89
// information we need to identify the only logical immediate that our input
90
// could be, and then we simply check if that's the value we actually have.
92
// (The rotation parameter does give the possibility of the stretch of 1 bits
93
// going 'round the end' of the word. To deal with that, we observe that in
94
// any situation where that happens the bitwise NOT of the value is also a
95
// valid logical immediate. So we simply invert the input whenever its low bit
96
// is set, and then we know that the rotated case can't arise.)
99
// If the low bit is 1, negate the value, and set a flag to remember that we
100
// did (so that we can adjust the return values appropriately).
105
if (width == kWRegSizeInBits) {
106
// To handle 32-bit logical immediates, the very easiest thing is to repeat
107
// the input value twice to make a 64-bit word. The correct encoding of that
108
// as a logical immediate will also be the correct encoding of the 32-bit
111
// The most-significant 32 bits may not be zero (ie. negate is true) so
112
// shift the value left before duplicating it.
113
value <<= kWRegSizeInBits;
114
value |= value >> kWRegSizeInBits;
117
// The basic analysis idea: imagine our input word looks like this.
119
// 0011111000111110001111100011111000111110001111100011111000111110
123
// We find the lowest set bit (as an actual power-of-2 value, not its index)
124
// and call it a. Then we add a to our original number, which wipes out the
125
// bottommost stretch of set bits and replaces it with a 1 carried into the
126
// next zero bit. Then we look for the new lowest set bit, which is in
127
// position b, and subtract it, so now our number is just like the original
128
// but with the lowest stretch of set bits completely gone. Now we find the
129
// lowest set bit again, which is position c in the diagram above. Then we'll
130
// measure the distance d between bit positions a and c (using CLZ), and that
131
// tells us that the only valid logical immediate that could possibly be equal
132
// to this number is the one in which a stretch of bits running from a to just
133
// below b is replicated every d bits.
134
uint64_t a = LargestPowerOf2Divisor(value);
135
uint64_t value_plus_a = value + a;
136
uint64_t b = LargestPowerOf2Divisor(value_plus_a);
137
uint64_t value_plus_a_minus_b = value_plus_a - b;
138
uint64_t c = LargestPowerOf2Divisor(value_plus_a_minus_b);
144
// The general case, in which there is more than one stretch of set bits.
145
// Compute the repeat distance d, and set up a bitmask covering the basic
146
// unit of repetition (i.e. a word with the bottom d bits set). Also, in all
147
// of these cases the N bit of the output will be zero.
148
clz_a = CountLeadingZeros(a, kXRegSizeInBits);
149
int clz_c = CountLeadingZeros(c, kXRegSizeInBits);
151
mask = ((V8_UINT64_C(1) << d) - 1);
154
// Handle degenerate cases.
156
// If any of those 'find lowest set bit' operations didn't find a set bit at
157
// all, then the word will have been zero thereafter, so in particular the
158
// last lowest_set_bit operation will have returned zero. So we can test for
159
// all the special case conditions in one go by seeing if c is zero.
161
// The input was zero (or all 1 bits, which will come to here too after we
162
// inverted it at the start of the function), for which we just return
166
// Otherwise, if c was zero but a was not, then there's just one stretch
167
// of set bits in our word, meaning that we have the trivial case of
168
// d == 64 and only one 'repetition'. Set up all the same variables as in
169
// the general case above, and set the N bit in the output.
170
clz_a = CountLeadingZeros(a, kXRegSizeInBits);
172
mask = ~V8_UINT64_C(0);
177
// If the repeat period d is not a power of two, it can't be encoded.
178
if (!IsPowerOfTwo(d)) {
182
if (((b - a) & ~mask) != 0) {
183
// If the bit stretch (b - a) does not fit within the mask derived from the
184
// repeat period, then fail.
188
// The only possible option is b - a repeated every d bits. Now we're going to
189
// actually construct the valid logical immediate derived from that
190
// specification, and see if it equals our original input.
192
// To repeat a value every d bits, we multiply it by a number of the form
193
// (1 + 2^d + 2^(2d) + ...), i.e. 0x0001000100010001 or similar. These can
194
// be derived using a table lookup on CLZ(d).
195
static const uint64_t multipliers[] = {
196
0x0000000000000001UL,
197
0x0000000100000001UL,
198
0x0001000100010001UL,
199
0x0101010101010101UL,
200
0x1111111111111111UL,
201
0x5555555555555555UL,
203
int multiplier_idx = CountLeadingZeros(d, kXRegSizeInBits) - 57;
204
// Ensure that the index to the multipliers array is within bounds.
205
_dbg_assert_(JIT, (multiplier_idx >= 0) &&
206
(static_cast<size_t>(multiplier_idx) < ARRAY_SIZE(multipliers)));
207
uint64_t multiplier = multipliers[multiplier_idx];
208
uint64_t candidate = (b - a) * multiplier;
210
if (value != candidate) {
211
// The candidate pattern doesn't match our input value, so fail.
215
// We have a match! This is a valid logical immediate, so now we have to
216
// construct the bits and pieces of the instruction encoding that generates
219
// Count the set bits in our basic stretch. The special case of clz(0) == -1
220
// makes the answer come out right for stretches that reach the very top of
221
// the word (e.g. numbers like 0xffffc00000000000).
222
int clz_b = (b == 0) ? -1 : CountLeadingZeros(b, kXRegSizeInBits);
223
int s = clz_a - clz_b;
225
// Decide how many bits to rotate right by, to put the low bit of that basic
226
// stretch in position a.
229
// If we inverted the input right at the start of this function, here's
230
// where we compensate: the number of set bits becomes the number of clear
231
// bits, and the rotation count is based on position b rather than position
232
// a (since b is the location of the 'lowest' 1 bit after inversion).
234
r = (clz_b + 1) & (d - 1);
236
r = (clz_a + 1) & (d - 1);
239
// Now we're done, except for having to encode the S output in such a way that
240
// it gives both the number of set bits and the length of the repeated
241
// segment. The s field is encoded like this:
244
// ssssss 64 UInt(ssssss)
245
// 0sssss 32 UInt(sssss)
246
// 10ssss 16 UInt(ssss)
247
// 110sss 8 UInt(sss)
251
// So we 'or' (-d << 1) with our computed s to form imms.
253
*imm_s = ((-d << 1) | (s - 1)) & 0x3f;
259
static int EncodeSize(int size) {
269
void ARM64XEmitter::SetCodePointer(u8* ptr)
272
m_startcode = m_code;
273
m_lastCacheFlushEnd = ptr;
276
const u8* ARM64XEmitter::GetCodePointer() const
281
u8* ARM64XEmitter::GetWritableCodePtr()
286
void ARM64XEmitter::ReserveCodeSpace(u32 bytes)
288
for (u32 i = 0; i < bytes/4; i++)
292
const u8* ARM64XEmitter::AlignCode16()
294
int c = int((u64)m_code & 15);
296
ReserveCodeSpace(16-c);
300
const u8* ARM64XEmitter::AlignCodePage()
302
int c = int((u64)m_code & 4095);
304
ReserveCodeSpace(4096-c);
308
void ARM64XEmitter::FlushIcache()
310
FlushIcacheSection(m_lastCacheFlushEnd, m_code);
311
m_lastCacheFlushEnd = m_code;
314
void ARM64XEmitter::FlushIcacheSection(u8* start, u8* end)
317
// Header file says this is equivalent to: sys_icache_invalidate(start, end - start);
318
sys_cache_control(kCacheFunctionPrepareForExecution, start, end - start);
319
#elif !defined(_M_IX86) && !defined(_M_X64)
320
// Code from Dolphin, contributed by the Mono project.
322
// Don't rely on GCC's __clear_cache implementation, as it caches
323
// icache/dcache cache line sizes, that can vary between cores on
324
// big.LITTLE architectures.
327
if (cpu_info.sQuirks.bExynos8890DifferingCachelineSizes) {
328
// Enforce the minimum cache line size to be completely safe on these CPUs.
333
static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
334
__asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
335
isize = 4 << ((ctr_el0 >> 0) & 0xf);
336
dsize = 4 << ((ctr_el0 >> 16) & 0xf);
338
// use the global minimum cache line size
339
icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
340
dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;
343
u64 addr = (u64)start & ~(u64)(dsize - 1);
344
for (; addr < (u64)end; addr += dsize)
345
// use "civac" instead of "cvau", as this is the suggested workaround for
346
// Cortex-A53 errata 819472, 826319, 827319 and 824069.
347
__asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
348
__asm__ volatile("dsb ish" : : : "memory");
350
addr = (u64)start & ~(u64)(isize - 1);
351
for (; addr < (u64)end; addr += isize)
352
__asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");
354
__asm__ volatile("dsb ish" : : : "memory");
355
__asm__ volatile("isb" : : : "memory");
359
// Exception generation
360
static const u32 ExcEnc[][3] = {
371
// Arithmetic generation
372
static const u32 ArithEnc[] = {
377
// Conditional Select
378
static const u32 CondSelectEnc[][2] = {
385
// Data-Processing (1 source)
386
static const u32 Data1SrcEnc[][2] = {
395
// Data-Processing (2 source)
396
static const u32 Data2SrcEnc[] = {
409
0x13, // CRC32X (64bit Only)
410
0x17, // CRC32CX (64bit Only)
413
// Data-Processing (3 source)
414
static const u32 Data3SrcEnc[][2] = {
417
{1, 0}, // SMADDL (64Bit Only)
418
{1, 1}, // SMSUBL (64Bit Only)
419
{2, 0}, // SMULH (64Bit Only)
420
{5, 0}, // UMADDL (64Bit Only)
421
{5, 1}, // UMSUBL (64Bit Only)
422
{6, 0}, // UMULH (64Bit Only)
425
// Logical (shifted register)
426
static const u32 LogicalEnc[][2] = {
437
// Load/Store Exclusive
438
static const u32 LoadStoreExcEnc[][5] = {
439
{0, 0, 0, 0, 0}, // STXRB
440
{0, 0, 0, 0, 1}, // STLXRB
441
{0, 0, 1, 0, 0}, // LDXRB
442
{0, 0, 1, 0, 1}, // LDAXRB
443
{0, 1, 0, 0, 1}, // STLRB
444
{0, 1, 1, 0, 1}, // LDARB
445
{1, 0, 0, 0, 0}, // STXRH
446
{1, 0, 0, 0, 1}, // STLXRH
447
{1, 0, 1, 0, 0}, // LDXRH
448
{1, 0, 1, 0, 1}, // LDAXRH
449
{1, 1, 0, 0, 1}, // STLRH
450
{1, 1, 1, 0, 1}, // LDARH
451
{2, 0, 0, 0, 0}, // STXR
452
{3, 0, 0, 0, 0}, // (64bit) STXR
453
{2, 0, 0, 0, 1}, // STLXR
454
{3, 0, 0, 0, 1}, // (64bit) STLXR
455
{2, 0, 0, 1, 0}, // STXP
456
{3, 0, 0, 1, 0}, // (64bit) STXP
457
{2, 0, 0, 1, 1}, // STLXP
458
{3, 0, 0, 1, 1}, // (64bit) STLXP
459
{2, 0, 1, 0, 0}, // LDXR
460
{3, 0, 1, 0, 0}, // (64bit) LDXR
461
{2, 0, 1, 0, 1}, // LDAXR
462
{3, 0, 1, 0, 1}, // (64bit) LDAXR
463
{2, 0, 1, 1, 0}, // LDXP
464
{3, 0, 1, 1, 0}, // (64bit) LDXP
465
{2, 0, 1, 1, 1}, // LDAXP
466
{3, 0, 1, 1, 1}, // (64bit) LDAXP
467
{2, 1, 0, 0, 1}, // STLR
468
{3, 1, 0, 0, 1}, // (64bit) STLR
469
{2, 1, 1, 0, 1}, // LDAR
470
{3, 1, 1, 0, 1}, // (64bit) LDAR
473
void ARM64XEmitter::EncodeCompareBranchInst(u32 op, ARM64Reg Rt, const void* ptr)
475
bool b64Bit = Is64Bit(Rt);
476
s64 distance = (s64)ptr - (s64)m_code;
478
_assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %llx", __FUNCTION__, distance);
482
_assert_msg_(DYNA_REC, distance >= -0x40000 && distance <= 0x3FFFF, "%s: Received too large distance: %llx", __FUNCTION__, distance);
485
Write32((b64Bit << 31) | (0x34 << 24) | (op << 24) | \
486
(((u32)distance << 5) & 0xFFFFE0) | Rt);
489
// Emits a test-bit-and-branch instruction (TBZ/TBNZ) that jumps to `ptr`.
//
// op:   added into the opcode at bit 24; the TBZ wrapper passes 0 and the
//       TBNZ wrapper passes 1.
// Rt:   register whose bit is tested.
// bits: index of the bit to test; must be below 32 for a 32-bit register and
//       below 64 for a 64-bit register.
// ptr:  branch target. Its byte distance from m_code must be a multiple of 4
//       and, counted in instructions, fit a signed 14-bit immediate.
void ARM64XEmitter::EncodeTestBranchInst(u32 op, ARM64Reg Rt, u8 bits, const void* ptr)
{
	bool b64Bit = Is64Bit(Rt);
	s64 distance = (s64)ptr - (s64)m_code;

	_assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %llx", __FUNCTION__, distance);

	// The immediate counts instructions, not bytes.
	distance >>= 2;

	// imm14 is a signed 14-bit field, so the encodable range is
	// [-0x2000, 0x1FFF] (the same bounds IsInRangeImm14 uses). Anything wider
	// would be silently truncated by the 0x7FFE0 mask below.
	_assert_msg_(DYNA_REC, distance >= -0x2000 && distance <= 0x1FFF, "%s: Received too large distance: %llx", __FUNCTION__, distance);
	_assert_msg_(DYNA_REC, bits < (b64Bit ? 64 : 32), "%s: bit index out of range: %d", __FUNCTION__, bits);

	Rt = DecodeReg(Rt);
	// Bit 31 (b5) holds the top bit of the tested bit index and bits 23:19
	// (b40) hold its low five bits, matching the encoding SetJumpTarget writes
	// for the same instruction. Masking keeps the index out of the opcode bits.
	Write32(((u32)(bits & 0x20) << 26) | (0x36 << 24) | (op << 24) | \
	        ((u32)(bits & 0x1F) << 19) | (((u32)distance << 5) & 0x7FFE0) | Rt);
}
505
void ARM64XEmitter::EncodeUnconditionalBranchInst(u32 op, const void* ptr)
507
s64 distance = (s64)ptr - s64(m_code);
509
_assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %llx", __FUNCTION__, distance);
513
_assert_msg_(DYNA_REC, distance >= -0x2000000LL && distance <= 0x1FFFFFFLL, "%s: Received too large distance: %llx", __FUNCTION__, distance);
515
Write32((op << 31) | (0x5 << 26) | (distance & 0x3FFFFFF));
518
void ARM64XEmitter::EncodeUnconditionalBranchInst(u32 opc, u32 op2, u32 op3, u32 op4, ARM64Reg Rn)
521
Write32((0x6B << 25) | (opc << 21) | (op2 << 16) | (op3 << 10) | (Rn << 5) | op4);
524
void ARM64XEmitter::EncodeExceptionInst(u32 instenc, u32 imm)
526
_assert_msg_(DYNA_REC, !(imm & ~0xFFFF), "%s: Exception instruction too large immediate: %d", __FUNCTION__, imm);
528
Write32((0xD4 << 24) | (ExcEnc[instenc][0] << 21) | (imm << 5) | (ExcEnc[instenc][1] << 2) | ExcEnc[instenc][2]);
531
void ARM64XEmitter::EncodeSystemInst(u32 op0, u32 op1, u32 CRn, u32 CRm, u32 op2, ARM64Reg Rt)
533
Write32((0x354 << 22) | (op0 << 19) | (op1 << 16) | (CRn << 12) | (CRm << 8) | (op2 << 5) | Rt);
536
void ARM64XEmitter::EncodeArithmeticInst(u32 instenc, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option)
538
bool b64Bit = Is64Bit(Rd);
543
Write32((b64Bit << 31) | (flags << 29) | (ArithEnc[instenc] << 21) | \
544
(Option.GetType() == ArithOption::TYPE_EXTENDEDREG ? (1 << 21) : 0) | (Rm << 16) | Option.GetData() | (Rn << 5) | Rd);
547
void ARM64XEmitter::EncodeArithmeticCarryInst(u32 op, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
549
bool b64Bit = Is64Bit(Rd);
554
Write32((b64Bit << 31) | (op << 30) | (flags << 29) | \
555
(0xD0 << 21) | (Rm << 16) | (Rn << 5) | Rd);
558
// Emits a conditional-compare (immediate) instruction word.
// op selects the variant (the CCMN wrapper passes 0, the CCMP wrapper 1),
// imm must fit in 5 bits and nzcv in 4 bits; both are checked below.
// The width bit (bit 31) is taken from Rn.
void ARM64XEmitter::EncodeCondCompareImmInst(u32 op, ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond)
560
bool b64Bit = Is64Bit(Rn);
562
// NOTE(review): the two asserts below have no trailing ';', unlike every
// other _assert_msg_ use in this file. That only compiles if the macro
// expands to a complete statement — TODO confirm and add the semicolons.
_assert_msg_(DYNA_REC, !(imm & ~0x1F), "%s: too large immediate: %d", __FUNCTION__, imm)
563
_assert_msg_(DYNA_REC, !(nzcv & ~0xF), "%s: Flags out of range: %d", __FUNCTION__, nzcv)
566
Write32((b64Bit << 31) | (op << 30) | (1 << 29) | (0xD2 << 21) | \
567
(imm << 16) | (cond << 12) | (1 << 11) | (Rn << 5) | nzcv);
570
// Emits a conditional-compare (register) instruction word.
// op selects the variant (the CCMN wrapper passes 0, the CCMP wrapper 1) and
// nzcv must fit in 4 bits. The width bit (bit 31) is taken from Rm.
void ARM64XEmitter::EncodeCondCompareRegInst(u32 op, ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond)
572
bool b64Bit = Is64Bit(Rm);
574
// NOTE(review): this assert has no trailing ';', unlike most _assert_msg_
// uses in this file. That only compiles if the macro expands to a complete
// statement — TODO confirm and add the semicolon.
_assert_msg_(DYNA_REC, !(nzcv & ~0xF), "%s: Flags out of range: %d", __FUNCTION__, nzcv)
578
Write32((b64Bit << 31) | (op << 30) | (1 << 29) | (0xD2 << 21) | \
579
(Rm << 16) | (cond << 12) | (Rn << 5) | nzcv);
582
void ARM64XEmitter::EncodeCondSelectInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond)
584
bool b64Bit = Is64Bit(Rd);
589
Write32((b64Bit << 31) | (CondSelectEnc[instenc][0] << 30) | \
590
(0xD4 << 21) | (Rm << 16) | (cond << 12) | (CondSelectEnc[instenc][1] << 10) | \
594
void ARM64XEmitter::EncodeData1SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn)
596
bool b64Bit = Is64Bit(Rd);
600
Write32((b64Bit << 31) | (0x2D6 << 21) | \
601
(Data1SrcEnc[instenc][0] << 16) | (Data1SrcEnc[instenc][1] << 10) | \
605
void ARM64XEmitter::EncodeData2SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
607
bool b64Bit = Is64Bit(Rd);
612
Write32((b64Bit << 31) | (0x0D6 << 21) | \
613
(Rm << 16) | (Data2SrcEnc[instenc] << 10) | \
617
void ARM64XEmitter::EncodeData3SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
619
bool b64Bit = Is64Bit(Rd);
625
Write32((b64Bit << 31) | (0xD8 << 21) | (Data3SrcEnc[instenc][0] << 21) | \
626
(Rm << 16) | (Data3SrcEnc[instenc][1] << 15) | \
627
(Ra << 10) | (Rn << 5) | Rd);
630
void ARM64XEmitter::EncodeLogicalInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift)
632
bool b64Bit = Is64Bit(Rd);
637
Write32((b64Bit << 31) | (LogicalEnc[instenc][0] << 29) | (0x5 << 25) | (LogicalEnc[instenc][1] << 21) | \
638
Shift.GetData() | (Rm << 16) | (Rn << 5) | Rd);
641
// Emits a load instruction word built from `bitop` (placed at bit 30), the
// vector flag of Rt (bit 26), the constant 0x18 << 24, `imm` (placed at
// bit 5) and Rt.
void ARM64XEmitter::EncodeLoadRegisterInst(u32 bitop, ARM64Reg Rt, u32 imm)
643
bool b64Bit = Is64Bit(Rt);
644
bool bVec = IsVector(Rt);
646
// NOTE(review): this passes only when the low 20 bits of imm are all zero,
// which does not match the "offset too large" message. Range checks
// elsewhere in this file use an inverted mask (e.g. !(imm & ~0xFFF)), and
// imm sits at bit 5 below the 0x18 << 24 bits, leaving room for 19 bits.
// Looks like a missing '~' — TODO confirm the intended bound.
_assert_msg_(DYNA_REC, !(imm & 0xFFFFF), "%s: offset too large %d", __FUNCTION__, imm);
649
if (b64Bit && bitop != 0x2) // LDRSW(0x2) uses 64bit reg, doesn't have 64bit bit set
651
Write32((bitop << 30) | (bVec << 26) | (0x18 << 24) | (imm << 5) | Rt);
654
void ARM64XEmitter::EncodeLoadStoreExcInst(u32 instenc,
655
ARM64Reg Rs, ARM64Reg Rt2, ARM64Reg Rn, ARM64Reg Rt)
658
Rt2 = DecodeReg(Rt2);
661
Write32((LoadStoreExcEnc[instenc][0] << 30) | (0x8 << 24) | (LoadStoreExcEnc[instenc][1] << 23) | \
662
(LoadStoreExcEnc[instenc][2] << 22) | (LoadStoreExcEnc[instenc][3] << 21) | (Rs << 16) | \
663
(LoadStoreExcEnc[instenc][4] << 15) | (Rt2 << 10) | (Rn << 5) | Rt);
666
void ARM64XEmitter::EncodeLoadStorePairedInst(u32 op, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm)
668
bool b64Bit = Is64Bit(Rt);
669
bool b128Bit = IsQuad(Rt);
670
bool bVec = IsVector(Rt);
679
_assert_msg_(DYNA_REC, !(imm & ~0xF), "%s: offset too large %d", __FUNCTION__, imm);
684
else if (b64Bit && bVec)
686
else if (b64Bit && !bVec)
690
Rt2 = DecodeReg(Rt2);
692
Write32((opc << 30) | (bVec << 26) | (op << 22) | (imm << 15) | (Rt2 << 10) | (Rn << 5) | Rt);
695
void ARM64XEmitter::EncodeLoadStoreIndexedInst(u32 op, u32 op2, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
697
bool b64Bit = Is64Bit(Rt);
698
bool bVec = IsVector(Rt);
700
u32 offset = imm & 0x1FF;
702
_assert_msg_(DYNA_REC, !(imm < -256 || imm > 255), "%s: offset too large %d", __FUNCTION__, imm);
706
Write32((b64Bit << 30) | (op << 22) | (bVec << 26) | (offset << 12) | (op2 << 10) | (Rn << 5) | Rt);
709
void ARM64XEmitter::EncodeLoadStoreIndexedInst(u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm, u8 size)
711
bool b64Bit = Is64Bit(Rt);
712
bool bVec = IsVector(Rt);
722
_assert_msg_(DYNA_REC, ((imm >> shift) << shift) == imm, "%s(INDEX_UNSIGNED): offset must be aligned %d", __FUNCTION__, imm);
725
_assert_msg_(DYNA_REC, imm >= 0, "%s(INDEX_UNSIGNED): offset must be positive %d", __FUNCTION__, imm);
726
_assert_msg_(DYNA_REC, !(imm & ~0xFFF), "%s(INDEX_UNSIGNED): offset too large %d", __FUNCTION__, imm);
730
Write32((b64Bit << 30) | (op << 22) | (bVec << 26) | (imm << 10) | (Rn << 5) | Rt);
733
void ARM64XEmitter::EncodeMOVWideInst(u32 op, ARM64Reg Rd, u32 imm, ShiftAmount pos)
735
bool b64Bit = Is64Bit(Rd);
737
_assert_msg_(DYNA_REC, !(imm & ~0xFFFF), "%s: immediate out of range: %d", __FUNCTION__, imm);
740
Write32((b64Bit << 31) | (op << 29) | (0x25 << 23) | (pos << 21) | (imm << 5) | Rd);
743
void ARM64XEmitter::EncodeBitfieldMOVInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms)
745
bool b64Bit = Is64Bit(Rd);
749
Write32((b64Bit << 31) | (op << 29) | (0x26 << 23) | (b64Bit << 22) | \
750
(immr << 16) | (imms << 10) | (Rn << 5) | Rd);
753
void ARM64XEmitter::EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
757
ARM64Reg decoded_Rm = DecodeReg(Rm.GetReg());
759
Write32((size << 30) | (opc << 22) | (0x1C1 << 21) | (decoded_Rm << 16) | \
760
Rm.GetData() | (1 << 11) | (Rn << 5) | Rt);
763
void ARM64XEmitter::EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd)
765
bool b64Bit = Is64Bit(Rd);
767
_assert_msg_(DYNA_REC, !(imm & ~0xFFF), "%s: immediate too large: %x", __FUNCTION__, imm);
771
Write32((b64Bit << 31) | (op << 30) | (flags << 29) | (0x11 << 24) | (shift << 22) | \
772
(imm << 10) | (Rn << 5) | Rd);
775
void ARM64XEmitter::EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, int n)
777
// Sometimes Rd is fixed to SP, but can still be 32bit or 64bit.
778
// Use Rn to determine bitness here.
779
bool b64Bit = Is64Bit(Rn);
784
Write32((b64Bit << 31) | (op << 29) | (0x24 << 23) | (n << 22) | \
785
(immr << 16) | (imms << 10) | (Rn << 5) | Rd);
788
void ARM64XEmitter::EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)
790
bool b64Bit = Is64Bit(Rt);
804
_assert_msg_(DYNA_REC, false, "%s doesn't support INDEX_UNSIGNED!", __FUNCTION__);
817
_assert_msg_(JIT, imm >= -64 && imm <= 63, "%s recieved too large imm: %d", __FUNCTION__, imm);
820
Rt2 = DecodeReg(Rt2);
823
Write32((op << 30) | (5 << 27) | (type_encode << 23) | (load << 22) | \
824
(((uint32_t)imm & 0x7F) << 15) | (Rt2 << 10) | (Rn << 5) | Rt);
826
void ARM64XEmitter::EncodeAddressInst(u32 op, ARM64Reg Rd, s32 imm)
830
Write32((op << 31) | ((imm & 0x3) << 29) | (0x10 << 24) | \
831
((imm & 0x1FFFFC) << 3) | Rd);
834
void ARM64XEmitter::EncodeLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
836
_assert_msg_(DYNA_REC, !(imm < -256 || imm > 255), "%s received too large offset: %d", __FUNCTION__, imm);
840
Write32((size << 30) | (7 << 27) | (op << 22) | ((imm & 0x1FF) << 12) | (Rn << 5) | Rt);
843
static inline bool IsInRangeImm19(s64 distance) {
844
return (distance >= -0x40000 && distance <= 0x3FFFF);
847
static inline bool IsInRangeImm14(s64 distance) {
848
return (distance >= -0x2000 && distance <= 0x1FFF);
851
static inline bool IsInRangeImm26(s64 distance) {
852
return (distance >= -0x2000000 && distance <= 0x1FFFFFF);
855
static inline u32 MaskImm19(s64 distance) {
856
return distance & 0x7FFFF;
859
static inline u32 MaskImm14(s64 distance) {
860
return distance & 0x3FFF;
863
static inline u32 MaskImm26(s64 distance) {
864
return distance & 0x3FFFFFF;
867
// FixupBranch branching
868
// Resolves a previously recorded branch: derives the displacement from
// branch.ptr to the current m_code, builds the instruction word that matches
// branch.type (range-checked against the 19-, 14- or 26-bit immediate of that
// form) and stores it at branch.ptr.
void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch)
872
s64 distance = (s64)(m_code - branch.ptr);
881
_assert_msg_(DYNA_REC, IsInRangeImm19(distance), "%s(%d): Received too large distance: %llx", __FUNCTION__, branch.type, distance);
882
bool b64Bit = Is64Bit(branch.reg);
883
ARM64Reg reg = DecodeReg(branch.reg);
884
inst = (b64Bit << 31) | (0x1A << 25) | (Not << 24) | (MaskImm19(distance) << 5) | reg;
887
case 2: // B (conditional)
888
_assert_msg_(DYNA_REC, IsInRangeImm19(distance), "%s(%d): Received too large distance: %llx", __FUNCTION__, branch.type, distance);
889
inst = (0x2A << 25) | (MaskImm19(distance) << 5) | branch.cond;
895
_assert_msg_(DYNA_REC, IsInRangeImm14(distance), "%s(%d): Received too large distance: %llx", __FUNCTION__, branch.type, distance);
896
ARM64Reg reg = DecodeReg(branch.reg);
897
// The tested bit index is split: bit 5 goes to bit 31, the low five bits to 23:19.
inst = ((branch.bit & 0x20) << 26) | (0x1B << 25) | (Not << 24) | ((branch.bit & 0x1F) << 19) | (MaskImm14(distance) << 5) | reg;
900
case 5: // B (unconditional)
901
_assert_msg_(DYNA_REC, IsInRangeImm26(distance), "%s(%d): Received too large distance: %llx", __FUNCTION__, branch.type, distance);
902
inst = (0x5 << 26) | MaskImm26(distance);
904
case 6: // BL (unconditional)
905
_assert_msg_(DYNA_REC, IsInRangeImm26(distance), "%s(%d): Received too large distance: %llx", __FUNCTION__, branch.type, distance);
906
inst = (0x25 << 26) | MaskImm26(distance);
909
// Overwrite the placeholder instruction with the resolved branch.
*(u32*)branch.ptr = inst;
912
FixupBranch ARM64XEmitter::CBZ(ARM64Reg Rt)
921
FixupBranch ARM64XEmitter::CBNZ(ARM64Reg Rt)
930
FixupBranch ARM64XEmitter::B(CCFlags cond)
939
FixupBranch ARM64XEmitter::TBZ(ARM64Reg Rt, u8 bit)
949
FixupBranch ARM64XEmitter::TBNZ(ARM64Reg Rt, u8 bit)
959
FixupBranch ARM64XEmitter::B()
967
FixupBranch ARM64XEmitter::BL()
976
// Compare and Branch
977
void ARM64XEmitter::CBZ(ARM64Reg Rt, const void* ptr)
979
EncodeCompareBranchInst(0, Rt, ptr);
981
void ARM64XEmitter::CBNZ(ARM64Reg Rt, const void* ptr)
983
EncodeCompareBranchInst(1, Rt, ptr);
986
// Conditional Branch
987
void ARM64XEmitter::B(CCFlags cond, const void* ptr)
989
s64 distance = (s64)ptr - (s64)m_code;
993
_assert_msg_(DYNA_REC, IsInRangeImm19(distance), "%s: Received too large distance: %p->%p %lld %llx", __FUNCTION__, m_code, ptr, distance, distance);
994
Write32((0x54 << 24) | (MaskImm19(distance) << 5) | cond);
998
void ARM64XEmitter::TBZ(ARM64Reg Rt, u8 bits, const void* ptr)
1000
EncodeTestBranchInst(0, Rt, bits, ptr);
1002
void ARM64XEmitter::TBNZ(ARM64Reg Rt, u8 bits, const void* ptr)
1004
EncodeTestBranchInst(1, Rt, bits, ptr);
1007
// Unconditional Branch
1008
void ARM64XEmitter::B(const void* ptr)
1010
EncodeUnconditionalBranchInst(0, ptr);
1012
void ARM64XEmitter::BL(const void* ptr)
1014
EncodeUnconditionalBranchInst(1, ptr);
1017
void ARM64XEmitter::QuickCallFunction(ARM64Reg scratchreg, const void *func) {
1018
s64 distance = (s64)func - (s64)m_code;
1019
distance >>= 2; // Can only branch to opcode-aligned (4) addresses
1020
if (!IsInRangeImm26(distance)) {
1021
// WARN_LOG(DYNA_REC, "Distance too far in function call (%p to %p)! Using scratch.", m_code, func);
1022
MOVI2R(scratchreg, (uintptr_t)func);
1029
// Unconditional Branch (register)
1030
void ARM64XEmitter::BR(ARM64Reg Rn)
1032
EncodeUnconditionalBranchInst(0, 0x1F, 0, 0, Rn);
1034
void ARM64XEmitter::BLR(ARM64Reg Rn)
1036
EncodeUnconditionalBranchInst(1, 0x1F, 0, 0, Rn);
1038
void ARM64XEmitter::RET(ARM64Reg Rn)
1040
EncodeUnconditionalBranchInst(2, 0x1F, 0, 0, Rn);
1042
void ARM64XEmitter::ERET()
1044
EncodeUnconditionalBranchInst(4, 0x1F, 0, 0, SP);
1046
void ARM64XEmitter::DRPS()
1048
EncodeUnconditionalBranchInst(5, 0x1F, 0, 0, SP);
1051
// Exception generation
1052
void ARM64XEmitter::SVC(u32 imm)
1054
EncodeExceptionInst(0, imm);
1057
void ARM64XEmitter::HVC(u32 imm)
1059
EncodeExceptionInst(1, imm);
1062
void ARM64XEmitter::SMC(u32 imm)
1064
EncodeExceptionInst(2, imm);
1067
void ARM64XEmitter::BRK(u32 imm)
1069
EncodeExceptionInst(3, imm);
1072
void ARM64XEmitter::HLT(u32 imm)
1074
EncodeExceptionInst(4, imm);
1077
void ARM64XEmitter::DCPS1(u32 imm)
1079
EncodeExceptionInst(5, imm);
1082
void ARM64XEmitter::DCPS2(u32 imm)
1084
EncodeExceptionInst(6, imm);
1087
void ARM64XEmitter::DCPS3(u32 imm)
1089
EncodeExceptionInst(7, imm);
1093
void ARM64XEmitter::_MSR(PStateField field, u8 imm)
1095
u32 op1 = 0, op2 = 0;
1098
case FIELD_SPSel: op1 = 0; op2 = 5; break;
1099
case FIELD_DAIFSet: op1 = 3; op2 = 6; break;
1100
case FIELD_DAIFClr: op1 = 3; op2 = 7; break;
1102
_assert_msg_(JIT, false, "Invalid PStateField to do a imm move to");
1105
EncodeSystemInst(0, op1, 4, imm, op2, WSP);
1108
static void GetSystemReg(PStateField field, int &o0, int &op1, int &CRn, int &CRm, int &op2) {
1111
o0 = 3; op1 = 3; CRn = 4; CRm = 2; op2 = 0;
1114
o0 = 3; op1 = 3; CRn = 4; CRm = 4; op2 = 0;
1117
o0 = 3; op1 = 3; CRn = 4; CRm = 4; op2 = 1;
1120
_assert_msg_(JIT, false, "Invalid PStateField to do a register move from/to");
1125
void ARM64XEmitter::_MSR(PStateField field, ARM64Reg Rt) {
1126
int o0 = 0, op1 = 0, CRn = 0, CRm = 0, op2 = 0;
1127
_assert_msg_(JIT, Is64Bit(Rt), "MSR: Rt must be 64-bit");
1128
GetSystemReg(field, o0, op1, CRn, CRm, op2);
1129
EncodeSystemInst(o0, op1, CRn, CRm, op2, DecodeReg(Rt));
1132
void ARM64XEmitter::MRS(ARM64Reg Rt, PStateField field) {
1133
int o0 = 0, op1 = 0, CRn = 0, CRm = 0, op2 = 0;
1134
_assert_msg_(JIT, Is64Bit(Rt), "MRS: Rt must be 64-bit");
1135
GetSystemReg(field, o0, op1, CRn, CRm, op2);
1136
EncodeSystemInst(o0 | 4, op1, CRn, CRm, op2, DecodeReg(Rt));
1139
void ARM64XEmitter::HINT(SystemHint op)
1141
EncodeSystemInst(0, 3, 2, 0, op, WSP);
1143
void ARM64XEmitter::CLREX()
1145
EncodeSystemInst(0, 3, 3, 0, 2, WSP);
1147
void ARM64XEmitter::DSB(BarrierType type)
1149
EncodeSystemInst(0, 3, 3, type, 4, WSP);
1151
void ARM64XEmitter::DMB(BarrierType type)
1153
EncodeSystemInst(0, 3, 3, type, 5, WSP);
1155
void ARM64XEmitter::ISB(BarrierType type)
1157
EncodeSystemInst(0, 3, 3, type, 6, WSP);
1160
// Add/Subtract (extended register)
1161
void ARM64XEmitter::ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1163
ADD(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0));
1166
void ARM64XEmitter::ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option)
1168
EncodeArithmeticInst(0, false, Rd, Rn, Rm, Option);
1171
void ARM64XEmitter::ADDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1173
EncodeArithmeticInst(0, true, Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0));
1176
void ARM64XEmitter::ADDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option)
1178
EncodeArithmeticInst(0, true, Rd, Rn, Rm, Option);
1181
void ARM64XEmitter::SUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1183
SUB(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0));
1186
void ARM64XEmitter::SUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option)
1188
EncodeArithmeticInst(1, false, Rd, Rn, Rm, Option);
1191
void ARM64XEmitter::SUBS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1193
EncodeArithmeticInst(1, true, Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0));
1196
void ARM64XEmitter::SUBS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option)
1198
EncodeArithmeticInst(1, true, Rd, Rn, Rm, Option);
1201
void ARM64XEmitter::CMN(ARM64Reg Rn, ARM64Reg Rm)
1203
CMN(Rn, Rm, ArithOption(Rn, ST_LSL, 0));
1206
void ARM64XEmitter::CMN(ARM64Reg Rn, ARM64Reg Rm, ArithOption Option)
1208
EncodeArithmeticInst(0, true, Is64Bit(Rn) ? ZR : WZR, Rn, Rm, Option);
1211
void ARM64XEmitter::CMP(ARM64Reg Rn, ARM64Reg Rm)
1213
CMP(Rn, Rm, ArithOption(Rn, ST_LSL, 0));
1216
void ARM64XEmitter::CMP(ARM64Reg Rn, ARM64Reg Rm, ArithOption Option)
1218
EncodeArithmeticInst(1, true, Is64Bit(Rn) ? ZR : WZR, Rn, Rm, Option);
1221
// Add/Subtract (with carry)
1222
void ARM64XEmitter::ADC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1224
EncodeArithmeticCarryInst(0, false, Rd, Rn, Rm);
1226
void ARM64XEmitter::ADCS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1228
EncodeArithmeticCarryInst(0, true, Rd, Rn, Rm);
1230
void ARM64XEmitter::SBC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1232
EncodeArithmeticCarryInst(1, false, Rd, Rn, Rm);
1234
void ARM64XEmitter::SBCS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1236
EncodeArithmeticCarryInst(1, true, Rd, Rn, Rm);
1239
// Conditional Compare (immediate)
1240
void ARM64XEmitter::CCMN(ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond)
1242
EncodeCondCompareImmInst(0, Rn, imm, nzcv, cond);
1244
void ARM64XEmitter::CCMP(ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond)
1246
EncodeCondCompareImmInst(1, Rn, imm, nzcv, cond);
1249
// Conditional Compare (register)
1250
void ARM64XEmitter::CCMN(ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond)
1252
EncodeCondCompareRegInst(0, Rn, Rm, nzcv, cond);
1254
// CCMP (register): conditional-compare-register encoder, selector 1.
void ARM64XEmitter::CCMP(ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond)
{
	EncodeCondCompareRegInst(1, Rn, Rm, nzcv, cond);
}
1259
// Conditional Select
1260
// CSEL: conditional-select encoder, selector 0.
void ARM64XEmitter::CSEL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond)
{
	EncodeCondSelectInst(0, Rd, Rn, Rm, cond);
}
1264
// CSINC: conditional-select encoder, selector 1.
void ARM64XEmitter::CSINC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond)
{
	EncodeCondSelectInst(1, Rd, Rn, Rm, cond);
}
1268
// CSINV: conditional-select encoder, selector 2.
void ARM64XEmitter::CSINV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond)
{
	EncodeCondSelectInst(2, Rd, Rn, Rm, cond);
}
1272
// CSNEG: conditional-select encoder, selector 3.
void ARM64XEmitter::CSNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond)
{
	EncodeCondSelectInst(3, Rd, Rn, Rm, cond);
}
1277
// Data-Processing 1 source
1278
// RBIT: data-processing (1 source) encoder, selector 0.
void ARM64XEmitter::RBIT(ARM64Reg Rd, ARM64Reg Rn)
{
	EncodeData1SrcInst(0, Rd, Rn);
}
1282
// REV16: data-processing (1 source) encoder, selector 1.
void ARM64XEmitter::REV16(ARM64Reg Rd, ARM64Reg Rn)
{
	EncodeData1SrcInst(1, Rd, Rn);
}
1286
// REV32: data-processing (1 source) encoder, selector 2.
void ARM64XEmitter::REV32(ARM64Reg Rd, ARM64Reg Rn)
{
	EncodeData1SrcInst(2, Rd, Rn);
}
1290
// REV64: data-processing (1 source) encoder, selector 3.
void ARM64XEmitter::REV64(ARM64Reg Rd, ARM64Reg Rn)
{
	EncodeData1SrcInst(3, Rd, Rn);
}
1294
// CLZ: data-processing (1 source) encoder, selector 4.
void ARM64XEmitter::CLZ(ARM64Reg Rd, ARM64Reg Rn)
{
	EncodeData1SrcInst(4, Rd, Rn);
}
1298
// CLS: data-processing (1 source) encoder, selector 5.
void ARM64XEmitter::CLS(ARM64Reg Rd, ARM64Reg Rn)
{
	EncodeData1SrcInst(5, Rd, Rn);
}
1303
// Data-Processing 2 source
1304
// UDIV: data-processing (2 source) encoder, selector 0.
void ARM64XEmitter::UDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EncodeData2SrcInst(0, Rd, Rn, Rm);
}
1308
// SDIV: data-processing (2 source) encoder, selector 1.
void ARM64XEmitter::SDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EncodeData2SrcInst(1, Rd, Rn, Rm);
}
1312
// LSLV: data-processing (2 source) encoder, selector 2.
void ARM64XEmitter::LSLV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EncodeData2SrcInst(2, Rd, Rn, Rm);
}
1316
// LSRV: data-processing (2 source) encoder, selector 3.
void ARM64XEmitter::LSRV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EncodeData2SrcInst(3, Rd, Rn, Rm);
}
1320
// ASRV: data-processing (2 source) encoder, selector 4.
void ARM64XEmitter::ASRV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EncodeData2SrcInst(4, Rd, Rn, Rm);
}
1324
// RORV: data-processing (2 source) encoder, selector 5.
void ARM64XEmitter::RORV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EncodeData2SrcInst(5, Rd, Rn, Rm);
}
1328
// CRC32B: data-processing (2 source) encoder, selector 6.
void ARM64XEmitter::CRC32B(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EncodeData2SrcInst(6, Rd, Rn, Rm);
}
1332
// CRC32H: data-processing (2 source) encoder, selector 7.
void ARM64XEmitter::CRC32H(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EncodeData2SrcInst(7, Rd, Rn, Rm);
}
1336
// CRC32W: data-processing (2 source) encoder, selector 8.
void ARM64XEmitter::CRC32W(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EncodeData2SrcInst(8, Rd, Rn, Rm);
}
1340
// CRC32CB: data-processing (2 source) encoder, selector 9.
void ARM64XEmitter::CRC32CB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EncodeData2SrcInst(9, Rd, Rn, Rm);
}
1344
// CRC32CH: data-processing (2 source) encoder, selector 10.
void ARM64XEmitter::CRC32CH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EncodeData2SrcInst(10, Rd, Rn, Rm);
}
1348
// CRC32CW: data-processing (2 source) encoder, selector 11.
void ARM64XEmitter::CRC32CW(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EncodeData2SrcInst(11, Rd, Rn, Rm);
}
1352
// CRC32X: data-processing (2 source) encoder, selector 12.
void ARM64XEmitter::CRC32X(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EncodeData2SrcInst(12, Rd, Rn, Rm);
}
1356
// CRC32CX: data-processing (2 source) encoder, selector 13.
void ARM64XEmitter::CRC32CX(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EncodeData2SrcInst(13, Rd, Rn, Rm);
}
1361
// Data-Processing 3 source
1362
// MADD: data-processing (3 source) encoder, selector 0.
void ARM64XEmitter::MADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
{
	EncodeData3SrcInst(0, Rd, Rn, Rm, Ra);
}
1366
// MSUB: data-processing (3 source) encoder, selector 1.
void ARM64XEmitter::MSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
{
	EncodeData3SrcInst(1, Rd, Rn, Rm, Ra);
}
1370
// SMADDL: data-processing (3 source) encoder, selector 2.
void ARM64XEmitter::SMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
{
	EncodeData3SrcInst(2, Rd, Rn, Rm, Ra);
}
1374
// SMULL: expressed as SMADDL with SP passed in the Ra position.
void ARM64XEmitter::SMULL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	const ARM64Reg addend = SP;
	SMADDL(Rd, Rn, Rm, addend);
}
1378
// SMSUBL: data-processing (3 source) encoder, selector 3.
void ARM64XEmitter::SMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
{
	EncodeData3SrcInst(3, Rd, Rn, Rm, Ra);
}
1382
// SMULH: data-processing (3 source) encoder, selector 4, with SP in the Ra position.
void ARM64XEmitter::SMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EncodeData3SrcInst(4, Rd, Rn, Rm, SP);
}
1386
// UMADDL: data-processing (3 source) encoder, selector 5.
void ARM64XEmitter::UMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
{
	EncodeData3SrcInst(5, Rd, Rn, Rm, Ra);
}
1390
// UMULL: expressed as UMADDL with SP passed in the Ra position.
void ARM64XEmitter::UMULL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	const ARM64Reg addend = SP;
	UMADDL(Rd, Rn, Rm, addend);
}
1394
// UMSUBL: data-processing (3 source) encoder, selector 6.
void ARM64XEmitter::UMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
{
	EncodeData3SrcInst(6, Rd, Rn, Rm, Ra);
}
1398
// UMULH: data-processing (3 source) encoder, selector 7, with SP in the Ra position.
void ARM64XEmitter::UMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EncodeData3SrcInst(7, Rd, Rn, Rm, SP);
}
1402
// MUL: same encoder call as MADD (selector 0) with SP in the Ra position.
void ARM64XEmitter::MUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EncodeData3SrcInst(0, Rd, Rn, Rm, SP);
}
1406
// MNEG: same encoder call as MSUB (selector 1) with SP in the Ra position.
void ARM64XEmitter::MNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EncodeData3SrcInst(1, Rd, Rn, Rm, SP);
}
1411
// Logical (shifted register)
1412
// AND (shifted register): logical encoder, selector 0.
void ARM64XEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift)
{
	EncodeLogicalInst(0, Rd, Rn, Rm, Shift);
}
1416
// BIC (shifted register): logical encoder, selector 1.
void ARM64XEmitter::BIC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift)
{
	EncodeLogicalInst(1, Rd, Rn, Rm, Shift);
}
1420
// ORR (shifted register): logical encoder, selector 2.
void ARM64XEmitter::ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift)
{
	EncodeLogicalInst(2, Rd, Rn, Rm, Shift);
}
1424
// ORN (shifted register): logical encoder, selector 3.
void ARM64XEmitter::ORN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift)
{
	EncodeLogicalInst(3, Rd, Rn, Rm, Shift);
}
1428
// EOR (shifted register): logical encoder, selector 4.
void ARM64XEmitter::EOR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift)
{
	EncodeLogicalInst(4, Rd, Rn, Rm, Shift);
}
1432
// EON (shifted register): logical encoder, selector 5.
void ARM64XEmitter::EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift)
{
	EncodeLogicalInst(5, Rd, Rn, Rm, Shift);
}
1436
// ANDS (shifted register): logical encoder, selector 6.
void ARM64XEmitter::ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift)
{
	EncodeLogicalInst(6, Rd, Rn, Rm, Shift);
}
1440
// BICS (shifted register): logical encoder, selector 7.
void ARM64XEmitter::BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift)
{
	EncodeLogicalInst(7, Rd, Rn, Rm, Shift);
}
1444
// TST (shifted register): ANDS whose destination is the zero register
// (ZR or WZR, matching Rn's width).
void ARM64XEmitter::TST(ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift)
{
	const ARM64Reg zero_reg = Is64Bit(Rn) ? ZR : WZR;
	ANDS(zero_reg, Rn, Rm, Shift);
}
1449
// MOV with a shift option: ORR of the zero register (ZR or WZR, matching Rd's
// width) with the shifted Rm.
void ARM64XEmitter::MOV(ARM64Reg Rd, ARM64Reg Rm, ArithOption Shift) {
	const ARM64Reg zero_reg = Is64Bit(Rd) ? ZR : WZR;
	ORR(Rd, zero_reg, Rm, Shift);
}
1453
// Register-to-register MOV. Only general-purpose registers are handled: the
// move is emitted as ORR Rd, zero-register, Rm with LSL #0. Any other register
// kind trips the assertion instead.
void ARM64XEmitter::MOV(ARM64Reg Rd, ARM64Reg Rm)
{
	const bool both_gpr = IsGPR(Rd) && IsGPR(Rm);
	if (!both_gpr)
	{
		_assert_msg_(JIT, false, "Non-GPRs not supported in MOV");
	}
	else
	{
		const ARM64Reg zero_reg = Is64Bit(Rd) ? ZR : WZR;
		ORR(Rd, zero_reg, Rm, ArithOption(Rm, ST_LSL, 0));
	}
}
1461
// MVN: ORN of the zero register (ZR or WZR, matching Rd's width) with Rm, LSL #0.
void ARM64XEmitter::MVN(ARM64Reg Rd, ARM64Reg Rm)
{
	const ARM64Reg zero_reg = Is64Bit(Rd) ? ZR : WZR;
	ORN(Rd, zero_reg, Rm, ArithOption(Rm, ST_LSL, 0));
}
1465
// LSL by immediate: ORR of the zero register with Rm shifted by ST_LSL #shift.
void ARM64XEmitter::LSL(ARM64Reg Rd, ARM64Reg Rm, int shift)
{
	const ARM64Reg zero_reg = Is64Bit(Rd) ? ZR : WZR;
	ORR(Rd, zero_reg, Rm, ArithOption(Rm, ST_LSL, shift));
}
1469
// LSR by immediate: ORR of the zero register with Rm shifted by ST_LSR #shift.
void ARM64XEmitter::LSR(ARM64Reg Rd, ARM64Reg Rm, int shift)
{
	const ARM64Reg zero_reg = Is64Bit(Rd) ? ZR : WZR;
	ORR(Rd, zero_reg, Rm, ArithOption(Rm, ST_LSR, shift));
}
1473
// ASR by immediate: ORR of the zero register with Rm shifted by ST_ASR #shift.
void ARM64XEmitter::ASR(ARM64Reg Rd, ARM64Reg Rm, int shift)
{
	const ARM64Reg zero_reg = Is64Bit(Rd) ? ZR : WZR;
	ORR(Rd, zero_reg, Rm, ArithOption(Rm, ST_ASR, shift));
}
1477
// ROR by immediate: ORR of the zero register with Rm shifted by ST_ROR #shift.
void ARM64XEmitter::ROR(ARM64Reg Rd, ARM64Reg Rm, int shift)
{
	const ARM64Reg zero_reg = Is64Bit(Rd) ? ZR : WZR;
	ORR(Rd, zero_reg, Rm, ArithOption(Rm, ST_ROR, shift));
}
1482
// Logical (immediate)
1483
// AND (immediate): logical-immediate encoder, selector 0.
void ARM64XEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert)
{
	EncodeLogicalImmInst(0, Rd, Rn, immr, imms, invert);
}
1487
// ANDS (immediate): logical-immediate encoder, selector 3.
void ARM64XEmitter::ANDS(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert)
{
	EncodeLogicalImmInst(3, Rd, Rn, immr, imms, invert);
}
1491
// EOR (immediate): logical-immediate encoder, selector 2.
void ARM64XEmitter::EOR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert)
{
	EncodeLogicalImmInst(2, Rd, Rn, immr, imms, invert);
}
1495
// ORR (immediate): logical-immediate encoder, selector 1.
void ARM64XEmitter::ORR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert)
{
	EncodeLogicalImmInst(1, Rd, Rn, immr, imms, invert);
}
1499
// TST (immediate): same encoder call as ANDS (selector 3) with the zero
// register (ZR or WZR, matching Rn's width) as destination.
void ARM64XEmitter::TST(ARM64Reg Rn, u32 immr, u32 imms, bool invert)
{
	const ARM64Reg zero_reg = Is64Bit(Rn) ? ZR : WZR;
	EncodeLogicalImmInst(3, zero_reg, Rn, immr, imms, invert);
}
1504
// Add/subtract (immediate)
1505
// ADD (immediate): add/sub-immediate encoder, selector 0, second argument false.
// Note the encoder takes Rn before Rd.
void ARM64XEmitter::ADD(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift)
{
	EncodeAddSubImmInst(0, false, shift, imm, Rn, Rd);
}
1509
// ADDS (immediate): add/sub-immediate encoder, selector 0, second argument true.
// Note the encoder takes Rn before Rd.
void ARM64XEmitter::ADDS(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift)
{
	EncodeAddSubImmInst(0, true, shift, imm, Rn, Rd);
}
1513
// SUB (immediate): add/sub-immediate encoder, selector 1, second argument false.
// Note the encoder takes Rn before Rd.
void ARM64XEmitter::SUB(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift)
{
	EncodeAddSubImmInst(1, false, shift, imm, Rn, Rd);
}
1517
// SUBS (immediate): add/sub-immediate encoder, selector 1, second argument true.
// Note the encoder takes Rn before Rd.
void ARM64XEmitter::SUBS(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift)
{
	EncodeAddSubImmInst(1, true, shift, imm, Rn, Rd);
}
1521
// CMP (immediate): same encoder call as SUBS (immediate), with SP or WSP
// (matching Rn's width) passed in the destination position.
void ARM64XEmitter::CMP(ARM64Reg Rn, u32 imm, bool shift)
{
	const ARM64Reg discard = Is64Bit(Rn) ? SP : WSP;
	EncodeAddSubImmInst(1, true, shift, imm, Rn, discard);
}
1526
// Data Processing (Immediate)
1527
// MOVZ: move-wide encoder, selector 2.
void ARM64XEmitter::MOVZ(ARM64Reg Rd, u32 imm, ShiftAmount pos)
{
	EncodeMOVWideInst(2, Rd, imm, pos);
}
1531
// MOVN: move-wide encoder, selector 0.
void ARM64XEmitter::MOVN(ARM64Reg Rd, u32 imm, ShiftAmount pos)
{
	EncodeMOVWideInst(0, Rd, imm, pos);
}
1535
// MOVK: move-wide encoder, selector 3.
void ARM64XEmitter::MOVK(ARM64Reg Rd, u32 imm, ShiftAmount pos)
{
	EncodeMOVWideInst(3, Rd, imm, pos);
}
1541
// BFM: bitfield-move encoder, selector 1.
void ARM64XEmitter::BFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms)
{
	EncodeBitfieldMOVInst(1, Rd, Rn, immr, imms);
}
1545
// SBFM: bitfield-move encoder, selector 0.
void ARM64XEmitter::SBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms)
{
	EncodeBitfieldMOVInst(0, Rd, Rn, immr, imms);
}
1549
// UBFM: bitfield-move encoder, selector 2.
void ARM64XEmitter::UBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms)
{
	EncodeBitfieldMOVInst(2, Rd, Rn, immr, imms);
}
1554
// BFI expressed through the bitfield-move encoder (selector 1).
// lsb/width describe the destination field; the encoder receives
// immr = (regsize - lsb) mod regsize and imms = width - 1, where regsize is
// 64 or 32 depending on Rn. Asserts that lsb + width fits in the register.
void ARM64XEmitter::BFI(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width)
{
	const u32 reg_bits = Is64Bit(Rn) ? 64 : 32;
	_assert_msg_(DYNA_REC, (lsb + width) <= reg_bits, "%s passed lsb %d and width %d which is greater than the register size!",
	             __FUNCTION__, lsb, width);
	const u32 rotate = (reg_bits - lsb) % reg_bits;
	const u32 top_bit = width - 1;
	EncodeBitfieldMOVInst(1, Rd, Rn, rotate, top_bit);
}
1561
// UBFIZ expressed through the bitfield-move encoder (selector 2).
// The encoder receives immr = (regsize - lsb) mod regsize and imms = width - 1,
// where regsize is 64 or 32 depending on Rn. Asserts that lsb + width fits in
// the register.
void ARM64XEmitter::UBFIZ(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width)
{
	const u32 reg_bits = Is64Bit(Rn) ? 64 : 32;
	_assert_msg_(DYNA_REC, (lsb + width) <= reg_bits, "%s passed lsb %d and width %d which is greater than the register size!",
	             __FUNCTION__, lsb, width);
	const u32 rotate = (reg_bits - lsb) % reg_bits;
	const u32 top_bit = width - 1;
	EncodeBitfieldMOVInst(2, Rd, Rn, rotate, top_bit);
}
1568
// EXTR: writes one instruction word with sf at bit 31, N at bit 22 (N mirrors
// sf), Rm at bits 20:16, the shift amount at bits 15:10, Rn at bits 9:5 and
// Rd at bits 4:0.
//
// Fix: the Rn field previously received Rm a second time, so the Rn argument
// was never encoded and the instruction always used Rm for both sources.
void ARM64XEmitter::EXTR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u32 shift) {
	bool sf = Is64Bit(Rd);
	bool N = sf;
	// Reduce the register enums to their 5-bit encodings before packing.
	Rd = DecodeReg(Rd);
	Rn = DecodeReg(Rn);
	Rm = DecodeReg(Rm);
	Write32((sf << 31) | (0x27 << 23) | (N << 22) | (Rm << 16) | (shift << 10) | (Rn << 5) | Rd);
}
1576
// SXTB.
// NOTE(review): the body is not visible in this excerpt. The sibling SXTH
// forwards to SBFM(Rd, Rn, 0, 15), so this presumably forwards to SBFM with a
// byte-wide field - confirm against the full file.
void ARM64XEmitter::SXTB(ARM64Reg Rd, ARM64Reg Rn)
1580
// SXTH: SBFM with immr = 0 and imms = 15.
void ARM64XEmitter::SXTH(ARM64Reg Rd, ARM64Reg Rn)
{
	const u32 immr = 0;
	const u32 imms = 15;
	SBFM(Rd, Rn, immr, imms);
}
1584
// SXTW: SBFM with immr = 0 and imms = 31. The destination must be a 64-bit
// register; this is asserted.
void ARM64XEmitter::SXTW(ARM64Reg Rd, ARM64Reg Rn)
{
	_assert_msg_(DYNA_REC, Is64Bit(Rd), "%s requires 64bit register as destination", __FUNCTION__);
	const u32 immr = 0;
	const u32 imms = 31;
	SBFM(Rd, Rn, immr, imms);
}
1589
// UXTB.
// NOTE(review): the body is not visible in this excerpt. The sibling UXTH
// forwards to UBFM(Rd, Rn, 0, 15), so this presumably forwards to UBFM with a
// byte-wide field - confirm against the full file.
void ARM64XEmitter::UXTB(ARM64Reg Rd, ARM64Reg Rn)
1593
// UXTH: UBFM with immr = 0 and imms = 15.
void ARM64XEmitter::UXTH(ARM64Reg Rd, ARM64Reg Rn)
{
	const u32 immr = 0;
	const u32 imms = 15;
	UBFM(Rd, Rn, immr, imms);
}
1598
// Load Register (Literal)
1599
// LDR (literal): load-register-literal encoder, selector 0.
void ARM64XEmitter::LDR(ARM64Reg Rt, u32 imm)
{
	EncodeLoadRegisterInst(0, Rt, imm);
}
1603
// LDRSW (literal): load-register-literal encoder, selector 2.
void ARM64XEmitter::LDRSW(ARM64Reg Rt, u32 imm)
{
	EncodeLoadRegisterInst(2, Rt, imm);
}
1607
// PRFM (literal): load-register-literal encoder, selector 3.
void ARM64XEmitter::PRFM(ARM64Reg Rt, u32 imm)
{
	EncodeLoadRegisterInst(3, Rt, imm);
}
1613
// LDP: load/store-pair encoder with leading arguments (0, 1).
void ARM64XEmitter::LDP(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)
{
	EncodeLoadStorePair(0, 1, type, Rt, Rt2, Rn, imm);
}
1617
// LDPSW: load/store-pair encoder with leading arguments (1, 1).
void ARM64XEmitter::LDPSW(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)
{
	EncodeLoadStorePair(1, 1, type, Rt, Rt2, Rn, imm);
}
1621
// STP: load/store-pair encoder with leading arguments (0, 0).
void ARM64XEmitter::STP(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)
{
	EncodeLoadStorePair(0, 0, type, Rt, Rt2, Rn, imm);
}
1626
// Load/Store Exclusive
1627
// STXRB: exclusive load/store encoder, selector 0; SP fills the unused Rt2 slot.
void ARM64XEmitter::STXRB(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn)
{
	EncodeLoadStoreExcInst(0, Rs, SP, Rt, Rn);
}
1631
// STLXRB: exclusive load/store encoder, selector 1; SP fills the unused Rt2 slot.
void ARM64XEmitter::STLXRB(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn)
{
	EncodeLoadStoreExcInst(1, Rs, SP, Rt, Rn);
}
1635
// LDXRB: exclusive load/store encoder, selector 2; SP fills the unused Rs and Rt2 slots.
void ARM64XEmitter::LDXRB(ARM64Reg Rt, ARM64Reg Rn)
{
	EncodeLoadStoreExcInst(2, SP, SP, Rt, Rn);
}
1639
// LDAXRB: exclusive load/store encoder, selector 3; SP fills the unused Rs and Rt2 slots.
void ARM64XEmitter::LDAXRB(ARM64Reg Rt, ARM64Reg Rn)
{
	EncodeLoadStoreExcInst(3, SP, SP, Rt, Rn);
}
1643
// STLRB: exclusive load/store encoder, selector 4; SP fills the unused Rs and Rt2 slots.
void ARM64XEmitter::STLRB(ARM64Reg Rt, ARM64Reg Rn)
{
	EncodeLoadStoreExcInst(4, SP, SP, Rt, Rn);
}
1647
// LDARB: exclusive load/store encoder, selector 5; SP fills the unused Rs and Rt2 slots.
void ARM64XEmitter::LDARB(ARM64Reg Rt, ARM64Reg Rn)
{
	EncodeLoadStoreExcInst(5, SP, SP, Rt, Rn);
}
1651
// STXRH: exclusive load/store encoder, selector 6; SP fills the unused Rt2 slot.
void ARM64XEmitter::STXRH(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn)
{
	EncodeLoadStoreExcInst(6, Rs, SP, Rt, Rn);
}
1655
// STLXRH: exclusive load/store encoder, selector 7; SP fills the unused Rt2 slot.
void ARM64XEmitter::STLXRH(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn)
{
	EncodeLoadStoreExcInst(7, Rs, SP, Rt, Rn);
}
1659
// LDXRH: exclusive load/store encoder, selector 8; SP fills the unused Rs and Rt2 slots.
void ARM64XEmitter::LDXRH(ARM64Reg Rt, ARM64Reg Rn)
{
	EncodeLoadStoreExcInst(8, SP, SP, Rt, Rn);
}
1663
// LDAXRH: exclusive load/store encoder, selector 9; SP fills the unused Rs and Rt2 slots.
void ARM64XEmitter::LDAXRH(ARM64Reg Rt, ARM64Reg Rn)
{
	EncodeLoadStoreExcInst(9, SP, SP, Rt, Rn);
}
1667
// STLRH: exclusive load/store encoder, selector 10; SP fills the unused Rs and Rt2 slots.
void ARM64XEmitter::STLRH(ARM64Reg Rt, ARM64Reg Rn)
{
	EncodeLoadStoreExcInst(10, SP, SP, Rt, Rn);
}
1671
// LDARH: exclusive load/store encoder, selector 11; SP fills the unused Rs and Rt2 slots.
void ARM64XEmitter::LDARH(ARM64Reg Rt, ARM64Reg Rn)
{
	EncodeLoadStoreExcInst(11, SP, SP, Rt, Rn);
}
1675
// STXR: exclusive load/store encoder; selector is 12 plus Is64Bit(Rt).
// SP fills the unused Rt2 slot.
void ARM64XEmitter::STXR(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn)
{
	EncodeLoadStoreExcInst(12 + Is64Bit(Rt), Rs, SP, Rt, Rn);
}
1679
// STLXR: exclusive load/store encoder; selector is 14 plus Is64Bit(Rt).
// SP fills the unused Rt2 slot.
void ARM64XEmitter::STLXR(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn)
{
	EncodeLoadStoreExcInst(14 + Is64Bit(Rt), Rs, SP, Rt, Rn);
}
1683
// STXP: exclusive load/store encoder; selector is 16 plus Is64Bit(Rt).
void ARM64XEmitter::STXP(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn)
{
	EncodeLoadStoreExcInst(16 + Is64Bit(Rt), Rs, Rt2, Rt, Rn);
}
1687
// STLXP: exclusive load/store encoder; selector is 18 plus Is64Bit(Rt).
void ARM64XEmitter::STLXP(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn)
{
	EncodeLoadStoreExcInst(18 + Is64Bit(Rt), Rs, Rt2, Rt, Rn);
}
1691
// LDXR: exclusive load/store encoder; selector is 20 plus Is64Bit(Rt).
// SP fills the unused Rs and Rt2 slots.
void ARM64XEmitter::LDXR(ARM64Reg Rt, ARM64Reg Rn)
{
	EncodeLoadStoreExcInst(20 + Is64Bit(Rt), SP, SP, Rt, Rn);
}
1695
// LDAXR: exclusive load/store encoder; selector is 22 plus Is64Bit(Rt).
// SP fills the unused Rs and Rt2 slots.
void ARM64XEmitter::LDAXR(ARM64Reg Rt, ARM64Reg Rn)
{
	EncodeLoadStoreExcInst(22 + Is64Bit(Rt), SP, SP, Rt, Rn);
}
1699
// LDXP: exclusive load/store encoder; selector is 24 plus Is64Bit(Rt).
// SP fills the unused Rs slot.
void ARM64XEmitter::LDXP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn)
{
	EncodeLoadStoreExcInst(24 + Is64Bit(Rt), SP, Rt2, Rt, Rn);
}
1703
// LDAXP: exclusive load/store encoder; selector is 26 plus Is64Bit(Rt).
// SP fills the unused Rs slot.
void ARM64XEmitter::LDAXP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn)
{
	EncodeLoadStoreExcInst(26 + Is64Bit(Rt), SP, Rt2, Rt, Rn);
}
1707
// STLR: exclusive load/store encoder; selector is 28 plus Is64Bit(Rt).
// SP fills the unused Rs and Rt2 slots.
void ARM64XEmitter::STLR(ARM64Reg Rt, ARM64Reg Rn)
{
	EncodeLoadStoreExcInst(28 + Is64Bit(Rt), SP, SP, Rt, Rn);
}
1711
// LDAR: exclusive load/store encoder; selector is 30 plus Is64Bit(Rt).
// SP fills the unused Rs and Rt2 slots.
void ARM64XEmitter::LDAR(ARM64Reg Rt, ARM64Reg Rn)
{
	EncodeLoadStoreExcInst(30 + Is64Bit(Rt), SP, SP, Rt, Rn);
}
1716
// Load/Store no-allocate pair (offset)
1717
// STNP: paired load/store encoder with op 0xA0.
void ARM64XEmitter::STNP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm)
{
	EncodeLoadStorePairedInst(0xA0, Rt, Rt2, Rn, imm);
}
1721
// LDNP: paired load/store encoder with op 0xA1.
void ARM64XEmitter::LDNP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm)
{
	EncodeLoadStorePairedInst(0xA1, Rt, Rt2, Rn, imm);
}
1726
// Load/Store register (immediate post-indexed)
1727
// XXX: Most of these support vectors
1728
// STRB (immediate). INDEX_UNSIGNED uses op 0x0E4 with an access size of 8;
// every other index type uses op 0x0E0 with a second selector of 1 for
// INDEX_POST and 3 otherwise.
void ARM64XEmitter::STRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
	if (type != INDEX_UNSIGNED)
	{
		const int op2 = (type == INDEX_POST) ? 1 : 3;
		EncodeLoadStoreIndexedInst(0x0E0, op2, Rt, Rn, imm);
	}
	else
	{
		EncodeLoadStoreIndexedInst(0x0E4, Rt, Rn, imm, 8);
	}
}
1736
// LDRB (immediate). INDEX_UNSIGNED uses op 0x0E5 with an access size of 8;
// every other index type uses op 0x0E1 with a second selector of 1 for
// INDEX_POST and 3 otherwise.
void ARM64XEmitter::LDRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
	if (type != INDEX_UNSIGNED)
	{
		const int op2 = (type == INDEX_POST) ? 1 : 3;
		EncodeLoadStoreIndexedInst(0x0E1, op2, Rt, Rn, imm);
	}
	else
	{
		EncodeLoadStoreIndexedInst(0x0E5, Rt, Rn, imm, 8);
	}
}
1744
// LDRSB (immediate). The op depends on the width of Rt: 0x0E6/0x0E7
// (64/32-bit) for INDEX_UNSIGNED with an access size of 8, and 0x0E2/0x0E3
// otherwise, with a second selector of 1 for INDEX_POST and 3 otherwise.
void ARM64XEmitter::LDRSB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
	if (type != INDEX_UNSIGNED)
	{
		const int op2 = (type == INDEX_POST) ? 1 : 3;
		EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x0E2 : 0x0E3, op2, Rt, Rn, imm);
	}
	else
	{
		EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x0E6 : 0x0E7, Rt, Rn, imm, 8);
	}
}
1752
// STRH (immediate). INDEX_UNSIGNED uses op 0x1E4 with an access size of 16;
// every other index type uses op 0x1E0 with a second selector of 1 for
// INDEX_POST and 3 otherwise.
void ARM64XEmitter::STRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
	if (type != INDEX_UNSIGNED)
	{
		const int op2 = (type == INDEX_POST) ? 1 : 3;
		EncodeLoadStoreIndexedInst(0x1E0, op2, Rt, Rn, imm);
	}
	else
	{
		EncodeLoadStoreIndexedInst(0x1E4, Rt, Rn, imm, 16);
	}
}
1760
// LDRH (immediate). INDEX_UNSIGNED uses op 0x1E5 with an access size of 16;
// every other index type uses op 0x1E1 with a second selector of 1 for
// INDEX_POST and 3 otherwise.
void ARM64XEmitter::LDRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
	if (type != INDEX_UNSIGNED)
	{
		const int op2 = (type == INDEX_POST) ? 1 : 3;
		EncodeLoadStoreIndexedInst(0x1E1, op2, Rt, Rn, imm);
	}
	else
	{
		EncodeLoadStoreIndexedInst(0x1E5, Rt, Rn, imm, 16);
	}
}
1768
// LDRSH (immediate). The op depends on the width of Rt: 0x1E6/0x1E7
// (64/32-bit) for INDEX_UNSIGNED with an access size of 16, and 0x1E2/0x1E3
// otherwise, with a second selector of 1 for INDEX_POST and 3 otherwise.
void ARM64XEmitter::LDRSH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
	if (type != INDEX_UNSIGNED)
	{
		const int op2 = (type == INDEX_POST) ? 1 : 3;
		EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x1E2 : 0x1E3, op2, Rt, Rn, imm);
	}
	else
	{
		EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x1E6 : 0x1E7, Rt, Rn, imm, 16);
	}
}
1776
// STR (immediate). The op depends on the width of Rt: 0x3E4/0x2E4 (64/32-bit)
// for INDEX_UNSIGNED with an access size of 64 or 32, and 0x3E0/0x2E0
// otherwise, with a second selector of 1 for INDEX_POST and 3 otherwise.
void ARM64XEmitter::STR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
	if (type != INDEX_UNSIGNED)
	{
		const int op2 = (type == INDEX_POST) ? 1 : 3;
		EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x3E0 : 0x2E0, op2, Rt, Rn, imm);
	}
	else
	{
		EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x3E4 : 0x2E4, Rt, Rn, imm, Is64Bit(Rt) ? 64 : 32);
	}
}
1784
// LDR (immediate). The op depends on the width of Rt: 0x3E5/0x2E5 (64/32-bit)
// for INDEX_UNSIGNED with an access size of 64 or 32, and 0x3E1/0x2E1
// otherwise, with a second selector of 1 for INDEX_POST and 3 otherwise.
void ARM64XEmitter::LDR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
	if (type != INDEX_UNSIGNED)
	{
		const int op2 = (type == INDEX_POST) ? 1 : 3;
		EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x3E1 : 0x2E1, op2, Rt, Rn, imm);
	}
	else
	{
		EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x3E5 : 0x2E5, Rt, Rn, imm, Is64Bit(Rt) ? 64 : 32);
	}
}
1792
// LDRSW (immediate). INDEX_UNSIGNED uses op 0x2E6 with an access size of 32;
// every other index type uses op 0x2E2 with a second selector of 1 for
// INDEX_POST and 3 otherwise.
void ARM64XEmitter::LDRSW(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
	if (type != INDEX_UNSIGNED)
	{
		const int op2 = (type == INDEX_POST) ? 1 : 3;
		EncodeLoadStoreIndexedInst(0x2E2, op2, Rt, Rn, imm);
	}
	else
	{
		EncodeLoadStoreIndexedInst(0x2E6, Rt, Rn, imm, 32);
	}
}
1801
// Load/Store register (register offset)
1802
// STRB (register offset): register-offset encoder with leading arguments (0, 0).
void ARM64XEmitter::STRB(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
{
	EncodeLoadStoreRegisterOffset(0, 0, Rt, Rn, Rm);
}
1806
// LDRB (register offset): register-offset encoder with leading arguments (0, 1).
void ARM64XEmitter::LDRB(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
{
	EncodeLoadStoreRegisterOffset(0, 1, Rt, Rn, Rm);
}
1810
// LDRSB (register offset): first argument 0; the second is 2 for a 64-bit Rt
// and 3 for a 32-bit Rt.
void ARM64XEmitter::LDRSB(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
{
	const bool wide = Is64Bit(Rt);
	const int opc = wide ? 2 : 3;
	EncodeLoadStoreRegisterOffset(0, opc, Rt, Rn, Rm);
}
1815
// STRH (register offset): register-offset encoder with leading arguments (1, 0).
void ARM64XEmitter::STRH(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
{
	EncodeLoadStoreRegisterOffset(1, 0, Rt, Rn, Rm);
}
1819
// LDRH (register offset): register-offset encoder with leading arguments (1, 1).
void ARM64XEmitter::LDRH(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
{
	EncodeLoadStoreRegisterOffset(1, 1, Rt, Rn, Rm);
}
1823
// LDRSH (register offset): first argument 1; the second is 2 for a 64-bit Rt
// and 3 for a 32-bit Rt.
void ARM64XEmitter::LDRSH(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
{
	const bool wide = Is64Bit(Rt);
	const int opc = wide ? 2 : 3;
	EncodeLoadStoreRegisterOffset(1, opc, Rt, Rn, Rm);
}
1828
// STR (register offset): first argument is 3 for a 64-bit Rt and 2 for a
// 32-bit Rt; the second is 0.
void ARM64XEmitter::STR(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
{
	const bool wide = Is64Bit(Rt);
	const int size_sel = wide ? 3 : 2;
	EncodeLoadStoreRegisterOffset(size_sel, 0, Rt, Rn, Rm);
}
1833
// LDR (register offset): first argument is 3 for a 64-bit Rt and 2 for a
// 32-bit Rt; the second is 1.
void ARM64XEmitter::LDR(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
{
	const bool wide = Is64Bit(Rt);
	const int size_sel = wide ? 3 : 2;
	EncodeLoadStoreRegisterOffset(size_sel, 1, Rt, Rn, Rm);
}
1838
// LDRSW (register offset): register-offset encoder with leading arguments (2, 2).
void ARM64XEmitter::LDRSW(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
{
	EncodeLoadStoreRegisterOffset(2, 2, Rt, Rn, Rm);
}
1842
// PRFM (register offset): register-offset encoder with leading arguments (3, 2).
void ARM64XEmitter::PRFM(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
{
	EncodeLoadStoreRegisterOffset(3, 2, Rt, Rn, Rm);
}
1847
// Load/Store register (unscaled offset)
1848
// STURB: unscaled-offset encoder with leading arguments (0, 0).
void ARM64XEmitter::STURB(ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
	EncodeLoadStoreUnscaled(0, 0, Rt, Rn, imm);
}
1852
// LDURB: unscaled-offset encoder with leading arguments (0, 1).
void ARM64XEmitter::LDURB(ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
	EncodeLoadStoreUnscaled(0, 1, Rt, Rn, imm);
}
1856
// LDURSB: unscaled-offset encoder; first argument 0, second is 2 for a 64-bit
// Rt and 3 otherwise.
void ARM64XEmitter::LDURSB(ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
	const int opc = Is64Bit(Rt) ? 2 : 3;
	EncodeLoadStoreUnscaled(0, opc, Rt, Rn, imm);
}
1860
// STURH: unscaled-offset encoder with leading arguments (1, 0).
void ARM64XEmitter::STURH(ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
	EncodeLoadStoreUnscaled(1, 0, Rt, Rn, imm);
}
1864
// LDURH: unscaled-offset encoder with leading arguments (1, 1).
void ARM64XEmitter::LDURH(ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
	EncodeLoadStoreUnscaled(1, 1, Rt, Rn, imm);
}
1868
// LDURSH: unscaled-offset encoder; first argument 1, second is 2 for a 64-bit
// Rt and 3 otherwise.
void ARM64XEmitter::LDURSH(ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
	const int opc = Is64Bit(Rt) ? 2 : 3;
	EncodeLoadStoreUnscaled(1, opc, Rt, Rn, imm);
}
1872
// STUR: unscaled-offset encoder; first argument is 3 for a 64-bit Rt and 2
// otherwise, second is 0.
void ARM64XEmitter::STUR(ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
	const int size_sel = Is64Bit(Rt) ? 3 : 2;
	EncodeLoadStoreUnscaled(size_sel, 0, Rt, Rn, imm);
}
1876
// LDUR: unscaled-offset encoder; first argument is 3 for a 64-bit Rt and 2
// otherwise, second is 1.
void ARM64XEmitter::LDUR(ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
	const int size_sel = Is64Bit(Rt) ? 3 : 2;
	EncodeLoadStoreUnscaled(size_sel, 1, Rt, Rn, imm);
}
1880
// LDURSW: unscaled-offset encoder with leading arguments (2, 2).
// The destination must be a 64-bit register.
//
// Fix: the assertion condition was inverted (`!Is64Bit(Rt)`), so it fired for
// exactly the registers its own message says are required and stayed silent
// for 32-bit destinations. It now matches the message and the equivalent
// check in SXTW.
void ARM64XEmitter::LDURSW(ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
	_assert_msg_(DYNA_REC, Is64Bit(Rt), "%s must have a 64bit destination register!", __FUNCTION__);
	EncodeLoadStoreUnscaled(2, 2, Rt, Rn, imm);
}
1886
// Address of label/page PC-relative
1887
// ADR: address encoder, selector 0, immediate passed through unchanged.
void ARM64XEmitter::ADR(ARM64Reg Rd, s32 imm)
{
	EncodeAddressInst(0, Rd, imm);
}
1891
// ADRP: address encoder, selector 1. The byte offset is shifted right by 12
// before being handed to the encoder.
void ARM64XEmitter::ADRP(ARM64Reg Rd, s32 imm)
{
	const s32 page_imm = imm >> 12;
	EncodeAddressInst(1, Rd, page_imm);
}
1896
// LLVM is unhappy about the regular abs function, so here we go.
1897
// Absolute value of a signed 64-bit integer (local replacement for abs).
// As with the plain negation it wraps, the most negative value has no
// representable result.
inline int64_t abs64(int64_t x) {
	if (x < 0)
		return -x;
	return x;
}
1901
// Wrapper around MOVZ+MOVK (and later MOVN)
1902
// Loads the immediate `imm` into Rd, picking one of several emission
// strategies visible below:
//   - MOVZ Rd, #0 for a zero immediate;
//   - ORN of the zero register with itself for an all-ones value
//     (u64 max for a 64-bit Rd, u32 max for a 32-bit Rd);
//   - MOVN for a 32-bit Rd whose upper 16 bits are all set;
//   - ADRP / ADR / ADRP+ADD when the value lies close to the current code
//     pointer;
//   - otherwise MOVZ for the first uploaded 16-bit part and MOVK for the rest.
// `parts` is 4 for a 64-bit Rd and 2 otherwise. `upload_part` presumably
// records which 16-bit parts of `imm` are non-zero.
// NOTE(review): several short lines (conditions, returns, braces) are not
// visible in this excerpt, so the exact branch structure must be confirmed
// against the full file.
// NOTE(review): the ADRP path accepts |aligned_offset| < 0xFFFFFFFF but then
// casts the offset to s32, which cannot hold magnitudes of 2^31 or more -
// confirm whether the bound should be 0x7FFFFFFF.
void ARM64XEmitter::MOVI2R(ARM64Reg Rd, u64 imm, bool optimize)
1904
unsigned int parts = Is64Bit(Rd) ? 4 : 2;
1905
BitSet32 upload_part(0);
1907
// Always start with a movz! Kills the dependency on the register.
1908
bool use_movz = true;
1912
// Zero immediate, just clear the register. EOR is pointless when we have MOVZ, which looks clearer in disasm too.
1913
MOVZ(Rd, 0, SHIFT_0);
1917
if ((Is64Bit(Rd) && imm == std::numeric_limits<u64>::max()) ||
1918
(!Is64Bit(Rd) && imm == std::numeric_limits<u32>::max()))
1920
// Max unsigned value (or if signed, -1)
1922
ARM64Reg ZR = Is64Bit(Rd) ? SP : WSP;
1923
ORN(Rd, ZR, ZR, ArithOption(ZR, ST_LSL, 0));
1927
// TODO: Make some more systemic use of MOVN, but this will take care of most cases.
1928
// Small negative integer. Use MOVN
1929
if (!Is64Bit(Rd) && (imm | 0xFFFF0000) == imm) {
1930
MOVN(Rd, ~imm, SHIFT_0);
1935
// XXX: Use MOVN when possible.
1936
// XXX: Optimize more
1937
// XXX: Support rotating immediates to save instructions
1940
for (unsigned int i = 0; i < parts; ++i)
1942
if ((imm >> (i * 16)) & 0xFFFF)
1947
// Offset of the immediate from the 4 KiB-aligned current code pointer.
u64 aligned_pc = (u64)GetCodePointer() & ~0xFFF;
1948
s64 aligned_offset = (s64)imm - (s64)aligned_pc;
1949
if (upload_part.Count() > 1 && abs64(aligned_offset) < 0xFFFFFFFFLL)
1951
// Immediate we are loading is within 4GB of our aligned range
1952
// Most likely a address that we can load in one or two instructions
1953
if (!(abs64(aligned_offset) & 0xFFF))
1956
ADRP(Rd, (s32)aligned_offset);
1961
// If the address is within 1MB of PC we can load it in a single instruction still
1962
s64 offset = (s64)imm - (s64)GetCodePointer();
1963
if (offset >= -0xFFFFF && offset <= 0xFFFFF)
1965
ADR(Rd, (s32)offset);
1970
ADRP(Rd, (s32)(aligned_offset & ~0xFFF));
1971
ADD(Rd, Rd, imm & 0xFFF);
1977
// Generic path: one 16-bit part per iteration.
for (unsigned i = 0; i < parts; ++i)
1979
if (use_movz && upload_part[i])
1981
MOVZ(Rd, (imm >> (i * 16)) & 0xFFFF, (ShiftAmount)i);
1986
if (upload_part[i] || !optimize)
1987
MOVK(Rd, (imm >> (i * 16)) & 0xFFFF, (ShiftAmount)i);
1992
// PUSH: pre-indexed STR of Rd at SP with an offset of -16.
void ARM64XEmitter::PUSH(ARM64Reg Rd) {
	const s32 slot = -16;
	STR(INDEX_PRE, Rd, SP, slot);
}
1996
// POP: post-indexed LDR of Rd from SP with an offset of 16.
void ARM64XEmitter::POP(ARM64Reg Rd) {
	const s32 slot = 16;
	LDR(INDEX_POST, Rd, SP, slot);
}
2000
// PUSH2: pre-indexed STP of (Rd, Rn) at SP with an offset of -16.
void ARM64XEmitter::PUSH2(ARM64Reg Rd, ARM64Reg Rn) {
	const s32 slot = -16;
	STP(INDEX_PRE, Rd, Rn, SP, slot);
}
2003
// POP2: post-indexed LDP of (Rd, Rn) from SP with an offset of 16.
void ARM64XEmitter::POP2(ARM64Reg Rd, ARM64Reg Rn) {
	const s32 slot = 16;
	LDP(INDEX_POST, Rd, Rn, SP, slot);
}
2008
// Stores every register named in `registers` (bit i selects X0 + i) to the
// stack using STR/STP.
// Two store sequences are visible below: one reserves `stack_size` bytes
// (num_regs * 8 rounded up to 16) with a single pre-indexed STR and then
// stores pairs at increasing unsigned offsets; the other pushes pairs with
// pre-indexed STP at -16.
// NOTE(review): the condition choosing between the two sequences and the
// handling of an odd register count are not visible in this excerpt.
void ARM64XEmitter::ABI_PushRegisters(BitSet32 registers)
2010
int num_regs = registers.Count();
2016
// Stack is required to be quad-word aligned.
2017
u32 stack_size = ROUND_UP(num_regs * 8, 16);
2018
u32 current_offset = 0;
2019
std::vector<ARM64Reg> reg_pair;
2021
for (auto it : registers)
2025
STR(INDEX_PRE, (ARM64Reg)(X0 + it), SP, -(s32)stack_size);
2027
current_offset += 16;
2031
reg_pair.push_back((ARM64Reg)(X0 + it));
2032
if (reg_pair.size() == 2)
2034
STP(INDEX_UNSIGNED, reg_pair[0], reg_pair[1], SP, current_offset);
2036
current_offset += 16;
2043
std::vector<ARM64Reg> reg_pair;
2045
for (auto it : registers)
2047
reg_pair.push_back((ARM64Reg)(X0 + it));
2048
if (reg_pair.size() == 2)
2050
STP(INDEX_PRE, reg_pair[0], reg_pair[1], SP, -16);
2057
// Reloads the registers named in `registers` (bit i selects X0 + i) from the
// stack using post-indexed LDR/LDP with an offset of 16. `ignore_mask` is
// consulted per register before it is reloaded.
// Two load sequences are visible below: one walks `registers` in iteration
// order, the other walks indices 31 down to 0 and issues LDP with the pair
// swapped (reg_pair[1], reg_pair[0]).
// NOTE(review): the condition choosing between the two sequences, the action
// taken for ignored registers and the definition of `reg` are not visible in
// this excerpt.
void ARM64XEmitter::ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask)
2059
int num_regs = registers.Count();
2065
std::vector<ARM64Reg> reg_pair;
2067
for (auto it : registers)
2069
if (ignore_mask[it])
2074
LDR(INDEX_POST, (ARM64Reg)(X0 + it), SP, 16);
2079
reg_pair.push_back((ARM64Reg)(X0 + it));
2080
if (reg_pair.size() == 2)
2082
LDP(INDEX_POST, reg_pair[0], reg_pair[1], SP, 16);
2090
std::vector<ARM64Reg> reg_pair;
2092
for (int i = 31; i >= 0; --i)
2099
if (ignore_mask[reg])
2102
reg_pair.push_back((ARM64Reg)(X0 + reg));
2103
if (reg_pair.size() == 2)
2105
LDP(INDEX_POST, reg_pair[1], reg_pair[0], SP, 16);
2113
// Emits a load/store with an immediate offset for access sizes of 8, 16, 32,
// 64 or 128 bits.
// For INDEX_UNSIGNED the offset must be non-negative and aligned to the access
// size; the encoded immediate is masked to 12 bits. For the other index types
// the offset must lie in [-256, 255]; it is masked to 9 bits and shifted left
// by 2.
// The final word places encoded_size at bit 30, sets bit 24 for
// INDEX_UNSIGNED and bit 23 for 128-bit accesses, and places opc at bit 22,
// the immediate at bit 10, Rn at bit 5 and Rt at bit 0.
// NOTE(review): the per-size assignments and register decoding are not
// visible in this excerpt.
// NOTE(review): the range assertion message says "-256 to 256" while the check
// accepts at most 255.
void ARM64FloatEmitter::EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
2117
u32 encoded_size = 0;
2118
u32 encoded_imm = 0;
2122
else if (size == 16)
2124
else if (size == 32)
2126
else if (size == 64)
2128
else if (size == 128)
2131
if (type == INDEX_UNSIGNED)
2133
_assert_msg_(DYNA_REC, !(imm & ((size - 1) >> 3)), "%s(INDEX_UNSIGNED) immediate offset must be aligned to size! (%d) (%p)", __FUNCTION__, imm, m_emit->GetCodePointer());
2134
_assert_msg_(DYNA_REC, imm >= 0, "%s(INDEX_UNSIGNED) immediate offset must be positive!", __FUNCTION__);
2137
else if (size == 32)
2139
else if (size == 64)
2141
else if (size == 128)
2143
encoded_imm = (imm & 0xFFF);
2147
_assert_msg_(DYNA_REC, !(imm < -256 || imm > 255), "%s immediate offset must be within range of -256 to 256!", __FUNCTION__);
2148
encoded_imm = (imm & 0x1FF) << 2;
2149
if (type == INDEX_POST)
2155
Write32((encoded_size << 30) | (0xF << 26) | (type == INDEX_UNSIGNED ? (1 << 24) : 0) | \
2156
(size == 128 ? (1 << 23) : 0) | (opc << 22) | (encoded_imm << 10) | (Rn << 5) | Rt);
2159
// Emits one instruction word: M at bit 31, S at bit 29, type at bit 22, Rm at
// bit 16, opcode at bit 12, Rn at bit 5 and Rd at bit 0, OR'd with the fixed
// fields (0xF1 << 21) and (1 << 11). Asserts that Rd is not a quad register.
// NOTE(review): lines between the assert and Write32 are not visible in this
// excerpt.
void ARM64FloatEmitter::EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
2161
_assert_msg_(DYNA_REC, !IsQuad(Rd), "%s only supports double and single registers!", __FUNCTION__);
2166
Write32((M << 31) | (S << 29) | (0xF1 << 21) | (type << 22) | (Rm << 16) | \
2167
(opcode << 12) | (1 << 11) | (Rn << 5) | Rd);
2170
// Emits one instruction word: quad flag (from Rd) at bit 30, U at bit 29, size
// at bit 22, Rm at bit 16, opcode at bit 11, Rn at bit 5 and Rd at bit 0, OR'd
// with the fixed fields (0x71 << 21) and (1 << 10). Asserts that Rd is not a
// single register.
// NOTE(review): lines between `quad` and Write32 are not visible in this
// excerpt.
void ARM64FloatEmitter::EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
2172
_assert_msg_(DYNA_REC, !IsSingle(Rd), "%s doesn't support singles!", __FUNCTION__);
2173
bool quad = IsQuad(Rd);
2178
Write32((quad << 30) | (U << 29) | (0x71 << 21) | (size << 22) | \
2179
(Rm << 16) | (opcode << 11) | (1 << 10) | (Rn << 5) | Rd);
2182
// Emits one instruction word: Q at bit 30, op at bit 29, imm5 at bit 16, imm4
// at bit 11, Rn at bit 5 and Rd at bit 0, OR'd with the fixed fields
// (0x7 << 25) and (1 << 10).
// NOTE(review): lines before Write32 are not visible in this excerpt.
void ARM64FloatEmitter::EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn)
2187
Write32((Q << 30) | (op << 29) | (0x7 << 25) | (imm5 << 16) | (imm4 << 11) | \
2188
(1 << 10) | (Rn << 5) | Rd);
2191
// Emits one instruction word: Q at bit 30, U at bit 29, size at bit 22, opcode
// at bit 12, Rn at bit 5 and Rd at bit 0, OR'd with the fixed fields
// (0x71 << 21) and (1 << 11). Asserts that Rd is not a single register.
// NOTE(review): lines between the assert and Write32 are not visible in this
// excerpt.
void ARM64FloatEmitter::Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)
2193
_assert_msg_(DYNA_REC, !IsSingle(Rd), "%s doesn't support singles!", __FUNCTION__);
2197
Write32((Q << 30) | (U << 29) | (0x71 << 21) | (size << 22) | \
2198
(opcode << 12) | (1 << 11) | (Rn << 5) | Rd);
2201
// Single-structure load/store without a post-index register. Emits one
// instruction word: quad flag (from Rt) at bit 30, L at bit 22, R at bit 21,
// opcode at bit 13, S at bit 12, size at bit 10, Rn at bit 5 and Rt at bit 0,
// OR'd with the fixed field (0xD << 24). Asserts that Rt is not a single
// register.
// NOTE(review): lines between `quad` and Write32 are not visible in this
// excerpt.
void ARM64FloatEmitter::EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn)
2203
_assert_msg_(DYNA_REC, !IsSingle(Rt), "%s doesn't support singles!", __FUNCTION__);
2204
bool quad = IsQuad(Rt);
2208
Write32((quad << 30) | (0xD << 24) | (L << 22) | (R << 21) | (opcode << 13) | \
2209
(S << 12) | (size << 10) | (Rn << 5) | Rt);
2212
// Single-structure load/store taking an additional Rm register. Emits one
// instruction word: quad flag (from Rt) at bit 30, L at bit 22, R at bit 21,
// Rm at bit 16, opcode at bit 13, S at bit 12, size at bit 10, Rn at bit 5 and
// Rt at bit 0, OR'd with the fixed field (0x1B << 23). Asserts that Rt is not
// a single register.
// NOTE(review): lines between `quad` and Write32 are not visible in this
// excerpt.
void ARM64FloatEmitter::EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm)
2214
_assert_msg_(DYNA_REC, !IsSingle(Rt), "%s doesn't support singles!", __FUNCTION__);
2215
bool quad = IsQuad(Rt);
2220
Write32((quad << 30) | (0x1B << 23) | (L << 22) | (R << 21) | (Rm << 16) | \
2221
(opcode << 13) | (S << 12) | (size << 10) | (Rn << 5) | Rt);
2224
// Emits one instruction word: M at bit 31, S at bit 29, type at bit 22, opcode
// at bit 15, Rn at bit 5 and Rd at bit 0, OR'd with the fixed fields
// (0xF1 << 21) and (1 << 14). Asserts that Rd is not a quad register.
// NOTE(review): lines between the assert and Write32 are not visible in this
// excerpt.
void ARM64FloatEmitter::Emit1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)
2226
_assert_msg_(DYNA_REC, !IsQuad(Rd), "%s doesn't support vector!", __FUNCTION__);
2230
Write32((M << 31) | (S << 29) | (0xF1 << 21) | (type << 22) | (opcode << 15) | \
2231
(1 << 14) | (Rn << 5) | Rd);
2234
// Emits one instruction word: sf at bit 31, S at bit 29, type at bit 22, rmode
// at bit 19, opcode at bit 16, Rn at bit 5 and Rd at bit 0, OR'd with the
// fixed field (0xF1 << 21). Asserts that Rn compares <= SP, which the message
// describes as "GPR as source".
// NOTE(review): lines between the assert and Write32 are not visible in this
// excerpt.
void ARM64FloatEmitter::EmitConversion(bool sf, bool S, u32 type, u32 rmode, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)
2236
_assert_msg_(DYNA_REC, Rn <= SP, "%s only supports GPR as source!", __FUNCTION__);
2240
Write32((sf << 31) | (S << 29) | (0xF1 << 21) | (type << 22) | (rmode << 19) | \
2241
(opcode << 16) | (Rn << 5) | Rd);
2244
// Emits a scalar floating-point to integer conversion from Rn using the
// requested rounding mode. Two encodings are visible below:
//   - one that moves the result to a general-purpose register, built through
//     EmitConversion2 with a per-rounding-mode rmode (ROUND_A additionally
//     sets opcode bit 2);
//   - one that keeps the result in a floating-point register, written directly
//     with a per-rounding-mode opcode (0x1A-0x1C) and bit 1 of `sz` set for
//     ROUND_P and ROUND_Z.
// NOTE(review): the condition selecting between the two encodings and the
// switch headers are not visible in this excerpt.
// NOTE(review): FCVTU passes sign = true and FCVTS passes sign = false, so
// `sign` appears to select the unsigned variant despite its name - confirm.
void ARM64FloatEmitter::EmitConvertScalarToInt(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round, bool sign)
2246
_dbg_assert_msg_(JIT, IsScalar(Rn), "fcvts: Rn must be floating point");
2248
// Use the encoding that transfers the result to a GPR.
2249
bool sf = Is64Bit(Rd);
2250
int type = IsDouble(Rn) ? 1 : 0;
2253
int opcode = (sign ? 1 : 0);
2256
case ROUND_A: rmode = 0; opcode |= 4; break;
2257
case ROUND_P: rmode = 1; break;
2258
case ROUND_M: rmode = 2; break;
2259
case ROUND_Z: rmode = 3; break;
2260
case ROUND_N: rmode = 0; break;
2262
EmitConversion2(sf, 0, true, type, rmode, opcode, 0, Rd, Rn);
2266
// Use the encoding (vector, single) that keeps the result in the fp register.
2267
int sz = IsDouble(Rn);
2272
case ROUND_A: opcode = 0x1C; break;
2273
case ROUND_N: opcode = 0x1A; break;
2274
case ROUND_M: opcode = 0x1B; break;
2275
case ROUND_P: opcode = 0x1A; sz |= 2; break;
2276
case ROUND_Z: opcode = 0x1B; sz |= 2; break;
2278
Write32((0x5E << 24) | (sign << 29) | (sz << 22) | (1 << 21) | (opcode << 12) | (2 << 10) | (Rn << 5) | Rd);
2282
// FCVTS: scalar-to-int conversion helper with its last argument false.
void ARM64FloatEmitter::FCVTS(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round) {
	const bool last_arg = false;
	EmitConvertScalarToInt(Rd, Rn, round, last_arg);
}
2286
// FCVTU: scalar-to-int conversion helper with its last argument true.
void ARM64FloatEmitter::FCVTU(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round) {
	const bool last_arg = true;
	EmitConvertScalarToInt(Rd, Rn, round, last_arg);
}
2290
// Emits one instruction word: sf at bit 31, S at bit 29, type at bit 22,
// direction at bit 21, rmode at bit 19, opcode at bit 16, scale at bit 10, Rn
// at bit 5 and Rd at bit 0, OR'd with the fixed field (0xF0 << 21).
// NOTE(review): lines before Write32 are not visible in this excerpt.
void ARM64FloatEmitter::EmitConversion2(bool sf, bool S, bool direction, u32 type, u32 rmode, u32 opcode, int scale, ARM64Reg Rd, ARM64Reg Rn)
2295
Write32((sf << 31) | (S << 29) | (0xF0 << 21) | (direction << 21) | (type << 22) | (rmode << 19) | \
2296
(opcode << 16) | (scale << 10) | (Rn << 5) | Rd);
2299
// Emits one instruction word: M at bit 31, S at bit 29, the double flag (from
// Rn) at bit 22, Rm at bit 16, op at bit 14, Rn at bit 5 and opcode2 in the
// low bits, OR'd with the fixed fields (0xF1 << 21) and (1 << 13). Asserts
// that Rn is not a quad register.
// NOTE(review): lines between `is_double` and Write32 are not visible in this
// excerpt.
void ARM64FloatEmitter::EmitCompare(bool M, bool S, u32 op, u32 opcode2, ARM64Reg Rn, ARM64Reg Rm)
2301
_assert_msg_(DYNA_REC, !IsQuad(Rn), "%s doesn't support vector!", __FUNCTION__);
2302
bool is_double = IsDouble(Rn);
2307
Write32((M << 31) | (S << 29) | (0xF1 << 21) | (is_double << 22) | (Rm << 16) | \
2308
(op << 14) | (1 << 13) | (Rn << 5) | opcode2);
2311
// Emits one instruction word: M at bit 31, S at bit 29, the double flag (from
// Rd) at bit 22, Rm at bit 16, cond at bit 12, Rn at bit 5 and Rd at bit 0,
// OR'd with the fixed fields (0xF1 << 21) and (3 << 10). Asserts that Rd is
// not a quad register.
// NOTE(review): lines between `is_double` and Write32 are not visible in this
// excerpt.
void ARM64FloatEmitter::EmitCondSelect(bool M, bool S, CCFlags cond, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
2313
_assert_msg_(DYNA_REC, !IsQuad(Rd), "%s doesn't support vector!", __FUNCTION__);
2314
bool is_double = IsDouble(Rd);
2320
Write32((M << 31) | (S << 29) | (0xF1 << 21) | (is_double << 22) | (Rm << 16) | \
2321
(cond << 12) | (3 << 10) | (Rn << 5) | Rd);
2324
// Emits one instruction word: quad flag (from Rd) at bit 30, encoded_size at
// bit 22, Rm at bit 16, op at bit 12, Rn at bit 5 and Rd at bit 0, OR'd with
// the fixed fields (7 << 25) and (1 << 11). `encoded_size` is derived from
// `size` by an if/else chain. Asserts that Rd is not a single register.
// NOTE(review): the size-to-encoding assignments and register decoding are
// not visible in this excerpt.
void ARM64FloatEmitter::EmitPermute(u32 size, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
2326
_assert_msg_(DYNA_REC, !IsSingle(Rd), "%s doesn't support singles!", __FUNCTION__);
2328
bool quad = IsQuad(Rd);
2330
u32 encoded_size = 0;
2333
else if (size == 32)
2335
else if (size == 64)
2342
Write32((quad << 30) | (7 << 25) | (encoded_size << 22) | (Rm << 16) | (op << 12) | \
2343
(1 << 11) | (Rn << 5) | Rd);
2346
// Emits one instruction word: M at bit 31, S at bit 29, imm8 at bit 13, imm5
// at bit 5 and Rd at bit 0, OR'd with the fixed fields (0xF1 << 21) and
// (1 << 12). Both `is_double` (true when Rd is not a single register) and
// `type` are OR'd in at bit 22. Asserts that Rd is not a quad register.
// NOTE(review): lines between `is_double` and Write32 are not visible in this
// excerpt.
void ARM64FloatEmitter::EmitScalarImm(bool M, bool S, u32 type, u32 imm5, ARM64Reg Rd, u32 imm8)
2348
_assert_msg_(DYNA_REC, !IsQuad(Rd), "%s doesn't support vector!", __FUNCTION__);
2350
bool is_double = !IsSingle(Rd);
2354
Write32((M << 31) | (S << 29) | (0xF1 << 21) | (is_double << 22) | (type << 22) | \
2355
(imm8 << 13) | (1 << 12) | (imm5 << 5) | Rd);
2358
// Emits one instruction word: Q at bit 30, U at bit 29, immh at bit 19, immb
// at bit 16, opcode at bit 11, Rn at bit 5 and Rd at bit 0, OR'd with the
// fixed fields (0xF << 24) and (1 << 10). Asserts that immh is non-zero.
// NOTE(review): lines between the assert and Write32 are not visible in this
// excerpt.
void ARM64FloatEmitter::EmitShiftImm(bool Q, bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)
2360
_assert_msg_(DYNA_REC, immh, "%s bad encoding! Can't have zero immh", __FUNCTION__);
2365
Write32((Q << 30) | (U << 29) | (0xF << 24) | (immh << 19) | (immb << 16) | \
2366
(opcode << 11) | (1 << 10) | (Rn << 5) | Rd);
2369
// Emits one instruction word: U at bit 29, immh at bit 19, immb at bit 16,
// opcode at bit 11, Rn at bit 5 and Rd at bit 0, OR'd with the fixed fields
// (2 << 30), (0x3E << 23) and (1 << 10).
// NOTE(review): lines before Write32 are not visible in this excerpt.
void ARM64FloatEmitter::EmitScalarShiftImm(bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn) {
2373
Write32((2 << 30) | (U << 29) | (0x3E << 23) | (immh << 19) | (immb << 16) | (opcode << 11) | (1 << 10) | (Rn << 5) | Rd);
2376
// Emits one instruction word: quad flag (from Rt) at bit 30, L at bit 22,
// opcode at bit 12, encoded_size at bit 10, Rn at bit 5 and Rt at bit 0, OR'd
// with the fixed field (3 << 26). `encoded_size` is derived from `size` by an
// if/else chain.
// NOTE(review): the size-to-encoding assignments and register decoding are
// not visible in this excerpt.
void ARM64FloatEmitter::EmitLoadStoreMultipleStructure(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn)
2378
bool quad = IsQuad(Rt);
2379
u32 encoded_size = 0;
2383
else if (size == 32)
2385
else if (size == 64)
2391
Write32((quad << 30) | (3 << 26) | (L << 22) | (opcode << 12) | \
2392
(encoded_size << 10) | (Rn << 5) | Rt);
2395
// Variant of the multiple-structure load/store that also encodes Rm. Emits one
// instruction word: quad flag (from Rt) at bit 30, L at bit 22, Rm at bit 16,
// opcode at bit 12, encoded_size at bit 10, Rn at bit 5 and Rt at bit 0, OR'd
// with the fixed field (0x19 << 23). `encoded_size` is derived from `size` by
// an if/else chain.
// NOTE(review): the size-to-encoding assignments and register decoding are
// not visible in this excerpt.
void ARM64FloatEmitter::EmitLoadStoreMultipleStructurePost(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm)
2397
bool quad = IsQuad(Rt);
2398
u32 encoded_size = 0;
2402
else if (size == 32)
2404
else if (size == 64)
2411
Write32((quad << 30) | (0x19 << 23) | (L << 22) | (Rm << 16) | (opcode << 12) | \
2412
(encoded_size << 10) | (Rn << 5) | Rt);
2416
// Emits one instruction word: M at bit 31, S at bit 29, type at bit 22, opcode
// at bit 15, Rn at bit 5 and Rd at bit 0, OR'd with the fixed fields
// (0xF1 << 21) and (1 << 14). Asserts that Rd is not a quad register.
// NOTE(review): lines between the assert and Write32 are not visible in this
// excerpt.
void ARM64FloatEmitter::EmitScalar1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)
2418
_assert_msg_(DYNA_REC, !IsQuad(Rd), "%s doesn't support vector!", __FUNCTION__);
2423
Write32((M << 31) | (S << 29) | (0xF1 << 21) | (type << 22) | \
2424
(opcode << 15) | (1 << 14) | (Rn << 5) | Rd);
2427
// Writes one instruction word for a "vector x indexed element" operation.
// The quad bit (bit 30) comes from IsQuad(Rd). Fields: U at bit 29, fixed
// 0xF << 24, size at bit 22, L at bit 21, Rm at bit 16, opcode at bit 12,
// H at bit 11, Rn at bit 5, Rd in the low bits.
void ARM64FloatEmitter::EmitVectorxElement(bool U, u32 size, bool L, u32 opcode, bool H, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
2429
bool quad = IsQuad(Rd);
2435
Write32((quad << 30) | (U << 29) | (0xF << 24) | (size << 22) | (L << 21) | \
2436
(Rm << 16) | (opcode << 12) | (H << 11) | (Rn << 5) | Rd);
2439
// Writes one instruction word for an unscaled-offset load/store.
// imm must lie in [-256, 255] (asserted); only its low 9 bits are encoded,
// at bit 12. size goes to bit 30 and op to bit 22.
void ARM64FloatEmitter::EmitLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
2441
_assert_msg_(DYNA_REC, !(imm < -256 || imm > 255), "%s received too large offset: %d", __FUNCTION__, imm);
2445
Write32((size << 30) | (0xF << 26) | (op << 22) | ((imm & 0x1FF) << 12) | (Rn << 5) | Rt);
2448
// Writes one instruction word for a load/store of a register pair.
// `type` selects type_encode (placed at bit 23); INDEX_UNSIGNED is rejected
// with an assert. The offset must be aligned to the access size: the asserts
// below require the low 4, 3 or 2 bits of imm to be zero depending on the
// size branch. Only the low 7 bits of the final imm are encoded, at bit 15.
// NOTE(review): opc and any scaling of imm are set on lines not visible
// here -- confirm before relying on the exact field values.
void ARM64FloatEmitter::EncodeLoadStorePair(u32 size, bool load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)
2450
u32 type_encode = 0;
2464
case INDEX_UNSIGNED:
2465
_assert_msg_(DYNA_REC, false, "%s doesn't support INDEX_UNSIGNED!", __FUNCTION__);
2471
// Offset must be a multiple of 16 in this branch.
_assert_msg_(DYNA_REC, !(imm & 0xF), "%s received invalid offset 0x%x!", __FUNCTION__, imm);
2475
else if (size == 64)
2477
// Offset must be a multiple of 8 for 64-bit accesses.
_assert_msg_(DYNA_REC, !(imm & 0x7), "%s received invalid offset 0x%x!", __FUNCTION__, imm);
2481
else if (size == 32)
2483
// Offset must be a multiple of 4 for 32-bit accesses.
_assert_msg_(DYNA_REC, !(imm & 0x3), "%s received invalid offset 0x%x!", __FUNCTION__, imm);
2489
Rt2 = DecodeReg(Rt2);
2492
Write32((opc << 30) | (0xB << 26) | (type_encode << 23) | (load << 22) | \
2493
((imm & 0x7F) << 15) | (Rt2 << 10) | (Rn << 5) | Rt);
2497
// Writes one instruction word for a load/store with a register offset.
// Rm must be an extended-register ArithOption (asserted). The size chain
// (16/32/64/128 visible) selects encoded_size (bit 30) and encoded_op
// (bit 22). Rm's register is decoded and placed at bit 16, and
// Rm.GetData() is ORed in unchanged.
void ARM64FloatEmitter::EncodeLoadStoreRegisterOffset(u32 size, bool load, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
2499
_assert_msg_(DYNA_REC, Rm.GetType() == ArithOption::TYPE_EXTENDEDREG, "%s must contain an extended reg as Rm!", __FUNCTION__);
2501
u32 encoded_size = 0;
2509
else if (size == 16)
2514
else if (size == 32)
2519
else if (size == 64)
2524
else if (size == 128)
2535
ARM64Reg decoded_Rm = DecodeReg(Rm.GetReg());
2537
Write32((encoded_size << 30) | (encoded_op << 22) | (0x1E1 << 21) | (decoded_Rm << 16) | \
2538
Rm.GetData() | (1 << 11) | (Rn << 5) | Rt);
2541
// Load with immediate offset: forwards to EmitLoadStoreImmediate with the
// second argument set to 1.
void ARM64FloatEmitter::LDR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
2543
EmitLoadStoreImmediate(size, 1, type, Rt, Rn, imm);
2545
// Store with immediate offset: same call as LDR above but with the second
// argument set to 0.
void ARM64FloatEmitter::STR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
2547
EmitLoadStoreImmediate(size, 0, type, Rt, Rn, imm);
2550
// Loadstore unscaled
// LDUR: picks encoded_size / encoded_op from `size` (16/32/64/128 branches
// visible) and forwards to EmitLoadStoreUnscaled, which range-checks imm.
void ARM64FloatEmitter::LDUR(u8 size, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
2553
u32 encoded_size = 0;
2561
else if (size == 16)
2566
else if (size == 32)
2571
else if (size == 64)
2576
else if (size == 128)
2582
EmitLoadStoreUnscaled(encoded_size, encoded_op, Rt, Rn, imm);
2584
// STUR: store counterpart of LDUR with the same size dispatch; the values
// assigned to encoded_size / encoded_op are set inside the branches.
void ARM64FloatEmitter::STUR(u8 size, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
2586
u32 encoded_size = 0;
2594
else if (size == 16)
2599
else if (size == 32)
2604
else if (size == 64)
2609
else if (size == 128)
2615
EmitLoadStoreUnscaled(encoded_size, encoded_op, Rt, Rn, imm);
2619
// Loadstore single structure
// LD1 (single element to one lane, no post-index). `index` is the lane
// number; each size branch splits it into the S flag and encoded_size and
// picks a quad or double encoding for Rt, then the result is forwarded to
// EmitLoadStoreSingleStructure with its first argument set to 1.
void ARM64FloatEmitter::LD1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn)
2624
u32 encoded_size = 0;
2625
ARM64Reg encoded_reg = INVALID_REG;
2629
// First branch (presumably size == 8 -- condition not visible here):
// S is index bit 2, encoded_size is the low two index bits.
S = (index & 4) != 0;
2631
encoded_size = index & 3;
2633
encoded_reg = EncodeRegToQuad(Rt);
2635
encoded_reg = EncodeRegToDouble(Rt);
2638
else if (size == 16)
2640
// 16-bit lanes: S is index bit 1, index bit 0 goes to encoded_size bit 1.
S = (index & 2) != 0;
2642
encoded_size = (index & 1) << 1;
2644
encoded_reg = EncodeRegToQuad(Rt);
2646
encoded_reg = EncodeRegToDouble(Rt);
2649
else if (size == 32)
2651
// 32-bit lanes: S is index bit 0.
S = (index & 1) != 0;
2655
encoded_reg = EncodeRegToQuad(Rt);
2657
encoded_reg = EncodeRegToDouble(Rt);
2659
else if (size == 64)
2665
encoded_reg = EncodeRegToQuad(Rt);
2667
encoded_reg = EncodeRegToDouble(Rt);
2670
EmitLoadStoreSingleStructure(1, 0, opcode, S, encoded_size, encoded_reg, Rn);
2673
// LD1 (single element to one lane) overload that additionally passes Rm to
// EmitLoadStoreSingleStructure. The lane-index decomposition is the same as
// in the overload without Rm.
void ARM64FloatEmitter::LD1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn, ARM64Reg Rm)
2677
u32 encoded_size = 0;
2678
ARM64Reg encoded_reg = INVALID_REG;
2682
// First branch (presumably size == 8 -- condition not visible here):
// S is index bit 2, encoded_size is the low two index bits.
S = (index & 4) != 0;
2684
encoded_size = index & 3;
2686
encoded_reg = EncodeRegToQuad(Rt);
2688
encoded_reg = EncodeRegToDouble(Rt);
2691
else if (size == 16)
2693
// 16-bit lanes: S is index bit 1, index bit 0 goes to encoded_size bit 1.
S = (index & 2) != 0;
2695
encoded_size = (index & 1) << 1;
2697
encoded_reg = EncodeRegToQuad(Rt);
2699
encoded_reg = EncodeRegToDouble(Rt);
2702
else if (size == 32)
2704
// 32-bit lanes: S is index bit 0.
S = (index & 1) != 0;
2708
encoded_reg = EncodeRegToQuad(Rt);
2710
encoded_reg = EncodeRegToDouble(Rt);
2712
else if (size == 64)
2718
encoded_reg = EncodeRegToQuad(Rt);
2720
encoded_reg = EncodeRegToDouble(Rt);
2723
EmitLoadStoreSingleStructure(1, 0, opcode, S, encoded_size, encoded_reg, Rn, Rm);
2726
// LD1R / LD2R wrappers. All four pass opcode 6, S = 0 and size >> 4 as the
// size field to EmitLoadStoreSingleStructure; LD2R differs from LD1R only in
// the second argument (1 instead of 0). The Rm overloads forward Rm as well.
void ARM64FloatEmitter::LD1R(u8 size, ARM64Reg Rt, ARM64Reg Rn)
2728
EmitLoadStoreSingleStructure(1, 0, 6, 0, size >> 4, Rt, Rn);
2730
void ARM64FloatEmitter::LD2R(u8 size, ARM64Reg Rt, ARM64Reg Rn)
2732
EmitLoadStoreSingleStructure(1, 1, 6, 0, size >> 4, Rt, Rn);
2734
void ARM64FloatEmitter::LD1R(u8 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm)
2736
EmitLoadStoreSingleStructure(1, 0, 6, 0, size >> 4, Rt, Rn, Rm);
2738
void ARM64FloatEmitter::LD2R(u8 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm)
2740
EmitLoadStoreSingleStructure(1, 1, 6, 0, size >> 4, Rt, Rn, Rm);
2743
// ST1 (single element from one lane, no post-index). Mirrors the LD1 lane
// form: `index` is split into S and encoded_size per element size, Rt is
// re-encoded as quad or double, and the first argument passed to
// EmitLoadStoreSingleStructure is 0 instead of 1.
void ARM64FloatEmitter::ST1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn)
2747
u32 encoded_size = 0;
2748
ARM64Reg encoded_reg = INVALID_REG;
2752
// First branch (presumably size == 8 -- condition not visible here):
// S is index bit 2, encoded_size is the low two index bits.
S = (index & 4) != 0;
2754
encoded_size = index & 3;
2756
encoded_reg = EncodeRegToQuad(Rt);
2758
encoded_reg = EncodeRegToDouble(Rt);
2761
else if (size == 16)
2763
// 16-bit lanes: S is index bit 1, index bit 0 goes to encoded_size bit 1.
S = (index & 2) != 0;
2765
encoded_size = (index & 1) << 1;
2767
encoded_reg = EncodeRegToQuad(Rt);
2769
encoded_reg = EncodeRegToDouble(Rt);
2772
else if (size == 32)
2774
// 32-bit lanes: S is index bit 0.
S = (index & 1) != 0;
2778
encoded_reg = EncodeRegToQuad(Rt);
2780
encoded_reg = EncodeRegToDouble(Rt);
2782
else if (size == 64)
2788
encoded_reg = EncodeRegToQuad(Rt);
2790
encoded_reg = EncodeRegToDouble(Rt);
2793
EmitLoadStoreSingleStructure(0, 0, opcode, S, encoded_size, encoded_reg, Rn);
2796
// ST1 (single element from one lane) overload that additionally passes Rm to
// EmitLoadStoreSingleStructure. The lane-index decomposition is the same as
// in the overload without Rm.
void ARM64FloatEmitter::ST1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn, ARM64Reg Rm)
2800
u32 encoded_size = 0;
2801
ARM64Reg encoded_reg = INVALID_REG;
2805
// First branch (presumably size == 8 -- condition not visible here):
// S is index bit 2, encoded_size is the low two index bits.
S = (index & 4) != 0;
2807
encoded_size = index & 3;
2809
encoded_reg = EncodeRegToQuad(Rt);
2811
encoded_reg = EncodeRegToDouble(Rt);
2814
else if (size == 16)
2816
// 16-bit lanes: S is index bit 1, index bit 0 goes to encoded_size bit 1.
S = (index & 2) != 0;
2818
encoded_size = (index & 1) << 1;
2820
encoded_reg = EncodeRegToQuad(Rt);
2822
encoded_reg = EncodeRegToDouble(Rt);
2825
else if (size == 32)
2827
// 32-bit lanes: S is index bit 0.
S = (index & 1) != 0;
2831
encoded_reg = EncodeRegToQuad(Rt);
2833
encoded_reg = EncodeRegToDouble(Rt);
2835
else if (size == 64)
2841
encoded_reg = EncodeRegToQuad(Rt);
2843
encoded_reg = EncodeRegToDouble(Rt);
2846
EmitLoadStoreSingleStructure(0, 0, opcode, S, encoded_size, encoded_reg, Rn, Rm);
2849
// Loadstore multiple structure
// LD1 of `count` consecutive registers starting at Rt. count must be 1..4
// (asserted); the count chain selects `opcode`, which is forwarded with
// L = 1 to EmitLoadStoreMultipleStructure.
void ARM64FloatEmitter::LD1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn)
2852
_assert_msg_(DYNA_REC, !(count == 0 || count > 4), "%s must have a count of 1 to 4 registers!", __FUNCTION__);
2856
else if (count == 2)
2858
else if (count == 3)
2860
else if (count == 4)
2862
EmitLoadStoreMultipleStructure(size, 1, opcode, Rt, Rn);
2864
// Post-indexed LD1 of `count` registers. Only INDEX_POST is accepted
// (asserted); Rm is forwarded to the post-index emitter.
void ARM64FloatEmitter::LD1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm)
2866
_assert_msg_(DYNA_REC, !(count == 0 || count > 4), "%s must have a count of 1 to 4 registers!", __FUNCTION__);
2867
_assert_msg_(DYNA_REC, type == INDEX_POST, "%s only supports post indexing!", __FUNCTION__);
2872
else if (count == 2)
2874
else if (count == 3)
2876
else if (count == 4)
2878
EmitLoadStoreMultipleStructurePost(size, 1, opcode, Rt, Rn, Rm);
2880
// ST1 of `count` consecutive registers: same as the LD1 form above but
// with L = 0.
void ARM64FloatEmitter::ST1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn)
2882
_assert_msg_(DYNA_REC, !(count == 0 || count > 4), "%s must have a count of 1 to 4 registers!", __FUNCTION__);
2886
else if (count == 2)
2888
else if (count == 3)
2890
else if (count == 4)
2892
EmitLoadStoreMultipleStructure(size, 0, opcode, Rt, Rn);
2894
// Post-indexed ST1 of `count` registers. Only INDEX_POST is accepted
// (asserted); L = 0.
void ARM64FloatEmitter::ST1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm)
2896
_assert_msg_(DYNA_REC, !(count == 0 || count > 4), "%s must have a count of 1 to 4 registers!", __FUNCTION__);
2897
_assert_msg_(DYNA_REC, type == INDEX_POST, "%s only supports post indexing!", __FUNCTION__);
2902
else if (count == 2)
2904
else if (count == 3)
2906
else if (count == 4)
2908
EmitLoadStoreMultipleStructurePost(size, 0, opcode, Rt, Rn, Rm);
2911
// Scalar - 1 Source
// FMOV between registers. When both operands are scalar FP registers this
// is emitted through EmitScalar1Source with opcode 0. Otherwise quads are
// rejected (asserted) and the move is encoded directly via the Write32 at
// the end, using sf / rmode / opcode chosen by the case analysis below.
// Any combination not matched hits the "Unhandled case" assert.
void ARM64FloatEmitter::FMOV(ARM64Reg Rd, ARM64Reg Rn, bool top)
2914
if (IsScalar(Rd) && IsScalar(Rn)) {
2915
EmitScalar1Source(0, 0, IsDouble(Rd), 0, Rd, Rn);
2917
_assert_msg_(JIT, !IsQuad(Rd) && !IsQuad(Rn), "FMOV can't move to/from quads");
2921
if (IsSingle(Rd) && !Is64Bit(Rn) && !top) {
2922
// GPR to scalar single
2924
} else if (!Is64Bit(Rd) && IsSingle(Rn) && !top) {
2925
// Scalar single to GPR - defaults are correct
2928
_assert_msg_(JIT, 0, "FMOV: Unhandled case");
2932
Write32((sf << 31) | (0x1e2 << 20) | (rmode << 19) | (opcode << 16) | (Rn << 5) | Rd);
2937
// Load pair: forwards to EncodeLoadStorePair with load = true.
void ARM64FloatEmitter::LDP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)
2939
EncodeLoadStorePair(size, true, type, Rt, Rt2, Rn, imm);
2941
// Store pair: forwards to EncodeLoadStorePair with load = false.
void ARM64FloatEmitter::STP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)
2943
EncodeLoadStorePair(size, false, type, Rt, Rt2, Rn, imm);
2946
// Loadstore register offset
2947
// Store with register offset: forwards to EncodeLoadStoreRegisterOffset with
// load = false. That helper asserts Rm is an extended-register option.
void ARM64FloatEmitter::STR(u8 size, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
2949
EncodeLoadStoreRegisterOffset(size, false, Rt, Rn, Rm);
2951
// Load with register offset: same helper with load = true.
void ARM64FloatEmitter::LDR(u8 size, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
2953
EncodeLoadStoreRegisterOffset(size, true, Rt, Rn, Rm);
2956
// Scalar one-source wrappers. Each forwards to EmitScalar1Source with
// M = 0, S = 0 and type = IsDouble(Rd); only the opcode differs:
// FABS = 1, FNEG = 2, FSQRT = 3.
void ARM64FloatEmitter::FABS(ARM64Reg Rd, ARM64Reg Rn)
2958
EmitScalar1Source(0, 0, IsDouble(Rd), 1, Rd, Rn);
2960
void ARM64FloatEmitter::FNEG(ARM64Reg Rd, ARM64Reg Rn)
2962
EmitScalar1Source(0, 0, IsDouble(Rd), 2, Rd, Rn);
2964
void ARM64FloatEmitter::FSQRT(ARM64Reg Rd, ARM64Reg Rn)
2966
EmitScalar1Source(0, 0, IsDouble(Rd), 3, Rd, Rn);
2970
// Scalar - 2 Source
// Each wrapper forwards to EmitScalar2Source with M = 0, S = 0 and
// type = IsDouble(Rd); only the opcode differs:
// FMUL = 0, FDIV = 1, FADD = 2, FSUB = 3, FMAX = 4, FMIN = 5,
// FMAXNM = 6, FMINNM = 7, FNMUL = 8.
2971
void ARM64FloatEmitter::FADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
2973
EmitScalar2Source(0, 0, IsDouble(Rd), 2, Rd, Rn, Rm);
2975
void ARM64FloatEmitter::FMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
2977
EmitScalar2Source(0, 0, IsDouble(Rd), 0, Rd, Rn, Rm);
2979
void ARM64FloatEmitter::FSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
2981
EmitScalar2Source(0, 0, IsDouble(Rd), 3, Rd, Rn, Rm);
2983
void ARM64FloatEmitter::FDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
2985
EmitScalar2Source(0, 0, IsDouble(Rd), 1, Rd, Rn, Rm);
2987
void ARM64FloatEmitter::FMAX(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
2989
EmitScalar2Source(0, 0, IsDouble(Rd), 4, Rd, Rn, Rm);
2991
void ARM64FloatEmitter::FMIN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
2993
EmitScalar2Source(0, 0, IsDouble(Rd), 5, Rd, Rn, Rm);
2995
void ARM64FloatEmitter::FMAXNM(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
2997
EmitScalar2Source(0, 0, IsDouble(Rd), 6, Rd, Rn, Rm);
2999
void ARM64FloatEmitter::FMINNM(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3001
EmitScalar2Source(0, 0, IsDouble(Rd), 7, Rd, Rn, Rm);
3003
void ARM64FloatEmitter::FNMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3005
EmitScalar2Source(0, 0, IsDouble(Rd), 8, Rd, Rn, Rm);
3008
// Scalar three-source wrappers. Each forwards to EmitScalar3Source with
// isDouble = IsDouble(Rd); the trailing opcode is
// FMADD = 0, FMSUB = 1, FNMADD = 2, FNMSUB = 3.
void ARM64FloatEmitter::FMADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {
3009
EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 0);
3011
void ARM64FloatEmitter::FMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {
3012
EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 1);
3014
void ARM64FloatEmitter::FNMADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {
3015
EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 2);
3017
void ARM64FloatEmitter::FNMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {
3018
EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 3);
3021
// Writes one instruction word for a scalar three-source operation.
// The 2-bit opcode is split: its high bit (o1) goes to bit 21 and its low
// bit (o0) to bit 15. type (1 for double, 0 otherwise) goes to bit 22,
// Rm to bit 16, Ra to bit 10, Rn to bit 5. Note this writes through
// m_emit->Write32 rather than the plain Write32 used elsewhere in this file.
void ARM64FloatEmitter::EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra, int opcode) {
3022
int type = isDouble ? 1 : 0;
3027
int o1 = opcode >> 1;
3028
int o0 = opcode & 1;
3029
m_emit->Write32((0x1F << 24) | (type << 22) | (o1 << 21) | (Rm << 16) | (o0 << 15) | (Ra << 10) | (Rn << 5) | Rd);
3032
// Scalar floating point immediate
3033
// Loads the 8-bit encoded FP immediate imm8 into Rd by forwarding to
// EmitScalarImm with M, S, type and imm5 all zero.
void ARM64FloatEmitter::FMOV(ARM64Reg Rd, uint8_t imm8)
3035
EmitScalarImm(0, 0, 0, 0, Rd, imm8);
3039
// Vector bitwise wrappers over EmitThreeSame, all with opcode 3.
// The first two arguments are (0, 0) for AND, (1, 0) for EOR and
// (1, 1) for BSL.
void ARM64FloatEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3041
EmitThreeSame(0, 0, 3, Rd, Rn, Rm);
3043
void ARM64FloatEmitter::EOR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3045
EmitThreeSame(1, 0, 3, Rd, Rn, Rm);
3047
void ARM64FloatEmitter::BSL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3049
EmitThreeSame(1, 1, 3, Rd, Rn, Rm);
3051
// DUP from a vector element: the size chain builds imm5 from `size` and
// `index`, and the result is emitted through EmitCopy with op = 0 and
// imm4 = 0. The Q argument comes from IsQuad(Rd).
void ARM64FloatEmitter::DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index)
3060
else if (size == 16)
3065
else if (size == 32)
3070
else if (size == 64)
3076
EmitCopy(IsQuad(Rd), 0, imm5, 0, Rd, Rn);
3078
// Vector floating-point wrappers. `size` is the element width in bits;
// size >> 6 is 1 only for 64-bit elements and is used as (part of) the
// size field passed to Emit2RegMisc / EmitThreeSame. Several ops OR in 2
// to set the upper bit of that field.
void ARM64FloatEmitter::FABS(u8 size, ARM64Reg Rd, ARM64Reg Rn)
3080
Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0xF, Rd, Rn);
3082
void ARM64FloatEmitter::FADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3084
EmitThreeSame(0, size >> 6, 0x1A, Rd, Rn, Rm);
3086
void ARM64FloatEmitter::FMAX(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3088
EmitThreeSame(0, size >> 6, 0x1E, Rd, Rn, Rm);
3090
void ARM64FloatEmitter::FMLA(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3092
EmitThreeSame(0, size >> 6, 0x19, Rd, Rn, Rm);
3094
// Same opcode as FMAX; `2 | size >> 6` parses as `2 | (size >> 6)` because
// >> binds tighter than |.
void ARM64FloatEmitter::FMIN(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3096
EmitThreeSame(0, 2 | size >> 6, 0x1E, Rd, Rn, Rm);
3098
// FCVTL / FCVTL2 pass a fixed Q of false / true instead of IsQuad(Rd).
void ARM64FloatEmitter::FCVTL(u8 size, ARM64Reg Rd, ARM64Reg Rn)
3100
Emit2RegMisc(false, 0, size >> 6, 0x17, Rd, Rn);
3102
void ARM64FloatEmitter::FCVTL2(u8 size, ARM64Reg Rd, ARM64Reg Rn)
3104
Emit2RegMisc(true, 0, size >> 6, 0x17, Rd, Rn);
3106
// FCVTN takes the destination element size and shifts by 5 rather than 6.
void ARM64FloatEmitter::FCVTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
3108
Emit2RegMisc(IsQuad(Rd), 0, dest_size >> 5, 0x16, Rd, Rn);
3110
// FCVTZS / FCVTZU share opcode 0x1B and differ only in the U argument.
void ARM64FloatEmitter::FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn)
3112
Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0x1B, Rd, Rn);
3114
void ARM64FloatEmitter::FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn)
3116
Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0x1B, Rd, Rn);
3118
void ARM64FloatEmitter::FDIV(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3120
EmitThreeSame(1, size >> 6, 0x1F, Rd, Rn, Rm);
3122
void ARM64FloatEmitter::FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3124
EmitThreeSame(1, size >> 6, 0x1B, Rd, Rn, Rm);
3126
// Integer min/max wrappers over EmitThreeSame. The size field comes from
// EncodeSize(size). Opcode 0xD is used for MIN and 0xC for MAX; the first
// argument is 1 for the unsigned forms and 0 for the signed forms.
void ARM64FloatEmitter::UMIN(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3128
EmitThreeSame(1, EncodeSize(size), 0xD, Rd, Rn, Rm);
3130
void ARM64FloatEmitter::UMAX(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3132
EmitThreeSame(1, EncodeSize(size), 0xC, Rd, Rn, Rm);
3134
void ARM64FloatEmitter::SMIN(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3136
EmitThreeSame(0, EncodeSize(size), 0xD, Rd, Rn, Rm);
3138
void ARM64FloatEmitter::SMAX(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3140
EmitThreeSame(0, EncodeSize(size), 0xC, Rd, Rn, Rm);
3142
// More vector wrappers. FNEG uses the same opcode (0xF) as the vector FABS
// but with U = 1.
void ARM64FloatEmitter::FNEG(u8 size, ARM64Reg Rd, ARM64Reg Rn)
3144
Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0xF, Rd, Rn);
3146
void ARM64FloatEmitter::FRSQRTE(u8 size, ARM64Reg Rd, ARM64Reg Rn)
3148
Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0x1D, Rd, Rn);
3150
// FSUB / FMLS reuse the FADD (0x1A) / FMLA (0x19) opcodes with the upper
// size bit set via `2 |`.
void ARM64FloatEmitter::FSUB(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3152
EmitThreeSame(0, 2 | (size >> 6), 0x1A, Rd, Rn, Rm);
3154
void ARM64FloatEmitter::FMLS(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3156
EmitThreeSame(0, 2 | (size >> 6), 0x19, Rd, Rn, Rm);
3158
// Bitwise NOT: two-register misc op with U = 1, size = 0, opcode 5.
void ARM64FloatEmitter::NOT(ARM64Reg Rd, ARM64Reg Rn)
3160
Emit2RegMisc(IsQuad(Rd), 1, 0, 5, Rd, Rn);
3162
// Bitwise ORR: same opcode (3) as AND/EOR/BSL, with size = 2.
void ARM64FloatEmitter::ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3164
EmitThreeSame(0, 2, 3, Rd, Rn, Rm);
3166
// Element-reversal wrappers over Emit2RegMisc. The size field is size >> 4.
// (U, opcode) is (0, 1) for REV16, (1, 0) for REV32 and (0, 0) for REV64.
void ARM64FloatEmitter::REV16(u8 size, ARM64Reg Rd, ARM64Reg Rn)
3168
Emit2RegMisc(IsQuad(Rd), 0, size >> 4, 1, Rd, Rn);
3170
void ARM64FloatEmitter::REV32(u8 size, ARM64Reg Rd, ARM64Reg Rn)
3172
Emit2RegMisc(IsQuad(Rd), 1, size >> 4, 0, Rd, Rn);
3174
void ARM64FloatEmitter::REV64(u8 size, ARM64Reg Rd, ARM64Reg Rn)
3176
Emit2RegMisc(IsQuad(Rd), 0, size >> 4, 0, Rd, Rn);
3178
// Vector integer-to-float conversions. The plain forms go through
// Emit2RegMisc with opcode 0x1D; U = 0 for SCVTF (signed) and 1 for UCVTF.
void ARM64FloatEmitter::SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn)
3180
Emit2RegMisc(IsQuad(Rd), 0, size >> 6, 0x1D, Rd, Rn);
3182
void ARM64FloatEmitter::UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn)
3184
Emit2RegMisc(IsQuad(Rd), 1, size >> 6, 0x1D, Rd, Rn);
3186
// Fixed-point forms: the shift immediate is size * 2 - scale, split into
// immh (imm >> 3) and immb (imm & 7) for EmitShiftImm with opcode 0x1C.
// EmitShiftImm asserts immh is non-zero.
void ARM64FloatEmitter::SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale)
3188
int imm = size * 2 - scale;
3189
EmitShiftImm(IsQuad(Rd), 0, imm >> 3, imm & 7, 0x1C, Rd, Rn);
3191
void ARM64FloatEmitter::UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale)
3193
int imm = size * 2 - scale;
3194
EmitShiftImm(IsQuad(Rd), 1, imm >> 3, imm & 7, 0x1C, Rd, Rn);
3196
// Narrowing wrappers over Emit2RegMisc. The size field is dest_size >> 4.
// The plain forms pass Q = false and the "2" forms pass Q = true.
// SQXTN/UQXTN use opcode 0x14 (U = 0 / 1); XTN uses opcode 0x12 with U = 0.
void ARM64FloatEmitter::SQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
3198
Emit2RegMisc(false, 0, dest_size >> 4, 0x14, Rd, Rn);
3200
void ARM64FloatEmitter::SQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
3202
Emit2RegMisc(true, 0, dest_size >> 4, 0x14, Rd, Rn);
3204
void ARM64FloatEmitter::UQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
3206
Emit2RegMisc(false, 1, dest_size >> 4, 0x14, Rd, Rn);
3208
void ARM64FloatEmitter::UQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
3210
Emit2RegMisc(true, 1, dest_size >> 4, 0x14, Rd, Rn);
3212
void ARM64FloatEmitter::XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
3214
Emit2RegMisc(false, 0, dest_size >> 4, 0x12, Rd, Rn);
3216
void ARM64FloatEmitter::XTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
3218
Emit2RegMisc(true, 0, dest_size >> 4, 0x12, Rd, Rn);
3222
// DUP without an element index: the size chain selects imm5 and the result
// is emitted through EmitCopy with op = 0 and imm4 = 1 (the element form of
// DUP uses imm4 = 0).
// NOTE(review): presumably Rn is a general-purpose register here -- confirm.
void ARM64FloatEmitter::DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn)
3228
else if (size == 16)
3230
else if (size == 32)
3232
else if (size == 64)
3235
EmitCopy(IsQuad(Rd), 0, imm5, 1, Rd, Rn);
3238
// INS into lane `index` of Rd from Rn. The size chain builds imm5; the copy
// is emitted with Q = 1, op = 0 and imm4 = 3.
void ARM64FloatEmitter::INS(u8 size, ARM64Reg Rd, u8 index, ARM64Reg Rn)
3247
else if (size == 16)
3252
else if (size == 32)
3257
else if (size == 64)
3263
EmitCopy(1, 0, imm5, 3, Rd, Rn);
3265
// INS element-to-element: copies lane index2 of Rn into lane index1 of Rd.
// index1 is shifted into imm5 by 1, 2, 3 or 4 bits depending on the size
// branch; imm4 is filled in on lines not visible here (presumably from
// index2 -- confirm). Emitted with Q = 1, op = 1.
void ARM64FloatEmitter::INS(u8 size, ARM64Reg Rd, u8 index1, ARM64Reg Rn, u8 index2)
3267
u32 imm5 = 0, imm4 = 0;
3272
imm5 |= index1 << 1;
3275
else if (size == 16)
3278
imm5 |= index1 << 2;
3281
else if (size == 32)
3284
imm5 |= index1 << 3;
3287
else if (size == 64)
3290
imm5 |= index1 << 4;
3294
EmitCopy(1, 1, imm5, imm4, Rd, Rn);
3297
// UMOV: moves vector lane `index` of Rn into Rd. Rd must compare below SP
// (asserted as "must be a GPR"), and a 64-bit Rd requires size == 64.
// Emitted via EmitCopy with Q = Is64Bit(Rd), op = 0, imm4 = 7.
void ARM64FloatEmitter::UMOV(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index)
3299
bool b64Bit = Is64Bit(Rd);
3300
_assert_msg_(DYNA_REC, Rd < SP, "%s destination must be a GPR!", __FUNCTION__);
3301
_assert_msg_(DYNA_REC, !(b64Bit && size != 64), "%s must have a size of 64 when destination is 64bit!", __FUNCTION__);
3309
else if (size == 16)
3314
else if (size == 32)
3319
else if (size == 64)
3325
EmitCopy(b64Bit, 0, imm5, 7, Rd, Rn);
3327
// SMOV: like UMOV but emitted with imm4 = 5. size == 64 is rejected
// (asserted), so only narrower element sizes are accepted.
void ARM64FloatEmitter::SMOV(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index)
3329
bool b64Bit = Is64Bit(Rd);
3330
_assert_msg_(DYNA_REC, Rd < SP, "%s destination must be a GPR!", __FUNCTION__);
3331
_assert_msg_(DYNA_REC, size != 64, "%s doesn't support 64bit destination. Use UMOV!", __FUNCTION__);
3339
else if (size == 16)
3344
else if (size == 32)
3350
EmitCopy(b64Bit, 0, imm5, 5, Rd, Rn);
3354
// FCVT: converts between floating-point widths. size_to selects
// dst_encoding and size_from selects src_encoding (16/32/64-bit cases).
// Emitted through Emit1Source with the source encoding as the type field
// and `4 | dst_encoding` as the opcode.
void ARM64FloatEmitter::FCVT(u8 size_to, u8 size_from, ARM64Reg Rd, ARM64Reg Rn)
3356
u32 dst_encoding = 0;
3357
u32 src_encoding = 0;
3361
else if (size_to == 32)
3363
else if (size_to == 64)
3366
if (size_from == 16)
3368
else if (size_from == 32)
3370
else if (size_from == 64)
3373
Emit1Source(0, 0, src_encoding, 4 | dst_encoding, Rd, Rn);
3376
// Scalar signed int -> float. Two paths: when the source already lives in
// an FP register, a vector-class encoding is written directly with sz taken
// from IsDouble(Rn); otherwise the GPR source goes through EmitConversion
// with opcode 2 and sf = Is64Bit(Rn). The condition choosing between the
// paths, and the values of `sign` and `type`, are set on lines not visible
// here.
void ARM64FloatEmitter::SCVTF(ARM64Reg Rd, ARM64Reg Rn)
3379
// Source is in FP register (like destination!). We must use a vector encoding.
3383
int sz = IsDouble(Rn);
3384
Write32((0x5e << 24) | (sign << 29) | (sz << 22) | (0x876 << 10) | (Rn << 5) | Rd);
3386
bool sf = Is64Bit(Rn);
3390
EmitConversion(sf, 0, type, 0, 2, Rd, Rn);
3394
// Scalar unsigned int -> float. Same structure as SCVTF above; the GPR
// path uses opcode 3.
void ARM64FloatEmitter::UCVTF(ARM64Reg Rd, ARM64Reg Rn)
3397
// Source is in FP register (like destination!). We must use a vector encoding.
3401
int sz = IsDouble(Rn);
3402
Write32((0x5e << 24) | (sign << 29) | (sz << 22) | (0x876 << 10) | (Rn << 5) | Rd);
3404
bool sf = Is64Bit(Rn);
3409
EmitConversion(sf, 0, type, 0, 3, Rd, Rn);
3413
// Fixed-point signed int -> float: passes 64 - scale as the scale field to
// EmitConversion2, with opcode 2.
void ARM64FloatEmitter::SCVTF(ARM64Reg Rd, ARM64Reg Rn, int scale)
3415
bool sf = Is64Bit(Rn);
3420
EmitConversion2(sf, 0, false, type, 0, 2, 64 - scale, Rd, Rn);
3423
// Fixed-point unsigned int -> float: as above with opcode 3.
void ARM64FloatEmitter::UCVTF(ARM64Reg Rd, ARM64Reg Rn, int scale)
3425
bool sf = Is64Bit(Rn);
3430
EmitConversion2(sf, 0, false, type, 0, 3, 64 - scale, Rd, Rn);
3433
// Scalar FP compares, all through EmitCompare with the first three
// arguments zero. The fourth argument (opcode2) is 0 for FCMP Rn,Rm;
// 8 for the single-operand FCMP; 0x10 for FCMPE Rn,Rm; and 0x18 for the
// single-operand FCMPE. The single-operand forms pass register 0 as Rm.
void ARM64FloatEmitter::FCMP(ARM64Reg Rn, ARM64Reg Rm)
3435
EmitCompare(0, 0, 0, 0, Rn, Rm);
3437
void ARM64FloatEmitter::FCMP(ARM64Reg Rn)
3439
EmitCompare(0, 0, 0, 8, Rn, (ARM64Reg)0);
3441
void ARM64FloatEmitter::FCMPE(ARM64Reg Rn, ARM64Reg Rm)
3443
EmitCompare(0, 0, 0, 0x10, Rn, Rm);
3445
void ARM64FloatEmitter::FCMPE(ARM64Reg Rn)
3447
EmitCompare(0, 0, 0, 0x18, Rn, (ARM64Reg)0);
3449
// Vector FP compares. The three-register forms go through EmitThreeSame
// with opcode 0x1C; the two-register forms (no Rm) go through Emit2RegMisc
// with the upper size bit set via `2 |`.
// NOTE(review): the two-register forms presumably compare against zero --
// confirm against the instruction reference.
void ARM64FloatEmitter::FCMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3451
EmitThreeSame(0, size >> 6, 0x1C, Rd, Rn, Rm);
3453
void ARM64FloatEmitter::FCMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn)
3455
Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0xD, Rd, Rn);
3457
void ARM64FloatEmitter::FCMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3459
EmitThreeSame(1, size >> 6, 0x1C, Rd, Rn, Rm);
3461
void ARM64FloatEmitter::FCMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn)
3463
Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0xC, Rd, Rn);
3465
void ARM64FloatEmitter::FCMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3467
EmitThreeSame(1, 2 | (size >> 6), 0x1C, Rd, Rn, Rm);
3469
void ARM64FloatEmitter::FCMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn)
3471
Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0x0C, Rd, Rn);
3473
void ARM64FloatEmitter::FCMLE(u8 size, ARM64Reg Rd, ARM64Reg Rn)
3475
Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0xD, Rd, Rn);
3477
void ARM64FloatEmitter::FCMLT(u8 size, ARM64Reg Rd, ARM64Reg Rn)
3479
Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0xE, Rd, Rn);
3482
// FP conditional select: forwards to EmitCondSelect with the first two
// arguments zero and the condition code `cond`.
void ARM64FloatEmitter::FCSEL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond)
3484
EmitCondSelect(0, 0, cond, Rd, Rn, Rm);
3488
// Permute wrappers over EmitPermute. The second argument is the op number:
// UZP1 = 1, TRN1 = 2, ZIP1 = 3, UZP2 = 5, TRN2 = 6, ZIP2 = 7
// (each "2" variant is its "1" variant plus 4).
void ARM64FloatEmitter::UZP1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3490
EmitPermute(size, 1, Rd, Rn, Rm);
3492
void ARM64FloatEmitter::TRN1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3494
EmitPermute(size, 2, Rd, Rn, Rm);
3496
void ARM64FloatEmitter::ZIP1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3498
EmitPermute(size, 3, Rd, Rn, Rm);
3500
void ARM64FloatEmitter::UZP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3502
EmitPermute(size, 5, Rd, Rn, Rm);
3504
void ARM64FloatEmitter::TRN2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3506
EmitPermute(size, 6, Rd, Rn, Rm);
3508
void ARM64FloatEmitter::ZIP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3510
EmitPermute(size, 7, Rd, Rn, Rm);
3513
// Shift by immediate
// Convenience overloads: each forwards to the same-named overload that
// takes an explicit `upper` flag, passing false for the plain name and true
// for the "2" name.
3514
void ARM64FloatEmitter::SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
3516
SSHLL(src_size, Rd, Rn, shift, false);
3518
void ARM64FloatEmitter::SSHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
3520
SSHLL(src_size, Rd, Rn, shift, true);
3522
void ARM64FloatEmitter::SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
3524
SHRN(dest_size, Rd, Rn, shift, false);
3526
void ARM64FloatEmitter::SHRN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
3528
SHRN(dest_size, Rd, Rn, shift, true);
3530
void ARM64FloatEmitter::USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
3532
USHLL(src_size, Rd, Rn, shift, false);
3534
void ARM64FloatEmitter::USHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
3536
USHLL(src_size, Rd, Rn, shift, true);
3538
void ARM64FloatEmitter::SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn)
3540
SXTL(src_size, Rd, Rn, false);
3542
void ARM64FloatEmitter::SXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn)
3544
SXTL(src_size, Rd, Rn, true);
3546
void ARM64FloatEmitter::UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn)
3548
UXTL(src_size, Rd, Rn, false);
3550
void ARM64FloatEmitter::UXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn)
3552
UXTL(src_size, Rd, Rn, true);
3555
// Combined immh:immb value for a left shift: element size plus shift
// amount. Callers split the result into immh (>> 3) and immb (& 7).
static u32 EncodeImmShiftLeft(u8 src_size, u32 shift) {
3556
return src_size + shift;
3559
// Combined immh:immb value for a right shift: twice the element size minus
// the shift amount. The subtraction is unsigned, so callers are expected to
// keep shift <= 2 * src_size (the visible callers assert tighter bounds).
static u32 EncodeImmShiftRight(u8 src_size, u32 shift) {
3560
return src_size * 2 - shift;
3563
// Signed shift-left-long. shift must be < src_size (asserted). `upper` is
// passed as the Q argument of EmitShiftImm; opcode 0x14, U = 0.
void ARM64FloatEmitter::SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper)
3565
_assert_msg_(DYNA_REC, shift < src_size, "%s shift amount must less than the element size!", __FUNCTION__);
3566
u32 imm = EncodeImmShiftLeft(src_size, shift);
3567
EmitShiftImm(upper, 0, imm >> 3, imm & 7, 0x14, Rd, Rn);
3570
// Unsigned shift-left-long: identical to SSHLL except U = 1.
void ARM64FloatEmitter::USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper)
3572
_assert_msg_(DYNA_REC, shift < src_size, "%s shift amount must less than the element size!", __FUNCTION__);
3573
u32 imm = EncodeImmShiftLeft(src_size, shift);
3574
EmitShiftImm(upper, 1, imm >> 3, imm & 7, 0x14, Rd, Rn);
3577
// Shift-right-narrow. shift must be in 1..dest_size (both bounds asserted).
// Uses the right-shift immediate encoding and opcode 0x10.
void ARM64FloatEmitter::SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper)
3579
_assert_msg_(DYNA_REC, shift > 0, "%s shift amount must be greater than zero!", __FUNCTION__);
3580
_assert_msg_(DYNA_REC, shift <= dest_size, "%s shift amount must less than or equal to the element size!", __FUNCTION__);
3581
u32 imm = EncodeImmShiftRight(dest_size, shift);
3582
EmitShiftImm(upper, 0, imm >> 3, imm & 7, 0x10, Rd, Rn);
3585
// Vector shift left by immediate: opcode 0xA, U = false, Q from IsQuad(Rd).
void ARM64FloatEmitter::SHL(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift) {
3586
_assert_msg_(DYNA_REC, shift < dest_size, "%s shift amount must less than the element size!", __FUNCTION__);
3587
u32 imm = EncodeImmShiftLeft(dest_size, shift);
3588
EmitShiftImm(IsQuad(Rd), false, imm >> 3, imm & 7, 0xA, Rd, Rn);
3591
// Vector unsigned shift right by immediate: opcode 0x0, U = true.
void ARM64FloatEmitter::USHR(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift) {
3592
_assert_msg_(DYNA_REC, shift < dest_size, "%s shift amount must less than the element size!", __FUNCTION__);
3593
u32 imm = EncodeImmShiftRight(dest_size, shift);
3594
EmitShiftImm(IsQuad(Rd), true, imm >> 3, imm & 7, 0x0, Rd, Rn);
3597
// Vector signed shift right by immediate: opcode 0x0, U = false.
void ARM64FloatEmitter::SSHR(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift) {
3598
_assert_msg_(DYNA_REC, shift < dest_size, "%s shift amount must less than the element size!", __FUNCTION__);
3599
u32 imm = EncodeImmShiftRight(dest_size, shift);
3600
EmitShiftImm(IsQuad(Rd), false, imm >> 3, imm & 7, 0x0, Rd, Rn);
3603
// Sign-extend-long is emitted as SSHLL with a shift of 0.
void ARM64FloatEmitter::SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper)
3605
SSHLL(src_size, Rd, Rn, 0, upper);
3608
// Zero-extend-long is emitted as USHLL with a shift of 0.
void ARM64FloatEmitter::UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper)
3610
USHLL(src_size, Rd, Rn, 0, upper);
3613
// vector x indexed element
// FMUL by element: multiplies by lane `index` of Rm. Only 32- and 64-bit
// element sizes are accepted (asserted). The lane index is split into the
// H and L bits; in the branch visible here H takes bit 1 of index.
// Emitted via EmitVectorxElement with U = 0 and opcode 0x9.
3614
void ARM64FloatEmitter::FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index)
3616
_assert_msg_(DYNA_REC, size == 32 || size == 64, "%s only supports 32bit or 64bit size!", __FUNCTION__);
3622
H = (index >> 1) & 1;
3623
} else if (size == 64) {
3627
EmitVectorxElement(0, 2 | (size >> 6), L, 0x9, H, Rd, Rn, Rm);
3630
// FMLA by element: same lane handling as FMUL above, with opcode 1.
void ARM64FloatEmitter::FMLA(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index)
3632
_assert_msg_(DYNA_REC, size == 32 || size == 64, "%s only supports 32bit or 64bit size!", __FUNCTION__);
3638
H = (index >> 1) & 1;
3639
} else if (size == 64) {
3643
EmitVectorxElement(0, 2 | (size >> 6), L, 1, H, Rd, Rn, Rm);
3646
// Saves the vector registers selected in `registers` (bit i -> Q0 + i) to
// the stack, 16 bytes per register.
//
// Two strategies:
//  * Bundled: used when a scan finds a run of consecutive set bits and a
//    usable `tmp` register was supplied. The whole area is reserved with a
//    single SUB on SP, `tmp` walks upward through it, runs of up to four
//    consecutive registers are stored with one post-indexed ST1, and
//    isolated ("island") registers are stored afterwards in pairs via STP,
//    with a final STR for an odd one.
//  * Fallback: registers are pushed in pairs with pre-indexed STP (-32),
//    with a final pre-indexed STR (-16) for an odd one.
// ABI_PopRegisters must be called with the same arguments to undo this.
void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers, ARM64Reg tmp)
3648
bool bundled_loadstore = false;
3650
// Pass 1: detect whether any run of consecutive registers exists.
for (int i = 0; i < 32; ++i)
3656
// Extend the run while the next bit is set, capped at 4 registers.
while (++count < 4 && (i + count) < 32 && registers[i + count]) {}
3659
bundled_loadstore = true;
3664
if (bundled_loadstore && tmp != INVALID_REG)
3666
int num_regs = registers.Count();
3667
// Reserve the whole save area up front; tmp is the moving store pointer.
m_emit->SUB(SP, SP, num_regs * 16);
3668
m_emit->ADD(tmp, SP, 0);
3669
std::vector<ARM64Reg> island_regs;
3670
for (int i = 0; i < 32; ++i)
3678
// 1 < 4 && registers[i + 1] true!
3679
// 2 < 4 && registers[i + 2] true!
3680
// 3 < 4 && registers[i + 3] true!
3681
// 4 < 4 && registers[i + 4] false!
3682
while (++count < 4 && (i + count) < 32 && registers[i + count]) {}
3685
island_regs.push_back((ARM64Reg)(Q0 + i));
3687
ST1(64, count, INDEX_POST, (ARM64Reg)(Q0 + i), tmp);
3692
// Handle island registers
3693
std::vector<ARM64Reg> pair_regs;
3694
for (auto& it : island_regs)
3696
pair_regs.push_back(it);
3697
if (pair_regs.size() == 2)
3699
STP(128, INDEX_POST, pair_regs[0], pair_regs[1], tmp, 32);
3703
// One island register left over without a partner.
if (pair_regs.size())
3704
STR(128, INDEX_POST, pair_regs[0], tmp, 16);
3708
// Fallback path: push directly onto SP with pre-indexed stores.
std::vector<ARM64Reg> pair_regs;
3709
for (auto it : registers)
3711
pair_regs.push_back((ARM64Reg)(Q0 + it));
3712
if (pair_regs.size() == 2)
3714
STP(128, INDEX_PRE, pair_regs[0], pair_regs[1], SP, -32);
3718
if (pair_regs.size())
3719
STR(128, INDEX_PRE, pair_regs[0], SP, -16);
3722
// Restores the vector registers saved by ABI_PushRegisters. It must be
// given the same `registers` and `tmp` so that the same strategy (bundled
// vs. fallback) is chosen and the stack layout matches.
//
//  * Bundled: runs of consecutive registers are reloaded with post-indexed
//    LD1 directly off SP, then island registers via LDP/LDR, in the same
//    order they were stored.
//  * Fallback: registers are walked from Q31 down to Q0, i.e. the reverse
//    of the push order; if the count is odd the first load is a single LDR,
//    and the rest are LDP pairs with the operands swapped.
void ARM64FloatEmitter::ABI_PopRegisters(BitSet32 registers, ARM64Reg tmp)
3724
bool bundled_loadstore = false;
3725
int num_regs = registers.Count();
3727
// Pass 1: same run detection as in ABI_PushRegisters.
for (int i = 0; i < 32; ++i)
3733
while (++count < 4 && (i + count) < 32 && registers[i + count]) {}
3736
bundled_loadstore = true;
3741
if (bundled_loadstore && tmp != INVALID_REG)
3743
// The temporary register is only used to indicate that we can use this code path
3744
std::vector<ARM64Reg> island_regs;
3745
for (int i = 0; i < 32; ++i)
3751
while (++count < 4 && (i + count) < 32 && registers[i + count]) {}
3754
island_regs.push_back((ARM64Reg)(Q0 + i));
3756
LD1(64, count, INDEX_POST, (ARM64Reg)(Q0 + i), SP);
3761
// Handle island registers
3762
std::vector<ARM64Reg> pair_regs;
3763
for (auto& it : island_regs)
3765
pair_regs.push_back(it);
3766
if (pair_regs.size() == 2)
3768
LDP(128, INDEX_POST, pair_regs[0], pair_regs[1], SP, 32);
3772
// One island register left over without a partner.
if (pair_regs.size())
3773
LDR(128, INDEX_POST, pair_regs[0], SP, 16);
3777
// Fallback path: undo the pre-indexed pushes in reverse order.
bool odd = (num_regs % 2) != 0;
3778
std::vector<ARM64Reg> pair_regs;
3779
for (int i = 31; i >= 0; --i)
3786
// First load must be a regular LDR if odd
3788
LDR(128, INDEX_POST, (ARM64Reg)(Q0 + i), SP, 16);
3792
pair_regs.push_back((ARM64Reg)(Q0 + i));
3793
if (pair_regs.size() == 2)
3795
// Registers were collected high-to-low, so swap them to match the
// order in which the push stored the pair.
LDP(128, INDEX_POST, pair_regs[1], pair_regs[0], SP, 32);
3803
// Rd = Rn & imm. If imm is encodable as a logical immediate for the width of
// Rn (64 when Is64Bit(Rn), otherwise 32), a single AND-immediate is emitted.
// Otherwise imm is materialised into `scratch` with MOVI2R and a register
// AND is emitted; `scratch` must then be a valid register (asserted).
// The assert message names this function (it previously said "ANDSI2R",
// copied from the flag-setting sibling), so failures point at the right
// helper.
void ARM64XEmitter::ANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
3804
unsigned int n, imm_s, imm_r;
3807
if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {
3808
AND(Rd, Rn, imm_r, imm_s, n != 0);
3810
_assert_msg_(JIT, scratch != INVALID_REG, "ANDI2R - failed to construct logical immediate value from %08x, need scratch", (u32)imm);
3811
MOVI2R(scratch, imm);
3812
AND(Rd, Rn, scratch);
3816
// Rd = Rn | imm. Uses the ORR-immediate form when IsImmLogical accepts imm
// for the width of Rn; otherwise loads imm into `scratch` (which must be
// valid, asserted) and emits a register ORR.
void ARM64XEmitter::ORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
3817
unsigned int n, imm_s, imm_r;
3818
if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {
3819
ORR(Rd, Rn, imm_r, imm_s, n != 0);
3821
_assert_msg_(JIT, scratch != INVALID_REG, "ORRI2R - failed to construct logical immediate value from %08x, need scratch", (u32)imm);
3822
MOVI2R(scratch, imm);
3823
ORR(Rd, Rn, scratch);
3827
// Rd = Rn ^ imm. Same immediate-or-scratch strategy as ORRI2R, using EOR.
void ARM64XEmitter::EORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
3828
unsigned int n, imm_s, imm_r;
3829
if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {
3830
EOR(Rd, Rn, imm_r, imm_s, n != 0);
3832
_assert_msg_(JIT, scratch != INVALID_REG, "EORI2R - failed to construct logical immediate value from %08x, need scratch", (u32)imm);
3833
MOVI2R(scratch, imm);
3834
EOR(Rd, Rn, scratch);
3838
// Rd = Rn & imm via ANDS (the flag-setting AND, by its mnemonic). Same
// immediate-or-scratch strategy as ORRI2R.
void ARM64XEmitter::ANDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
3839
unsigned int n, imm_s, imm_r;
3840
if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {
3841
ANDS(Rd, Rn, imm_r, imm_s, n != 0);
3843
_assert_msg_(JIT, scratch != INVALID_REG, "ANDSI2R - failed to construct logical immediate value from %08x, need scratch", (u32)imm);
3844
MOVI2R(scratch, imm);
3845
ANDS(Rd, Rn, scratch);
3849
void ARM64XEmitter::ADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
3852
if (IsImmArithmetic(imm, &val, &shift)) {
3853
ADD(Rd, Rn, val, shift);
3855
_assert_msg_(JIT, scratch != INVALID_REG, "ADDI2R - failed to construct arithmetic immediate value from %08x, need scratch", (u32)imm);
3856
MOVI2R(scratch, imm);
3857
ADD(Rd, Rn, scratch);
3861
// SUBI2R: emit a SUB of the immediate imm from Rn, targeting Rd.
//   scratch - register used to hold imm when it has no arithmetic-immediate encoding
// IsImmArithmetic() yields val and shift when imm is encodable; shift is true when
// val was taken from imm >> 12. Those feed the immediate form of SUB.
// The remaining statements require scratch != INVALID_REG, load imm into scratch
// with MOVI2R and emit the register form of SUB.
void ARM64XEmitter::SUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
3864
if (IsImmArithmetic(imm, &val, &shift)) {
3865
SUB(Rd, Rn, val, shift);
3867
// Only the low 32 bits of imm are printed because of the (u32) cast.
_assert_msg_(JIT, scratch != INVALID_REG, "SUBI2R - failed to construct arithmetic immediate value from %08x, need scratch", (u32)imm);
3868
MOVI2R(scratch, imm);
3869
SUB(Rd, Rn, scratch);
3873
// CMPI2R: emit a CMP of Rn against the immediate imm.
//   scratch - register used to hold imm when it has no arithmetic-immediate encoding
// IsImmArithmetic() yields val and shift when imm is encodable; those feed the
// immediate form of CMP. Otherwise scratch must not be INVALID_REG and imm is
// loaded into it with MOVI2R.
// NOTE(review): presumably a register-form CMP against scratch follows the MOVI2R — confirm.
void ARM64XEmitter::CMPI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
3876
if (IsImmArithmetic(imm, &val, &shift)) {
3877
CMP(Rn, val, shift);
3879
// Only the low 32 bits of imm are printed because of the (u32) cast.
_assert_msg_(JIT, scratch != INVALID_REG, "CMPI2R - failed to construct arithmetic immediate value from %08x, need scratch", (u32)imm);
3880
MOVI2R(scratch, imm);
3885
// TryADDI2R: scratch-free variant of ADDI2R taking a 32-bit immediate.
// Emits the immediate form of ADD when IsImmArithmetic() can encode imm.
// NOTE(review): presumably returns true when the instruction was emitted and false
// otherwise — confirm against the return statements.
bool ARM64XEmitter::TryADDI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm) {
3888
if (IsImmArithmetic(imm, &val, &shift)) {
3889
ADD(Rd, Rn, val, shift);
3896
// TrySUBI2R: scratch-free variant of SUBI2R taking a 32-bit immediate.
// Emits the immediate form of SUB when IsImmArithmetic() can encode imm.
// NOTE(review): presumably returns true when the instruction was emitted and false
// otherwise — confirm against the return statements.
bool ARM64XEmitter::TrySUBI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm) {
3899
if (IsImmArithmetic(imm, &val, &shift)) {
3900
SUB(Rd, Rn, val, shift);
3907
// TryCMPI2R: scratch-free variant of CMPI2R taking a 32-bit immediate.
// Emits the immediate form of CMP when IsImmArithmetic() can encode imm.
// NOTE(review): presumably returns true when the instruction was emitted and false
// otherwise — confirm against the return statements.
bool ARM64XEmitter::TryCMPI2R(ARM64Reg Rn, u32 imm) {
3910
if (IsImmArithmetic(imm, &val, &shift)) {
3911
CMP(Rn, val, shift);
3918
// TryANDI2R: scratch-free AND with a 32-bit immediate.
// Unlike ANDI2R, the width passed to IsImmLogical() is fixed at 32 rather than
// derived from Rn. Emits the immediate form of AND when imm is encodable.
// NOTE(review): presumably returns true when the instruction was emitted and false
// otherwise — confirm against the return statements.
bool ARM64XEmitter::TryANDI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm) {
3919
u32 n, imm_r, imm_s;
3920
if (IsImmLogical(imm, 32, &n, &imm_s, &imm_r)) {
3921
AND(Rd, Rn, imm_r, imm_s, n != 0);
3927
// TryORRI2R: scratch-free ORR with a 32-bit immediate.
// Unlike ORRI2R, the width passed to IsImmLogical() is fixed at 32 rather than
// derived from Rn. Emits the immediate form of ORR when imm is encodable.
// NOTE(review): presumably returns true when the instruction was emitted and false
// otherwise — confirm against the return statements.
bool ARM64XEmitter::TryORRI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm) {
3928
u32 n, imm_r, imm_s;
3929
if (IsImmLogical(imm, 32, &n, &imm_s, &imm_r)) {
3930
ORR(Rd, Rn, imm_r, imm_s, n != 0);
3936
// TryEORI2R: scratch-free EOR with a 32-bit immediate.
// Unlike EORI2R, the width passed to IsImmLogical() is fixed at 32 rather than
// derived from Rn. Emits the immediate form of EOR when imm is encodable.
// NOTE(review): presumably returns true when the instruction was emitted and false
// otherwise — confirm against the return statements.
bool ARM64XEmitter::TryEORI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm) {
3937
u32 n, imm_r, imm_s;
3938
if (IsImmLogical(imm, 32, &n, &imm_s, &imm_r)) {
3939
EOR(Rd, Rn, imm_r, imm_s, n != 0);
3946
// FPImm8ToFloat: expand an 8-bit floating-point immediate into a float.
// The byte is split into the fields computed below; the result is produced by
// copying an integer bit pattern into a float with memcpy.
// NOTE(review): presumably sign, exp and mantissa are packed into `f` at bits 31,
// 30..23 and 22..0 (the layout FPImm8FromFloat unpacks) — confirm.
float FPImm8ToFloat(uint8_t bits) {
3949
// Bit 7 of the immediate is the sign.
int sign = bits >> 7;
3952
int bit6 = (bits >> 6) & 1;
3953
// 8-bit exponent: bit 7 is the inverse of bit6, bits 6..2 (mask 0x7C) are set
// when bit6 is set, and bits 1..0 come from immediate bits 5..4.
uint32_t exp = ((!bit6) << 7) | (0x7C * bit6) | ((bits >> 4) & 3);
3954
// The low nibble of the immediate lands in bits 22..19.
uint32_t mantissa = (bits & 0xF) << 19;
3958
// Bit-exact reinterpretation of the integer pattern as a float.
memcpy(&fl, &f, sizeof(float));
3962
// FPImm8FromFloat: try to express `value` as an 8-bit floating-point immediate.
//   value  - float to encode
//   immOut - receives the encoding (NOTE(review): presumably written only on success — confirm)
// The float's bit pattern is split into sign, exponent and the top four mantissa
// bits, packed into a candidate byte, and validated by decoding that byte with
// FPImm8ToFloat() and comparing the result with the input.
bool FPImm8FromFloat(float value, uint8_t *immOut) {
3964
// Work on the raw bit pattern of the float.
memcpy(&f, &value, sizeof(float));
3965
// Top four bits of the 23-bit mantissa field.
uint32_t mantissa4 = (f & 0x7FFFFF) >> 19;
3966
uint32_t exponent = (f >> 23) & 0xFF;
3967
uint32_t sign = f >> 31;
3968
// FPImm8ToFloat() only produces exponents whose bits 7 and 6 differ.
// NOTE(review): presumably this test rejects the value — confirm the statement it guards.
if ((exponent >> 7) == ((exponent >> 6) & 1))
3970
// Candidate: sign in bit 7, inverted exponent bit 7 in bit 6, exponent bits 1..0
// in bits 5..4, mantissa nibble in bits 3..0.
uint8_t imm8 = (sign << 7) | ((!(exponent >> 7)) << 6) | ((exponent & 3) << 4) | mantissa4;
3971
// The round trip catches bits the candidate cannot carry (e.g. low mantissa bits).
float newFloat = FPImm8ToFloat(imm8);
3972
if (newFloat == value) {
3980
// MOVI2F: load the float constant `value` into Rd.
//   Rd      - destination; must not be a double register (asserted below)
//   scratch - register used when no cheaper encoding is found
//   negate  - NOTE(review): presumably requests that -value be loaded; only its use
//             in the FPImm8FromFloat(-value, ...) test is visible here — confirm.
// Strategies visible in this function: an FMOV from the zero register, an 8-bit
// FP immediate found by FPImm8FromFloat() for -value (when negate is set) or for
// value, and finally loading the raw bit pattern into scratch with MOVI2R.
void ARM64FloatEmitter::MOVI2F(ARM64Reg Rd, float value, ARM64Reg scratch, bool negate) {
3981
_assert_msg_(JIT, !IsDouble(Rd), "MOVI2F does not yet support double precision");
3984
// std::signbit is true for negative values, including negative zero.
if (std::signbit(value)) {
3987
// Move from the zero register; given the assert above, IsDouble(Rd) is false here, so WZR is chosen.
FMOV(Rd, IsDouble(Rd) ? ZR : WZR);
3991
// TODO: There are some other values we could generate with the float-imm instruction, like 1.0...
3992
} else if (negate && FPImm8FromFloat(-value, &imm8)) {
3994
} else if (FPImm8FromFloat(value, &imm8)) {
4000
// Last resort: the constant has to travel through a general-purpose scratch register.
_assert_msg_(JIT, scratch != INVALID_REG, "Failed to find a way to generate FP immediate %f without scratch", value);
4005
// Copy the float's bit pattern into ival without any numeric conversion.
memcpy(&ival, &value, sizeof(ival));
4006
m_emit->MOVI2R(scratch, ival);
4011
// TODO: Quite a few values could be generated easily using the MOVI instruction and friends.
4012
// MOVI2FDUP: load a float constant for vector register Rd.
// The scalar load is delegated to MOVI2F() on the S register that shares Rd's
// register number.
// NOTE(review): presumably the scalar is then duplicated across Rd's lanes, as the
// name suggests — confirm.
void ARM64FloatEmitter::MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch) {
4013
// TODO: Make it work with more element sizes
4014
// TODO: Optimize - there are shorter solutions for many values
4015
// S-register view with the same register number as Rd.
ARM64Reg s = (ARM64Reg)(S0 + DecodeReg(Rd));
4017
// Compare the raw bits rather than the float value so that -0.0f (sign bit set) does not match.
memcpy(&ival, &value, 4);
4018
if (ival == 0) { // Make sure to not catch negative zero here
4021
MOVI2F(s, value, scratch);
4026
// SUBSI2R: same shape as SUBI2R, but emits SUBS.
//   scratch - register used to hold imm when it has no arithmetic-immediate encoding
// IsImmArithmetic() yields val and shift when imm is encodable; those feed the
// immediate form of SUBS. The remaining statements require scratch != INVALID_REG,
// load imm into scratch with MOVI2R and emit the register form of SUBS.
void ARM64XEmitter::SUBSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
4029
if (IsImmArithmetic(imm, &val, &shift)) {
4030
SUBS(Rd, Rn, val, shift);
4032
// The diagnostic is prefixed with this helper's own name so a failure is attributed correctly.
_assert_msg_(JIT, scratch != INVALID_REG, "SUBSI2R - failed to construct immediate value from %08x, need scratch", (u32)imm);
4033
MOVI2R(scratch, imm);
4034
SUBS(Rd, Rn, scratch);