// Copyright 2015 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "CodeBlock.h"
// X30 serves a dual purpose as a link register
// Encoded as <u3:type><u5:reg>
// 010 - VFP single precision
// 100 - VFP double precision
// 110 - VFP quad precision
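//
// Worked example of the <u3:type><u5:reg> layout above: W5 = 0x05,
// X5 = 0x25, S5 = 0x45, D5 = 0x85 and Q5 = 0xC5 all name register
// number 5, differing only in the type bits.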
W0 = 0, W1, W2, W3, W4, W5, W6,
W7, W8, W9, W10, W11, W12, W13, W14,
W15, W16, W17, W18, W19, W20, W21, W22,
W23, W24, W25, W26, W27, W28, W29, W30,
WSP, // 32bit stack pointer
X0 = 0x20, X1, X2, X3, X4, X5, X6,
X7, X8, X9, X10, X11, X12, X13, X14,
X15, X16, X17, X18, X19, X20, X21, X22,
X23, X24, X25, X26, X27, X28, X29, X30,
SP, // 64bit stack pointer
// VFP single precision registers
S0 = 0x40, S1, S2, S3, S4, S5, S6,
S7, S8, S9, S10, S11, S12, S13,
S14, S15, S16, S17, S18, S19, S20,
S21, S22, S23, S24, S25, S26, S27,
// VFP double precision registers
D0 = 0x80, D1, D2, D3, D4, D5, D6, D7,
D8, D9, D10, D11, D12, D13, D14, D15,
D16, D17, D18, D19, D20, D21, D22, D23,
D24, D25, D26, D27, D28, D29, D30, D31,
// ASIMD Quad-Word registers
Q0 = 0xC0, Q1, Q2, Q3, Q4, Q5, Q6, Q7,
Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15,
Q16, Q17, Q18, Q19, Q20, Q21, Q22, Q23,
Q24, Q25, Q26, Q27, Q28, Q29, Q30, Q31,
// For PRFM (prefetch memory) encoding
// This is encoded in the Rt register
PLDL1KEEP = 0, PLDL1STRM,
// Instruction preload
PLIL1KEEP = 8, PLIL1STRM,
PLTL1KEEP = 16, PLTL1STRM,
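//
// Rough breakdown of the 5-bit prefetch operation, assuming the usual
// <type:2><target:2><policy:1> layout: e.g. PLIL1STRM = 9 selects
// instruction prefetch (PLI), the L1 cache and streaming (STRM) policy.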
INVALID_REG = 0xFFFFFFFF
// X19-X28, X29 (FP), X30 (LR). Whether FP (X29) really belongs here seems questionable.
const u32 ALL_CALLEE_SAVED = 0x7FF80000;
const u32 ALL_CALLEE_SAVED_FP = 0x0000FF00; // d8-d15
inline bool Is64Bit(ARM64Reg reg) { return (reg & 0x20) != 0; }
inline bool IsSingle(ARM64Reg reg) { return (reg & 0xC0) == 0x40; }
inline bool IsDouble(ARM64Reg reg) { return (reg & 0xC0) == 0x80; }
inline bool IsScalar(ARM64Reg reg) { return IsSingle(reg) || IsDouble(reg); }
inline bool IsQuad(ARM64Reg reg) { return (reg & 0xC0) == 0xC0; }
inline bool IsVector(ARM64Reg reg) { return (reg & 0xC0) != 0; }
inline bool IsGPR(ARM64Reg reg) { return (int)reg < 0x40; }
int CountLeadingZeros(uint64_t value, int width);
inline ARM64Reg DecodeReg(ARM64Reg reg) { return (ARM64Reg)(reg & 0x1F); }
inline ARM64Reg EncodeRegTo64(ARM64Reg reg) { return (ARM64Reg)(reg | 0x20); }
inline ARM64Reg EncodeRegToSingle(ARM64Reg reg) { return (ARM64Reg)(DecodeReg(reg) + S0); }
inline ARM64Reg EncodeRegToDouble(ARM64Reg reg) { return (ARM64Reg)((reg & ~0xC0) | 0x80); }
inline ARM64Reg EncodeRegToQuad(ARM64Reg reg) { return (ARM64Reg)(reg | 0xC0); }
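//
// For example, DecodeReg(X8) yields the raw register number 8,
// EncodeRegTo64(W8) == X8, and EncodeRegToDouble(Q3) == D3.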
// For AND/TST/ORR/EOR etc
bool IsImmLogical(uint64_t value, unsigned int width, unsigned int *n, unsigned int *imm_s, unsigned int *imm_r);
bool IsImmArithmetic(uint64_t input, u32 *val, bool *shift);
float FPImm8ToFloat(uint8_t bits);
bool FPImm8FromFloat(float value, uint8_t *immOut);
INDEX_SIGNED = 3, // used in LDP/STP
ROUND_A, // round to nearest, ties to away
ROUND_M, // round towards -inf
ROUND_N, // round to nearest, ties to even
ROUND_P, // round towards +inf
ROUND_Z, // round towards zero
// 2 = B (conditional)
// 5 = B (unconditional)
// 6 = BL (unconditional)
// Used with TBZ/TBNZ
// Used with Test/Compare and Branch
FIELD_NZCV, // The only one of these system registers accessible from EL0 (user space)
EXTEND_UXTW = 0x2, /* Also LSL on 32bit width */
EXTEND_UXTX = 0x3, /* Also LSL on 64bit width */
WidthSpecifier m_width;
ExtendSpecifier m_extend;
TypeSpecifier m_type;
ShiftType m_shifttype;
ArithOption(ARM64Reg Rd, bool index = false)
// Register-offset indexing is an AArch64 addressing feature:
// on load/store instructions that take a register offset,
// the offset register can be used as an index.
// When indexing, the offset register is shifted left so that
// we index at intervals of the size of what we are loading:
// 8-bit: Index does nothing
// 16-bit: Index LSL 1
// 32-bit: Index LSL 2
// 64-bit: Index LSL 3
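//
// For example (a sketch of the intent), a 64-bit LDR with
// ArithOption(X2, true) as its offset addresses Rn + (X2 << 3),
// so X2 acts as an element index rather than a byte offset.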
m_type = TYPE_EXTENDEDREG;
m_width = WIDTH_64BIT;
m_extend = EXTEND_UXTX;
m_width = WIDTH_32BIT;
m_extend = EXTEND_UXTW;
m_shifttype = ST_LSL;
ArithOption(ARM64Reg Rd, ShiftType shift_type, u32 shift)
m_shifttype = shift_type;
m_type = TYPE_SHIFTEDREG;
m_width = WIDTH_64BIT;
m_width = WIDTH_32BIT;
TypeSpecifier GetType() const
ARM64Reg GetReg() const
case TYPE_EXTENDEDREG:
return (m_extend << 13) |
case TYPE_SHIFTEDREG:
return (m_shifttype << 22) |
_dbg_assert_msg_(DYNA_REC, false, "Invalid type in GetData");
friend class ARM64FloatEmitter;
u8* m_lastCacheFlushEnd;
void EncodeCompareBranchInst(u32 op, ARM64Reg Rt, const void* ptr);
void EncodeTestBranchInst(u32 op, ARM64Reg Rt, u8 bits, const void* ptr);
void EncodeUnconditionalBranchInst(u32 op, const void* ptr);
void EncodeUnconditionalBranchInst(u32 opc, u32 op2, u32 op3, u32 op4, ARM64Reg Rn);
void EncodeExceptionInst(u32 instenc, u32 imm);
void EncodeSystemInst(u32 op0, u32 op1, u32 CRn, u32 CRm, u32 op2, ARM64Reg Rt);
void EncodeArithmeticInst(u32 instenc, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option);
void EncodeArithmeticCarryInst(u32 op, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EncodeCondCompareImmInst(u32 op, ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond);
void EncodeCondCompareRegInst(u32 op, ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond);
void EncodeCondSelectInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
void EncodeData1SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn);
void EncodeData2SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EncodeData3SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
void EncodeLogicalInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
void EncodeLoadRegisterInst(u32 bitop, ARM64Reg Rt, u32 imm);
void EncodeLoadStoreExcInst(u32 instenc, ARM64Reg Rs, ARM64Reg Rt2, ARM64Reg Rn, ARM64Reg Rt);
void EncodeLoadStorePairedInst(u32 op, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm);
void EncodeLoadStoreIndexedInst(u32 op, u32 op2, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void EncodeLoadStoreIndexedInst(u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm, u8 size);
void EncodeMOVWideInst(u32 op, ARM64Reg Rd, u32 imm, ShiftAmount pos);
void EncodeBitfieldMOVInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
void EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
void EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd);
void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, int n);
void EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
void EncodeAddressInst(u32 op, ARM64Reg Rd, s32 imm);
void EncodeLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
inline void Write32(u32 value)
*(u32*)m_code = value;
: m_code(nullptr), m_startcode(nullptr), m_lastCacheFlushEnd(nullptr)
ARM64XEmitter(u8* code_ptr) {
m_lastCacheFlushEnd = code_ptr;
m_startcode = code_ptr;
virtual ~ARM64XEmitter()
void SetCodePointer(u8* ptr);
const u8* GetCodePointer() const;
void ReserveCodeSpace(u32 bytes);
const u8* AlignCode16();
const u8* AlignCodePage();
void FlushIcacheSection(u8* start, u8* end);
u8* GetWritableCodePtr();
// FixupBranch branching
void SetJumpTarget(FixupBranch const& branch);
FixupBranch CBZ(ARM64Reg Rt);
FixupBranch CBNZ(ARM64Reg Rt);
FixupBranch B(CCFlags cond);
FixupBranch TBZ(ARM64Reg Rt, u8 bit);
FixupBranch TBNZ(ARM64Reg Rt, u8 bit);
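//
// Typical forward-branch pattern (a sketch): emit the branch, emit the
// fall-through code, then resolve the target once its address is known:
//   FixupBranch skip = CBZ(W0);   // taken when W0 == 0
//   ...                           // code run when W0 != 0
//   SetJumpTarget(skip);          // patch the CBZ to land here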
// Compare and Branch
void CBZ(ARM64Reg Rt, const void* ptr);
void CBNZ(ARM64Reg Rt, const void* ptr);
// Conditional Branch
void B(CCFlags cond, const void* ptr);
void TBZ(ARM64Reg Rt, u8 bits, const void* ptr);
void TBNZ(ARM64Reg Rt, u8 bits, const void* ptr);
// Unconditional Branch
void B(const void* ptr);
void BL(const void* ptr);
// Unconditional Branch (register)
void BR(ARM64Reg Rn);
void BLR(ARM64Reg Rn);
void RET(ARM64Reg Rn = X30);
// Exception generation
void _MSR(PStateField field, u8 imm);
void _MSR(PStateField field, ARM64Reg Rt);
void MRS(ARM64Reg Rt, PStateField field);
void HINT(SystemHint op);
void DSB(BarrierType type);
void DMB(BarrierType type);
void ISB(BarrierType type);
// Add/Subtract (Extended/Shifted register)
void ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option);
void ADDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void ADDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option);
void SUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void SUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option);
void SUBS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void SUBS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option);
void CMN(ARM64Reg Rn, ARM64Reg Rm);
void CMN(ARM64Reg Rn, ARM64Reg Rm, ArithOption Option);
void CMP(ARM64Reg Rn, ARM64Reg Rm);
void CMP(ARM64Reg Rn, ARM64Reg Rm, ArithOption Option);
// Add/Subtract (with carry)
void ADC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void ADCS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void SBC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void SBCS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
// Conditional Compare (immediate)
void CCMN(ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond);
void CCMP(ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond);
// Conditional Compare (register)
void CCMN(ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond);
void CCMP(ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond);
// Conditional Select
void CSEL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
void CSINC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
void CSINV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
void CSNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
void CSET(ARM64Reg Rd, CCFlags cond) {
ARM64Reg zr = Is64Bit(Rd) ? ZR : WZR;
CSINC(Rd, zr, zr, (CCFlags)((u32)cond ^ 1));
void NEG(ARM64Reg Rd, ARM64Reg Rs) {
SUB(Rd, Is64Bit(Rd) ? ZR : WZR, Rs);
// Data-Processing 1 source
void RBIT(ARM64Reg Rd, ARM64Reg Rn);
void REV16(ARM64Reg Rd, ARM64Reg Rn);
void REV32(ARM64Reg Rd, ARM64Reg Rn);
void REV64(ARM64Reg Rd, ARM64Reg Rn);
void CLZ(ARM64Reg Rd, ARM64Reg Rn);
void CLS(ARM64Reg Rd, ARM64Reg Rn);
// Data-Processing 2 source
void UDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void SDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void LSLV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void LSRV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void ASRV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void RORV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void CRC32B(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void CRC32H(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void CRC32W(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void CRC32CB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void CRC32CH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void CRC32CW(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void CRC32X(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void CRC32CX(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
// Data-Processing 3 source
void MADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
void MSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
void SMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
void SMULL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void SMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
void SMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void UMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
void UMULL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void UMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
void UMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void MUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void MNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
// Logical (shifted register)
void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
void BIC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
void ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
void ORN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
void EOR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
void EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
void ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
void BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
void TST(ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
// Wrap the above for saner syntax
void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { AND(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
void BIC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { BIC(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
void ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { ORR(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
void ORN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { ORN(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
void EOR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { EOR(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
void EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { EON(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
void ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { ANDS(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
void BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { BICS(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
void TST(ARM64Reg Rn, ARM64Reg Rm) { TST(Rn, Rm, ArithOption(Is64Bit(Rn) ? ZR : WZR, ST_LSL, 0)); }
// Convenience wrappers around ORR. These match the official convenience syntax.
void MOV(ARM64Reg Rd, ARM64Reg Rm, ArithOption Shift);
void MOV(ARM64Reg Rd, ARM64Reg Rm);
void MVN(ARM64Reg Rd, ARM64Reg Rm);
// TODO: These are "slow" as they use arith+shift, should be replaced with UBFM/EXTR variants.
void LSR(ARM64Reg Rd, ARM64Reg Rm, int shift);
void LSL(ARM64Reg Rd, ARM64Reg Rm, int shift);
void ASR(ARM64Reg Rd, ARM64Reg Rm, int shift);
void ROR(ARM64Reg Rd, ARM64Reg Rm, int shift);
// Logical (immediate)
void AND(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
void ANDS(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
void EOR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
void ORR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
void TST(ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
// Add/subtract (immediate)
void ADD(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
void ADDS(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
void SUB(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
void SUBS(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
void CMP(ARM64Reg Rn, u32 imm, bool shift = false);
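//
// Note on the shift flag (an assumption based on the usual A64 add/sub
// immediate encoding): shift = false uses the 12-bit immediate as-is,
// while shift = true applies it shifted left by 12, so for example
// ADD(X0, X0, 1, true) would add 0x1000.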
// Data Processing (Immediate)
void MOVZ(ARM64Reg Rd, u32 imm, ShiftAmount pos = SHIFT_0);
void MOVN(ARM64Reg Rd, u32 imm, ShiftAmount pos = SHIFT_0);
void MOVK(ARM64Reg Rd, u32 imm, ShiftAmount pos = SHIFT_0);
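//
// Building a 64-bit constant by hand (a sketch; MOVI2R further down wraps
// this), assuming SHIFT_16 names the next 16-bit halfword position:
//   MOVZ(X0, 0x1234);             // X0 = 0x0000000000001234
//   MOVK(X0, 0xABCD, SHIFT_16);   // X0 = 0x00000000ABCD1234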
void BFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
void SBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
void UBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
void BFI(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width);
void UBFIZ(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width);
// Extract register (ROR with two inputs; if both are the same register it is faster on A67)
void EXTR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u32 shift);
void SXTB(ARM64Reg Rd, ARM64Reg Rn);
void SXTH(ARM64Reg Rd, ARM64Reg Rn);
void SXTW(ARM64Reg Rd, ARM64Reg Rn);
void UXTB(ARM64Reg Rd, ARM64Reg Rn);
void UXTH(ARM64Reg Rd, ARM64Reg Rn);
void UBFX(ARM64Reg Rd, ARM64Reg Rn, int lsb, int width) {
UBFM(Rd, Rn, lsb, lsb + width - 1);
// Load Register (Literal)
void LDR(ARM64Reg Rt, u32 imm);
void LDRSW(ARM64Reg Rt, u32 imm);
void PRFM(ARM64Reg Rt, u32 imm);
// Load/Store Exclusive
void STXRB(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn);
void STLXRB(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn);
void LDXRB(ARM64Reg Rt, ARM64Reg Rn);
void LDAXRB(ARM64Reg Rt, ARM64Reg Rn);
void STLRB(ARM64Reg Rt, ARM64Reg Rn);
void LDARB(ARM64Reg Rt, ARM64Reg Rn);
void STXRH(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn);
void STLXRH(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn);
void LDXRH(ARM64Reg Rt, ARM64Reg Rn);
void LDAXRH(ARM64Reg Rt, ARM64Reg Rn);
void STLRH(ARM64Reg Rt, ARM64Reg Rn);
void LDARH(ARM64Reg Rt, ARM64Reg Rn);
void STXR(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn);
void STLXR(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn);
void STXP(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn);
void STLXP(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn);
void LDXR(ARM64Reg Rt, ARM64Reg Rn);
void LDAXR(ARM64Reg Rt, ARM64Reg Rn);
void LDXP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn);
void LDAXP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn);
void STLR(ARM64Reg Rt, ARM64Reg Rn);
void LDAR(ARM64Reg Rt, ARM64Reg Rn);
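//
// Classic exclusive-access retry loop (a sketch using only the calls above;
// register choices are arbitrary):
//   const u8* loop = GetCodePointer();
//   LDAXR(W0, X1);        // load-acquire exclusive from [X1]
//   ADD(W0, W0, 1);       // modify the value
//   STLXR(W2, W0, X1);    // store-release exclusive, W2 receives the status
//   CBNZ(W2, loop);       // retry if the exclusive store failed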
// Load/Store no-allocate pair (offset)
void STNP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm);
void LDNP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm);
// Load/Store register (immediate indexed)
void STRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void LDRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void LDRSB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void STRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void LDRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void LDRSH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void STR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void LDR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void LDRSW(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
// Load/Store register (register offset)
void STRB(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
void LDRB(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
void LDRSB(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
void STRH(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
void LDRH(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
void LDRSH(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
void STR(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
void LDR(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
void LDRSW(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
void PRFM(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
// Load/Store register (unscaled offset)
void STURB(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void LDURB(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void LDURSB(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void STURH(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void LDURH(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void LDURSH(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void STUR(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void LDUR(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void LDURSW(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void LDP(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
void LDPSW(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
void STP(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
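//
// Example of saving and restoring a pair of callee-saved registers
// (a sketch; INDEX_SIGNED is the plain signed-offset form noted above):
//   STP(INDEX_SIGNED, X19, X20, SP, 16);   // store X19/X20 at SP+16
//   ...
//   LDP(INDEX_SIGNED, X19, X20, SP, 16);   // restore them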
// Address of label/page PC-relative
void ADR(ARM64Reg Rd, s32 imm);
void ADRP(ARM64Reg Rd, s32 imm);
// Wrapper around MOVZ+MOVK
void MOVI2R(ARM64Reg Rd, u64 imm, bool optimize = true);
void MOVP2R(ARM64Reg Rd, P *ptr) {
_assert_msg_(JIT, Is64Bit(Rd), "Can't store pointers in 32-bit registers");
MOVI2R(Rd, (uintptr_t)ptr);
// Wrapper around AND x, y, imm etc. If you are sure the imm will work, no need to pass a scratch register.
void ANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
void ANDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
void TSTI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG) { ANDSI2R(Is64Bit(Rn) ? ZR : WZR, Rn, imm, scratch); }
void ORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
void EORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
void CMPI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
void ADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
void SUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
void SUBSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
bool TryADDI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
bool TrySUBI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
bool TryCMPI2R(ARM64Reg Rn, u32 imm);
bool TryANDI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
bool TryORRI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
bool TryEORI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
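//
// Usage sketch: the *I2R helpers synthesize arbitrary immediates and fall
// back to the scratch register when the value can't be encoded directly,
// while the Try* variants report failure instead of needing a scratch:
//   ANDI2R(X0, X0, mask, X8);       // X8 only used if mask isn't encodable
//   if (!TryADDI2R(X1, X1, imm)) {
//     MOVI2R(X8, imm);
//     ADD(X1, X1, X8);
//   }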
void ABI_PushRegisters(BitSet32 registers);
void ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask = BitSet32(0));
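//
// For example, a dispatcher prologue/epilogue might save every callee-saved
// GPR in one call (a sketch using the mask defined near the top of the file):
//   ABI_PushRegisters(BitSet32(ALL_CALLEE_SAVED));
//   ...
//   ABI_PopRegisters(BitSet32(ALL_CALLEE_SAVED));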
// Pseudo-instructions for convenience. PUSH pushes 16 bytes even though we only push a single register.
// This is so the stack pointer is always 16-byte aligned, which is checked by hardware!
void PUSH(ARM64Reg Rd);
void POP(ARM64Reg Rd);
void PUSH2(ARM64Reg Rd, ARM64Reg Rn);
void POP2(ARM64Reg Rd, ARM64Reg Rn);
// Utility to generate a call to a std::function object.
// Unfortunately, we can't call operator() directly from emitted code
// (the member function might be a thunk in the case of multiple inheritance),
// so we have to go through a trampoline function.
template <typename T, typename... Args>
static void CallLambdaTrampoline(const std::function<T(Args...)>* f,
// This function expects you to have set up the state.
// Overwrites X0 and X30
template <typename T, typename... Args>
ARM64Reg ABI_SetupLambda(const std::function<T(Args...)>* f)
auto trampoline = &ARM64XEmitter::CallLambdaTrampoline<T, Args...>;
MOVI2R(X30, (uintptr_t)trampoline);
MOVI2R(X0, (uintptr_t)const_cast<void*>((const void*)f));
// Plain function call
void QuickCallFunction(ARM64Reg scratchreg, const void *func);
template <typename T> void QuickCallFunction(ARM64Reg scratchreg, T func) {
QuickCallFunction(scratchreg, (const void *)func);
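//
// Usage sketch (SomeHostFunction is a hypothetical callee): QuickCallFunction
// emits the call, presumably only needing the scratch register when the
// target can't be reached with a plain BL:
//   QuickCallFunction(X8, &SomeHostFunction);
// ABI_SetupLambda above is used similarly; it is assumed to return the
// register holding the trampoline address, which you then BLR to.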
class ARM64FloatEmitter
ARM64FloatEmitter(ARM64XEmitter* emit) : m_emit(emit) {}
void LDR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void STR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
// Loadstore unscaled
void LDUR(u8 size, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void STUR(u8 size, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
// Loadstore single structure
void LD1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn);
void LD1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn, ARM64Reg Rm);
void LD1R(u8 size, ARM64Reg Rt, ARM64Reg Rn);
void LD2R(u8 size, ARM64Reg Rt, ARM64Reg Rn);
void LD1R(u8 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
void LD2R(u8 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
void ST1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn);
void ST1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn, ARM64Reg Rm);
// Loadstore multiple structure
void LD1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn);
void LD1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm = SP);
void ST1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn);
void ST1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm = SP);
void LDP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
void STP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
// Loadstore register offset
void STR(u8 size, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
void LDR(u8 size, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
void FABS(ARM64Reg Rd, ARM64Reg Rn);
void FNEG(ARM64Reg Rd, ARM64Reg Rn);
void FSQRT(ARM64Reg Rd, ARM64Reg Rn);
void FMOV(ARM64Reg Rd, ARM64Reg Rn, bool top = false); // Also generalized move between GPR/FP
void FADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FMAX(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FMIN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FMAXNM(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FMINNM(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FNMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
// Scalar - 3 Source. Note - the accumulator is last on ARM!
void FMADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
void FMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
void FNMADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
void FNMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
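//
// Concretely, FMADD(S0, S1, S2, S3) computes S0 = S3 + S1 * S2, i.e. the
// accumulator Ra is the last operand.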
// Scalar floating point immediate
void FMOV(ARM64Reg Rd, uint8_t imm8);
void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EOR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void BSL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
void FABS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void FADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FMAX(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FMLA(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FMLS(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FMIN(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FCVTL(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void FCVTL2(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void FCVTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
void FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void FDIV(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FNEG(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void FRSQRTE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void FSUB(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void NOT(ARM64Reg Rd, ARM64Reg Rn);
void ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void MOV(ARM64Reg Rd, ARM64Reg Rn) {
void UMIN(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void UMAX(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void SMIN(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void SMAX(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void REV16(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void REV32(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void REV64(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
void UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
void SQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
void SQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
void UQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
void UQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
void XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
void XTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void INS(u8 size, ARM64Reg Rd, u8 index, ARM64Reg Rn);
void INS(u8 size, ARM64Reg Rd, u8 index1, ARM64Reg Rn, u8 index2);
void UMOV(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
void SMOV(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
void FCVT(u8 size_to, u8 size_from, ARM64Reg Rd, ARM64Reg Rn);
// Scalar convert float to int, in a lot of variants.
// Note that the scalar version of this operation has two encodings, one that goes to an integer register
// and one that outputs to a scalar fp register.
void FCVTS(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round);
void FCVTU(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round);
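//
// For example, FCVTS(W0, S1, ROUND_Z) converts S1 to a signed 32-bit integer
// truncating towards zero (the behavior of a C float-to-int cast), whereas
// ROUND_N rounds to nearest with ties to even.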
// Scalar convert int to float. No rounding mode specifier necessary.
void SCVTF(ARM64Reg Rd, ARM64Reg Rn);
void UCVTF(ARM64Reg Rd, ARM64Reg Rn);
// Scalar fixed point to float. scale is the number of fractional bits.
void SCVTF(ARM64Reg Rd, ARM64Reg Rn, int scale);
void UCVTF(ARM64Reg Rd, ARM64Reg Rn, int scale);
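//
// Worked example: with scale = 8 the source integer has 8 fractional bits,
// so SCVTF(S0, W1, 8) turns W1 == 256 into 1.0f (result = value / 2^scale).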
void FCMP(ARM64Reg Rn, ARM64Reg Rm);
void FCMP(ARM64Reg Rn);
void FCMPE(ARM64Reg Rn, ARM64Reg Rm);
void FCMPE(ARM64Reg Rn);
void FCMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FCMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void FCMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FCMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void FCMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FCMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void FCMLE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void FCMLT(u8 size, ARM64Reg Rd, ARM64Reg Rn);
// Conditional select
void FCSEL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
void UZP1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void TRN1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void ZIP1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void UZP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void TRN2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void ZIP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
// Shift by immediate
void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
void SSHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
void USHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
void SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
void SHRN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
void SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
void SXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
void UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
void UXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
void SHL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
void USHR(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
void SSHR(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
// Vector x indexed element
void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);
void FMLA(u8 esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);
void MOVI2F(ARM64Reg Rd, float value, ARM64Reg scratch = INVALID_REG, bool negate = false);
void MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch = INVALID_REG);
void ABI_PushRegisters(BitSet32 registers, ARM64Reg tmp = INVALID_REG);
void ABI_PopRegisters(BitSet32 registers, ARM64Reg tmp = INVALID_REG);
ARM64XEmitter* m_emit;
inline void Write32(u32 value) { m_emit->Write32(value); }
// Emitting functions
void EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn);
void Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn);
void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
void Emit1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
void EmitConversion(bool sf, bool S, u32 type, u32 rmode, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
void EmitConversion2(bool sf, bool S, bool direction, u32 type, u32 rmode, u32 opcode, int scale, ARM64Reg Rd, ARM64Reg Rn);
void EmitCompare(bool M, bool S, u32 op, u32 opcode2, ARM64Reg Rn, ARM64Reg Rm);
void EmitCondSelect(bool M, bool S, CCFlags cond, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitPermute(u32 size, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitScalarImm(bool M, bool S, u32 type, u32 imm5, ARM64Reg Rd, u32 imm8);
void EmitShiftImm(bool Q, bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
void EmitScalarShiftImm(bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
void EmitLoadStoreMultipleStructure(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn);
void EmitLoadStoreMultipleStructurePost(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
void EmitScalar1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
void EmitVectorxElement(bool U, u32 size, bool L, u32 opcode, bool H, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void EmitConvertScalarToInt(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round, bool sign);
void EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra, int opcode);
void EncodeLoadStorePair(u32 size, bool load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
void EncodeLoadStoreRegisterOffset(u32 size, bool load, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
void SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
void SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper);
void UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper);
class ARM64CodeBlock : public CodeBlock<ARM64XEmitter>
void PoisonMemory() override
u32* ptr = (u32*)region;
u32* maxptr = (u32*)(region + region_size);
// If the region size isn't a multiple of 4 bytes, this won't write the last remaining bytes.
// Less than optimal, but there would be nothing we could do but emit a runtime warning anyway.
// AArch64: 0xD4200000 = BRK 0