// Copyright (c) 2012- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

// Optimization ideas:
//
// It's common to see sequences of stores writing or reading to a contiguous set of
// addresses in function prologues/epilogues:
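//
// For instance, a prologue spilling callee-saved registers might look like this
// (illustrative sequence; the registers and offsets are just an example):
//
//  sw s4, 100(sp)
//  sw s3, 96(sp)
//  sw s2, 92(sp)
//  sw s1, 88(sp)
//  sw s0, 84(sp)
//  sw ra, 104(sp)
//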
// Such sequences could easily be detected and turned into nice contiguous
// sequences of ARM stores instead of the current 3 instructions per sw/lw.
//
// Also, if we kept track of the likely register content of a cached register
// (pointer or data), we could avoid many BIC instructions.
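//
// (Sketch of the second idea, not something this JIT currently does: if the
// register cache knew that the host register backing 'sp' already held a valid,
// already-masked PSP address, sp-relative accesses could address off it directly
// instead of re-masking and recomputing the effective address on every access.)
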
#include "Core/MemMap.h"
41
#include "Core/Config.h"
42
#include "Core/MIPS/MIPS.h"
43
#include "Core/MIPS/MIPSAnalyst.h"
44
#include "Core/MIPS/MIPSCodeUtils.h"
45
#include "Core/MIPS/ARM64/Arm64Jit.h"
46
#include "Core/MIPS/ARM64/Arm64RegCache.h"
48
#define _RS MIPS_GET_RS(op)
49
#define _RT MIPS_GET_RT(op)
50
#define _RD MIPS_GET_RD(op)
51
#define _FS MIPS_GET_FS(op)
52
#define _FT MIPS_GET_FT(op)
53
#define _FD MIPS_GET_FD(op)
54
#define _SA MIPS_GET_SA(op)
55
#define _POS ((op>> 6) & 0x1F)
56
#define _SIZE ((op>>11) & 0x1F)
57
#define _IMM16 (signed short)(op & 0xFFFF)
58
#define _IMM26 (op & 0x03FFFFFF)
60
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
// Currently known non-working ones should have DISABLE.

// #define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
#define CONDITIONAL_DISABLE ;
#define DISABLE { Comp_Generic(op); return; }

using namespace Arm64Gen;
using namespace Arm64JitConstants;

void Arm64Jit::SetScratch1ToEffectiveAddress(MIPSGPReg rs, s16 offset) {
	if (offset) {
		ADDI2R(SCRATCH1, gpr.R(rs), offset, SCRATCH2);
	} else {
		MOV(SCRATCH1, gpr.R(rs));
	}
}

std::vector<FixupBranch> Arm64Jit::SetScratch1ForSafeAddress(MIPSGPReg rs, s16 offset, ARM64Reg tempReg) {
	std::vector<FixupBranch> skips;

	SetScratch1ToEffectiveAddress(rs, offset);

	// We can do this a little smarter by shifting out the lower 8 bits, since blocks are 0x100 aligned.
	// PSP_GetUserMemoryEnd() is dynamic, but the others encode to imms just fine.
	// So we only need to safety check the one value.
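	// (For orientation, assuming the standard PSP memory map: the checks below
	// distinguish the scratchpad starting at 0x00010000, VRAM starting at 0x04000000,
	// and kernel/user RAM from 0x08000000 up to PSP_GetUserMemoryEnd(); anything
	// falling into the gaps between those regions is skipped.)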
	if ((PSP_GetUserMemoryEnd() & 0x000FFFFF) == 0) {
		// In other words, shift right 8.
		UBFX(tempReg, SCRATCH1, 8, 24);
		// Now check if we're higher than that.
		CMPI2R(tempReg, PSP_GetUserMemoryEnd() >> 8);
	} else {
		// Compare first using the tempReg, then shift into it.
		CMPI2R(SCRATCH1, PSP_GetUserMemoryEnd(), tempReg);
		UBFX(tempReg, SCRATCH1, 8, 24);
	}
	skips.push_back(B(CC_HS));

	// If it's higher than the memory start and we didn't skip yet, it must be good. Hurray.
	CMPI2R(tempReg, PSP_GetKernelMemoryBase() >> 8);
	FixupBranch inRAM = B(CC_HS);

	// If we got here and it's higher, then it's between VRAM and RAM - skip.
	CMPI2R(tempReg, PSP_GetVidMemEnd() >> 8);
	skips.push_back(B(CC_HS));

	// And if it's higher than the VRAM base and we're still here, it's in VRAM.
	CMPI2R(tempReg, PSP_GetVidMemBase() >> 8);
	FixupBranch inVRAM = B(CC_HS);

	// Last gap, this is between SRAM and VRAM. Skip it.
	CMPI2R(tempReg, PSP_GetScratchpadMemoryEnd() >> 8);
	skips.push_back(B(CC_HS));

	// And for lower than SRAM, we just skip again.
	CMPI2R(tempReg, PSP_GetScratchpadMemoryBase() >> 8);
	skips.push_back(B(CC_LO));

	// At this point, we're either in SRAM (above) or in RAM/VRAM.
	SetJumpTarget(inRAM);
	SetJumpTarget(inVRAM);

	return skips;
}

void Arm64Jit::Comp_ITypeMemLR(MIPSOpcode op, bool load) {
	CONDITIONAL_DISABLE;
	int offset = (signed short)(op & 0xFFFF);
	MIPSGPReg rt = _RT;
	MIPSGPReg rs = _RS;
	int o = op >> 26;

	if (!js.inDelaySlot) {
		// Optimisation: Combine to single unaligned load/store
		bool isLeft = (o == 34 || o == 42);
		MIPSOpcode nextOp = GetOffsetInstruction(1);
		// Find a matching shift in opposite direction with opposite offset.
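		// (A typical little-endian pair this matches, purely illustrative:
		//   lwl t0, 3(a0)
		//   lwr t0, 0(a0)
		// which together read the possibly-unaligned word at a0, so the pair can be
		// collapsed into a single lw and compiled by the normal path.)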
		if (nextOp == (isLeft ? (op.encoding + (4 << 26) - 3) : (op.encoding - (4 << 26) + 3))) {
			EatInstruction(nextOp);
			nextOp = MIPSOpcode(((load ? 35 : 43) << 26) | ((isLeft ? nextOp : op) & 0x03FFFFFF));  // lw or sw
			Comp_ITypeMem(nextOp);
			return;
		}
	}

	u32 iaddr = gpr.IsImm(rs) ? offset + gpr.GetImm(rs) : 0xFFFFFFFF;
	std::vector<FixupBranch> skips;

	if (gpr.IsImm(rs) && Memory::IsValidAddress(iaddr)) {
		u32 addr = iaddr;
		// Need to initialize since this only loads part of the register.
		// But rs no longer matters (even if rs == rt) since we have the address.
		gpr.MapReg(rt, load ? MAP_DIRTY : 0);
		gpr.SetRegImm(SCRATCH1, addr & ~3);

		u8 shift = (addr & 3) * 8;
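		// (Worked example: addr & 3 == 1 gives shift == 8, so the lwl case below keeps
		// the low two bytes of rt and merges the aligned word's low two bytes into the
		// top half of rt, matching little-endian MIPS lwl semantics.)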
		switch (o) {
		case 34: // lwl
			LDR(SCRATCH1, MEMBASEREG, SCRATCH1);
			ANDI2R(gpr.R(rt), gpr.R(rt), 0x00ffffff >> shift, INVALID_REG);
			ORR(gpr.R(rt), gpr.R(rt), SCRATCH1, ArithOption(gpr.R(rt), ST_LSL, 24 - shift));
			break;

		case 38: // lwr
			LDR(SCRATCH1, MEMBASEREG, SCRATCH1);
			ANDI2R(gpr.R(rt), gpr.R(rt), 0xffffff00 << (24 - shift), INVALID_REG);
			ORR(gpr.R(rt), gpr.R(rt), SCRATCH1, ArithOption(gpr.R(rt), ST_LSR, shift));
			break;

		case 42: // swl
			LDR(SCRATCH2, MEMBASEREG, SCRATCH1);
			ANDI2R(SCRATCH2, SCRATCH2, 0xffffff00 << shift, INVALID_REG);
			ORR(SCRATCH2, SCRATCH2, gpr.R(rt), ArithOption(gpr.R(rt), ST_LSR, 24 - shift));
			STR(SCRATCH2, MEMBASEREG, SCRATCH1);
			break;

		case 46: // swr
			LDR(SCRATCH2, MEMBASEREG, SCRATCH1);
			ANDI2R(SCRATCH2, SCRATCH2, 0x00ffffff >> (24 - shift), INVALID_REG);
			ORR(SCRATCH2, SCRATCH2, gpr.R(rt), ArithOption(gpr.R(rt), ST_LSL, shift));
			STR(SCRATCH2, MEMBASEREG, SCRATCH1);
			break;
		}
		return;
	}

	_dbg_assert_msg_(JIT, !gpr.IsImm(rs), "Invalid immediate address? CPU bug?");

	gpr.MapDirtyIn(rt, rs, false);

	// NOTE: the safe-address variant is disabled here (the false &&), so the direct path below is always used.
	if (false && !g_Config.bFastMemory && rs != MIPS_REG_SP) {
		skips = SetScratch1ForSafeAddress(rs, offset, SCRATCH2);
	} else {
		SetScratch1ToEffectiveAddress(rs, offset);
	}

	// Need temp regs. TODO: Get from the regcache?
	static const ARM64Reg LR_SCRATCH3 = W9;
	static const ARM64Reg LR_SCRATCH4 = W10;
	if (load) {
		PUSH(EncodeRegTo64(LR_SCRATCH3));
	} else {
		PUSH2(EncodeRegTo64(LR_SCRATCH3), EncodeRegTo64(LR_SCRATCH4));
	}

	// Here's our shift amount.
	ANDI2R(SCRATCH2, SCRATCH1, 3);
	LSL(SCRATCH2, SCRATCH2, 3);

	// Now align the address for the actual read.
	ANDI2R(SCRATCH1, SCRATCH1, ~3U);

	switch (o) {
	case 34: // lwl
		// Build the mask of the rt bits to keep, then merge in the shifted memory word.
		MOVI2R(LR_SCRATCH3, 0x00ffffff);
		LDR(SCRATCH1, MEMBASEREG, SCRATCH1);
		LSRV(LR_SCRATCH3, LR_SCRATCH3, SCRATCH2);
		AND(gpr.R(rt), gpr.R(rt), LR_SCRATCH3);
		NEG(SCRATCH2, SCRATCH2);
		ADDI2R(SCRATCH2, SCRATCH2, 24);
		LSLV(SCRATCH1, SCRATCH1, SCRATCH2);
		ORR(gpr.R(rt), gpr.R(rt), SCRATCH1);
		break;

	case 38: // lwr
		MOVI2R(LR_SCRATCH3, 0xffffff00);
		LDR(SCRATCH1, MEMBASEREG, SCRATCH1);
		LSRV(SCRATCH1, SCRATCH1, SCRATCH2);
		NEG(SCRATCH2, SCRATCH2);
		ADDI2R(SCRATCH2, SCRATCH2, 24);
		LSLV(LR_SCRATCH3, LR_SCRATCH3, SCRATCH2);
		AND(gpr.R(rt), gpr.R(rt), LR_SCRATCH3);
		ORR(gpr.R(rt), gpr.R(rt), SCRATCH1);
		break;

	case 42: // swl
		MOVI2R(LR_SCRATCH3, 0xffffff00);
		LDR(LR_SCRATCH4, MEMBASEREG, SCRATCH1);
		LSLV(LR_SCRATCH3, LR_SCRATCH3, SCRATCH2);
		AND(LR_SCRATCH4, LR_SCRATCH4, LR_SCRATCH3);
		NEG(SCRATCH2, SCRATCH2);
		ADDI2R(SCRATCH2, SCRATCH2, 24);
		LSRV(LR_SCRATCH3, gpr.R(rt), SCRATCH2);
		ORR(LR_SCRATCH4, LR_SCRATCH4, LR_SCRATCH3);
		STR(LR_SCRATCH4, MEMBASEREG, SCRATCH1);
		break;

	case 46: // swr
		MOVI2R(LR_SCRATCH3, 0x00ffffff);
		LDR(LR_SCRATCH4, MEMBASEREG, SCRATCH1);
		NEG(SCRATCH2, SCRATCH2);
		ADDI2R(SCRATCH2, SCRATCH2, 24);
		LSRV(LR_SCRATCH3, LR_SCRATCH3, SCRATCH2);
		AND(LR_SCRATCH4, LR_SCRATCH4, LR_SCRATCH3);
		NEG(SCRATCH2, SCRATCH2);
		ADDI2R(SCRATCH2, SCRATCH2, 24);
		LSLV(LR_SCRATCH3, gpr.R(rt), SCRATCH2);
		ORR(LR_SCRATCH4, LR_SCRATCH4, LR_SCRATCH3);
		STR(LR_SCRATCH4, MEMBASEREG, SCRATCH1);
		break;
	}

	if (load) {
		POP(EncodeRegTo64(LR_SCRATCH3));
	} else {
		POP2(EncodeRegTo64(LR_SCRATCH3), EncodeRegTo64(LR_SCRATCH4));
	}

	for (auto skip : skips) {
		SetJumpTarget(skip);
	}
}

void Arm64Jit::Comp_ITypeMem(MIPSOpcode op) {
	CONDITIONAL_DISABLE;
	int offset = (signed short)(op & 0xFFFF);
	bool load = false;
	MIPSGPReg rt = _RT;
	MIPSGPReg rs = _RS;
	int o = op >> 26;
	if (((op >> 29) & 1) == 0 && rt == MIPS_REG_ZERO) {
		// Don't load anything into $zr
		return;
	}

	u32 iaddr = gpr.IsImm(rs) ? offset + gpr.GetImm(rs) : 0xFFFFFFFF;
	std::vector<FixupBranch> skips;
	ARM64Reg addrReg = SCRATCH1;

	// Access size, used below to check offset alignment for the fast path.
	int dataSize = 4;
	if (o == 33 || o == 37 || o == 41)
		dataSize = 2;
	else if (o == 32 || o == 36 || o == 40)
		dataSize = 1;

	switch (o) {
	case 32: //lb
	case 33: //lh
	case 35: //lw
	case 36: //lbu
	case 37: //lhu
		load = true;
		// Fall through - loads and stores share the code below.
	case 40: //sb
	case 41: //sh
	case 43: //sw
		if (jo.cachePointers && g_Config.bFastMemory) {
			// ARM has smaller load/store immediate displacements than MIPS, 12 bits - and some memory ops only have 8 bits.
			int offsetRange = 0x3ff;
			if (o == 41 || o == 33 || o == 37 || o == 32)
				offsetRange = 0xff;  // 8 bit offset only
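			// (Illustrative: lw t0, 0x1000(sp) has a valid 16-bit MIPS offset but is
			// outside offsetRange, so it would fall through to the general path below.)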
			if (!gpr.IsImm(rs) && rs != rt && (offset <= offsetRange) && offset >= 0 &&
				(dataSize == 1 || (offset & (dataSize - 1)) == 0)) {  // The offset must be aligned to the access size, as required for INDEX_UNSIGNED encodings. We can get here through fallback from lwl/lwr.
				gpr.SpillLock(rs, rt);
				gpr.MapRegAsPointer(rs);

				Arm64Gen::ARM64Reg ar;
				if (!load && gpr.IsImm(rt) && gpr.GetImm(rt) == 0) {
					// Can just store from the zero register directly.
					ar = WZR;
				} else {
					gpr.MapReg(rt, load ? MAP_NOINIT : 0);
					ar = gpr.R(rt);
				}
				switch (o) {
				case 35: LDR(INDEX_UNSIGNED, ar, gpr.RPtr(rs), offset); break;    // lw
				case 37: LDRH(INDEX_UNSIGNED, ar, gpr.RPtr(rs), offset); break;   // lhu
				case 33: LDRSH(INDEX_UNSIGNED, ar, gpr.RPtr(rs), offset); break;  // lh
				case 36: LDRB(INDEX_UNSIGNED, ar, gpr.RPtr(rs), offset); break;   // lbu
				case 32: LDRSB(INDEX_UNSIGNED, ar, gpr.RPtr(rs), offset); break;  // lb
				case 43: STR(INDEX_UNSIGNED, ar, gpr.RPtr(rs), offset); break;    // sw
				case 41: STRH(INDEX_UNSIGNED, ar, gpr.RPtr(rs), offset); break;   // sh
				case 40: STRB(INDEX_UNSIGNED, ar, gpr.RPtr(rs), offset); break;   // sb
				}
				gpr.ReleaseSpillLocks();
				break;
			}
		}

		if (gpr.IsImm(rs) && Memory::IsValidAddress(iaddr)) {
			// TODO: Avoid mapping a register for the "zero" register, use R0 instead.

			// We can compute the full address at compile time. Kickass.
			u32 addr = iaddr & 0x3FFFFFFF;

			if (addr == iaddr && offset == 0) {
				// It was already safe. Let's shove it into a reg and use it directly.
				load ? gpr.MapDirtyIn(rt, rs) : gpr.MapInIn(rt, rs);
				addrReg = gpr.R(rs);
			} else {
				// In this case, only map rt. rs+offset will be in R0.
				gpr.MapReg(rt, load ? MAP_NOINIT : 0);
				gpr.SetRegImm(SCRATCH1, addr);
				addrReg = SCRATCH1;
			}
		} else {
			_dbg_assert_msg_(JIT, !gpr.IsImm(rs), "Invalid immediate address? CPU bug?");
			load ? gpr.MapDirtyIn(rt, rs) : gpr.MapInIn(rt, rs);

			if (!g_Config.bFastMemory && rs != MIPS_REG_SP) {
				skips = SetScratch1ForSafeAddress(rs, offset, SCRATCH2);
			} else {
				SetScratch1ToEffectiveAddress(rs, offset);
			}
		}

		switch (o) {
		// Load
		case 35: LDR(gpr.R(rt), MEMBASEREG, addrReg); break;    // lw
		case 37: LDRH(gpr.R(rt), MEMBASEREG, addrReg); break;   // lhu
		case 33: LDRSH(gpr.R(rt), MEMBASEREG, addrReg); break;  // lh
		case 36: LDRB(gpr.R(rt), MEMBASEREG, addrReg); break;   // lbu
		case 32: LDRSB(gpr.R(rt), MEMBASEREG, addrReg); break;  // lb
		// Store
		case 43: STR(gpr.R(rt), MEMBASEREG, addrReg); break;    // sw
		case 41: STRH(gpr.R(rt), MEMBASEREG, addrReg); break;   // sh
		case 40: STRB(gpr.R(rt), MEMBASEREG, addrReg); break;   // sb
		}
		for (auto skip : skips) {
			SetJumpTarget(skip);
			// TODO: Could clear to zero here on load, if skipping this for good reads.
		}
		break;

	case 34: //lwl
	case 38: //lwr
	case 42: //swl
	case 46: //swr
		Comp_ITypeMemLR(op, load);
		break;

	default:
		Comp_Generic(op);
		break;
	}
}

void Arm64Jit::Comp_Cache(MIPSOpcode op) {
	// int imm = (s16)(op & 0xFFFF);
	// int addr = R(rs) + imm;
	int func = (op >> 16) & 0x1F;

	// It appears that a cache line is 0x40 (64) bytes; loops in games
	// issue the cache instruction at that interval.

	// These codes might be PSP-specific, they don't match regular MIPS cache codes very well.
	switch (func) {
	// Icache
	case 8:
		// Invalidate the instruction cache at this address.
		break;

	// Dcache
	case 24:
		// "Create Dirty Exclusive" - for avoiding a cacheline fill before writing to it.
		// Will cause garbage on the real machine so we just ignore it; the app will overwrite the cacheline.
		break;
	case 25:  // Hit Invalidate - zaps the line if present in cache. Should not writeback???? scary.
		// No need to do anything.
		break;
	case 27:  // D-cube. Hit Writeback Invalidate. Tony Hawk Underground 2
		break;
	case 30:  // GTA LCS, a lot. Fill (prefetch). Tony Hawk Underground 2