
Viewing changes to Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp

  • Committer: Sérgio Benjamim
  • Date: 2015-02-13 05:54:40 UTC
  • Revision ID: sergio_br2@yahoo.com.br-20150213055440-ey2rt3sjpy27km78
Dolphin Triforce branch from code.google, commit b957980 (4.0-315).

// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.

#include <map>

// for the PROFILER stuff
#ifdef _WIN32
#include <windows.h>
#endif

#include "Common.h"
#include "../../HLE/HLE.h"
#include "../../PatchEngine.h"
#include "../Profiler.h"
#include "Jit.h"
#include "JitAsm.h"
#include "JitRegCache.h"
#include "Jit64_Tables.h"
#include "HW/ProcessorInterface.h"
#if defined(_DEBUG) || defined(DEBUGFAST)
#include "PowerPCDisasm.h"
#endif

using namespace Gen;
using namespace PowerPC;

// Dolphin's PowerPC->x86 JIT dynamic recompiler
// (Nearly) all code by ector (hrydgard)
// Features:
// * x86 & x64 support, lots of shared code.
// * Basic block linking
// * Fast dispatcher

// Unfeatures:
// * Does not recompile all instructions - sometimes falls back to inserting a CALL to the corresponding Interpreter function.

// Various notes below

// Register allocation
//   RAX - Generic quicktemp register
//   RBX - point to base of memory map
//   RSI RDI R12 R13 R14 R15 - free for allocation
//   RCX RDX R8 R9 R10 R11 - allocate in emergencies. These need to be flushed before functions are called.
//   RSP - stack pointer, do not generally use, very dangerous
//   RBP - ?

// IMPORTANT:
// Make sure that all generated code and all emulator state sits under the 2GB boundary so that
// RIP addressing can be used easily. Windows will always allocate static code under the 2GB boundary.
// Also make sure to use VirtualAlloc and specify EXECUTE permission.

// Open questions
// * Should there be any statically allocated registers? r3, r4, r5, r8, r0 come to mind.. maybe sp
// * Does it make sense to finish off the remaining non-jitted instructions? Seems we are hitting diminishing returns.
 
// Other considerations
//
// Many instructions have shorter forms for EAX. However, I believe their performance boost
// would be so small as to be negligible, so I haven't dirtied up the code with that. AMD recommends it in their
// optimization manuals, though.
//
// We support block linking. Reserve space at the exits of every block for a full 5-byte jmp. Save 16-bit offsets
// from the starts of each block, marking the exits so that they can be nicely patched at any time.
//
// Blocks do NOT use call/ret, they only jmp to each other and to the dispatcher when necessary.
//
// All blocks that can be precompiled will be precompiled. Code will be memory protected - any write will mark
// the region as non-compilable, and all links to the page will be torn out and replaced with dispatcher jmps.
//
// Alternatively, the icbi instruction SHOULD mark where we can't compile.
//
// Seldom-happening events are handled by adding a decrement of a counter to all blr instructions (which are
// expensive anyway since we need to return to the dispatcher, except when they can be predicted).
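
// (The exit patching described above can be seen in WriteExit() below: each block exit either jumps
// straight to an already-compiled destination block's checkedEntry, or stores PC and jumps to the
// dispatcher, and the exit's code pointer is recorded in the JitBlock so it can be patched later.)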
 
// TODO: SERIOUS synchronization problem with the video backend setting tokens and breakpoints in dual core mode!!!
//       Somewhat fixed by disabling idle skipping when certain interrupts are enabled
//       This is not a permanent, reliable fix
// TODO: Zeldas go whacko when you hang the gfx thread

// Idea - Accurate exception handling
// Compute register state at a certain instruction by running the JIT in "dry mode", and stopping at the right place.
// Not likely to be done :P


// Optimization Ideas -
/*
  * Assume SP is in main RAM (in Wii mode too?) - partly done
  * Assume all floating point loads and double precision loads+stores are to/from main ram
    (single precision stores can be used in write gather pipe, specialized fast check added)
  * AMD only - use movaps instead of movapd when loading ps from memory?
  * HLE functions like floorf, sin, memcpy, etc - they can be much faster
  * ABI optimizations - drop F0-F13 on blr, for example. Watch out for context switching.
    CR2-CR4 are non-volatile, rest of CR is volatile -> dropped on blr.
    R5-R12 are volatile -> dropped on blr.
  * classic inlining across calls.

Low hanging fruit:
stfd -- guaranteed in memory
cmpl
mulli
stfs
stwu
lb/stzx

bcx - optimize!
bcctr
stfs
psq_st
addx
orx
rlwimix
fcmpo
DSP_UpdateARAMDMA
lfd
stwu
cntlzwx
bcctrx
WriteBigEData

TODO
lha
srawx
addic_rc
addex
subfcx
subfex

fmaddx
fmulx
faddx
fnegx
frspx
frsqrtex
ps_sum0
ps_muls0
ps_adds1

*/

static int CODE_SIZE = 1024*1024*32;
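// (1024*1024*32 bytes = 32 MiB; AllocCodeSpace(CODE_SIZE) in Jit64::Init() below reserves this much
// space for emitted code.)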
 
namespace CPUCompare
{
        extern u32 m_BlockStart;
}

void Jit64::Init()
{
        jo.optimizeStack = true;
        /* This will enable block linking in JitBlockCache::FinalizeBlock(); it gives faster execution but may not
           be as stable as the alternative (to not link the blocks). However, I have not heard about any good examples
           where this causes problems, so I'm enabling this by default, since I seem to get perhaps as much as 20% more
           fps with this option enabled. If you suspect that this option causes problems you can also disable it from the
           debugging window. */
        if (Core::g_CoreStartupParameter.bEnableDebugging)
        {
                jo.enableBlocklink = false;
                Core::g_CoreStartupParameter.bSkipIdle = false;
        }
        else
        {
                if (!Core::g_CoreStartupParameter.bJITBlockLinking)
                {
                        jo.enableBlocklink = false;
                }
                else
                        jo.enableBlocklink = !Core::g_CoreStartupParameter.bMMU;
        }
        jo.fpAccurateFcmp = Core::g_CoreStartupParameter.bEnableFPRF;
        jo.optimizeGatherPipe = true;
        jo.fastInterrupts = false;
        jo.accurateSinglePrecision = true;
        js.memcheck = Core::g_CoreStartupParameter.bMMU;
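        // (Net effect of the settings above: block linking is active only when debugging is off,
        // bJITBlockLinking is set and the MMU is not emulated; js.memcheck mirrors the MMU setting.)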
 
        gpr.SetEmitter(this);
        fpr.SetEmitter(this);

        trampolines.Init();
        AllocCodeSpace(CODE_SIZE);

        blocks.Init();
        asm_routines.Init();
}

void Jit64::ClearCache()
{
        blocks.Clear();
        trampolines.ClearCodeSpace();
        ClearCodeSpace();
}

void Jit64::Shutdown()
{
        FreeCodeSpace();

        blocks.Shutdown();
        trampolines.Shutdown();
        asm_routines.Shutdown();
}

// This is only called by Default() in this file. It will execute an instruction with the interpreter functions.
void Jit64::WriteCallInterpreter(UGeckoInstruction inst)
{
        gpr.Flush(FLUSH_ALL);
        fpr.Flush(FLUSH_ALL);
        if (js.isLastInstruction)
        {
                MOV(32, M(&PC), Imm32(js.compilerPC));
                MOV(32, M(&NPC), Imm32(js.compilerPC + 4));
        }
        Interpreter::_interpreterInstruction instr = GetInterpreterOp(inst);
        ABI_CallFunctionC((void*)instr, inst.hex);
}

void Jit64::unknown_instruction(UGeckoInstruction inst)
{
        PanicAlert("unknown_instruction %08x - Fix me ;)", inst.hex);
}

void Jit64::Default(UGeckoInstruction _inst)
{
        WriteCallInterpreter(_inst.hex);
}

void Jit64::HLEFunction(UGeckoInstruction _inst)
{
        gpr.Flush(FLUSH_ALL);
        fpr.Flush(FLUSH_ALL);
        ABI_CallFunctionCC((void*)&HLE::Execute, js.compilerPC, _inst.hex);
}

void Jit64::DoNothing(UGeckoInstruction _inst)
{
        // Yup, just don't do anything.
}

static const bool ImHereDebug = false;
static const bool ImHereLog = false;
static std::map<u32, int> been_here;
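
// ImHere() is a debugging aid: when ImHereDebug is set, DoJit() emits a call to it at the entry of
// every block. With ImHereLog it also appends each visited PC to log64.txt/log32.txt, and it
// DEBUG_LOGs a given PC only on its first visit and then on every 1024th repeat, to keep the log readable.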
 
static void ImHere()
{
        static File::IOFile f;
        if (ImHereLog)
        {
                if (!f)
                {
#ifdef _M_X64
                        f.Open("log64.txt", "w");
#else
                        f.Open("log32.txt", "w");
#endif
                }
                fprintf(f.GetHandle(), "%08x\n", PC);
        }
        if (been_here.find(PC) != been_here.end())
        {
                been_here.find(PC)->second++;
                if ((been_here.find(PC)->second) & 1023)
                        return;
        }
        DEBUG_LOG(DYNA_REC, "I'm here - PC = %08x , LR = %08x", PC, LR);
        been_here[PC] = 1;
}
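
// Cleanup() is emitted at every block exit: it flushes any bytes this block queued into the write
// gather pipe, and, when the performance-monitor registers MMCR0/MMCR1 are non-zero, calls
// PowerPC::UpdatePerformanceMonitor with the block's cycle, load/store and floating-point counts.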
 
void Jit64::Cleanup()
{
        if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0)
        {
                ABI_CallFunction((void *)&GPFifo::CheckGatherPipe);
        }

        // SPEED HACK: MMCR0/MMCR1 should be checked at run-time, not at compile time.
        if (MMCR0.Hex || MMCR1.Hex)
                ABI_CallFunctionCCC((void *)&PowerPC::UpdatePerformanceMonitor, js.downcountAmount, jit->js.numLoadStoreInst, jit->js.numFloatingPointInst);
}
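
// WriteExit() ends a block along a statically known path: it runs Cleanup(), subtracts the block's
// cycle count from CoreTiming::downcount, records the exit's destination and code pointer in the
// JitBlock, and then either jumps straight to the destination block's checkedEntry (if it is already
// compiled and block linking is enabled) or stores PC and jumps to the dispatcher.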
 
void Jit64::WriteExit(u32 destination, int exit_num)
{
        Cleanup();

        SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));

        // If nobody has taken care of this yet (this can be removed when all branches are done)
        JitBlock *b = js.curBlock;
        b->exitAddress[exit_num] = destination;
        b->exitPtrs[exit_num] = GetWritableCodePtr();

        // Link opportunity!
        if (jo.enableBlocklink)
        {
                int block = blocks.GetBlockNumberFromStartAddress(destination);
                if (block >= 0)
                {
                        // It exists! Joy of joy!
                        JMP(blocks.GetBlock(block)->checkedEntry, true);
                        b->linkStatus[exit_num] = true;
                        return;
                }
        }
        MOV(32, M(&PC), Imm32(destination));
        JMP(asm_routines.dispatcher, true);
}
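
// The remaining exit helpers cover dynamic and exceptional exits: WriteExitDestInEAX() takes the
// target address from EAX, WriteRfiExitDestInEAX() additionally updates NPC and calls
// PowerPC::CheckExceptions (for rfi), and WriteExceptionExit()/WriteExternalExceptionExit() call
// CheckExceptions/CheckExternalExceptions before jumping to the dispatcher; none of them are block-linked.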
 
void Jit64::WriteExitDestInEAX()
{
        MOV(32, M(&PC), R(EAX));
        Cleanup();
        SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
        JMP(asm_routines.dispatcher, true);
}

void Jit64::WriteRfiExitDestInEAX()
{
        MOV(32, M(&PC), R(EAX));
        MOV(32, M(&NPC), R(EAX));
        Cleanup();
        ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions));
        SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
        JMP(asm_routines.dispatcher, true);
}

void Jit64::WriteExceptionExit()
{
        Cleanup();
        MOV(32, R(EAX), M(&PC));
        MOV(32, M(&NPC), R(EAX));
        ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions));
        SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
        JMP(asm_routines.dispatcher, true);
}

void Jit64::WriteExternalExceptionExit()
{
        Cleanup();
        MOV(32, R(EAX), M(&PC));
        MOV(32, M(&NPC), R(EAX));
        ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExternalExceptions));
        SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
        JMP(asm_routines.dispatcher, true);
}

void STACKALIGN Jit64::Run()
{
        CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode;
        pExecAddr();
}

void Jit64::SingleStep()
{
        CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode;
        pExecAddr();
}
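
// Trace() dumps the visible CPU state (PC, SRR0/SRR1, the eight cr_fast fields, FPSCR, MSR and LR)
// to the debug log; defining JIT_LOG_GPR / JIT_LOG_FPR additionally appends all 32 GPRs or the raw
// PS0 halves of the FPRs to the line.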
 
void Jit64::Trace()
{
        char regs[500] = "";
        char fregs[750] = "";

#ifdef JIT_LOG_GPR
        for (int i = 0; i < 32; i++)
        {
                char reg[50];
                sprintf(reg, "r%02d: %08x ", i, PowerPC::ppcState.gpr[i]);
                strncat(regs, reg, 500);
        }
#endif

#ifdef JIT_LOG_FPR
        for (int i = 0; i < 32; i++)
        {
                char reg[50];
                sprintf(reg, "f%02d: %016llx ", i, riPS0(i));
                strncat(fregs, reg, 750);
        }
#endif

        DEBUG_LOG(DYNA_REC, "JIT64 PC: %08x SRR0: %08x SRR1: %08x CRfast: %02x%02x%02x%02x%02x%02x%02x%02x FPSCR: %08x MSR: %08x LR: %08x %s %s",
                PC, SRR0, SRR1, PowerPC::ppcState.cr_fast[0], PowerPC::ppcState.cr_fast[1], PowerPC::ppcState.cr_fast[2], PowerPC::ppcState.cr_fast[3],
                PowerPC::ppcState.cr_fast[4], PowerPC::ppcState.cr_fast[5], PowerPC::ppcState.cr_fast[6], PowerPC::ppcState.cr_fast[7], PowerPC::ppcState.fpscr,
                PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs, fregs);
}
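
// Jit() is the compile entry point: if the emitter has less than 0x10000 bytes (64 KiB) of code
// space left, the block cache is full, or bJITNoBlockCache is set, the whole cache is flushed
// first; then a JitBlock is allocated, DoJit() fills it with code, and FinalizeBlock() registers
// it with the block cache (linking it to other blocks when block linking is enabled).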
 
void STACKALIGN Jit64::Jit(u32 em_address)
{
        if (GetSpaceLeft() < 0x10000 || blocks.IsFull() || Core::g_CoreStartupParameter.bJITNoBlockCache)
        {
                ClearCache();
        }

        int block_num = blocks.AllocateBlock(em_address);
        JitBlock *b = blocks.GetBlock(block_num);
        blocks.FinalizeBlock(block_num, jo.enableBlocklink, DoJit(em_address, &code_buffer, b));
}
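
// DoJit() is the compiler proper. In outline it:
//   1. flattens the PPC code starting at em_address into code_buf via PPCAnalyst::Flatten(),
//      unless an instruction-fetch exception was detected,
//   2. emits the block prologue: the downcount check that bails out to asm_routines.doTiming, the
//      optional ImHere/profiling hooks, and register-allocator start-up from the gpa/fpa usage data,
//   3. walks the instructions, wrapping each compiled op (Jit64Tables::CompileInstruction) with HLE
//      hooks, gather-pipe flushes, the one-time "FPU unavailable" bailout, external-interrupt checks
//      for FIFO writes, breakpoint checks and MMU memchecks as needed,
//   4. emits the fallback exits for memory exceptions and broken blocks, then fills in the block's
//      code size and flags and returns the normal entry point.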
 
const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlock *b)
{
        int blockSize = code_buf->GetSize();

        // Memory exception on instruction fetch
        bool memory_exception = false;

        // A broken block is a block that does not end in a branch
        bool broken_block = false;

        if (Core::g_CoreStartupParameter.bEnableDebugging)
        {
                // Comment out the following to disable breakpoints (speed-up)
                if (!Profiler::g_ProfileBlocks)
                {
                        if (GetState() == CPU_STEPPING)
                                blockSize = 1;
                        Trace();
                }
        }

        if (em_address == 0)
        {
                // Memory exception occurred during instruction fetch
                memory_exception = true;
        }

        if (Core::g_CoreStartupParameter.bMMU && (em_address & JIT_ICACHE_VMEM_BIT))
        {
                if (!Memory::TranslateAddress(em_address, Memory::FLAG_OPCODE))
                {
                        // Memory exception occurred during instruction fetch
                        memory_exception = true;
                }
        }

        int size = 0;
        js.firstFPInstructionFound = false;
        js.isLastInstruction = false;
        js.blockStart = em_address;
        js.fifoBytesThisBlock = 0;
        js.curBlock = b;
        js.block_flags = 0;
        js.cancel = false;
        jit->js.numLoadStoreInst = 0;
        jit->js.numFloatingPointInst = 0;

        // Analyze the block, collect all instructions it is made of (including inlining,
        // if that is enabled), reorder instructions for optimal performance, and join joinable instructions.
        u32 nextPC = em_address;
        u32 merged_addresses[32];
        const int capacity_of_merged_addresses = sizeof(merged_addresses) / sizeof(merged_addresses[0]);
        int size_of_merged_addresses = 0;
        if (!memory_exception)
        {
                // If there is a memory exception inside a block (broken_block==true), compile up to that instruction.
                nextPC = PPCAnalyst::Flatten(em_address, &size, &js.st, &js.gpa, &js.fpa, broken_block, code_buf, blockSize, merged_addresses, capacity_of_merged_addresses, size_of_merged_addresses);
        }

        PPCAnalyst::CodeOp *ops = code_buf->codebuffer;

        const u8 *start = AlignCode4(); // TODO: Test if this or AlignCode16 make a difference from GetCodePtr
        b->checkedEntry = start;
        b->runCount = 0;

        // Downcount flag check. The last block decremented downcounter, and the flag should still be available.
        FixupBranch skip = J_CC(CC_NBE);
        MOV(32, M(&PC), Imm32(js.blockStart));
        JMP(asm_routines.doTiming, true);  // downcount hit zero - go doTiming.
        SetJumpTarget(skip);

        const u8 *normalEntry = GetCodePtr();
        b->normalEntry = normalEntry;

        if (ImHereDebug)
                ABI_CallFunction((void *)&ImHere); // Used to get a trace of the last few blocks before a crash, sometimes VERY useful

        // Conditionally add profiling code.
        if (Profiler::g_ProfileBlocks) {
                ADD(32, M(&b->runCount), Imm8(1));
#ifdef _WIN32
                b->ticCounter = 0;
                b->ticStart = 0;
                b->ticStop = 0;
#else
//TODO
#endif
                // get start tic
                PROFILER_QUERY_PERFORMANCE_COUNTER(&b->ticStart);
        }
#if defined(_DEBUG) || defined(DEBUGFAST) || defined(NAN_CHECK)
        // should help logged stack-traces become more accurate
        MOV(32, M(&PC), Imm32(js.blockStart));
#endif

        // Start up the register allocators
        // They use the information in gpa/fpa to preload commonly used registers.
        gpr.Start(js.gpa);
        fpr.Start(js.fpa);

        js.downcountAmount = 0;
        if (!Core::g_CoreStartupParameter.bEnableDebugging)
        {
                for (int i = 0; i < size_of_merged_addresses; ++i)
                {
                        const u32 address = merged_addresses[i];
                        js.downcountAmount += PatchEngine::GetSpeedhackCycles(address);
                }
        }

        js.skipnext = false;
        js.blockSize = size;
        js.compilerPC = nextPC;
        // Translate instructions
        for (int i = 0; i < (int)size; i++)
        {
                js.compilerPC = ops[i].address;
                js.op = &ops[i];
                js.instructionNumber = i;
                const GekkoOPInfo *opinfo = ops[i].opinfo;
                js.downcountAmount += (opinfo->numCyclesMinusOne + 1);

                if (i == (int)size - 1)
                {
                        // WARNING - cmp->branch merging will screw this up.
                        js.isLastInstruction = true;
                        js.next_inst = 0;
                        if (Profiler::g_ProfileBlocks) {
                                // CAUTION!!! push on stack regs you use, do your stuff, then pop
                                PROFILER_VPUSH;
                                // get end tic
                                PROFILER_QUERY_PERFORMANCE_COUNTER(&b->ticStop);
                                // tic counter += (end tic - start tic)
                                PROFILER_ADD_DIFF_LARGE_INTEGER(&b->ticCounter, &b->ticStop, &b->ticStart);
                                PROFILER_VPOP;
                        }
                }
                else
                {
                        // help peephole optimizations
                        js.next_inst = ops[i + 1].inst;
                        js.next_compilerPC = ops[i + 1].address;
                }
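
                // (Once this block may have queued 32 or more bytes into the write gather pipe, a
                // call to GPFifo::CheckGatherPipe is emitted here in the middle of the block;
                // Cleanup() at the block exit flushes whatever remains.)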
 
                if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
                {
                        js.fifoBytesThisBlock -= 32;
                        MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write
                        u32 registersInUse = RegistersInUse();
                        ABI_PushRegistersAndAdjustStack(registersInUse, false);
                        ABI_CallFunction((void *)&GPFifo::CheckGatherPipe);
                        ABI_PopRegistersAndAdjustStack(registersInUse, false);
                }

                u32 function = HLE::GetFunctionIndex(ops[i].address);
                if (function != 0)
                {
                        int type = HLE::GetFunctionTypeByIndex(function);
                        if (type == HLE::HLE_HOOK_START || type == HLE::HLE_HOOK_REPLACE)
                        {
                                int flags = HLE::GetFunctionFlagsByIndex(function);
                                if (HLE::IsEnabled(flags))
                                {
                                        HLEFunction(function);
                                        if (type == HLE::HLE_HOOK_REPLACE)
                                        {
                                                MOV(32, R(EAX), M(&NPC));
                                                js.downcountAmount += js.st.numCycles;
                                                WriteExitDestInEAX();
                                                break;
                                        }
                                }
                        }
                }

                if (!ops[i].skip)
                {
                        if ((opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound)
                        {
                                gpr.Flush(FLUSH_ALL);
                                fpr.Flush(FLUSH_ALL);

                                // This instruction uses FPU - needs to add FP exception bailout
                                TEST(32, M(&PowerPC::ppcState.msr), Imm32(1 << 13)); // Test FP enabled bit
                                FixupBranch b1 = J_CC(CC_NZ, true);

                                // If a FPU exception occurs, the exception handler will read
                                // from PC.  Update PC with the latest value in case that happens.
                                MOV(32, M(&PC), Imm32(ops[i].address));
                                OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE));
                                WriteExceptionExit();

                                SetJumpTarget(b1);

                                js.firstFPInstructionFound = true;
                        }

                        // Add an external exception check if the instruction writes to the FIFO.
                        if (jit->js.fifoWriteAddresses.find(ops[i].address) != jit->js.fifoWriteAddresses.end())
                        {
                                gpr.Flush(FLUSH_ALL);
                                fpr.Flush(FLUSH_ALL);

                                TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_ISI | EXCEPTION_PROGRAM | EXCEPTION_SYSCALL | EXCEPTION_FPU_UNAVAILABLE | EXCEPTION_DSI | EXCEPTION_ALIGNMENT));
                                FixupBranch clearInt = J_CC(CC_NZ, true);
                                TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_EXTERNAL_INT));
                                FixupBranch noExtException = J_CC(CC_Z, true);
                                TEST(32, M((void *)&PowerPC::ppcState.msr), Imm32(0x0008000));
                                FixupBranch noExtIntEnable = J_CC(CC_Z, true);
                                TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN | ProcessorInterface::INT_CAUSE_PE_FINISH));
                                FixupBranch noCPInt = J_CC(CC_Z, true);

                                MOV(32, M(&PC), Imm32(ops[i].address));
                                WriteExternalExceptionExit();

                                SetJumpTarget(noCPInt);
                                SetJumpTarget(noExtIntEnable);
                                SetJumpTarget(noExtException);
                                SetJumpTarget(clearInt);
                        }

                        if (Core::g_CoreStartupParameter.bEnableDebugging && breakpoints.IsAddressBreakPoint(ops[i].address) && GetState() != CPU_STEPPING)
                        {
                                gpr.Flush(FLUSH_ALL);
                                fpr.Flush(FLUSH_ALL);

                                MOV(32, M(&PC), Imm32(ops[i].address));
                                ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckBreakPoints));
                                TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
                                FixupBranch noBreakpoint = J_CC(CC_Z);

                                WriteExit(ops[i].address, 0);
                                SetJumpTarget(noBreakpoint);
                        }

                        Jit64Tables::CompileInstruction(ops[i]);

                        if (js.memcheck && (opinfo->flags & FL_LOADSTORE))
                        {
                                // In case we are about to jump to the dispatcher, flush regs
                                gpr.Flush(FLUSH_ALL);
                                fpr.Flush(FLUSH_ALL);

                                TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_DSI));
                                FixupBranch noMemException = J_CC(CC_Z, true);

                                // If a memory exception occurs, the exception handler will read
                                // from PC.  Update PC with the latest value in case that happens.
                                MOV(32, M(&PC), Imm32(ops[i].address));
                                WriteExceptionExit();
                                SetJumpTarget(noMemException);
                        }

                        if (opinfo->flags & FL_LOADSTORE)
                                ++jit->js.numLoadStoreInst;

                        if (opinfo->flags & FL_USE_FPU)
                                ++jit->js.numFloatingPointInst;
                }

#if defined(_DEBUG) || defined(DEBUGFAST)
                if (gpr.SanityCheck() || fpr.SanityCheck())
                {
                        char ppcInst[256];
                        DisassembleGekko(ops[i].inst.hex, em_address, ppcInst, 256);
                        //NOTICE_LOG(DYNA_REC, "Unflushed register: %s", ppcInst);
                }
#endif
                if (js.skipnext) {
                        js.skipnext = false;
                        i++; // Skip next instruction
                }

                if (js.cancel)
                        break;
        }

        u32 function = HLE::GetFunctionIndex(js.blockStart);
        if (function != 0)
        {
                int type = HLE::GetFunctionTypeByIndex(function);
                if (type == HLE::HLE_HOOK_END)
                {
                        int flags = HLE::GetFunctionFlagsByIndex(function);
                        if (HLE::IsEnabled(flags))
                        {
                                HLEFunction(function);
                        }
                }
        }

        if (memory_exception)
        {
                // Address of instruction could not be translated
                MOV(32, M(&NPC), Imm32(js.compilerPC));

                OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_ISI));

                // Remove the invalid instruction from the icache, forcing a recompile
#ifdef _M_IX86
                MOV(32, M(jit->GetBlockCache()->GetICachePtr(js.compilerPC)), Imm32(JIT_ICACHE_INVALID_WORD));
#else
                MOV(64, R(RAX), ImmPtr(jit->GetBlockCache()->GetICachePtr(js.compilerPC)));
                MOV(32, MatR(RAX), Imm32(JIT_ICACHE_INVALID_WORD));
#endif

                WriteExceptionExit();
        }

        if (broken_block)
        {
                gpr.Flush(FLUSH_ALL);
                fpr.Flush(FLUSH_ALL);
                WriteExit(nextPC, 0);
        }

        b->flags = js.block_flags;
        b->codeSize = (u32)(GetCodePtr() - normalEntry);
        b->originalSize = size;

#ifdef JIT_LOG_X86
        LogGeneratedX86(size, code_buf, normalEntry, b);
#endif

        return normalEntry;
}
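
// RegistersInUse() packs the register-allocator state into the bitmask consumed by
// ABI_PushRegistersAndAdjustStack() above: bit i is set when x86 GPR i is currently in use by gpr,
// and bit 16+i when XMM register i is in use by fpr, so exactly those registers get saved around
// the mid-block GPFifo::CheckGatherPipe call. On 32-bit builds it simply returns 0.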
 
u32 Jit64::RegistersInUse()
{
#ifdef _M_X64
        u32 result = 0;
        for (int i = 0; i < NUMXREGS; i++)
        {
                if (!gpr.IsFreeX(i))
                        result |= (1 << i);
                if (!fpr.IsFreeX(i))
                        result |= (1 << (16 + i));
        }
        return result;
#else
        // not needed
        return 0;
#endif
}