~ppsspp/ppsspp/ppsspp_1.3.0

« back to all changes in this revision

Viewing changes to GPU/Directx9/GPU_DX9.cpp

  • Committer: Sérgio Benjamim
  • Date: 2017-01-02 00:12:05 UTC
  • Revision ID: sergio_br2@yahoo.com.br-20170102001205-cxbta9za203nmjwm
1.3.0 source (from ppsspp_1.3.0-r160.p5.l1762.a165.t83~56~ubuntu16.04.1.tar.xz).

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
// Copyright (c) 2012- PPSSPP Project.
 
2
 
 
3
// This program is free software: you can redistribute it and/or modify
 
4
// it under the terms of the GNU General Public License as published by
 
5
// the Free Software Foundation, version 2.0 or later versions.
 
6
 
 
7
// This program is distributed in the hope that it will be useful,
 
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
10
// GNU General Public License 2.0 for more details.
 
11
 
 
12
// A copy of the GPL 2.0 should have been included with the program.
 
13
// If not, see http://www.gnu.org/licenses/
 
14
 
 
15
// Official git repository and contact information can be found at
 
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
 
17
 
 
18
#include <set>
 
19
 
 
20
#include "Common/ChunkFile.h"
 
21
#include "Common/GraphicsContext.h"
 
22
#include "base/NativeApp.h"
 
23
#include "base/logging.h"
 
24
#include "profiler/profiler.h"
 
25
#include "Core/Debugger/Breakpoints.h"
 
26
#include "Core/MemMapHelpers.h"
 
27
#include "Core/MIPS/MIPS.h"
 
28
#include "Core/Host.h"
 
29
#include "Core/Config.h"
 
30
#include "Core/Reporting.h"
 
31
#include "Core/System.h"
 
32
 
 
33
#include "helper/dx_state.h"
 
34
 
 
35
#include "GPU/GPUState.h"
 
36
#include "GPU/ge_constants.h"
 
37
#include "GPU/GeDisasm.h"
 
38
 
 
39
#include "GPU/Common/FramebufferCommon.h"
 
40
#include "GPU/Directx9/helper/global.h"
 
41
#include "GPU/Directx9/ShaderManagerDX9.h"
 
42
#include "GPU/Directx9/GPU_DX9.h"
 
43
#include "GPU/Directx9/FramebufferDX9.h"
 
44
#include "GPU/Directx9/DrawEngineDX9.h"
 
45
#include "GPU/Directx9/TextureCacheDX9.h"
 
46
 
 
47
#include "Core/HLE/sceKernelThread.h"
 
48
#include "Core/HLE/sceKernelInterrupt.h"
 
49
#include "Core/HLE/sceGe.h"
 
50
 
 
51
namespace DX9 {
 
52
 
 
53
// Bit flags describing how each GE command must be treated by the command
// dispatcher. Values are combined with bitwise OR in commandTable.
enum {
	// Flush pending draws before the command is handled.
	FLAG_FLUSHBEFORE = 1 << 0,
	// Flush pending draws only when the command's value changes.
	FLAG_FLUSHBEFOREONCHANGE = 1 << 1,
	// The command's handler has to run.
	FLAG_EXECUTE = 1 << 2,
	// The command's handler has to run only when the value changes.
	FLAG_EXECUTEONCHANGE = 1 << 3,
	// Mask matching either of the two execute flags.
	FLAG_ANY_EXECUTE = FLAG_EXECUTE | FLAG_EXECUTEONCHANGE,
	// The command reads the display list program counter.
	FLAG_READS_PC = 1 << 4,
	// The command writes the display list program counter.
	FLAG_WRITES_PC = 1 << 5,
};
 
62
 
 
63
struct CommandTableEntry {
 
64
        u8 cmd;
 
65
        u8 flags;
 
66
        GPU_DX9::CmdFunc func;
 
67
};
 
68
 
 
69
// Static description of every GE command: the command byte, a bitmask of
// FLAG_* values, and an optional GPU_DX9 member handler. The GPU_DX9
// constructor copies these entries into cmdInfo_, substitutes
// Execute_Generic for entries that leave the handler out, and logs an error
// for duplicated commands and for commands below 0xEF that have no entry.
static const CommandTableEntry commandTable[] = {
 
70
        // Changes that dirty the framebuffer
 
71
        {GE_CMD_FRAMEBUFPTR, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_FramebufType},
 
72
        {GE_CMD_FRAMEBUFWIDTH, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_FramebufType},
 
73
        {GE_CMD_FRAMEBUFPIXFORMAT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_FramebufType},
 
74
        {GE_CMD_ZBUFPTR, FLAG_FLUSHBEFOREONCHANGE},
 
75
        {GE_CMD_ZBUFWIDTH, FLAG_FLUSHBEFOREONCHANGE},
 
76
 
 
77
        // Changes that dirty uniforms
 
78
        {GE_CMD_FOGCOLOR, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
 
79
        {GE_CMD_FOG1, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
 
80
        {GE_CMD_FOG2, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
 
81
 
 
82
        // Should these maybe flush?
 
83
        {GE_CMD_MINZ, FLAG_FLUSHBEFOREONCHANGE},
 
84
        {GE_CMD_MAXZ, FLAG_FLUSHBEFOREONCHANGE},
 
85
 
 
86
        // Changes that dirty texture scaling.
 
87
        { GE_CMD_TEXMAPMODE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexMapMode },
 
88
        { GE_CMD_TEXSCALEU, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexScaleU },
 
89
        { GE_CMD_TEXSCALEV, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexScaleV },
 
90
        { GE_CMD_TEXOFFSETU, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexOffsetU },
 
91
        { GE_CMD_TEXOFFSETV, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexOffsetV },
 
92
 
 
93
        // Changes that dirty the current texture. Really should be possible to avoid executing these if we compile
 
94
        // by adding some more flags.
 
95
        {GE_CMD_TEXSIZE0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, &GPU_DX9::Execute_TexSize0},
 
96
        {GE_CMD_TEXSIZE1, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexSizeN},
 
97
        {GE_CMD_TEXSIZE2, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexSizeN},
 
98
        {GE_CMD_TEXSIZE3, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexSizeN},
 
99
        {GE_CMD_TEXSIZE4, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexSizeN},
 
100
        {GE_CMD_TEXSIZE5, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexSizeN},
 
101
        {GE_CMD_TEXSIZE6, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexSizeN},
 
102
        {GE_CMD_TEXSIZE7, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexSizeN},
 
103
        {GE_CMD_TEXFORMAT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexFormat},
 
104
        {GE_CMD_TEXLEVEL, FLAG_EXECUTE, &GPU_DX9::Execute_TexLevel},
 
105
        {GE_CMD_TEXADDR0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexAddr0},
 
106
        {GE_CMD_TEXADDR1, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexAddrN},
 
107
        {GE_CMD_TEXADDR2, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexAddrN},
 
108
        {GE_CMD_TEXADDR3, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexAddrN},
 
109
        {GE_CMD_TEXADDR4, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexAddrN},
 
110
        {GE_CMD_TEXADDR5, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexAddrN},
 
111
        {GE_CMD_TEXADDR6, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexAddrN},
 
112
        {GE_CMD_TEXADDR7, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexAddrN},
 
113
        {GE_CMD_TEXBUFWIDTH0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexBufw0},
 
114
        {GE_CMD_TEXBUFWIDTH1, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexBufwN},
 
115
        {GE_CMD_TEXBUFWIDTH2, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexBufwN},
 
116
        {GE_CMD_TEXBUFWIDTH3, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexBufwN},
 
117
        {GE_CMD_TEXBUFWIDTH4, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexBufwN},
 
118
        {GE_CMD_TEXBUFWIDTH5, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexBufwN},
 
119
        {GE_CMD_TEXBUFWIDTH6, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexBufwN},
 
120
        {GE_CMD_TEXBUFWIDTH7, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexBufwN},
 
121
        {GE_CMD_CLUTADDR, FLAG_FLUSHBEFOREONCHANGE},
 
122
        {GE_CMD_CLUTADDRUPPER, FLAG_FLUSHBEFOREONCHANGE},
 
123
        {GE_CMD_CLUTFORMAT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_ClutFormat},
 
124
 
 
125
        // These affect the fragment shader so need flushing.
 
126
        {GE_CMD_CLEARMODE, FLAG_FLUSHBEFOREONCHANGE},
 
127
        {GE_CMD_TEXTUREMAPENABLE, FLAG_FLUSHBEFOREONCHANGE},
 
128
        {GE_CMD_FOGENABLE, FLAG_FLUSHBEFOREONCHANGE},
 
129
        {GE_CMD_TEXMODE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexParamType},
 
130
        {GE_CMD_TEXSHADELS, FLAG_FLUSHBEFOREONCHANGE},
 
131
        {GE_CMD_SHADEMODE, FLAG_FLUSHBEFOREONCHANGE},
 
132
        {GE_CMD_TEXFUNC, FLAG_FLUSHBEFOREONCHANGE},
 
133
        {GE_CMD_COLORTEST, FLAG_FLUSHBEFOREONCHANGE},
 
134
        {GE_CMD_ALPHATESTENABLE, FLAG_FLUSHBEFOREONCHANGE},
 
135
        {GE_CMD_COLORTESTENABLE, FLAG_FLUSHBEFOREONCHANGE},
 
136
        {GE_CMD_COLORTESTMASK, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_ColorTestMask},
 
137
 
 
138
        // These change the vertex shader so need flushing.
 
139
        {GE_CMD_REVERSENORMAL, FLAG_FLUSHBEFOREONCHANGE},
 
140
        {GE_CMD_LIGHTINGENABLE, FLAG_FLUSHBEFOREONCHANGE},
 
141
        {GE_CMD_LIGHTENABLE0, FLAG_FLUSHBEFOREONCHANGE},
 
142
        {GE_CMD_LIGHTENABLE1, FLAG_FLUSHBEFOREONCHANGE},
 
143
        {GE_CMD_LIGHTENABLE2, FLAG_FLUSHBEFOREONCHANGE},
 
144
        {GE_CMD_LIGHTENABLE3, FLAG_FLUSHBEFOREONCHANGE},
 
145
        {GE_CMD_LIGHTTYPE0, FLAG_FLUSHBEFOREONCHANGE},
 
146
        {GE_CMD_LIGHTTYPE1, FLAG_FLUSHBEFOREONCHANGE},
 
147
        {GE_CMD_LIGHTTYPE2, FLAG_FLUSHBEFOREONCHANGE},
 
148
        {GE_CMD_LIGHTTYPE3, FLAG_FLUSHBEFOREONCHANGE},
 
149
        {GE_CMD_MATERIALUPDATE, FLAG_FLUSHBEFOREONCHANGE},
 
150
 
 
151
        // This changes both shaders so need flushing.
 
152
        {GE_CMD_LIGHTMODE, FLAG_FLUSHBEFOREONCHANGE},
 
153
        {GE_CMD_TEXFILTER, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexParamType},
 
154
        {GE_CMD_TEXWRAP, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexParamType},
 
155
 
 
156
        // Uniform changes
 
157
        {GE_CMD_ALPHATEST, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_AlphaTest},
 
158
        {GE_CMD_COLORREF, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_ColorRef},
 
159
        {GE_CMD_TEXENVCOLOR, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_TexEnvColor},
 
160
 
 
161
        // Simple render state changes. Handled in StateMapping.cpp.
 
162
        {GE_CMD_OFFSETX, FLAG_FLUSHBEFOREONCHANGE},
 
163
        {GE_CMD_OFFSETY, FLAG_FLUSHBEFOREONCHANGE},
 
164
        {GE_CMD_CULL,   FLAG_FLUSHBEFOREONCHANGE},
 
165
        {GE_CMD_CULLFACEENABLE, FLAG_FLUSHBEFOREONCHANGE},
 
166
        {GE_CMD_DITHERENABLE, FLAG_FLUSHBEFOREONCHANGE},
 
167
        {GE_CMD_STENCILOP, FLAG_FLUSHBEFOREONCHANGE},
 
168
        {GE_CMD_STENCILTEST, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_StencilTest},
 
169
        {GE_CMD_STENCILTESTENABLE, FLAG_FLUSHBEFOREONCHANGE},
 
170
        {GE_CMD_ALPHABLENDENABLE, FLAG_FLUSHBEFOREONCHANGE},
 
171
        {GE_CMD_BLENDMODE, FLAG_FLUSHBEFOREONCHANGE},
 
172
        {GE_CMD_BLENDFIXEDA, FLAG_FLUSHBEFOREONCHANGE},
 
173
        {GE_CMD_BLENDFIXEDB, FLAG_FLUSHBEFOREONCHANGE},
 
174
        {GE_CMD_MASKRGB, FLAG_FLUSHBEFOREONCHANGE},
 
175
        {GE_CMD_MASKALPHA, FLAG_FLUSHBEFOREONCHANGE},
 
176
        {GE_CMD_ZTEST, FLAG_FLUSHBEFOREONCHANGE},
 
177
        {GE_CMD_ZTESTENABLE, FLAG_FLUSHBEFOREONCHANGE},
 
178
        {GE_CMD_ZWRITEDISABLE, FLAG_FLUSHBEFOREONCHANGE},
 
179
 
 
180
        // These can't be emulated in D3D (except a few special cases)
 
181
        {GE_CMD_LOGICOP, FLAG_FLUSHBEFOREONCHANGE},
 
182
        {GE_CMD_LOGICOPENABLE, FLAG_FLUSHBEFOREONCHANGE},
 
183
 
 
184
        // Can probably ignore this one as we don't support AA lines.
 
185
        {GE_CMD_ANTIALIASENABLE, FLAG_FLUSHBEFOREONCHANGE},
 
186
        
 
187
        // Morph weights. TODO: Remove precomputation?
 
188
        {GE_CMD_MORPHWEIGHT0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
 
189
        {GE_CMD_MORPHWEIGHT1, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
 
190
        {GE_CMD_MORPHWEIGHT2, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
 
191
        {GE_CMD_MORPHWEIGHT3, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
 
192
        {GE_CMD_MORPHWEIGHT4, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
 
193
        {GE_CMD_MORPHWEIGHT5, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
 
194
        {GE_CMD_MORPHWEIGHT6, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
 
195
        {GE_CMD_MORPHWEIGHT7, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
 
196
 
 
197
        // Control spline/bezier patches. Don't really require flushing as such, but meh.
 
198
        {GE_CMD_PATCHDIVISION, FLAG_FLUSHBEFOREONCHANGE},
 
199
        {GE_CMD_PATCHPRIMITIVE, FLAG_FLUSHBEFOREONCHANGE},
 
200
        {GE_CMD_PATCHFACING, FLAG_FLUSHBEFOREONCHANGE},
 
201
        {GE_CMD_PATCHCULLENABLE, FLAG_FLUSHBEFOREONCHANGE},
 
202
 
 
203
        // Viewport.
 
204
        {GE_CMD_VIEWPORTXSCALE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_ViewportType},
 
205
        {GE_CMD_VIEWPORTYSCALE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_ViewportType},
 
206
        {GE_CMD_VIEWPORTZSCALE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_ViewportType},
 
207
        {GE_CMD_VIEWPORTXCENTER, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_ViewportType},
 
208
        {GE_CMD_VIEWPORTYCENTER, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_ViewportType},
 
209
        {GE_CMD_VIEWPORTZCENTER, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_ViewportType},
 
210
        {GE_CMD_CLIPENABLE, FLAG_FLUSHBEFOREONCHANGE},
 
211
 
 
212
        // Region
 
213
        {GE_CMD_REGION1, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Region},
 
214
        {GE_CMD_REGION2, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Region},
 
215
 
 
216
        // Scissor
 
217
        {GE_CMD_SCISSOR1, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Scissor},
 
218
        {GE_CMD_SCISSOR2, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Scissor},
 
219
 
 
220
        // These dirty various vertex shader uniforms. Could embed information about that in this table and call dirtyuniform directly, hm...
 
221
        {GE_CMD_AMBIENTCOLOR, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
 
222
        {GE_CMD_AMBIENTALPHA, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
 
223
        {GE_CMD_MATERIALDIFFUSE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
 
224
        {GE_CMD_MATERIALEMISSIVE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
 
225
        {GE_CMD_MATERIALAMBIENT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
 
226
        {GE_CMD_MATERIALALPHA, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
 
227
        {GE_CMD_MATERIALSPECULAR, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
 
228
        {GE_CMD_MATERIALSPECULARCOEF, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE},
 
229
 
 
230
        // These precompute a value. not sure if worth it. Also dirty uniforms, which could be table-ized to avoid execute.
 
231
        {GE_CMD_LX0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light0Param},
 
232
        {GE_CMD_LY0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light0Param},
 
233
        {GE_CMD_LZ0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light0Param},
 
234
        {GE_CMD_LX1, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light1Param},
 
235
        {GE_CMD_LY1, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light1Param},
 
236
        {GE_CMD_LZ1, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light1Param},
 
237
        {GE_CMD_LX2, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light2Param},
 
238
        {GE_CMD_LY2, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light2Param},
 
239
        {GE_CMD_LZ2, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light2Param},
 
240
        {GE_CMD_LX3, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light3Param},
 
241
        {GE_CMD_LY3, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light3Param},
 
242
        {GE_CMD_LZ3, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light3Param},
 
243
 
 
244
        {GE_CMD_LDX0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light0Param},
 
245
        {GE_CMD_LDY0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light0Param},
 
246
        {GE_CMD_LDZ0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light0Param},
 
247
        {GE_CMD_LDX1, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light1Param},
 
248
        {GE_CMD_LDY1, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light1Param},
 
249
        {GE_CMD_LDZ1, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light1Param},
 
250
        {GE_CMD_LDX2,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light2Param},
 
251
        {GE_CMD_LDY2,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light2Param},
 
252
        {GE_CMD_LDZ2,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light2Param},
 
253
        {GE_CMD_LDX3,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light3Param},
 
254
        {GE_CMD_LDY3,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light3Param},
 
255
        {GE_CMD_LDZ3,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light3Param},
 
256
 
 
257
        {GE_CMD_LKA0,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light0Param},
 
258
        {GE_CMD_LKB0,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light0Param},
 
259
        {GE_CMD_LKC0,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light0Param},
 
260
        {GE_CMD_LKA1,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light1Param},
 
261
        {GE_CMD_LKB1,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light1Param},
 
262
        {GE_CMD_LKC1,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light1Param},
 
263
        {GE_CMD_LKA2,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light2Param},
 
264
        {GE_CMD_LKB2,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light2Param},
 
265
        {GE_CMD_LKC2,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light2Param},
 
266
        {GE_CMD_LKA3,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light3Param},
 
267
        {GE_CMD_LKB3,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light3Param},
 
268
        {GE_CMD_LKC3,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light3Param},
 
269
 
 
270
        {GE_CMD_LKS0,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light0Param},
 
271
        {GE_CMD_LKS1,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light1Param},
 
272
        {GE_CMD_LKS2,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light2Param},
 
273
        {GE_CMD_LKS3,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light3Param},
 
274
 
 
275
        {GE_CMD_LKO0,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light0Param},
 
276
        {GE_CMD_LKO1,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light1Param},
 
277
        {GE_CMD_LKO2,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light2Param},
 
278
        {GE_CMD_LKO3,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light3Param},
 
279
 
 
280
        {GE_CMD_LAC0,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light0Param},
 
281
        {GE_CMD_LDC0,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light0Param},
 
282
        {GE_CMD_LSC0,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light0Param},
 
283
        {GE_CMD_LAC1,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light1Param},
 
284
        {GE_CMD_LDC1,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light1Param},
 
285
        {GE_CMD_LSC1,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light1Param},
 
286
        {GE_CMD_LAC2,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light2Param},
 
287
        {GE_CMD_LDC2,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light2Param},
 
288
        {GE_CMD_LSC2,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light2Param},
 
289
        {GE_CMD_LAC3,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light3Param},
 
290
        {GE_CMD_LDC3,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light3Param},
 
291
        {GE_CMD_LSC3,   FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_Light3Param},
 
292
 
 
293
        // Ignored commands
 
294
        {GE_CMD_TEXFLUSH, 0},
 
295
        {GE_CMD_TEXLODSLOPE, 0},
 
296
        {GE_CMD_TEXSYNC, 0},
 
297
 
 
298
        // These are just nop or part of other later commands.
 
299
        {GE_CMD_NOP, 0},
 
300
        {GE_CMD_BASE, 0},
 
301
        {GE_CMD_TRANSFERSRC, 0},
 
302
        {GE_CMD_TRANSFERSRCW, 0},
 
303
        {GE_CMD_TRANSFERDST, 0},
 
304
        {GE_CMD_TRANSFERDSTW, 0},
 
305
        {GE_CMD_TRANSFERSRCPOS, 0},
 
306
        {GE_CMD_TRANSFERDSTPOS, 0},
 
307
        {GE_CMD_TRANSFERSIZE, 0},
 
308
 
 
309
        // From Common. No flushing but definitely need execute.
 
310
        {GE_CMD_OFFSETADDR, FLAG_EXECUTE},
 
311
        {GE_CMD_ORIGIN, FLAG_EXECUTE | FLAG_READS_PC},  // Really?
 
312
        {GE_CMD_PRIM, FLAG_EXECUTE},
 
313
        {GE_CMD_JUMP, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC},
 
314
        {GE_CMD_CALL, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC},
 
315
        {GE_CMD_RET, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC},
 
316
        {GE_CMD_END, FLAG_FLUSHBEFORE | FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC},  // Flush?
 
317
        {GE_CMD_VADDR, FLAG_EXECUTE, &GPU_DX9::Execute_Vaddr},
 
318
        {GE_CMD_IADDR, FLAG_EXECUTE, &GPU_DX9::Execute_Iaddr},
 
319
        {GE_CMD_BJUMP, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC},  // EXECUTE
 
320
        {GE_CMD_BOUNDINGBOX, FLAG_EXECUTE, &GPU_DX9::Execute_BoundingBox}, // + FLUSHBEFORE when we implement... or not, do we need to?
 
321
 
 
322
        // Changing the vertex type requires us to flush.
 
323
        {GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GPU_DX9::Execute_VertexType},
 
324
 
 
325
        {GE_CMD_BEZIER, FLAG_FLUSHBEFORE | FLAG_EXECUTE, &GPU_DX9::Execute_Bezier},
 
326
        {GE_CMD_SPLINE, FLAG_FLUSHBEFORE | FLAG_EXECUTE, &GPU_DX9::Execute_Spline},
 
327
 
 
328
        // These two are actually processed in CMD_END.
 
329
        {GE_CMD_SIGNAL, FLAG_FLUSHBEFORE},
 
330
        {GE_CMD_FINISH, FLAG_FLUSHBEFORE},
 
331
 
 
332
        // Changes that trigger data copies. Only flushing on change for LOADCLUT must be a bit of a hack...
 
333
        {GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, &GPU_DX9::Execute_LoadClut},
 
334
        {GE_CMD_TRANSFERSTART, FLAG_FLUSHBEFORE | FLAG_EXECUTE | FLAG_READS_PC},
 
335
 
 
336
        // We don't use the dither table.
 
337
        {GE_CMD_DITH0},
 
338
        {GE_CMD_DITH1},
 
339
        {GE_CMD_DITH2},
 
340
        {GE_CMD_DITH3},
 
341
 
 
342
        // These handle their own flushing.
 
343
        {GE_CMD_WORLDMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPU_DX9::Execute_WorldMtxNum},
 
344
        {GE_CMD_WORLDMATRIXDATA,   FLAG_EXECUTE, &GPU_DX9::Execute_WorldMtxData},
 
345
        {GE_CMD_VIEWMATRIXNUMBER,  FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPU_DX9::Execute_ViewMtxNum},
 
346
        {GE_CMD_VIEWMATRIXDATA,    FLAG_EXECUTE, &GPU_DX9::Execute_ViewMtxData},
 
347
        {GE_CMD_PROJMATRIXNUMBER,  FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPU_DX9::Execute_ProjMtxNum},
 
348
        {GE_CMD_PROJMATRIXDATA,    FLAG_EXECUTE, &GPU_DX9::Execute_ProjMtxData},
 
349
        {GE_CMD_TGENMATRIXNUMBER,  FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPU_DX9::Execute_TgenMtxNum},
 
350
        {GE_CMD_TGENMATRIXDATA,    FLAG_EXECUTE, &GPU_DX9::Execute_TgenMtxData},
 
351
        {GE_CMD_BONEMATRIXNUMBER,  FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, &GPU_DX9::Execute_BoneMtxNum},
 
352
        {GE_CMD_BONEMATRIXDATA,    FLAG_EXECUTE, &GPU_DX9::Execute_BoneMtxData},
 
353
 
 
354
        // Vertex Screen/Texture/Color
 
355
        { GE_CMD_VSCX, FLAG_EXECUTE },
 
356
        { GE_CMD_VSCY, FLAG_EXECUTE },
 
357
        { GE_CMD_VSCZ, FLAG_EXECUTE },
 
358
        { GE_CMD_VTCS, FLAG_EXECUTE },
 
359
        { GE_CMD_VTCT, FLAG_EXECUTE },
 
360
        { GE_CMD_VTCQ, FLAG_EXECUTE },
 
361
        { GE_CMD_VCV, FLAG_EXECUTE },
 
362
        { GE_CMD_VAP, FLAG_EXECUTE },
 
363
        { GE_CMD_VFC, FLAG_EXECUTE },
 
364
        { GE_CMD_VSCV, FLAG_EXECUTE },
 
365
 
 
366
        // "Missing" commands (gaps in the sequence)
 
367
        {GE_CMD_UNKNOWN_03, FLAG_EXECUTE},
 
368
        {GE_CMD_UNKNOWN_0D, FLAG_EXECUTE},
 
369
        {GE_CMD_UNKNOWN_11, FLAG_EXECUTE},
 
370
        {GE_CMD_UNKNOWN_29, FLAG_EXECUTE},
 
371
        {GE_CMD_UNKNOWN_34, FLAG_EXECUTE},
 
372
        {GE_CMD_UNKNOWN_35, FLAG_EXECUTE},
 
373
        {GE_CMD_UNKNOWN_39, FLAG_EXECUTE},
 
374
        {GE_CMD_UNKNOWN_4E, FLAG_EXECUTE},
 
375
        {GE_CMD_UNKNOWN_4F, FLAG_EXECUTE},
 
376
        {GE_CMD_UNKNOWN_52, FLAG_EXECUTE},
 
377
        {GE_CMD_UNKNOWN_59, FLAG_EXECUTE},
 
378
        {GE_CMD_UNKNOWN_5A, FLAG_EXECUTE},
 
379
        {GE_CMD_UNKNOWN_B6, FLAG_EXECUTE},
 
380
        {GE_CMD_UNKNOWN_B7, FLAG_EXECUTE},
 
381
        {GE_CMD_UNKNOWN_D1, FLAG_EXECUTE},
 
382
        {GE_CMD_UNKNOWN_ED, FLAG_EXECUTE},
 
383
        {GE_CMD_UNKNOWN_EF, FLAG_EXECUTE},
 
384
        {GE_CMD_UNKNOWN_FA, FLAG_EXECUTE},
 
385
        {GE_CMD_UNKNOWN_FB, FLAG_EXECUTE},
 
386
        {GE_CMD_UNKNOWN_FC, FLAG_EXECUTE},
 
387
        {GE_CMD_UNKNOWN_FD, FLAG_EXECUTE},
 
388
        {GE_CMD_UNKNOWN_FE, FLAG_EXECUTE},
 
389
        // Appears to be debugging related or something?  Hit a lot in GoW.
 
390
        { GE_CMD_UNKNOWN_FF, 0 },
 
391
};
 
392
 
 
393
// Per-command flags and handler, indexed by the 8-bit GE command number.
// Filled in from commandTable by the GPU_DX9 constructor.
GPU_DX9::CommandInfo GPU_DX9::cmdInfo_[256];
 
394
 
 
395
GPU_DX9::GPU_DX9(GraphicsContext *gfxCtx)
 
396
: resized_(false), gfxCtx_(gfxCtx) {
 
397
        lastVsync_ = g_Config.bVSync ? 1 : 0;
 
398
        dxstate.SetVSyncInterval(g_Config.bVSync);
 
399
 
 
400
        shaderManager_ = new ShaderManagerDX9();
 
401
        drawEngine_.SetShaderManager(shaderManager_);
 
402
        drawEngine_.SetTextureCache(&textureCache_);
 
403
        drawEngine_.SetFramebufferManager(&framebufferManager_);
 
404
        framebufferManager_.Init();
 
405
        framebufferManager_.SetTextureCache(&textureCache_);
 
406
        framebufferManager_.SetShaderManager(shaderManager_);
 
407
        framebufferManager_.SetTransformDrawEngine(&drawEngine_);
 
408
        textureCache_.SetFramebufferManager(&framebufferManager_);
 
409
        textureCache_.SetDepalShaderCache(&depalShaderCache_);
 
410
        textureCache_.SetShaderManager(shaderManager_);
 
411
 
 
412
        // Sanity check gstate
 
413
        if ((int *)&gstate.transferstart - (int *)&gstate != 0xEA) {
 
414
                ERROR_LOG(G3D, "gstate has drifted out of sync!");
 
415
        }
 
416
 
 
417
        // Sanity check cmdInfo_ table - no dupes please
 
418
        std::set<u8> dupeCheck;
 
419
        memset(cmdInfo_, 0, sizeof(cmdInfo_));
 
420
        for (size_t i = 0; i < ARRAY_SIZE(commandTable); i++) {
 
421
                const u8 cmd = commandTable[i].cmd;
 
422
                if (dupeCheck.find(cmd) != dupeCheck.end()) {
 
423
                        ERROR_LOG(G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd);
 
424
                } else {
 
425
                        dupeCheck.insert(cmd);
 
426
                }
 
427
                cmdInfo_[cmd].flags |= commandTable[i].flags;
 
428
                cmdInfo_[cmd].func = commandTable[i].func;
 
429
                if (!cmdInfo_[cmd].func) {
 
430
                        cmdInfo_[cmd].func = &GPU_DX9::Execute_Generic;
 
431
                }
 
432
        }
 
433
        // Find commands missing from the table.
 
434
        for (int i = 0; i < 0xEF; i++) {
 
435
                if (dupeCheck.find((u8)i) == dupeCheck.end()) {
 
436
                        ERROR_LOG(G3D, "Command missing from table: %02x (%i)", i, i);
 
437
                }
 
438
        }
 
439
 
 
440
        // No need to flush before the tex scale/offset commands if we are baking
 
441
        // the tex scale/offset into the vertices anyway.
 
442
        UpdateCmdInfo();
 
443
 
 
444
        BuildReportingInfo();
 
445
 
 
446
        // Some of our defaults are different from hw defaults, let's assert them.
 
447
        // We restore each frame anyway, but here is convenient for tests.
 
448
        dxstate.Restore();
 
449
        textureCache_.NotifyConfigChanged();
 
450
}
 
451
 
 
452
void GPU_DX9::UpdateCmdInfo() {
 
453
        if (g_Config.bPrescaleUV) {
 
454
                cmdInfo_[GE_CMD_TEXSCALEU].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
 
455
                cmdInfo_[GE_CMD_TEXSCALEV].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
 
456
                cmdInfo_[GE_CMD_TEXOFFSETU].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
 
457
                cmdInfo_[GE_CMD_TEXOFFSETV].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
 
458
        } else {
 
459
                cmdInfo_[GE_CMD_TEXSCALEU].flags |= FLAG_FLUSHBEFOREONCHANGE;
 
460
                cmdInfo_[GE_CMD_TEXSCALEV].flags |= FLAG_FLUSHBEFOREONCHANGE;
 
461
                cmdInfo_[GE_CMD_TEXOFFSETU].flags |= FLAG_FLUSHBEFOREONCHANGE;
 
462
                cmdInfo_[GE_CMD_TEXOFFSETV].flags |= FLAG_FLUSHBEFOREONCHANGE;
 
463
        }
 
464
 
 
465
        if (g_Config.bSoftwareSkinning) {
 
466
                cmdInfo_[GE_CMD_VERTEXTYPE].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
 
467
                cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPU_DX9::Execute_VertexTypeSkinning;
 
468
        } else {
 
469
                cmdInfo_[GE_CMD_VERTEXTYPE].flags |= FLAG_FLUSHBEFOREONCHANGE;
 
470
                cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPU_DX9::Execute_VertexType;
 
471
        }
 
472
 
 
473
        CheckGPUFeatures();
 
474
}
 
475
 
 
476
void GPU_DX9::CheckGPUFeatures() {
 
477
        u32 features = 0;
 
478
 
 
479
        features |= GPU_SUPPORTS_BLEND_MINMAX;
 
480
        features |= GPU_SUPPORTS_TEXTURE_LOD_CONTROL;
 
481
        features |= GPU_PREFER_CPU_DOWNLOAD;
 
482
        features |= GPU_SUPPORTS_ACCURATE_DEPTH;
 
483
 
 
484
        D3DCAPS9 caps;
 
485
        ZeroMemory(&caps, sizeof(caps));
 
486
        HRESULT result = 0;
 
487
        if (pD3DdeviceEx) {
 
488
                result = pD3DdeviceEx->GetDeviceCaps(&caps);
 
489
        } else {
 
490
                result = pD3Ddevice->GetDeviceCaps(&caps);
 
491
        }
 
492
        if (FAILED(result)) {
 
493
                WARN_LOG_REPORT(G3D, "Direct3D9: Failed to get the device caps!");
 
494
        } else {
 
495
                if ((caps.RasterCaps & D3DPRASTERCAPS_ANISOTROPY) != 0 && caps.MaxAnisotropy > 1)
 
496
                        features |= GPU_SUPPORTS_ANISOTROPY;
 
497
                if ((caps.TextureCaps & (D3DPTEXTURECAPS_NONPOW2CONDITIONAL | D3DPTEXTURECAPS_POW2)) == 0)
 
498
                        features |= GPU_SUPPORTS_OES_TEXTURE_NPOT;
 
499
        }
 
500
 
 
501
        if (!g_Config.bHighQualityDepth) {
 
502
                features |= GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT;
 
503
        } else if (PSP_CoreParameter().compat.flags().PixelDepthRounding) {
 
504
                // Assume we always have a 24-bit depth buffer.
 
505
                features |= GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT;
 
506
        } else if (PSP_CoreParameter().compat.flags().VertexDepthRounding) {
 
507
                features |= GPU_ROUND_DEPTH_TO_16BIT;
 
508
        }
 
509
 
 
510
        gstate_c.featureFlags = features;
 
511
}
 
512
 
 
513
// Tears the backend down: destroys all FBOs, clears the shader cache, then
// frees the shader manager allocated in the constructor.
GPU_DX9::~GPU_DX9()
{
	framebufferManager_.DestroyAllFBOs(true);

	shaderManager_->ClearCache(true);
	delete shaderManager_;
}
 
518
 
 
519
// Needs to be called on GPU thread, not reporting thread.
 
520
void GPU_DX9::BuildReportingInfo() {
 
521
        Thin3DContext *thin3d = gfxCtx_->CreateThin3DContext();
 
522
 
 
523
        reportingPrimaryInfo_ = thin3d->GetInfoString(T3DInfo::VENDORSTRING);
 
524
        reportingFullInfo_ = reportingPrimaryInfo_ + " - " + System_GetProperty(SYSPROP_GPUDRIVER_VERSION) + " - " + thin3d->GetInfoString(T3DInfo::SHADELANGVERSION);
 
525
 
 
526
        thin3d->Release();
 
527
}
 
528
 
 
529
void GPU_DX9::DeviceLost() {
 
530
        // Simply drop all caches and textures.
 
531
        // FBOs appear to survive? Or no?
 
532
        shaderManager_->ClearCache(false);
 
533
        textureCache_.Clear(false);
 
534
        framebufferManager_.DeviceLost();
 
535
}
 
536
 
 
537
void GPU_DX9::DeviceRestore() {
 
538
        // Nothing needed.
 
539
}
 
540
 
 
541
void GPU_DX9::InitClear() {
 
542
        ScheduleEvent(GPU_EVENT_INIT_CLEAR);
 
543
}
 
544
 
 
545
void GPU_DX9::InitClearInternal() {
 
546
        bool useNonBufferedRendering = g_Config.iRenderingMode == FB_NON_BUFFERED_MODE;
 
547
        if (useNonBufferedRendering) {
 
548
                dxstate.depthWrite.set(true);
 
549
                dxstate.colorMask.set(true, true, true, true);
 
550
                pD3Ddevice->Clear(0, NULL, D3DCLEAR_STENCIL|D3DCLEAR_TARGET|D3DCLEAR_ZBUFFER, D3DCOLOR_XRGB(0, 0, 0), 1.f, 0);
 
551
        }
 
552
}
 
553
 
 
554
void GPU_DX9::DumpNextFrame() {
 
555
        dumpNextFrame_ = true;
 
556
}
 
557
 
 
558
void GPU_DX9::BeginFrame() {
 
559
        ScheduleEvent(GPU_EVENT_BEGIN_FRAME);
 
560
}
 
561
 
 
562
void GPU_DX9::ReapplyGfxStateInternal() {
 
563
        dxstate.Restore();
 
564
        GPUCommon::ReapplyGfxStateInternal();
 
565
}
 
566
 
 
567
void GPU_DX9::BeginFrameInternal() {
 
568
        if (resized_) {
 
569
                UpdateCmdInfo();
 
570
                drawEngine_.Resized();
 
571
                textureCache_.NotifyConfigChanged();
 
572
                resized_ = false;
 
573
        }
 
574
 
 
575
        // Turn off vsync when unthrottled
 
576
        int desiredVSyncInterval = g_Config.bVSync ? 1 : 0;
 
577
        if ((PSP_CoreParameter().unthrottle) || (PSP_CoreParameter().fpsLimit == 1))
 
578
                desiredVSyncInterval = 0;
 
579
        if (desiredVSyncInterval != lastVsync_) {
 
580
                dxstate.SetVSyncInterval(desiredVSyncInterval);
 
581
                lastVsync_ = desiredVSyncInterval;
 
582
        }
 
583
 
 
584
        textureCache_.StartFrame();
 
585
        drawEngine_.DecimateTrackedVertexArrays();
 
586
        depalShaderCache_.Decimate();
 
587
        // fragmentTestCache_.Decimate();
 
588
 
 
589
        if (dumpNextFrame_) {
 
590
                NOTICE_LOG(G3D, "DUMPING THIS FRAME");
 
591
                dumpThisFrame_ = true;
 
592
                dumpNextFrame_ = false;
 
593
        } else if (dumpThisFrame_) {
 
594
                dumpThisFrame_ = false;
 
595
        }
 
596
        shaderManager_->DirtyShader();
 
597
 
 
598
        framebufferManager_.BeginFrame();
 
599
}
 
600
 
 
601
void GPU_DX9::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) {
 
602
        host->GPUNotifyDisplay(framebuf, stride, format);
 
603
        framebufferManager_.SetDisplayFramebuffer(framebuf, stride, format);
 
604
}
 
605
 
 
606
bool GPU_DX9::FramebufferDirty() {
 
607
        // FIXME: Workaround for displaylists sometimes hanging unprocessed.  Not yet sure of the cause.
 
608
        if (ThreadEnabled()) {
 
609
                // FIXME: Workaround for displaylists sometimes hanging unprocessed.  Not yet sure of the cause.
 
610
                ScheduleEvent(GPU_EVENT_PROCESS_QUEUE);
 
611
                // Allow it to process fully before deciding if it's dirty.
 
612
                SyncThread();
 
613
        }
 
614
        VirtualFramebuffer *vfb = framebufferManager_.GetDisplayVFB();
 
615
        if (vfb) {
 
616
                bool dirty = vfb->dirtyAfterDisplay;
 
617
                vfb->dirtyAfterDisplay = false;
 
618
                return dirty;
 
619
        }
 
620
        return true;
 
621
}
 
622
bool GPU_DX9::FramebufferReallyDirty() {
 
623
        // FIXME: Workaround for displaylists sometimes hanging unprocessed.  Not yet sure of the cause.
 
624
        if (ThreadEnabled()) {
 
625
                // FIXME: Workaround for displaylists sometimes hanging unprocessed.  Not yet sure of the cause.
 
626
                ScheduleEvent(GPU_EVENT_PROCESS_QUEUE);
 
627
                // Allow it to process fully before deciding if it's dirty.
 
628
                SyncThread();
 
629
        }
 
630
 
 
631
        VirtualFramebuffer *vfb = framebufferManager_.GetDisplayVFB();
 
632
        if (vfb) {
 
633
                bool dirty = vfb->reallyDirtyAfterDisplay;
 
634
                vfb->reallyDirtyAfterDisplay = false;
 
635
                return dirty;
 
636
        }
 
637
        return true;
 
638
}
 
639
 
 
640
void GPU_DX9::CopyDisplayToOutput() {
 
641
        ScheduleEvent(GPU_EVENT_COPY_DISPLAY_TO_OUTPUT);
 
642
}
 
643
 
 
644
void GPU_DX9::CopyDisplayToOutputInternal() {
 
645
        dxstate.depthWrite.set(true);
 
646
        dxstate.colorMask.set(true, true, true, true);
 
647
 
 
648
        drawEngine_.Flush();
 
649
 
 
650
        framebufferManager_.CopyDisplayToOutput();
 
651
        framebufferManager_.EndFrame();
 
652
 
 
653
        // shaderManager_->EndFrame();
 
654
        shaderManager_->DirtyLastShader();
 
655
 
 
656
        gstate_c.textureChanged = TEXCHANGE_UPDATED;
 
657
}
 
658
 
 
659
// Maybe should write this in ASM...
 
660
void GPU_DX9::FastRunLoop(DisplayList &list) {
 
661
        PROFILE_THIS_SCOPE("gpuloop");
 
662
        const CommandInfo *cmdInfo = cmdInfo_;
 
663
        for (; downcount > 0; --downcount) {
 
664
                // We know that display list PCs have the upper nibble == 0 - no need to mask the pointer
 
665
                const u32 op = *(const u32 *)(Memory::base + list.pc);
 
666
                const u32 cmd = op >> 24;
 
667
                const CommandInfo info = cmdInfo[cmd];
 
668
                const u8 cmdFlags = info.flags;      // If we stashed the cmdFlags in the top bits of the cmdmem, we could get away with one table lookup instead of two
 
669
                const u32 diff = op ^ gstate.cmdmem[cmd];
 
670
                // Inlined CheckFlushOp here to get rid of the dumpThisFrame_ check.
 
671
                if ((cmdFlags & FLAG_FLUSHBEFORE) || (diff && (cmdFlags & FLAG_FLUSHBEFOREONCHANGE))) {
 
672
                        drawEngine_.Flush();
 
673
                }
 
674
                gstate.cmdmem[cmd] = op;  // TODO: no need to write if diff==0...
 
675
                if ((cmdFlags & FLAG_EXECUTE) || (diff && (cmdFlags & FLAG_EXECUTEONCHANGE))) {
 
676
                        (this->*info.func)(op, diff);
 
677
                }
 
678
                list.pc += 4;
 
679
        }
 
680
}
 
681
 
 
682
void GPU_DX9::FinishDeferred() {
 
683
        // This finishes reading any vertex data that is pending.
 
684
        drawEngine_.FinishDeferred();
 
685
}
 
686
 
 
687
void GPU_DX9::ProcessEvent(GPUEvent ev) {
 
688
        switch (ev.type) {
 
689
        case GPU_EVENT_INIT_CLEAR:
 
690
                InitClearInternal();
 
691
                break;
 
692
 
 
693
        case GPU_EVENT_BEGIN_FRAME:
 
694
                BeginFrameInternal();
 
695
                break;
 
696
 
 
697
        case GPU_EVENT_COPY_DISPLAY_TO_OUTPUT:
 
698
                CopyDisplayToOutputInternal();
 
699
                break;
 
700
 
 
701
        case GPU_EVENT_INVALIDATE_CACHE:
 
702
                InvalidateCacheInternal(ev.invalidate_cache.addr, ev.invalidate_cache.size, ev.invalidate_cache.type);
 
703
                break;
 
704
 
 
705
        case GPU_EVENT_FB_MEMCPY:
 
706
                PerformMemoryCopyInternal(ev.fb_memcpy.dst, ev.fb_memcpy.src, ev.fb_memcpy.size);
 
707
                break;
 
708
 
 
709
        case GPU_EVENT_FB_MEMSET:
 
710
                PerformMemorySetInternal(ev.fb_memset.dst, ev.fb_memset.v, ev.fb_memset.size);
 
711
                break;
 
712
 
 
713
        case GPU_EVENT_FB_STENCIL_UPLOAD:
 
714
                PerformStencilUploadInternal(ev.fb_stencil_upload.dst, ev.fb_stencil_upload.size);
 
715
                break;
 
716
 
 
717
        default:
 
718
                GPUCommon::ProcessEvent(ev);
 
719
        }
 
720
}
 
721
 
 
722
inline void GPU_DX9::CheckFlushOp(int cmd, u32 diff) {
 
723
        const u8 cmdFlags = cmdInfo_[cmd].flags;
 
724
        if ((cmdFlags & FLAG_FLUSHBEFORE) || (diff && (cmdFlags & FLAG_FLUSHBEFOREONCHANGE))) {
 
725
                if (dumpThisFrame_) {
 
726
                        NOTICE_LOG(G3D, "================ FLUSH ================");
 
727
                }
 
728
                drawEngine_.Flush();
 
729
        }
 
730
}
 
731
 
 
732
void GPU_DX9::PreExecuteOp(u32 op, u32 diff) {
 
733
        CheckFlushOp(op >> 24, diff);
 
734
}
 
735
 
 
736
void GPU_DX9::ExecuteOp(u32 op, u32 diff) {
 
737
        const u8 cmd = op >> 24;
 
738
        const CommandInfo info = cmdInfo_[cmd];
 
739
        const u8 cmdFlags = info.flags;
 
740
        if ((cmdFlags & FLAG_EXECUTE) || (diff && (cmdFlags & FLAG_EXECUTEONCHANGE))) {
 
741
                (this->*info.func)(op, diff);
 
742
        }
 
743
}
 
744
 
 
745
void GPU_DX9::Execute_Vaddr(u32 op, u32 diff) {
 
746
        gstate_c.vertexAddr = gstate_c.getRelativeAddress(op & 0x00FFFFFF);
 
747
}
 
748
 
 
749
void GPU_DX9::Execute_Iaddr(u32 op, u32 diff) {
 
750
        gstate_c.indexAddr = gstate_c.getRelativeAddress(op & 0x00FFFFFF);
 
751
}
 
752
 
 
753
void GPU_DX9::Execute_VertexType(u32 op, u32 diff) {
 
754
        if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK)) {
 
755
                shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
 
756
        }
 
757
}
 
758
 
 
759
void GPU_DX9::Execute_VertexTypeSkinning(u32 op, u32 diff) {
 
760
        // Don't flush when weight count changes, unless morph is enabled.
 
761
        if ((diff & ~GE_VTYPE_WEIGHTCOUNT_MASK) || (op & GE_VTYPE_MORPHCOUNT_MASK) != 0) {
 
762
                // Restore and flush
 
763
                gstate.vertType ^= diff;
 
764
                Flush();
 
765
                gstate.vertType ^= diff;
 
766
                if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK))
 
767
                        shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
 
768
                // In this case, we may be doing weights and morphs.
 
769
                // Update any bone matrix uniforms so it uses them correctly.
 
770
                if ((op & GE_VTYPE_MORPHCOUNT_MASK) != 0) {
 
771
                        shaderManager_->DirtyUniform(gstate_c.deferredVertTypeDirty);
 
772
                        gstate_c.deferredVertTypeDirty = 0;
 
773
                }
 
774
        }
 
775
}
 
776
 
 
777
void GPU_DX9::Execute_Prim(u32 op, u32 diff) {
 
778
        // This drives all drawing. All other state we just buffer up, then we apply it only
 
779
        // when it's time to draw. As most PSP games set state redundantly ALL THE TIME, this is a huge optimization.
 
780
 
 
781
        u32 data = op & 0xFFFFFF;
 
782
        u32 count = data & 0xFFFF;
 
783
        // Upper bits are ignored.
 
784
        GEPrimitiveType prim = static_cast<GEPrimitiveType>((data >> 16) & 7);
 
785
 
 
786
        if (count == 0)
 
787
                return;
 
788
 
 
789
        // Discard AA lines as we can't do anything that makes sense with these anyway. The SW plugin might, though.
 
790
 
 
791
        if (gstate.isAntiAliasEnabled()) {
 
792
                // Discard AA lines in DOA
 
793
                if (prim == GE_PRIM_LINE_STRIP)
 
794
                        return;
 
795
                // Discard AA lines in Summon Night 5
 
796
                if ((prim == GE_PRIM_LINES) && gstate.isSkinningEnabled())
 
797
                        return;
 
798
        }
 
799
 
 
800
        // This also make skipping drawing very effective.
 
801
        framebufferManager_.SetRenderFrameBuffer(gstate_c.framebufChanged, gstate_c.skipDrawReason);
 
802
        if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
 
803
                drawEngine_.SetupVertexDecoder(gstate.vertType);
 
804
                // Rough estimate, not sure what's correct.
 
805
                int vertexCost = drawEngine_.EstimatePerVertexCost();
 
806
                cyclesExecuted += vertexCost * count;
 
807
                return;
 
808
        }
 
809
 
 
810
        u32 vertexAddr = gstate_c.vertexAddr;
 
811
        if (!Memory::IsValidAddress(vertexAddr)) {
 
812
                ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", vertexAddr);
 
813
                return;
 
814
        }
 
815
 
 
816
        void *verts = Memory::GetPointerUnchecked(vertexAddr);
 
817
        void *inds = 0;
 
818
        u32 vertexType = gstate.vertType;
 
819
        if ((vertexType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
 
820
                u32 indexAddr = gstate_c.indexAddr;
 
821
                if (!Memory::IsValidAddress(indexAddr)) {
 
822
                        ERROR_LOG_REPORT(G3D, "Bad index address %08x!", indexAddr);
 
823
                        return;
 
824
                }
 
825
                inds = Memory::GetPointerUnchecked(indexAddr);
 
826
        }
 
827
 
 
828
#ifndef MOBILE_DEVICE
 
829
        if (prim > GE_PRIM_RECTANGLES) {
 
830
                ERROR_LOG_REPORT_ONCE(reportPrim, G3D, "Unexpected prim type: %d", prim);
 
831
        }
 
832
#endif
 
833
 
 
834
        int bytesRead = 0;
 
835
        drawEngine_.SubmitPrim(verts, inds, prim, count, vertexType, &bytesRead);
 
836
 
 
837
        int vertexCost = drawEngine_.EstimatePerVertexCost() * count;
 
838
        gpuStats.vertexGPUCycles += vertexCost;
 
839
        cyclesExecuted += vertexCost;
 
840
 
 
841
        // After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).
 
842
        // Some games rely on this, they don't bother reloading VADDR and IADDR.
 
843
        // The VADDR/IADDR registers are NOT updated.
 
844
        AdvanceVerts(vertexType, count, bytesRead);
 
845
}
 
846
 
 
847
void GPU_DX9::Execute_Bezier(u32 op, u32 diff) {
 
848
        // This also make skipping drawing very effective.
 
849
        framebufferManager_.SetRenderFrameBuffer(gstate_c.framebufChanged, gstate_c.skipDrawReason);
 
850
        if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
 
851
                // TODO: Should this eat some cycles?  Probably yes.  Not sure if important.
 
852
                return;
 
853
        }
 
854
 
 
855
        if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
 
856
                ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
 
857
                return;
 
858
        }
 
859
 
 
860
        void *control_points = Memory::GetPointerUnchecked(gstate_c.vertexAddr);
 
861
        void *indices = NULL;
 
862
        if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
 
863
                if (!Memory::IsValidAddress(gstate_c.indexAddr)) {
 
864
                        ERROR_LOG_REPORT(G3D, "Bad index address %08x!", gstate_c.indexAddr);
 
865
                        return;
 
866
                }
 
867
                indices = Memory::GetPointerUnchecked(gstate_c.indexAddr);
 
868
        }
 
869
 
 
870
        if (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) {
 
871
                DEBUG_LOG_REPORT(G3D, "Bezier + morph: %i", (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT);
 
872
        }
 
873
        if (vertTypeIsSkinningEnabled(gstate.vertType)) {
 
874
                DEBUG_LOG_REPORT(G3D, "Bezier + skinning: %i", vertTypeGetNumBoneWeights(gstate.vertType));
 
875
        }
 
876
 
 
877
        GEPatchPrimType patchPrim = gstate.getPatchPrimitiveType();
 
878
        int bz_ucount = op & 0xFF;
 
879
        int bz_vcount = (op >> 8) & 0xFF;
 
880
        bool computeNormals = gstate.isLightingEnabled();
 
881
        bool patchFacing = gstate.patchfacing & 1;
 
882
        int bytesRead = 0;
 
883
        drawEngine_.SubmitBezier(control_points, indices, gstate.getPatchDivisionU(), gstate.getPatchDivisionV(), bz_ucount, bz_vcount, patchPrim, computeNormals, patchFacing, gstate.vertType, &bytesRead);
 
884
 
 
885
        // After drawing, we advance pointers - see SubmitPrim which does the same.
 
886
        int count = bz_ucount * bz_vcount;
 
887
        AdvanceVerts(gstate.vertType, count, bytesRead);
 
888
}
 
889
 
 
890
void GPU_DX9::Execute_Spline(u32 op, u32 diff) {
 
891
        // This also make skipping drawing very effective.
 
892
        framebufferManager_.SetRenderFrameBuffer(gstate_c.framebufChanged, gstate_c.skipDrawReason);
 
893
        if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
 
894
                // TODO: Should this eat some cycles?  Probably yes.  Not sure if important.
 
895
                return;
 
896
        }
 
897
 
 
898
        if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
 
899
                ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
 
900
                return;
 
901
        }
 
902
 
 
903
        void *control_points = Memory::GetPointerUnchecked(gstate_c.vertexAddr);
 
904
        void *indices = NULL;
 
905
        if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
 
906
                if (!Memory::IsValidAddress(gstate_c.indexAddr)) {
 
907
                        ERROR_LOG_REPORT(G3D, "Bad index address %08x!", gstate_c.indexAddr);
 
908
                        return;
 
909
                }
 
910
                indices = Memory::GetPointerUnchecked(gstate_c.indexAddr);
 
911
        }
 
912
 
 
913
        if (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) {
 
914
                DEBUG_LOG_REPORT(G3D, "Spline + morph: %i", (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT);
 
915
        }
 
916
        if (vertTypeIsSkinningEnabled(gstate.vertType)) {
 
917
                DEBUG_LOG_REPORT(G3D, "Spline + skinning: %i", vertTypeGetNumBoneWeights(gstate.vertType));
 
918
        }
 
919
 
 
920
        int sp_ucount = op & 0xFF;
 
921
        int sp_vcount = (op >> 8) & 0xFF;
 
922
        int sp_utype = (op >> 16) & 0x3;
 
923
        int sp_vtype = (op >> 18) & 0x3;
 
924
        GEPatchPrimType patchPrim = gstate.getPatchPrimitiveType();
 
925
        bool computeNormals = gstate.isLightingEnabled();
 
926
        bool patchFacing = gstate.patchfacing & 1;
 
927
        u32 vertType = gstate.vertType;
 
928
        int bytesRead = 0;
 
929
        drawEngine_.SubmitSpline(control_points, indices, gstate.getPatchDivisionU(), gstate.getPatchDivisionV(), sp_ucount, sp_vcount, sp_utype, sp_vtype, patchPrim, computeNormals, patchFacing, vertType, &bytesRead);
 
930
 
 
931
        // After drawing, we advance pointers - see SubmitPrim which does the same.
 
932
        int count = sp_ucount * sp_vcount;
 
933
        AdvanceVerts(gstate.vertType, count, bytesRead);
 
934
}
 
935
 
 
936
void GPU_DX9::Execute_ViewportType(u32 op, u32 diff) {
 
937
        gstate_c.framebufChanged = true;
 
938
        gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
 
939
        switch (op >> 24) {
 
940
        case GE_CMD_VIEWPORTZSCALE:
 
941
        case GE_CMD_VIEWPORTZCENTER:
 
942
                shaderManager_->DirtyUniform(DIRTY_PROJMATRIX | DIRTY_DEPTHRANGE);
 
943
                break;
 
944
        }
 
945
}
 
946
 
 
947
void GPU_DX9::Execute_BoundingBox(u32 op, u32 diff) {
 
948
        // Just resetting, nothing to bound.
 
949
        const u32 data = op & 0x00FFFFFF;
 
950
        if (data == 0) {
 
951
                // TODO: Should this set the bboxResult?  Let's set it true for now.
 
952
                currentList->bboxResult = true;
 
953
                return;
 
954
        }
 
955
        if (((data & 7) == 0) && data <= 64) {  // Sanity check
 
956
                void *control_points = Memory::GetPointer(gstate_c.vertexAddr);
 
957
                if (gstate.vertType & GE_VTYPE_IDX_MASK) {
 
958
                        ERROR_LOG_REPORT_ONCE(boundingbox, G3D, "Indexed bounding box data not supported.");
 
959
                        // Data seems invalid. Let's assume the box test passed.
 
960
                        currentList->bboxResult = true;
 
961
                        return;
 
962
                }
 
963
 
 
964
                // Test if the bounding box is within the drawing region.
 
965
                if (control_points) {
 
966
                        currentList->bboxResult = drawEngine_.TestBoundingBox(control_points, data, gstate.vertType);
 
967
                }
 
968
        } else {
 
969
                ERROR_LOG_REPORT_ONCE(boundingbox, G3D, "Bad bounding box data: %06x", data);
 
970
                // Data seems invalid. Let's assume the box test passed.
 
971
                currentList->bboxResult = true;
 
972
        }
 
973
}
 
974
 
 
975
void GPU_DX9::Execute_Region(u32 op, u32 diff) {
 
976
        gstate_c.framebufChanged = true;
 
977
        gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
 
978
}
 
979
 
 
980
void GPU_DX9::Execute_Scissor(u32 op, u32 diff) {
 
981
        gstate_c.framebufChanged = true;
 
982
        gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
 
983
}
 
984
 
 
985
void GPU_DX9::Execute_FramebufType(u32 op, u32 diff) {
 
986
        gstate_c.framebufChanged = true;
 
987
        gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
 
988
}
 
989
 
 
990
void GPU_DX9::Execute_TexScaleU(u32 op, u32 diff) {
 
991
        gstate_c.uv.uScale = getFloat24(op);
 
992
        shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
 
993
}
 
994
 
 
995
void GPU_DX9::Execute_TexScaleV(u32 op, u32 diff) {
 
996
        gstate_c.uv.vScale = getFloat24(op);
 
997
        shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
 
998
}
 
999
 
 
1000
void GPU_DX9::Execute_TexOffsetU(u32 op, u32 diff) {
 
1001
        gstate_c.uv.uOff = getFloat24(op);
 
1002
        shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
 
1003
}
 
1004
 
 
1005
void GPU_DX9::Execute_TexOffsetV(u32 op, u32 diff) {
 
1006
        gstate_c.uv.vOff = getFloat24(op);
 
1007
        shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
 
1008
}
 
1009
 
 
1010
void GPU_DX9::Execute_TexAddr0(u32 op, u32 diff) {
 
1011
        gstate_c.textureChanged = TEXCHANGE_UPDATED;
 
1012
        shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
 
1013
}
 
1014
 
 
1015
void GPU_DX9::Execute_TexAddrN(u32 op, u32 diff) {
 
1016
        gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
 
1017
}
 
1018
 
 
1019
void GPU_DX9::Execute_TexBufw0(u32 op, u32 diff) {
 
1020
        gstate_c.textureChanged = TEXCHANGE_UPDATED;
 
1021
}
 
1022
 
 
1023
void GPU_DX9::Execute_TexBufwN(u32 op, u32 diff) {
 
1024
        gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
 
1025
}
 
1026
 
 
1027
void GPU_DX9::Execute_TexSize0(u32 op, u32 diff) {
 
1028
        // Render to texture may have overridden the width/height.
 
1029
        // Don't reset it unless the size is different / the texture has changed.
 
1030
        if (diff || gstate_c.textureChanged != TEXCHANGE_UNCHANGED) {
 
1031
                gstate_c.curTextureWidth = gstate.getTextureWidth(0);
 
1032
                gstate_c.curTextureHeight = gstate.getTextureHeight(0);
 
1033
                shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
 
1034
                // We will need to reset the texture now.
 
1035
                gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
 
1036
        }
 
1037
}
 
1038
 
 
1039
void GPU_DX9::Execute_TexSizeN(u32 op, u32 diff) {
 
1040
        gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
 
1041
}
 
1042
 
 
1043
void GPU_DX9::Execute_TexFormat(u32 op, u32 diff) {
 
1044
        gstate_c.textureChanged = TEXCHANGE_UPDATED;
 
1045
}
 
1046
 
 
1047
void GPU_DX9::Execute_TexMapMode(u32 op, u32 diff) {
 
1048
        shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
 
1049
}
 
1050
 
 
1051
void GPU_DX9::Execute_TexParamType(u32 op, u32 diff) {
 
1052
        gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
 
1053
}
 
1054
 
 
1055
void GPU_DX9::Execute_TexEnvColor(u32 op, u32 diff) {
 
1056
        shaderManager_->DirtyUniform(DIRTY_TEXENV);
 
1057
}
 
1058
 
 
1059
void GPU_DX9::Execute_TexLevel(u32 op, u32 diff) {
 
1060
        // I had hoped that this would let us avoid excessively flushing in Gran Turismo, but not so,
 
1061
        // as the game switches rapidly between modes 0 and 1.
 
1062
        /*
 
1063
        if (gstate.getTexLevelMode() == GE_TEXLEVEL_MODE_CONST) {
 
1064
                gstate.texlevel ^= diff;
 
1065
                Flush();
 
1066
                gstate.texlevel ^= diff;
 
1067
        }
 
1068
        */
 
1069
        gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
 
1070
}
 
1071
 
 
1072
void GPU_DX9::Execute_LoadClut(u32 op, u32 diff) {
 
1073
        gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
 
1074
        textureCache_.LoadClut(gstate.getClutAddress(), gstate.getClutLoadBytes());
 
1075
        // This could be used to "dirty" textures with clut.
 
1076
}
 
1077
 
 
1078
void GPU_DX9::Execute_ClutFormat(u32 op, u32 diff) {
 
1079
        gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
 
1080
        // This could be used to "dirty" textures with clut.
 
1081
}
 
1082
 
 
1083
void GPU_DX9::Execute_Ambient(u32 op, u32 diff) {
 
1084
        shaderManager_->DirtyUniform(DIRTY_AMBIENT);
 
1085
}
 
1086
 
 
1087
void GPU_DX9::Execute_MaterialDiffuse(u32 op, u32 diff) {
 
1088
        shaderManager_->DirtyUniform(DIRTY_MATDIFFUSE);
 
1089
}
 
1090
 
 
1091
void GPU_DX9::Execute_MaterialEmissive(u32 op, u32 diff) {
 
1092
        shaderManager_->DirtyUniform(DIRTY_MATEMISSIVE);
 
1093
}
 
1094
 
 
1095
void GPU_DX9::Execute_MaterialAmbient(u32 op, u32 diff) {
 
1096
        shaderManager_->DirtyUniform(DIRTY_MATAMBIENTALPHA);
 
1097
}
 
1098
 
 
1099
void GPU_DX9::Execute_MaterialSpecular(u32 op, u32 diff) {
 
1100
        shaderManager_->DirtyUniform(DIRTY_MATSPECULAR);
 
1101
}
 
1102
 
 
1103
void GPU_DX9::Execute_Light0Param(u32 op, u32 diff) {
 
1104
        shaderManager_->DirtyUniform(DIRTY_LIGHT0);
 
1105
}
 
1106
 
 
1107
void GPU_DX9::Execute_Light1Param(u32 op, u32 diff) {
 
1108
        shaderManager_->DirtyUniform(DIRTY_LIGHT1);
 
1109
}
 
1110
 
 
1111
void GPU_DX9::Execute_Light2Param(u32 op, u32 diff) {
 
1112
        shaderManager_->DirtyUniform(DIRTY_LIGHT2);
 
1113
}
 
1114
 
 
1115
void GPU_DX9::Execute_Light3Param(u32 op, u32 diff) {
 
1116
        shaderManager_->DirtyUniform(DIRTY_LIGHT3);
 
1117
}
 
1118
 
 
1119
void GPU_DX9::Execute_FogColor(u32 op, u32 diff) {
 
1120
        shaderManager_->DirtyUniform(DIRTY_FOGCOLOR);
 
1121
}
 
1122
 
 
1123
void GPU_DX9::Execute_FogCoef(u32 op, u32 diff) {
 
1124
        shaderManager_->DirtyUniform(DIRTY_FOGCOEF);
 
1125
}
 
1126
 
 
1127
void GPU_DX9::Execute_ColorTestMask(u32 op, u32 diff) {
 
1128
        shaderManager_->DirtyUniform(DIRTY_ALPHACOLORMASK);
 
1129
}
 
1130
 
 
1131
void GPU_DX9::Execute_AlphaTest(u32 op, u32 diff) {
 
1132
        shaderManager_->DirtyUniform(DIRTY_ALPHACOLORREF);
 
1133
        shaderManager_->DirtyUniform(DIRTY_ALPHACOLORMASK);
 
1134
}
 
1135
 
 
1136
void GPU_DX9::Execute_StencilTest(u32 op, u32 diff) {
 
1137
        shaderManager_->DirtyUniform(DIRTY_STENCILREPLACEVALUE);
 
1138
}
 
1139
 
 
1140
void GPU_DX9::Execute_ColorRef(u32 op, u32 diff) {
 
1141
        shaderManager_->DirtyUniform(DIRTY_ALPHACOLORREF);
 
1142
}
 
1143
 
 
1144
void GPU_DX9::Execute_WorldMtxNum(u32 op, u32 diff) {
 
1145
        // This is almost always followed by GE_CMD_WORLDMATRIXDATA.
 
1146
        const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
 
1147
        u32 *dst = (u32 *)(gstate.worldMatrix + (op & 0xF));
 
1148
        const int end = 12 - (op & 0xF);
 
1149
        int i = 0;
 
1150
 
 
1151
        while ((src[i] >> 24) == GE_CMD_WORLDMATRIXDATA) {
 
1152
                const u32 newVal = src[i] << 8;
 
1153
                if (dst[i] != newVal) {
 
1154
                        Flush();
 
1155
                        dst[i] = newVal;
 
1156
                        shaderManager_->DirtyUniform(DIRTY_WORLDMATRIX);
 
1157
                }
 
1158
                if (++i >= end) {
 
1159
                        break;
 
1160
                }
 
1161
        }
 
1162
 
 
1163
        const int count = i;
 
1164
        gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | ((op + count) & 0xF);
 
1165
 
 
1166
        // Skip over the loaded data, it's done now.
 
1167
        UpdatePC(currentList->pc, currentList->pc + count * 4);
 
1168
        currentList->pc += count * 4;
 
1169
}
 
1170
 
 
1171
void GPU_DX9::Execute_WorldMtxData(u32 op, u32 diff) {
 
1172
        // Note: it's uncommon to get here now, see above.
 
1173
        int num = gstate.worldmtxnum & 0xF;
 
1174
        u32 newVal = op << 8;
 
1175
        if (num < 12 && newVal != ((const u32 *)gstate.worldMatrix)[num]) {
 
1176
                Flush();
 
1177
                ((u32 *)gstate.worldMatrix)[num] = newVal;
 
1178
                shaderManager_->DirtyUniform(DIRTY_WORLDMATRIX);
 
1179
        }
 
1180
        num++;
 
1181
        gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | (num & 0xF);
 
1182
}
 
1183
 
 
1184
void GPU_DX9::Execute_ViewMtxNum(u32 op, u32 diff) {
 
1185
        // This is almost always followed by GE_CMD_VIEWMATRIXDATA.
 
1186
        const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
 
1187
        u32 *dst = (u32 *)(gstate.viewMatrix + (op & 0xF));
 
1188
        const int end = 12 - (op & 0xF);
 
1189
        int i = 0;
 
1190
 
 
1191
        while ((src[i] >> 24) == GE_CMD_VIEWMATRIXDATA) {
 
1192
                const u32 newVal = src[i] << 8;
 
1193
                if (dst[i] != newVal) {
 
1194
                        Flush();
 
1195
                        dst[i] = newVal;
 
1196
                        shaderManager_->DirtyUniform(DIRTY_VIEWMATRIX);
 
1197
                }
 
1198
                if (++i >= end) {
 
1199
                        break;
 
1200
                }
 
1201
        }
 
1202
 
 
1203
        const int count = i;
 
1204
        gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | ((op + count) & 0xF);
 
1205
 
 
1206
        // Skip over the loaded data, it's done now.
 
1207
        UpdatePC(currentList->pc, currentList->pc + count * 4);
 
1208
        currentList->pc += count * 4;
 
1209
}
 
1210
 
 
1211
void GPU_DX9::Execute_ViewMtxData(u32 op, u32 diff) {
 
1212
        // Note: it's uncommon to get here now, see above.
 
1213
        int num = gstate.viewmtxnum & 0xF;
 
1214
        u32 newVal = op << 8;
 
1215
        if (num < 12 && newVal != ((const u32 *)gstate.viewMatrix)[num]) {
 
1216
                Flush();
 
1217
                ((u32 *)gstate.viewMatrix)[num] = newVal;
 
1218
                shaderManager_->DirtyUniform(DIRTY_VIEWMATRIX);
 
1219
        }
 
1220
        num++;
 
1221
        gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | (num & 0xF);
 
1222
}
 
1223
 
 
1224
void GPU_DX9::Execute_ProjMtxNum(u32 op, u32 diff) {
 
1225
        // This is almost always followed by GE_CMD_PROJMATRIXDATA.
 
1226
        const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
 
1227
        u32 *dst = (u32 *)(gstate.projMatrix + (op & 0xF));
 
1228
        const int end = 16 - (op & 0xF);
 
1229
        int i = 0;
 
1230
 
 
1231
        while ((src[i] >> 24) == GE_CMD_PROJMATRIXDATA) {
 
1232
                const u32 newVal = src[i] << 8;
 
1233
                if (dst[i] != newVal) {
 
1234
                        Flush();
 
1235
                        dst[i] = newVal;
 
1236
                        shaderManager_->DirtyUniform(DIRTY_PROJMATRIX);
 
1237
                }
 
1238
                if (++i >= end) {
 
1239
                        break;
 
1240
                }
 
1241
        }
 
1242
 
 
1243
        const int count = i;
 
1244
        gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | ((op + count) & 0xF);
 
1245
 
 
1246
        // Skip over the loaded data, it's done now.
 
1247
        UpdatePC(currentList->pc, currentList->pc + count * 4);
 
1248
        currentList->pc += count * 4;
 
1249
}
 
1250
 
 
1251
void GPU_DX9::Execute_ProjMtxData(u32 op, u32 diff) {
 
1252
        // Note: it's uncommon to get here now, see above.
 
1253
        int num = gstate.projmtxnum & 0xF;
 
1254
        u32 newVal = op << 8;
 
1255
        if (newVal != ((const u32 *)gstate.projMatrix)[num]) {
 
1256
                Flush();
 
1257
                ((u32 *)gstate.projMatrix)[num] = newVal;
 
1258
                shaderManager_->DirtyUniform(DIRTY_PROJMATRIX);
 
1259
        }
 
1260
        num++;
 
1261
        gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | (num & 0xF);
 
1262
}
 
1263
 
 
1264
void GPU_DX9::Execute_TgenMtxNum(u32 op, u32 diff) {
 
1265
        // This is almost always followed by GE_CMD_TGENMATRIXDATA.
 
1266
        const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
 
1267
        u32 *dst = (u32 *)(gstate.tgenMatrix + (op & 0xF));
 
1268
        const int end = 12 - (op & 0xF);
 
1269
        int i = 0;
 
1270
 
 
1271
        while ((src[i] >> 24) == GE_CMD_TGENMATRIXDATA) {
 
1272
                const u32 newVal = src[i] << 8;
 
1273
                if (dst[i] != newVal) {
 
1274
                        Flush();
 
1275
                        dst[i] = newVal;
 
1276
                        shaderManager_->DirtyUniform(DIRTY_TEXMATRIX);
 
1277
                }
 
1278
                if (++i >= end) {
 
1279
                        break;
 
1280
                }
 
1281
        }
 
1282
 
 
1283
        const int count = i;
 
1284
        gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | ((op + count) & 0xF);
 
1285
 
 
1286
        // Skip over the loaded data, it's done now.
 
1287
        UpdatePC(currentList->pc, currentList->pc + count * 4);
 
1288
        currentList->pc += count * 4;
 
1289
}
 
1290
 
 
1291
void GPU_DX9::Execute_TgenMtxData(u32 op, u32 diff) {
 
1292
        // Note: it's uncommon to get here now, see above.
 
1293
        int num = gstate.texmtxnum & 0xF;
 
1294
        u32 newVal = op << 8;
 
1295
        if (num < 12 && newVal != ((const u32 *)gstate.tgenMatrix)[num]) {
 
1296
                Flush();
 
1297
                ((u32 *)gstate.tgenMatrix)[num] = newVal;
 
1298
                shaderManager_->DirtyUniform(DIRTY_TEXMATRIX);
 
1299
        }
 
1300
        num++;
 
1301
        gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | (num & 0xF);
 
1302
}
 
1303
 
 
1304
void GPU_DX9::Execute_BoneMtxNum(u32 op, u32 diff) {
 
1305
        // This is almost always followed by GE_CMD_BONEMATRIXDATA.
 
1306
        const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
 
1307
        u32 *dst = (u32 *)(gstate.boneMatrix + (op & 0x7F));
 
1308
        const int end = 12 * 8 - (op & 0x7F);
 
1309
        int i = 0;
 
1310
 
 
1311
        // If we can't use software skinning, we have to flush and dirty.
 
1312
        if (!g_Config.bSoftwareSkinning || (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) != 0) {
 
1313
                while ((src[i] >> 24) == GE_CMD_BONEMATRIXDATA) {
 
1314
                        const u32 newVal = src[i] << 8;
 
1315
                        if (dst[i] != newVal) {
 
1316
                                Flush();
 
1317
                                dst[i] = newVal;
 
1318
                        }
 
1319
                        if (++i >= end) {
 
1320
                                break;
 
1321
                        }
 
1322
                }
 
1323
 
 
1324
                const int numPlusCount = (op & 0x7F) + i;
 
1325
                for (int num = op & 0x7F; num < numPlusCount; num += 12) {
 
1326
                        shaderManager_->DirtyUniform(DIRTY_BONEMATRIX0 << (num / 12));
 
1327
                }
 
1328
        } else {
 
1329
                while ((src[i] >> 24) == GE_CMD_BONEMATRIXDATA) {
 
1330
                        dst[i] = src[i] << 8;
 
1331
                        if (++i >= end) {
 
1332
                                break;
 
1333
                        }
 
1334
                }
 
1335
 
 
1336
                const int numPlusCount = (op & 0x7F) + i;
 
1337
                for (int num = op & 0x7F; num < numPlusCount; num += 12) {
 
1338
                        gstate_c.deferredVertTypeDirty |= DIRTY_BONEMATRIX0 << (num / 12);
 
1339
                }
 
1340
        }
 
1341
 
 
1342
        const int count = i;
 
1343
        gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | ((op + count) & 0x7F);
 
1344
 
 
1345
        // Skip over the loaded data, it's done now.
 
1346
        UpdatePC(currentList->pc, currentList->pc + count * 4);
 
1347
        currentList->pc += count * 4;
 
1348
}
 
1349
 
 
1350
void GPU_DX9::Execute_BoneMtxData(u32 op, u32 diff) {
 
1351
        // Note: it's uncommon to get here now, see above.
 
1352
        int num = gstate.boneMatrixNumber & 0x7F;
 
1353
        u32 newVal = op << 8;
 
1354
        if (num < 96 && newVal != ((const u32 *)gstate.boneMatrix)[num]) {
 
1355
                // Bone matrices should NOT flush when software skinning is enabled!
 
1356
                if (!g_Config.bSoftwareSkinning || (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) != 0) {
 
1357
                        Flush();
 
1358
                        shaderManager_->DirtyUniform(DIRTY_BONEMATRIX0 << (num / 12));
 
1359
                } else {
 
1360
                        gstate_c.deferredVertTypeDirty |= DIRTY_BONEMATRIX0 << (num / 12);
 
1361
                }
 
1362
                ((u32 *)gstate.boneMatrix)[num] = newVal;
 
1363
        }
 
1364
        num++;
 
1365
        gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | (num & 0x7F);
 
1366
}
 
1367
 
 
1368
void GPU_DX9::Execute_Generic(u32 op, u32 diff) {
 
1369
        u32 cmd = op >> 24;
 
1370
        u32 data = op & 0xFFFFFF;
 
1371
 
 
1372
        // Handle control and drawing commands here directly. The others we delegate.
 
1373
        switch (cmd) {
 
1374
        case GE_CMD_BASE:
 
1375
                break;
 
1376
 
 
1377
        case GE_CMD_VADDR:
 
1378
                Execute_Vaddr(op, diff);
 
1379
                break;
 
1380
 
 
1381
        case GE_CMD_IADDR:
 
1382
                Execute_Iaddr(op, diff);
 
1383
                break;
 
1384
 
 
1385
        case GE_CMD_PRIM:
 
1386
                Execute_Prim(op, diff);
 
1387
                break;
 
1388
 
 
1389
                // The arrow and other rotary items in Puzbob are bezier patches, strangely enough.
 
1390
        case GE_CMD_BEZIER:
 
1391
                Execute_Bezier(op, diff);
 
1392
                break;
 
1393
 
 
1394
        case GE_CMD_SPLINE:
 
1395
                Execute_Spline(op, diff);
 
1396
                break;
 
1397
 
 
1398
        case GE_CMD_BOUNDINGBOX:
 
1399
                Execute_BoundingBox(op, diff);
 
1400
                break;
 
1401
 
 
1402
        case GE_CMD_REGION1:
 
1403
        case GE_CMD_REGION2:
 
1404
                Execute_Region(op, diff);
 
1405
                break;
 
1406
 
 
1407
        case GE_CMD_CLIPENABLE:
 
1408
                //we always clip, this is opengl
 
1409
                break;
 
1410
 
 
1411
        case GE_CMD_CULLFACEENABLE:
 
1412
        case GE_CMD_CULL:
 
1413
                break;
 
1414
 
 
1415
        case GE_CMD_TEXTUREMAPENABLE:
 
1416
                if (diff)
 
1417
                        gstate_c.textureChanged = TEXCHANGE_UPDATED;
 
1418
                break;
 
1419
 
 
1420
        case GE_CMD_LIGHTINGENABLE:
 
1421
                break;
 
1422
 
 
1423
        case GE_CMD_FOGCOLOR:
 
1424
                if (diff)
 
1425
                        shaderManager_->DirtyUniform(DIRTY_FOGCOLOR);
 
1426
                break;
 
1427
 
 
1428
        case GE_CMD_FOG1:
 
1429
                if (diff)
 
1430
                        shaderManager_->DirtyUniform(DIRTY_FOGCOEF);
 
1431
                break;
 
1432
 
 
1433
        case GE_CMD_FOG2:
 
1434
                if (diff)
 
1435
                        shaderManager_->DirtyUniform(DIRTY_FOGCOEF);
 
1436
                break;
 
1437
 
 
1438
        case GE_CMD_FOGENABLE:
 
1439
                break;
 
1440
 
 
1441
        case GE_CMD_DITHERENABLE:
 
1442
                break;
 
1443
 
 
1444
        case GE_CMD_OFFSETX:
 
1445
                break;
 
1446
 
 
1447
        case GE_CMD_OFFSETY:
 
1448
                break;
 
1449
 
 
1450
        case GE_CMD_TEXSCALEU: Execute_TexScaleU(op, diff); break;
 
1451
        case GE_CMD_TEXSCALEV: Execute_TexScaleV(op, diff); break;
 
1452
        case GE_CMD_TEXOFFSETU: Execute_TexOffsetU(op, diff); break;
 
1453
        case GE_CMD_TEXOFFSETV: Execute_TexOffsetV(op, diff); break;
 
1454
 
 
1455
        case GE_CMD_SCISSOR1:
 
1456
        case GE_CMD_SCISSOR2:
 
1457
                Execute_Scissor(op, diff);
 
1458
                break;
 
1459
 
 
1460
                ///
 
1461
        case GE_CMD_MINZ:
 
1462
        case GE_CMD_MAXZ:
 
1463
                break;
 
1464
 
 
1465
        case GE_CMD_FRAMEBUFPTR:
 
1466
        case GE_CMD_FRAMEBUFWIDTH:
 
1467
        case GE_CMD_FRAMEBUFPIXFORMAT:
 
1468
                Execute_FramebufType(op, diff);
 
1469
                break;
 
1470
 
 
1471
        case GE_CMD_TEXADDR0:
 
1472
                Execute_TexAddr0(op, diff);
 
1473
                break;
 
1474
 
 
1475
        case GE_CMD_TEXADDR1:
 
1476
        case GE_CMD_TEXADDR2:
 
1477
        case GE_CMD_TEXADDR3:
 
1478
        case GE_CMD_TEXADDR4:
 
1479
        case GE_CMD_TEXADDR5:
 
1480
        case GE_CMD_TEXADDR6:
 
1481
        case GE_CMD_TEXADDR7:
 
1482
                Execute_TexAddrN(op, diff);
 
1483
                break;
 
1484
 
 
1485
        case GE_CMD_TEXBUFWIDTH0:
 
1486
                Execute_TexAddr0(op, diff);
 
1487
                break;
 
1488
 
 
1489
        case GE_CMD_TEXBUFWIDTH1:
 
1490
        case GE_CMD_TEXBUFWIDTH2:
 
1491
        case GE_CMD_TEXBUFWIDTH3:
 
1492
        case GE_CMD_TEXBUFWIDTH4:
 
1493
        case GE_CMD_TEXBUFWIDTH5:
 
1494
        case GE_CMD_TEXBUFWIDTH6:
 
1495
        case GE_CMD_TEXBUFWIDTH7:
 
1496
                Execute_TexBufwN(op, diff);
 
1497
                break;
 
1498
 
 
1499
        case GE_CMD_CLUTFORMAT:
 
1500
                Execute_ClutFormat(op, diff);
 
1501
                break;
 
1502
 
 
1503
        case GE_CMD_CLUTADDR:
 
1504
        case GE_CMD_CLUTADDRUPPER:
 
1505
                // Hm, LOADCLUT actually changes the CLUT so no need to dirty here.
 
1506
                break;
 
1507
 
 
1508
        case GE_CMD_LOADCLUT:
 
1509
                Execute_LoadClut(op, diff);
 
1510
                break;
 
1511
 
 
1512
        case GE_CMD_TEXMAPMODE:
 
1513
                Execute_TexMapMode(op, diff);
 
1514
                break;
 
1515
 
 
1516
        case GE_CMD_TEXSHADELS:
 
1517
                break;
 
1518
 
 
1519
        case GE_CMD_TRANSFERSRC:
 
1520
        case GE_CMD_TRANSFERSRCW:
 
1521
        case GE_CMD_TRANSFERDST:
 
1522
        case GE_CMD_TRANSFERDSTW:
 
1523
        case GE_CMD_TRANSFERSRCPOS:
 
1524
        case GE_CMD_TRANSFERDSTPOS:
 
1525
                break;
 
1526
 
 
1527
        case GE_CMD_TRANSFERSIZE:
 
1528
                break;
 
1529
 
 
1530
        case GE_CMD_TRANSFERSTART:  // Orphis calls this TRXKICK
 
1531
                {
 
1532
                        // TODO: Here we should check if the transfer overlaps a framebuffer or any textures,
 
1533
                        // and take appropriate action. This is a block transfer between RAM and VRAM, or vice versa.
 
1534
                        // Can we skip this entirely on SkipDraw? It skips some things internally.
 
1535
                        DoBlockTransfer(gstate_c.skipDrawReason);
 
1536
 
 
1537
                        // Fixes Gran Turismo's funky text issue, since it overwrites the current texture.
 
1538
                        gstate_c.textureChanged = TEXCHANGE_UPDATED;
 
1539
                        break;
 
1540
                }
 
1541
 
 
1542
        case GE_CMD_TEXSIZE0:
 
1543
                Execute_TexSize0(op, diff);
 
1544
                break;
 
1545
 
 
1546
        case GE_CMD_TEXSIZE1:
 
1547
        case GE_CMD_TEXSIZE2:
 
1548
        case GE_CMD_TEXSIZE3:
 
1549
        case GE_CMD_TEXSIZE4:
 
1550
        case GE_CMD_TEXSIZE5:
 
1551
        case GE_CMD_TEXSIZE6:
 
1552
        case GE_CMD_TEXSIZE7:
 
1553
                Execute_TexSizeN(op, diff);
 
1554
                break;
 
1555
 
 
1556
        case GE_CMD_ZBUFPTR:
 
1557
        case GE_CMD_ZBUFWIDTH:
 
1558
                break;
 
1559
 
 
1560
        case GE_CMD_AMBIENTCOLOR:
 
1561
        case GE_CMD_AMBIENTALPHA:
 
1562
                if (diff)
 
1563
                        shaderManager_->DirtyUniform(DIRTY_AMBIENT);
 
1564
                break;
 
1565
 
 
1566
        case GE_CMD_MATERIALDIFFUSE:
 
1567
                if (diff)
 
1568
                        shaderManager_->DirtyUniform(DIRTY_MATDIFFUSE);
 
1569
                break;
 
1570
 
 
1571
        case GE_CMD_MATERIALEMISSIVE:
 
1572
                if (diff)
 
1573
                        shaderManager_->DirtyUniform(DIRTY_MATEMISSIVE);
 
1574
                break;
 
1575
 
 
1576
        case GE_CMD_MATERIALAMBIENT:
 
1577
        case GE_CMD_MATERIALALPHA:
 
1578
                if (diff)
 
1579
                        shaderManager_->DirtyUniform(DIRTY_MATAMBIENTALPHA);
 
1580
                break;
 
1581
 
 
1582
        case GE_CMD_MATERIALSPECULAR:
 
1583
        case GE_CMD_MATERIALSPECULARCOEF:
 
1584
                if (diff)
 
1585
                        shaderManager_->DirtyUniform(DIRTY_MATSPECULAR);
 
1586
                break;
 
1587
 
 
1588
        case GE_CMD_LIGHTTYPE0:
 
1589
        case GE_CMD_LIGHTTYPE1:
 
1590
        case GE_CMD_LIGHTTYPE2:
 
1591
        case GE_CMD_LIGHTTYPE3:
 
1592
                break;
 
1593
        case GE_CMD_LX0:case GE_CMD_LY0:case GE_CMD_LZ0:
 
1594
        case GE_CMD_LDX0:case GE_CMD_LDY0:case GE_CMD_LDZ0:
 
1595
        case GE_CMD_LKA0:case GE_CMD_LKB0:case GE_CMD_LKC0:
 
1596
        case GE_CMD_LKS0:  // spot coef ("conv")
 
1597
        case GE_CMD_LKO0: // light angle ("cutoff")
 
1598
        case GE_CMD_LAC0:
 
1599
        case GE_CMD_LDC0:
 
1600
        case GE_CMD_LSC0:
 
1601
                shaderManager_->DirtyUniform(DIRTY_LIGHT0);
 
1602
                break;
 
1603
 
 
1604
        case GE_CMD_LX1:case GE_CMD_LY1:case GE_CMD_LZ1:
 
1605
        case GE_CMD_LDX1:case GE_CMD_LDY1:case GE_CMD_LDZ1:
 
1606
        case GE_CMD_LKA1:case GE_CMD_LKB1:case GE_CMD_LKC1:
 
1607
        case GE_CMD_LKS1:
 
1608
        case GE_CMD_LKO1:
 
1609
        case GE_CMD_LAC1:
 
1610
        case GE_CMD_LDC1:
 
1611
        case GE_CMD_LSC1:
 
1612
                shaderManager_->DirtyUniform(DIRTY_LIGHT1);
 
1613
                break;
 
1614
        case GE_CMD_LX2:case GE_CMD_LY2:case GE_CMD_LZ2:
 
1615
        case GE_CMD_LDX2:case GE_CMD_LDY2:case GE_CMD_LDZ2:
 
1616
        case GE_CMD_LKA2:case GE_CMD_LKB2:case GE_CMD_LKC2:
 
1617
        case GE_CMD_LKS2:
 
1618
        case GE_CMD_LKO2:
 
1619
        case GE_CMD_LAC2:
 
1620
        case GE_CMD_LDC2:
 
1621
        case GE_CMD_LSC2:
 
1622
                shaderManager_->DirtyUniform(DIRTY_LIGHT2);
 
1623
                break;
 
1624
        case GE_CMD_LX3:case GE_CMD_LY3:case GE_CMD_LZ3:
 
1625
        case GE_CMD_LDX3:case GE_CMD_LDY3:case GE_CMD_LDZ3:
 
1626
        case GE_CMD_LKA3:case GE_CMD_LKB3:case GE_CMD_LKC3:
 
1627
        case GE_CMD_LKS3:
 
1628
        case GE_CMD_LKO3:
 
1629
        case GE_CMD_LAC3:
 
1630
        case GE_CMD_LDC3:
 
1631
        case GE_CMD_LSC3:
 
1632
                shaderManager_->DirtyUniform(DIRTY_LIGHT3);
 
1633
                break;
 
1634
 
 
1635
        case GE_CMD_VIEWPORTXSCALE:
 
1636
        case GE_CMD_VIEWPORTYSCALE:
 
1637
        case GE_CMD_VIEWPORTXCENTER:
 
1638
        case GE_CMD_VIEWPORTYCENTER:
 
1639
        case GE_CMD_VIEWPORTZSCALE:
 
1640
        case GE_CMD_VIEWPORTZCENTER:
 
1641
                Execute_ViewportType(op, diff);
 
1642
                break;
 
1643
 
 
1644
        case GE_CMD_LIGHTENABLE0:
 
1645
        case GE_CMD_LIGHTENABLE1:
 
1646
        case GE_CMD_LIGHTENABLE2:
 
1647
        case GE_CMD_LIGHTENABLE3:
 
1648
                break;
 
1649
 
 
1650
        case GE_CMD_SHADEMODE:
 
1651
                break;
 
1652
 
 
1653
        case GE_CMD_PATCHDIVISION:
 
1654
        case GE_CMD_PATCHPRIMITIVE:
 
1655
        case GE_CMD_PATCHFACING:
 
1656
                break;
 
1657
 
 
1658
 
 
1659
        case GE_CMD_MATERIALUPDATE:
 
1660
                break;
 
1661
 
 
1662
        //////////////////////////////////////////////////////////////////
 
1663
        //      CLEARING
 
1664
        //////////////////////////////////////////////////////////////////
 
1665
        case GE_CMD_CLEARMODE:
 
1666
                break;
 
1667
 
 
1668
        //////////////////////////////////////////////////////////////////
 
1669
        //      ALPHA BLENDING
 
1670
        //////////////////////////////////////////////////////////////////
 
1671
        case GE_CMD_ALPHABLENDENABLE:
 
1672
        case GE_CMD_BLENDMODE:
 
1673
        case GE_CMD_BLENDFIXEDA:
 
1674
        case GE_CMD_BLENDFIXEDB:
 
1675
                break;
 
1676
 
 
1677
        case GE_CMD_ALPHATESTENABLE:
 
1678
        case GE_CMD_COLORTESTENABLE:
 
1679
                // They are done in the fragment shader.
 
1680
                break;
 
1681
 
 
1682
        case GE_CMD_COLORTEST:
 
1683
        case GE_CMD_COLORTESTMASK:
 
1684
                if (diff)
 
1685
                        shaderManager_->DirtyUniform(DIRTY_ALPHACOLORMASK);
 
1686
                break;
 
1687
 
 
1688
        case GE_CMD_ALPHATEST:
 
1689
                if (((data >> 16) & 0xFF) != 0xFF && (data & 7) > 1)
 
1690
                        WARN_LOG_REPORT_ONCE(alphatestmask, G3D, "Unsupported alphatest mask: %02x", (data >> 16) & 0xFF);
 
1691
                // Intentional fallthrough.
 
1692
        case GE_CMD_COLORREF:
 
1693
                if (diff)
 
1694
                        shaderManager_->DirtyUniform(DIRTY_ALPHACOLORREF);
 
1695
                break;
 
1696
 
 
1697
        case GE_CMD_TEXENVCOLOR:
 
1698
                if (diff)
 
1699
                        shaderManager_->DirtyUniform(DIRTY_TEXENV);
 
1700
                break;
 
1701
 
 
1702
        case GE_CMD_TEXFUNC:
 
1703
        case GE_CMD_TEXFLUSH:
 
1704
                break;
 
1705
 
 
1706
        case GE_CMD_TEXFORMAT:
 
1707
                Execute_TexFormat(op, diff);
 
1708
                break;
 
1709
 
 
1710
        case GE_CMD_TEXMODE:
 
1711
        case GE_CMD_TEXFILTER:
 
1712
        case GE_CMD_TEXWRAP:
 
1713
                Execute_TexParamType(op, diff);
 
1714
                break;
 
1715
 
 
1716
        //////////////////////////////////////////////////////////////////
 
1717
        //      Z/STENCIL TESTING
 
1718
        //////////////////////////////////////////////////////////////////
 
1719
 
 
1720
        case GE_CMD_STENCILTESTENABLE:
 
1721
        case GE_CMD_ZTESTENABLE:
 
1722
        case GE_CMD_ZTEST:
 
1723
        case GE_CMD_ZWRITEDISABLE:
 
1724
                break;
 
1725
 
 
1726
        case GE_CMD_MORPHWEIGHT0:
 
1727
        case GE_CMD_MORPHWEIGHT1:
 
1728
        case GE_CMD_MORPHWEIGHT2:
 
1729
        case GE_CMD_MORPHWEIGHT3:
 
1730
        case GE_CMD_MORPHWEIGHT4:
 
1731
        case GE_CMD_MORPHWEIGHT5:
 
1732
        case GE_CMD_MORPHWEIGHT6:
 
1733
        case GE_CMD_MORPHWEIGHT7:
 
1734
                gstate_c.morphWeights[cmd - GE_CMD_MORPHWEIGHT0] = getFloat24(data);
 
1735
                break;
 
1736
 
 
1737
        case GE_CMD_DITH0:
 
1738
        case GE_CMD_DITH1:
 
1739
        case GE_CMD_DITH2:
 
1740
        case GE_CMD_DITH3:
 
1741
                break;
 
1742
 
 
1743
        case GE_CMD_LOGICOPENABLE:
 
1744
        case GE_CMD_LOGICOP:
 
1745
                break;
 
1746
 
 
1747
        case GE_CMD_ANTIALIASENABLE:
 
1748
                if (data != 0)
 
1749
                        WARN_LOG_REPORT_ONCE(antiAlias, G3D, "Unsupported antialias enabled: %06x", data);
 
1750
                break;
 
1751
 
 
1752
        case GE_CMD_TEXLODSLOPE:
 
1753
                if (data != 0)
 
1754
                        WARN_LOG_REPORT_ONCE(texLodSlope, G3D, "Unsupported texture lod slope: %06x", data);
 
1755
                break;
 
1756
 
 
1757
        case GE_CMD_TEXLEVEL:
 
1758
                Execute_TexLevel(op, diff);
 
1759
                break;
 
1760
 
 
1761
        case GE_CMD_VSCX:
 
1762
                if (data != 0)
 
1763
                        WARN_LOG_REPORT_ONCE(vscx, G3D, "Unsupported Vertex Screen Coordinate X : %06x", data);
 
1764
                break;
 
1765
 
 
1766
        case GE_CMD_VSCY:
 
1767
                if (data != 0)
 
1768
                        WARN_LOG_REPORT_ONCE(vscy, G3D, "Unsupported Vertex Screen Coordinate Y : %06x", data);
 
1769
                break;
 
1770
 
 
1771
        case GE_CMD_VSCZ:
 
1772
                if (data != 0)
 
1773
                        WARN_LOG_REPORT_ONCE(vscz, G3D, "Unsupported Vertex Screen Coordinate Z : %06x", data);
 
1774
                break;
 
1775
 
 
1776
        case GE_CMD_VTCS:
 
1777
                if (data != 0)
 
1778
                        WARN_LOG_REPORT_ONCE(vtcs, G3D, "Unsupported Vertex Texture Coordinate S : %06x", data);
 
1779
                break;
 
1780
 
 
1781
        case GE_CMD_VTCT:
 
1782
                if (data != 0)
 
1783
                        WARN_LOG_REPORT_ONCE(vtct, G3D, "Unsupported Vertex Texture Coordinate T : %06x", data);
 
1784
                break;
 
1785
 
 
1786
        case GE_CMD_VTCQ:
 
1787
                if (data != 0)
 
1788
                        WARN_LOG_REPORT_ONCE(vtcq, G3D, "Unsupported Vertex Texture Coordinate Q : %06x", data);
 
1789
                break;
 
1790
 
 
1791
        case GE_CMD_VCV:
 
1792
                if (data != 0)
 
1793
                        WARN_LOG_REPORT_ONCE(vcv, G3D, "Unsupported Vertex Color Value : %06x", data);
 
1794
                break;
 
1795
 
 
1796
        case GE_CMD_VAP:
 
1797
                if (data != 0)
 
1798
                        WARN_LOG_REPORT_ONCE(vap, G3D, "Unsupported Vertex Alpha and Primitive : %06x", data);
 
1799
                break;
 
1800
 
 
1801
        case GE_CMD_VFC:
 
1802
                if (data != 0)
 
1803
                        WARN_LOG_REPORT_ONCE(vfc, G3D, "Unsupported Vertex Fog Coefficient : %06x", data);
 
1804
                break;
 
1805
 
 
1806
        case GE_CMD_VSCV:
 
1807
                if (data != 0)
 
1808
                        WARN_LOG_REPORT_ONCE(vscv, G3D, "Unsupported Vertex Secondary Color Value : %06x", data);
 
1809
                break;
 
1810
 
 
1811
 
 
1812
        case GE_CMD_UNKNOWN_03: 
 
1813
        case GE_CMD_UNKNOWN_0D:
 
1814
        case GE_CMD_UNKNOWN_11:
 
1815
        case GE_CMD_UNKNOWN_29:
 
1816
        case GE_CMD_UNKNOWN_34:
 
1817
        case GE_CMD_UNKNOWN_35:
 
1818
        case GE_CMD_UNKNOWN_39:
 
1819
        case GE_CMD_UNKNOWN_4E:
 
1820
        case GE_CMD_UNKNOWN_4F:
 
1821
        case GE_CMD_UNKNOWN_52:
 
1822
        case GE_CMD_UNKNOWN_59:
 
1823
        case GE_CMD_UNKNOWN_5A:
 
1824
        case GE_CMD_UNKNOWN_B6:
 
1825
        case GE_CMD_UNKNOWN_B7:
 
1826
        case GE_CMD_UNKNOWN_D1:
 
1827
        case GE_CMD_UNKNOWN_ED:
 
1828
        case GE_CMD_UNKNOWN_EF:
 
1829
        case GE_CMD_UNKNOWN_FA:
 
1830
        case GE_CMD_UNKNOWN_FB:
 
1831
        case GE_CMD_UNKNOWN_FC:
 
1832
        case GE_CMD_UNKNOWN_FD:
 
1833
        case GE_CMD_UNKNOWN_FE:
 
1834
                if (data != 0)
 
1835
                        WARN_LOG_REPORT_ONCE(unknowncmd, G3D, "Unknown GE command : %08x ", op);
 
1836
                break;
 
1837
        case GE_CMD_UNKNOWN_FF:
 
1838
                // This is hit in quite a few games, supposedly it is a no-op.
 
1839
                // Might be used for debugging or something?
 
1840
                break;
 
1841
                
 
1842
        default:
 
1843
                GPUCommon::ExecuteOp(op, diff);
 
1844
                break;
 
1845
        }
 
1846
}
 
1847
 
 
1848
void GPU_DX9::FastLoadBoneMatrix(u32 target) {
 
1849
        const int num = gstate.boneMatrixNumber & 0x7F;
 
1850
        const int mtxNum = num / 12;
 
1851
        uint32_t uniformsToDirty = DIRTY_BONEMATRIX0 << mtxNum;
 
1852
        if ((num - 12 * mtxNum) != 0) {
 
1853
                uniformsToDirty |= DIRTY_BONEMATRIX0 << ((mtxNum + 1) & 7);
 
1854
        }
 
1855
 
 
1856
        if (!g_Config.bSoftwareSkinning || (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) != 0) {
 
1857
                Flush();
 
1858
                shaderManager_->DirtyUniform(uniformsToDirty);
 
1859
        } else {
 
1860
                gstate_c.deferredVertTypeDirty |= uniformsToDirty;
 
1861
        }
 
1862
        gstate.FastLoadBoneMatrix(target);
 
1863
}
 
1864
 
 
1865
void GPU_DX9::GetStats(char *buffer, size_t bufsize) {
 
1866
        float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f;
 
1867
        snprintf(buffer, bufsize - 1,
 
1868
                "DL processing time: %0.2f ms\n"
 
1869
                "Draw calls: %i, flushes %i\n"
 
1870
                "Cached Draw calls: %i\n"
 
1871
                "Num Tracked Vertex Arrays: %i\n"
 
1872
                "GPU cycles executed: %d (%f per vertex)\n"
 
1873
                "Commands per call level: %i %i %i %i\n"
 
1874
                "Vertices submitted: %i\n"
 
1875
                "Cached, Uncached Vertices Drawn: %i, %i\n"
 
1876
                "FBOs active: %i\n"
 
1877
                "Textures active: %i, decoded: %i  invalidated: %i\n"
 
1878
                "Vertex, Fragment shaders loaded: %i, %i\n",
 
1879
                gpuStats.msProcessingDisplayLists * 1000.0f,
 
1880
                gpuStats.numDrawCalls,
 
1881
                gpuStats.numFlushes,
 
1882
                gpuStats.numCachedDrawCalls,
 
1883
                gpuStats.numTrackedVertexArrays,
 
1884
                gpuStats.vertexGPUCycles + gpuStats.otherGPUCycles,
 
1885
                vertexAverageCycles,
 
1886
                gpuStats.gpuCommandsAtCallLevel[0], gpuStats.gpuCommandsAtCallLevel[1], gpuStats.gpuCommandsAtCallLevel[2], gpuStats.gpuCommandsAtCallLevel[3],
 
1887
                gpuStats.numVertsSubmitted,
 
1888
                gpuStats.numCachedVertsDrawn,
 
1889
                gpuStats.numUncachedVertsDrawn,
 
1890
                (int)framebufferManager_.NumVFBs(),
 
1891
                (int)textureCache_.NumLoadedTextures(),
 
1892
                gpuStats.numTexturesDecoded,
 
1893
                gpuStats.numTextureInvalidations,
 
1894
                shaderManager_->NumVertexShaders(),
 
1895
                shaderManager_->NumFragmentShaders()
 
1896
        );
 
1897
}
 
1898
 
 
1899
void GPU_DX9::DoBlockTransfer(u32 skipDrawReason) {
 
1900
        // TODO: This is used a lot to copy data around between render targets and textures,
 
1901
        // and also to quickly load textures from RAM to VRAM. So we should do checks like the following:
 
1902
        //  * Does dstBasePtr point to an existing texture? If so maybe reload it immediately.
 
1903
        //
 
1904
        //  * Does srcBasePtr point to a render target, and dstBasePtr to a texture? If so
 
1905
        //    either copy between rt and texture or reassign the texture to point to the render target
 
1906
        //
 
1907
        // etc....
 
1908
 
 
1909
        u32 srcBasePtr = gstate.getTransferSrcAddress();
 
1910
        u32 srcStride = gstate.getTransferSrcStride();
 
1911
 
 
1912
        u32 dstBasePtr = gstate.getTransferDstAddress();
 
1913
        u32 dstStride = gstate.getTransferDstStride();
 
1914
 
 
1915
        int srcX = gstate.getTransferSrcX();
 
1916
        int srcY = gstate.getTransferSrcY();
 
1917
 
 
1918
        int dstX = gstate.getTransferDstX();
 
1919
        int dstY = gstate.getTransferDstY();
 
1920
 
 
1921
        int width = gstate.getTransferWidth();
 
1922
        int height = gstate.getTransferHeight();
 
1923
 
 
1924
        int bpp = gstate.getTransferBpp();
 
1925
 
 
1926
        DEBUG_LOG(G3D, "Block transfer: %08x/%x -> %08x/%x, %ix%ix%i (%i,%i)->(%i,%i)", srcBasePtr, srcStride, dstBasePtr, dstStride, width, height, bpp, srcX, srcY, dstX, dstY);
 
1927
 
 
1928
        if (!Memory::IsValidAddress(srcBasePtr)) {
 
1929
                ERROR_LOG_REPORT(G3D, "BlockTransfer: Bad source transfer address %08x!", srcBasePtr);
 
1930
                return;
 
1931
        }
 
1932
 
 
1933
        if (!Memory::IsValidAddress(dstBasePtr)) {
 
1934
                ERROR_LOG_REPORT(G3D, "BlockTransfer: Bad destination transfer address %08x!", dstBasePtr);
 
1935
                return;
 
1936
        }
 
1937
 
 
1938
        // Check that the last address of both source and dest are valid addresses
 
1939
 
 
1940
        u32 srcLastAddr = srcBasePtr + ((height - 1 + srcY) * srcStride + (srcX + width - 1)) * bpp;
 
1941
        u32 dstLastAddr = dstBasePtr + ((height - 1 + dstY) * dstStride + (dstX + width - 1)) * bpp;
 
1942
 
 
1943
        if (!Memory::IsValidAddress(srcLastAddr)) {
 
1944
                ERROR_LOG_REPORT(G3D, "Bottom-right corner of source of block transfer is at an invalid address: %08x", srcLastAddr);
 
1945
                return;
 
1946
        }
 
1947
        if (!Memory::IsValidAddress(dstLastAddr)) {
 
1948
                ERROR_LOG_REPORT(G3D, "Bottom-right corner of destination of block transfer is at an invalid address: %08x", srcLastAddr);
 
1949
                return;
 
1950
        }
 
1951
 
 
1952
        // Tell the framebuffer manager to take action if possible. If it does the entire thing, let's just return.
 
1953
        if (!framebufferManager_.NotifyBlockTransferBefore(dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, width, height, bpp, skipDrawReason)) {
 
1954
                // Do the copy! (Hm, if we detect a drawn video frame (see below) then we could maybe skip this?)
 
1955
                // Can use GetPointerUnchecked because we checked the addresses above. We could also avoid them
 
1956
                // entirely by walking a couple of pointers...
 
1957
                if (srcStride == dstStride && (u32)width == srcStride) {
 
1958
                        // Common case in God of War, let's do it all in one chunk.
 
1959
                        u32 srcLineStartAddr = srcBasePtr + (srcY * srcStride + srcX) * bpp;
 
1960
                        u32 dstLineStartAddr = dstBasePtr + (dstY * dstStride + dstX) * bpp;
 
1961
                        const u8 *src = Memory::GetPointerUnchecked(srcLineStartAddr);
 
1962
                        u8 *dst = Memory::GetPointerUnchecked(dstLineStartAddr);
 
1963
                        memcpy(dst, src, width * height * bpp);
 
1964
                } else {
 
1965
                        for (int y = 0; y < height; y++) {
 
1966
                                u32 srcLineStartAddr = srcBasePtr + ((y + srcY) * srcStride + srcX) * bpp;
 
1967
                                u32 dstLineStartAddr = dstBasePtr + ((y + dstY) * dstStride + dstX) * bpp;
 
1968
 
 
1969
                                const u8 *src = Memory::GetPointerUnchecked(srcLineStartAddr);
 
1970
                                u8 *dst = Memory::GetPointerUnchecked(dstLineStartAddr);
 
1971
                                memcpy(dst, src, width * bpp);
 
1972
                        }
 
1973
                }
 
1974
 
 
1975
                textureCache_.Invalidate(dstBasePtr + (dstY * dstStride + dstX) * bpp, height * dstStride * bpp, GPU_INVALIDATE_HINT);
 
1976
                framebufferManager_.NotifyBlockTransferAfter(dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, width, height, bpp, skipDrawReason);
 
1977
        }
 
1978
 
 
1979
        CBreakPoints::ExecMemCheck(srcBasePtr + (srcY * srcStride + srcX) * bpp, false, height * srcStride * bpp, currentMIPS->pc);
 
1980
        CBreakPoints::ExecMemCheck(dstBasePtr + (srcY * dstStride + srcX) * bpp, true, height * dstStride * bpp, currentMIPS->pc);
 
1981
 
 
1982
        // TODO: Correct timing appears to be 1.9, but erring a bit low since some of our other timing is inaccurate.
 
1983
        cyclesExecuted += ((height * width * bpp) * 16) / 10;
 
1984
}
 
1985
 
 
1986
void GPU_DX9::InvalidateCache(u32 addr, int size, GPUInvalidationType type) {
 
1987
        GPUEvent ev(GPU_EVENT_INVALIDATE_CACHE);
 
1988
        ev.invalidate_cache.addr = addr;
 
1989
        ev.invalidate_cache.size = size;
 
1990
        ev.invalidate_cache.type = type;
 
1991
        ScheduleEvent(ev);
 
1992
}
 
1993
 
 
1994
void GPU_DX9::InvalidateCacheInternal(u32 addr, int size, GPUInvalidationType type) {
 
1995
        if (size > 0)
 
1996
                textureCache_.Invalidate(addr, size, type);
 
1997
        else
 
1998
                textureCache_.InvalidateAll(type);
 
1999
 
 
2000
        if (type != GPU_INVALIDATE_ALL && framebufferManager_.MayIntersectFramebuffer(addr)) {
 
2001
                // If we're doing block transfers, we shouldn't need this, and it'll only confuse us.
 
2002
                // Vempire invalidates (with writeback) after drawing, but before blitting.
 
2003
                if (!g_Config.bBlockTransferGPU || type == GPU_INVALIDATE_SAFE) {
 
2004
                        framebufferManager_.UpdateFromMemory(addr, size, type == GPU_INVALIDATE_SAFE);
 
2005
                }
 
2006
        }
 
2007
}
 
2008
 
 
2009
void GPU_DX9::NotifyVideoUpload(u32 addr, int size, int width, int format) {
 
2010
        if (Memory::IsVRAMAddress(addr)) {
 
2011
                framebufferManager_.NotifyVideoUpload(addr, size, width, (GEBufferFormat)format);
 
2012
        }
 
2013
        textureCache_.NotifyVideoUpload(addr, size, width, (GEBufferFormat)format);
 
2014
        InvalidateCache(addr, size, GPU_INVALIDATE_SAFE);
 
2015
}
 
2016
 
 
2017
void GPU_DX9::PerformMemoryCopyInternal(u32 dest, u32 src, int size) {
 
2018
        if (!framebufferManager_.NotifyFramebufferCopy(src, dest, size, false, gstate_c.skipDrawReason)) {
 
2019
                // We use a little hack for Download/Upload using a VRAM mirror.
 
2020
                // Since they're identical we don't need to copy.
 
2021
                if (!Memory::IsVRAMAddress(dest) || (dest ^ 0x00400000) != src) {
 
2022
                        Memory::Memcpy(dest, src, size);
 
2023
                }
 
2024
        }
 
2025
        InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
 
2026
}
 
2027
 
 
2028
void GPU_DX9::PerformMemorySetInternal(u32 dest, u8 v, int size) {
 
2029
        if (!framebufferManager_.NotifyFramebufferCopy(dest, dest, size, true, gstate_c.skipDrawReason)) {
 
2030
                InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
 
2031
        }
 
2032
}
 
2033
 
 
2034
void GPU_DX9::PerformStencilUploadInternal(u32 dest, int size) {
 
2035
        framebufferManager_.NotifyStencilUpload(dest, size);
 
2036
}
 
2037
 
 
2038
bool GPU_DX9::PerformMemoryCopy(u32 dest, u32 src, int size) {
 
2039
        // Track stray copies of a framebuffer in RAM. MotoGP does this.
 
2040
        if (framebufferManager_.MayIntersectFramebuffer(src) || framebufferManager_.MayIntersectFramebuffer(dest)) {
 
2041
                if (IsOnSeparateCPUThread()) {
 
2042
                        GPUEvent ev(GPU_EVENT_FB_MEMCPY);
 
2043
                        ev.fb_memcpy.dst = dest;
 
2044
                        ev.fb_memcpy.src = src;
 
2045
                        ev.fb_memcpy.size = size;
 
2046
                        ScheduleEvent(ev);
 
2047
 
 
2048
                        // This is a memcpy, so we need to wait for it to complete.
 
2049
                        SyncThread();
 
2050
                } else {
 
2051
                        PerformMemoryCopyInternal(dest, src, size);
 
2052
                }
 
2053
                return true;
 
2054
        }
 
2055
 
 
2056
        InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
 
2057
        return false;
 
2058
}
 
2059
 
 
2060
bool GPU_DX9::PerformMemorySet(u32 dest, u8 v, int size) {
 
2061
        // This may indicate a memset, usually to 0, of a framebuffer.
 
2062
        if (framebufferManager_.MayIntersectFramebuffer(dest)) {
 
2063
                Memory::Memset(dest, v, size);
 
2064
 
 
2065
                if (IsOnSeparateCPUThread()) {
 
2066
                        GPUEvent ev(GPU_EVENT_FB_MEMSET);
 
2067
                        ev.fb_memset.dst = dest;
 
2068
                        ev.fb_memset.v = v;
 
2069
                        ev.fb_memset.size = size;
 
2070
                        ScheduleEvent(ev);
 
2071
 
 
2072
                        // We don't need to wait for the framebuffer to be updated.
 
2073
                } else {
 
2074
                        PerformMemorySetInternal(dest, v, size);
 
2075
                }
 
2076
                return true;
 
2077
        }
 
2078
 
 
2079
        // Or perhaps a texture, let's invalidate.
 
2080
        InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
 
2081
        return false;
 
2082
}
 
2083
 
 
2084
bool GPU_DX9::PerformMemoryDownload(u32 dest, int size) {
 
2085
        // Cheat a bit to force a download of the framebuffer.
 
2086
        // VRAM + 0x00400000 is simply a VRAM mirror.
 
2087
        if (Memory::IsVRAMAddress(dest)) {
 
2088
                return PerformMemoryCopy(dest ^ 0x00400000, dest, size);
 
2089
        }
 
2090
        return false;
 
2091
}
 
2092
 
 
2093
bool GPU_DX9::PerformMemoryUpload(u32 dest, int size) {
 
2094
        // Cheat a bit to force an upload of the framebuffer.
 
2095
        // VRAM + 0x00400000 is simply a VRAM mirror.
 
2096
        if (Memory::IsVRAMAddress(dest)) {
 
2097
                return PerformMemoryCopy(dest, dest ^ 0x00400000, size);
 
2098
        }
 
2099
        return false;
 
2100
}
 
2101
 
 
2102
bool GPU_DX9::PerformStencilUpload(u32 dest, int size) {
 
2103
        if (framebufferManager_.MayIntersectFramebuffer(dest)) {
 
2104
                if (IsOnSeparateCPUThread()) {
 
2105
                        GPUEvent ev(GPU_EVENT_FB_STENCIL_UPLOAD);
 
2106
                        ev.fb_stencil_upload.dst = dest;
 
2107
                        ev.fb_stencil_upload.size = size;
 
2108
                        ScheduleEvent(ev);
 
2109
                } else {
 
2110
                        PerformStencilUploadInternal(dest, size);
 
2111
                }
 
2112
                return true;
 
2113
        }
 
2114
        return false;
 
2115
}
 
2116
 
 
2117
void GPU_DX9::ClearCacheNextFrame() {
 
2118
        textureCache_.ClearNextFrame();
 
2119
}
 
2120
 
 
2121
void GPU_DX9::Resized() {
 
2122
        resized_ = true;
 
2123
        framebufferManager_.Resized();
 
2124
}
 
2125
 
 
2126
void GPU_DX9::ClearShaderCache() {
 
2127
        shaderManager_->ClearCache(true);
 
2128
}
 
2129
 
 
2130
std::vector<FramebufferInfo> GPU_DX9::GetFramebufferList() {
 
2131
        return framebufferManager_.GetFramebufferList();
 
2132
}
 
2133
 
 
2134
void GPU_DX9::DoState(PointerWrap &p) {
 
2135
        GPUCommon::DoState(p);
 
2136
 
 
2137
        // TODO: Some of these things may not be necessary.
 
2138
        // None of these are necessary when saving.
 
2139
        if (p.mode == p.MODE_READ) {
 
2140
                textureCache_.Clear(true);
 
2141
                drawEngine_.ClearTrackedVertexArrays();
 
2142
 
 
2143
                gstate_c.textureChanged = TEXCHANGE_UPDATED;
 
2144
                framebufferManager_.DestroyAllFBOs(true);
 
2145
                shaderManager_->ClearCache(true);
 
2146
        }
 
2147
}
 
2148
 
 
2149
bool GPU_DX9::GetCurrentFramebuffer(GPUDebugBuffer &buffer, int maxRes) {
 
2150
        return framebufferManager_.GetCurrentFramebuffer(buffer, maxRes);
 
2151
}
 
2152
 
 
2153
bool GPU_DX9::GetCurrentDepthbuffer(GPUDebugBuffer &buffer) {
 
2154
        return framebufferManager_.GetCurrentDepthbuffer(buffer);
 
2155
}
 
2156
 
 
2157
bool GPU_DX9::GetCurrentStencilbuffer(GPUDebugBuffer &buffer) {
 
2158
        return framebufferManager_.GetCurrentStencilbuffer(buffer);
 
2159
}
 
2160
 
 
2161
bool GPU_DX9::GetCurrentTexture(GPUDebugBuffer &buffer, int level) {
 
2162
        if (!gstate.isTextureMapEnabled()) {
 
2163
                return false;
 
2164
        }
 
2165
 
 
2166
        textureCache_.SetTexture(true);
 
2167
        textureCache_.ApplyTexture();
 
2168
        int w = gstate.getTextureWidth(level);
 
2169
        int h = gstate.getTextureHeight(level);
 
2170
 
 
2171
        LPDIRECT3DBASETEXTURE9 baseTex;
 
2172
        LPDIRECT3DTEXTURE9 tex;
 
2173
        LPDIRECT3DSURFACE9 offscreen = nullptr;
 
2174
        HRESULT hr;
 
2175
 
 
2176
        bool success = false;
 
2177
        hr = pD3Ddevice->GetTexture(0, &baseTex);
 
2178
        if (SUCCEEDED(hr) && baseTex != NULL) {
 
2179
                hr = baseTex->QueryInterface(IID_IDirect3DTexture9, (void **)&tex);
 
2180
                if (SUCCEEDED(hr)) {
 
2181
                        D3DSURFACE_DESC desc;
 
2182
                        D3DLOCKED_RECT locked;
 
2183
                        tex->GetLevelDesc(level, &desc);
 
2184
                        RECT rect = {0, 0, (LONG)desc.Width, (LONG)desc.Height};
 
2185
                        hr = tex->LockRect(level, &locked, &rect, D3DLOCK_READONLY);
 
2186
 
 
2187
                        // If it fails, this means it's a render-to-texture, so we have to get creative.
 
2188
                        if (FAILED(hr)) {
 
2189
                                LPDIRECT3DSURFACE9 renderTarget = nullptr;
 
2190
                                hr = tex->GetSurfaceLevel(level, &renderTarget);
 
2191
                                if (renderTarget && SUCCEEDED(hr)) {
 
2192
                                        hr = pD3Ddevice->CreateOffscreenPlainSurface(desc.Width, desc.Height, desc.Format, D3DPOOL_SYSTEMMEM, &offscreen, NULL);
 
2193
                                        if (SUCCEEDED(hr)) {
 
2194
                                                hr = pD3Ddevice->GetRenderTargetData(renderTarget, offscreen);
 
2195
                                                if (SUCCEEDED(hr)) {
 
2196
                                                        hr = offscreen->LockRect(&locked, &rect, D3DLOCK_READONLY);
 
2197
                                                }
 
2198
                                        }
 
2199
                                        renderTarget->Release();
 
2200
                                }
 
2201
                        }
 
2202
 
 
2203
                        if (SUCCEEDED(hr)) {
 
2204
                                GPUDebugBufferFormat fmt;
 
2205
                                int pixelSize;
 
2206
                                switch (desc.Format) {
 
2207
                                case D3DFMT_A1R5G5B5:
 
2208
                                        fmt = gstate_c.bgraTexture ? GPU_DBG_FORMAT_5551 : GPU_DBG_FORMAT_5551_BGRA;
 
2209
                                        pixelSize = 2;
 
2210
                                        break;
 
2211
                                case D3DFMT_A4R4G4B4:
 
2212
                                        fmt = gstate_c.bgraTexture ? GPU_DBG_FORMAT_4444 : GPU_DBG_FORMAT_4444_BGRA;
 
2213
                                        pixelSize = 2;
 
2214
                                        break;
 
2215
                                case D3DFMT_R5G6B5:
 
2216
                                        fmt = gstate_c.bgraTexture ? GPU_DBG_FORMAT_565 : GPU_DBG_FORMAT_565_BGRA;
 
2217
                                        pixelSize = 2;
 
2218
                                        break;
 
2219
                                case D3DFMT_A8R8G8B8:
 
2220
                                        fmt = gstate_c.bgraTexture ? GPU_DBG_FORMAT_8888 : GPU_DBG_FORMAT_8888_BGRA;
 
2221
                                        pixelSize = 4;
 
2222
                                        break;
 
2223
                                default:
 
2224
                                        fmt = GPU_DBG_FORMAT_INVALID;
 
2225
                                        break;
 
2226
                                }
 
2227
 
 
2228
                                if (fmt != GPU_DBG_FORMAT_INVALID) {
 
2229
                                        buffer.Allocate(locked.Pitch / pixelSize, desc.Height, fmt, false);
 
2230
                                        memcpy(buffer.GetData(), locked.pBits, locked.Pitch * desc.Height);
 
2231
                                        success = true;
 
2232
                                } else {
 
2233
                                        success = false;
 
2234
                                }
 
2235
                                if (offscreen) {
 
2236
                                        offscreen->UnlockRect();
 
2237
                                        offscreen->Release();
 
2238
                                } else {
 
2239
                                        tex->UnlockRect(level);
 
2240
                                }
 
2241
                        }
 
2242
                        tex->Release();
 
2243
                }
 
2244
                baseTex->Release();
 
2245
        }
 
2246
 
 
2247
        return success;
 
2248
}
 
2249
 
 
2250
bool GPU_DX9::GetCurrentClut(GPUDebugBuffer &buffer) {
 
2251
        return textureCache_.GetCurrentClutBuffer(buffer);
 
2252
}
 
2253
 
 
2254
bool GPU_DX9::GetDisplayFramebuffer(GPUDebugBuffer &buffer) {
 
2255
        return FramebufferManagerDX9::GetDisplayFramebuffer(buffer);
 
2256
}
 
2257
 
 
2258
bool GPU_DX9::GetCurrentSimpleVertices(int count, std::vector<GPUDebugVertex> &vertices, std::vector<u16> &indices) {
 
2259
        return drawEngine_.GetCurrentSimpleVertices(count, vertices, indices);
 
2260
}
 
2261
 
 
2262
std::vector<std::string> GPU_DX9::DebugGetShaderIDs(DebugShaderType type) {
 
2263
        if (type == SHADER_TYPE_VERTEXLOADER) {
 
2264
                return drawEngine_.DebugGetVertexLoaderIDs();
 
2265
        } else {
 
2266
                return shaderManager_->DebugGetShaderIDs(type);
 
2267
        }
 
2268
}
 
2269
 
 
2270
std::string GPU_DX9::DebugGetShaderString(std::string id, DebugShaderType type, DebugShaderStringType stringType) {
 
2271
        if (type == SHADER_TYPE_VERTEXLOADER) {
 
2272
                return drawEngine_.DebugGetVertexLoaderString(id, stringType);
 
2273
        } else {
 
2274
                return shaderManager_->DebugGetShaderString(id, type, stringType);
 
2275
        }
 
2276
}
 
2277
 
 
2278
}  // namespace DX9