1
// Copyright (c) 2012- PPSSPP Project.
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
20
#include "Core/Reporting.h"
21
#include "Core/Config.h"
22
#include "GPU/Directx9/helper/global.h"
23
#include "GPU/Directx9/PixelShaderGeneratorDX9.h"
24
#include "GPU/ge_constants.h"
25
#include "GPU/Common/GPUStateUtils.h"
26
#include "GPU/GPUState.h"
28
#define WRITE p+=sprintf
30
// #define DEBUG_SHADER
34
// Not implemented: Z depth range.
35
// Logic ops and similar features are also missing, since DX9 does not support them.
36
bool GenerateFragmentShaderDX9(const ShaderID &id, char *buffer) {
39
bool lmode = id.Bit(FS_BIT_LMODE);
40
bool doTexture = id.Bit(FS_BIT_DO_TEXTURE);
41
bool enableFog = id.Bit(FS_BIT_ENABLE_FOG);
42
bool enableAlphaTest = id.Bit(FS_BIT_ALPHA_TEST);
44
bool alphaTestAgainstZero = id.Bit(FS_BIT_ALPHA_AGAINST_ZERO);
45
bool enableColorTest = id.Bit(FS_BIT_COLOR_TEST);
46
bool colorTestAgainstZero = id.Bit(FS_BIT_COLOR_AGAINST_ZERO);
47
bool enableColorDoubling = id.Bit(FS_BIT_COLOR_DOUBLE);
48
bool doTextureProjection = id.Bit(FS_BIT_DO_TEXTURE_PROJ);
49
bool doTextureAlpha = id.Bit(FS_BIT_TEXALPHA);
50
bool doFlatShading = id.Bit(FS_BIT_FLATSHADE);
51
bool isModeClear = id.Bit(FS_BIT_CLEARMODE);
53
bool bgraTexture = id.Bit(FS_BIT_BGRA_TEXTURE);
55
GEComparison alphaTestFunc = (GEComparison)id.Bits(FS_BIT_ALPHA_TEST_FUNC, 3);
56
GEComparison colorTestFunc = (GEComparison)id.Bits(FS_BIT_COLOR_TEST_FUNC, 2);
57
bool needShaderTexClamp = id.Bit(FS_BIT_SHADER_TEX_CLAMP);
59
ReplaceBlendType replaceBlend = static_cast<ReplaceBlendType>(id.Bits(FS_BIT_REPLACE_BLEND, 3));
60
ReplaceAlphaType stencilToAlpha = static_cast<ReplaceAlphaType>(id.Bits(FS_BIT_STENCIL_TO_ALPHA, 2));
62
GETexFunc texFunc = (GETexFunc)id.Bits(FS_BIT_TEXFUNC, 3);
63
bool textureAtOffset = id.Bit(FS_BIT_TEXTURE_AT_OFFSET);
65
GEBlendSrcFactor replaceBlendFuncA = (GEBlendSrcFactor)id.Bits(FS_BIT_BLENDFUNC_A, 4);
66
GEBlendDstFactor replaceBlendFuncB = (GEBlendDstFactor)id.Bits(FS_BIT_BLENDFUNC_B, 4);
67
GEBlendMode replaceBlendEq = (GEBlendMode)id.Bits(FS_BIT_BLENDEQ, 3);
69
StencilValueType replaceAlphaWithStencilType = (StencilValueType)id.Bits(FS_BIT_REPLACE_ALPHA_WITH_STENCIL_TYPE, 4);
72
WRITE(p, "sampler tex : register(s0);\n");
73
if (!isModeClear && replaceBlend > REPLACE_BLEND_STANDARD) {
74
if (replaceBlend == REPLACE_BLEND_COPY_FBO) {
75
WRITE(p, "float2 u_fbotexSize : register(c%i);\n", CONST_PS_FBOTEXSIZE);
76
WRITE(p, "sampler fbotex : register(s1);\n");
78
if (replaceBlendFuncA >= GE_SRCBLEND_FIXA) {
79
WRITE(p, "float3 u_blendFixA : register(c%i);\n", CONST_PS_BLENDFIXA);
81
if (replaceBlendFuncB >= GE_DSTBLEND_FIXB) {
82
WRITE(p, "float3 u_blendFixB : register(c%i);\n", CONST_PS_BLENDFIXB);
85
if (gstate_c.needShaderTexClamp && doTexture) {
86
WRITE(p, "float4 u_texclamp : register(c%i);\n", CONST_PS_TEXCLAMP);
87
if (textureAtOffset) {
88
WRITE(p, "float2 u_texclampoff : register(c%i);\n", CONST_PS_TEXCLAMPOFF);
92
if (enableAlphaTest || enableColorTest) {
93
WRITE(p, "float4 u_alphacolorref : register(c%i);\n", CONST_PS_ALPHACOLORREF);
94
WRITE(p, "float4 u_alphacolormask : register(c%i);\n", CONST_PS_ALPHACOLORMASK);
96
if (stencilToAlpha && replaceAlphaWithStencilType == STENCIL_VALUE_UNIFORM) {
97
WRITE(p, "float u_stencilReplaceValue : register(c%i);\n", CONST_PS_STENCILREPLACE);
99
if (doTexture && texFunc == GE_TEXFUNC_BLEND) {
100
WRITE(p, "float3 u_texenv : register(c%i);\n", CONST_PS_TEXENV);
103
WRITE(p, "float3 u_fogcolor : register(c%i);\n", CONST_PS_FOGCOLOR);
106
if (enableAlphaTest) {
107
WRITE(p, "float roundAndScaleTo255f(float x) { return floor(x * 255.0f + 0.5f); }\n");
109
if (enableColorTest) {
110
WRITE(p, "float3 roundAndScaleTo255v(float3 x) { return floor(x * 255.0f + 0.5f); }\n");
113
WRITE(p, "struct PS_IN {\n");
115
if (doTextureProjection)
116
WRITE(p, " float3 v_texcoord: TEXCOORD0;\n");
118
WRITE(p, " float2 v_texcoord: TEXCOORD0;\n");
120
WRITE(p, " float4 v_color0: COLOR0;\n");
122
WRITE(p, " float3 v_color1: COLOR1;\n");
125
WRITE(p, " float2 v_fogdepth: TEXCOORD1;\n");
128
WRITE(p, "float4 main( PS_IN In ) : COLOR\n");
132
// Clear mode does not allow any fancy shading.
133
WRITE(p, " float4 v = In.v_color0;\n");
135
const char *secondary = "";
136
// Secondary color for specular on top of texture
138
WRITE(p, " float4 s = float4(In.v_color1, 0);\n");
145
const char *texcoord = "In.v_texcoord";
146
// TODO: Not sure the right way to do this for projection.
147
if (needShaderTexClamp) {
148
// We may be clamping inside a larger surface (tex = 64x64, buffer=480x272).
149
// We may also be wrapping in such a surface, or either one in a too-small surface.
150
// Obviously, clamping to a smaller surface won't work. But better to clamp to something.
151
std::string ucoord = "In.v_texcoord.x";
152
std::string vcoord = "In.v_texcoord.y";
153
if (doTextureProjection) {
154
ucoord = "(In.v_texcoord.x / In.v_texcoord.z)";
155
vcoord = "(In.v_texcoord.y / In.v_texcoord.z)";
158
if (id.Bit(FS_BIT_CLAMP_S)) {
159
ucoord = "clamp(" + ucoord + ", u_texclamp.z, u_texclamp.x - u_texclamp.z)";
161
ucoord = "fmod(" + ucoord + ", u_texclamp.x)";
163
if (id.Bit(FS_BIT_CLAMP_T)) {
164
vcoord = "clamp(" + vcoord + ", u_texclamp.w, u_texclamp.y - u_texclamp.w)";
166
vcoord = "fmod(" + vcoord + ", u_texclamp.y)";
168
if (textureAtOffset) {
169
ucoord = "(" + ucoord + " + u_texclampoff.x)";
170
vcoord = "(" + vcoord + " + u_texclampoff.y)";
173
WRITE(p, " float2 fixedcoord = float2(%s, %s);\n", ucoord.c_str(), vcoord.c_str());
174
texcoord = "fixedcoord";
175
// We already projected it.
176
doTextureProjection = false;
179
if (doTextureProjection) {
180
WRITE(p, " float4 t = tex2Dproj(tex, float4(In.v_texcoord.x, In.v_texcoord.y, 0, In.v_texcoord.z))%s;\n", bgraTexture ? ".bgra" : "");
182
WRITE(p, " float4 t = tex2D(tex, %s.xy)%s;\n", texcoord, bgraTexture ? ".bgra" : "");
184
WRITE(p, " float4 p = In.v_color0;\n");
186
if (doTextureAlpha) { // texfmt == RGBA
188
case GE_TEXFUNC_MODULATE:
189
WRITE(p, " float4 v = p * t%s;\n", secondary); break;
190
case GE_TEXFUNC_DECAL:
191
WRITE(p, " float4 v = float4(lerp(p.rgb, t.rgb, t.a), p.a)%s;\n", secondary); break;
192
case GE_TEXFUNC_BLEND:
193
WRITE(p, " float4 v = float4(lerp(p.rgb, u_texenv.rgb, t.rgb), p.a * t.a)%s;\n", secondary); break;
194
case GE_TEXFUNC_REPLACE:
195
WRITE(p, " float4 v = t%s;\n", secondary); break;
197
case GE_TEXFUNC_UNKNOWN1:
198
case GE_TEXFUNC_UNKNOWN2:
199
case GE_TEXFUNC_UNKNOWN3:
200
WRITE(p, " float4 v = float4(p.rgb + t.rgb, p.a * t.a)%s;\n", secondary); break;
202
WRITE(p, " float4 v = p;\n"); break;
205
} else { // texfmt == RGB
207
case GE_TEXFUNC_MODULATE:
208
WRITE(p, " float4 v = float4(t.rgb * p.rgb, p.a)%s;\n", secondary); break;
209
case GE_TEXFUNC_DECAL:
210
WRITE(p, " float4 v = float4(t.rgb, p.a)%s;\n", secondary); break;
211
case GE_TEXFUNC_BLEND:
212
WRITE(p, " float4 v = float4(lerp(p.rgb, u_texenv.rgb, t.rgb), p.a)%s;\n", secondary); break;
213
case GE_TEXFUNC_REPLACE:
214
WRITE(p, " float4 v = float4(t.rgb, p.a)%s;\n", secondary); break;
216
case GE_TEXFUNC_UNKNOWN1:
217
case GE_TEXFUNC_UNKNOWN2:
218
case GE_TEXFUNC_UNKNOWN3:
219
WRITE(p, " float4 v = float4(p.rgb + t.rgb, p.a)%s;\n", secondary); break;
221
WRITE(p, " float4 v = p;\n"); break;
225
// No texture mapping
226
WRITE(p, " float4 v = In.v_color0 %s;\n", secondary);
229
if (enableAlphaTest) {
230
if (alphaTestAgainstZero) {
231
// When testing against 0 (extremely common), we can avoid some math.
232
// 0.002 is approximately half of 1.0 / 255.0.
233
if (alphaTestFunc == GE_COMP_NOTEQUAL || alphaTestFunc == GE_COMP_GREATER) {
234
WRITE(p, " clip(v.a - 0.002);\n");
235
} else if (alphaTestFunc != GE_COMP_NEVER) {
236
// Anything else is a test for == 0. Happens sometimes, actually...
237
WRITE(p, " clip(-v.a + 0.002);\n");
239
// NEVER has been logged as used by games, although it makes little sense - statically failing.
240
// Maybe we could discard the drawcall, but it's pretty rare. Let's just statically discard here.
241
WRITE(p, " clip(-1);\n");
244
const char *alphaTestFuncs[] = { "#", "#", " != ", " == ", " >= ", " > ", " <= ", " < " }; // never/always don't make sense
245
if (alphaTestFuncs[alphaTestFunc][0] != '#') {
246
// TODO: Rewrite this to use clip() appropriately (like, clip(v.a - u_alphacolorref.a))
247
WRITE(p, " if (roundAndScaleTo255f(v.a) %s u_alphacolorref.a) clip(-1);\n", alphaTestFuncs[alphaTestFunc]);
249
// This means NEVER. See above.
250
WRITE(p, " clip(-1);\n");
254
if (enableColorTest) {
255
if (colorTestAgainstZero) {
256
// When testing against 0 (common), we can avoid some math.
257
// 0.002 is approximately half of 1.0 / 255.0.
258
if (colorTestFunc == GE_COMP_NOTEQUAL) {
259
WRITE(p, " if (v.r < 0.002 && v.g < 0.002 && v.b < 0.002) clip(-1);\n");
260
} else if (colorTestFunc != GE_COMP_NEVER) {
261
// Anything else is a test for == 0.
262
WRITE(p, " if (v.r > 0.002 || v.g > 0.002 || v.b > 0.002) clip(-1);\n");
264
// NEVER has been logged as used by games, although it makes little sense - statically failing.
265
// Maybe we could discard the drawcall, but it's pretty rare. Let's just statically discard here.
266
WRITE(p, " clip(-1);\n");
269
const char *colorTestFuncs[] = { "#", "#", " != ", " == " }; // never/always don't make sense
270
if (colorTestFuncs[colorTestFunc][0] != '#') {
271
const char * test = colorTestFuncs[colorTestFunc];
272
WRITE(p, " float3 colortest = roundAndScaleTo255v(v.rgb);\n");
273
WRITE(p, " if ((colortest.r %s u_alphacolorref.r) && (colortest.g %s u_alphacolorref.g) && (colortest.b %s u_alphacolorref.b )) clip(-1);\n", test, test, test);
275
WRITE(p, " clip(-1);\n");
280
// Color doubling happens after the color test.
281
if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) {
282
WRITE(p, " v.rgb = v.rgb * 4.0;\n");
283
} else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) {
284
WRITE(p, " v.rgb = v.rgb * 2.0;\n");
288
WRITE(p, " float fogCoef = clamp(In.v_fogdepth.x, 0.0, 1.0);\n");
289
WRITE(p, " v = lerp(float4(u_fogcolor, v.a), v, fogCoef);\n");
292
if (replaceBlend == REPLACE_BLEND_PRE_SRC || replaceBlend == REPLACE_BLEND_PRE_SRC_2X_ALPHA) {
293
const char *srcFactor = "ERROR";
294
switch (replaceBlendFuncA) {
295
case GE_SRCBLEND_DSTCOLOR: srcFactor = "ERROR"; break;
296
case GE_SRCBLEND_INVDSTCOLOR: srcFactor = "ERROR"; break;
297
case GE_SRCBLEND_SRCALPHA: srcFactor = "float3(v.a, v.a, v.a)"; break;
298
case GE_SRCBLEND_INVSRCALPHA: srcFactor = "float3(1.0 - v.a, 1.0 - v.a, 1.0 - v.a)"; break;
299
case GE_SRCBLEND_DSTALPHA: srcFactor = "ERROR"; break;
300
case GE_SRCBLEND_INVDSTALPHA: srcFactor = "ERROR"; break;
301
case GE_SRCBLEND_DOUBLESRCALPHA: srcFactor = "float3(v.a * 2.0, v.a * 2.0, v.a * 2.0)"; break;
302
case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "float3(1.0 - v.a * 2.0, 1.0 - v.a * 2.0, 1.0 - v.a * 2.0)"; break;
303
// PRE_SRC for REPLACE_BLEND_PRE_SRC_2X_ALPHA means "double the src."
304
// It's close to the same, but clamping can still be an issue.
305
case GE_SRCBLEND_DOUBLEDSTALPHA: srcFactor = "float3(2.0, 2.0, 2.0)"; break;
306
case GE_SRCBLEND_DOUBLEINVDSTALPHA: srcFactor = "ERROR"; break;
307
case GE_SRCBLEND_FIXA: srcFactor = "u_blendFixA"; break;
308
default: srcFactor = "u_blendFixA"; break;
311
WRITE(p, " v.rgb = v.rgb * %s;\n", srcFactor);
314
// Can do REPLACE_BLEND_COPY_FBO in ps_2_0, but need to apply viewport in the vertex shader
315
// so that we can have the output position here to sample the texture at.
317
if (replaceBlend == REPLACE_BLEND_2X_ALPHA || replaceBlend == REPLACE_BLEND_PRE_SRC_2X_ALPHA) {
318
WRITE(p, " v.a = v.a * 2.0;\n");
322
std::string replacedAlpha = "0.0";
323
char replacedAlphaTemp[64] = "";
324
if (stencilToAlpha != REPLACE_ALPHA_NO) {
325
switch (replaceAlphaWithStencilType) {
326
case STENCIL_VALUE_UNIFORM:
327
replacedAlpha = "u_stencilReplaceValue";
330
case STENCIL_VALUE_ZERO:
331
replacedAlpha = "0.0";
334
case STENCIL_VALUE_ONE:
335
case STENCIL_VALUE_INVERT:
336
// In invert, we subtract by one, but we want to output one here.
337
replacedAlpha = "1.0";
340
case STENCIL_VALUE_INCR_4:
341
case STENCIL_VALUE_DECR_4:
342
// We're adding/subtracting, just by the smallest value in 4-bit.
343
snprintf(replacedAlphaTemp, sizeof(replacedAlphaTemp), "%f", 1.0 / 15.0);
344
replacedAlpha = replacedAlphaTemp;
347
case STENCIL_VALUE_INCR_8:
348
case STENCIL_VALUE_DECR_8:
349
// We're adding/subtracting, just by the smallest value in 8-bit.
350
snprintf(replacedAlphaTemp, sizeof(replacedAlphaTemp), "%f", 1.0 / 255.0);
351
replacedAlpha = replacedAlphaTemp;
354
case STENCIL_VALUE_KEEP:
355
// Do nothing. We'll mask out the alpha using color mask.
360
switch (stencilToAlpha) {
361
case REPLACE_ALPHA_DUALSOURCE:
362
WRITE(p, " v.a = %s;\n", replacedAlpha.c_str());
363
// TODO: Output the second color as well using original v.a.
366
case REPLACE_ALPHA_YES:
367
WRITE(p, " v.a = %s;\n", replacedAlpha.c_str());
370
case REPLACE_ALPHA_NO:
371
// Do nothing, v is already fine.
375
LogicOpReplaceType replaceLogicOpType = (LogicOpReplaceType)id.Bits(FS_BIT_REPLACE_LOGIC_OP_TYPE, 2);
376
switch (replaceLogicOpType) {
377
case LOGICOPTYPE_ONE:
378
WRITE(p, " v.rgb = float3(1.0, 1.0, 1.0);\n");
380
case LOGICOPTYPE_INVERT:
381
WRITE(p, " v.rgb = float3(1.0, 1.0, 1.0) - v.rgb;\n");
383
case LOGICOPTYPE_NORMAL:
387
WRITE(p, " return v;\n");