1
// Copyright 2013 Dolphin Emulator Project
2
// Licensed under GPLv2
3
// Refer to the license.txt file included.
7
#include "Rasterizer.h"
8
#include "HwRasterizer.h"
9
#include "EfbInterface.h"
10
#include "BPMemLoader.h"
11
#include "XFMemLoader.h"
13
#include "SWPixelEngine.h"
14
#include "SWStatistics.h"
15
#include "SWVideoConfig.h"
20
// Clamps x to the range [a, b]. The upper bound is checked first, so b wins if a > b.
// Every argument and the whole expansion are parenthesized so the macro composes safely
// inside larger expressions. Arguments may be evaluated more than once: pass
// side-effect-free expressions only.
#define CLAMP(x, a, b) (((x) > (b)) ? (b) : (((x) < (a)) ? (a) : (x)))
22
// Returns an approximation of log2(f) as a fixed-point value with 4 fractional
// bits (s28.4).
23
// The results are close enough to use for LOD selection.
24
static inline s32 FixedLog2(float f)
27
// NOTE(review): x is declared outside the lines visible here; presumably it
// points at the raw 32-bit pattern of f -- confirm.
// Masking with 0x7F800000 and shifting right by 19 leaves that field scaled
// by 16; subtracting 2032 (127 * 16) then removes a bias of 127.
s32 logInt = ((*x & 0x7F800000) >> 19) - 2032; // integer part
28
// The top 4 bits of the low 23-bit field are used directly as the fraction.
s32 logFract = (*x & 0x007fffff) >> 19; // approximate fractional part
30
return logInt + logFract;
37
Slope ColorSlopes[2][4];
38
Slope TexSlopes[8][3];
48
s32 scissorBottom = 0;
51
RasterBlock rasterBlock;
53
// Passes every entry of ColorSlopes and TexSlopes to its own DoState(p).
// NOTE(review): the outer index i is declared outside the lines visible here.
void DoState(PointerWrap &p)
58
// 4 slopes per ColorSlopes row.
for (int n=0; n<4; ++n)
59
ColorSlopes[i][n].DoState(p);
61
// 3 slopes per TexSlopes row.
for (int n=0; n<3; ++n)
62
TexSlopes[i][n].DoState(p);
79
// Set initial z reference plane in the unlikely case that zfreeze is enabled when drawing the first primitive.
80
// TODO: This is just a guess!
81
ZSlope.dfdx = ZSlope.dfdy = 0.f;
85
inline int iround(float x)
89
#if defined(_WIN32) && !defined(_M_X64)
106
int xoff = bpmem.scissorOffset.x * 2 - 342;
107
int yoff = bpmem.scissorOffset.y * 2 - 342;
109
scissorLeft = bpmem.scissorTL.x - xoff - 342;
110
if (scissorLeft < 0) scissorLeft = 0;
112
scissorTop = bpmem.scissorTL.y - yoff - 342;
113
if (scissorTop < 0) scissorTop = 0;
115
scissorRight = bpmem.scissorBR.x - xoff - 341;
116
if (scissorRight > EFB_WIDTH) scissorRight = EFB_WIDTH;
118
scissorBottom = bpmem.scissorBR.y - yoff - 341;
119
if (scissorBottom > EFB_HEIGHT) scissorBottom = EFB_HEIGHT;
122
// Forwards a register colour update to the TEV unit: all four arguments are
// passed unchanged to tev.SetRegColor.
void SetTevReg(int reg, int comp, bool konst, s16 color)
124
tev.SetRegColor(reg, comp, konst, color);
127
// Processes one pixel: interpolates depth and colours at (x, y) and loads them,
// together with the per-block texture data, into tev.
//   x, y   - pixel position (also passed to EfbInterface::ZCompare)
//   xi, yi - indices of this pixel inside rasterBlock.Pixel
// NOTE(review): the statements controlled by the if-conditions below (e.g. any
// early return) are not visible in this view.
inline void Draw(s32 x, s32 y, s32 xi, s32 yi)
129
INCSTAT(swstats.thisFrame.rasterizedPixels);
131
// Offset of this pixel from the reference point used by the slopes.
float dx = vertexOffsetX + (float)(x - vertex0X);
132
float dy = vertexOffsetY + (float)(y - vertex0Y);
134
s32 z = (s32)ZSlope.GetValue(dx, dy);
135
// Depth must fit in 24 bits.
if (z < 0 || z > 0x00ffffff)
138
// Early depth test, only when both the hardware state and the config flag ask for it.
if (bpmem.UseEarlyDepthTest() && g_SWVideoConfig.bZComploc)
140
// TODO: Test if perf regs are incremented even if test is disabled
141
SWPixelEngine::pereg.IncZInputQuadCount(true);
142
if (bpmem.zmode.testenable)
145
if (!EfbInterface::ZCompare(x, y, z))
148
SWPixelEngine::pereg.IncZOutputQuadCount(true);
151
// Texture coordinates for this pixel were precomputed per block.
RasterBlockPixel& pixel = rasterBlock.Pixel[xi][yi];
158
// Interpolate each component of each enabled colour channel.
for (unsigned int i = 0; i < bpmem.genMode.numcolchans; i++)
160
for(int comp = 0; comp < 4; comp++)
162
u16 color = (u16)ColorSlopes[i][comp].GetValue(dx, dy);
164
// clamp color value to 0
// (bits that are set in color >> 8 are cleared from the stored value)
165
u16 mask = ~(color >> 8);
167
tev.Color[i][comp] = color & mask;
172
for (unsigned int i = 0; i < bpmem.genMode.numtexgens; i++)
174
// multiply by 128 because TEV stores UVs as s17.7
175
tev.Uv[i].s = (s32)(pixel.Uv[i][0] * 128);
176
tev.Uv[i].t = (s32)(pixel.Uv[i][1] * 128);
179
// Copy the per-block LOD / filter decisions for the indirect stages.
for (unsigned int i = 0; i < bpmem.genMode.numindstages; i++)
181
tev.IndirectLod[i] = rasterBlock.IndirectLod[i];
182
tev.IndirectLinear[i] = rasterBlock.IndirectLinear[i];
185
// Note the inclusive bound: numtevstages + 1 entries are copied.
for (unsigned int i = 0; i <= bpmem.genMode.numtevstages; i++)
187
tev.TextureLod[i] = rasterBlock.TextureLod[i];
188
tev.TextureLinear[i] = rasterBlock.TextureLinear[i];
194
// Sets vertexOffsetX / vertexOffsetY for the current triangle: the distance
// from the float position (X1, Y1) to the integer position (xi, yi), plus a
// sampling adjustment of just under half a unit.
// NOTE(review): other statements of this function may lie outside the visible lines.
void InitTriangle(float X1, float Y1, s32 xi, s32 yi)
199
// adjust a little less than 0.5
200
const float adjust = 0.495f;
202
vertexOffsetX = ((float)xi - X1) + adjust;
203
vertexOffsetY = ((float)yi - Y1) + adjust;
206
// Computes the gradient (dfdx, dfdy) of an attribute that takes the values
// f1, f2, f3 at the triangle's three vertices and stores it in *slope.
//   DX31, DX12, DY12, DY31 - vertex position deltas; the caller in this file
//                            passes x3-x1, x1-x2, y1-y2 and y3-y1.
// The division by c is not guarded in the visible lines; c is zero when both
// products in its expression cancel.
void InitSlope(Slope *slope, float f1, float f2, float f3, float DX31, float DX12, float DY12, float DY31)
208
// Attribute differences relative to the first vertex.
float DF31 = f3 - f1;
209
float DF21 = f2 - f1;
210
float a = DF31 * -DY12 - DF21 * DY31;
211
float b = DX31 * DF21 + DX12 * DF31;
212
// c depends only on the position deltas, not on the attribute values.
float c = -DX12 * DY31 - DX31 * -DY12;
213
slope->dfdx = -a / c;
214
slope->dfdy = -b / c;
218
// Derives the LOD value and the filter choice for one texture map / texture
// coordinate pair from UV differences between pixels of rasterBlock.
//   lod      - out: FixedLog2 of the largest UV delta, finally clamped to
//              [tm1.min_lod, tm1.max_lod]
//   linear   - out: result of the min/mag filter test below
//   texmap   - bit 2 selects the bpmem.tex unit, bits 0-1 the entry inside it
//   texcoord - index into each pixel's Uv array
inline void CalculateLOD(s32 &lod, bool &linear, u32 texmap, u32 texcoord)
220
FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1];
221
u8 subTexmap = texmap & 3;
223
// LOD calculation requires data from the texture mode for bias, etc.
224
// it does not seem to use the actual texture size
225
TexMode0& tm0 = texUnit.texMode0[subTexmap];
226
TexMode1& tm1 = texUnit.texMode1[subTexmap];
228
float sDelta, tDelta;
// NOTE(review): two alternative delta computations follow; the condition
// that selects between them is outside the visible lines.
231
// Variant 1: difference to the diagonal neighbour Pixel[1][1].
float *uv0 = rasterBlock.Pixel[0][0].Uv[texcoord];
232
float *uv1 = rasterBlock.Pixel[1][1].Uv[texcoord];
234
sDelta = fabsf(uv0[0] - uv1[0]);
235
tDelta = fabsf(uv0[1] - uv1[1]);
239
// Variant 2: larger of the differences to Pixel[1][0] and Pixel[0][1].
float *uv0 = rasterBlock.Pixel[0][0].Uv[texcoord];
240
float *uv1 = rasterBlock.Pixel[1][0].Uv[texcoord];
241
float *uv2 = rasterBlock.Pixel[0][1].Uv[texcoord];
243
sDelta = max(fabsf(uv0[0] - uv1[0]), fabsf(uv0[0] - uv2[0]));
244
tDelta = max(fabsf(uv0[1] - uv1[1]), fabsf(uv0[1] - uv2[1]));
248
lod = FixedLog2(max(sDelta, tDelta));
251
// NOTE(review): how bias is applied to lod is not visible in this view.
int bias = tm0.lod_bias;
255
// lod > 0 consults bit 2 of min_filter; lod <= 0 consults mag_filter.
linear = ((lod > 0 && (tm0.min_filter & 4)) || (lod <= 0 && tm0.mag_filter));
257
// order of checks matters
259
// if lod > max then max
260
// else if lod < min then min
261
lod = CLAMP(lod, (s32)tm1.min_lod, (s32)tm1.max_lod);
264
// Fills rasterBlock for the block whose first pixel is (blockX, blockY):
// per-pixel texture coordinates first, then one LOD / filter decision per
// indirect stage and per TEV stage via CalculateLOD.
void BuildBlock(s32 blockX, s32 blockY)
266
for (s32 yi = 0; yi < BLOCK_SIZE; yi++)
268
for (s32 xi = 0; xi < BLOCK_SIZE; xi++)
270
RasterBlockPixel& pixel = rasterBlock.Pixel[xi][yi];
272
// Offset of this pixel from the reference point used by the slopes.
float dx = vertexOffsetX + (float)(xi + blockX - vertex0X);
273
float dy = vertexOffsetY + (float)(yi + blockY - vertex0Y);
275
// Reciprocal of the value interpolated by WSlope.
float invW = 1.0f / WSlope.GetValue(dx, dy);
279
for (unsigned int i = 0; i < bpmem.genMode.numtexgens; i++)
281
float projection = invW;
282
// Projected texgens additionally divide by the interpolated third component.
if (swxfregs.texMtxInfo[i].projection)
284
float q = TexSlopes[i][2].GetValue(dx, dy) * invW;
286
projection = invW / q;
289
pixel.Uv[i][0] = TexSlopes[i][0].GetValue(dx, dy) * projection;
290
pixel.Uv[i][1] = TexSlopes[i][1].GetValue(dx, dy) * projection;
295
u32 indref = bpmem.tevindref.hex;
296
for (unsigned int i = 0; i < bpmem.genMode.numindstages; i++)
298
// NOTE(review): both fields are read with the same mask; any shift of
// indref between and after these reads is not visible in this view.
u32 texmap = indref & 3;
300
u32 texcoord = indref & 3;
303
CalculateLOD(rasterBlock.IndirectLod[i], rasterBlock.IndirectLinear[i], texmap, texcoord);
306
// Note the inclusive bound: numtevstages + 1 stages are processed.
for (unsigned int i = 0; i <= bpmem.genMode.numtevstages; i++)
309
// Each tevorders entry describes two consecutive stages.
// NOTE(review): stageOdd is declared outside the visible lines.
TwoTevStageOrders &order = bpmem.tevorders[i >> 1];
310
if(order.getEnable(stageOdd))
312
u32 texmap = order.getTexMap(stageOdd);
313
u32 texcoord = order.getTexCoord(stageOdd);
315
CalculateLOD(rasterBlock.TextureLod[i], rasterBlock.TextureLinear[i], texmap, texcoord);
320
void DrawTriangleFrontFace(OutputVertexData *v0, OutputVertexData *v1, OutputVertexData *v2)
322
INCSTAT(swstats.thisFrame.numTrianglesDrawn);
324
if (g_SWVideoConfig.bHwRasterizer)
326
HwRasterizer::DrawTriangleFrontFace(v0, v1, v2);
330
// adapted from http://www.devmaster.net/forums/showthread.php?t=1884
332
// 28.4 fixed-point coordinates, rounded to nearest and adjusted to match hardware output
333
// could also take floor and adjust -8
334
const s32 Y1 = iround(16.0f * v0->screenPosition[1]) - 9;
335
const s32 Y2 = iround(16.0f * v1->screenPosition[1]) - 9;
336
const s32 Y3 = iround(16.0f * v2->screenPosition[1]) - 9;
338
const s32 X1 = iround(16.0f * v0->screenPosition[0]) - 9;
339
const s32 X2 = iround(16.0f * v1->screenPosition[0]) - 9;
340
const s32 X3 = iround(16.0f * v2->screenPosition[0]) - 9;
343
const s32 DX12 = X1 - X2;
344
const s32 DX23 = X2 - X3;
345
const s32 DX31 = X3 - X1;
347
const s32 DY12 = Y1 - Y2;
348
const s32 DY23 = Y2 - Y3;
349
const s32 DY31 = Y3 - Y1;
351
// Fixed-point deltas
352
const s32 FDX12 = DX12 << 4;
353
const s32 FDX23 = DX23 << 4;
354
const s32 FDX31 = DX31 << 4;
356
const s32 FDY12 = DY12 << 4;
357
const s32 FDY23 = DY23 << 4;
358
const s32 FDY31 = DY31 << 4;
360
// Bounding rectangle
361
s32 minx = (min(min(X1, X2), X3) + 0xF) >> 4;
362
s32 maxx = (max(max(X1, X2), X3) + 0xF) >> 4;
363
s32 miny = (min(min(Y1, Y2), Y3) + 0xF) >> 4;
364
s32 maxy = (max(max(Y1, Y2), Y3) + 0xF) >> 4;
367
minx = max(minx, scissorLeft);
368
maxx = min(maxx, scissorRight);
369
miny = max(miny, scissorTop);
370
maxy = min(maxy, scissorBottom);
372
if (minx >= maxx || miny >= maxy)
376
float fltx1 = v0->screenPosition.x;
377
float flty1 = v0->screenPosition.y;
378
float fltdx31 = v2->screenPosition.x - fltx1;
379
float fltdx12 = fltx1 - v1->screenPosition.x;
380
float fltdy12 = flty1 - v1->screenPosition.y;
381
float fltdy31 = v2->screenPosition.y - flty1;
383
InitTriangle(fltx1, flty1, (X1 + 0xF) >> 4, (Y1 + 0xF) >> 4);
385
float w[3] = { 1.0f / v0->projectedPosition.w, 1.0f / v1->projectedPosition.w, 1.0f / v2->projectedPosition.w };
386
InitSlope(&WSlope, w[0], w[1], w[2], fltdx31, fltdx12, fltdy12, fltdy31);
388
if (!bpmem.genMode.zfreeze || !g_SWVideoConfig.bZFreeze)
389
InitSlope(&ZSlope, v0->screenPosition[2], v1->screenPosition[2], v2->screenPosition[2], fltdx31, fltdx12, fltdy12, fltdy31);
391
for(unsigned int i = 0; i < bpmem.genMode.numcolchans; i++)
393
for(int comp = 0; comp < 4; comp++)
394
InitSlope(&ColorSlopes[i][comp], v0->color[i][comp], v1->color[i][comp], v2->color[i][comp], fltdx31, fltdx12, fltdy12, fltdy31);
397
for(unsigned int i = 0; i < bpmem.genMode.numtexgens; i++)
399
for(int comp = 0; comp < 3; comp++)
400
InitSlope(&TexSlopes[i][comp], v0->texCoords[i][comp] * w[0], v1->texCoords[i][comp] * w[1], v2->texCoords[i][comp] * w[2], fltdx31, fltdx12, fltdy12, fltdy31);
403
// Start in corner of 8x8 block
404
minx &= ~(BLOCK_SIZE - 1);
405
miny &= ~(BLOCK_SIZE - 1);
407
// Half-edge constants
408
s32 C1 = DY12 * X1 - DX12 * Y1;
409
s32 C2 = DY23 * X2 - DX23 * Y2;
410
s32 C3 = DY31 * X3 - DX31 * Y3;
412
// Correct for fill convention
413
if(DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++;
414
if(DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++;
415
if(DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++;
417
// Loop through blocks
418
for(s32 y = miny; y < maxy; y += BLOCK_SIZE)
420
for(s32 x = minx; x < maxx; x += BLOCK_SIZE)
424
s32 x1 = (x + BLOCK_SIZE - 1) << 4;
426
s32 y1 = (y + BLOCK_SIZE - 1) << 4;
428
// Evaluate half-space functions
429
bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0;
430
bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0;
431
bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0;
432
bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0;
433
int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3);
435
bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0;
436
bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0;
437
bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0;
438
bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0;
439
int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3);
441
bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0;
442
bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0;
443
bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0;
444
bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0;
445
int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3);
447
// Skip block when outside an edge
448
if(a == 0x0 || b == 0x0 || c == 0x0)
453
// Accept whole block when totally covered
454
if(a == 0xF && b == 0xF && c == 0xF)
456
for(s32 iy = 0; iy < BLOCK_SIZE; iy++)
458
for(s32 ix = 0; ix < BLOCK_SIZE; ix++)
460
Draw(x + ix, y + iy, ix, iy);
464
else // Partially covered block
466
s32 CY1 = C1 + DX12 * y0 - DY12 * x0;
467
s32 CY2 = C2 + DX23 * y0 - DY23 * x0;
468
s32 CY3 = C3 + DX31 * y0 - DY31 * x0;
470
for(s32 iy = 0; iy < BLOCK_SIZE; iy++)
476
for(s32 ix = 0; ix < BLOCK_SIZE; ix++)
478
if(CX1 > 0 && CX2 > 0 && CX3 > 0)
480
Draw(x + ix, y + iy, ix, iy);