1
/* Mednafen - Multi-system Emulator
3
* This program is free software; you can redistribute it and/or modify
4
* it under the terms of the GNU General Public License as published by
5
* the Free Software Foundation; either version 2 of the License, or
6
* (at your option) any later version.
8
* This program is distributed in the hope that it will be useful,
9
* but WITHOUT ANY WARRANTY; without even the implied warranty of
10
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
* GNU General Public License for more details.
13
* You should have received a copy of the GNU General Public License
14
* along with this program; if not, write to the Free Software
15
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
2
18
#ifndef PSXDEV_GTE_TESTING
23
static uint32 ReciprocalTable[0x8000] =
25
#include "gte_divrecip.inc"
635
INLINE int32 A(unsigned int which, int64 value)
650
// Sign-extends the low `_bits` bits of `_value` to a full int64.
// The value is shifted left as uint64 (so the shift itself cannot overflow a
// signed type), reinterpreted as int64, then shifted right by the same amount
// to replicate bit (_bits - 1) into the upper bits.
// `_bits` is expected to be in 1..64; both arguments are fully parenthesized
// so expression arguments (e.g. `40 + 4`) expand correctly.
#define sign_x_to_s64(_bits, _value) (((int64)((uint64)(_value) << (64 - (_bits)))) >> (64 - (_bits)))
651
INLINE int64 A_MV(unsigned which, int64 value)
637
// Done for an issue with NCCS, at least. See if this masking-out is applicable in all cases, and for other registers.
639
FLAGS &= ~(1 << (27 - which));
640
FLAGS &= ~(1 << (30 - which));
653
if(value >= (1LL << 43))
654
FLAGS |= 1 << (30 - which);
644
if(value < -2147483648LL)
656
if(value < -(1LL << 43))
647
657
FLAGS |= 1 << (27 - which);
650
if(value > 2147483647LL)
653
FLAGS |= 1 << (30 - which);
659
return sign_x_to_s64(44, value);
658
INLINE int32 F(int64 value)
662
INLINE int64 F(int64 value)
660
664
if(value < -2147483648LL)
797
847
IR3 = Lm_B(2, MAC[3], lm);
801
// << 12 for translation vector with MVMA when sf is 0.
802
// FIXME: far color vector is borked
803
850
INLINE void MultiplyMatrixByVector(const gtematrix *matrix, const int16 *v, const int32 *crv, uint32 sf, int lm)
806
MAC[1] = A(0, ((int64)crv[0] << 12) + (((int64)(matrix.MX[0][0] * v[0]) + (matrix.MX[0][1] * v[1]) + (matrix.MX[0][2] * v[2])) >> sf));
807
MAC[2] = A(1, ((int64)crv[1] << 12) + (((int64)(matrix.MX[1][0] * v[0]) + (matrix.MX[1][1] * v[1]) + (matrix.MX[1][2] * v[2])) >> sf));
808
MAC[3] = A(2, ((int64)crv[2] << 12) + (((int64)(matrix.MX[2][0] * v[0]) + (matrix.MX[2][1] * v[1]) + (matrix.MX[2][2] * v[2])) >> sf));
811
assert(crv != CRVectors.FC);
814
if(0 && crv == CRVectors.FC)
816
MAC[1] = A(0, ((((int64)crv[0] << 8) + (int64)(matrix->MX[0][0] * v[0]) + (matrix->MX[0][1] * v[1]) + (matrix->MX[0][2] * v[2])) >> sf));
817
MAC[2] = A(1, ((((int64)crv[1] << 8) + (int64)(matrix->MX[1][0] * v[0]) + (matrix->MX[1][1] * v[1]) + (matrix->MX[1][2] * v[2])) >> sf));
818
MAC[3] = A(2, ((((int64)crv[2] << 8) + (int64)(matrix->MX[2][0] * v[0]) + (matrix->MX[2][1] * v[1]) + (matrix->MX[2][2] * v[2])) >> sf));
823
MAC[1] = A(0, ((((int64)crv[0] << 12) + (int64)(matrix->MX[0][0] * v[0]) + (matrix->MX[0][1] * v[1]) + (matrix->MX[0][2] * v[2])) >> sf));
824
MAC[2] = A(1, ((((int64)crv[1] << 12) + (int64)(matrix->MX[1][0] * v[0]) + (matrix->MX[1][1] * v[1]) + (matrix->MX[1][2] * v[2])) >> sf));
825
MAC[3] = A(2, ((((int64)crv[2] << 12) + (int64)(matrix->MX[2][0] * v[0]) + (matrix->MX[2][1] * v[1]) + (matrix->MX[2][2] * v[2])) >> sf));
854
for(i = 0; i < 3; i++)
859
tmp = (int64)crv[i] << 12;
861
if(matrix == &Matrices.AbbyNormal)
865
mulr[0] = -((RGB.R << 4) * v[0]);
866
mulr[1] = (RGB.R << 4) * v[1];
867
mulr[2] = IR0 * v[2];
871
mulr[0] = (int16)CR[i] * v[0];
872
mulr[1] = (int16)CR[i] * v[1];
873
mulr[2] = (int16)CR[i] * v[2];
878
mulr[0] = matrix->MX[i][0] * v[0];
879
mulr[1] = matrix->MX[i][1] * v[1];
880
mulr[2] = matrix->MX[i][2] * v[2];
883
tmp = A_MV(i, tmp + mulr[0]);
884
if(crv == CRVectors.FC)
886
Lm_B(i, tmp >> sf, FALSE);
890
tmp = A_MV(i, tmp + mulr[1]);
891
tmp = A_MV(i, tmp + mulr[2]);
893
MAC[1 + i] = tmp >> sf;
901
INLINE void MultiplyMatrixByVector_PT(const gtematrix *matrix, const int16 *v, const int32 *crv, uint32 sf, int lm)
906
for(i = 0; i < 3; i++)
910
tmp[i] = (int64)crv[i] << 12;
912
mulr[0] = matrix->MX[i][0] * v[0];
913
mulr[1] = matrix->MX[i][1] * v[1];
914
mulr[2] = matrix->MX[i][2] * v[2];
916
tmp[i] = A_MV(i, tmp[i] + mulr[0]);
917
tmp[i] = A_MV(i, tmp[i] + mulr[1]);
918
tmp[i] = A_MV(i, tmp[i] + mulr[2]);
920
MAC[1 + i] = tmp[i] >> sf;
923
IR1 = Lm_B(0, MAC[1], lm);
924
IR2 = Lm_B(1, MAC[2], lm);
925
//printf("FTV: %08x %08x\n", crv[2], (uint32)(tmp[2] >> 12));
926
IR3 = Lm_B_PTZ(2, MAC[3], tmp[2] >> 12, lm);
928
Z_FIFO[0] = Z_FIFO[1];
929
Z_FIFO[1] = Z_FIFO[2];
930
Z_FIFO[2] = Z_FIFO[3];
931
Z_FIFO[3] = Lm_D(tmp[2] >> 12, TRUE);
832
935
// Expands to the GCC-style `unused` attribute; attach it to a declaration that
// may legitimately go unreferenced.
#define VAR_UNUSED __attribute__((unused))
834
937
#define DECODE_FIELDS \
880
INLINE void PTransform(uint32 sf, int lm, unsigned int v)
884
MultiplyMatrixByVector(&Matrices.Rot, Vectors[v], CRVectors.T, sf, lm);
886
Z_FIFO[0] = Z_FIFO[1];
887
Z_FIFO[1] = Z_FIFO[2];
888
Z_FIFO[2] = Z_FIFO[3];
889
Z_FIFO[3] = Lm_D(MAC[3]);
892
if(H < (Z_FIFO[3] * 2))
893
h_div_sz = (((int64)H << 16)) / Z_FIFO[3];
981
static INLINE unsigned CountLeadingZeroU16(uint16 val)
985
while(!(val & 0x8000) && ret < 16)
994
static INLINE uint32 Divide(uint32 dividend, uint32 divisor)
996
//if((Z_FIFO[3] * 2) > H)
997
if((divisor * 2) > dividend)
999
unsigned shift_bias = CountLeadingZeroU16(divisor);
1001
dividend <<= shift_bias;
1002
divisor <<= shift_bias;
1004
return ((int64)dividend * ReciprocalTable[divisor & 0x7FFF] + 32768) >> 16;
897
1008
FLAGS |= 1 << 17;
900
MAC[0] = F(((int64)OFX + IR1 * h_div_sz + 32768) >> 16);
1013
static INLINE void TransformXY(int64 h_div_sz)
1015
MAC[0] = F((int64)OFX + IR1 * h_div_sz) >> 16;
901
1016
XY_FIFO[3].X = Lm_G(0, MAC[0]);
903
MAC[0] = F(((int64)OFY + IR2 * h_div_sz + 32768) >> 16);
1018
MAC[0] = F((int64)OFY + IR2 * h_div_sz) >> 16;
904
1019
XY_FIFO[3].Y = Lm_G(1, MAC[0]);
906
1021
XY_FIFO[0] = XY_FIFO[1];
907
1022
XY_FIFO[1] = XY_FIFO[2];
908
1023
XY_FIFO[2] = XY_FIFO[3];
910
// MAC[0] = F((DQB + ((DQA * h_div_sz + 32768) >> 16)) >> 8 );
911
// IR0 = Lm_H(MAC[0]);
913
// printf("MOO: %d %d %16lld\n", DQB, DQA, (long long)h_div_sz);
1026
static INLINE void TransformDQ(int64 h_div_sz)
915
1028
MAC[0] = F((int64)DQB + DQA * h_div_sz);
916
IR0 = Lm_H(MAC[0] >> sf);
1029
IR0 = Lm_H(((int64)DQB + DQA * h_div_sz) >> 12);
919
1032
int32 RTPS(uint32 instr)
923
PTransform(sf, lm, 0);
1037
MultiplyMatrixByVector_PT(&Matrices.Rot, Vectors[0], CRVectors.T, sf, lm);
1038
h_div_sz = Divide(H, Z_FIFO[3]);
1040
TransformXY(h_div_sz);
1041
TransformDQ(h_div_sz);
977
1105
tmp_vector[0] = IR1; tmp_vector[1] = IR2; tmp_vector[2] = IR3;
978
1106
MultiplyMatrixByVector(&Matrices.Color, tmp_vector, CRVectors.B, sf, lm);
980
MAC[1] = A(0, ((RGB.R << 4) * IR1) >> sf);
981
MAC[2] = A(1, ((RGB.G << 4) * IR2) >> sf);
982
MAC[3] = A(2, ((RGB.B << 4) * IR3) >> sf);
1108
MAC[1] = ((RGB.R << 4) * IR1) >> sf;
1109
MAC[2] = ((RGB.G << 4) * IR2) >> sf;
1110
MAC[3] = ((RGB.B << 4) * IR3) >> sf;
1009
1137
INLINE void DepthCue(int mult_IR123, int RGB_from_FIFO, uint32 sf, int lm)
1011
int32 R_temp, G_temp, B_temp;
1140
int32 IR_temp[3] = { IR1, IR2, IR3 };
1015
1145
if(RGB_from_FIFO)
1017
R_temp = RGB_FIFO[0].R << 4;
1018
G_temp = RGB_FIFO[0].G << 4;
1019
B_temp = RGB_FIFO[0].B << 4;
1147
RGB_temp[0] = RGB_FIFO[0].R << 4;
1148
RGB_temp[1] = RGB_FIFO[0].G << 4;
1149
RGB_temp[2] = RGB_FIFO[0].B << 4;
1023
R_temp = RGB.R << 4;
1024
G_temp = RGB.G << 4;
1025
B_temp = RGB.B << 4;
1153
RGB_temp[0] = RGB.R << 4;
1154
RGB_temp[1] = RGB.G << 4;
1155
RGB_temp[2] = RGB.B << 4;
1030
// Note: Do not put A() here! We might just want to change this to local temporaries.
1031
MAC[1] = (((int64)CRVectors.FC[0] << 12) - R_temp * IR1) >> sf;
1032
MAC[2] = (((int64)CRVectors.FC[1] << 12) - G_temp * IR2) >> sf;
1033
MAC[3] = (((int64)CRVectors.FC[2] << 12) - B_temp * IR3) >> sf;
1035
MAC[1] = A(0, (R_temp * IR1 + IR0 * Lm_B(0, MAC[1], FALSE)) >> sf);
1036
MAC[2] = A(1, (G_temp * IR2 + IR0 * Lm_B(1, MAC[2], FALSE)) >> sf);
1037
MAC[3] = A(2, (B_temp * IR3 + IR0 * Lm_B(2, MAC[3], FALSE)) >> sf);
1040
MAC[1] = A(0, (int64)((R_temp * IR1) >> sf) + IR0 * Lm_B(0, (int64)CRVectors.FC[0] - ((R_temp * IR1) >> sf), FALSE) );
1041
MAC[2] = A(1, (int64)((G_temp * IR2) >> sf) + IR0 * Lm_B(1, (int64)CRVectors.FC[1] - ((G_temp * IR2) >> sf), FALSE) );
1042
MAC[3] = A(2, (int64)((B_temp * IR3) >> sf) + IR0 * Lm_B(2, (int64)CRVectors.FC[2] - ((B_temp * IR3) >> sf), FALSE) );
1160
for(i = 0; i < 3; i++)
1162
MAC[1 + i] = A_MV(i, (((int64)CRVectors.FC[i] << 12) - RGB_temp[i] * IR_temp[i])) >> sf;
1163
MAC[1 + i] = A_MV(i, (RGB_temp[i] * IR_temp[i] + IR0 * Lm_B(i, MAC[1 + i], FALSE))) >> sf;
1047
// Note: Do not put A() here! We might just want to change this to local temporaries.
1048
MAC[1] = (((int64)CRVectors.FC[0] << 12) - (R_temp << 12)) >> sf;
1049
MAC[2] = (((int64)CRVectors.FC[1] << 12) - (G_temp << 12)) >> sf;
1050
MAC[3] = (((int64)CRVectors.FC[2] << 12) - (B_temp << 12)) >> sf;
1052
MAC[1] = A(0, (((int64)R_temp << 12) + IR0 * Lm_B(0, MAC[1], FALSE)) >> sf);
1053
MAC[2] = A(1, (((int64)G_temp << 12) + IR0 * Lm_B(1, MAC[2], FALSE)) >> sf);
1054
MAC[3] = A(2, (((int64)B_temp << 12) + IR0 * Lm_B(2, MAC[3], FALSE)) >> sf);
1057
MAC[1] = A(0, (int64)R_temp + ((IR0 * Lm_B(0, (int64)CRVectors.FC[0] - R_temp, FALSE)) >> sf) );
1058
MAC[2] = A(1, (int64)G_temp + ((IR0 * Lm_B(1, (int64)CRVectors.FC[1] - G_temp, FALSE)) >> sf) );
1059
MAC[3] = A(2, (int64)B_temp + ((IR0 * Lm_B(2, (int64)CRVectors.FC[2] - B_temp, FALSE)) >> sf) );
1168
for(i = 0; i < 3; i++)
1170
MAC[1 + i] = A_MV(i, (((int64)CRVectors.FC[i] << 12) - (RGB_temp[i] << 12))) >> sf;
1171
MAC[1 + i] = A_MV(i, (((int64)RGB_temp[i] << 12) + IR0 * Lm_B(i, MAC[1 + i], FALSE))) >> sf;
1101
// SF field *is* used(tested), but it's a bit...weird.
1102
1213
int32 INTPL(uint32 instr)
1108
// MAC[1] = A(0, (int64)IR1 + ((IR0 * Lm_B(0, (int64)CRVectors.FC[0] - IR1, FALSE)) >> 12) );
1109
// MAC[2] = A(1, (int64)IR2 + ((IR0 * Lm_B(1, (int64)CRVectors.FC[1] - IR2, FALSE)) >> 12) );
1110
// MAC[3] = A(2, (int64)IR3 + ((IR0 * Lm_B(2, (int64)CRVectors.FC[2] - IR3, FALSE)) >> 12) );
1115
// Note: Do not put A() here! We might just want to change this to local temporaries.
1116
MAC[1] = (((int64)CRVectors.FC[0] << 12) - (IR1 << 12)) >> sf;
1117
MAC[2] = (((int64)CRVectors.FC[1] << 12) - (IR2 << 12)) >> sf;
1118
MAC[3] = (((int64)CRVectors.FC[2] << 12) - (IR3 << 12)) >> sf;
1120
MAC[1] = A(0, (((int64)IR1 << 12) + IR0 * Lm_B(0, MAC[1], FALSE)) >> sf);
1121
MAC[2] = A(1, (((int64)IR2 << 12) + IR0 * Lm_B(1, MAC[2], FALSE)) >> sf);
1122
MAC[3] = A(2, (((int64)IR3 << 12) + IR0 * Lm_B(2, MAC[3], FALSE)) >> sf);
1217
MAC[1] = A_MV(0, (((int64)CRVectors.FC[0] << 12) - (IR1 << 12))) >> sf;
1218
MAC[2] = A_MV(1, (((int64)CRVectors.FC[1] << 12) - (IR2 << 12))) >> sf;
1219
MAC[3] = A_MV(2, (((int64)CRVectors.FC[2] << 12) - (IR3 << 12))) >> sf;
1221
MAC[1] = A_MV(0, (((int64)IR1 << 12) + IR0 * Lm_B(0, MAC[1], FALSE)) >> sf);
1222
MAC[2] = A_MV(1, (((int64)IR2 << 12) + IR0 * Lm_B(1, MAC[2], FALSE)) >> sf);
1223
MAC[3] = A_MV(2, (((int64)IR3 << 12) + IR0 * Lm_B(2, MAC[3], FALSE)) >> sf);
1159
1259
for(i = 0; i < 3; i++)
1160
1261
NormColorDepthCue(i, sf, lm);
1267
int32 CC(uint32 instr)
1270
int16 tmp_vector[3];
1272
tmp_vector[0] = IR1; tmp_vector[1] = IR2; tmp_vector[2] = IR3;
1273
MultiplyMatrixByVector(&Matrices.Color, tmp_vector, CRVectors.B, sf, lm);
1275
MAC[1] = ((RGB.R << 4) * IR1) >> sf;
1276
MAC[2] = ((RGB.G << 4) * IR2) >> sf;
1277
MAC[3] = ((RGB.B << 4) * IR3) >> sf;
1286
int32 CDP(uint32 instr)
1289
int16 tmp_vector[3];
1291
tmp_vector[0] = IR1; tmp_vector[1] = IR2; tmp_vector[2] = IR3;
1292
MultiplyMatrixByVector(&Matrices.Color, tmp_vector, CRVectors.B, sf, lm);
1294
DepthCue(TRUE, FALSE, sf, lm);
1165
1299
int32 NCLIP(uint32 instr)
1191
1324
MAC[0] = F(((int64)ZSF4 * (Z_FIFO[0] + Z_FIFO[1] + Z_FIFO[2] + Z_FIFO[3])));
1193
OTZ = Lm_D(MAC[0] >> 12);
1326
OTZ = Lm_D(MAC[0] >> 12, FALSE);
1332
// -32768 * -32768 - 32767 * -32768 = 2147450880
1333
// (2 ^ 31) - 1 = 2147483647
1199
1334
int32 OP(uint32 instr)
1203
MAC[1] = A(0, ((int64)(Matrices.Rot.MX[1][1] * IR3) - (Matrices.Rot.MX[2][2] * IR2)) >> sf);
1204
MAC[2] = A(1, ((int64)(Matrices.Rot.MX[2][2] * IR1) - (Matrices.Rot.MX[0][0] * IR3)) >> sf);
1205
MAC[3] = A(2, ((int64)(Matrices.Rot.MX[0][0] * IR2) - (Matrices.Rot.MX[1][1] * IR1)) >> sf);
1338
MAC[1] = ((Matrices.Rot.MX[1][1] * IR3) - (Matrices.Rot.MX[2][2] * IR2)) >> sf;
1339
MAC[2] = ((Matrices.Rot.MX[2][2] * IR1) - (Matrices.Rot.MX[0][0] * IR3)) >> sf;
1340
MAC[3] = ((Matrices.Rot.MX[0][0] * IR2) - (Matrices.Rot.MX[1][1] * IR1)) >> sf;
1247
24 23 22 21 20|19|18 17|16 15|14 13|12 11|10| 9 8| 7 6| 5 4 3 2 1 0
1248
|sf| mx | v | cv |-----|lm|-----------|
1379
---------------------------------------------------------------------------------------------
1380
| 24 23 22 21 20 | 19 | 18 17 | 16 15 | 14 13 | 12 11 | 10 | 9 8 7 6 | 5 4 3 2 1 0 |
1381
|-------------------------------------------------------------------------------------------|
1382
| (unused) | sf | mx | v | cv |(unused)| lm | (unused) | opcode |
1383
---------------------------------------------------------------------------------------------
1384
(unused) = unused, ignored
1257
1392
cv = add vector(translation/back/far color(bugged)/none)
1394
(unused) = unused, ignored
1259
1396
lm = limit negative results to 0
1398
(unused) = unused, ignored
1400
opcode = operation code
1263
1403
int32 GTE_Instruction(uint32 instr)
1405
const unsigned code = instr & 0x3F;
1267
//PSX_WARNING("[GTE] Instruction 0x%08x", instr);
1271
switch(instr & ((0x1F << 20) | 0x3F))
1273
default: //PSX_WARNING("[GTE] Unknown instruction: 0x%08x, 0x%08x", instr & ~(0x7F << 25), instr & ((0x1F << 20) | 0x3F));
1413
#ifndef PSXDEV_GTE_TESTING
1414
PSX_WARNING("[GTE] Unknown instruction code: 0x%02x", code);
1418
case 0x00: // alternate?
1277
1420
ret = RTPS(instr);
1292
case 0x0700010: // RR
1424
case 0x02: // UNSTABLE?
1427
case 0x03: // UNSTABLE?
1430
case 0x04: // Probably simple with v,cv,sf,mx,lm ignored. Same calculation as 0x3B?
1433
case 0x05: // UNSTABLE?
1442
case 0x07: // UNSTABLE?
1445
case 0x08: // UNSTABLE?
1448
case 0x09: // UNSTABLE?
1451
case 0x0A: // UNSTABLE?
1454
case 0x0B: // UNSTABLE?
1464
case 0x0D: // UNSTABLE?
1467
case 0x0E: // UNSTABLE?
1470
case 0x0F: // UNSTABLE?
1293
1475
ret = DPCS(instr);
1296
case 0x0900011: // RR
1297
1479
ret = INTPL(instr);
1496
case 0x15: // does one push on RGB FIFO, what else...
1505
case 0x17: // PARTIALLY UNSTABLE(depending on sf or v or cv or mx or lm???), similar behavior under some conditions to 0x16?
1515
case 0x1A: // Alternate for 0x29?
1305
1533
ret = NCS(instr);
1309
1542
ret = NCT(instr);
1548
case 0x22: // UNSTABLE?
1321
1576
ret = DPCT(instr);
1337
1588
ret = AVSZ3(instr);
1341
1592
ret = AVSZ4(instr);
1596
case 0x2F: // UNSTABLE?
1605
case 0x31: // UNSTABLE?
1608
case 0x32: // UNSTABLE?
1611
case 0x33: // UNSTABLE?
1614
case 0x34: // UNSTABLE?
1617
case 0x35: // UNSTABLE?
1620
case 0x36: // UNSTABLE?
1623
case 0x37: // UNSTABLE?
1629
case 0x39: // Probably simple with v,cv,sf,mx,lm ignored.
1632
case 0x3A: // Probably simple with v,cv,sf,mx,lm ignored.
1635
case 0x3B: // Probably simple with v,cv,sf,mx,lm ignored. Same calculation as 0x04?
1638
case 0x3C: // UNSTABLE?
1349
1643
ret = GPF(instr);
1353
1647
ret = GPL(instr);
1358
1655
if(FLAGS & 0x7f87e000)