1
// Copyright (c) 2012- PPSSPP Project.
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
21
#include "base/basictypes.h"
22
#include "Common/Log.h"
23
#include "Common/CommonTypes.h"
24
#include "Core/Reporting.h"
25
#include "GPU/ge_constants.h"
26
#include "GPU/Common/ShaderCommon.h"
28
#include "Common/ArmEmitter.h"
30
#include "Common/Arm64Emitter.h"
31
#elif defined(_M_IX86) || defined(_M_X64)
32
#include "Common/x64Emitter.h"
34
#include "Common/MipsEmitter.h"
36
#include "Common/FakeEmitter.h"
40
// DecVtxFormat - vertex formats for PC
41
// Kind of like a D3D VertexDeclaration.
42
// Can write code to easily bind these using OpenGL, or read these manually.
43
// No morph support, that is taken care of by the VertexDecoder.
65
// Maps a decoded-format code (a DEC_* value such as those stored in DecVtxFormat's *fmt fields)
// to an int. NOTE(review): presumably the size in bytes of that format — confirm in the definition.
int DecFmtSize(u8 fmt);
68
u8 w0fmt; u8 w0off; // first 4 weights
69
u8 w1fmt; u8 w1off; // second 4 weights
71
u8 c0fmt; u8 c0off; // First color
78
struct TransformedVertex
82
float x, y, z, fog; // in case of morph, preblend during decode
88
float u; float v; float w; // scaled by uscale, vscale, if there
93
u8 color0[4]; // prelit
97
u8 color1[4]; // prelit
102
// Reports index bounds through the two u16 out-parameters.
// NOTE(review): presumably scans `count` indices at `inds` (index width chosen from vertType) and
// writes the smallest/largest index found — confirm against the implementation.
void GetIndexBounds(const void *inds, int count, u32 vertType, u16 *indexLowerBound, u16 *indexUpperBound);
104
inline int RoundUp4(int x) {
108
// Reads decoded vertex formats in a convenient way. For software transform and debugging.
111
// Binds the reader to a decoded buffer. Both base_ and data_ start at `base`;
// the format description is stored in decFmt_ and the vertex type in vtype_.
VertexReader(u8 *base, const DecVtxFormat &decFmt, int vtype)
	: base_(base),
	  data_(base),
	  decFmt_(decFmt),
	  vtype_(vtype) {
}
113
void ReadPos(float pos[3]) const {
114
switch (decFmt_.posfmt) {
117
const float *f = (const float *)(data_ + decFmt_.posoff);
120
// Integer value passed in a float. Clamped to 0, 65535.
121
const float z = (int)pos[2] * (1.0f / 65535.0f);
122
pos[2] = z > 1.0f ? 1.0f : (z < 0.0f ? 0.0f : z);
128
// X and Y are signed 16 bit, Z is unsigned 16 bit
129
const s16 *s = (const s16 *)(data_ + decFmt_.posoff);
130
const u16 *u = (const u16 *)(data_ + decFmt_.posoff);
132
for (int i = 0; i < 2; i++)
134
pos[2] = u[2] * (1.0f / 65535.0f);
136
for (int i = 0; i < 3; i++)
137
pos[i] = s[i] * (1.0f / 32768.0f);
143
// X and Y are signed 8 bit, Z is unsigned 8 bit
144
const s8 *b = (const s8 *)(data_ + decFmt_.posoff);
145
const u8 *u = (const u8 *)(data_ + decFmt_.posoff);
147
for (int i = 0; i < 2; i++)
149
pos[2] = u[2] * (1.0f / 255.0f);
151
for (int i = 0; i < 3; i++)
152
pos[i] = b[i] * (1.0f / 128.0f);
157
ERROR_LOG_REPORT_ONCE(fmtpos, G3D, "Reader: Unsupported Pos Format %d", decFmt_.posfmt);
158
memset(pos, 0, sizeof(float) * 3);
163
void ReadPosThroughZ16(float pos[3]) const {
164
switch (decFmt_.posfmt) {
167
const float *f = (const float *)(data_ + decFmt_.posoff);
170
// Integer value passed in a float. Clamped to 0, 65535.
171
const float z = (int)pos[2];
172
pos[2] = z > 65535.0f ? 65535.0f : (z < 0.0f ? 0.0f : z);
178
// X and Y are signed 16 bit, Z is unsigned 16 bit
179
const s16 *s = (const s16 *)(data_ + decFmt_.posoff);
180
const u16 *u = (const u16 *)(data_ + decFmt_.posoff);
182
for (int i = 0; i < 2; i++)
186
for (int i = 0; i < 3; i++)
187
pos[i] = s[i] * (1.0f / 32768.0f);
193
// X and Y are signed 8 bit, Z is unsigned 8 bit
194
const s8 *b = (const s8 *)(data_ + decFmt_.posoff);
195
const u8 *u = (const u8 *)(data_ + decFmt_.posoff);
197
for (int i = 0; i < 2; i++)
201
for (int i = 0; i < 3; i++)
202
pos[i] = b[i] * (1.0f / 128.0f);
207
ERROR_LOG_REPORT_ONCE(fmtz16, G3D, "Reader: Unsupported Pos Format %d", decFmt_.posfmt);
208
memset(pos, 0, sizeof(float) * 3);
213
void ReadNrm(float nrm[3]) const {
214
switch (decFmt_.nrmfmt) {
216
//memcpy(nrm, data_ + decFmt_.nrmoff, 12);
218
const float *f = (const float *)(data_ + decFmt_.nrmoff);
219
for (int i = 0; i < 3; i++)
225
const s16 *s = (const s16 *)(data_ + decFmt_.nrmoff);
226
for (int i = 0; i < 3; i++)
227
nrm[i] = s[i] * (1.f / 32767.f);
232
const s8 *b = (const s8 *)(data_ + decFmt_.nrmoff);
233
for (int i = 0; i < 3; i++)
234
nrm[i] = b[i] * (1.f / 127.f);
238
ERROR_LOG_REPORT_ONCE(fmtnrm, G3D, "Reader: Unsupported Nrm Format %d", decFmt_.nrmfmt);
239
memset(nrm, 0, sizeof(float) * 3);
244
void ReadUV(float uv[2]) const {
245
switch (decFmt_.uvfmt) {
248
const u8 *b = (const u8 *)(data_ + decFmt_.uvoff);
249
uv[0] = b[0] * (1.f / 128.f);
250
uv[1] = b[1] * (1.f / 128.f);
256
const u16 *s = (const u16 *)(data_ + decFmt_.uvoff);
257
uv[0] = s[0] * (1.f / 32768.f);
258
uv[1] = s[1] * (1.f / 32768.f);
264
const float *f = (const float *)(data_ + decFmt_.uvoff);
272
const u8 *b = (const u8 *)(data_ + decFmt_.uvoff);
280
const u16 *p = (const u16 *)(data_ + decFmt_.uvoff);
286
ERROR_LOG_REPORT_ONCE(fmtuv, G3D, "Reader: Unsupported UV Format %d", decFmt_.uvfmt);
287
memset(uv, 0, sizeof(float) * 2);
292
void ReadColor0(float color[4]) const {
293
switch (decFmt_.c0fmt) {
296
const u8 *b = (const u8 *)(data_ + decFmt_.c0off);
297
for (int i = 0; i < 4; i++)
298
color[i] = b[i] * (1.f / 255.f);
302
memcpy(color, data_ + decFmt_.c0off, 16);
305
ERROR_LOG_REPORT_ONCE(fmtc0, G3D, "Reader: Unsupported C0 Format %d", decFmt_.c0fmt);
306
memset(color, 0, sizeof(float) * 4);
311
void ReadColor0_8888(u8 color[4]) const {
312
switch (decFmt_.c0fmt) {
315
const u8 *b = (const u8 *)(data_ + decFmt_.c0off);
316
for (int i = 0; i < 4; i++)
322
const float *f = (const float *)(data_ + decFmt_.c0off);
323
for (int i = 0; i < 4; i++)
324
color[i] = f[i] * 255.0f;
328
ERROR_LOG_REPORT_ONCE(fmtc0_8888, G3D, "Reader: Unsupported C0 Format %d", decFmt_.c0fmt);
329
memset(color, 0, sizeof(u8) * 4);
335
void ReadColor1(float color[3]) const {
336
switch (decFmt_.c1fmt) {
339
const u8 *b = (const u8 *)(data_ + decFmt_.c1off);
340
for (int i = 0; i < 3; i++)
341
color[i] = b[i] * (1.f / 255.f);
345
memcpy(color, data_ + decFmt_.c1off, 12);
348
ERROR_LOG_REPORT_ONCE(fmtc1, G3D, "Reader: Unsupported C1 Format %d", decFmt_.c1fmt);
349
memset(color, 0, sizeof(float) * 3);
354
void ReadWeights(float weights[8]) const {
355
const float *f = (const float *)(data_ + decFmt_.w0off);
356
const u8 *b = (const u8 *)(data_ + decFmt_.w0off);
357
const u16 *s = (const u16 *)(data_ + decFmt_.w0off);
358
switch (decFmt_.w0fmt) {
363
for (int i = 0; i <= decFmt_.w0fmt - DEC_FLOAT_1; i++)
366
case DEC_U8_1: weights[0] = b[0] * (1.f / 128.f); break;
367
case DEC_U8_2: for (int i = 0; i < 2; i++) weights[i] = b[i] * (1.f / 128.f); break;
368
case DEC_U8_3: for (int i = 0; i < 3; i++) weights[i] = b[i] * (1.f / 128.f); break;
369
case DEC_U8_4: for (int i = 0; i < 4; i++) weights[i] = b[i] * (1.f / 128.f); break;
370
case DEC_U16_1: weights[0] = s[0] * (1.f / 32768.f); break;
371
case DEC_U16_2: for (int i = 0; i < 2; i++) weights[i] = s[i] * (1.f / 32768.f); break;
372
case DEC_U16_3: for (int i = 0; i < 3; i++) weights[i] = s[i] * (1.f / 32768.f); break;
373
case DEC_U16_4: for (int i = 0; i < 4; i++) weights[i] = s[i] * (1.f / 32768.f); break;
375
ERROR_LOG_REPORT_ONCE(fmtw0, G3D, "Reader: Unsupported W0 Format %d", decFmt_.w0fmt);
376
memset(weights, 0, sizeof(float) * 8);
380
f = (const float *)(data_ + decFmt_.w1off);
381
b = (const u8 *)(data_ + decFmt_.w1off);
382
s = (const u16 *)(data_ + decFmt_.w1off);
383
switch (decFmt_.w1fmt) {
385
// It's fine for there to be w0 weights but not w1.
391
for (int i = 0; i <= decFmt_.w1fmt - DEC_FLOAT_1; i++)
394
case DEC_U8_1: weights[4] = b[0] * (1.f / 128.f); break;
395
case DEC_U8_2: for (int i = 0; i < 2; i++) weights[i+4] = b[i] * (1.f / 128.f); break;
396
case DEC_U8_3: for (int i = 0; i < 3; i++) weights[i+4] = b[i] * (1.f / 128.f); break;
397
case DEC_U8_4: for (int i = 0; i < 4; i++) weights[i+4] = b[i] * (1.f / 128.f); break;
398
case DEC_U16_1: weights[4] = s[0] * (1.f / 32768.f); break;
399
case DEC_U16_2: for (int i = 0; i < 2; i++) weights[i+4] = s[i] * (1.f / 32768.f); break;
400
case DEC_U16_3: for (int i = 0; i < 3; i++) weights[i+4] = s[i] * (1.f / 32768.f); break;
401
case DEC_U16_4: for (int i = 0; i < 4; i++) weights[i+4] = s[i] * (1.f / 32768.f); break;
403
ERROR_LOG_REPORT_ONCE(fmtw1, G3D, "Reader: Unsupported W1 Format %d", decFmt_.w1fmt);
404
memset(weights + 4, 0, sizeof(float) * 4);
409
// True when the decoded format has a first color (c0fmt is non-zero).
bool hasColor0() const {
	return !(decFmt_.c0fmt == 0);
}
410
// True when the decoded format has a second color (c1fmt is non-zero).
bool hasColor1() const {
	return !(decFmt_.c1fmt == 0);
}
411
// True when the decoded format has a normal (nrmfmt is non-zero).
bool hasNormal() const {
	return !(decFmt_.nrmfmt == 0);
}
412
// True when the decoded format has texture coordinates (uvfmt is non-zero).
bool hasUV() const {
	return !(decFmt_.uvfmt == 0);
}
413
// True when the GE_VTYPE_THROUGH bit is set in the vertex type given at construction.
bool isThrough() const {
	const bool throughBitSet = (vtype_ & GE_VTYPE_THROUGH) != 0;
	return throughBitSet;
}
414
void Goto(int index) {
415
data_ = base_ + index * decFmt_.stride;
421
DecVtxFormat decFmt_;
424
// Debugging utilities
425
// Debug helper operating on a VertexReader.
// NOTE(review): presumably prints the vertex the reader currently points at — confirm in the definition.
void PrintDecodedVertex(VertexReader &vtx);
429
class VertexDecoderJitCache;
431
// Pointer to a const VertexDecoder member function taking no arguments and returning void;
// this is the signature shared by the Step_* methods and the element type of steps_.
typedef void (VertexDecoder::*StepFunction)() const;
432
// Pointer to a non-const VertexDecoderJitCache member function taking no arguments and
// returning void; this matches the signature of the argument-less Jit_* methods.
typedef void (VertexDecoderJitCache::*JitStepFunction)();
436
JitStepFunction jitFunc;
439
// Collapse to fewer skinning shaders to reduce shader switching, which is expensive.
// Takes a bone count and returns the (possibly different) bone count to use.
440
int TranslateNumBones(int bones);
442
// Signature of a compiled (jitted) vertex decoder, called with the C calling convention:
// source vertex data, destination buffer, and a vertex count.
typedef void(*JittedVertexDecoder)(const u8 *src, u8 *dst, int count);
444
struct VertexDecoderOptions {
445
bool expandAllUVtoFloat;
446
bool expandAllWeightsToFloat;
447
bool expand8BitNormalsToFloat;
450
class VertexDecoder {
454
// Sets up the decoder for vertex type `vtype` with the given options.
// A jit cache is not mandatory, we don't use it in the sw renderer; jitCache defaults to null.
455
void SetVertexType(u32 vtype, const VertexDecoderOptions &options, VertexDecoderJitCache *jitCache = 0);
457
// Returns the stored vertex type word (fmt_).
u32 VertexType() const {
	return fmt_;
}
459
// Returns a read-only reference to this decoder's DecVtxFormat (decFmt).
// Const-qualified because it does not modify the decoder; this lets it be called through a
// const VertexDecoder reference as well as a non-const one, so existing callers are unaffected.
const DecVtxFormat &GetDecVtxFmt() const { return decFmt; }
461
// Decodes vertices from `verts` into the `decoded` buffer.
// NOTE(review): presumably only vertices indexLowerBound..indexUpperBound (inclusive?) are
// decoded — confirm the range semantics against the implementation.
void DecodeVerts(u8 *decoded, const void *verts, int indexLowerBound, int indexUpperBound) const;
463
// True when the col member is non-zero.
bool hasColor() const {
	return !(col == 0);
}
464
// True when the tc member is non-zero.
bool hasTexcoord() const {
	return !(tc == 0);
}
465
// PSP format size of a vertex.
int VertexSize() const {
	return size;
}
467
// Returns a string for this decoder; `stringType` selects which one.
// NOTE(review): presumably a debug description (the selector is a DebugShaderStringType) — confirm.
std::string GetString(DebugShaderStringType stringType);
469
// Weight decode steps, one per source format (U8 / U16 / float). All match StepFunction.
// NOTE(review): judging by the names, *ToFloat variants write float output and *Skin variants
// are used when skinning is applied during decode — confirm against the implementations.
void Step_WeightsU8() const;
470
void Step_WeightsU16() const;
471
void Step_WeightsU8ToFloat() const;
472
void Step_WeightsU16ToFloat() const;
473
void Step_WeightsFloat() const;
475
void Step_WeightsU8Skin() const;
476
void Step_WeightsU16Skin() const;
477
void Step_WeightsFloatSkin() const;
479
// Texture coordinate decode steps, one per combination of source format (U8 / U16 / float)
// and mode. All match StepFunction.
// NOTE(review): the suffixes (ToFloat, Prescale, Double, Through, Morph) presumably name the
// output type or the vertex mode each step handles — confirm against the implementations.
void Step_TcU8() const;
480
void Step_TcU16() const;
481
void Step_TcU8ToFloat() const;
482
void Step_TcU16ToFloat() const;
483
void Step_TcFloat() const;
485
void Step_TcU8Prescale() const;
486
void Step_TcU16Prescale() const;
487
void Step_TcU16DoublePrescale() const;
488
void Step_TcFloatPrescale() const;
490
void Step_TcU16Double() const;
491
void Step_TcU16Through() const;
492
void Step_TcU16ThroughDouble() const;
493
void Step_TcU16DoubleToFloat() const;
494
void Step_TcU16ThroughToFloat() const;
495
void Step_TcU16ThroughDoubleToFloat() const;
496
void Step_TcFloatThrough() const;
498
void Step_TcU8Morph() const;
499
void Step_TcU16Morph() const;
500
void Step_TcU16DoubleMorph() const;
501
void Step_TcU8MorphToFloat() const;
502
void Step_TcU16MorphToFloat() const;
503
void Step_TcU16DoubleMorphToFloat() const;
504
void Step_TcFloatMorph() const;
505
void Step_TcU8PrescaleMorph() const;
506
void Step_TcU16PrescaleMorph() const;
507
void Step_TcU16DoublePrescaleMorph() const;
508
void Step_TcFloatPrescaleMorph() const;
510
// Color decode steps, one per source color format (4444 / 565 / 5551 / 8888), plus a step for
// an invalid color format. All match StepFunction.
// NOTE(review): the *Morph variants are presumably used for morphed vertices — confirm.
void Step_ColorInvalid() const;
511
void Step_Color4444() const;
512
void Step_Color565() const;
513
void Step_Color5551() const;
514
void Step_Color8888() const;
516
void Step_Color4444Morph() const;
517
void Step_Color565Morph() const;
518
void Step_Color5551Morph() const;
519
void Step_Color8888Morph() const;
521
// Normal decode steps, one per source format (S8 / S16 / float). All match StepFunction.
// NOTE(review): judging by the names, *ToFloat writes float output, *Skin is used when skinning
// is applied during decode, and *Morph is used for morphed vertices — confirm.
void Step_NormalS8() const;
522
void Step_NormalS8ToFloat() const;
523
void Step_NormalS16() const;
524
void Step_NormalFloat() const;
526
void Step_NormalS8Skin() const;
527
void Step_NormalS16Skin() const;
528
void Step_NormalFloatSkin() const;
530
void Step_NormalS8Morph() const;
531
void Step_NormalS16Morph() const;
532
void Step_NormalFloatMorph() const;
534
// Position decode steps, one per source format (S8 / S16 / float). All match StepFunction.
// NOTE(review): judging by the names, *Skin is used when skinning is applied during decode,
// *Morph for morphed vertices and *Through for through-mode vertices — confirm.
void Step_PosS8() const;
535
void Step_PosS16() const;
536
void Step_PosFloat() const;
538
void Step_PosS8Skin() const;
539
void Step_PosS16Skin() const;
540
void Step_PosFloatSkin() const;
542
void Step_PosS8Morph() const;
543
void Step_PosS16Morph() const;
544
void Step_PosFloatMorph() const;
546
void Step_PosS8Through() const;
547
void Step_PosS16Through() const;
548
void Step_PosFloatThrough() const;
550
// Writes into the caller-provided `output` buffer. No buffer size is passed, so
// output must be big for safety.
551
// Returns number of chars written.
553
int ToString(char *output) const;
555
// Mutable decoder state
556
mutable u8 *decoded_;
557
mutable const u8 *ptr_;
559
JittedVertexDecoder jitted_;
562
// "Immutable" state, set at startup
564
// The decoding steps. Never more than 5.
565
StepFunction steps_[5];
590
friend class VertexDecoderJitCache;
594
// A compiled vertex decoder takes the following arguments (C calling convention):
595
// u8 *src, u8 *dst, int count
598
// src is placed in esi and dst in edi
599
// for every vertex, we step esi and edi forwards by the two vertex sizes
600
// all movs are done relative to esi and edi
606
class VertexDecoderJitCache : public ArmGen::ARMXCodeBlock {
608
class VertexDecoderJitCache : public Arm64Gen::ARM64CodeBlock {
609
#elif defined(_M_IX86) || defined(_M_X64)
610
class VertexDecoderJitCache : public Gen::XCodeBlock {
612
class VertexDecoderJitCache : public MIPSGen::MIPSCodeBlock {
614
class VertexDecoderJitCache : public FakeGen::FakeXCodeBlock {
617
VertexDecoderJitCache();
619
// Compiles a decoder for `dec`.
// Returns a pointer to the code to run.
// NOTE(review): jittedSize is presumably an out-parameter receiving the size of the emitted
// code — confirm against the implementation.
620
JittedVertexDecoder Compile(const VertexDecoder &dec, int32_t *jittedSize);
623
// Jit counterparts of the weight steps; each matches JitStepFunction.
// NOTE(review): presumably each emits the code for the like-named VertexDecoder::Step_* — confirm.
void Jit_WeightsU8();
624
void Jit_WeightsU16();
625
void Jit_WeightsU8ToFloat();
626
void Jit_WeightsU16ToFloat();
627
void Jit_WeightsFloat();
629
void Jit_WeightsU8Skin();
630
void Jit_WeightsU16Skin();
631
void Jit_WeightsFloatSkin();
634
// Jit counterparts of the texture coordinate steps. All but Jit_TcAnyMorph match
// JitStepFunction; Jit_TcAnyMorph takes a bit width.
// NOTE(review): Jit_TcAnyMorph is presumably a shared helper for the *Morph variants — confirm.
void Jit_TcU8ToFloat();
636
void Jit_TcU16ToFloat();
639
void Jit_TcU8Prescale();
640
void Jit_TcU16Prescale();
641
void Jit_TcFloatPrescale();
643
void Jit_TcAnyMorph(int bits);
644
void Jit_TcU8MorphToFloat();
645
void Jit_TcU16MorphToFloat();
646
void Jit_TcFloatMorph();
647
void Jit_TcU8PrescaleMorph();
648
void Jit_TcU16PrescaleMorph();
649
void Jit_TcFloatPrescaleMorph();
651
void Jit_TcU16Double();
652
void Jit_TcU16ThroughDouble();
654
void Jit_TcU16Through();
655
void Jit_TcU16ThroughToFloat();
656
void Jit_TcFloatThrough();
658
// Jit counterparts of the non-morph color, normal and position steps; each matches
// JitStepFunction.
// NOTE(review): presumably each emits the code for the like-named VertexDecoder::Step_* — confirm.
void Jit_Color8888();
659
void Jit_Color4444();
661
void Jit_Color5551();
664
void Jit_NormalS8ToFloat();
665
void Jit_NormalS16();
666
void Jit_NormalFloat();
668
void Jit_NormalS8Skin();
669
void Jit_NormalS16Skin();
670
void Jit_NormalFloatSkin();
673
void Jit_PosS8ToFloat();
676
void Jit_PosS8Through();
677
void Jit_PosS16Through();
679
void Jit_PosS8Skin();
680
void Jit_PosS16Skin();
681
void Jit_PosFloatSkin();
683
// Jit counterparts of the morph steps for normals, positions and colors; each matches
// JitStepFunction.
// NOTE(review): presumably each emits the code for the like-named VertexDecoder::Step_*Morph — confirm.
void Jit_NormalS8Morph();
684
void Jit_NormalS16Morph();
685
void Jit_NormalFloatMorph();
687
void Jit_PosS8Morph();
688
void Jit_PosS16Morph();
689
void Jit_PosFloatMorph();
691
void Jit_Color8888Morph();
692
void Jit_Color4444Morph();
693
void Jit_Color565Morph();
694
void Jit_Color5551Morph();
697
// Compiles step `i` of `dec`.
// NOTE(review): the bool result presumably reports whether the step could be compiled — confirm.
bool CompileStep(const VertexDecoder &dec, int i);
698
// Shared emitter helpers used by the Jit_* steps. Offsets are ints; srcoff/dstoff presumably
// are byte offsets into the source/destination vertex — confirm against the implementations.
void Jit_ApplyWeights();
699
void Jit_WriteMatrixMul(int outOff, bool pos);
700
void Jit_WriteMorphColor(int outOff, bool checkAlpha = true);
701
void Jit_AnyS8ToFloat(int srcoff);
702
void Jit_AnyS16ToFloat(int srcoff);
703
// `bits` defaults to 32 for the U8 variant and 64 for the U16 variant.
void Jit_AnyU8ToFloat(int srcoff, u32 bits = 32);
704
void Jit_AnyU16ToFloat(int srcoff, u32 bits = 64);
705
void Jit_AnyS8Morph(int srcoff, int dstoff);
706
void Jit_AnyS16Morph(int srcoff, int dstoff);
707
void Jit_AnyFloatMorph(int srcoff, int dstoff);
709
const VertexDecoder *dec_;
711
Arm64Gen::ARM64FloatEmitter fp;