1
// Copyright 2013 Dolphin Emulator Project
2
// Licensed under GPLv2
3
// Refer to the license.txt file included.
5
// Fast image conversion using OpenGL shaders.
6
// This kind of stuff would be a LOT nicer with OpenCL.
8
#include "TextureConverter.h"
9
#include "TextureConversionShader.h"
10
#include "TextureCache.h"
11
#include "ProgramShaderCache.h"
12
#include "FramebufferManager.h"
14
#include "VideoConfig.h"
15
#include "ImageWrite.h"
18
#include "HW/Memmap.h"
19
#include "DriverDetails.h"
24
namespace TextureConverter
27
using OGL::TextureCache;
29
// ---- File-scope state shared by the encode/decode paths in this file ----

// Framebuffer object that the conversion passes render into; the destination
// texture is attached to it as GL_COLOR_ATTACHMENT0 before each pass.
static GLuint s_texConvFrameBuffer = 0;
30
static GLuint s_srcTexture = 0; // for decoding from RAM
31
// Dimensions of the storage last allocated for s_srcTexture with glTexImage2D.
// DecodeToTexture compares against these to decide whether a cheaper
// glTexSubImage2D upload is sufficient.
static GLuint s_srcTextureWidth = 0;
32
static GLuint s_srcTextureHeight = 0;
33
static GLuint s_dstTexture = 0; // for encoding to RAM
35
// Size of the storage allocated for s_dstTexture.
const int renderBufferWidth = 1024;
36
const int renderBufferHeight = 1024;
38
// Fixed-function conversion programs, compiled from the fragment shader
// strings FProgramRgbToYuyv / FProgramYuyvToRgb.
static SHADER s_rgbToYuyvProgram;
39
static SHADER s_yuyvToRgbProgram;
41
// Not all slots are taken - but who cares.
42
const u32 NUM_ENCODING_PROGRAMS = 64;
43
// Encoding programs indexed by texture copy format; each slot is compiled
// on first use by GetOrCreateEncodingShader (glprogid == 0 means "not yet built").
static SHADER s_encodingPrograms[NUM_ENCODING_PROGRAMS];
45
// Vertex buffer / vertex array pairs for the encode and decode quads.
static GLuint s_encode_VBO = 0;
46
static GLuint s_encode_VAO = 0;
47
static GLuint s_decode_VBO = 0;
48
static GLuint s_decode_VAO = 0;
// Last source rectangle uploaded into s_encode_VBO; the encode path skips
// the glBufferData upload when the requested rectangle is unchanged.
49
static TargetRectangle s_cached_sourceRc;
// Last source dimensions uploaded into s_decode_VBO (same caching idea for
// the decode path).
50
static int s_cached_srcWidth = 0;
51
static int s_cached_srcHeight = 0;
53
// Vertex shader source passed as the vertex stage to every
// ProgramShaderCache::CompileShader call in this file. It forwards the 2D
// input position `rawpos` unchanged as the clip-space position (z = 0, w = 1).
// NOTE(review): the string is only partially visible in this excerpt (the
// lines between the two fragments below and the terminator are missing), so
// the texture-coordinate handling cannot be described from here.
static const char *VProgram =
54
"ATTRIN vec2 rawpos;\n"
60
" gl_Position = vec4(rawpos, 0.0, 1.0);\n"
65
// Builds the two fixed conversion programs (RGB -> YUYV and YUYV -> RGB).
// NOTE(review): the header of the enclosing function and several lines of
// each shader string are not visible in this excerpt.
//
// RGB -> YUYV: each output texel packs two horizontally adjacent source
// texels (c0 at uv0, c1 at uv0 + (1,0)). Luma is computed per texel, chroma
// from their average c01. The output channels are
//   r = Y(c1), g = U(c01), b = Y(c0), a = V(c01), each plus the offset in const3.
// Output is BGRA because that is slightly faster than RGBA.
66
const char *FProgramRgbToYuyv =
67
"uniform sampler2DRect samp9;\n"
72
" vec3 c0 = texture2DRect(samp9, uv0).rgb;\n"
73
" vec3 c1 = texture2DRect(samp9, uv0 + vec2(1.0, 0.0)).rgb;\n"
74
" vec3 c01 = (c0 + c1) * 0.5;\n"
75
" vec3 y_const = vec3(0.257,0.504,0.098);\n"
76
" vec3 u_const = vec3(-0.148,-0.291,0.439);\n"
77
" vec3 v_const = vec3(0.439,-0.368,-0.071);\n"
78
" vec4 const3 = vec4(0.0625,0.5,0.0625,0.5);\n"
79
" ocol0 = vec4(dot(c1,y_const),dot(c01,u_const),dot(c0,y_const),dot(c01, v_const)) + const3;\n"
82
// YUYV -> RGB: the inverse pass. One source texel holds two luma samples
// (b and r) plus shared chroma (g = U, a = V); the fractional part of uv0.x
// selects which luma sample applies to the pixel being shaded
// (fract < 0.5 -> c0.b, otherwise c0.r).
const char *FProgramYuyvToRgb =
83
"uniform sampler2DRect samp9;\n"
88
" vec4 c0 = texture2DRect(samp9, uv0).rgba;\n"
89
" float f = step(0.5, fract(uv0.x));\n"
90
" float y = mix(c0.b, c0.r, f);\n"
91
" float yComp = 1.164 * (y - 0.0625);\n"
92
" float uComp = c0.g - 0.5;\n"
93
" float vComp = c0.a - 0.5;\n"
94
" ocol0 = vec4(yComp + (1.596 * vComp),\n"
95
" yComp - (0.813 * vComp) - (0.391 * uComp),\n"
96
" yComp + (2.018 * uComp),\n"
100
// Both programs share the vertex shader VProgram.
ProgramShaderCache::CompileShader(s_rgbToYuyvProgram, VProgram, FProgramRgbToYuyv);
101
ProgramShaderCache::CompileShader(s_yuyvToRgbProgram, VProgram, FProgramYuyvToRgb);
104
// Returns the encoding program for the given texture copy format, compiling
// it on first use and caching it in s_encodingPrograms.
//
// format: index into s_encodingPrograms; valid values are
//         0 .. NUM_ENCODING_PROGRAMS - 1.
// Returns a reference to the cached program. For an out-of-range format a
// panic alert is raised and slot 0 is returned as a fallback (that slot may
// not have been compiled yet).
SHADER &GetOrCreateEncodingShader(u32 format)
{
	// The table has NUM_ENCODING_PROGRAMS entries, so format equal to
	// NUM_ENCODING_PROGRAMS is already one past the end and must be rejected.
	if (format >= NUM_ENCODING_PROGRAMS)
	{
		PanicAlert("Unknown texture copy format: 0x%x\n", format);
		return s_encodingPrograms[0];
	}

	// glprogid == 0 marks a slot whose program has not been built yet.
	if (s_encodingPrograms[format].glprogid == 0)
	{
		const char* shader = TextureConversionShader::GenerateEncodingShader(format, API_OPENGL);

#if defined(_DEBUG) || defined(DEBUGFAST)
		// Debug builds can dump each generated shader to the dump directory.
		if (g_ActiveConfig.iLog & CONF_SAVESHADERS && shader)
		{
			static int counter = 0;
			char szTemp[MAX_PATH];
			sprintf(szTemp, "%senc_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++);

			SaveData(szTemp, shader);
		}
#endif

		ProgramShaderCache::CompileShader(s_encodingPrograms[format], VProgram, shader);
	}

	return s_encodingPrograms[format];
}
134
// Creates the GL objects used by this file: the conversion framebuffer, the
// encode and decode vertex buffer/array pairs, and the source and destination
// textures.
// NOTE(review): the header of the enclosing function is not visible in this
// excerpt; this looks like the module's initialisation routine.
glGenFramebuffers(1, &s_texConvFrameBuffer);
136
glGenBuffers(1, &s_encode_VBO );
137
glGenVertexArrays(1, &s_encode_VAO );
138
glBindBuffer(GL_ARRAY_BUFFER, s_encode_VBO );
139
glBindVertexArray( s_encode_VAO );
// Vertex layout: 4 floats per vertex (stride sizeof(GLfloat)*4) -
// position at float offset 0, texture coordinate at float offset 2.
140
glEnableVertexAttribArray(SHADER_POSITION_ATTRIB);
141
glVertexAttribPointer(SHADER_POSITION_ATTRIB, 2, GL_FLOAT, 0, sizeof(GLfloat)*4, (GLfloat*)NULL);
142
glEnableVertexAttribArray(SHADER_TEXTURE0_ATTRIB);
143
glVertexAttribPointer(SHADER_TEXTURE0_ATTRIB, 2, GL_FLOAT, 0, sizeof(GLfloat)*4, (GLfloat*)NULL+2);
// Seed the cached rectangle with -1 on every edge so the first encode call
// is expected to miss the cache and upload fresh vertex data.
144
s_cached_sourceRc.top = -1;
145
s_cached_sourceRc.bottom = -1;
146
s_cached_sourceRc.left = -1;
147
s_cached_sourceRc.right = -1;
149
glGenBuffers(1, &s_decode_VBO );
150
glGenVertexArrays(1, &s_decode_VAO );
151
glBindBuffer(GL_ARRAY_BUFFER, s_decode_VBO );
152
glBindVertexArray( s_decode_VAO );
// Same cache-invalidation idea for the decode quad.
153
s_cached_srcWidth = -1;
154
s_cached_srcHeight = -1;
// The decode VAO uses the same interleaved position/texcoord layout.
155
glEnableVertexAttribArray(SHADER_POSITION_ATTRIB);
156
glVertexAttribPointer(SHADER_POSITION_ATTRIB, 2, GL_FLOAT, 0, sizeof(GLfloat)*4, (GLfloat*)NULL);
157
glEnableVertexAttribArray(SHADER_TEXTURE0_ATTRIB);
158
glVertexAttribPointer(SHADER_TEXTURE0_ATTRIB, 2, GL_FLOAT, 0, sizeof(GLfloat)*4, (GLfloat*)NULL+2);
// No storage has been allocated for s_srcTexture yet; DecodeToTexture
// allocates it on first use.
160
s_srcTextureWidth = 0;
161
s_srcTextureHeight = 0;
// All texture work in this file happens on texture unit 9.
// NOTE(review): presumably matches the `samp9` sampler used by the
// shaders - confirm how the sampler uniform is bound.
163
glActiveTexture(GL_TEXTURE0 + 9);
164
glGenTextures(1, &s_srcTexture);
165
glBindTexture(getFbType(), s_srcTexture);
166
glTexParameteri(getFbType(), GL_TEXTURE_MAX_LEVEL, 0);
// Destination texture: RGBA storage of renderBufferWidth x renderBufferHeight
// with no initial contents (NULL data pointer) and a single mip level.
168
glGenTextures(1, &s_dstTexture);
169
glBindTexture(GL_TEXTURE_2D, s_dstTexture);
170
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
171
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, renderBufferWidth, renderBufferHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
178
// Releases the GL objects and shader programs owned by this file.
// NOTE(review): the header of the enclosing function is not visible in this
// excerpt; this looks like the module's shutdown routine.
glDeleteTextures(1, &s_srcTexture);
179
glDeleteTextures(1, &s_dstTexture);
180
glDeleteFramebuffers(1, &s_texConvFrameBuffer);
181
glDeleteBuffers(1, &s_encode_VBO );
182
glDeleteVertexArrays(1, &s_encode_VAO );
183
glDeleteBuffers(1, &s_decode_VBO );
184
glDeleteVertexArrays(1, &s_decode_VAO );
186
s_rgbToYuyvProgram.Destroy();
187
s_yuyvToRgbProgram.Destroy();
// Destroy every slot of the encoding table, including slots that were
// never compiled.
189
for (unsigned int i = 0; i < NUM_ENCODING_PROGRAMS; i++)
190
s_encodingPrograms[i].Destroy();
// Clear the handle so a stale framebuffer name is not reused.
194
s_texConvFrameBuffer = 0;
197
// Renders srcTexture through the currently bound program into s_dstTexture
// and reads the result back into destAddr.
//
// srcTexture   - GL texture to sample from (bound on texture unit 9).
// sourceRc     - rectangle supplying the texture coordinates of the quad.
// destAddr     - destination buffer for the glReadPixels read-back.
// dstWidth     - width in texels of the rendered/read-back area.
// dstHeight    - height in texels of the rendered/read-back area.
// readStride   - bytes covered by one read-back chunk; only used when
//                toTexture is set and it differs from the write stride.
// toTexture    - enables the strided, chunk-by-chunk read-back path.
// linearFilter - NOTE(review): presumably selects GL_LINEAR vs GL_NEAREST
//                below; the branch itself is not visible in this excerpt.
//
// The caller is expected to have bound the conversion program beforehand;
// this function does not bind one itself.
void EncodeToRamUsingShader(GLuint srcTexture, const TargetRectangle& sourceRc,
198
u8* destAddr, int dstWidth, int dstHeight, int readStride,
199
bool toTexture, bool linearFilter)
203
// switch to texture converter frame buffer
204
// attach render buffer as color destination
205
FramebufferManager::SetFramebuffer(s_texConvFrameBuffer);
207
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, s_dstTexture, 0);
210
// set source texture
211
glActiveTexture(GL_TEXTURE0+9);
212
glBindTexture(getFbType(), srcTexture);
// NOTE(review): the condition choosing between the two filter settings
// below is missing from this excerpt.
216
glTexParameteri(getFbType(), GL_TEXTURE_MAG_FILTER, GL_LINEAR);
217
glTexParameteri(getFbType(), GL_TEXTURE_MIN_FILTER, GL_LINEAR);
221
glTexParameteri(getFbType(), GL_TEXTURE_MAG_FILTER, GL_NEAREST);
222
glTexParameteri(getFbType(), GL_TEXTURE_MIN_FILTER, GL_NEAREST);
227
glViewport(0, 0, (GLsizei)dstWidth, (GLsizei)dstHeight);
// Re-upload the quad only when the source rectangle changed since the
// previous call.
230
if(!(s_cached_sourceRc == sourceRc)) {
// Interleaved vertex data, 4 floats per vertex. The visible rows are the
// texture coordinates taken from sourceRc.
// NOTE(review): the position rows are not visible in this excerpt.
231
GLfloat vertices[] = {
233
(float)sourceRc.left, (float)sourceRc.top,
235
(float)sourceRc.left, (float)sourceRc.bottom,
237
(float)sourceRc.right, (float)sourceRc.top,
239
(float)sourceRc.right, (float)sourceRc.bottom
241
glBindBuffer(GL_ARRAY_BUFFER, s_encode_VBO );
242
glBufferData(GL_ARRAY_BUFFER, 4*4*sizeof(GLfloat), vertices, GL_STREAM_DRAW);
244
s_cached_sourceRc = sourceRc;
247
glBindVertexArray( s_encode_VAO );
248
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
252
// .. and then read back the results.
253
// TODO: make this less slow.
// Destination stride in bytes, derived from the BP register state.
255
int writeStride = bpmem.copyMipMapStrideChannels * 32;
257
if (writeStride != readStride && toTexture)
259
// writing to a texture of a different size
// Number of rendered rows that make up one readStride-sized chunk.
// NOTE(review): if readStride < dstWidth * 4 this evaluates to 0 and the
// division on the readLoops line below divides by zero - confirm that
// callers never pass such a stride on this path.
261
int readHeight = readStride / dstWidth;
262
readHeight /= 4; // 4 bytes per pixel
// NOTE(review): readStart is used below but its declaration is not visible
// in this excerpt.
265
int readLoops = dstHeight / readHeight;
// Read one chunk at a time, advancing the destination by writeStride so
// that each chunk lands on its own destination row.
266
for (int i = 0; i < readLoops; i++)
268
glReadPixels(0, readStart, (GLsizei)dstWidth, (GLsizei)readHeight, GL_BGRA, GL_UNSIGNED_BYTE, destAddr);
269
readStart += readHeight;
270
destAddr += writeStride;
// Strides match (or toTexture is false): a single read-back is enough.
274
glReadPixels(0, 0, (GLsizei)dstWidth, (GLsizei)dstHeight, GL_BGRA, GL_UNSIGNED_BYTE, destAddr);
280
// Encodes a rectangle of source_texture into emulated RAM at `address` using
// the encoding shader that matches the requested copy format.
//
// address         - emulated memory address that receives the encoded data.
// source_texture  - GL texture to encode from.
// bFromZBuffer    - also disables linear filtering in the final encode call.
//                   NOTE(review): it presumably selects the Z-format flags
//                   below as well, but those conditions are not visible here.
// bIsIntensityFmt - participates in choosing the _GX_TF_CTF flag.
// copyfmt         - base texture copy format.
// bScaleByHalf    - non-zero halves width/height and doubles the sample stride.
// source          - source rectangle.
//
// Returns the value of TexDecoder_GetTextureSizeInBytes for the unexpanded
// width/height and the final format.
int EncodeToRamFromTexture(u32 address,GLuint source_texture, bool bFromZBuffer, bool bIsIntensityFmt, u32 copyfmt, int bScaleByHalf, const EFBRectangle& source)
282
u32 format = copyfmt;
// NOTE(review): several of the conditions guarding the format adjustments
// below are missing from this excerpt.
286
format |= _GX_TF_ZTF;
289
else if (format < GX_TF_Z8 || format > GX_TF_Z24X8)
290
format |= _GX_TF_CTF;
293
if (copyfmt > GX_TF_RGBA8 || (copyfmt < GX_TF_RGB565 && !bIsIntensityFmt))
294
format |= _GX_TF_CTF;
296
SHADER& texconv_shader = GetOrCreateEncodingShader(format);
298
u8 *dest_ptr = Memory::GetPointer(address);
300
int width = (source.right - source.left) >> bScaleByHalf;
301
int height = (source.bottom - source.top) >> bScaleByHalf;
303
int size_in_bytes = TexDecoder_GetTextureSizeInBytes(width, height, format);
// Block dimensions minus one, used as bit masks for the round-up below.
// NOTE(review): the masking only rounds correctly if the block dimensions
// are powers of two - confirm against TexDecoder.
305
u16 blkW = TexDecoder_GetBlockWidthInTexels(format) - 1;
306
u16 blkH = TexDecoder_GetBlockHeightInTexels(format) - 1;
307
u16 samples = TextureConversionShader::GetEncodedSampleCount(format);
309
// only copy on cache line boundaries
310
// extra pixels are copied but not displayed in the resulting texture
311
s32 expandedWidth = (width + blkW) & (~blkW);
312
s32 expandedHeight = (height + blkH) & (~blkH);
314
float sampleStride = bScaleByHalf ? 2.f : 1.f;
// Shader parameters, uploaded below as two vec4 values.
// NOTE(review): the declaration line of `params` and part of its contents
// are not visible in this excerpt.
317
Renderer::EFBToScaledXf(sampleStride), Renderer::EFBToScaledYf(sampleStride),
319
(float)expandedWidth, (float)Renderer::EFBToScaledY(expandedHeight)-1,
320
(float)Renderer::EFBToScaledX(source.left), (float)Renderer::EFBToScaledY(EFB_HEIGHT - source.top - expandedHeight)
323
texconv_shader.Bind();
324
glUniform4fv(texconv_shader.UniformLocations[0], 2, params);
// Each rendered texel holds `samples` encoded samples, so the render target
// is narrower than the expanded source by that factor.
326
TargetRectangle scaledSource;
327
scaledSource.top = 0;
328
scaledSource.bottom = expandedHeight;
329
scaledSource.left = 0;
330
scaledSource.right = expandedWidth / samples;
// NOTE(review): the body of this condition and the declaration of
// `cacheBytes` are not visible; presumably format 6 uses a different
// cache-line byte count.
332
if ((format & 0x0f) == 6)
// Bytes produced per row of blocks.
335
int readStride = (expandedWidth * cacheBytes) /
336
TexDecoder_GetBlockWidthInTexels(format);
// Linear filtering is enabled only for half-scale colour copies.
337
EncodeToRamUsingShader(source_texture, scaledSource,
338
dest_ptr, expandedWidth / samples, expandedHeight, readStride,
339
true, bScaleByHalf > 0 && !bFromZBuffer);
340
return size_in_bytes; // TODO: D3D11 is calculating this value differently!
344
// Converts srcTexture to YUYV with s_rgbToYuyvProgram and reads the result
// back into destAddr.
//
// srcTexture - GL texture holding the RGB source image.
// sourceRc   - source rectangle within srcTexture.
// destAddr   - destination buffer for the encoded data.
// dstWidth   - output width in pixels; halved for rendering because the
//              RGB->YUYV shader packs two source pixels into one output texel.
// dstHeight  - output height in pixels.
//
// The API state is reset before the pass and restored afterwards, and the
// default framebuffer (0) is rebound.
void EncodeToRamYUYV(GLuint srcTexture, const TargetRectangle& sourceRc, u8* destAddr, int dstWidth, int dstHeight)
346
g_renderer->ResetAPIState();
348
s_rgbToYuyvProgram.Bind();
// readStride = 0 with toTexture = false selects the single glReadPixels
// path; filtering is requested as non-linear.
350
EncodeToRamUsingShader(srcTexture, sourceRc, destAddr, dstWidth / 2, dstHeight, 0, false, false);
351
FramebufferManager::SetFramebuffer(0);
352
TextureCache::DisableStage(0);
353
g_renderer->RestoreAPIState();
358
// Should be scale free.
359
// Decodes a YUYV image stored in emulated RAM into destTexture using
// s_yuyvToRgbProgram.
//
// xfbAddr     - emulated memory address of the YUYV source data.
// srcWidth    - width of the decoded image in pixels.
// srcHeight   - height of the decoded image in pixels.
// destTexture - GL_TEXTURE_2D that receives the decoded image; it is
//               attached to the conversion framebuffer as the colour target.
void DecodeToTexture(u32 xfbAddr, int srcWidth, int srcHeight, GLuint destTexture)
361
u8* srcAddr = Memory::GetPointer(xfbAddr);
// NOTE(review): the guard around this warning (presumably a null check on
// srcAddr followed by an early return) is not visible in this excerpt.
364
WARN_LOG(VIDEO, "Tried to decode from invalid memory address");
// The source is uploaded as 4-byte texels, each carrying two pixels, so the
// source texture is half as wide as the decoded image.
368
int srcFmtWidth = srcWidth / 2;
370
g_renderer->ResetAPIState(); // reset any game specific settings
372
// switch to texture converter frame buffer
373
// attach destTexture as color destination
374
FramebufferManager::SetFramebuffer(s_texConvFrameBuffer);
375
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, destTexture, 0);
377
GL_REPORT_FBO_ERROR();
379
// activate source texture
380
// set srcAddr as data for source texture
381
glActiveTexture(GL_TEXTURE0+9);
382
glBindTexture(getFbType(), s_srcTexture);
384
// TODO: make this less slow. (How?)
// Reuse the existing texture storage when the size is unchanged; otherwise
// reallocate with glTexImage2D and remember the new size.
385
if ((GLsizei)s_srcTextureWidth == (GLsizei)srcFmtWidth && (GLsizei)s_srcTextureHeight == (GLsizei)srcHeight)
387
glTexSubImage2D(getFbType(), 0,0,0,s_srcTextureWidth, s_srcTextureHeight,
388
GL_BGRA, GL_UNSIGNED_BYTE, srcAddr);
392
glTexImage2D(getFbType(), 0, GL_RGBA, (GLsizei)srcFmtWidth, (GLsizei)srcHeight,
393
0, GL_BGRA, GL_UNSIGNED_BYTE, srcAddr);
394
s_srcTextureWidth = (GLsizei)srcFmtWidth;
395
s_srcTextureHeight = (GLsizei)srcHeight;
// Render at the full decoded resolution.
398
glViewport(0, 0, srcWidth, srcHeight);
399
s_yuyvToRgbProgram.Bind();
// Re-upload the quad only when the source dimensions changed.
403
if(s_cached_srcHeight != srcHeight || s_cached_srcWidth != srcWidth) {
// Interleaved vertex data, 4 floats per vertex. The visible rows are
// texture coordinates in source-texel units (0..srcFmtWidth, 0..srcHeight).
// NOTE(review): the position rows and one texture-coordinate row are not
// visible in this excerpt.
404
GLfloat vertices[] = {
406
(float)srcFmtWidth, (float)srcHeight,
408
(float)srcFmtWidth, 0.f,
410
0.f, (float)srcHeight,
415
glBindBuffer(GL_ARRAY_BUFFER, s_decode_VBO );
416
glBufferData(GL_ARRAY_BUFFER, sizeof(GLfloat)*4*4, vertices, GL_STREAM_DRAW);
418
s_cached_srcHeight = srcHeight;
419
s_cached_srcWidth = srcWidth;
422
glBindVertexArray( s_decode_VAO );
423
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
// Return to the default framebuffer and restore the state reset above.
427
FramebufferManager::SetFramebuffer(0);
429
g_renderer->RestoreAPIState();