2
* Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
11
* The above copyright notice and this permission notice shall be included
12
* in all copies or substantial portions of the Software.
14
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17
* THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24
* Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
34
#include "main/imports.h"
35
#include "main/mtypes.h"
37
#include "tnl/t_context.h"
38
#include "program/program.h"
39
#include "program/prog_parameter.h"
40
#include "program/prog_statevars.h"
42
#include "radeon_debug.h"
43
#include "r600_context.h"
44
#include "r600_cmdbuf.h"
45
#include "r600_emit.h"
46
#include "program/programopt.h"
48
#include "r700_debug.h"
49
#include "r700_vertprog.h"
51
/*
 * Map_Vertex_Output - assign consecutive slot numbers to the vertex
 * program results.
 *
 * Each result that passes the mesa_vp->Base.OutputsWritten bit test gets
 * the next number, counting up from unStart, stored in
 * pAsm->ucVP_OutputMap[result].  Results are visited in this order:
 * HPOS, COL0, COL1, BFC0, BFC1, FOGC, PSIZ, the VERT_RESULT_TEX0 + i
 * results, then VERT_RESULT_VAR0 up to (not including) VERT_RESULT_MAX.
 *
 * Returns the number of slots handed out (unTotal - unStart).
 */
unsigned int Map_Vertex_Output(r700_AssemblerBase *pAsm,
52
struct gl_vertex_program *mesa_vp,
57
unsigned int unTotal = unStart;
59
// NOTE: the mapping order below must match the FS input order.
61
unBit = 1 << VERT_RESULT_HPOS;
62
if(mesa_vp->Base.OutputsWritten & unBit)
64
pAsm->ucVP_OutputMap[VERT_RESULT_HPOS] = unTotal++;
67
unBit = 1 << VERT_RESULT_COL0;
68
if(mesa_vp->Base.OutputsWritten & unBit)
70
pAsm->ucVP_OutputMap[VERT_RESULT_COL0] = unTotal++;
73
unBit = 1 << VERT_RESULT_COL1;
74
if(mesa_vp->Base.OutputsWritten & unBit)
76
pAsm->ucVP_OutputMap[VERT_RESULT_COL1] = unTotal++;
79
// TODO: deal with back face.
80
unBit = 1 << VERT_RESULT_BFC0;
81
if(mesa_vp->Base.OutputsWritten & unBit)
83
pAsm->ucVP_OutputMap[VERT_RESULT_BFC0] = unTotal++;
86
unBit = 1 << VERT_RESULT_BFC1;
87
if(mesa_vp->Base.OutputsWritten & unBit)
89
pAsm->ucVP_OutputMap[VERT_RESULT_BFC1] = unTotal++;
93
unBit = 1 << VERT_RESULT_FOGC;
94
if(mesa_vp->Base.OutputsWritten & unBit)
96
pAsm->ucVP_OutputMap[VERT_RESULT_FOGC] = unTotal++;
99
// TODO: deal with point size.
100
unBit = 1 << VERT_RESULT_PSIZ;
101
if(mesa_vp->Base.OutputsWritten & unBit)
103
pAsm->ucVP_OutputMap[VERT_RESULT_PSIZ] = unTotal++;
108
// Texture coordinate results, indexed relative to VERT_RESULT_TEX0.
unBit = 1 << (VERT_RESULT_TEX0 + i);
109
if(mesa_vp->Base.OutputsWritten & unBit)
111
pAsm->ucVP_OutputMap[VERT_RESULT_TEX0 + i] = unTotal++;
115
// Remaining results, from VERT_RESULT_VAR0 up to VERT_RESULT_MAX.
for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
118
if(mesa_vp->Base.OutputsWritten & unBit)
120
pAsm->ucVP_OutputMap[i] = unTotal++;
124
// Number of slots assigned by this call.
return (unTotal - unStart);
127
/*
 * Map_Vertex_Input - assign consecutive numbers to the vertex attributes
 * the program reads.
 *
 * Walks attribute indices 0 .. VERT_ATTRIB_MAX - 1; every index that passes
 * the mesa_vp->Base.InputsRead bit test gets the next number, counting up
 * from unStart, stored in pAsm->ucVP_AttributeMap[index].
 *
 * Returns the number of attributes mapped (unTotal - unStart).
 */
unsigned int Map_Vertex_Input(r700_AssemblerBase *pAsm,
128
struct gl_vertex_program *mesa_vp,
129
unsigned int unStart)
133
unsigned int unTotal = unStart;
134
for(i=0; i<VERT_ATTRIB_MAX; i++)
137
if(mesa_vp->Base.InputsRead & unBit)
139
pAsm->ucVP_AttributeMap[i] = unTotal++;
142
// Number of attributes mapped by this call.
return (unTotal - unStart);
145
/*
 * Process_Vertex_Program_Vfetch_Instructions - emit a vertex fetch for each
 * attribute the program reads.
 *
 * Walks attribute indices 0 .. VERT_ATTRIB_MAX - 1 and, for each one that
 * passes the mesa_vp->Base.InputsRead bit test, calls
 * assemble_vfetch_instruction() on vp->r700AsmCode with the value mapped in
 * ucVP_AttributeMap[i] and the size/type recorded in vp->aos_desc[i].
 *
 * NOTE(review): the remaining call arguments and the return value are not
 * visible here - confirm against the full definition.
 */
GLboolean Process_Vertex_Program_Vfetch_Instructions(
146
struct r700_vertex_program *vp,
147
struct gl_vertex_program *mesa_vp)
151
VTX_FETCH_METHOD vtxFetchMethod;
152
// Fetch method starts with mini fetch disabled and no mega-fetch remainder.
vtxFetchMethod.bEnableMini = GL_FALSE;
153
vtxFetchMethod.mega_fetch_remainder = 0;
155
for(i=0; i<VERT_ATTRIB_MAX; i++)
158
if(mesa_vp->Base.InputsRead & unBit)
160
assemble_vfetch_instruction(&vp->r700AsmCode,
162
vp->r700AsmCode.ucVP_AttributeMap[i],
163
vp->aos_desc[i].size,
164
vp->aos_desc[i].type,
172
/*
 * Process_Vertex_Program_Vfetch_Instructions2 - emit a vertex fetch for each
 * active stream of the context.
 *
 * Iterates over the first context->nNumActiveAos entries of
 * context->stream_desc and calls assemble_vfetch_instruction2() on
 * vp->r700AsmCode for each, passing the value mapped in ucVP_AttributeMap
 * for the stream's element plus the stream's type, size, element, _signed,
 * normalize and format fields.
 *
 * NOTE(review): the remaining call arguments and the return value are not
 * visible here - confirm against the full definition.
 */
GLboolean Process_Vertex_Program_Vfetch_Instructions2(
173
struct gl_context *ctx,
174
struct r700_vertex_program *vp,
175
struct gl_vertex_program *mesa_vp)
178
context_t *context = R700_CONTEXT(ctx);
180
VTX_FETCH_METHOD vtxFetchMethod;
181
// Fetch method starts with mini fetch disabled and no mega-fetch remainder.
vtxFetchMethod.bEnableMini = GL_FALSE;
182
vtxFetchMethod.mega_fetch_remainder = 0;
184
for(i=0; i<context->nNumActiveAos; i++)
186
assemble_vfetch_instruction2(&vp->r700AsmCode,
187
vp->r700AsmCode.ucVP_AttributeMap[context->stream_desc[i].element],
188
context->stream_desc[i].type,
189
context->stream_desc[i].size,
190
context->stream_desc[i].element,
191
context->stream_desc[i]._signed,
192
context->stream_desc[i].normalize,
193
context->stream_desc[i].format,
200
/*
 * Map_Vertex_Program - lay out the register numbering of a vertex program.
 *
 * pAsm->number_used_registers is used as a running counter and is advanced,
 * in this order, past:
 *   - R0, which is reserved as the index into the vertex buffer;
 *   - the inputs mapped by Map_Vertex_Input();
 *   - the exports mapped by Map_Vertex_Output();
 *   - the temporaries (NumNativeTemporaries when it is >= NumTemporaries,
 *     NumTemporaries in the other branch);
 *   - one flag register, recorded in flag_reg_index.
 * The counter value left after that is stored in uFirstHelpReg.
 *
 * The starting number of the vfetch, export and temp ranges is recorded in
 * the corresponding pAsm->starting_*_register_number field.  After the
 * inputs are mapped, the VFETCH instructions are generated through
 * Process_Vertex_Program_Vfetch_Instructions2(); a result other than
 * GL_TRUE is reported with radeon_error().
 */
void Map_Vertex_Program(struct gl_context *ctx,
201
struct r700_vertex_program *vp,
202
struct gl_vertex_program *mesa_vp)
204
r700_AssemblerBase *pAsm = &(vp->r700AsmCode);
205
unsigned int num_inputs;
207
// R0 will always be used for index into vertex buffer
208
pAsm->number_used_registers = 1;
209
pAsm->starting_vfetch_register_number = pAsm->number_used_registers;
211
// Map Inputs: Add 1 to mapping since R0 is used for index
212
num_inputs = Map_Vertex_Input(pAsm, mesa_vp, pAsm->number_used_registers);
213
pAsm->number_used_registers += num_inputs;
215
// Create VFETCH instructions for inputs
216
if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions2(ctx, vp, mesa_vp) )
218
radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions2 return error. \n");
223
// Map outputs: exports are numbered right after the inputs.
pAsm->number_of_exports = Map_Vertex_Output(pAsm, mesa_vp, pAsm->number_used_registers);
225
pAsm->starting_export_register_number = pAsm->number_used_registers;
227
pAsm->number_used_registers += pAsm->number_of_exports;
229
/* Map temporary registers (GPRs) */
230
pAsm->starting_temp_register_number = pAsm->number_used_registers;
232
if(mesa_vp->Base.NumNativeTemporaries >= mesa_vp->Base.NumTemporaries)
233
{ /* arb uses NumNativeTemporaries */
234
pAsm->number_used_registers += mesa_vp->Base.NumNativeTemporaries;
237
{ /* fix func t_vp uses NumTemporaries */
238
pAsm->number_used_registers += mesa_vp->Base.NumTemporaries;
241
// One extra register is taken as the flag register.
pAsm->flag_reg_index = pAsm->number_used_registers++;
243
// Helper registers start after everything mapped above.
pAsm->uFirstHelpReg = pAsm->number_used_registers;
246
/*
 * Find_Instruction_Dependencies_vp - record, per instruction, which earlier
 * instruction last wrote each temporary it reads.
 *
 * Allocates one InstDeps entry per instruction of mesa_vp and a scratch
 * table puiTEMPwrites with one slot per temporary (initialised to -1).
 * Walking the instructions in order:
 *   - nDstDep is set to -1;
 *   - if the destination is a PROGRAM_TEMPORARY, the scratch slot for that
 *     temporary is set to the current instruction index;
 *   - each source that is a PROGRAM_TEMPORARY gets nSrcDeps[j] set to the
 *     scratch slot value for that temporary; other sources get -1.
 * The InstDeps array is stored in vp->r700AsmCode.pInstDeps.
 *
 * NOTE(review): the return value, any allocation checks and the release of
 * puiTEMPwrites are not visible here - confirm against the full definition.
 */
GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp,
247
struct gl_vertex_program *mesa_vp)
250
GLint * puiTEMPwrites;
251
struct prog_instruction *pILInst;
254
// Scratch table: index of the last instruction that wrote each temporary.
puiTEMPwrites = (GLint*) MALLOC(sizeof(GLuint)*mesa_vp->Base.NumTemporaries);
255
for(i=0; i<mesa_vp->Base.NumTemporaries; i++)
257
// -1 means "not written yet".
puiTEMPwrites[i] = -1;
260
pInstDeps = (InstDeps*)MALLOC(sizeof(InstDeps)*mesa_vp->Base.NumInstructions);
262
for(i=0; i<mesa_vp->Base.NumInstructions; i++)
264
pInstDeps[i].nDstDep = -1;
265
pILInst = &(mesa_vp->Base.Instructions[i]);
268
if(pILInst->DstReg.File == PROGRAM_TEMPORARY)
270
//Set lastwrite for the temp
271
puiTEMPwrites[pILInst->DstReg.Index] = i;
277
if(pILInst->SrcReg[j].File == PROGRAM_TEMPORARY)
280
// Source depends on whichever instruction last wrote this temporary.
pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index];
284
// Non-temporary source: no dependency recorded.
pInstDeps[i].nSrcDeps[j] = -1;
289
vp->r700AsmCode.pInstDeps = pInstDeps;
296
/*
 * r700TranslateVertexShader - build a new r700_vertex_program from a Mesa
 * vertex program for the current stream layout.
 *
 * Steps visible in this function:
 *   1. calloc a zeroed r700_vertex_program and clone mesa_vp into
 *      vp->mesa_program (MVP code is inserted into the clone when
 *      mesa_vp->IsPositionInvariant is set).
 *   2. Copy size/stride/type/format of the context's active streams into
 *      vp->aos_desc.
 *   3. Set bR6xx for chip families below CHIP_FAMILY_RV770, initialise the
 *      assembler with Init_r700_AssemblerBase(), and copy the chip's
 *      bShaderUseMemConstant setting into the assembler.
 *   4. Map registers (Map_Vertex_Program), compute dependencies
 *      (Find_Instruction_Dependencies_vp), copy the sampler units, then run
 *      AssembleInstr(), Process_Vertex_Exports() and RelocProgram().
 *   5. Store the register count and parameter export count in
 *      vp->r700Shader and mark vp as translated.
 *
 * NOTE(review): the handling of GL_FALSE results from the steps in (4) and
 * the return statement are not visible here - confirm against the full
 * definition.
 */
struct r700_vertex_program* r700TranslateVertexShader(struct gl_context *ctx,
297
struct gl_vertex_program *mesa_vp)
299
context_t *context = R700_CONTEXT(ctx);
301
R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
303
struct r700_vertex_program *vp;
306
vp = calloc(1, sizeof(*vp));
307
// Work on a private clone of the Mesa program.
vp->mesa_program = _mesa_clone_vertex_program(ctx, mesa_vp);
311
if (mesa_vp->IsPositionInvariant)
313
_mesa_insert_mvp_code(ctx, vp->mesa_program);
316
// Snapshot the active stream layout this translation is built for.
for(i=0; i<context->nNumActiveAos; i++)
318
vp->aos_desc[i].size = context->stream_desc[i].size;
319
vp->aos_desc[i].stride = context->stream_desc[i].stride;
320
vp->aos_desc[i].type = context->stream_desc[i].type;
321
vp->aos_desc[i].format = context->stream_desc[i].format;
324
if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
326
vp->r700AsmCode.bR6xx = 1;
330
Init_r700_AssemblerBase(SPT_VP, &(vp->r700AsmCode), &(vp->r700Shader) );
332
// Mirror the chip's constant-source setting into the assembler.
if(GL_TRUE == r700->bShaderUseMemConstant)
334
vp->r700AsmCode.bUseMemConstant = GL_TRUE;
338
vp->r700AsmCode.bUseMemConstant = GL_FALSE;
341
vp->r700AsmCode.unAsic = 7;
343
Map_Vertex_Program(ctx, vp, vp->mesa_program );
345
if(GL_FALSE == Find_Instruction_Dependencies_vp(vp, vp->mesa_program))
350
InitShaderProgram(&(vp->r700AsmCode));
352
for(i=0; i < MAX_SAMPLERS; i++)
354
vp->r700AsmCode.SamplerUnits[i] = vp->mesa_program->Base.SamplerUnits[i];
357
vp->r700AsmCode.unCurNumILInsts = vp->mesa_program->Base.NumInstructions;
359
// Assemble every instruction of the cloned program, starting at index 0.
if(GL_FALSE == AssembleInstr(0,
361
vp->mesa_program->Base.NumInstructions,
362
&(vp->mesa_program->Base.Instructions[0]),
363
&(vp->r700AsmCode)) )
368
if(GL_FALSE == Process_Vertex_Exports(&(vp->r700AsmCode), vp->mesa_program->Base.OutputsWritten) )
373
if( GL_FALSE == RelocProgram(&(vp->r700AsmCode), &(vp->mesa_program->Base)) )
378
// nRegs is stored as (registers used - 1), clamped at 0.
vp->r700Shader.nRegs = (vp->r700AsmCode.number_used_registers == 0) ? 0
379
: (vp->r700AsmCode.number_used_registers - 1);
381
vp->r700Shader.nParamExports = vp->r700AsmCode.number_of_exports;
383
vp->translated = GL_TRUE;
388
/*
 * r700SelectVertexShader - choose the translated vertex program to use and
 * store it in context->selected_vp.
 *
 * The current vertex program (ctx->VertexProgram._Current) is treated as an
 * r700_vertex_program_cont, whose progs list holds earlier translations.
 * Each list entry is compared against the context's active streams by
 * aos_desc size and format.  A new translation is produced with
 * r700TranslateVertexShader(), pushed on the front of vpc->progs and
 * selected; a translation failure is reported with radeon_error().
 *
 * NOTE(review): the exact match/early-exit control flow is not visible
 * here; presumably a new translation is made only when no list entry
 * matches - confirm against the full definition.
 */
void r700SelectVertexShader(struct gl_context *ctx)
390
context_t *context = R700_CONTEXT(ctx);
391
struct r700_vertex_program_cont *vpc;
392
struct r700_vertex_program *vp;
395
GLbitfield InputsRead;
397
vpc = (struct r700_vertex_program_cont *)ctx->VertexProgram._Current;
399
InputsRead = vpc->mesa_program.Base.InputsRead;
400
// Position-invariant programs always read the position attribute.
if (vpc->mesa_program.IsPositionInvariant)
402
InputsRead |= VERT_BIT_POS;
405
// Walk the list of existing translations of this program.
for (vp = vpc->progs; vp; vp = vp->next)
408
for(i=0; i<context->nNumActiveAos; i++)
410
if (vp->aos_desc[i].size != context->stream_desc[i].size ||
411
vp->aos_desc[i].format != context->stream_desc[i].format)
419
context->selected_vp = vp;
424
vp = r700TranslateVertexShader(ctx, &(vpc->mesa_program));
427
radeon_error("Failed to translate vertex shader. \n");
430
// Push the new translation on the front of the list.
vp->next = vpc->progs;
432
context->selected_vp = vp;
436
/*
 * getTypeSize - size in bytes of the C type behind a GL data type enum.
 *
 * Returns sizeof() of the matching GL type: GLdouble, GLfloat, GLint,
 * GLuint, GLshort, GLushort, GLbyte or GLubyte.
 *
 * NOTE(review): only some case labels are visible here, and the behaviour
 * for an unlisted type is not - confirm against the full definition.
 */
int getTypeSize(GLenum type)
441
return sizeof(GLdouble);
443
return sizeof(GLfloat);
445
return sizeof(GLint);
446
case GL_UNSIGNED_INT:
447
return sizeof(GLuint);
449
return sizeof(GLshort);
450
case GL_UNSIGNED_SHORT:
451
return sizeof(GLushort);
453
return sizeof(GLbyte);
454
case GL_UNSIGNED_BYTE:
455
return sizeof(GLubyte);
462
/*
 * r700TranslateAttrib - describe one client vertex array as the next active
 * stream of the context.
 *
 * Fills context->stream_desc[context->nNumActiveAos] from `input` and then
 * increments context->nNumActiveAos.
 *
 * Two layouts are visible:
 *   - a branch taken for GL_DOUBLE, GL_UNSIGNED_INT, GL_INT or any type
 *     whose getTypeSize() is not 4 (further conditions may exist): the
 *     stream type becomes GL_FLOAT, dwords is input->Size, and stride is 0
 *     or sizeof(GLfloat) * input->Size;
 *   - otherwise the input type is kept, dwords is the element byte size
 *     rounded up to whole dwords, and stride is 0 or the element byte size
 *     rounded up to a multiple of 4.
 * size, dst_loc, element and format are then copied, and _signed /
 * normalize are chosen according to the stream type.
 *
 * unLoc is stored as the stream's element; `count` is not used in the
 * lines visible here.
 */
static void r700TranslateAttrib(struct gl_context *ctx, GLuint unLoc, int count, const struct gl_client_array *input)
464
context_t *context = R700_CONTEXT(ctx);
466
// Descriptor slot for the stream being added.
StreamDesc * pStreamDesc = &(context->stream_desc[context->nNumActiveAos]);
470
stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size
473
if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT
475
|| getTypeSize(input->Type) != 4
479
// This branch describes the stream as floats.
pStreamDesc->type = GL_FLOAT;
481
if (input->StrideB == 0)
483
pStreamDesc->stride = 0;
487
pStreamDesc->stride = sizeof(GLfloat) * input->Size;
489
pStreamDesc->dwords = input->Size;
490
pStreamDesc->is_named_bo = GL_FALSE;
494
// This branch keeps the input type as-is.
pStreamDesc->type = input->Type;
495
// Element byte size rounded up to whole dwords.
pStreamDesc->dwords = (getTypeSize(input->Type) * input->Size + 3)/ 4;
496
if (!input->BufferObj->Name)
498
if (input->StrideB == 0)
500
pStreamDesc->stride = 0;
504
// Element byte size rounded up to a multiple of 4.
pStreamDesc->stride = (getTypeSize(pStreamDesc->type) * input->Size + 3) & ~3;
507
pStreamDesc->is_named_bo = GL_FALSE;
511
pStreamDesc->size = input->Size;
512
pStreamDesc->dst_loc = context->nNumActiveAos;
513
pStreamDesc->element = unLoc;
514
pStreamDesc->format = input->Format;
516
// Pick signedness and normalization from the stream type.
switch (pStreamDesc->type)
519
pStreamDesc->_signed = 0;
520
pStreamDesc->normalize = GL_FALSE;
523
pStreamDesc->_signed = 1;
524
pStreamDesc->normalize = input->Normalized;
527
pStreamDesc->_signed = 1;
528
pStreamDesc->normalize = input->Normalized;
530
case GL_UNSIGNED_SHORT:
531
pStreamDesc->_signed = 0;
532
pStreamDesc->normalize = input->Normalized;
534
case GL_UNSIGNED_BYTE:
535
pStreamDesc->_signed = 0;
536
pStreamDesc->normalize = input->Normalized;
540
case GL_UNSIGNED_INT:
545
// The slot is now in use.
context->nNumActiveAos++;
548
/*
 * r700SetVertexFormat - rebuild the context's active stream list from the
 * client arrays.
 *
 * Resets context->nNumActiveAos to 0, takes the InputsRead bitfield of the
 * current vertex program (adding VERT_BIT_POS when the program is position
 * invariant), and calls r700TranslateAttrib() with arrays[unLoc] for
 * attribute locations.  Finally the stream count is copied to
 * context->radeon.tcl.aos_count.
 *
 * NOTE(review): the loop over the bitfield is not visible here; presumably
 * r700TranslateAttrib() is called once per set bit - confirm against the
 * full definition.
 */
void r700SetVertexFormat(struct gl_context *ctx, const struct gl_client_array *arrays[], int count)
550
context_t *context = R700_CONTEXT(ctx);
551
struct r700_vertex_program *vpc
552
= (struct r700_vertex_program *)ctx->VertexProgram._Current;
554
struct gl_vertex_program * mesa_vp = (struct gl_vertex_program *)&(vpc->mesa_program);
555
unsigned int unLoc = 0;
556
unsigned int unBit = mesa_vp->Base.InputsRead;
557
// Start from an empty stream list.
context->nNumActiveAos = 0;
559
if (mesa_vp->IsPositionInvariant)
561
unBit |= VERT_BIT_POS;
568
r700TranslateAttrib(ctx, unLoc, count, arrays[unLoc]);
574
context->radeon.tcl.aos_count = context->nNumActiveAos;
577
/*
 * r700GetActiveVpShaderBo - accessor working on the currently selected
 * vertex program (context->selected_vp).
 *
 * NOTE(review): the returned pointer is not visible here; presumably it is
 * the selected program's shader buffer object - confirm against the full
 * definition.
 */
void * r700GetActiveVpShaderBo(struct gl_context * ctx)
579
context_t *context = R700_CONTEXT(ctx);
580
struct r700_vertex_program *vp = context->selected_vp;;
588
/*
 * r700GetActiveVpShaderConstBo - accessor working on the currently selected
 * vertex program (context->selected_vp).
 *
 * NOTE(review): the returned pointer is not visible here; presumably it is
 * the selected program's constant buffer object - confirm against the full
 * definition.
 */
void * r700GetActiveVpShaderConstBo(struct gl_context * ctx)
590
context_t *context = R700_CONTEXT(ctx);
591
struct r700_vertex_program *vp = context->selected_vp;;
599
GLboolean r700SetupVertexProgram(struct gl_context * ctx)
601
context_t *context = R700_CONTEXT(ctx);
602
R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
603
struct r700_vertex_program *vp = context->selected_vp;
605
struct gl_program_parameter_list *paramList;
606
unsigned int unNumParamData;
608
unsigned int num_sq_vs_gprs;
610
if(GL_FALSE == vp->loaded)
612
if(vp->r700Shader.bNeedsAssembly == GL_TRUE)
614
Assemble( &(vp->r700Shader) );
620
(GLvoid *)(vp->r700Shader.pProgram),
621
vp->r700Shader.uShaderBinaryDWORDSize,
624
if(GL_TRUE == r700->bShaderUseMemConstant)
626
paramList = vp->mesa_program->Base.Parameters;
627
if(NULL != paramList)
629
unNumParamData = paramList->NumParameters;
630
r600AllocShaderConsts(ctx,
637
vp->loaded = GL_TRUE;
640
DumpHwBinary(DUMP_VERTEX_SHADER, (GLvoid *)(vp->r700Shader.pProgram),
641
vp->r700Shader.uShaderBinaryDWORDSize);
643
/* TODO : enable this after MemUse fixed *=
644
(context->chipobj.MemUse)(context, vp->shadercode.buf->id);
647
R600_STATECHANGE(context, vs);
648
R600_STATECHANGE(context, fs); /* hack */
650
r700->vs.SQ_PGM_RESOURCES_VS.u32All = 0;
651
SETbit(r700->vs.SQ_PGM_RESOURCES_VS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit);
653
r700->vs.SQ_ALU_CONST_CACHE_VS_0.u32All = 0; /* set from buffer object. */
655
r700->vs.SQ_PGM_START_VS.u32All = 0;
657
SETfield(r700->vs.SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.nRegs + 1,
658
NUM_GPRS_shift, NUM_GPRS_mask);
660
num_sq_vs_gprs = ((r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All & NUM_VS_GPRS_mask) >> NUM_VS_GPRS_shift);
662
if((vp->r700Shader.nRegs + 1) > num_sq_vs_gprs)
664
/* care! this changes sq - needs idle state */
665
R600_STATECHANGE(context, sq);
666
SETfield(r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All, vp->r700Shader.nRegs + 1,
667
NUM_VS_GPRS_shift, NUM_VS_GPRS_mask);
670
if(vp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */
672
SETfield(r700->vs.SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.uStackSize,
673
STACK_SIZE_shift, STACK_SIZE_mask);
676
R600_STATECHANGE(context, spi);
678
if(vp->mesa_program->Base.OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
679
R600_STATECHANGE(context, cl);
680
SETbit(r700->PA_CL_VS_OUT_CNTL.u32All, USE_VTX_POINT_SIZE_bit);
681
SETbit(r700->PA_CL_VS_OUT_CNTL.u32All, VS_OUT_MISC_VEC_ENA_bit);
682
} else if (r700->PA_CL_VS_OUT_CNTL.u32All != 0) {
683
R600_STATECHANGE(context, cl);
684
CLEARbit(r700->PA_CL_VS_OUT_CNTL.u32All, USE_VTX_POINT_SIZE_bit);
685
CLEARbit(r700->PA_CL_VS_OUT_CNTL.u32All, VS_OUT_MISC_VEC_ENA_bit);
688
SETfield(r700->SPI_VS_OUT_CONFIG.u32All,
689
vp->r700Shader.nParamExports ? (vp->r700Shader.nParamExports - 1) : 0,
690
VS_EXPORT_COUNT_shift, VS_EXPORT_COUNT_mask);
691
SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, vp->r700Shader.nParamExports,
692
NUM_INTERP_shift, NUM_INTERP_mask);
695
SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, PERSP_GRADIENT_ENA_bit);
696
CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit);
699
/* send out shader constants. */
700
paramList = vp->mesa_program->Base.Parameters;
702
if(NULL != paramList) {
703
/* vp->mesa_program was cloned, not updated by glsl shader api. */
704
/* _mesa_reference_program has already checked glsl shProg is ok and set ctx->VertexProgram._Current */
705
/* so, use ctx->VertexProgram._Current */
706
struct gl_program_parameter_list *paramListOrginal =
707
ctx->VertexProgram._Current->Base.Parameters;
709
_mesa_load_state_parameters(ctx, paramList);
711
if (paramList->NumParameters > R700_MAX_DX9_CONSTS)
714
R600_STATECHANGE(context, vs_consts);
716
r700->vs.num_consts = paramList->NumParameters;
718
unNumParamData = paramList->NumParameters;
720
for(ui=0; ui<unNumParamData; ui++) {
721
if(paramList->Parameters[ui].Type == PROGRAM_UNIFORM)
723
r700->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0];
724
r700->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1];
725
r700->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2];
726
r700->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3];
730
r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
731
r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
732
r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
733
r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
737
/* Load vp constants to gpu */
738
if(GL_TRUE == r700->bShaderUseMemConstant)
740
r600EmitShaderConsts(ctx,
743
(GLvoid *)&(r700->vs.consts[0][0]),
744
unNumParamData * 4 * 4);
747
r700->vs.num_consts = 0;
749
COMPILED_SUB * pCompiledSub;
751
GLuint unConstOffset = r700->vs.num_consts;
752
for(ui=0; ui<vp->r700AsmCode.unNumPresub; ui++)
754
pCompiledSub = vp->r700AsmCode.presubs[ui].pCompiledSub;
756
r700->vs.num_consts += pCompiledSub->NumParameters;
758
for(uj=0; uj<pCompiledSub->NumParameters; uj++)
760
r700->vs.consts[uj + unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0];
761
r700->vs.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1];
762
r700->vs.consts[uj + unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2];
763
r700->vs.consts[uj + unConstOffset][3].f32All = pCompiledSub->ParameterValues[uj][3];
765
unConstOffset += pCompiledSub->NumParameters;