2
* Copyright (C) 2005 Ben Skeggs.
6
* Permission is hereby granted, free of charge, to any person obtaining
7
* a copy of this software and associated documentation files (the
8
* "Software"), to deal in the Software without restriction, including
9
* without limitation the rights to use, copy, modify, merge, publish,
10
* distribute, sublicense, and/or sell copies of the Software, and to
11
* permit persons to whom the Software is furnished to do so, subject to
12
* the following conditions:
14
* The above copyright notice and this permission notice (including the
15
* next paragraph) shall be included in all copies or substantial
16
* portions of the Software.
18
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
* Emit the r300_fragment_program_code that can be understood by the hardware.
32
* Input is a pre-transformed radeon_program.
34
* \author Ben Skeggs <darktama@iinet.net.au>
36
* \author Jerome Glisse <j.glisse@gmail.com>
39
#include "r300_fragprog.h"
41
#include "../r300_reg.h"
43
#include "radeon_program_pair.h"
44
#include "r300_fragprog_swizzle.h"
47
/* State threaded through the emit helpers while one fragment program is
 * turned into hardware code. */
struct r300_emit_state {
48
/* Compiler that owns the program being emitted; the helpers reach the output
 * code and the error reporting through it. */
struct r300_fragment_program_compiler * compiler;
50
/* Index of the node currently being filled; begin_tex() reports an error
 * instead of advancing once this has reached 3. */
unsigned current_node : 2;
51
/* Value of code->tex.length at the moment the current node was started. */
unsigned node_first_tex : 8;
52
/* Value of code->alu.length at the moment the current node was started. */
unsigned node_first_alu : 8;
57
struct r300_fragment_program_compiler *c = emit->compiler; \
58
struct r300_fragment_program_code *code = &c->code->code.r300
60
/* Report a compile error through rc_error(), prefixing the formatted message
 * with __FILE__ and __FUNCTION__ and appending a newline.  The expansion
 * refers to a variable named `c` (the compiler), which must be in scope at
 * the point of use. */
#define error(fmt, args...) do { \
61
rc_error(&c->Base, "%s::%s(): " fmt "\n", \
62
__FILE__, __FUNCTION__, ##args); \
65
/**
 * Extract the upper bits of an ALU instruction offset or size.
 *
 * @param bits Full ALU offset/size value.
 * @return Bits 6..8 of \p bits, moved down to bits 0..2.
 */
static unsigned int get_msbs_alu(unsigned int bits)
{
	return (bits >> 6) & 0x7;
}
71
/**
 * Extract the upper bits of a TEX instruction offset or size.
 *
 * @param bits Full TEX offset/size value.
 * @param lsbs The number of least significant bits
 * @return The four bits of \p bits that sit directly above the low
 *         \p lsbs bits, moved down to bits 0..3.
 */
static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs)
{
	/* The mask has to be contiguous: the previous value 0x15 (binary
	 * 10101) silently discarded bits 1 and 3 of the shifted value, so
	 * e.g. an offset of 64 with lsbs == 5 produced 0 instead of 2. */
	return (bits >> lsbs) & 0xf;
}
78
/* Shift x right by `lsbs` and keep the bits selected by `mask`.
 * Every argument is parenthesized so that compound expressions passed as
 * `lsbs` or `mask` (e.g. `A | B`) keep their intended grouping. */
#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> (lsbs)) & (mask))
81
* Mark a temporary register as used.
83
static void use_temporary(struct r300_fragment_program_code *code, unsigned int index)
85
if (index > code->pixsize)
86
code->pixsize = index;
89
/* Turn one pair-instruction source operand into the source address value
 * that emit_alu() ORs into rgb_addr / alpha_addr.
 * A constant yields its index with bit 5 set; a temporary or input is
 * recorded through use_temporary() and yields the low five bits of its
 * index. */
static unsigned int use_source(struct r300_fragment_program_code* code, struct rc_pair_instruction_source src)
94
/* Constant file: the index is returned with bit 5 OR-ed in. */
if (src.File == RC_FILE_CONSTANT) {
95
return src.Index | (1 << 5);
96
} else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
97
/* Inputs are tracked exactly like temporaries for the pixsize count; only
 * bits 0..4 of the index are returned (emit_alu() sets a separate
 * r400_ext_addr bit for indices >= R300_PFS_NUM_TEMP_REGS). */
use_temporary(code, src.Index);
98
return src.Index & 0x1f;
105
/* Map an rc_opcode to the R300_ALU_OUTC_* value that emit_alu() stores as
 * the initial content of the rgb_inst word.
 * `c` is only needed by the error() macro, which reports an unrecognised
 * opcode by its name. */
static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
108
case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP;
109
case RC_OPCODE_CND: return R300_ALU_OUTC_CND;
110
case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3;
111
case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4;
112
case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC;
114
/* Diagnostic for an opcode with no RGB encoding; the opcode name comes from
 * rc_get_opcode_info(). */
error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
118
case RC_OPCODE_MAD: return R300_ALU_OUTC_MAD;
119
case RC_OPCODE_MAX: return R300_ALU_OUTC_MAX;
120
case RC_OPCODE_MIN: return R300_ALU_OUTC_MIN;
121
case RC_OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA;
125
/* Map an rc_opcode to the R300_ALU_OUTA_* value that emit_alu() stores as
 * the initial content of the alpha_inst word.
 * `c` is only needed by the error() macro, which reports an unrecognised
 * opcode by its name. */
static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
128
case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP;
129
case RC_OPCODE_CND: return R300_ALU_OUTA_CND;
130
/* DP3 and DP4 both select the DP4 alpha encoding.
 * NOTE(review): looks deliberate (no OUTA_DP3 constant is referenced in this
 * file) - confirm against the register header. */
case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4;
131
case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4;
132
case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2;
133
case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC;
134
case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2;
136
/* The message names this function; it previously named translate_rgb_opcode,
 * which pointed readers of the log at the wrong translator. */
error("translate_alpha_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
140
case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD;
141
case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX;
142
case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN;
143
case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP;
144
case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
149
* Emit one paired ALU instruction.
151
/* Encode one paired RGB/alpha ALU instruction into the next free slot of
 * code->alu.inst[].
 *
 * emit: emitter state; supplies the compiler/code and collects node_flags.
 * inst: the paired instruction to encode.
 *
 * Callers test the result with `!emit_alu(...)`, i.e. a zero result is
 * treated as failure. */
static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst)
157
/* Refuse to go past the instruction limit configured on the compiler. */
if (code->alu.length >= c->Base.max_alu_insts) {
158
error("Too many ALU instructions");
162
/* Claim the next slot; ip is the index of the instruction being written. */
ip = code->alu.length++;
164
/* The opcode values seed both instruction words; everything below is OR-ed
 * on top of them. */
code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode);
165
code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
167
/* Three source slots per half: source j's address goes in at bit 6*j of the
 * address word and its argument selector at bit 7*j of the instruction
 * word. */
for(j = 0; j < 3; ++j) {
168
/* Set the RGB address */
169
unsigned int src = use_source(code, inst->RGB.Src[j]);
171
/* use_source() keeps only the low index bits; larger indices are flagged
 * separately in the r400 extension word. */
if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
172
code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j);
174
code->alu.inst[ip].rgb_addr |= src << (6*j);
176
/* Set the Alpha address */
177
src = use_source(code, inst->Alpha.Src[j]);
178
if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
179
code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j);
181
code->alu.inst[ip].alpha_addr |= src << (6*j);
183
/* RGB argument: translated swizzle, with Abs in bit 6 and Negate in bit 5. */
arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
184
arg |= inst->RGB.Arg[j].Abs << 6;
185
arg |= inst->RGB.Arg[j].Negate << 5;
186
code->alu.inst[ip].rgb_inst |= arg << (7*j);
188
/* Alpha argument: same layout as the RGB argument. */
arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);
189
arg |= inst->Alpha.Arg[j].Abs << 6;
190
arg |= inst->Alpha.Arg[j].Negate << 5;
191
code->alu.inst[ip].alpha_inst |= arg << (7*j);
195
/* RGB presubtract: when the presubtract source is in use, its Index selects
 * which R300_ALU_SRCP_* operation is OR-ed into rgb_inst. */
if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
196
switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
198
code->alu.inst[ip].rgb_inst |=
199
R300_ALU_SRCP_1_MINUS_2_SRC0;
202
code->alu.inst[ip].rgb_inst |=
203
R300_ALU_SRCP_SRC1_PLUS_SRC0;
206
code->alu.inst[ip].rgb_inst |=
207
R300_ALU_SRCP_SRC1_MINUS_SRC0;
210
code->alu.inst[ip].rgb_inst |=
211
R300_ALU_SRCP_1_MINUS_SRC0;
218
/* Alpha presubtract: same selection, OR-ed into alpha_inst. */
if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
219
switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
221
code->alu.inst[ip].alpha_inst |=
222
R300_ALU_SRCP_1_MINUS_2_SRC0;
225
code->alu.inst[ip].alpha_inst |=
226
R300_ALU_SRCP_SRC1_PLUS_SRC0;
229
code->alu.inst[ip].alpha_inst |=
230
R300_ALU_SRCP_SRC1_MINUS_SRC0;
233
code->alu.inst[ip].alpha_inst |=
234
R300_ALU_SRCP_1_MINUS_SRC0;
241
/* Saturation maps to the per-half clamp flag. */
if (inst->RGB.Saturate)
242
code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP;
243
if (inst->Alpha.Saturate)
244
code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP;
246
/* RGB register destination: record the register, flag large indices in the
 * r400 extension word, then encode the low five index bits plus the write
 * mask. */
if (inst->RGB.WriteMask) {
247
use_temporary(code, inst->RGB.DestIndex);
248
if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS)
249
code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT;
250
code->alu.inst[ip].rgb_addr |=
251
((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) |
252
(inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
254
/* RGB output write: encode mask and target, and note on the node that it
 * writes a colour output. */
if (inst->RGB.OutputWriteMask) {
255
code->alu.inst[ip].rgb_addr |=
256
(inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) |
257
R300_RGB_TARGET(inst->RGB.Target);
258
emit->node_flags |= R300_RGBA_OUT;
261
/* Alpha register destination, handled like the RGB one. */
if (inst->Alpha.WriteMask) {
262
use_temporary(code, inst->Alpha.DestIndex);
263
if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS)
264
code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT;
265
code->alu.inst[ip].alpha_addr |=
266
((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) |
269
/* Alpha output write. */
if (inst->Alpha.OutputWriteMask) {
270
code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT |
271
R300_ALPHA_TARGET(inst->Alpha.Target);
272
emit->node_flags |= R300_RGBA_OUT;
274
/* Depth write: flagged on the instruction, on the node and on the program
 * as a whole (writes_depth). */
if (inst->Alpha.DepthWriteMask) {
275
code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH;
276
emit->node_flags |= R300_W_OUT;
277
c->code->writes_depth = 1;
280
code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP;
282
/* Handle Output Modifier
283
* According to the r300 docs, there is no RC_OMOD_DISABLE for r300 */
284
if (inst->RGB.Omod) {
285
if (inst->RGB.Omod == RC_OMOD_DISABLE) {
286
rc_error(&c->Base, "RC_OMOD_DISABLE not supported");
288
code->alu.inst[ip].rgb_inst |=
289
(inst->RGB.Omod << R300_ALU_OUTC_MOD_SHIFT);
291
/* NOTE(review): the alpha modifier below is shifted with the RGB-named
 * constant R300_ALU_OUTC_MOD_SHIFT - confirm that the alpha word keeps its
 * output modifier at the same bit position. */
if (inst->Alpha.Omod) {
292
if (inst->Alpha.Omod == RC_OMOD_DISABLE) {
293
rc_error(&c->Base, "RC_OMOD_DISABLE not supported");
295
code->alu.inst[ip].alpha_inst |=
296
(inst->Alpha.Omod << R300_ALU_OUTC_MOD_SHIFT);
303
* Finish the current node without advancing to the next one.
305
/* Close the node emit->current_node: make sure it holds at least one ALU
 * instruction, compute its ALU/TEX start and size values, and write them to
 * code->code_addr[current_node].  The upper ALU bits (from get_msbs_alu())
 * are additionally OR-ed into code->r400_code_offset_ext, at a START/SIZE
 * shift pair chosen by the node index.
 *
 * begin_tex() tests the result with `!finish_node(...)`, i.e. a zero result
 * is treated as failure. */
static int finish_node(struct r300_emit_state * emit)
307
struct r300_fragment_program_compiler * c = emit->compiler;
308
struct r300_fragment_program_code *code = &emit->compiler->code->code.r300;
314
unsigned int alu_offset_msbs, alu_end_msbs;
316
/* No ALU instruction was emitted since the node started. */
if (code->alu.length == emit->node_first_alu) {
317
/* Generate a single NOP for this node */
318
struct rc_pair_instruction inst;
319
/* An all-zero pair instruction is what gets emitted as the NOP. */
memset(&inst, 0, sizeof(inst));
320
if (!emit_alu(emit, &inst))
324
/* Node extent: the offsets are the first ALU/TEX indices of the node, the
 * end values are the instruction counts minus one. */
alu_offset = emit->node_first_alu;
325
alu_end = code->alu.length - alu_offset - 1;
326
tex_offset = emit->node_first_tex;
327
tex_end = code->tex.length - tex_offset - 1;
329
/* No TEX instruction was emitted since the node started. */
if (code->tex.length == emit->node_first_tex) {
330
if (emit->current_node > 0) {
331
error("Node %i has no TEX instructions", emit->current_node);
337
if (emit->current_node == 0)
338
code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX;
341
/* Write the config register.
342
* Note: The order in which the words for each node are written
343
* is not correct here and needs to be fixed up once we're entirely
346
* Also note that the register specification from AMD is slightly
347
* incorrect in its description of this register. */
348
code->code_addr[emit->current_node] =
349
((alu_offset << R300_ALU_START_SHIFT)
350
& R300_ALU_START_MASK)
351
| ((alu_end << R300_ALU_SIZE_SHIFT)
352
& R300_ALU_SIZE_MASK)
353
| ((tex_offset << R300_TEX_START_SHIFT)
354
& R300_TEX_START_MASK)
355
| ((tex_end << R300_TEX_SIZE_SHIFT)
356
& R300_TEX_SIZE_MASK)
358
| (get_msbs_tex(tex_offset, 5)
359
<< R400_TEX_START_MSB_SHIFT)
360
| (get_msbs_tex(tex_end, 5)
361
<< R400_TEX_SIZE_MSB_SHIFT)
364
/* Write r400 extended instruction fields. These will be ignored on
366
alu_offset_msbs = get_msbs_alu(alu_offset);
367
alu_end_msbs = get_msbs_alu(alu_end);
368
switch(emit->current_node) {
370
code->r400_code_offset_ext |=
371
alu_offset_msbs << R400_ALU_START3_MSB_SHIFT
372
| alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT;
375
code->r400_code_offset_ext |=
376
alu_offset_msbs << R400_ALU_START2_MSB_SHIFT
377
| alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT;
380
code->r400_code_offset_ext |=
381
alu_offset_msbs << R400_ALU_START1_MSB_SHIFT
382
| alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT;
385
code->r400_code_offset_ext |=
386
alu_offset_msbs << R400_ALU_START0_MSB_SHIFT
387
| alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT;
395
* Begin a block of texture instructions.
396
* Create the necessary indirection.
398
/* Handle the start of a block of texture instructions: the current node is
 * closed with finish_node() and the emitter moves on to the next node,
 * whose ALU/TEX ranges start at the current ends of both streams. */
static int begin_tex(struct r300_emit_state * emit)
402
/* Both lengths still equal the node's starting values, i.e. nothing has
 * been emitted into the current node yet. */
if (code->alu.length == emit->node_first_alu &&
403
code->tex.length == emit->node_first_tex) {
407
/* Node indices stop at 3; a further indirection is reported as an error. */
if (emit->current_node == 3) {
408
error("Too many texture indirections");
412
/* Close the current node; its result is tested for zero (failure). */
if (!finish_node(emit))
415
/* Open the next node at the current end of the TEX and ALU streams, with no
 * node flags set. */
emit->current_node++;
416
emit->node_first_tex = code->tex.length;
417
emit->node_first_alu = code->alu.length;
418
emit->node_flags = 0;
423
/* Encode one texture instruction (KIL, TEX, TXB or TXP) and append it to
 * code->tex.inst[].
 *
 * emit: emitter state supplying the compiler and output code.
 * inst: instruction whose U.I fields (Opcode, TexSrcUnit, DstReg, SrcReg[0])
 *       are encoded. */
static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)
430
/* Refuse to go past the TEX instruction limit configured on the compiler. */
if (code->tex.length >= emit->compiler->Base.max_tex_insts) {
431
error("Too many TEX instructions");
435
unit = inst->U.I.TexSrcUnit;
436
dest = inst->U.I.DstReg.Index;
438
/* Pick the hardware texture opcode; note that RC_OPCODE_TEX maps to
 * R300_TEX_OP_LD. */
switch(inst->U.I.Opcode) {
439
case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
440
case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
441
case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
442
case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
444
error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name);
448
if (inst->U.I.Opcode == RC_OPCODE_KIL) {
452
use_temporary(code, dest);
455
/* The coordinate source register counts towards pixsize as well. */
use_temporary(code, inst->U.I.SrcReg[0].Index);
457
/* Assemble the instruction word: masked source and destination addresses,
 * texture unit, opcode, plus an R400 extension bit for each address that is
 * >= R300_PFS_NUM_TEMP_REGS. */
code->tex.inst[code->tex.length++] =
458
((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT)
459
& R300_SRC_ADDR_MASK)
460
| ((dest << R300_DST_ADDR_SHIFT)
461
& R300_DST_ADDR_MASK)
462
| (unit << R300_TEX_ID_SHIFT)
463
| (opcode << R300_TEX_INST_SHIFT)
464
| (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ?
465
R400_SRC_ADDR_EXT_BIT : 0)
466
| (dest >= R300_PFS_NUM_TEMP_REGS ?
467
R400_DST_ADDR_EXT_BIT : 0)
474
* Final compilation step: Turn the intermediate radeon_program into
475
* machine-readable instructions.
477
void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
479
struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
480
struct r300_emit_state emit;
481
struct r300_fragment_program_code *code = &compiler->code->code.r300;
482
unsigned int tex_end;
484
memset(&emit, 0, sizeof(emit));
485
emit.compiler = compiler;
487
memset(code, 0, sizeof(struct r300_fragment_program_code));
489
for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
490
inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
492
if (inst->Type == RC_INSTRUCTION_NORMAL) {
493
if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
498
emit_tex(&emit, inst);
500
emit_alu(&emit, &inst->U.P);
504
if (code->pixsize >= compiler->Base.max_temp_regs)
505
rc_error(&compiler->Base, "Too many hardware temporaries used.\n");
507
if (compiler->Base.Error)
510
/* Finish the program */
513
code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */
515
/* Set r400 extended instruction fields. These values will be ignored
517
code->r400_code_offset_ext |=
519
<< R400_ALU_OFFSET_MSB_SHIFT)
520
| (get_msbs_alu(code->alu.length - 1)
521
<< R400_ALU_SIZE_MSB_SHIFT);
523
tex_end = code->tex.length ? code->tex.length - 1 : 0;
525
((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT)
526
& R300_PFS_CNTL_ALU_OFFSET_MASK)
527
| (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT)
528
& R300_PFS_CNTL_ALU_END_MASK)
529
| ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT)
530
& R300_PFS_CNTL_TEX_OFFSET_MASK)
531
| ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT)
532
& R300_PFS_CNTL_TEX_END_MASK)
533
| (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT)
534
| (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT)
537
if (emit.current_node < 3) {
538
int shift = 3 - emit.current_node;
540
for(i = emit.current_node; i >= 0; --i)
541
code->code_addr[shift + i] = code->code_addr[i];
542
for(i = 0; i < shift; ++i)
543
code->code_addr[i] = 0;
546
if (code->pixsize >= R300_PFS_NUM_TEMP_REGS
547
|| code->alu.length > R300_PFS_MAX_ALU_INST
548
|| code->tex.length > R300_PFS_MAX_TEX_INST) {