78
70
hash_table_insert(this->variable_ht, reg, ir);
82
if (ir->mode == ir_var_uniform) {
72
} else if (ir->mode == ir_var_out) {
73
reg = new(this->mem_ctx) fs_reg(this, ir->type);
75
if (ir->location == FRAG_RESULT_COLOR) {
76
/* Writing gl_FragColor outputs to all color regions. */
77
for (int i = 0; i < MAX2(c->key.nr_color_regions, 1); i++) {
78
this->outputs[i] = *reg;
80
} else if (ir->location == FRAG_RESULT_DEPTH) {
81
this->frag_depth = ir;
83
/* gl_FragData or a user-defined FS output */
84
assert(ir->location >= FRAG_RESULT_DATA0 &&
85
ir->location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS);
87
/* General color output. */
88
for (unsigned int i = 0; i < MAX2(1, ir->type->length); i++) {
89
int output = ir->location - FRAG_RESULT_DATA0 + i;
90
this->outputs[output] = *reg;
91
this->outputs[output].reg_offset += 4 * i;
94
} else if (ir->mode == ir_var_uniform) {
83
95
int param_index = c->prog_data.nr_params;
85
97
if (c->dispatch_width == 16) {
207
212
assert(!ir->operands[operand]->type->is_vector());
210
/* Inherit storage from our parent if possible, and otherwise we
215
/* Storage for our result. If our result goes into an assignment, it will
216
* just get copy-propagated out, so no worries.
213
if (saved_result_storage.file == BAD_FILE) {
214
this->result = fs_reg(this, ir->type);
216
this->result = saved_result_storage;
218
this->result = fs_reg(this, ir->type);
219
220
switch (ir->operation) {
220
221
case ir_unop_logic_not:
254
252
case ir_unop_rcp:
255
emit_math(FS_OPCODE_RCP, this->result, op[0]);
253
emit_math(SHADER_OPCODE_RCP, this->result, op[0]);
258
256
case ir_unop_exp2:
259
emit_math(FS_OPCODE_EXP2, this->result, op[0]);
257
emit_math(SHADER_OPCODE_EXP2, this->result, op[0]);
261
259
case ir_unop_log2:
262
emit_math(FS_OPCODE_LOG2, this->result, op[0]);
260
emit_math(SHADER_OPCODE_LOG2, this->result, op[0]);
264
262
case ir_unop_exp:
265
263
case ir_unop_log:
291
289
case ir_binop_mul:
292
emit(BRW_OPCODE_MUL, this->result, op[0], op[1]);
290
if (ir->type->is_integer()) {
291
/* For integer multiplication, the MUL uses the low 16 bits
292
* of one of the operands (src0 on gen6, src1 on gen7). The
293
* MACH accumulates in the contribution of the upper 16 bits
296
* FINISHME: Emit just the MUL if we know an operand is small
299
struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
301
emit(BRW_OPCODE_MUL, acc, op[0], op[1]);
302
emit(BRW_OPCODE_MACH, reg_null_d, op[0], op[1]);
303
emit(BRW_OPCODE_MOV, this->result, fs_reg(acc));
305
emit(BRW_OPCODE_MUL, this->result, op[0], op[1]);
294
308
case ir_binop_div:
295
assert(!"not reached: should be handled by ir_div_to_mul_rcp");
309
if (intel->gen >= 7 && c->dispatch_width == 16)
310
fail("16-wide INTDIV unsupported\n");
312
/* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
313
assert(ir->type->is_integer());
314
emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]);
297
316
case ir_binop_mod:
298
assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
317
if (intel->gen >= 7 && c->dispatch_width == 16)
318
fail("16-wide INTDIV unsupported\n");
320
/* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
321
assert(ir->type->is_integer());
322
emit_math(SHADER_OPCODE_INT_REMAINDER, this->result, op[0], op[1]);
301
325
case ir_binop_less:
433
477
inst = emit(BRW_OPCODE_OR, this->result, op[0], op[1]);
437
480
case ir_binop_lshift:
481
inst = emit(BRW_OPCODE_SHL, this->result, op[0], op[1]);
438
484
case ir_binop_rshift:
439
assert(!"GLSL 1.30 features unsupported");
485
if (ir->type->base_type == GLSL_TYPE_INT)
486
inst = emit(BRW_OPCODE_ASR, this->result, op[0], op[1]);
488
inst = emit(BRW_OPCODE_SHR, this->result, op[0], op[1]);
537
/* If the RHS processing resulted in an instruction generating a
538
* temporary value, and it would be easy to rewrite the instruction to
539
* generate its result right into the LHS instead, do so. This ends
540
* up reliably removing instructions where it can be tricky to do so
541
* later without real UD chain information.
544
fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
547
fs_inst *pre_rhs_inst,
548
fs_inst *last_rhs_inst)
550
if (pre_rhs_inst == last_rhs_inst)
551
return false; /* No instructions generated to work with. */
553
/* Only attempt if we're doing a direct assignment. */
555
!(ir->lhs->type->is_scalar() ||
556
(ir->lhs->type->is_vector() &&
557
ir->write_mask == (1 << ir->lhs->type->vector_elements) - 1)))
560
/* Make sure the last instruction generated our source reg. */
561
if (last_rhs_inst->predicated ||
562
last_rhs_inst->force_uncompressed ||
563
last_rhs_inst->force_sechalf ||
564
!src.equals(&last_rhs_inst->dst))
567
/* Success! Rewrite the instruction. */
568
last_rhs_inst->dst = dst;
489
574
fs_visitor::visit(ir_assignment *ir)
494
579
/* FINISHME: arrays on the lhs */
495
this->result = reg_undef;
496
580
ir->lhs->accept(this);
497
581
l = this->result;
499
/* If we're doing a direct assignment, an RHS expression could
500
* drop its result right into our destination. Otherwise, tell it
504
!(ir->lhs->type->is_scalar() ||
505
(ir->lhs->type->is_vector() &&
506
ir->write_mask == (1 << ir->lhs->type->vector_elements) - 1))) {
507
this->result = reg_undef;
583
fs_inst *pre_rhs_inst = (fs_inst *) this->instructions.get_tail();
510
585
ir->rhs->accept(this);
511
586
r = this->result;
588
fs_inst *last_rhs_inst = (fs_inst *) this->instructions.get_tail();
513
590
assert(l.file != BAD_FILE);
514
591
assert(r.file != BAD_FILE);
593
if (try_rewrite_rhs_to_dst(ir, l, r, pre_rhs_inst, last_rhs_inst))
516
596
if (ir->condition) {
517
597
emit_bool_to_cond_code(ir->condition);
568
644
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f));
570
646
} else if (ir->op == ir_txb) {
571
this->result = reg_undef;
572
647
ir->lod_info.bias->accept(this);
573
648
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
576
651
assert(ir->op == ir_txl);
577
this->result = reg_undef;
578
652
ir->lod_info.lod->accept(this);
579
653
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
583
this->result = reg_undef;
584
657
ir->shadow_comparitor->accept(this);
585
658
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
588
661
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
589
662
fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i),
591
if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
664
if (i < 3 && c->key.tex.gl_clamp_mask[i] & (1 << sampler))
592
665
inst->saturate = true;
593
666
coordinate.reg_offset++;
595
668
/* gen4's SIMD8 sampler always has the slots for u,v,r present. */
597
670
} else if (ir->op == ir_txd) {
598
this->result = reg_undef;
599
671
ir->lod_info.grad.dPdx->accept(this);
600
672
fs_reg dPdx = this->result;
602
this->result = reg_undef;
603
674
ir->lod_info.grad.dPdy->accept(this);
604
675
fs_reg dPdy = this->result;
635
706
dPdy.reg_offset++;
637
708
mlen += MAX2(ir->lod_info.grad.dPdy->type->vector_elements, 2);
709
} else if (ir->op == ir_txs) {
710
/* There's no SIMD8 resinfo message on Gen4. Use SIMD16 instead. */
712
ir->lod_info.lod->accept(this);
713
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result);
639
716
/* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod
640
717
* instructions. We'll need to do SIMD16 here.
642
assert(ir->op == ir_txb || ir->op == ir_txl);
720
assert(ir->op == ir_txb || ir->op == ir_txl || ir->op == ir_txf);
644
722
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
645
723
fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF,
646
base_mrf + mlen + i * 2),
724
base_mrf + mlen + i * 2,
648
if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
727
if (i < 3 && c->key.tex.gl_clamp_mask[i] & (1 << sampler))
649
728
inst->saturate = true;
650
729
coordinate.reg_offset++;
732
/* Initialize the rest of u/v/r with 0.0. Empirically, this seems to
733
* be necessary for TXF (ld), but seems wise to do for all messages.
735
for (int i = ir->coordinate->type->vector_elements; i < 3; i++) {
736
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2), fs_reg(0.0f));
653
739
/* lod/bias appears after u/v/r. */
656
742
if (ir->op == ir_txb) {
657
this->result = reg_undef;
658
743
ir->lod_info.bias->accept(this);
659
744
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
662
this->result = reg_undef;
663
747
ir->lod_info.lod->accept(this);
664
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
748
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, this->result.type),
668
753
/* The unused upper half. */
671
758
/* Now, since we're doing simd16, the return is 2 interleaved
672
759
* vec4s where the odd-indexed ones are junk. We'll need to move
673
760
* this weirdness around to the expected layout.
677
dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type,
679
dst.type = BRW_REGISTER_TYPE_F;
763
const glsl_type *vec_type =
764
glsl_type::get_instance(ir->type->base_type, 4, 1);
765
dst = fs_reg(this, glsl_type::get_array_instance(vec_type, 2));
766
dst.type = intel->is_g4x ? brw_type_for_base_type(ir->type)
767
: BRW_REGISTER_TYPE_F;
682
770
fs_inst *inst = NULL;
683
771
switch (ir->op) {
685
inst = emit(FS_OPCODE_TEX, dst);
773
inst = emit(SHADER_OPCODE_TEX, dst);
688
776
inst = emit(FS_OPCODE_TXB, dst);
691
inst = emit(FS_OPCODE_TXL, dst);
779
inst = emit(SHADER_OPCODE_TXL, dst);
694
inst = emit(FS_OPCODE_TXD, dst);
782
inst = emit(SHADER_OPCODE_TXD, dst);
785
inst = emit(SHADER_OPCODE_TXS, dst);
697
assert(!"GLSL 1.30 features unsupported");
788
inst = emit(SHADER_OPCODE_TXF, dst);
700
791
inst->base_mrf = base_mrf;
741
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
834
for (int i = 0; i < vector_elements; i++) {
742
835
fs_inst *inst = emit(BRW_OPCODE_MOV,
743
fs_reg(MRF, base_mrf + mlen + i * reg_width),
836
fs_reg(MRF, base_mrf + mlen + i * reg_width,
745
if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
839
if (i < 3 && c->key.tex.gl_clamp_mask[i] & (1 << sampler))
746
840
inst->saturate = true;
747
841
coordinate.reg_offset++;
749
mlen += ir->coordinate->type->vector_elements * reg_width;
843
mlen += vector_elements * reg_width;
751
845
if (ir->shadow_comparitor && ir->op != ir_txd) {
752
846
mlen = MAX2(mlen, header_present + 4 * reg_width);
754
this->result = reg_undef;
755
848
ir->shadow_comparitor->accept(this);
756
849
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
757
850
mlen += reg_width;
776
this->result = reg_undef;
777
868
ir->lod_info.lod->accept(this);
778
869
mlen = MAX2(mlen, header_present + 4 * reg_width);
779
870
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
780
871
mlen += reg_width;
782
inst = emit(FS_OPCODE_TXL, dst);
873
inst = emit(SHADER_OPCODE_TXL, dst);
785
this->result = reg_undef;
786
876
ir->lod_info.grad.dPdx->accept(this);
787
877
fs_reg dPdx = this->result;
789
this->result = reg_undef;
790
879
ir->lod_info.grad.dPdy->accept(this);
791
880
fs_reg dPdy = this->result;
811
900
mlen += reg_width;
814
inst = emit(FS_OPCODE_TXD, dst);
903
inst = emit(SHADER_OPCODE_TXD, dst);
907
ir->lod_info.lod->accept(this);
908
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result);
910
inst = emit(SHADER_OPCODE_TXS, dst);
818
assert(!"GLSL 1.30 features unsupported");
913
mlen = header_present + 4 * reg_width;
915
ir->lod_info.lod->accept(this);
917
fs_reg(MRF, base_mrf + mlen - reg_width, BRW_REGISTER_TYPE_UD),
919
inst = emit(SHADER_OPCODE_TXF, dst);
821
922
inst->base_mrf = base_mrf;
1003
ir->lod_info.lod->accept(this);
1004
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result);
907
assert(!"GLSL 1.30 features unsupported");
1008
/* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. */
1009
emit(BRW_OPCODE_MOV,
1010
fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), coordinate);
1011
coordinate.reg_offset++;
1014
ir->lod_info.lod->accept(this);
1015
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), this->result);
1018
for (int i = 1; i < ir->coordinate->type->vector_elements; i++) {
1019
emit(BRW_OPCODE_MOV,
1020
fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), coordinate);
1021
coordinate.reg_offset++;
911
/* Set up the coordinate (except for TXD where it was done earlier) */
912
if (ir->op != ir_txd) {
1027
/* Set up the coordinate (except for cases where it was done above) */
1028
if (ir->op != ir_txd && ir->op != ir_txs && ir->op != ir_txf) {
913
1029
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
914
1030
fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
916
if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
1032
if (i < 3 && c->key.tex.gl_clamp_mask[i] & (1 << sampler))
917
1033
inst->saturate = true;
918
1034
coordinate.reg_offset++;
919
1035
mlen += reg_width;
923
1039
/* Generate the SEND */
924
1040
fs_inst *inst = NULL;
925
1041
switch (ir->op) {
926
case ir_tex: inst = emit(FS_OPCODE_TEX, dst); break;
1042
case ir_tex: inst = emit(SHADER_OPCODE_TEX, dst); break;
927
1043
case ir_txb: inst = emit(FS_OPCODE_TXB, dst); break;
928
case ir_txl: inst = emit(FS_OPCODE_TXL, dst); break;
929
case ir_txd: inst = emit(FS_OPCODE_TXD, dst); break;
930
case ir_txf: assert(!"TXF unsupported.");
1044
case ir_txl: inst = emit(SHADER_OPCODE_TXL, dst); break;
1045
case ir_txd: inst = emit(SHADER_OPCODE_TXD, dst); break;
1046
case ir_txf: inst = emit(SHADER_OPCODE_TXF, dst); break;
1047
case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst); break;
932
1049
inst->base_mrf = base_mrf;
933
1050
inst->mlen = mlen;
954
1071
bool hw_compare_supported = ir->op != ir_txd;
955
1072
if (ir->shadow_comparitor && !hw_compare_supported) {
956
assert(c->key.compare_funcs[sampler] != GL_NONE);
1073
assert(c->key.tex.compare_funcs[sampler] != GL_NONE);
957
1074
/* No need to even sample for GL_ALWAYS or GL_NEVER...bail early */
958
if (c->key.compare_funcs[sampler] == GL_ALWAYS)
1075
if (c->key.tex.compare_funcs[sampler] == GL_ALWAYS)
959
1076
return swizzle_result(ir, fs_reg(1.0f), sampler);
960
else if (c->key.compare_funcs[sampler] == GL_NEVER)
1077
else if (c->key.tex.compare_funcs[sampler] == GL_NEVER)
961
1078
return swizzle_result(ir, fs_reg(0.0f), sampler);
964
this->result = reg_undef;
965
ir->coordinate->accept(this);
1082
ir->coordinate->accept(this);
966
1083
fs_reg coordinate = this->result;
968
1085
if (ir->offset != NULL) {
969
ir_constant *offset = ir->offset->as_constant();
970
assert(offset != NULL);
972
signed char offsets[3];
973
for (unsigned i = 0; i < ir->offset->type->vector_elements; i++)
974
offsets[i] = (signed char) offset->value.i[i];
976
/* Combine all three offsets into a single unsigned dword:
978
* bits 11:8 - U Offset (X component)
979
* bits 7:4 - V Offset (Y component)
980
* bits 3:0 - R Offset (Z component)
982
unsigned offset_bits = 0;
983
for (unsigned i = 0; i < ir->offset->type->vector_elements; i++) {
984
const unsigned shift = 4 * (2 - i);
985
offset_bits |= (offsets[i] << shift) & (0xF << shift);
1086
uint32_t offset_bits = brw_texture_offset(ir->offset->as_constant());
988
1088
/* Explicitly set up the message header by copying g0 to msg reg m1. */
989
1089
emit(BRW_OPCODE_MOV, fs_reg(MRF, 1, BRW_REGISTER_TYPE_UD),
990
fs_reg(GRF, 0, BRW_REGISTER_TYPE_UD));
1090
fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)));
992
1092
/* Then set the offset bits in DWord 2 of the message header. */
993
1093
emit(BRW_OPCODE_MOV,
1125
1225
this->result = orig_val;
1227
if (ir->op == ir_txs)
1127
1230
if (ir->type == glsl_type::float_type) {
1128
1231
/* Ignore DEPTH_TEXTURE_MODE swizzling. */
1129
1232
assert(ir->sampler->type->sampler_shadow);
1130
} else if (c->key.tex_swizzles[sampler] != SWIZZLE_NOOP) {
1233
} else if (c->key.tex.swizzles[sampler] != SWIZZLE_NOOP) {
1131
1234
fs_reg swizzled_result = fs_reg(this, glsl_type::vec4_type);
1133
1236
for (int i = 0; i < 4; i++) {
1134
int swiz = GET_SWZ(c->key.tex_swizzles[sampler], i);
1237
int swiz = GET_SWZ(c->key.tex.swizzles[sampler], i);
1135
1238
fs_reg l = swizzled_result;
1136
1239
l.reg_offset += i;
1469
1567
inst->predicated = true;
1472
foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
1473
ir_instruction *ir = (ir_instruction *)iter.get();
1570
foreach_list(node, &ir->then_instructions) {
1571
ir_instruction *ir = (ir_instruction *)node;
1474
1572
this->base_ir = ir;
1475
this->result = reg_undef;
1476
1574
ir->accept(this);
1479
1577
if (!ir->else_instructions.is_empty()) {
1480
1578
emit(BRW_OPCODE_ELSE);
1482
foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
1483
ir_instruction *ir = (ir_instruction *)iter.get();
1580
foreach_list(node, &ir->else_instructions) {
1581
ir_instruction *ir = (ir_instruction *)node;
1484
1582
this->base_ir = ir;
1485
this->result = reg_undef;
1486
1584
ir->accept(this);
1530
1623
inst->predicated = true;
1533
foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
1534
ir_instruction *ir = (ir_instruction *)iter.get();
1626
foreach_list(node, &ir->body_instructions) {
1627
ir_instruction *ir = (ir_instruction *)node;
1536
1629
this->base_ir = ir;
1537
this->result = reg_undef;
1538
1630
ir->accept(this);
1541
1633
if (ir->increment) {
1542
1634
this->base_ir = ir->increment;
1543
this->result = reg_undef;
1544
1635
ir->increment->accept(this);
1545
1636
emit(BRW_OPCODE_ADD, counter, counter, this->result);
1668
1759
this->current_annotation = "compute pixel deltas from v0";
1669
1760
if (brw->has_pln) {
1670
this->delta_x = fs_reg(this, glsl_type::vec2_type);
1671
this->delta_y = this->delta_x;
1672
this->delta_y.reg_offset++;
1761
this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
1762
fs_reg(this, glsl_type::vec2_type);
1763
this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
1764
this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC];
1765
this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg_offset++;
1674
this->delta_x = fs_reg(this, glsl_type::float_type);
1675
this->delta_y = fs_reg(this, glsl_type::float_type);
1767
this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
1768
fs_reg(this, glsl_type::float_type);
1769
this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
1770
fs_reg(this, glsl_type::float_type);
1677
emit(BRW_OPCODE_ADD, this->delta_x,
1772
emit(BRW_OPCODE_ADD, this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
1678
1773
this->pixel_x, fs_reg(negate(brw_vec1_grf(1, 0))));
1679
emit(BRW_OPCODE_ADD, this->delta_y,
1774
emit(BRW_OPCODE_ADD, this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
1680
1775
this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1))));
1682
1777
this->current_annotation = "compute pos.w and 1/pos.w";
1684
1779
* interpolate the other attributes.
1686
1781
this->wpos_w = fs_reg(this, glsl_type::float_type);
1687
emit(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y,
1782
emit(FS_OPCODE_LINTERP, wpos_w,
1783
this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
1784
this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
1688
1785
interp_reg(FRAG_ATTRIB_WPOS, 3));
1689
1786
/* Compute the pixel 1/W value from wpos.w. */
1690
1787
this->pixel_w = fs_reg(this, glsl_type::float_type);
1691
emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w);
1788
emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w);
1692
1789
this->current_annotation = NULL;
1725
1822
this->current_annotation = "compute pos.w";
1726
1823
this->pixel_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0));
1727
1824
this->wpos_w = fs_reg(this, glsl_type::float_type);
1728
emit_math(FS_OPCODE_RCP, this->wpos_w, this->pixel_w);
1825
emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w);
1730
this->delta_x = fs_reg(brw_vec8_grf(2, 0));
1731
this->delta_y = fs_reg(brw_vec8_grf(3, 0));
1827
for (int i = 0; i < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; ++i) {
1828
uint8_t reg = c->barycentric_coord_reg[i];
1829
this->delta_x[i] = fs_reg(brw_vec8_grf(reg, 0));
1830
this->delta_y[i] = fs_reg(brw_vec8_grf(reg + 1, 0));
1733
1833
this->current_annotation = NULL;
1737
fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color)
1837
fs_visitor::emit_color_write(int target, int index, int first_color_mrf)
1739
1839
int reg_width = c->dispatch_width / 8;
1741
if (c->dispatch_width == 8 || intel->gen == 6) {
1841
fs_reg color = outputs[target];
1844
/* If there's no color data to be written, skip it. */
1845
if (color.file == BAD_FILE)
1848
color.reg_offset += index;
1850
if (c->dispatch_width == 8 || intel->gen >= 6) {
1742
1851
/* SIMD8 write looks like:
1774
1885
* usual destination + 1 for the second half we get
1775
1886
* destination + 4.
1777
emit(BRW_OPCODE_MOV,
1778
fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index), color);
1888
inst = emit(BRW_OPCODE_MOV,
1889
fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index,
1892
inst->saturate = c->key.clamp_fragment_color;
1780
1894
push_force_uncompressed();
1781
emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index), color);
1895
inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index,
1898
inst->saturate = c->key.clamp_fragment_color;
1782
1899
pop_force_uncompressed();
1784
1901
push_force_sechalf();
1785
1902
color.sechalf = true;
1786
emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4), color);
1903
inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4,
1906
inst->saturate = c->key.clamp_fragment_color;
1787
1907
pop_force_sechalf();
1788
1908
color.sechalf = false;
1850
1971
nr += reg_width;
1853
fs_reg color = reg_undef;
1854
if (this->frag_color)
1855
color = *(variable_storage(this->frag_color));
1856
else if (this->frag_data) {
1857
color = *(variable_storage(this->frag_data));
1858
color.type = BRW_REGISTER_TYPE_F;
1861
1974
for (int target = 0; target < c->key.nr_color_regions; target++) {
1862
1975
this->current_annotation = ralloc_asprintf(this->mem_ctx,
1863
1976
"FB write target %d",
1865
if (this->frag_color || this->frag_data) {
1866
for (int i = 0; i < 4; i++) {
1867
emit_color_write(i, color_mrf, color);
1872
if (this->frag_color)
1873
color.reg_offset -= 4;
1978
for (int i = 0; i < 4; i++)
1979
emit_color_write(target, i, color_mrf);
1875
1981
fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
1876
1982
inst->target = target;
1983
inst->base_mrf = base_mrf;
1984
inst->mlen = nr - base_mrf;
1879
1985
if (target == c->key.nr_color_regions - 1)
1880
1986
inst->eot = true;
1881
1987
inst->header_present = header_present;
1884
1990
if (c->key.nr_color_regions == 0) {
1885
if (c->key.alpha_test && (this->frag_color || this->frag_data)) {
1991
if (c->key.alpha_test) {
1886
1992
/* If the alpha test is enabled but there's no color buffer,
1887
1993
* we still need to send alpha out the pipeline to our null
1888
1994
* renderbuffer.
1890
color.reg_offset += 3;
1891
emit_color_write(3, color_mrf, color);
1996
emit_color_write(0, 3, color_mrf);
1894
1999
fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
2000
inst->base_mrf = base_mrf;
2001
inst->mlen = nr - base_mrf;
1897
2002
inst->eot = true;
1898
2003
inst->header_present = header_present;
1901
2006
this->current_annotation = NULL;
2010
fs_visitor::resolve_ud_negate(fs_reg *reg)
2012
if (reg->type != BRW_REGISTER_TYPE_UD ||
2016
fs_reg temp = fs_reg(this, glsl_type::uint_type);
2017
emit(BRW_OPCODE_MOV, temp, *reg);