/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "nvc0_pc.h"
#include "nvc0_program.h"
#define DESCEND_ARBITRARY(j, f)                                 \
do {                                                            \
   b->pass_seq = ctx->pc->pass_seq;                             \
                                                                \
   for (j = 0; j < 2; ++j)                                      \
      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \
         f(ctx, b->out[j]);                                     \
} while (0)
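
/* Note: DESCEND_ARBITRARY recursively applies the pass function @f to the
 * (at most two) CFG successors of the current block, using pass_seq as the
 * "already visited" marker so that each block is processed only once per
 * pass invocation.
 */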

static INLINE boolean
registers_interfere(struct nv_value *a, struct nv_value *b)
{
   if (a->reg.file != b->reg.file)
      return FALSE;
   if (NV_IS_MEMORY_FILE(a->reg.file) || NV_IS_MEMORY_FILE(b->reg.file))
      return FALSE;

   assert(a->join->reg.id >= 0 && b->join->reg.id >= 0);

   if (a->join->reg.id < b->join->reg.id) {
      return (a->join->reg.id + a->reg.size >= b->join->reg.id);
   } else
   if (a->join->reg.id > b->join->reg.id) {
      return (b->join->reg.id + b->reg.size >= a->join->reg.id);
   }

   return TRUE;
}

static INLINE boolean
values_equal(struct nv_value *a, struct nv_value *b)
{
   if (a->reg.file != b->reg.file || a->reg.size != b->reg.size)
      return FALSE;
   if (NV_IS_MEMORY_FILE(a->reg.file))
      return a->reg.address == b->reg.address;
   else
      return a->join->reg.id == b->join->reg.id;
}

static INLINE boolean
inst_commutation_check(struct nv_instruction *a, struct nv_instruction *b)
{
   int si, di;

   for (di = 0; di < 4 && a->def[di]; ++di)
      for (si = 0; si < 5 && b->src[si]; ++si)
         if (registers_interfere(a->def[di], b->src[si]->value))
            return FALSE;

   return TRUE;
}

/* Check whether we can swap the order of the instructions,
 * where a & b may be either the earlier or the later one.
 */
static boolean
inst_commutation_legal(struct nv_instruction *a, struct nv_instruction *b)
{
   return inst_commutation_check(a, b) && inst_commutation_check(b, a);
}

static INLINE boolean
inst_removable(struct nv_instruction *nvi)
{
   if (nvi->opcode == NV_OP_ST)
      return FALSE;
   return (!(nvi->terminator ||
             nvi->join ||
             nvi->target ||
             nvi->fixed ||
             nvc0_insn_refcount(nvi)));
}

/* Check if we do not actually have to emit this instruction. */
static INLINE boolean
inst_is_noop(struct nv_instruction *nvi)
{
   if (nvi->opcode == NV_OP_UNDEF || nvi->opcode == NV_OP_BIND)
      return TRUE;
   if (nvi->terminator || nvi->join)
      return FALSE;
   if (nvi->def[0] && nvi->def[0]->join->reg.id < 0)
      return TRUE;
   if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT)
      return FALSE;
   if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file)
      return FALSE;

   if (nvi->src[0]->value->join->reg.id < 0) {
      NV50_DBGMSG(PROG_IR, "inst_is_noop: orphaned value detected\n");
      return TRUE;
   }

   if (nvi->opcode == NV_OP_SELECT)
      if (!values_equal(nvi->def[0], nvi->src[1]->value))
         return FALSE;
   return values_equal(nvi->def[0], nvi->src[0]->value);
}
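
/* For example, a MOV (or a SELECT with all sources equal to the result)
 * whose source and destination were coalesced into the same register does
 * nothing and can be dropped right before emission.
 */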

struct nv_pass {
   struct nv_pc *pc;
   int n;
   void *priv;
};

static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b);

static void
nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
{
   struct nv_pc *pc = (struct nv_pc *)priv;
   struct nv_basic_block *in;
   struct nv_instruction *nvi, *next;
   int j;

   /* find first non-empty block emitted before b */
   for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->emit_size; --j);

   for (; j >= 0; --j) {
      in = pc->bb_list[j];

      /* check for no-op branches (BRA $PC+8) */
      if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) {
         in->emit_size -= 8;
         pc->emit_size -= 8;

         for (++j; j < pc->num_blocks; ++j)
            pc->bb_list[j]->emit_pos -= 8;

         nvc0_insn_delete(in->exit);
      }
      b->emit_pos = in->emit_pos + in->emit_size;

      if (in->emit_size) /* no more no-op branches to b */
         break;
   }
   pc->bb_list[pc->num_blocks++] = b;

   /* visit node */

   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;
      if (inst_is_noop(nvi) ||
          (pc->is_fragprog && nvi->opcode == NV_OP_EXPORT)) {
         nvc0_insn_delete(nvi);
      } else
         b->emit_size += 8;
   }
   pc->emit_size += b->emit_size;

#if NV50_DEBUG & NV50_DEBUG_PROG_IR
   if (!b->entry)
      debug_printf("BB:%i is now empty\n", b->id);
   else
      debug_printf("BB:%i size = %u\n", b->id, b->emit_size);
#endif
}

static int
nv_pc_pass2(struct nv_pc *pc, struct nv_basic_block *root)
{
   struct nv_pass pass;

   pass.pc = pc;

   pc->pass_seq++;
   nv_pass_flatten(&pass, root);

   nvc0_pc_pass_in_order(root, nv_pc_pass_pre_emission, pc);

   return 0;
}

int
nvc0_pc_exec_pass2(struct nv_pc *pc)
{
   int i, ret;

   NV50_DBGMSG(PROG_IR, "preparing %u blocks for emission\n", pc->num_blocks);

   pc->num_blocks = 0; /* will reorder bb_list */

   for (i = 0; i < pc->num_subroutines + 1; ++i)
      if (pc->root[i] && (ret = nv_pc_pass2(pc, pc->root[i])))
         return ret;
   return 0;
}

static INLINE boolean
is_cspace_load(struct nv_instruction *nvi)
{
   if (!nvi)
      return FALSE;
   assert(nvi->indirect != 0);
   return (nvi->opcode == NV_OP_LD &&
           nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
           nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15));
}

static INLINE boolean
is_immd32_load(struct nv_instruction *nvi)
{
   if (!nvi)
      return FALSE;
   return (nvi->opcode == NV_OP_MOV &&
           nvi->src[0]->value->reg.file == NV_FILE_IMM &&
           nvi->src[0]->value->reg.size == 4);
}

static INLINE void
check_swap_src_0_1(struct nv_instruction *nvi)
{
   struct nv_ref *src0 = nvi->src[0];
   struct nv_ref *src1 = nvi->src[1];

   if (!nv_op_commutative(nvi->opcode) &&
       NV_BASEOP(nvi->opcode) != NV_OP_SET &&
       NV_BASEOP(nvi->opcode) != NV_OP_SLCT)
      return;
   assert(src0 && src1 && src0->value && src1->value);

   if (src1->value->reg.file != NV_FILE_GPR)
      return;

   if (is_cspace_load(src0->value->insn)) {
      if (!is_cspace_load(src1->value->insn)) {
         nvi->src[0] = src1;
         nvi->src[1] = src0;
      }
   } else
   if (is_immd32_load(src0->value->insn)) {
      if (!is_cspace_load(src1->value->insn) &&
          !is_immd32_load(src1->value->insn)) {
         nvi->src[0] = src1;
         nvi->src[1] = src0;
      }
   }

   if (nvi->src[0] != src0) {
      if (NV_BASEOP(nvi->opcode) == NV_OP_SET)
         nvi->set_cond = nvc0_ir_reverse_cc(nvi->set_cond);
      else
      if (NV_BASEOP(nvi->opcode) == NV_OP_SLCT)
         nvi->set_cond = NV_CC_INVERSE(nvi->set_cond);
   }
}
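
/* Example (hypothetical IR): for
 *    add $r0 c0[0x10] $r1
 * the sources are swapped to
 *    add $r0 $r1 c0[0x10]
 * so that the constant buffer access ends up in the source slot from which
 * a later pass can fold it into the instruction (see nvc0_pass_fold_loads).
 * SET and SLCT compensate for the swap by reversing or inverting their
 * condition code.
 */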

static void
nvi_set_indirect_load(struct nv_pc *pc,
                      struct nv_instruction *nvi, struct nv_value *val)
{
   for (nvi->indirect = 0; nvi->indirect < 6 && nvi->src[nvi->indirect];
        ++nvi->indirect);
   assert(nvi->indirect < 6);
   nv_reference(pc, nvi, nvi->indirect, val);
}

static int
nvc0_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *ld;
   int s;

   for (nvi = b->entry; nvi; nvi = nvi->next) {
      check_swap_src_0_1(nvi);

      for (s = 0; s < 3 && nvi->src[s]; ++s) {
         ld = nvi->src[s]->value->insn;
         if (!ld || (ld->opcode != NV_OP_LD && ld->opcode != NV_OP_MOV))
            continue;
         if (!nvc0_insn_can_load(nvi, s, ld))
            continue;

         /* fold it ! */
         nv_reference(ctx->pc, nvi, s, ld->src[0]->value);
         if (ld->indirect >= 0)
            nvi_set_indirect_load(ctx->pc, nvi, ld->src[ld->indirect]->value);

         if (!nvc0_insn_refcount(ld))
            nvc0_insn_delete(ld);
      }
   }
   DESCEND_ARBITRARY(s, nvc0_pass_fold_loads);

   return 0;
}
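
/* For example, given
 *    mov $r1 c0[0x20]
 *    add $r2 $r0 $r1
 * if the ADD can encode a c[] operand directly (nvc0_insn_can_load), the
 * pass rewrites this to
 *    add $r2 $r0 c0[0x20]
 * and deletes the MOV once nothing references its result anymore.
 */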

/* NOTE: Assumes loads have not yet been folded. */
static int
nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *mi, *next;
   int j;
   uint8_t mod;

   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;

      if (nvi->opcode == NV_OP_SUB) {
         nvi->src[1]->mod ^= NV_MOD_NEG;
         nvi->opcode = NV_OP_ADD;
      }

      for (j = 0; j < 3 && nvi->src[j]; ++j) {
         mi = nvi->src[j]->value->insn;
         if (!mi)
            continue;
         if (mi->def[0]->refc > 1 || mi->predicate >= 0)
            continue;

         if (NV_BASEOP(mi->opcode) == NV_OP_NEG) mod = NV_MOD_NEG;
         else
         if (NV_BASEOP(mi->opcode) == NV_OP_ABS) mod = NV_MOD_ABS;
         else
            continue;
         assert(!(mod & mi->src[0]->mod & NV_MOD_NEG));

         mod |= mi->src[0]->mod;

         if ((nvi->opcode == NV_OP_ABS) || (nvi->src[j]->mod & NV_MOD_ABS)) {
            /* abs neg [abs] = abs */
            mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
         } else
         if ((nvi->opcode == NV_OP_NEG) && (mod & NV_MOD_NEG)) {
            /* neg as opcode and modifier on same insn cannot occur */
            /* neg neg abs = abs, neg neg = identity */
            assert(j == 0);
            if (mod & NV_MOD_ABS)
               nvi->opcode = NV_OP_ABS;
            else
               nvi->opcode = NV_OP_MOV;
            mod = 0;
         }

         if ((nv_op_supported_src_mods(nvi->opcode, j) & mod) != mod)
            continue;

         nv_reference(ctx->pc, nvi, j, mi->src[0]->value);

         nvi->src[j]->mod ^= mod;
      }

      if (nvi->opcode == NV_OP_SAT) {
         mi = nvi->src[0]->value->insn;

         if (mi->def[0]->refc > 1 ||
             (mi->opcode != NV_OP_ADD &&
              mi->opcode != NV_OP_MUL &&
              mi->opcode != NV_OP_MAD))
            continue;
         mi->saturate = 1;
         mi->def[0] = nvi->def[0];
         mi->def[0]->insn = mi;
         nvc0_insn_delete(nvi);
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_lower_mods);

   return 0;
}
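
/* For example, with
 *    neg $r1 $r0
 *    mul $r3 $r1 $r2
 * the NEG is absorbed as a source modifier:
 *    mul $r3 -$r0 $r2
 * provided the consuming op supports the modifier on that source slot
 * (nv_op_supported_src_mods) and the intermediate value has no other users.
 * Similarly, a SAT of a single-use ADD/MUL/MAD becomes that op's saturate
 * flag.
 */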

#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)

static void
apply_modifiers(uint32_t *val, uint8_t type, uint8_t mod)
{
   if (mod & NV_MOD_ABS) {
      if (type == NV_TYPE_F32)
         *val &= 0x7fffffff;
      else
      if ((*val) & (1 << 31))
         *val = ~(*val) + 1;
   }
   if (mod & NV_MOD_NEG) {
      if (type == NV_TYPE_F32)
         *val ^= 0x80000000;
      else
         *val = ~(*val) + 1;
   }
   if (mod & NV_MOD_SAT) {
      union {
         float f;
         uint32_t u;
         int32_t i;
      } u;
      u.u = *val;
      if (type == NV_TYPE_F32) {
         u.f = CLAMP(u.f, -1.0f, 1.0f);
      } else
      if (type == NV_TYPE_U16) {
         u.u = MIN2(u.u, 0xffff);
      } else
      if (type == NV_TYPE_S16) {
         u.i = CLAMP(u.i, -32768, 32767);
      }
      *val = u.u;
   }
   if (mod & NV_MOD_NOT)
      *val = ~(*val);
}

static void
constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
                    struct nv_value *src0, struct nv_value *src1)
{
   struct nv_value *val;
   union {
      float f32;
      uint32_t u32;
      int32_t s32;
   } u0, u1, u;
   uint8_t type;

   if (!nvi->def[0])
      return;
   type = NV_OPTYPE(nvi->opcode);

   u.u32 = 0;
   u0.u32 = src0->reg.imm.u32;
   u1.u32 = src1->reg.imm.u32;

   apply_modifiers(&u0.u32, type, nvi->src[0]->mod);
   apply_modifiers(&u1.u32, type, nvi->src[1]->mod);

   switch (nvi->opcode) {
   case NV_OP_MAD_F32:
      if (nvi->src[2]->value->reg.file != NV_FILE_GPR)
         return;
      /* fall through */
   case NV_OP_MUL_F32:
      u.f32 = u0.f32 * u1.f32;
      break;
   case NV_OP_MUL_B32:
      u.u32 = u0.u32 * u1.u32;
      break;
   case NV_OP_ADD_F32:
      u.f32 = u0.f32 + u1.f32;
      break;
   case NV_OP_ADD_B32:
      u.u32 = u0.u32 + u1.u32;
      break;
   case NV_OP_SUB_F32:
      u.f32 = u0.f32 - u1.f32;
      break;
   case NV_OP_SUB_B32:
      u.u32 = u0.u32 - u1.u32;
      break;
   default:
      return;
   }

   val = new_value(pc, NV_FILE_IMM, nv_type_sizeof(type));
   val->reg.imm.u32 = u.u32;

   nv_reference(pc, nvi, 1, NULL);
   nv_reference(pc, nvi, 0, val);

   if (nvi->opcode == NV_OP_MAD_F32) {
      nvi->src[1] = nvi->src[0];
      nvi->src[0] = nvi->src[2];
      nvi->src[2] = NULL;
      nvi->opcode = NV_OP_ADD_F32;

      if (val->reg.imm.u32 == 0) {
         nvi->src[1] = NULL;
         nvi->opcode = NV_OP_MOV;
      }
   } else {
      nvi->opcode = NV_OP_MOV;
   }
}
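
/* For example, mul $r0 (2.0) (3.0) becomes mov $r0 (6.0), and a MAD whose
 * two factors are both immediates degenerates into an ADD of the folded
 * product (or a plain MOV if that product is 0).
 */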

static void
constant_operand(struct nv_pc *pc,
                 struct nv_instruction *nvi, struct nv_value *val, int s)
{
   union {
      float f32;
      uint32_t u32;
      int32_t s32;
   } u;
   int shift;
   int t = s ? 0 : 1;
   uint op;
   uint8_t type;

   if (!nvi->def[0])
      return;
   type = NV_OPTYPE(nvi->opcode);

   u.u32 = val->reg.imm.u32;
   apply_modifiers(&u.u32, type, nvi->src[s]->mod);

   if (u.u32 == 0 && NV_BASEOP(nvi->opcode) == NV_OP_MUL) {
      nvi->opcode = NV_OP_MOV;
      nv_reference(pc, nvi, t, NULL);
      if (s) {
         nvi->src[0] = nvi->src[1];
         nvi->src[1] = NULL;
      }
      nvi->src[0]->mod = 0;
      return;
   }

   switch (nvi->opcode) {
   case NV_OP_MUL_F32:
      if (u.f32 == 1.0f || u.f32 == -1.0f) {
         if (u.f32 == -1.0f)
            nvi->src[t]->mod ^= NV_MOD_NEG;
         switch (nvi->src[t]->mod) {
         case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break;
         case NV_MOD_NEG: op = NV_OP_NEG_F32; break;
         case NV_MOD_ABS: op = NV_OP_ABS_F32; break;
         default:
            return;
         }
         nvi->opcode = op;
         nv_reference(pc, nvi, 0, nvi->src[t]->value);
         nv_reference(pc, nvi, 1, NULL);
         nvi->src[0]->mod = 0;
      } else
      if (u.f32 == 2.0f || u.f32 == -2.0f) {
         if (u.f32 == -2.0f)
            nvi->src[t]->mod ^= NV_MOD_NEG;
         nvi->opcode = NV_OP_ADD_F32;
         nv_reference(pc, nvi, s, nvi->src[t]->value);
         nvi->src[s]->mod = nvi->src[t]->mod;
      }
      break;
   case NV_OP_ADD_F32:
      if (u.u32 == 0) {
         switch (nvi->src[t]->mod) {
         case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break;
         case NV_MOD_NEG: op = NV_OP_NEG_F32; break;
         case NV_MOD_ABS: op = NV_OP_ABS_F32; break;
         case NV_MOD_NEG | NV_MOD_ABS:
            op = NV_OP_CVT;
            nvi->ext.cvt.s = nvi->ext.cvt.d = type;
            break;
         default:
            return;
         }
         nvi->opcode = op;
         nv_reference(pc, nvi, 0, nvi->src[t]->value);
         nv_reference(pc, nvi, 1, NULL);
         if (nvi->opcode != NV_OP_CVT)
            nvi->src[0]->mod = 0;
      }
      break;
   case NV_OP_ADD_B32:
      if (u.u32 == 0) {
         assert(nvi->src[t]->mod == 0);
         nvi->opcode = nvi->saturate ? NV_OP_CVT : NV_OP_MOV;
         nvi->ext.cvt.s = nvi->ext.cvt.d = type;
         nv_reference(pc, nvi, 0, nvi->src[t]->value);
         nv_reference(pc, nvi, 1, NULL);
      }
      break;
   case NV_OP_MUL_B32:
      /* multiplication by 0 already handled above */
      assert(nvi->src[s]->mod == 0);
      shift = ffs(u.s32) - 1;
      if (u.s32 == 1) {
         nvi->opcode = NV_OP_MOV;
         nv_reference(pc, nvi, 0, nvi->src[t]->value);
         nv_reference(pc, nvi, 1, NULL);
      } else
      if (u.s32 > 0 && u.s32 == (1 << shift)) {
         nvi->opcode = NV_OP_SHL;
         (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.s32 = shift;
         nv_reference(pc, nvi, 0, nvi->src[t]->value);
         nv_reference(pc, nvi, 1, val);
      }
      break;
   case NV_OP_RCP:
      u.f32 = 1.0f / u.f32;
      (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.f32 = u.f32;
      nvi->opcode = NV_OP_MOV;
      assert(s == 0);
      nv_reference(pc, nvi, 0, val);
      break;
   case NV_OP_RSQ:
      u.f32 = 1.0f / sqrtf(u.f32);
      (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.f32 = u.f32;
      nvi->opcode = NV_OP_MOV;
      assert(s == 0);
      nv_reference(pc, nvi, 0, val);
      break;
   default:
      break;
   }
}
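
/* Strength reduction examples: x * 1.0 becomes a MOV (or NEG/ABS/SAT,
 * depending on modifiers), x * 2.0 becomes x + x, an integer multiply by a
 * power of two becomes a SHL, and RCP/RSQ of an immediate are evaluated at
 * compile time.
 */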

static void
handle_min_max(struct nv_pass *ctx, struct nv_instruction *nvi)
{
   struct nv_value *src0 = nvi->src[0]->value;
   struct nv_value *src1 = nvi->src[1]->value;

   if (src0 != src1 || (nvi->src[0]->mod | nvi->src[1]->mod))
      return;
   if (src0->reg.file != NV_FILE_GPR)
      return;
   nvc0_pc_replace_value(ctx->pc, nvi->def[0], src0);
   nvc0_insn_delete(nvi);
}

/* check if we can MUL + ADD -> MAD/FMA */
static void
handle_add_mul(struct nv_pass *ctx, struct nv_instruction *nvi)
{
   struct nv_value *src0 = nvi->src[0]->value;
   struct nv_value *src1 = nvi->src[1]->value;
   struct nv_value *src;
   int s;
   uint8_t mod[4];

   if (SRC_IS_MUL(src0) && src0->refc == 1) s = 0;
   else
   if (SRC_IS_MUL(src1) && src1->refc == 1) s = 1;
   else
      return;

   if ((src0->insn && src0->insn->bb != nvi->bb) ||
       (src1->insn && src1->insn->bb != nvi->bb))
      return;

   /* check for immediates from prior constant folding */
   if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR)
      return;
   src = nvi->src[s]->value;

   mod[0] = nvi->src[0]->mod;
   mod[1] = nvi->src[1]->mod;
   mod[2] = src->insn->src[0]->mod;
   mod[3] = src->insn->src[1]->mod;

   if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG)
      return;

   nvi->opcode = NV_OP_MAD_F32;

   nv_reference(ctx->pc, nvi, s, NULL);
   nvi->src[2] = nvi->src[!s];
   nvi->src[!s] = NULL;

   nv_reference(ctx->pc, nvi, 0, src->insn->src[0]->value);
   nvi->src[0]->mod = mod[2] ^ mod[s];
   nv_reference(ctx->pc, nvi, 1, src->insn->src[1]->value);
   nvi->src[1]->mod = mod[3];
}
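
/* For example,
 *    mul $r1 $r2 $r3
 *    add $r4 $r1 $r5
 * becomes mad $r4 $r2 $r3 $r5, provided the MUL result has no other users,
 * both instructions are in the same basic block, and only NEG modifiers are
 * involved (which can be merged by XOR).
 */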

static int
nv_pass_algebraic_opt(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *next;
   int j;

   for (nvi = b->entry; nvi; nvi = next) {
      struct nv_value *src0, *src1;
      uint baseop = NV_BASEOP(nvi->opcode);

      next = nvi->next;

      src0 = nvc0_pc_find_immediate(nvi->src[0]);
      src1 = nvc0_pc_find_immediate(nvi->src[1]);

      if (src0 && src1) {
         constant_expression(ctx->pc, nvi, src0, src1);
      } else {
         if (src0)
            constant_operand(ctx->pc, nvi, src0, 0);
         else
         if (src1)
            constant_operand(ctx->pc, nvi, src1, 1);
      }

      if (baseop == NV_OP_MIN || baseop == NV_OP_MAX)
         handle_min_max(ctx, nvi);
      else
      if (nvi->opcode == NV_OP_ADD_F32)
         handle_add_mul(ctx, nvi);
   }
   DESCEND_ARBITRARY(j, nv_pass_algebraic_opt);

   return 0;
}

/* TODO: redundant store elimination */

struct mem_record {
   struct mem_record *next;
   struct nv_instruction *insn;
   uint32_t ofst;
   uint32_t base;
   uint32_t size;
};

#define MEM_RECORD_POOL_SIZE 1024

struct pass_reld_elim {
   struct nv_pc *pc;

   struct mem_record *imm;
   struct mem_record *mem_v;
   struct mem_record *mem_a;
   struct mem_record *mem_c[16];
   struct mem_record *mem_l;

   uint32_t alloc;
   struct mem_record pool[MEM_RECORD_POOL_SIZE];
};

/* Extend the load operation in @rec to also cover the data loaded by @ld.
 * The two loads may not overlap but reference adjacent memory locations.
 */
static void
combine_load(struct nv_pc *pc, struct mem_record *rec,
             struct nv_instruction *ld)
{
   struct nv_instruction *fv = rec->insn;
   struct nv_value *mem = ld->src[0]->value;
   uint32_t size = rec->size + mem->reg.size;
   int j;
   int d = rec->size / 4;

   assert(rec->size < 16);

   if (rec->ofst > mem->reg.address) {
      if ((size == 8 && mem->reg.address & 3) ||
          (size > 8 && mem->reg.address & 7))
         return;

      rec->ofst = mem->reg.address;
      for (j = 0; j < d; ++j)
         fv->def[mem->reg.size / 4 + j] = fv->def[j];
      d = 0;
   } else
   if ((size == 8 && rec->ofst & 3) ||
       (size > 8 && rec->ofst & 7)) {
      return;
   }

   for (j = 0; j < mem->reg.size / 4; ++j) {
      fv->def[d] = ld->def[j];
      fv->def[d++]->insn = fv;
   }

   if (fv->src[0]->value->refc > 1)
      nv_reference(pc, fv, 0, new_value_like(pc, fv->src[0]->value));
   fv->src[0]->value->reg.address = rec->ofst;
   fv->src[0]->value->reg.size = rec->size = size;

   nvc0_insn_delete(ld);
}
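
/* For example, 4-byte loads from c0[0x10] and c0[0x14] are merged into a
 * single 8-byte load from c0[0x10], with the destination values of the
 * second load re-attached to the widened one; the alignment checks above
 * reject offsets at which the wider access could not be encoded.
 */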

static void
combine_export(struct mem_record *rec, struct nv_instruction *ex)
{
   /* TODO */
}

static void
add_mem_record(struct pass_reld_elim *ctx, struct mem_record **rec,
               uint32_t base, uint32_t ofst, struct nv_instruction *nvi)
{
   struct mem_record *it = &ctx->pool[ctx->alloc++];

   it->next = *rec;
   *rec = it;
   it->base = base;
   it->ofst = ofst;
   it->insn = nvi;
   it->size = nvi->src[0]->value->reg.size;
}

/* vectorize and reuse loads from memory or of immediates */
static int
nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b)
{
   struct mem_record **rec, *it;
   struct nv_instruction *ld, *next;
   struct nv_value *mem;
   uint32_t ofst, base;
   int s;

   for (ld = b->entry; ld; ld = next) {
      next = ld->next;

      if (is_cspace_load(ld)) {
         mem = ld->src[0]->value;
         rec = &ctx->mem_c[ld->src[0]->value->reg.file - NV_FILE_MEM_C(0)];
      } else
      if (ld->opcode == NV_OP_VFETCH) {
         mem = ld->src[0]->value;
         rec = &ctx->mem_a;
      } else
      if (ld->opcode == NV_OP_EXPORT) {
         mem = ld->src[0]->value;
         if (mem->reg.file != NV_FILE_MEM_V)
            continue;
         rec = &ctx->mem_v;
      } else {
         continue;
      }

      if (ld->def[0] && ld->def[0]->refc == 0)
         continue;
      ofst = mem->reg.address;
      base = (ld->indirect >= 0) ? ld->src[ld->indirect]->value->n : 0;

      for (it = *rec; it; it = it->next) {
         if (it->base == base &&
             ((it->ofst >> 4) == (ofst >> 4)) &&
             ((it->ofst + it->size == ofst) ||
              (it->ofst - mem->reg.size == ofst))) {
            /* only NV_OP_VFETCH can load exactly 12 bytes */
            if (ld->opcode == NV_OP_LD && it->size + mem->reg.size == 12)
               continue;
            if (it->ofst < ofst) {
               if ((it->ofst & 0xf) == 4)
                  continue;
            } else
            if ((ofst & 0xf) == 4)
               continue;
            break;
         }
      }

      if (it) {
         switch (ld->opcode) {
         case NV_OP_EXPORT: combine_export(it, ld); break;
         default:
            combine_load(ctx->pc, it, ld);
            break;
         }
      } else
      if (ctx->alloc < MEM_RECORD_POOL_SIZE) {
         add_mem_record(ctx, rec, base, ofst, ld);
      }
   }

   ctx->alloc = 0;
   ctx->mem_a = ctx->mem_v = ctx->mem_l = NULL;
   for (s = 0; s < 16; ++s)
      ctx->mem_c[s] = NULL;

   DESCEND_ARBITRARY(s, nv_pass_mem_opt);
   return 0;
}

static void
eliminate_store(struct mem_record *rec, struct nv_instruction *st)
{
   /* Assumed body: the recorded store is fully overwritten by @st,
    * so the older instruction can be deleted.
    */
   nvc0_insn_delete(rec->insn);
   rec->insn = st;
}

/* elimination of redundant stores */
static int
pass_store_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
{
   struct mem_record **rec, *it;
   struct nv_instruction *st, *next;
   struct nv_value *mem;
   uint32_t base, ofst, size;
   int s;

   for (st = b->entry; st; st = next) {
      next = st->next;

      if (st->opcode == NV_OP_ST) {
         mem = st->src[0]->value;
         rec = &ctx->mem_l;
      } else
      if (st->opcode == NV_OP_EXPORT) {
         mem = st->src[0]->value;
         if (mem->reg.file != NV_FILE_MEM_V)
            continue;
         rec = &ctx->mem_v;
      } else {
         continue;
      }

      if (st->opcode == NV_OP_ST) {
         /* TODO: purge records the store's source values depend on */
      }

      ofst = mem->reg.address;
      base = (st->indirect >= 0) ? st->src[st->indirect]->value->n : 0;
      size = mem->reg.size;

      for (it = *rec; it; it = it->next) {
         if (it->base == base &&
             (it->ofst <= ofst && (it->ofst + size) > ofst))
            break;
      }

      if (it)
         eliminate_store(it, st);
      else
         add_mem_record(ctx, rec, base, ofst, st);
   }

   DESCEND_ARBITRARY(s, nv_pass_mem_opt);
   return 0;
}

/* TODO: properly handle loads from l[] memory in the presence of stores */

/* Assumed support definitions: the record type and pool size used below are
 * not present in this excerpt; their fields are inferred from the uses in
 * nv_pass_reload_elim (a keyed list of previously loaded values).
 */
#define LOAD_RECORD_POOL_SIZE 1024

struct load_record {
   struct load_record *next;
   uint64_t data[2];
   struct nv_value *value;
};

static int
nv_pass_reload_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
{
   struct load_record **rec, *it;
   struct nv_instruction *ld, *next;
   uint64_t data[2];
   struct nv_value *val;
   int j;

   for (ld = b->entry; ld; ld = next) {
      next = ld->next;
      if (!ld->src[0])
         continue;
      val = ld->src[0]->value;
      rec = NULL;

      if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
         data[0] = val->reg.id;
         data[1] = 0;
         rec = &ctx->mem_v;
      } else
      if (ld->opcode == NV_OP_LDA) {
         data[0] = val->reg.id;
         data[1] = ld->src[4] ? ld->src[4]->value->n : ~0ULL;
         if (val->reg.file >= NV_FILE_MEM_C(0) &&
             val->reg.file <= NV_FILE_MEM_C(15))
            rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
         else
         if (val->reg.file == NV_FILE_MEM_S)
            rec = &ctx->mem_a; /* assumption: shared-space list head */
         else
         if (val->reg.file == NV_FILE_MEM_L)
            rec = &ctx->mem_l;
      } else
      if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
         data[0] = val->reg.imm.u32;
         data[1] = 0;
         rec = &ctx->imm;
      }

      if (!rec || !ld->def[0]->refc)
         continue;

      for (it = *rec; it; it = it->next)
         if (it->data[0] == data[0] && it->data[1] == data[1])
            break;

      if (it) {
         if (ld->def[0]->reg.id >= 0)
            it->value = ld->def[0];
         else
            nvc0_pc_replace_value(ctx->pc, ld->def[0], it->value);
      } else {
         if (ctx->alloc == LOAD_RECORD_POOL_SIZE)
            continue;
         it = &ctx->pool[ctx->alloc++];
         it->next = *rec;
         it->data[0] = data[0];
         it->data[1] = data[1];
         it->value = ld->def[0];
         *rec = it;
      }
   }

   ctx->imm = NULL;
   ctx->mem_v = NULL;
   ctx->mem_a = NULL;
   ctx->mem_l = NULL;
   for (j = 0; j < 16; ++j)
      ctx->mem_c[j] = NULL;
   ctx->alloc = 0;

   DESCEND_ARBITRARY(j, nv_pass_reload_elim);

   return 0;
}

static int
nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b)
{
   int i, c, j;

   for (i = 0; i < ctx->pc->num_instructions; ++i) {
      struct nv_instruction *nvi = &ctx->pc->instructions[i];
      struct nv_value *def[4];

      if (!nv_is_texture_op(nvi->opcode))
         continue;
      nvi->tex_mask = 0;

      for (c = 0; c < 4; ++c) {
         if (nvi->def[c]->refc)
            nvi->tex_mask |= 1 << c;
         def[c] = nvi->def[c];
      }

      j = 0;
      for (c = 0; c < 4; ++c)
         if (nvi->tex_mask & (1 << c))
            nvi->def[j++] = def[c];
      for (c = 0; c < 4; ++c)
         if (!(nvi->tex_mask & (1 << c)))
            nvi->def[j++] = def[c];
      assert(j == 4);
   }
   return 0;
}

struct nv_pass_dce {
   struct nv_pc *pc;
   uint removed;
};

static int
nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
{
   int j;
   struct nv_instruction *nvi, *next;

   for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) {
      next = nvi->next;

      if (inst_removable(nvi)) {
         nvc0_insn_delete(nvi);
         ++ctx->removed;
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_dce);

   return 0;
}

/* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
 * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with
 * BREAK and dummy ELSE block.
 */
static INLINE boolean
bb_is_if_else_endif(struct nv_basic_block *bb)
{
   if (!bb->out[0] || !bb->out[1])
      return FALSE;

   if (bb->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) {
      return (bb->out[0]->out[1] == bb->out[1]->out[0] &&
              !bb->out[1]->out[1]);
   } else {
      return (bb->out[0]->out[0] == bb->out[1]->out[0] &&
              !bb->out[0]->out[1] &&
              !bb->out[1]->out[1]);
   }
}

/* Predicate instructions and delete any branch at the end if it is
 * not a break from a loop.
 */
static void
predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b,
                       struct nv_value *pred, uint8_t cc)
{
   struct nv_instruction *nvi, *prev;
   int s;

   if (!b->entry)
      return;
   for (nvi = b->entry; nvi; nvi = nvi->next) {
      prev = nvi;
      if (inst_is_noop(nvi))
         continue;
      for (s = 0; nvi->src[s]; ++s);
      assert(s < 6);
      nvi->predicate = s;
      nvi->cc = cc;
      nv_reference(pc, nvi, nvi->predicate, pred);
   }
   if (prev->opcode == NV_OP_BRA &&
       b->out_kind[0] != CFG_EDGE_LOOP_LEAVE &&
       b->out_kind[1] != CFG_EDGE_LOOP_LEAVE)
      nvc0_insn_delete(prev);
}

static INLINE boolean
may_predicate_insn(struct nv_instruction *nvi, struct nv_value *pred)
{
   if (nvi->def[0] && values_equal(nvi->def[0], pred))
      return FALSE;
   return nvc0_insn_is_predicateable(nvi);
}

/* Transform IF/ELSE/ENDIF constructs into predicated instructions
 * where feasible.
 */
static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi;
   struct nv_value *pred;
   int k;
   int n0, n1; /* instruction counts of outgoing blocks */

   if (bb_is_if_else_endif(b)) {
      assert(b->exit && b->exit->opcode == NV_OP_BRA);

      assert(b->exit->predicate >= 0);
      pred = b->exit->src[b->exit->predicate]->value;

      n0 = n1 = 0;
      for (nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0)
         if (!may_predicate_insn(nvi, pred))
            break;

      if (!nvi) {
         /* we're after register allocation, so there always is an ELSE block */
         for (nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1)
            if (!may_predicate_insn(nvi, pred))
               break;
      }

      /* 12 is an arbitrary limit */
      if (!nvi && n0 < 12 && n1 < 12) {
         predicate_instructions(ctx->pc, b->out[0], pred, !b->exit->cc);
         predicate_instructions(ctx->pc, b->out[1], pred, b->exit->cc);

         nvc0_insn_delete(b->exit); /* delete the branch */

         /* and a potential joinat before it */
         if (b->exit && b->exit->opcode == NV_OP_JOINAT)
            nvc0_insn_delete(b->exit);

         /* remove join operations at the end of the conditional */
         k = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 1 : 0;
         if ((nvi = b->out[0]->out[k]->entry)) {
            nvi->join = 0;
            if (nvi->opcode == NV_OP_JOIN)
               nvc0_insn_delete(nvi);
         }
      }
   }
   DESCEND_ARBITRARY(k, nv_pass_flatten);

   return 0;
}
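
/* Example of the flattening (hypothetical IR):
 *
 *    bra $p0 ELSE             mov $r0 $r1 (not $p0)
 *    mov $r0 $r1       ->     mov $r0 $r2 ($p0)
 *    bra ENDIF
 *  ELSE:
 *    mov $r0 $r2
 *  ENDIF:
 *
 * Both paths then execute unconditionally, each instruction predicated on
 * the appropriate value of $p0, which pays off for short blocks.
 */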

/* Tests instructions for equality, but independently of sources. */
static boolean
is_operation_equal(struct nv_instruction *a, struct nv_instruction *b)
{
   if (a->opcode != b->opcode)
      return FALSE;
   if (nv_is_texture_op(a->opcode)) {
      if (a->ext.tex.t != b->ext.tex.t ||
          a->ext.tex.s != b->ext.tex.s)
         return FALSE;
      if (a->tex_dim != b->tex_dim ||
          a->tex_array != b->tex_array ||
          a->tex_cube != b->tex_cube ||
          a->tex_shadow != b->tex_shadow ||
          a->tex_live != b->tex_live)
         return FALSE;
   } else
   if (a->opcode == NV_OP_CVT) {
      if (a->ext.cvt.s != b->ext.cvt.s ||
          a->ext.cvt.d != b->ext.cvt.d)
         return FALSE;
   } else
   if (NV_BASEOP(a->opcode) == NV_OP_SET ||
       NV_BASEOP(a->opcode) == NV_OP_SLCT) {
      if (a->set_cond != b->set_cond)
         return FALSE;
   } else
   if (a->opcode == NV_OP_LINTERP ||
       a->opcode == NV_OP_PINTERP) {
      if (a->centroid != b->centroid ||
          a->flat != b->flat)
         return FALSE;
   }
   if (a->lanes != b->lanes ||
       a->patch != b->patch ||
       a->saturate != b->saturate)
      return FALSE;
   if (a->opcode == NV_OP_QUADOP) /* beware quadon ! */
      return FALSE;

   return TRUE;
}

/* local common subexpression elimination, stupid O(n^2) implementation */
static int
nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *ir, *ik, *next;
   struct nv_instruction *entry = b->phi ? b->phi : b->entry;
   int s, d;
   unsigned int reps;

   do {
      reps = 0;
      for (ir = entry; ir; ir = next) {
         next = ir->next;
         for (ik = entry; ik != ir; ik = ik->next) {
            if (!is_operation_equal(ir, ik))
               continue;
            if (!ir->def[0] || !ik->def[0])
               continue;

            if (ik->indirect != ir->indirect || ik->predicate != ir->predicate)
               continue;

            for (d = 0; d < 4; ++d) {
               if ((ir->def[d] ? 1 : 0) != (ik->def[d] ? 1 : 0))
                  break;
            }
            if (d != 4)
               continue;
            if (!values_equal(ik->def[0], ir->def[0]))
               continue;

            for (s = 0; s < 5; ++s) {
               struct nv_value *a, *b;

               if ((ir->src[s] ? 1 : 0) != (ik->src[s] ? 1 : 0))
                  break;
               if (!ir->src[s])
                  continue;

               if (ik->src[s]->mod != ir->src[s]->mod)
                  break;
               a = ik->src[s]->value;
               b = ir->src[s]->value;
               if (a == b)
                  continue;
               if (a->reg.file != b->reg.file ||
                   a->reg.id < 0 || /* this excludes memory loads/stores */
                   a->reg.id != b->reg.id)
                  break;
            }
            if (s == 5) {
               nvc0_insn_delete(ir);
               for (d = 0; d < 4 && ir->def[d]; ++d)
                  nvc0_pc_replace_value(ctx->pc, ir->def[d], ik->def[d]);
               ++reps;
               break;
            }
         }
      }
   } while (reps);

   DESCEND_ARBITRARY(s, nv_pass_cse);

   return 0;
}

/* Make sure all sources of an NV_OP_BIND are distinct, they need to occupy
 * neighbouring registers. CSE might have messed this up.
 * Just generate a MOV for each source to avoid conflicts if they're used in
 * multiple NV_OP_BIND at different positions.
 *
 * Add a dummy use of the pointer source of >= 8 byte loads after the load
 * to prevent it from being assigned a register which overlaps the load's
 * destination, which would produce random corruptions.
 */
static int
nv_pass_fixups(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_value *val;
   struct nv_instruction *fix, *nvi, *next;
   int s;

   for (fix = b->entry; fix; fix = next) {
      next = fix->next;

      if (fix->opcode == NV_OP_LD) {
         if (fix->indirect >= 0 && fix->src[0]->value->reg.size >= 8) {
            nvi = nv_alloc_instruction(ctx->pc, NV_OP_UNDEF);
            nv_reference(ctx->pc, nvi, 0, fix->src[fix->indirect]->value);

            nvc0_insn_insert_after(fix, nvi);
         }
         continue;
      }

      if (fix->opcode == NV_OP_BIND) {
         for (s = 0; s < 4 && fix->src[s]; ++s) {
            val = fix->src[s]->value;

            nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV);
            nvi->def[0] = new_value_like(ctx->pc, val);
            nvi->def[0]->insn = nvi;
            nv_reference(ctx->pc, nvi, 0, val);
            nv_reference(ctx->pc, fix, s, nvi->def[0]);

            nvc0_insn_insert_before(fix, nvi);
         }
      }
   }
   DESCEND_ARBITRARY(s, nv_pass_fixups);

   return 0;
}

static int
nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root)
{
   struct pass_reld_elim *reldelim = NULL;
   struct nv_pass pass;
   struct nv_pass_dce dce;
   int ret;

   pass.n = 0;
   pass.pc = pc;

   /* Do CSE so we can just compare values by pointer in subsequent passes. */
   pc->pass_seq++;
   ret = nv_pass_cse(&pass, root);
   if (ret)
      return ret;

   /* Do this first, so we don't have to pay attention
    * to whether sources are supported memory loads.
    */
   pc->pass_seq++;
   ret = nv_pass_algebraic_opt(&pass, root);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nv_pass_lower_mods(&pass, root);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nvc0_pass_fold_loads(&pass, root);
   if (ret)
      return ret;

   if (pc->opt_reload_elim) {
      reldelim = CALLOC_STRUCT(pass_reld_elim);
      reldelim->pc = pc;

      pc->pass_seq++;
      ret = nv_pass_reload_elim(reldelim, root);
      if (ret) {
         FREE(reldelim);
         return ret;
      }
      memset(reldelim, 0, sizeof(struct pass_reld_elim));
      reldelim->pc = pc;
   }

   /* May run DCE before load-combining since that pass will clean up
    * after itself.
    */
   dce.pc = pc;
   do {
      dce.removed = 0;
      pc->pass_seq++;
      ret = nv_pass_dce(&dce, root);
      if (ret)
         return ret;
   } while (dce.removed);

   if (pc->opt_reload_elim) {
      pc->pass_seq++;
      ret = nv_pass_mem_opt(reldelim, root);
      if (!ret) {
         memset(reldelim, 0, sizeof(struct pass_reld_elim));
         reldelim->pc = pc;

         pc->pass_seq++;
         ret = nv_pass_mem_opt(reldelim, root);
      }
      FREE(reldelim);
      if (ret)
         return ret;
   }

   pc->pass_seq++;
   ret = nv_pass_tex_mask(&pass, root);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nv_pass_fixups(&pass, root);

   return ret;
}

int
nvc0_pc_exec_pass0(struct nv_pc *pc)
{
   int i, ret;

   for (i = 0; i < pc->num_subroutines + 1; ++i)
      if (pc->root[i] && (ret = nv_pc_pass0(pc, pc->root[i])))
         return ret;
   return 0;
}