2
* Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* on the rights to use, copy, modify, merge, publish, distribute, sub
8
* license, and/or sell copies of the Software, and to permit persons to whom
9
* the Software is furnished to do so, subject to the following conditions:
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21
* USE OR OTHER DEALINGS IN THE SOFTWARE.
47
// Visitor-style pass: extends `pass` with one virtual visit() overload per
// node type plus a virtual run_on() entry point.
// NOTE(review): this listing is incomplete (access specifiers, the closing
// brace and possibly other members are not visible here).
class vpass : public pass {
51
// Forwards the shader to the `pass` base-class constructor.
vpass(shader &s) : pass(s) {}
57
// Runs the pass on container `n`. Virtual, so derived passes may replace it
// (rev_vpass redeclares it).
virtual void run_on(container_node &n);
59
// visit() overloads, one per node type. `n` is the node being visited.
// NOTE(review): presumably `enter` distinguishes entering a node from
// leaving it, and the bool result steers the traversal - confirm against
// the implementation, neither is visible here.
virtual bool visit(node &n, bool enter);
60
virtual bool visit(container_node &n, bool enter);
61
virtual bool visit(alu_group_node &n, bool enter);
62
virtual bool visit(cf_node &n, bool enter);
63
virtual bool visit(alu_node &n, bool enter);
64
virtual bool visit(alu_packed_node &n, bool enter);
65
virtual bool visit(fetch_node &n, bool enter);
66
virtual bool visit(region_node &n, bool enter);
67
virtual bool visit(repeat_node &n, bool enter);
68
virtual bool visit(depart_node &n, bool enter);
69
virtual bool visit(if_node &n, bool enter);
70
virtual bool visit(bb_node &n, bool enter);
74
// Variant of vpass that supplies its own run_on().
// NOTE(review): the name suggests a reverse-order traversal - confirm
// against the implementation, which is not visible here.
class rev_vpass : public vpass {
77
// Forwards the shader to the vpass constructor.
rev_vpass(shader &s) : vpass(s) {}
79
// Replaces vpass::run_on() for container `n`.
virtual void run_on(container_node &n);
83
// =================== PASSES
87
// Visitor pass with dump helpers for cf, alu and fetch nodes and for raw
// dwords.
// NOTE(review): several member declarations (e.g. bc_data, ndw, id, which
// the constructor below initializes) are not visible in this listing.
class bc_dump : public vpass {
95
unsigned new_group, group_index;
99
// Constructs from a shader and an optional bytecode object (defaults to
// NULL). Defined out of line.
bc_dump(shader &s, bytecode *bc = NULL);
101
// Constructs from a raw dword buffer: stores `bc_ptr` in bc_data and `ndw`
// in ndw, and value-initializes id, new_group and group_index.
bc_dump(shader &s, uint32_t *bc_ptr, unsigned ndw) :
102
vpass(s), bc_data(bc_ptr), ndw(ndw), id(), new_group(), group_index() {}
107
// Only these three node types get a visit() overload in this class.
virtual bool visit(cf_node &n, bool enter);
108
virtual bool visit(alu_node &n, bool enter);
109
virtual bool visit(fetch_node &n, bool enter);
111
// Dumps dwords starting at index `dw_id`; `count` defaults to 2.
// NOTE(review): presumably `count` is the number of dwords - confirm.
void dump_dw(unsigned dw_id, unsigned count = 2);
113
// Per-node-type dump helpers.
void dump(cf_node& n);
114
void dump(alu_node& n);
115
void dump(fetch_node& n);
119
// Visitor pass for cleanup related to dead code elimination (per the class
// name and the dce_flags it reads).
// NOTE(review): member declarations for remove_unused and nodes_changed are
// not visible in this listing.
class dce_cleanup : public vpass {
126
// remove_unused is taken from the DF_REMOVE_UNUSED bit of the shader's
// dce_flags; nodes_changed starts out false.
dce_cleanup(shader &s) : vpass(s),
127
remove_unused(s.dce_flags & DF_REMOVE_UNUSED), nodes_changed(false) {}
131
// visit() overloads provided by this pass.
virtual bool visit(node &n, bool enter);
132
virtual bool visit(alu_group_node &n, bool enter);
133
virtual bool visit(cf_node &n, bool enter);
134
virtual bool visit(alu_node &n, bool enter);
135
virtual bool visit(alu_packed_node &n, bool enter);
136
virtual bool visit(fetch_node &n, bool enter);
137
virtual bool visit(region_node &n, bool enter);
138
virtual bool visit(container_node &n, bool enter);
142
// Helpers operating on a node's destinations / a value vector.
// NOTE(review): the meaning of cleanup_dst_vec()'s bool result is not
// visible here - confirm against the implementation.
void cleanup_dst(node &n);
143
bool cleanup_dst_vec(vvec &vv);
145
// Did we alter/remove nodes during a single pass?
150
// Pass derived directly from `pass` (not a visitor); the name suggests it
// builds def/use information.
// NOTE(review): semantics are inferred from names only - confirm against
// the implementation.
class def_use : public pass {
154
// Forwards the shader to the `pass` base-class constructor.
def_use(shader &sh) : pass(sh) {}
157
// Processes node `n`; `defs` selects a mode.
// NOTE(review): presumably defs vs. uses - confirm.
void run_on(node *n, bool defs);
161
void process_uses(node *n);
162
// `vv` is the value vector to process; `arr_def` is an additional flag
// whose meaning is not visible here.
void process_defs(node *n, vvec &vv, bool arr_def);
163
// Processes phi container `c`; `defs` and `uses` are independent flags.
void process_phi(container_node *c, bool defs, bool uses);
168
// Visitor pass that dumps the node tree; it also exposes static helpers
// that can be called without a dump instance.
// NOTE(review): the `level` member initialized below is not visible in
// this listing.
class dump : public vpass {
175
// Starts with level set to 0.
dump(shader &s) : vpass(s), level(0) {}
177
// One visit() overload for every node type declared by vpass.
virtual bool visit(node &n, bool enter);
178
virtual bool visit(container_node &n, bool enter);
179
virtual bool visit(alu_group_node &n, bool enter);
180
virtual bool visit(cf_node &n, bool enter);
181
virtual bool visit(alu_node &n, bool enter);
182
virtual bool visit(alu_packed_node &n, bool enter);
183
virtual bool visit(fetch_node &n, bool enter);
184
virtual bool visit(region_node &n, bool enter);
185
virtual bool visit(repeat_node &n, bool enter);
186
virtual bool visit(depart_node &n, bool enter);
187
virtual bool visit(if_node &n, bool enter);
188
virtual bool visit(bb_node &n, bool enter);
191
// Static dump helpers. Note the two dump_op() overloads: one takes a node
// reference plus an explicit name, the other only a node pointer.
static void dump_op(node &n, const char *name);
192
static void dump_vec(const vvec & vv);
193
static void dump_set(shader &sh, val_set & v);
195
static void dump_rels(vvec & vv);
197
static void dump_val(value *v);
198
static void dump_op(node *n);
200
static void dump_op_list(container_node *c);
201
static void dump_queue(sched_queue &q);
203
static void dump_alu(alu_node *n);
209
// Non-static helpers (they need a dump instance).
void dump_common(node &n);
210
void dump_flags(node &n);
212
// `before` selects which live-value set of `n` is dumped.
// NOTE(review): presumably live-before vs. live-after - confirm.
void dump_live_values(container_node &n, bool before);
216
// Global Code Motion
218
// Scheduling pass with two families of helpers: td_* (used with
// sched_early) and bu_* (used with sched_late).
// NOTE(review): many member declarations initialized by the constructor
// (ready, op_map, uses, nuc_stk, ...) and the header of the nested op_info
// type are not visible in this listing.
class gcm : public pass {
220
// Per-queue scheduling state; each array has SQ_NUM entries.
sched_queue bu_ready[SQ_NUM];
221
sched_queue bu_ready_next[SQ_NUM];
222
sched_queue bu_ready_early[SQ_NUM];
224
sched_queue ready_above;
226
unsigned outstanding_lds_oq;
227
container_node pending;
232
// Constructor of op_info (its enclosing declaration is not visible here):
// value-initializes top_bb and bottom_bb.
op_info() : top_bb(), bottom_bb() {}
235
// Maps a node to its op_info.
typedef std::map<node*, op_info> op_info_map;
237
// Maps a node to an unsigned counter.
typedef std::map<node*, unsigned> nuc_map;
242
// Stack of nuc_map, stored as a vector.
typedef std::vector<nuc_map> nuc_stack;
251
node_list pending_nodes;
255
// for register pressure tracking in bottom-up pass
259
// NOTE(review): the name suggests a register-pressure threshold; its use
// is not visible here.
static const int rp_threshold = 100;
261
bool pending_exec_mask_update;
265
// Value-initializes all listed members; nuc_stk is constructed with the
// argument 1.
// NOTE(review): presumably nuc_stk is a nuc_stack that starts with one
// map - its declaration is not visible here.
gcm(shader &sh) : pass(sh),
266
bu_ready(), bu_ready_next(), bu_ready_early(),
267
ready(), outstanding_lds_oq(),
268
op_map(), uses(), nuc_stk(1), ucs_level(),
269
bu_bb(), pending_defs(), pending_nodes(), cur_sq(),
270
live(), live_count(), pending_exec_mask_update() {}
276
// `early_pass` selects which pass the instructions are collected for.
void collect_instructions(container_node *c, bool early_pass);
278
// Early scheduling and its td_* helpers.
// NOTE(review): "td" presumably stands for top-down - confirm.
void sched_early(container_node *n);
279
void td_sched_bb(bb_node *bb);
280
bool td_is_ready(node *n);
281
void td_release_uses(vvec &v);
282
void td_release_val(value *v);
283
void td_schedule(bb_node *bb, node *n);
285
// Late scheduling and its bu_* helpers (bottom-up, per the comment above
// rp_threshold).
void sched_late(container_node *n);
286
void bu_sched_bb(bb_node *bb);
287
void bu_release_defs(vvec &v, bool src);
288
void bu_release_phi_defs(container_node *p, unsigned op);
289
bool bu_is_ready(node *n);
290
void bu_release_val(value *v);
291
void bu_release_op(node * n);
292
void bu_find_best_bb(node *n, op_info &oi);
293
void bu_schedule(container_node *bb, node *n);
295
// Counter bookkeeping over nuc_map.
// NOTE(review): "uc"/"dc" presumably mean use count / def count - confirm.
void push_uc_stack();
298
void init_def_count(nuc_map &m, container_node &s);
299
void init_use_count(nuc_map &m, container_node &s);
300
unsigned get_uc_vec(vvec &vv);
301
unsigned get_dc_vec(vvec &vv, bool src);
303
void add_ready(node *n);
305
void dump_uc_stack();
307
unsigned real_alu_count(sched_queue &q, unsigned max);
309
// check whether at least `threshold` alu instructions are ready
310
bool check_alu_ready_count(unsigned threshold);
314
// Visitor pass that processes operations and their source values, with
// optional rewriting.
// NOTE(review): the name presumably stands for global value numbering -
// confirm against the implementation.
class gvn : public vpass {
319
// Forwards the shader to the vpass constructor.
gvn(shader &sh) : vpass(sh) {}
321
// visit() overloads provided by this pass.
virtual bool visit(node &n, bool enter);
322
virtual bool visit(cf_node &n, bool enter);
323
virtual bool visit(alu_node &n, bool enter);
324
virtual bool visit(alu_packed_node &n, bool enter);
325
virtual bool visit(fetch_node &n, bool enter);
326
virtual bool visit(region_node &n, bool enter);
330
// Processes node `n`; `rewrite` defaults to true.
void process_op(node &n, bool rewrite = true);
332
// returns true if the value was rewritten
// (`v` is a reference to a pointer, so the callee can replace the pointer)
333
bool process_src(value* &v, bool rewrite);
336
// `v` is likewise passed as a reference to a pointer.
void process_alu_src_constants(node &n, value* &v);
340
// Pass operating on region nodes; the name suggests if-conversion.
// NOTE(review): semantics are inferred from names only - confirm against
// the implementation.
class if_conversion : public pass {
344
// Forwards the shader to the `pass` base-class constructor.
if_conversion(shader &sh) : pass(sh) {}
348
// Processes region `r`; the meaning of the bool result is not visible here.
bool run_on(region_node *r);
350
// NOTE(review): this declaration continues on a line that is missing from
// this listing.
void convert_kill_instructions(region_node *r, value *em, bool branch,
353
bool check_and_convert(region_node *r);
355
// Returns an alu_node* built from `select` and `phi`.
// NOTE(review): presumably a new select instruction replacing the phi -
// confirm.
alu_node* convert_phi(value *select, node *phi);
360
// Liveness pass built on rev_vpass.
// NOTE(review): the live_changed member initialized below is not visible
// in this listing.
class liveness : public rev_vpass {
368
// live_changed starts out false.
liveness(shader &s) : rev_vpass(s), live_changed(false) {}
372
// One visit() overload for every node type declared by vpass.
virtual bool visit(node &n, bool enter);
373
virtual bool visit(bb_node &n, bool enter);
374
virtual bool visit(container_node &n, bool enter);
375
virtual bool visit(alu_group_node &n, bool enter);
376
virtual bool visit(cf_node &n, bool enter);
377
virtual bool visit(alu_node &n, bool enter);
378
virtual bool visit(alu_packed_node &n, bool enter);
379
virtual bool visit(fetch_node &n, bool enter);
380
virtual bool visit(region_node &n, bool enter);
381
virtual bool visit(repeat_node &n, bool enter);
382
virtual bool visit(depart_node &n, bool enter);
383
virtual bool visit(if_node &n, bool enter);
387
void update_interferences();
388
void process_op(node &n);
390
// NOTE(review): the meaning of the bool results below is not visible
// here - confirm against the implementation.
bool remove_val(value *v);
391
bool remove_vec(vvec &v);
392
bool process_outs(node& n);
393
void process_ins(node& n);
395
// Phi handling; `id` selects a branch of the phi container.
// NOTE(review): presumably the index of the phi source operand - confirm.
void process_phi_outs(container_node *phi);
396
void process_phi_branch(container_node *phi, unsigned id);
398
bool process_maydef(value *v);
400
// `src` is a flag whose meaning is not visible here.
// NOTE(review): presumably marks `vv` as a source vector - confirm.
bool add_vec(vvec &vv, bool src);
402
void update_src_vec(vvec &vv, bool src);
406
// Filled in by peephole::get_bool_op_info(), which takes it by reference.
// NOTE(review): the struct's members are not visible in this listing.
struct bool_op_info {
413
// Pass with local optimizations on alu nodes.
// NOTE(review): semantics are inferred from names only - confirm against
// the implementation.
class peephole : public pass {
417
// Forwards the shader to the `pass` base-class constructor.
peephole(shader &sh) : pass(sh) {}
421
// Processes container `c`.
void run_on(container_node *c);
423
// Optimizations applied to a single alu node `a`.
void optimize_cc_op(alu_node *a);
425
void optimize_cc_op2(alu_node *a);
426
void optimize_CNDcc_op(alu_node *a);
428
// Fills `bop` for value `b`.
// NOTE(review): presumably the bool result reports success - confirm.
bool get_bool_op_info(value *b, bool_op_info& bop);
429
// `a` is a reference to a pointer, so the callee can replace the pointer.
bool get_bool_flt_to_int_source(alu_node* &a);
430
void convert_float_setcc(alu_node *f2i, alu_node *s);
434
// Pass built on rev_vpass that overrides visit() only for node and alu_node.
class psi_ops : public rev_vpass {
435
// Keeps the inherited visit() overloads visible; without this, the two
// overloads declared below would hide them.
using rev_vpass::visit;
439
// Forwards the shader to the rev_vpass constructor.
psi_ops(shader &s) : rev_vpass(s) {}
441
virtual bool visit(node &n, bool enter);
442
virtual bool visit(alu_node &n, bool enter);
444
// Transformations attempted on node `n`.
// NOTE(review): presumably the bool result reports whether the node was
// changed - confirm against the implementation.
bool try_inline(node &n);
445
bool try_reduce(node &n);
446
bool eliminate(node &n);
448
void unpredicate(node *n);
452
// check correctness of the generated code, e.g.:
453
// - expected source operand value is the last value written to its gpr,
454
// - all arguments of phi node should be allocated to the same gpr,
456
// Checking pass that tracks which value each register channel holds, using
// a stack of register maps.
// NOTE(review): the error_info type and the prev_dst member are not
// visible in this listing.
class ra_checker : public pass {
458
// Maps a register channel (sel_chan) to the value it holds.
typedef std::map<sel_chan, value *> reg_value_map;
460
// Stack of register maps, stored as a vector.
typedef std::vector<reg_value_map> regmap_stack;
462
regmap_stack rm_stack;
463
// Index of the current map in rm_stack (see rmap()).
unsigned rm_stk_level;
469
// Starts at stack level 0 and value-initializes prev_dst.
ra_checker(shader &sh) : pass(sh), rm_stk_level(0), prev_dst() {}
473
void run_on(container_node *c);
475
void dump_error(const error_info &e);
476
void dump_all_errors();
480
// Returns the register map at the current stack level. The index is not
// bounds-checked, so rm_stack must hold more than rm_stk_level entries.
reg_value_map& rmap() { return rm_stack[rm_stk_level]; }
485
// when going out of the alu clause, values in the clause temporary gprs,
486
// AR, predicate values, PS/PV are destroyed
487
void kill_alu_only_regs();
488
// Records an error for node `n`; `msg` is taken by value.
void error(node *n, unsigned id, std::string msg);
490
void check_phi_src(container_node *p, unsigned id);
491
void process_phi_dst(container_node *p);
492
void check_alu_group(alu_group_node *g);
493
void process_op_dst(node *n);
494
void check_op_src(node *n);
495
void check_src_vec(node *n, unsigned id, vvec &vv, bool src);
496
void check_value_gpr(node *n, unsigned id, value *v);
499
// =======================================
502
// Pass derived from `pass`; only its constructor is visible in this listing.
// NOTE(review): the name suggests register-allocation coalescing - confirm.
class ra_coalesce : public pass {
506
// Forwards the shader to the `pass` base-class constructor.
ra_coalesce(shader &sh) : pass(sh) {}
513
// =======================================
515
// Register allocation pass that assigns colors (sel_chan) to values.
// NOTE(review): the ra_tune and prev_chans declarations and the closing
// brace of the constructor are not visible in this listing.
class ra_init : public pass {
519
// Value-initializes prev_chans and picks ra_tune from the chip type (see
// the comment inside).
ra_init(shader &sh) : pass(sh), prev_chans() {
521
// The parameter below affects register channels distribution.
522
// For cayman (VLIW-4) we're trying to distribute the channels
523
// uniformly, this means significantly better alu slots utilization
524
// at the expense of higher gpr usage. Hopefully this will improve
525
// performance, though it has to be proven with real benchmarks yet.
526
// For VLIW-5 this method could also slightly improve slots
527
// utilization, but increased register pressure seems more significant
528
// and overall performance effect is negative according to some
529
// benchmarks, so it's not used currently. Basically, VLIW-5 doesn't
530
// really need it because trans slot (unrestricted by register write
531
// channel) can absorb most deviations from a uniform channel distribution.
533
// Value 3 means that for new allocation we'll use channel that differs
534
// from 3 last used channels. 0 for VLIW-5 effectively turns this off.
536
ra_tune = sh.get_ctx().is_cayman() ? 3 : 0;
546
// Bookkeeping of previously used channels (see ra_tune above).
void add_prev_chan(unsigned chan);
547
unsigned get_preferable_chan_mask();
549
// NOTE(review): the meaning of the bool results below is not visible
// here - confirm against the implementation.
bool ra_node(container_node *c);
550
bool process_op(node *n);
552
bool color(value *v);
554
void color_bs_constraint(ra_constraint *c);
556
// Assigns register channel `c` to value `v`.
void assign_color(value *v, sel_chan c);
560
// =======================================
562
// Pass with a family of split_* helpers for ops, packed alu nodes, vector
// instructions and phi nodes.
// NOTE(review): semantics are inferred from names only - confirm against
// the implementation.
class ra_split : public pass {
566
// Forwards the shader to the `pass` base-class constructor.
ra_split(shader &sh) : pass(sh) {}
570
void split(container_node *n);
571
void split_op(node *n);
572
void split_alu_packed(alu_packed_node *n);
573
void split_vector_inst(node *n);
575
void split_packed_ins(alu_packed_node *n);
578
void split_pinned_outs(node *n);
581
// Takes three value vectors by reference plus an `allow_swz` flag.
// NOTE(review): presumably `vv` is the input and `v1`/`v2` receive
// results - confirm.
void split_vec(vvec &vv, vvec &v1, vvec &v2, bool allow_swz);
583
// NOTE(review): this declaration continues on a line that is missing from
// this listing.
void split_phi_src(container_node *loc, container_node *c, unsigned id,
585
void split_phi_dst(node *loc, container_node *c, bool loop);
586
void init_phi_constraints(container_node *c);
591
// Visitor pass that keeps a stack of value sets indexed by `level`.
// NOTE(review): the declarations of `stk` and `level`, and the headers of
// the helper functions that own the two orphaned statements below, are not
// visible in this listing.
class ssa_prepare : public vpass {
594
// Stack of value sets, stored as a vector.
typedef std::vector<val_set> vd_stk;
600
// Starts at level 0.
ssa_prepare(shader &s) : vpass(s), level(0) {}
602
// visit() overloads provided by this pass.
virtual bool visit(cf_node &n, bool enter);
603
virtual bool visit(alu_node &n, bool enter);
604
virtual bool visit(fetch_node &n, bool enter);
605
virtual bool visit(region_node &n, bool enter);
606
virtual bool visit(repeat_node &n, bool enter);
607
virtual bool visit(depart_node &n, bool enter);
613
// NOTE(review): fragment of a helper body; it tests whether stk has fewer
// than level + 1 entries. The statement it guards is not visible here.
if (level + 1 > stk.size())
621
// NOTE(review): fragment of a helper body; it adds the set at level + 1
// into the set at the current level.
stk[level].add_set(stk[level + 1]);
624
void add_defs(node &n);
626
// Returns the value set at the current level (unchecked index into stk).
val_set & cur_set() { return stk[level]; }
628
container_node* create_phi_nodes(int count);
631
// Visitor pass that renames values using per-value unsigned indices kept
// in def_map tables and stacks of them.
// NOTE(review): semantics beyond the visible signatures are inferred from
// names - confirm against the implementation.
class ssa_rename : public vpass {
634
// Maps a value to an unsigned index.
typedef sb_map<value*, unsigned> def_map;
637
def_map lds_oq_count;
638
def_map lds_rw_count;
639
// Three separate stacks of def_map.
std::stack<def_map> rename_stack;
640
std::stack<def_map> rename_lds_oq_stack;
641
std::stack<def_map> rename_lds_rw_stack;
643
// Maps a 32-bit key to a value.
typedef std::map<uint32_t, value*> val_map;
648
// Forwards the shader to the vpass constructor.
ssa_rename(shader &s) : vpass(s) {}
652
// visit() overloads provided by this pass.
virtual bool visit(container_node &n, bool enter);
653
virtual bool visit(node &n, bool enter);
654
virtual bool visit(alu_group_node &n, bool enter);
655
virtual bool visit(cf_node &n, bool enter);
656
virtual bool visit(alu_node &n, bool enter);
657
virtual bool visit(alu_packed_node &n, bool enter);
658
virtual bool visit(fetch_node &n, bool enter);
659
virtual bool visit(region_node &n, bool enter);
660
virtual bool visit(repeat_node &n, bool enter);
661
virtual bool visit(depart_node &n, bool enter);
662
virtual bool visit(if_node &n, bool enter);
666
void push(node *phi);
669
// Index accessors; each takes the def_map to operate on, so the same code
// serves every table.
unsigned get_index(def_map& m, value* v);
670
void set_index(def_map& m, value* v, unsigned index);
671
unsigned new_index(def_map& m, value* v);
673
// Return the value to use in place of `v` for a use in node `n` / a
// definition in node `def`.
// NOTE(review): presumably the renamed SSA version - confirm.
value* rename_use(node *n, value* v);
674
value* rename_def(node *def, value* v);
676
void rename_src_vec(node *n, vvec &vv, bool src);
677
void rename_dst_vec(node *def, vvec &vv, bool set_def);
679
void rename_src(node *n);
680
void rename_dst(node *n);
682
void rename_phi_args(container_node *phi, unsigned op, bool def);
684
void rename_virt(node *n);
685
void rename_virt_val(node *n, value *v);
688
// Pass with finalize_* helpers for loops, ifs, alu groups, fetches and cf
// nodes.
// NOTE(review): several members (last_cf, ngpr, ...) and the rest of the
// constructor are not visible in this listing.
class bc_finalizer : public pass {
690
// One cf_node pointer per export type (EXP_TYPE_COUNT entries).
cf_node *last_export[EXP_TYPE_COUNT];
698
// Value-initializes last_export, last_cf and ngpr.
// NOTE(review): the initializer list continues on lines that are missing
// from this listing.
bc_finalizer(shader &sh) : pass(sh), last_export(), last_cf(), ngpr(),
703
void finalize_loop(region_node *r);
704
void finalize_if(region_node *r);
706
void run_on(container_node *c);
708
void insert_rv6xx_load_ar_workaround(alu_group_node *b4);
709
void finalize_alu_group(alu_group_node *g, node *prev_node);
710
// NOTE(review): the meaning of the bool result is not visible here.
bool finalize_alu_src(alu_group_node *g, alu_node *a, alu_group_node *prev_node);
712
void emit_set_grad(fetch_node* f);
713
void finalize_fetch(fetch_node *f);
715
void finalize_cf(cf_node *c);
717
// Returns a sel_chan for value `v` in the context of cf node `alu`.
sel_chan translate_kcache(cf_node *alu, value *v);
719
void update_ngpr(unsigned gpr);
720
// `add` defaults to 0.
void update_nstack(region_node *r, unsigned add = 0);
722
// `loops` and `ifs` are passed by reference.
// NOTE(review): presumably out-parameters; this declaration continues on
// a line that is missing from this listing.
unsigned get_stack_depth(node *n, unsigned &loops, unsigned &ifs,
728
void copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start);
729
void emit_set_texture_offsets(fetch_node &f);
733
} // namespace r600_sb
735
#endif /* SB_PASS_H_ */