2
* Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* on the rights to use, copy, modify, merge, publish, distribute, sub
8
* license, and/or sell copies of the Software, and to permit persons to whom
9
* the Software is furnished to do so, subject to the following conditions:
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21
* USE OR OTHER DEALINGS IN THE SOFTWARE.
43
class hw_encoding_format;
57
virtual void write(const char *s) = 0;
59
sb_ostream& operator <<(const char *s) {
64
sb_ostream& operator <<(const std::string& s) {
65
return *this << s.c_str();
68
sb_ostream& operator <<(void *p) {
74
sb_ostream& operator <<(char c) {
80
sb_ostream& operator <<(int n) {
86
sb_ostream& operator <<(unsigned n) {
92
sb_ostream& operator <<(double d) {
94
snprintf(b, 32, "%g", d);
98
// print as field of specified width, right aligned
99
void print_w(int n, int width) {
101
sprintf(f, "%%%dd", width);
102
snprintf(b, 256, f, n);
106
// print as field of specified width, left aligned
107
void print_wl(int n, int width) {
109
sprintf(f, "%%-%dd", width);
110
snprintf(b, 256, f, n);
114
// print as field of specified width, left aligned
115
void print_wl(const std::string &s, int width) {
118
while (l++ < width) {
123
// print int as field of specified width, right aligned, zero-padded
124
void print_zw(int n, int width) {
126
sprintf(f, "%%0%dd", width);
127
snprintf(b, 256, f, n);
131
// print int as field of specified width, right aligned, zero-padded, hex
132
void print_zw_hex(int n, int width) {
134
sprintf(f, "%%0%dx", width);
135
snprintf(b, 256, f, n);
140
class sb_ostringstream : public sb_ostream {
143
sb_ostringstream() : data() {}
145
virtual void write(const char *s) {
149
// Empty the backing string `data`.
void clear() {
	data.clear();
}
151
// C-string view of the backing string; the pointer is exactly what
// data.c_str() hands back.
const char* c_str() {
	const char *text = data.c_str();
	return text;
}
152
// Mutable reference to the backing string itself (no copy is made).
std::string& str() {
	return data;
}
155
class sb_log : public sb_ostream {
158
sb_log() : o(stderr) {}
160
virtual void write(const char *s) {
183
enum sb_hw_class_bits
190
HB_R6R7 = (HB_R6 | HB_R7),
191
HB_EGCM = (HB_EG | HB_CM),
192
HB_R6R7EG = (HB_R6 | HB_R7 | HB_EG),
193
HB_R7EGCM = (HB_R7 | HB_EG | HB_CM),
195
HB_ALL = (HB_R6 | HB_R7 | HB_EG | HB_CM)
246
MAX_ALU_LITERALS = 4,
255
ALU_SRC_LDS_OQ_A = 219,
256
ALU_SRC_LDS_OQ_B = 220,
257
ALU_SRC_LDS_OQ_A_POP = 221,
258
ALU_SRC_LDS_OQ_B_POP = 222,
259
ALU_SRC_LDS_DIRECT_A = 223,
260
ALU_SRC_LDS_DIRECT_B = 224,
261
ALU_SRC_TIME_HI = 227,
262
ALU_SRC_TIME_LO = 228,
263
ALU_SRC_MASK_HI = 229,
264
ALU_SRC_MASK_LO = 230,
265
ALU_SRC_HW_WAVE_ID = 231,
266
ALU_SRC_SIMD_ID = 232,
268
ALU_SRC_HW_THREADGRP_ID = 234,
269
ALU_SRC_WAVE_ID_IN_GRP = 235,
270
ALU_SRC_NUM_THREADGRP_WAVES = 236,
271
ALU_SRC_HW_ALU_ODD = 237,
272
ALU_SRC_LOOP_IDX = 238,
273
ALU_SRC_PARAM_BASE_ADDR = 240,
274
ALU_SRC_NEW_PRIM_MASK = 241,
275
ALU_SRC_PRIM_MASK_HI = 242,
276
ALU_SRC_PRIM_MASK_LO = 243,
277
ALU_SRC_1_DBL_L = 244,
278
ALU_SRC_1_DBL_M = 245,
279
ALU_SRC_0_5_DBL_L = 246,
280
ALU_SRC_0_5_DBL_M = 247,
284
ALU_SRC_M_1_INT = 251,
286
ALU_SRC_LITERAL = 253,
290
ALU_SRC_PARAM_OFFSET = 448
293
enum alu_predicate_select
309
enum alu_index_mode {
317
INDEX_GLOBAL_AR_X = 6
320
enum alu_cayman_mova_dst {
325
CM_MOVADST_CG0, // clause-global byte 0
331
enum alu_cayman_exec_mask_op {
355
enum alu_kcache_mode {
362
enum alu_kcache_index_mode {
399
enum sched_queue_id {
416
literal(int32_t i = 0) : i(i) {}
417
literal(uint32_t u) : u(u) {}
418
literal(float f) : f(f) {}
419
literal(double f) : f(f) {}
420
operator uint32_t() const { return u; }
421
// Two literals compare equal when their unsigned views (u) match.
bool operator ==(literal l) {
	return l.u == u;
}
422
// Compare the signed view (i) against a plain int.
bool operator ==(int v_int) {
	return v_int == i;
}
423
// Compare the unsigned view (u) against a plain unsigned.
bool operator ==(unsigned v_uns) {
	return v_uns == u;
}
433
// TODO optimize bc structures
442
const cf_op_info * op_ptr;
447
unsigned alt_const:1;
448
unsigned uses_waterfall:1;
452
unsigned pop_count:3;
453
unsigned call_count:6;
454
unsigned whole_quad_mode:1;
455
unsigned valid_pixel_mode:1;
457
unsigned jumptable_sel:3;
460
unsigned end_of_program:1;
462
unsigned array_base:13;
463
unsigned elem_size:2;
464
unsigned index_gpr:7;
469
unsigned burst_count:4;
473
unsigned array_size:12;
474
unsigned comp_mask:4;
478
unsigned rat_index_mode:2;
480
// Store the CF op id and cache the descriptor pointer that
// r600_isa_cf() returns for it.
void set_op(unsigned op) {
	this->op = op;
	op_ptr = r600_isa_cf(op);
}
482
bool is_alu_extended() {
483
assert(op_ptr->flags & CF_ALU);
484
return kc[2].mode != KC_LOCK_NONE || kc[3].mode != KC_LOCK_NONE ||
485
kc[0].index_mode != KC_INDEX_NONE || kc[1].index_mode != KC_INDEX_NONE ||
486
kc[2].index_mode != KC_INDEX_NONE || kc[3].index_mode != KC_INDEX_NONE;
510
const alu_op_info * op_ptr;
520
unsigned bank_swizzle:3;
522
unsigned index_mode:3;
526
unsigned fog_merge:1;
527
unsigned write_mask:1;
528
unsigned update_exec_mask:1;
529
unsigned update_pred:1;
533
unsigned lds_idx_offset:6;
535
alu_op_flags slot_flags;
537
void set_op(unsigned op) {
539
op_ptr = r600_isa_alu(op);
544
for (int i = 0; i < 3; ++i)
557
update_exec_mask = 0;
561
slot_flags = AF_NONE;
569
const fetch_op_info * op_ptr;
572
unsigned bc_frac_mode:1;
573
unsigned fetch_whole_quad:1;
574
unsigned resource_id:8;
578
unsigned src_rel_global:1; /* for GDS ops */
583
unsigned dst_rel_global:1; /* for GDS ops */
586
unsigned alt_const:1;
589
unsigned resource_index_mode:2;
590
unsigned sampler_index_mode:2;
592
unsigned coord_type[4];
597
unsigned sampler_id:5;
600
unsigned fetch_type:2;
601
unsigned mega_fetch_count:6;
602
unsigned coalesced_read:1;
603
unsigned structured_read:2;
606
unsigned data_format:6;
607
unsigned format_comp_all:1;
608
unsigned num_format_all:2;
609
unsigned semantic_id:8;
610
unsigned srf_mode_all:1;
611
unsigned use_const_fields:1;
613
unsigned const_buf_no_stride:1;
614
unsigned endian_swap:2;
615
unsigned mega_fetch:1;
617
unsigned src2_gpr:7; /* for GDS */
618
unsigned alloc_consume:1;
620
unsigned uav_index_mode:2;
621
unsigned bcast_first_req:1;
624
unsigned elem_size:2;
627
unsigned burst_count:4;
628
unsigned array_base:13;
629
unsigned array_size:12;
631
// Store the fetch op id and cache the descriptor pointer that
// r600_isa_fetch() returns for it.
void set_op(unsigned op) {
	this->op = op;
	op_ptr = r600_isa_fetch(op);
}
634
struct shader_stats {
639
unsigned cf; // clause instructions not included
641
unsigned alu_clauses;
642
unsigned fetch_clauses;
646
unsigned shaders; // number of shaders (for accumulated stats)
648
shader_stats() : ndw(), ngpr(), nstack(), cf(), alu(), alu_clauses(),
649
fetch_clauses(), fetch(), alu_groups(), shaders() {}
651
void collect(node *n);
652
void accumulate(shader_stats &s);
654
void dump_diff(shader_stats &s);
661
shader_stats src_stats, opt_stats;
666
sb_hw_class hw_class;
668
unsigned alu_temp_gprs;
671
unsigned vtx_src_num;
675
bool r6xx_gpr_index_workaround;
677
bool stack_workaround_8xx;
678
bool stack_workaround_9xx;
680
unsigned wavefront_size;
681
unsigned stack_entry_size;
683
static unsigned dump_pass;
684
static unsigned dump_stat;
686
static unsigned dry_run;
687
static unsigned no_fallback;
688
static unsigned safe_math;
690
static unsigned dskip_start;
691
static unsigned dskip_end;
692
static unsigned dskip_mode;
694
sb_context() : src_stats(), opt_stats(), isa(0),
695
hw_chip(HW_CHIP_UNKNOWN), hw_class(HW_CLASS_UNKNOWN),
696
alu_temp_gprs(0), max_fetch(0), has_trans(false), vtx_src_num(0),
697
num_slots(0), uses_mova_gpr(false),
698
r6xx_gpr_index_workaround(false), stack_workaround_8xx(false),
699
stack_workaround_9xx(false), wavefront_size(0),
700
stack_entry_size(0) {}
702
int init(r600_isa *isa, sb_hw_chip chip, sb_hw_class cclass);
704
// True when this context's hw_class is HW_CLASS_R600.
bool is_r600() {
	return HW_CLASS_R600 == hw_class;
}
705
// True when this context's hw_class is HW_CLASS_R700.
bool is_r700() {
	return HW_CLASS_R700 == hw_class;
}
706
// True when this context's hw_class is HW_CLASS_EVERGREEN.
bool is_evergreen() {
	return HW_CLASS_EVERGREEN == hw_class;
}
707
// True when this context's hw_class is HW_CLASS_CAYMAN.
bool is_cayman() {
	return HW_CLASS_CAYMAN == hw_class;
}
708
// True for HW_CLASS_EVERGREEN and for any hw_class value that orders
// at or above it; this relies on the enumerator order of sb_hw_class,
// which is declared outside this view.
bool is_egcm() {
	return !(hw_class < HW_CLASS_EVERGREEN);
}
710
bool needs_8xx_stack_workaround() {
715
case HW_CHIP_HEMLOCK:
716
case HW_CHIP_CYPRESS:
717
case HW_CHIP_JUNIPER:
724
bool needs_9xx_stack_workaround() {
728
sb_hw_class_bits hw_class_bit() {
730
case HW_CLASS_R600:return HB_R6;
731
case HW_CLASS_R700:return HB_R7;
732
case HW_CLASS_EVERGREEN:return HB_EG;
733
case HW_CLASS_CAYMAN:return HB_CM;
734
default: assert(!"unknown hw class"); return (sb_hw_class_bits)0;
739
unsigned cf_opcode(unsigned op) {
740
return r600_isa_cf_opcode(isa->hw_class, op);
743
unsigned alu_opcode(unsigned op) {
744
return r600_isa_alu_opcode(isa->hw_class, op);
747
unsigned alu_slots(unsigned op) {
748
return r600_isa_alu_slots(isa->hw_class, op);
751
unsigned alu_slots(const alu_op_info * op_ptr) {
752
return op_ptr->slots[isa->hw_class];
755
unsigned alu_slots_mask(const alu_op_info * op_ptr) {
757
unsigned slot_flags = alu_slots(op_ptr);
758
if (slot_flags & AF_V)
760
if (!is_cayman() && (slot_flags & AF_S))
762
/* Force LDS_IDX ops into SLOT_X */
763
if (op_ptr->opcode[0] == -1 && ((op_ptr->opcode[1] & 0xFF) == 0x11))
768
unsigned fetch_opcode(unsigned op) {
769
return r600_isa_fetch_opcode(isa->hw_class, op);
772
bool is_kcache_sel(unsigned sel) {
773
return ((sel >= 128 && sel < 192) || (sel >= 256 && sel < 320));
776
bool is_lds_oq(unsigned sel) {
777
return (sel >= 0xdb && sel <= 0xde);
780
const char * get_hw_class_name();
781
const char * get_hw_chip_name();
785
// Run the statement(s) `a` only when sb_context::dump_stat is non-zero.
// The do { } while (0) wrapper makes the expansion a single statement, so
// the macro can be followed by ';' and used inside un-braced if/else.
#define SB_DUMP_STAT(a) do { if (sb_context::dump_stat) { a } } while (0)
786
// Run the statement(s) `a` only when sb_context::dump_pass is non-zero.
// The do { } while (0) wrapper makes the expansion a single statement, so
// the macro can be followed by ';' and used inside un-braced if/else.
#define SB_DUMP_PASS(a) do { if (sb_context::dump_pass) { a } } while (0)
797
bc_decoder(sb_context &sctx, uint32_t *data, unsigned size)
798
: ctx(sctx), dw(data), ndw(size) {}
800
int decode_cf(unsigned &i, bc_cf &bc);
801
int decode_alu(unsigned &i, bc_alu &bc);
802
int decode_fetch(unsigned &i, bc_fetch &bc);
805
int decode_cf_alu(unsigned &i, bc_cf &bc);
806
int decode_cf_exp(unsigned &i, bc_cf &bc);
807
int decode_cf_mem(unsigned &i, bc_cf &bc);
809
int decode_fetch_vtx(unsigned &i, bc_fetch &bc);
810
int decode_fetch_gds(unsigned &i, bc_fetch &bc);
811
int decode_fetch_mem(unsigned &i, bc_fetch &bc);
814
// bytecode format definition
816
class hw_encoding_format {
817
const sb_hw_class_bits hw_target; //FIXME: debug - remove after testing
818
hw_encoding_format();
822
hw_encoding_format(sb_hw_class_bits hw)
823
: hw_target(hw), value(0) {}
824
hw_encoding_format(uint32_t v, sb_hw_class_bits hw)
825
: hw_target(hw), value(v) {}
826
uint32_t get_value(sb_hw_class_bits hw) const {
827
assert((hw & hw_target) == hw);
832
#define BC_FORMAT_BEGIN_HW(fmt, hwset) \
833
class fmt##_##hwset : public hw_encoding_format {\
834
typedef fmt##_##hwset thistype; \
836
fmt##_##hwset() : hw_encoding_format(HB_##hwset) {}; \
837
fmt##_##hwset(uint32_t v) : hw_encoding_format(v, HB_##hwset) {};
839
#define BC_FORMAT_BEGIN(fmt) BC_FORMAT_BEGIN_HW(fmt, ALL)
841
#define BC_FORMAT_END(fmt) };
843
// bytecode format field definition
845
#define BC_FIELD(fmt, name, shortname, last_bit, first_bit) \
846
thistype & name(unsigned v) { \
847
value |= ((v&((1ull<<((last_bit)-(first_bit)+1))-1))<<(first_bit)); \
850
unsigned get_##name() const { \
851
return (value>>(first_bit))&((1ull<<((last_bit)-(first_bit)+1))-1); \
854
#define BC_RSRVD(fmt, last_bit, first_bit)
856
// CLAMP macro defined elsewhere interferes with bytecode field name
858
#include "sb_bc_fmt_def.inc"
860
#undef BC_FORMAT_BEGIN
871
r600_shader *pshader;
882
alu_node *slots[2][5];
885
typedef std::vector<cf_node*> id_cf_map;
888
typedef std::stack<region_node*> region_stack;
889
region_stack loop_stack;
893
// Note: currently relies on input emitting SET_CF in same basic block as uses
894
value *cf_index_value[2];
898
bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader) :
899
ctx(sctx), dec(), bc(bc), pshader(pshader),
900
dw(), bc_ndw(), max_cf(),
901
sh(), error(), slots(), cgroup(),
902
cf_map(), loop_stack(), gpr_reladdr(), cf_index_value(), mova() { }
907
// Hand back the shader pointer held in `sh`.  Asserts that `error` is
// not set.
shader* get_shader() {
	assert(!error);
	return sh;
}
915
int decode_cf(unsigned &i, bool &eop);
917
int decode_alu_clause(cf_node *cf);
918
int decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt);
920
int decode_fetch_clause(cf_node *cf);
923
int prepare_alu_clause(cf_node *cf);
924
int prepare_alu_group(cf_node* cf, alu_group_node *g);
925
int prepare_fetch_clause(cf_node *cf);
927
int prepare_loop(cf_node *c);
928
int prepare_if(cf_node *c);
930
void save_set_cf_index(value *val, unsigned idx);
931
value *get_cf_index_value(unsigned idx);
932
void save_mova(alu_node *mova);
933
alu_node *get_mova();
940
typedef std::vector<uint32_t> bc_vector;
941
sb_hw_class_bits hw_class_bit;
949
bytecode(sb_hw_class_bits hw, unsigned rdw = 256)
950
: hw_class_bit(hw), pos(0) { bc.reserve(rdw); }
952
// Number of elements currently stored in bc, narrowed to unsigned.
unsigned ndw() {
	return static_cast<unsigned>(bc.size());
}
954
void write_data(uint32_t* dst) {
955
std::copy(bc.begin(), bc.end(), dst);
958
void align(unsigned a) {
959
unsigned size = bc.size();
960
size = (size + a - 1) & ~(a-1);
964
void set_size(unsigned sz) {
965
assert(sz >= bc.size());
969
void seek(unsigned p) {
978
// Current value of the `pos` member.
unsigned get_pos() {
	return pos;
}
979
// Pointer to the first stored dword of bc.
// Uses data() instead of &bc[0]: indexing element 0 of an empty vector is
// undefined behaviour and aborts in standard-library builds with container
// assertions enabled, whereas data() is well defined for an empty buffer.
// NOTE(review): assumes bc is the std::vector<uint32_t> (bc_vector) this
// class uses elsewhere - confirm against the member declaration.
uint32_t *data() { return bc.data(); }
981
bytecode & operator <<(uint32_t v) {
990
bytecode & operator <<(const hw_encoding_format &e) {
991
*this << e.get_value(hw_class_bit);
995
bytecode & operator <<(const bytecode &b) {
996
bc.insert(bc.end(), b.bc.begin(), b.bc.end());
1000
// Read one dword by index; range checking is whatever bc.at() provides.
uint32_t at(unsigned dw_id) {
	const uint32_t dword = bc.at(dw_id);
	return dword;
}
1012
bc_builder(shader &s);
1014
// Reference to the `bb` bytecode member.  Asserts that `error` is not
// set.
bytecode& get_bytecode() {
	assert(!error);
	return bb;
}
1018
int build_cf(cf_node *n);
1020
int build_cf_alu(cf_node *n);
1021
int build_cf_mem(cf_node *n);
1022
int build_cf_exp(cf_node *n);
1024
int build_alu_clause(cf_node *n);
1025
int build_alu_group(alu_group_node *n);
1026
int build_alu(alu_node *n);
1028
int build_fetch_clause(cf_node *n);
1029
int build_fetch_tex(fetch_node *n);
1030
int build_fetch_vtx(fetch_node *n);
1031
int build_fetch_gds(fetch_node *n);
1032
int build_fetch_mem(fetch_node* n);
1035
} // namespace r600_sb
1037
#endif /* SB_BC_H_ */