26
26
using namespace aco;
28
void create_mubuf(unsigned offset, PhysReg dst=PhysReg(256), PhysReg vaddr=PhysReg(256))
29
create_mubuf(unsigned offset, PhysReg dst = PhysReg(256), PhysReg vaddr = PhysReg(256))
30
31
bld.mubuf(aco_opcode::buffer_load_dword, Definition(dst, v1), Operand(PhysReg(0), s4),
31
32
Operand(vaddr, v1), Operand::zero(), offset, true);
34
void create_mubuf_store(PhysReg src=PhysReg(256))
36
create_mubuf_store(PhysReg src = PhysReg(256))
36
bld.mubuf(aco_opcode::buffer_store_dword, Operand(PhysReg(0), s4),
37
Operand(src, v1), Operand::zero(), Operand(src, v1), 0, true);
38
bld.mubuf(aco_opcode::buffer_store_dword, Operand(PhysReg(0), s4), Operand(src, v1),
39
Operand::zero(), Operand(src, v1), 0, true);
40
void create_mimg(bool nsa, unsigned addrs, unsigned instr_dwords)
43
create_mimg(bool nsa, unsigned addrs, unsigned instr_dwords)
42
aco_ptr<MIMG_instruction> mimg{create_instruction<MIMG_instruction>(
43
aco_opcode::image_sample, Format::MIMG, 3 + addrs, 1)};
45
aco_ptr<MIMG_instruction> mimg{
46
create_instruction<MIMG_instruction>(aco_opcode::image_sample, Format::MIMG, 3 + addrs, 1)};
44
47
mimg->definitions[0] = Definition(PhysReg(256), v1);
45
48
mimg->operands[0] = Operand(PhysReg(0), s8);
46
49
mimg->operands[1] = Operand(PhysReg(0), s4);
216
219
//! s_waitcnt_depctr vm_vsrc(0)
217
220
//! s1: %0:m0 = s_mov_b32 0
218
221
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4));
219
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1), Operand(m0, s1));
222
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1),
220
224
bld.sop1(aco_opcode::s_mov_b32, Definition(m0, s1), Operand::zero());
222
226
//! p_unit_test 5
224
228
//! s_waitcnt_depctr vm_vsrc(0)
225
229
//! s2: %0:exec = s_mov_b64 -1
226
230
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5));
227
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1), Operand(m0, s1));
231
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1),
228
233
bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand::c64(-1));
230
235
/* no hazard: LDS */
232
237
//! v1: %0:v[0] = ds_read_b32 %0:v[0], %0:m0
233
238
//! s1: %0:s[0] = s_mov_b32 0
234
239
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6));
235
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1), Operand(m0, s1));
240
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1),
236
242
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(0), s1), Operand::zero());
238
244
/* no hazard: LDS with VALU in-between */
242
248
//! s1: %0:m0 = s_mov_b32 0
243
249
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7));
244
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1), Operand(m0, s1));
250
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1),
245
252
bld.vop1(aco_opcode::v_nop);
246
253
bld.sop1(aco_opcode::s_mov_b32, Definition(m0, s1), Operand::zero());
269
276
//! s_waitcnt lgkmcnt(0)
270
277
//! s1: %0:m0 = s_mov_b32 0
271
278
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10));
272
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1), Operand(m0, s1));
279
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1),
273
281
bld.sopp(aco_opcode::s_waitcnt, -1, 0xc07f);
274
282
bld.sop1(aco_opcode::s_mov_b32, Definition(m0, s1), Operand::zero());
300
308
//! s_waitcnt_depctr vm_vsrc(0)
301
309
//! s1: %0:m0 = s_mov_b32 0
302
310
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13));
303
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1), Operand(m0, s1));
311
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1),
304
313
bld.sopp(aco_opcode::s_waitcnt, -1, 0x3f70);
305
314
bld.sop1(aco_opcode::s_mov_b32, Definition(m0, s1), Operand::zero());
932
941
//! s_waitcnt_depctr sa_sdst(0)
933
942
//! s1: %0:s[2] = s_mov_b32 %0:s[1]
934
943
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0));
935
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1),
936
Operand::zero(), Operand::zero(), Operand(PhysReg(0), s2));
944
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), Operand::zero(),
945
Operand::zero(), Operand(PhysReg(0), s2));
937
946
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(1), s1), Operand::zero());
938
947
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1));
944
953
//! s1: %0:s[1] = s_mov_b32 0
945
954
//! s1: %0:s[2] = s_mov_b32 %0:s[1]
946
955
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1));
947
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1),
948
Operand::zero(), Operand::zero(), Operand(PhysReg(0), s2));
956
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), Operand::zero(),
957
Operand::zero(), Operand(PhysReg(0), s2));
949
958
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(257), v1), Operand(PhysReg(1), s1));
950
959
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(1), s1), Operand::zero());
951
960
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1));
957
966
//! s1: %0:s[2] = s_mov_b32 %0:s[1]
958
967
//! s1: %0:s[2] = s_mov_b32 %0:s[1]
959
968
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2));
960
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1),
961
Operand::zero(), Operand::zero(), Operand(PhysReg(0), s2));
969
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), Operand::zero(),
970
Operand::zero(), Operand(PhysReg(0), s2));
962
971
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(1), s1), Operand::zero());
963
972
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1));
964
973
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1));
969
978
//! s_waitcnt_depctr sa_sdst(0)
970
979
//! s1: %0:s[2] = s_mov_b32 %0:s[1]
971
980
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3));
972
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1),
973
Operand::zero(), Operand::zero(), Operand(PhysReg(0), s2));
981
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), Operand::zero(),
982
Operand::zero(), Operand(PhysReg(0), s2));
974
983
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(1), s1), Operand::zero());
975
984
bld.sopp(aco_opcode::s_waitcnt_depctr, -1, 0xfffe);
976
985
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1));
982
991
//! s_waitcnt_depctr sa_sdst(0)
983
992
//! s1: %0:s[2] = s_mov_b32 %0:s[1]
984
993
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4));
985
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1),
986
Operand(PhysReg(2), s1), Operand::zero(), Operand(PhysReg(0), s2));
994
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), Operand(PhysReg(2), s1),
995
Operand::zero(), Operand(PhysReg(0), s2));
987
996
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(1), s1), Operand::zero());
988
997
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1));