~mmach/netext73/mesa-ryzen

« back to all changes in this revision

Viewing changes to src/amd/compiler/tests/test_regalloc.cpp

  • Committer: mmach
  • Date: 2023-11-02 21:31:35 UTC
  • Revision ID: netbit73@gmail.com-20231102213135-18d4tzh7tj0uz752
2023-11-02 22:11:57

Show diffs side-by-side

added added

removed removed

Lines of Context:
37
37
 
38
38
   /* TODO: is it possible to do this on GFX11? */
39
39
   for (amd_gfx_level cc = GFX8; cc <= GFX10_3; cc = (amd_gfx_level)((unsigned)cc + 1)) {
40
 
      for (bool pessimistic : { false, true }) {
 
40
      for (bool pessimistic : {false, true}) {
41
41
         const char* subvariant = pessimistic ? "/pessimistic" : "/optimistic";
42
42
 
43
43
         //>> v1: %_:v[#a] = p_startpgm
45
45
            return;
46
46
 
47
47
         //! v2b: %_:v[#a][0:16], v2b: %res1:v[#a][16:32] = p_split_vector %_:v[#a]
48
 
         Builder::Result tmp = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]);
 
48
         Builder::Result tmp =
 
49
            bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]);
49
50
 
50
51
         //! v1: %_:v[#b] = v_cvt_f32_f16 %_:v[#a][16:32] dst_sel:dword src0_sel:uword1
51
52
         //! v1: %_:v[#a] = v_cvt_f32_f16 %_:v[#a][0:16]
55
56
         writeout(0, result1);
56
57
         writeout(1, result2);
57
58
 
58
 
         finish_ra_test(ra_test_policy { pessimistic });
 
59
         finish_ra_test(ra_test_policy{pessimistic});
59
60
      }
60
61
   }
61
62
END_TEST
62
63
 
63
 
BEGIN_TEST(regalloc.32bit_partial_write)
 
64
BEGIN_TEST(regalloc._32bit_partial_write)
64
65
   //>> v1: %_:v[0] = p_startpgm
65
66
   if (!setup_cs("v1", GFX10))
66
67
      return;
67
68
 
68
69
   /* ensure high 16 bits are occupied */
69
70
   //! v2b: %_:v[0][0:16], v2b: %_:v[0][16:32] = p_split_vector %_:v[0]
70
 
   Temp hi = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]).def(1).getTemp();
 
71
   Temp hi =
 
72
      bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]).def(1).getTemp();
71
73
 
72
74
   /* This test checks if this instruction uses SDWA. */
73
75
   //! v2b: %_:v[0][0:16] = v_not_b32 0 dst_sel:uword0 dst_preserve src0_sel:dword
168
170
 
169
171
   //! v1: %tmp3_2:v[0], v1: %tmp0_2:v[1], v1: %tmp1_2:v[2], v1: %tmp2_2:v[3] = p_parallelcopy %tmp3:v[3], %tmp0:v[0], %tmp1:v[1], %tmp2:v[2]
170
172
   //! p_unit_test %tmp3_2:v[0], %tmp0_2:v[1], %tmp1_2:v[2], %tmp2_2:v[3]
171
 
   bld.pseudo(aco_opcode::p_unit_test, Operand(inputs[3], PhysReg(256+0)),
172
 
              Operand(inputs[0], PhysReg(256+1)), Operand(inputs[1], PhysReg(256+2)),
173
 
              Operand(inputs[2], PhysReg(256+3)));
 
173
   bld.pseudo(aco_opcode::p_unit_test, Operand(inputs[3], PhysReg(256 + 0)),
 
174
              Operand(inputs[0], PhysReg(256 + 1)), Operand(inputs[1], PhysReg(256 + 2)),
 
175
              Operand(inputs[2], PhysReg(256 + 3)));
174
176
 
175
177
   finish_ra_test(ra_test_policy());
176
178
END_TEST
182
184
 
183
185
   //! v1: %tmp1:v[1], v1: %tmp2:v[2] = p_parallelcopy %tmp0:v[0], %tmp0:v[0]
184
186
   //! p_unit_test %tmp0:v[0], %tmp1:v[1], %tmp2:v[2]
185
 
   bld.pseudo(aco_opcode::p_unit_test, Operand(inputs[0], PhysReg(256+0)),
186
 
              Operand(inputs[0], PhysReg(256+1)), Operand(inputs[0], PhysReg(256+2)));
 
187
   bld.pseudo(aco_opcode::p_unit_test, Operand(inputs[0], PhysReg(256 + 0)),
 
188
              Operand(inputs[0], PhysReg(256 + 1)), Operand(inputs[0], PhysReg(256 + 2)));
187
189
 
188
190
   finish_ra_test(ra_test_policy());
189
191
END_TEST
256
258
 
257
259
   //! s1: %scc_tmp:scc, s1: %1:s[0] = p_unit_test
258
260
   Temp s0_tmp = bld.tmp(s1);
259
 
   Temp scc_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(s1, scc), Definition(s0_tmp.id(), PhysReg{0}, s1));
 
261
   Temp scc_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(s1, scc),
 
262
                             Definition(s0_tmp.id(), PhysReg{0}, s1));
260
263
 
261
264
   //! lv1: %tmp1:v[1] = p_unit_test
262
265
   Temp tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v1));
273
276
   //>> lv1: %5:v[2] = p_parallelcopy %3:v[1] scc:1 scratch:s1
274
277
   Pseudo_instruction& parallelcopy = program->blocks[0].instructions[3]->pseudo();
275
278
   aco_print_instr(program->gfx_level, &parallelcopy, output);
276
 
   fprintf(output, " scc:%u scratch:s%u\n", parallelcopy.tmp_in_scc, parallelcopy.scratch_sgpr.reg());
 
279
   fprintf(output, " scc:%u scratch:s%u\n", parallelcopy.tmp_in_scc,
 
280
           parallelcopy.scratch_sgpr.reg());
277
281
END_TEST
278
282
 
279
283
BEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_regs_for_copies)
392
396
 
393
397
   //! v1: %tmp0:v[1] = v_interp_p10_f16_f32_inreg %lo:v[3][0:16], %in1:v[1], hi(%hi:v[3][16:32])
394
398
   //! p_unit_test %tmp0:v[1]
395
 
   Temp tmp0 = bld.vinterp_inreg(aco_opcode::v_interp_p10_f16_f32_inreg, bld.def(v1), lo, inputs[1], hi);
 
399
   Temp tmp0 =
 
400
      bld.vinterp_inreg(aco_opcode::v_interp_p10_f16_f32_inreg, bld.def(v1), lo, inputs[1], hi);
396
401
   bld.pseudo(aco_opcode::p_unit_test, tmp0);
397
402
 
398
403
   //! v2b: %tmp1:v[0][16:32] = v_interp_p2_f16_f32_inreg %in0:v[0], %in2:v[2], %tmp0:v[1] opsel_hi
399
404
   //! v1: %tmp2:v[0] = p_create_vector 0, %tmp1:v[0][16:32]
400
405
   //! p_unit_test %tmp2:v[0]
401
 
   Temp tmp1 = bld.vinterp_inreg(aco_opcode::v_interp_p2_f16_f32_inreg, bld.def(v2b), inputs[0], inputs[2], tmp0);
 
406
   Temp tmp1 = bld.vinterp_inreg(aco_opcode::v_interp_p2_f16_f32_inreg, bld.def(v2b), inputs[0],
 
407
                                 inputs[2], tmp0);
402
408
   Temp tmp2 = bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), Operand::zero(2), tmp1);
403
409
   bld.pseudo(aco_opcode::p_unit_test, tmp2);
404
410