256
if (has_alu_flag(alu_write))
258
if (has_alu_flag(alu_write) || m_dest->has_flag(Register::addr_or_idx)) {
257
259
os << " " << *m_dest;
260
262
<< "." << swzchar[m_dest->chan()];
261
if (!has_alu_flag(alu_write) && m_dest->pin() != pin_none)
262
os << "@" << m_dest->pin();
263
if (m_dest->pin() != pin_none)
264
os << "@" << m_dest->pin();
265
os << "__." << swzchar[dest_chan()] << " : ";
268
os << " __." << swzchar[dest_chan()] << " : ";
269
272
const int n_source_per_slot =
270
273
has_alu_flag(alu_is_lds) ? m_src.size() : alu_ops.at(m_opcode).nsrc;
272
276
for (int s = 0; s < m_alu_slots; ++s) {
281
if (has_alu_flag(src_neg_flags[k]))
285
if (has_source_mod(i, mod_neg))
282
286
pflags |= ValuePrintFlags::has_neg;
283
287
if (has_alu_flag(src_rel_flags[k]))
284
288
pflags |= ValuePrintFlags::is_rel;
286
if (has_alu_flag(src_abs_flags[k]))
289
if (n_source_per_slot <= 2)
290
if (has_source_mod(i, mod_abs))
287
291
pflags |= ValuePrintFlags::has_abs;
289
293
if (pflags & ValuePrintFlags::has_neg)
345
349
return m_dest->pin() == pin_none || m_dest->pin() == pin_free;
352
class ReplaceIndirectArrayAddr : public RegisterVisitor {
354
void visit(Register& value) override { (void)value; }
355
void visit(LocalArray& value) override
358
unreachable("An array can't be used as address");
360
void visit(LocalArrayValue& value) override;
361
void visit(UniformValue& value) override;
362
void visit(LiteralConstant& value) override { (void)value; }
363
void visit(InlineConstant& value) override { (void)value; }
368
void ReplaceIndirectArrayAddr::visit(LocalArrayValue& value)
370
if (new_addr->sel() == 0 && value.addr()->as_register())
371
value.set_addr(new_addr);
374
void ReplaceIndirectArrayAddr::visit(UniformValue& value)
376
if (value.buf_addr() && value.buf_addr()->as_register() &&
377
(new_addr->sel() == 1 || new_addr->sel() == 2)) {
378
value.set_buf_addr(new_addr);
382
void AluInstr::update_indirect_addr(PRegister reg)
384
ReplaceIndirectArrayAddr visitor;
385
visitor.new_addr = reg;
386
assert(reg->has_flag(Register::addr_or_idx));
389
m_dest->accept(visitor);
391
for (auto src : m_src)
392
src->accept(visitor);
349
398
AluInstr::can_propagate_dest() const
471
bool AluInstr::replace_src(int i, PVirtualValue new_src, uint32_t to_set,
474
auto old_src = m_src[i]->as_register();
477
if (!can_replace_source(old_src, new_src))
481
old_src->del_use(this);
485
auto r = new_src->as_register();
489
m_source_modifiers |= to_set << (2 * i);
490
m_source_modifiers &= ~(to_clear << (2 * i));
419
496
bool AluInstr::can_replace_source(PRegister old_src, PVirtualValue new_src)
421
498
if (!check_readport_validation(old_src, new_src))
424
/* If the old source is an array element, we assume that there
501
/* If the old or new source is an array element, we assume that there
425
502
* might have been an (untracked) indirect access, so don't replace
427
if (old_src->pin() == pin_array)
504
if (old_src->pin() == pin_array || new_src->pin() == pin_array)
430
if (new_src->get_addr()) {
431
for (auto& s : m_src) {
432
auto addr = s->get_addr();
433
/* can't have two differen't indirect addresses in the same instr */
434
if (addr && !addr->equal_to(*new_src->get_addr()))
507
auto [addr, dummy, index] = indirect_addr();
508
auto addr_reg = addr ? addr->as_register() : nullptr;
509
auto index_reg = index ? index->as_register() : nullptr;
511
if (auto u = new_src->as_uniform()) {
512
if (u && u->buf_addr()) {
514
/* Don't mix indirect buffer and indirect registers, because the
515
* scheduler can't handle it yet. */
519
/* Don't allow two different index registers, can't deal with that yet */
520
if (index_reg && !index_reg->equal_to(*u->buf_addr()))
440
/* We don't allow src and dst with rel and different indirect register
442
if (m_dest->pin() == pin_array && new_src->pin() == pin_array) {
443
auto dav = static_cast<const LocalArrayValue *>(m_dest)->addr();
444
auto sav = static_cast<const LocalArrayValue *>(new_src)->addr();
445
if (dav && sav && dav->as_register() && !dav->equal_to(*sav))
525
if (auto new_addr = new_src->get_addr()) {
526
auto new_addr_reg = new_addr->as_register();
527
bool new_addr_lowered = new_addr_reg &&
528
new_addr_reg->has_flag(Register::addr_or_idx);
531
if (!addr_reg->equal_to(*new_addr) || new_addr_lowered ||
532
addr_reg->has_flag(Register::addr_or_idx))
535
if (m_dest->has_flag(Register::addr_or_idx)) {
536
if (new_src->pin() == pin_array) {
537
auto s = static_cast<const LocalArrayValue *>(new_src)->addr();
538
if (!s->as_inline_const() || !s->as_literal())
764
857
instr->set_blockid(block_id(), index());
766
859
if (s == 0 || !m_alu_flags.test(alu_64bit_op)) {
767
if (has_alu_flag(alu_src0_neg))
768
instr->set_alu_flag(alu_src0_neg);
769
if (has_alu_flag(alu_src1_neg))
770
instr->set_alu_flag(alu_src1_neg);
771
if (has_alu_flag(alu_src2_neg))
772
instr->set_alu_flag(alu_src2_neg);
773
if (has_alu_flag(alu_src0_abs))
774
instr->set_alu_flag(alu_src0_abs);
775
if (has_alu_flag(alu_src1_abs))
776
instr->set_alu_flag(alu_src1_abs);
860
if (has_source_mod(nsrc * k + 0, mod_neg))
861
instr->set_source_mod(0, mod_neg);
862
if (has_source_mod(nsrc * k + 1, mod_neg))
863
instr->set_source_mod(1, mod_neg);
864
if (has_source_mod(nsrc * k + 2, mod_neg))
865
instr->set_source_mod(2, mod_neg);
866
if (has_source_mod(nsrc * k + 0, mod_abs))
867
instr->set_source_mod(0, mod_abs);
868
if (has_source_mod(nsrc * k + 1, mod_abs))
869
instr->set_source_mod(1, mod_abs);
778
871
if (has_alu_flag(alu_dst_clamp))
779
872
instr->set_alu_flag(alu_dst_clamp);
1010
1108
string srcstr = *t++;
1012
1110
if (srcstr[0] == '-') {
1014
flags.insert(AluInstr::src_neg_flags[i]);
1016
assert(flags.find(AluInstr::src_neg_flags[i]) != flags.end());
1111
src_mods |= AluInstr::mod_neg << (2 * sources.size());
1017
1112
srcstr = srcstr.substr(1);
1020
1115
if (srcstr[0] == '|') {
1021
1116
assert(srcstr[srcstr.length() - 1] == '|');
1023
flags.insert(AluInstr::src_abs_flags[i]);
1025
assert(flags.find(AluInstr::src_abs_flags[i]) != flags.end());
1117
src_mods |= AluInstr::mod_abs << (2 * sources.size());
1026
1118
srcstr = srcstr.substr(1, srcstr.length() - 2);
1206
1299
instr->predicate()->accept(*this);
1302
bool AluInstr::is_kill() const
1304
if (has_alu_flag(alu_is_lds))
1311
case op2_killne_int:
1313
case op2_killge_int:
1314
case op2_killge_uint:
1316
case op2_killgt_int:
1317
case op2_killgt_uint:
1210
1332
emit_alu_b2x(const nir_alu_instr& alu, AluInlineConstants mask, Shader& shader);
1213
1337
emit_alu_op1(const nir_alu_instr& alu,
1215
1339
Shader& shader,
1216
const AluOpFlags& flags = 0);
1340
AluMods mod = mod_none);
1218
1342
emit_alu_op1_64bit(const nir_alu_instr& alu,
1561
1689
return emit_alu_op3(*alu, op3_cnde_int, shader, {0, 2, 1});
1563
1691
case nir_op_fabs:
1564
return emit_alu_op1(*alu, op1_mov, shader, {1 << alu_src0_abs});
1692
return emit_alu_op1(*alu, op1_mov, shader, mod_src0_abs);
1565
1693
case nir_op_fadd:
1566
1694
return emit_alu_op2(*alu, op2_add, shader);
1567
1695
case nir_op_fceil:
1628
1756
case nir_op_fround_even:
1629
1757
return emit_alu_op1(*alu, op1_rndne, shader);
1630
1758
case nir_op_fsat:
1631
return emit_alu_op1(*alu, op1_mov, shader, {1 << alu_dst_clamp});
1759
return emit_alu_op1(*alu, op1_mov, shader, mod_dest_clamp);
1632
1760
case nir_op_fsub:
1633
1761
return emit_alu_op2(*alu, op2_add, shader, op2_opt_neg_src1);
1634
1762
case nir_op_ftrunc:
1791
1919
value_factory.src64(alu.src[0], i, swz[0]),
1793
1921
group->add_instruction(ir);
1795
ir->set_alu_flag(alu_src0_abs);
1796
if (alu.src[0].negate)
1797
ir->set_alu_flag(alu_src0_neg);
1799
1923
ir = new AluInstr(opcode,
1800
1924
value_factory.dest(alu.dest, 2 * i + 1, pin_chan),
1848
1966
value_factory.dest(alu.dest, 2 * i + c, pin_chan),
1849
1967
value_factory.src64(alu.src[0], i, c),
1851
group->add_instruction(ir);
1969
shader.emit_instruction(ir);
1853
ir->set_alu_flag(alu_src0_neg);
1971
ir->set_source_mod(0, AluInstr::mod_neg);
1856
1974
ir->set_alu_flag(alu_last_instr);
1857
shader.emit_instruction(group);
1874
1992
value_factory.dest(alu.dest, 1, pin_chan),
1875
1993
value_factory.src64(alu.src[0], 0, 1),
1876
1994
AluInstr::last_write);
1877
ir->set_alu_flag(alu_src0_abs);
1995
ir->set_source_mod(0, AluInstr::mod_abs);
1878
1996
shader.emit_instruction(ir);
2001
try_propagat_fsat64(const nir_alu_instr& alu, Shader& shader)
2003
auto& value_factory = shader.value_factory();
2004
auto src0 = value_factory.src64(alu.src[0], 0, 0);
2005
auto reg0 = src0->as_register();
2009
if (!reg0->has_flag(Register::ssa))
2012
if (reg0->parents().size() != 1)
2015
if (!reg0->uses().empty())
2018
auto parent = (*reg0->parents().begin())->as_alu();
2022
auto opinfo = alu_ops.at(parent->opcode());
2023
if (!opinfo.can_clamp)
2026
parent->set_alu_flag(alu_dst_clamp);
2032
emit_alu_fsat64(const nir_alu_instr& alu, Shader& shader)
2034
auto& value_factory = shader.value_factory();
2036
assert(nir_dest_num_components(alu.dest.dest) == 1);
2038
if (try_propagat_fsat64(alu, shader)) {
2039
auto ir = new AluInstr(op1_mov,
2040
value_factory.dest(alu.dest, 0, pin_chan),
2041
value_factory.src64(alu.src[0], 0, 0),
2043
shader.emit_instruction(ir);
2045
shader.emit_instruction(new AluInstr(op1_mov,
2046
value_factory.dest(alu.dest, 1, pin_chan),
2047
value_factory.src64(alu.src[0], 0, 1),
2048
AluInstr::last_write));
2051
/* dest clamp doesn't work on plain 64 bit move, so add a zero
2052
* to apply the modifier */
2054
auto group = new AluGroup();
2055
auto ir = new AluInstr(op2_add_64,
2056
value_factory.dest(alu.dest, 0, pin_chan),
2057
value_factory.src64(alu.src[0], 0, 1),
2058
value_factory.literal(0),
2060
ir->set_alu_flag(alu_dst_clamp);
2061
group->add_instruction(ir);
2063
group->add_instruction(new AluInstr(op2_add_64,
2064
value_factory.dest(alu.dest, 1, pin_chan),
2065
value_factory.src64(alu.src[0], 0, 0),
2066
value_factory.literal(0),
2067
AluInstr::last_write));
2068
shader.emit_instruction(group);
1883
2076
emit_alu_op2_64bit(const nir_alu_instr& alu,
1885
2078
Shader& shader,
1909
2102
value_factory.src64(alu.src[order[0]], k, 1),
1910
2103
value_factory.src64(alu.src[order[1]], k, 1),
1911
2104
i < 2 ? AluInstr::write : AluInstr::empty);
1914
ir->set_alu_flag(switch_src ? alu_src1_abs : alu_src0_abs);
1916
ir->set_alu_flag(switch_src ? alu_src0_abs : alu_src1_abs);
1917
if (alu.src[0].negate)
1918
ir->set_alu_flag(switch_src ? alu_src1_neg : alu_src0_neg);
1919
if (alu.src[1].negate)
1920
ir->set_alu_flag(switch_src ? alu_src0_neg : alu_src1_neg);
1921
if (alu.dest.saturate && i == 0) {
1922
ir->set_alu_flag(alu_dst_clamp);
1925
2105
group->add_instruction(ir);
1966
2146
src[3] = value_factory.src64(alu.src[order[1]], k, 0);
1968
2148
ir = new AluInstr(opcode, dest, src, AluInstr::write, 2);
1971
ir->set_alu_flag(switch_order ? alu_src1_abs : alu_src0_abs);
1973
ir->set_alu_flag(switch_order ? alu_src0_abs : alu_src1_abs);
1974
if (alu.src[0].negate)
1975
ir->set_alu_flag(switch_order ? alu_src1_neg : alu_src0_neg);
1976
if (alu.src[1].negate)
1977
ir->set_alu_flag(switch_order ? alu_src0_neg : alu_src1_neg);
1978
2149
ir->set_alu_flag(alu_64bit_op);
1980
2151
shader.emit_instruction(ir);
1999
2170
value_factory.src64(alu.src[0], 0, 0),
2000
2171
i < 2 ? AluInstr::write : AluInstr::empty);
2002
if (alu.src[0].abs || opcode == op1_sqrt_64)
2003
ir->set_alu_flag(alu_src1_abs);
2004
if (alu.src[0].negate)
2005
ir->set_alu_flag(alu_src1_neg);
2173
if (opcode == op1_sqrt_64)
2174
ir->set_source_mod(0, AluInstr::mod_abs);
2007
2175
group->add_instruction(ir);
2030
2198
value_factory.src64(alu.src[1], 0, chan),
2031
2199
value_factory.src64(alu.src[2], 0, chan),
2032
2200
i < 2 ? AluInstr::write : AluInstr::empty);
2035
if (alu.src[0].negate)
2036
ir->set_alu_flag(alu_src0_neg);
2037
if (alu.src[1].negate)
2038
ir->set_alu_flag(alu_src1_neg);
2039
if (alu.src[2].negate)
2040
ir->set_alu_flag(alu_src2_neg);
2043
2201
group->add_instruction(ir);
2224
2378
for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest); ++i) {
2225
2379
if (alu.dest.write_mask & (1 << i)) {
2226
2381
ir = new AluInstr(opcode,
2227
2382
value_factory.dest(alu.dest, i, pin),
2228
2383
value_factory.src(alu.src[0], i),
2231
if (flags.test(alu_src0_abs) || alu.src[0].abs)
2232
ir->set_alu_flag(alu_src0_abs);
2234
if (alu.src[0].negate ^ flags.test(alu_src0_neg))
2235
ir->set_alu_flag(alu_src0_neg);
2237
if (flags.test(alu_dst_clamp) || alu.dest.saturate)
2387
ir->set_source_mod(0, AluInstr::mod_abs); break;
2389
ir->set_source_mod(0, AluInstr::mod_neg); break;
2390
case mod_dest_clamp:
2238
2391
ir->set_alu_flag(alu_dst_clamp);
2240
2394
shader.emit_instruction(ir);
2273
2427
value_factory.src(*src0, i),
2274
2428
value_factory.src(*src1, i),
2278
ir->set_alu_flag(alu_src0_neg);
2280
ir->set_alu_flag(alu_src0_abs);
2281
2430
if (src1_negate)
2282
ir->set_alu_flag(alu_src1_neg);
2284
ir->set_alu_flag(alu_src1_abs);
2285
if (alu.dest.saturate)
2286
ir->set_alu_flag(alu_dst_clamp);
2431
ir->set_source_mod(1, AluInstr::mod_neg);
2287
2432
shader.emit_instruction(ir);
2328
2468
value_factory.src(*src[1], i),
2329
2469
value_factory.src(*src[2], i),
2333
ir->set_alu_flag(alu_src0_neg);
2335
ir->set_alu_flag(alu_src1_neg);
2337
ir->set_alu_flag(alu_src2_neg);
2339
assert(!src[0]->abs);
2340
assert(!src[1]->abs);
2341
assert(!src[2]->abs);
2343
if (alu.dest.saturate)
2344
ir->set_alu_flag(alu_dst_clamp);
2345
2471
ir->set_alu_flag(alu_write);
2346
2472
shader.emit_instruction(ir);
2367
2493
value_factory.src(alu.src[0], i),
2368
2494
value_factory.src(alu.src[1], i),
2371
ir->set_alu_flag(alu_src0_abs);
2372
if (alu.src[0].negate)
2373
ir->set_alu_flag(alu_src0_neg);
2376
ir->set_alu_flag(alu_src1_abs);
2377
if (alu.src[1].negate)
2378
ir->set_alu_flag(alu_src1_neg);
2380
2496
shader.emit_instruction(ir);
2382
2498
ir->set_alu_flag(alu_last_instr);
2435
2540
ir = new AluInstr(op1_max4, max_val, s, AluInstr::last_write, 4);
2438
ir->set_alu_flag(alu_src0_neg);
2543
ir->set_source_mod(0, AluInstr::mod_neg);
2544
ir->set_source_mod(1, AluInstr::mod_neg);
2545
ir->set_source_mod(2, AluInstr::mod_neg);
2546
ir->set_source_mod(3, AluInstr::mod_neg);
2440
2549
shader.emit_instruction(ir);
2534
2643
AluInstr *ir = new AluInstr(op2_dot_ieee, dest, srcs, AluInstr::last_write, n);
2537
ir->set_alu_flag(alu_src0_neg);
2539
ir->set_alu_flag(alu_src0_abs);
2541
ir->set_alu_flag(alu_src1_neg);
2543
ir->set_alu_flag(alu_src1_abs);
2545
if (alu.dest.saturate)
2546
ir->set_alu_flag(alu_dst_clamp);
2548
2645
shader.emit_instruction(ir);
2646
shader.set_flag(Shader::sh_disble_sb);
2573
2672
AluInstr *ir = new AluInstr(op2_dot4_ieee, dest, srcs, AluInstr::last_write, 4);
2576
ir->set_alu_flag(alu_src0_neg);
2578
ir->set_alu_flag(alu_src0_abs);
2580
ir->set_alu_flag(alu_src1_neg);
2582
ir->set_alu_flag(alu_src1_abs);
2584
if (alu.dest.saturate)
2585
ir->set_alu_flag(alu_dst_clamp);
2587
2674
shader.emit_instruction(ir);
2608
2695
srcs[7] = value_factory.src(src1, 3);
2610
2697
AluInstr *ir = new AluInstr(op2_dot4_ieee, dest, srcs, AluInstr::last_write, 4);
2613
ir->set_alu_flag(alu_src0_neg);
2615
ir->set_alu_flag(alu_src0_abs);
2617
ir->set_alu_flag(alu_src1_neg);
2619
ir->set_alu_flag(alu_src1_abs);
2621
if (alu.dest.saturate)
2622
ir->set_alu_flag(alu_dst_clamp);
2624
2698
shader.emit_instruction(ir);
2635
2709
if (instr.dest.write_mask & (1 << i)) {
2636
2710
auto src = value_factory.src(instr.src[i].src, instr.src[i].swizzle[0]);
2637
2711
auto dst = value_factory.dest(instr.dest.dest, i, pin_none);
2638
ir = new AluInstr(op1_mov, dst, src, {alu_write});
2640
if (instr.dest.saturate)
2641
ir->set_alu_flag(alu_dst_clamp);
2642
if (instr.src[i].negate)
2643
ir->set_alu_flag(alu_src0_neg);
2644
if (instr.src[i].abs)
2645
ir->set_alu_flag(alu_src0_abs);
2647
shader.emit_instruction(ir);
2712
shader.emit_instruction(new AluInstr(op1_mov, dst, src, {alu_write}));
2826
2891
value_factory.dest(alu.dest.dest, i, pin),
2827
2892
value_factory.src(src0, i),
2828
2893
AluInstr::last_write);
2830
ir->set_alu_flag(alu_src0_neg);
2832
ir->set_alu_flag(alu_src0_abs);
2833
if (alu.dest.saturate)
2834
ir->set_alu_flag(alu_dst_clamp);
2835
2894
ir->set_alu_flag(alu_is_trans);
2836
2895
shader.emit_instruction(ir);
2932
2979
value_factory.src(src0, i),
2933
2980
value_factory.src(src1, i),
2934
2981
AluInstr::last_write);
2936
ir->set_alu_flag(alu_src0_neg);
2938
ir->set_alu_flag(alu_src0_abs);
2940
ir->set_alu_flag(alu_src1_neg);
2942
ir->set_alu_flag(alu_src1_abs);
2943
if (alu.dest.saturate)
2944
ir->set_alu_flag(alu_dst_clamp);
2945
2982
ir->set_alu_flag(alu_is_trans);
2946
2983
shader.emit_instruction(ir);
2974
3011
auto ir = new AluInstr(opcode, dest, srcs, flags, last_slot);
2977
ir->set_alu_flag(alu_src0_neg);
2979
ir->set_alu_flag(alu_src0_abs);
2981
ir->set_alu_flag(alu_src1_neg);
2983
ir->set_alu_flag(alu_src1_abs);
2984
if (alu.dest.saturate)
2985
ir->set_alu_flag(alu_dst_clamp);
2986
3012
ir->set_alu_flag(alu_is_cayman_trans);
2987
3013
shader.emit_instruction(ir);