1219
1219
bi_make_vec_to(b, dst, inout_words, NULL, sz / 32, 32);
1222
/* Extracts an atomic opcode */
1224
1222
static enum bi_atom_opc
1225
bi_atom_opc_for_nir(nir_intrinsic_op op)
1223
bi_atom_opc_for_nir(nir_atomic_op op)
1225
/* clang-format off */
1228
case nir_intrinsic_global_atomic_add:
1229
case nir_intrinsic_shared_atomic_add:
1230
case nir_intrinsic_image_atomic_add:
1231
return BI_ATOM_OPC_AADD;
1233
case nir_intrinsic_global_atomic_imin:
1234
case nir_intrinsic_shared_atomic_imin:
1235
case nir_intrinsic_image_atomic_imin:
1236
return BI_ATOM_OPC_ASMIN;
1238
case nir_intrinsic_global_atomic_umin:
1239
case nir_intrinsic_shared_atomic_umin:
1240
case nir_intrinsic_image_atomic_umin:
1241
return BI_ATOM_OPC_AUMIN;
1243
case nir_intrinsic_global_atomic_imax:
1244
case nir_intrinsic_shared_atomic_imax:
1245
case nir_intrinsic_image_atomic_imax:
1246
return BI_ATOM_OPC_ASMAX;
1248
case nir_intrinsic_global_atomic_umax:
1249
case nir_intrinsic_shared_atomic_umax:
1250
case nir_intrinsic_image_atomic_umax:
1251
return BI_ATOM_OPC_AUMAX;
1253
case nir_intrinsic_global_atomic_and:
1254
case nir_intrinsic_shared_atomic_and:
1255
case nir_intrinsic_image_atomic_and:
1256
return BI_ATOM_OPC_AAND;
1258
case nir_intrinsic_global_atomic_or:
1259
case nir_intrinsic_shared_atomic_or:
1260
case nir_intrinsic_image_atomic_or:
1261
return BI_ATOM_OPC_AOR;
1263
case nir_intrinsic_global_atomic_xor:
1264
case nir_intrinsic_shared_atomic_xor:
1265
case nir_intrinsic_image_atomic_xor:
1266
return BI_ATOM_OPC_AXOR;
1269
unreachable("Unexpected computational atomic");
1227
case nir_atomic_op_iadd: return BI_ATOM_OPC_AADD;
1228
case nir_atomic_op_imin: return BI_ATOM_OPC_ASMIN;
1229
case nir_atomic_op_umin: return BI_ATOM_OPC_AUMIN;
1230
case nir_atomic_op_imax: return BI_ATOM_OPC_ASMAX;
1231
case nir_atomic_op_umax: return BI_ATOM_OPC_AUMAX;
1232
case nir_atomic_op_iand: return BI_ATOM_OPC_AAND;
1233
case nir_atomic_op_ior: return BI_ATOM_OPC_AOR;
1234
case nir_atomic_op_ixor: return BI_ATOM_OPC_AXOR;
1235
default: unreachable("Unexpected computational atomic");
1237
/* clang-format on */
1273
1240
/* Optimized unary atomics are available with an implied #1 argument */
1486
/* gl_FragCoord.xy = u16_to_f32(R59.xy) + 0.5
1487
* gl_FragCoord.z = ld_vary(fragz)
1488
* gl_FragCoord.w = ld_vary(fragw)
1492
bi_emit_load_frag_coord(bi_builder *b, nir_intrinsic_instr *instr)
1460
bi_emit_load_frag_coord_zw(bi_builder *b, bi_index dst, unsigned channel)
1494
bi_index src[4] = {};
1496
for (unsigned i = 0; i < 2; ++i) {
1497
src[i] = bi_fadd_f32(b, bi_u16_to_f32(b, bi_half(bi_preload(b, 59), i)),
1501
for (unsigned i = 0; i < 2; ++i) {
1502
src[2 + i] = bi_ld_var_special(
1503
b, bi_zero(), BI_REGISTER_FORMAT_F32, BI_SAMPLE_CENTER,
1505
(i == 0) ? BI_VARYING_NAME_FRAG_Z : BI_VARYING_NAME_FRAG_W,
1509
bi_make_vec_to(b, bi_dest_index(&instr->dest), src, NULL, 4, 32);
1462
bi_ld_var_special_to(
1463
b, dst, bi_zero(), BI_REGISTER_FORMAT_F32, BI_SAMPLE_CENTER,
1465
(channel == 2) ? BI_VARYING_NAME_FRAG_Z : BI_VARYING_NAME_FRAG_W,
1619
case nir_intrinsic_shared_atomic_add:
1620
case nir_intrinsic_shared_atomic_imin:
1621
case nir_intrinsic_shared_atomic_umin:
1622
case nir_intrinsic_shared_atomic_imax:
1623
case nir_intrinsic_shared_atomic_umax:
1624
case nir_intrinsic_shared_atomic_and:
1625
case nir_intrinsic_shared_atomic_or:
1626
case nir_intrinsic_shared_atomic_xor: {
1627
assert(nir_src_bit_size(instr->src[1]) == 32);
1629
bi_index addr = bi_src_index(&instr->src[0]);
1632
if (b->shader->arch >= 9) {
1633
bi_handle_segment(b, &addr, &addr_hi, BI_SEG_WLS, NULL);
1634
addr = bi_collect_v2i32(b, addr, addr_hi);
1636
addr = bi_seg_add_i64(b, addr, bi_zero(), false, BI_SEG_WLS);
1637
bi_emit_cached_split(b, addr, 64);
1640
bi_emit_atomic_i32_to(b, dst, addr, bi_src_index(&instr->src[1]),
1642
bi_split_dest(b, instr->dest);
1646
case nir_intrinsic_image_atomic_add:
1647
case nir_intrinsic_image_atomic_imin:
1648
case nir_intrinsic_image_atomic_umin:
1649
case nir_intrinsic_image_atomic_imax:
1650
case nir_intrinsic_image_atomic_umax:
1651
case nir_intrinsic_image_atomic_and:
1652
case nir_intrinsic_image_atomic_or:
1653
case nir_intrinsic_image_atomic_xor:
1654
assert(nir_src_bit_size(instr->src[3]) == 32);
1656
bi_emit_atomic_i32_to(b, dst, bi_emit_lea_image(b, instr),
1657
bi_src_index(&instr->src[3]), instr->intrinsic);
1658
bi_split_dest(b, instr->dest);
1661
case nir_intrinsic_global_atomic_add:
1662
case nir_intrinsic_global_atomic_imin:
1663
case nir_intrinsic_global_atomic_umin:
1664
case nir_intrinsic_global_atomic_imax:
1665
case nir_intrinsic_global_atomic_umax:
1666
case nir_intrinsic_global_atomic_and:
1667
case nir_intrinsic_global_atomic_or:
1668
case nir_intrinsic_global_atomic_xor:
1669
assert(nir_src_bit_size(instr->src[1]) == 32);
1671
bi_emit_atomic_i32_to(b, dst, bi_src_index(&instr->src[0]),
1672
bi_src_index(&instr->src[1]), instr->intrinsic);
1674
bi_split_dest(b, instr->dest);
1576
case nir_intrinsic_shared_atomic: {
1577
nir_atomic_op op = nir_intrinsic_atomic_op(instr);
1579
if (op == nir_atomic_op_xchg) {
1580
bi_emit_axchg_to(b, dst, bi_src_index(&instr->src[0]), &instr->src[1],
1583
assert(nir_src_bit_size(instr->src[1]) == 32);
1585
bi_index addr = bi_src_index(&instr->src[0]);
1588
if (b->shader->arch >= 9) {
1589
bi_handle_segment(b, &addr, &addr_hi, BI_SEG_WLS, NULL);
1590
addr = bi_collect_v2i32(b, addr, addr_hi);
1592
addr = bi_seg_add_i64(b, addr, bi_zero(), false, BI_SEG_WLS);
1593
bi_emit_cached_split(b, addr, 64);
1596
bi_emit_atomic_i32_to(b, dst, addr, bi_src_index(&instr->src[1]), op);
1599
bi_split_dest(b, instr->dest);
1603
case nir_intrinsic_global_atomic: {
1604
nir_atomic_op op = nir_intrinsic_atomic_op(instr);
1606
if (op == nir_atomic_op_xchg) {
1607
bi_emit_axchg_to(b, dst, bi_src_index(&instr->src[0]), &instr->src[1],
1610
assert(nir_src_bit_size(instr->src[1]) == 32);
1612
bi_emit_atomic_i32_to(b, dst, bi_src_index(&instr->src[0]),
1613
bi_src_index(&instr->src[1]), op);
1616
bi_split_dest(b, instr->dest);
1620
case nir_intrinsic_image_texel_address:
1621
bi_emit_lea_image_to(b, dst, instr);
1677
1624
case nir_intrinsic_image_load:
1682
1629
bi_emit_image_store(b, instr);
1685
case nir_intrinsic_global_atomic_exchange:
1686
bi_emit_axchg_to(b, dst, bi_src_index(&instr->src[0]), &instr->src[1],
1688
bi_split_dest(b, instr->dest);
1691
case nir_intrinsic_image_atomic_exchange:
1692
bi_emit_axchg_to(b, dst, bi_emit_lea_image(b, instr), &instr->src[3],
1694
bi_split_dest(b, instr->dest);
1697
case nir_intrinsic_shared_atomic_exchange:
1698
bi_emit_axchg_to(b, dst, bi_src_index(&instr->src[0]), &instr->src[1],
1700
bi_split_dest(b, instr->dest);
1703
case nir_intrinsic_global_atomic_comp_swap:
1632
case nir_intrinsic_global_atomic_swap:
1704
1633
bi_emit_acmpxchg_to(b, dst, bi_src_index(&instr->src[0]), &instr->src[1],
1705
1634
&instr->src[2], BI_SEG_NONE);
1706
1635
bi_split_dest(b, instr->dest);
1709
case nir_intrinsic_image_atomic_comp_swap:
1710
bi_emit_acmpxchg_to(b, dst, bi_emit_lea_image(b, instr), &instr->src[3],
1711
&instr->src[4], BI_SEG_NONE);
1712
bi_split_dest(b, instr->dest);
1715
case nir_intrinsic_shared_atomic_comp_swap:
1638
case nir_intrinsic_shared_atomic_swap:
1716
1639
bi_emit_acmpxchg_to(b, dst, bi_src_index(&instr->src[0]), &instr->src[1],
1717
1640
&instr->src[2], BI_SEG_WLS);
1718
1641
bi_split_dest(b, instr->dest);
1721
case nir_intrinsic_load_frag_coord:
1722
bi_emit_load_frag_coord(b, instr);
1644
case nir_intrinsic_load_pixel_coord:
1645
/* Vectorized load of the preloaded i16vec2 */
1646
bi_mov_i32_to(b, dst, bi_preload(b, 59));
1649
case nir_intrinsic_load_frag_coord_zw:
1650
bi_emit_load_frag_coord_zw(b, dst, nir_intrinsic_component(instr));
1725
1653
case nir_intrinsic_load_converted_output_pan:
4789
4715
NIR_PASS_V(nir, pan_nir_lower_store_component);
4792
NIR_PASS_V(nir, nir_lower_mem_access_bit_sizes,
4793
nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_constant |
4794
nir_var_mem_task_payload | nir_var_shader_temp |
4795
nir_var_function_temp | nir_var_mem_global |
4797
mem_access_size_align_cb, NULL);
4718
nir_lower_mem_access_bit_sizes_options mem_size_options = {
4719
.modes = nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_constant |
4720
nir_var_mem_task_payload | nir_var_shader_temp |
4721
nir_var_function_temp | nir_var_mem_global | nir_var_mem_shared,
4722
.callback = mem_access_size_align_cb,
4724
NIR_PASS_V(nir, nir_lower_mem_access_bit_sizes, &mem_size_options);
4799
4726
NIR_PASS_V(nir, nir_lower_ssbo);
4800
4727
NIR_PASS_V(nir, pan_lower_sample_pos);
4801
4728
NIR_PASS_V(nir, nir_lower_bit_size, bi_lower_bit_size, NULL);
4802
4729
NIR_PASS_V(nir, nir_lower_64bit_phis);
4803
NIR_PASS_V(nir, nir_lower_regs_to_ssa);
4804
4730
NIR_PASS_V(nir, pan_nir_lower_64bit_intrin);
4805
4731
NIR_PASS_V(nir, pan_lower_helper_invocation);
4806
4732
NIR_PASS_V(nir, nir_lower_int64);
4819
4745
.lower_index_to_offset = true,
4748
NIR_PASS_V(nir, nir_lower_image_atomics_to_global);
4822
4749
NIR_PASS_V(nir, nir_lower_alu_to_scalar, bi_scalarize_filter, NULL);
4823
4750
NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
4824
4751
NIR_PASS_V(nir, nir_lower_phis_to_scalar, true);
4825
4752
NIR_PASS_V(nir, nir_lower_flrp, 16 | 32 | 64, false /* always_precise */);
4826
4753
NIR_PASS_V(nir, nir_lower_var_copies);
4827
4754
NIR_PASS_V(nir, nir_lower_alu);
4755
NIR_PASS_V(nir, nir_lower_frag_coord_to_pixel_coord);
4830
4758
static bi_context *
4884
4812
ctx->allocated_vec = _mesa_hash_table_u64_create(ctx);
4886
nir_foreach_function(func, nir) {
4890
nir_index_blocks(func->impl);
4814
nir_foreach_function_impl(impl, nir) {
4815
nir_index_blocks(impl);
4892
4817
ctx->indexed_nir_blocks =
4893
rzalloc_array(ctx, bi_block *, func->impl->num_blocks);
4895
ctx->ssa_alloc += func->impl->ssa_alloc;
4896
ctx->reg_alloc += func->impl->reg_alloc;
4898
emit_cf_list(ctx, &func->impl->body);
4818
rzalloc_array(ctx, bi_block *, impl->num_blocks);
4820
ctx->ssa_alloc += impl->ssa_alloc;
4821
ctx->reg_alloc += impl->reg_alloc;
4823
emit_cf_list(ctx, &impl->body);
4899
4824
bi_emit_phis_deferred(ctx);
4900
4825
break; /* TODO: Multi-function shaders */