    const unsigned num_inputs = util_bitcount64(nir->info.inputs_read);

-   nir_foreach_function(function, nir) {
-      nir_builder_init(&b, function->impl);
-      nir_foreach_block(block, function->impl) {
+   nir_foreach_function_impl(impl, nir) {
+      nir_builder b = nir_builder_create(impl);
+      nir_foreach_block(block, impl) {
          nir_foreach_instr_safe(instr, block) {
             if (instr->type != nir_instr_type_intrinsic)
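
For orientation, a minimal self-contained sketch of the updated iteration pattern used throughout these hunks (assuming the usual NIR headers; the function name and pass body are placeholders, not code from brw_nir.c). nir_foreach_function_impl() visits only functions that have an implementation, so the old NULL check on function->impl goes away, and nir_builder_create() replaces the nir_builder_init() two-step:

/* Sketch only (assumes the standard NIR headers are available). */
static void
walk_intrinsics_sketch(nir_shader *nir)
{
   nir_foreach_function_impl(impl, nir) {
      /* One builder per function implementation, as in the hunks above. */
      nir_builder b = nir_builder_create(impl);
      (void)b;

      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            /* ... rewrite the intrinsic using the builder ... */
         }
      }
   }
}
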
          load->num_components = 1;
-         nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
+         nir_ssa_dest_init(&load->instr, &load->dest, 1, 32);
          nir_builder_instr_insert(&b, &load->instr);

          nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
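
This hunk reflects nir_ssa_dest_init() dropping its trailing name argument. A small sketch of the updated emit pattern (hypothetical helper name; assumes a pre-built load intrinsic and the usual NIR headers):

/* Sketch: initialize a 1-component, 32-bit SSA destination and insert the
 * instruction; note there is no longer a name string argument. */
static nir_ssa_def *
emit_load_sketch(nir_builder *b, nir_intrinsic_instr *load)
{
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32);
   nir_builder_instr_insert(b, &load->instr);
   return &load->dest.ssa;
}
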
    nir_io_add_const_offset_to_base(nir, nir_var_shader_in);

-   nir_foreach_function(function, nir) {
-      nir_foreach_block(block, function->impl) {
+   nir_foreach_function_impl(impl, nir) {
+      nir_foreach_block(block, impl) {
          nir_foreach_instr(instr, block) {
             if (instr->type != nir_instr_type_intrinsic)
    nir_io_add_const_offset_to_base(nir, nir_var_shader_in);

-   nir_foreach_function(function, nir) {
-      if (function->impl) {
-         nir_builder_init(&b, function->impl);
-         nir_foreach_block(block, function->impl) {
-            remap_patch_urb_offsets(block, &b, vue_map,
-                                    nir->info.tess._primitive_mode);
+   nir_foreach_function_impl(impl, nir) {
+      nir_builder b = nir_builder_create(impl);
+      nir_foreach_block(block, impl) {
+         remap_patch_urb_offsets(block, &b, vue_map,
+                                 nir->info.tess._primitive_mode);
       assert(intrin->src[0].ssa);
       nir_ssa_def *offset =
          nir_imin(b, nir_imm_int(b, 7),
-                  nir_f2i32(b, nir_fmul(b, nir_imm_float(b, 16),
-                                        intrin->src[0].ssa)));
+                  nir_f2i32(b, nir_fmul_imm(b, intrin->src[0].ssa, 16)));

       nir_instr_rewrite_src(instr, &intrin->src[0], nir_src_for_ssa(offset));
    nir_io_add_const_offset_to_base(nir, nir_var_shader_out);

-   nir_foreach_function(function, nir) {
-      if (function->impl) {
-         nir_builder_init(&b, function->impl);
-         nir_foreach_block(block, function->impl) {
-            remap_patch_urb_offsets(block, &b, vue_map, tes_primitive_mode);
+   nir_foreach_function_impl(impl, nir) {
+      nir_builder b = nir_builder_create(impl);
+      nir_foreach_block(block, impl) {
+         remap_patch_urb_offsets(block, &b, vue_map, tes_primitive_mode);
-brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
+brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler)

    unsigned lower_flrp =
       (nir->options->lower_flrp16 ? 16 : 0) |
       (nir->options->lower_flrp32 ? 32 : 0) |
       (nir->options->lower_flrp64 ? 64 : 0);
+   const bool is_scalar = compiler->scalar_stage[nir->info.stage];

       progress = false;
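
The other change in this hunk is that brw_nir_optimize() no longer takes is_scalar from its callers; it derives it from compiler->scalar_stage. A rough sketch of the resulting shape (distinct name, return type, loop structure, and pass list are assumptions for illustration, not the actual implementation):

/* Sketch only: the scalar-vs-vec4 decision now lives inside the helper.
 * The real pass loop is much longer. */
static void
brw_nir_optimize_sketch(nir_shader *nir, const struct brw_compiler *compiler)
{
   const bool is_scalar = compiler->scalar_stage[nir->info.stage];
   bool progress;

   do {
      progress = false;
      /* ... run passes here, some gated on is_scalar, OR-ing into progress ... */
   } while (progress);
}
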
       OPT(nir_opt_intrinsics);
       OPT(nir_opt_idiv_const, 32);
       OPT(nir_opt_algebraic);
+
+      /* BFI2 did not exist until Gfx7, so there's no point in trying to
+       * optimize an instruction that should not get generated.
+       */
+      if (compiler->devinfo->ver >= 7)
+         OPT(nir_opt_reassociate_bfi);
+
       OPT(nir_lower_constant_convert_alu_types);
       OPT(nir_opt_constant_folding);
  * This is intended to be called more-or-less directly after you get the
  * shader out of GLSL or some other source. While it is geared towards i965,
- * it is not at all generator-specific except for the is_scalar flag. Even
- * there, it is safe to call with is_scalar = false for a shader that is
- * intended for the FS backend as long as nir_optimize is called again with
- * is_scalar = true to scalarize everything prior to code gen.
+ * it is not at all generator-specific.

 brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir,
     * So when robust image access is enabled, just avoid the workaround.
     */
-   if (devinfo->ver >= 12 && !opts->robust_image_access)
+   if (intel_needs_workaround(devinfo, 1806565034) && !opts->robust_image_access)
       OPT(brw_nir_clamp_image_1d_2d_array_sizes);
    const nir_lower_tex_options tex_options = {
       ...
       .lower_invalid_implicit_lod = true,
    };

-   OPT(nir_lower_tex, &tex_options);
+   /* In the case where TG4 coords are lowered to offsets and we have a
+    * lower_xehp_tg4_offset_filter lowering those offsets further, we need to
+    * rerun the pass because the instructions inserted by the first lowering
+    * are not visible during that first pass.
+    */
+   if (OPT(nir_lower_tex, &tex_options))
+      OPT(nir_lower_tex, &tex_options);
+
    OPT(nir_normalize_cubemap_coords);
    OPT(nir_lower_global_vars_to_local);

    OPT(nir_split_var_copies);
    OPT(nir_split_struct_vars, nir_var_function_temp);

-   brw_nir_optimize(nir, compiler, is_scalar);
+   brw_nir_optimize(nir, compiler);
    OPT(nir_lower_doubles, opts->softfp64, nir->options->lower_doubles_options);
-   if (OPT(nir_lower_int64)) {
+   if (OPT(nir_lower_int64_float_conversions)) {
       OPT(nir_opt_algebraic);
       OPT(nir_lower_doubles, opts->softfp64,
           nir->options->lower_doubles_options);
        nir_var_mem_ubo | nir_var_mem_ssbo,
        nir_lower_direct_array_deref_of_vec_load);

+   /* Clamp load_per_vertex_input of the TCS stage so that we do not generate
+    * loads reading out of bounds. We can do this here because we called
+    * nir_lower_system_values above.
+    */
+   if (nir->info.stage == MESA_SHADER_TESS_CTRL &&
+       compiler->use_tcs_multi_patch)
+      OPT(brw_nir_clamp_per_vertex_loads);
+
    /* Get rid of split copies */
-   brw_nir_optimize(nir, compiler, is_scalar);
+   brw_nir_optimize(nir, compiler);
               nir_metadata_block_index | nir_metadata_dominance, zero_inputs);

-   /* Code for Wa_14015590813 may have created input/output variables beyond
+   /* Code for Wa_18019110168 may have created input/output variables beyond
     * VARYING_SLOT_MAX and removed uses of variables below VARYING_SLOT_MAX.
     * Clean it up, so they all stay below VARYING_SLOT_MAX.
-   /* The rest of this function should be hit only for Wa_14015590813. */
+   /* The rest of this function should be hit only for Wa_18019110168. */

    nir_foreach_shader_out_variable(var, mesh) {
       gl_varying_slot location = var->data.location;
    if (p_is_scalar && c_is_scalar) {
       NIR_PASS(_, producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
       NIR_PASS(_, consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
-      brw_nir_optimize(producer, compiler, p_is_scalar);
-      brw_nir_optimize(consumer, compiler, c_is_scalar);
+      brw_nir_optimize(producer, compiler);
+      brw_nir_optimize(consumer, compiler);

    if (nir_link_opt_varyings(producer, consumer))
-      brw_nir_optimize(consumer, compiler, c_is_scalar);
+      brw_nir_optimize(consumer, compiler);

    NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
    NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
              brw_nir_no_indirect_mask(compiler, consumer->info.stage),

-   brw_nir_optimize(producer, compiler, p_is_scalar);
-   brw_nir_optimize(consumer, compiler, c_is_scalar);
+   brw_nir_optimize(producer, compiler);
+   brw_nir_optimize(consumer, compiler);

    if (producer->info.stage == MESA_SHADER_MESH &&
        consumer->info.stage == MESA_SHADER_FRAGMENT) {
    NIR_PASS(_, producer, nir_split_var_copies);
    NIR_PASS(_, producer, nir_lower_var_copies);

+   if (producer->info.stage == MESA_SHADER_TASK &&
+       consumer->info.stage == MESA_SHADER_MESH &&
+       !consumer->info.mesh.nv) {
+
+      for (unsigned i = 0; i < 3; ++i)
+         assert(producer->info.mesh.ts_mesh_dispatch_dimensions[i] <= UINT16_MAX);
+
+      nir_lower_compute_system_values_options options = {
+         .lower_workgroup_id_to_index = true,
+         .num_workgroups[0] = producer->info.mesh.ts_mesh_dispatch_dimensions[0],
+         .num_workgroups[1] = producer->info.mesh.ts_mesh_dispatch_dimensions[1],
+         .num_workgroups[2] = producer->info.mesh.ts_mesh_dispatch_dimensions[2],
+         /* nir_lower_idiv generates expensive code */
+         .shortcut_1d_workgroup_id = compiler->devinfo->verx10 >= 125,
+      };
+
+      NIR_PASS(_, consumer, nir_lower_compute_system_values, &options);
    if (low->intrinsic == nir_intrinsic_load_global_const_block_intel ||
+       low->intrinsic == nir_intrinsic_load_ubo_uniform_block_intel ||
        low->intrinsic == nir_intrinsic_load_ssbo_uniform_block_intel ||
-       low->intrinsic == nir_intrinsic_load_shared_uniform_block_intel) {
+       low->intrinsic == nir_intrinsic_load_shared_uniform_block_intel ||
+       low->intrinsic == nir_intrinsic_load_global_constant_uniform_block_intel) {
       if (num_components > 4) {
          if (!util_is_power_of_two_nonzero(num_components))
    /* Only combine pure memory barriers */
-   if ((nir_intrinsic_execution_scope(a) != NIR_SCOPE_NONE) ||
-       (nir_intrinsic_execution_scope(b) != NIR_SCOPE_NONE))
+   if ((nir_intrinsic_execution_scope(a) != SCOPE_NONE) ||
+       (nir_intrinsic_execution_scope(b) != SCOPE_NONE))

    /* Translation to backend IR will get rid of modes we don't care about, so
 static nir_mem_access_size_align
 get_mem_access_size_align(nir_intrinsic_op intrin, uint8_t bytes,
-                          uint32_t align_mul, uint32_t align_offset,
+                          uint8_t bit_size, uint32_t align_mul, uint32_t align_offset,
                           bool offset_is_const, const void *cb_data)
 {
    const uint32_t align = nir_combined_align(align_mul, align_offset);
 brw_vectorize_lower_mem_access(nir_shader *nir,
                                const struct brw_compiler *compiler,
                                bool robust_buffer_access)
 {
    bool progress = false;
+   const bool is_scalar = compiler->scalar_stage[nir->info.stage];

    if (is_scalar) {
       nir_load_store_vectorize_options options = {
-   OPT(nir_lower_mem_access_bit_sizes,
-       nir_var_mem_ssbo |
-       nir_var_mem_constant |
-       nir_var_mem_task_payload |
-       nir_var_shader_temp |
-       nir_var_function_temp |
-       nir_var_mem_global |
-       nir_var_mem_shared,
-       get_mem_access_size_align, NULL);
+   nir_lower_mem_access_bit_sizes_options mem_access_options = {
+      .modes = nir_var_mem_ssbo |
+               nir_var_mem_constant |
+               nir_var_mem_task_payload |
+               nir_var_shader_temp |
+               nir_var_function_temp |
+               nir_var_mem_global |
+               nir_var_mem_shared,
+      .callback = get_mem_access_size_align,
+   };
+   OPT(nir_lower_mem_access_bit_sizes, &mem_access_options);

    while (progress) {
       progress = false;
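
nir_lower_mem_access_bit_sizes() now takes an options struct instead of a (modes, callback, data) argument list, and the callback gains a bit_size parameter, as the hunks above show. A condensed sketch of the new usage (function names and the callback body here are purely illustrative; the real get_mem_access_size_align() holds the device-specific logic):

/* Illustrative callback: force every access to a single 32-bit component.
 * The real callback inspects intrin/bytes/bit_size/alignment. */
static nir_mem_access_size_align
mem_access_cb_sketch(nir_intrinsic_op intrin, uint8_t bytes, uint8_t bit_size,
                     uint32_t align_mul, uint32_t align_offset,
                     bool offset_is_const, const void *cb_data)
{
   return (nir_mem_access_size_align) {
      .num_components = 1,
      .bit_size = 32,
      .align = 4,
   };
}

static void
lower_bit_sizes_sketch(nir_shader *nir)
{
   const nir_lower_mem_access_bit_sizes_options opts = {
      .modes = nir_var_mem_ssbo | nir_var_mem_global,
      .callback = mem_access_cb_sketch,
   };
   nir_lower_mem_access_bit_sizes(nir, &opts);
}
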
 nir_shader_has_local_variables(const nir_shader *nir)
 {
-   nir_foreach_function(func, nir) {
-      if (func->impl && !exec_list_is_empty(&func->impl->locals))
+   nir_foreach_function_impl(impl, nir) {
+      if (!exec_list_is_empty(&impl->locals))
 brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
-                    bool is_scalar, bool debug_enabled,
+                    bool debug_enabled,
                     bool robust_buffer_access)
 {
    const struct intel_device_info *devinfo = compiler->devinfo;
+   const bool is_scalar = compiler->scalar_stage[nir->info.stage];

    UNUSED bool progress; /* Written by OPT */
    if (gl_shader_stage_can_set_fragment_shading_rate(nir->info.stage))
       NIR_PASS(_, nir, brw_nir_lower_shading_rate_output);

-   brw_nir_optimize(nir, compiler, is_scalar);
+   brw_nir_optimize(nir, compiler);

    if (is_scalar && nir_shader_has_local_variables(nir)) {
       OPT(nir_lower_vars_to_explicit_types, nir_var_function_temp,
           glsl_get_natural_size_align_bytes);
       OPT(nir_lower_explicit_io, nir_var_function_temp,
           nir_address_format_32bit_offset);
-      brw_nir_optimize(nir, compiler, is_scalar);
+      brw_nir_optimize(nir, compiler);
-   brw_vectorize_lower_mem_access(nir, compiler, is_scalar,
-                                  robust_buffer_access);
+   brw_vectorize_lower_mem_access(nir, compiler, robust_buffer_access);

    if (OPT(nir_lower_int64))
-      brw_nir_optimize(nir, compiler, is_scalar);
+      brw_nir_optimize(nir, compiler);

    if (devinfo->ver >= 6) {
       /* Try and fuse multiply-adds, if successful, run shrink_vectors to
    OPT(nir_lower_subgroups, &subgroups_options);

    if (OPT(nir_lower_int64))
-      brw_nir_optimize(nir, compiler, is_scalar);
+      brw_nir_optimize(nir, compiler);
    /* Clean up LCSSA phis */

    OPT(nir_copy_prop);
    OPT(nir_opt_dce);

-   OPT(nir_lower_locals_to_regs);
+   OPT(nir_lower_locals_to_regs, 32);
    if (unlikely(debug_enabled)) {
       /* Re-index SSA defs so we print more sensible numbers. */
-      nir_foreach_function(function, nir) {
-         nir_index_ssa_defs(function->impl);
+      nir_foreach_function_impl(impl, nir) {
+         nir_index_ssa_defs(impl);

       fprintf(stderr, "NIR (SSA form) for %s shader:\n",
    nir_validate_ssa_dominance(nir, "before nir_convert_from_ssa");

-   OPT(nir_convert_from_ssa, true);
+   OPT(nir_convert_from_ssa, true, false);

    if (!is_scalar) {
       OPT(nir_move_vec_src_uses_to_dest);
 brw_nir_apply_key(nir_shader *nir,
                   const struct brw_compiler *compiler,
                   const struct brw_base_prog_key *key,
-                  unsigned max_subgroup_size,
+                  unsigned max_subgroup_size)

    bool progress = false;

    OPT(brw_nir_limit_trig_input_range_workaround);

-   brw_nir_optimize(nir, compiler, is_scalar);
+   brw_nir_optimize(nir, compiler);

 enum brw_conditional_mod
 enum lsc_opcode
 lsc_aop_for_nir_intrinsic(const nir_intrinsic_instr *atomic)
 {
-   switch (atomic->intrinsic) {
-#define AOP_CASE(atom)                               \
-   case nir_intrinsic_image_atomic_##atom:           \
-   case nir_intrinsic_bindless_image_atomic_##atom:  \
-   case nir_intrinsic_ssbo_atomic_##atom:            \
-   case nir_intrinsic_shared_atomic_##atom:          \
-   case nir_intrinsic_global_atomic_##atom
+   switch (nir_intrinsic_atomic_op(atomic)) {
+   case nir_atomic_op_iadd: {

       unsigned src_idx;
       switch (atomic->intrinsic) {
-      case nir_intrinsic_image_atomic_add:
-      case nir_intrinsic_bindless_image_atomic_add:
+      case nir_intrinsic_image_atomic:
+      case nir_intrinsic_bindless_image_atomic:
       ...
-      case nir_intrinsic_ssbo_atomic_add:
+      case nir_intrinsic_ssbo_atomic:
       ...
-      case nir_intrinsic_shared_atomic_add:
-      case nir_intrinsic_global_atomic_add:
+      case nir_intrinsic_shared_atomic:
+      case nir_intrinsic_global_atomic:
       ...
       return LSC_OP_ATOMIC_ADD;

-   AOP_CASE(imin):      return LSC_OP_ATOMIC_MIN;
-   AOP_CASE(umin):      return LSC_OP_ATOMIC_UMIN;
-   AOP_CASE(imax):      return LSC_OP_ATOMIC_MAX;
-   AOP_CASE(umax):      return LSC_OP_ATOMIC_UMAX;
-   AOP_CASE(and):       return LSC_OP_ATOMIC_AND;
-   AOP_CASE(or):        return LSC_OP_ATOMIC_OR;
-   AOP_CASE(xor):       return LSC_OP_ATOMIC_XOR;
-   AOP_CASE(exchange):  return LSC_OP_ATOMIC_STORE;
-   AOP_CASE(comp_swap): return LSC_OP_ATOMIC_CMPXCHG;

-#define AOP_CASE(atom)                       \
-   case nir_intrinsic_ssbo_atomic_##atom:    \
-   case nir_intrinsic_shared_atomic_##atom:  \
-   case nir_intrinsic_global_atomic_##atom

-   AOP_CASE(fmin):       return LSC_OP_ATOMIC_FMIN;
-   AOP_CASE(fmax):       return LSC_OP_ATOMIC_FMAX;
-   AOP_CASE(fcomp_swap): return LSC_OP_ATOMIC_FCMPXCHG;
-   AOP_CASE(fadd):       return LSC_OP_ATOMIC_FADD;
+   case nir_atomic_op_imin:     return LSC_OP_ATOMIC_MIN;
+   case nir_atomic_op_umin:     return LSC_OP_ATOMIC_UMIN;
+   case nir_atomic_op_imax:     return LSC_OP_ATOMIC_MAX;
+   case nir_atomic_op_umax:     return LSC_OP_ATOMIC_UMAX;
+   case nir_atomic_op_iand:     return LSC_OP_ATOMIC_AND;
+   case nir_atomic_op_ior:      return LSC_OP_ATOMIC_OR;
+   case nir_atomic_op_ixor:     return LSC_OP_ATOMIC_XOR;
+   case nir_atomic_op_xchg:     return LSC_OP_ATOMIC_STORE;
+   case nir_atomic_op_cmpxchg:  return LSC_OP_ATOMIC_CMPXCHG;

+   case nir_atomic_op_fmin:     return LSC_OP_ATOMIC_FMIN;
+   case nir_atomic_op_fmax:     return LSC_OP_ATOMIC_FMAX;
+   case nir_atomic_op_fcmpxchg: return LSC_OP_ATOMIC_FCMPXCHG;
+   case nir_atomic_op_fadd:     return LSC_OP_ATOMIC_FADD;

       unreachable("Unsupported NIR atomic intrinsic");
 brw_nir_create_passthrough_tcs(void *mem_ctx, const struct brw_compiler *compiler,
                                const struct brw_tcs_prog_key *key)
 {
+   assert(key->input_vertices > 0);
+
    const nir_shader_compiler_options *options =
       compiler->nir_options[MESA_SHADER_TESS_CTRL];
+const struct glsl_type *
+brw_nir_get_var_type(const struct nir_shader *nir, nir_variable *var)
+{
+   const struct glsl_type *type = var->interface_type;
...
+      if (nir_is_arrayed_io(var, nir->info.stage) || var->data.per_view) {
+         assert(glsl_type_is_array(type));
+         type = glsl_get_array_element(type);
...

+brw_nir_pulls_at_sample(nir_shader *shader)
+{
+   nir_foreach_function_impl(impl, shader) {
+      nir_foreach_block(block, impl) {
+         nir_foreach_instr(instr, block) {
+            if (instr->type != nir_instr_type_intrinsic)
+               continue;
+
+            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+            if (intrin->intrinsic == nir_intrinsic_load_barycentric_at_sample)
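
The last hunk adds brw_nir_pulls_at_sample(), shown here only partially. A self-contained sketch of the same whole-shader scan, with the elided control flow filled in as an assumption (only the lines visible in the hunk are taken from the original):

/* Sketch: return true if the shader contains a load_barycentric_at_sample
 * intrinsic.  Early-out structure is assumed. */
static bool
pulls_at_sample_sketch(nir_shader *shader)
{
   nir_foreach_function_impl(impl, shader) {
      nir_foreach_block(block, impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic == nir_intrinsic_load_barycentric_at_sample)
               return true;
         }
      }
   }

   return false;
}
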