/*
* Copyright 2017 Advanced Micro Devices, Inc.
 *
* SPDX-License-Identifier: MIT
 */

#include "nir_builder.h"
#include "ac_nir.h"
bool si_alu_to_scalar_packed_math_filter(const nir_instr *instr, const void *data)
{
   if (instr->type == nir_instr_type_alu) {
      nir_alu_instr *alu = nir_instr_as_alu(instr);

      if (alu->dest.dest.is_ssa &&
          alu->dest.dest.ssa.bit_size == 16 &&
          alu->dest.dest.ssa.num_components == 2)
         return false;
   }

   return true;
}

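/* A minimal sketch (an assumption, not shown in this file) of how the filter
 * above would be hooked up: the screen's NIR compiler options install it only
 * when the hardware supports packed 16-bit math, e.g.
 *
 *    options->lower_to_scalar = true;
 *    options->lower_to_scalar_filter =
 *       sscreen->info.has_packed_math_16bit ? si_alu_to_scalar_packed_math_filter
 *                                           : NULL;
 *
 * so that the nir_lower_alu_to_scalar calls below, which pass
 * nir->options->lower_to_scalar_filter, leave packed 16-bit ALU ops alone.
 */
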
   bool lower_phis_to_scalar = false;

   NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
   NIR_PASS(progress, nir, nir_lower_alu_to_scalar,
            nir->options->lower_to_scalar_filter, NULL);
   NIR_PASS(progress, nir, nir_lower_phis_to_scalar, false);

   NIR_PASS(progress, nir, nir_opt_dce);
   NIR_PASS(lower_phis_to_scalar, nir, nir_opt_if,
            nir_opt_if_aggressive_last_continue |
            nir_opt_if_optimize_phi_true_false);
   NIR_PASS(progress, nir, nir_opt_dead_cf);
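
   /* Run the scalarization passes again if any pass in this iteration asked
    * for it (nir_opt_if above records its progress in lower_phis_to_scalar),
    * and count that as progress for the surrounding optimization loop. */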
   if (lower_alu_to_scalar) {
      NIR_PASS_V(nir, nir_lower_alu_to_scalar,
                 nir->options->lower_to_scalar_filter, NULL);
   }
   if (lower_phis_to_scalar)
      NIR_PASS_V(nir, nir_lower_phis_to_scalar, false);
   progress |= lower_alu_to_scalar | lower_phis_to_scalar;

   NIR_PASS_V(nir, si_lower_intrinsics);
   NIR_PASS_V(nir, ac_nir_lower_sin_cos);
   NIR_PASS_V(nir, nir_lower_subgroups, &si_nir_subgroups_options);
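
   /* 'true' requests correct quad/derivative operations after a discard
    * (demote semantics). */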
   NIR_PASS_V(nir, nir_lower_discard_or_demote, true);

   /* Lower load constants to scalar and then clean up the mess */
   NIR_PASS_V(nir, nir_lower_load_const_to_scalar);

   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);

static bool si_mark_divergent_texture_non_uniform(struct nir_shader *nir)
{
   assert(nir->info.divergence_analysis_run);

   /* sampler_non_uniform and texture_non_uniform are always false in GLSL,
    * but this can lead to unexpected behavior if the texture/sampler index
    * comes from a vertex attribute.
    *
    * For instance, 2 consecutive draws using 2 different index values
    * could be squashed together by the hw, producing a single draw with
    * a non-dynamically-uniform index.
    *
    * To avoid this, detect divergent indexing and mark it as non-uniform,
    * so that a waterfall loop can be applied to these indices later (either
    * by the llvm backend or by nir_lower_non_uniform_access).
    *
    * See https://gitlab.freedesktop.org/mesa/mesa/-/issues/2253
    */
   bool divergence_changed = false;

   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
   nir_foreach_block_safe(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_tex)
            continue;

         nir_tex_instr *tex = nir_instr_as_tex(instr);
         for (int i = 0; i < tex->num_srcs; i++) {
            bool divergent = tex->src[i].src.ssa->divergent;

            switch (tex->src[i].src_type) {
            case nir_tex_src_texture_deref:
            case nir_tex_src_texture_handle:
               tex->texture_non_uniform |= divergent;
               break;
            case nir_tex_src_sampler_deref:
            case nir_tex_src_sampler_handle:
               tex->sampler_non_uniform |= divergent;
               break;
            default:
               break;
            }
         }

         /* If dest is already divergent, divergence won't change. */
         divergence_changed |= !tex->dest.ssa.divergent &&
                               (tex->texture_non_uniform || tex->sampler_non_uniform);
      }
   }

   nir_metadata_preserve(impl, nir_metadata_all);
   return divergence_changed;
}

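/* pipe_screen::finalize_nir hook: the gallium frontend calls this once per
 * shader so radeonsi can run its NIR lowering and cleanup on the shader
 * before it is used.
 */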
char *si_finalize_nir(struct pipe_screen *screen, void *nirptr)
{
   struct si_screen *sscreen = (struct si_screen *)screen;
   struct nir_shader *nir = (struct nir_shader *)nirptr;

   nir_lower_io_passes(nir, false);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_in | nir_var_shader_out, NULL);

   if (nir->info.stage == MESA_SHADER_FRAGMENT)
      NIR_PASS_V(nir, nir_lower_color_inputs);

   NIR_PASS_V(nir, ac_nir_lower_subdword_loads,
              (ac_nir_lower_subdword_options) {

   NIR_PASS_V(nir, nir_convert_to_lcssa, true, true); /* required by divergence analysis */
   NIR_PASS_V(nir, nir_divergence_analysis); /* to find divergent loops */

   /* Must be after divergence analysis. */
   bool divergence_changed = false;
   NIR_PASS(divergence_changed, nir, si_mark_divergent_texture_non_uniform);
   /* Re-analyze the whole shader if texture instruction divergence changed. */
   if (divergence_changed)
      NIR_PASS_V(nir, nir_divergence_analysis);