2
2
* Copyright 2023 Advanced Micro Devices, Inc.
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
4
* SPDX-License-Identifier: MIT
43
34
/* Slot index defined as FRAG_RESULT_MAX.
 * NOTE(review): presumably an extra slot past the regular fragment results,
 * used for dual-source-blend data - confirm against its users.
 */
#define DUAL_SRC_BLEND_SLOT FRAG_RESULT_MAX
37
/* Create the function-local variables that stand in for barycentric
 * coordinates, as selected by s->options, and record in
 * s->lower_load_barycentric whether any such variable exists.
 *
 * b: builder whose function impl receives the new local variables.
 * s: lowering state; its options are read and lower_load_barycentric is
 *    written.
 *
 * Every variable is created with glsl_vec_type(2).
 * NOTE(review): the assignment target of each nir_local_variable_create()
 * call is not visible here; presumably the matching s->persp_* / s->linear_*
 * field - confirm.
 */
create_interp_param(nir_builder *b, lower_ps_state *s)
39
/* Forcing sample interpolation needs a "persp_center" variable;
 * init_interp_param() stores the sample barycentrics into s->persp_center.
 */
if (s->options->force_persp_sample_interp) {
41
nir_local_variable_create(b->impl, glsl_vec_type(2), "persp_center");
44
/* "persp_centroid" is needed by the bc_optimize path and by both force modes. */
if (s->options->bc_optimize_for_persp ||
45
s->options->force_persp_sample_interp ||
46
s->options->force_persp_center_interp) {
48
nir_local_variable_create(b->impl, glsl_vec_type(2), "persp_centroid");
51
/* Forcing center interpolation needs a "persp_sample" variable;
 * init_interp_param() stores the pixel barycentrics into s->persp_sample.
 */
if (s->options->force_persp_center_interp) {
53
nir_local_variable_create(b->impl, glsl_vec_type(2), "persp_sample");
56
/* Same three cases again for the linear (noperspective) variables. */
if (s->options->force_linear_sample_interp) {
58
nir_local_variable_create(b->impl, glsl_vec_type(2), "linear_center");
61
if (s->options->bc_optimize_for_linear ||
62
s->options->force_linear_sample_interp ||
63
s->options->force_linear_center_interp) {
65
nir_local_variable_create(b->impl, glsl_vec_type(2), "linear_centroid");
68
if (s->options->force_linear_center_interp) {
70
nir_local_variable_create(b->impl, glsl_vec_type(2), "linear_sample");
73
/* True as soon as any of the six variables is non-NULL. */
s->lower_load_barycentric =
74
s->persp_center || s->persp_centroid || s->persp_sample ||
75
s->linear_center || s->linear_centroid || s->linear_sample;
79
/* Emit, at the very start of the function body, the stores that give the
 * barycentric replacement variables their values.
 *
 * b: builder; its cursor is moved to before the impl's body.
 * s: lowering state providing the options and the variables to store to.
 *
 * What gets stored, per option:
 *  - bc_optimize_for_persp / bc_optimize_for_linear: the centroid variable
 *    receives nir_bcsel(bc_optimize, center, centroid), where bc_optimize
 *    comes from nir_load_barycentric_optimize_amd(). That is loaded once and
 *    shared by both interpolation modes.
 *  - force_persp_sample_interp / force_linear_sample_interp: the sample
 *    barycentrics are stored into both the center and centroid variables.
 *  - force_persp_center_interp / force_linear_center_interp: the pixel
 *    barycentrics are stored into both the sample and centroid variables.
 *
 * Perspective loads use INTERP_MODE_SMOOTH; linear loads use
 * INTERP_MODE_NOPERSPECTIVE.
 */
init_interp_param(nir_builder *b, lower_ps_state *s)
81
b->cursor = nir_before_cf_list(&b->impl->body);
83
/* The shader should do: if (PRIM_MASK[31]) CENTROID = CENTER;
84
* The hw doesn't compute CENTROID if the whole wave only
85
* contains fully-covered quads.
87
if (s->options->bc_optimize_for_persp || s->options->bc_optimize_for_linear) {
88
nir_ssa_def *bc_optimize = nir_load_barycentric_optimize_amd(b);
90
if (s->options->bc_optimize_for_persp) {
92
nir_load_barycentric_pixel(b, 32, .interp_mode = INTERP_MODE_SMOOTH);
93
nir_ssa_def *centroid =
94
nir_load_barycentric_centroid(b, 32, .interp_mode = INTERP_MODE_SMOOTH);
96
nir_ssa_def *value = nir_bcsel(b, bc_optimize, center, centroid);
97
nir_store_var(b, s->persp_centroid, value, 0x3);
100
if (s->options->bc_optimize_for_linear) {
101
nir_ssa_def *center =
102
nir_load_barycentric_pixel(b, 32, .interp_mode = INTERP_MODE_NOPERSPECTIVE);
103
nir_ssa_def *centroid =
104
nir_load_barycentric_centroid(b, 32, .interp_mode = INTERP_MODE_NOPERSPECTIVE);
106
nir_ssa_def *value = nir_bcsel(b, bc_optimize, center, centroid);
107
nir_store_var(b, s->linear_centroid, value, 0x3);
111
if (s->options->force_persp_sample_interp) {
112
nir_ssa_def *sample =
113
nir_load_barycentric_sample(b, 32, .interp_mode = INTERP_MODE_SMOOTH);
114
nir_store_var(b, s->persp_center, sample, 0x3);
115
nir_store_var(b, s->persp_centroid, sample, 0x3);
118
if (s->options->force_linear_sample_interp) {
119
nir_ssa_def *sample =
120
nir_load_barycentric_sample(b, 32, .interp_mode = INTERP_MODE_NOPERSPECTIVE);
121
nir_store_var(b, s->linear_center, sample, 0x3);
122
nir_store_var(b, s->linear_centroid, sample, 0x3);
125
if (s->options->force_persp_center_interp) {
126
nir_ssa_def *center =
127
nir_load_barycentric_pixel(b, 32, .interp_mode = INTERP_MODE_SMOOTH);
128
nir_store_var(b, s->persp_sample, center, 0x3);
129
nir_store_var(b, s->persp_centroid, center, 0x3);
132
if (s->options->force_linear_center_interp) {
133
nir_ssa_def *center =
134
nir_load_barycentric_pixel(b, 32, .interp_mode = INTERP_MODE_NOPERSPECTIVE);
135
nir_store_var(b, s->linear_sample, center, 0x3);
136
nir_store_var(b, s->linear_centroid, center, 0x3);
141
/* Replace a load_barycentric_{pixel,centroid,sample} intrinsic with a load
 * of the matching replacement variable from the lowering state.
 *
 * b:      builder used to emit the variable load.
 * intrin: the barycentric load intrinsic being lowered.
 * s:      lowering state holding the persp_* and linear_* variables.
 *
 * The variable is chosen from the intrinsic's interp mode and opcode.
 * NOTE(review): the outer switch header, the break/default lines and any
 * handling of a NULL `var` are not visible here - confirm that a NULL
 * variable is rejected before nir_load_var() is reached.
 */
lower_ps_load_barycentric(nir_builder *b, nir_intrinsic_instr *intrin, lower_ps_state *s)
143
enum glsl_interp_mode mode = nir_intrinsic_interp_mode(intrin);
144
nir_variable *var = NULL;
147
/* INTERP_MODE_NONE shares the perspective variables with INTERP_MODE_SMOOTH. */
case INTERP_MODE_NONE:
148
case INTERP_MODE_SMOOTH:
149
switch (intrin->intrinsic) {
150
case nir_intrinsic_load_barycentric_pixel:
151
var = s->persp_center;
153
case nir_intrinsic_load_barycentric_centroid:
154
var = s->persp_centroid;
156
case nir_intrinsic_load_barycentric_sample:
157
var = s->persp_sample;
164
case INTERP_MODE_NOPERSPECTIVE:
165
switch (intrin->intrinsic) {
166
case nir_intrinsic_load_barycentric_pixel:
167
var = s->linear_center;
169
case nir_intrinsic_load_barycentric_centroid:
170
var = s->linear_centroid;
172
case nir_intrinsic_load_barycentric_sample:
173
var = s->linear_sample;
187
/* Emit the variable load right before the intrinsic it replaces. */
b->cursor = nir_before_instr(&intrin->instr);
189
nir_ssa_def *replacement = nir_load_var(b, var);
190
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, replacement);
192
/* All uses now read the variable load, so the intrinsic can be dropped. */
nir_instr_remove(&intrin->instr);
46
197
/* Record information about a fragment-shader output store in the lowering
 * state and decide whether the store instruction stays in the shader.
 *
 * b:      builder (not used in the lines visible here).
 * intrin: the output store intrinsic being examined.
 * s:      lowering state; s->output_types[slot] is written.
 *
 * The instruction is removed when s->options->no_color_export is false, or
 * when the slot is below FRAG_RESULT_DATA0 and is not FRAG_RESULT_COLOR.
 * NOTE(review): how `slot` and `type` are derived, and whatever else is
 * gathered, is not visible here.
 */
gather_ps_store_output(nir_builder *b, nir_intrinsic_instr *intrin, lower_ps_state *s)
67
218
s->output_types[slot] = type;
220
/* Keep color output instruction if not exported in nir. */
221
if (!s->options->no_color_export ||
222
(slot < FRAG_RESULT_DATA0 && slot != FRAG_RESULT_COLOR)) {
223
nir_instr_remove(&intrin->instr);
230
/* Replace a sample-mask-in load with the loaded mask ANDed with a
 * per-sample sub-mask.
 *
 * b:      builder; new instructions are emitted right before `intrin`.
 * intrin: the intrinsic whose result is replaced and which is then removed.
 * s:      lowering state; s->options->ps_iter_samples is read.
 *
 * The sub-mask is ac_get_ps_iter_mask(ps_iter_samples) shifted left by the
 * sample ID. The replacement value is a freshly emitted
 * nir_load_sample_mask_in() ANDed with that sub-mask.
 */
lower_ps_load_sample_mask_in(nir_builder *b, nir_intrinsic_instr *intrin, lower_ps_state *s)
232
/* Section 15.2.2 (Shader Inputs) of the OpenGL 4.5 (Core Profile) spec
235
* "When per-sample shading is active due to the use of a fragment
236
* input qualified by sample or due to the use of the gl_SampleID
237
* or gl_SamplePosition variables, only the bit for the current
238
* sample is set in gl_SampleMaskIn. When state specifies multiple
239
* fragment shader invocations for a given fragment, the sample
240
* mask for any single fragment shader invocation may specify a
241
* subset of the covered samples for the fragment. In this case,
242
* the bit corresponding to each covered sample will be set in
243
* exactly one fragment shader invocation."
245
* The samplemask loaded by hardware is always the coverage of the
246
* entire pixel/fragment, so mask bits out based on the sample ID.
249
b->cursor = nir_before_instr(&intrin->instr);
251
uint32_t ps_iter_mask = ac_get_ps_iter_mask(s->options->ps_iter_samples);
252
nir_ssa_def *sampleid = nir_load_sample_id(b);
253
nir_ssa_def *submask = nir_ishl(b, nir_imm_int(b, ps_iter_mask), sampleid);
255
nir_ssa_def *sample_mask = nir_load_sample_mask_in(b);
256
nir_ssa_def *replacement = nir_iand(b, sample_mask, submask);
258
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, replacement);
69
260
nir_instr_remove(&intrin->instr);
497
716
/* Emit the export used when the shader has no other export to emit.
 *
 * b: builder; b->shader->info.fs interlock flags are read.
 * s: lowering state; gfx_level and uses_discard are read from s->options.
 *
 * `pops` is true when any of the four sample/pixel interlock flags
 * (ordered or unordered) is set. On GFX11+ with `pops`, a scoped memory
 * barrier with release semantics is emitted first. The export target is
 * V_008DFC_SQ_EXP_MRT on GFX11+ and V_008DFC_SQ_EXP_NULL otherwise.
 *
 * NOTE(review): two variants of the GFX10+ condition appear back to back,
 * one without and one with the `!pops` term. Only one can be intended;
 * the surrounding comment suggests the `!pops` form - confirm.
 * NOTE(review): the statement guarded by that condition and the export
 * emission itself are not visible here.
 */
emit_ps_null_export(nir_builder *b, lower_ps_state *s)
718
const bool pops = b->shader->info.fs.sample_interlock_ordered ||
719
b->shader->info.fs.sample_interlock_unordered ||
720
b->shader->info.fs.pixel_interlock_ordered ||
721
b->shader->info.fs.pixel_interlock_unordered;
499
723
/* Gfx10+ doesn't need to export anything if we don't need to export the EXEC mask
725
* In Primitive Ordered Pixel Shading, however, GFX11+ explicitly uses the `done` export to exit
726
* the ordered section, and before GFX11, shaders with POPS also need an export.
502
if (s->options->gfx_level >= GFX10 && !s->options->uses_discard)
728
if (s->options->gfx_level >= GFX10 && !s->options->uses_discard && !pops)
731
/* The `done` export exits the POPS ordered section on GFX11+, make sure UniformMemory and
732
* ImageMemory (in SPIR-V terms) accesses from the ordered section may not be reordered below it.
734
if (s->options->gfx_level >= GFX11 && pops)
735
nir_scoped_memory_barrier(b, SCOPE_QUEUE_FAMILY, NIR_MEMORY_RELEASE,
736
nir_var_image | nir_var_mem_ubo | nir_var_mem_ssbo |
505
739
/* Gfx11 doesn't support null exports, and mrt0 should be exported instead. */
506
740
unsigned target = s->options->gfx_level >= GFX11 ?
507
741
V_008DFC_SQ_EXP_MRT : V_008DFC_SQ_EXP_NULL;
584
816
if (s->exp_num) {
585
if (s->options->dual_src_blend_swizzle)
817
if (s->options->dual_src_blend_swizzle) {
586
818
emit_ps_dual_src_blend_swizzle(b, s, first_color_export);
819
/* Skip last export flag setting because they have been replaced by
820
* a pseudo instruction.
822
if (s->options->use_aco)
588
826
/* Specify that this is the last export */
589
827
nir_intrinsic_instr *final_exp = s->exp[s->exp_num - 1];
590
828
unsigned final_exp_flags = nir_intrinsic_flags(final_exp);
591
829
final_exp_flags |= AC_EXP_FLAG_DONE | AC_EXP_FLAG_VALID_MASK;
592
830
nir_intrinsic_set_flags(final_exp, final_exp_flags);
832
/* The `done` export exits the POPS ordered section on GFX11+, make sure UniformMemory and
833
* ImageMemory (in SPIR-V terms) accesses from the ordered section may not be reordered below
836
if (s->options->gfx_level >= GFX11 &&
837
(b->shader->info.fs.sample_interlock_ordered ||
838
b->shader->info.fs.sample_interlock_unordered ||
839
b->shader->info.fs.pixel_interlock_ordered ||
840
b->shader->info.fs.pixel_interlock_unordered)) {
841
b->cursor = nir_before_instr(&final_exp->instr);
842
nir_scoped_memory_barrier(b, SCOPE_QUEUE_FAMILY, NIR_MEMORY_RELEASE,
843
nir_var_image | nir_var_mem_ubo | nir_var_mem_ssbo |
594
847
emit_ps_null_export(b, s);
599
852
/* Entry point of the pixel-shader lowering.
 *
 * nir:     shader to lower; its entrypoint impl is the one modified.
 * options: lowering options, stored in the pass state.
 *
 * Steps, in the order the calls appear:
 *  1. create_interp_param()  - create the barycentric replacement variables.
 *  2. nir_shader_instructions_pass() with lower_ps_intrinsic.
 *  3. init_interp_param()    - emit the stores that initialize the variables.
 *  4. export_ps_outputs().
 *  5. nir_lower_vars_to_ssa() when state.lower_load_barycentric is set.
 *
 * NOTE(review): export_ps_outputs() is called twice below with different
 * first arguments (`nir` and `b`). Only one call can be intended; confirm
 * which signature is current.
 */
ac_nir_lower_ps(nir_shader *nir, const ac_nir_lower_ps_options *options)
854
nir_function_impl *impl = nir_shader_get_entrypoint(nir);
856
nir_builder builder = nir_builder_create(impl);
857
nir_builder *b = &builder;
601
859
lower_ps_state state = {
602
860
.options = options,
863
create_interp_param(b, &state);
605
865
nir_shader_instructions_pass(nir, lower_ps_intrinsic,
606
866
nir_metadata_block_index | nir_metadata_dominance,
609
export_ps_outputs(nir, &state);
869
/* Must run after lower_ps_intrinsic() so that the intrinsics added here are not lowered by it. */
870
init_interp_param(b, &state);
872
export_ps_outputs(b, &state);
874
/* Cleanup nir variable, as RADV won't do this. */
875
if (state.lower_load_barycentric)
876
nir_lower_vars_to_ssa(nir);