/*
 * Copyright © 2022 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir_builder.h"
27
/* This pass updates the block index in the resource_intel intrinsics if the
28
* array index is constant.
30
* This pass must be run before anv_nir_compute_push_layout().
33
update_resource_intel_block(nir_builder *b, nir_instr *instr, UNUSED void *data)
35
if (instr->type != nir_instr_type_intrinsic)
38
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
39
if (intrin->intrinsic != nir_intrinsic_resource_intel)
42
/* If the array index in the descriptor binding is not const, we won't be
43
* able to turn this load_ubo into a push constant.
45
* Also if not pushable, set the block to 0xffffffff.
47
* Otherwise we need to update the block index by adding the array index so
48
* that when anv_nir_compute_push_layout() uses the block value it uses the
49
* right surface in the array of the binding.
51
if (!nir_src_is_const(intrin->src[2]) ||
52
!(nir_intrinsic_resource_access_intel(intrin) &
53
nir_resource_intel_pushable)) {
54
nir_intrinsic_set_resource_block_intel(intrin, 0xffffffff);
55
nir_intrinsic_set_resource_access_intel(
57
nir_intrinsic_resource_access_intel(intrin) &
58
~nir_resource_intel_pushable);
60
nir_intrinsic_set_resource_block_intel(
62
nir_intrinsic_resource_block_intel(intrin) +
63
nir_src_as_uint(intrin->src[2]));
70
anv_nir_update_resource_intel_block(nir_shader *shader)
72
return nir_shader_instructions_pass(shader, update_resource_intel_block,
78
intrinsic_dont_need_rewrite(nir_intrinsic_instr *instr)
80
switch (instr->intrinsic) {
81
case nir_intrinsic_load_ubo:
82
case nir_intrinsic_load_ssbo:
83
case nir_intrinsic_store_ssbo:
85
case nir_intrinsic_image_load:
86
case nir_intrinsic_image_store:
87
case nir_intrinsic_image_atomic:
88
case nir_intrinsic_image_atomic_swap:
89
case nir_intrinsic_image_size:
90
case nir_intrinsic_image_load_raw_intel:
91
case nir_intrinsic_image_store_raw_intel:
92
case nir_intrinsic_image_samples:
93
case nir_intrinsic_bindless_image_load:
94
case nir_intrinsic_bindless_image_store:
95
case nir_intrinsic_bindless_image_atomic:
96
case nir_intrinsic_bindless_image_atomic_swap:
97
case nir_intrinsic_bindless_image_size:
105
struct lower_resource_state {
106
enum anv_descriptor_set_layout_type desc_type;
107
const struct anv_physical_device *device;
110
/* This pass lower resource_intel surface_index source, combining the
111
* descriptor set offset with the surface offset in the descriptor set.
113
* This pass must be run after anv_nir_compute_push_layout() because we want
114
* the push constant selection to tell if the surface offset is constant. Once
115
* combined the constant detection does not work anymore.
118
lower_resource_intel(nir_builder *b, nir_instr *instr, void *data)
120
if (instr->type != nir_instr_type_intrinsic)
123
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
124
if (intrin->intrinsic != nir_intrinsic_resource_intel)
127
const bool is_bindless =
128
(nir_intrinsic_resource_access_intel(intrin) &
129
nir_resource_intel_bindless) != 0;
130
const bool is_sampler =
131
(nir_intrinsic_resource_access_intel(intrin) &
132
nir_resource_intel_sampler) != 0;
133
const struct lower_resource_state *state = data;
138
b->cursor = nir_before_instr(instr);
140
nir_ssa_def *set_offset = intrin->src[0].ssa;
141
nir_ssa_def *binding_offset = intrin->src[1].ssa;
143
/* When using indirect descriptor, the surface handles are loaded from the
144
* descriptor buffer and do not need any offset.
146
if (state->desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT) {
147
if (!state->device->uses_ex_bso) {
148
/* We're trying to reduce the number of instructions in the shaders
149
* to compute surface handles. The assumption is that we're using
150
* more surface handles than sampler handles (UBO, SSBO, images,
151
* etc...) so it's worth optimizing that case.
153
* Surface handles in the extended descriptor message have to be
154
* shifted left by 6 prior to ex_bso (bits 31:12 in extended
155
* descriptor, match bits 25:6 of the surface handle). We have to
156
* combine 2 parts in the shader to build the final surface handle,
157
* base offset of the descriptor set (in the push constant, located
158
* in resource_intel::src[0]) and the relative descriptor offset
159
* (resource_intel::src[1]).
161
* For convenience, up to here, resource_intel::src[1] is in bytes.
162
* We now have to shift it left by 6 to match the shifted left by 6
163
* done for the push constant value provided in
164
* resource_intel::src[0]. That way the shader can just do a single
165
* ADD and get the surface handle.
167
* Samplers have a 4Gb heap and in the message they're in bits 31:6
168
* of the component 3 of the sampler message header. But since we
169
* push only a single offset for the base offset of the descriptor
170
* set, resource_intel::src[0] has to be shifted right by 6 (bringing
174
set_offset = nir_ushr_imm(b, set_offset, 6);
176
binding_offset = nir_ishl_imm(b, binding_offset, 6);
179
nir_instr_rewrite_src_ssa(instr, &intrin->src[1],
180
nir_iadd(b, set_offset, binding_offset));
183
/* Now unused values : set offset, array index */
184
nir_instr_rewrite_src_ssa(instr, &intrin->src[0], nir_imm_int(b, 0xdeaddeed));
185
nir_instr_rewrite_src_ssa(instr, &intrin->src[2], nir_imm_int(b, 0xdeaddeed));
191
anv_nir_lower_resource_intel(nir_shader *shader,
192
const struct anv_physical_device *device,
193
enum anv_descriptor_set_layout_type desc_type)
195
struct lower_resource_state state = {
196
.desc_type = desc_type,
199
return nir_shader_instructions_pass(shader, lower_resource_intel,
200
nir_metadata_block_index |
201
nir_metadata_dominance,