/*
 * Copyright 2018 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
24
#include "zink_context.h"
25
#include "zink_compiler.h"
26
#include "zink_program.h"
27
#include "zink_screen.h"
28
#include "nir_to_spirv/nir_to_spirv.h"
30
#include "pipe/p_state.h"
33
#include "compiler/nir/nir_builder.h"
35
#include "nir/tgsi_to_nir.h"
36
#include "tgsi/tgsi_dump.h"
37
#include "tgsi/tgsi_from_mesa.h"
39
#include "util/u_memory.h"
42
zink_lower_cubemap_to_array(nir_shader *s, uint32_t nonseamless_cube_mask);
45
create_vs_pushconst(nir_shader *nir)
47
nir_variable *vs_pushconst;
48
/* create compatible layout for the ntv push constant loader */
49
struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, 2);
50
fields[0].type = glsl_array_type(glsl_uint_type(), 1, 0);
51
fields[0].name = ralloc_asprintf(nir, "draw_mode_is_indexed");
52
fields[0].offset = offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed);
53
fields[1].type = glsl_array_type(glsl_uint_type(), 1, 0);
54
fields[1].name = ralloc_asprintf(nir, "draw_id");
55
fields[1].offset = offsetof(struct zink_gfx_push_constant, draw_id);
56
vs_pushconst = nir_variable_create(nir, nir_var_mem_push_const,
57
glsl_struct_type(fields, 2, "struct", false), "vs_pushconst");
58
vs_pushconst->data.location = INT_MAX; //doesn't really matter
62
create_cs_pushconst(nir_shader *nir)
64
nir_variable *cs_pushconst;
65
/* create compatible layout for the ntv push constant loader */
66
struct glsl_struct_field *fields = rzalloc_size(nir, 1 * sizeof(struct glsl_struct_field));
67
fields[0].type = glsl_array_type(glsl_uint_type(), 1, 0);
68
fields[0].name = ralloc_asprintf(nir, "work_dim");
70
cs_pushconst = nir_variable_create(nir, nir_var_mem_push_const,
71
glsl_struct_type(fields, 1, "struct", false), "cs_pushconst");
72
cs_pushconst->data.location = INT_MAX; //doesn't really matter
76
reads_work_dim(nir_shader *shader)
78
return BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_WORK_DIM);
82
lower_work_dim_instr(nir_builder *b, nir_instr *in, void *data)
84
if (in->type != nir_instr_type_intrinsic)
86
nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
87
if (instr->intrinsic != nir_intrinsic_load_work_dim)
90
if (instr->intrinsic == nir_intrinsic_load_work_dim) {
91
b->cursor = nir_after_instr(&instr->instr);
92
nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
93
load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
94
nir_intrinsic_set_range(load, 3 * sizeof(uint32_t));
95
load->num_components = 1;
96
nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "work_dim");
97
nir_builder_instr_insert(b, &load->instr);
99
nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);
106
lower_work_dim(nir_shader *shader)
108
if (shader->info.stage != MESA_SHADER_KERNEL)
111
if (!reads_work_dim(shader))
114
return nir_shader_instructions_pass(shader, lower_work_dim_instr, nir_metadata_dominance, NULL);
118
lower_64bit_vertex_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
120
if (instr->type != nir_instr_type_intrinsic)
122
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
123
if (intr->intrinsic != nir_intrinsic_load_deref)
125
nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(intr->src[0].ssa->parent_instr));
126
if (var->data.mode != nir_var_shader_in)
128
if (!glsl_type_is_64bit(var->type) || !glsl_type_is_vector(var->type) || glsl_get_vector_elements(var->type) < 3)
131
/* create second variable for the split */
132
nir_variable *var2 = nir_variable_clone(var, b->shader);
133
/* split new variable into second slot */
134
var2->data.driver_location++;
135
nir_shader_add_variable(b->shader, var2);
137
unsigned total_num_components = glsl_get_vector_elements(var->type);
138
/* new variable is the second half of the dvec */
139
var2->type = glsl_vector_type(glsl_get_base_type(var->type), glsl_get_vector_elements(var->type) - 2);
140
/* clamp original variable to a dvec2 */
141
var->type = glsl_vector_type(glsl_get_base_type(var->type), 2);
143
b->cursor = nir_after_instr(instr);
145
/* this is the first load instruction for the first half of the dvec3/4 components */
146
nir_ssa_def *load = nir_load_var(b, var);
147
/* this is the second load instruction for the second half of the dvec3/4 components */
148
nir_ssa_def *load2 = nir_load_var(b, var2);
151
/* create a new dvec3/4 comprised of all the loaded components from both variables */
152
def[0] = nir_vector_extract(b, load, nir_imm_int(b, 0));
153
def[1] = nir_vector_extract(b, load, nir_imm_int(b, 1));
154
def[2] = nir_vector_extract(b, load2, nir_imm_int(b, 0));
155
if (total_num_components == 4)
156
def[3] = nir_vector_extract(b, load2, nir_imm_int(b, 1));
157
nir_ssa_def *new_vec = nir_vec(b, def, total_num_components);
158
/* use the assembled dvec3/4 for all other uses of the load */
159
nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, new_vec,
160
new_vec->parent_instr);
162
/* remove the original instr and its deref chain */
163
nir_instr *parent = intr->src[0].ssa->parent_instr;
164
nir_instr_remove(instr);
165
nir_deref_instr_remove_if_unused(nir_instr_as_deref(parent));
170
/* mesa/gallium always provides UINT versions of 64bit formats:
171
* - rewrite loads as 32bit vec loads
172
* - cast back to 64bit
175
lower_64bit_uint_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
177
if (instr->type != nir_instr_type_intrinsic)
179
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
180
if (intr->intrinsic != nir_intrinsic_load_deref)
182
nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(intr->src[0].ssa->parent_instr));
183
if (var->data.mode != nir_var_shader_in)
185
if (glsl_get_bit_size(var->type) != 64 || glsl_get_base_type(var->type) >= GLSL_TYPE_SAMPLER)
188
unsigned num_components = glsl_get_vector_elements(var->type);
189
enum glsl_base_type base_type;
190
switch (glsl_get_base_type(var->type)) {
191
case GLSL_TYPE_UINT64:
192
base_type = GLSL_TYPE_UINT;
194
case GLSL_TYPE_INT64:
195
base_type = GLSL_TYPE_INT;
197
case GLSL_TYPE_DOUBLE:
198
base_type = GLSL_TYPE_FLOAT;
201
unreachable("unknown 64-bit vertex attribute format!");
203
var->type = glsl_vector_type(base_type, num_components * 2);
205
b->cursor = nir_after_instr(instr);
207
nir_ssa_def *load = nir_load_var(b, var);
208
nir_ssa_def *casted[2];
209
for (unsigned i = 0; i < num_components; i++)
210
casted[i] = nir_pack_64_2x32(b, nir_channels(b, load, BITFIELD_RANGE(i * 2, 2)));
211
nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_vec(b, casted, num_components));
213
/* remove the original instr and its deref chain */
214
nir_instr *parent = intr->src[0].ssa->parent_instr;
215
nir_instr_remove(instr);
216
nir_deref_instr_remove_if_unused(nir_instr_as_deref(parent));
221
/* "64-bit three- and four-component vectors consume two consecutive locations."
222
* - 14.1.4. Location Assignment
224
* this pass splits dvec3 and dvec4 vertex inputs into a dvec2 and a double/dvec2 which
225
* are assigned to consecutive locations, loaded separately, and then assembled back into a
226
* composite value that's used in place of the original loaded ssa src
229
lower_64bit_vertex_attribs(nir_shader *shader)
231
if (shader->info.stage != MESA_SHADER_VERTEX)
234
bool progress = nir_shader_instructions_pass(shader, lower_64bit_vertex_attribs_instr, nir_metadata_dominance, NULL);
235
progress |= nir_shader_instructions_pass(shader, lower_64bit_uint_attribs_instr, nir_metadata_dominance, NULL);
240
lower_basevertex_instr(nir_builder *b, nir_instr *in, void *data)
242
if (in->type != nir_instr_type_intrinsic)
244
nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
245
if (instr->intrinsic != nir_intrinsic_load_base_vertex)
248
b->cursor = nir_after_instr(&instr->instr);
249
nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
250
load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
251
nir_intrinsic_set_range(load, 4);
252
load->num_components = 1;
253
nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_mode_is_indexed");
254
nir_builder_instr_insert(b, &load->instr);
256
nir_ssa_def *composite = nir_build_alu(b, nir_op_bcsel,
257
nir_build_alu(b, nir_op_ieq, &load->dest.ssa, nir_imm_int(b, 1), NULL, NULL),
262
nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, composite,
263
composite->parent_instr);
268
lower_basevertex(nir_shader *shader)
270
if (shader->info.stage != MESA_SHADER_VERTEX)
273
if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX))
276
return nir_shader_instructions_pass(shader, lower_basevertex_instr, nir_metadata_dominance, NULL);
281
lower_drawid_instr(nir_builder *b, nir_instr *in, void *data)
283
if (in->type != nir_instr_type_intrinsic)
285
nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
286
if (instr->intrinsic != nir_intrinsic_load_draw_id)
289
b->cursor = nir_before_instr(&instr->instr);
290
nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
291
load->src[0] = nir_src_for_ssa(nir_imm_int(b, 1));
292
nir_intrinsic_set_range(load, 4);
293
load->num_components = 1;
294
nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_id");
295
nir_builder_instr_insert(b, &load->instr);
297
nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);
303
lower_drawid(nir_shader *shader)
305
if (shader->info.stage != MESA_SHADER_VERTEX)
308
if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
311
return nir_shader_instructions_pass(shader, lower_drawid_instr, nir_metadata_dominance, NULL);
315
lower_dual_blend(nir_shader *shader)
317
bool progress = false;
318
nir_variable *var = nir_find_variable_with_location(shader, nir_var_shader_out, FRAG_RESULT_DATA1);
320
var->data.location = FRAG_RESULT_DATA0;
324
nir_shader_preserve_all_metadata(shader);
329
zink_screen_init_compiler(struct zink_screen *screen)
331
static const struct nir_shader_compiler_options
333
.lower_ffma16 = true,
334
.lower_ffma32 = true,
335
.lower_ffma64 = true,
338
.lower_flrp32 = true,
341
.lower_extract_byte = true,
342
.lower_extract_word = true,
343
.lower_insert_byte = true,
344
.lower_insert_word = true,
345
.lower_mul_high = true,
346
.lower_rotate = true,
347
.lower_uadd_carry = true,
348
.lower_pack_64_2x32_split = true,
349
.lower_unpack_64_2x32_split = true,
350
.lower_pack_32_2x16_split = true,
351
.lower_unpack_32_2x16_split = true,
352
.lower_vector_cmp = true,
353
.lower_int64_options = 0,
354
.lower_doubles_options = 0,
355
.lower_uniforms_to_ubo = true,
359
.lower_mul_2x32_64 = true,
360
.support_16bit_alu = true, /* not quite what it sounds like */
363
screen->nir_options = default_options;
365
if (!screen->info.feats.features.shaderInt64)
366
screen->nir_options.lower_int64_options = ~0;
368
if (!screen->info.feats.features.shaderFloat64) {
369
screen->nir_options.lower_doubles_options = ~0;
370
screen->nir_options.lower_flrp64 = true;
371
screen->nir_options.lower_ffma64 = true;
375
The OpFRem and OpFMod instructions use cheap approximations of remainder,
376
and the error can be large due to the discontinuity in trunc() and floor().
377
This can produce mathematically unexpected results in some cases, such as
378
FMod(x,x) computing x rather than 0, and can also cause the result to have
379
a different sign than the infinitely precise result.
381
-Table 84. Precision of core SPIR-V Instructions
382
* for drivers that are known to have imprecise fmod for doubles, lower dmod
384
if (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_RADV ||
385
screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
386
screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_PROPRIETARY)
387
screen->nir_options.lower_doubles_options = nir_lower_dmod;
391
zink_get_compiler_options(struct pipe_screen *pscreen,
392
enum pipe_shader_ir ir,
393
enum pipe_shader_type shader)
395
assert(ir == PIPE_SHADER_IR_NIR);
396
return &zink_screen(pscreen)->nir_options;
400
zink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens)
402
if (zink_debug & ZINK_DEBUG_TGSI) {
403
fprintf(stderr, "TGSI shader:\n---8<---\n");
404
tgsi_dump_to_file(tokens, 0, stderr);
405
fprintf(stderr, "---8<---\n\n");
408
return tgsi_to_nir(tokens, screen, false);
412
optimize_nir(struct nir_shader *s)
417
NIR_PASS_V(s, nir_lower_vars_to_ssa);
418
NIR_PASS(progress, s, nir_copy_prop);
419
NIR_PASS(progress, s, nir_opt_remove_phis);
420
NIR_PASS(progress, s, nir_opt_dce);
421
NIR_PASS(progress, s, nir_opt_dead_cf);
422
NIR_PASS(progress, s, nir_lower_phis_to_scalar, false);
423
NIR_PASS(progress, s, nir_opt_cse);
424
NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
425
NIR_PASS(progress, s, nir_opt_algebraic);
426
NIR_PASS(progress, s, nir_opt_constant_folding);
427
NIR_PASS(progress, s, nir_opt_undef);
428
NIR_PASS(progress, s, zink_nir_lower_b2b);
433
NIR_PASS(progress, s, nir_opt_algebraic_late);
435
NIR_PASS_V(s, nir_copy_prop);
436
NIR_PASS_V(s, nir_opt_dce);
437
NIR_PASS_V(s, nir_opt_cse);
442
/* - copy the lowered fbfetch variable
443
* - set the new one up as an input attachment for descriptor 0.6
444
* - load it as an image
445
* - overwrite the previous load
448
lower_fbfetch_instr(nir_builder *b, nir_instr *instr, void *data)
450
bool ms = data != NULL;
451
if (instr->type != nir_instr_type_intrinsic)
453
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
454
if (intr->intrinsic != nir_intrinsic_load_deref)
456
nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
457
if (!var->data.fb_fetch_output)
459
b->cursor = nir_after_instr(instr);
460
nir_variable *fbfetch = nir_variable_clone(var, b->shader);
461
/* If Dim is SubpassData, ... Image Format must be Unknown
462
* - SPIRV OpTypeImage specification
464
fbfetch->data.image.format = 0;
465
fbfetch->data.index = 0; /* fix this if more than 1 fbfetch target is supported */
466
fbfetch->data.mode = nir_var_uniform;
467
fbfetch->data.binding = ZINK_FBFETCH_BINDING;
468
fbfetch->data.binding = ZINK_FBFETCH_BINDING;
469
fbfetch->data.sample = ms;
470
enum glsl_sampler_dim dim = ms ? GLSL_SAMPLER_DIM_SUBPASS_MS : GLSL_SAMPLER_DIM_SUBPASS;
471
fbfetch->type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
472
nir_shader_add_variable(b->shader, fbfetch);
473
nir_ssa_def *deref = &nir_build_deref_var(b, fbfetch)->dest.ssa;
474
nir_ssa_def *sample = ms ? nir_load_sample_id(b) : nir_ssa_undef(b, 1, 32);
475
nir_ssa_def *load = nir_image_deref_load(b, 4, 32, deref, nir_imm_vec4(b, 0, 0, 0, 1), sample, nir_imm_int(b, 0));
476
unsigned swiz[4] = {2, 1, 0, 3};
477
nir_ssa_def *swizzle = nir_swizzle(b, load, swiz, 4);
478
nir_ssa_def_rewrite_uses(&intr->dest.ssa, swizzle);
483
lower_fbfetch(nir_shader *shader, nir_variable **fbfetch, bool ms)
485
nir_foreach_shader_out_variable(var, shader) {
486
if (var->data.fb_fetch_output) {
494
return nir_shader_instructions_pass(shader, lower_fbfetch_instr, nir_metadata_dominance, (void*)ms);
497
/* check for a genuine gl_PointSize output vs one from nir_lower_point_size_mov */
499
check_psiz(struct nir_shader *s)
501
bool have_psiz = false;
502
nir_foreach_shader_out_variable(var, s) {
503
if (var->data.location == VARYING_SLOT_PSIZ) {
504
/* genuine PSIZ outputs will have this set */
505
have_psiz |= !!var->data.explicit_location;
511
static nir_variable *
512
find_var_with_location_frac(nir_shader *nir, unsigned location, unsigned location_frac, bool have_psiz)
515
if (!location_frac && location != VARYING_SLOT_PSIZ) {
516
nir_foreach_shader_out_variable(var, nir) {
517
if (var->data.location == location)
522
/* multiple variables found for this location: find the biggest one */
523
nir_variable *out = NULL;
525
nir_foreach_shader_out_variable(var, nir) {
526
if (var->data.location == location) {
527
unsigned count_slots = glsl_count_vec4_slots(var->type, false, false);
528
if (count_slots > slots) {
536
/* only one variable found or this is location_frac */
537
nir_foreach_shader_out_variable(var, nir) {
538
if (var->data.location == location &&
539
(var->data.location_frac == location_frac ||
540
(glsl_type_is_array(var->type) ? glsl_array_size(var->type) : glsl_get_vector_elements(var->type)) >= location_frac + 1)) {
541
if (location != VARYING_SLOT_PSIZ || !have_psiz || var->data.explicit_location)
550
is_inlined(const bool *inlined, const struct pipe_stream_output *output)
552
for (unsigned i = 0; i < output->num_components; i++)
553
if (!inlined[output->start_component + i])
559
update_psiz_location(nir_shader *nir, nir_variable *psiz)
561
uint32_t last_output = util_last_bit64(nir->info.outputs_written);
562
if (last_output < VARYING_SLOT_VAR0)
563
last_output = VARYING_SLOT_VAR0;
566
/* this should get fixed up by slot remapping */
567
psiz->data.location = last_output;
570
static const struct glsl_type *
571
clamp_slot_type(const struct glsl_type *type, unsigned slot)
573
/* could be dvec/dmat/mat: each member is the same */
574
const struct glsl_type *plain = glsl_without_array_or_matrix(type);
575
/* determine size of each member type */
576
unsigned slot_count = glsl_count_vec4_slots(plain, false, false);
577
/* normalize slot idx to current type's size */
579
unsigned slot_components = glsl_get_components(plain);
580
if (glsl_base_type_is_64bit(glsl_get_base_type(plain)))
581
slot_components *= 2;
582
/* create a vec4 mask of the selected slot's components out of all the components */
583
uint32_t mask = BITFIELD_MASK(slot_components) & BITFIELD_RANGE(slot * 4, 4);
584
/* return a vecN of the selected components */
585
slot_components = util_bitcount(mask);
586
return glsl_vec_type(slot_components);
589
static const struct glsl_type *
590
unroll_struct_type(const struct glsl_type *slot_type, unsigned *slot_idx)
592
const struct glsl_type *type = slot_type;
593
unsigned slot_count = 0;
594
unsigned cur_slot = 0;
595
/* iterate over all the members in the struct, stopping once the slot idx is reached */
596
for (unsigned i = 0; i < glsl_get_length(slot_type) && cur_slot <= *slot_idx; i++, cur_slot += slot_count) {
597
/* use array type for slot counting but return array member type for unroll */
598
const struct glsl_type *arraytype = glsl_get_struct_field(slot_type, i);
599
type = glsl_without_array(arraytype);
600
slot_count = glsl_count_vec4_slots(arraytype, false, false);
602
*slot_idx -= (cur_slot - slot_count);
603
if (!glsl_type_is_struct_or_ifc(type))
604
/* this is a fully unrolled struct: find the number of vec components to output */
605
type = clamp_slot_type(type, *slot_idx);
610
get_slot_components(nir_variable *var, unsigned slot, unsigned so_slot)
612
assert(var && slot < var->data.location + glsl_count_vec4_slots(var->type, false, false));
613
const struct glsl_type *orig_type = var->type;
614
const struct glsl_type *type = glsl_without_array(var->type);
615
unsigned slot_idx = slot - so_slot;
616
if (type != orig_type)
617
slot_idx %= glsl_count_vec4_slots(type, false, false);
618
/* need to find the vec4 that's being exported by this slot */
619
while (glsl_type_is_struct_or_ifc(type))
620
type = unroll_struct_type(type, &slot_idx);
622
/* arrays here are already fully unrolled from their structs, so slot handling is implicit */
623
unsigned num_components = glsl_get_components(glsl_without_array(type));
624
const struct glsl_type *arraytype = orig_type;
625
while (glsl_type_is_array(arraytype) && !glsl_type_is_struct_or_ifc(glsl_without_array(arraytype))) {
626
num_components *= glsl_array_size(arraytype);
627
arraytype = glsl_get_array_element(arraytype);
629
assert(num_components);
630
/* gallium handles xfb in terms of 32bit units */
631
if (glsl_base_type_is_64bit(glsl_get_base_type(glsl_without_array(type))))
633
return num_components;
636
static const struct pipe_stream_output *
637
find_packed_output(const struct pipe_stream_output_info *so_info, uint8_t *reverse_map, unsigned slot)
639
for (unsigned i = 0; i < so_info->num_outputs; i++) {
640
const struct pipe_stream_output *packed_output = &so_info->output[i];
641
if (reverse_map[packed_output->register_index] == slot)
642
return packed_output;
648
update_so_info(struct zink_shader *zs, const struct pipe_stream_output_info *so_info,
649
uint64_t outputs_written, bool have_psiz)
651
uint8_t reverse_map[VARYING_SLOT_MAX] = {0};
653
/* semi-copied from iris */
654
while (outputs_written) {
655
int bit = u_bit_scan64(&outputs_written);
656
/* PSIZ from nir_lower_point_size_mov breaks stream output, so always skip it */
657
if (bit == VARYING_SLOT_PSIZ && !have_psiz)
659
reverse_map[slot++] = bit;
662
bool have_fake_psiz = false;
663
nir_foreach_shader_out_variable(var, zs->nir) {
664
var->data.explicit_xfb_buffer = 0;
665
if (var->data.location == VARYING_SLOT_PSIZ && !var->data.explicit_location)
666
have_fake_psiz = true;
669
bool inlined[VARYING_SLOT_MAX][4] = {0};
671
uint8_t packed_components[VARYING_SLOT_MAX] = {0};
672
uint8_t packed_streams[VARYING_SLOT_MAX] = {0};
673
uint8_t packed_buffers[VARYING_SLOT_MAX] = {0};
674
uint16_t packed_offsets[VARYING_SLOT_MAX][4] = {0};
675
nir_variable *psiz = NULL;
676
for (unsigned i = 0; i < so_info->num_outputs; i++) {
677
const struct pipe_stream_output *output = &so_info->output[i];
678
unsigned slot = reverse_map[output->register_index];
679
/* always set stride to be used during draw */
680
zs->sinfo.so_info.stride[output->output_buffer] = so_info->stride[output->output_buffer];
681
if (zs->nir->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->nir->info.gs.active_stream_mask) == 1) {
682
nir_variable *var = NULL;
685
var = find_var_with_location_frac(zs->nir, slot--, output->start_component, have_psiz);
686
if (var->data.location == VARYING_SLOT_PSIZ)
689
slot = reverse_map[output->register_index];
690
if (var->data.explicit_xfb_buffer) {
691
/* handle dvec3 where gallium splits streamout over 2 registers */
692
for (unsigned j = 0; j < output->num_components; j++)
693
inlined[slot][output->start_component + j] = true;
695
if (is_inlined(inlined[slot], output))
697
bool is_struct = glsl_type_is_struct_or_ifc(glsl_without_array(var->type));
698
unsigned num_components = get_slot_components(var, slot, so_slot);
699
/* if this is the entire variable, try to blast it out during the initial declaration
700
* structs must be handled later to ensure accurate analysis
702
if (!is_struct && (num_components == output->num_components || (num_components > output->num_components && output->num_components == 4))) {
703
var->data.explicit_xfb_buffer = 1;
704
var->data.xfb.buffer = output->output_buffer;
705
var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
706
var->data.offset = output->dst_offset * 4;
707
var->data.stream = output->stream;
708
for (unsigned j = 0; j < output->num_components; j++)
709
inlined[slot][output->start_component + j] = true;
711
/* otherwise store some metadata for later */
712
packed |= BITFIELD64_BIT(slot);
713
packed_components[slot] += output->num_components;
714
packed_streams[slot] |= BITFIELD_BIT(output->stream);
715
packed_buffers[slot] |= BITFIELD_BIT(output->output_buffer);
716
for (unsigned j = 0; j < output->num_components; j++)
717
packed_offsets[output->register_index][j + output->start_component] = output->dst_offset + j;
722
/* if this was flagged as a packed output before, and if all the components are
723
* being output with the same stream on the same buffer with increasing offsets, this entire variable
724
* can be consolidated into a single output to conserve locations
726
for (unsigned i = 0; i < so_info->num_outputs; i++) {
727
const struct pipe_stream_output *output = &so_info->output[i];
728
unsigned slot = reverse_map[output->register_index];
729
if (is_inlined(inlined[slot], output))
731
if (zs->nir->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->nir->info.gs.active_stream_mask) == 1) {
732
nir_variable *var = NULL;
734
var = find_var_with_location_frac(zs->nir, slot--, output->start_component, have_psiz);
736
unsigned num_slots = glsl_count_vec4_slots(var->type, false, false);
737
/* for each variable, iterate over all the variable's slots and inline the outputs */
738
for (unsigned j = 0; j < num_slots; j++) {
739
slot = var->data.location + j;
740
const struct pipe_stream_output *packed_output = find_packed_output(so_info, reverse_map, slot);
744
/* if this slot wasn't packed or isn't in the same stream/buffer, skip consolidation */
745
if (!(packed & BITFIELD64_BIT(slot)) ||
746
util_bitcount(packed_streams[slot]) != 1 ||
747
util_bitcount(packed_buffers[slot]) != 1)
750
/* if all the components the variable exports to this slot aren't captured, skip consolidation */
751
unsigned num_components = get_slot_components(var, slot, var->data.location);
752
if (num_components != packed_components[slot])
755
/* in order to pack the xfb output, all the offsets must be sequentially incrementing */
756
uint32_t prev_offset = packed_offsets[packed_output->register_index][0];
757
for (unsigned k = 1; k < num_components; k++) {
758
/* if the offsets are not incrementing as expected, skip consolidation */
759
if (packed_offsets[packed_output->register_index][k] != prev_offset + 1)
761
prev_offset = packed_offsets[packed_output->register_index][k + packed_output->start_component];
764
/* this output can be consolidated: blast out all the data inlined */
765
var->data.explicit_xfb_buffer = 1;
766
var->data.xfb.buffer = output->output_buffer;
767
var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
768
var->data.offset = output->dst_offset * 4;
769
var->data.stream = output->stream;
770
/* GLSL specifies that interface blocks are split per-buffer in XFB */
771
if (glsl_type_is_array(var->type) && glsl_array_size(var->type) > 1 && glsl_type_is_interface(glsl_without_array(var->type)))
772
zs->sinfo.so_propagate |= BITFIELD_BIT(var->data.location - VARYING_SLOT_VAR0);
773
/* mark all slot components inlined to skip subsequent loop iterations */
774
for (unsigned j = 0; j < num_slots; j++) {
775
slot = var->data.location + j;
776
for (unsigned k = 0; k < packed_components[slot]; k++)
777
inlined[slot][k] = true;
778
packed &= ~BITFIELD64_BIT(slot);
783
/* these are packed/explicit varyings which can't be exported with normal output */
784
zs->sinfo.so_info.output[zs->sinfo.so_info.num_outputs] = *output;
785
/* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
786
zs->sinfo.so_info_slots[zs->sinfo.so_info.num_outputs++] = reverse_map[output->register_index];
788
zs->sinfo.have_xfb = zs->sinfo.so_info.num_outputs || zs->sinfo.so_propagate;
789
/* ensure this doesn't get output in the shader by unsetting location */
790
if (have_fake_psiz && psiz)
791
update_psiz_location(zs->nir, psiz);
794
struct decompose_state {
795
nir_variable **split;
800
lower_attrib(nir_builder *b, nir_instr *instr, void *data)
802
struct decompose_state *state = data;
803
nir_variable **split = state->split;
804
if (instr->type != nir_instr_type_intrinsic)
806
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
807
if (intr->intrinsic != nir_intrinsic_load_deref)
809
nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
810
nir_variable *var = nir_deref_instr_get_variable(deref);
813
unsigned num_components = glsl_get_vector_elements(split[0]->type);
814
b->cursor = nir_after_instr(instr);
815
nir_ssa_def *loads[4];
816
for (unsigned i = 0; i < (state->needs_w ? num_components - 1 : num_components); i++)
817
loads[i] = nir_load_deref(b, nir_build_deref_var(b, split[i+1]));
818
if (state->needs_w) {
819
/* oob load w comopnent to get correct value for int/float */
820
loads[3] = nir_channel(b, loads[0], 3);
821
loads[0] = nir_channel(b, loads[0], 0);
823
nir_ssa_def *new_load = nir_vec(b, loads, num_components);
824
nir_ssa_def_rewrite_uses(&intr->dest.ssa, new_load);
825
nir_instr_remove_v(instr);
830
decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decomposed_attrs_without_w)
833
nir_foreach_variable_with_modes(var, nir, nir_var_shader_in)
834
bits |= BITFIELD_BIT(var->data.driver_location);
836
u_foreach_bit(location, decomposed_attrs | decomposed_attrs_without_w) {
837
nir_variable *split[5];
838
struct decompose_state state;
840
nir_variable *var = nir_find_variable_with_driver_location(nir, nir_var_shader_in, location);
843
bits |= BITFIELD_BIT(var->data.driver_location);
844
const struct glsl_type *new_type = glsl_type_is_scalar(var->type) ? var->type : glsl_get_array_element(var->type);
845
unsigned num_components = glsl_get_vector_elements(var->type);
846
state.needs_w = (decomposed_attrs_without_w & BITFIELD_BIT(location)) != 0 && num_components == 4;
847
for (unsigned i = 0; i < (state.needs_w ? num_components - 1 : num_components); i++) {
848
split[i+1] = nir_variable_clone(var, nir);
849
split[i+1]->name = ralloc_asprintf(nir, "%s_split%u", var->name, i);
850
if (decomposed_attrs_without_w & BITFIELD_BIT(location))
851
split[i+1]->type = !i && num_components == 4 ? var->type : new_type;
853
split[i+1]->type = new_type;
854
split[i+1]->data.driver_location = ffs(bits) - 1;
855
bits &= ~BITFIELD_BIT(split[i+1]->data.driver_location);
856
nir_shader_add_variable(nir, split[i+1]);
858
var->data.mode = nir_var_shader_temp;
859
nir_shader_instructions_pass(nir, lower_attrib, nir_metadata_dominance, &state);
861
nir_fixup_deref_modes(nir);
862
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
868
rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
870
struct zink_screen *screen = data;
871
const bool has_int64 = screen->info.feats.features.shaderInt64;
872
if (instr->type != nir_instr_type_intrinsic)
874
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
875
b->cursor = nir_before_instr(instr);
876
switch (intr->intrinsic) {
877
case nir_intrinsic_load_ssbo:
878
case nir_intrinsic_load_ubo: {
879
/* ubo0 can have unaligned 64bit loads, particularly for bindless texture ids */
880
bool force_2x32 = intr->intrinsic == nir_intrinsic_load_ubo &&
881
nir_src_as_uint(intr->src[0]) == 0 &&
882
nir_dest_bit_size(intr->dest) == 64 &&
883
nir_intrinsic_align_offset(intr) % 8 != 0;
884
force_2x32 |= nir_dest_bit_size(intr->dest) == 64 && !has_int64;
885
nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8);
886
nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
887
/* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
889
/* this is always scalarized */
890
assert(intr->dest.ssa.num_components == 1);
891
/* rewrite as 2x32 */
892
nir_ssa_def *load[2];
893
for (unsigned i = 0; i < 2; i++) {
894
if (intr->intrinsic == nir_intrinsic_load_ssbo)
895
load[i] = nir_load_ssbo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
897
load[i] = nir_load_ubo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0, .range = 4);
898
nir_intrinsic_set_access(nir_instr_as_intrinsic(load[i]->parent_instr), nir_intrinsic_access(intr));
900
/* cast back to 64bit */
901
nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
902
nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
903
nir_instr_remove(instr);
907
case nir_intrinsic_load_shared:
908
b->cursor = nir_before_instr(instr);
909
bool force_2x32 = nir_dest_bit_size(intr->dest) == 64 && !has_int64;
910
nir_ssa_def *offset = nir_udiv_imm(b, intr->src[0].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8);
911
nir_instr_rewrite_src_ssa(instr, &intr->src[0], offset);
912
/* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
914
/* this is always scalarized */
915
assert(intr->dest.ssa.num_components == 1);
916
/* rewrite as 2x32 */
917
nir_ssa_def *load[2];
918
for (unsigned i = 0; i < 2; i++)
919
load[i] = nir_load_shared(b, 1, 32, nir_iadd_imm(b, intr->src[0].ssa, i), .align_mul = 4, .align_offset = 0);
920
/* cast back to 64bit */
921
nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
922
nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
923
nir_instr_remove(instr);
927
case nir_intrinsic_store_ssbo: {
928
b->cursor = nir_before_instr(instr);
929
bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
930
nir_ssa_def *offset = nir_udiv_imm(b, intr->src[2].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
931
nir_instr_rewrite_src_ssa(instr, &intr->src[2], offset);
932
/* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
934
/* this is always scalarized */
935
assert(intr->src[0].ssa->num_components == 1);
936
/* cast to 32bit: nir_unpack_64_2x32 not supported by ntv */
937
nir_ssa_def *casted = nir_vec2(b, nir_u2u32(b, intr->src[0].ssa), nir_u2u32(b, nir_ushr_imm(b, intr->src[0].ssa, 32)));
938
for (unsigned i = 0; i < 2; i++)
939
nir_store_ssbo(b, nir_channel(b, casted, i), intr->src[1].ssa, nir_iadd_imm(b, intr->src[2].ssa, i), .align_mul = 4, .align_offset = 0);
940
nir_instr_remove(instr);
944
case nir_intrinsic_store_shared: {
945
b->cursor = nir_before_instr(instr);
946
bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
947
nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
948
nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
949
/* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
950
if (nir_src_bit_size(intr->src[0]) == 64 && !has_int64) {
951
/* this is always scalarized */
952
assert(intr->src[0].ssa->num_components == 1);
953
/* cast to 32bit: nir_unpack_64_2x32 not supported by ntv */
954
nir_ssa_def *casted = nir_vec2(b, nir_u2u32(b, intr->src[0].ssa), nir_u2u32(b, nir_ushr_imm(b, intr->src[0].ssa, 32)));
955
for (unsigned i = 0; i < 2; i++)
956
nir_store_shared(b, nir_channel(b, casted, i), nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
957
nir_instr_remove(instr);
968
rewrite_bo_access(nir_shader *shader, struct zink_screen *screen)
970
return nir_shader_instructions_pass(shader, rewrite_bo_access_instr, nir_metadata_dominance, screen);
974
nir_variable *ubo[PIPE_MAX_CONSTANT_BUFFERS][5];
975
nir_variable *ssbo[PIPE_MAX_CONSTANT_BUFFERS][5];
978
static nir_variable *
979
get_bo_var(nir_shader *shader, struct bo_vars *bo, bool ssbo, unsigned idx, unsigned bit_size)
982
nir_variable **arr = (nir_variable**)(ssbo ? bo->ssbo : bo->ubo);
984
var = arr[idx * 5 + (bit_size >> 4)];
986
arr[idx * 5 + (bit_size >> 4)] = var = nir_variable_clone(arr[idx * 5 + (32 >> 4)], shader);
987
nir_shader_add_variable(shader, var);
989
struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
990
fields[0].name = ralloc_strdup(shader, "base");
991
fields[1].name = ralloc_strdup(shader, "unsized");
992
const struct glsl_type *array_type = glsl_get_struct_field(var->type, 0);
993
const struct glsl_type *type;
994
const struct glsl_type *unsized = glsl_array_type(glsl_uintN_t_type(bit_size), 0, bit_size / 8);
996
assert(bit_size == 64);
997
type = glsl_array_type(glsl_uintN_t_type(bit_size), glsl_get_length(array_type) / 2, bit_size / 8);
999
type = glsl_array_type(glsl_uintN_t_type(bit_size), glsl_get_length(array_type) * (32 / bit_size), bit_size / 8);
1001
fields[0].type = type;
1002
fields[1].type = unsized;
1003
var->type = glsl_struct_type(fields, glsl_get_length(var->type), "struct", false);
1009
remove_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
1011
struct bo_vars *bo = data;
1012
if (instr->type != nir_instr_type_intrinsic)
1014
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1015
nir_variable *var = NULL;
1016
nir_ssa_def *offset = NULL;
1017
bool is_load = true;
1018
b->cursor = nir_before_instr(instr);
1019
switch (intr->intrinsic) {
1020
case nir_intrinsic_store_ssbo:
1021
var = get_bo_var(b->shader, bo, true, nir_src_as_uint(intr->src[1]), nir_src_bit_size(intr->src[0]));
1022
offset = intr->src[2].ssa;
1025
case nir_intrinsic_load_ssbo:
1026
var = get_bo_var(b->shader, bo, true, nir_src_as_uint(intr->src[0]), nir_dest_bit_size(intr->dest));
1027
offset = intr->src[1].ssa;
1029
case nir_intrinsic_load_ubo:
1030
var = get_bo_var(b->shader, bo, false, nir_src_as_uint(intr->src[0]), nir_dest_bit_size(intr->dest));
1031
offset = intr->src[1].ssa;
1038
nir_deref_instr *deref_var = nir_build_deref_struct(b, nir_build_deref_var(b, var), 0);
1039
assert(intr->num_components <= 2);
1041
nir_ssa_def *result[2];
1042
for (unsigned i = 0; i < intr->num_components; i++) {
1043
nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_var, offset);
1044
result[i] = nir_load_deref(b, deref_arr);
1045
if (intr->intrinsic == nir_intrinsic_load_ssbo)
1046
nir_intrinsic_set_access(nir_instr_as_intrinsic(result[i]->parent_instr), nir_intrinsic_access(intr));
1047
offset = nir_iadd_imm(b, offset, 1);
1049
nir_ssa_def *load = nir_vec(b, result, intr->num_components);
1050
nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
1052
nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_var, offset);
1053
nir_build_store_deref(b, &deref_arr->dest.ssa, intr->src[0].ssa, BITFIELD_MASK(intr->num_components), nir_intrinsic_access(intr));
1055
nir_instr_remove(instr);
1060
remove_bo_access(nir_shader *shader)
1063
memset(&bo, 0, sizeof(bo));
1064
nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
1065
if (var->data.mode == nir_var_mem_ssbo) {
1066
assert(!bo.ssbo[var->data.driver_location][32 >> 4]);
1067
bo.ssbo[var->data.driver_location][32 >> 4] = var;
1069
assert(!bo.ubo[var->data.driver_location][32 >> 4]);
1070
bo.ubo[var->data.driver_location][32 >> 4] = var;
1073
return nir_shader_instructions_pass(shader, remove_bo_access_instr, nir_metadata_dominance, &bo);
1077
assign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
1079
unsigned slot = var->data.location;
1082
case VARYING_SLOT_POS:
1083
case VARYING_SLOT_PNTC:
1084
case VARYING_SLOT_PSIZ:
1085
case VARYING_SLOT_LAYER:
1086
case VARYING_SLOT_PRIMITIVE_ID:
1087
case VARYING_SLOT_CLIP_DIST0:
1088
case VARYING_SLOT_CULL_DIST0:
1089
case VARYING_SLOT_VIEWPORT:
1090
case VARYING_SLOT_FACE:
1091
case VARYING_SLOT_TESS_LEVEL_OUTER:
1092
case VARYING_SLOT_TESS_LEVEL_INNER:
1093
/* use a sentinel value to avoid counting later */
1094
var->data.driver_location = UINT_MAX;
1098
if (var->data.patch) {
1099
assert(slot >= VARYING_SLOT_PATCH0);
1100
slot -= VARYING_SLOT_PATCH0;
1102
if (slot_map[slot] == 0xff) {
1103
assert(*reserved < MAX_VARYING);
1104
slot_map[slot] = *reserved;
1105
if (stage == MESA_SHADER_TESS_EVAL && var->data.mode == nir_var_shader_in && !var->data.patch)
1106
*reserved += glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
1108
*reserved += glsl_count_vec4_slots(var->type, false, false);
1110
slot = slot_map[slot];
1111
assert(slot < MAX_VARYING);
1112
var->data.driver_location = slot;
1116
ALWAYS_INLINE static bool
1117
is_texcoord(gl_shader_stage stage, const nir_variable *var)
1119
if (stage != MESA_SHADER_FRAGMENT)
1121
return var->data.location >= VARYING_SLOT_TEX0 &&
1122
var->data.location <= VARYING_SLOT_TEX7;
1126
assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
1128
unsigned slot = var->data.location;
1130
case VARYING_SLOT_POS:
1131
case VARYING_SLOT_PNTC:
1132
case VARYING_SLOT_PSIZ:
1133
case VARYING_SLOT_LAYER:
1134
case VARYING_SLOT_PRIMITIVE_ID:
1135
case VARYING_SLOT_CLIP_DIST0:
1136
case VARYING_SLOT_CULL_DIST0:
1137
case VARYING_SLOT_VIEWPORT:
1138
case VARYING_SLOT_FACE:
1139
case VARYING_SLOT_TESS_LEVEL_OUTER:
1140
case VARYING_SLOT_TESS_LEVEL_INNER:
1141
/* use a sentinel value to avoid counting later */
1142
var->data.driver_location = UINT_MAX;
1145
if (var->data.patch) {
1146
assert(slot >= VARYING_SLOT_PATCH0);
1147
slot -= VARYING_SLOT_PATCH0;
1149
if (slot_map[slot] == (unsigned char)-1) {
1150
if (stage != MESA_SHADER_TESS_CTRL && !is_texcoord(stage, var))
1153
/* - texcoords can't be eliminated in fs due to GL_COORD_REPLACE
1154
* - patch variables may be read in the workgroup
1156
slot_map[slot] = (*reserved)++;
1158
var->data.driver_location = slot_map[slot];
1165
rewrite_and_discard_read(nir_builder *b, nir_instr *instr, void *data)
1167
nir_variable *var = data;
1168
if (instr->type != nir_instr_type_intrinsic)
1171
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1172
if (intr->intrinsic != nir_intrinsic_load_deref)
1174
nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
1175
if (deref_var != var)
1177
nir_ssa_def *undef = nir_ssa_undef(b, nir_dest_num_components(intr->dest), nir_dest_bit_size(intr->dest));
1178
nir_ssa_def_rewrite_uses(&intr->dest.ssa, undef);
1183
zink_compiler_assign_io(nir_shader *producer, nir_shader *consumer)
1185
unsigned reserved = 0;
1186
unsigned char slot_map[VARYING_SLOT_MAX];
1187
memset(slot_map, -1, sizeof(slot_map));
1188
bool do_fixup = false;
1189
nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? producer : consumer;
1190
if (consumer->info.stage != MESA_SHADER_FRAGMENT) {
1191
/* remove injected pointsize from all but the last vertex stage */
1192
nir_variable *var = nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ);
1193
if (var && !var->data.explicit_location) {
1194
var->data.mode = nir_var_shader_temp;
1195
nir_fixup_deref_modes(producer);
1196
NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_temp, NULL);
1197
optimize_nir(producer);
1200
if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
1201
/* never assign from tcs -> tes, always invert */
1202
nir_foreach_variable_with_modes(var, consumer, nir_var_shader_in)
1203
assign_producer_var_io(consumer->info.stage, var, &reserved, slot_map);
1204
nir_foreach_variable_with_modes_safe(var, producer, nir_var_shader_out) {
1205
if (!assign_consumer_var_io(producer->info.stage, var, &reserved, slot_map))
1206
/* this is an output, nothing more needs to be done for it to be dropped */
1210
nir_foreach_variable_with_modes(var, producer, nir_var_shader_out)
1211
assign_producer_var_io(producer->info.stage, var, &reserved, slot_map);
1212
nir_foreach_variable_with_modes_safe(var, consumer, nir_var_shader_in) {
1213
if (!assign_consumer_var_io(consumer->info.stage, var, &reserved, slot_map)) {
1215
/* input needs to be rewritten as an undef to ensure the entire deref chain is deleted */
1216
nir_shader_instructions_pass(consumer, rewrite_and_discard_read, nir_metadata_dominance, var);
1222
nir_fixup_deref_modes(nir);
1223
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
1228
zink_shader_dump(void *words, size_t size, const char *file)
1230
FILE *fp = fopen(file, "wb");
1232
fwrite(words, 1, size, fp);
1234
fprintf(stderr, "wrote '%s'...\n", file);
1239
zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv)
1242
VkShaderModuleCreateInfo smci = {0};
1247
if (zink_debug & ZINK_DEBUG_SPIRV) {
1250
snprintf(buf, sizeof(buf), "dump%02d.spv", i++);
1251
zink_shader_dump(spirv->words, spirv->num_words * sizeof(uint32_t), buf);
1254
smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
1255
smci.codeSize = spirv->num_words * sizeof(uint32_t);
1256
smci.pCode = spirv->words;
1258
VkResult ret = VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &mod);
1259
bool success = zink_screen_handle_vkresult(screen, ret);
1261
return success ? mod : VK_NULL_HANDLE;
1265
find_var_deref(nir_shader *nir, nir_variable *var)
1267
nir_foreach_function(function, nir) {
1268
if (!function->impl)
1271
nir_foreach_block(block, function->impl) {
1272
nir_foreach_instr(instr, block) {
1273
if (instr->type != nir_instr_type_deref)
1275
nir_deref_instr *deref = nir_instr_as_deref(instr);
1276
if (deref->deref_type == nir_deref_type_var && deref->var == var)
1285
prune_io(nir_shader *nir)
1287
nir_foreach_shader_in_variable_safe(var, nir) {
1288
if (!find_var_deref(nir, var))
1289
var->data.mode = nir_var_shader_temp;
1291
nir_foreach_shader_out_variable_safe(var, nir) {
1292
if (!find_var_deref(nir, var))
1293
var->data.mode = nir_var_shader_temp;
1298
zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shader *base_nir, const struct zink_shader_key *key)
1300
VkShaderModule mod = VK_NULL_HANDLE;
1301
struct zink_shader_info *sinfo = &zs->sinfo;
1302
nir_shader *nir = nir_shader_clone(NULL, base_nir);
1303
bool need_optimize = false;
1304
bool inlined_uniforms = false;
1307
if (key->inline_uniforms) {
1308
NIR_PASS_V(nir, nir_inline_uniforms,
1309
nir->info.num_inlinable_uniforms,
1310
key->base.inlined_uniform_values,
1311
nir->info.inlinable_uniform_dw_offsets);
1313
inlined_uniforms = true;
1316
/* TODO: use a separate mem ctx here for ralloc */
1317
switch (zs->nir->info.stage) {
1318
case MESA_SHADER_VERTEX: {
1319
uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0;
1320
const struct zink_vs_key *vs_key = zink_vs_key(key);
1321
switch (vs_key->size) {
1323
decomposed_attrs = vs_key->u32.decomposed_attrs;
1324
decomposed_attrs_without_w = vs_key->u32.decomposed_attrs_without_w;
1327
decomposed_attrs = vs_key->u16.decomposed_attrs;
1328
decomposed_attrs_without_w = vs_key->u16.decomposed_attrs_without_w;
1331
decomposed_attrs = vs_key->u8.decomposed_attrs;
1332
decomposed_attrs_without_w = vs_key->u8.decomposed_attrs_without_w;
1336
if (decomposed_attrs || decomposed_attrs_without_w)
1337
NIR_PASS_V(nir, decompose_attribs, decomposed_attrs, decomposed_attrs_without_w);
1340
case MESA_SHADER_TESS_EVAL:
1341
case MESA_SHADER_GEOMETRY:
1342
if (zink_vs_key_base(key)->last_vertex_stage) {
1343
if (zs->sinfo.have_xfb)
1344
sinfo->last_vertex = true;
1346
if (!zink_vs_key_base(key)->clip_halfz && screen->driver_workarounds.depth_clip_control_missing) {
1347
NIR_PASS_V(nir, nir_lower_clip_halfz);
1349
if (zink_vs_key_base(key)->push_drawid) {
1350
NIR_PASS_V(nir, lower_drawid);
1354
case MESA_SHADER_FRAGMENT:
1355
if (!zink_fs_key(key)->samples &&
1356
nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
1357
/* VK will always use gl_SampleMask[] values even if sample count is 0,
1358
* so we need to skip this write here to mimic GL's behavior of ignoring it
1360
nir_foreach_shader_out_variable(var, nir) {
1361
if (var->data.location == FRAG_RESULT_SAMPLE_MASK)
1362
var->data.mode = nir_var_shader_temp;
1364
nir_fixup_deref_modes(nir);
1365
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
1366
need_optimize = true;
1368
if (zink_fs_key(key)->force_dual_color_blend && nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)) {
1369
NIR_PASS_V(nir, lower_dual_blend);
1371
if (zink_fs_key(key)->coord_replace_bits) {
1372
NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key(key)->coord_replace_bits,
1373
false, zink_fs_key(key)->coord_replace_yinvert);
1375
if (zink_fs_key(key)->force_persample_interp || zink_fs_key(key)->fbfetch_ms) {
1376
nir_foreach_shader_in_variable(var, nir)
1377
var->data.sample = true;
1378
nir->info.fs.uses_sample_qualifier = true;
1379
nir->info.fs.uses_sample_shading = true;
1381
if (nir->info.fs.uses_fbfetch_output) {
1382
nir_variable *fbfetch = NULL;
1383
NIR_PASS_V(nir, lower_fbfetch, &fbfetch, zink_fs_key(key)->fbfetch_ms);
1384
/* old variable must be deleted to avoid spirv errors */
1385
fbfetch->data.mode = nir_var_shader_temp;
1386
nir_fixup_deref_modes(nir);
1387
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
1388
need_optimize = true;
1393
if (key->base.nonseamless_cube_mask) {
1394
NIR_PASS_V(nir, zink_lower_cubemap_to_array, key->base.nonseamless_cube_mask);
1395
need_optimize = true;
1398
if (screen->driconf.inline_uniforms) {
1399
NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
1400
NIR_PASS_V(nir, rewrite_bo_access, screen);
1401
NIR_PASS_V(nir, remove_bo_access);
1402
need_optimize = true;
1404
if (inlined_uniforms) {
1407
/* This must be done again. */
1408
NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
1409
nir_var_shader_out);
1411
nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1412
if (impl->ssa_alloc > ZINK_ALWAYS_INLINE_LIMIT)
1413
zs->can_inline = false;
1414
} else if (need_optimize)
1418
NIR_PASS_V(nir, nir_convert_from_ssa, true);
1420
struct spirv_shader *spirv = nir_to_spirv(nir, sinfo, screen->spirv_version);
1422
mod = zink_shader_spirv_compile(screen, zs, spirv);
1426
/* TODO: determine if there's any reason to cache spirv output? */
1427
if (zs->is_generated)
1435
lower_baseinstance_instr(nir_builder *b, nir_instr *instr, void *data)
1437
if (instr->type != nir_instr_type_intrinsic)
1439
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1440
if (intr->intrinsic != nir_intrinsic_load_instance_id)
1442
b->cursor = nir_after_instr(instr);
1443
nir_ssa_def *def = nir_isub(b, &intr->dest.ssa, nir_load_base_instance(b));
1444
nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr);
1449
lower_baseinstance(nir_shader *shader)
1451
if (shader->info.stage != MESA_SHADER_VERTEX)
1453
return nir_shader_instructions_pass(shader, lower_baseinstance_instr, nir_metadata_dominance, NULL);
1456
bool nir_lower_dynamic_bo_access(nir_shader *shader);
1458
/* gl_nir_lower_buffers makes variables unusable for all UBO/SSBO access
1459
* so instead we delete all those broken variables and just make new ones
1462
unbreak_bos(nir_shader *shader)
1464
uint32_t ssbo_used = 0;
1465
uint32_t ubo_used = 0;
1466
uint64_t max_ssbo_size = 0;
1467
uint64_t max_ubo_size = 0;
1468
bool ssbo_sizes[PIPE_MAX_SHADER_BUFFERS] = {false};
1470
if (!shader->info.num_ssbos && !shader->info.num_ubos && !shader->num_uniforms)
1472
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
1473
nir_foreach_block(block, impl) {
1474
nir_foreach_instr(instr, block) {
1475
if (instr->type != nir_instr_type_intrinsic)
1478
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1479
switch (intrin->intrinsic) {
1480
case nir_intrinsic_store_ssbo:
1481
ssbo_used |= BITFIELD_BIT(nir_src_as_uint(intrin->src[1]));
1484
case nir_intrinsic_get_ssbo_size: {
1485
uint32_t slot = nir_src_as_uint(intrin->src[0]);
1486
ssbo_used |= BITFIELD_BIT(slot);
1487
ssbo_sizes[slot] = true;
1490
case nir_intrinsic_ssbo_atomic_add:
1491
case nir_intrinsic_ssbo_atomic_imin:
1492
case nir_intrinsic_ssbo_atomic_umin:
1493
case nir_intrinsic_ssbo_atomic_imax:
1494
case nir_intrinsic_ssbo_atomic_umax:
1495
case nir_intrinsic_ssbo_atomic_and:
1496
case nir_intrinsic_ssbo_atomic_or:
1497
case nir_intrinsic_ssbo_atomic_xor:
1498
case nir_intrinsic_ssbo_atomic_exchange:
1499
case nir_intrinsic_ssbo_atomic_comp_swap:
1500
case nir_intrinsic_ssbo_atomic_fmin:
1501
case nir_intrinsic_ssbo_atomic_fmax:
1502
case nir_intrinsic_ssbo_atomic_fcomp_swap:
1503
case nir_intrinsic_load_ssbo:
1504
ssbo_used |= BITFIELD_BIT(nir_src_as_uint(intrin->src[0]));
1506
case nir_intrinsic_load_ubo:
1507
case nir_intrinsic_load_ubo_vec4:
1508
ubo_used |= BITFIELD_BIT(nir_src_as_uint(intrin->src[0]));
1516
nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
1517
const struct glsl_type *type = glsl_without_array(var->type);
1518
if (type_is_counter(type))
1520
unsigned size = glsl_count_attribute_slots(glsl_type_is_array(var->type) ? var->type : type, false);
1521
if (var->data.mode == nir_var_mem_ubo)
1522
max_ubo_size = MAX2(max_ubo_size, size);
1524
max_ssbo_size = MAX2(max_ssbo_size, size);
1525
var->data.mode = nir_var_shader_temp;
1527
nir_fixup_deref_modes(shader);
1528
NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
1529
optimize_nir(shader);
1531
if (!ssbo_used && !ubo_used)
1534
struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
1535
fields[0].name = ralloc_strdup(shader, "base");
1536
fields[1].name = ralloc_strdup(shader, "unsized");
1538
const struct glsl_type *ubo_type = glsl_array_type(glsl_uint_type(), max_ubo_size * 4, 4);
1539
fields[0].type = ubo_type;
1540
u_foreach_bit(slot, ubo_used) {
1542
snprintf(buf, sizeof(buf), "ubo_slot_%u", slot);
1543
nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo, glsl_struct_type(fields, 1, "struct", false), buf);
1544
var->interface_type = var->type;
1545
var->data.driver_location = slot;
1549
const struct glsl_type *ssbo_type = glsl_array_type(glsl_uint_type(), max_ssbo_size * 4, 4);
1550
const struct glsl_type *unsized = glsl_array_type(glsl_uint_type(), 0, 4);
1551
fields[0].type = ssbo_type;
1552
u_foreach_bit(slot, ssbo_used) {
1554
snprintf(buf, sizeof(buf), "ssbo_slot_%u", slot);
1555
bool use_runtime = ssbo_sizes[slot] && max_ssbo_size;
1557
fields[1].type = unsized;
1559
fields[1].type = NULL;
1560
nir_variable *var = nir_variable_create(shader, nir_var_mem_ssbo,
1561
glsl_struct_type(fields, 1 + use_runtime, "struct", false), buf);
1562
var->interface_type = var->type;
1563
var->data.driver_location = slot;
1569
/* this is a "default" bindless texture used if the shader has no texture variables */
1570
static nir_variable *
1571
create_bindless_texture(nir_shader *nir, nir_tex_instr *tex)
1573
unsigned binding = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 1 : 0;
1576
const struct glsl_type *sampler_type = glsl_sampler_type(tex->sampler_dim, tex->is_shadow, tex->is_array, GLSL_TYPE_FLOAT);
1577
var = nir_variable_create(nir, nir_var_uniform, glsl_array_type(sampler_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_texture");
1578
var->data.descriptor_set = ZINK_DESCRIPTOR_BINDLESS;
1579
var->data.driver_location = var->data.binding = binding;
1583
/* this is a "default" bindless image used if the shader has no image variables */
1584
static nir_variable *
1585
create_bindless_image(nir_shader *nir, enum glsl_sampler_dim dim)
1587
unsigned binding = dim == GLSL_SAMPLER_DIM_BUF ? 3 : 2;
1590
const struct glsl_type *image_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
1591
var = nir_variable_create(nir, nir_var_image, glsl_array_type(image_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_image");
1592
var->data.descriptor_set = ZINK_DESCRIPTOR_BINDLESS;
1593
var->data.driver_location = var->data.binding = binding;
1594
var->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
1598
/* rewrite bindless instructions as array deref instructions */
1600
lower_bindless_instr(nir_builder *b, nir_instr *in, void *data)
1602
nir_variable **bindless = data;
1604
if (in->type == nir_instr_type_tex) {
1605
nir_tex_instr *tex = nir_instr_as_tex(in);
1606
int idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
1610
nir_variable *var = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? bindless[1] : bindless[0];
1612
var = create_bindless_texture(b->shader, tex);
1613
b->cursor = nir_before_instr(in);
1614
nir_deref_instr *deref = nir_build_deref_var(b, var);
1615
if (glsl_type_is_array(var->type))
1616
deref = nir_build_deref_array(b, deref, nir_u2uN(b, tex->src[idx].src.ssa, 32));
1617
nir_instr_rewrite_src_ssa(in, &tex->src[idx].src, &deref->dest.ssa);
1619
/* bindless sampling uses the variable type directly, which means the tex instr has to exactly
1620
* match up with it in contrast to normal sampler ops where things are a bit more flexible;
1621
* this results in cases where a shader is passed with sampler2DArray but the tex instr only has
1622
* 2 components, which explodes spirv compilation even though it doesn't trigger validation errors
1624
* to fix this, pad the coord src here and fix the tex instr so that ntv will do the "right" thing
1625
* - Warhammer 40k: Dawn of War III
1627
unsigned needed_components = glsl_get_sampler_coordinate_components(glsl_without_array(var->type));
1628
unsigned c = nir_tex_instr_src_index(tex, nir_tex_src_coord);
1629
unsigned coord_components = nir_src_num_components(tex->src[c].src);
1630
if (coord_components < needed_components) {
1631
nir_ssa_def *def = nir_pad_vector(b, tex->src[c].src.ssa, needed_components);
1632
nir_instr_rewrite_src_ssa(in, &tex->src[c].src, def);
1633
tex->coord_components = needed_components;
1637
if (in->type != nir_instr_type_intrinsic)
1639
nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
1641
nir_intrinsic_op op;
1642
#define OP_SWAP(OP) \
1643
case nir_intrinsic_bindless_image_##OP: \
1644
op = nir_intrinsic_image_deref_##OP; \
1648
/* convert bindless intrinsics to deref intrinsics */
1649
switch (instr->intrinsic) {
1652
OP_SWAP(atomic_comp_swap)
1653
OP_SWAP(atomic_dec_wrap)
1654
OP_SWAP(atomic_exchange)
1655
OP_SWAP(atomic_fadd)
1656
OP_SWAP(atomic_fmax)
1657
OP_SWAP(atomic_fmin)
1658
OP_SWAP(atomic_imax)
1659
OP_SWAP(atomic_imin)
1660
OP_SWAP(atomic_inc_wrap)
1662
OP_SWAP(atomic_umax)
1663
OP_SWAP(atomic_umin)
1675
enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
1676
nir_variable *var = dim == GLSL_SAMPLER_DIM_BUF ? bindless[3] : bindless[2];
1678
var = create_bindless_image(b->shader, dim);
1679
instr->intrinsic = op;
1680
b->cursor = nir_before_instr(in);
1681
nir_deref_instr *deref = nir_build_deref_var(b, var);
1682
if (glsl_type_is_array(var->type))
1683
deref = nir_build_deref_array(b, deref, nir_u2uN(b, instr->src[0].ssa, 32));
1684
nir_instr_rewrite_src_ssa(in, &instr->src[0], &deref->dest.ssa);
1689
lower_bindless(nir_shader *shader, nir_variable **bindless)
1691
if (!nir_shader_instructions_pass(shader, lower_bindless_instr, nir_metadata_dominance, bindless))
1693
nir_fixup_deref_modes(shader);
1694
NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
1695
optimize_nir(shader);
1699
/* convert shader image/texture io variables to int64 handles for bindless indexing */
1701
lower_bindless_io_instr(nir_builder *b, nir_instr *in, void *data)
1703
if (in->type != nir_instr_type_intrinsic)
1705
nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
1706
if (instr->intrinsic != nir_intrinsic_load_deref &&
1707
instr->intrinsic != nir_intrinsic_store_deref)
1710
nir_deref_instr *src_deref = nir_src_as_deref(instr->src[0]);
1711
nir_variable *var = nir_deref_instr_get_variable(src_deref);
1712
if (var->data.bindless)
1714
if (var->data.mode != nir_var_shader_in && var->data.mode != nir_var_shader_out)
1716
if (!glsl_type_is_image(var->type) && !glsl_type_is_sampler(var->type))
1719
var->type = glsl_int64_t_type();
1720
var->data.bindless = 1;
1721
b->cursor = nir_before_instr(in);
1722
nir_deref_instr *deref = nir_build_deref_var(b, var);
1723
if (instr->intrinsic == nir_intrinsic_load_deref) {
1724
nir_ssa_def *def = nir_load_deref(b, deref);
1725
nir_instr_rewrite_src_ssa(in, &instr->src[0], def);
1726
nir_ssa_def_rewrite_uses(&instr->dest.ssa, def);
1728
nir_store_deref(b, deref, instr->src[1].ssa, nir_intrinsic_write_mask(instr));
1730
nir_instr_remove(in);
1731
nir_instr_remove(&src_deref->instr);
1736
lower_bindless_io(nir_shader *shader)
1738
return nir_shader_instructions_pass(shader, lower_bindless_io_instr, nir_metadata_dominance, NULL);
1742
zink_binding(gl_shader_stage stage, VkDescriptorType type, int index)
1744
if (stage == MESA_SHADER_NONE) {
1745
unreachable("not supported");
1748
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
1749
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
1750
assert(index < PIPE_MAX_CONSTANT_BUFFERS);
1751
return (stage * PIPE_MAX_CONSTANT_BUFFERS) + index;
1753
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1754
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
1755
assert(index < PIPE_MAX_SAMPLERS);
1756
return (stage * PIPE_MAX_SAMPLERS) + index;
1758
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
1759
assert(index < PIPE_MAX_SHADER_BUFFERS);
1760
return (stage * PIPE_MAX_SHADER_BUFFERS) + index;
1762
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
1763
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
1764
assert(index < PIPE_MAX_SHADER_IMAGES);
1765
return (stage * PIPE_MAX_SHADER_IMAGES) + index;
1768
unreachable("unexpected type");
1774
handle_bindless_var(nir_shader *nir, nir_variable *var, const struct glsl_type *type, nir_variable **bindless)
1776
if (glsl_type_is_struct(type)) {
1777
for (unsigned i = 0; i < glsl_get_length(type); i++)
1778
handle_bindless_var(nir, var, glsl_get_struct_field(type, i), bindless);
1782
/* just a random scalar in a struct */
1783
if (!glsl_type_is_image(type) && !glsl_type_is_sampler(type))
1786
VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
1789
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1792
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
1795
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
1798
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
1802
unreachable("unknown");
1804
if (!bindless[binding]) {
1805
bindless[binding] = nir_variable_clone(var, nir);
1806
bindless[binding]->data.bindless = 0;
1807
bindless[binding]->data.descriptor_set = ZINK_DESCRIPTOR_BINDLESS;
1808
bindless[binding]->type = glsl_array_type(type, ZINK_MAX_BINDLESS_HANDLES, 0);
1809
bindless[binding]->data.driver_location = bindless[binding]->data.binding = binding;
1810
if (!bindless[binding]->data.image.format)
1811
bindless[binding]->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
1812
nir_shader_add_variable(nir, bindless[binding]);
1814
assert(glsl_get_sampler_dim(glsl_without_array(bindless[binding]->type)) == glsl_get_sampler_dim(glsl_without_array(var->type)));
1816
var->data.mode = nir_var_shader_temp;
1819
static enum pipe_prim_type
1820
prim_to_pipe(enum shader_prim primitive_type)
1822
switch (primitive_type) {
1823
case SHADER_PRIM_POINTS:
1824
return PIPE_PRIM_POINTS;
1825
case SHADER_PRIM_LINES:
1826
case SHADER_PRIM_LINE_LOOP:
1827
case SHADER_PRIM_LINE_STRIP:
1828
case SHADER_PRIM_LINES_ADJACENCY:
1829
case SHADER_PRIM_LINE_STRIP_ADJACENCY:
1830
return PIPE_PRIM_LINES;
1832
return PIPE_PRIM_TRIANGLES;
1836
static enum pipe_prim_type
1837
tess_prim_to_pipe(enum tess_primitive_mode prim_mode)
1839
switch (prim_mode) {
1840
case TESS_PRIMITIVE_ISOLINES:
1841
return PIPE_PRIM_LINES;
1843
return PIPE_PRIM_TRIANGLES;
1847
static enum pipe_prim_type
1848
get_shader_base_prim_type(struct nir_shader *nir)
1850
switch (nir->info.stage) {
1851
case MESA_SHADER_GEOMETRY:
1852
return prim_to_pipe(nir->info.gs.output_primitive);
1853
case MESA_SHADER_TESS_EVAL:
1854
return nir->info.tess.point_mode ? PIPE_PRIM_POINTS : tess_prim_to_pipe(nir->info.tess._primitive_mode);
1858
return PIPE_PRIM_MAX;
1862
convert_1d_shadow_tex(nir_builder *b, nir_instr *instr, void *data)
1864
struct zink_screen *screen = data;
1865
if (instr->type != nir_instr_type_tex)
1867
nir_tex_instr *tex = nir_instr_as_tex(instr);
1868
if (tex->sampler_dim != GLSL_SAMPLER_DIM_1D || !tex->is_shadow)
1870
if (tex->is_sparse && screen->need_2D_sparse) {
1871
/* no known case of this exists: only nvidia can hit it, and nothing uses it */
1872
mesa_loge("unhandled/unsupported 1D sparse texture!");
1875
tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
1876
b->cursor = nir_before_instr(instr);
1877
tex->coord_components++;
1884
for (unsigned i = 0; i < ARRAY_SIZE(srcs); i++) {
1885
unsigned c = nir_tex_instr_src_index(tex, srcs[i]);
1888
if (tex->src[c].src.ssa->num_components == tex->coord_components)
1891
nir_ssa_def *zero = nir_imm_zero(b, 1, tex->src[c].src.ssa->bit_size);
1892
if (tex->src[c].src.ssa->num_components == 1)
1893
def = nir_vec2(b, tex->src[c].src.ssa, zero);
1895
def = nir_vec3(b, nir_channel(b, tex->src[c].src.ssa, 0), zero, nir_channel(b, tex->src[c].src.ssa, 1));
1896
nir_instr_rewrite_src_ssa(instr, &tex->src[c].src, def);
1898
b->cursor = nir_after_instr(instr);
1899
unsigned needed_components = nir_tex_instr_dest_size(tex);
1900
unsigned num_components = tex->dest.ssa.num_components;
1901
if (needed_components > num_components) {
1902
tex->dest.ssa.num_components = needed_components;
1903
assert(num_components < 3);
1904
/* take either xz or just x since this is promoted to 2D from 1D */
1905
uint32_t mask = num_components == 2 ? (1|4) : 1;
1906
nir_ssa_def *dst = nir_channels(b, &tex->dest.ssa, mask);
1907
nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, dst, dst->parent_instr);
1913
lower_1d_shadow(nir_shader *shader, struct zink_screen *screen)
1916
nir_foreach_variable_with_modes(var, shader, nir_var_uniform | nir_var_image) {
1917
const struct glsl_type *type = glsl_without_array(var->type);
1918
unsigned length = glsl_get_length(var->type);
1919
if (!glsl_type_is_sampler(type) || !glsl_sampler_type_is_shadow(type) || glsl_get_sampler_dim(type) != GLSL_SAMPLER_DIM_1D)
1921
const struct glsl_type *sampler = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, true, glsl_sampler_type_is_array(type), glsl_get_sampler_result_type(type));
1922
var->type = type != var->type ? glsl_array_type(sampler, length, glsl_get_explicit_stride(var->type)) : sampler;
1927
nir_shader_instructions_pass(shader, convert_1d_shadow_tex, nir_metadata_dominance, screen);
1932
/* Walk the shader once and gather zink-specific info:
 * - whether sparse residency is used (zs->sinfo.have_sparse)
 * - the set of image bindings touched (shader->info.images_used)
 * - a one-time warning when float-image atomics are used without
 *   VK_EXT_shader_atomic_float support.
 * NOTE(review): reconstructed from a garbled extraction — verify brace
 * structure and the warning path against upstream zink_compiler.c.
 */
static void
scan_nir(struct zink_screen *screen, nir_shader *shader, struct zink_shader *zs)
{
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;
      nir_foreach_block_safe(block, function->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type == nir_instr_type_tex) {
               nir_tex_instr *tex = nir_instr_as_tex(instr);
               zs->sinfo.have_sparse |= tex->is_sparse;
            }
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic == nir_intrinsic_image_deref_load ||
                intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
                intr->intrinsic == nir_intrinsic_image_deref_store ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_add ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_imin ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_umin ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_imax ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_umax ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_and ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_or ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_xor ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_exchange ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_fadd ||
                intr->intrinsic == nir_intrinsic_image_deref_size ||
                intr->intrinsic == nir_intrinsic_image_deref_samples ||
                intr->intrinsic == nir_intrinsic_image_deref_format ||
                intr->intrinsic == nir_intrinsic_image_deref_order) {

               nir_variable *var =
                  nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));

               /* Structs have been lowered already, so get_aoa_size is sufficient. */
               const unsigned size =
                  glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
               unsigned mask = ((1ull << MAX2(size, 1)) - 1) << var->data.binding;

               shader->info.images_used |= mask;
            }
            if (intr->intrinsic == nir_intrinsic_is_sparse_texels_resident ||
                intr->intrinsic == nir_intrinsic_image_deref_sparse_load)
               zs->sinfo.have_sparse = true;

            /* warn (once per process) about float-image atomics the driver can't do */
            static bool warned = false;
            if (!screen->info.have_EXT_shader_atomic_float && !screen->is_cpu && !warned) {
               switch (intr->intrinsic) {
               case nir_intrinsic_image_deref_atomic_add: {
                  nir_variable *var = nir_intrinsic_get_var(intr, 0);
                  if (util_format_is_float(var->data.image.format))
                     fprintf(stderr, "zink: Vulkan driver missing VK_EXT_shader_atomic_float but attempting to do atomic ops!\n");
                  break;
               }
               default:
                  break;
               }
            }
         }
      }
   }
}
lower_sparse_instr(nir_builder *b, nir_instr *in, void *data)
2000
if (in->type != nir_instr_type_intrinsic)
2002
nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
2003
if (instr->intrinsic != nir_intrinsic_is_sparse_texels_resident)
2006
/* vulkan vec can only be a vec4, but this is (maybe) vec5,
2007
* so just rewrite as the first component since ntv is going to use a different
2008
* method for storing the residency value anyway
2010
b->cursor = nir_before_instr(&instr->instr);
2011
nir_instr *parent = instr->src[0].ssa->parent_instr;
2012
assert(parent->type == nir_instr_type_alu);
2013
nir_alu_instr *alu = nir_instr_as_alu(parent);
2014
nir_ssa_def_rewrite_uses_after(instr->src[0].ssa, nir_channel(b, alu->src[0].src.ssa, 0), parent);
2015
nir_instr_remove(parent);
2020
lower_sparse(nir_shader *shader)
2022
return nir_shader_instructions_pass(shader, lower_sparse_instr, nir_metadata_dominance, NULL);
2025
struct zink_shader *
2026
zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
2027
const struct pipe_stream_output_info *so_info)
2029
struct zink_shader *ret = CALLOC_STRUCT(zink_shader);
2030
bool have_psiz = false;
2032
ret->hash = _mesa_hash_pointer(ret);
2033
ret->reduced_prim = get_shader_base_prim_type(nir);
2035
ret->programs = _mesa_pointer_set_create(NULL);
2036
simple_mtx_init(&ret->lock, mtx_plain);
2038
nir_variable_mode indirect_derefs_modes = nir_var_function_temp;
2039
if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
2040
nir->info.stage == MESA_SHADER_TESS_EVAL)
2041
indirect_derefs_modes |= nir_var_shader_in | nir_var_shader_out;
2043
NIR_PASS_V(nir, nir_lower_indirect_derefs, indirect_derefs_modes,
2046
if (nir->info.stage == MESA_SHADER_VERTEX)
2047
create_vs_pushconst(nir);
2048
else if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
2049
nir->info.stage == MESA_SHADER_TESS_EVAL)
2050
NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
2051
else if (nir->info.stage == MESA_SHADER_KERNEL)
2052
create_cs_pushconst(nir);
2054
if (nir->info.stage < MESA_SHADER_FRAGMENT)
2055
have_psiz = check_psiz(nir);
2056
NIR_PASS_V(nir, lower_basevertex);
2057
NIR_PASS_V(nir, lower_work_dim);
2058
NIR_PASS_V(nir, nir_lower_regs_to_ssa);
2059
NIR_PASS_V(nir, lower_baseinstance);
2060
NIR_PASS_V(nir, lower_sparse);
2062
if (screen->need_2D_zs)
2063
NIR_PASS_V(nir, lower_1d_shadow, screen);
2066
nir_lower_subgroups_options subgroup_options = {0};
2067
subgroup_options.lower_to_scalar = true;
2068
subgroup_options.subgroup_size = screen->info.props11.subgroupSize;
2069
subgroup_options.ballot_bit_size = 32;
2070
subgroup_options.ballot_components = 4;
2071
subgroup_options.lower_subgroup_masks = true;
2072
NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
2076
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
2077
NIR_PASS_V(nir, nir_lower_discard_if);
2078
NIR_PASS_V(nir, nir_lower_fragcolor,
2079
nir->info.fs.color_is_dual_source ? 1 : 8);
2080
NIR_PASS_V(nir, lower_64bit_vertex_attribs);
2081
NIR_PASS_V(nir, unbreak_bos);
2082
/* run in compile if there could be inlined uniforms */
2083
if (!screen->driconf.inline_uniforms) {
2084
NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
2085
NIR_PASS_V(nir, rewrite_bo_access, screen);
2086
NIR_PASS_V(nir, remove_bo_access);
2089
if (zink_debug & ZINK_DEBUG_NIR) {
2090
fprintf(stderr, "NIR shader:\n---8<---\n");
2091
nir_print_shader(nir, stderr);
2092
fprintf(stderr, "---8<---\n");
2095
nir_variable *bindless[4] = {0};
2096
bool has_bindless_io = false;
2097
nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
2098
if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {
2099
has_bindless_io = true;
2103
if (has_bindless_io)
2104
NIR_PASS_V(nir, lower_bindless_io);
2109
scan_nir(screen, nir, ret);
2111
foreach_list_typed_reverse_safe(nir_variable, var, node, &nir->variables) {
2112
if (_nir_shader_variable_has_mode(var, nir_var_uniform |
2115
nir_var_mem_ssbo)) {
2116
enum zink_descriptor_type ztype;
2117
const struct glsl_type *type = glsl_without_array(var->type);
2118
if (var->data.mode == nir_var_mem_ubo) {
2119
ztype = ZINK_DESCRIPTOR_TYPE_UBO;
2120
/* buffer 0 is a push descriptor */
2121
var->data.descriptor_set = !!var->data.driver_location;
2122
var->data.binding = !var->data.driver_location ? nir->info.stage :
2123
zink_binding(nir->info.stage,
2124
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
2125
var->data.driver_location);
2126
assert(var->data.driver_location || var->data.binding < 10);
2127
VkDescriptorType vktype = !var->data.driver_location ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
2128
int binding = var->data.binding;
2130
ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
2131
ret->bindings[ztype][ret->num_bindings[ztype]].binding = binding;
2132
ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
2133
ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
2134
ret->ubos_used |= (1 << ret->bindings[ztype][ret->num_bindings[ztype]].index);
2135
ret->num_bindings[ztype]++;
2136
} else if (var->data.mode == nir_var_mem_ssbo) {
2137
ztype = ZINK_DESCRIPTOR_TYPE_SSBO;
2138
var->data.descriptor_set = ztype + 1;
2139
var->data.binding = zink_binding(nir->info.stage,
2140
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
2141
var->data.driver_location);
2142
ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
2143
ret->ssbos_used |= (1 << ret->bindings[ztype][ret->num_bindings[ztype]].index);
2144
ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
2145
ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
2146
ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
2147
ret->num_bindings[ztype]++;
2149
assert(var->data.mode == nir_var_uniform ||
2150
var->data.mode == nir_var_image);
2151
if (var->data.bindless) {
2152
ret->bindless = true;
2153
handle_bindless_var(nir, var, type, bindless);
2154
} else if (glsl_type_is_sampler(type) || glsl_type_is_image(type)) {
2155
VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
2156
ztype = zink_desc_type_from_vktype(vktype);
2157
if (vktype == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER)
2158
ret->num_texel_buffers++;
2159
var->data.driver_location = var->data.binding;
2160
var->data.descriptor_set = ztype + 1;
2161
var->data.binding = zink_binding(nir->info.stage, vktype, var->data.driver_location);
2162
ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
2163
ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
2164
ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
2165
if (glsl_type_is_array(var->type))
2166
ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_aoa_size(var->type);
2168
ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
2169
ret->num_bindings[ztype]++;
2174
bool bindless_lowered = false;
2175
NIR_PASS(bindless_lowered, nir, lower_bindless, bindless);
2176
ret->bindless |= bindless_lowered;
2179
if (so_info && nir->info.outputs_written && nir->info.has_transform_feedback_varyings)
2180
update_so_info(ret, so_info, nir->info.outputs_written, have_psiz);
2181
else if (have_psiz) {
2182
bool have_fake_psiz = false;
2183
nir_variable *psiz = NULL;
2184
nir_foreach_shader_out_variable(var, nir) {
2185
if (var->data.location == VARYING_SLOT_PSIZ) {
2186
if (!var->data.explicit_location)
2187
have_fake_psiz = true;
2192
if (have_fake_psiz && psiz) {
2193
psiz->data.mode = nir_var_shader_temp;
2194
nir_fixup_deref_modes(nir);
2195
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2199
ret->can_inline = true;
2205
zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr)
2207
struct zink_screen *screen = zink_screen(pscreen);
2208
nir_shader *nir = nirptr;
2210
nir_lower_tex_options tex_opts = {0};
2212
Sampled Image must be an object whose type is OpTypeSampledImage.
2213
The Dim operand of the underlying OpTypeImage must be 1D, 2D, 3D,
2214
or Rect, and the Arrayed and MS operands must be 0.
2215
- SPIRV, OpImageSampleProj* opcodes
2217
tex_opts.lower_txp = BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) |
2218
BITFIELD_BIT(GLSL_SAMPLER_DIM_MS);
2219
tex_opts.lower_txp_array = true;
2220
if (!screen->info.feats.features.shaderImageGatherExtended)
2221
tex_opts.lower_tg4_offsets = true;
2222
NIR_PASS_V(nir, nir_lower_tex, &tex_opts);
2223
NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, true, false);
2224
if (nir->info.stage == MESA_SHADER_GEOMETRY)
2225
NIR_PASS_V(nir, nir_lower_gs_intrinsics, nir_lower_gs_intrinsics_per_stream);
2227
if (nir->info.num_ubos || nir->info.num_ssbos)
2228
NIR_PASS_V(nir, nir_lower_dynamic_bo_access);
2229
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
2230
if (screen->driconf.inline_uniforms)
2231
nir_find_inlinable_uniforms(nir);
2237
zink_shader_free(struct zink_context *ctx, struct zink_shader *shader)
2239
set_foreach(shader->programs, entry) {
2240
if (shader->nir->info.stage == MESA_SHADER_COMPUTE) {
2241
struct zink_compute_program *comp = (void*)entry->key;
2242
if (!comp->base.removed) {
2243
_mesa_hash_table_remove_key(&ctx->compute_program_cache, comp->shader);
2244
comp->base.removed = true;
2246
comp->shader = NULL;
2247
zink_compute_program_reference(ctx, &comp, NULL);
2249
struct zink_gfx_program *prog = (void*)entry->key;
2250
enum pipe_shader_type pstage = pipe_shader_type_from_mesa(shader->nir->info.stage);
2251
assert(pstage < ZINK_SHADER_COUNT);
2252
if (!prog->base.removed && (shader->nir->info.stage != MESA_SHADER_TESS_CTRL || !shader->is_generated)) {
2253
_mesa_hash_table_remove_key(&ctx->program_cache[prog->stages_present >> 2], prog->shaders);
2254
prog->base.removed = true;
2256
if (shader->nir->info.stage != MESA_SHADER_TESS_CTRL || !shader->is_generated)
2257
prog->shaders[pstage] = NULL;
2258
/* only remove generated tcs during parent tes destruction */
2259
if (shader->nir->info.stage == MESA_SHADER_TESS_EVAL && shader->generated)
2260
prog->shaders[PIPE_SHADER_TESS_CTRL] = NULL;
2261
zink_gfx_program_reference(ctx, &prog, NULL);
2264
if (shader->nir->info.stage == MESA_SHADER_TESS_EVAL && shader->generated) {
2265
/* automatically destroy generated tcs shaders when tes is destroyed */
2266
zink_shader_free(ctx, shader->generated);
2267
shader->generated = NULL;
2269
_mesa_set_destroy(shader->programs, NULL);
2270
ralloc_free(shader->nir);
2271
ralloc_free(shader->spirv);
2277
zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices)
2279
assert(zs->nir->info.stage == MESA_SHADER_TESS_CTRL);
2280
/* shortcut all the nir passes since we just have to change this one word */
2281
zs->spirv->words[zs->spirv->tcs_vertices_out_word] = patch_vertices;
2282
return zink_shader_spirv_compile(screen, zs, NULL);
2285
/* creating a passthrough tcs shader that's roughly:

#version 150
#extension GL_ARB_tessellation_shader : require

in vec4 some_var[gl_MaxPatchVertices];
out vec4 some_var_out;

layout(push_constant) uniform tcsPushConstants {
    layout(offset = 0) float TessLevelInner[2];
    layout(offset = 8) float TessLevelOuter[4];
} u_tcsPushConstants;
layout(vertices = $vertices_per_patch) out;
void main()
{
  gl_TessLevelInner = u_tcsPushConstants.TessLevelInner;
  gl_TessLevelOuter = u_tcsPushConstants.TessLevelOuter;
  some_var_out = some_var[gl_InvocationID];
}

*/
struct zink_shader *
2307
zink_shader_tcs_create(struct zink_screen *screen, struct zink_shader *vs, unsigned vertices_per_patch)
2309
struct zink_shader *ret = CALLOC_STRUCT(zink_shader);
2310
ret->hash = _mesa_hash_pointer(ret);
2311
ret->programs = _mesa_pointer_set_create(NULL);
2312
simple_mtx_init(&ret->lock, mtx_plain);
2314
nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_TESS_CTRL, &screen->nir_options, NULL);
2315
nir_function *fn = nir_function_create(nir, "main");
2316
fn->is_entrypoint = true;
2317
nir_function_impl *impl = nir_function_impl_create(fn);
2320
nir_builder_init(&b, impl);
2321
b.cursor = nir_before_block(nir_start_block(impl));
2323
nir_ssa_def *invocation_id = nir_load_invocation_id(&b);
2325
nir_foreach_shader_out_variable(var, vs->nir) {
2326
const struct glsl_type *type = var->type;
2327
const struct glsl_type *in_type = var->type;
2328
const struct glsl_type *out_type = var->type;
2330
snprintf(buf, sizeof(buf), "%s_out", var->name);
2331
in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
2332
out_type = glsl_array_type(type, vertices_per_patch, 0);
2334
nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name);
2335
nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf);
2336
out->data.location = in->data.location = var->data.location;
2337
out->data.location_frac = in->data.location_frac = var->data.location_frac;
2339
/* gl_in[] receives values from equivalent built-in output
2340
variables written by the vertex shader (section 2.14.7). Each array
2341
element of gl_in[] is a structure holding values for a specific vertex of
2342
the input patch. The length of gl_in[] is equal to the
2343
implementation-dependent maximum patch size (gl_MaxPatchVertices).
2344
- ARB_tessellation_shader
2346
/* we need to load the invocation-specific value of the vertex output and then store it to the per-patch output */
2347
nir_deref_instr *in_array_var = nir_build_deref_array(&b, nir_build_deref_var(&b, in), invocation_id);
2348
nir_ssa_def *load = nir_load_deref(&b, in_array_var);
2349
nir_deref_instr *out_array_var = nir_build_deref_array(&b, nir_build_deref_var(&b, out), invocation_id);
2350
nir_store_deref(&b, out_array_var, load, 0xff);
2352
nir_variable *gl_TessLevelInner = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 2, 0), "gl_TessLevelInner");
2353
gl_TessLevelInner->data.location = VARYING_SLOT_TESS_LEVEL_INNER;
2354
gl_TessLevelInner->data.patch = 1;
2355
nir_variable *gl_TessLevelOuter = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 4, 0), "gl_TessLevelOuter");
2356
gl_TessLevelOuter->data.location = VARYING_SLOT_TESS_LEVEL_OUTER;
2357
gl_TessLevelOuter->data.patch = 1;
2359
/* hacks so we can size these right for now */
2360
struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, 3);
2361
/* just use a single blob for padding here because it's easier */
2362
fields[0].type = glsl_array_type(glsl_uint_type(), offsetof(struct zink_gfx_push_constant, default_inner_level) / 4, 0);
2363
fields[0].name = ralloc_asprintf(nir, "padding");
2364
fields[0].offset = 0;
2365
fields[1].type = glsl_array_type(glsl_uint_type(), 2, 0);
2366
fields[1].name = ralloc_asprintf(nir, "gl_TessLevelInner");
2367
fields[1].offset = offsetof(struct zink_gfx_push_constant, default_inner_level);
2368
fields[2].type = glsl_array_type(glsl_uint_type(), 4, 0);
2369
fields[2].name = ralloc_asprintf(nir, "gl_TessLevelOuter");
2370
fields[2].offset = offsetof(struct zink_gfx_push_constant, default_outer_level);
2371
nir_variable *pushconst = nir_variable_create(nir, nir_var_mem_push_const,
2372
glsl_struct_type(fields, 3, "struct", false), "pushconst");
2373
pushconst->data.location = VARYING_SLOT_VAR0;
2375
nir_ssa_def *load_inner = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 1), .base = 1, .range = 8);
2376
nir_ssa_def *load_outer = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 2), .base = 2, .range = 16);
2378
for (unsigned i = 0; i < 2; i++) {
2379
nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelInner), i);
2380
nir_store_deref(&b, store_idx, nir_channel(&b, load_inner, i), 0xff);
2382
for (unsigned i = 0; i < 4; i++) {
2383
nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelOuter), i);
2384
nir_store_deref(&b, store_idx, nir_channel(&b, load_outer, i), 0xff);
2387
nir->info.tess.tcs_vertices_out = vertices_per_patch;
2388
nir_validate_shader(nir, "created");
2390
NIR_PASS_V(nir, nir_lower_regs_to_ssa);
2392
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
2393
NIR_PASS_V(nir, nir_convert_from_ssa, true);
2396
ret->is_generated = true;