/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24
#include "d3d12_compiler.h"
25
#include "d3d12_context.h"
26
#include "d3d12_debug.h"
27
#include "d3d12_screen.h"
28
#include "d3d12_nir_passes.h"
29
#include "nir_to_dxil.h"
32
#include "pipe/p_state.h"
35
#include "nir/nir_draw_helpers.h"
36
#include "nir/tgsi_to_nir.h"
37
#include "compiler/nir/nir_builder.h"
38
#include "tgsi/tgsi_from_mesa.h"
39
#include "tgsi/tgsi_ureg.h"
41
#include "util/u_memory.h"
42
#include "util/u_prim.h"
43
#include "util/u_simple_shaders.h"
44
#include "util/u_dl.h"
46
#include <directx/d3d12.h>
47
#include <dxguids/dxguids.h>
50
#include "tgsi/tgsi_parse.h"
51
#include "tgsi/tgsi_point_sprite.h"
55
#include "dxil_validator.h"
59
d3d12_get_compiler_options(struct pipe_screen *screen,
60
enum pipe_shader_ir ir,
61
enum pipe_shader_type shader)
63
assert(ir == PIPE_SHADER_IR_NIR);
64
return &d3d12_screen(screen)->nir_options;
68
resource_dimension(enum glsl_sampler_dim dim)
71
case GLSL_SAMPLER_DIM_1D:
72
return RESOURCE_DIMENSION_TEXTURE1D;
73
case GLSL_SAMPLER_DIM_2D:
74
return RESOURCE_DIMENSION_TEXTURE2D;
75
case GLSL_SAMPLER_DIM_3D:
76
return RESOURCE_DIMENSION_TEXTURE3D;
77
case GLSL_SAMPLER_DIM_CUBE:
78
return RESOURCE_DIMENSION_TEXTURECUBE;
80
return RESOURCE_DIMENSION_UNKNOWN;
85
can_remove_dead_sampler(nir_variable *var, void *data)
87
const struct glsl_type *base_type = glsl_without_array(var->type);
88
return glsl_type_is_sampler(base_type) && !glsl_type_is_bare_sampler(base_type);
91
static struct d3d12_shader *
92
compile_nir(struct d3d12_context *ctx, struct d3d12_shader_selector *sel,
93
struct d3d12_shader_key *key, struct nir_shader *nir)
95
struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
96
struct d3d12_shader *shader = rzalloc(sel, d3d12_shader);
99
sel->current = shader;
101
NIR_PASS_V(nir, nir_lower_samplers);
102
NIR_PASS_V(nir, dxil_nir_split_typed_samplers);
104
NIR_PASS_V(nir, nir_opt_dce);
105
struct nir_remove_dead_variables_options dead_var_opts = {};
106
dead_var_opts.can_remove_var = can_remove_dead_sampler;
107
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform, &dead_var_opts);
109
if (key->samples_int_textures)
110
NIR_PASS_V(nir, dxil_lower_sample_to_txf_for_integer_tex,
111
key->tex_wrap_states, key->swizzle_state,
112
screen->base.get_paramf(&screen->base, PIPE_CAPF_MAX_TEXTURE_LOD_BIAS));
114
if (key->vs.needs_format_emulation)
115
d3d12_nir_lower_vs_vertex_conversion(nir, key->vs.format_conversion);
117
uint32_t num_ubos_before_lower_to_ubo = nir->info.num_ubos;
118
uint32_t num_uniforms_before_lower_to_ubo = nir->num_uniforms;
119
NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, false, false);
120
shader->has_default_ubo0 = num_uniforms_before_lower_to_ubo > 0 &&
121
nir->info.num_ubos > num_ubos_before_lower_to_ubo;
123
if (key->last_vertex_processing_stage) {
124
if (key->invert_depth)
125
NIR_PASS_V(nir, d3d12_nir_invert_depth, key->invert_depth);
126
NIR_PASS_V(nir, nir_lower_clip_halfz);
127
NIR_PASS_V(nir, d3d12_lower_yflip);
129
NIR_PASS_V(nir, nir_lower_packed_ubo_loads);
130
NIR_PASS_V(nir, d3d12_lower_load_draw_params);
131
NIR_PASS_V(nir, d3d12_lower_load_patch_vertices_in);
132
NIR_PASS_V(nir, d3d12_lower_state_vars, shader);
133
NIR_PASS_V(nir, dxil_nir_lower_bool_input);
134
NIR_PASS_V(nir, dxil_nir_lower_loads_stores_to_dxil);
135
NIR_PASS_V(nir, dxil_nir_lower_atomics_to_dxil);
136
NIR_PASS_V(nir, dxil_nir_lower_double_math);
138
if (key->fs.multisample_disabled)
139
NIR_PASS_V(nir, d3d12_disable_multisampling);
141
struct nir_to_dxil_options opts = {};
142
opts.interpolate_at_vertex = screen->have_load_at_vertex;
143
opts.lower_int16 = !screen->opts4.Native16BitShaderOpsSupported;
144
opts.no_ubo0 = !shader->has_default_ubo0;
145
opts.last_ubo_is_not_arrayed = shader->num_state_vars > 0;
146
opts.provoking_vertex = key->fs.provoking_vertex;
147
opts.input_clip_size = key->input_clip_size;
148
opts.environment = DXIL_ENVIRONMENT_GL;
151
if (!nir_to_dxil(nir, &opts, &tmp)) {
152
debug_printf("D3D12: nir_to_dxil failed\n");
157
shader->begin_srv_binding = (UINT_MAX);
158
nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
159
auto type_no_array = glsl_without_array(var->type);
160
if (glsl_type_is_texture(type_no_array)) {
161
unsigned count = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
162
for (unsigned i = 0; i < count; ++i) {
163
shader->srv_bindings[var->data.binding + i].dimension = resource_dimension(glsl_get_sampler_dim(type_no_array));
165
shader->begin_srv_binding = MIN2(var->data.binding, shader->begin_srv_binding);
166
shader->end_srv_binding = MAX2(var->data.binding + count, shader->end_srv_binding);
170
nir_foreach_image_variable(var, nir) {
171
auto type_no_array = glsl_without_array(var->type);
172
unsigned count = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
173
for (unsigned i = 0; i < count; ++i) {
174
shader->uav_bindings[var->data.driver_location + i].format = var->data.image.format;
175
shader->uav_bindings[var->data.driver_location + i].dimension = resource_dimension(glsl_get_sampler_dim(type_no_array));
180
if(nir->info.num_ubos) {
181
// Ignore state_vars ubo as it is bound as root constants
182
unsigned num_ubo_bindings = nir->info.num_ubos - (shader->state_vars_used ? 1 : 0);
183
for(unsigned i = shader->has_default_ubo0 ? 0 : 1; i < num_ubo_bindings; ++i) {
184
shader->cb_bindings[shader->num_cb_bindings++].binding = i;
189
if (ctx->dxil_validator) {
190
if (!(d3d12_debug & D3D12_DEBUG_EXPERIMENTAL)) {
192
if (!dxil_validate_module(ctx->dxil_validator, tmp.data,
193
tmp.size, &err) && err) {
195
"== VALIDATION ERROR =============================================\n"
197
"== END ==========================================================\n",
203
if (d3d12_debug & D3D12_DEBUG_DISASS) {
204
char *str = dxil_disasm_module(ctx->dxil_validator, tmp.data,
207
"== BEGIN SHADER ============================================\n"
209
"== END SHADER ==============================================\n",
216
blob_finish_get_buffer(&tmp, &shader->bytecode, &shader->bytecode_length);
218
if (d3d12_debug & D3D12_DEBUG_DXIL) {
221
snprintf(buf, sizeof(buf), "dump%02d.dxil", i++);
222
FILE *fp = fopen(buf, "wb");
223
fwrite(shader->bytecode, sizeof(char), shader->bytecode_length, fp);
225
fprintf(stderr, "wrote '%s'...\n", buf);
/* Per-draw snapshot of the state that drives shader-variant selection.
 * NOTE(review): reconstructed — the `alternate_tri` member was restored
 * because later code in this file reads sel_ctx->alternate_tri. */
struct d3d12_selection_context {
   struct d3d12_context *ctx;
   bool needs_point_sprite_lowering;
   bool needs_vertex_reordering;
   unsigned provoking_vertex;
   bool alternate_tri;                     /* strip-like prim with alternating winding */
   unsigned fill_mode_lowered;             /* PIPE_POLYGON_MODE_* handled via GS variant */
   unsigned cull_mode_lowered;             /* PIPE_FACE_* handled via GS variant */
   bool manual_depth_range;
   unsigned missing_dual_src_outputs;      /* bitmask of dual-src outputs to synthesize */
   unsigned frag_result_color_lowering;    /* nr_cbufs when gl_FragColor fan-out needed */
   const unsigned *variable_workgroup_size;
};
245
missing_dual_src_outputs(struct d3d12_context *ctx)
247
if (!ctx->gfx_pipeline_state.blend->is_dual_src)
250
struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
251
nir_shader *s = fs->initial;
253
unsigned indices_seen = 0;
254
nir_foreach_function(function, s) {
255
if (function->impl) {
256
nir_foreach_block(block, function->impl) {
257
nir_foreach_instr(instr, block) {
258
if (instr->type != nir_instr_type_intrinsic)
261
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
262
if (intr->intrinsic != nir_intrinsic_store_deref)
265
nir_variable *var = nir_intrinsic_get_var(intr, 0);
266
if (var->data.mode != nir_var_shader_out)
269
unsigned index = var->data.index;
270
if (var->data.location > FRAG_RESULT_DATA0)
271
index = var->data.location - FRAG_RESULT_DATA0;
272
else if (var->data.location != FRAG_RESULT_COLOR &&
273
var->data.location != FRAG_RESULT_DATA0)
276
indices_seen |= 1u << index;
277
if ((indices_seen & 3) == 3)
284
return 3 & ~indices_seen;
288
frag_result_color_lowering(struct d3d12_context *ctx)
290
struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
293
if (fs->initial->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR))
294
return ctx->fb.nr_cbufs > 1 ? ctx->fb.nr_cbufs : 0;
300
manual_depth_range(struct d3d12_context *ctx)
302
if (!d3d12_need_zero_one_depth_range(ctx))
306
* If we can't use the D3D12 zero-one depth-range, we might have to apply
307
* depth-range ourselves.
309
* Because we only need to override the depth-range to zero-one range in
310
* the case where we write frag-depth, we only need to apply manual
311
* depth-range to gl_FragCoord.z.
313
* No extra care is needed to be taken in the case where gl_FragDepth is
314
* written conditionally, because the GLSL 4.60 spec states:
316
* If a shader statically assigns a value to gl_FragDepth, and there
317
* is an execution path through the shader that does not set
318
* gl_FragDepth, then the value of the fragment’s depth may be
319
* undefined for executions of the shader that take that path. That
320
* is, if the set of linked fragment shaders statically contain a
321
* write to gl_FragDepth, then it is responsible for always writing
325
struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
326
return fs && fs->initial->info.inputs_read & VARYING_BIT_POS;
330
needs_edge_flag_fix(enum pipe_prim_type mode)
332
return (mode == PIPE_PRIM_QUADS ||
333
mode == PIPE_PRIM_QUAD_STRIP ||
334
mode == PIPE_PRIM_POLYGON);
338
fill_mode_lowered(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
340
struct d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
342
if ((ctx->gfx_stages[PIPE_SHADER_GEOMETRY] != NULL &&
343
!ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_variant) ||
344
ctx->gfx_pipeline_state.rast == NULL ||
345
(dinfo->mode != PIPE_PRIM_TRIANGLES &&
346
dinfo->mode != PIPE_PRIM_TRIANGLE_STRIP))
347
return PIPE_POLYGON_MODE_FILL;
349
/* D3D12 supports line mode (wireframe) but doesn't support edge flags */
350
if (((ctx->gfx_pipeline_state.rast->base.fill_front == PIPE_POLYGON_MODE_LINE &&
351
ctx->gfx_pipeline_state.rast->base.cull_face != PIPE_FACE_FRONT) ||
352
(ctx->gfx_pipeline_state.rast->base.fill_back == PIPE_POLYGON_MODE_LINE &&
353
ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_FRONT)) &&
354
(vs->initial->info.outputs_written & VARYING_BIT_EDGE ||
355
needs_edge_flag_fix(ctx->initial_api_prim)))
356
return PIPE_POLYGON_MODE_LINE;
358
if (ctx->gfx_pipeline_state.rast->base.fill_front == PIPE_POLYGON_MODE_POINT)
359
return PIPE_POLYGON_MODE_POINT;
361
return PIPE_POLYGON_MODE_FILL;
365
has_stream_out_for_streams(struct d3d12_context *ctx)
367
unsigned mask = ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->initial->info.gs.active_stream_mask & ~1;
368
for (unsigned i = 0; i < ctx->gfx_pipeline_state.so_info.num_outputs; ++i) {
369
unsigned stream = ctx->gfx_pipeline_state.so_info.output[i].stream;
370
if (((1 << stream) & mask) &&
371
ctx->so_buffer_views[stream].SizeInBytes)
378
needs_point_sprite_lowering(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
380
struct d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
381
struct d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
383
if (gs != NULL && !gs->is_variant) {
384
/* There is an user GS; Check if it outputs points with PSIZE */
385
return (gs->initial->info.gs.output_primitive == GL_POINTS &&
386
(gs->initial->info.outputs_written & VARYING_BIT_PSIZ ||
387
ctx->gfx_pipeline_state.rast->base.point_size > 1.0) &&
388
(gs->initial->info.gs.active_stream_mask == 1 ||
389
!has_stream_out_for_streams(ctx)));
391
/* No user GS; check if we are drawing wide points */
392
return ((dinfo->mode == PIPE_PRIM_POINTS ||
393
fill_mode_lowered(ctx, dinfo) == PIPE_POLYGON_MODE_POINT) &&
394
(ctx->gfx_pipeline_state.rast->base.point_size > 1.0 ||
395
ctx->gfx_pipeline_state.rast->base.offset_point ||
396
(ctx->gfx_pipeline_state.rast->base.point_size_per_vertex &&
397
vs->initial->info.outputs_written & VARYING_BIT_PSIZ)) &&
398
(vs->initial->info.outputs_written & VARYING_BIT_POS));
403
cull_mode_lowered(struct d3d12_context *ctx, unsigned fill_mode)
405
if ((ctx->gfx_stages[PIPE_SHADER_GEOMETRY] != NULL &&
406
!ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_variant) ||
407
ctx->gfx_pipeline_state.rast == NULL ||
408
ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_NONE)
409
return PIPE_FACE_NONE;
411
return ctx->gfx_pipeline_state.rast->base.cull_face;
415
get_provoking_vertex(struct d3d12_selection_context *sel_ctx, bool *alternate, const struct pipe_draw_info *dinfo)
417
if (dinfo->mode == GL_PATCHES) {
422
struct d3d12_shader_selector *vs = sel_ctx->ctx->gfx_stages[PIPE_SHADER_VERTEX];
423
struct d3d12_shader_selector *gs = sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
424
struct d3d12_shader_selector *last_vertex_stage = gs && !gs->is_variant ? gs : vs;
426
/* Make sure GL prims match Gallium prims */
427
STATIC_ASSERT(GL_POINTS == PIPE_PRIM_POINTS);
428
STATIC_ASSERT(GL_LINES == PIPE_PRIM_LINES);
429
STATIC_ASSERT(GL_LINE_STRIP == PIPE_PRIM_LINE_STRIP);
431
enum pipe_prim_type mode;
432
switch (last_vertex_stage->stage) {
433
case PIPE_SHADER_GEOMETRY:
434
mode = (enum pipe_prim_type)last_vertex_stage->current->nir->info.gs.output_primitive;
436
case PIPE_SHADER_VERTEX:
437
mode = (enum pipe_prim_type)dinfo->mode;
440
unreachable("Tesselation shaders are not supported");
443
bool flatshade_first = sel_ctx->ctx->gfx_pipeline_state.rast &&
444
sel_ctx->ctx->gfx_pipeline_state.rast->base.flatshade_first;
445
*alternate = (mode == GL_TRIANGLE_STRIP || mode == GL_TRIANGLE_STRIP_ADJACENCY) &&
446
(!gs || gs->is_variant ||
447
gs->initial->info.gs.vertices_out > u_prim_vertex_count(mode)->min);
448
return flatshade_first ? 0 : u_prim_vertex_count(mode)->min - 1;
452
has_flat_varyings(struct d3d12_context *ctx)
454
struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
456
if (!fs || !fs->current)
459
nir_foreach_variable_with_modes(input, fs->current->nir,
461
if (input->data.interpolation == INTERP_MODE_FLAT &&
462
/* Disregard sysvals */
463
(input->data.location >= VARYING_SLOT_VAR0 ||
464
input->data.location <= VARYING_SLOT_TEX7))
472
needs_vertex_reordering(struct d3d12_selection_context *sel_ctx, const struct pipe_draw_info *dinfo)
474
struct d3d12_context *ctx = sel_ctx->ctx;
475
bool flat = has_flat_varyings(ctx);
476
bool xfb = ctx->gfx_pipeline_state.num_so_targets > 0;
478
if (fill_mode_lowered(ctx, dinfo) != PIPE_POLYGON_MODE_FILL)
481
/* TODO add support for line primitives */
483
/* When flat shading a triangle and provoking vertex is not the first one, we use load_at_vertex.
484
If not available for this adapter, or if it's a triangle strip, we need to reorder the vertices */
485
if (flat && sel_ctx->provoking_vertex >= 2 && (!d3d12_screen(ctx->base.screen)->have_load_at_vertex ||
486
sel_ctx->alternate_tri))
489
/* When transform feedback is enabled and the output is alternating (triangle strip or triangle
490
strip with adjacency), we need to reorder vertices to get the order expected by OpenGL. This
491
only works when there is no flat shading involved. In that scenario, we don't care about
492
the provoking vertex. */
493
if (xfb && !flat && sel_ctx->alternate_tri) {
494
sel_ctx->provoking_vertex = 0;
501
static nir_variable *
502
create_varying_from_info(nir_shader *nir, struct d3d12_varying_info *info,
503
unsigned slot, unsigned slot_frac, nir_variable_mode mode, bool patch)
508
snprintf(tmp, ARRAY_SIZE(tmp),
509
mode == nir_var_shader_in ? "in_%d" : "out_%d",
510
info->slots[slot].vars[slot_frac].driver_location);
511
var = nir_variable_create(nir, mode, info->slots[slot].types[slot_frac], tmp);
512
var->data.location = slot;
513
var->data.location_frac = slot_frac;
514
var->data.driver_location = info->slots[slot].vars[slot_frac].driver_location;
515
var->data.interpolation = info->slots[slot].vars[slot_frac].interpolation;
516
var->data.patch = info->slots[slot].patch;
517
var->data.compact = info->slots[slot].vars[slot_frac].compact;
519
var->data.location += VARYING_SLOT_PATCH0;
521
if (mode == nir_var_shader_out)
522
NIR_PASS_V(nir, d3d12_write_0_to_new_varying, var);
528
create_varyings_from_info(nir_shader *nir, struct d3d12_varying_info *info,
529
unsigned slot, nir_variable_mode mode, bool patch)
531
unsigned mask = info->slots[slot].location_frac_mask;
533
create_varying_from_info(nir, info, slot, u_bit_scan(&mask), mode, patch);
537
fill_varyings(struct d3d12_varying_info *info, nir_shader *s,
538
nir_variable_mode modes, uint64_t mask, bool patch)
540
nir_foreach_variable_with_modes(var, s, modes) {
541
unsigned slot = var->data.location;
542
bool is_generic_patch = slot >= VARYING_SLOT_PATCH0;
543
if (patch ^ is_generic_patch)
545
if (is_generic_patch)
546
slot -= VARYING_SLOT_PATCH0;
547
uint64_t slot_bit = BITFIELD64_BIT(slot);
549
if (!(mask & slot_bit))
552
const struct glsl_type *type = var->type;
553
if ((s->info.stage == MESA_SHADER_GEOMETRY ||
554
s->info.stage == MESA_SHADER_TESS_CTRL) &&
555
(modes & nir_var_shader_in) &&
556
glsl_type_is_array(type))
557
type = glsl_get_array_element(type);
558
info->slots[slot].types[var->data.location_frac] = type;
560
info->slots[slot].patch = var->data.patch;
561
auto& var_slot = info->slots[slot].vars[var->data.location_frac];
562
var_slot.driver_location = var->data.driver_location;
563
var_slot.interpolation = var->data.interpolation;
564
var_slot.compact = var->data.compact;
565
info->mask |= slot_bit;
566
info->slots[slot].location_frac_mask |= (1 << var->data.location_frac);
571
fill_flat_varyings(struct d3d12_gs_variant_key *key, d3d12_shader_selector *fs)
573
if (!fs || !fs->current)
576
nir_foreach_variable_with_modes(input, fs->current->nir,
578
if (input->data.interpolation == INTERP_MODE_FLAT)
579
key->flat_varyings |= BITFIELD64_BIT(input->data.location);
584
validate_geometry_shader_variant(struct d3d12_selection_context *sel_ctx)
586
struct d3d12_context *ctx = sel_ctx->ctx;
587
d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
588
d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
589
struct d3d12_gs_variant_key key = {0};
590
bool variant_needed = false;
592
d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
594
/* Nothing to do if there is a user geometry shader bound */
595
if (gs != NULL && !gs->is_variant)
598
/* Fill the geometry shader variant key */
599
if (sel_ctx->fill_mode_lowered != PIPE_POLYGON_MODE_FILL) {
600
key.fill_mode = sel_ctx->fill_mode_lowered;
601
key.cull_mode = sel_ctx->cull_mode_lowered;
602
key.has_front_face = BITSET_TEST(fs->initial->info.system_values_read, SYSTEM_VALUE_FRONT_FACE);
603
if (key.cull_mode != PIPE_FACE_NONE || key.has_front_face)
604
key.front_ccw = ctx->gfx_pipeline_state.rast->base.front_ccw ^ (ctx->flip_y < 0);
605
key.edge_flag_fix = needs_edge_flag_fix(ctx->initial_api_prim);
606
fill_flat_varyings(&key, fs);
607
if (key.flat_varyings != 0)
608
key.flatshade_first = ctx->gfx_pipeline_state.rast->base.flatshade_first;
609
variant_needed = true;
610
} else if (sel_ctx->needs_point_sprite_lowering) {
611
key.passthrough = true;
612
variant_needed = true;
613
} else if (sel_ctx->needs_vertex_reordering) {
614
/* TODO support cases where flat shading (pv != 0) and xfb are enabled */
615
key.provoking_vertex = sel_ctx->provoking_vertex;
616
key.alternate_tri = sel_ctx->alternate_tri;
617
variant_needed = true;
620
if (variant_needed) {
621
fill_varyings(&key.varyings, vs->initial, nir_var_shader_out,
622
vs->initial->info.outputs_written, false);
625
/* Check if the currently bound geometry shader variant is correct */
626
if (gs && memcmp(&gs->gs_key, &key, sizeof(key)) == 0)
629
/* Find/create the proper variant and bind it */
630
gs = variant_needed ? d3d12_get_gs_variant(ctx, &key) : NULL;
631
ctx->gfx_stages[PIPE_SHADER_GEOMETRY] = gs;
635
validate_tess_ctrl_shader_variant(struct d3d12_selection_context *sel_ctx)
637
struct d3d12_context *ctx = sel_ctx->ctx;
638
d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
639
d3d12_shader_selector *tcs = ctx->gfx_stages[PIPE_SHADER_TESS_CTRL];
640
d3d12_shader_selector *tes = ctx->gfx_stages[PIPE_SHADER_TESS_EVAL];
641
struct d3d12_tcs_variant_key key = {0};
643
/* Nothing to do if there is a user tess ctrl shader bound */
644
if (tcs != NULL && !tcs->is_variant)
647
bool variant_needed = tes != nullptr;
649
/* Fill the variant key */
650
if (variant_needed) {
651
fill_varyings(&key.varyings, vs->initial, nir_var_shader_out,
652
vs->initial->info.outputs_written, false);
653
key.vertices_out = ctx->patch_vertices;
656
/* Check if the currently bound tessellation control shader variant is correct */
657
if (tcs && memcmp(&tcs->tcs_key, &key, sizeof(key)) == 0)
660
/* Find/create the proper variant and bind it */
661
tcs = variant_needed ? d3d12_get_tcs_variant(ctx, &key) : NULL;
662
ctx->gfx_stages[PIPE_SHADER_TESS_CTRL] = tcs;
666
d3d12_compare_shader_keys(const d3d12_shader_key *expect, const d3d12_shader_key *have)
668
assert(expect->stage == have->stage);
672
/* Because we only add varyings we check that a shader has at least the expected in-
674
if (memcmp(&expect->required_varying_inputs, &have->required_varying_inputs,
675
sizeof(struct d3d12_varying_info)) ||
676
memcmp(&expect->required_varying_outputs, &have->required_varying_outputs,
677
sizeof(struct d3d12_varying_info)) ||
678
(expect->next_varying_inputs != have->next_varying_inputs) ||
679
(expect->prev_varying_outputs != have->prev_varying_outputs))
682
if (expect->stage == PIPE_SHADER_GEOMETRY) {
683
if (expect->gs.writes_psize) {
684
if (!have->gs.writes_psize ||
685
expect->gs.point_pos_stream_out != have->gs.point_pos_stream_out ||
686
expect->gs.sprite_coord_enable != have->gs.sprite_coord_enable ||
687
expect->gs.sprite_origin_upper_left != have->gs.sprite_origin_upper_left ||
688
expect->gs.point_size_per_vertex != have->gs.point_size_per_vertex)
690
} else if (have->gs.writes_psize) {
693
if (expect->gs.primitive_id != have->gs.primitive_id ||
694
expect->gs.triangle_strip != have->gs.triangle_strip)
696
} else if (expect->stage == PIPE_SHADER_FRAGMENT) {
697
if (expect->fs.frag_result_color_lowering != have->fs.frag_result_color_lowering ||
698
expect->fs.manual_depth_range != have->fs.manual_depth_range ||
699
expect->fs.polygon_stipple != have->fs.polygon_stipple ||
700
expect->fs.cast_to_uint != have->fs.cast_to_uint ||
701
expect->fs.cast_to_int != have->fs.cast_to_int ||
702
expect->fs.remap_front_facing != have->fs.remap_front_facing ||
703
expect->fs.missing_dual_src_outputs != have->fs.missing_dual_src_outputs ||
704
expect->fs.multisample_disabled != have->fs.multisample_disabled)
706
} else if (expect->stage == PIPE_SHADER_COMPUTE) {
707
if (memcmp(expect->cs.workgroup_size, have->cs.workgroup_size,
708
sizeof(have->cs.workgroup_size)))
710
} else if (expect->stage == PIPE_SHADER_TESS_CTRL) {
711
if (expect->hs.primitive_mode != have->hs.primitive_mode ||
712
expect->hs.ccw != have->hs.ccw ||
713
expect->hs.point_mode != have->hs.point_mode ||
714
expect->hs.spacing != have->hs.spacing ||
715
expect->hs.patch_vertices_in != have->hs.patch_vertices_in ||
716
memcmp(&expect->hs.required_patch_outputs, &have->hs.required_patch_outputs,
717
sizeof(struct d3d12_varying_info)) ||
718
expect->hs.next_patch_inputs != have->hs.next_patch_inputs)
720
} else if (expect->stage == PIPE_SHADER_TESS_EVAL) {
721
if (expect->ds.tcs_vertices_out != have->ds.tcs_vertices_out ||
722
memcmp(&expect->ds.required_patch_inputs, &have->ds.required_patch_inputs,
723
sizeof(struct d3d12_varying_info)) ||
724
expect->ds.prev_patch_outputs != have ->ds.prev_patch_outputs)
728
if (expect->input_clip_size != have->input_clip_size)
731
if (expect->tex_saturate_s != have->tex_saturate_s ||
732
expect->tex_saturate_r != have->tex_saturate_r ||
733
expect->tex_saturate_t != have->tex_saturate_t)
736
if (expect->samples_int_textures != have->samples_int_textures)
739
if (expect->n_texture_states != have->n_texture_states)
742
if (expect->n_images != have->n_images)
745
if (memcmp(expect->tex_wrap_states, have->tex_wrap_states,
746
expect->n_texture_states * sizeof(dxil_wrap_sampler_state)))
749
if (memcmp(expect->swizzle_state, have->swizzle_state,
750
expect->n_texture_states * sizeof(dxil_texture_swizzle_state)))
753
if (memcmp(expect->sampler_compare_funcs, have->sampler_compare_funcs,
754
expect->n_texture_states * sizeof(enum compare_func)))
757
if (memcmp(expect->image_format_conversion, have->image_format_conversion,
758
expect->n_images * sizeof(struct d3d12_image_format_conversion_info)))
761
if (expect->invert_depth != have->invert_depth)
764
if (expect->stage == PIPE_SHADER_VERTEX) {
765
if (expect->vs.needs_format_emulation != have->vs.needs_format_emulation)
768
if (expect->vs.needs_format_emulation) {
769
if (memcmp(expect->vs.format_conversion, have->vs.format_conversion,
770
PIPE_MAX_ATTRIBS * sizeof (enum pipe_format)))
775
if (expect->fs.provoking_vertex != have->fs.provoking_vertex)
782
d3d12_fill_shader_key(struct d3d12_selection_context *sel_ctx,
783
d3d12_shader_key *key, d3d12_shader_selector *sel,
784
d3d12_shader_selector *prev, d3d12_shader_selector *next)
786
pipe_shader_type stage = sel->stage;
788
uint64_t system_generated_in_values =
790
VARYING_BIT_PRIMITIVE_ID;
792
uint64_t system_out_values =
793
VARYING_BIT_CLIP_DIST0 |
794
VARYING_BIT_CLIP_DIST1;
796
memset(key, 0, sizeof(d3d12_shader_key));
800
/* We require as inputs what the previous stage has written,
801
* except certain system values */
802
if (stage == PIPE_SHADER_FRAGMENT || stage == PIPE_SHADER_GEOMETRY)
803
system_out_values |= VARYING_BIT_POS;
804
if (stage == PIPE_SHADER_FRAGMENT)
805
system_out_values |= VARYING_BIT_PSIZ | VARYING_BIT_VIEWPORT;
806
uint64_t mask = prev->current->nir->info.outputs_written & ~system_out_values;
807
fill_varyings(&key->required_varying_inputs, prev->current->nir,
808
nir_var_shader_out, mask, false);
809
key->prev_varying_outputs = prev->current->nir->info.outputs_written;
811
if (stage == PIPE_SHADER_TESS_EVAL) {
812
uint32_t patch_mask = prev->current->nir->info.patch_outputs_written;
813
fill_varyings(&key->ds.required_patch_inputs, prev->current->nir,
814
nir_var_shader_out, patch_mask, true);
815
key->ds.prev_patch_outputs = patch_mask;
818
/* Set the provoking vertex based on the previous shader output. Only set the
819
* key value if the driver actually supports changing the provoking vertex though */
820
if (stage == PIPE_SHADER_FRAGMENT && sel_ctx->ctx->gfx_pipeline_state.rast &&
821
!sel_ctx->needs_vertex_reordering &&
822
d3d12_screen(sel_ctx->ctx->base.screen)->have_load_at_vertex)
823
key->fs.provoking_vertex = sel_ctx->provoking_vertex;
825
/* Get the input clip distance size. The info's clip_distance_array_size corresponds
826
* to the output, and in cases of TES or GS you could have differently-sized inputs
827
* and outputs. For FS, there is no output, so it's repurposed to mean input.
829
if (stage != PIPE_SHADER_FRAGMENT)
830
key->input_clip_size = prev->current->nir->info.clip_distance_array_size;
833
/* We require as outputs what the next stage reads,
834
* except certain system values */
836
if (!next->is_variant) {
837
if (stage == PIPE_SHADER_VERTEX)
838
system_generated_in_values |= VARYING_BIT_POS;
839
uint64_t mask = next->current->nir->info.inputs_read & ~system_generated_in_values;
840
fill_varyings(&key->required_varying_outputs, next->current->nir,
841
nir_var_shader_in, mask, false);
843
if (stage == PIPE_SHADER_TESS_CTRL) {
844
uint32_t patch_mask = next->current->nir->info.patch_outputs_read;
845
fill_varyings(&key->hs.required_patch_outputs, prev->current->nir,
846
nir_var_shader_in, patch_mask, true);
847
key->hs.next_patch_inputs = patch_mask;
850
key->next_varying_inputs = next->current->nir->info.inputs_read;
854
if (stage == PIPE_SHADER_GEOMETRY ||
855
((stage == PIPE_SHADER_VERTEX || stage == PIPE_SHADER_TESS_EVAL) &&
856
(!next || next->stage == PIPE_SHADER_FRAGMENT))) {
857
key->last_vertex_processing_stage = 1;
858
key->invert_depth = sel_ctx->ctx->reverse_depth_range;
859
if (sel_ctx->ctx->pstipple.enabled)
860
key->next_varying_inputs |= VARYING_BIT_POS;
863
if (stage == PIPE_SHADER_GEOMETRY && sel_ctx->ctx->gfx_pipeline_state.rast) {
864
struct pipe_rasterizer_state *rast = &sel_ctx->ctx->gfx_pipeline_state.rast->base;
865
if (sel_ctx->needs_point_sprite_lowering) {
866
key->gs.writes_psize = 1;
867
key->gs.point_size_per_vertex = rast->point_size_per_vertex;
868
key->gs.sprite_coord_enable = rast->sprite_coord_enable;
869
key->gs.sprite_origin_upper_left = (rast->sprite_coord_mode != PIPE_SPRITE_COORD_LOWER_LEFT);
870
if (sel_ctx->ctx->flip_y < 0)
871
key->gs.sprite_origin_upper_left = !key->gs.sprite_origin_upper_left;
872
key->gs.aa_point = rast->point_smooth;
873
key->gs.stream_output_factor = 6;
874
} else if (sel_ctx->fill_mode_lowered == PIPE_POLYGON_MODE_LINE) {
875
key->gs.stream_output_factor = 2;
876
} else if (sel_ctx->needs_vertex_reordering && !sel->is_variant) {
877
key->gs.triangle_strip = 1;
880
if (sel->is_variant && next && next->initial->info.inputs_read & VARYING_BIT_PRIMITIVE_ID)
881
key->gs.primitive_id = 1;
882
} else if (stage == PIPE_SHADER_FRAGMENT) {
883
key->fs.missing_dual_src_outputs = sel_ctx->missing_dual_src_outputs;
884
key->fs.frag_result_color_lowering = sel_ctx->frag_result_color_lowering;
885
key->fs.manual_depth_range = sel_ctx->manual_depth_range;
886
key->fs.polygon_stipple = sel_ctx->ctx->pstipple.enabled;
887
key->fs.multisample_disabled = sel_ctx->ctx->gfx_pipeline_state.rast &&
888
!sel_ctx->ctx->gfx_pipeline_state.rast->desc.MultisampleEnable;
889
if (sel_ctx->ctx->gfx_pipeline_state.blend &&
890
sel_ctx->ctx->gfx_pipeline_state.blend->desc.RenderTarget[0].LogicOpEnable &&
891
!sel_ctx->ctx->gfx_pipeline_state.has_float_rtv) {
892
key->fs.cast_to_uint = util_format_is_unorm(sel_ctx->ctx->fb.cbufs[0]->format);
893
key->fs.cast_to_int = !key->fs.cast_to_uint;
895
} else if (stage == PIPE_SHADER_TESS_CTRL) {
896
if (next && next->current->nir->info.stage == MESA_SHADER_TESS_EVAL) {
897
key->hs.primitive_mode = next->current->nir->info.tess._primitive_mode;
898
key->hs.ccw = next->current->nir->info.tess.ccw;
899
key->hs.point_mode = next->current->nir->info.tess.point_mode;
900
key->hs.spacing = next->current->nir->info.tess.spacing;
902
key->hs.primitive_mode = TESS_PRIMITIVE_QUADS;
904
key->hs.point_mode = false;
905
key->hs.spacing = TESS_SPACING_EQUAL;
907
key->hs.patch_vertices_in = MAX2(sel_ctx->ctx->patch_vertices, 1);
908
} else if (stage == PIPE_SHADER_TESS_EVAL) {
909
if (prev && prev->current->nir->info.stage == MESA_SHADER_TESS_CTRL)
910
key->ds.tcs_vertices_out = prev->current->nir->info.tess.tcs_vertices_out;
912
key->ds.tcs_vertices_out = 32;
915
if (sel->samples_int_textures) {
916
key->samples_int_textures = sel->samples_int_textures;
917
key->n_texture_states = sel_ctx->ctx->num_sampler_views[stage];
918
/* Copy only states with integer textures */
919
for(int i = 0; i < key->n_texture_states; ++i) {
920
auto& wrap_state = sel_ctx->ctx->tex_wrap_states[stage][i];
921
if (wrap_state.is_int_sampler) {
922
memcpy(&key->tex_wrap_states[i], &wrap_state, sizeof(wrap_state));
923
key->swizzle_state[i] = sel_ctx->ctx->tex_swizzle_state[stage][i];
928
for (unsigned i = 0; i < sel_ctx->ctx->num_samplers[stage]; ++i) {
929
if (!sel_ctx->ctx->samplers[stage][i] ||
930
sel_ctx->ctx->samplers[stage][i]->filter == PIPE_TEX_FILTER_NEAREST)
933
if (sel_ctx->ctx->samplers[stage][i]->wrap_r == PIPE_TEX_WRAP_CLAMP)
934
key->tex_saturate_r |= 1 << i;
935
if (sel_ctx->ctx->samplers[stage][i]->wrap_s == PIPE_TEX_WRAP_CLAMP)
936
key->tex_saturate_s |= 1 << i;
937
if (sel_ctx->ctx->samplers[stage][i]->wrap_t == PIPE_TEX_WRAP_CLAMP)
938
key->tex_saturate_t |= 1 << i;
941
if (sel->compare_with_lod_bias_grad) {
942
key->n_texture_states = sel_ctx->ctx->num_sampler_views[stage];
943
memcpy(key->sampler_compare_funcs, sel_ctx->ctx->tex_compare_func[stage],
944
key->n_texture_states * sizeof(enum compare_func));
945
memcpy(key->swizzle_state, sel_ctx->ctx->tex_swizzle_state[stage],
946
key->n_texture_states * sizeof(dxil_texture_swizzle_state));
949
if (stage == PIPE_SHADER_VERTEX && sel_ctx->ctx->gfx_pipeline_state.ves) {
950
key->vs.needs_format_emulation = sel_ctx->ctx->gfx_pipeline_state.ves->needs_format_emulation;
951
if (key->vs.needs_format_emulation) {
952
memcpy(key->vs.format_conversion, sel_ctx->ctx->gfx_pipeline_state.ves->format_conversion,
953
sel_ctx->ctx->gfx_pipeline_state.ves->num_elements * sizeof(enum pipe_format));
957
if (stage == PIPE_SHADER_FRAGMENT &&
958
sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY] &&
959
sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_variant &&
960
sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->gs_key.has_front_face) {
961
key->fs.remap_front_facing = 1;
964
if (stage == PIPE_SHADER_COMPUTE && sel_ctx->variable_workgroup_size) {
965
memcpy(key->cs.workgroup_size, sel_ctx->variable_workgroup_size, sizeof(key->cs.workgroup_size));
968
key->n_images = sel_ctx->ctx->num_image_views[stage];
969
for (int i = 0; i < key->n_images; ++i) {
970
key->image_format_conversion[i].emulated_format = sel_ctx->ctx->image_view_emulation_formats[stage][i];
971
if (key->image_format_conversion[i].emulated_format != PIPE_FORMAT_NONE)
972
key->image_format_conversion[i].view_format = sel_ctx->ctx->image_views[stage][i].format;
977
select_shader_variant(struct d3d12_selection_context *sel_ctx, d3d12_shader_selector *sel,
978
d3d12_shader_selector *prev, d3d12_shader_selector *next)
980
struct d3d12_context *ctx = sel_ctx->ctx;
981
d3d12_shader_key key;
982
nir_shader *new_nir_variant;
983
unsigned pstipple_binding = UINT32_MAX;
985
d3d12_fill_shader_key(sel_ctx, &key, sel, prev, next);
987
/* Check for an existing variant */
988
for (d3d12_shader *variant = sel->first; variant;
989
variant = variant->next_variant) {
991
if (d3d12_compare_shader_keys(&key, &variant->key)) {
992
sel->current = variant;
997
/* Clone the NIR shader */
998
new_nir_variant = nir_shader_clone(sel, sel->initial);
1000
/* Apply any needed lowering passes */
1001
if (key.gs.writes_psize) {
1002
NIR_PASS_V(new_nir_variant, d3d12_lower_point_sprite,
1003
!key.gs.sprite_origin_upper_left,
1004
key.gs.point_size_per_vertex,
1005
key.gs.sprite_coord_enable,
1006
key.next_varying_inputs);
1008
nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
1009
nir_shader_gather_info(new_nir_variant, impl);
1012
if (key.gs.primitive_id) {
1013
NIR_PASS_V(new_nir_variant, d3d12_lower_primitive_id);
1015
nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
1016
nir_shader_gather_info(new_nir_variant, impl);
1019
if (key.gs.triangle_strip)
1020
NIR_PASS_V(new_nir_variant, d3d12_lower_triangle_strip);
1022
if (key.fs.polygon_stipple) {
1023
NIR_PASS_V(new_nir_variant, nir_lower_pstipple_fs,
1024
&pstipple_binding, 0, false);
1026
nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
1027
nir_shader_gather_info(new_nir_variant, impl);
1030
if (key.fs.remap_front_facing) {
1031
d3d12_forward_front_face(new_nir_variant);
1033
nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
1034
nir_shader_gather_info(new_nir_variant, impl);
1037
if (key.fs.missing_dual_src_outputs) {
1038
NIR_PASS_V(new_nir_variant, d3d12_add_missing_dual_src_target,
1039
key.fs.missing_dual_src_outputs);
1040
} else if (key.fs.frag_result_color_lowering) {
1041
NIR_PASS_V(new_nir_variant, nir_lower_fragcolor,
1042
key.fs.frag_result_color_lowering);
1045
if (key.fs.manual_depth_range)
1046
NIR_PASS_V(new_nir_variant, d3d12_lower_depth_range);
1048
if (sel->compare_with_lod_bias_grad) {
1049
STATIC_ASSERT(sizeof(dxil_texture_swizzle_state) ==
1050
sizeof(nir_lower_tex_shadow_swizzle));
1052
NIR_PASS_V(new_nir_variant, nir_lower_tex_shadow, key.n_texture_states,
1053
key.sampler_compare_funcs, (nir_lower_tex_shadow_swizzle *)key.swizzle_state);
1056
if (key.fs.cast_to_uint)
1057
NIR_PASS_V(new_nir_variant, d3d12_lower_uint_cast, false);
1058
if (key.fs.cast_to_int)
1059
NIR_PASS_V(new_nir_variant, d3d12_lower_uint_cast, true);
1062
NIR_PASS_V(new_nir_variant, d3d12_lower_image_casts, key.image_format_conversion);
1064
if (sel->workgroup_size_variable) {
1065
new_nir_variant->info.workgroup_size[0] = key.cs.workgroup_size[0];
1066
new_nir_variant->info.workgroup_size[1] = key.cs.workgroup_size[1];
1067
new_nir_variant->info.workgroup_size[2] = key.cs.workgroup_size[2];
1070
if (new_nir_variant->info.stage == MESA_SHADER_TESS_CTRL) {
1071
new_nir_variant->info.tess._primitive_mode = (tess_primitive_mode)key.hs.primitive_mode;
1072
new_nir_variant->info.tess.ccw = key.hs.ccw;
1073
new_nir_variant->info.tess.point_mode = key.hs.point_mode;
1074
new_nir_variant->info.tess.spacing = key.hs.spacing;
1076
NIR_PASS_V(new_nir_variant, dxil_nir_set_tcs_patches_in, key.hs.patch_vertices_in);
1077
} else if (new_nir_variant->info.stage == MESA_SHADER_TESS_EVAL) {
1078
new_nir_variant->info.tess.tcs_vertices_out = key.ds.tcs_vertices_out;
1082
struct nir_lower_tex_options tex_options = { };
1083
tex_options.lower_txp = ~0u; /* No equivalent for textureProj */
1084
tex_options.lower_rect = true;
1085
tex_options.lower_rect_offset = true;
1086
tex_options.saturate_s = key.tex_saturate_s;
1087
tex_options.saturate_r = key.tex_saturate_r;
1088
tex_options.saturate_t = key.tex_saturate_t;
1090
NIR_PASS_V(new_nir_variant, nir_lower_tex, &tex_options);
1093
/* Add the needed in and outputs, and re-sort */
1095
uint64_t mask = key.required_varying_inputs.mask & ~new_nir_variant->info.inputs_read;
1096
new_nir_variant->info.inputs_read |= mask;
1098
int slot = u_bit_scan64(&mask);
1099
create_varyings_from_info(new_nir_variant, &key.required_varying_inputs, slot, nir_var_shader_in, false);
1102
if (sel->stage == PIPE_SHADER_TESS_EVAL) {
1103
uint32_t patch_mask = (uint32_t)key.ds.required_patch_inputs.mask & ~new_nir_variant->info.patch_inputs_read;
1104
new_nir_variant->info.patch_inputs_read |= patch_mask;
1105
while (patch_mask) {
1106
int slot = u_bit_scan(&patch_mask);
1107
create_varyings_from_info(new_nir_variant, &key.ds.required_patch_inputs, slot, nir_var_shader_in, true);
1110
dxil_reassign_driver_locations(new_nir_variant, nir_var_shader_in,
1111
key.prev_varying_outputs);
1116
uint64_t mask = key.required_varying_outputs.mask & ~new_nir_variant->info.outputs_written;
1117
new_nir_variant->info.outputs_written |= mask;
1119
int slot = u_bit_scan64(&mask);
1120
create_varyings_from_info(new_nir_variant, &key.required_varying_outputs, slot, nir_var_shader_out, false);
1123
if (sel->stage == PIPE_SHADER_TESS_CTRL) {
1124
uint32_t patch_mask = (uint32_t)key.hs.required_patch_outputs.mask & ~new_nir_variant->info.patch_outputs_written;
1125
new_nir_variant->info.patch_outputs_written |= patch_mask;
1126
while (patch_mask) {
1127
int slot = u_bit_scan(&patch_mask);
1128
create_varyings_from_info(new_nir_variant, &key.ds.required_patch_inputs, slot, nir_var_shader_out, true);
1131
dxil_reassign_driver_locations(new_nir_variant, nir_var_shader_out,
1132
key.next_varying_inputs);
1135
d3d12_shader *new_variant = compile_nir(ctx, sel, &key, new_nir_variant);
1136
assert(new_variant);
1138
/* keep track of polygon stipple texture binding */
1139
new_variant->pstipple_binding = pstipple_binding;
1141
/* prepend the new shader in the selector chain and pick it */
1142
new_variant->next_variant = sel->first;
1143
sel->current = sel->first = new_variant;
1146
/**
 * Return the closest enabled pipeline stage that feeds `current`, or NULL
 * for the vertex shader (which has no upstream stage).  Optional stages
 * (geometry, tessellation) fall through when they are not bound.
 */
static d3d12_shader_selector *
get_prev_shader(struct d3d12_context *ctx, pipe_shader_type current)
{
   switch (current) {
   case PIPE_SHADER_VERTEX:
      return NULL;
   case PIPE_SHADER_FRAGMENT:
      if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY])
         return ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
      FALLTHROUGH;
   case PIPE_SHADER_GEOMETRY:
      if (ctx->gfx_stages[PIPE_SHADER_TESS_EVAL])
         return ctx->gfx_stages[PIPE_SHADER_TESS_EVAL];
      FALLTHROUGH;
   case PIPE_SHADER_TESS_EVAL:
      if (ctx->gfx_stages[PIPE_SHADER_TESS_CTRL])
         return ctx->gfx_stages[PIPE_SHADER_TESS_CTRL];
      FALLTHROUGH;
   case PIPE_SHADER_TESS_CTRL:
      return ctx->gfx_stages[PIPE_SHADER_VERTEX];
   default:
      unreachable("shader type not supported");
   }
}
/**
 * Return the closest enabled pipeline stage fed by `current`, or NULL for
 * the fragment shader (which has no downstream stage).  Optional stages
 * fall through when they are not bound.
 */
static d3d12_shader_selector *
get_next_shader(struct d3d12_context *ctx, pipe_shader_type current)
{
   switch (current) {
   case PIPE_SHADER_VERTEX:
      if (ctx->gfx_stages[PIPE_SHADER_TESS_CTRL])
         return ctx->gfx_stages[PIPE_SHADER_TESS_CTRL];
      FALLTHROUGH;
   case PIPE_SHADER_TESS_CTRL:
      if (ctx->gfx_stages[PIPE_SHADER_TESS_EVAL])
         return ctx->gfx_stages[PIPE_SHADER_TESS_EVAL];
      FALLTHROUGH;
   case PIPE_SHADER_TESS_EVAL:
      if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY])
         return ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
      FALLTHROUGH;
   case PIPE_SHADER_GEOMETRY:
      return ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
   case PIPE_SHADER_FRAGMENT:
      return NULL;
   default:
      unreachable("shader type not supported");
   }
}
/* Bitmask of texture-usage properties gathered by scan_texture_use(). */
enum tex_scan_flags {
   TEX_SAMPLE_INTEGER_TEXTURE = 1 << 0, /* shader samples an int/uint texture */
   TEX_CMP_WITH_LOD_BIAS_GRAD = 1 << 1, /* shadow compare with explicit LOD/bias/grad */
   TEX_SCAN_ALL_FLAGS         = (1 << 2) - 1
};
scan_texture_use(nir_shader *nir)
1205
unsigned result = 0;
1206
nir_foreach_function(func, nir) {
1207
nir_foreach_block(block, func->impl) {
1208
nir_foreach_instr(instr, block) {
1209
if (instr->type == nir_instr_type_tex) {
1210
auto tex = nir_instr_as_tex(instr);
1216
result |= TEX_CMP_WITH_LOD_BIAS_GRAD;
1219
if (tex->dest_type & (nir_type_int | nir_type_uint))
1220
result |= TEX_SAMPLE_INTEGER_TEXTURE;
1225
if (TEX_SCAN_ALL_FLAGS == result)
1234
update_so_info(struct pipe_stream_output_info *so_info,
1235
uint64_t outputs_written)
1237
uint64_t so_outputs = 0;
1238
uint8_t reverse_map[64] = {0};
1241
while (outputs_written)
1242
reverse_map[slot++] = u_bit_scan64(&outputs_written);
1244
for (unsigned i = 0; i < so_info->num_outputs; i++) {
1245
struct pipe_stream_output *output = &so_info->output[i];
1247
/* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
1248
output->register_index = reverse_map[output->register_index];
1250
so_outputs |= 1ull << output->register_index;
1256
static struct d3d12_shader_selector *
1257
d3d12_create_shader_impl(struct d3d12_context *ctx,
1258
struct d3d12_shader_selector *sel,
1259
struct nir_shader *nir,
1260
struct d3d12_shader_selector *prev,
1261
struct d3d12_shader_selector *next)
1263
unsigned tex_scan_result = scan_texture_use(nir);
1264
sel->samples_int_textures = (tex_scan_result & TEX_SAMPLE_INTEGER_TEXTURE) != 0;
1265
sel->compare_with_lod_bias_grad = (tex_scan_result & TEX_CMP_WITH_LOD_BIAS_GRAD) != 0;
1266
sel->workgroup_size_variable = nir->info.workgroup_size_variable;
1268
/* Integer cube maps are not supported in DirectX because sampling is not supported
1269
* on integer textures and TextureLoad is not supported for cube maps, so we have to
1270
* lower integer cube maps to be handled like 2D textures arrays*/
1271
NIR_PASS_V(nir, d3d12_lower_int_cubmap_to_array);
1273
/* Keep this initial shader as the blue print for possible variants */
1277
* We must compile some shader here, because if the previous or a next shaders exists later
1278
* when the shaders are bound, then the key evaluation in the shader selector will access
1279
* the current variant of these prev and next shader, and we can only assign
1280
* a current variant when it has been successfully compiled.
1282
* For shaders that require lowering because certain instructions are not available
1283
* and their emulation is state depended (like sampling an integer texture that must be
1284
* emulated and needs handling of boundary conditions, or shadow compare sampling with LOD),
1285
* we must go through the shader selector here to create a compilable variant.
1286
* For shaders that are not depended on the state this is just compiling the original
1289
* TODO: get rid of having to compiling the shader here if it can be forseen that it will
1290
* be thrown away (i.e. it depends on states that are likely to change before the shader is
1291
* used for the first time)
1293
struct d3d12_selection_context sel_ctx = {0};
1295
select_shader_variant(&sel_ctx, sel, prev, next);
1297
if (!sel->current) {
1305
struct d3d12_shader_selector *
1306
d3d12_create_shader(struct d3d12_context *ctx,
1307
pipe_shader_type stage,
1308
const struct pipe_shader_state *shader)
1310
struct d3d12_shader_selector *sel = rzalloc(nullptr, d3d12_shader_selector);
1313
struct nir_shader *nir = NULL;
1315
if (shader->type == PIPE_SHADER_IR_NIR) {
1316
nir = (nir_shader *)shader->ir.nir;
1318
assert(shader->type == PIPE_SHADER_IR_TGSI);
1319
nir = tgsi_to_nir(shader->tokens, ctx->base.screen, false);
1322
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
1323
memcpy(&sel->so_info, &shader->stream_output, sizeof(sel->so_info));
1324
update_so_info(&sel->so_info, nir->info.outputs_written);
1326
assert(nir != NULL);
1327
d3d12_shader_selector *prev = get_prev_shader(ctx, sel->stage);
1328
d3d12_shader_selector *next = get_next_shader(ctx, sel->stage);
1330
uint64_t in_mask = nir->info.stage == MESA_SHADER_VERTEX ?
1331
0 : (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT);
1333
uint64_t out_mask = nir->info.stage == MESA_SHADER_FRAGMENT ?
1334
(1ull << FRAG_RESULT_STENCIL) | (1ull << FRAG_RESULT_SAMPLE_MASK) :
1335
(VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT);
1337
d3d12_fix_io_uint_type(nir, in_mask, out_mask);
1338
NIR_PASS_V(nir, dxil_nir_split_clip_cull_distance);
1339
NIR_PASS_V(nir, d3d12_split_multistream_varyings);
1341
if (nir->info.stage != MESA_SHADER_VERTEX)
1342
nir->info.inputs_read =
1343
dxil_reassign_driver_locations(nir, nir_var_shader_in,
1344
prev ? prev->current->nir->info.outputs_written : 0);
1346
nir->info.inputs_read = dxil_sort_by_driver_location(nir, nir_var_shader_in);
1348
if (nir->info.stage != MESA_SHADER_FRAGMENT) {
1349
nir->info.outputs_written =
1350
dxil_reassign_driver_locations(nir, nir_var_shader_out,
1351
next ? next->current->nir->info.inputs_read : 0);
1353
NIR_PASS_V(nir, nir_lower_fragcoord_wtrans);
1354
NIR_PASS_V(nir, d3d12_lower_sample_pos);
1355
dxil_sort_ps_outputs(nir);
1358
return d3d12_create_shader_impl(ctx, sel, nir, prev, next);
1361
struct d3d12_shader_selector *
1362
d3d12_create_compute_shader(struct d3d12_context *ctx,
1363
const struct pipe_compute_state *shader)
1365
struct d3d12_shader_selector *sel = rzalloc(nullptr, d3d12_shader_selector);
1366
sel->stage = PIPE_SHADER_COMPUTE;
1368
struct nir_shader *nir = NULL;
1370
if (shader->ir_type == PIPE_SHADER_IR_NIR) {
1371
nir = (nir_shader *)shader->prog;
1373
assert(shader->ir_type == PIPE_SHADER_IR_TGSI);
1374
nir = tgsi_to_nir(shader->prog, ctx->base.screen, false);
1377
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
1379
NIR_PASS_V(nir, d3d12_lower_compute_state_vars);
1381
return d3d12_create_shader_impl(ctx, sel, nir, nullptr, nullptr);
1385
d3d12_select_shader_variants(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
1387
static unsigned order[] = {
1389
PIPE_SHADER_TESS_CTRL,
1390
PIPE_SHADER_TESS_EVAL,
1391
PIPE_SHADER_GEOMETRY,
1392
PIPE_SHADER_FRAGMENT
1394
struct d3d12_selection_context sel_ctx;
1397
sel_ctx.needs_point_sprite_lowering = needs_point_sprite_lowering(ctx, dinfo);
1398
sel_ctx.fill_mode_lowered = fill_mode_lowered(ctx, dinfo);
1399
sel_ctx.cull_mode_lowered = cull_mode_lowered(ctx, sel_ctx.fill_mode_lowered);
1400
sel_ctx.provoking_vertex = get_provoking_vertex(&sel_ctx, &sel_ctx.alternate_tri, dinfo);
1401
sel_ctx.needs_vertex_reordering = needs_vertex_reordering(&sel_ctx, dinfo);
1402
sel_ctx.missing_dual_src_outputs = missing_dual_src_outputs(ctx);
1403
sel_ctx.frag_result_color_lowering = frag_result_color_lowering(ctx);
1404
sel_ctx.manual_depth_range = manual_depth_range(ctx);
1406
validate_geometry_shader_variant(&sel_ctx);
1407
validate_tess_ctrl_shader_variant(&sel_ctx);
1409
for (unsigned i = 0; i < ARRAY_SIZE(order); ++i) {
1410
auto sel = ctx->gfx_stages[order[i]];
1414
d3d12_shader_selector *prev = get_prev_shader(ctx, sel->stage);
1415
d3d12_shader_selector *next = get_next_shader(ctx, sel->stage);
1417
select_shader_variant(&sel_ctx, sel, prev, next);
1421
static const unsigned *
1422
workgroup_size_variable(struct d3d12_context *ctx,
1423
const struct pipe_grid_info *info)
1425
if (ctx->compute_state->workgroup_size_variable)
1431
d3d12_select_compute_shader_variants(struct d3d12_context *ctx, const struct pipe_grid_info *info)
1433
struct d3d12_selection_context sel_ctx = {};
1436
sel_ctx.variable_workgroup_size = workgroup_size_variable(ctx, info);
1438
select_shader_variant(&sel_ctx, ctx->compute_state, nullptr, nullptr);
1442
d3d12_shader_free(struct d3d12_shader_selector *sel)
1444
auto shader = sel->first;
1446
free(shader->bytecode);
1447
shader = shader->next_variant;
1449
ralloc_free(sel->initial);