38
38
#include "program/prog_parameter.h"
39
39
#include "program/prog_print.h"
40
40
#include "program/prog_to_nir.h"
41
#include "program/programopt.h"
43
42
#include "compiler/glsl/gl_nir.h"
44
43
#include "compiler/glsl/gl_nir_linker.h"
50
49
#include "pipe/p_defines.h"
51
50
#include "pipe/p_shader_tokens.h"
52
51
#include "draw/draw_context.h"
53
#include "tgsi/tgsi_dump.h"
54
#include "tgsi/tgsi_parse.h"
55
#include "tgsi/tgsi_ureg.h"
56
#include "nir/nir_to_tgsi.h"
58
53
#include "util/u_memory.h"
325
320
p->variants = NULL;
327
if (p->state.tokens) {
328
ureg_free_tokens(p->state.tokens);
329
p->state.tokens = NULL;
332
322
/* Note: Any setup of ->ir.nir that has had pipe->create_*_state called on
333
323
* it has resulted in the driver taking ownership of the NIR. Those
334
324
* callers should be NULLing out the nir field in any pipe_shader_state
379
369
struct pipe_screen *screen = st->screen;
381
NIR_PASS_V(nir, nir_lower_regs_to_ssa);
382
nir_validate_shader(nir, "after st/ptn lower_regs_to_ssa");
371
NIR_PASS_V(nir, nir_lower_reg_intrinsics_to_ssa);
372
nir_validate_shader(nir, "after st/ptn lower_reg_intrinsics_to_ssa");
384
374
/* Lower outputs to temporaries to avoid reading from output variables (which
385
375
* is permitted by the language but generally not implemented in HW).
459
447
/* Determine the (default) output register mapping for each output. */
460
448
unsigned num_outputs = 0;
461
ubyte output_mapping[VARYING_SLOT_TESS_MAX];
449
uint8_t output_mapping[VARYING_SLOT_TESS_MAX];
462
450
memset(output_mapping, 0, sizeof(output_mapping));
464
452
for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
505
493
st_create_nir_shader(struct st_context *st, struct pipe_shader_state *state)
507
495
struct pipe_context *pipe = st->pipe;
508
struct pipe_screen *screen = st->screen;
510
497
assert(state->type == PIPE_SHADER_IR_NIR);
511
498
nir_shader *nir = state->ir.nir;
512
499
struct shader_info info = nir->info;
513
500
gl_shader_stage stage = nir->info.stage;
514
enum pipe_shader_type sh = pipe_shader_type_from_mesa(stage);
516
502
if (ST_DEBUG & DEBUG_PRINT_IR) {
517
503
fprintf(stderr, "NIR before handing off to driver:\n");
518
504
nir_print_shader(nir, stderr);
521
if (PIPE_SHADER_IR_NIR !=
522
screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_PREFERRED_IR)) {
523
/* u_screen.c defaults to images as deref enabled for some reason (which
524
* is what radeonsi wants), but nir-to-tgsi requires lowered images.
526
if (screen->get_param(screen, PIPE_CAP_NIR_IMAGES_AS_DEREF))
527
NIR_PASS_V(nir, gl_nir_lower_images, false);
529
state->type = PIPE_SHADER_IR_TGSI;
530
state->tokens = nir_to_tgsi(nir, screen);
532
if (ST_DEBUG & DEBUG_PRINT_IR) {
533
fprintf(stderr, "TGSI for driver after nir-to-tgsi:\n");
534
tgsi_dump(state->tokens, 0);
535
fprintf(stderr, "\n");
539
507
struct pipe_shader_state *shader;
541
509
case MESA_SHADER_VERTEX:
584
549
st_translate_vertex_program(struct st_context *st,
585
550
struct gl_program *prog)
588
if (prog->arb.IsPositionInvariant)
589
_mesa_insert_mvp_code(st->ctx, prog);
591
552
/* This determines which states will be updated when the assembly
592
553
* shader is bound.
609
570
prog->state.type = PIPE_SHADER_IR_NIR;
610
prog->nir = st_translate_prog_to_nir(st, prog,
571
if (prog->arb.Instructions)
572
prog->nir = st_translate_prog_to_nir(st, prog,
574
st_prog_to_nir_postprocess(st, prog->nir, prog);
612
575
prog->info = prog->nir->info;
614
577
st_prepare_vertex_program(prog);
838
803
st_translate_fragment_program(struct st_context *st,
839
struct gl_program *fp)
804
struct gl_program *prog)
841
806
/* This determines which states will be updated when the assembly
842
807
* shader is bound.
844
809
* fragment.position and glDrawPixels always use constants.
846
fp->affected_states = ST_NEW_FS_STATE |
811
prog->affected_states = ST_NEW_FS_STATE |
847
812
ST_NEW_SAMPLE_SHADING |
848
813
ST_NEW_FS_CONSTANTS;
851
816
/* Just set them for ATI_fs unconditionally. */
852
fp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
817
prog->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
856
if (fp->SamplersUsed)
857
fp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
861
/* Translate to NIR. ATI_fs translates at variant time. */
864
st_translate_prog_to_nir(st, fp, MESA_SHADER_FRAGMENT);
867
ralloc_free(fp->nir);
868
if (fp->serialized_nir) {
869
free(fp->serialized_nir);
870
fp->serialized_nir = NULL;
872
fp->state.type = PIPE_SHADER_IR_NIR;
821
if (prog->SamplersUsed)
822
prog->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
826
/* Translate to NIR. */
827
if (prog->nir && prog->arb.Instructions)
828
ralloc_free(prog->nir);
830
if (prog->serialized_nir) {
831
free(prog->serialized_nir);
832
prog->serialized_nir = NULL;
835
prog->state.type = PIPE_SHADER_IR_NIR;
836
if (prog->arb.Instructions) {
837
prog->nir = st_translate_prog_to_nir(st, prog,
838
MESA_SHADER_FRAGMENT);
839
} else if (prog->ati_fs) {
840
const struct nir_shader_compiler_options *options =
841
st_get_nir_compiler_options(st, MESA_SHADER_FRAGMENT);
844
prog->nir = st_translate_atifs_program(prog->ati_fs, prog, options);
846
st_prog_to_nir_postprocess(st, prog->nir, prog);
848
prog->info = prog->nir->info;
850
/* ATI_fs will lower fixed function fog at variant time, after the FF vertex
851
* prog has been generated. So we have to always declare a read of FOGC so
852
* that FF vp feeds it to us just in case.
854
prog->info.inputs_read |= VARYING_BIT_FOGC;
899
882
/* Translate ATI_fs to NIR at variant time because that's when we have the
903
const struct nir_shader_compiler_options *options =
904
st_get_nir_compiler_options(st, MESA_SHADER_FRAGMENT);
906
nir_shader *s = st_translate_atifs_program(fp->ati_fs, key, fp, options);
908
st_prog_to_nir_postprocess(st, s, fp);
912
state.ir.nir = get_nir_shader(st, fp);
885
state.ir.nir = get_nir_shader(st, fp);
914
886
state.type = PIPE_SHADER_IR_NIR;
916
888
bool finalize = false;
892
NIR_PASS_V(state.ir.nir, st_nir_lower_fog, key->fog, fp->Parameters);
893
NIR_PASS_V(state.ir.nir, nir_lower_io_to_temporaries,
894
nir_shader_get_entrypoint(state.ir.nir),
896
nir_lower_global_vars_to_local(state.ir.nir);
899
NIR_PASS_V(state.ir.nir, st_nir_lower_atifs_samplers, key->texture_index);
918
904
if (key->clamp_color) {
919
905
NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
1014
1000
bool need_lower_tex_src_plane = false;
1016
if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1002
if (unlikely(key->external.lower_nv12 || key->external.lower_nv21 ||
1003
key->external.lower_iyuv ||
1017
1004
key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
1005
key->external.lower_yx_xvxu || key->external.lower_xy_vxux ||
1018
1006
key->external.lower_ayuv || key->external.lower_xyuv ||
1019
1007
key->external.lower_yuv || key->external.lower_yu_yv ||
1020
key->external.lower_y41x)) {
1008
key->external.lower_yv_yu || key->external.lower_y41x)) {
1022
1010
st_nir_lower_samplers(st->screen, state.ir.nir,
1023
1011
fp->shader_program, fp);
1025
1013
nir_lower_tex_options options = {0};
1026
1014
options.lower_y_uv_external = key->external.lower_nv12;
1015
options.lower_y_vu_external = key->external.lower_nv21;
1027
1016
options.lower_y_u_v_external = key->external.lower_iyuv;
1028
1017
options.lower_xy_uxvx_external = key->external.lower_xy_uxvx;
1018
options.lower_xy_vxux_external = key->external.lower_xy_vxux;
1029
1019
options.lower_yx_xuxv_external = key->external.lower_yx_xuxv;
1020
options.lower_yx_xvxu_external = key->external.lower_yx_xvxu;
1030
1021
options.lower_ayuv_external = key->external.lower_ayuv;
1031
1022
options.lower_xyuv_external = key->external.lower_xyuv;
1032
1023
options.lower_yuv_external = key->external.lower_yuv;
1033
1024
options.lower_yu_yv_external = key->external.lower_yu_yv;
1025
options.lower_yv_yu_external = key->external.lower_yv_yu;
1034
1026
options.lower_y41x_external = key->external.lower_y41x;
1035
1027
options.bt709_external = key->external.bt709;
1036
1028
options.bt2020_external = key->external.bt2020;
1050
1042
if (unlikely(need_lower_tex_src_plane)) {
1051
1043
NIR_PASS_V(state.ir.nir, st_nir_lower_tex_src_plane,
1052
1044
~fp->SamplersUsed,
1053
key->external.lower_nv12 | key->external.lower_xy_uxvx |
1054
key->external.lower_yx_xuxv,
1045
key->external.lower_nv12 | key->external.lower_nv21 |
1046
key->external.lower_xy_uxvx | key->external.lower_xy_vxux |
1047
key->external.lower_yx_xuxv | key->external.lower_yx_xvxu,
1055
1048
key->external.lower_iyuv);
1056
1049
finalize = true;
1052
/* It is undefined behavior when an ARB assembly uses SHADOW2D target
1053
* with a texture that is not in a depth format. In this case NVIDIA automatically
1054
* replaces SHADOW sampler with a normal sampler and some games like
1055
* Penumbra Overture, which abuse this UB (issues/8425), work fine there but
1056
* break with mesa. Replace the shadow sampler with a normal one here
1058
if (!fp->shader_program && ~key->depth_textures & fp->ShadowSamplers) {
1059
NIR_PASS_V(state.ir.nir, nir_remove_tex_shadow,
1060
~key->depth_textures & fp->ShadowSamplers);
1059
1064
if (finalize || !st->allow_st_finalize_nir_twice) {
1060
1065
/* Some of the lowering above may have introduced new varyings */
1061
1066
nir_shader_gather_info(state.ir.nir,
1098
1103
if (fp->variants != NULL) {
1099
1104
_mesa_perf_debug(st->ctx, MESA_DEBUG_SEVERITY_MEDIUM,
1100
"Compiling fragment shader variant (%s%s%s%s%s%s%s%s%s%s%s%s)",
1105
"Compiling fragment shader variant (%s%s%s%s%s%s%s%s%s%s%s%s%s%d)",
1101
1106
key->bitmap ? "bitmap," : "",
1102
1107
key->drawpixels ? "drawpixels," : "",
1103
1108
key->scaleAndBias ? "scale_bias," : "",
1107
1112
key->fog ? "fog," : "",
1108
1113
key->lower_two_sided_color ? "twoside," : "",
1109
1114
key->lower_flatshade ? "flatshade," : "",
1110
key->lower_alpha_func ? "alpha_compare," : "",
1115
key->lower_alpha_func != COMPARE_FUNC_ALWAYS ? "alpha_compare," : "",
1111
1116
/* skipped ATI_fs targets */
1112
1117
fp->ExternalSamplersUsed ? "external?," : "",
1113
key->gl_clamp[0] || key->gl_clamp[1] || key->gl_clamp[2] ? "GL_CLAMP," : "");
1118
key->gl_clamp[0] || key->gl_clamp[1] || key->gl_clamp[2] ? "GL_CLAMP," : "",
1119
"depth_textures=", key->depth_textures);
1116
1122
fpv = st_create_fp_variant(st, fp, key);
1229
1235
destroy_shader_program_variants_cb, st);
1233
st_can_add_pointsize_to_program(struct st_context *st, struct gl_program *prog)
1235
nir_shader *nir = prog->nir;
1237
return true; //fixedfunction
1238
assert(nir->info.stage == MESA_SHADER_VERTEX ||
1239
nir->info.stage == MESA_SHADER_TESS_EVAL ||
1240
nir->info.stage == MESA_SHADER_GEOMETRY);
1241
if (nir->info.outputs_written & VARYING_BIT_PSIZ)
1243
unsigned max_components = nir->info.stage == MESA_SHADER_GEOMETRY ?
1244
st->ctx->Const.MaxGeometryTotalOutputComponents :
1245
st->ctx->Const.Program[nir->info.stage].MaxOutputComponents;
1246
unsigned num_components = 0;
1247
unsigned needed_components = nir->info.stage == MESA_SHADER_GEOMETRY ? nir->info.gs.vertices_out : 1;
1248
nir_foreach_shader_out_variable(var, nir) {
1249
num_components += glsl_count_dword_slots(var->type, false);
1251
/* Ensure that there is enough attribute space to emit at least one primitive */
1252
if (nir->info.stage == MESA_SHADER_GEOMETRY) {
1253
if (num_components + needed_components > st->ctx->Const.Program[nir->info.stage].MaxOutputComponents)
1255
num_components *= nir->info.gs.vertices_out;
1258
return num_components + needed_components <= max_components;
1262
1239
* Compile one shader variant.
1300
1277
for (int i = 0; i < ARRAY_SIZE(key.texture_index); i++)
1301
1278
key.texture_index[i] = TEXTURE_2D_INDEX;
1281
/* Shadow samplers require texture in depth format, which we lower to
1282
* non-shadow if necessary for ARB programs
1284
if (!prog->shader_program)
1285
key.depth_textures = prog->ShadowSamplers;
1303
1287
st_get_fp_variant(st, prog, &key);
1384
1370
if (target == GL_FRAGMENT_PROGRAM_ARB ||
1385
1371
target == GL_FRAGMENT_SHADER_ATI) {
1386
if (target == GL_FRAGMENT_SHADER_ATI) {
1387
assert(prog->ati_fs);
1388
assert(prog->ati_fs->Program == prog);
1390
st_init_atifs_prog(ctx, prog);
1393
1372
if (!st_translate_fragment_program(st, prog))
1395
1374
} else if (target == GL_VERTEX_PROGRAM_ARB) {
1396
1375
if (!st_translate_vertex_program(st, prog))
1398
if (st->lower_point_size && st_can_add_pointsize_to_program(st, prog)) {
1377
if (st->lower_point_size &&
1378
gl_nir_can_add_pointsize_to_program(&st->ctx->Const, prog)) {
1399
1379
prog->skip_pointsize_xfb = true;
1400
NIR_PASS_V(prog->nir, st_nir_add_point_size);
1380
NIR_PASS_V(prog->nir, gl_nir_add_point_size);