~mmach/netext73/mesa-ryzen

« back to all changes in this revision

Viewing changes to src/gallium/drivers/zink/zink_compiler.c

  • Committer: mmach
  • Date: 2023-11-02 21:31:35 UTC
  • Revision ID: netbit73@gmail.com-20231102213135-18d4tzh7tj0uz752
2023-11-02 22:11:57

Show diffs side-by-side

added added

removed removed

Lines of Context:
40
40
 
41
41
#include "nir/tgsi_to_nir.h"
42
42
#include "tgsi/tgsi_dump.h"
43
 
#include "tgsi/tgsi_from_mesa.h"
44
43
 
45
44
#include "util/u_memory.h"
46
45
 
237
236
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED));
238
237
   nir_intrinsic_set_range(load, 4);
239
238
   load->num_components = 1;
240
 
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_mode_is_indexed");
 
239
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32);
241
240
   nir_builder_instr_insert(b, &load->instr);
242
241
 
243
242
   nir_ssa_def *composite = nir_build_alu(b, nir_op_bcsel,
278
277
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_ID));
279
278
   nir_intrinsic_set_range(load, 4);
280
279
   load->num_components = 1;
281
 
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_id");
 
280
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32);
282
281
   nir_builder_instr_insert(b, &load->instr);
283
282
 
284
283
   nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);
340
339
   nir_ssa_def *w_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 0));
341
340
   w_delta = nir_fmul(b, w_delta, nir_channel(b, point_pos, 3));
342
341
   // half_w_delta = w_delta / 2
343
 
   nir_ssa_def *half_w_delta = nir_fmul(b, w_delta, nir_imm_float(b, 0.5));
 
342
   nir_ssa_def *half_w_delta = nir_fmul_imm(b, w_delta, 0.5);
344
343
 
345
344
   // h_delta = gl_point_size / height_viewport_size_scale * gl_Position.w
346
345
   nir_ssa_def *h_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 1));
347
346
   h_delta = nir_fmul(b, h_delta, nir_channel(b, point_pos, 3));
348
347
   // half_h_delta = h_delta / 2
349
 
   nir_ssa_def *half_h_delta = nir_fmul(b, h_delta, nir_imm_float(b, 0.5));
 
348
   nir_ssa_def *half_h_delta = nir_fmul_imm(b, h_delta, 0.5);
350
349
 
351
350
   nir_ssa_def *point_dir[4][2] = {
352
351
      { nir_imm_float(b, -1), nir_imm_float(b, -1) },
381
380
lower_gl_point_gs(nir_shader *shader)
382
381
{
383
382
   struct lower_gl_point_state state;
384
 
   nir_builder b;
385
383
 
386
 
   shader->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;
 
384
   shader->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
387
385
   shader->info.gs.vertices_out *= 4;
388
386
 
389
387
   // Gets the gl_Position in and out
398
396
   if (!state.gl_pos_out || !state.gl_point_size)
399
397
      return false;
400
398
 
401
 
   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
402
 
   nir_builder_init(&b, entry);
403
 
   b.cursor = nir_before_cf_list(&entry->body);
404
 
 
405
399
   return nir_shader_instructions_pass(shader, lower_gl_point_gs_instr,
406
400
                                       nir_metadata_dominance, &state);
407
401
}
422
416
                            nir_ssa_def *index)
423
417
{
424
418
   nir_ssa_def *ring_offset = nir_load_var(b, state->ring_offset);
425
 
   return nir_imod(b, nir_iadd(b, index, ring_offset),
426
 
                      nir_imm_int(b, state->ring_size));
 
419
   return nir_imod_imm(b, nir_iadd(b, index, ring_offset),
 
420
                          state->ring_size);
427
421
}
428
422
 
429
423
/* Given the final deref of chain of derefs this function will walk up the chain
434
428
static nir_deref_instr*
435
429
replicate_derefs(nir_builder *b, nir_deref_instr *old, nir_deref_instr *new)
436
430
{
437
 
   nir_deref_instr *parent = nir_src_as_deref(old->parent);
 
431
   nir_deref_instr *parent = nir_deref_instr_parent(old);
 
432
   if (!parent)
 
433
      return new;
438
434
   switch(old->deref_type) {
439
435
   case nir_deref_type_var:
440
436
      return new;
619
615
}
620
616
 
621
617
static unsigned int
622
 
lower_pv_mode_vertices_for_prim(enum shader_prim prim)
 
618
lower_pv_mode_vertices_for_prim(enum mesa_prim prim)
623
619
{
624
620
   switch (prim) {
625
 
   case SHADER_PRIM_POINTS:
 
621
   case MESA_PRIM_POINTS:
626
622
      return 1;
627
 
   case SHADER_PRIM_LINE_STRIP:
 
623
   case MESA_PRIM_LINE_STRIP:
628
624
      return 2;
629
 
   case SHADER_PRIM_TRIANGLE_STRIP:
 
625
   case MESA_PRIM_TRIANGLE_STRIP:
630
626
      return 3;
631
627
   default:
632
628
      unreachable("unsupported primitive for gs output");
641
637
   memset(state.varyings, 0, sizeof(state.varyings));
642
638
 
643
639
   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
644
 
   nir_builder_init(&b, entry);
645
 
   b.cursor = nir_before_cf_list(&entry->body);
 
640
   b = nir_builder_at(nir_before_cf_list(&entry->body));
646
641
 
647
642
   state.primitive_vert_count =
648
643
      lower_pv_mode_vertices_for_prim(shader->info.gs.output_primitive);
702
697
             nir_ssa_def *scale)
703
698
{
704
699
   nir_ssa_def *w_recip = nir_frcp(b, nir_channel(b, vert, 3));
705
 
   nir_ssa_def *ndc_point = nir_fmul(b, nir_channels(b, vert, 0x3),
 
700
   nir_ssa_def *ndc_point = nir_fmul(b, nir_trim_vector(b, vert, 2),
706
701
                                        w_recip);
707
702
   return nir_fmul(b, ndc_point, scale);
708
703
}
794
789
   state.line_rectangular = line_rectangular;
795
790
   // initialize pos_counter and stipple_counter
796
791
   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
797
 
   nir_builder_init(&b, entry);
798
 
   b.cursor = nir_before_cf_list(&entry->body);
 
792
   b = nir_builder_at(nir_before_cf_list(&entry->body));
799
793
   nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
800
794
   nir_store_var(&b, state.stipple_counter, nir_imm_float(&b, 0), 1);
801
795
 
808
802
{
809
803
   nir_builder b;
810
804
   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
811
 
   nir_builder_init(&b, entry);
 
805
   b = nir_builder_at(nir_after_cf_list(&entry->body));
812
806
 
813
807
   // create stipple counter
814
808
   nir_variable *stipple = nir_variable_create(shader, nir_var_shader_in,
829
823
      sample_mask_out->data.location = FRAG_RESULT_SAMPLE_MASK;
830
824
   }
831
825
 
832
 
   b.cursor = nir_after_cf_list(&entry->body);
833
 
 
834
826
   nir_ssa_def *pattern = nir_load_push_constant(&b, 1, 32,
835
827
                                                 nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN),
836
828
                                                 .base = 1);
1123
1115
 
1124
1116
   // initialize pos_counter
1125
1117
   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
1126
 
   nir_builder_init(&b, entry);
1127
 
   b.cursor = nir_before_cf_list(&entry->body);
 
1118
   b = nir_builder_at(nir_before_cf_list(&entry->body));
1128
1119
   nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
1129
1120
 
1130
1121
   shader->info.gs.vertices_out = 8 * shader->info.gs.vertices_out;
1131
 
   shader->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;
 
1122
   shader->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
1132
1123
 
1133
1124
   return nir_shader_instructions_pass(shader, lower_line_smooth_gs_instr,
1134
1125
                                       nir_metadata_dominance, &state);
1157
1148
 
1158
1149
      // initialize stipple_pattern
1159
1150
      nir_function_impl *entry = nir_shader_get_entrypoint(shader);
1160
 
      nir_builder_init(&b, entry);
1161
 
      b.cursor = nir_before_cf_list(&entry->body);
 
1151
      b = nir_builder_at(nir_before_cf_list(&entry->body));
1162
1152
      nir_ssa_def *pattern = nir_load_push_constant(&b, 1, 32,
1163
1153
                                                   nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN),
1164
1154
                                                   .base = 1);
1219
1209
 
1220
1210
nir_shader *
1221
1211
zink_create_quads_emulation_gs(const nir_shader_compiler_options *options,
1222
 
                               const nir_shader *prev_stage,
1223
 
                               int last_pv_vert_offset)
 
1212
                               const nir_shader *prev_stage)
1224
1213
{
1225
1214
   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY,
1226
1215
                                                  options,
1227
1216
                                                  "filled quad gs");
1228
1217
 
1229
1218
   nir_shader *nir = b.shader;
1230
 
   nir->info.gs.input_primitive = SHADER_PRIM_LINES_ADJACENCY;
1231
 
   nir->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;
 
1219
   nir->info.gs.input_primitive = MESA_PRIM_LINES_ADJACENCY;
 
1220
   nir->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
1232
1221
   nir->info.gs.vertices_in = 4;
1233
1222
   nir->info.gs.vertices_out = 6;
1234
1223
   nir->info.gs.invocations = 1;
1237
1226
   nir->info.has_transform_feedback_varyings = prev_stage->info.has_transform_feedback_varyings;
1238
1227
   memcpy(nir->info.xfb_stride, prev_stage->info.xfb_stride, sizeof(prev_stage->info.xfb_stride));
1239
1228
   if (prev_stage->xfb_info) {
1240
 
      nir->xfb_info = mem_dup(prev_stage->xfb_info, sizeof(nir_xfb_info));
 
1229
      nir->xfb_info = mem_dup(prev_stage->xfb_info, nir_xfb_info_size(prev_stage->xfb_info->output_count));
1241
1230
   }
1242
1231
 
1243
1232
   nir_variable *in_vars[VARYING_SLOT_MAX];
1285
1274
 
1286
1275
   int mapping_first[] = {0, 1, 2, 0, 2, 3};
1287
1276
   int mapping_last[] = {0, 1, 3, 1, 2, 3};
1288
 
   nir_ssa_def *last_pv_vert_def = nir_load_ubo(&b, 1, 32,
1289
 
                                                nir_imm_int(&b, 0), nir_imm_int(&b, last_pv_vert_offset),
1290
 
                                                .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);
 
1277
   nir_ssa_def *last_pv_vert_def = nir_load_provoking_last(&b);
1291
1278
   last_pv_vert_def = nir_ine_imm(&b, last_pv_vert_def, 0);
1292
1279
   for (unsigned i = 0; i < 6; ++i) {
1293
1280
      /* swap indices 2 and 3 */
1313
1300
   return nir;
1314
1301
}
1315
1302
 
 
1303
static bool
 
1304
lower_system_values_to_inlined_uniforms_instr(nir_builder *b, nir_instr *instr, void *data)
 
1305
{
 
1306
   if (instr->type != nir_instr_type_intrinsic)
 
1307
      return false;
 
1308
 
 
1309
   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
 
1310
 
 
1311
   int inlined_uniform_offset;
 
1312
   switch (intrin->intrinsic) {
 
1313
   case nir_intrinsic_load_flat_mask:
 
1314
      inlined_uniform_offset = ZINK_INLINE_VAL_FLAT_MASK * sizeof(uint32_t);
 
1315
      break;
 
1316
   case nir_intrinsic_load_provoking_last:
 
1317
      inlined_uniform_offset = ZINK_INLINE_VAL_PV_LAST_VERT * sizeof(uint32_t);
 
1318
      break;
 
1319
   default:
 
1320
      return false;
 
1321
   }
 
1322
 
 
1323
   b->cursor = nir_before_instr(&intrin->instr);
 
1324
   nir_ssa_def *new_dest_def = nir_load_ubo(b, 1, 32, nir_imm_int(b, 0),
 
1325
                                            nir_imm_int(b, inlined_uniform_offset),
 
1326
                                            .align_mul = 4, .align_offset = 0,
 
1327
                                            .range_base = 0, .range = ~0);
 
1328
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, new_dest_def);
 
1329
   nir_instr_remove(instr);
 
1330
   return true;
 
1331
}
 
1332
 
 
1333
bool
 
1334
zink_lower_system_values_to_inlined_uniforms(nir_shader *nir)
 
1335
{
 
1336
   return nir_shader_instructions_pass(nir, lower_system_values_to_inlined_uniforms_instr,
 
1337
                                       nir_metadata_dominance, NULL);
 
1338
}
 
1339
 
1316
1340
void
1317
1341
zink_screen_init_compiler(struct zink_screen *screen)
1318
1342
{
1351
1375
      .lower_uniforms_to_ubo = true,
1352
1376
      .has_fsub = true,
1353
1377
      .has_isub = true,
1354
 
      .has_txs = true,
1355
1378
      .lower_mul_2x32_64 = true,
1356
1379
      .support_16bit_alu = true, /* not quite what it sounds like */
1357
1380
      .max_unroll_iterations = 0,
1629
1652
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1630
1653
   if (intr->intrinsic != nir_intrinsic_load_deref)
1631
1654
      return false;
1632
 
   nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
 
1655
   nir_variable *var = nir_intrinsic_get_var(intr, 0);
1633
1656
   if (!var->data.fb_fetch_output)
1634
1657
      return false;
1635
1658
   b->cursor = nir_after_instr(instr);
1711
1734
      nir_src_copy(&levels->src[!!(offset_idx >= 0)].src, &txf->src[handle_idx].src, &levels->instr);
1712
1735
   }
1713
1736
   nir_ssa_dest_init(&levels->instr, &levels->dest,
1714
 
                     nir_tex_instr_dest_size(levels), 32, NULL);
 
1737
                     nir_tex_instr_dest_size(levels), 32);
1715
1738
   nir_builder_instr_insert(b, &levels->instr);
1716
1739
 
1717
1740
   nir_if *lod_oob_if = nir_push_if(b, nir_ilt(b, lod, &levels->dest.ssa));
1759
1782
static nir_variable *
1760
1783
find_var_with_location_frac(nir_shader *nir, unsigned location, unsigned location_frac, bool have_psiz)
1761
1784
{
 
1785
   assert((int)location >= 0);
 
1786
 
1762
1787
   unsigned found = 0;
1763
1788
   if (!location_frac && location != VARYING_SLOT_PSIZ) {
1764
1789
      nir_foreach_shader_out_variable(var, nir) {
2129
2154
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2130
2155
   b->cursor = nir_before_instr(instr);
2131
2156
   switch (intr->intrinsic) {
2132
 
   case nir_intrinsic_ssbo_atomic_fadd:
2133
 
   case nir_intrinsic_ssbo_atomic_add:
2134
 
   case nir_intrinsic_ssbo_atomic_umin:
2135
 
   case nir_intrinsic_ssbo_atomic_imin:
2136
 
   case nir_intrinsic_ssbo_atomic_umax:
2137
 
   case nir_intrinsic_ssbo_atomic_imax:
2138
 
   case nir_intrinsic_ssbo_atomic_and:
2139
 
   case nir_intrinsic_ssbo_atomic_or:
2140
 
   case nir_intrinsic_ssbo_atomic_xor:
2141
 
   case nir_intrinsic_ssbo_atomic_exchange:
2142
 
   case nir_intrinsic_ssbo_atomic_comp_swap: {
 
2157
   case nir_intrinsic_ssbo_atomic:
 
2158
   case nir_intrinsic_ssbo_atomic_swap: {
2143
2159
      /* convert offset to uintN_t[idx] */
2144
2160
      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, nir_dest_bit_size(intr->dest) / 8);
2145
2161
      nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
2300
2316
{
2301
2317
   nir_intrinsic_op op;
2302
2318
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2303
 
   switch (intr->intrinsic) {
2304
 
   case nir_intrinsic_ssbo_atomic_fadd:
2305
 
      op = nir_intrinsic_deref_atomic_fadd;
2306
 
      break;
2307
 
   case nir_intrinsic_ssbo_atomic_fmin:
2308
 
      op = nir_intrinsic_deref_atomic_fmin;
2309
 
      break;
2310
 
   case nir_intrinsic_ssbo_atomic_fmax:
2311
 
      op = nir_intrinsic_deref_atomic_fmax;
2312
 
      break;
2313
 
   case nir_intrinsic_ssbo_atomic_fcomp_swap:
2314
 
      op = nir_intrinsic_deref_atomic_fcomp_swap;
2315
 
      break;
2316
 
   case nir_intrinsic_ssbo_atomic_add:
2317
 
      op = nir_intrinsic_deref_atomic_add;
2318
 
      break;
2319
 
   case nir_intrinsic_ssbo_atomic_umin:
2320
 
      op = nir_intrinsic_deref_atomic_umin;
2321
 
      break;
2322
 
   case nir_intrinsic_ssbo_atomic_imin:
2323
 
      op = nir_intrinsic_deref_atomic_imin;
2324
 
      break;
2325
 
   case nir_intrinsic_ssbo_atomic_umax:
2326
 
      op = nir_intrinsic_deref_atomic_umax;
2327
 
      break;
2328
 
   case nir_intrinsic_ssbo_atomic_imax:
2329
 
      op = nir_intrinsic_deref_atomic_imax;
2330
 
      break;
2331
 
   case nir_intrinsic_ssbo_atomic_and:
2332
 
      op = nir_intrinsic_deref_atomic_and;
2333
 
      break;
2334
 
   case nir_intrinsic_ssbo_atomic_or:
2335
 
      op = nir_intrinsic_deref_atomic_or;
2336
 
      break;
2337
 
   case nir_intrinsic_ssbo_atomic_xor:
2338
 
      op = nir_intrinsic_deref_atomic_xor;
2339
 
      break;
2340
 
   case nir_intrinsic_ssbo_atomic_exchange:
2341
 
      op = nir_intrinsic_deref_atomic_exchange;
2342
 
      break;
2343
 
   case nir_intrinsic_ssbo_atomic_comp_swap:
2344
 
      op = nir_intrinsic_deref_atomic_comp_swap;
2345
 
      break;
2346
 
   default:
 
2319
   if (intr->intrinsic == nir_intrinsic_ssbo_atomic)
 
2320
      op = nir_intrinsic_deref_atomic;
 
2321
   else if (intr->intrinsic == nir_intrinsic_ssbo_atomic_swap)
 
2322
      op = nir_intrinsic_deref_atomic_swap;
 
2323
   else
2347
2324
      unreachable("unknown intrinsic");
2348
 
   }
2349
2325
   nir_ssa_def *offset = intr->src[1].ssa;
2350
2326
   nir_src *src = &intr->src[0];
2351
2327
   nir_variable *var = get_bo_var(b->shader, bo, true, src, nir_dest_bit_size(intr->dest));
2362
2338
   for (unsigned i = 0; i < num_components; i++) {
2363
2339
      nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
2364
2340
      nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(b->shader, op);
2365
 
      nir_ssa_dest_init(&new_instr->instr, &new_instr->dest, 1, nir_dest_bit_size(intr->dest), "");
 
2341
      nir_ssa_dest_init(&new_instr->instr, &new_instr->dest, 1,
 
2342
                        nir_dest_bit_size(intr->dest));
 
2343
      nir_intrinsic_set_atomic_op(new_instr, nir_intrinsic_atomic_op(intr));
2366
2344
      new_instr->src[0] = nir_src_for_ssa(&deref_arr->dest.ssa);
2367
2345
      /* deref ops have no offset src, so copy the srcs after it */
2368
2346
      for (unsigned i = 2; i < nir_intrinsic_infos[intr->intrinsic].num_srcs; i++)
2392
2370
   nir_src *src;
2393
2371
   bool ssbo = true;
2394
2372
   switch (intr->intrinsic) {
2395
 
   case nir_intrinsic_ssbo_atomic_fadd:
2396
 
   case nir_intrinsic_ssbo_atomic_fmin:
2397
 
   case nir_intrinsic_ssbo_atomic_fmax:
2398
 
   case nir_intrinsic_ssbo_atomic_fcomp_swap:
2399
 
   case nir_intrinsic_ssbo_atomic_add:
2400
 
   case nir_intrinsic_ssbo_atomic_umin:
2401
 
   case nir_intrinsic_ssbo_atomic_imin:
2402
 
   case nir_intrinsic_ssbo_atomic_umax:
2403
 
   case nir_intrinsic_ssbo_atomic_imax:
2404
 
   case nir_intrinsic_ssbo_atomic_and:
2405
 
   case nir_intrinsic_ssbo_atomic_or:
2406
 
   case nir_intrinsic_ssbo_atomic_xor:
2407
 
   case nir_intrinsic_ssbo_atomic_exchange:
2408
 
   case nir_intrinsic_ssbo_atomic_comp_swap:
 
2373
   case nir_intrinsic_ssbo_atomic:
 
2374
   case nir_intrinsic_ssbo_atomic_swap:
2409
2375
      rewrite_atomic_ssbo_instr(b, instr, bo);
2410
2376
      return true;
2411
2377
   case nir_intrinsic_store_ssbo:
2564
2530
   } else {
2565
2531
      nir_builder b;
2566
2532
      nir_function_impl *impl = nir_shader_get_entrypoint(vs);
2567
 
      nir_builder_init(&b, impl);
 
2533
      b = nir_builder_at(nir_after_cf_list(&impl->body));
2568
2534
      assert(impl->end_block->predecessors->entries == 1);
2569
 
      b.cursor = nir_after_cf_list(&impl->body);
2570
2535
      clamp_layer_output_emit(&b, &state);
2571
2536
      nir_metadata_preserve(impl, nir_metadata_dominance);
2572
2537
   }
2582
2547
   switch (slot) {
2583
2548
   case -1:
2584
2549
   case VARYING_SLOT_POS:
2585
 
   case VARYING_SLOT_PNTC:
2586
2550
   case VARYING_SLOT_PSIZ:
2587
2551
   case VARYING_SLOT_LAYER:
2588
2552
   case VARYING_SLOT_PRIMITIVE_ID:
2633
2597
   unsigned slot = var->data.location;
2634
2598
   switch (slot) {
2635
2599
   case VARYING_SLOT_POS:
2636
 
   case VARYING_SLOT_PNTC:
2637
2600
   case VARYING_SLOT_PSIZ:
2638
2601
   case VARYING_SLOT_LAYER:
2639
2602
   case VARYING_SLOT_PRIMITIVE_ID:
2659
2622
            var->data.driver_location = -1;
2660
2623
            return true;
2661
2624
         }
 
2625
         /* patch variables may be read in the workgroup */
2662
2626
         if (stage != MESA_SHADER_TESS_CTRL)
2663
2627
            /* dead io */
2664
2628
            return false;
2665
 
         /* patch variables may be read in the workgroup */
2666
 
         slot_map[slot] = (*reserved)++;
 
2629
         unsigned num_slots;
 
2630
         if (nir_is_arrayed_io(var, stage))
 
2631
            num_slots = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
 
2632
         else
 
2633
            num_slots = glsl_count_vec4_slots(var->type, false, false);
 
2634
         assert(*reserved + num_slots <= MAX_VARYING);
 
2635
         for (unsigned i = 0; i < num_slots; i++)
 
2636
            slot_map[slot + i] = (*reserved)++;
2667
2637
      }
2668
2638
      var->data.driver_location = slot_map[slot];
2669
2639
   }
2845
2815
   bool func_progress = false;
2846
2816
   if (!function->impl)
2847
2817
      return false;
2848
 
   nir_builder b;
2849
 
   nir_builder_init(&b, function->impl);
 
2818
   nir_builder b = nir_builder_create(function->impl);
2850
2819
   nir_foreach_block(block, function->impl) {
2851
2820
      nir_foreach_instr_safe(instr, block) {
2852
2821
         switch (instr->type) {
3032
3001
                  for (unsigned i = 0; i < 2; i++, num_components -= 4) {
3033
3002
                     nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
3034
3003
                     nir_ssa_def *load = nir_load_deref(&b, strct);
3035
 
                     comp[i * 2] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_MASK(2)));
 
3004
                     comp[i * 2] = nir_pack_64_2x32(&b,
 
3005
                                                    nir_trim_vector(&b, load, 2));
3036
3006
                     if (num_components > 2)
3037
3007
                        comp[i * 2 + 1] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(2, 2)));
3038
3008
                  }
3126
3096
            bool func_progress = false;
3127
3097
            if (!function->impl)
3128
3098
               continue;
3129
 
            nir_builder b;
3130
 
            nir_builder_init(&b, function->impl);
 
3099
            nir_builder b = nir_builder_create(function->impl);
3131
3100
            nir_foreach_block(block, function->impl) {
3132
3101
               nir_foreach_instr_safe(instr, block) {
3133
3102
                  switch (instr->type) {
3167
3136
}
3168
3137
 
3169
3138
static void
3170
 
zink_shader_dump(void *words, size_t size, const char *file)
 
3139
zink_shader_dump(const struct zink_shader *zs, void *words, size_t size, const char *file)
3171
3140
{
3172
3141
   FILE *fp = fopen(file, "wb");
3173
3142
   if (fp) {
3174
3143
      fwrite(words, 1, size, fp);
3175
3144
      fclose(fp);
3176
 
      fprintf(stderr, "wrote '%s'...\n", file);
3177
 
   }
3178
 
}
3179
 
 
3180
 
VkShaderModule
3181
 
zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv)
3182
 
{
3183
 
   VkShaderModule mod;
 
3145
      fprintf(stderr, "wrote %s shader '%s'...\n", _mesa_shader_stage_to_string(zs->info.stage), file);
 
3146
   }
 
3147
}
 
3148
 
 
3149
static VkShaderStageFlagBits
 
3150
zink_get_next_stage(gl_shader_stage stage)
 
3151
{
 
3152
   switch (stage) {
 
3153
   case MESA_SHADER_VERTEX:
 
3154
      return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
 
3155
             VK_SHADER_STAGE_GEOMETRY_BIT |
 
3156
             VK_SHADER_STAGE_FRAGMENT_BIT;
 
3157
   case MESA_SHADER_TESS_CTRL:
 
3158
      return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
 
3159
   case MESA_SHADER_TESS_EVAL:
 
3160
      return VK_SHADER_STAGE_GEOMETRY_BIT |
 
3161
             VK_SHADER_STAGE_FRAGMENT_BIT;
 
3162
   case MESA_SHADER_GEOMETRY:
 
3163
      return VK_SHADER_STAGE_FRAGMENT_BIT;
 
3164
   case MESA_SHADER_FRAGMENT:
 
3165
   case MESA_SHADER_COMPUTE:
 
3166
      return 0;
 
3167
   default:
 
3168
      unreachable("invalid shader stage");
 
3169
   }
 
3170
}
 
3171
 
 
3172
struct zink_shader_object
 
3173
zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool can_shobj, struct zink_program *pg)
 
3174
{
3184
3175
   VkShaderModuleCreateInfo smci = {0};
 
3176
   VkShaderCreateInfoEXT sci = {0};
3185
3177
 
3186
3178
   if (!spirv)
3187
3179
      spirv = zs->spirv;
3190
3182
      char buf[256];
3191
3183
      static int i;
3192
3184
      snprintf(buf, sizeof(buf), "dump%02d.spv", i++);
3193
 
      zink_shader_dump(spirv->words, spirv->num_words * sizeof(uint32_t), buf);
3194
 
   }
 
3185
      zink_shader_dump(zs, spirv->words, spirv->num_words * sizeof(uint32_t), buf);
 
3186
   }
 
3187
 
 
3188
   sci.sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT;
 
3189
   sci.stage = mesa_to_vk_shader_stage(zs->info.stage);
 
3190
   sci.nextStage = zink_get_next_stage(zs->info.stage);
 
3191
   sci.codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT;
 
3192
   sci.codeSize = spirv->num_words * sizeof(uint32_t);
 
3193
   sci.pCode = spirv->words;
 
3194
   sci.pName = "main";
 
3195
   VkDescriptorSetLayout dsl[ZINK_GFX_SHADER_COUNT] = {0};
 
3196
   if (pg) {
 
3197
      sci.setLayoutCount = pg->num_dsl;
 
3198
      sci.pSetLayouts = pg->dsl;
 
3199
   } else {
 
3200
      sci.setLayoutCount = zs->info.stage + 1;
 
3201
      dsl[zs->info.stage] = zs->precompile.dsl;;
 
3202
      sci.pSetLayouts = dsl;
 
3203
   }
 
3204
   VkPushConstantRange pcr;
 
3205
   pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
 
3206
   pcr.offset = 0;
 
3207
   pcr.size = sizeof(struct zink_gfx_push_constant);
 
3208
   sci.pushConstantRangeCount = 1;
 
3209
   sci.pPushConstantRanges = &pcr;
3195
3210
 
3196
3211
   smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
3197
3212
   smci.codeSize = spirv->num_words * sizeof(uint32_t);
3271
3286
   }
3272
3287
#endif
3273
3288
 
3274
 
   VkResult ret = VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &mod);
 
3289
   VkResult ret;
 
3290
   struct zink_shader_object obj = {0};
 
3291
   if (!can_shobj || !screen->info.have_EXT_shader_object)
 
3292
      ret = VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &obj.mod);
 
3293
   else
 
3294
      ret = VKSCR(CreateShadersEXT)(screen->dev, 1, &sci, NULL, &obj.obj);
3275
3295
   bool success = zink_screen_handle_vkresult(screen, ret);
3276
3296
   assert(success);
3277
 
   return success ? mod : VK_NULL_HANDLE;
 
3297
   return obj;
3278
3298
}
3279
3299
 
3280
3300
static void
3301
3321
}
3302
3322
 
3303
3323
static nir_ssa_def *
3304
 
rewrite_tex_dest(nir_builder *b, nir_tex_instr *tex, nir_variable *var, void *data)
 
3324
rewrite_tex_dest(nir_builder *b, nir_tex_instr *tex, nir_variable *var, struct zink_shader *zs)
3305
3325
{
3306
3326
   assert(var);
3307
3327
   const struct glsl_type *type = glsl_without_array(var->type);
3315
3335
   if (bit_size == dest_size && !rewrite_depth)
3316
3336
      return NULL;
3317
3337
   nir_ssa_def *dest = &tex->dest.ssa;
3318
 
   if (rewrite_depth && data) {
3319
 
      if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
3320
 
         flag_shadow_tex(var, data);
3321
 
      else
3322
 
         mesa_loge("unhandled old-style shadow sampler in non-fragment stage!");
 
3338
   if (rewrite_depth && zs) {
 
3339
      /* If only .x is used in the NIR, then it's effectively not a legacy depth
 
3340
       * sample anyway and we don't want to ask for shader recompiles.  This is
 
3341
       * the typical path, since GL_DEPTH_TEXTURE_MODE defaults to either RED or
 
3342
       * LUMINANCE, so apps just use the first channel.
 
3343
       */
 
3344
      if (nir_ssa_def_components_read(dest) & ~1) {
 
3345
         if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
 
3346
            flag_shadow_tex(var, zs);
 
3347
         else
 
3348
            mesa_loge("unhandled old-style shadow sampler in non-fragment stage!");
 
3349
      }
3323
3350
      return NULL;
3324
3351
   }
3325
3352
   if (bit_size != dest_size) {
3447
3474
   return true;
3448
3475
}
3449
3476
 
 
3477
/* Applies in-shader swizzles when necessary for depth/shadow sampling.
 
3478
 *
 
3479
 * SPIRV only has new-style (scalar result) shadow sampling, so to emulate
 
3480
 * !is_new_style_shadow (vec4 result) shadow sampling we lower to a
 
3481
 * new-style-shadow sample, and apply GL_DEPTH_TEXTURE_MODE swizzles in the NIR
 
3482
 * shader to expand out to vec4.  Since this depends on sampler state, it's a
 
3483
 * draw-time shader recompile to do so.
 
3484
 *
 
3485
 * We may also need to apply shader swizzles for
 
3486
 * driver_workarounds.needs_zs_shader_swizzle.
 
3487
 */
3450
3488
static bool
3451
3489
lower_zs_swizzle_tex(nir_shader *nir, const void *swizzle, bool shadow_only)
3452
3490
{
 
3491
   /* We don't use nir_lower_tex to do our swizzling, because of this base_sampler_id. */
3453
3492
   unsigned base_sampler_id = gl_shader_stage_is_compute(nir->info.stage) ? 0 : PIPE_MAX_SAMPLERS * nir->info.stage;
3454
3493
   struct lower_zs_swizzle_state state = {shadow_only, base_sampler_id, swizzle};
3455
3494
   return nir_shader_instructions_pass(nir, lower_zs_swizzle_tex_instr, nir_metadata_dominance | nir_metadata_block_index, (void*)&state);
3461
3500
   if (instr->type != nir_instr_type_intrinsic)
3462
3501
      return false;
3463
3502
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
3464
 
   if (intr->intrinsic != nir_intrinsic_load_deref)
3465
 
      return false;
3466
 
   nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
3467
 
   if (deref_var->data.location != VARYING_SLOT_PNTC)
 
3503
   if (intr->intrinsic != nir_intrinsic_load_point_coord)
3468
3504
      return false;
3469
3505
   b->cursor = nir_after_instr(instr);
3470
3506
   nir_ssa_def *def = nir_vec2(b, nir_channel(b, &intr->dest.ssa, 0),
3471
 
                                  nir_fsub(b, nir_imm_float(b, 1.0), nir_channel(b, &intr->dest.ssa, 1)));
 
3507
                                  nir_fsub_imm(b, 1.0, nir_channel(b, &intr->dest.ssa, 1)));
3472
3508
   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr);
3473
3509
   return true;
3474
3510
}
3476
3512
static bool
3477
3513
invert_point_coord(nir_shader *nir)
3478
3514
{
3479
 
   if (!(nir->info.inputs_read & BITFIELD64_BIT(VARYING_SLOT_PNTC)))
 
3515
   if (!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_POINT_COORD))
3480
3516
      return false;
3481
3517
   return nir_shader_instructions_pass(nir, invert_point_coord_instr, nir_metadata_dominance, NULL);
3482
3518
}
3483
3519
 
3484
 
static VkShaderModule
3485
 
compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir)
 
3520
static struct zink_shader_object
 
3521
compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir, bool can_shobj, struct zink_program *pg)
3486
3522
{
3487
 
   VkShaderModule mod = VK_NULL_HANDLE;
3488
3523
   struct zink_shader_info *sinfo = &zs->sinfo;
3489
3524
   prune_io(nir);
3490
3525
 
3491
 
   NIR_PASS_V(nir, nir_convert_from_ssa, true);
 
3526
   NIR_PASS_V(nir, nir_convert_from_ssa, true, false);
3492
3527
 
 
3528
   struct zink_shader_object obj;
3493
3529
   struct spirv_shader *spirv = nir_to_spirv(nir, sinfo, screen->spirv_version);
3494
3530
   if (spirv)
3495
 
      mod = zink_shader_spirv_compile(screen, zs, spirv);
 
3531
      obj = zink_shader_spirv_compile(screen, zs, spirv, can_shobj, pg);
3496
3532
 
3497
3533
   /* TODO: determine if there's any reason to cache spirv output? */
3498
3534
   if (zs->info.stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated)
3499
3535
      zs->spirv = spirv;
3500
3536
   else
3501
 
      ralloc_free(spirv);
3502
 
   return mod;
 
3537
      obj.spirv = spirv;
 
3538
   return obj;
3503
3539
}
3504
3540
 
3505
 
VkShaderModule
3506
 
zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs,
3507
 
                    nir_shader *nir, const struct zink_shader_key *key, const void *extra_data)
 
3541
struct zink_shader_object
 
3542
zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shader *zs,
 
3543
                    nir_shader *nir, const struct zink_shader_key *key, const void *extra_data, struct zink_program *pg)
3508
3544
{
3509
 
   VkShaderModule mod = VK_NULL_HANDLE;
3510
3545
   struct zink_shader_info *sinfo = &zs->sinfo;
3511
3546
   bool need_optimize = false;
3512
3547
   bool inlined_uniforms = false;
3630
3665
            NIR_PASS_V(nir, lower_dual_blend);
3631
3666
         }
3632
3667
         if (zink_fs_key_base(key)->coord_replace_bits)
3633
 
            NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key_base(key)->coord_replace_bits, false, false);
 
3668
            NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key_base(key)->coord_replace_bits, true, false);
3634
3669
         if (zink_fs_key_base(key)->point_coord_yinvert)
3635
3670
            NIR_PASS_V(nir, invert_point_coord);
3636
3671
         if (zink_fs_key_base(key)->force_persample_interp || zink_fs_key_base(key)->fbfetch_ms) {
3694
3729
   } else if (need_optimize)
3695
3730
      optimize_nir(nir, zs);
3696
3731
   
3697
 
   mod = compile_module(screen, zs, nir);
 
3732
   struct zink_shader_object obj = compile_module(screen, zs, nir, can_shobj, pg);
3698
3733
   ralloc_free(nir);
3699
 
   return mod;
 
3734
   return obj;
3700
3735
}
3701
3736
 
3702
 
VkShaderModule
 
3737
struct zink_shader_object
3703
3738
zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs)
3704
3739
{
3705
3740
   nir_shader *nir = zink_shader_deserialize(screen, zs);
3706
 
   int set = nir->info.stage == MESA_SHADER_FRAGMENT;
 
3741
   /* TODO: maybe compile multiple variants for different set counts for compact mode? */
 
3742
   int set = zs->info.stage == MESA_SHADER_FRAGMENT;
 
3743
   if (screen->info.have_EXT_shader_object)
 
3744
      set = zs->info.stage;
3707
3745
   unsigned offsets[4];
3708
3746
   zink_descriptor_shader_get_binding_offsets(zs, offsets);
3709
3747
   nir_foreach_variable_with_modes(var, nir, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_uniform | nir_var_image) {
3727
3765
      default: break;
3728
3766
      }
3729
3767
   }
 
3768
   if (screen->driconf.inline_uniforms) {
 
3769
      NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
 
3770
      NIR_PASS_V(nir, rewrite_bo_access, screen);
 
3771
      NIR_PASS_V(nir, remove_bo_access, zs);
 
3772
   }
3730
3773
   optimize_nir(nir, zs);
3731
 
   VkShaderModule mod = compile_module(screen, zs, nir);
 
3774
   zink_descriptor_shader_init(screen, zs);
 
3775
   zs->sinfo.last_vertex = zs->sinfo.have_xfb;
 
3776
   nir_shader *nir_clone = NULL;
 
3777
   if (screen->info.have_EXT_shader_object)
 
3778
      nir_clone = nir_shader_clone(nir, nir);
 
3779
   struct zink_shader_object obj = compile_module(screen, zs, nir, true, NULL);
 
3780
   if (screen->info.have_EXT_shader_object && !zs->info.internal) {
 
3781
      /* always try to pre-generate a tcs in case it's needed */
 
3782
      if (zs->info.stage == MESA_SHADER_TESS_EVAL) {
 
3783
         nir_shader *nir_tcs = NULL;
 
3784
         /* use max pcp for compat */
 
3785
         zs->non_fs.generated_tcs = zink_shader_tcs_create(screen, nir_clone, 32, &nir_tcs);
 
3786
         nir_tcs->info.separate_shader = true;
 
3787
         zs->non_fs.generated_tcs->precompile.obj = zink_shader_compile_separate(screen, zs->non_fs.generated_tcs);
 
3788
         ralloc_free(nir_tcs);
 
3789
      }
 
3790
      if (zs->info.stage == MESA_SHADER_VERTEX || zs->info.stage == MESA_SHADER_TESS_EVAL) {
 
3791
         /* create a second variant with PSIZ removed:
 
3792
          * this works around a bug in drivers using nir_assign_io_var_locations()
 
3793
          * where builtins that aren't read by following stages get assigned
 
3794
          * driver locations before varyings and break the i/o interface between shaders even
 
3795
          * though zink has correctly assigned all locations
 
3796
          */
 
3797
         nir_variable *var = nir_find_variable_with_location(nir_clone, nir_var_shader_out, VARYING_SLOT_PSIZ);
 
3798
         if (var && !var->data.explicit_location) {
 
3799
            var->data.mode = nir_var_shader_temp;
 
3800
            nir_fixup_deref_modes(nir_clone);
 
3801
            NIR_PASS_V(nir_clone, nir_remove_dead_variables, nir_var_shader_temp, NULL);
 
3802
            optimize_nir(nir_clone, NULL);
 
3803
            zs->precompile.no_psiz_obj = compile_module(screen, zs, nir_clone, true, NULL);
 
3804
            spirv_shader_delete(zs->precompile.no_psiz_obj.spirv);
 
3805
            zs->precompile.no_psiz_obj.spirv = NULL;
 
3806
         }
 
3807
      }
 
3808
   }
3732
3809
   ralloc_free(nir);
3733
 
   return mod;
 
3810
   spirv_shader_delete(obj.spirv);
 
3811
   obj.spirv = NULL;
 
3812
   return obj;
3734
3813
}
3735
3814
 
3736
3815
static bool
3906
3985
            ret = true;
3907
3986
            break;
3908
3987
         }
3909
 
         case nir_intrinsic_ssbo_atomic_fadd:
3910
 
         case nir_intrinsic_ssbo_atomic_add:
3911
 
         case nir_intrinsic_ssbo_atomic_imin:
3912
 
         case nir_intrinsic_ssbo_atomic_umin:
3913
 
         case nir_intrinsic_ssbo_atomic_imax:
3914
 
         case nir_intrinsic_ssbo_atomic_umax:
3915
 
         case nir_intrinsic_ssbo_atomic_and:
3916
 
         case nir_intrinsic_ssbo_atomic_or:
3917
 
         case nir_intrinsic_ssbo_atomic_xor:
3918
 
         case nir_intrinsic_ssbo_atomic_exchange:
3919
 
         case nir_intrinsic_ssbo_atomic_comp_swap:
3920
 
         case nir_intrinsic_ssbo_atomic_fmin:
3921
 
         case nir_intrinsic_ssbo_atomic_fmax:
3922
 
         case nir_intrinsic_ssbo_atomic_fcomp_swap:
 
3988
         case nir_intrinsic_ssbo_atomic:
 
3989
         case nir_intrinsic_ssbo_atomic_swap:
3923
3990
         case nir_intrinsic_load_ssbo:
3924
3991
            zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
3925
3992
            break;
4021
4088
 
4022
4089
   /* convert bindless intrinsics to deref intrinsics */
4023
4090
   switch (instr->intrinsic) {
4024
 
   OP_SWAP(atomic_add)
4025
 
   OP_SWAP(atomic_and)
4026
 
   OP_SWAP(atomic_comp_swap)
4027
 
   OP_SWAP(atomic_dec_wrap)
4028
 
   OP_SWAP(atomic_exchange)
4029
 
   OP_SWAP(atomic_fadd)
4030
 
   OP_SWAP(atomic_fmax)
4031
 
   OP_SWAP(atomic_fmin)
4032
 
   OP_SWAP(atomic_imax)
4033
 
   OP_SWAP(atomic_imin)
4034
 
   OP_SWAP(atomic_inc_wrap)
4035
 
   OP_SWAP(atomic_or)
4036
 
   OP_SWAP(atomic_umax)
4037
 
   OP_SWAP(atomic_umin)
4038
 
   OP_SWAP(atomic_xor)
 
4091
   OP_SWAP(atomic)
 
4092
   OP_SWAP(atomic_swap)
4039
4093
   OP_SWAP(format)
4040
4094
   OP_SWAP(load)
4041
4095
   OP_SWAP(order)
4296
4350
            if (intr->intrinsic == nir_intrinsic_image_deref_load ||
4297
4351
                intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
4298
4352
                intr->intrinsic == nir_intrinsic_image_deref_store ||
4299
 
                intr->intrinsic == nir_intrinsic_image_deref_atomic_add ||
4300
 
                intr->intrinsic == nir_intrinsic_image_deref_atomic_imin ||
4301
 
                intr->intrinsic == nir_intrinsic_image_deref_atomic_umin ||
4302
 
                intr->intrinsic == nir_intrinsic_image_deref_atomic_imax ||
4303
 
                intr->intrinsic == nir_intrinsic_image_deref_atomic_umax ||
4304
 
                intr->intrinsic == nir_intrinsic_image_deref_atomic_and ||
4305
 
                intr->intrinsic == nir_intrinsic_image_deref_atomic_or ||
4306
 
                intr->intrinsic == nir_intrinsic_image_deref_atomic_xor ||
4307
 
                intr->intrinsic == nir_intrinsic_image_deref_atomic_exchange ||
4308
 
                intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
4309
 
                intr->intrinsic == nir_intrinsic_image_deref_atomic_fadd ||
 
4353
                intr->intrinsic == nir_intrinsic_image_deref_atomic ||
 
4354
                intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ||
4310
4355
                intr->intrinsic == nir_intrinsic_image_deref_size ||
4311
4356
                intr->intrinsic == nir_intrinsic_image_deref_samples ||
4312
4357
                intr->intrinsic == nir_intrinsic_image_deref_format ||
4313
4358
                intr->intrinsic == nir_intrinsic_image_deref_order) {
4314
4359
 
4315
 
                nir_variable *var =
4316
 
                   nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
 
4360
                nir_variable *var = nir_intrinsic_get_var(intr, 0);
4317
4361
 
4318
4362
                /* Structs have been lowered already, so get_aoa_size is sufficient. */
4319
4363
                const unsigned size =
4328
4372
            static bool warned = false;
4329
4373
            if (!screen->info.have_EXT_shader_atomic_float && !screen->is_cpu && !warned) {
4330
4374
               switch (intr->intrinsic) {
4331
 
               case nir_intrinsic_image_deref_atomic_add: {
 
4375
               case nir_intrinsic_image_deref_atomic: {
4332
4376
                  nir_variable *var = nir_intrinsic_get_var(intr, 0);
4333
 
                  if (util_format_is_float(var->data.image.format))
 
4377
                  if (nir_intrinsic_atomic_op(intr) == nir_atomic_op_iadd &&
 
4378
                      util_format_is_float(var->data.image.format))
4334
4379
                     fprintf(stderr, "zink: Vulkan driver missing VK_EXT_shader_atomic_float but attempting to do atomic ops!\n");
4335
4380
                  break;
4336
4381
               }
4547
4592
            if (intr->intrinsic == nir_intrinsic_image_deref_load ||
4548
4593
               intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
4549
4594
               intr->intrinsic == nir_intrinsic_image_deref_store ||
4550
 
               intr->intrinsic == nir_intrinsic_image_deref_atomic_add ||
4551
 
               intr->intrinsic == nir_intrinsic_image_deref_atomic_imin ||
4552
 
               intr->intrinsic == nir_intrinsic_image_deref_atomic_umin ||
4553
 
               intr->intrinsic == nir_intrinsic_image_deref_atomic_imax ||
4554
 
               intr->intrinsic == nir_intrinsic_image_deref_atomic_umax ||
4555
 
               intr->intrinsic == nir_intrinsic_image_deref_atomic_and ||
4556
 
               intr->intrinsic == nir_intrinsic_image_deref_atomic_or ||
4557
 
               intr->intrinsic == nir_intrinsic_image_deref_atomic_xor ||
4558
 
               intr->intrinsic == nir_intrinsic_image_deref_atomic_exchange ||
4559
 
               intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
4560
 
               intr->intrinsic == nir_intrinsic_image_deref_atomic_fadd ||
 
4595
               intr->intrinsic == nir_intrinsic_image_deref_atomic ||
 
4596
               intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ||
4561
4597
               intr->intrinsic == nir_intrinsic_image_deref_samples ||
4562
4598
               intr->intrinsic == nir_intrinsic_image_deref_format ||
4563
4599
               intr->intrinsic == nir_intrinsic_image_deref_order) {
4610
4646
   var->data.mode = nir_var_shader_temp;
4611
4647
}
4612
4648
 
4613
 
static nir_variable *
4614
 
find_sampler_var(nir_shader *nir, unsigned texture_index)
4615
 
{
4616
 
   nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
4617
 
      unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
4618
 
      if ((glsl_type_is_texture(glsl_without_array(var->type)) || glsl_type_is_sampler(glsl_without_array(var->type))) &&
4619
 
          (var->data.binding == texture_index || (var->data.binding < texture_index && var->data.binding + size > texture_index)))
4620
 
         return var;
4621
 
   }
4622
 
   return NULL;
4623
 
}
4624
 
 
4625
4649
static bool
4626
4650
type_sampler_vars(nir_shader *nir, unsigned *sampler_mask)
4627
4651
{
4643
4667
               break;
4644
4668
            }
4645
4669
            *sampler_mask |= BITFIELD_BIT(tex->sampler_index);
4646
 
            nir_variable *var = find_sampler_var(nir, tex->texture_index);
 
4670
            nir_variable *var = nir_find_sampler_variable_with_tex_index(nir, tex->texture_index);
4647
4671
            assert(var);
4648
4672
            if (glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID)
4649
4673
               continue;
4673
4697
               continue;
4674
4698
            }
4675
4699
            *sampler_mask |= BITFIELD_BIT(tex->sampler_index);
4676
 
            nir_variable *var = find_sampler_var(nir, tex->texture_index);
 
4700
            nir_variable *var = nir_find_sampler_variable_with_tex_index(nir, tex->texture_index);
4677
4701
            assert(var);
4678
4702
            if (glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID)
4679
4703
               continue;
4719
4743
static bool
4720
4744
fixup_io_locations(nir_shader *nir)
4721
4745
{
4722
 
   nir_variable_mode mode = nir->info.stage == MESA_SHADER_FRAGMENT ? nir_var_shader_in : nir_var_shader_out;
4723
 
   /* i/o interface blocks are required to be EXACT matches between stages:
4724
 
    * iterate over all locations and set locations incrementally
4725
 
    */
4726
 
   unsigned slot = 0;
4727
 
   for (unsigned i = 0; i < VARYING_SLOT_MAX; i++) {
4728
 
      if (nir_slot_is_sysval_output(i))
4729
 
         continue;
4730
 
      nir_variable *var = nir_find_variable_with_location(nir, mode, i);
4731
 
      if (!var) {
4732
 
         /* locations used between stages are not required to be contiguous */
4733
 
         if (i >= VARYING_SLOT_VAR0)
4734
 
            slot++;
4735
 
         continue;
 
4746
   nir_variable_mode modes;
 
4747
   if (nir->info.stage != MESA_SHADER_FRAGMENT && nir->info.stage != MESA_SHADER_VERTEX)
 
4748
      modes = nir_var_shader_in | nir_var_shader_out;
 
4749
   else
 
4750
      modes = nir->info.stage == MESA_SHADER_FRAGMENT ? nir_var_shader_in : nir_var_shader_out;
 
4751
   u_foreach_bit(mode, modes) {
 
4752
      /* i/o interface blocks are required to be EXACT matches between stages:
 
4753
      * iterate over all locations and set locations incrementally
 
4754
      */
 
4755
      unsigned slot = 0;
 
4756
      for (unsigned i = 0; i < VARYING_SLOT_MAX; i++) {
 
4757
         if (nir_slot_is_sysval_output(i, MESA_SHADER_NONE))
 
4758
            continue;
 
4759
         bool found = false;
 
4760
         unsigned size = 0;
 
4761
         nir_foreach_variable_with_modes(var, nir, 1<<mode) {
 
4762
            if (var->data.location != i)
 
4763
               continue;
 
4764
            /* only add slots for non-component vars or first-time component vars */
 
4765
            if (!var->data.location_frac || !size) {
 
4766
               /* ensure variable is given enough slots */
 
4767
               if (nir_is_arrayed_io(var, nir->info.stage))
 
4768
                  size += glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
 
4769
               else
 
4770
                  size += glsl_count_vec4_slots(var->type, false, false);
 
4771
            }
 
4772
            var->data.driver_location = slot;
 
4773
            found = true;
 
4774
         }
 
4775
         slot += size;
 
4776
         if (found) {
 
4777
            /* ensure the consumed slots aren't double iterated */
 
4778
            i += size - 1;
 
4779
         } else {
 
4780
            /* locations used between stages are not required to be contiguous */
 
4781
            if (i >= VARYING_SLOT_VAR0)
 
4782
               slot++;
 
4783
         }
4736
4784
      }
4737
 
      unsigned size;
4738
 
      /* ensure variable is given enough slots */
4739
 
      if (nir_is_arrayed_io(var, nir->info.stage))
4740
 
         size = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
4741
 
      else
4742
 
         size = glsl_count_vec4_slots(var->type, false, false);
4743
 
      var->data.driver_location = slot;
4744
 
      slot += size;
4745
 
      /* ensure the consumed slots aren't double iterated */
4746
 
      i += size - 1;
4747
4785
   }
4748
4786
   return true;
4749
4787
}
4804
4842
      NIR_PASS_V(nir, fixup_io_locations);
4805
4843
 
4806
4844
   NIR_PASS_V(nir, lower_basevertex);
4807
 
   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
4808
4845
   NIR_PASS_V(nir, lower_baseinstance);
4809
4846
   NIR_PASS_V(nir, lower_sparse);
4810
4847
   NIR_PASS_V(nir, split_bitfields);
4843
4880
                                          nir_lower_terminate_if_to_cf));
4844
4881
   NIR_PASS_V(nir, nir_lower_fragcolor,
4845
4882
         nir->info.fs.color_is_dual_source ? 1 : 8);
 
4883
 
4846
4884
   NIR_PASS_V(nir, lower_64bit_vertex_attribs);
4847
4885
   bool needs_size = analyze_io(ret, nir);
4848
4886
   NIR_PASS_V(nir, unbreak_bos, ret, needs_size);
4866
4904
      var->data.is_xfb = false;
4867
4905
      if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {
4868
4906
         has_bindless_io = true;
4869
 
         break;
4870
4907
      }
4871
4908
   }
4872
4909
   if (has_bindless_io)
4984
5021
   if (!nir->info.internal)
4985
5022
      nir_foreach_shader_out_variable(var, nir)
4986
5023
         var->data.explicit_xfb_buffer = 0;
4987
 
   if (so_info && so_info->num_outputs)
 
5024
   if (so_info && so_info->num_outputs && nir->info.outputs_written)
4988
5025
      update_so_info(ret, nir, so_info, nir->info.outputs_written, have_psiz);
4989
5026
   else if (have_psiz) {
4990
5027
      bool have_fake_psiz = false;
5043
5080
void
5044
5081
zink_shader_free(struct zink_screen *screen, struct zink_shader *shader)
5045
5082
{
 
5083
   _mesa_set_destroy(shader->programs, NULL);
 
5084
   util_queue_fence_wait(&shader->precompile.fence);
 
5085
   util_queue_fence_destroy(&shader->precompile.fence);
 
5086
   zink_descriptor_shader_deinit(screen, shader);
 
5087
   if (screen->info.have_EXT_shader_object) {
 
5088
      VKSCR(DestroyShaderEXT)(screen->dev, shader->precompile.obj.obj, NULL);
 
5089
      VKSCR(DestroyShaderEXT)(screen->dev, shader->precompile.no_psiz_obj.obj, NULL);
 
5090
   } else {
 
5091
      if (shader->precompile.obj.mod)
 
5092
         VKSCR(DestroyShaderModule)(screen->dev, shader->precompile.obj.mod, NULL);
 
5093
      if (shader->precompile.gpl)
 
5094
         VKSCR(DestroyPipeline)(screen->dev, shader->precompile.gpl, NULL);
 
5095
   }
 
5096
   blob_finish(&shader->blob);
 
5097
   ralloc_free(shader->spirv);
 
5098
   free(shader->precompile.bindings);
 
5099
   ralloc_free(shader);
 
5100
}
 
5101
 
 
5102
void
 
5103
zink_gfx_shader_free(struct zink_screen *screen, struct zink_shader *shader)
 
5104
{
5046
5105
   assert(shader->info.stage != MESA_SHADER_COMPUTE);
 
5106
   util_queue_fence_wait(&shader->precompile.fence);
5047
5107
   set_foreach(shader->programs, entry) {
5048
5108
      struct zink_gfx_program *prog = (void*)entry->key;
5049
5109
      gl_shader_stage stage = shader->info.stage;
5107
5167
   if (shader->info.stage == MESA_SHADER_TESS_EVAL &&
5108
5168
       shader->non_fs.generated_tcs) {
5109
5169
      /* automatically destroy generated tcs shaders when tes is destroyed */
5110
 
      zink_shader_free(screen, shader->non_fs.generated_tcs);
 
5170
      zink_gfx_shader_free(screen, shader->non_fs.generated_tcs);
5111
5171
      shader->non_fs.generated_tcs = NULL;
5112
5172
   }
5113
5173
   for (unsigned int i = 0; i < ARRAY_SIZE(shader->non_fs.generated_gs); i++) {
5115
5175
         if (shader->info.stage != MESA_SHADER_FRAGMENT &&
5116
5176
             shader->non_fs.generated_gs[i][j]) {
5117
5177
            /* automatically destroy generated gs shaders when owner is destroyed */
5118
 
            zink_shader_free(screen, shader->non_fs.generated_gs[i][j]);
 
5178
            zink_gfx_shader_free(screen, shader->non_fs.generated_gs[i][j]);
5119
5179
            shader->non_fs.generated_gs[i][j] = NULL;
5120
5180
         }
5121
5181
      }
5122
5182
   }
5123
 
   _mesa_set_destroy(shader->programs, NULL);
5124
 
   util_queue_fence_wait(&shader->precompile.fence);
5125
 
   util_queue_fence_destroy(&shader->precompile.fence);
5126
 
   zink_descriptor_shader_deinit(screen, shader);
5127
 
   if (shader->precompile.mod)
5128
 
      VKSCR(DestroyShaderModule)(screen->dev, shader->precompile.mod, NULL);
5129
 
   if (shader->precompile.gpl)
5130
 
      VKSCR(DestroyPipeline)(screen->dev, shader->precompile.gpl, NULL);
5131
 
   blob_finish(&shader->blob);
5132
 
   ralloc_free(shader->spirv);
5133
 
   free(shader->precompile.bindings);
5134
 
   ralloc_free(shader);
 
5183
   zink_shader_free(screen, shader);
5135
5184
}
5136
5185
 
5137
5186
 
5138
 
VkShaderModule
5139
 
zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices)
 
5187
struct zink_shader_object
 
5188
zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices, bool can_shobj, struct zink_program *pg)
5140
5189
{
5141
5190
   assert(zs->info.stage == MESA_SHADER_TESS_CTRL);
5142
5191
   /* shortcut all the nir passes since we just have to change this one word */
5143
5192
   zs->spirv->words[zs->spirv->tcs_vertices_out_word] = patch_vertices;
5144
 
   return zink_shader_spirv_compile(screen, zs, NULL);
 
5193
   return zink_shader_spirv_compile(screen, zs, NULL, can_shobj, pg);
5145
5194
}
5146
5195
 
5147
5196
/* creating a passthrough tcs shader that's roughly:
5166
5215
 
5167
5216
*/
5168
5217
struct zink_shader *
5169
 
zink_shader_tcs_create(struct zink_screen *screen, nir_shader *vs, unsigned vertices_per_patch, nir_shader **nir_ret)
 
5218
zink_shader_tcs_create(struct zink_screen *screen, nir_shader *tes, unsigned vertices_per_patch, nir_shader **nir_ret)
5170
5219
{
5171
5220
   struct zink_shader *ret = rzalloc(NULL, struct zink_shader);
5172
5221
   util_queue_fence_init(&ret->precompile.fence);
5179
5228
   fn->is_entrypoint = true;
5180
5229
   nir_function_impl *impl = nir_function_impl_create(fn);
5181
5230
 
5182
 
   nir_builder b;
5183
 
   nir_builder_init(&b, impl);
5184
 
   b.cursor = nir_before_block(nir_start_block(impl));
 
5231
   nir_builder b = nir_builder_at(nir_before_block(nir_start_block(impl)));
5185
5232
 
5186
5233
   nir_ssa_def *invocation_id = nir_load_invocation_id(&b);
5187
5234
 
5188
 
   nir_foreach_shader_out_variable(var, vs) {
5189
 
      const struct glsl_type *type = var->type;
 
5235
   nir_foreach_shader_in_variable(var, tes) {
 
5236
      if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
 
5237
         continue;
5190
5238
      const struct glsl_type *in_type = var->type;
5191
5239
      const struct glsl_type *out_type = var->type;
5192
5240
      char buf[1024];
5193
5241
      snprintf(buf, sizeof(buf), "%s_out", var->name);
5194
 
      in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
5195
 
      out_type = glsl_array_type(type, vertices_per_patch, 0);
 
5242
      if (!nir_is_arrayed_io(var, MESA_SHADER_TESS_EVAL)) {
 
5243
         const struct glsl_type *type = var->type;
 
5244
         in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
 
5245
         out_type = glsl_array_type(type, vertices_per_patch, 0);
 
5246
      }
5196
5247
 
5197
5248
      nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name);
5198
5249
      nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf);
5239
5290
   nir->info.tess.tcs_vertices_out = vertices_per_patch;
5240
5291
   nir_validate_shader(nir, "created");
5241
5292
 
5242
 
   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
5243
5293
   optimize_nir(nir, NULL);
5244
5294
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
5245
 
   NIR_PASS_V(nir, nir_convert_from_ssa, true);
 
5295
   NIR_PASS_V(nir, nir_convert_from_ssa, true, false);
5246
5296
 
5247
5297
   *nir_ret = nir;
5248
5298
   zink_shader_serialize_blob(nir, &ret->blob);
5287
5337
#endif
5288
5338
   nir_serialize(blob, nir, strip);
5289
5339
}
 
5340
 
 
5341
void
 
5342
zink_print_shader(struct zink_screen *screen, struct zink_shader *zs, FILE *fp)
 
5343
{
 
5344
   nir_shader *nir = zink_shader_deserialize(screen, zs);
 
5345
   nir_print_shader(nir, fp);
 
5346
   ralloc_free(nir);
 
5347
}