~mmach/netext73/mesa-ryzen


Viewing changes to src/amd/vulkan/nir/radv_nir_apply_pipeline_layout.c

  • Committer: mmach
  • Date: 2023-11-02 21:31:35 UTC
  • Revision ID: netbit73@gmail.com-20231102213135-18d4tzh7tj0uz752

--- a/src/amd/vulkan/nir/radv_nir_apply_pipeline_layout.c
+++ b/src/amd/vulkan/nir/radv_nir_apply_pipeline_layout.c
@@ -79,8 +79,8 @@
    nir_ssa_def *set_ptr;
    if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
        layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
-      unsigned idx = state->pipeline_layout->set[desc_set].dynamic_offset_start +
-                     layout->binding[binding].dynamic_offset_offset;
+      unsigned idx =
+         state->pipeline_layout->set[desc_set].dynamic_offset_start + layout->binding[binding].dynamic_offset_offset;
       set_ptr = get_scalar_arg(b, 1, state->args->ac.push_constants);
       offset = state->pipeline_layout->push_constant_size + idx * 16;
       stride = 16;
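
Context for the hunk above: RADV keeps dynamic UBO/SSBO descriptors in the push-constant upload area, after the regular push constants, as 16-byte records. A minimal standalone sketch of the same offset math (hypothetical helper, not Mesa code):

   /* Hypothetical restatement of the addressing computed in the hunk above. */
   static unsigned dynamic_desc_byte_offset(unsigned push_constant_size,
                                            unsigned dynamic_offset_start,
                                            unsigned dynamic_offset_offset)
   {
      unsigned idx = dynamic_offset_start + dynamic_offset_offset;
      return push_constant_size + idx * 16; /* one 16-byte descriptor per slot */
   }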
@@ -99,15 +99,13 @@
       assert(stride == 16);
       nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_pack_64_2x32_split(b, set_ptr, binding_ptr));
    } else {
-      nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
-                               nir_vec3(b, set_ptr, binding_ptr, nir_imm_int(b, stride)));
+      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_vec3(b, set_ptr, binding_ptr, nir_imm_int(b, stride)));
    }
    nir_instr_remove(&intrin->instr);
 }
 
 static void
-visit_vulkan_resource_reindex(nir_builder *b, apply_layout_state *state,
-                              nir_intrinsic_instr *intrin)
+visit_vulkan_resource_reindex(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin)
 {
    VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
    if (desc_type == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) {
@@ -121,8 +119,7 @@
 
       nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_pack_64_2x32_split(b, set_ptr, binding_ptr));
    } else {
-      assert(desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
-             desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
+      assert(desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER || desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
 
       nir_ssa_def *binding_ptr = nir_channel(b, intrin->src[0].ssa, 1);
       nir_ssa_def *stride = nir_channel(b, intrin->src[0].ssa, 2);
@@ -132,8 +129,7 @@
 
       binding_ptr = nir_iadd_nuw(b, binding_ptr, index);
 
-      nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
-                               nir_vector_insert_imm(b, intrin->src[0].ssa, binding_ptr, 1));
+      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_vector_insert_imm(b, intrin->src[0].ssa, binding_ptr, 1));
    }
    nir_instr_remove(&intrin->instr);
 }
@@ -142,16 +138,14 @@
 visit_load_vulkan_descriptor(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin)
 {
    if (nir_intrinsic_desc_type(intrin) == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) {
-      nir_ssa_def *addr =
-         convert_pointer_to_64_bit(b, state,
-                                   nir_iadd(b, nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa),
-                                            nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa)));
+      nir_ssa_def *addr = convert_pointer_to_64_bit(b, state,
+                                                    nir_iadd(b, nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa),
+                                                             nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa)));
       nir_ssa_def *desc = nir_build_load_global(b, 1, 64, addr, .access = ACCESS_NON_WRITEABLE);
 
       nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc);
    } else {
-      nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
-                               nir_vector_insert_imm(b, intrin->src[0].ssa, nir_imm_int(b, 0), 2));
+      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_vector_insert_imm(b, intrin->src[0].ssa, nir_imm_int(b, 0), 2));
    }
    nir_instr_remove(&intrin->instr);
 }
@@ -159,27 +153,24 @@
 static nir_ssa_def *
 load_inline_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_ssa_def *rsrc)
 {
-   uint32_t desc_type =
-      S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
-      S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+   uint32_t desc_type = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+                        S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
    if (state->gfx_level >= GFX11) {
-      desc_type |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
-                   S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
+      desc_type |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
    } else if (state->gfx_level >= GFX10) {
-      desc_type |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
-                   S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
+      desc_type |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
+                   S_008F0C_RESOURCE_LEVEL(1);
    } else {
-      desc_type |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
-                   S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+      desc_type |=
+         S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
    }
 
-   return nir_vec4(b, rsrc, nir_imm_int(b, S_008F04_BASE_ADDRESS_HI(state->address32_hi)),
-                   nir_imm_int(b, 0xffffffff), nir_imm_int(b, desc_type));
+   return nir_vec4(b, rsrc, nir_imm_int(b, S_008F04_BASE_ADDRESS_HI(state->address32_hi)), nir_imm_int(b, 0xffffffff),
+                   nir_imm_int(b, desc_type));
 }
 
 static nir_ssa_def *
-load_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_ssa_def *rsrc,
-                       unsigned access)
+load_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_ssa_def *rsrc, unsigned access)
 {
    nir_binding binding = nir_chase_binding(nir_src_for_ssa(rsrc));
 
@@ -187,8 +178,7 @@
     * VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK.
     */
    if (binding.success) {
-      struct radv_descriptor_set_layout *layout =
-         state->pipeline_layout->set[binding.desc_set].layout;
+      struct radv_descriptor_set_layout *layout = state->pipeline_layout->set[binding.desc_set].layout;
       if (layout->binding[binding.binding].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
          rsrc = nir_iadd(b, nir_channel(b, rsrc, 0), nir_channel(b, rsrc, 1));
         return load_inline_buffer_descriptor(b, state, rsrc);
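
For context, load_inline_buffer_descriptor called here assembles a raw AMD buffer descriptor (V#) as four dwords; only the format bits in the last dword vary by GFX generation. An illustrative view of the layout the nir_vec4 produces (field names are descriptive, not Mesa's):

   /* Illustrative only: the four descriptor dwords built by
    * load_inline_buffer_descriptor. */
   struct vsharp_sketch {
      uint32_t base_address_lo; /* rsrc: 32-bit pointer within the address32 window */
      uint32_t base_address_hi; /* S_008F04_BASE_ADDRESS_HI(state->address32_hi) */
      uint32_t num_records;     /* 0xffffffff: effectively unbounded size */
      uint32_t flags;           /* desc_type: DST_SEL_* plus per-GFX format bits */
   };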
@@ -212,9 +202,8 @@
       nir_ssa_def *ptr = nir_iadd(b, nir_channel(b, rsrc, 0), nir_channel(b, rsrc, 1));
       ptr = nir_iadd_imm(b, ptr, 8);
       ptr = convert_pointer_to_64_bit(b, state, ptr);
-      size =
-         nir_build_load_global(b, 4, 32, ptr, .access = ACCESS_NON_WRITEABLE | ACCESS_CAN_REORDER,
-                               .align_mul = 16, .align_offset = 4);
+      size = nir_build_load_global(b, 4, 32, ptr, .access = ACCESS_NON_WRITEABLE | ACCESS_CAN_REORDER, .align_mul = 16,
+                                   .align_offset = 4);
    } else {
       /* load the entire descriptor so it can be CSE'd */
       nir_ssa_def *ptr = convert_pointer_to_64_bit(b, state, nir_channel(b, rsrc, 0));
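
A note on the alignment arguments carried through this rewrap: in NIR, .align_mul and .align_offset together promise that the effective address of the load satisfies addr % align_mul == align_offset, which lets backends pick wider or better-scheduled memory operations. For the call above, that works out to (illustrative):

   /* What .align_mul = 16, .align_offset = 4 asserts about the address: */
   assert(addr % 16 == 4);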
@@ -227,9 +216,8 @@
 }
 
 static nir_ssa_def *
-get_sampler_desc(nir_builder *b, apply_layout_state *state, nir_deref_instr *deref,
-                 enum ac_descriptor_type desc_type, bool non_uniform, nir_tex_instr *tex,
-                 bool write)
+get_sampler_desc(nir_builder *b, apply_layout_state *state, nir_deref_instr *deref, enum ac_descriptor_type desc_type,
+                 bool non_uniform, nir_tex_instr *tex, bool write)
 {
    nir_variable *var = nir_deref_instr_get_variable(deref);
    assert(var);
@@ -259,9 +247,8 @@
 
       uint32_t dword0_mask = tex->op == nir_texop_tg4 ? C_008F30_TRUNC_COORD : 0xffffffffu;
       const uint32_t *samplers = radv_immutable_samplers(layout, binding);
-      return nir_imm_ivec4(b, samplers[constant_index * 4 + 0] & dword0_mask,
-                           samplers[constant_index * 4 + 1], samplers[constant_index * 4 + 2],
-                           samplers[constant_index * 4 + 3]);
+      return nir_imm_ivec4(b, samplers[constant_index * 4 + 0] & dword0_mask, samplers[constant_index * 4 + 1],
+                           samplers[constant_index * 4 + 2], samplers[constant_index * 4 + 3]);
    }
 
    unsigned size = 8;
@@ -322,8 +309,7 @@
     * use the tail from plane 1 so that we can store only the first 16 bytes
     * of the last plane. */
    if (desc_type == AC_DESC_PLANE_2) {
-      nir_ssa_def *desc2 =
-         get_sampler_desc(b, state, deref, AC_DESC_PLANE_1, non_uniform, tex, write);
+      nir_ssa_def *desc2 = get_sampler_desc(b, state, deref, AC_DESC_PLANE_1, non_uniform, tex, write);
 
       nir_ssa_def *comp[8];
       for (unsigned i = 0; i < 4; i++)
@@ -364,12 +350,11 @@
 {
    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
    const enum glsl_sampler_dim dim = glsl_get_sampler_dim(deref->type);
-   bool is_load = intrin->intrinsic == nir_intrinsic_image_deref_load ||
-                  intrin->intrinsic == nir_intrinsic_image_deref_sparse_load;
+   bool is_load =
+      intrin->intrinsic == nir_intrinsic_image_deref_load || intrin->intrinsic == nir_intrinsic_image_deref_sparse_load;
 
-   nir_ssa_def *desc = get_sampler_desc(
-      b, state, deref, dim == GLSL_SAMPLER_DIM_BUF ? AC_DESC_BUFFER : AC_DESC_IMAGE,
-      nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM, NULL, !is_load);
+   nir_ssa_def *desc = get_sampler_desc(b, state, deref, dim == GLSL_SAMPLER_DIM_BUF ? AC_DESC_BUFFER : AC_DESC_IMAGE,
+                                        nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM, NULL, !is_load);
 
    if (intrin->intrinsic == nir_intrinsic_image_deref_descriptor_amd) {
       nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc);
@@ -397,19 +382,8 @@
       break;
    case nir_intrinsic_load_ubo:
    case nir_intrinsic_load_ssbo:
-   case nir_intrinsic_ssbo_atomic_add:
-   case nir_intrinsic_ssbo_atomic_fadd:
-   case nir_intrinsic_ssbo_atomic_imin:
-   case nir_intrinsic_ssbo_atomic_umin:
-   case nir_intrinsic_ssbo_atomic_fmin:
-   case nir_intrinsic_ssbo_atomic_imax:
-   case nir_intrinsic_ssbo_atomic_umax:
-   case nir_intrinsic_ssbo_atomic_fmax:
-   case nir_intrinsic_ssbo_atomic_and:
-   case nir_intrinsic_ssbo_atomic_or:
-   case nir_intrinsic_ssbo_atomic_xor:
-   case nir_intrinsic_ssbo_atomic_exchange:
-   case nir_intrinsic_ssbo_atomic_comp_swap:
+   case nir_intrinsic_ssbo_atomic:
+   case nir_intrinsic_ssbo_atomic_swap:
       rsrc = load_buffer_descriptor(b, state, intrin->src[0].ssa, nir_intrinsic_access(intrin));
       nir_instr_rewrite_src_ssa(&intrin->instr, &intrin->src[0], rsrc);
       break;
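
This hunk (and the image_deref_atomic one below) tracks the upstream NIR change that folded the per-operation atomic intrinsics into generic nir_intrinsic_ssbo_atomic / nir_intrinsic_ssbo_atomic_swap, with the specific operation carried as an index on the instruction. A minimal consumer-side sketch, assuming upstream NIR's nir_intrinsic_atomic_op accessor and nir_atomic_op enum:

   /* Sketch: with unified atomics, one case label covers every op and the
    * actual operation is read back off the intrinsic. */
   static bool atomic_is_float_op(const nir_intrinsic_instr *intrin)
   {
      switch (nir_intrinsic_atomic_op(intrin)) {
      case nir_atomic_op_fadd:
      case nir_atomic_op_fmin:
      case nir_atomic_op_fmax:
         return true;
      default:
         return false;
      }
   }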
@@ -423,21 +397,8 @@
    case nir_intrinsic_image_deref_load:
    case nir_intrinsic_image_deref_sparse_load:
    case nir_intrinsic_image_deref_store:
-   case nir_intrinsic_image_deref_atomic_add:
-   case nir_intrinsic_image_deref_atomic_imin:
-   case nir_intrinsic_image_deref_atomic_umin:
-   case nir_intrinsic_image_deref_atomic_fmin:
-   case nir_intrinsic_image_deref_atomic_imax:
-   case nir_intrinsic_image_deref_atomic_umax:
-   case nir_intrinsic_image_deref_atomic_fmax:
-   case nir_intrinsic_image_deref_atomic_and:
-   case nir_intrinsic_image_deref_atomic_or:
-   case nir_intrinsic_image_deref_atomic_xor:
-   case nir_intrinsic_image_deref_atomic_exchange:
-   case nir_intrinsic_image_deref_atomic_comp_swap:
-   case nir_intrinsic_image_deref_atomic_fadd:
-   case nir_intrinsic_image_deref_atomic_inc_wrap:
-   case nir_intrinsic_image_deref_atomic_dec_wrap:
+   case nir_intrinsic_image_deref_atomic:
+   case nir_intrinsic_image_deref_atomic_swap:
    case nir_intrinsic_image_deref_size:
    case nir_intrinsic_image_deref_samples:
    case nir_intrinsic_image_deref_descriptor_amd:
@@ -478,25 +439,20 @@
    if (plane >= 0) {
       assert(tex->op != nir_texop_txf_ms && tex->op != nir_texop_samples_identical);
       assert(tex->sampler_dim != GLSL_SAMPLER_DIM_BUF);
-      image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_PLANE_0 + plane,
-                               tex->texture_non_uniform, tex, false);
+      image =
+         get_sampler_desc(b, state, texture_deref_instr, AC_DESC_PLANE_0 + plane, tex->texture_non_uniform, tex, false);
    } else if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
-      image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_BUFFER,
-                               tex->texture_non_uniform, tex, false);
+      image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_BUFFER, tex->texture_non_uniform, tex, false);
    } else if (tex->op == nir_texop_fragment_mask_fetch_amd) {
-      image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_FMASK,
-                               tex->texture_non_uniform, tex, false);
+      image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_FMASK, tex->texture_non_uniform, tex, false);
    } else {
-      image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_IMAGE,
-                               tex->texture_non_uniform, tex, false);
+      image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_IMAGE, tex->texture_non_uniform, tex, false);
    }
 
    if (sampler_deref_instr) {
-      sampler = get_sampler_desc(b, state, sampler_deref_instr, AC_DESC_SAMPLER,
-                                 tex->sampler_non_uniform, tex, false);
+      sampler = get_sampler_desc(b, state, sampler_deref_instr, AC_DESC_SAMPLER, tex->sampler_non_uniform, tex, false);
 
-      if (state->disable_aniso_single_level && tex->sampler_dim < GLSL_SAMPLER_DIM_RECT &&
-          state->gfx_level < GFX8) {
+      if (state->disable_aniso_single_level && tex->sampler_dim < GLSL_SAMPLER_DIM_RECT && state->gfx_level < GFX8) {
          /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
          *
          * GFX6-GFX7:
@@ -543,8 +499,7 @@
 
 void
 radv_nir_apply_pipeline_layout(nir_shader *shader, struct radv_device *device,
-                               const struct radv_pipeline_layout *layout,
-                               const struct radv_shader_info *info,
+                               const struct radv_pipeline_layout *layout, const struct radv_shader_info *info,
                                const struct radv_shader_args *args)
 {
    apply_layout_state state = {
@@ -563,7 +518,7 @@
       if (!function->impl)
          continue;
 
-      nir_builder_init(&b, function->impl);
+      b = nir_builder_create(function->impl);
 
       /* Iterate in reverse so load_ubo lowering can look at
        * the vulkan_resource_index to tell if it's an inline
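
The final hunk also adopts the newer nir_builder API: nir_builder_create returns the builder struct by value rather than initializing a caller-provided one via nir_builder_init. A sketch of the usual pattern:

   nir_builder b = nir_builder_create(function->impl);
   /* Position the builder wherever the pass needs to emit code, e.g.: */
   b.cursor = nir_before_instr(&intrin->instr);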