~mmach/netext73/mesa-ryzen

Viewing changes to src/amd/vulkan/nir/radv_nir_lower_vs_inputs.c

  • Committer: mmach
  • Date: 2023-11-02 21:31:35 UTC
  • Revision ID: netbit73@gmail.com-20231102213135-18d4tzh7tj0uz752

@@ -38,8 +38,7 @@
 } lower_vs_inputs_state;
 
 static nir_ssa_def *
-lower_load_vs_input_from_prolog(nir_builder *b, nir_intrinsic_instr *intrin,
-                                lower_vs_inputs_state *s)
+lower_load_vs_input_from_prolog(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs_state *s)
 {
    nir_src *offset_src = nir_get_io_offset_src(intrin);
    assert(nir_src_is_const(*offset_src));
@@ -57,8 +56,7 @@
    const unsigned arg_bit_size = MAX2(bit_size, 32);
 
    unsigned num_input_args = 1;
-   nir_ssa_def *input_args[2] = {
-      ac_nir_load_arg(b, &s->args->ac, s->args->vs_inputs[driver_location]), NULL};
+   nir_ssa_def *input_args[2] = {ac_nir_load_arg(b, &s->args->ac, s->args->vs_inputs[driver_location]), NULL};
    if (component * 32 + arg_bit_size * num_components > 128) {
       assert(bit_size == 64);
 
@@ -115,8 +113,7 @@
 }
 
 static nir_ssa_def *
-oob_input_load_value(nir_builder *b, const unsigned channel_idx, const unsigned bit_size,
-                     const bool is_float)
+oob_input_load_value(nir_builder *b, const unsigned channel_idx, const unsigned bit_size, const bool is_float)
 {
    /* 22.1.1. Attribute Location and Component Assignment of Vulkan 1.3 specification:
     * For 64-bit data types, no default attribute values are provided. Input variables
@@ -136,8 +133,7 @@
 }
 
 static unsigned
-count_format_bytes(const struct util_format_description *f, const unsigned first_channel,
-                   const unsigned num_channels)
+count_format_bytes(const struct util_format_description *f, const unsigned first_channel, const unsigned num_channels)
 {
    if (!num_channels)
       return 0;
@@ -165,8 +161,7 @@
 }
 
 static unsigned
-first_used_swizzled_channel(const struct util_format_description *f, const unsigned mask,
-                            const bool backwards)
+first_used_swizzled_channel(const struct util_format_description *f, const unsigned mask, const bool backwards)
 {
    unsigned first_used = backwards ? 0 : f->nr_channels;
    const unsigned it_mask = mask & BITFIELD_MASK(f->nr_channels);
@@ -181,8 +176,7 @@
 }
 
 static nir_ssa_def *
-adjust_vertex_fetch_alpha(nir_builder *b, enum ac_vs_input_alpha_adjust alpha_adjust,
-                          nir_ssa_def *alpha)
+adjust_vertex_fetch_alpha(nir_builder *b, enum ac_vs_input_alpha_adjust alpha_adjust, nir_ssa_def *alpha)
 {
    if (alpha_adjust == AC_ALPHA_ADJUST_SSCALED)
       alpha = nir_f2u32(b, alpha);
@@ -247,16 +241,13 @@
    const struct util_format_description *f = util_format_description(attrib_format);
    const struct ac_vtx_format_info *vtx_info =
       ac_get_vtx_format_info(s->rad_info->gfx_level, s->rad_info->family, attrib_format);
-   const unsigned binding_index =
-      s->info->vs.use_per_attribute_vb_descs ? location : attrib_binding;
-   const unsigned desc_index =
-      util_bitcount(s->info->vs.vb_desc_usage_mask & u_bit_consecutive(0, binding_index));
+   const unsigned binding_index = s->info->vs.use_per_attribute_vb_descs ? location : attrib_binding;
+   const unsigned desc_index = util_bitcount(s->info->vs.vb_desc_usage_mask & u_bit_consecutive(0, binding_index));
 
    nir_ssa_def *vertex_buffers_arg = ac_nir_load_arg(b, &s->args->ac, s->args->ac.vertex_buffers);
    nir_ssa_def *vertex_buffers =
       nir_pack_64_2x32_split(b, vertex_buffers_arg, nir_imm_int(b, s->rad_info->address32_hi));
-   nir_ssa_def *descriptor =
-      nir_load_smem_amd(b, 4, vertex_buffers, nir_imm_int(b, desc_index * 16));
+   nir_ssa_def *descriptor = nir_load_smem_amd(b, 4, vertex_buffers, nir_imm_int(b, desc_index * 16));
    nir_ssa_def *base_index = calc_vs_input_index(b, location, s);
    nir_ssa_def *zero = nir_imm_int(b, 0);
 
@@ -283,8 +274,7 @@
     * Don't shrink the format here because this might allow the backend to
     * emit fewer (but larger than needed) HW instructions.
     */
-   const unsigned first_trailing_unused_channel =
-      first_used_swizzled_channel(f, dest_use_mask, true) + 1;
+   const unsigned first_trailing_unused_channel = first_used_swizzled_channel(f, dest_use_mask, true) + 1;
    const unsigned max_loaded_channels = MIN2(first_trailing_unused_channel, f->nr_channels);
    const unsigned fetch_num_channels =
       first_used_channel >= max_loaded_channels ? 0 : max_loaded_channels - skipped_start;
@@ -320,17 +310,15 @@
        * Note, NONE seems to occur in real use and is considered an array format.
        */
       if (f->is_array && fetch_format != PIPE_FORMAT_NONE) {
-         while (channels > 1 && attrib_stride &&
-                (const_off + count_format_bytes(f, start, channels)) > attrib_stride) {
+         while (channels > 1 && attrib_stride && (const_off + count_format_bytes(f, start, channels)) > attrib_stride) {
             channels--;
          }
 
          /* Keep the fetch format as large as possible to let the backend emit
           * larger load instructions when it deems them beneficial.
           */
-         fetch_format =
-            util_format_get_array(f->channel[0].type, f->channel[0].size, f->nr_channels - start,
-                                  f->is_unorm || f->is_snorm, f->channel[0].pure_integer);
+         fetch_format = util_format_get_array(f->channel[0].type, f->channel[0].size, f->nr_channels - start,
+                                              f->is_unorm || f->is_snorm, f->channel[0].pure_integer);
       }
 
       assert(f->is_array || channels == fetch_num_channels);
@@ -339,33 +327,30 @@
        * Typed loads can cause GPU hangs when used with improper alignment.
        */
       if (can_use_untyped_load(f, bit_size)) {
-         loads[num_loads++] =
-            nir_load_buffer_amd(b, channels, bit_size, descriptor, zero, zero, index,
-                                .base = const_off, .memory_modes = nir_var_shader_in);
+         loads[num_loads++] = nir_load_buffer_amd(b, channels, bit_size, descriptor, zero, zero, index,
+                                                  .base = const_off, .memory_modes = nir_var_shader_in);
       } else {
          const unsigned align_mul = MAX2(1, s->pl_key->vs.vertex_binding_align[attrib_binding]);
          const unsigned align_offset = const_off % align_mul;
 
          loads[num_loads++] = nir_load_typed_buffer_amd(
-            b, channels, bit_size, descriptor, zero, zero, index, .base = const_off,
-            .format = fetch_format, .align_mul = align_mul, .align_offset = align_offset,
-            .memory_modes = nir_var_shader_in);
+            b, channels, bit_size, descriptor, zero, zero, index, .base = const_off, .format = fetch_format,
+            .align_mul = align_mul, .align_offset = align_offset, .memory_modes = nir_var_shader_in);
       }
    }
 
    nir_ssa_def *load = loads[0];
 
    /* Extract the channels we actually need when we couldn't skip starting
-    * components or had to emit more than one load instrinsic.
+    * components or had to emit more than one load intrinsic.
     */
    if (num_loads > 0 && (first_used_channel > skipped_start || num_loads != 1))
       load = nir_extract_bits(b, loads, num_loads, (first_used_channel - skipped_start) * bit_size,
                               max_loaded_channels - first_used_channel, bit_size);
 
    /* Return early if possible to avoid generating unnecessary IR. */
-   if (num_loads > 0 && first_used_channel == component &&
-       load->num_components == dest_num_components && !needs_swizzle &&
-       alpha_adjust == AC_ALPHA_ADJUST_NONE)
+   if (num_loads > 0 && first_used_channel == component && load->num_components == dest_num_components &&
+       !needs_swizzle && alpha_adjust == AC_ALPHA_ADJUST_NONE)
       return load;
 
    /* Fill unused and OOB components.
@@ -443,6 +428,6 @@
       .rad_info = rad_info,
    };
 
-   return nir_shader_instructions_pass(shader, lower_vs_input_instr,
-                                       nir_metadata_dominance | nir_metadata_block_index, &state);
+   return nir_shader_instructions_pass(shader, lower_vs_input_instr, nir_metadata_dominance | nir_metadata_block_index,
+                                       &state);
 }