  38    38     } lower_vs_inputs_state;
  40    40     static nir_ssa_def *
  41        -  lower_load_vs_input_from_prolog(nir_builder *b, nir_intrinsic_instr *intrin,
  42        -  lower_vs_inputs_state *s)
        41  +  lower_load_vs_input_from_prolog(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs_state *s)
  44    43     nir_src *offset_src = nir_get_io_offset_src(intrin);
  45    44     assert(nir_src_is_const(*offset_src));

  57    56     const unsigned arg_bit_size = MAX2(bit_size, 32);
  59    58     unsigned num_input_args = 1;
  60        -  nir_ssa_def *input_args[2] = {
  61        -  ac_nir_load_arg(b, &s->args->ac, s->args->vs_inputs[driver_location]), NULL};
        59  +  nir_ssa_def *input_args[2] = {ac_nir_load_arg(b, &s->args->ac, s->args->vs_inputs[driver_location]), NULL};
  62    60     if (component * 32 + arg_bit_size * num_components > 128) {
  63    61     assert(bit_size == 64);
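
Context for the hunk above: each vertex-shader input passed in from the prolog is a 4 x 32-bit (128-bit) argument, so a load needs a second argument whenever the requested components, starting at component * 32 bits, spill past those 128 bits (which, per the assert, only 64-bit inputs can do). A minimal standalone sketch of that boundary check follows; the helper name is hypothetical, not RADV code.

#include <assert.h>
#include <stdbool.h>

/* A prolog input argument holds 4 x 32-bit channels (128 bits).  A load needs
 * a second argument when the requested components, starting at the given
 * 32-bit component slot, do not fit in the first argument. */
static bool
needs_second_input_arg(unsigned component, unsigned arg_bit_size, unsigned num_components)
{
   return component * 32 + arg_bit_size * num_components > 128;
}

int
main(void)
{
   assert(!needs_second_input_arg(0, 32, 4)); /* vec4 fits in one argument */
   assert(needs_second_input_arg(2, 64, 2));  /* dvec2 at component 2 spills over */
   assert(needs_second_input_arg(0, 64, 3));  /* dvec3 needs 192 bits */
   return 0;
}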

 117   115     static nir_ssa_def *
 118        -  oob_input_load_value(nir_builder *b, const unsigned channel_idx, const unsigned bit_size,
       116  +  oob_input_load_value(nir_builder *b, const unsigned channel_idx, const unsigned bit_size, const bool is_float)
 121   118     /* 22.1.1. Attribute Location and Component Assignment of Vulkan 1.3 specification:
 122   119      * For 64-bit data types, no default attribute values are provided. Input variables
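
The Vulkan rule the comment refers to is that attribute components the vertex format does not supply read back as (0, 0, 0, 1), with the w default being 1.0f for float formats and 1 for integer formats, and no defaults defined for 64-bit types. A standalone illustration of that rule (not the actual body of oob_input_load_value, which builds NIR immediates):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Default value of a vertex input channel that the fetched format does not
 * provide: x/y/z default to 0, w defaults to 1 (1.0f for float formats). */
static uint32_t
default_input_channel_bits(unsigned channel_idx, bool is_float)
{
   if (channel_idx != 3)
      return 0;
   return is_float ? 0x3f800000u /* 1.0f */ : 1u;
}

int
main(void)
{
   assert(default_input_channel_bits(1, true) == 0);
   assert(default_input_channel_bits(3, false) == 1);
   assert(default_input_channel_bits(3, true) == 0x3f800000u);
   return 0;
}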

 247   241     const struct util_format_description *f = util_format_description(attrib_format);
 248   242     const struct ac_vtx_format_info *vtx_info =
 249   243     ac_get_vtx_format_info(s->rad_info->gfx_level, s->rad_info->family, attrib_format);
 250        -  const unsigned binding_index =
 251        -  s->info->vs.use_per_attribute_vb_descs ? location : attrib_binding;
 252        -  const unsigned desc_index =
 253        -  util_bitcount(s->info->vs.vb_desc_usage_mask & u_bit_consecutive(0, binding_index));
       244  +  const unsigned binding_index = s->info->vs.use_per_attribute_vb_descs ? location : attrib_binding;
       245  +  const unsigned desc_index = util_bitcount(s->info->vs.vb_desc_usage_mask & u_bit_consecutive(0, binding_index));
 255   247     nir_ssa_def *vertex_buffers_arg = ac_nir_load_arg(b, &s->args->ac, s->args->ac.vertex_buffers);
 256   248     nir_ssa_def *vertex_buffers =
 257   249     nir_pack_64_2x32_split(b, vertex_buffers_arg, nir_imm_int(b, s->rad_info->address32_hi));
 258        -  nir_ssa_def *descriptor =
 259        -  nir_load_smem_amd(b, 4, vertex_buffers, nir_imm_int(b, desc_index * 16));
       250  +  nir_ssa_def *descriptor = nir_load_smem_amd(b, 4, vertex_buffers, nir_imm_int(b, desc_index * 16));
 260   251     nir_ssa_def *base_index = calc_vs_input_index(b, location, s);
 261   252     nir_ssa_def *zero = nir_imm_int(b, 0);
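
The descriptor lookup in this hunk relies on vertex-buffer descriptors being compacted: when per-attribute descriptors are used the binding index is the attribute location itself, and its descriptor slot is the number of lower-numbered bindings that actually have a descriptor, i.e. a popcount of the usage mask below the binding (each descriptor is 16 bytes / 4 dwords, hence the desc_index * 16 byte offset into the descriptor array). A standalone sketch of that compaction, approximating u_bit_consecutive(0, n) with a plain mask and using the GCC/Clang popcount builtin:

#include <assert.h>
#include <stdint.h>

/* Compacted descriptor slot of a vertex buffer binding: unused bindings get
 * no descriptor, so count how many lower-numbered bindings are in use. */
static unsigned
vb_desc_index(uint32_t vb_desc_usage_mask, unsigned binding_index)
{
   const uint32_t below = binding_index ? (1u << binding_index) - 1u : 0u;
   return (unsigned)__builtin_popcount(vb_desc_usage_mask & below);
}

int
main(void)
{
   /* Bindings 0, 2 and 5 in use: binding 5 lands in descriptor slot 2,
    * i.e. at byte offset 2 * 16 in the vertex buffer descriptor array. */
   const uint32_t mask = (1u << 0) | (1u << 2) | (1u << 5);
   assert(vb_desc_index(mask, 0) == 0);
   assert(vb_desc_index(mask, 2) == 1);
   assert(vb_desc_index(mask, 5) == 2);
   return 0;
}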

 283   274      * Don't shrink the format here because this might allow the backend to
 284   275      * emit fewer (but larger than needed) HW instructions.
 286        -  const unsigned first_trailing_unused_channel =
 287        -  first_used_swizzled_channel(f, dest_use_mask, true) + 1;
       277  +  const unsigned first_trailing_unused_channel = first_used_swizzled_channel(f, dest_use_mask, true) + 1;
 288   278     const unsigned max_loaded_channels = MIN2(first_trailing_unused_channel, f->nr_channels);
 289   279     const unsigned fetch_num_channels =
 290   280     first_used_channel >= max_loaded_channels ? 0 : max_loaded_channels - skipped_start;
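
The channel bookkeeping above derives everything from the destination use mask: channels before the first used one may be skipped, channels after the last used one are never fetched, and the count is clamped to what the format provides. A simplified standalone sketch, using GCC/Clang bit-scan builtins and ignoring format swizzling (the real first_used_swizzled_channel maps channels through the format swizzle, and the real code does not always assume the skipped leading channels equal the first used channel as this sketch does):

#include <assert.h>
#include <stdint.h>

#define MIN2(a, b) ((a) < (b) ? (a) : (b))

/* Simplified: treat the use mask as plain channel indices, no swizzle. */
static unsigned
first_used(uint32_t use_mask)
{
   return (unsigned)__builtin_ctz(use_mask);
}

static unsigned
last_used(uint32_t use_mask)
{
   return 31u - (unsigned)__builtin_clz(use_mask);
}

int
main(void)
{
   const uint32_t dest_use_mask = 0x6; /* only channels y and z are read */
   const unsigned nr_channels = 4;     /* e.g. an RGBA format */
   const unsigned skipped_start = first_used(dest_use_mask);

   const unsigned first_trailing_unused_channel = last_used(dest_use_mask) + 1;
   const unsigned max_loaded_channels = MIN2(first_trailing_unused_channel, nr_channels);
   const unsigned fetch_num_channels =
      skipped_start >= max_loaded_channels ? 0 : max_loaded_channels - skipped_start;

   assert(skipped_start == 1 && max_loaded_channels == 3 && fetch_num_channels == 2);
   return 0;
}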

 320   310      * Note, NONE seems to occur in real use and is considered an array format.
 322   312     if (f->is_array && fetch_format != PIPE_FORMAT_NONE) {
 323        -  while (channels > 1 && attrib_stride &&
 324        -  (const_off + count_format_bytes(f, start, channels)) > attrib_stride) {
       313  +  while (channels > 1 && attrib_stride && (const_off + count_format_bytes(f, start, channels)) > attrib_stride) {
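
For array formats (every channel has the same size), the loop this hunk rewraps shrinks the per-fetch channel count until the fetch no longer reads past the binding stride, so a vertex buffer whose stride is smaller than the full attribute format is not over-read. A standalone sketch of the same clamping, assuming the byte count is simply channels times the channel size:

#include <assert.h>

/* Clamp an array-format fetch so that const_off plus the fetched bytes never
 * exceeds the binding stride; a zero stride skips the clamp, as in the loop. */
static unsigned
clamp_channels_to_stride(unsigned channels, unsigned channel_bytes,
                         unsigned const_off, unsigned attrib_stride)
{
   while (channels > 1 && attrib_stride && const_off + channels * channel_bytes > attrib_stride)
      channels--;
   return channels;
}

int
main(void)
{
   /* R32G32B32A32 (4 x 4-byte channels) with a 12-byte binding stride:
    * only 3 channels can be fetched without reading past the vertex. */
   assert(clamp_channels_to_stride(4, 4, 0, 12) == 3);
   assert(clamp_channels_to_stride(4, 4, 0, 0) == 4);
   return 0;
}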

 328   317     /* Keep the fetch format as large as possible to let the backend emit
 329   318      * larger load instructions when it deems them beneficial.
 332        -  util_format_get_array(f->channel[0].type, f->channel[0].size, f->nr_channels - start,
 333        -  f->is_unorm || f->is_snorm, f->channel[0].pure_integer);
       320  +  fetch_format = util_format_get_array(f->channel[0].type, f->channel[0].size, f->nr_channels - start,
       321  +  f->is_unorm || f->is_snorm, f->channel[0].pure_integer);
 336   324     assert(f->is_array || channels == fetch_num_channels);

 339   327      * Typed loads can cause GPU hangs when used with improper alignment.
 341   329     if (can_use_untyped_load(f, bit_size)) {
 343        -  nir_load_buffer_amd(b, channels, bit_size, descriptor, zero, zero, index,
 344        -  .base = const_off, .memory_modes = nir_var_shader_in);
       330  +  loads[num_loads++] = nir_load_buffer_amd(b, channels, bit_size, descriptor, zero, zero, index,
       331  +  .base = const_off, .memory_modes = nir_var_shader_in);
 346   333     const unsigned align_mul = MAX2(1, s->pl_key->vs.vertex_binding_align[attrib_binding]);
 347   334     const unsigned align_offset = const_off % align_mul;
 349   336     loads[num_loads++] = nir_load_typed_buffer_amd(
 350        -  b, channels, bit_size, descriptor, zero, zero, index, .base = const_off,
 351        -  .format = fetch_format, .align_mul = align_mul, .align_offset = align_offset,
 352        -  .memory_modes = nir_var_shader_in);
       337  +  b, channels, bit_size, descriptor, zero, zero, index, .base = const_off, .format = fetch_format,
       338  +  .align_mul = align_mul, .align_offset = align_offset, .memory_modes = nir_var_shader_in);
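
Typed buffer loads are only emitted with explicit alignment information because, as the comment notes, they can hang the GPU when misaligned: the binding alignment from the pipeline key becomes align_mul and the attribute's constant byte offset within the vertex yields align_offset. A tiny standalone sketch of that arithmetic (hypothetical struct and helper, same computation as the hunk):

#include <assert.h>

#define MAX2(a, b) ((a) > (b) ? (a) : (b))

struct load_alignment {
   unsigned mul;    /* guaranteed alignment of the fetch address */
   unsigned offset; /* constant byte offset modulo that alignment */
};

static struct load_alignment
typed_load_alignment(unsigned binding_align, unsigned const_off)
{
   struct load_alignment a;
   a.mul = MAX2(1, binding_align); /* unknown alignment degrades to 1 byte */
   a.offset = const_off % a.mul;
   return a;
}

int
main(void)
{
   struct load_alignment a = typed_load_alignment(4, 6);
   assert(a.mul == 4 && a.offset == 2);
   a = typed_load_alignment(0, 6);
   assert(a.mul == 1 && a.offset == 0);
   return 0;
}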

 356   342     nir_ssa_def *load = loads[0];
 358   344     /* Extract the channels we actually need when we couldn't skip starting
 359        -   * components or had to emit more than one load instrinsic.
       345  +   * components or had to emit more than one load intrinsic.
 361   347     if (num_loads > 0 && (first_used_channel > skipped_start || num_loads != 1))
 362   348     load = nir_extract_bits(b, loads, num_loads, (first_used_channel - skipped_start) * bit_size,
 363   349     max_loaded_channels - first_used_channel, bit_size);
 365   351     /* Return early if possible to avoid generating unnecessary IR. */
 366        -  if (num_loads > 0 && first_used_channel == component &&
 367        -  load->num_components == dest_num_components && !needs_swizzle &&
 368        -  alpha_adjust == AC_ALPHA_ADJUST_NONE)
       352  +  if (num_loads > 0 && first_used_channel == component && load->num_components == dest_num_components &&
       353  +  !needs_swizzle && alpha_adjust == AC_ALPHA_ADJUST_NONE)
 371   356     /* Fill unused and OOB components.