154
148
add_ud_arg(args, 1, AC_ARG_INT, &args->ac.draw_id, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
156
150
if (info->vs.needs_base_instance) {
157
add_ud_arg(args, 1, AC_ARG_INT, &args->ac.start_instance,
158
AC_UD_VS_BASE_VERTEX_START_INSTANCE);
151
add_ud_arg(args, 1, AC_ARG_INT, &args->ac.start_instance, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
164
declare_vs_input_vgprs(enum amd_gfx_level gfx_level, const struct radv_shader_info *info,
165
struct radv_shader_args *args, bool merged_vs_tcs)
157
declare_vs_input_vgprs(enum amd_gfx_level gfx_level, const struct radv_shader_info *info, struct radv_shader_args *args,
167
160
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
168
if (args->type != RADV_SHADER_TYPE_GS_COPY) {
161
if (info->type != RADV_SHADER_TYPE_GS_COPY) {
169
162
if (info->vs.as_ls || merged_vs_tcs) {
171
164
if (gfx_level >= GFX11) {
291
281
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.sample_coverage);
292
282
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* fixed pt */
294
if (args->remap_spi_ps_input) {
295
/* LLVM optimizes away unused FS inputs and computes spi_ps_input_addr itself and then
296
* communicates the results back via the ELF binary. Mirror what LLVM does by re-mapping the
297
* VGPR arguments here.
299
for (unsigned i = 0, vgpr_arg = 0, vgpr_reg = 0; i < args->ac.arg_count; i++) {
300
if (args->ac.args[i].file != AC_ARG_VGPR) {
304
if (!(spi_ps_input & (1 << vgpr_arg))) {
305
args->ac.args[i].skip = true;
307
args->ac.args[i].offset = vgpr_reg;
308
vgpr_reg += args->ac.args[i].size;
284
if (args->remap_spi_ps_input)
285
ac_compact_ps_vgpr_args(&args->ac, info->ps.spi_ps_input);
316
declare_ngg_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args,
317
bool has_ngg_query, bool has_ngg_provoking_vtx)
289
declare_ngg_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args, bool has_ngg_query,
290
bool has_ngg_provoking_vtx)
319
292
if (has_ngg_query)
320
293
add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_query_state, AC_UD_NGG_QUERY_STATE);
352
323
radv_declare_rt_shader_args(enum amd_gfx_level gfx_level, struct radv_shader_args *args)
354
add_ud_arg(args, 2, AC_ARG_CONST_PTR, &args->ac.rt_shader_pc, AC_UD_SCRATCH_RING_OFFSETS);
355
add_ud_arg(args, 1, AC_ARG_CONST_PTR_PTR, &args->descriptor_sets[0],
356
AC_UD_INDIRECT_DESCRIPTOR_SETS);
325
add_ud_arg(args, 2, AC_ARG_CONST_PTR, &args->ac.rt.shader_pc, AC_UD_SCRATCH_RING_OFFSETS);
326
add_ud_arg(args, 1, AC_ARG_CONST_PTR_PTR, &args->descriptor_sets[0], AC_UD_INDIRECT_DESCRIPTOR_SETS);
357
327
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->ac.push_constants);
358
ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ac.sbt_descriptors);
359
ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.ray_launch_size);
328
ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ac.rt.sbt_descriptors);
329
ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_PTR, &args->ac.rt.traversal_shader);
330
ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.rt.launch_size);
360
331
if (gfx_level < GFX9) {
361
332
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
362
333
ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ac.ring_offsets);
365
ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.ray_launch_id);
366
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt_dynamic_callable_stack_base);
336
ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.rt.launch_id);
337
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.dynamic_callable_stack_base);
338
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_CONST_PTR, &args->ac.rt.next_shader);
339
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_CONST_PTR, &args->ac.rt.shader_record);
341
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.payload_offset);
342
ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_FLOAT, &args->ac.rt.ray_origin);
343
ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_FLOAT, &args->ac.rt.ray_direction);
344
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.rt.ray_tmin);
345
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.rt.ray_tmax);
346
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.cull_mask_and_flags);
348
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_CONST_PTR, &args->ac.rt.accel_struct);
349
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.sbt_offset);
350
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.sbt_stride);
351
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.miss_index);
353
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_CONST_PTR, &args->ac.rt.instance_addr);
354
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.primitive_id);
355
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.geometry_id_and_flags);
356
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.hit_kind);
360
radv_ps_needs_state_sgpr(const struct radv_shader_info *info, const struct radv_pipeline_key *key)
362
if (info->ps.needs_sample_positions && key->dynamic_rasterization_samples)
365
if (key->dynamic_line_rast_mode)
368
if (info->ps.reads_sample_mask_in && (info->ps.uses_sample_shading || key->ps.sample_shading_enable))
371
/* For computing barycentrics when the primitive topology is unknown at compile time (GPL). */
372
if (info->ps.load_rasterization_prim && key->unknown_rast_prim)
370
379
declare_shader_args(const struct radv_device *device, const struct radv_pipeline_key *key,
371
const struct radv_shader_info *info, gl_shader_stage stage,
372
gl_shader_stage previous_stage, enum radv_shader_type type,
380
const struct radv_shader_info *info, gl_shader_stage stage, gl_shader_stage previous_stage,
373
381
struct radv_shader_args *args, struct user_sgpr_info *user_sgpr_info)
375
383
const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
376
384
bool needs_view_index = info->uses_view_index;
377
385
bool has_ngg_query = info->has_ngg_prim_query || info->has_ngg_xfb_query ||
378
386
(stage == MESA_SHADER_GEOMETRY && info->gs.has_ngg_pipeline_stat_query);
379
bool has_ngg_provoking_vtx = (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_GEOMETRY) &&
380
key->dynamic_provoking_vtx_mode;
387
bool has_ngg_provoking_vtx =
388
(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_GEOMETRY) && key->dynamic_provoking_vtx_mode;
382
390
if (gfx_level >= GFX10 && info->is_ngg && stage != MESA_SHADER_GEOMETRY) {
383
391
/* Handle all NGG shaders as GS to simplify the code here. */
423
430
if (info->cs.is_rt_shader) {
424
add_ud_arg(args, 2, AC_ARG_CONST_DESC_PTR, &args->ac.sbt_descriptors,
425
AC_UD_CS_SBT_DESCRIPTORS);
426
add_ud_arg(args, 2, AC_ARG_CONST_PTR, &args->ac.ray_launch_size_addr,
427
AC_UD_CS_RAY_LAUNCH_SIZE_ADDR);
428
add_ud_arg(args, 2, AC_ARG_CONST_PTR, &args->ac.rt_traversal_shader_addr,
429
AC_UD_CS_TRAVERSAL_SHADER_ADDR);
430
add_ud_arg(args, 1, AC_ARG_INT, &args->ac.rt_dynamic_callable_stack_base,
431
add_ud_arg(args, 2, AC_ARG_CONST_DESC_PTR, &args->ac.rt.sbt_descriptors, AC_UD_CS_SBT_DESCRIPTORS);
432
add_ud_arg(args, 2, AC_ARG_CONST_PTR, &args->ac.rt.traversal_shader, AC_UD_CS_TRAVERSAL_SHADER_ADDR);
433
add_ud_arg(args, 2, AC_ARG_CONST_PTR, &args->ac.rt.launch_size_addr, AC_UD_CS_RAY_LAUNCH_SIZE_ADDR);
434
add_ud_arg(args, 1, AC_ARG_INT, &args->ac.rt.dynamic_callable_stack_base,
431
435
AC_UD_CS_RAY_DYNAMIC_CALLABLE_STACK_BASE);
659
663
add_ud_arg(args, 1, AC_ARG_INT, &args->ps_epilog_pc, AC_UD_PS_EPILOG_PC);
662
if (info->ps.needs_sample_positions && key->dynamic_rasterization_samples) {
663
add_ud_arg(args, 1, AC_ARG_INT, &args->ps_num_samples, AC_UD_PS_NUM_SAMPLES);
666
if (radv_ps_needs_state_sgpr(info, key))
667
add_ud_arg(args, 1, AC_ARG_INT, &args->ps_state, AC_UD_PS_STATE);
666
669
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask);
671
if (info->ps.pops && gfx_level < GFX11) {
672
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.pops_collision_wave_id);
675
if (info->ps.load_provoking_vtx) {
676
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.load_provoking_vtx);
667
679
if (args->explicit_scratch_args && gfx_level < GFX11) {
668
680
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);