2
2
* Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3
3
* Copyright 2018 Advanced Micro Devices, Inc.
6
* Permission is hereby granted, free of charge, to any person obtaining a
7
* copy of this software and associated documentation files (the "Software"),
8
* to deal in the Software without restriction, including without limitation
9
* on the rights to use, copy, modify, merge, publish, distribute, sub
10
* license, and/or sell copies of the Software, and to permit persons to whom
11
* the Software is furnished to do so, subject to the following conditions:
13
* The above copyright notice and this permission notice (including the next
14
* paragraph) shall be included in all copies or substantial portions of the
17
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23
* USE OR OTHER DEALINGS IN THE SOFTWARE.
5
* SPDX-License-Identifier: MIT
26
8
#include "si_pipe.h"
34
16
#include "ac_shadowed_regs.h"
35
17
#include "compiler/nir/nir.h"
36
18
#include "util/disk_cache.h"
37
20
#include "util/u_cpu_detect.h"
38
21
#include "util/u_log.h"
39
22
#include "util/u_memory.h"
60
43
{"nir", DBG(NIR), "Print final NIR after lowering when shader variants are created"},
61
44
{"initllvm", DBG(INIT_LLVM), "Print initial LLVM IR before optimizations"},
62
45
{"llvm", DBG(LLVM), "Print final LLVM IR"},
46
{"initaco", DBG(INIT_ACO), "Print initial ACO IR before optimizations"},
47
{"aco", DBG(ACO), "Print final ACO IR"},
63
48
{"asm", DBG(ASM), "Print final shaders in asm"},
49
{"stats", DBG(STATS), "Print shader-db stats to stderr"},
65
51
/* Shader compiler options the shader cache should be aware of: */
66
52
{"w32ge", DBG(W32_GE), "Use Wave32 for vertex, tessellation, and geometry shaders."},
75
61
{"checkir", DBG(CHECK_IR), "Enable additional sanity checks on shader IR"},
76
62
{"mono", DBG(MONOLITHIC_SHADERS), "Use old-style monolithic shaders compiled on demand"},
77
63
{"nooptvariant", DBG(NO_OPT_VARIANT), "Disable compiling optimized shader variants."},
64
{"useaco", DBG(USE_ACO), "Use ACO as shader compiler when possible"},
79
66
/* Information logging options: */
80
67
{"info", DBG(INFO), "Print driver information"},
206
192
si_release_all_descriptors(sctx);
208
194
if (sctx->gfx_level >= GFX10 && sctx->has_graphics)
209
gfx10_destroy_query(sctx);
195
gfx11_destroy_query(sctx);
211
if (sctx->thread_trace) {
212
198
struct si_screen *sscreen = sctx->screen;
213
199
if (sscreen->info.has_stable_pstate && sscreen->b.num_contexts == 1 &&
214
200
!(sctx->context_flags & SI_CONTEXT_FLAG_AUX))
215
201
sscreen->ws->cs_set_pstate(&sctx->gfx_cs, RADEON_CTX_PSTATE_NONE);
217
si_destroy_thread_trace(sctx);
203
si_destroy_sqtt(sctx);
220
206
pipe_resource_reference(&sctx->esgs_ring, NULL);
238
224
if (sctx->cs_preamble_state_tmz)
239
225
si_pm4_free_state(sctx, sctx->cs_preamble_state_tmz, ~0);
241
for (i = 0; i < ARRAY_SIZE(sctx->vgt_shader_config); i++)
242
si_pm4_free_state(sctx, sctx->vgt_shader_config[i], SI_STATE_IDX(vgt_shader_config));
244
227
if (sctx->fixed_func_tcs_shader_cache) {
245
228
hash_table_foreach(sctx->fixed_func_tcs_shader_cache, entry) {
246
229
sctx->b.delete_tcs_state(&sctx->b, entry->data);
349
332
sctx->ws->fence_reference(&sctx->last_gfx_fence, NULL);
350
333
si_resource_reference(&sctx->eop_bug_scratch, NULL);
351
334
si_resource_reference(&sctx->eop_bug_scratch_tmz, NULL);
352
si_resource_reference(&sctx->shadowed_regs, NULL);
335
si_resource_reference(&sctx->shadowing.registers, NULL);
336
si_resource_reference(&sctx->shadowing.csa, NULL);
354
si_destroy_compiler(&sctx->compiler);
338
if (sctx->compiler) {
339
si_destroy_compiler(sctx->compiler);
340
FREE(sctx->compiler);
356
343
si_saved_cs_reference(&sctx->current_saved_cs, NULL);
383
370
if (sctx->context_flags & SI_CONTEXT_FLAG_AUX)
384
371
return PIPE_NO_RESET;
387
enum pipe_reset_status status = sctx->ws->ctx_query_reset_status(sctx->ctx, false, &needs_reset);
389
if (status != PIPE_NO_RESET && needs_reset && !(sctx->context_flags & SI_CONTEXT_FLAG_AUX)) {
390
/* Call the gallium frontend to set a no-op API dispatch. */
391
if (sctx->device_reset_callback.reset) {
392
sctx->device_reset_callback.reset(sctx->device_reset_callback.data, status);
373
bool needs_reset, reset_completed;
374
enum pipe_reset_status status = sctx->ws->ctx_query_reset_status(sctx->ctx, false,
375
&needs_reset, &reset_completed);
377
if (status != PIPE_NO_RESET) {
378
if (sctx->has_reset_been_notified && reset_completed)
379
return PIPE_NO_RESET;
381
sctx->has_reset_been_notified = true;
383
if (!(sctx->context_flags & SI_CONTEXT_FLAG_AUX)) {
384
/* Call the gallium frontend to set a no-op API dispatch. */
385
if (needs_reset && sctx->device_reset_callback.reset)
386
sctx->device_reset_callback.reset(sctx->device_reset_callback.data, status);
421
415
dd_parse_apitrace_marker(string, len, &sctx->apitrace_call_number);
423
if (sctx->thread_trace_enabled)
417
if (sctx->sqtt_enabled)
424
418
si_write_user_event(sctx, &sctx->gfx_cs, UserEventTrigger, string, len);
510
504
sctx->ws = sscreen->ws;
511
505
sctx->family = sscreen->info.family;
512
506
sctx->gfx_level = sscreen->info.gfx_level;
507
sctx->vcn_ip_ver = sscreen->info.vcn_ip_version;
514
509
if (sctx->gfx_level == GFX7 || sctx->gfx_level == GFX8 || sctx->gfx_level == GFX9) {
515
510
sctx->eop_bug_scratch = si_aligned_buffer_create(
642
637
/* Initialize graphics-only context functions. */
643
638
if (sctx->has_graphics) {
644
639
if (sctx->gfx_level >= GFX10)
645
gfx10_init_query(sctx);
640
gfx11_init_query(sctx);
646
641
si_init_msaa_functions(sctx);
647
642
si_init_shader_functions(sctx);
648
643
si_init_state_functions(sctx);
700
695
sctx->sample_mask = 0xffff;
702
697
/* Initialize multimedia functions. */
703
if (sscreen->info.ip[AMD_IP_UVD].num_queues || sscreen->info.has_video_hw.vcn_decode ||
698
if (sscreen->info.ip[AMD_IP_UVD].num_queues ||
699
((sscreen->info.vcn_ip_version >= VCN_4_0_0) ?
700
sscreen->info.ip[AMD_IP_VCN_UNIFIED].num_queues : sscreen->info.ip[AMD_IP_VCN_DEC].num_queues) ||
704
701
sscreen->info.ip[AMD_IP_VCN_JPEG].num_queues || sscreen->info.ip[AMD_IP_VCE].num_queues ||
705
702
sscreen->info.ip[AMD_IP_UVD_ENC].num_queues || sscreen->info.ip[AMD_IP_VCN_ENC].num_queues) {
706
703
sctx->b.create_video_codec = si_uvd_create_decoder;
816
813
struct si_context *saux = si_get_aux_context(sscreen);
818
815
enum pipe_reset_status status = sctx->ws->ctx_query_reset_status(
819
saux->ctx, true, NULL);
816
saux->ctx, true, NULL, NULL);
820
817
if (status != PIPE_NO_RESET) {
821
818
/* We lost the aux_context, create a new one */
822
819
struct u_log_context *aux_log = (saux)->log;
887
884
"detected. Force the GPU into a profiling mode with e.g. "
888
885
"\"echo profile_peak > "
889
886
"/sys/class/drm/card0/device/power_dpm_force_performance_level\"\n");
890
} else if (!si_init_thread_trace((struct si_context *)ctx)) {
887
} else if (!si_init_sqtt((struct si_context *)ctx)) {
974
971
/* Release the reference on glsl types of the compiler threads. */
975
972
glsl_type_singleton_decref();
977
for (i = 0; i < ARRAY_SIZE(sscreen->compiler); i++)
978
si_destroy_compiler(&sscreen->compiler[i]);
974
for (i = 0; i < ARRAY_SIZE(sscreen->compiler); i++) {
975
if (sscreen->compiler[i]) {
976
si_destroy_compiler(sscreen->compiler[i]);
977
FREE(sscreen->compiler[i]);
980
for (i = 0; i < ARRAY_SIZE(sscreen->compiler_lowp); i++)
981
si_destroy_compiler(&sscreen->compiler_lowp[i]);
981
for (i = 0; i < ARRAY_SIZE(sscreen->compiler_lowp); i++) {
982
if (sscreen->compiler_lowp[i]) {
983
si_destroy_compiler(sscreen->compiler_lowp[i]);
984
FREE(sscreen->compiler_lowp[i]);
983
988
/* Free shader parts. */
984
989
for (i = 0; i < ARRAY_SIZE(parts); i++) {
999
1004
simple_mtx_destroy(&sscreen->gpu_load_mutex);
1000
1005
simple_mtx_destroy(&sscreen->gds_mutex);
1002
radeon_bo_reference(sscreen->ws, &sscreen->gds, NULL);
1003
1007
radeon_bo_reference(sscreen->ws, &sscreen->gds_oa, NULL);
1005
1009
slab_destroy_parent(&sscreen->pool_transfers);
1095
1100
_mesa_sha1_final(&ctx, sha1);
1096
disk_cache_format_hex_id(cache_id, sha1, 20 * 2);
1101
mesa_bytes_to_hex(cache_id, sha1, 20);
1098
1103
sscreen->disk_shader_cache = disk_cache_create(sscreen->info.name, cache_id,
1099
1104
sscreen->info.address32_hi);
1106
1111
/* This function doesn't allow a greater number of threads than
1107
1112
* the queue had at its creation. */
1108
util_queue_adjust_num_threads(&sscreen->shader_compiler_queue, max_threads);
1113
util_queue_adjust_num_threads(&sscreen->shader_compiler_queue, max_threads, false);
1109
1114
/* Don't change the number of threads on the low priority queue. */
1150
1155
sscreen->debug_flags |= debug_get_flags_option("AMD_DEBUG", radeonsi_debug_options, 0);
1151
1156
test_flags = debug_get_flags_option("AMD_TEST", test_options, 0);
1158
if (sscreen->debug_flags & DBG(NO_DISPLAY_DCC)) {
1159
sscreen->info.use_display_dcc_unaligned = false;
1160
sscreen->info.use_display_dcc_with_retile_blit = false;
1163
if (sscreen->debug_flags & DBG(SHADOW_REGS) ||
1164
sscreen->info.gfx_level >= GFX11) {
1165
sscreen->info.register_shadowing_required = true;
1166
/* Recompute has_set_pairs_packets. */
1167
sscreen->info.has_set_pairs_packets = sscreen->info.gfx_level >= GFX11 &&
1168
sscreen->info.register_shadowing_required &&
1169
sscreen->info.has_dedicated_vram;
1153
1172
if (sscreen->debug_flags & DBG(NO_GFX))
1154
1173
sscreen->info.has_graphics = false;
1156
1175
if ((sscreen->debug_flags & DBG(TMZ)) &&
1157
1176
!sscreen->info.has_tmz_support) {
1158
1177
fprintf(stderr, "radeonsi: requesting TMZ features but TMZ is not supported\n");
1178
FREE(sscreen->nir_options);
1163
1183
/* Initialize just one compiler instance to check for errors. The other compiler instances are
1164
1184
* initialized on demand.
1166
if (!si_init_compiler(sscreen, &sscreen->compiler[0])) {
1186
sscreen->compiler[0] = CALLOC_STRUCT(ac_llvm_compiler);
1187
if (!si_init_compiler(sscreen, sscreen->compiler[0])) {
1167
1188
/* The callee prints the error message. */
1189
FREE(sscreen->nir_options);
1178
1200
sscreen->b.is_parallel_shader_compilation_finished = si_is_parallel_shader_compilation_finished;
1179
1201
sscreen->b.finalize_nir = si_finalize_nir;
1203
sscreen->nir_options = CALLOC_STRUCT(nir_shader_compiler_options);
1181
1205
si_init_screen_get_functions(sscreen);
1182
1206
si_init_screen_buffer_functions(sscreen);
1183
1207
si_init_screen_fence_functions(sscreen);
1189
1213
sscreen->max_texel_buffer_elements = sscreen->b.get_param(
1190
1214
&sscreen->b, PIPE_CAP_MAX_TEXEL_BUFFER_ELEMENTS_UINT);
1192
/* Set these flags in debug_flags early, so that the shader cache takes
1193
* them into account.
1195
* Enable FS_CORRECT_DERIVS_AFTER_KILL by default if LLVM is >= 13. This makes
1196
* nir_opt_move_discards_to_top more effective.
1198
if (driQueryOptionb(config->options, "glsl_correct_derivatives_after_discard") ||
1199
LLVM_VERSION_MAJOR >= 13)
1200
sscreen->debug_flags |= DBG(FS_CORRECT_DERIVS_AFTER_KILL);
1202
1216
if (sscreen->debug_flags & DBG(INFO))
1203
1217
ac_print_gpu_info(&sscreen->info, stdout);
1278
1293
UTIL_QUEUE_INIT_SCALE_THREADS |
1279
1294
UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY, NULL)) {
1280
1295
si_destroy_shader_cache(sscreen);
1296
FREE(sscreen->nir_options);
1282
1298
glsl_type_singleton_decref();
1290
1306
UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY |
1291
1307
UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY, NULL)) {
1292
1308
si_destroy_shader_cache(sscreen);
1309
FREE(sscreen->nir_options);
1294
1311
glsl_type_singleton_decref();
1317
1334
if (sscreen->info.gfx_level >= GFX11) {
1318
1335
sscreen->use_ngg = true;
1319
sscreen->use_ngg_streamout = true;
1320
1336
/* TODO: Disable for now. Investigate if it helps. */
1321
1337
sscreen->use_ngg_culling = (sscreen->debug_flags & DBG(ALWAYS_NGG_CULLING_ALL)) &&
1322
1338
!(sscreen->debug_flags & DBG(NO_NGG_CULLING));
1325
1341
sscreen->info.gfx_level >= GFX10 &&
1326
1342
(sscreen->info.family != CHIP_NAVI14 ||
1327
1343
sscreen->info.is_pro_graphics);
1328
sscreen->use_ngg_streamout = false;
1329
1344
sscreen->use_ngg_culling = sscreen->use_ngg &&
1330
1345
sscreen->info.max_render_backends >= 2 &&
1331
!(sscreen->debug_flags & DBG(NO_NGG_CULLING)) &&
1332
LLVM_VERSION_MAJOR >= 12; /* hangs on 11, see #4874 */
1346
!(sscreen->debug_flags & DBG(NO_NGG_CULLING));
1335
1349
/* Only set this for the cases that are known to work, which are:
1400
* Only MSAA color and depth buffers are overridden.
1414
* Only MSAA color and depth buffers are overridden.
1402
1416
if (sscreen->info.has_eqaa_surface_allocator) {
1403
1417
const char *eqaa = debug_get_option("EQAA", NULL);