2
2
* Copyright 2020 Advanced Micro Devices, Inc.
3
3
* Copyright © 2020 Valve Corporation
5
* Permission is hereby granted, free of charge, to any person obtaining a
6
* copy of this software and associated documentation files (the "Software"),
7
* to deal in the Software without restriction, including without limitation
8
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
* and/or sell copies of the Software, and to permit persons to whom the
10
* Software is furnished to do so, subject to the following conditions:
12
* The above copyright notice and this permission notice (including the next
13
* paragraph) shall be included in all copies or substantial portions of the
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
5
* SPDX-License-Identifier: MIT
192
176
if (os_get_total_physical_memory(&system_ram_size))
193
177
chunk->system_ram_size = system_ram_size / (1024 * 1024);
195
/* Parse cpuinfo to get more detailed information. */
179
/* Parse cpuinfo to get more detailed information. */
196
180
f = fopen("/proc/cpuinfo", "r");
345
329
uint32_t lds_granularity;
346
330
uint16_t cu_mask[SQTT_MAX_NUM_SE][SQTT_SA_PER_SE];
347
331
char reserved1[128];
332
uint32_t active_pixel_packer_mask[SQTT_ACTIVE_PIXEL_PACKER_MASK_DWORDS];
334
uint32_t gl1_cache_size;
335
uint32_t instruction_cache_size;
336
uint32_t scalar_cache_size;
337
uint32_t mall_cache_size;
351
static_assert(sizeof(struct sqtt_file_chunk_asic_info) == 720,
341
static_assert(sizeof(struct sqtt_file_chunk_asic_info) == 768,
352
342
"sqtt_file_chunk_asic_info doesn't match RGP spec");
354
344
static enum sqtt_gfxip_level ac_gfx_level_to_sqtt_gfxip_level(enum amd_gfx_level gfx_level)
409
399
chunk->header.chunk_id.type = SQTT_FILE_CHUNK_TYPE_ASIC_INFO;
410
400
chunk->header.chunk_id.index = 0;
411
401
chunk->header.major_version = 0;
412
chunk->header.minor_version = 4;
402
chunk->header.minor_version = 5;
413
403
chunk->header.size_in_bytes = sizeof(*chunk);
415
405
chunk->flags = 0;
490
480
chunk->cu_mask[se][sa] = rad_info->cu_mask[se][sa];
484
chunk->gl1_cache_size = rad_info->l1_cache_size;
485
chunk->instruction_cache_size = rad_info->sqc_inst_cache_size;
486
chunk->scalar_cache_size = rad_info->sqc_scalar_cache_size;
487
chunk->mall_cache_size = rad_info->l3_cache_size_mb * 1024 * 1024;
561
555
union sqtt_instruction_trace_data instruction_trace_data;
564
static_assert(sizeof(struct sqtt_file_chunk_api_info) == 1064,
558
static_assert(sizeof(struct sqtt_file_chunk_api_info) == 560,
565
559
"sqtt_file_chunk_api_info doesn't match RGP spec");
567
561
static void ac_sqtt_fill_api_info(struct sqtt_file_chunk_api_info *chunk)
569
563
chunk->header.chunk_id.type = SQTT_FILE_CHUNK_TYPE_API_INFO;
570
564
chunk->header.chunk_id.index = 0;
571
565
chunk->header.major_version = 0;
572
chunk->header.minor_version = 1;
566
chunk->header.minor_version = 2;
573
567
chunk->header.size_in_bytes = sizeof(*chunk);
575
569
chunk->api_type = SQTT_API_TYPE_VULKAN;
595
ac_sqtt_fill_code_object(struct rgp_code_object *rgp_code_object,
596
struct sqtt_file_chunk_code_object_database *chunk,
597
size_t file_offset, uint32_t chunk_size)
589
ac_sqtt_fill_code_object(const struct rgp_code_object *rgp_code_object,
590
struct sqtt_file_chunk_code_object_database *chunk, size_t file_offset,
599
593
chunk->header.chunk_id.type = SQTT_FILE_CHUNK_TYPE_CODE_OBJECT_DATABASE;
600
594
chunk->header.chunk_id.index = 0;
659
ac_sqtt_fill_pso_correlation(struct rgp_pso_correlation *rgp_pso_correlation,
660
struct sqtt_file_chunk_pso_correlation *chunk,
653
ac_sqtt_fill_pso_correlation(const struct rgp_pso_correlation *rgp_pso_correlation,
654
struct sqtt_file_chunk_pso_correlation *chunk, size_t file_offset)
663
656
chunk->header.chunk_id.type = SQTT_FILE_CHUNK_TYPE_PSO_CORRELATION;
664
657
chunk->header.chunk_id.index = 0;
716
static void ac_sqtt_fill_sqtt_desc(struct radeon_info *info,
709
static void ac_sqtt_fill_sqtt_desc(const struct radeon_info *info,
717
710
struct sqtt_file_chunk_sqtt_desc *chunk, int32_t chunk_index,
718
711
int32_t shader_engine_index, int32_t compute_unit_index)
795
788
"sqtt_queue_event_record doesn't match RGP spec");
798
ac_sqtt_fill_queue_event_timings(struct rgp_queue_info *rgp_queue_info,
799
struct rgp_queue_event *rgp_queue_event,
791
ac_sqtt_fill_queue_event_timings(const struct rgp_queue_info *rgp_queue_info,
792
const struct rgp_queue_event *rgp_queue_event,
800
793
struct sqtt_file_chunk_queue_event_timings *chunk)
802
795
unsigned queue_info_size =
877
870
struct sqtt_spm_counter_info {
878
871
enum ac_pc_gpu_block block;
879
872
uint32_t instance;
873
uint32_t event_index; /* index of counter within the block */
880
874
uint32_t data_offset; /* offset of counter from the beginning of the chunk */
881
uint32_t event_index; /* index of counter within the block */
875
uint32_t data_size; /* size in bytes of a single counter data item */
884
878
struct sqtt_file_chunk_spm_db {
885
879
struct sqtt_file_chunk_header header;
881
uint32_t preamble_size;
887
882
uint32_t num_timestamps;
888
883
uint32_t num_spm_counter_info;
884
uint32_t spm_counter_info_size;
889
885
uint32_t sample_interval;
892
static_assert(sizeof(struct sqtt_file_chunk_spm_db) == 32,
888
static_assert(sizeof(struct sqtt_file_chunk_spm_db) == 40,
893
889
"sqtt_file_chunk_spm_db doesn't match RGP spec");
895
static void ac_sqtt_fill_spm_db(const struct ac_spm_trace_data *spm_trace,
891
static void ac_sqtt_fill_spm_db(const struct ac_spm_trace *spm_trace,
896
892
struct sqtt_file_chunk_spm_db *chunk,
897
893
uint32_t num_samples,
898
894
uint32_t chunk_size)
900
896
chunk->header.chunk_id.type = SQTT_FILE_CHUNK_TYPE_SPM_DB;
901
897
chunk->header.chunk_id.index = 0;
902
chunk->header.major_version = 1;
903
chunk->header.minor_version = 3;
898
chunk->header.major_version = 2;
899
chunk->header.minor_version = 0;
904
900
chunk->header.size_in_bytes = chunk_size;
906
902
chunk->flags = 0;
903
chunk->preamble_size = sizeof(struct sqtt_file_chunk_spm_db);
907
904
chunk->num_timestamps = num_samples;
908
905
chunk->num_spm_counter_info = spm_trace->num_counters;
906
chunk->spm_counter_info_size = sizeof(struct sqtt_spm_counter_info);
909
907
chunk->sample_interval = spm_trace->sample_interval;
912
static void ac_sqtt_dump_spm(const struct ac_spm_trace_data *spm_trace,
910
static void ac_sqtt_dump_spm(const struct ac_spm_trace *spm_trace,
913
911
size_t file_offset,
916
uint32_t sample_size_in_bytes = ac_spm_get_sample_size(spm_trace);
917
uint32_t num_samples = ac_spm_get_num_samples(spm_trace);
914
uint32_t sample_size_in_bytes = spm_trace->sample_size_in_bytes;
915
uint32_t num_samples = spm_trace->num_samples;
918
916
uint8_t *spm_data_ptr = (uint8_t *)spm_trace->ptr;
919
917
struct sqtt_file_chunk_spm_db spm_db;
920
918
size_t file_spm_db_offset = file_offset;
947
945
.block = spm_trace->counters[c].gpu_block,
948
946
.instance = spm_trace->counters[c].instance,
949
947
.data_offset = counter_values_offset,
948
.data_size = sizeof(uint16_t),
950
949
.event_index = spm_trace->counters[c].event_id,
983
982
#if defined(USE_LIBELF)
984
static void ac_sqtt_dump_data(struct radeon_info *rad_info,
985
struct ac_thread_trace *thread_trace,
986
const struct ac_spm_trace_data *spm_trace,
984
ac_sqtt_dump_data(const struct radeon_info *rad_info, struct ac_sqtt_trace *sqtt_trace,
985
const struct ac_spm_trace *spm_trace, FILE *output)
989
struct ac_thread_trace_data *thread_trace_data = thread_trace->data;
990
987
struct sqtt_file_chunk_asic_info asic_info = {0};
991
988
struct sqtt_file_chunk_cpu_info cpu_info = {0};
992
989
struct sqtt_file_chunk_api_info api_info = {0};
993
990
struct sqtt_file_header header = {0};
994
991
size_t file_offset = 0;
995
struct rgp_code_object *rgp_code_object =
996
&thread_trace_data->rgp_code_object;
997
struct rgp_loader_events *rgp_loader_events =
998
&thread_trace_data->rgp_loader_events;
999
struct rgp_pso_correlation *rgp_pso_correlation =
1000
&thread_trace_data->rgp_pso_correlation;
1001
struct rgp_queue_info *rgp_queue_info = &thread_trace_data->rgp_queue_info;
1002
struct rgp_queue_event *rgp_queue_event = &thread_trace_data->rgp_queue_event;
1003
struct rgp_clock_calibration *rgp_clock_calibration = &thread_trace_data->rgp_clock_calibration;
992
const struct rgp_code_object *rgp_code_object = sqtt_trace->rgp_code_object;
993
const struct rgp_loader_events *rgp_loader_events = sqtt_trace->rgp_loader_events;
994
const struct rgp_pso_correlation *rgp_pso_correlation = sqtt_trace->rgp_pso_correlation;
995
const struct rgp_queue_info *rgp_queue_info = sqtt_trace->rgp_queue_info;
996
const struct rgp_queue_event *rgp_queue_event = sqtt_trace->rgp_queue_event;
997
const struct rgp_clock_calibration *rgp_clock_calibration = sqtt_trace->rgp_clock_calibration;
1005
999
/* SQTT header file. */
1006
1000
ac_sqtt_fill_header(&header);
1143
for (unsigned i = 0; i < thread_trace->num_traces; i++) {
1144
const struct ac_thread_trace_se *se = &thread_trace->traces[i];
1145
const struct ac_thread_trace_info *info = &se->info;
1137
for (unsigned i = 0; i < sqtt_trace->num_traces; i++) {
1138
const struct ac_sqtt_data_se *se = &sqtt_trace->traces[i];
1139
const struct ac_sqtt_data_info *info = &se->info;
1146
1140
struct sqtt_file_chunk_sqtt_desc desc = {0};
1147
1141
struct sqtt_file_chunk_sqtt_data data = {0};
1148
1142
uint64_t size = info->cur_offset * 32; /* unit of 32 bytes */
1172
int ac_dump_rgp_capture(struct radeon_info *info,
1173
struct ac_thread_trace *thread_trace,
1174
const struct ac_spm_trace_data *spm_trace)
1167
ac_dump_rgp_capture(const struct radeon_info *info, struct ac_sqtt_trace *sqtt_trace,
1168
const struct ac_spm_trace *spm_trace)
1176
1170
#if !defined(USE_LIBELF)
1195
ac_sqtt_dump_data(info, thread_trace, spm_trace, f);
1189
ac_sqtt_dump_data(info, sqtt_trace, spm_trace, f);
1197
1191
fprintf(stderr, "RGP capture saved to '%s'\n", filename);