~mmach/netext73/mesa-ryzen

« back to all changes in this revision

Viewing changes to src/amd/common/ac_rgp.c

  • Committer: mmach
  • Date: 2023-11-02 21:31:35 UTC
  • Revision ID: netbit73@gmail.com-20231102213135-18d4tzh7tj0uz752
2023-11-02 22:11:57

Show diffs side-by-side

added added

removed removed

Lines of Context:
2
2
 * Copyright 2020 Advanced Micro Devices, Inc.
3
3
 * Copyright © 2020 Valve Corporation
4
4
 *
5
 
 * Permission is hereby granted, free of charge, to any person obtaining a
6
 
 * copy of this software and associated documentation files (the "Software"),
7
 
 * to deal in the Software without restriction, including without limitation
8
 
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
 
 * and/or sell copies of the Software, and to permit persons to whom the
10
 
 * Software is furnished to do so, subject to the following conditions:
11
 
 *
12
 
 * The above copyright notice and this permission notice (including the next
13
 
 * paragraph) shall be included in all copies or substantial portions of the
14
 
 * Software.
15
 
 *
16
 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19
 
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
 
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22
 
 * IN THE SOFTWARE.
 
5
 * SPDX-License-Identifier: MIT
23
6
 */
24
7
#include "ac_rgp.h"
25
8
 
44
27
#define SQTT_GPU_NAME_MAX_SIZE 256
45
28
#define SQTT_MAX_NUM_SE        32
46
29
#define SQTT_SA_PER_SE         2
 
30
#define SQTT_ACTIVE_PIXEL_PACKER_MASK_DWORDS 4
47
31
 
48
32
enum sqtt_version
49
33
{
192
176
   if (os_get_total_physical_memory(&system_ram_size))
193
177
      chunk->system_ram_size = system_ram_size / (1024 * 1024);
194
178
 
195
 
   /* Parse cpuinfo to get more detailled information. */
 
179
   /* Parse cpuinfo to get more detailed information. */
196
180
   f = fopen("/proc/cpuinfo", "r");
197
181
   if (!f)
198
182
      return;
345
329
   uint32_t lds_granularity;
346
330
   uint16_t cu_mask[SQTT_MAX_NUM_SE][SQTT_SA_PER_SE];
347
331
   char reserved1[128];
 
332
   uint32_t active_pixel_packer_mask[SQTT_ACTIVE_PIXEL_PACKER_MASK_DWORDS];
 
333
   char reserved2[16];
 
334
   uint32_t gl1_cache_size;
 
335
   uint32_t instruction_cache_size;
 
336
   uint32_t scalar_cache_size;
 
337
   uint32_t mall_cache_size;
348
338
   char padding[4];
349
339
};
350
340
 
351
 
static_assert(sizeof(struct sqtt_file_chunk_asic_info) == 720,
 
341
static_assert(sizeof(struct sqtt_file_chunk_asic_info) == 768,
352
342
              "sqtt_file_chunk_asic_info doesn't match RGP spec");
353
343
 
354
344
static enum sqtt_gfxip_level ac_gfx_level_to_sqtt_gfxip_level(enum amd_gfx_level gfx_level)
401
391
   }
402
392
}
403
393
 
404
 
static void ac_sqtt_fill_asic_info(struct radeon_info *rad_info,
 
394
static void ac_sqtt_fill_asic_info(const struct radeon_info *rad_info,
405
395
                                   struct sqtt_file_chunk_asic_info *chunk)
406
396
{
407
397
   bool has_wave32 = rad_info->gfx_level >= GFX10;
409
399
   chunk->header.chunk_id.type = SQTT_FILE_CHUNK_TYPE_ASIC_INFO;
410
400
   chunk->header.chunk_id.index = 0;
411
401
   chunk->header.major_version = 0;
412
 
   chunk->header.minor_version = 4;
 
402
   chunk->header.minor_version = 5;
413
403
   chunk->header.size_in_bytes = sizeof(*chunk);
414
404
 
415
405
   chunk->flags = 0;
490
480
         chunk->cu_mask[se][sa] = rad_info->cu_mask[se][sa];
491
481
      }
492
482
   }
 
483
 
 
484
   chunk->gl1_cache_size = rad_info->l1_cache_size;
 
485
   chunk->instruction_cache_size = rad_info->sqc_inst_cache_size;
 
486
   chunk->scalar_cache_size = rad_info->sqc_scalar_cache_size;
 
487
   chunk->mall_cache_size = rad_info->l3_cache_size_mb * 1024 * 1024;
493
488
}
494
489
 
495
490
/**
543
538
   } api_pso_data;
544
539
 
545
540
   struct {
546
 
      char start[256];
547
 
      char end[256];
548
 
   } user_marker_data;
 
541
      uint32_t mask;
 
542
   } shader_engine_filter;
549
543
};
550
544
 
551
545
struct sqtt_file_chunk_api_info {
561
555
   union sqtt_instruction_trace_data instruction_trace_data;
562
556
};
563
557
 
564
 
static_assert(sizeof(struct sqtt_file_chunk_api_info) == 1064,
 
558
static_assert(sizeof(struct sqtt_file_chunk_api_info) == 560,
565
559
              "sqtt_file_chunk_api_info doesn't match RGP spec");
566
560
 
567
561
static void ac_sqtt_fill_api_info(struct sqtt_file_chunk_api_info *chunk)
569
563
   chunk->header.chunk_id.type = SQTT_FILE_CHUNK_TYPE_API_INFO;
570
564
   chunk->header.chunk_id.index = 0;
571
565
   chunk->header.major_version = 0;
572
 
   chunk->header.minor_version = 1;
 
566
   chunk->header.minor_version = 2;
573
567
   chunk->header.size_in_bytes = sizeof(*chunk);
574
568
 
575
569
   chunk->api_type = SQTT_API_TYPE_VULKAN;
592
586
};
593
587
 
594
588
static void
595
 
ac_sqtt_fill_code_object(struct rgp_code_object *rgp_code_object,
596
 
                         struct sqtt_file_chunk_code_object_database *chunk,
597
 
                         size_t file_offset, uint32_t chunk_size)
 
589
ac_sqtt_fill_code_object(const struct rgp_code_object *rgp_code_object,
 
590
                         struct sqtt_file_chunk_code_object_database *chunk, size_t file_offset,
 
591
                         uint32_t chunk_size)
598
592
{
599
593
   chunk->header.chunk_id.type = SQTT_FILE_CHUNK_TYPE_CODE_OBJECT_DATABASE;
600
594
   chunk->header.chunk_id.index = 0;
624
618
};
625
619
 
626
620
static void
627
 
ac_sqtt_fill_loader_events(struct rgp_loader_events *rgp_loader_events,
 
621
ac_sqtt_fill_loader_events(const struct rgp_loader_events *rgp_loader_events,
628
622
                           struct sqtt_file_chunk_code_object_loader_events *chunk,
629
623
                           size_t file_offset)
630
624
{
656
650
};
657
651
 
658
652
static void
659
 
ac_sqtt_fill_pso_correlation(struct rgp_pso_correlation *rgp_pso_correlation,
660
 
                             struct sqtt_file_chunk_pso_correlation *chunk,
661
 
                             size_t file_offset)
 
653
ac_sqtt_fill_pso_correlation(const struct rgp_pso_correlation *rgp_pso_correlation,
 
654
                             struct sqtt_file_chunk_pso_correlation *chunk, size_t file_offset)
662
655
{
663
656
   chunk->header.chunk_id.type = SQTT_FILE_CHUNK_TYPE_PSO_CORRELATION;
664
657
   chunk->header.chunk_id.index = 0;
713
706
   }
714
707
}
715
708
 
716
 
static void ac_sqtt_fill_sqtt_desc(struct radeon_info *info,
 
709
static void ac_sqtt_fill_sqtt_desc(const struct radeon_info *info,
717
710
                                   struct sqtt_file_chunk_sqtt_desc *chunk, int32_t chunk_index,
718
711
                                   int32_t shader_engine_index, int32_t compute_unit_index)
719
712
{
795
788
              "sqtt_queue_event_record doesn't match RGP spec");
796
789
 
797
790
static void
798
 
ac_sqtt_fill_queue_event_timings(struct rgp_queue_info *rgp_queue_info,
799
 
                                 struct rgp_queue_event *rgp_queue_event,
 
791
ac_sqtt_fill_queue_event_timings(const struct rgp_queue_info *rgp_queue_info,
 
792
                                 const struct rgp_queue_event *rgp_queue_event,
800
793
                                 struct sqtt_file_chunk_queue_event_timings *chunk)
801
794
{
802
795
   unsigned queue_info_size =
877
870
struct sqtt_spm_counter_info {
878
871
   enum ac_pc_gpu_block block;
879
872
   uint32_t instance;
 
873
   uint32_t event_index; /* index of counter within the block */
880
874
   uint32_t data_offset; /* offset of counter from the beginning of the chunk */
881
 
   uint32_t event_index; /* index of counter within the block */
 
875
   uint32_t data_size;   /* size in bytes of a single counter data item */
882
876
};
883
877
 
884
878
struct sqtt_file_chunk_spm_db {
885
879
   struct sqtt_file_chunk_header header;
886
880
   uint32_t flags;
 
881
   uint32_t preamble_size;
887
882
   uint32_t num_timestamps;
888
883
   uint32_t num_spm_counter_info;
 
884
   uint32_t spm_counter_info_size;
889
885
   uint32_t sample_interval;
890
886
};
891
887
 
892
 
static_assert(sizeof(struct sqtt_file_chunk_spm_db) == 32,
 
888
static_assert(sizeof(struct sqtt_file_chunk_spm_db) == 40,
893
889
              "sqtt_file_chunk_spm_db doesn't match RGP spec");
894
890
 
895
 
static void ac_sqtt_fill_spm_db(const struct ac_spm_trace_data *spm_trace,
 
891
static void ac_sqtt_fill_spm_db(const struct ac_spm_trace *spm_trace,
896
892
                                struct sqtt_file_chunk_spm_db *chunk,
897
893
                                uint32_t num_samples,
898
894
                                uint32_t chunk_size)
899
895
{
900
896
   chunk->header.chunk_id.type = SQTT_FILE_CHUNK_TYPE_SPM_DB;
901
897
   chunk->header.chunk_id.index = 0;
902
 
   chunk->header.major_version = 1;
903
 
   chunk->header.minor_version = 3;
 
898
   chunk->header.major_version = 2;
 
899
   chunk->header.minor_version = 0;
904
900
   chunk->header.size_in_bytes = chunk_size;
905
901
 
906
902
   chunk->flags = 0;
 
903
   chunk->preamble_size = sizeof(struct sqtt_file_chunk_spm_db);
907
904
   chunk->num_timestamps = num_samples;
908
905
   chunk->num_spm_counter_info = spm_trace->num_counters;
 
906
   chunk->spm_counter_info_size = sizeof(struct sqtt_spm_counter_info);
909
907
   chunk->sample_interval = spm_trace->sample_interval;
910
908
}
911
909
 
912
 
static void ac_sqtt_dump_spm(const struct ac_spm_trace_data *spm_trace,
 
910
static void ac_sqtt_dump_spm(const struct ac_spm_trace *spm_trace,
913
911
                             size_t file_offset,
914
912
                             FILE *output)
915
913
{
916
 
   uint32_t sample_size_in_bytes = ac_spm_get_sample_size(spm_trace);
917
 
   uint32_t num_samples = ac_spm_get_num_samples(spm_trace);
 
914
   uint32_t sample_size_in_bytes = spm_trace->sample_size_in_bytes;
 
915
   uint32_t num_samples = spm_trace->num_samples;
918
916
   uint8_t *spm_data_ptr = (uint8_t *)spm_trace->ptr;
919
917
   struct sqtt_file_chunk_spm_db spm_db;
920
918
   size_t file_spm_db_offset = file_offset;
947
945
         .block = spm_trace->counters[c].gpu_block,
948
946
         .instance = spm_trace->counters[c].instance,
949
947
         .data_offset = counter_values_offset,
 
948
         .data_size = sizeof(uint16_t),
950
949
         .event_index = spm_trace->counters[c].event_id,
951
950
      };
952
951
 
981
980
}
982
981
 
983
982
#if defined(USE_LIBELF)
984
 
static void ac_sqtt_dump_data(struct radeon_info *rad_info,
985
 
                              struct ac_thread_trace *thread_trace,
986
 
                              const struct ac_spm_trace_data *spm_trace,
987
 
                              FILE *output)
 
983
static void
 
984
ac_sqtt_dump_data(const struct radeon_info *rad_info, struct ac_sqtt_trace *sqtt_trace,
 
985
                  const struct ac_spm_trace *spm_trace, FILE *output)
988
986
{
989
 
   struct ac_thread_trace_data *thread_trace_data = thread_trace->data;
990
987
   struct sqtt_file_chunk_asic_info asic_info = {0};
991
988
   struct sqtt_file_chunk_cpu_info cpu_info = {0};
992
989
   struct sqtt_file_chunk_api_info api_info = {0};
993
990
   struct sqtt_file_header header = {0};
994
991
   size_t file_offset = 0;
995
 
   struct rgp_code_object *rgp_code_object =
996
 
                                          &thread_trace_data->rgp_code_object;
997
 
   struct rgp_loader_events *rgp_loader_events =
998
 
                                        &thread_trace_data->rgp_loader_events;
999
 
   struct rgp_pso_correlation *rgp_pso_correlation =
1000
 
                                      &thread_trace_data->rgp_pso_correlation;
1001
 
   struct rgp_queue_info *rgp_queue_info = &thread_trace_data->rgp_queue_info;
1002
 
   struct rgp_queue_event *rgp_queue_event = &thread_trace_data->rgp_queue_event;
1003
 
   struct rgp_clock_calibration *rgp_clock_calibration = &thread_trace_data->rgp_clock_calibration;
 
992
   const struct rgp_code_object *rgp_code_object = sqtt_trace->rgp_code_object;
 
993
   const struct rgp_loader_events *rgp_loader_events = sqtt_trace->rgp_loader_events;
 
994
   const struct rgp_pso_correlation *rgp_pso_correlation = sqtt_trace->rgp_pso_correlation;
 
995
   const struct rgp_queue_info *rgp_queue_info = sqtt_trace->rgp_queue_info;
 
996
   const struct rgp_queue_event *rgp_queue_event = sqtt_trace->rgp_queue_event;
 
997
   const struct rgp_clock_calibration *rgp_clock_calibration = sqtt_trace->rgp_clock_calibration;
1004
998
 
1005
999
   /* SQTT header file. */
1006
1000
   ac_sqtt_fill_header(&header);
1139
1133
      }
1140
1134
   }
1141
1135
 
1142
 
   if (thread_trace) {
1143
 
      for (unsigned i = 0; i < thread_trace->num_traces; i++) {
1144
 
         const struct ac_thread_trace_se *se = &thread_trace->traces[i];
1145
 
         const struct ac_thread_trace_info *info = &se->info;
 
1136
   if (sqtt_trace) {
 
1137
      for (unsigned i = 0; i < sqtt_trace->num_traces; i++) {
 
1138
         const struct ac_sqtt_data_se *se = &sqtt_trace->traces[i];
 
1139
         const struct ac_sqtt_data_info *info = &se->info;
1146
1140
         struct sqtt_file_chunk_sqtt_desc desc = {0};
1147
1141
         struct sqtt_file_chunk_sqtt_data data = {0};
1148
1142
         uint64_t size = info->cur_offset * 32; /* unit of 32 bytes */
1169
1163
}
1170
1164
#endif
1171
1165
 
1172
 
int ac_dump_rgp_capture(struct radeon_info *info,
1173
 
                        struct ac_thread_trace *thread_trace,
1174
 
                        const struct ac_spm_trace_data *spm_trace)
 
1166
int
 
1167
ac_dump_rgp_capture(const struct radeon_info *info, struct ac_sqtt_trace *sqtt_trace,
 
1168
                    const struct ac_spm_trace *spm_trace)
1175
1169
{
1176
1170
#if !defined(USE_LIBELF)
1177
1171
   return -1;
1192
1186
   if (!f)
1193
1187
      return -1;
1194
1188
 
1195
 
   ac_sqtt_dump_data(info, thread_trace, spm_trace, f);
 
1189
   ac_sqtt_dump_data(info, sqtt_trace, spm_trace, f);
1196
1190
 
1197
1191
   fprintf(stderr, "RGP capture saved to '%s'\n", filename);
1198
1192