/*
 * Copyright 2021 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include "ac_spm.h"

#include "util/bitscan.h"
#include "util/u_memory.h"
static struct ac_spm_block_select *
31
ac_spm_get_block_select(struct ac_spm_trace_data *spm_trace,
32
const struct ac_pc_block *block)
34
struct ac_spm_block_select *block_sel, *new_block_sel;
35
uint32_t num_block_sel;
37
for (uint32_t i = 0; i < spm_trace->num_block_sel; i++) {
38
if (spm_trace->block_sel[i].b->b->b->gpu_block == block->b->b->gpu_block)
39
return &spm_trace->block_sel[i];
42
/* Allocate a new select block if it doesn't already exist. */
43
num_block_sel = spm_trace->num_block_sel + 1;
44
block_sel = realloc(spm_trace->block_sel, num_block_sel * sizeof(*block_sel));
48
spm_trace->num_block_sel = num_block_sel;
49
spm_trace->block_sel = block_sel;
51
/* Initialize the new select block. */
52
new_block_sel = &spm_trace->block_sel[spm_trace->num_block_sel - 1];
53
memset(new_block_sel, 0, sizeof(*new_block_sel));
55
new_block_sel->b = block;
56
new_block_sel->num_counters = block->b->b->num_spm_counters;
62
ac_spm_init_muxsel(const struct ac_pc_block *block,
63
struct ac_spm_counter_info *counter,
66
struct ac_spm_muxsel *muxsel = &counter->muxsel;
68
muxsel->counter = 2 * spm_wire + (counter->is_even ? 0 : 1);
69
muxsel->block = block->b->b->spm_block_select;
70
muxsel->shader_array = 0;
75
ac_spm_map_counter(struct ac_spm_trace_data *spm_trace,
76
struct ac_spm_block_select *block_sel,
77
struct ac_spm_counter_info *counter,
80
if (block_sel->b->b->b->gpu_block == SQ) {
81
for (unsigned i = 0; i < ARRAY_SIZE(spm_trace->sq_block_sel); i++) {
82
struct ac_spm_block_select *sq_block_sel = &spm_trace->sq_block_sel[i];
83
struct ac_spm_counter_select *cntr_sel = &sq_block_sel->counters[0];
84
if (i < spm_trace->num_used_sq_block_sel)
87
/* SQ doesn't support 16-bit counters. */
88
cntr_sel->sel0 |= S_036700_PERF_SEL(counter->event_id) |
89
S_036700_SPM_MODE(3) | /* 32-bit clamp */
90
S_036700_PERF_MODE(0);
91
cntr_sel->active |= 0x3;
93
/* 32-bits counter are always even. */
94
counter->is_even = true;
96
/* One wire per SQ module. */
99
spm_trace->num_used_sq_block_sel++;
103
/* Generic blocks. */
104
for (unsigned i = 0; i < block_sel->num_counters; i++) {
105
struct ac_spm_counter_select *cntr_sel = &block_sel->counters[i];
106
int index = ffs(~cntr_sel->active) - 1;
109
case 0: /* use S_037004_PERF_SEL */
110
cntr_sel->sel0 |= S_037004_PERF_SEL(counter->event_id) |
111
S_037004_CNTR_MODE(1) | /* 16-bit clamp */
112
S_037004_PERF_MODE(0); /* accum */
114
case 1: /* use S_037004_PERF_SEL1 */
115
cntr_sel->sel0 |= S_037004_PERF_SEL1(counter->event_id) |
116
S_037004_PERF_MODE1(0);
118
case 2: /* use S_037004_PERF_SEL2 */
119
cntr_sel->sel1 |= S_037008_PERF_SEL2(counter->event_id) |
120
S_037008_PERF_MODE2(0);
122
case 3: /* use S_037004_PERF_SEL3 */
123
cntr_sel->sel1 |= S_037008_PERF_SEL3(counter->event_id) |
124
S_037008_PERF_MODE3(0);
130
/* Mark this 16-bit counter as used. */
131
cntr_sel->active |= 1 << index;
133
/* Determine if the counter is even or odd. */
134
counter->is_even = !(index % 2);
136
/* Determine the SPM wire (one wire holds two 16-bit counters). */
137
*spm_wire = !!(index >= 2);
147
ac_spm_add_counter(const struct ac_perfcounters *pc,
148
struct ac_spm_trace_data *spm_trace,
149
const struct ac_spm_counter_create_info *info)
151
struct ac_spm_counter_info *counter;
152
struct ac_spm_block_select *block_sel;
153
struct ac_pc_block *block;
156
/* Check if the GPU block is valid. */
157
block = ac_pc_get_block(pc, info->gpu_block);
159
fprintf(stderr, "ac/spm: Invalid GPU block.\n");
163
/* Check if the number of instances is valid. */
164
if (info->instance > block->num_instances) {
165
fprintf(stderr, "ac/spm: Invalid instance ID.\n");
169
/* Check if the event ID is valid. */
170
if (info->event_id > block->b->selectors) {
171
fprintf(stderr, "ac/spm: Invalid event ID.\n");
175
counter = &spm_trace->counters[spm_trace->num_counters];
176
spm_trace->num_counters++;
178
counter->gpu_block = info->gpu_block;
179
counter->instance = info->instance;
180
counter->event_id = info->event_id;
182
/* Get the select block used to configure the counter. */
183
block_sel = ac_spm_get_block_select(spm_trace, block);
187
/* Map the counter to the select block. */
188
if (!ac_spm_map_counter(spm_trace, block_sel, counter, &spm_wire)) {
189
fprintf(stderr, "ac/spm: No free slots available!\n");
193
/* Determine the counter segment type. */
194
if (block->b->b->flags & AC_PC_BLOCK_SE) {
195
counter->segment_type = AC_SPM_SEGMENT_TYPE_SE0; // XXX
197
counter->segment_type = AC_SPM_SEGMENT_TYPE_GLOBAL;
200
/* Configure the muxsel for SPM. */
201
ac_spm_init_muxsel(block, counter, spm_wire);
206
bool ac_init_spm(const struct radeon_info *info,
207
const struct ac_perfcounters *pc,
208
unsigned num_counters,
209
const struct ac_spm_counter_create_info *counters,
210
struct ac_spm_trace_data *spm_trace)
212
spm_trace->counters = CALLOC(num_counters, sizeof(*spm_trace->counters));
213
if (!spm_trace->counters)
216
for (unsigned i = 0; i < num_counters; i++) {
217
if (!ac_spm_add_counter(pc, spm_trace, &counters[i])) {
218
fprintf(stderr, "ac/spm: Failed to add SPM counter (%d).\n", i);
223
/* Determine the segment size and create a muxsel ram for every segment. */
224
for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
225
unsigned num_even_counters = 0, num_odd_counters = 0;
227
if (s == AC_SPM_SEGMENT_TYPE_GLOBAL) {
228
/* The global segment always start with a 64-bit timestamp. */
229
num_even_counters += AC_SPM_GLOBAL_TIMESTAMP_COUNTERS;
232
/* Count the number of even/odd counters for this segment. */
233
for (unsigned c = 0; c < spm_trace->num_counters; c++) {
234
struct ac_spm_counter_info *counter = &spm_trace->counters[c];
236
if (counter->segment_type != s)
239
if (counter->is_even) {
246
/* Compute the number of lines. */
247
unsigned even_lines =
248
DIV_ROUND_UP(num_even_counters, AC_SPM_NUM_COUNTER_PER_MUXSEL);
250
DIV_ROUND_UP(num_odd_counters, AC_SPM_NUM_COUNTER_PER_MUXSEL);
251
unsigned num_lines = (even_lines > odd_lines) ? (2 * even_lines - 1) : (2 * odd_lines);
253
spm_trace->muxsel_lines[s] = CALLOC(num_lines, sizeof(*spm_trace->muxsel_lines[s]));
254
if (!spm_trace->muxsel_lines[s])
256
spm_trace->num_muxsel_lines[s] = num_lines;
259
/* RLC uses the following order: Global, SE0, SE1, SE2, SE3. */
260
const enum ac_spm_segment_type ordered_segment[AC_SPM_SEGMENT_TYPE_COUNT] =
262
AC_SPM_SEGMENT_TYPE_GLOBAL,
263
AC_SPM_SEGMENT_TYPE_SE0,
264
AC_SPM_SEGMENT_TYPE_SE1,
265
AC_SPM_SEGMENT_TYPE_SE2,
266
AC_SPM_SEGMENT_TYPE_SE3,
269
for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
270
if (!spm_trace->muxsel_lines[s])
273
uint32_t segment_offset = 0;
274
for (unsigned i = 0; s != ordered_segment[i]; i++) {
275
segment_offset += spm_trace->num_muxsel_lines[ordered_segment[i]] *
276
AC_SPM_NUM_COUNTER_PER_MUXSEL;
279
uint32_t even_counter_idx = 0, even_line_idx = 0;
280
uint32_t odd_counter_idx = 0, odd_line_idx = 1;
282
/* Add the global timestamps first. */
283
if (s == AC_SPM_SEGMENT_TYPE_GLOBAL) {
284
struct ac_spm_muxsel global_timestamp_muxsel = {
291
for (unsigned i = 0; i < 4; i++) {
292
spm_trace->muxsel_lines[s][even_line_idx].muxsel[even_counter_idx++] = global_timestamp_muxsel;
296
for (unsigned i = 0; i < spm_trace->num_counters; i++) {
297
struct ac_spm_counter_info *counter = &spm_trace->counters[i];
299
if (counter->segment_type != s)
302
if (counter->is_even) {
303
counter->offset = segment_offset + even_line_idx *
304
AC_SPM_NUM_COUNTER_PER_MUXSEL + even_counter_idx;
306
spm_trace->muxsel_lines[s][even_line_idx].muxsel[even_counter_idx] = spm_trace->counters[i].muxsel;
307
if (++even_counter_idx == AC_SPM_NUM_COUNTER_PER_MUXSEL) {
308
even_counter_idx = 0;
312
counter->offset = segment_offset + odd_line_idx *
313
AC_SPM_NUM_COUNTER_PER_MUXSEL + odd_counter_idx;
315
spm_trace->muxsel_lines[s][odd_line_idx].muxsel[odd_counter_idx] = spm_trace->counters[i].muxsel;
316
if (++odd_counter_idx == AC_SPM_NUM_COUNTER_PER_MUXSEL) {
327
void ac_destroy_spm(struct ac_spm_trace_data *spm_trace)
329
for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
330
FREE(spm_trace->muxsel_lines[s]);
332
FREE(spm_trace->block_sel);
333
FREE(spm_trace->counters);
336
uint32_t ac_spm_get_sample_size(const struct ac_spm_trace_data *spm_trace)
338
uint32_t sample_size = 0; /* in bytes */
340
for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
341
sample_size += spm_trace->num_muxsel_lines[s] * AC_SPM_MUXSEL_LINE_SIZE * 4;
347
uint32_t ac_spm_get_num_samples(const struct ac_spm_trace_data *spm_trace)
349
uint32_t sample_size = ac_spm_get_sample_size(spm_trace);
350
uint32_t *ptr = (uint32_t *)spm_trace->ptr;
351
uint32_t data_size, num_lines_written;
352
uint32_t num_samples = 0;
354
/* Get the data size (in bytes) written by the hw to the ring buffer. */
357
/* Compute the number of 256 bits (16 * 16-bits counters) lines written. */
358
num_lines_written = data_size / (2 * AC_SPM_NUM_COUNTER_PER_MUXSEL);
360
/* Check for overflow. */
361
if (num_lines_written % (sample_size / 32)) {
364
num_samples = num_lines_written / (sample_size / 32);