/*
 * Copyright © 2021 Raspberry Pi Ltd
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * Gallium query object support for performance counters
 *
 * This contains the queries for the V3D performance counters.
 */

#include "v3d_query.h"

32
struct v3d_query_perfcnt
34
struct v3d_query base;
37
struct v3d_perfmon_state *perfmon;
/**
 * Human readable names of the V3D performance counters.
 *
 * The array index is the counter id: v3d_get_driver_query_info_perfcnt()
 * reports entry `index` as query type PIPE_QUERY_DRIVER_SPECIFIC + index, and
 * v3d_create_batch_query_perfcnt() turns the query type back into that index
 * and stores it as the counter to program. The order therefore has to match
 * the counter numbering expected by the kernel (presumably
 * `enum v3d_perfcnt_type` of the V3D DRM uAPI -- confirm).
 *
 * NOTE(review): the entries tagged "restored" were missing from the reviewed
 * text, which would have shifted every later name onto the wrong counter.
 * Their positions follow the gaps in the original numbering; their spelling
 * should be confirmed against upstream.
 *
 * The spelling "discared" is kept as is because the names are exposed to
 * users at runtime.
 */
static const char *v3d_counter_names[] = {
        "FEP-valid-primitives-no-rendered-pixels",
        "FEP-valid-primitives-rendered-pixels",
        "FEP-clipped-quads", /* restored */
        "FEP-valid-quads", /* restored */
        "TLB-quads-not-passing-stencil-test",
        "TLB-quads-not-passing-z-and-stencil-test",
        "TLB-quads-passing-z-and-stencil-test",
        "TLB-quads-with-zero-coverage",
        "TLB-quads-with-non-zero-coverage",
        "TLB-quads-written-to-color-buffer",
        "PTB-primitives-discarded-outside-viewport",
        "PTB-primitives-need-clipping",
        "PTB-primitives-discared-reversed",
        "QPU-total-idle-clk-cycles",
        "QPU-total-active-clk-cycles-vertex-coord-shading",
        "QPU-total-active-clk-cycles-fragment-shading",
        "QPU-total-clk-cycles-executing-valid-instr",
        "QPU-total-clk-cycles-waiting-TMU",
        "QPU-total-clk-cycles-waiting-scoreboard",
        "QPU-total-clk-cycles-waiting-varyings",
        "QPU-total-instr-cache-hit",
        "QPU-total-instr-cache-miss",
        "QPU-total-uniform-cache-hit",
        "QPU-total-uniform-cache-miss",
        "TMU-total-text-quads-access",
        "TMU-total-text-cache-miss",
        "VPM-total-clk-cycles-VDW-stalled",
        "VPM-total-clk-cycles-VCD-stalled",
        "CLE-bin-thread-active-cycles",
        "CLE-render-thread-active-cycles",
        "L2T-total-cache-hit",
        "L2T-total-cache-miss",
        "cycle-count", /* restored */
        "QPU-total-clk-cycles-waiting-vertex-coord-shading",
        "QPU-total-clk-cycles-waiting-fragment-shading",
        "PTB-primitives-binned",
        "AXI-writes-seen-watch-0",
        "AXI-reads-seen-watch-0",
        "AXI-writes-stalled-seen-watch-0",
        "AXI-reads-stalled-seen-watch-0",
        "AXI-write-bytes-seen-watch-0",
        "AXI-read-bytes-seen-watch-0",
        "AXI-writes-seen-watch-1",
        "AXI-reads-seen-watch-1",
        "AXI-writes-stalled-seen-watch-1",
        "AXI-reads-stalled-seen-watch-1",
        "AXI-write-bytes-seen-watch-1",
        "AXI-read-bytes-seen-watch-1",
        "TLB-partial-quads-written-to-color-buffer",
        "TMU-total-config-access",
        "L2T-no-id-stalled", /* restored */
        "L2T-command-queue-stalled",
        "L2T-TMU-writes", /* restored */
        "TMU-active-cycles", /* restored */
        "TMU-stalled-cycles", /* restored */
        "CLE-thread-active-cycles",
        "L2T-TMU-reads", /* restored */
        "L2T-CLE-reads", /* restored */
        "L2T-VCD-reads", /* restored */
        "L2T-TMU-config-reads",
        "L2T-SLC0-reads", /* restored */
        "L2T-SLC1-reads", /* restored */
        "L2T-SLC2-reads", /* restored */
        "L2T-TMU-write-miss",
        "L2T-TMU-read-miss", /* restored */
        "L2T-CLE-read-miss", /* restored */
        "L2T-VCD-read-miss", /* restored */
        "L2T-TMU-config-read-miss",
        "L2T-SLC0-read-miss",
        "L2T-SLC1-read-miss",
        "L2T-SLC2-read-miss",
        "core-memory-writes",
        "L2T-memory-writes", /* restored */
        "PTB-memory-writes", /* restored */
        "TLB-memory-writes", /* restored */
        "core-memory-reads", /* restored */
        "L2T-memory-reads", /* restored */
        "PTB-memory-reads", /* restored */
        "PSE-memory-reads", /* restored */
        "TLB-memory-reads", /* restored */
        "GMP-memory-reads", /* restored */
        "PTB-memory-words-writes",
        "TLB-memory-words-writes",
        "PSE-memory-words-reads",
        "TLB-memory-words-reads",
        "TMU-MRU-hits", /* restored */
        "compute-active-cycles",
};

131
kperfmon_destroy(struct v3d_context *v3d, struct v3d_perfmon_state *perfmon)
133
struct drm_v3d_perfmon_destroy destroyreq;
135
destroyreq.id = perfmon->kperfmon_id;
136
int ret = v3d_ioctl(v3d->fd, DRM_IOCTL_V3D_PERFMON_DESTROY, &destroyreq);
138
fprintf(stderr, "failed to destroy perfmon %d: %s\n",
139
perfmon->kperfmon_id, strerror(errno));
143
v3d_get_driver_query_group_info_perfcnt(struct v3d_screen *screen, unsigned index,
144
struct pipe_driver_query_group_info *info)
146
if (!screen->has_perfmon)
155
info->name = "V3D counters";
156
info->max_active_queries = DRM_V3D_MAX_PERF_COUNTERS;
157
info->num_queries = ARRAY_SIZE(v3d_counter_names);
163
v3d_get_driver_query_info_perfcnt(struct v3d_screen *screen, unsigned index,
164
struct pipe_driver_query_info *info)
166
if (!screen->has_perfmon)
170
return ARRAY_SIZE(v3d_counter_names);
172
if (index >= ARRAY_SIZE(v3d_counter_names))
176
info->name = v3d_counter_names[index];
177
info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index;
178
info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;
179
info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
180
info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
186
v3d_destroy_query_perfcnt(struct v3d_context *v3d, struct v3d_query *query)
188
struct v3d_query_perfcnt *pquery = (struct v3d_query_perfcnt *)query;
190
assert(pquery->perfmon);
192
if (v3d->active_perfmon == pquery->perfmon) {
193
fprintf(stderr, "Query is active; end query before destroying\n");
196
if (pquery->perfmon->kperfmon_id)
197
kperfmon_destroy(v3d, pquery->perfmon);
199
v3d_fence_unreference(&pquery->perfmon->last_job_fence);
200
free(pquery->perfmon);
205
v3d_begin_query_perfcnt(struct v3d_context *v3d, struct v3d_query *query)
207
struct v3d_query_perfcnt *pquery = (struct v3d_query_perfcnt *)query;
208
struct drm_v3d_perfmon_create createreq = { 0 };
211
/* Only one perfmon can be activated per context */
212
if (v3d->active_perfmon) {
214
"Another query is already active; "
215
"finish it before starting a new one\n");
219
assert(pquery->perfmon);
221
/* Reset the counters by destroying the previously allocated perfmon */
222
if (pquery->perfmon->kperfmon_id)
223
kperfmon_destroy(v3d, pquery->perfmon);
225
for (i = 0; i < pquery->num_queries; i++)
226
createreq.counters[i] = pquery->perfmon->counters[i];
228
createreq.ncounters = pquery->num_queries;
229
ret = v3d_ioctl(v3d->fd, DRM_IOCTL_V3D_PERFMON_CREATE, &createreq);
233
pquery->perfmon->kperfmon_id = createreq.id;
234
pquery->perfmon->job_submitted = false;
235
v3d_fence_unreference(&pquery->perfmon->last_job_fence);
237
/* Ensure all pending jobs are flushed before activating the
240
v3d_flush((struct pipe_context *)v3d);
241
v3d->active_perfmon = pquery->perfmon;
247
v3d_end_query_perfcnt(struct v3d_context *v3d, struct v3d_query *query)
249
struct v3d_query_perfcnt *pquery = (struct v3d_query_perfcnt *)query;
251
assert(pquery->perfmon);
253
if (v3d->active_perfmon != pquery->perfmon) {
254
fprintf(stderr, "This query is not active\n");
258
/* Ensure all pending jobs are flushed before deactivating the
261
v3d_flush((struct pipe_context *)v3d);
263
/* Get a copy of latest submitted job's fence to wait for its
266
if (v3d->active_perfmon->job_submitted)
267
v3d->active_perfmon->last_job_fence = v3d_fence_create(v3d);
269
v3d->active_perfmon = NULL;
275
v3d_get_query_result_perfcnt(struct v3d_context *v3d, struct v3d_query *query,
276
bool wait, union pipe_query_result *vresult)
278
struct v3d_query_perfcnt *pquery = (struct v3d_query_perfcnt *)query;
279
struct drm_v3d_perfmon_get_values req = { 0 };
282
assert(pquery->perfmon);
284
if (pquery->perfmon->job_submitted) {
285
if (!v3d_fence_wait(v3d->screen,
286
pquery->perfmon->last_job_fence,
287
wait ? PIPE_TIMEOUT_INFINITE : 0))
290
req.id = pquery->perfmon->kperfmon_id;
291
req.values_ptr = (uintptr_t)pquery->perfmon->values;
292
ret = v3d_ioctl(v3d->fd, DRM_IOCTL_V3D_PERFMON_GET_VALUES, &req);
294
fprintf(stderr, "Can't request perfmon counters values\n");
299
for (i = 0; i < pquery->num_queries; i++)
300
vresult->batch[i].u64 = pquery->perfmon->values[i];
305
static const struct v3d_query_funcs perfcnt_query_funcs = {
306
.destroy_query = v3d_destroy_query_perfcnt,
307
.begin_query = v3d_begin_query_perfcnt,
308
.end_query = v3d_end_query_perfcnt,
309
.get_query_result = v3d_get_query_result_perfcnt,
313
v3d_create_batch_query_perfcnt(struct v3d_context *v3d, unsigned num_queries,
314
unsigned *query_types)
316
struct v3d_query_perfcnt *pquery = NULL;
317
struct v3d_query *query;
318
struct v3d_perfmon_state *perfmon = NULL;
321
/* Validate queries */
322
for (i = 0; i < num_queries; i++) {
323
if (query_types[i] < PIPE_QUERY_DRIVER_SPECIFIC ||
324
query_types[i] >= PIPE_QUERY_DRIVER_SPECIFIC +
325
ARRAY_SIZE(v3d_counter_names)) {
326
fprintf(stderr, "Invalid query type\n");
331
pquery = calloc(1, sizeof(*pquery));
335
perfmon = calloc(1, sizeof(*perfmon));
341
for (i = 0; i < num_queries; i++)
342
perfmon->counters[i] = query_types[i] - PIPE_QUERY_DRIVER_SPECIFIC;
344
pquery->perfmon = perfmon;
345
pquery->num_queries = num_queries;
347
query = &pquery->base;
348
query->funcs = &perfcnt_query_funcs;
350
/* Note that struct pipe_query isn't actually defined anywhere. */
351
return (struct pipe_query *)query;