2
* Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
5
* Permission is hereby granted, free of charge, to any person obtaining a
6
* copy of this software and associated documentation files (the "Software"),
7
* to deal in the Software without restriction, including without limitation
8
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
* and/or sell copies of the Software, and to permit persons to whom the
10
* Software is furnished to do so, subject to the following conditions:
12
* The above copyright notice and this permission notice (including the next
13
* paragraph) shall be included in all copies or substantial portions of the
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22
* OTHER DEALINGS IN THE SOFTWARE.
29
#include <libconfig.h>
38
#include "drm/freedreno_drmif.h"
39
#include "drm/freedreno_ringbuffer.h"
41
#include "util/os_file.h"
43
#include "freedreno_dt.h"
44
#include "freedreno_perfcntr.h"
46
/* NOTE(review): this chunk is a corrupted extraction -- bare numeric lines
 * (original line numbers) are interleaved with the code and some original
 * lines are missing, so the declarations below are fragmentary.  Bytes are
 * preserved as-is; only comments were added.
 */
#define MAX_CNTR_PER_GROUP 24
48
/* NOTE first counter group should always be CP, since we unconditionally
49
 * use CP counter to measure the gpu freq.
52
/* Per-group state: the generation counter table plus, per selected counter,
 * its register pointers and sampling bookkeeping.  NOTE(review): the nested
 * struct opener for the counter[] member is missing from this chunk.
 */
struct counter_group {
53
   const struct fd_perfcntr_group *group;
56
      const struct fd_perfcntr_counter *counter;
58
      /* pointers into the mapped register space for the 64b counter value
       * (hi/lo halves) -- see setup_counter_groups():
       */
      volatile uint32_t *val_hi;
59
      volatile uint32_t *val_lo;
60
   } counter[MAX_CNTR_PER_GROUP];
62
   /* last sample time: */
63
   uint32_t stime[MAX_CNTR_PER_GROUP];
64
   /* for now just care about the low 32b value.. at least then we don't
65
    * have to really care that we can't sample both hi and lo regs at the
68
   uint32_t last[MAX_CNTR_PER_GROUP];
69
   /* current value, ie. by how many did the counter increase in last
70
    * sampling period divided by the sampling period:
72
   float current[MAX_CNTR_PER_GROUP];
73
   /* name of currently selected counters (for UI): */
74
   const char *label[MAX_CNTR_PER_GROUP];
82
/* per-generation table of counters: */
84
   struct counter_group *groups;
85
   /* drm device (for writing select regs via ring): */
86
   struct fd_device *dev;
88
   struct fd_submit *submit;
89
   struct fd_ringbuffer *ring;
92
static void config_save(void);
93
static void config_restore(void);
94
static void restore_counter_groups(void);
104
   clock_gettime(CLOCK_MONOTONIC, &ts);
   /* NOTE(review): fragmentary -- the signatures/braces of gettime_us(),
    * delta(), find_device() and flush_ring() are missing from this chunk;
    * bare numeric lines are interleaved.  Bytes preserved, comments only.
    */
105
   /* monotonic time in microseconds: */
   return (ts.tv_sec * 1000000) + (ts.tv_nsec / 1000);
109
delta(uint32_t a, uint32_t b)
111
   /* deal with rollover: */
113
      return 0xffffffff - a + b;
123
   dev.dev = fd_device_open();
125
      err(1, "could not open drm device");
127
   dev.pipe = fd_pipe_new(dev.dev, FD_PIPE_3D);
130
   ret = fd_pipe_get_param(dev.pipe, FD_CHIP_ID, &val);
132
      err(1, "could not get gpu-id");
136
#define CHIP_FMT "d%d%d.%d"
137
#define CHIP_ARGS(chipid) \
138
   ((chipid) >> 24) & 0xff, ((chipid) >> 16) & 0xff, ((chipid) >> 8) & 0xff, \
139
      ((chipid) >> 0) & 0xff
140
   printf("device: a%" CHIP_FMT "\n", CHIP_ARGS(dev.chipid));
142
   /* try MAX_FREQ first as that will work regardless of old dt
143
    * dt bindings vs upstream bindings:
145
   ret = fd_pipe_get_param(dev.pipe, FD_MAX_FREQ, &val);
147
      printf("falling back to parsing DT bindings for freq\n");
148
      if (!fd_dt_find_freqs(&dev.min_freq, &dev.max_freq))
149
         err(1, "could not find GPU freqs");
155
   printf("min_freq=%u, max_freq=%u\n", dev.min_freq, dev.max_freq);
157
   dev.io = fd_dt_find_io();
159
      err(1, "could not map device");
162
   fd_pipe_set_param(dev.pipe, FD_SYSPROF, 1);
177
   /* flush_ring() fragment: submit the queued select-register writes and
    * wait for completion before tearing down the submit/ring:
    */
   struct fd_submit_fence fence = {};
178
   util_queue_fence_init(&fence.ready);
180
   ret = fd_submit_flush(dev.submit, -1, &fence);
183
      errx(1, "submit failed: %d", ret);
184
   util_queue_fence_wait(&fence.ready);
185
   fd_ringbuffer_del(dev.ring);
186
   fd_submit_del(dev.submit);
193
/* NOTE(review): fragmentary extraction -- opening braces, case labels and
 * several lines of select_counter()/resample_counter() are missing; the
 * bare numeric lines are interleaved junk.  Bytes preserved, comments only.
 */
select_counter(struct counter_group *group, int ctr, int n)
195
   assert(n < group->group->num_countables);
196
   assert(ctr < group->group->num_counters);
198
   group->label[ctr] = group->group->countables[n].name;
199
   group->counter[ctr].select_val = n;
202
   dev.submit = fd_submit_new(dev.pipe);
203
   dev.ring = fd_submit_new_ringbuffer(
204
      dev.submit, 0x1000, FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
207
   /* bashing select register directly while gpu is active will end
208
    * in tears.. so we need to write it via the ring:
210
    * TODO it would help startup time, if gpu is loaded, to batch
211
    * all the initial writes and do a single flush.. although that
212
    * makes things more complicated for capturing inital sample value
214
   struct fd_ringbuffer *ring = dev.ring;
215
   /* dispatch on GPU generation (top byte of chipid); the PKT3/PKT0 arm
    * vs the PKT7/PKT4 arm below presumably correspond to older vs newer
    * generations -- case labels are missing from this chunk, TODO confirm.
    */
   switch (dev.chipid >> 24) {
219
      OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
220
      OUT_RING(ring, 0x00000000);
222
      if (group->group->counters[ctr].enable) {
223
         OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
227
      if (group->group->counters[ctr].clear) {
228
         OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
231
         OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
235
      OUT_PKT0(ring, group->group->counters[ctr].select_reg, 1);
238
      if (group->group->counters[ctr].enable) {
239
         OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
246
      OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
248
      if (group->group->counters[ctr].enable) {
249
         OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
253
      if (group->group->counters[ctr].clear) {
254
         OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
257
         OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
261
      OUT_PKT4(ring, group->group->counters[ctr].select_reg, 1);
264
      if (group->group->counters[ctr].enable) {
265
         OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
272
   /* capture initial sample value/time so the first resample has a baseline: */
   group->last[ctr] = *group->counter[ctr].val_lo;
273
   group->stime[ctr] = gettime_us();
277
resample_counter(struct counter_group *group, int ctr)
279
   uint32_t val = *group->counter[ctr].val_lo;
280
   uint32_t t = gettime_us();
281
   uint32_t dt = delta(group->stime[ctr], t);
282
   uint32_t dval = delta(group->last[ctr], val);
283
   /* rate in counts/second over the elapsed interval: */
   group->current[ctr] = (float)dval * 1000000.0 / (float)dt;
284
   group->last[ctr] = val;
285
   group->stime[ctr] = t;
288
/* NOTE(review): fragmentary -- function signatures for the sampler and the
 * redraw_footer()/redraw_group_header()/redraw_counter_label() helpers are
 * missing; bare numeric lines are interleaved.  Bytes preserved as-is.
 */
#define REFRESH_MS 500
290
/* sample all the counters: */
294
   static uint64_t last_time;
295
   uint64_t current_time = gettime_us();
297
   /* rate-limit sampling to half the UI refresh period: */
   if ((current_time - last_time) < (REFRESH_MS * 1000 / 2))
300
   last_time = current_time;
302
   for (unsigned i = 0; i < dev.ngroups; i++) {
303
      struct counter_group *group = &dev.groups[i];
304
      for (unsigned j = 0; j < group->group->num_counters; j++) {
305
         resample_counter(group, j);
314
#define COLOR_GROUP_HEADER 1
315
#define COLOR_FOOTER 2
316
#define COLOR_INVERSE 3
319
static int ctr_width;
320
static int max_rows, current_cntr = 1;
323
redraw_footer(WINDOW *win)
328
   n = asprintf(&footer, " fdperf: a%" CHIP_FMT " (%.2fMHz..%.2fMHz)",
329
                CHIP_ARGS(dev.chipid), ((float)dev.min_freq) / 1000000.0,
330
                ((float)dev.max_freq) / 1000000.0);
332
   wmove(win, h - 1, 0);
333
   wattron(win, COLOR_PAIR(COLOR_FOOTER));
334
   waddstr(win, footer);
335
   whline(win, ' ', w - n);
336
   wattroff(win, COLOR_PAIR(COLOR_FOOTER));
342
redraw_group_header(WINDOW *win, int row, const char *name)
345
   wattron(win, A_BOLD);
346
   wattron(win, COLOR_PAIR(COLOR_GROUP_HEADER));
348
   whline(win, ' ', w - strlen(name));
349
   wattroff(win, COLOR_PAIR(COLOR_GROUP_HEADER));
350
   wattroff(win, A_BOLD);
354
redraw_counter_label(WINDOW *win, int row, const char *name, bool selected)
356
   int n = strlen(name);
357
   assert(n <= ctr_width);
359
   /* right-align the label within the label column: */
   whline(win, ' ', ctr_width - n);
360
   wmove(win, row, ctr_width - n);
362
      wattron(win, COLOR_PAIR(COLOR_INVERSE));
365
      wattroff(win, COLOR_PAIR(COLOR_INVERSE));
370
/* NOTE(review): fragmentary -- return types, braces and several lines of
 * these drawing helpers are missing; bare numeric lines are interleaved.
 * Bytes preserved, comments only.
 */
redraw_counter_value_cycles(WINDOW *win, float val)
373
   int x = getcurx(win);
374
   int valwidth = w - x;
377
   /* convert to fraction of max freq: */
378
   val = val / (float)dev.max_freq;
380
   /* figure out percentage-bar width: */
381
   barwidth = (int)(val * valwidth);
383
   /* sometimes things go over 100%.. idk why, could be
384
    * things running faster than base clock, or counter
385
    * summing up cycles in multiple cores?
387
   barwidth = MIN2(barwidth, valwidth - 1);
389
   n = asprintf(&str, "%.2f%%", 100.0 * val);
390
   wattron(win, COLOR_PAIR(COLOR_INVERSE));
391
   waddnstr(win, str, barwidth);
393
      whline(win, ' ', barwidth - n);
394
      wmove(win, getcury(win), x + barwidth);
396
   wattroff(win, COLOR_PAIR(COLOR_INVERSE));
398
      waddstr(win, str + barwidth);
399
   whline(win, ' ', w - getcurx(win));
405
redraw_counter_value_raw(WINDOW *win, float val)
408
   /* %'.2f groups thousands per locale (setlocale in main): */
   (void)asprintf(&str, "%'.2f", val);
410
   whline(win, ' ', w - getcurx(win));
415
redraw_counter(WINDOW *win, int row, struct counter_group *group, int ctr,
418
   redraw_counter_label(win, row, group->label[ctr], selected);
420
   /* quick hack, if the label has "CYCLE" in the name, it is
421
    * probably a cycle counter ;-)
422
    * Perhaps add more info in rnndb schema to know how to
423
    * treat individual counters (ie. which are cycles, and
424
    * for those we want to present as a percentage do we
425
    * need to scale the result.. ie. is it running at some
426
    * multiple or divisor of core clk, etc)
428
    * TODO it would be much more clever to get this from xml
429
    * Also.. in some cases I think we want to know how many
430
    * units the counter is counting for, ie. if a320 has 2x
431
    * shader as a306 we might need to scale the result..
433
   if (strstr(group->label[ctr], "CYCLE") ||
434
       strstr(group->label[ctr], "BUSY") || strstr(group->label[ctr], "IDLE"))
435
      redraw_counter_value_cycles(win, group->current[ctr]);
437
      redraw_counter_value_raw(win, group->current[ctr]);
443
   static int scroll = 0;
   /* NOTE(review): fragmentary -- the redraw()/current_counter() signatures,
    * braces and several lines are missing from this chunk; bare numeric
    * lines are interleaved.  Bytes preserved, comments only.
    */
451
   /* keep the selected row within the visible window by adjusting scroll: */
   if ((current_cntr - scroll) > (max - 1)) {
452
      scroll = current_cntr - (max - 1);
453
   } else if ((current_cntr - 1) < scroll) {
454
      scroll = current_cntr - 1;
457
   for (unsigned i = 0; i < dev.ngroups; i++) {
458
      struct counter_group *group = &dev.groups[i];
461
      /* NOTE skip CP the first CP counter */
465
      if (j < group->group->num_counters) {
466
         if ((scroll <= row) && ((row - scroll) < max))
467
            redraw_group_header(win, row - scroll, group->group->name);
471
      for (; j < group->group->num_counters; j++) {
472
         if ((scroll <= row) && ((row - scroll) < max))
473
            redraw_counter(win, row - scroll, group, j, row == current_cntr);
478
   /* convert back to physical (unscrolled) offset: */
481
   redraw_group_header(win, row, "Status");
484
   /* Draw GPU freq row: */
485
   redraw_counter_label(win, row, "Freq (MHz)", false);
486
   /* groups[0].current[0] is the CP counter used as the freq measure
    * (see NOTE at top of file):
    */
   redraw_counter_value_raw(win, dev.groups[0].current[0] / 1000000.0);
494
static struct counter_group *
495
current_counter(int *ctr)
499
   for (unsigned i = 0; i < dev.ngroups; i++) {
500
      struct counter_group *group = &dev.groups[i];
503
      /* NOTE skip the first CP counter (CP_ALWAYS_COUNT) */
507
      /* account for group header: */
508
      if (j < group->group->num_counters) {
509
         /* cannot select group header.. return null to indicate this
512
         if (n == current_cntr)
517
      for (; j < group->group->num_counters; j++) {
518
         if (n == current_cntr) {
535
   struct counter_group *group;
   /* NOTE(review): fragmentary -- this is the countable-selection dialog
    * plus scroll_cntr(); signatures, braces, case labels and several lines
    * are missing; bare numeric lines are interleaved.  Bytes preserved.
    */
536
   int cnt = 0, current = 0, scroll;
538
   /* figure out dialog size: */
540
   int dw = ctr_width + 2;
542
   group = current_counter(&cnt);
544
   /* find currently selected idx (note there can be discontinuities
545
    * so the selected value does not map 1:1 to current idx)
547
   uint32_t selected = group->counter[cnt].select_val;
548
   for (int i = 0; i < group->group->num_countables; i++) {
549
      if (group->group->countables[i].selector == selected) {
555
   /* scrolling offset, if dialog is too small for all the choices: */
558
   dialog = newwin(dh, dw, (h - dh) / 2, (w - dw) / 2);
561
   keypad(dialog, TRUE);
564
      int max = MIN2(dh - 2, group->group->num_countables);
567
      if ((current - scroll) >= (dh - 3)) {
568
         scroll = current - (dh - 3);
569
      } else if (current < scroll) {
573
      for (int i = 0; i < max; i++) {
575
         wmove(dialog, i + 1, 1);
577
            assert(n < group->group->num_countables);
578
            selector = group->group->countables[n].selector;
579
            wattron(dialog, COLOR_PAIR(COLOR_INVERSE));
581
         if (n < group->group->num_countables)
582
            waddstr(dialog, group->group->countables[n].name);
583
         whline(dialog, ' ', dw - getcurx(dialog) - 1);
585
            wattroff(dialog, COLOR_PAIR(COLOR_INVERSE));
588
      assert(selector >= 0);
590
      /* dialog key handling (case labels missing from this chunk): */
      switch (wgetch(dialog)) {
592
         current = MAX2(0, current - 1);
595
         current = MIN2(group->group->num_countables - 1, current + 1);
599
         /* select new sampler */
600
         select_counter(group, cnt, selector);
615
   wborder(dialog, ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ');
620
scroll_cntr(int amount)
623
      /* clamp and skip over unselectable rows (group headers), in both
       * scroll directions:
       */
      current_cntr = MAX2(1, current_cntr + amount);
624
      if (current_counter(NULL) == NULL) {
625
         current_cntr = MAX2(1, current_cntr - 1);
628
      current_cntr = MIN2(max_rows - 1, current_cntr + amount);
629
      if (current_counter(NULL) == NULL)
630
         current_cntr = MIN2(max_rows - 1, current_cntr + 1);
638
   uint32_t last_time = gettime_us();
   /* NOTE(review): fragmentary -- this covers the main ncurses UI loop plus
    * restore_counter_groups()/setup_counter_groups(); signatures, braces,
    * case labels and several lines are missing; numeric junk interleaved.
    */
646
   wtimeout(mainwin, REFRESH_MS);
648
   keypad(mainwin, TRUE);
651
   init_pair(COLOR_GROUP_HEADER, COLOR_WHITE, COLOR_GREEN);
652
   init_pair(COLOR_FOOTER, COLOR_WHITE, COLOR_BLUE);
653
   init_pair(COLOR_INVERSE, COLOR_BLACK, COLOR_WHITE);
656
      switch (wgetch(mainwin)) {
663
      case KEY_NPAGE: /* page-down */
664
         /* TODO figure out # of rows visible? */
667
      case KEY_PPAGE: /* page-up */
668
         /* TODO figure out # of rows visible? */
684
      /* restore the counters every 0.5s in case the GPU has suspended,
685
       * in which case the current selected countables will have reset:
687
      uint32_t t = gettime_us();
688
      if (delta(last_time, t) > 500000) {
689
         restore_counter_groups();
695
   /* restore settings.. maybe we need an atexit()??*/
703
restore_counter_groups(void)
705
   for (unsigned i = 0; i < dev.ngroups; i++) {
706
      struct counter_group *group = &dev.groups[i];
709
      /* NOTE skip CP the first CP counter */
713
      /* re-program each counter to its remembered select_val: */
      for (; j < group->group->num_counters; j++) {
714
         select_counter(group, j, group->counter[j].select_val);
720
setup_counter_groups(const struct fd_perfcntr_group *groups)
722
   for (unsigned i = 0; i < dev.ngroups; i++) {
723
      struct counter_group *group = &dev.groups[i];
725
      group->group = &groups[i];
727
      max_rows += group->group->num_counters + 1;
729
      /* the first CP counter is hidden: */
732
         if (group->group->num_counters <= 1)
736
      for (unsigned j = 0; j < group->group->num_counters; j++) {
737
         group->counter[j].counter = &group->group->counters[j];
739
         /* register offsets are in dwords, dev.io in bytes, hence *4: */
         group->counter[j].val_hi =
740
            dev.io + (group->counter[j].counter->counter_reg_hi * 4);
741
         group->counter[j].val_lo =
742
            dev.io + (group->counter[j].counter->counter_reg_lo * 4);
744
         group->counter[j].select_val = j;
747
      /* size the UI label column to the longest countable name: */
      for (unsigned j = 0; j < group->group->num_countables; j++) {
749
            MAX2(ctr_width, strlen(group->group->countables[j].name) + 1);
755
 * configuration / persistence
759
/* NOTE(review): fragmentary -- config_save()/config_restore()/main()
 * signatures, braces and several lines are missing from this chunk;
 * bare numeric lines are interleaved.  Bytes preserved, comments only.
 */
static config_setting_t *setting;
764
   for (unsigned i = 0; i < dev.ngroups; i++) {
765
      struct counter_group *group = &dev.groups[i];
768
      /* NOTE skip CP the first CP counter */
772
      config_setting_t *sect =
773
         config_setting_get_member(setting, group->group->name);
775
      /* persist each counter's selected countable as "counter<N>": */
      for (; j < group->group->num_counters; j++) {
776
         char name[] = "counter0000";
777
         sprintf(name, "counter%d", j);
778
         config_setting_t *s = config_setting_lookup(sect, name);
779
         config_setting_set_int(s, group->counter[j].select_val);
783
   config_write_file(&cfg, "fdperf.cfg");
793
   /* Read the file. If there is an error, report it and exit. */
794
   if (!config_read_file(&cfg, "fdperf.cfg")) {
795
      warn("could not restore settings");
798
   config_setting_t *root = config_root_setting(&cfg);
800
   /* per device settings: */
801
   (void)asprintf(&str, "a%dxx", dev.chipid >> 24);
802
   setting = config_setting_get_member(root, str);
804
      setting = config_setting_add(root, str, CONFIG_TYPE_GROUP);
807
   for (unsigned i = 0; i < dev.ngroups; i++) {
808
      struct counter_group *group = &dev.groups[i];
811
      /* NOTE skip CP the first CP counter */
815
      config_setting_t *sect =
816
         config_setting_get_member(setting, group->group->name);
820
         config_setting_add(setting, group->group->name, CONFIG_TYPE_GROUP);
823
      for (; j < group->group->num_counters; j++) {
824
         char name[] = "counter0000";
825
         sprintf(name, "counter%d", j);
826
         config_setting_t *s = config_setting_lookup(sect, name);
828
            config_setting_add(sect, name, CONFIG_TYPE_INT);
831
         select_counter(group, j, config_setting_get_int(s));
841
main(int argc, char **argv)
845
   const struct fd_perfcntr_group *groups;
846
   struct fd_dev_id dev_id = {
847
      .gpu_id = (dev.chipid >> 24) * 100,
849
   groups = fd_perfcntrs(&dev_id, &dev.ngroups);
851
      errx(1, "no perfcntr support");
854
   dev.groups = calloc(dev.ngroups, sizeof(struct counter_group));
856
   /* needed for the %'.2f thousands-grouping in redraw_counter_value_raw(): */
   setlocale(LC_NUMERIC, "en_US.UTF-8");
858
   setup_counter_groups(groups);
859
   restore_counter_groups();