/*
 * Copyright © 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file intel_measure.c
 */
27
#include "intel_measure.h"

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#define __STDC_FORMAT_MACROS 1
#include <inttypes.h>

#include "dev/intel_device_info.h"
#include "util/debug.h"
#include "util/macros.h"
#include "util/u_debug.h"
46
static const struct debug_control debug_control[] = {
47
{ "draw", INTEL_MEASURE_DRAW },
48
{ "rt", INTEL_MEASURE_RENDERPASS },
49
{ "shader", INTEL_MEASURE_SHADER },
50
{ "batch", INTEL_MEASURE_BATCH },
51
{ "frame", INTEL_MEASURE_FRAME },
54
static struct intel_measure_config config;
57
intel_measure_init(struct intel_measure_device *device)
59
static bool once = false;
60
const char *env = getenv("INTEL_MEASURE");
61
if (unlikely(!once)) {
63
memset(&config, 0, sizeof(struct intel_measure_config));
68
config.flags = parse_debug_string(env, debug_control);
70
config.flags = INTEL_MEASURE_DRAW;
71
config.enabled = true;
72
config.event_interval = 1;
73
config.control_fh = -1;
75
/* Overflows of the following defaults will drop data and generate a
76
* warning on the output filehandle.
79
/* default batch_size allows for 64k renders in a single batch */
80
const int DEFAULT_BATCH_SIZE = 64 * 1024;
81
config.batch_size = DEFAULT_BATCH_SIZE;
83
/* Default buffer_size allows for 64k batches per line of output in the
84
* csv. Overflow may occur for offscreen workloads or large 'interval'
87
const int DEFAULT_BUFFER_SIZE = 64 * 1024;
88
config.buffer_size = DEFAULT_BUFFER_SIZE;
90
const char *filename = strstr(env, "file=");
91
const char *start_frame_s = strstr(env, "start=");
92
const char *count_frame_s = strstr(env, "count=");
93
const char *control_path = strstr(env, "control=");
94
const char *interval_s = strstr(env, "interval=");
95
const char *batch_size_s = strstr(env, "batch_size=");
96
const char *buffer_size_s = strstr(env, "buffer_size=");
98
char *sep = strrchr(env, ',');
104
if (filename && !__check_suid()) {
106
config.file = fopen(filename, "w");
108
fprintf(stderr, "INTEL_MEASURE failed to open output file %s: %s\n",
109
filename, strerror (errno));
116
const int start_frame = atoi(start_frame_s);
117
if (start_frame < 0) {
118
fprintf(stderr, "INTEL_MEASURE start frame may "
119
"not be negative: %d\n", start_frame);
123
config.start_frame = start_frame;
124
config.enabled = false;
129
const int count_frame = atoi(count_frame_s);
130
if (count_frame <= 0) {
131
fprintf(stderr, "INTEL_MEASURE count frame must be positive: %d\n",
136
config.end_frame = config.start_frame + count_frame;
141
if (mkfifoat(AT_FDCWD, control_path, O_CREAT | S_IRUSR | S_IWUSR)) {
142
if (errno != EEXIST) {
143
fprintf(stderr, "INTEL_MEASURE failed to create control "
144
"fifo %s: %s\n", control_path, strerror (errno));
149
config.control_fh = openat(AT_FDCWD, control_path,
150
O_RDONLY | O_NONBLOCK);
151
if (config.control_fh == -1) {
152
fprintf(stderr, "INTEL_MEASURE failed to open control fifo "
153
"%s: %s\n", control_path, strerror (errno));
157
/* when using a control fifo, do not start until the user triggers
160
config.enabled = false;
165
const int event_interval = atoi(interval_s);
166
if (event_interval < 1) {
167
fprintf(stderr, "INTEL_MEASURE event_interval must be positive: "
168
"%d\n", event_interval);
171
config.event_interval = event_interval;
176
const int batch_size = atoi(batch_size_s);
177
if (batch_size < DEFAULT_BATCH_SIZE) {
178
fprintf(stderr, "INTEL_MEASURE minimum batch_size is 4k: "
182
if (batch_size > DEFAULT_BATCH_SIZE * 1024) {
183
fprintf(stderr, "INTEL_MEASURE batch_size limited to 4M: "
188
config.batch_size = batch_size;
193
const int buffer_size = atoi(buffer_size_s);
194
if (buffer_size < DEFAULT_BUFFER_SIZE) {
195
fprintf(stderr, "INTEL_MEASURE minimum buffer_size is 1k: "
196
"%d\n", DEFAULT_BUFFER_SIZE);
198
if (buffer_size > DEFAULT_BUFFER_SIZE * 1024) {
199
fprintf(stderr, "INTEL_MEASURE buffer_size limited to 1M: "
200
"%d\n", buffer_size);
203
config.buffer_size = buffer_size;
206
fputs("draw_start,draw_end,frame,batch,"
207
"event_index,event_count,type,count,vs,tcs,tes,"
208
"gs,fs,cs,framebuffer,idle_us,time_us\n",
212
device->config = NULL;
214
pthread_mutex_init(&device->mutex, NULL);
215
list_inithead(&device->queued_snapshots);
218
device->config = &config;
222
intel_measure_snapshot_string(enum intel_measure_snapshot_type type)
224
const char *names[] = {
225
[INTEL_SNAPSHOT_UNDEFINED] = "undefined",
226
[INTEL_SNAPSHOT_BLIT] = "blit",
227
[INTEL_SNAPSHOT_CCS_AMBIGUATE] = "ccs ambiguate",
228
[INTEL_SNAPSHOT_CCS_COLOR_CLEAR] = "ccs color clear",
229
[INTEL_SNAPSHOT_CCS_PARTIAL_RESOLVE] = "ccs partial resolve",
230
[INTEL_SNAPSHOT_CCS_RESOLVE] = "ccs resolve",
231
[INTEL_SNAPSHOT_COMPUTE] = "compute",
232
[INTEL_SNAPSHOT_COPY] = "copy",
233
[INTEL_SNAPSHOT_DRAW] = "draw",
234
[INTEL_SNAPSHOT_HIZ_AMBIGUATE] = "hiz ambiguate",
235
[INTEL_SNAPSHOT_HIZ_CLEAR] = "hiz clear",
236
[INTEL_SNAPSHOT_HIZ_RESOLVE] = "hiz resolve",
237
[INTEL_SNAPSHOT_MCS_COLOR_CLEAR] = "mcs color clear",
238
[INTEL_SNAPSHOT_MCS_PARTIAL_RESOLVE] = "mcs partial resolve",
239
[INTEL_SNAPSHOT_SLOW_COLOR_CLEAR] = "slow color clear",
240
[INTEL_SNAPSHOT_SLOW_DEPTH_CLEAR] = "slow depth clear",
241
[INTEL_SNAPSHOT_SECONDARY_BATCH] = "secondary command buffer",
242
[INTEL_SNAPSHOT_END] = "end",
244
assert(type < ARRAY_SIZE(names));
245
assert(names[type] != NULL);
246
assert(type != INTEL_SNAPSHOT_UNDEFINED);
251
* Indicate to the caller whether a new snapshot should be started.
253
* Callers provide rendering state to this method to determine whether the
254
* current start event should be skipped. Depending on the configuration
255
* flags, a new snapshot may start:
257
* - when the program changes
258
* - after a batch is submitted
259
* - at frame boundaries
261
* Returns true if a snapshot should be started.
264
intel_measure_state_changed(const struct intel_measure_batch *batch,
265
uintptr_t vs, uintptr_t tcs, uintptr_t tes,
266
uintptr_t gs, uintptr_t fs, uintptr_t cs)
268
if (batch->index == 0) {
269
/* always record the first event */
273
const struct intel_measure_snapshot *last_snap =
274
&batch->snapshots[batch->index - 1];
276
if (config.flags & INTEL_MEASURE_DRAW)
279
if (batch->index % 2 == 0) {
280
/* no snapshot is running, but we have a start event */
284
if (config.flags & (INTEL_MEASURE_FRAME | INTEL_MEASURE_BATCH)) {
285
/* only start collection when index == 0, at the beginning of a batch */
289
if (config.flags & INTEL_MEASURE_RENDERPASS) {
290
return ((last_snap->framebuffer != batch->framebuffer) ||
291
/* compute workloads are always in their own renderpass */
295
/* remaining comparisons check the state of the render pipeline for
296
* INTEL_MEASURE_PROGRAM
298
assert(config.flags & INTEL_MEASURE_SHADER);
300
if (!vs && !tcs && !tes && !gs && !fs && !cs) {
301
/* blorp always changes program */
305
return (last_snap->vs != (uintptr_t) vs ||
306
last_snap->tcs != (uintptr_t) tcs ||
307
last_snap->tes != (uintptr_t) tes ||
308
last_snap->gs != (uintptr_t) gs ||
309
last_snap->fs != (uintptr_t) fs ||
310
last_snap->cs != (uintptr_t) cs);
314
* Notify intel_measure that a frame is about to begin.
316
* Configuration values and the control fifo may commence measurement at frame
320
intel_measure_frame_transition(unsigned frame)
322
if (frame == config.start_frame)
323
config.enabled = true;
324
else if (frame == config.end_frame)
325
config.enabled = false;
327
/* user commands to the control fifo will override any start/count
328
* environment settings
330
if (config.control_fh != -1) {
332
const unsigned BUF_SIZE = 128;
334
ssize_t bytes = read(config.control_fh, buf, BUF_SIZE - 1);
338
fprintf(stderr, "INTEL_MEASURE failed to read control fifo: %s\n",
344
char *nptr = buf, *endptr = buf;
345
while (*nptr != '\0' && *endptr != '\0') {
346
long fcount = strtol(nptr, &endptr, 10);
347
if (nptr == endptr) {
348
config.enabled = false;
349
fprintf(stderr, "INTEL_MEASURE invalid frame count on "
351
lseek(config.control_fh, 0, SEEK_END);
353
} else if (fcount == 0) {
354
config.enabled = false;
356
config.enabled = true;
357
config.end_frame = frame + fcount;
366
/* Timestamps on these platforms wrap at 36 bits. */
#define TIMESTAMP_BITS 36

/**
 * Return the elapsed raw ticks between two timestamps, accounting for a
 * single wraparound of the 36-bit counter when time1 precedes time0.
 */
static uint64_t
raw_timestamp_delta(uint64_t time0, uint64_t time1)
{
   if (time0 > time1) {
      /* counter wrapped between the two samples */
      return (1ULL << TIMESTAMP_BITS) + time1 - time0;
   } else {
      return time1 - time0;
   }
}
378
* Verify that rendering has completed for the batch
380
* Rendering is complete when the last timestamp has been written.
383
intel_measure_ready(struct intel_measure_batch *batch)
385
assert(batch->timestamps);
386
assert(batch->index > 1);
387
return (batch->timestamps[batch->index - 1] != 0);
391
* Submit completed snapshots for buffering.
393
* Snapshot data becomes available when asynchronous rendering completes.
394
* Depending on configuration, snapshot data may need to be collated before
395
* writing to the output file.
398
intel_measure_push_result(struct intel_measure_device *device,
399
struct intel_measure_batch *batch)
401
struct intel_measure_ringbuffer *rb = device->ringbuffer;
403
uint64_t *timestamps = batch->timestamps;
404
assert(timestamps != NULL);
405
assert(timestamps[0] != 0);
407
for (int i = 0; i < batch->index; i += 2) {
408
const struct intel_measure_snapshot *begin = &batch->snapshots[i];
409
const struct intel_measure_snapshot *end = &batch->snapshots[i+1];
411
assert (end->type == INTEL_SNAPSHOT_END);
413
if (begin->type == INTEL_SNAPSHOT_SECONDARY_BATCH) {
414
assert(begin->secondary != NULL);
415
begin->secondary->batch_count = batch->batch_count;
416
intel_measure_push_result(device, begin->secondary);
420
const uint64_t prev_end_ts = rb->results[rb->head].end_ts;
422
/* advance ring buffer */
423
if (++rb->head == config.buffer_size)
425
if (rb->head == rb->tail) {
426
static bool warned = false;
427
if (unlikely(!warned)) {
429
"WARNING: Buffered data exceeds INTEL_MEASURE limit: %d. "
430
"Data has been dropped. "
431
"Increase setting with INTEL_MEASURE=buffer_size={count}\n",
438
struct intel_measure_buffered_result *buffered_result =
439
&rb->results[rb->head];
441
memset(buffered_result, 0, sizeof(*buffered_result));
442
memcpy(&buffered_result->snapshot, begin,
443
sizeof(struct intel_measure_snapshot));
444
buffered_result->start_ts = timestamps[i];
445
buffered_result->end_ts = timestamps[i+1];
446
buffered_result->idle_duration =
447
raw_timestamp_delta(prev_end_ts, buffered_result->start_ts);
448
buffered_result->frame = batch->frame;
449
buffered_result->batch_count = batch->batch_count;
450
buffered_result->event_index = i / 2;
451
buffered_result->snapshot.event_count = end->event_count;
456
ringbuffer_size(const struct intel_measure_ringbuffer *rb)
458
unsigned head = rb->head;
460
head += config.buffer_size;
461
return head - rb->tail;
464
static const struct intel_measure_buffered_result *
465
ringbuffer_pop(struct intel_measure_ringbuffer *rb)
467
if (rb->tail == rb->head) {
468
/* encountered ringbuffer overflow while processing events */
472
if (++rb->tail == config.buffer_size)
474
return &rb->results[rb->tail];
477
static const struct intel_measure_buffered_result *
478
ringbuffer_peek(const struct intel_measure_ringbuffer *rb, unsigned index)
480
int result_offset = rb->tail + index + 1;
481
if (result_offset >= config.buffer_size)
482
result_offset -= config.buffer_size;
483
return &rb->results[result_offset];
488
* Determine the number of buffered events that must be combined for the next
489
* line of csv output. Returns 0 if more events are needed.
492
buffered_event_count(struct intel_measure_device *device)
494
const struct intel_measure_ringbuffer *rb = device->ringbuffer;
495
const unsigned buffered_event_count = ringbuffer_size(rb);
496
if (buffered_event_count == 0) {
497
/* no events to collect */
501
/* count the number of buffered events required to meet the configuration */
502
if (config.flags & (INTEL_MEASURE_DRAW |
503
INTEL_MEASURE_RENDERPASS |
504
INTEL_MEASURE_SHADER)) {
505
/* For these flags, every buffered event represents a line in the
506
* output. None of these events span batches. If the event interval
507
* crosses a batch boundary, then the next interval starts with the new
513
const unsigned start_frame = ringbuffer_peek(rb, 0)->frame;
514
if (config.flags & INTEL_MEASURE_BATCH) {
515
/* each buffered event is a command buffer. The number of events to
516
* process is the same as the interval, unless the interval crosses a
519
if (buffered_event_count < config.event_interval) {
520
/* not enough events */
524
/* Imperfect frame tracking requires us to allow for *older* frames */
525
if (ringbuffer_peek(rb, config.event_interval - 1)->frame <= start_frame) {
526
/* No frame transition. The next {interval} events should be combined. */
527
return config.event_interval;
530
/* Else a frame transition occurs within the interval. Find the
531
* transition, so the following line of output begins with the batch
532
* that starts the new frame.
534
for (int event_index = 1;
535
event_index <= config.event_interval;
537
if (ringbuffer_peek(rb, event_index)->frame > start_frame)
544
/* Else we need to search buffered events to find the matching frame
545
* transition for our interval.
547
assert(config.flags & INTEL_MEASURE_FRAME);
548
for (int event_index = 1;
549
event_index < buffered_event_count;
551
const int latest_frame = ringbuffer_peek(rb, event_index)->frame;
552
if (latest_frame - start_frame >= config.event_interval)
560
* Take result_count events from the ringbuffer and output them as a single
564
print_combined_results(struct intel_measure_device *measure_device,
566
struct intel_device_info *info)
568
if (result_count == 0)
571
struct intel_measure_ringbuffer *result_rb = measure_device->ringbuffer;
572
assert(ringbuffer_size(result_rb) >= result_count);
573
const struct intel_measure_buffered_result* start_result =
574
ringbuffer_pop(result_rb);
575
const struct intel_measure_buffered_result* current_result = start_result;
577
if (start_result == NULL)
581
uint64_t duration_ts = raw_timestamp_delta(start_result->start_ts,
582
current_result->end_ts);
583
unsigned event_count = start_result->snapshot.event_count;
584
while (result_count-- > 0) {
585
assert(ringbuffer_size(result_rb) > 0);
586
current_result = ringbuffer_pop(result_rb);
587
if (current_result == NULL)
589
duration_ts += raw_timestamp_delta(current_result->start_ts,
590
current_result->end_ts);
591
event_count += current_result->snapshot.event_count;
594
uint64_t duration_idle_ns =
595
intel_device_info_timebase_scale(info, start_result->idle_duration);
596
uint64_t duration_time_ns =
597
intel_device_info_timebase_scale(info, duration_ts);
598
const struct intel_measure_snapshot *begin = &start_result->snapshot;
599
fprintf(config.file, "%"PRIu64",%"PRIu64",%u,%u,%u,%u,%s,%u,"
600
"0x%"PRIxPTR",0x%"PRIxPTR",0x%"PRIxPTR",0x%"PRIxPTR",0x%"PRIxPTR","
601
"0x%"PRIxPTR",0x%"PRIxPTR",%.3lf,%.3lf\n",
602
start_result->start_ts, current_result->end_ts,
603
start_result->frame, start_result->batch_count,
604
start_result->event_index, event_count,
605
begin->event_name, begin->count,
606
begin->vs, begin->tcs, begin->tes, begin->gs, begin->fs, begin->cs,
608
(double)duration_idle_ns / 1000.0,
609
(double)duration_time_ns / 1000.0);
613
/**
 * Empty the ringbuffer of events that can be printed.
 */
static void
intel_measure_print(struct intel_measure_device *device,
                    struct intel_device_info *info)
{
   while (true) {
      const int events_to_combine = buffered_event_count(device);
      if (events_to_combine == 0)
         break;
      print_combined_results(device, events_to_combine, info);
   }
}
628
* Collect snapshots from completed command buffers and submit them to
629
* intel_measure for printing.
632
intel_measure_gather(struct intel_measure_device *measure_device,
633
struct intel_device_info *info)
635
pthread_mutex_lock(&measure_device->mutex);
637
/* Iterate snapshots and collect if ready. Each snapshot queue will be
638
* in-order, but we must determine which queue has the oldest batch.
640
/* iterate snapshots and collect if ready */
641
while (!list_is_empty(&measure_device->queued_snapshots)) {
642
struct intel_measure_batch *batch =
643
list_first_entry(&measure_device->queued_snapshots,
644
struct intel_measure_batch, link);
646
if (!intel_measure_ready(batch)) {
647
/* command buffer has begun execution on the gpu, but has not
653
list_del(&batch->link);
654
assert(batch->index % 2 == 0);
656
intel_measure_push_result(measure_device, batch);
662
intel_measure_print(measure_device, info);
663
pthread_mutex_unlock(&measure_device->mutex);