/*
 * Copyright © 2020 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24
#include "vk_common_entrypoints.h"
25
#include "radv_private.h"
26
#include "radv_shader.h"
32
radv_write_begin_general_api_marker(struct radv_cmd_buffer *cmd_buffer,
33
enum rgp_sqtt_marker_general_api_type api_type)
35
struct rgp_sqtt_marker_general_api marker = {0};
36
struct radeon_cmdbuf *cs = cmd_buffer->cs;
38
marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API;
39
marker.api_type = api_type;
41
radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
45
radv_write_end_general_api_marker(struct radv_cmd_buffer *cmd_buffer,
46
enum rgp_sqtt_marker_general_api_type api_type)
48
struct rgp_sqtt_marker_general_api marker = {0};
49
struct radeon_cmdbuf *cs = cmd_buffer->cs;
51
marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API;
52
marker.api_type = api_type;
55
radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
59
radv_write_event_marker(struct radv_cmd_buffer *cmd_buffer,
60
enum rgp_sqtt_marker_event_type api_type, uint32_t vertex_offset_user_data,
61
uint32_t instance_offset_user_data, uint32_t draw_index_user_data)
63
struct rgp_sqtt_marker_event marker = {0};
64
struct radeon_cmdbuf *cs = cmd_buffer->cs;
66
marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT;
67
marker.api_type = api_type;
68
marker.cmd_id = cmd_buffer->state.num_events++;
71
if (vertex_offset_user_data == UINT_MAX || instance_offset_user_data == UINT_MAX) {
72
vertex_offset_user_data = 0;
73
instance_offset_user_data = 0;
76
if (draw_index_user_data == UINT_MAX)
77
draw_index_user_data = vertex_offset_user_data;
79
marker.vertex_offset_reg_idx = vertex_offset_user_data;
80
marker.instance_offset_reg_idx = instance_offset_user_data;
81
marker.draw_index_reg_idx = draw_index_user_data;
83
radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
87
radv_write_event_with_dims_marker(struct radv_cmd_buffer *cmd_buffer,
88
enum rgp_sqtt_marker_event_type api_type, uint32_t x, uint32_t y,
91
struct rgp_sqtt_marker_event_with_dims marker = {0};
92
struct radeon_cmdbuf *cs = cmd_buffer->cs;
94
marker.event.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT;
95
marker.event.api_type = api_type;
96
marker.event.cmd_id = cmd_buffer->state.num_events++;
97
marker.event.cb_id = 0;
98
marker.event.has_thread_dims = 1;
104
radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
108
radv_write_user_event_marker(struct radv_cmd_buffer *cmd_buffer,
109
enum rgp_sqtt_marker_user_event_type type, const char *str)
111
struct radeon_cmdbuf *cs = cmd_buffer->cs;
113
if (type == UserEventPop) {
115
struct rgp_sqtt_marker_user_event marker = {0};
116
marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
117
marker.data_type = type;
119
radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
122
unsigned len = strlen(str);
123
struct rgp_sqtt_marker_user_event_with_length marker = {0};
124
marker.user_event.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
125
marker.user_event.data_type = type;
126
marker.length = align(len, 4);
128
uint8_t *buffer = alloca(sizeof(marker) + marker.length);
129
memset(buffer, 0, sizeof(marker) + marker.length);
130
memcpy(buffer, &marker, sizeof(marker));
131
memcpy(buffer + sizeof(marker), str, len);
133
radv_emit_thread_trace_userdata(cmd_buffer->device, cs, buffer,
134
sizeof(marker) / 4 + marker.length / 4);
139
radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
141
uint64_t device_id = (uintptr_t)cmd_buffer->device;
142
struct rgp_sqtt_marker_cb_start marker = {0};
143
struct radeon_cmdbuf *cs = cmd_buffer->cs;
145
if (likely(!cmd_buffer->device->thread_trace.bo))
148
marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_START;
150
marker.device_id_low = device_id;
151
marker.device_id_high = device_id >> 32;
152
marker.queue = cmd_buffer->qf;
153
marker.queue_flags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT;
155
if (cmd_buffer->qf == RADV_QUEUE_GENERAL)
156
marker.queue_flags |= VK_QUEUE_GRAPHICS_BIT;
158
radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
162
radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
164
uint64_t device_id = (uintptr_t)cmd_buffer->device;
165
struct rgp_sqtt_marker_cb_end marker = {0};
166
struct radeon_cmdbuf *cs = cmd_buffer->cs;
168
if (likely(!cmd_buffer->device->thread_trace.bo))
171
marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_END;
173
marker.device_id_low = device_id;
174
marker.device_id_high = device_id >> 32;
176
radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
180
radv_describe_draw(struct radv_cmd_buffer *cmd_buffer)
182
if (likely(!cmd_buffer->device->thread_trace.bo))
185
radv_write_event_marker(cmd_buffer, cmd_buffer->state.current_event_type, UINT_MAX, UINT_MAX,
190
radv_describe_dispatch(struct radv_cmd_buffer *cmd_buffer, int x, int y, int z)
192
if (likely(!cmd_buffer->device->thread_trace.bo))
195
radv_write_event_with_dims_marker(cmd_buffer, cmd_buffer->state.current_event_type, x, y, z);
199
radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer,
200
VkImageAspectFlagBits aspects)
202
cmd_buffer->state.current_event_type = (aspects & VK_IMAGE_ASPECT_COLOR_BIT)
203
? EventRenderPassColorClear
204
: EventRenderPassDepthStencilClear;
208
radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer)
210
cmd_buffer->state.current_event_type = EventInternalUnknown;
214
radv_describe_begin_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer)
216
cmd_buffer->state.current_event_type = EventRenderPassResolve;
220
radv_describe_end_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer)
222
cmd_buffer->state.current_event_type = EventInternalUnknown;
226
radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer)
228
struct rgp_sqtt_marker_barrier_end marker = {0};
229
struct radeon_cmdbuf *cs = cmd_buffer->cs;
231
if (likely(!cmd_buffer->device->thread_trace.bo) || !cmd_buffer->state.pending_sqtt_barrier_end)
234
cmd_buffer->state.pending_sqtt_barrier_end = false;
236
marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END;
239
marker.num_layout_transitions = cmd_buffer->state.num_layout_transitions;
241
if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_WAIT_ON_EOP_TS)
242
marker.wait_on_eop_ts = true;
243
if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_VS_PARTIAL_FLUSH)
244
marker.vs_partial_flush = true;
245
if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_PS_PARTIAL_FLUSH)
246
marker.ps_partial_flush = true;
247
if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_CS_PARTIAL_FLUSH)
248
marker.cs_partial_flush = true;
249
if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_PFP_SYNC_ME)
250
marker.pfp_sync_me = true;
251
if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_SYNC_CP_DMA)
252
marker.sync_cp_dma = true;
253
if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_VMEM_L0)
254
marker.inval_tcp = true;
255
if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_ICACHE)
256
marker.inval_sqI = true;
257
if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_SMEM_L0)
258
marker.inval_sqK = true;
259
if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_L2)
260
marker.flush_tcc = true;
261
if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L2)
262
marker.inval_tcc = true;
263
if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_CB)
264
marker.flush_cb = true;
265
if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_CB)
266
marker.inval_cb = true;
267
if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_DB)
268
marker.flush_db = true;
269
if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_DB)
270
marker.inval_db = true;
271
if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L1)
272
marker.inval_gl1 = true;
274
radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
276
cmd_buffer->state.num_layout_transitions = 0;
280
radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer, enum rgp_barrier_reason reason)
282
struct rgp_sqtt_marker_barrier_start marker = {0};
283
struct radeon_cmdbuf *cs = cmd_buffer->cs;
285
if (likely(!cmd_buffer->device->thread_trace.bo))
288
radv_describe_barrier_end_delayed(cmd_buffer);
289
cmd_buffer->state.sqtt_flush_bits = 0;
291
marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START;
293
marker.dword02 = reason;
295
radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
299
radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer)
301
cmd_buffer->state.pending_sqtt_barrier_end = true;
305
radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer,
306
const struct radv_barrier_data *barrier)
308
struct rgp_sqtt_marker_layout_transition marker = {0};
309
struct radeon_cmdbuf *cs = cmd_buffer->cs;
311
if (likely(!cmd_buffer->device->thread_trace.bo))
314
marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION;
315
marker.depth_stencil_expand = barrier->layout_transitions.depth_stencil_expand;
316
marker.htile_hiz_range_expand = barrier->layout_transitions.htile_hiz_range_expand;
317
marker.depth_stencil_resummarize = barrier->layout_transitions.depth_stencil_resummarize;
318
marker.dcc_decompress = barrier->layout_transitions.dcc_decompress;
319
marker.fmask_decompress = barrier->layout_transitions.fmask_decompress;
320
marker.fast_clear_eliminate = barrier->layout_transitions.fast_clear_eliminate;
321
marker.fmask_color_expand = barrier->layout_transitions.fmask_color_expand;
322
marker.init_mask_ram = barrier->layout_transitions.init_mask_ram;
324
radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
326
cmd_buffer->state.num_layout_transitions++;
330
radv_describe_pipeline_bind(struct radv_cmd_buffer *cmd_buffer,
331
VkPipelineBindPoint pipelineBindPoint, struct radv_pipeline *pipeline)
333
struct rgp_sqtt_marker_pipeline_bind marker = {0};
334
struct radeon_cmdbuf *cs = cmd_buffer->cs;
336
if (likely(!cmd_buffer->device->thread_trace.bo))
339
marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE;
341
marker.bind_point = pipelineBindPoint;
342
marker.api_pso_hash[0] = pipeline->pipeline_hash;
343
marker.api_pso_hash[1] = pipeline->pipeline_hash >> 32;
345
radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
348
/* TODO: Improve the way to trigger capture (overlay, etc). */
350
radv_handle_thread_trace(VkQueue _queue)
352
RADV_FROM_HANDLE(radv_queue, queue, _queue);
353
static bool thread_trace_enabled = false;
354
static uint64_t num_frames = 0;
355
bool resize_trigger = false;
357
if (thread_trace_enabled) {
358
struct ac_thread_trace thread_trace = {0};
360
radv_end_thread_trace(queue);
361
thread_trace_enabled = false;
363
/* TODO: Do something better than this whole sync. */
364
queue->device->vk.dispatch_table.QueueWaitIdle(_queue);
366
if (radv_get_thread_trace(queue, &thread_trace)) {
367
struct ac_spm_trace_data *spm_trace = NULL;
369
if (queue->device->spm_trace.bo)
370
spm_trace = &queue->device->spm_trace;
372
ac_dump_rgp_capture(&queue->device->physical_device->rad_info, &thread_trace, spm_trace);
374
/* Trigger a new capture if the driver failed to get
375
* the trace because the buffer was too small.
377
resize_trigger = true;
381
if (!thread_trace_enabled) {
382
bool frame_trigger = num_frames == queue->device->thread_trace.start_frame;
383
bool file_trigger = false;
385
if (queue->device->thread_trace.trigger_file &&
386
access(queue->device->thread_trace.trigger_file, W_OK) == 0) {
387
if (unlink(queue->device->thread_trace.trigger_file) == 0) {
390
/* Do not enable tracing if we cannot remove the file,
391
* because by then we'll trace every frame ... */
392
fprintf(stderr, "RADV: could not remove thread trace trigger file, ignoring\n");
397
if (frame_trigger || file_trigger || resize_trigger) {
398
if (ac_check_profile_state(&queue->device->physical_device->rad_info)) {
399
fprintf(stderr, "radv: Canceling RGP trace request as a hang condition has been "
400
"detected. Force the GPU into a profiling mode with e.g. "
401
"\"echo profile_peak > "
402
"/sys/class/drm/card0/device/power_dpm_force_performance_level\"\n");
406
radv_begin_thread_trace(queue);
407
assert(!thread_trace_enabled);
408
thread_trace_enabled = true;
414
VKAPI_ATTR VkResult VKAPI_CALL
415
sqtt_QueuePresentKHR(VkQueue _queue, const VkPresentInfoKHR *pPresentInfo)
419
result = radv_QueuePresentKHR(_queue, pPresentInfo);
420
if (result != VK_SUCCESS)
423
radv_handle_thread_trace(_queue);
/* Wrap a radv_Cmd* entry point with begin/end general-API markers and set the
 * current event type around the call, so internal draws/dispatches issued by
 * the command are attributed to the right API command in RGP. `api_name` lets
 * a Vulkan alias (e.g. CopyBuffer2) report under its canonical marker name.
 * NOTE: declares `cmd_buffer` in the caller's scope by design. */
#define EVENT_MARKER_ALIAS(cmd_name, api_name, ...)                                                \
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);                                   \
   radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name);                              \
   cmd_buffer->state.current_event_type = EventCmd##api_name;                                      \
   radv_Cmd##cmd_name(__VA_ARGS__);                                                                \
   cmd_buffer->state.current_event_type = EventInternalUnknown;                                    \
   radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name);

#define EVENT_MARKER(cmd_name, ...) EVENT_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__);
438
VKAPI_ATTR void VKAPI_CALL
439
sqtt_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount,
440
uint32_t firstVertex, uint32_t firstInstance)
442
EVENT_MARKER(Draw, commandBuffer, vertexCount, instanceCount, firstVertex, firstInstance);
445
VKAPI_ATTR void VKAPI_CALL
446
sqtt_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount,
447
uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance)
449
EVENT_MARKER(DrawIndexed, commandBuffer, indexCount, instanceCount, firstIndex, vertexOffset,
453
VKAPI_ATTR void VKAPI_CALL
454
sqtt_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
455
uint32_t drawCount, uint32_t stride)
457
EVENT_MARKER(DrawIndirect, commandBuffer, buffer, offset, drawCount, stride);
460
VKAPI_ATTR void VKAPI_CALL
461
sqtt_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
462
uint32_t drawCount, uint32_t stride)
464
EVENT_MARKER(DrawIndexedIndirect, commandBuffer, buffer, offset, drawCount, stride);
467
VKAPI_ATTR void VKAPI_CALL
468
sqtt_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
469
VkBuffer countBuffer, VkDeviceSize countBufferOffset,
470
uint32_t maxDrawCount, uint32_t stride)
472
EVENT_MARKER(DrawIndirectCount, commandBuffer, buffer, offset, countBuffer, countBufferOffset,
473
maxDrawCount, stride);
476
VKAPI_ATTR void VKAPI_CALL
477
sqtt_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer,
478
VkDeviceSize offset, VkBuffer countBuffer,
479
VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
482
EVENT_MARKER(DrawIndexedIndirectCount, commandBuffer, buffer, offset, countBuffer,
483
countBufferOffset, maxDrawCount, stride);
486
VKAPI_ATTR void VKAPI_CALL
487
sqtt_CmdDispatch(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z)
489
EVENT_MARKER(Dispatch, commandBuffer, x, y, z);
492
VKAPI_ATTR void VKAPI_CALL
493
sqtt_CmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset)
495
EVENT_MARKER(DispatchIndirect, commandBuffer, buffer, offset);
498
VKAPI_ATTR void VKAPI_CALL
499
sqtt_CmdCopyBuffer2(VkCommandBuffer commandBuffer, const VkCopyBufferInfo2 *pCopyBufferInfo)
501
EVENT_MARKER_ALIAS(CopyBuffer2, CopyBuffer, commandBuffer, pCopyBufferInfo);
504
VKAPI_ATTR void VKAPI_CALL
505
sqtt_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset,
506
VkDeviceSize fillSize, uint32_t data)
508
EVENT_MARKER(FillBuffer, commandBuffer, dstBuffer, dstOffset, fillSize, data);
511
VKAPI_ATTR void VKAPI_CALL
512
sqtt_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset,
513
VkDeviceSize dataSize, const void *pData)
515
EVENT_MARKER(UpdateBuffer, commandBuffer, dstBuffer, dstOffset, dataSize, pData);
518
VKAPI_ATTR void VKAPI_CALL
519
sqtt_CmdCopyImage2(VkCommandBuffer commandBuffer, const VkCopyImageInfo2 *pCopyImageInfo)
521
EVENT_MARKER_ALIAS(CopyImage2, CopyImage, commandBuffer, pCopyImageInfo);
524
VKAPI_ATTR void VKAPI_CALL
525
sqtt_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
526
const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
528
EVENT_MARKER_ALIAS(CopyBufferToImage2, CopyBufferToImage, commandBuffer,
529
pCopyBufferToImageInfo);
532
VKAPI_ATTR void VKAPI_CALL
533
sqtt_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,
534
const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
536
EVENT_MARKER_ALIAS(CopyImageToBuffer2, CopyImageToBuffer, commandBuffer,
537
pCopyImageToBufferInfo);
540
VKAPI_ATTR void VKAPI_CALL
541
sqtt_CmdBlitImage2(VkCommandBuffer commandBuffer, const VkBlitImageInfo2 *pBlitImageInfo)
543
EVENT_MARKER_ALIAS(BlitImage2, BlitImage, commandBuffer, pBlitImageInfo);
546
VKAPI_ATTR void VKAPI_CALL
547
sqtt_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,
548
const VkClearColorValue *pColor, uint32_t rangeCount,
549
const VkImageSubresourceRange *pRanges)
551
EVENT_MARKER(ClearColorImage, commandBuffer, image_h, imageLayout, pColor, rangeCount, pRanges);
554
VKAPI_ATTR void VKAPI_CALL
555
sqtt_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image_h,
556
VkImageLayout imageLayout,
557
const VkClearDepthStencilValue *pDepthStencil, uint32_t rangeCount,
558
const VkImageSubresourceRange *pRanges)
560
EVENT_MARKER(ClearDepthStencilImage, commandBuffer, image_h, imageLayout, pDepthStencil,
561
rangeCount, pRanges);
564
VKAPI_ATTR void VKAPI_CALL
565
sqtt_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount,
566
const VkClearAttachment *pAttachments, uint32_t rectCount,
567
const VkClearRect *pRects)
569
EVENT_MARKER(ClearAttachments, commandBuffer, attachmentCount, pAttachments, rectCount, pRects);
572
VKAPI_ATTR void VKAPI_CALL
573
sqtt_CmdResolveImage2(VkCommandBuffer commandBuffer,
574
const VkResolveImageInfo2 *pResolveImageInfo)
576
EVENT_MARKER_ALIAS(ResolveImage2, ResolveImage, commandBuffer, pResolveImageInfo);
579
VKAPI_ATTR void VKAPI_CALL
580
sqtt_CmdWaitEvents2(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent* pEvents,
581
const VkDependencyInfo* pDependencyInfos)
583
EVENT_MARKER_ALIAS(WaitEvents2, WaitEvents, commandBuffer, eventCount, pEvents,
587
VKAPI_ATTR void VKAPI_CALL
588
sqtt_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,
589
const VkDependencyInfo* pDependencyInfo)
591
EVENT_MARKER_ALIAS(PipelineBarrier2, PipelineBarrier, commandBuffer, pDependencyInfo);
594
VKAPI_ATTR void VKAPI_CALL
595
sqtt_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery,
598
EVENT_MARKER(ResetQueryPool, commandBuffer, queryPool, firstQuery, queryCount);
601
VKAPI_ATTR void VKAPI_CALL
602
sqtt_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
603
uint32_t firstQuery, uint32_t queryCount, VkBuffer dstBuffer,
604
VkDeviceSize dstOffset, VkDeviceSize stride, VkQueryResultFlags flags)
606
EVENT_MARKER(CopyQueryPoolResults, commandBuffer, queryPool, firstQuery, queryCount, dstBuffer,
607
dstOffset, stride, flags);
/* Like EVENT_MARKER_ALIAS but for state-setting commands that do not produce
 * GPU events: only the begin/end general-API markers are emitted, without
 * touching current_event_type. Declares `cmd_buffer` in the caller's scope. */
#define API_MARKER_ALIAS(cmd_name, api_name, ...)                                                  \
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);                                   \
   radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name);                              \
   radv_Cmd##cmd_name(__VA_ARGS__);                                                                \
   radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name);

#define API_MARKER(cmd_name, ...) API_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__);
619
VKAPI_ATTR void VKAPI_CALL
620
sqtt_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
621
VkPipeline _pipeline)
623
RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
625
API_MARKER(BindPipeline, commandBuffer, pipelineBindPoint, _pipeline);
627
radv_describe_pipeline_bind(cmd_buffer, pipelineBindPoint, pipeline);
630
VKAPI_ATTR void VKAPI_CALL
631
sqtt_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
632
VkPipelineLayout layout, uint32_t firstSet, uint32_t descriptorSetCount,
633
const VkDescriptorSet *pDescriptorSets, uint32_t dynamicOffsetCount,
634
const uint32_t *pDynamicOffsets)
636
API_MARKER(BindDescriptorSets, commandBuffer, pipelineBindPoint, layout, firstSet,
637
descriptorSetCount, pDescriptorSets, dynamicOffsetCount, pDynamicOffsets);
640
VKAPI_ATTR void VKAPI_CALL
641
sqtt_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
642
VkIndexType indexType)
644
API_MARKER(BindIndexBuffer, commandBuffer, buffer, offset, indexType);
647
VKAPI_ATTR void VKAPI_CALL
648
sqtt_CmdBindVertexBuffers(VkCommandBuffer commandBuffer, uint32_t firstBinding,
649
uint32_t bindingCount, const VkBuffer *pBuffers,
650
const VkDeviceSize *pOffsets)
652
API_MARKER(BindVertexBuffers, commandBuffer, firstBinding, bindingCount, pBuffers, pOffsets);
655
VKAPI_ATTR void VKAPI_CALL
656
sqtt_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding,
657
uint32_t bindingCount, const VkBuffer *pBuffers,
658
const VkDeviceSize *pOffsets, const VkDeviceSize* pSizes,
659
const VkDeviceSize* pStrides)
661
API_MARKER_ALIAS(BindVertexBuffers2, BindVertexBuffers, commandBuffer, firstBinding,
662
bindingCount, pBuffers, pOffsets, pSizes, pStrides);
665
VKAPI_ATTR void VKAPI_CALL
666
sqtt_CmdBeginQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query,
667
VkQueryControlFlags flags)
669
API_MARKER(BeginQuery, commandBuffer, queryPool, query, flags);
672
VKAPI_ATTR void VKAPI_CALL
673
sqtt_CmdEndQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query)
675
API_MARKER(EndQuery, commandBuffer, queryPool, query);
678
VKAPI_ATTR void VKAPI_CALL
679
sqtt_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 stage,
680
VkQueryPool queryPool, uint32_t query)
682
API_MARKER_ALIAS(WriteTimestamp2, WriteTimestamp, commandBuffer, stage, queryPool, query);
685
VKAPI_ATTR void VKAPI_CALL
686
sqtt_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout,
687
VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size,
690
API_MARKER(PushConstants, commandBuffer, layout, stageFlags, offset, size, pValues);
693
VKAPI_ATTR void VKAPI_CALL
694
sqtt_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
695
const VkRenderPassBeginInfo *pRenderPassBeginInfo,
696
const VkSubpassBeginInfo *pSubpassBeginInfo)
698
API_MARKER_ALIAS(BeginRenderPass2, BeginRenderPass, commandBuffer, pRenderPassBeginInfo,
702
VKAPI_ATTR void VKAPI_CALL
703
sqtt_CmdNextSubpass2(VkCommandBuffer commandBuffer, const VkSubpassBeginInfo *pSubpassBeginInfo,
704
const VkSubpassEndInfo *pSubpassEndInfo)
706
API_MARKER_ALIAS(NextSubpass2, NextSubpass, commandBuffer, pSubpassBeginInfo, pSubpassEndInfo);
709
VKAPI_ATTR void VKAPI_CALL
710
sqtt_CmdEndRenderPass2(VkCommandBuffer commandBuffer, const VkSubpassEndInfo *pSubpassEndInfo)
712
API_MARKER_ALIAS(EndRenderPass2, EndRenderPass, commandBuffer, pSubpassEndInfo);
715
VKAPI_ATTR void VKAPI_CALL
716
sqtt_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount,
717
const VkCommandBuffer *pCmdBuffers)
719
API_MARKER(ExecuteCommands, commandBuffer, commandBufferCount, pCmdBuffers);
722
VKAPI_ATTR void VKAPI_CALL
723
sqtt_CmdSetViewport(VkCommandBuffer commandBuffer, uint32_t firstViewport, uint32_t viewportCount,
724
const VkViewport *pViewports)
726
API_MARKER(SetViewport, commandBuffer, firstViewport, viewportCount, pViewports);
729
VKAPI_ATTR void VKAPI_CALL
730
sqtt_CmdSetScissor(VkCommandBuffer commandBuffer, uint32_t firstScissor, uint32_t scissorCount,
731
const VkRect2D *pScissors)
733
API_MARKER(SetScissor, commandBuffer, firstScissor, scissorCount, pScissors);
736
VKAPI_ATTR void VKAPI_CALL
737
sqtt_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth)
739
API_MARKER(SetLineWidth, commandBuffer, lineWidth);
742
VKAPI_ATTR void VKAPI_CALL
743
sqtt_CmdSetDepthBias(VkCommandBuffer commandBuffer, float depthBiasConstantFactor,
744
float depthBiasClamp, float depthBiasSlopeFactor)
746
API_MARKER(SetDepthBias, commandBuffer, depthBiasConstantFactor, depthBiasClamp,
747
depthBiasSlopeFactor);
750
VKAPI_ATTR void VKAPI_CALL
751
sqtt_CmdSetBlendConstants(VkCommandBuffer commandBuffer, const float blendConstants[4])
753
API_MARKER(SetBlendConstants, commandBuffer, blendConstants);
756
VKAPI_ATTR void VKAPI_CALL
757
sqtt_CmdSetDepthBounds(VkCommandBuffer commandBuffer, float minDepthBounds, float maxDepthBounds)
759
API_MARKER(SetDepthBounds, commandBuffer, minDepthBounds, maxDepthBounds);
762
VKAPI_ATTR void VKAPI_CALL
763
sqtt_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,
764
uint32_t compareMask)
766
API_MARKER(SetStencilCompareMask, commandBuffer, faceMask, compareMask);
769
VKAPI_ATTR void VKAPI_CALL
770
sqtt_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,
773
API_MARKER(SetStencilWriteMask, commandBuffer, faceMask, writeMask);
776
VKAPI_ATTR void VKAPI_CALL
777
sqtt_CmdSetStencilReference(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,
780
API_MARKER(SetStencilReference, commandBuffer, faceMask, reference);
783
/* VK_EXT_debug_marker */
784
VKAPI_ATTR void VKAPI_CALL
785
sqtt_CmdDebugMarkerBeginEXT(VkCommandBuffer commandBuffer,
786
const VkDebugMarkerMarkerInfoEXT *pMarkerInfo)
788
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
789
radv_write_user_event_marker(cmd_buffer, UserEventPush, pMarkerInfo->pMarkerName);
792
VKAPI_ATTR void VKAPI_CALL
793
sqtt_CmdDebugMarkerEndEXT(VkCommandBuffer commandBuffer)
795
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
796
radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
799
VKAPI_ATTR void VKAPI_CALL
800
sqtt_CmdDebugMarkerInsertEXT(VkCommandBuffer commandBuffer,
801
const VkDebugMarkerMarkerInfoEXT *pMarkerInfo)
803
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
804
radv_write_user_event_marker(cmd_buffer, UserEventTrigger, pMarkerInfo->pMarkerName);
807
VKAPI_ATTR VkResult VKAPI_CALL
808
sqtt_DebugMarkerSetObjectNameEXT(VkDevice device, const VkDebugMarkerObjectNameInfoEXT *pNameInfo)
814
VKAPI_ATTR VkResult VKAPI_CALL
815
sqtt_DebugMarkerSetObjectTagEXT(VkDevice device, const VkDebugMarkerObjectTagInfoEXT *pTagInfo)
821
VKAPI_ATTR void VKAPI_CALL
822
sqtt_CmdBeginDebugUtilsLabelEXT(VkCommandBuffer commandBuffer,
823
const VkDebugUtilsLabelEXT *pLabelInfo)
825
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
826
radv_write_user_event_marker(cmd_buffer, UserEventPush, pLabelInfo->pLabelName);
828
vk_common_CmdBeginDebugUtilsLabelEXT(commandBuffer, pLabelInfo);
831
VKAPI_ATTR void VKAPI_CALL
832
sqtt_CmdEndDebugUtilsLabelEXT(VkCommandBuffer commandBuffer)
834
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
835
radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
837
vk_common_CmdEndDebugUtilsLabelEXT(commandBuffer);
840
VKAPI_ATTR void VKAPI_CALL
841
sqtt_CmdInsertDebugUtilsLabelEXT(VkCommandBuffer commandBuffer,
842
const VkDebugUtilsLabelEXT *pLabelInfo)
844
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
845
radv_write_user_event_marker(cmd_buffer, UserEventTrigger, pLabelInfo->pLabelName);
847
vk_common_CmdInsertDebugUtilsLabelEXT(commandBuffer, pLabelInfo);
851
static enum rgp_hardware_stages
852
radv_mesa_to_rgp_shader_stage(struct radv_pipeline *pipeline, gl_shader_stage stage)
854
struct radv_shader *shader = pipeline->shaders[stage];
857
case MESA_SHADER_VERTEX:
858
if (shader->info.vs.as_ls)
859
return RGP_HW_STAGE_LS;
860
else if (shader->info.vs.as_es)
861
return RGP_HW_STAGE_ES;
862
else if (shader->info.is_ngg)
863
return RGP_HW_STAGE_GS;
865
return RGP_HW_STAGE_VS;
866
case MESA_SHADER_TESS_CTRL:
867
return RGP_HW_STAGE_HS;
868
case MESA_SHADER_TESS_EVAL:
869
if (shader->info.tes.as_es)
870
return RGP_HW_STAGE_ES;
871
else if (shader->info.is_ngg)
872
return RGP_HW_STAGE_GS;
874
return RGP_HW_STAGE_VS;
875
case MESA_SHADER_GEOMETRY:
876
return RGP_HW_STAGE_GS;
877
case MESA_SHADER_FRAGMENT:
878
return RGP_HW_STAGE_PS;
879
case MESA_SHADER_COMPUTE:
880
return RGP_HW_STAGE_CS;
882
unreachable("invalid mesa shader stage");
887
radv_add_code_object(struct radv_device *device, struct radv_pipeline *pipeline)
889
struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;
890
struct rgp_code_object *code_object = &thread_trace_data->rgp_code_object;
891
struct rgp_code_object_record *record;
893
record = malloc(sizeof(struct rgp_code_object_record));
895
return VK_ERROR_OUT_OF_HOST_MEMORY;
897
record->shader_stages_mask = 0;
898
record->num_shaders_combined = 0;
899
record->pipeline_hash[0] = pipeline->pipeline_hash;
900
record->pipeline_hash[1] = pipeline->pipeline_hash;
902
for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
903
struct radv_shader *shader = pipeline->shaders[i];
910
code = malloc(shader->code_size);
913
return VK_ERROR_OUT_OF_HOST_MEMORY;
915
memcpy(code, shader->code_ptr, shader->code_size);
917
va = radv_shader_get_va(shader);
919
record->shader_data[i].hash[0] = (uint64_t)(uintptr_t)shader;
920
record->shader_data[i].hash[1] = (uint64_t)(uintptr_t)shader >> 32;
921
record->shader_data[i].code_size = shader->code_size;
922
record->shader_data[i].code = code;
923
record->shader_data[i].vgpr_count = shader->config.num_vgprs;
924
record->shader_data[i].sgpr_count = shader->config.num_sgprs;
925
record->shader_data[i].scratch_memory_size = shader->config.scratch_bytes_per_wave;
926
record->shader_data[i].wavefront_size = shader->info.wave_size;
927
record->shader_data[i].base_address = va & 0xffffffffffff;
928
record->shader_data[i].elf_symbol_offset = 0;
929
record->shader_data[i].hw_stage = radv_mesa_to_rgp_shader_stage(pipeline, i);
930
record->shader_data[i].is_combined = false;
932
record->shader_stages_mask |= (1 << i);
933
record->num_shaders_combined++;
936
simple_mtx_lock(&code_object->lock);
937
list_addtail(&record->list, &code_object->record);
938
code_object->record_count++;
939
simple_mtx_unlock(&code_object->lock);
945
radv_register_pipeline(struct radv_device *device, struct radv_pipeline *pipeline)
948
uint64_t base_va = ~0;
950
result = ac_sqtt_add_pso_correlation(&device->thread_trace, pipeline->pipeline_hash);
952
return VK_ERROR_OUT_OF_HOST_MEMORY;
954
/* Find the lowest shader BO VA. */
955
for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
956
struct radv_shader *shader = pipeline->shaders[i];
962
va = radv_shader_get_va(shader);
963
base_va = MIN2(base_va, va);
967
ac_sqtt_add_code_object_loader_event(&device->thread_trace, pipeline->pipeline_hash, base_va);
969
return VK_ERROR_OUT_OF_HOST_MEMORY;
971
result = radv_add_code_object(device, pipeline);
972
if (result != VK_SUCCESS)
979
radv_unregister_pipeline(struct radv_device *device, struct radv_pipeline *pipeline)
981
struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;
982
struct rgp_pso_correlation *pso_correlation = &thread_trace_data->rgp_pso_correlation;
983
struct rgp_loader_events *loader_events = &thread_trace_data->rgp_loader_events;
984
struct rgp_code_object *code_object = &thread_trace_data->rgp_code_object;
986
/* Destroy the PSO correlation record. */
987
simple_mtx_lock(&pso_correlation->lock);
988
list_for_each_entry_safe(struct rgp_pso_correlation_record, record, &pso_correlation->record,
991
if (record->pipeline_hash[0] == pipeline->pipeline_hash) {
992
pso_correlation->record_count--;
993
list_del(&record->list);
998
simple_mtx_unlock(&pso_correlation->lock);
1000
/* Destroy the code object loader record. */
1001
simple_mtx_lock(&loader_events->lock);
1002
list_for_each_entry_safe(struct rgp_loader_events_record, record, &loader_events->record, list)
1004
if (record->code_object_hash[0] == pipeline->pipeline_hash) {
1005
loader_events->record_count--;
1006
list_del(&record->list);
1011
simple_mtx_unlock(&loader_events->lock);
1013
/* Destroy the code object record. */
1014
simple_mtx_lock(&code_object->lock);
1015
list_for_each_entry_safe(struct rgp_code_object_record, record, &code_object->record, list)
1017
if (record->pipeline_hash[0] == pipeline->pipeline_hash) {
1018
uint32_t mask = record->shader_stages_mask;
1021
/* Free the disassembly. */
1023
i = u_bit_scan(&mask);
1024
free(record->shader_data[i].code);
1027
code_object->record_count--;
1028
list_del(&record->list);
1033
simple_mtx_unlock(&code_object->lock);
1036
VKAPI_ATTR VkResult VKAPI_CALL
1037
sqtt_CreateGraphicsPipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
1038
const VkGraphicsPipelineCreateInfo *pCreateInfos,
1039
const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
1041
RADV_FROM_HANDLE(radv_device, device, _device);
1044
result = radv_CreateGraphicsPipelines(_device, pipelineCache, count, pCreateInfos, pAllocator,
1046
if (result != VK_SUCCESS)
1049
for (unsigned i = 0; i < count; i++) {
1050
RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);
1055
result = radv_register_pipeline(device, pipeline);
1056
if (result != VK_SUCCESS)
1063
for (unsigned i = 0; i < count; i++) {
1064
sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);
1065
pPipelines[i] = VK_NULL_HANDLE;
1070
VKAPI_ATTR VkResult VKAPI_CALL
1071
sqtt_CreateComputePipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
1072
const VkComputePipelineCreateInfo *pCreateInfos,
1073
const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
1075
RADV_FROM_HANDLE(radv_device, device, _device);
1078
result = radv_CreateComputePipelines(_device, pipelineCache, count, pCreateInfos, pAllocator,
1080
if (result != VK_SUCCESS)
1083
for (unsigned i = 0; i < count; i++) {
1084
RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);
1089
result = radv_register_pipeline(device, pipeline);
1090
if (result != VK_SUCCESS)
1097
for (unsigned i = 0; i < count; i++) {
1098
sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);
1099
pPipelines[i] = VK_NULL_HANDLE;
1104
VKAPI_ATTR void VKAPI_CALL
1105
sqtt_DestroyPipeline(VkDevice _device, VkPipeline _pipeline,
1106
const VkAllocationCallbacks *pAllocator)
1108
RADV_FROM_HANDLE(radv_device, device, _device);
1109
RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
1114
radv_unregister_pipeline(device, pipeline);
1116
radv_DestroyPipeline(_device, _pipeline, pAllocator);