2
* Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
39
#include <sys/types.h>
42
#include "freedreno_pm4.h"
51
/* ************************************************************************* */
52
/* originally based on kernel recovery dump code: */
54
static const struct cffdec_options *options;
56
static bool needs_wfi = false;
57
static bool summary = false;
58
static bool in_summary = false;
61
static inline unsigned
64
if (options->gpu_id >= 500)
73
return options->gpu_id >= 500;
79
uint32_t size; /* in dwords */
80
/* Generally cmdstream consists of multiple IB calls to different
81
* buffers, which are themselves often re-used for each tile. The
82
* triggered flag serves two purposes to help make it more clear
83
* what part of the cmdstream is before vs after the GPU hang:
85
* 1) if in IB2 we are past the point within the IB2 buffer where
86
* the GPU hung, but IB1 is not past the point within its
87
* buffer where the GPU had hung, then we know the GPU hang
88
* happens on a future use of that IB2 buffer.
90
* 2) if in an IB1 or IB2 buffer that is not the one where the GPU
91
* hung, but we've already passed the trigger point at the same
92
* IB level, we know that we are past the point where the GPU
95
* So this is a one way switch, false->true. And a higher #'d
96
* IB level isn't considered triggered unless the lower #'d IB
103
static int draw_count;
104
static int current_draw_count;
106
/* query mode.. to handle symbolic register name queries, we need to
107
* defer parsing query string until after gpu_id is known and rnn db
110
static int *queryvals;
115
if ((options->draw_filter != -1) &&
116
(options->draw_filter != current_draw_count))
118
if ((lvl >= 3) && (summary || options->querystrs || options->script))
120
if ((lvl >= 2) && (options->querystrs || options->script))
126
printl(int lvl, const char *fmt, ...)
136
static const char *levels[] = {
145
"\t\t\t\t\t\t\t\t\t",
160
/* SDS (CP_SET_DRAW_STATE) helpers: */
161
static void load_all_groups(int level);
162
static void disable_all_groups(void);
164
static void dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit,
166
static void dump_tex_const(uint32_t *texsamp, int num_unit, int level);
169
highlight_gpuaddr(uint64_t gpuaddr)
171
if (!options->ibs[ib].base)
174
if ((ib > 0) && options->ibs[ib - 1].base && !ibs[ib - 1].triggered)
177
if (ibs[ib].triggered)
178
return options->color;
180
if (options->ibs[ib].base != ibs[ib].base)
183
uint64_t start = ibs[ib].base + 4 * (ibs[ib].size - options->ibs[ib].rem);
184
uint64_t end = ibs[ib].base + 4 * ibs[ib].size;
186
bool triggered = (start <= gpuaddr) && (gpuaddr <= end);
188
ibs[ib].triggered |= triggered;
191
printf("ESTIMATED CRASH LOCATION!\n");
193
return triggered & options->color;
197
dump_hex(uint32_t *dwords, uint32_t sizedwords, int level)
205
for (i = 0; i < sizedwords; i += 8) {
208
/* always show first row: */
212
for (j = 0; (j < 8) && (i + j < sizedwords) && zero; j++)
216
if (zero && !lastzero)
224
uint64_t addr = gpuaddr(&dwords[i]);
225
bool highlight = highlight_gpuaddr(addr);
228
printf("\x1b[0;1;31m");
231
printf("%016" PRIx64 ":%s", addr, levels[level]);
233
printf("%08x:%s", (uint32_t)addr, levels[level]);
239
printf("%04x:", i * 4);
241
for (j = 0; (j < 8) && (i + j < sizedwords); j++) {
242
printf(" %08x", dwords[i + j]);
250
dump_float(float *dwords, uint32_t sizedwords, int level)
253
for (i = 0; i < sizedwords; i++) {
256
printf("%016" PRIx64 ":%s", gpuaddr(dwords), levels[level]);
258
printf("%08x:%s", (uint32_t)gpuaddr(dwords), levels[level]);
263
printf("%8f", *(dwords++));
271
/* I believe the surface format is low bits:
272
#define RB_COLOR_INFO__COLOR_FORMAT_MASK 0x0000000fL
273
comments in sys2gmem_tex_const indicate that address is [31:12], but
274
looks like at least some of the bits above the format have different meaning..
277
parse_dword_addr(uint32_t dword, uint32_t *gpuaddr, uint32_t *flags,
280
assert(!is_64b()); /* this is only used on a2xx */
281
*gpuaddr = dword & ~mask;
282
*flags = dword & mask;
285
static uint32_t type0_reg_vals[0xffff + 1];
286
static uint8_t type0_reg_rewritten[sizeof(type0_reg_vals) /
287
8]; /* written since last draw */
288
static uint8_t type0_reg_written[sizeof(type0_reg_vals) / 8];
289
static uint32_t lastvals[ARRAY_SIZE(type0_reg_vals)];
292
reg_rewritten(uint32_t regbase)
294
return !!(type0_reg_rewritten[regbase / 8] & (1 << (regbase % 8)));
298
reg_written(uint32_t regbase)
300
return !!(type0_reg_written[regbase / 8] & (1 << (regbase % 8)));
304
clear_rewritten(void)
306
memset(type0_reg_rewritten, 0, sizeof(type0_reg_rewritten));
312
memset(type0_reg_written, 0, sizeof(type0_reg_written));
317
reg_lastval(uint32_t regbase)
319
return lastvals[regbase];
325
memset(lastvals, 0, sizeof(lastvals));
329
reg_val(uint32_t regbase)
331
return type0_reg_vals[regbase];
335
reg_set(uint32_t regbase, uint32_t val)
337
assert(regbase < regcnt());
338
type0_reg_vals[regbase] = val;
339
type0_reg_written[regbase / 8] |= (1 << (regbase % 8));
340
type0_reg_rewritten[regbase / 8] |= (1 << (regbase % 8));
344
reg_dump_scratch(const char *name, uint32_t dword, int level)
351
r = regbase("CP_SCRATCH[0].REG");
353
// if not, try old a2xx/a3xx version:
355
r = regbase("CP_SCRATCH_REG0");
360
printf("%s:%u,%u,%u,%u\n", levels[level], reg_val(r + 4), reg_val(r + 5),
361
reg_val(r + 6), reg_val(r + 7));
365
dump_gpuaddr_size(uint64_t gpuaddr, int level, int sizedwords, int quietlvl)
372
buf = hostptr(gpuaddr);
374
dump_hex(buf, sizedwords, level + 1);
379
dump_gpuaddr(uint64_t gpuaddr, int level)
381
dump_gpuaddr_size(gpuaddr, level, 64, 3);
385
reg_dump_gpuaddr(const char *name, uint32_t dword, int level)
387
dump_gpuaddr(dword, level);
392
reg_gpuaddr_lo(const char *name, uint32_t dword, int level)
398
reg_dump_gpuaddr_hi(const char *name, uint32_t dword, int level)
400
dump_gpuaddr(gpuaddr_lo | (((uint64_t)dword) << 32), level);
404
reg_dump_gpuaddr64(const char *name, uint64_t qword, int level)
406
dump_gpuaddr(qword, level);
410
dump_shader(const char *ext, void *buf, int bufsz)
412
if (options->dump_shaders) {
416
sprintf(filename, "%04d.%s", n++, ext);
417
fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT, 0644);
419
write(fd, buf, bufsz);
426
disasm_gpuaddr(const char *name, uint64_t gpuaddr, int level)
430
gpuaddr &= 0xfffffffffffffff0;
435
buf = hostptr(gpuaddr);
437
uint32_t sizedwords = hostlen(gpuaddr) / 4;
440
dump_hex(buf, min(64, sizedwords), level + 1);
441
try_disasm_a3xx(buf, sizedwords, level + 2, stdout, options->gpu_id);
443
/* this is a bit ugly way, but oh well.. */
444
if (strstr(name, "SP_VS_OBJ")) {
446
} else if (strstr(name, "SP_FS_OBJ")) {
448
} else if (strstr(name, "SP_GS_OBJ")) {
450
} else if (strstr(name, "SP_CS_OBJ")) {
457
dump_shader(ext, buf, sizedwords * 4);
462
reg_disasm_gpuaddr(const char *name, uint32_t dword, int level)
464
disasm_gpuaddr(name, dword, level);
468
reg_disasm_gpuaddr_hi(const char *name, uint32_t dword, int level)
470
disasm_gpuaddr(name, gpuaddr_lo | (((uint64_t)dword) << 32), level);
474
reg_disasm_gpuaddr64(const char *name, uint64_t qword, int level)
476
disasm_gpuaddr(name, qword, level);
479
/* Find the value of the TEX_COUNT register that corresponds to the named
480
* TEX_SAMP/TEX_CONST reg.
482
* Note, this kinda assumes an equal # of samplers and textures, but not
483
* really sure if there is a much better option. I suppose on a6xx we
484
* could instead decode the bitfields in SP_xS_CONFIG
487
get_tex_count(const char *name)
489
char count_reg[strlen(name) + 5];
492
p = strstr(name, "CONST");
494
p = strstr(name, "SAMP");
499
strncpy(count_reg, name, n);
500
strcpy(count_reg + n, "COUNT");
502
return reg_val(regbase(count_reg));
506
reg_dump_tex_samp_hi(const char *name, uint32_t dword, int level)
511
int num_unit = get_tex_count(name);
512
uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
513
void *buf = hostptr(gpuaddr);
518
dump_tex_samp(buf, STATE_SRC_DIRECT, num_unit, level + 1);
522
reg_dump_tex_const_hi(const char *name, uint32_t dword, int level)
527
int num_unit = get_tex_count(name);
528
uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
529
void *buf = hostptr(gpuaddr);
534
dump_tex_const(buf, num_unit, level + 1);
538
* Registers with special handling (rnndec_decode() handles rest):
540
/* Build one special-handling table entry for a 32-bit register: the first
 * member is the stringified register name `x`, the second (positional)
 * member is the handler `fxn`, which takes (name, dword, level).
 */
#define REG(x, fxn) { #x, fxn }
541
/* Build one special-handling table entry for a 64-bit register: the first
 * member is the stringified register name `x`; `fxn` is stored in the
 * `.fxn64` member (handler taking (name, qword, level)) and `.is_reg64`
 * is set so the dispatcher knows to pass a 64-bit value.
 */
#define REG64(x, fxn) { #x, .fxn64 = fxn, .is_reg64 = true }
544
void (*fxn)(const char *name, uint32_t dword, int level);
545
void (*fxn64)(const char *name, uint64_t qword, int level);
549
REG(CP_SCRATCH_REG0, reg_dump_scratch),
550
REG(CP_SCRATCH_REG1, reg_dump_scratch),
551
REG(CP_SCRATCH_REG2, reg_dump_scratch),
552
REG(CP_SCRATCH_REG3, reg_dump_scratch),
553
REG(CP_SCRATCH_REG4, reg_dump_scratch),
554
REG(CP_SCRATCH_REG5, reg_dump_scratch),
555
REG(CP_SCRATCH_REG6, reg_dump_scratch),
556
REG(CP_SCRATCH_REG7, reg_dump_scratch),
559
REG(CP_SCRATCH_REG0, reg_dump_scratch),
560
REG(CP_SCRATCH_REG1, reg_dump_scratch),
561
REG(CP_SCRATCH_REG2, reg_dump_scratch),
562
REG(CP_SCRATCH_REG3, reg_dump_scratch),
563
REG(CP_SCRATCH_REG4, reg_dump_scratch),
564
REG(CP_SCRATCH_REG5, reg_dump_scratch),
565
REG(CP_SCRATCH_REG6, reg_dump_scratch),
566
REG(CP_SCRATCH_REG7, reg_dump_scratch),
567
REG(VSC_SIZE_ADDRESS, reg_dump_gpuaddr),
568
REG(SP_VS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
569
REG(SP_FS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
570
REG(SP_VS_OBJ_START_REG, reg_disasm_gpuaddr),
571
REG(SP_FS_OBJ_START_REG, reg_disasm_gpuaddr),
572
REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
575
REG(CP_SCRATCH[0].REG, reg_dump_scratch),
576
REG(CP_SCRATCH[0x1].REG, reg_dump_scratch),
577
REG(CP_SCRATCH[0x2].REG, reg_dump_scratch),
578
REG(CP_SCRATCH[0x3].REG, reg_dump_scratch),
579
REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
580
REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
581
REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
582
REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
583
REG(SP_VS_PVT_MEM_ADDR, reg_dump_gpuaddr),
584
REG(SP_FS_PVT_MEM_ADDR, reg_dump_gpuaddr),
585
REG(SP_GS_PVT_MEM_ADDR, reg_dump_gpuaddr),
586
REG(SP_HS_PVT_MEM_ADDR, reg_dump_gpuaddr),
587
REG(SP_DS_PVT_MEM_ADDR, reg_dump_gpuaddr),
588
REG(SP_CS_PVT_MEM_ADDR, reg_dump_gpuaddr),
589
REG(SP_VS_OBJ_START, reg_disasm_gpuaddr),
590
REG(SP_FS_OBJ_START, reg_disasm_gpuaddr),
591
REG(SP_GS_OBJ_START, reg_disasm_gpuaddr),
592
REG(SP_HS_OBJ_START, reg_disasm_gpuaddr),
593
REG(SP_DS_OBJ_START, reg_disasm_gpuaddr),
594
REG(SP_CS_OBJ_START, reg_disasm_gpuaddr),
595
REG(TPL1_TP_VS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
596
REG(TPL1_TP_HS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
597
REG(TPL1_TP_DS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
598
REG(TPL1_TP_GS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
599
REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
602
REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
603
REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
604
REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
605
REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
606
REG(SP_VS_OBJ_START_LO, reg_gpuaddr_lo),
607
REG(SP_VS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
608
REG(SP_HS_OBJ_START_LO, reg_gpuaddr_lo),
609
REG(SP_HS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
610
REG(SP_DS_OBJ_START_LO, reg_gpuaddr_lo),
611
REG(SP_DS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
612
REG(SP_GS_OBJ_START_LO, reg_gpuaddr_lo),
613
REG(SP_GS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
614
REG(SP_FS_OBJ_START_LO, reg_gpuaddr_lo),
615
REG(SP_FS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
616
REG(SP_CS_OBJ_START_LO, reg_gpuaddr_lo),
617
REG(SP_CS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
618
REG(TPL1_VS_TEX_CONST_LO, reg_gpuaddr_lo),
619
REG(TPL1_VS_TEX_CONST_HI, reg_dump_tex_const_hi),
620
REG(TPL1_VS_TEX_SAMP_LO, reg_gpuaddr_lo),
621
REG(TPL1_VS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
622
REG(TPL1_HS_TEX_CONST_LO, reg_gpuaddr_lo),
623
REG(TPL1_HS_TEX_CONST_HI, reg_dump_tex_const_hi),
624
REG(TPL1_HS_TEX_SAMP_LO, reg_gpuaddr_lo),
625
REG(TPL1_HS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
626
REG(TPL1_DS_TEX_CONST_LO, reg_gpuaddr_lo),
627
REG(TPL1_DS_TEX_CONST_HI, reg_dump_tex_const_hi),
628
REG(TPL1_DS_TEX_SAMP_LO, reg_gpuaddr_lo),
629
REG(TPL1_DS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
630
REG(TPL1_GS_TEX_CONST_LO, reg_gpuaddr_lo),
631
REG(TPL1_GS_TEX_CONST_HI, reg_dump_tex_const_hi),
632
REG(TPL1_GS_TEX_SAMP_LO, reg_gpuaddr_lo),
633
REG(TPL1_GS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
634
REG(TPL1_FS_TEX_CONST_LO, reg_gpuaddr_lo),
635
REG(TPL1_FS_TEX_CONST_HI, reg_dump_tex_const_hi),
636
REG(TPL1_FS_TEX_SAMP_LO, reg_gpuaddr_lo),
637
REG(TPL1_FS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
638
REG(TPL1_CS_TEX_CONST_LO, reg_gpuaddr_lo),
639
REG(TPL1_CS_TEX_CONST_HI, reg_dump_tex_const_hi),
640
REG(TPL1_CS_TEX_SAMP_LO, reg_gpuaddr_lo),
641
REG(TPL1_CS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
642
REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_LO, reg_gpuaddr_lo),
643
REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_HI, reg_dump_gpuaddr_hi),
644
// REG(RB_MRT_FLAG_BUFFER[0].ADDR_LO, reg_gpuaddr_lo),
645
// REG(RB_MRT_FLAG_BUFFER[0].ADDR_HI, reg_dump_gpuaddr_hi),
646
// REG(RB_MRT_FLAG_BUFFER[1].ADDR_LO, reg_gpuaddr_lo),
647
// REG(RB_MRT_FLAG_BUFFER[1].ADDR_HI, reg_dump_gpuaddr_hi),
648
// REG(RB_MRT_FLAG_BUFFER[2].ADDR_LO, reg_gpuaddr_lo),
649
// REG(RB_MRT_FLAG_BUFFER[2].ADDR_HI, reg_dump_gpuaddr_hi),
650
// REG(RB_MRT_FLAG_BUFFER[3].ADDR_LO, reg_gpuaddr_lo),
651
// REG(RB_MRT_FLAG_BUFFER[3].ADDR_HI, reg_dump_gpuaddr_hi),
652
// REG(RB_MRT_FLAG_BUFFER[4].ADDR_LO, reg_gpuaddr_lo),
653
// REG(RB_MRT_FLAG_BUFFER[4].ADDR_HI, reg_dump_gpuaddr_hi),
654
// REG(RB_MRT_FLAG_BUFFER[5].ADDR_LO, reg_gpuaddr_lo),
655
// REG(RB_MRT_FLAG_BUFFER[5].ADDR_HI, reg_dump_gpuaddr_hi),
656
// REG(RB_MRT_FLAG_BUFFER[6].ADDR_LO, reg_gpuaddr_lo),
657
// REG(RB_MRT_FLAG_BUFFER[6].ADDR_HI, reg_dump_gpuaddr_hi),
658
// REG(RB_MRT_FLAG_BUFFER[7].ADDR_LO, reg_gpuaddr_lo),
659
// REG(RB_MRT_FLAG_BUFFER[7].ADDR_HI, reg_dump_gpuaddr_hi),
660
// REG(RB_BLIT_FLAG_DST_LO, reg_gpuaddr_lo),
661
// REG(RB_BLIT_FLAG_DST_HI, reg_dump_gpuaddr_hi),
662
// REG(RB_MRT[0].BASE_LO, reg_gpuaddr_lo),
663
// REG(RB_MRT[0].BASE_HI, reg_dump_gpuaddr_hi),
664
// REG(RB_DEPTH_BUFFER_BASE_LO, reg_gpuaddr_lo),
665
// REG(RB_DEPTH_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
666
// REG(RB_DEPTH_FLAG_BUFFER_BASE_LO, reg_gpuaddr_lo),
667
// REG(RB_DEPTH_FLAG_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
668
// REG(RB_BLIT_DST_LO, reg_gpuaddr_lo),
669
// REG(RB_BLIT_DST_HI, reg_dump_gpuaddr_hi),
671
// REG(RB_2D_SRC_LO, reg_gpuaddr_lo),
672
// REG(RB_2D_SRC_HI, reg_dump_gpuaddr_hi),
673
// REG(RB_2D_SRC_FLAGS_LO, reg_gpuaddr_lo),
674
// REG(RB_2D_SRC_FLAGS_HI, reg_dump_gpuaddr_hi),
675
// REG(RB_2D_DST_LO, reg_gpuaddr_lo),
676
// REG(RB_2D_DST_HI, reg_dump_gpuaddr_hi),
677
// REG(RB_2D_DST_FLAGS_LO, reg_gpuaddr_lo),
678
// REG(RB_2D_DST_FLAGS_HI, reg_dump_gpuaddr_hi),
682
REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
683
REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
684
REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
685
REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
687
REG64(SP_VS_OBJ_START, reg_disasm_gpuaddr64),
688
REG64(SP_HS_OBJ_START, reg_disasm_gpuaddr64),
689
REG64(SP_DS_OBJ_START, reg_disasm_gpuaddr64),
690
REG64(SP_GS_OBJ_START, reg_disasm_gpuaddr64),
691
REG64(SP_FS_OBJ_START, reg_disasm_gpuaddr64),
692
REG64(SP_CS_OBJ_START, reg_disasm_gpuaddr64),
694
REG64(SP_VS_TEX_CONST, reg_dump_gpuaddr64),
695
REG64(SP_VS_TEX_SAMP, reg_dump_gpuaddr64),
696
REG64(SP_HS_TEX_CONST, reg_dump_gpuaddr64),
697
REG64(SP_HS_TEX_SAMP, reg_dump_gpuaddr64),
698
REG64(SP_DS_TEX_CONST, reg_dump_gpuaddr64),
699
REG64(SP_DS_TEX_SAMP, reg_dump_gpuaddr64),
700
REG64(SP_GS_TEX_CONST, reg_dump_gpuaddr64),
701
REG64(SP_GS_TEX_SAMP, reg_dump_gpuaddr64),
702
REG64(SP_FS_TEX_CONST, reg_dump_gpuaddr64),
703
REG64(SP_FS_TEX_SAMP, reg_dump_gpuaddr64),
704
REG64(SP_CS_TEX_CONST, reg_dump_gpuaddr64),
705
REG64(SP_CS_TEX_SAMP, reg_dump_gpuaddr64),
710
static struct rnn *rnn;
713
init_rnn(const char *gpuname)
715
rnn = rnn_new(!options->color);
717
rnn_load(rnn, gpuname);
719
if (options->querystrs) {
721
queryvals = calloc(options->nquery, sizeof(queryvals[0]));
723
for (i = 0; i < options->nquery; i++) {
724
int val = strtol(options->querystrs[i], NULL, 0);
727
val = regbase(options->querystrs[i]);
730
printf("querystr: %s -> 0x%x\n", options->querystrs[i], queryvals[i]);
734
for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
735
type0_reg[idx].regbase = regbase(type0_reg[idx].regname);
736
if (!type0_reg[idx].regbase) {
737
printf("invalid register name: %s\n", type0_reg[idx].regname);
748
memset(&ibs, 0, sizeof(ibs));
752
cffdec_init(const struct cffdec_options *_options)
755
summary = options->summary;
757
/* in case we're decoding multiple files: */
762
/* TODO we need an API to free/cleanup any previous rnn */
764
switch (options->gpu_id) {
766
type0_reg = reg_a2xx;
770
type0_reg = reg_a3xx;
774
type0_reg = reg_a4xx;
778
type0_reg = reg_a5xx;
782
type0_reg = reg_a6xx;
786
errx(-1, "unsupported gpu");
791
pktname(unsigned opc)
793
return rnn_enumname(rnn, "adreno_pm4_type3_packets", opc);
797
regname(uint32_t regbase, int color)
799
return rnn_regname(rnn, regbase, color);
803
regbase(const char *name)
805
return rnn_regbase(rnn, name);
809
endswith(uint32_t regbase, const char *suffix)
811
const char *name = regname(regbase, 0);
812
const char *s = strstr(name, suffix);
815
return (s - strlen(name) + strlen(suffix)) == name;
819
dump_register_val(uint32_t regbase, uint32_t dword, int level)
821
struct rnndecaddrinfo *info = rnn_reginfo(rnn, regbase);
823
if (info && info->typeinfo) {
824
uint64_t gpuaddr = 0;
825
char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, dword);
826
printf("%s%s: %s", levels[level], info->name, decoded);
828
/* Try and figure out if we are looking at a gpuaddr.. this
829
* might be useful for other gen's too, but at least a5xx has
830
* the _HI/_LO suffix we can look for. Maybe a better approach
831
* would be some special annotation in the xml..
832
* for a6xx use "address" and "waddress" types
834
if (options->gpu_id >= 600) {
835
if (!strcmp(info->typeinfo->name, "address") ||
836
!strcmp(info->typeinfo->name, "waddress")) {
837
gpuaddr = (((uint64_t)reg_val(regbase + 1)) << 32) | dword;
839
} else if (options->gpu_id >= 500) {
840
if (endswith(regbase, "_HI") && endswith(regbase - 1, "_LO")) {
841
gpuaddr = (((uint64_t)dword) << 32) | reg_val(regbase - 1);
842
} else if (endswith(regbase, "_LO") && endswith(regbase + 1, "_HI")) {
843
gpuaddr = (((uint64_t)reg_val(regbase + 1)) << 32) | dword;
847
if (gpuaddr && hostptr(gpuaddr)) {
848
printf("\t\tbase=%" PRIx64 ", offset=%" PRIu64 ", size=%u",
849
gpubaseaddr(gpuaddr), gpuaddr - gpubaseaddr(gpuaddr),
850
hostlen(gpubaseaddr(gpuaddr)));
857
printf("%s%s: %08x\n", levels[level], info->name, dword);
859
printf("%s<%04x>: %08x\n", levels[level], regbase, dword);
869
dump_register(uint32_t regbase, uint32_t dword, int level)
872
dump_register_val(regbase, dword, level);
875
for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
876
if (type0_reg[idx].regbase == regbase) {
877
if (type0_reg[idx].is_reg64) {
878
uint64_t qword = (((uint64_t)reg_val(regbase + 1)) << 32) | dword;
879
type0_reg[idx].fxn64(type0_reg[idx].regname, qword, level);
881
type0_reg[idx].fxn(type0_reg[idx].regname, dword, level);
889
is_banked_reg(uint32_t regbase)
891
return (0x2000 <= regbase) && (regbase < 0x2400);
895
dump_registers(uint32_t regbase, uint32_t *dwords, uint32_t sizedwords,
898
while (sizedwords--) {
899
int last_summary = summary;
901
/* access to non-banked registers needs a WFI:
902
* TODO banked register range for a2xx??
904
if (needs_wfi && !is_banked_reg(regbase))
905
printl(2, "NEEDS WFI: %s (%x)\n", regname(regbase, 1), regbase);
907
reg_set(regbase, *dwords);
908
dump_register(regbase, *dwords, level);
911
summary = last_summary;
916
dump_domain(uint32_t *dwords, uint32_t sizedwords, int level, const char *name)
918
struct rnndomain *dom;
921
dom = rnn_finddomain(rnn->db, name);
927
script_packet(dwords, sizedwords, rnn, dom);
932
for (i = 0; i < sizedwords; i++) {
933
struct rnndecaddrinfo *info = rnndec_decodeaddr(rnn->vc, dom, i, 0);
935
if (!(info && info->typeinfo))
937
uint64_t value = dwords[i];
938
if (info->typeinfo->high >= 32 && i < sizedwords - 1) {
939
value |= (uint64_t)dwords[i + 1] << 32;
940
i++; /* skip the next dword since we're printing it now */
942
decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value);
943
/* Unlike the register printing path, we don't print the name
944
* of the register, so if it doesn't contain other named
945
* things (i.e. it isn't a bitset) then print the register
946
* name as if it's a bitset with a single entry. This avoids
947
* having to create a dummy register with a single entry to
948
* get a name in the decoding.
950
if (info->typeinfo->type == RNN_TTYPE_BITSET ||
951
info->typeinfo->type == RNN_TTYPE_INLINE_BITSET) {
952
printf("%s%s\n", levels[level], decoded);
954
printf("%s{ %s%s%s = %s }\n", levels[level], rnn->vc->colors->rname,
955
info->name, rnn->vc->colors->reset, decoded);
963
static uint32_t bin_x1, bin_x2, bin_y1, bin_y2;
964
static unsigned mode;
965
static const char *render_mode;
970
MODE_ALL = MODE_BINNING | MODE_GMEM | MODE_BYPASS,
971
} enable_mask = MODE_ALL;
972
static bool skip_ib2_enable_global;
973
static bool skip_ib2_enable_local;
976
print_mode(int level)
978
if ((options->gpu_id >= 500) && !quiet(2)) {
979
printf("%smode: %s\n", levels[level], render_mode);
980
printf("%sskip_ib2: g=%d, l=%d\n", levels[level], skip_ib2_enable_global,
981
skip_ib2_enable_local);
988
switch (options->query_mode) {
993
for (int i = 0; i < options->nquery; i++) {
994
uint32_t regbase = queryvals[i];
995
if (!reg_written(regbase)) {
998
if (reg_rewritten(regbase)) {
1004
for (int i = 0; i < options->nquery; i++) {
1005
uint32_t regbase = queryvals[i];
1006
if (!reg_written(regbase)) {
1009
uint32_t lastval = reg_val(regbase);
1010
if (lastval != lastvals[regbase]) {
1020
__do_query(const char *primtype, uint32_t num_indices)
1024
if ((500 <= options->gpu_id) && (options->gpu_id < 700)) {
1025
uint32_t scissor_tl = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_TL"));
1026
uint32_t scissor_br = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_BR"));
1028
bin_x1 = scissor_tl & 0xffff;
1029
bin_y1 = scissor_tl >> 16;
1030
bin_x2 = scissor_br & 0xffff;
1031
bin_y2 = scissor_br >> 16;
1034
for (int i = 0; i < options->nquery; i++) {
1035
uint32_t regbase = queryvals[i];
1036
if (reg_written(regbase)) {
1037
uint32_t lastval = reg_val(regbase);
1038
printf("%4d: %s(%u,%u-%u,%u):%u:", draw_count, primtype, bin_x1,
1039
bin_y1, bin_x2, bin_y2, num_indices);
1040
if (options->gpu_id >= 500)
1041
printf("%s:", render_mode);
1042
printf("\t%08x", lastval);
1043
if (lastval != lastvals[regbase]) {
1048
if (reg_rewritten(regbase)) {
1053
dump_register_val(regbase, lastval, 0);
1063
do_query_compare(const char *primtype, uint32_t num_indices)
1065
unsigned saved_enable_mask = enable_mask;
1066
const char *saved_render_mode = render_mode;
1068
/* in 'query-compare' mode, we want to see if the register is written
1069
* or changed in any mode:
1071
* (NOTE: this could cause false-positive for 'query-delta' if the reg
1072
* is written with different values in binning vs sysmem/gmem mode, as
1073
* we don't track previous values per-mode, but I think we can live with
1076
enable_mask = MODE_ALL;
1081
if (!skip_query()) {
1082
/* dump binning pass values: */
1083
enable_mask = MODE_BINNING;
1084
render_mode = "BINNING";
1087
__do_query(primtype, num_indices);
1089
/* dump draw pass values: */
1090
enable_mask = MODE_GMEM | MODE_BYPASS;
1091
render_mode = "DRAW";
1094
__do_query(primtype, num_indices);
1099
enable_mask = saved_enable_mask;
1100
render_mode = saved_render_mode;
1102
disable_all_groups();
1105
/* well, actually query and script..
1106
* NOTE: call this before dump_register_summary()
1109
do_query(const char *primtype, uint32_t num_indices)
1112
script_draw(primtype, num_indices);
1114
if (options->query_compare) {
1115
do_query_compare(primtype, num_indices);
1122
__do_query(primtype, num_indices);
1126
cp_im_loadi(uint32_t *dwords, uint32_t sizedwords, int level)
1128
uint32_t start = dwords[1] >> 16;
1129
uint32_t size = dwords[1] & 0xffff;
1130
const char *type = NULL, *ext = NULL;
1131
gl_shader_stage disasm_type;
1133
switch (dwords[0]) {
1137
disasm_type = MESA_SHADER_VERTEX;
1142
disasm_type = MESA_SHADER_FRAGMENT;
1150
printf("%s%s shader, start=%04x, size=%04x\n", levels[level], type, start,
1152
disasm_a2xx(dwords + 2, sizedwords - 2, level + 2, disasm_type);
1154
/* dump raw shader: */
1156
dump_shader(ext, dwords + 2, (sizedwords - 2) * 4);
1160
cp_wide_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
1162
uint32_t reg = dwords[0] & 0xffff;
1164
for (i = 1; i < sizedwords; i++) {
1165
dump_register(reg, dwords[i], level + 1);
1166
reg_set(reg, dwords[i]);
1174
TEX_MIPADDR, /* a3xx only */
1178
// image/ssbo state:
1185
// unknown things, just do hexdumps:
1191
enum adreno_state_block {
1193
SB_VERT_MIPADDR = 1,
1195
SB_FRAG_MIPADDR = 3,
1199
SB_COMPUTE_SHADER = 7,
1202
/* TODO there is probably a clever way to let rnndec parse things so
1203
* we don't have to care about packet format differences across gens
1207
a3xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
1208
enum state_t *state, enum state_src_t *src)
1210
unsigned state_block_id = (dwords[0] >> 19) & 0x7;
1211
unsigned state_type = dwords[1] & 0x3;
1212
static const struct {
1213
gl_shader_stage stage;
1215
} lookup[0xf][0x3] = {
1216
[SB_VERT_TEX][0] = {MESA_SHADER_VERTEX, TEX_SAMP},
1217
[SB_VERT_TEX][1] = {MESA_SHADER_VERTEX, TEX_CONST},
1218
[SB_FRAG_TEX][0] = {MESA_SHADER_FRAGMENT, TEX_SAMP},
1219
[SB_FRAG_TEX][1] = {MESA_SHADER_FRAGMENT, TEX_CONST},
1220
[SB_VERT_SHADER][0] = {MESA_SHADER_VERTEX, SHADER_PROG},
1221
[SB_VERT_SHADER][1] = {MESA_SHADER_VERTEX, SHADER_CONST},
1222
[SB_FRAG_SHADER][0] = {MESA_SHADER_FRAGMENT, SHADER_PROG},
1223
[SB_FRAG_SHADER][1] = {MESA_SHADER_FRAGMENT, SHADER_CONST},
1226
*stage = lookup[state_block_id][state_type].stage;
1227
*state = lookup[state_block_id][state_type].state;
1228
unsigned state_src = (dwords[0] >> 16) & 0x7;
1229
if (state_src == 0 /* SS_DIRECT */)
1230
*src = STATE_SRC_DIRECT;
1232
*src = STATE_SRC_INDIRECT;
1235
static enum state_src_t
1236
_get_state_src(unsigned dword0)
1238
switch ((dword0 >> 16) & 0x3) {
1239
case 0: /* SS4_DIRECT / SS6_DIRECT */
1240
return STATE_SRC_DIRECT;
1241
case 2: /* SS4_INDIRECT / SS6_INDIRECT */
1242
return STATE_SRC_INDIRECT;
1243
case 1: /* SS6_BINDLESS */
1244
return STATE_SRC_BINDLESS;
1246
return STATE_SRC_DIRECT;
1251
_get_state_type(unsigned state_block_id, unsigned state_type,
1252
gl_shader_stage *stage, enum state_t *state)
1254
static const struct {
1255
gl_shader_stage stage;
1257
} lookup[0x10][0x4] = {
1259
[0x0][0] = {MESA_SHADER_VERTEX, TEX_SAMP},
1260
[0x0][1] = {MESA_SHADER_VERTEX, TEX_CONST},
1261
[0x0][2] = {MESA_SHADER_VERTEX, UBO},
1263
[0x1][0] = {MESA_SHADER_TESS_CTRL, TEX_SAMP},
1264
[0x1][1] = {MESA_SHADER_TESS_CTRL, TEX_CONST},
1265
[0x1][2] = {MESA_SHADER_TESS_CTRL, UBO},
1267
[0x2][0] = {MESA_SHADER_TESS_EVAL, TEX_SAMP},
1268
[0x2][1] = {MESA_SHADER_TESS_EVAL, TEX_CONST},
1269
[0x2][2] = {MESA_SHADER_TESS_EVAL, UBO},
1271
[0x3][0] = {MESA_SHADER_GEOMETRY, TEX_SAMP},
1272
[0x3][1] = {MESA_SHADER_GEOMETRY, TEX_CONST},
1273
[0x3][2] = {MESA_SHADER_GEOMETRY, UBO},
1275
[0x4][0] = {MESA_SHADER_FRAGMENT, TEX_SAMP},
1276
[0x4][1] = {MESA_SHADER_FRAGMENT, TEX_CONST},
1277
[0x4][2] = {MESA_SHADER_FRAGMENT, UBO},
1279
[0x5][0] = {MESA_SHADER_COMPUTE, TEX_SAMP},
1280
[0x5][1] = {MESA_SHADER_COMPUTE, TEX_CONST},
1281
[0x5][2] = {MESA_SHADER_COMPUTE, UBO},
1283
[0x8][0] = {MESA_SHADER_VERTEX, SHADER_PROG},
1284
[0x8][1] = {MESA_SHADER_VERTEX, SHADER_CONST},
1285
[0x8][2] = {MESA_SHADER_VERTEX, UBO},
1287
[0x9][0] = {MESA_SHADER_TESS_CTRL, SHADER_PROG},
1288
[0x9][1] = {MESA_SHADER_TESS_CTRL, SHADER_CONST},
1289
[0x9][2] = {MESA_SHADER_TESS_CTRL, UBO},
1291
[0xa][0] = {MESA_SHADER_TESS_EVAL, SHADER_PROG},
1292
[0xa][1] = {MESA_SHADER_TESS_EVAL, SHADER_CONST},
1293
[0xa][2] = {MESA_SHADER_TESS_EVAL, UBO},
1295
[0xb][0] = {MESA_SHADER_GEOMETRY, SHADER_PROG},
1296
[0xb][1] = {MESA_SHADER_GEOMETRY, SHADER_CONST},
1297
[0xb][2] = {MESA_SHADER_GEOMETRY, UBO},
1299
[0xc][0] = {MESA_SHADER_FRAGMENT, SHADER_PROG},
1300
[0xc][1] = {MESA_SHADER_FRAGMENT, SHADER_CONST},
1301
[0xc][2] = {MESA_SHADER_FRAGMENT, UBO},
1303
[0xd][0] = {MESA_SHADER_COMPUTE, SHADER_PROG},
1304
[0xd][1] = {MESA_SHADER_COMPUTE, SHADER_CONST},
1305
[0xd][2] = {MESA_SHADER_COMPUTE, UBO},
1306
[0xd][3] = {MESA_SHADER_COMPUTE, SSBO_0}, /* a6xx location */
1307
// SB4_SSBO (shared across all stages)
1308
[0xe][0] = {0, SSBO_0}, /* a5xx (and a4xx?) location */
1309
[0xe][1] = {0, SSBO_1},
1310
[0xe][2] = {0, SSBO_2},
1312
[0xf][0] = {MESA_SHADER_COMPUTE, SSBO_0},
1313
[0xf][1] = {MESA_SHADER_COMPUTE, SSBO_1},
1314
[0xf][2] = {MESA_SHADER_COMPUTE, SSBO_2},
1316
/* This looks like combined UBO state for 3d stages (a5xx and
1317
* before??) I think a6xx has UBO state per shader stage:
1319
[0x6][2] = {0, UBO},
1320
[0x7][1] = {0, UNKNOWN_2DWORDS},
1323
*stage = lookup[state_block_id][state_type].stage;
1324
*state = lookup[state_block_id][state_type].state;
1328
a4xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
1329
enum state_t *state, enum state_src_t *src)
1331
unsigned state_block_id = (dwords[0] >> 18) & 0xf;
1332
unsigned state_type = dwords[1] & 0x3;
1333
_get_state_type(state_block_id, state_type, stage, state);
1334
*src = _get_state_src(dwords[0]);
1338
// Decode the header of a load-state packet using the layout that
// cp_load_state() selects for gpu_id >= 600.
// Differs from the a4xx variant only in where the state type lives:
// here both fields come from the first dword.
a6xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
1339
enum state_t *state, enum state_src_t *src)
1341
// state block id: bits 21:18 of the first dword
unsigned state_block_id = (dwords[0] >> 18) & 0xf;
1342
// state type: bits 15:14 of the first dword
unsigned state_type = (dwords[0] >> 14) & 0x3;
1343
_get_state_type(state_block_id, state_type, stage, state);
1344
*src = _get_state_src(dwords[0]);
1348
// Dump num_unit texture sampler descriptors starting at texsamp.
// Each descriptor is decoded against the per-generation rnn domain
// (A3XX..A6XX_TEX_SAMP) and then printed as raw hex. Visible descriptor
// sizes: 2 dwords for a3xx and a4xx, 4 dwords for a5xx and a6xx.
// NOTE(review): several lines of this function (loop exits, pointer
// advances for the older generations) are not visible in this view.
dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, int level)
1350
for (int i = 0; i < num_unit; i++) {
1351
/* work-around to reduce noise for opencl blob which always
1352
* writes the max # regardless of # of textures used
1354
if ((num_unit == 16) && (texsamp[0] == 0) && (texsamp[1] == 0))
1357
if ((300 <= options->gpu_id) && (options->gpu_id < 400)) {
1358
dump_domain(texsamp, 2, level + 2, "A3XX_TEX_SAMP");
1359
dump_hex(texsamp, 2, level + 1);
1361
} else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) {
1362
dump_domain(texsamp, 2, level + 2, "A4XX_TEX_SAMP");
1363
dump_hex(texsamp, 2, level + 1);
1365
} else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
1366
dump_domain(texsamp, 4, level + 2, "A5XX_TEX_SAMP");
1367
dump_hex(texsamp, 4, level + 1);
1369
} else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) {
1370
dump_domain(texsamp, 4, level + 2, "A6XX_TEX_SAMP");
1371
dump_hex(texsamp, 4, level + 1);
1372
// bindless sources use a 16-dword stride between descriptors,
// otherwise descriptors are packed 4 dwords apart
texsamp += src == STATE_SRC_BINDLESS ? 16 : 4;
1378
// Dump num_unit texture constant descriptors starting at texconst.
// Each descriptor is decoded against the per-generation rnn domain and
// printed as raw hex; visible descriptor sizes are 4 (a3xx), 8 (a4xx),
// 12 (a5xx) and 16 (a6xx) dwords. When options->dump_textures is set the
// buffer the descriptor points at is dumped too (a4xx and later).
// NOTE(review): pointer advances and loop exits are not visible here.
dump_tex_const(uint32_t *texconst, int num_unit, int level)
1380
for (int i = 0; i < num_unit; i++) {
1381
/* work-around to reduce noise for opencl blob which always
1382
* writes the max # regardless of # of textures used
1384
if ((num_unit == 16) && (texconst[0] == 0) && (texconst[1] == 0) &&
1385
(texconst[2] == 0) && (texconst[3] == 0))
1388
if ((300 <= options->gpu_id) && (options->gpu_id < 400)) {
1389
dump_domain(texconst, 4, level + 2, "A3XX_TEX_CONST");
1390
dump_hex(texconst, 4, level + 1);
1392
} else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) {
1393
dump_domain(texconst, 8, level + 2, "A4XX_TEX_CONST");
1394
if (options->dump_textures) {
1395
// 32-bit address in dword 4 with the low 5 bits masked off
uint32_t addr = texconst[4] & ~0x1f;
1396
dump_gpuaddr(addr, level - 2);
1398
dump_hex(texconst, 8, level + 1);
1400
} else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
1401
dump_domain(texconst, 12, level + 2, "A5XX_TEX_CONST");
1402
if (options->dump_textures) {
1404
// 49-bit address: dword 4 is the low word, low 17 bits of dword 5 the high
(((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
1405
dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
1407
dump_hex(texconst, 12, level + 1);
1409
} else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) {
1410
dump_domain(texconst, 16, level + 2, "A6XX_TEX_CONST");
1411
if (options->dump_textures) {
1413
// same address packing as the a5xx branch
(((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
1414
dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
1416
dump_hex(texconst, 16, level + 1);
1423
// Handler registered in the type3_op table for the LOAD_STATE family of
// packets. It decodes stage, state type and source from the packet header,
// locates the payload (inline in the packet, or in a GPU buffer), and dumps
// the payload according to the state type.
//   dwords     - packet payload (header already stripped by the caller)
//   sizedwords - payload length in dwords
//   level      - indentation level for output
// NOTE(review): many short lines of this function (case labels, breaks,
// some declarations and conditions) are not visible in this view; the
// comments below describe only the visible statements.
cp_load_state(uint32_t *dwords, uint32_t sizedwords, int level)
1425
gl_shader_stage stage;
1427
enum state_src_t src;
1428
// unit count: bits 30:22 of the first dword; its meaning depends on the
// state type (see the per-case notes further down)
uint32_t num_unit = (dwords[0] >> 22) & 0x1ff;
1429
uint64_t ext_src_addr;
1433
if (quiet(2) && !options->script)
1436
// pick the header layout for this GPU generation
if (options->gpu_id >= 600)
1437
a6xx_get_state_type(dwords, &stage, &state, &src);
1438
else if (options->gpu_id >= 400)
1439
a4xx_get_state_type(dwords, &stage, &state, &src);
1441
a3xx_get_state_type(dwords, &stage, &state, &src);
1444
case STATE_SRC_DIRECT:
1447
case STATE_SRC_INDIRECT:
1449
// Two variants are visible: one that also takes dwords[2] as the high
// 32 bits, and one that uses dwords[1] alone. The selecting condition
// is not visible here. Both mask off the low two bits.
ext_src_addr = dwords[1] & 0xfffffffc;
1450
ext_src_addr |= ((uint64_t)dwords[2]) << 32;
1452
ext_src_addr = dwords[1] & 0xfffffffc;
1456
case STATE_SRC_BINDLESS: {
1457
// the bindless base register array differs for compute vs other stages
const unsigned base_reg = stage == MESA_SHADER_COMPUTE
1458
? regbase("HLSQ_CS_BINDLESS_BASE[0].ADDR")
1459
: regbase("HLSQ_BINDLESS_BASE[0].ADDR");
1462
// top 4 bits of dwords[1] index the base array; the first variant
// reads a register pair per entry, the second a single register
const unsigned reg = base_reg + (dwords[1] >> 28) * 2;
1463
ext_src_addr = reg_val(reg) & 0xfffffffc;
1464
ext_src_addr |= ((uint64_t)reg_val(reg + 1)) << 32;
1466
const unsigned reg = base_reg + (dwords[1] >> 28);
1467
ext_src_addr = reg_val(reg) & 0xfffffffc;
1470
// low 24 bits of dwords[1] are an offset in dwords from the base
ext_src_addr += 4 * (dwords[1] & 0xffffff);
1476
// payload either lives at the external address ...
contents = hostptr(ext_src_addr);
1478
// ... or follows the header inline (3 header dwords on 64b GPUs, else 2)
contents = is_64b() ? dwords + 3 : dwords + 2;
1485
const char *ext = NULL;
1490
if (options->gpu_id >= 400)
1492
else if (options->gpu_id >= 300)
1497
* note: num_unit seems to be # of instruction groups, where
1498
* an instruction group has 4 64bit instructions.
1500
if (stage == MESA_SHADER_VERTEX) {
1502
} else if (stage == MESA_SHADER_GEOMETRY) {
1504
} else if (stage == MESA_SHADER_COMPUTE) {
1506
} else if (stage == MESA_SHADER_FRAGMENT) {
1511
try_disasm_a3xx(contents, num_unit * 2, level + 2, stdout,
1514
/* dump raw shader: */
1516
dump_shader(ext, contents, num_unit * 2 * 4);
1520
case SHADER_CONST: {
1526
* note: num_unit seems to be # of pairs of dwords??
1529
if (options->gpu_id >= 400)
1532
dump_float(contents, num_unit * 2, level + 1);
1533
dump_hex(contents, num_unit * 2, level + 1);
1538
uint32_t *addrs = contents;
1543
/* mipmap consts block just appears to be array of num_unit gpu addr's: */
1544
for (i = 0; i < num_unit; i++) {
1545
void *ptr = hostptr(addrs[i]);
1546
printf("%s%2d: %08x\n", levels[level + 1], i, addrs[i]);
1547
if (options->dump_textures) {
1548
printf("base=%08x\n", (uint32_t)gpubaseaddr(addrs[i]));
1549
dump_hex(ptr, hostlen(addrs[i]) / 4, level + 1);
1555
dump_tex_samp(contents, src, num_unit, level);
1559
dump_tex_const(contents, num_unit, level);
1563
uint32_t *ssboconst = (uint32_t *)contents;
1565
for (i = 0; i < num_unit; i++) {
1567
if (400 <= options->gpu_id && options->gpu_id < 500) {
1568
dump_domain(ssboconst, 4, level + 2, "A4XX_SSBO_0");
1569
} else if (500 <= options->gpu_id && options->gpu_id < 600) {
1570
dump_domain(ssboconst, 4, level + 2, "A5XX_SSBO_0");
1571
} else if (600 <= options->gpu_id && options->gpu_id < 700) {
1573
// on a6xx this state is decoded with the 16-dword texture const layout
dump_domain(ssboconst, 16, level + 2, "A6XX_TEX_CONST");
1575
dump_hex(ssboconst, sz, level + 1);
1581
uint32_t *ssboconst = (uint32_t *)contents;
1583
for (i = 0; i < num_unit; i++) {
1584
if (400 <= options->gpu_id && options->gpu_id < 500)
1585
dump_domain(ssboconst, 2, level + 2, "A4XX_SSBO_1");
1586
else if (500 <= options->gpu_id && options->gpu_id < 600)
1587
dump_domain(ssboconst, 2, level + 2, "A5XX_SSBO_1");
1588
dump_hex(ssboconst, 2, level + 1);
1594
uint32_t *ssboconst = (uint32_t *)contents;
1596
for (i = 0; i < num_unit; i++) {
1597
/* TODO a4xx and a5xx might be same: */
1598
if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
1599
dump_domain(ssboconst, 2, level + 2, "A5XX_SSBO_2");
1600
dump_hex(ssboconst, 2, level + 1);
1602
if (options->dump_textures) {
1604
// buffer address: dword 0 is the low word, low 17 bits of dword 1 the high
(((uint64_t)ssboconst[1] & 0x1ffff) << 32) | ssboconst[0];
1605
dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
1612
uint32_t *uboconst = (uint32_t *)contents;
1614
for (i = 0; i < num_unit; i++) {
1615
// TODO probably similar on a4xx..
1616
if (500 <= options->gpu_id && options->gpu_id < 600)
1617
dump_domain(uboconst, 2, level + 2, "A5XX_UBO");
1618
else if (600 <= options->gpu_id && options->gpu_id < 700)
1619
dump_domain(uboconst, 2, level + 2, "A6XX_UBO");
1620
dump_hex(uboconst, 2, level + 1);
1621
// bindless UBO descriptors sit 16 dwords apart, otherwise 2
uboconst += src == STATE_SRC_BINDLESS ? 16 : 2;
1625
// the UNKNOWN_* state types are dumped as raw hex, with num_unit scaled
// by the per-unit dword count implied by the name
case UNKNOWN_DWORDS: {
1628
dump_hex(contents, num_unit, level + 1);
1631
case UNKNOWN_2DWORDS: {
1634
dump_hex(contents, num_unit * 2, level + 1);
1637
case UNKNOWN_4DWORDS: {
1640
dump_hex(contents, num_unit * 4, level + 1);
1647
dump_hex(contents, num_unit, level + 1);
1653
// Handler for the SET_BIN packet (see the type3_op table): records the
// current bin rectangle in the file-level bin_x1/bin_y1/bin_x2/bin_y2.
// Each corner is packed as x in the low 16 bits and y in the high 16 bits;
// dwords[1] holds the first corner and dwords[2] the second.
cp_set_bin(uint32_t *dwords, uint32_t sizedwords, int level)
1655
bin_x1 = dwords[1] & 0xffff;
1656
bin_y1 = dwords[1] >> 16;
1657
bin_x2 = dwords[2] & 0xffff;
1658
bin_y2 = dwords[2] >> 16;
1662
// Decode and print an a2xx texture constant (called from cp_set_const()).
// Fields are pulled out of dwords[0..5] with fixed shifts and masks and
// printed as clamp modes, filters, swizzle, base address, size, pitch and
// format, followed by the mip address.
// NOTE(review): the remaining parameters and the contents of the filter
// and clamp name tables are not visible in this view.
dump_a2xx_tex_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val,
1666
uint32_t gpuaddr, flags, mip_gpuaddr, mip_flags;
1667
uint32_t min, mag, swiz, clamp_x, clamp_y, clamp_z;
1668
static const char *filter[] = {
1673
static const char *clamp[] = {
1678
// one character per 3-bit swizzle selector value
static const char swiznames[] = "xyzw01??";
1680
/* see sys2gmem_tex_const[] in adreno_a2xxx.c */
1682
/* Texture, FormatXYZW=Unsigned, ClampXYZ=Wrap/Repeat,
1683
* RFMode=ZeroClamp-1, Dim=1:2d, pitch
1685
// pitch: bits 31:22, scaled by 32
p = (dwords[0] >> 22) << 5;
1686
clamp_x = (dwords[0] >> 10) & 0x3;
1687
clamp_y = (dwords[0] >> 13) & 0x3;
1688
clamp_z = (dwords[0] >> 16) & 0x3;
1690
/* Format=6:8888_WZYX, EndianSwap=0:None, ReqSize=0:256bit, DimHi=0,
1691
* NearestClamp=1:OGL Mode
1693
// split dword 1 into address and flag bits using a 12-bit flag mask
parse_dword_addr(dwords[1], &gpuaddr, &flags, 0xfff);
1695
/* Width, Height, EndianSwap=0:None */
1696
// both dimensions are stored minus one in 13-bit fields
w = (dwords[2] & 0x1fff) + 1;
1697
h = ((dwords[2] >> 13) & 0x1fff) + 1;
1699
/* NumFormat=0:RF, DstSelXYZW=XYZW, ExpAdj=0, MagFilt=MinFilt=0:Point,
1702
mag = (dwords[3] >> 19) & 0x3;
1703
min = (dwords[3] >> 21) & 0x3;
1704
// four 3-bit selectors, decoded through swiznames[] below
swiz = (dwords[3] >> 1) & 0xfff;
1706
/* VolMag=VolMin=0:Point, MinMipLvl=0, MaxMipLvl=1, LodBiasH=V=0,
1711
/* BorderColor=0:ABGRBlack, ForceBC=0:disable, TriJuice=0, Aniso=0,
1712
* Dim=1:2d, MipPacking=0
1714
parse_dword_addr(dwords[5], &mip_gpuaddr, &mip_flags, 0xfff);
1716
printf("%sset texture const %04x\n", levels[level], val);
1717
printf("%sclamp x/y/z: %s/%s/%s\n", levels[level + 1], clamp[clamp_x],
1718
clamp[clamp_y], clamp[clamp_z]);
1719
printf("%sfilter min/mag: %s/%s\n", levels[level + 1], filter[min],
1721
printf("%sswizzle: %c%c%c%c\n", levels[level + 1],
1722
swiznames[(swiz >> 0) & 0x7], swiznames[(swiz >> 3) & 0x7],
1723
swiznames[(swiz >> 6) & 0x7], swiznames[(swiz >> 9) & 0x7]);
1724
printf("%saddr=%08x (flags=%03x), size=%dx%d, pitch=%d, format=%s\n",
1725
levels[level + 1], gpuaddr, flags, w, h, p,
1726
// the low 4 flag bits select the surface format name
rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf));
1727
printf("%smipaddr=%08x (flags=%03x)\n", levels[level + 1], mip_gpuaddr,
1732
// Decode and print a2xx shader (vertex fetch) constants, called from
// cp_set_const(). The payload is consumed as pairs of dwords: an address
// word (address plus 4 flag bits) followed by a size. The referenced
// buffer is dumped as hex and as floats, capped at 64 dwords each.
// NOTE(review): the remaining parameters are not visible in this view.
dump_a2xx_shader_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val,
1736
printf("%sset shader const %04x\n", levels[level], val);
1737
// i is advanced inside the body, two dwords per iteration
for (i = 0; i < sizedwords;) {
1738
uint32_t gpuaddr, flags;
1739
parse_dword_addr(dwords[i++], &gpuaddr, &flags, 0xf);
1740
void *addr = hostptr(gpuaddr);
1743
rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf);
1744
uint32_t size = dwords[i++];
1745
printf("%saddr=%08x, size=%d, format=%s\n", levels[level + 1], gpuaddr,
1747
// TODO maybe dump these as bytes instead of dwords?
1748
size = (size + 3) / 4; // for now convert to dwords
1749
dump_hex(addr, min(size, 64), level + 1);
1750
// print an ellipsis when the dump above was truncated
if (size > min(size, 64))
1751
printf("%s\t\t...\n", levels[level + 1]);
1752
dump_float(addr, min(size, 64), level + 1);
1753
if (size > min(size, 64))
1754
printf("%s\t\t...\n", levels[level + 1]);
1760
// Handler for the SET_CONSTANT packet (see the type3_op table).
// The low 16 bits of dwords[0] are the constant or register offset (val)
// and bits 19:16 select what kind of constant the payload holds.
// NOTE(review): the case labels of the switch are not visible in this
// view, so which selector value maps to which dump is not shown here.
cp_set_const(uint32_t *dwords, uint32_t sizedwords, int level)
1762
uint32_t val = dwords[0] & 0xffff;
1763
switch ((dwords[0] >> 16) & 0xf) {
1765
dump_float((float *)(dwords + 1), sizedwords - 1, level + 1);
1768
/* need to figure out how const space is partitioned between
1769
* attributes, textures, etc..
1772
dump_a2xx_tex_const(dwords + 1, sizedwords - 1, val, level);
1774
dump_a2xx_shader_const(dwords + 1, sizedwords - 1, val, level);
1778
printf("%sset bool const %04x\n", levels[level], val);
1781
printf("%sset loop const %04x\n", levels[level], val);
1785
// When the top bit of dwords[0] is set, the value written to register val
// is dwords[2] plus the last recorded value of the register named by
// dwords[1]; otherwise the payload is written to registers directly.
if (dwords[0] & 0x80000000) {
1786
uint32_t srcreg = dwords[1];
1787
uint32_t dstval = dwords[2];
1789
/* TODO: not sure what happens w/ payload != 2.. */
1790
assert(sizedwords == 3);
1791
assert(srcreg < ARRAY_SIZE(type0_reg_vals));
1793
/* note: rnn_regname uses a static buf so we can't do
1794
* two regname() calls for one printf..
1796
printf("%s%s = %08x + ", levels[level], regname(val, 1), dstval);
1797
printf("%s (%08x)\n", regname(srcreg, 1), type0_reg_vals[srcreg]);
1799
dstval += type0_reg_vals[srcreg];
1801
dump_registers(val, &dstval, 1, level + 1);
1803
dump_registers(val, dwords + 1, sizedwords - 1, level + 1);
1809
// Forward declaration: cp_event_write() below calls this before the
// definition appears.
static void dump_register_summary(int level);
1812
// Handler for the EVENT_WRITE packet (see the type3_op table).
// Looks up the event name for dwords[0] and prints it. For named events on
// gpu_id above 500, a BLIT event is additionally reported to do_query()
// under the name EVENT:BLIT and followed by a register summary.
// NOTE(review): the check is gpu_id > 500 while is_64b() in this file uses
// >= 500 - confirm whether the difference is intended.
cp_event_write(uint32_t *dwords, uint32_t sizedwords, int level)
1814
const char *name = rnn_enumname(rnn, "vgt_event_type", dwords[0]);
1815
printl(2, "%sevent %s\n", levels[level], name);
1817
if (name && (options->gpu_id > 500)) {
1819
snprintf(eventname, sizeof(eventname), "EVENT:%s", name);
1820
if (!strcmp(name, "BLIT")) {
1821
do_query(eventname, 0);
1823
dump_register_summary(level);
1829
// Print the current value of registers at a draw (or blit/compute) point.
// Walks every register index below regcnt(), consults reg_rewritten() and
// reg_written() to filter, tracks the last printed value in lastvals[], and
// decodes each reported register with dump_register(). The global summary
// flag is saved on entry and restored on exit.
// NOTE(review): the statements guarded by the filter conditions (most
// likely loop continues) and the code between save and restore of summary
// are not visible in this view.
dump_register_summary(int level)
1832
bool saved_summary = summary;
1837
/* dump current state of registers: */
1838
printl(2, "%sdraw[%i] register values\n", levels[level], draw_count);
1839
for (i = 0; i < regcnt(); i++) {
1840
uint32_t regbase = i;
1841
uint32_t lastval = reg_val(regbase);
1842
/* skip registers that haven't been updated since last draw/blit: */
1843
// options->allregs overrides this filter
if (!(options->allregs || reg_rewritten(regbase)))
1845
if (!reg_written(regbase))
1847
if (lastval != lastvals[regbase]) {
1849
lastvals[regbase] = lastval;
1853
if (reg_rewritten(regbase)) {
1858
printl(2, "\t%08x", lastval);
1860
dump_register(regbase, lastval, level);
1869
summary = saved_summary;
1873
// Shared front half of the DRAW_INDX and DRAW_INDX_2 handlers: decodes the
// primitive type (low 5 bits of dwords[1]), the index source select
// (bits 7:6 of dwords[1]) and the index count (dwords[2]), reports the draw
// to do_query(), prints the fields and adds the count to the running
// vertices total. Both callers use the return value as the index count.
draw_indx_common(uint32_t *dwords, int level)
1875
uint32_t prim_type = dwords[1] & 0x1f;
1876
uint32_t source_select = (dwords[1] >> 6) & 0x3;
1877
uint32_t num_indices = dwords[2];
1878
const char *primtype;
1880
primtype = rnn_enumname(rnn, "pc_di_primtype", prim_type);
1882
do_query(primtype, num_indices);
1884
printl(2, "%sdraw: %d\n", levels[level], draws[ib]);
1885
printl(2, "%sprim_type: %s (%d)\n", levels[level], primtype, prim_type);
1886
printl(2, "%ssource_select: %s (%d)\n", levels[level],
1887
rnn_enumname(rnn, "pc_di_src_sel", source_select), source_select);
1888
printl(2, "%snum_indices: %d\n", levels[level], num_indices);
1890
vertices += num_indices;
1897
// Index element size as encoded in the draw packets. The draw handlers
// below assemble this 2-bit value from bits 11 and 13 of dwords[1].
// Note that INDEX_SIZE_INVALID aliases INDEX_SIZE_16_BIT (both are 0), so
// the two cannot be told apart by value.
enum pc_di_index_size {
1899
INDEX_SIZE_16_BIT = 0,
1900
INDEX_SIZE_32_BIT = 1,
1901
INDEX_SIZE_8_BIT = 2,
1902
INDEX_SIZE_INVALID = 0,
1906
// Handler for the DRAW_INDX packet (see the type3_op table).
// After the common draw decode, a 5-dword payload is treated as carrying an
// index buffer: dwords[3] is its GPU address and dwords[4] its size in
// bytes (the loops below divide it by the element size). The indices are
// printed per element and the buffer is also dumped as hex. A register
// summary follows unless the draw has zero indices.
// NOTE(review): the result of hostptr() is used without a visible NULL
// check; a guard may exist on a line not shown in this view.
cp_draw_indx(uint32_t *dwords, uint32_t sizedwords, int level)
1908
uint32_t num_indices = draw_indx_common(dwords, level);
1912
/* if we have an index buffer, dump that: */
1913
if (sizedwords == 5) {
1914
void *ptr = hostptr(dwords[3]);
1915
printl(2, "%sgpuaddr: %08x\n", levels[level], dwords[3]);
1916
printl(2, "%sidx_size: %d\n", levels[level], dwords[4]);
1918
enum pc_di_index_size size =
1919
// bit 11 becomes bit 0 and bit 13 becomes bit 1 of the size enum
((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
1922
printf("%sidxs: ", levels[level]);
1923
if (size == INDEX_SIZE_8_BIT) {
1925
for (i = 0; i < dwords[4]; i++)
1926
printf(" %u", idx[i]);
1927
} else if (size == INDEX_SIZE_16_BIT) {
1928
uint16_t *idx = ptr;
1929
for (i = 0; i < dwords[4] / 2; i++)
1930
printf(" %u", idx[i]);
1931
} else if (size == INDEX_SIZE_32_BIT) {
1932
uint32_t *idx = ptr;
1933
for (i = 0; i < dwords[4] / 4; i++)
1934
printf(" %u", idx[i]);
1937
dump_hex(ptr, dwords[4] / 4, level + 1);
1942
/* don't bother dumping registers for the dummy draw_indx's.. */
1943
if (num_indices > 0)
1944
dump_register_summary(level);
1950
// Handler for the DRAW_INDX_2 packet (see the type3_op table).
// Same as DRAW_INDX except that the index data is embedded in the packet
// itself, starting at dwords[3]. num_indices elements are printed at the
// decoded element size, the inline data is dumped as hex, and a register
// summary follows unless the draw has zero indices.
// NOTE(review): the inline index loops are bounded by num_indices only,
// not by sizedwords - a malformed packet could read past the payload.
cp_draw_indx_2(uint32_t *dwords, uint32_t sizedwords, int level)
1952
uint32_t num_indices = draw_indx_common(dwords, level);
1953
enum pc_di_index_size size =
1954
// bit 11 becomes bit 0 and bit 13 becomes bit 1 of the size enum
((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
1955
void *ptr = &dwords[3];
1960
/* CP_DRAW_INDX_2 has embedded/inline idx buffer: */
1963
printf("%sidxs: ", levels[level]);
1964
if (size == INDEX_SIZE_8_BIT) {
1966
for (i = 0; i < num_indices; i++)
1967
printf(" %u", idx[i]);
1969
} else if (size == INDEX_SIZE_16_BIT) {
1970
uint16_t *idx = ptr;
1971
for (i = 0; i < num_indices; i++)
1972
printf(" %u", idx[i]);
1973
// sz tracks the index data size in bytes for the hex dump below
sz = num_indices * 2;
1974
} else if (size == INDEX_SIZE_32_BIT) {
1975
uint32_t *idx = ptr;
1976
for (i = 0; i < num_indices; i++)
1977
printf(" %u", idx[i]);
1978
sz = num_indices * 4;
1981
dump_hex(ptr, sz / 4, level + 1);
1984
/* don't bother dumping registers for the dummy draw_indx's.. */
1985
if (num_indices > 0)
1986
dump_register_summary(level);
1990
// Handler for the DRAW_INDX_OFFSET packet (see the type3_op table).
// Here the primitive type is the low 5 bits of dwords[0] and the index
// count is dwords[2]. Reports the draw to do_query() and prints a register
// summary unless the count is zero.
cp_draw_indx_offset(uint32_t *dwords, uint32_t sizedwords, int level)
1992
uint32_t num_indices = dwords[2];
1993
uint32_t prim_type = dwords[0] & 0x1f;
1995
do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), num_indices);
1998
/* don't bother dumping registers for the dummy draw_indx's.. */
1999
if (num_indices > 0)
2000
dump_register_summary(level);
2004
// Handler for the DRAW_INDX_INDIRECT packet (see the type3_op table).
// Reports the draw to do_query() with a count of 0, then dumps 0x10 units
// at each of two GPU addresses packed in the payload (dwords 1/2 and
// dwords 4/5, low word first, high word limited to 17 bits), and finally
// prints a register summary.
// NOTE(review): what each of the two buffers holds is not stated in the
// visible code.
cp_draw_indx_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2006
uint32_t prim_type = dwords[0] & 0x1f;
2009
do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2013
addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2016
dump_gpuaddr_size(addr, level, 0x10, 2);
2019
addr = (((uint64_t)dwords[5] & 0x1ffff) << 32) | dwords[4];
2022
dump_gpuaddr_size(addr, level, 0x10, 2);
2024
dump_register_summary(level);
2028
// Handler for the DRAW_INDIRECT packet (see the type3_op table).
// Reports the draw to do_query() with a count of 0, dumps 0x10 units at
// the GPU address packed in dwords 1/2 (low word first, high word limited
// to 17 bits), then prints a register summary.
cp_draw_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2030
uint32_t prim_type = dwords[0] & 0x1f;
2033
do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2036
addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2037
dump_gpuaddr_size(addr, level, 0x10, 2);
2039
dump_register_summary(level);
2043
// Handler for the DRAW_INDIRECT_MULTI packet (see the type3_op table).
// The packet layout varies, so the dword positions of the INDIRECT_COUNT,
// INDIRECT and STRIDE fields are looked up in the rnn domain rather than
// hard-coded. The number of draws dumped is the packet count, clamped by
// the indirect count read from memory when that looks sane, and always
// capped at max_draw_count.
// NOTE(review): the conditions selecting between the three clamp
// statements, and any NULL check on buf, are not visible in this view.
cp_draw_indirect_multi(uint32_t *dwords, uint32_t sizedwords, int level)
2045
uint32_t prim_type = dwords[0] & 0x1f;
2046
uint32_t count = dwords[2];
2048
do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2051
struct rnndomain *domain = rnn_finddomain(rnn->db, "CP_DRAW_INDIRECT_MULTI");
2052
uint32_t count_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT_COUNT");
2053
uint32_t addr_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT");
2054
// NOTE(review): declared uint64_t although it is used as a dword index
// like the two values above - probably meant to be uint32_t
uint64_t stride_dword = rnndec_decodereg(rnn->vc, domain, "STRIDE");
2057
uint64_t count_addr =
2058
((uint64_t)dwords[count_dword + 1] << 32) | dwords[count_dword];
2059
uint32_t *buf = hostptr(count_addr);
2061
/* Don't print more draws than this if we don't know the indirect
2062
* count. It's possible the user will give ~0 or some other large
2063
* value, expecting the GPU to fill in the draw count, and we don't
2064
* want to print a gazillion draws in that case:
2066
const uint32_t max_draw_count = 0x100;
2068
/* Assume the indirect count is garbage if it's larger than this
2069
* (quite large) value or 0. Hopefully this catches most cases.
2071
const uint32_t max_indirect_draw_count = 0x10000;
2074
printf("%sindirect count: %u\n", levels[level], *buf);
2075
if (*buf == 0 || *buf > max_indirect_draw_count) {
2077
count = min(count, max_draw_count);
2080
count = min(count, *buf);
2083
count = min(count, max_draw_count);
2087
// a zero field position is treated as the field not being present
if (addr_dword && stride_dword) {
2089
((uint64_t)dwords[addr_dword + 1] << 32) | dwords[addr_dword];
2090
uint32_t stride = dwords[stride_dword];
2092
// one indirect record per draw, stride bytes apart
for (unsigned i = 0; i < count; i++, addr += stride) {
2093
printf("%sdraw %d:\n", levels[level], i);
2094
dump_gpuaddr_size(addr, level, 0x10, 2);
2098
dump_register_summary(level);
2102
// Handler for the RUN_OPENCL packet (see the type3_op table): reports a
// COMPUTE event with count 1 to do_query() and prints a register summary.
cp_run_cl(uint32_t *dwords, uint32_t sizedwords, int level)
2104
do_query("COMPUTE", 1);
2105
dump_register_summary(level);
2109
// Handler for NOP packets whose payload may carry a text marker.
// When options->decode_markers is set, the payload is walked byte by byte
// (4 * sizedwords bytes) and the ASCII bytes are printed as characters.
// NOTE(review): the loop exit condition (for example stopping at a NUL
// byte) and the handling of non-ASCII bytes are not visible in this view.
cp_nop(uint32_t *dwords, uint32_t sizedwords, int level)
2111
const char *buf = (void *)dwords;
2117
// blob doesn't use CP_NOP for string_marker but it does
2118
// use it for things that end up looking like, but aren't
2120
if (!options->decode_markers)
2123
for (i = 0; i < 4 * sizedwords; i++) {
2126
if (isascii(buf[i]))
2127
printf("%c", buf[i]);
2133
// Handler for the INDIRECT_BUFFER packets (see the type3_op table).
// Extracts the target IB address and size from the payload, prints them,
// optionally skips IBs that were already dumped, then maps the address to
// a host pointer and recursively decodes the IB with dump_commands().
// If the address cannot be mapped, an error goes to stderr.
// NOTE(review): the increment and decrement of the ib nesting index around
// the recursive call are not visible in this view.
cp_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2135
/* traverse indirect buffers */
2138
uint32_t *ptr = NULL;
2141
/* a5xx+.. high 32b of gpu addr, then size: */
2143
ibaddr |= ((uint64_t)dwords[1]) << 32;
2152
// the address is printed at 64-bit or 32-bit width; the selecting
// condition is not visible here
printf("%sibaddr:%016" PRIx64 "\n", levels[level], ibaddr);
2154
printf("%sibaddr:%08x\n", levels[level], (uint32_t)ibaddr);
2156
printf("%sibsize:%08x\n", levels[level], ibsize);
2159
if (options->once && has_dumped(ibaddr, enable_mask))
2162
/* 'query-compare' mode implies 'once' mode, although we need only to
2163
* process the cmdstream for *any* enable_mask mode, since we are
2164
* comparing binning vs draw reg values at the same time, ie. it is
2165
* not useful to process the same draw in both binning and draw pass.
2167
if (options->query_compare && has_dumped(ibaddr, MODE_ALL))
2170
/* map gpuaddr back to hostptr: */
2171
ptr = hostptr(ibaddr);
2174
/* If the GPU hung within the target IB, the trigger point will be
2175
* just after the current CP_INDIRECT_BUFFER. Because the IB is
2176
* executed but never returns. Account for this by checking if
2179
// the dword after this packet: payload is 3 dwords on 64b GPUs, else 2
highlight_gpuaddr(gpuaddr(&dwords[is_64b() ? 3 : 2]));
2182
// record the IB range for the current nesting level before recursing
ibs[ib].base = ibaddr;
2183
ibs[ib].size = ibsize;
2185
dump_commands(ptr, ibsize, level);
2188
fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize);
2193
// Handler for the START_BIN packet (see the type3_op table).
// dwords[0] is a loop count and dwords[2] the high word of the target
// buffer address. The target buffer is decoded once per loop iteration,
// each time preceded by a bin header line.
// NOTE(review): the low address word, the buffer size and the NULL check
// on the mapped pointer are on lines not visible in this view.
cp_start_bin(uint32_t *dwords, uint32_t sizedwords, int level)
2198
uint32_t *ptr = NULL;
2200
loopcount = dwords[0];
2202
ibaddr |= ((uint64_t)dwords[2]) << 32;
2205
/* map gpuaddr back to hostptr: */
2206
ptr = hostptr(ibaddr);
2209
/* If the GPU hung within the target IB, the trigger point will be
2210
* just after the current CP_START_BIN. Because the IB is
2211
* executed but never returns. Account for this by checking if
2214
// the dword just past this packet (index 5 of the payload)
highlight_gpuaddr(gpuaddr(&dwords[5]));
2216
/* TODO: we should duplicate the body of the loop after each bin, so
2217
* that draws get the correct state. We should also figure out if there
2218
* are any registers that can tell us what bin we're in when we hang so
2219
* that crashdec points to the right place.
2222
for (uint32_t i = 0; i < loopcount; i++) {
2223
ibs[ib].base = ibaddr;
2224
ibs[ib].size = ibsize;
2225
printf("%sbin %u\n", levels[level], i);
2226
dump_commands(ptr, ibsize, level);
2232
fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize);
2237
// Handler for the WAIT_FOR_IDLE packet (see the type3_op table).
// NOTE(review): the body is not visible in this view; presumably it
// clears the needs_wfi flag declared at the top of the file - confirm.
cp_wfi(uint32_t *dwords, uint32_t sizedwords, int level)
2243
// Handler for the MEM_WRITE packet (see the type3_op table).
// Two layouts are visible: a 64-bit one where dwords 0/1 form the target
// address and the data starts at dwords[2], and a 32-bit one where
// dwords[0] is the address and the data starts at dwords[1] and is shown
// as floats. In the 64-bit layout, data that looks like a type4 or type7
// packet header is additionally decoded as a command stream.
// NOTE(review): the condition selecting the layout is not visible here.
cp_mem_write(uint32_t *dwords, uint32_t sizedwords, int level)
2249
uint64_t gpuaddr = dwords[0] | (((uint64_t)dwords[1]) << 32);
2250
printf("%sgpuaddr:%016" PRIx64 "\n", levels[level], gpuaddr);
2251
dump_hex(&dwords[2], sizedwords - 2, level + 1);
2253
if (pkt_is_type4(dwords[2]) || pkt_is_type7(dwords[2]))
2254
dump_commands(&dwords[2], sizedwords - 2, level + 1);
2256
uint32_t gpuaddr = dwords[0];
2257
printf("%sgpuaddr:%08x\n", levels[level], gpuaddr);
2258
dump_float((float *)&dwords[1], sizedwords - 1, level + 1);
2263
// Handler for the REG_RMW packet (see the type3_op table).
// Applies a read-modify-write to the tracked register value:
// new = (old & dwords[1]) | dwords[2], where the register offset is the
// low 16 bits of dwords[0].
// NOTE(review): the condition guarding the NEEDS WFI message is not
// visible in this view.
cp_rmw(uint32_t *dwords, uint32_t sizedwords, int level)
2265
uint32_t val = dwords[0] & 0xffff;
2266
uint32_t and = dwords[1];
2267
uint32_t or = dwords[2];
2268
printl(3, "%srmw (%s & 0x%08x) | 0x%08x)\n", levels[level], regname(val, 1),
2271
printl(2, "NEEDS WFI: rmw (%s & 0x%08x) | 0x%08x)\n", regname(val, 1),
2273
reg_set(val, (reg_val(val) & and) | or);
2277
// Handler shared by the REG_TO_MEM and MEM_TO_REG packets (see the
// type3_op table). Prints the base register (low 16 bits of dwords[0]),
// then the 64-bit memory address from dwords 1/2, and hex-dumps cnt dwords
// of that memory, where cnt is bits 28:19 of dwords[0].
// NOTE(review): any condition around the memory dump, and a NULL check on
// ptr, are not visible in this view.
cp_reg_mem(uint32_t *dwords, uint32_t sizedwords, int level)
2279
uint32_t val = dwords[0] & 0xffff;
2280
printl(3, "%sbase register: %s\n", levels[level], regname(val, 1));
2285
uint64_t gpuaddr = dwords[1] | (((uint64_t)dwords[2]) << 32);
2286
printf("%sgpuaddr:%016" PRIx64 "\n", levels[level], gpuaddr);
2287
void *ptr = hostptr(gpuaddr);
2289
uint32_t cnt = (dwords[0] >> 19) & 0x3ff;
2290
dump_hex(ptr, cnt, level + 1);
2295
// Draw-state group bookkeeping used by the SET_DRAW_STATE handler.
// enable_mask is a member of struct draw_state (its other members are not
// visible in this view); it is compared against the current global
// enable_mask in load_group().
uint16_t enable_mask;
2301
// one slot per group id; the id is a 5-bit field in cp_set_draw_state()
struct draw_state state[32];
2303
// flag bits taken from bits 19:16 of a SET_DRAW_STATE entry
#define FLAG_DIRTY 0x1
2304
#define FLAG_DISABLE 0x2
2305
#define FLAG_DISABLE_ALL_GROUPS 0x4
2306
#define FLAG_LOAD_IMMED 0x8
2308
// last value written by cp_set_mode()
static int draw_mode;
2311
// Clear the draw-state slot for group_id by zeroing the whole entry.
// The caller must pass an id below ARRAY_SIZE(state); no check is made here.
disable_group(unsigned group_id)
2313
struct draw_state *ds = &state[group_id];
2314
memset(ds, 0, sizeof(*ds));
2318
// Iterate over every draw-state slot.
// NOTE(review): the loop body is not visible in this view; presumably it
// calls disable_group(i) - confirm.
disable_all_groups(void)
2320
for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
2325
// Print and decode the draw-state group stored in slot group_id.
// Prints the group id, dword count, address and flags, and on gpu_id >= 600
// also the enable mask; a group whose mask does not intersect the current
// global enable_mask is reported as skipped. Otherwise the group buffer is
// mapped to a host pointer, dumped as hex and decoded as a command stream.
//   group_id - index into state[]; must be below ARRAY_SIZE(state)
//   level    - indentation level for output
// NOTE(review): early exits (for example for an empty group or an
// unmapped address) are on lines not visible in this view.
load_group(unsigned group_id, int level)
2327
struct draw_state *ds = &state[group_id];
2332
printl(2, "%sgroup_id: %u\n", levels[level], group_id);
2333
printl(2, "%scount: %d\n", levels[level], ds->count);
2334
// ds->addr holds a 64-bit GPU address (presumably uint64_t, as built in
// cp_set_draw_state), so print it with PRIx64 like the rest of the file
// rather than a width-specific length modifier
printl(2, "%saddr: %016" PRIx64 "\n", levels[level], ds->addr);
2335
printl(2, "%sflags: %x\n", levels[level], ds->flags);
2337
if (options->gpu_id >= 600) {
2338
printl(2, "%senable_mask: 0x%x\n", levels[level], ds->enable_mask);
2340
// group does not apply to the current render mode
if (!(ds->enable_mask & enable_mask)) {
2341
printl(2, "%s\tskipped!\n\n", levels[level]);
2346
void *ptr = hostptr(ds->addr);
2349
dump_hex(ptr, ds->count, level + 1);
2352
dump_commands(ptr, ds->count, level + 1);
2358
// Decode every draw-state group in slot order. dump_commands() calls this
// before dispatching any packet flagged load_all_groups in the type3_op
// table. A static flag guards against re-entry while groups are being
// loaded. Afterwards all groups are cleared, except in query-compare mode.
// NOTE(review): what happens after the re-entry error message (most
// likely an early return) is not visible in this view.
load_all_groups(int level)
2360
/* sanity check, we should never recursively hit recursion here, and if
2361
* we do bad things happen:
2363
static bool loading_groups = false;
2364
if (loading_groups) {
2365
printf("ERROR: nothing in draw state should trigger recursively loading "
2369
loading_groups = true;
2370
for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
2371
load_group(i, level);
2372
loading_groups = false;
2374
/* in 'query-compare' mode, defer disabling all groups until we have a
2375
* chance to process the query:
2377
if (!options->query_compare)
2378
disable_all_groups();
2382
// Handler for the SET_DRAW_STATE packet (see the type3_op table).
// The payload is a sequence of entries. The first dword of each entry
// packs: count (bits 15:0), flags (bits 19:16), enable mask (bits 23:20)
// and group id (bits 28:24). It is followed by a one- or two-dword address.
// Flags may disable all groups, disable one group, or request an immediate
// load of the group just stored.
// NOTE(review): the loop increment, the condition choosing 64-bit vs
// 32-bit addresses, and the stores of count/addr/flags into the slot are
// on lines not visible in this view.
cp_set_draw_state(uint32_t *dwords, uint32_t sizedwords, int level)
2386
for (i = 0; i < sizedwords;) {
2387
struct draw_state *ds;
2388
uint32_t count = dwords[i] & 0xffff;
2389
uint32_t group_id = (dwords[i] >> 24) & 0x1f;
2390
// shadows the file-level enable_mask within this loop body
uint32_t enable_mask = (dwords[i] >> 20) & 0xf;
2391
uint32_t flags = (dwords[i] >> 16) & 0xf;
2395
addr = dwords[i + 1];
2396
addr |= ((uint64_t)dwords[i + 2]) << 32;
2399
addr = dwords[i + 1];
2403
if (flags & FLAG_DISABLE_ALL_GROUPS) {
2404
disable_all_groups();
2408
if (flags & FLAG_DISABLE) {
2409
disable_group(group_id);
2413
// group_id is a 5-bit field, so this holds while state[] has 32 slots
assert(group_id < ARRAY_SIZE(state));
2414
// reset the slot before filling it with the new entry
disable_group(group_id);
2416
ds = &state[group_id];
2418
ds->enable_mask = enable_mask;
2423
// immediate load: decode the group now, then clear it
if (flags & FLAG_LOAD_IMMED) {
2424
load_group(group_id, level);
2425
disable_group(group_id);
2431
// Handler for the SET_MODE packet (see the type3_op table): stores the
// first payload dword in the file-level draw_mode.
cp_set_mode(uint32_t *dwords, uint32_t sizedwords, int level)
2433
draw_mode = dwords[0];
2436
/* execute compute shader */
2438
// Handler for the EXEC_CS packet (see the type3_op table): reports a
// compute event with count 0 to do_query() and prints a register summary.
cp_exec_cs(uint32_t *dwords, uint32_t sizedwords, int level)
2440
do_query("compute", 0);
2441
dump_register_summary(level);
2445
// Handler for the EXEC_CS_INDIRECT packet (see the type3_op table).
// Builds the GPU address of the indirect parameters from dwords 1/2 (low
// word first, high word limited to 17 bits), prints it, dumps 0x10 units
// at that address, then reports a compute event to do_query() and prints a
// register summary.
// NOTE(review): any condition around the address decode is not visible in
// this view.
cp_exec_cs_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2450
addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2455
// addr is a 64-bit value: use PRIx64, as elsewhere in this file, so the
// conversion matches the argument type on every ABI
printl(3, "%saddr: %016" PRIx64 "\n", levels[level], addr);
2456
dump_gpuaddr_size(addr, level, 0x10, 2);
2458
do_query("compute", 0);
2459
dump_register_summary(level);
2463
// Handler for the SET_MARKER packet (see the type3_op table).
// Looks up the a6xx marker name for the low 4 bits of dwords[0], stores it
// in the file-level render_mode, and sets the global enable_mask to
// MODE_BINNING, MODE_GMEM or MODE_BYPASS for the three recognised markers.
// NOTE(review): render_mode is passed to strcmp() without a visible NULL
// check; if rnn_enumname() can return NULL for an unknown marker this
// would crash - confirm against the lines not shown here.
cp_set_marker(uint32_t *dwords, uint32_t sizedwords, int level)
2465
render_mode = rnn_enumname(rnn, "a6xx_marker", dwords[0] & 0xf);
2467
if (!strcmp(render_mode, "RM6_BINNING")) {
2468
enable_mask = MODE_BINNING;
2469
} else if (!strcmp(render_mode, "RM6_GMEM")) {
2470
enable_mask = MODE_GMEM;
2471
} else if (!strcmp(render_mode, "RM6_BYPASS")) {
2472
enable_mask = MODE_BYPASS;
2477
// Handler for the SET_RENDER_MODE packet (see the type3_op table).
// Only valid on gpu_id >= 500 (asserted). Records the render mode name for
// dwords[0]. Payloads of 1 and 5 dwords are special-cased; otherwise the
// payload must be 8 dwords. Two addresses are decoded: the first is dumped
// with dump_gpuaddr(), the second (high word in dwords[7]) is printed with
// its length and its contents are decoded as commands and dumped as hex.
// NOTE(review): the low address words, the length, and the conditions
// around the final dumps are on lines not visible in this view.
cp_set_render_mode(uint32_t *dwords, uint32_t sizedwords, int level)
2484
/* TODO seems to have two ptrs, 9 dwords total (incl pkt7 hdr)..
2485
* not sure if this can come in different sizes.
2487
* First ptr doesn't seem to be cmdstream, second one does.
2489
* Comment from downstream kernel:
2491
* SRM -- set render mode (ex binning, direct render etc)
2492
* SRM is set by UMD usually at start of IB to tell CP the type of
2494
* KMD needs to set SRM to NULL to indicate CP that rendering is
2496
* ------------------------------------------------------------------
2498
* Seems to always be one of these two:
2499
* 70ec0008 00000001 001c0000 00000000 00000010 00000003 0000000d 001c2000
2500
* 00000000 70ec0008 00000001 001c0000 00000000 00000000 00000003 0000000d
2505
assert(options->gpu_id >= 500);
2507
render_mode = rnn_enumname(rnn, "render_mode_cmd", dwords[0]);
2509
if (sizedwords == 1)
2513
addr |= ((uint64_t)dwords[2]) << 32;
2517
dump_gpuaddr(addr, level + 1);
2519
if (sizedwords == 5)
2522
assert(sizedwords == 8);
2526
addr |= ((uint64_t)dwords[7]) << 32;
2528
// addr is 64 bits wide: PRIx64 keeps the conversion correct on ABIs where
// long is 32 bits, and matches cp_compute_checkpoint()
printl(3, "%saddr: 0x%016" PRIx64 "\n", levels[level], addr);
2529
printl(3, "%slen: 0x%x\n", levels[level], len);
2531
ptr = hostptr(addr);
2536
dump_commands(ptr, len, level + 1);
2538
dump_hex(ptr, len, level + 1);
2544
// Handler for the COMPUTE_CHECKPOINT packet (see the type3_op table).
// Only valid on gpu_id >= 500 with an 8-dword payload (both asserted).
// Decodes a 64-bit address (high word in dwords[6]) and a length, prints
// them, then decodes the referenced buffer as commands and dumps it as hex.
// NOTE(review): the low address word, the length source and the conditions
// around the dumps are on lines not visible in this view.
cp_compute_checkpoint(uint32_t *dwords, uint32_t sizedwords, int level)
2550
assert(options->gpu_id >= 500);
2552
assert(sizedwords == 8);
2555
addr |= ((uint64_t)dwords[6]) << 32;
2558
printl(3, "%saddr: 0x%016" PRIx64 "\n", levels[level], addr);
2559
printl(3, "%slen: 0x%x\n", levels[level], len);
2561
ptr = hostptr(addr);
2566
dump_commands(ptr, len, level + 1);
2568
dump_hex(ptr, len, level + 1);
2574
// Blit packet handler: reports the blit command name for dwords[0] to
// do_query() with count 0 and prints a register summary.
// NOTE(review): this handler does not appear among the visible entries of
// the type3_op table; its registration may be on a line not shown here.
cp_blit(uint32_t *dwords, uint32_t sizedwords, int level)
2576
do_query(rnn_enumname(rnn, "cp_blit_cmd", dwords[0]), 0);
2578
dump_register_summary(level);
2582
// Handler for the CONTEXT_REG_BUNCH packet (see the type3_op table).
// The payload is a list of (register offset, value) pairs; each pair is
// decoded with dump_register() and recorded with reg_set(). The global
// summary flag is saved on entry and restored on exit.
// NOTE(review): what summary is set to between save and restore is not
// visible in this view.
cp_context_reg_bunch(uint32_t *dwords, uint32_t sizedwords, int level)
2586
/* NOTE: seems to write same reg multiple times.. not sure if different parts
2587
* of these are triggered by the FLUSH_SO_n events?? (if that is what they
2590
bool saved_summary = summary;
2593
for (i = 0; i < sizedwords; i += 2) {
2594
dump_register(dwords[i + 0], dwords[i + 1], level + 1);
2595
reg_set(dwords[i + 0], dwords[i + 1]);
2598
summary = saved_summary;
2602
// Handler for the REG_WRITE packet (see the type3_op table): the register
// offset is the low 16 bits of dwords[1] and the value is dwords[2]. The
// write is decoded with dump_register() and recorded with reg_set().
cp_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
2604
uint32_t reg = dwords[1] & 0xffff;
2606
dump_register(reg, dwords[2], level + 1);
2607
reg_set(reg, dwords[2]);
2611
// Handler for the SET_CTXSWITCH_IB packet (see the type3_op table).
// dwords 0/1 form the 64-bit buffer address and the low 16 bits of
// dwords[2] its size in dwords. Prints the address, maps it to a host
// pointer and decodes the buffer as a command stream.
// NOTE(review): any NULL check on ptr is on a line not visible here.
cp_set_ctxswitch_ib(uint32_t *dwords, uint32_t sizedwords, int level)
2614
uint32_t size = dwords[2] & 0xffff;
2617
addr = dwords[0] | ((uint64_t)dwords[1] << 32);
2620
printf("%saddr=%" PRIx64 "\n", levels[level], addr);
2623
ptr = hostptr(addr);
2625
dump_commands(ptr, size, level + 1);
2630
// Handler for the SKIP_IB2_ENABLE_GLOBAL packet (see the type3_op table):
// stores the first payload dword in skip_ib2_enable_global.
cp_skip_ib2_enable_global(uint32_t *dwords, uint32_t sizedwords, int level)
2632
skip_ib2_enable_global = dwords[0];
2636
// Handler for the SKIP_IB2_ENABLE_LOCAL packet (see the type3_op table):
// stores the first payload dword in skip_ib2_enable_local.
cp_skip_ib2_enable_local(uint32_t *dwords, uint32_t sizedwords, int level)
2638
skip_ib2_enable_local = dwords[0];
2641
// Dispatch table mapping packet names to their decode handlers.
// CP(x, fxn, ...) expands to an initializer whose name is the string CP_x,
// whose handler is fxn, and whose optional trailing argument initializes
// the per-op options. get_type3_op() matches entries by name.
#define CP(x, fxn, ...) { "CP_" #x, fxn, ##__VA_ARGS__ }
2642
static const struct type3_op {
2644
// handler: payload pointer (header stripped), payload dwords, indent level
void (*fxn)(uint32_t *dwords, uint32_t sizedwords, int level);
2646
// when set, dump_commands() calls load_all_groups() before the handler
bool load_all_groups;
2650
CP(INDIRECT_BUFFER, cp_indirect),
2651
CP(INDIRECT_BUFFER_PFD, cp_indirect),
2652
CP(WAIT_FOR_IDLE, cp_wfi),
2653
CP(REG_RMW, cp_rmw),
2654
CP(REG_TO_MEM, cp_reg_mem),
2655
CP(MEM_TO_REG, cp_reg_mem), /* same layout as CP_REG_TO_MEM */
2656
CP(MEM_WRITE, cp_mem_write),
2657
CP(EVENT_WRITE, cp_event_write),
2658
CP(RUN_OPENCL, cp_run_cl),
2659
CP(DRAW_INDX, cp_draw_indx, {.load_all_groups = true}),
2660
CP(DRAW_INDX_2, cp_draw_indx_2, {.load_all_groups = true}),
2661
CP(SET_CONSTANT, cp_set_const),
2662
CP(IM_LOAD_IMMEDIATE, cp_im_loadi),
2663
CP(WIDE_REG_WRITE, cp_wide_reg_write),
2666
CP(LOAD_STATE, cp_load_state),
2667
CP(SET_BIN, cp_set_bin),
2670
CP(LOAD_STATE4, cp_load_state),
2671
CP(SET_DRAW_STATE, cp_set_draw_state),
2672
CP(DRAW_INDX_OFFSET, cp_draw_indx_offset, {.load_all_groups = true}),
2673
CP(EXEC_CS, cp_exec_cs, {.load_all_groups = true}),
2674
CP(EXEC_CS_INDIRECT, cp_exec_cs_indirect, {.load_all_groups = true}),
2677
CP(SET_RENDER_MODE, cp_set_render_mode),
2678
CP(COMPUTE_CHECKPOINT, cp_compute_checkpoint),
2680
CP(CONTEXT_REG_BUNCH, cp_context_reg_bunch),
2681
CP(DRAW_INDIRECT, cp_draw_indirect, {.load_all_groups = true}),
2682
CP(DRAW_INDX_INDIRECT, cp_draw_indx_indirect, {.load_all_groups = true}),
2683
CP(DRAW_INDIRECT_MULTI, cp_draw_indirect_multi, {.load_all_groups = true}),
2684
CP(SKIP_IB2_ENABLE_GLOBAL, cp_skip_ib2_enable_global),
2685
CP(SKIP_IB2_ENABLE_LOCAL, cp_skip_ib2_enable_local),
2688
CP(LOAD_STATE6_GEOM, cp_load_state),
2689
CP(LOAD_STATE6_FRAG, cp_load_state),
2690
CP(LOAD_STATE6, cp_load_state),
2691
CP(SET_MODE, cp_set_mode),
2692
CP(SET_MARKER, cp_set_marker),
2693
CP(REG_WRITE, cp_reg_write),
2695
CP(SET_CTXSWITCH_IB, cp_set_ctxswitch_ib),
2697
CP(START_BIN, cp_start_bin),
2701
// Handler with the same signature as the type3_op handlers.
// NOTE(review): the body is not visible in this view; by its name it does
// nothing and presumably backs the fallback op in get_type3_op() - confirm.
noop_fxn(uint32_t *dwords, uint32_t sizedwords, int level)
2705
// Look up the handler entry for packet opcode opc: the opcode is turned
// into a packet name with pktname() and matched by string comparison
// against the type3_op table. Returns a pointer to the matching entry.
// NOTE(review): the initializer of dummy_op and the fallback return for
// unknown or unnamed opcodes are on lines not visible in this view;
// presumably &dummy_op is returned in those cases - confirm.
static const struct type3_op *
2706
get_type3_op(unsigned opc)
2708
static const struct type3_op dummy_op = {
2711
const char *name = pktname(opc);
2716
// linear scan; the table is small
for (unsigned i = 0; i < ARRAY_SIZE(type3_op); i++)
2717
if (!strcmp(name, type3_op[i].name))
2718
return &type3_op[i];
2724
dump_commands(uint32_t *dwords, uint32_t sizedwords, int level)
2726
int dwords_left = sizedwords;
2727
uint32_t count = 0; /* dword count including packet header */
2732
printf("NULL cmd buffer!\n");
2736
assert(ib < ARRAY_SIZE(draws));
2739
while (dwords_left > 0) {
2741
current_draw_count = draw_count;
2743
/* hack, this looks like a -1 underflow, in some versions
2744
* when it tries to write zero registers via pkt0
2746
// if ((dwords[0] >> 16) == 0xffff)
2749
if (pkt_is_type0(dwords[0])) {
2751
count = type0_pkt_size(dwords[0]) + 1;
2752
val = type0_pkt_offset(dwords[0]);
2753
assert(val < regcnt());
2754
printl(3, "%swrite %s%s (%04x)\n", levels[level + 1], regname(val, 1),
2755
(dwords[0] & 0x8000) ? " (same register)" : "", val);
2756
dump_registers(val, dwords + 1, count - 1, level + 2);
2758
dump_hex(dwords, count, level + 1);
2759
} else if (pkt_is_type4(dwords[0])) {
2760
/* basically the same(ish) as type0 prior to a5xx */
2762
count = type4_pkt_size(dwords[0]) + 1;
2763
val = type4_pkt_offset(dwords[0]);
2764
assert(val < regcnt());
2765
printl(3, "%swrite %s (%04x)\n", levels[level + 1], regname(val, 1),
2767
dump_registers(val, dwords + 1, count - 1, level + 2);
2769
dump_hex(dwords, count, level + 1);
2771
} else if (pkt_is_type1(dwords[0])) {
2774
val = dwords[0] & 0xfff;
2775
printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
2776
dump_registers(val, dwords+1, 1, level+2);
2777
val = (dwords[0] >> 12) & 0xfff;
2778
printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
2779
dump_registers(val, dwords+2, 1, level+2);
2781
dump_hex(dwords, count, level+1);
2782
} else if (pkt_is_type2(dwords[0])) {
2784
printf("%sNOP\n", levels[level+1]);
2787
dump_hex(dwords, count, level+1);
2789
} else if (pkt_is_type3(dwords[0])) {
2790
count = type3_pkt_size(dwords[0]) + 1;
2791
val = cp_type3_opcode(dwords[0]);
2792
const struct type3_op *op = get_type3_op(val);
2793
if (op->options.load_all_groups)
2794
load_all_groups(level + 1);
2796
const char *name = pktname(val);
2798
printf("\t%sopcode: %s%s%s (%02x) (%d dwords)%s\n", levels[level],
2799
rnn->vc->colors->bctarg, name, rnn->vc->colors->reset, val,
2800
count, (dwords[0] & 0x1) ? " (predicated)" : "");
2803
dump_domain(dwords + 1, count - 1, level + 2, name);
2804
op->fxn(dwords + 1, count - 1, level + 1);
2806
dump_hex(dwords, count, level + 1);
2807
} else if (pkt_is_type7(dwords[0])) {
2808
count = type7_pkt_size(dwords[0]) + 1;
2809
val = cp_type7_opcode(dwords[0]);
2810
const struct type3_op *op = get_type3_op(val);
2811
if (op->options.load_all_groups)
2812
load_all_groups(level + 1);
2814
const char *name = pktname(val);
2816
printf("\t%sopcode: %s%s%s (%02x) (%d dwords)\n", levels[level],
2817
rnn->vc->colors->bctarg, name, rnn->vc->colors->reset, val,
2821
/* special hack for two packets that decode the same way
2824
if (!strcmp(name, "CP_LOAD_STATE6_FRAG") ||
2825
!strcmp(name, "CP_LOAD_STATE6_GEOM"))
2826
name = "CP_LOAD_STATE6";
2827
dump_domain(dwords + 1, count - 1, level + 2, name);
2829
op->fxn(dwords + 1, count - 1, level + 1);
2831
dump_hex(dwords, count, level + 1);
2832
} else if (pkt_is_type2(dwords[0])) {
2834
printl(3, "%snop\n", levels[level + 1]);
2836
/* for 5xx+ we can do a passable job of looking for start of next valid
2838
if (options->gpu_id >= 500) {
2839
while (dwords_left > 0) {
2840
if (pkt_is_type7(dwords[0]) || pkt_is_type4(dwords[0]))
2842
printf("bad type! %08x\n", dwords[0]);
2847
printf("bad type! %08x\n", dwords[0]);
2853
dwords_left -= count;
2856
if (dwords_left < 0)
2857
printf("**** this ain't right!! dwords_left=%d\n", dwords_left);