71
95
OUT_RING(ring, target->stride);
98
static inline unsigned
99
max_indices(const struct pipe_draw_info *info, unsigned index_offset)
101
struct pipe_resource *idx = info->index.resource;
103
assert((info->index_size == 1) ||
104
(info->index_size == 2) ||
105
(info->index_size == 4));
107
/* Conceptually we divide by the index_size. But if we had
108
* log2(index_size) we could convert that into a right-shift
109
* instead. Conveniently the index_size will only be 1, 2,
110
* or 4. And dividing by two (right-shift by one) gives us
111
* the same answer for those three values. So instead of
112
* divide we can do two right-shifts.
114
unsigned index_size_shift = info->index_size >> 1;
115
return (idx->width0 - index_offset) >> index_size_shift;
118
template <draw_type DRAW>
75
draw_emit_indirect(struct fd_ringbuffer *ring,
120
draw_emit_indirect(struct fd_context *ctx,
121
struct fd_ringbuffer *ring,
76
122
struct CP_DRAW_INDX_OFFSET_0 *draw0,
77
123
const struct pipe_draw_info *info,
78
124
const struct pipe_draw_indirect_info *indirect,
79
unsigned index_offset)
125
unsigned index_offset, uint32_t driver_param)
81
127
struct fd_resource *ind = fd_resource(indirect->buffer);
83
if (info->index_size) {
84
struct pipe_resource *idx = info->index.resource;
85
unsigned max_indices = (idx->width0 - index_offset) / info->index_size;
87
OUT_PKT(ring, CP_DRAW_INDX_INDIRECT, pack_CP_DRAW_INDX_OFFSET_0(*draw0),
88
A5XX_CP_DRAW_INDX_INDIRECT_INDX_BASE(fd_resource(idx)->bo,
90
A5XX_CP_DRAW_INDX_INDIRECT_3(.max_indices = max_indices),
91
A5XX_CP_DRAW_INDX_INDIRECT_INDIRECT(ind->bo, indirect->offset));
93
OUT_PKT(ring, CP_DRAW_INDIRECT, pack_CP_DRAW_INDX_OFFSET_0(*draw0),
94
A5XX_CP_DRAW_INDIRECT_INDIRECT(ind->bo, indirect->offset));
129
if (DRAW == DRAW_INDIRECT_OP_INDIRECT_COUNT_INDEXED) {
130
OUT_PKT7(ring, CP_DRAW_INDIRECT_MULTI, 11);
131
OUT_RING(ring, pack_CP_DRAW_INDX_OFFSET_0(*draw0).value);
133
(A6XX_CP_DRAW_INDIRECT_MULTI_1_OPCODE(INDIRECT_OP_INDIRECT_COUNT_INDEXED)
134
| A6XX_CP_DRAW_INDIRECT_MULTI_1_DST_OFF(driver_param)));
135
struct fd_resource *count_buf = fd_resource(indirect->indirect_draw_count);
136
struct pipe_resource *idx = info->index.resource;
137
OUT_RING(ring, indirect->draw_count);
138
OUT_RELOC(ring, fd_resource(idx)->bo, index_offset, 0, 0);
139
OUT_RING(ring, max_indices(info, index_offset));
140
OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
141
OUT_RELOC(ring, count_buf->bo, indirect->indirect_draw_count_offset, 0, 0);
142
OUT_RING(ring, indirect->stride);
143
} else if (DRAW == DRAW_INDIRECT_OP_INDEXED) {
144
OUT_PKT7(ring, CP_DRAW_INDIRECT_MULTI, 9);
145
OUT_RING(ring, pack_CP_DRAW_INDX_OFFSET_0(*draw0).value);
147
(A6XX_CP_DRAW_INDIRECT_MULTI_1_OPCODE(INDIRECT_OP_INDEXED)
148
| A6XX_CP_DRAW_INDIRECT_MULTI_1_DST_OFF(driver_param)));
149
struct pipe_resource *idx = info->index.resource;
150
OUT_RING(ring, indirect->draw_count);
152
OUT_RELOC(ring, fd_resource(idx)->bo, index_offset, 0, 0);
154
OUT_RING(ring, max_indices(info, index_offset));
155
OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
156
OUT_RING(ring, indirect->stride);
157
} else if(DRAW == DRAW_INDIRECT_OP_INDIRECT_COUNT) {
158
OUT_PKT7(ring, CP_DRAW_INDIRECT_MULTI, 8);
159
OUT_RING(ring, pack_CP_DRAW_INDX_OFFSET_0(*draw0).value);
161
(A6XX_CP_DRAW_INDIRECT_MULTI_1_OPCODE(INDIRECT_OP_INDIRECT_COUNT)
162
| A6XX_CP_DRAW_INDIRECT_MULTI_1_DST_OFF(driver_param)));
163
struct fd_resource *count_buf = fd_resource(indirect->indirect_draw_count);
164
OUT_RING(ring, indirect->draw_count);
165
OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
166
OUT_RELOC(ring, count_buf->bo, indirect->indirect_draw_count_offset, 0, 0);
167
OUT_RING(ring, indirect->stride);
168
} else if (DRAW == DRAW_INDIRECT_OP_NORMAL) {
169
OUT_PKT7(ring, CP_DRAW_INDIRECT_MULTI, 6);
170
OUT_RING(ring, pack_CP_DRAW_INDX_OFFSET_0(*draw0).value);
172
(A6XX_CP_DRAW_INDIRECT_MULTI_1_OPCODE(INDIRECT_OP_NORMAL)
173
| A6XX_CP_DRAW_INDIRECT_MULTI_1_DST_OFF(driver_param)));
174
OUT_RING(ring, indirect->draw_count);
175
OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
176
OUT_RING(ring, indirect->stride);
180
template <draw_type DRAW>
99
182
draw_emit(struct fd_ringbuffer *ring, struct CP_DRAW_INDX_OFFSET_0 *draw0,
100
183
const struct pipe_draw_info *info,
101
184
const struct pipe_draw_start_count_bias *draw, unsigned index_offset)
103
if (info->index_size) {
186
if (DRAW == DRAW_DIRECT_OP_INDEXED) {
104
187
assert(!info->has_user_indices);
106
189
struct pipe_resource *idx_buffer = info->index.resource;
107
unsigned max_indices =
108
(idx_buffer->width0 - index_offset) / info->index_size;
110
191
OUT_PKT(ring, CP_DRAW_INDX_OFFSET, pack_CP_DRAW_INDX_OFFSET_0(*draw0),
111
192
CP_DRAW_INDX_OFFSET_1(.num_instances = info->instance_count),
413
529
emit_marker6(ring, 7);
414
fd_reset_wfi(ctx->batch);
416
531
flush_streamout(ctx, &emit);
418
533
fd_context_all_clean(ctx);
422
fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth) assert_dt
424
struct fd_ringbuffer *ring;
425
struct fd_screen *screen = batch->ctx->screen;
427
ring = fd_batch_get_prologue(batch);
429
emit_marker6(ring, 7);
430
OUT_PKT7(ring, CP_SET_MARKER, 1);
431
OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BYPASS));
432
emit_marker6(ring, 7);
436
fd6_emit_ccu_cntl(ring, screen, false);
439
A6XX_HLSQ_INVALIDATE_CMD(.vs_state = true, .hs_state = true,
440
.ds_state = true, .gs_state = true,
441
.fs_state = true, .cs_state = true,
442
.cs_ibo = true, .gfx_ibo = true,
443
.gfx_shared_const = true,
444
.cs_bindless = 0x1f, .gfx_bindless = 0x1f));
446
emit_marker6(ring, 7);
447
OUT_PKT7(ring, CP_SET_MARKER, 1);
448
OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
449
emit_marker6(ring, 7);
451
OUT_PKT4(ring, REG_A6XX_RB_2D_UNKNOWN_8C01, 1);
454
OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
455
OUT_RING(ring, 0x00000000);
456
OUT_RING(ring, 0x00000000);
457
OUT_RING(ring, 0x00000000);
458
OUT_RING(ring, 0x00000000);
459
OUT_RING(ring, 0x00000000);
460
OUT_RING(ring, 0x00000000);
461
OUT_RING(ring, 0x00000000);
462
OUT_RING(ring, 0x00000000);
463
OUT_RING(ring, 0x00000000);
464
OUT_RING(ring, 0x00000000);
465
OUT_RING(ring, 0x00000000);
466
OUT_RING(ring, 0x00000000);
467
OUT_RING(ring, 0x00000000);
469
OUT_PKT4(ring, REG_A6XX_SP_2D_DST_FORMAT, 1);
470
OUT_RING(ring, 0x0000f410);
472
OUT_PKT4(ring, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
474
A6XX_GRAS_2D_BLIT_CNTL_COLOR_FORMAT(FMT6_16_UNORM) | 0x4f00080);
476
OUT_PKT4(ring, REG_A6XX_RB_2D_BLIT_CNTL, 1);
477
OUT_RING(ring, A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(FMT6_16_UNORM) | 0x4f00080);
479
fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
480
fd6_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);
483
OUT_PKT4(ring, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
484
OUT_RING(ring, fui(depth));
485
OUT_RING(ring, 0x00000000);
486
OUT_RING(ring, 0x00000000);
487
OUT_RING(ring, 0x00000000);
489
OUT_PKT4(ring, REG_A6XX_RB_2D_DST_INFO, 9);
490
OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(FMT6_16_UNORM) |
491
A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) |
492
A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
493
OUT_RELOC(ring, zsbuf->lrz, 0, 0, 0);
494
OUT_RING(ring, A6XX_RB_2D_DST_PITCH(zsbuf->lrz_pitch * 2).value);
495
OUT_RING(ring, 0x00000000);
496
OUT_RING(ring, 0x00000000);
497
OUT_RING(ring, 0x00000000);
498
OUT_RING(ring, 0x00000000);
499
OUT_RING(ring, 0x00000000);
501
OUT_REG(ring, A6XX_GRAS_2D_SRC_TL_X(0), A6XX_GRAS_2D_SRC_BR_X(0),
502
A6XX_GRAS_2D_SRC_TL_Y(0), A6XX_GRAS_2D_SRC_BR_Y(0));
504
OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
505
OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0));
506
OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(zsbuf->lrz_width - 1) |
507
A6XX_GRAS_2D_DST_BR_Y(zsbuf->lrz_height - 1));
509
fd6_event_write(batch, ring, (enum vgt_event_type)0x3f, false);
511
if (screen->info->a6xx.magic.RB_DBG_ECO_CNTL_blit != screen->info->a6xx.magic.RB_DBG_ECO_CNTL) {
512
/* This a non-context register, so we have to WFI before changing. */
514
OUT_PKT4(ring, REG_A6XX_RB_DBG_ECO_CNTL, 1);
515
OUT_RING(ring, screen->info->a6xx.magic.RB_DBG_ECO_CNTL_blit);
518
OUT_PKT7(ring, CP_BLIT, 1);
519
OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
521
if (screen->info->a6xx.magic.RB_DBG_ECO_CNTL_blit != screen->info->a6xx.magic.RB_DBG_ECO_CNTL) {
523
OUT_PKT4(ring, REG_A6XX_RB_DBG_ECO_CNTL, 1);
524
OUT_RING(ring, screen->info->a6xx.magic.RB_DBG_ECO_CNTL);
527
fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
528
fd6_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
529
fd6_event_write(batch, ring, CACHE_FLUSH_TS, true);
532
fd6_cache_inv(batch, ring);
536
template <chip CHIP, fd6_pipeline_type PIPELINE>
538
fd6_draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
539
unsigned drawid_offset,
540
const struct pipe_draw_indirect_info *indirect,
541
const struct pipe_draw_start_count_bias *draws,
543
unsigned index_offset)
546
/* Non-indirect case is where we are more likely to see a high draw rate: */
547
if (likely(!indirect)) {
548
if (info->index_size) {
549
draw_vbos<CHIP, PIPELINE, DRAW_DIRECT_OP_INDEXED>(
550
ctx, info, drawid_offset, NULL, draws, num_draws, index_offset);
552
draw_vbos<CHIP, PIPELINE, DRAW_DIRECT_OP_NORMAL>(
553
ctx, info, drawid_offset, NULL, draws, num_draws, index_offset);
555
} else if (indirect->count_from_stream_output) {
556
draw_vbos<CHIP, PIPELINE, DRAW_INDIRECT_OP_XFB>(
557
ctx, info, drawid_offset, indirect, draws, num_draws, index_offset);
558
} else if (indirect->indirect_draw_count && info->index_size) {
559
draw_vbos<CHIP, PIPELINE, DRAW_INDIRECT_OP_INDIRECT_COUNT_INDEXED>(
560
ctx, info, drawid_offset, indirect, draws, num_draws, index_offset);
561
} else if (indirect->indirect_draw_count) {
562
draw_vbos<CHIP, PIPELINE, DRAW_INDIRECT_OP_INDIRECT_COUNT>(
563
ctx, info, drawid_offset, indirect, draws, num_draws, index_offset);
564
} else if (info->index_size) {
565
draw_vbos<CHIP, PIPELINE, DRAW_INDIRECT_OP_INDEXED>(
566
ctx, info, drawid_offset, indirect, draws, num_draws, index_offset);
568
draw_vbos<CHIP, PIPELINE, DRAW_INDIRECT_OP_NORMAL>(
569
ctx, info, drawid_offset, indirect, draws, num_draws, index_offset);
575
fd6_update_draw(struct fd_context *ctx)
577
const uint32_t gs_tess_stages = BIT(MESA_SHADER_TESS_CTRL) |
578
BIT(MESA_SHADER_TESS_EVAL) | BIT(MESA_SHADER_GEOMETRY);
580
if (ctx->bound_shader_stages & gs_tess_stages) {
581
ctx->draw_vbos = fd6_draw_vbos<CHIP, HAS_TESS_GS>;
583
ctx->draw_vbos = fd6_draw_vbos<CHIP, NO_TESS_GS>;
536
is_z32(enum pipe_format format)
588
do_lrz_clear(struct fd_context *ctx, enum fd_buffer_mask buffers)
539
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
540
case PIPE_FORMAT_Z32_UNORM:
541
case PIPE_FORMAT_Z32_FLOAT:
590
struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
595
struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture);
597
return (buffers & FD_BUFFER_DEPTH) && zsbuf->lrz;
551
603
unsigned stencil) assert_dt
553
605
struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
554
const bool has_depth = pfb->zsbuf;
606
struct fd_batch_subpass *subpass = ctx->batch->subpass;
555
607
unsigned color_buffers = buffers >> 2;
557
/* If we're clearing after draws, fallback to 3D pipe clears. We could
558
* use blitter clears in the draw batch but then we'd have to patch up the
559
* gmem offsets. This doesn't seem like a useful thing to optimize for
561
if (ctx->batch->num_draws > 0)
564
if (has_depth && (buffers & FD_BUFFER_DEPTH)) {
609
if (pfb->samples > 1) {
610
/* we need to do multisample clear on 3d pipe, so fallback to u_blitter.
611
* But we do this ourselves so that we can still benefit from LRZ, as
612
* normally zfunc==ALWAYS would invalidate LRZ. So we want to mark the
613
* LRZ state as valid *after* the fallback clear.
615
fd_blitter_clear(&ctx->base, (unsigned)buffers, color, depth, stencil);
618
/* If we are clearing after draws, split out a new subpass:
620
if (subpass->num_draws > 0) {
621
/* If we won't be able to do any fast-clears, avoid pointlessly
622
* splitting out a new subpass:
624
if (pfb->samples > 1 && !do_lrz_clear(ctx, buffers))
627
subpass = fd_batch_create_subpass(ctx->batch);
629
/* If doing an LRZ clear, replace the existing LRZ buffer with a
630
* freshly allocated one so that we have valid LRZ state for the
631
* new pass. Otherwise unconditional writes to the depth buffer
632
* would cause LRZ state to be invalid.
634
if (do_lrz_clear(ctx, buffers)) {
635
struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture);
637
fd_bo_del(subpass->lrz);
638
subpass->lrz = fd_bo_new(ctx->screen->dev, fd_bo_size(zsbuf->lrz),
640
fd_bo_del(zsbuf->lrz);
641
zsbuf->lrz = fd_bo_ref(subpass->lrz);
645
if (do_lrz_clear(ctx, buffers)) {
565
646
struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture);
566
if (zsbuf->lrz && !is_z32(pfb->zsbuf->format)) {
567
zsbuf->lrz_valid = true;
568
zsbuf->lrz_direction = FD_LRZ_UNKNOWN;
569
fd6_clear_lrz(ctx->batch, zsbuf, depth);
648
zsbuf->lrz_valid = true;
649
zsbuf->lrz_direction = FD_LRZ_UNKNOWN;
650
subpass->clear_depth = depth;
651
subpass->fast_cleared |= FD_BUFFER_LRZ;
653
STATIC_ASSERT((FD_BUFFER_LRZ & FD_BUFFER_ALL) == 0);
573
/* we need to do multisample clear on 3d pipe, so fallback to u_blitter: */
656
/* We've already done the fallback 3d clear: */
574
657
if (pfb->samples > 1)
577
660
u_foreach_bit (i, color_buffers)
578
ctx->batch->clear_color[i] = *color;
661
subpass->clear_color[i] = *color;
579
662
if (buffers & FD_BUFFER_DEPTH)
580
ctx->batch->clear_depth = depth;
663
subpass->clear_depth = depth;
581
664
if (buffers & FD_BUFFER_STENCIL)
582
ctx->batch->clear_stencil = stencil;
665
subpass->clear_stencil = stencil;
584
ctx->batch->fast_cleared |= buffers;
667
subpass->fast_cleared |= buffers;
590
fd6_draw_init(struct pipe_context *pctx) disable_thread_safety_analysis
674
fd6_draw_init(struct pipe_context *pctx)
675
disable_thread_safety_analysis
592
677
struct fd_context *ctx = fd_context(pctx);
593
ctx->draw_vbos = fd6_draw_vbos;
594
678
ctx->clear = fd6_clear;
679
ctx->update_draw = fd6_update_draw<CHIP>;
680
fd6_update_draw<CHIP>(ctx);
683
/* Teach the compiler about needed variants: */
684
template void fd6_draw_init<A6XX>(struct pipe_context *pctx);
685
template void fd6_draw_init<A7XX>(struct pipe_context *pctx);