620
619
b->shader->info.num_textures = 1;
621
620
BITSET_SET(b->shader->info.textures_used, 0);
623
tex->src[0].src_type = nir_tex_src_coord;
624
tex->src[0].src = nir_src_for_ssa(nir_load_var(b, in_coords));
622
tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord,
623
nir_load_var(b, in_coords));
625
624
tex->coord_components = coord_components;
627
nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
626
nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32);
628
627
nir_builder_instr_insert(b, &tex->instr);
630
629
nir_store_var(b, out_color, &tex->dest.ssa, 0xf);
677
676
nir_ssa_def *coord = nir_f2i32(b, nir_load_var(b, in_coords));
679
tex->src[0].src_type = nir_tex_src_coord;
680
tex->src[0].src = nir_src_for_ssa(coord);
678
tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, coord);
681
679
tex->coord_components = 2;
683
tex->src[1].src_type = nir_tex_src_ms_index;
684
tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(b));
681
tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_ms_index,
682
nir_load_sample_id(b));
686
nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
684
nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32);
687
685
nir_builder_instr_insert(b, &tex->instr);
689
687
nir_store_var(b, out_color, &tex->dest.ssa, 0xf);
1101
r3d_src_depth(struct tu_cmd_buffer *cmd,
1103
const struct tu_image_view *iview,
1106
uint32_t desc[A6XX_TEX_CONST_DWORDS];
1108
memcpy(desc, iview->view.descriptor, sizeof(desc));
1109
uint64_t va = iview->depth_base_addr;
1111
desc[0] &= ~(A6XX_TEX_CONST_0_FMT__MASK |
1112
A6XX_TEX_CONST_0_SWIZ_X__MASK | A6XX_TEX_CONST_0_SWIZ_Y__MASK |
1113
A6XX_TEX_CONST_0_SWIZ_Z__MASK | A6XX_TEX_CONST_0_SWIZ_W__MASK |
1114
A6XX_TEX_CONST_0_SWAP__MASK);
1115
desc[0] |= A6XX_TEX_CONST_0_FMT(FMT6_32_FLOAT) |
1116
A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_X) |
1117
A6XX_TEX_CONST_0_SWIZ_Y(A6XX_TEX_Y) |
1118
A6XX_TEX_CONST_0_SWIZ_Z(A6XX_TEX_Z) |
1119
A6XX_TEX_CONST_0_SWIZ_W(A6XX_TEX_W);
1121
A6XX_TEX_CONST_2_PITCH(iview->depth_pitch) |
1122
A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D);
1123
desc[3] = A6XX_TEX_CONST_3_ARRAY_PITCH(iview->depth_layer_size) |
1124
(iview->view.descriptor[3] & ~A6XX_TEX_CONST_3_ARRAY_PITCH__MASK);
1128
r3d_src_common(cmd, cs, desc,
1129
iview->depth_layer_size * layer,
1130
iview->view.ubwc_layer_size * layer,
1135
r3d_src_stencil(struct tu_cmd_buffer *cmd,
1137
const struct tu_image_view *iview,
1140
uint32_t desc[A6XX_TEX_CONST_DWORDS];
1142
memcpy(desc, iview->view.descriptor, sizeof(desc));
1143
uint64_t va = iview->stencil_base_addr;
1145
desc[0] &= ~(A6XX_TEX_CONST_0_FMT__MASK |
1146
A6XX_TEX_CONST_0_SWIZ_X__MASK | A6XX_TEX_CONST_0_SWIZ_Y__MASK |
1147
A6XX_TEX_CONST_0_SWIZ_Z__MASK | A6XX_TEX_CONST_0_SWIZ_W__MASK |
1148
A6XX_TEX_CONST_0_SWAP__MASK);
1149
desc[0] |= A6XX_TEX_CONST_0_FMT(FMT6_8_UINT) |
1150
A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_X) |
1151
A6XX_TEX_CONST_0_SWIZ_Y(A6XX_TEX_Y) |
1152
A6XX_TEX_CONST_0_SWIZ_Z(A6XX_TEX_Z) |
1153
A6XX_TEX_CONST_0_SWIZ_W(A6XX_TEX_W);
1155
A6XX_TEX_CONST_2_PITCH(iview->stencil_pitch) |
1156
A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D);
1157
desc[3] = A6XX_TEX_CONST_3_ARRAY_PITCH(iview->stencil_layer_size);
1160
for (unsigned i = 6; i < A6XX_TEX_CONST_DWORDS; i++)
1163
r3d_src_common(cmd, cs, desc, iview->stencil_layer_size * layer, 0,
1168
r3d_src_gmem_load(struct tu_cmd_buffer *cmd,
1170
const struct tu_image_view *iview,
1173
uint32_t desc[A6XX_TEX_CONST_DWORDS];
1175
memcpy(desc, iview->view.descriptor, sizeof(desc));
1177
/* Fixup D24 formats because we always load both depth and stencil. */
1178
enum pipe_format format = iview->view.format;
1179
if (format == PIPE_FORMAT_X24S8_UINT ||
1180
format == PIPE_FORMAT_Z24X8_UNORM ||
1181
format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
1182
desc[0] &= ~A6XX_TEX_CONST_0_FMT__MASK;
1183
if (iview->view.ubwc_enabled)
1184
desc[0] |= A6XX_TEX_CONST_0_FMT(FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8);
1186
desc[0] |= A6XX_TEX_CONST_0_FMT(FMT6_8_8_8_8_UNORM);
1189
/* When loading/storing GMEM we always load the full image and don't do any
1190
* swizzling or swapping, that's done in the draw when reading/writing
1191
* GMEM, so we need to fixup the swizzle and swap.
1193
desc[0] &= ~(A6XX_TEX_CONST_0_SWIZ_X__MASK | A6XX_TEX_CONST_0_SWIZ_Y__MASK |
1194
A6XX_TEX_CONST_0_SWIZ_Z__MASK | A6XX_TEX_CONST_0_SWIZ_W__MASK |
1195
A6XX_TEX_CONST_0_SWAP__MASK);
1196
desc[0] |= A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_X) |
1197
A6XX_TEX_CONST_0_SWIZ_Y(A6XX_TEX_Y) |
1198
A6XX_TEX_CONST_0_SWIZ_Z(A6XX_TEX_Z) |
1199
A6XX_TEX_CONST_0_SWIZ_W(A6XX_TEX_W);
1201
r3d_src_common(cmd, cs, desc,
1202
iview->view.layer_size * layer,
1203
iview->view.ubwc_layer_size * layer,
1103
1208
r3d_src_gmem(struct tu_cmd_buffer *cmd,
1104
1209
struct tu_cs *cs,
1105
1210
const struct tu_image_view *iview,
1212
1317
tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL());
1321
r3d_dst_gmem(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
1322
const struct tu_image_view *iview,
1323
const struct tu_render_pass_attachment *att,
1324
bool separate_stencil, unsigned layer)
1326
unsigned RB_MRT_BUF_INFO;
1327
unsigned gmem_offset;
1329
if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
1330
if (!separate_stencil) {
1331
RB_MRT_BUF_INFO = tu_image_view_depth(iview, RB_MRT_BUF_INFO);
1332
gmem_offset = tu_attachment_gmem_offset(cmd, att, layer);
1334
RB_MRT_BUF_INFO = tu_image_view_stencil(iview, RB_MRT_BUF_INFO);
1335
gmem_offset = tu_attachment_gmem_offset_stencil(cmd, att, layer);
1338
RB_MRT_BUF_INFO = iview->view.RB_MRT_BUF_INFO;
1339
gmem_offset = tu_attachment_gmem_offset(cmd, att, layer);
1343
A6XX_RB_MRT_BUF_INFO(0, .dword = RB_MRT_BUF_INFO),
1344
A6XX_RB_MRT_PITCH(0, 0),
1345
A6XX_RB_MRT_ARRAY_PITCH(0, 0),
1346
A6XX_RB_MRT_BASE(0, 0),
1347
A6XX_RB_MRT_BASE_GMEM(0, gmem_offset));
1349
enum a6xx_format color_format =
1350
(enum a6xx_format)(RB_MRT_BUF_INFO & A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK);
1352
A6XX_GRAS_LRZ_MRT_BUF_INFO_0(.color_format = color_format));
1354
tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL());
1216
1358
aspect_write_mask(enum pipe_format format, VkImageAspectFlags aspect_mask)
1252
1399
tu6_emit_window_scissor(cs, 0, 0, 0x3fff, 0x3fff);
1255
tu_cs_emit_regs(cs, A6XX_GRAS_BIN_CONTROL(.dword = 0xc00000));
1256
tu_cs_emit_regs(cs, A6XX_RB_BIN_CONTROL(.dword = 0xc00000));
1402
if (!(blit_param & R3D_DST_GMEM)) {
1403
tu_cs_emit_regs(cs, A6XX_GRAS_BIN_CONTROL(.buffers_location = BUFFERS_IN_SYSMEM));
1404
tu_cs_emit_regs(cs, A6XX_RB_BIN_CONTROL(.buffers_location = BUFFERS_IN_SYSMEM));
1258
r3d_common(cmd, cs, !clear, 1, blit_param, samples);
1407
r3d_common(cmd, cs, !clear, 1, blit_param & R3D_Z_SCALE, samples);
1260
1409
tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2);
1261
1410
tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(0xfc) |
3089
3238
if (!separate_stencil) {
3090
3239
tu_cs_emit(cs, tu_image_view_depth(iview, RB_BLIT_DST_INFO));
3091
3240
tu_cs_emit_qw(cs, iview->depth_base_addr + iview->depth_layer_size * i);
3092
tu_cs_emit(cs, iview->depth_PITCH);
3241
tu_cs_emit(cs, A6XX_RB_2D_DST_PITCH(iview->depth_pitch).value);
3094
3243
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST, 3);
3095
3244
tu_cs_image_flag_ref(cs, &iview->view, i);
3097
3246
tu_cs_emit(cs, tu_image_view_stencil(iview, RB_BLIT_DST_INFO) & ~A6XX_RB_BLIT_DST_INFO_FLAGS);
3098
3247
tu_cs_emit_qw(cs, iview->stencil_base_addr + iview->stencil_layer_size * i);
3099
tu_cs_emit(cs, iview->stencil_PITCH);
3248
tu_cs_emit(cs, A6XX_RB_BLIT_DST_PITCH(iview->stencil_pitch).value);
3102
3251
tu_cs_emit(cs, iview->view.RB_BLIT_DST_INFO);
3310
struct apply_load_coords_state {
3315
fdm_apply_load_coords(struct tu_cs *cs, void *data, VkRect2D bin,
3316
unsigned views, VkExtent2D *frag_areas)
3318
const struct apply_load_coords_state *state =
3319
(const struct apply_load_coords_state *)data;
3320
assert(state->view < views);
3321
VkExtent2D frag_area = frag_areas[state->view];
3323
assert(bin.extent.width % frag_area.width == 0);
3324
assert(bin.extent.height % frag_area.height == 0);
3325
uint32_t scaled_width = bin.extent.width / frag_area.width;
3326
uint32_t scaled_height = bin.extent.height / frag_area.height;
3328
const float coords[] = {
3329
bin.offset.x, bin.offset.y,
3330
bin.offset.x, bin.offset.y,
3331
bin.offset.x + scaled_width, bin.offset.y + scaled_height,
3332
bin.offset.x + bin.extent.width, bin.offset.y + bin.extent.height,
3334
r3d_coords_raw(cs, coords);
3338
load_3d_blit(struct tu_cmd_buffer *cmd,
3340
const struct tu_image_view *iview,
3341
const struct tu_render_pass_attachment *att,
3342
bool separate_stencil)
3344
const struct tu_framebuffer *fb = cmd->state.framebuffer;
3345
enum pipe_format format = iview->view.format;
3346
if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
3347
if (separate_stencil)
3348
format = PIPE_FORMAT_S8_UINT;
3350
format = PIPE_FORMAT_Z32_FLOAT;
3352
r3d_setup(cmd, cs, format, format,
3353
VK_IMAGE_ASPECT_COLOR_BIT, R3D_DST_GMEM, false,
3354
iview->view.ubwc_enabled, iview->image->vk.samples);
3356
if (!cmd->state.pass->has_fdm) {
3357
r3d_coords(cs, (VkOffset2D) { 0, 0 }, (VkOffset2D) { 0, 0 },
3358
(VkExtent2D) { fb->width, fb->height });
3361
/* Normal loads read directly from system memory, so we have to invalidate
3362
* UCHE in case it contains stale data.
3364
tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);
3366
/* Wait for CACHE_INVALIDATE to land */
3369
for_each_layer(i, att->clear_views, cmd->state.framebuffer->layers) {
3370
if (cmd->state.pass->has_fdm) {
3371
struct apply_load_coords_state state = {
3372
.view = att->clear_views ? i : 0,
3374
tu_create_fdm_bin_patchpoint(cmd, cs, 1 + 3 + 8, fdm_apply_load_coords, state);
3377
r3d_dst_gmem(cmd, cs, iview, att, separate_stencil, i);
3379
if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
3380
if (separate_stencil)
3381
r3d_src_stencil(cmd, cs, iview, i);
3383
r3d_src_depth(cmd, cs, iview, i);
3385
r3d_src_gmem_load(cmd, cs, iview, i);
3391
r3d_teardown(cmd, cs);
3393
/* It seems we need to WFI here for depth/stencil because color writes here
3394
* aren't synchronized with depth/stencil writes.
3396
* Note: the blob also uses a WFI for color attachments but this hasn't
3397
* been seen to be necessary.
3399
if (vk_format_is_depth_or_stencil(att->format))
3161
3404
tu_begin_load_store_cond_exec(struct tu_cmd_buffer *cmd,
3162
3405
struct tu_cs *cs, bool load)
3234
3477
tu_begin_load_store_cond_exec(cmd, cs, true);
3237
tu_emit_blit(cmd, cs, iview, attachment, false, false);
3240
tu_emit_blit(cmd, cs, iview, attachment, false, true);
3479
if (TU_DEBUG(3D_LOAD) ||
3480
cmd->state.pass->has_fdm) {
3481
if (load_common || load_stencil)
3482
tu_disable_draw_states(cmd, cs);
3485
load_3d_blit(cmd, cs, iview, attachment, false);
3488
load_3d_blit(cmd, cs, iview, attachment, true);
3491
tu_emit_blit(cmd, cs, iview, attachment, false, false);
3494
tu_emit_blit(cmd, cs, iview, attachment, false, true);
3243
3498
tu_end_load_store_cond_exec(cmd, cs, true);
3429
3687
cmd->state.tiling = &cmd->state.framebuffer->tiling[cmd->state.gmem_layout];
3690
struct apply_store_coords_state {
3695
fdm_apply_store_coords(struct tu_cs *cs, void *data, VkRect2D bin,
3696
unsigned views, VkExtent2D *frag_areas)
3698
const struct apply_store_coords_state *state =
3699
(const struct apply_store_coords_state *)data;
3700
assert(state->view < views);
3701
VkExtent2D frag_area = frag_areas[state->view];
3703
/* The bin width/height must be a multiple of the frag_area to make sure
3704
* that the scaling happens correctly. This means there may be some
3705
* destination pixels that jut out of the framebuffer, but they should be
3706
* clipped by the render area.
3708
assert(bin.extent.width % frag_area.width == 0);
3709
assert(bin.extent.height % frag_area.height == 0);
3710
uint32_t scaled_width = bin.extent.width / frag_area.width;
3711
uint32_t scaled_height = bin.extent.height / frag_area.height;
3714
A6XX_GRAS_2D_DST_TL(.x = bin.offset.x,
3716
A6XX_GRAS_2D_DST_BR(.x = bin.offset.x + bin.extent.width - 1,
3717
.y = bin.offset.y + bin.extent.height - 1));
3719
A6XX_GRAS_2D_SRC_TL_X(bin.offset.x),
3720
A6XX_GRAS_2D_SRC_BR_X(bin.offset.x + scaled_width - 1),
3721
A6XX_GRAS_2D_SRC_TL_Y(bin.offset.y),
3722
A6XX_GRAS_2D_SRC_BR_Y(bin.offset.y + scaled_height - 1));
3433
3726
tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
3434
3727
struct tu_cs *cs,
3530
r2d_coords(cs, render_area->offset, render_area->offset, render_area->extent);
3823
if (!cmd->state.pass->has_fdm) {
3824
r2d_coords(cs, render_area->offset, render_area->offset,
3825
render_area->extent);
3827
/* Usually GRAS_2D_RESOLVE_CNTL_* clips the destination to the bin
3828
* area and the coordinates span the entire render area, but for
3829
* FDM we need to scale the coordinates so we need to take the
3830
* opposite approach, specifying the exact bin size in the destination
3831
* coordinates and using GRAS_2D_RESOLVE_CNTL_* to clip to the render
3835
A6XX_GRAS_2D_RESOLVE_CNTL_1(.x = render_area->offset.x,
3836
.y = render_area->offset.y,),
3837
A6XX_GRAS_2D_RESOLVE_CNTL_2(.x = render_area->offset.x + render_area->extent.width - 1,
3838
.y = render_area->offset.y + render_area->extent.height - 1,));
3532
for_each_layer(i, layer_mask, layers) {
3841
for_each_layer (i, layer_mask, layers) {
3842
if (cmd->state.pass->has_fdm) {
3843
unsigned view = layer_mask ? i : 0;
3844
struct apply_store_coords_state state = {
3847
tu_create_fdm_bin_patchpoint(cmd, cs, 8, fdm_apply_store_coords,
3533
3850
if (store_common) {
3534
3851
store_cp_blit(cmd, cs, iview, src->samples, false, src_format,
3535
3852
dst_format, i, tu_attachment_gmem_offset(cmd, src, i), src->cpp);