~mmach/netext73/mesa-ryzen

« back to all changes in this revision

Viewing changes to src/freedreno/vulkan/tu_clear_blit.cc

  • Committer: mmach
  • Date: 2023-11-02 21:31:35 UTC
  • Revision ID: netbit73@gmail.com-20231102213135-18d4tzh7tj0uz752
2023-11-02 22:11:57

Show diffs side-by-side

added added

removed removed

Lines of Context:
278
278
   tu_cs_emit(cs, iview->view.SP_PS_2D_SRC_SIZE);
279
279
   tu_cs_emit_qw(cs, iview->depth_base_addr + iview->depth_layer_size * layer);
280
280
   /* SP_PS_2D_SRC_PITCH has shifted pitch field */
281
 
   tu_cs_emit(cs, iview->depth_PITCH << 9);
 
281
   tu_cs_emit(cs, A6XX_SP_PS_2D_SRC_PITCH(.pitch = iview->depth_pitch).value);
282
282
 
283
283
   tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_FLAGS, 3);
284
284
   tu_cs_image_flag_ref(cs, &iview->view, layer);
295
295
   tu_cs_emit(cs, tu_image_view_stencil(iview, SP_PS_2D_SRC_INFO) & ~A6XX_SP_PS_2D_SRC_INFO_FLAGS);
296
296
   tu_cs_emit(cs, iview->view.SP_PS_2D_SRC_SIZE);
297
297
   tu_cs_emit_qw(cs, iview->stencil_base_addr + iview->stencil_layer_size * layer);
298
 
   /* SP_PS_2D_SRC_PITCH has shifted pitch field */
299
 
   tu_cs_emit(cs, iview->stencil_PITCH << 9);
 
298
   tu_cs_emit(cs, A6XX_SP_PS_2D_SRC_PITCH(.pitch = iview->stencil_pitch).value);
300
299
}
301
300
 
302
301
static void
349
348
   tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 4);
350
349
   tu_cs_emit(cs, tu_image_view_depth(iview, RB_2D_DST_INFO));
351
350
   tu_cs_emit_qw(cs, iview->depth_base_addr + iview->depth_layer_size * layer);
352
 
   tu_cs_emit(cs, iview->depth_PITCH);
 
351
   tu_cs_emit(cs, A6XX_RB_2D_DST_PITCH(iview->depth_pitch).value);
353
352
 
354
353
   tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_FLAGS, 3);
355
354
   tu_cs_image_flag_ref(cs, &iview->view, layer);
361
360
   tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 4);
362
361
   tu_cs_emit(cs, tu_image_view_stencil(iview, RB_2D_DST_INFO) & ~A6XX_RB_2D_DST_INFO_FLAGS);
363
362
   tu_cs_emit_qw(cs, iview->stencil_base_addr + iview->stencil_layer_size * layer);
364
 
   tu_cs_emit(cs, iview->stencil_PITCH);
 
363
   tu_cs_emit(cs, A6XX_RB_2D_DST_PITCH(iview->stencil_pitch).value);
365
364
}
366
365
 
367
366
static void
620
619
   b->shader->info.num_textures = 1;
621
620
   BITSET_SET(b->shader->info.textures_used, 0);
622
621
 
623
 
   tex->src[0].src_type = nir_tex_src_coord;
624
 
   tex->src[0].src = nir_src_for_ssa(nir_load_var(b, in_coords));
 
622
   tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord,
 
623
                                     nir_load_var(b, in_coords));
625
624
   tex->coord_components = coord_components;
626
625
 
627
 
   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
 
626
   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32);
628
627
   nir_builder_instr_insert(b, &tex->instr);
629
628
 
630
629
   nir_store_var(b, out_color, &tex->dest.ssa, 0xf);
676
675
 
677
676
   nir_ssa_def *coord = nir_f2i32(b, nir_load_var(b, in_coords));
678
677
 
679
 
   tex->src[0].src_type = nir_tex_src_coord;
680
 
   tex->src[0].src = nir_src_for_ssa(coord);
 
678
   tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, coord);
681
679
   tex->coord_components = 2;
682
680
 
683
 
   tex->src[1].src_type = nir_tex_src_ms_index;
684
 
   tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(b));
 
681
   tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_ms_index,
 
682
                                     nir_load_sample_id(b));
685
683
 
686
 
   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
 
684
   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32);
687
685
   nir_builder_instr_insert(b, &tex->instr);
688
686
 
689
687
   nir_store_var(b, out_color, &tex->dest.ssa, 0xf);
1100
1098
}
1101
1099
 
1102
1100
static void
 
1101
r3d_src_depth(struct tu_cmd_buffer *cmd,
 
1102
              struct tu_cs *cs,
 
1103
              const struct tu_image_view *iview,
 
1104
              uint32_t layer)
 
1105
{
 
1106
   uint32_t desc[A6XX_TEX_CONST_DWORDS];
 
1107
 
 
1108
   memcpy(desc, iview->view.descriptor, sizeof(desc));
 
1109
   uint64_t va = iview->depth_base_addr;
 
1110
 
 
1111
   desc[0] &= ~(A6XX_TEX_CONST_0_FMT__MASK |
 
1112
                A6XX_TEX_CONST_0_SWIZ_X__MASK | A6XX_TEX_CONST_0_SWIZ_Y__MASK |
 
1113
                A6XX_TEX_CONST_0_SWIZ_Z__MASK | A6XX_TEX_CONST_0_SWIZ_W__MASK |
 
1114
                A6XX_TEX_CONST_0_SWAP__MASK);
 
1115
   desc[0] |= A6XX_TEX_CONST_0_FMT(FMT6_32_FLOAT) |
 
1116
              A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_X) |
 
1117
              A6XX_TEX_CONST_0_SWIZ_Y(A6XX_TEX_Y) |
 
1118
              A6XX_TEX_CONST_0_SWIZ_Z(A6XX_TEX_Z) |
 
1119
              A6XX_TEX_CONST_0_SWIZ_W(A6XX_TEX_W);
 
1120
   desc[2] =
 
1121
      A6XX_TEX_CONST_2_PITCH(iview->depth_pitch) |
 
1122
      A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D);
 
1123
   desc[3] = A6XX_TEX_CONST_3_ARRAY_PITCH(iview->depth_layer_size) |
 
1124
      (iview->view.descriptor[3] & ~A6XX_TEX_CONST_3_ARRAY_PITCH__MASK);
 
1125
   desc[4] = va;
 
1126
   desc[5] = va >> 32;
 
1127
 
 
1128
   r3d_src_common(cmd, cs, desc,
 
1129
                  iview->depth_layer_size * layer, 
 
1130
                  iview->view.ubwc_layer_size * layer,
 
1131
                  VK_FILTER_NEAREST);
 
1132
}
 
1133
 
 
1134
static void
 
1135
r3d_src_stencil(struct tu_cmd_buffer *cmd,
 
1136
                struct tu_cs *cs,
 
1137
                const struct tu_image_view *iview,
 
1138
                uint32_t layer)
 
1139
{
 
1140
   uint32_t desc[A6XX_TEX_CONST_DWORDS];
 
1141
 
 
1142
   memcpy(desc, iview->view.descriptor, sizeof(desc));
 
1143
   uint64_t va = iview->stencil_base_addr;
 
1144
 
 
1145
   desc[0] &= ~(A6XX_TEX_CONST_0_FMT__MASK |
 
1146
                A6XX_TEX_CONST_0_SWIZ_X__MASK | A6XX_TEX_CONST_0_SWIZ_Y__MASK |
 
1147
                A6XX_TEX_CONST_0_SWIZ_Z__MASK | A6XX_TEX_CONST_0_SWIZ_W__MASK |
 
1148
                A6XX_TEX_CONST_0_SWAP__MASK);
 
1149
   desc[0] |= A6XX_TEX_CONST_0_FMT(FMT6_8_UINT) |
 
1150
              A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_X) |
 
1151
              A6XX_TEX_CONST_0_SWIZ_Y(A6XX_TEX_Y) |
 
1152
              A6XX_TEX_CONST_0_SWIZ_Z(A6XX_TEX_Z) |
 
1153
              A6XX_TEX_CONST_0_SWIZ_W(A6XX_TEX_W);
 
1154
   desc[2] =
 
1155
      A6XX_TEX_CONST_2_PITCH(iview->stencil_pitch) |
 
1156
      A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D);
 
1157
   desc[3] = A6XX_TEX_CONST_3_ARRAY_PITCH(iview->stencil_layer_size);
 
1158
   desc[4] = va;
 
1159
   desc[5] = va >> 32;
 
1160
   for (unsigned i = 6; i < A6XX_TEX_CONST_DWORDS; i++)
 
1161
      desc[i] = 0;
 
1162
 
 
1163
   r3d_src_common(cmd, cs, desc, iview->stencil_layer_size * layer, 0,
 
1164
                  VK_FILTER_NEAREST);
 
1165
}
 
1166
 
 
1167
static void
 
1168
r3d_src_gmem_load(struct tu_cmd_buffer *cmd,
 
1169
                  struct tu_cs *cs,
 
1170
                  const struct tu_image_view *iview,
 
1171
                  uint32_t layer)
 
1172
{
 
1173
   uint32_t desc[A6XX_TEX_CONST_DWORDS];
 
1174
 
 
1175
   memcpy(desc, iview->view.descriptor, sizeof(desc));
 
1176
 
 
1177
   /* Fixup D24 formats because we always load both depth and stencil. */
 
1178
   enum pipe_format format = iview->view.format;
 
1179
   if (format == PIPE_FORMAT_X24S8_UINT ||
 
1180
       format == PIPE_FORMAT_Z24X8_UNORM ||
 
1181
       format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
 
1182
      desc[0] &= ~A6XX_TEX_CONST_0_FMT__MASK;
 
1183
      if (iview->view.ubwc_enabled)
 
1184
         desc[0] |= A6XX_TEX_CONST_0_FMT(FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8);
 
1185
      else
 
1186
         desc[0] |= A6XX_TEX_CONST_0_FMT(FMT6_8_8_8_8_UNORM);
 
1187
   }
 
1188
 
 
1189
   /* When loading/storing GMEM we always load the full image and don't do any
 
1190
    * swizzling or swapping, that's done in the draw when reading/writing
 
1191
    * GMEM, so we need to fixup the swizzle and swap.
 
1192
    */
 
1193
   desc[0] &= ~(A6XX_TEX_CONST_0_SWIZ_X__MASK | A6XX_TEX_CONST_0_SWIZ_Y__MASK |
 
1194
                A6XX_TEX_CONST_0_SWIZ_Z__MASK | A6XX_TEX_CONST_0_SWIZ_W__MASK |
 
1195
                A6XX_TEX_CONST_0_SWAP__MASK);
 
1196
   desc[0] |= A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_X) |
 
1197
              A6XX_TEX_CONST_0_SWIZ_Y(A6XX_TEX_Y) |
 
1198
              A6XX_TEX_CONST_0_SWIZ_Z(A6XX_TEX_Z) |
 
1199
              A6XX_TEX_CONST_0_SWIZ_W(A6XX_TEX_W);
 
1200
 
 
1201
   r3d_src_common(cmd, cs, desc,
 
1202
                  iview->view.layer_size * layer,
 
1203
                  iview->view.ubwc_layer_size * layer,
 
1204
                  VK_FILTER_NEAREST);
 
1205
}
 
1206
 
 
1207
static void
1103
1208
r3d_src_gmem(struct tu_cmd_buffer *cmd,
1104
1209
             struct tu_cs *cs,
1105
1210
             const struct tu_image_view *iview,
1212
1317
   tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL());
1213
1318
}
1214
1319
 
 
1320
static void
 
1321
r3d_dst_gmem(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
 
1322
             const struct tu_image_view *iview,
 
1323
             const struct tu_render_pass_attachment *att,
 
1324
             bool separate_stencil, unsigned layer)
 
1325
{
 
1326
   unsigned RB_MRT_BUF_INFO;
 
1327
   unsigned gmem_offset;
 
1328
 
 
1329
   if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
 
1330
      if (!separate_stencil) {
 
1331
         RB_MRT_BUF_INFO = tu_image_view_depth(iview, RB_MRT_BUF_INFO);
 
1332
         gmem_offset = tu_attachment_gmem_offset(cmd, att, layer);
 
1333
      } else {
 
1334
         RB_MRT_BUF_INFO = tu_image_view_stencil(iview, RB_MRT_BUF_INFO);
 
1335
         gmem_offset = tu_attachment_gmem_offset_stencil(cmd, att, layer);
 
1336
      }
 
1337
   } else {
 
1338
      RB_MRT_BUF_INFO = iview->view.RB_MRT_BUF_INFO;
 
1339
      gmem_offset = tu_attachment_gmem_offset(cmd, att, layer);
 
1340
   }
 
1341
 
 
1342
   tu_cs_emit_regs(cs,
 
1343
                   A6XX_RB_MRT_BUF_INFO(0, .dword = RB_MRT_BUF_INFO),
 
1344
                   A6XX_RB_MRT_PITCH(0, 0),
 
1345
                   A6XX_RB_MRT_ARRAY_PITCH(0, 0),
 
1346
                   A6XX_RB_MRT_BASE(0, 0),
 
1347
                   A6XX_RB_MRT_BASE_GMEM(0, gmem_offset));
 
1348
 
 
1349
   enum a6xx_format color_format =
 
1350
      (enum a6xx_format)(RB_MRT_BUF_INFO & A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK);
 
1351
   tu_cs_emit_regs(cs,
 
1352
                   A6XX_GRAS_LRZ_MRT_BUF_INFO_0(.color_format = color_format));
 
1353
 
 
1354
   tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL());
 
1355
}
 
1356
 
1215
1357
static uint8_t
1216
1358
aspect_write_mask(enum pipe_format format, VkImageAspectFlags aspect_mask)
1217
1359
{
1229
1371
   return mask;
1230
1372
}
1231
1373
 
 
1374
enum r3d_blit_param {
 
1375
   R3D_Z_SCALE = 1 << 0,
 
1376
   R3D_DST_GMEM = 1 << 1,
 
1377
};
 
1378
 
1232
1379
static void
1233
1380
r3d_setup(struct tu_cmd_buffer *cmd,
1234
1381
          struct tu_cs *cs,
1252
1399
      tu6_emit_window_scissor(cs, 0, 0, 0x3fff, 0x3fff);
1253
1400
   }
1254
1401
 
1255
 
   tu_cs_emit_regs(cs, A6XX_GRAS_BIN_CONTROL(.dword = 0xc00000));
1256
 
   tu_cs_emit_regs(cs, A6XX_RB_BIN_CONTROL(.dword = 0xc00000));
 
1402
   if (!(blit_param & R3D_DST_GMEM)) {
 
1403
      tu_cs_emit_regs(cs, A6XX_GRAS_BIN_CONTROL(.buffers_location = BUFFERS_IN_SYSMEM));
 
1404
      tu_cs_emit_regs(cs, A6XX_RB_BIN_CONTROL(.buffers_location = BUFFERS_IN_SYSMEM));
 
1405
   }
1257
1406
 
1258
 
   r3d_common(cmd, cs, !clear, 1, blit_param, samples);
 
1407
   r3d_common(cmd, cs, !clear, 1, blit_param & R3D_Z_SCALE, samples);
1259
1408
 
1260
1409
   tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2);
1261
1410
   tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(0xfc) |
1653
1802
       filter == VK_FILTER_CUBIC_EXT ||
1654
1803
       z_scale) {
1655
1804
      ops = &r3d_ops;
1656
 
      blit_param = z_scale;
 
1805
      blit_param = z_scale ? R3D_Z_SCALE : 0;
1657
1806
   }
1658
1807
 
1659
1808
   /* use the right format in setup() for D32_S8
3089
3238
         if (!separate_stencil) {
3090
3239
            tu_cs_emit(cs, tu_image_view_depth(iview, RB_BLIT_DST_INFO));
3091
3240
            tu_cs_emit_qw(cs, iview->depth_base_addr + iview->depth_layer_size * i);
3092
 
            tu_cs_emit(cs, iview->depth_PITCH);
 
3241
            tu_cs_emit(cs, A6XX_RB_2D_DST_PITCH(iview->depth_pitch).value);
3093
3242
 
3094
3243
            tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST, 3);
3095
3244
            tu_cs_image_flag_ref(cs, &iview->view, i);
3096
3245
         } else {
3097
3246
            tu_cs_emit(cs, tu_image_view_stencil(iview, RB_BLIT_DST_INFO) & ~A6XX_RB_BLIT_DST_INFO_FLAGS);
3098
3247
            tu_cs_emit_qw(cs, iview->stencil_base_addr + iview->stencil_layer_size * i);
3099
 
            tu_cs_emit(cs, iview->stencil_PITCH);
 
3248
            tu_cs_emit(cs, A6XX_RB_BLIT_DST_PITCH(iview->stencil_pitch).value);
3100
3249
         }
3101
3250
      } else {
3102
3251
         tu_cs_emit(cs, iview->view.RB_BLIT_DST_INFO);
3147
3296
   case VK_FORMAT_R8G8_UNORM:
3148
3297
   case VK_FORMAT_R8G8_UINT:
3149
3298
   case VK_FORMAT_R8G8_SINT:
 
3299
   case VK_FORMAT_R8G8_SRGB:
3150
3300
   /* TODO: this one should be able to work? */
3151
3301
   case VK_FORMAT_D24_UNORM_S8_UINT:
3152
3302
      return false;
3157
3307
   return true;
3158
3308
}
3159
3309
 
 
3310
struct apply_load_coords_state {
 
3311
   unsigned view;
 
3312
};
 
3313
 
 
3314
static void
 
3315
fdm_apply_load_coords(struct tu_cs *cs, void *data, VkRect2D bin,
 
3316
                      unsigned views, VkExtent2D *frag_areas)
 
3317
{
 
3318
   const struct apply_load_coords_state *state =
 
3319
      (const struct apply_load_coords_state *)data;
 
3320
   assert(state->view < views);
 
3321
   VkExtent2D frag_area = frag_areas[state->view];
 
3322
 
 
3323
   assert(bin.extent.width % frag_area.width == 0);
 
3324
   assert(bin.extent.height % frag_area.height == 0);
 
3325
   uint32_t scaled_width = bin.extent.width / frag_area.width;
 
3326
   uint32_t scaled_height = bin.extent.height / frag_area.height;
 
3327
 
 
3328
   const float coords[] = {
 
3329
      bin.offset.x,                    bin.offset.y,
 
3330
      bin.offset.x,                    bin.offset.y,
 
3331
      bin.offset.x + scaled_width,     bin.offset.y + scaled_height,
 
3332
      bin.offset.x + bin.extent.width, bin.offset.y + bin.extent.height,
 
3333
   };
 
3334
   r3d_coords_raw(cs, coords);
 
3335
}
 
3336
 
 
3337
static void
 
3338
load_3d_blit(struct tu_cmd_buffer *cmd,
 
3339
             struct tu_cs *cs,
 
3340
             const struct tu_image_view *iview,
 
3341
             const struct tu_render_pass_attachment *att,
 
3342
             bool separate_stencil)
 
3343
{
 
3344
   const struct tu_framebuffer *fb = cmd->state.framebuffer;
 
3345
   enum pipe_format format = iview->view.format;
 
3346
   if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
 
3347
      if (separate_stencil)
 
3348
         format = PIPE_FORMAT_S8_UINT;
 
3349
      else
 
3350
         format = PIPE_FORMAT_Z32_FLOAT;
 
3351
   }
 
3352
   r3d_setup(cmd, cs, format, format,
 
3353
             VK_IMAGE_ASPECT_COLOR_BIT, R3D_DST_GMEM, false,
 
3354
             iview->view.ubwc_enabled, iview->image->vk.samples);
 
3355
 
 
3356
   if (!cmd->state.pass->has_fdm) {
 
3357
      r3d_coords(cs, (VkOffset2D) { 0, 0 }, (VkOffset2D) { 0, 0 },
 
3358
                 (VkExtent2D) { fb->width, fb->height });
 
3359
   }
 
3360
 
 
3361
   /* Normal loads read directly from system memory, so we have to invalidate
 
3362
    * UCHE in case it contains stale data.
 
3363
    */
 
3364
   tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);
 
3365
 
 
3366
   /* Wait for CACHE_INVALIDATE to land */
 
3367
   tu_cs_emit_wfi(cs);
 
3368
 
 
3369
   for_each_layer(i, att->clear_views, cmd->state.framebuffer->layers) {
 
3370
      if (cmd->state.pass->has_fdm) {
 
3371
         struct apply_load_coords_state state = {
 
3372
            .view = att->clear_views ? i : 0,
 
3373
         };
 
3374
         tu_create_fdm_bin_patchpoint(cmd, cs, 1 + 3 + 8, fdm_apply_load_coords, state);
 
3375
      }
 
3376
 
 
3377
      r3d_dst_gmem(cmd, cs, iview, att, separate_stencil, i);
 
3378
 
 
3379
      if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
 
3380
         if (separate_stencil)
 
3381
            r3d_src_stencil(cmd, cs, iview, i);
 
3382
         else
 
3383
            r3d_src_depth(cmd, cs, iview, i);
 
3384
      } else {
 
3385
         r3d_src_gmem_load(cmd, cs, iview, i);
 
3386
      }
 
3387
 
 
3388
      r3d_run(cmd, cs);
 
3389
   }
 
3390
 
 
3391
   r3d_teardown(cmd, cs);
 
3392
 
 
3393
   /* It seems we need to WFI here for depth/stencil because color writes here
 
3394
    * aren't synchronized with depth/stencil writes.
 
3395
    *
 
3396
    * Note: the blob also uses a WFI for color attachments but this hasn't
 
3397
    * been seen to be necessary.
 
3398
    */
 
3399
   if (vk_format_is_depth_or_stencil(att->format))
 
3400
      tu_cs_emit_wfi(cs);
 
3401
}
 
3402
 
3160
3403
static void
3161
3404
tu_begin_load_store_cond_exec(struct tu_cmd_buffer *cmd,
3162
3405
                              struct tu_cs *cs, bool load)
3233
3476
   if (cond_exec)
3234
3477
      tu_begin_load_store_cond_exec(cmd, cs, true);
3235
3478
 
3236
 
   if (load_common)
3237
 
      tu_emit_blit(cmd, cs, iview, attachment, false, false);
3238
 
 
3239
 
   if (load_stencil)
3240
 
      tu_emit_blit(cmd, cs, iview, attachment, false, true);
 
3479
   if (TU_DEBUG(3D_LOAD) ||
 
3480
       cmd->state.pass->has_fdm) {
 
3481
      if (load_common || load_stencil)
 
3482
         tu_disable_draw_states(cmd, cs);
 
3483
 
 
3484
      if (load_common)
 
3485
         load_3d_blit(cmd, cs, iview, attachment, false);
 
3486
 
 
3487
      if (load_stencil)
 
3488
         load_3d_blit(cmd, cs, iview, attachment, true);
 
3489
   } else {
 
3490
      if (load_common)
 
3491
         tu_emit_blit(cmd, cs, iview, attachment, false, false);
 
3492
 
 
3493
      if (load_stencil)
 
3494
         tu_emit_blit(cmd, cs, iview, attachment, false, true);
 
3495
   }
3241
3496
 
3242
3497
   if (cond_exec)
3243
3498
      tu_end_load_store_cond_exec(cmd, cs, true);
3284
3539
                                         !util_format_is_depth_or_stencil(dst_format),
3285
3540
                      .unk20 = 1,
3286
3541
                      .unk22 = 1),
3287
 
                   /* note: src size does not matter when not scaling */
3288
 
                   A6XX_SP_PS_2D_SRC_SIZE( .width = 0x3fff, .height = 0x3fff),
 
3542
                   A6XX_SP_PS_2D_SRC_SIZE( .width = iview->vk.extent.width, .height = iview->vk.extent.height),
3289
3543
                   A6XX_SP_PS_2D_SRC(.qword = cmd->device->physical_device->gmem_base + gmem_offset),
3290
3544
                   A6XX_SP_PS_2D_SRC_PITCH(.pitch = cmd->state.tiling->tile0.width * cpp));
3291
3545
 
3385
3639
   if (TU_DEBUG(UNALIGNED_STORE))
3386
3640
      return true;
3387
3641
 
 
3642
   /* We always use the unaligned store path when scaling rendering. */
 
3643
   if (cmd->state.pass->has_fdm)
 
3644
      return true;
 
3645
 
3388
3646
   uint32_t x1 = render_area->offset.x;
3389
3647
   uint32_t y1 = render_area->offset.y;
3390
3648
   uint32_t x2 = x1 + render_area->extent.width;
3429
3687
   cmd->state.tiling = &cmd->state.framebuffer->tiling[cmd->state.gmem_layout];
3430
3688
}
3431
3689
 
 
3690
struct apply_store_coords_state {
 
3691
   unsigned view;
 
3692
};
 
3693
 
 
3694
static void
 
3695
fdm_apply_store_coords(struct tu_cs *cs, void *data, VkRect2D bin,
 
3696
                       unsigned views, VkExtent2D *frag_areas)
 
3697
{
 
3698
   const struct apply_store_coords_state *state =
 
3699
      (const struct apply_store_coords_state *)data;
 
3700
   assert(state->view < views);
 
3701
   VkExtent2D frag_area = frag_areas[state->view];
 
3702
 
 
3703
   /* The bin width/height must be a multiple of the frag_area to make sure
 
3704
    * that the scaling happens correctly. This means there may be some
 
3705
    * destination pixels that jut out of the framebuffer, but they should be
 
3706
    * clipped by the render area.
 
3707
    */
 
3708
   assert(bin.extent.width % frag_area.width == 0);
 
3709
   assert(bin.extent.height % frag_area.height == 0);
 
3710
   uint32_t scaled_width = bin.extent.width / frag_area.width;
 
3711
   uint32_t scaled_height = bin.extent.height / frag_area.height;
 
3712
 
 
3713
   tu_cs_emit_regs(cs,
 
3714
      A6XX_GRAS_2D_DST_TL(.x = bin.offset.x,
 
3715
                          .y = bin.offset.y),
 
3716
      A6XX_GRAS_2D_DST_BR(.x = bin.offset.x + bin.extent.width - 1,
 
3717
                          .y = bin.offset.y + bin.extent.height - 1));
 
3718
   tu_cs_emit_regs(cs,
 
3719
                   A6XX_GRAS_2D_SRC_TL_X(bin.offset.x),
 
3720
                   A6XX_GRAS_2D_SRC_BR_X(bin.offset.x + scaled_width - 1),
 
3721
                   A6XX_GRAS_2D_SRC_TL_Y(bin.offset.y),
 
3722
                   A6XX_GRAS_2D_SRC_BR_Y(bin.offset.y + scaled_height - 1));
 
3723
}
 
3724
 
3432
3725
void
3433
3726
tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
3434
3727
                         struct tu_cs *cs,
3527
3820
         }
3528
3821
      }
3529
3822
   } else {
3530
 
      r2d_coords(cs, render_area->offset, render_area->offset, render_area->extent);
 
3823
      if (!cmd->state.pass->has_fdm) {
 
3824
         r2d_coords(cs, render_area->offset, render_area->offset,
 
3825
                    render_area->extent);
 
3826
      } else {
 
3827
         /* Usually GRAS_2D_RESOLVE_CNTL_* clips the destination to the bin
 
3828
          * area and the coordinates span the entire render area, but for
 
3829
          * FDM we need to scale the coordinates so we need to take the
 
3830
          * opposite approach, specifying the exact bin size in the destination
 
3831
          * coordinates and using GRAS_2D_RESOLVE_CNTL_* to clip to the render
 
3832
          * area.
 
3833
          */
 
3834
         tu_cs_emit_regs(cs,
 
3835
                         A6XX_GRAS_2D_RESOLVE_CNTL_1(.x = render_area->offset.x,
 
3836
                                                     .y = render_area->offset.y,),
 
3837
                         A6XX_GRAS_2D_RESOLVE_CNTL_2(.x = render_area->offset.x + render_area->extent.width - 1,
 
3838
                                                     .y = render_area->offset.y + render_area->extent.height - 1,));
 
3839
      }
3531
3840
 
3532
 
      for_each_layer(i, layer_mask, layers) {
 
3841
      for_each_layer (i, layer_mask, layers) {
 
3842
         if (cmd->state.pass->has_fdm) {
 
3843
            unsigned view = layer_mask ? i : 0;
 
3844
            struct apply_store_coords_state state = {
 
3845
               .view = view,
 
3846
            };
 
3847
            tu_create_fdm_bin_patchpoint(cmd, cs, 8, fdm_apply_store_coords,
 
3848
                                         state);
 
3849
         }
3533
3850
         if (store_common) {
3534
3851
            store_cp_blit(cmd, cs, iview, src->samples, false, src_format,
3535
3852
                          dst_format, i, tu_attachment_gmem_offset(cmd, src, i), src->cpp);