~mmach/netext73/mesa-haswell

« back to all changes in this revision

Viewing changes to src/broadcom/vulkan/v3dv_pass.c

  • Committer: mmach
  • Date: 2022-09-22 19:56:13 UTC
  • Revision ID: netbit73@gmail.com-20220922195613-wtik9mmy20tmor0i
2022-09-22 21:17:09

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/*
2
 
 * Copyright © 2019 Raspberry Pi Ltd
3
 
 *
4
 
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 
 * copy of this software and associated documentation files (the "Software"),
6
 
 * to deal in the Software without restriction, including without limitation
7
 
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 
 * and/or sell copies of the Software, and to permit persons to whom the
9
 
 * Software is furnished to do so, subject to the following conditions:
10
 
 *
11
 
 * The above copyright notice and this permission notice (including the next
12
 
 * paragraph) shall be included in all copies or substantial portions of the
13
 
 * Software.
14
 
 *
15
 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
 
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
 
 * IN THE SOFTWARE.
22
 
 */
23
 
 
24
 
#include "v3dv_private.h"
25
 
 
26
 
static uint32_t
27
 
num_subpass_attachments(const VkSubpassDescription2 *desc)
28
 
{
29
 
   return desc->inputAttachmentCount +
30
 
          desc->colorAttachmentCount +
31
 
          (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
32
 
          (desc->pDepthStencilAttachment != NULL);
33
 
}
34
 
 
35
 
static void
36
 
set_try_tlb_resolve(struct v3dv_device *device,
37
 
                    struct v3dv_render_pass_attachment *att)
38
 
{
39
 
   const struct v3dv_format *format = v3dv_X(device, get_format)(att->desc.format);
40
 
   att->try_tlb_resolve = v3dv_X(device, format_supports_tlb_resolve)(format);
41
 
}
42
 
 
43
 
/* Computes, for every attachment in the pass, the subpass range
 * [first_subpass, last_subpass] over which the attachment is used as a
 * color, depth/stencil, input or resolve target. With multiview enabled,
 * the same range is also tracked per view for color attachments.
 * Color and depth/stencil attachments that have an associated resolve
 * attachment are additionally flagged for TLB resolve when the format
 * allows it (see set_try_tlb_resolve).
 */
static void
pass_find_subpass_range_for_attachments(struct v3dv_device *device,
                                        struct v3dv_render_pass *pass)
{
   /* Start every range inverted (first = last subpass index, last = 0) so
    * the min/max updates below converge on the real first/last usage.
    */
   for (uint32_t i = 0; i < pass->attachment_count; i++) {
      pass->attachments[i].first_subpass = pass->subpass_count - 1;
      pass->attachments[i].last_subpass = 0;
      if (pass->multiview_enabled) {
         for (uint32_t j = 0; j < MAX_MULTIVIEW_VIEW_COUNT; j++) {
            pass->attachments[i].views[j].first_subpass = pass->subpass_count - 1;
            pass->attachments[i].views[j].last_subpass = 0;
         }
      }
   }

   for (uint32_t i = 0; i < pass->subpass_count; i++) {
      const struct v3dv_subpass *subpass = &pass->subpasses[i];

      /* Color attachments (also tracks per-view ranges and resolve flags) */
      for (uint32_t j = 0; j < subpass->color_count; j++) {
         uint32_t attachment_idx = subpass->color_attachments[j].attachment;
         if (attachment_idx == VK_ATTACHMENT_UNUSED)
            continue;

         struct v3dv_render_pass_attachment *att =
            &pass->attachments[attachment_idx];

         if (i < att->first_subpass)
            att->first_subpass = i;
         if (i > att->last_subpass)
            att->last_subpass = i;

         /* view_mask is zero when multiview is disabled (multiview is
          * all-or-nothing across the pass), so this loop only runs for
          * multiview passes.
          */
         uint32_t view_mask = subpass->view_mask;
         while (view_mask) {
            uint32_t view_index = u_bit_scan(&view_mask);
            if (i < att->views[view_index].first_subpass)
               att->views[view_index].first_subpass = i;
            if (i > att->views[view_index].last_subpass)
               att->views[view_index].last_subpass = i;
         }

         if (subpass->resolve_attachments &&
             subpass->resolve_attachments[j].attachment != VK_ATTACHMENT_UNUSED) {
            set_try_tlb_resolve(device, att);
         }
      }

      /* Depth/stencil attachment */
      uint32_t ds_attachment_idx = subpass->ds_attachment.attachment;
      if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
         if (i < pass->attachments[ds_attachment_idx].first_subpass)
            pass->attachments[ds_attachment_idx].first_subpass = i;
         if (i > pass->attachments[ds_attachment_idx].last_subpass)
            pass->attachments[ds_attachment_idx].last_subpass = i;

         if (subpass->ds_resolve_attachment.attachment != VK_ATTACHMENT_UNUSED)
            set_try_tlb_resolve(device, &pass->attachments[ds_attachment_idx]);
      }

      /* Input attachments */
      for (uint32_t j = 0; j < subpass->input_count; j++) {
         uint32_t input_attachment_idx = subpass->input_attachments[j].attachment;
         if (input_attachment_idx == VK_ATTACHMENT_UNUSED)
            continue;
         if (i < pass->attachments[input_attachment_idx].first_subpass)
            pass->attachments[input_attachment_idx].first_subpass = i;
         if (i > pass->attachments[input_attachment_idx].last_subpass)
            pass->attachments[input_attachment_idx].last_subpass = i;
      }

      /* Color resolve attachments count as used by this subpass too */
      if (subpass->resolve_attachments) {
         for (uint32_t j = 0; j < subpass->color_count; j++) {
            uint32_t attachment_idx = subpass->resolve_attachments[j].attachment;
            if (attachment_idx == VK_ATTACHMENT_UNUSED)
               continue;
            if (i < pass->attachments[attachment_idx].first_subpass)
               pass->attachments[attachment_idx].first_subpass = i;
            if (i > pass->attachments[attachment_idx].last_subpass)
               pass->attachments[attachment_idx].last_subpass = i;
         }
      }
   }
}
123
 
 
124
 
 
125
 
/* Creates a render pass. The pass object, its subpass array and its
 * attachment array live in one allocation; the per-subpass attachment
 * references live in a second allocation (pass->subpass_attachments)
 * that is carved up between the subpasses via the bump pointer 'p'.
 */
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateRenderPass2(VkDevice _device,
                       const VkRenderPassCreateInfo2 *pCreateInfo,
                       const VkAllocationCallbacks *pAllocator,
                       VkRenderPass *pRenderPass)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   struct v3dv_render_pass *pass;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2);

   /* From the VK_KHR_multiview spec:
    *
    *   When a subpass uses a non-zero view mask, multiview functionality is
    *   considered to be enabled. Multiview is all-or-nothing for a render
    *   pass - that is, either all subpasses must have a non-zero view mask
    *   (though some subpasses may have only one view) or all must be zero.
    */
   bool multiview_enabled = pCreateInfo->subpassCount &&
      pCreateInfo->pSubpasses[0].viewMask;

   /* Single allocation: [pass][subpasses][attachments] */
   size_t size = sizeof(*pass);
   size_t subpasses_offset = size;
   size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
   size_t attachments_offset = size;
   size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);

   pass = vk_object_zalloc(&device->vk, pAllocator, size,
                           VK_OBJECT_TYPE_RENDER_PASS);
   if (pass == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   pass->multiview_enabled = multiview_enabled;
   pass->attachment_count = pCreateInfo->attachmentCount;
   pass->attachments = (void *) pass + attachments_offset;
   pass->subpass_count = pCreateInfo->subpassCount;
   pass->subpasses = (void *) pass + subpasses_offset;

   for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++)
      pass->attachments[i].desc = pCreateInfo->pAttachments[i];

   /* Second allocation: one flat array of subpass attachment references,
    * sized for all subpasses combined.
    */
   uint32_t subpass_attachment_count = 0;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
      subpass_attachment_count += num_subpass_attachments(desc);
   }

   if (subpass_attachment_count) {
      const size_t subpass_attachment_bytes =
         subpass_attachment_count * sizeof(struct v3dv_subpass_attachment);
      pass->subpass_attachments =
         vk_alloc2(&device->vk.alloc, pAllocator, subpass_attachment_bytes, 8,
                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (pass->subpass_attachments == NULL) {
         vk_object_free(&device->vk, pAllocator, pass);
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      }
   } else {
      pass->subpass_attachments = NULL;
   }

   /* 'p' bumps through subpass_attachments, handing each subpass its
    * input/color/resolve sub-arrays in order.
    */
   struct v3dv_subpass_attachment *p = pass->subpass_attachments;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
      struct v3dv_subpass *subpass = &pass->subpasses[i];

      subpass->input_count = desc->inputAttachmentCount;
      subpass->color_count = desc->colorAttachmentCount;
      subpass->view_mask = desc->viewMask;

      if (desc->inputAttachmentCount > 0) {
         subpass->input_attachments = p;
         p += desc->inputAttachmentCount;

         for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
            subpass->input_attachments[j] = (struct v3dv_subpass_attachment) {
               .attachment = desc->pInputAttachments[j].attachment,
               .layout = desc->pInputAttachments[j].layout,
            };
         }
      }

      if (desc->colorAttachmentCount > 0) {
         subpass->color_attachments = p;
         p += desc->colorAttachmentCount;

         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
            subpass->color_attachments[j] = (struct v3dv_subpass_attachment) {
               .attachment = desc->pColorAttachments[j].attachment,
               .layout = desc->pColorAttachments[j].layout,
            };
         }
      }

      /* If present, resolve attachments are one-per-color by definition */
      if (desc->pResolveAttachments) {
         subpass->resolve_attachments = p;
         p += desc->colorAttachmentCount;

         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
            subpass->resolve_attachments[j] = (struct v3dv_subpass_attachment) {
               .attachment = desc->pResolveAttachments[j].attachment,
               .layout = desc->pResolveAttachments[j].layout,
            };
         }
      }

      if (desc->pDepthStencilAttachment) {
         subpass->ds_attachment = (struct v3dv_subpass_attachment) {
            .attachment = desc->pDepthStencilAttachment->attachment,
            .layout = desc->pDepthStencilAttachment->layout,
         };

         /* GFXH-1461: if depth is cleared but stencil is loaded (or viceversa),
          * the clear might get lost. If a subpass has this then we can't emit
          * the clear using the TLB and we have to do it as a draw call.
          *
          * FIXME: separate stencil.
          */
         if (subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED) {
            struct v3dv_render_pass_attachment *att =
               &pass->attachments[subpass->ds_attachment.attachment];
            if (att->desc.format == VK_FORMAT_D24_UNORM_S8_UINT) {
               if (att->desc.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR &&
                   att->desc.stencilLoadOp == VK_ATTACHMENT_LOAD_OP_LOAD) {
                  subpass->do_depth_clear_with_draw = true;
               } else if (att->desc.loadOp == VK_ATTACHMENT_LOAD_OP_LOAD &&
                          att->desc.stencilLoadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
                  subpass->do_stencil_clear_with_draw = true;
               }
            }
         }

         /* VK_KHR_depth_stencil_resolve */
         const VkSubpassDescriptionDepthStencilResolveKHR *resolve_desc =
            vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
         const VkAttachmentReference2 *resolve_att =
            resolve_desc && resolve_desc->pDepthStencilResolveAttachment &&
            resolve_desc->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED ?
               resolve_desc->pDepthStencilResolveAttachment : NULL;
         if (resolve_att) {
            subpass->ds_resolve_attachment = (struct v3dv_subpass_attachment) {
               .attachment = resolve_att->attachment,
               .layout = resolve_att->layout,
            };
            /* NOTE(review): only SAMPLE_ZERO resolve mode appears to be
             * supported here; presumably advertised as such to the app.
             */
            assert(resolve_desc->depthResolveMode == VK_RESOLVE_MODE_SAMPLE_ZERO_BIT ||
                   resolve_desc->stencilResolveMode == VK_RESOLVE_MODE_SAMPLE_ZERO_BIT);
            subpass->resolve_depth =
               resolve_desc->depthResolveMode != VK_RESOLVE_MODE_NONE &&
               resolve_att->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
            subpass->resolve_stencil =
               resolve_desc->stencilResolveMode != VK_RESOLVE_MODE_NONE &&
               resolve_att->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;
         } else {
            subpass->ds_resolve_attachment.attachment = VK_ATTACHMENT_UNUSED;
            subpass->resolve_depth = false;
            subpass->resolve_stencil = false;
         }
      } else {
         subpass->ds_attachment.attachment = VK_ATTACHMENT_UNUSED;
         subpass->ds_resolve_attachment.attachment = VK_ATTACHMENT_UNUSED;
         subpass->resolve_depth = false;
         subpass->resolve_stencil = false;
      }
   }

   pass_find_subpass_range_for_attachments(device, pass);

   /* FIXME: handle subpass dependencies */

   *pRenderPass = v3dv_render_pass_to_handle(pass);

   return VK_SUCCESS;
}
298
 
 
299
 
VKAPI_ATTR void VKAPI_CALL
300
 
v3dv_DestroyRenderPass(VkDevice _device,
301
 
                       VkRenderPass _pass,
302
 
                       const VkAllocationCallbacks *pAllocator)
303
 
{
304
 
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
305
 
   V3DV_FROM_HANDLE(v3dv_render_pass, pass, _pass);
306
 
 
307
 
   if (!_pass)
308
 
      return;
309
 
 
310
 
   vk_free2(&device->vk.alloc, pAllocator, pass->subpass_attachments);
311
 
   vk_object_free(&device->vk, pAllocator, pass);
312
 
}
313
 
 
314
 
static void
315
 
subpass_get_granularity(struct v3dv_device *device,
316
 
                        struct v3dv_render_pass *pass,
317
 
                        uint32_t subpass_idx,
318
 
                        VkExtent2D *granularity)
319
 
{
320
 
   /* Granularity is defined by the tile size */
321
 
   assert(subpass_idx < pass->subpass_count);
322
 
   struct v3dv_subpass *subpass = &pass->subpasses[subpass_idx];
323
 
   const uint32_t color_attachment_count = subpass->color_count;
324
 
 
325
 
   bool msaa = false;
326
 
   uint32_t max_bpp = 0;
327
 
   for (uint32_t i = 0; i < color_attachment_count; i++) {
328
 
      uint32_t attachment_idx = subpass->color_attachments[i].attachment;
329
 
      if (attachment_idx == VK_ATTACHMENT_UNUSED)
330
 
         continue;
331
 
      const VkAttachmentDescription2 *desc =
332
 
         &pass->attachments[attachment_idx].desc;
333
 
      const struct v3dv_format *format = v3dv_X(device, get_format)(desc->format);
334
 
      uint32_t internal_type, internal_bpp;
335
 
      v3dv_X(device, get_internal_type_bpp_for_output_format)
336
 
         (format->rt_type, &internal_type, &internal_bpp);
337
 
 
338
 
      max_bpp = MAX2(max_bpp, internal_bpp);
339
 
 
340
 
      if (desc->samples > VK_SAMPLE_COUNT_1_BIT)
341
 
         msaa = true;
342
 
   }
343
 
 
344
 
   uint32_t width, height;
345
 
   bool double_buffer = (V3D_DEBUG & V3D_DEBUG_DOUBLE_BUFFER) && !msaa;
346
 
   v3d_choose_tile_size(color_attachment_count, max_bpp, msaa,
347
 
                        double_buffer, &width, &height);
348
 
   *granularity = (VkExtent2D) {
349
 
      .width = width,
350
 
      .height = height
351
 
   };
352
 
}
353
 
 
354
 
VKAPI_ATTR void VKAPI_CALL
355
 
v3dv_GetRenderAreaGranularity(VkDevice _device,
356
 
                              VkRenderPass renderPass,
357
 
                              VkExtent2D *pGranularity)
358
 
{
359
 
   V3DV_FROM_HANDLE(v3dv_render_pass, pass, renderPass);
360
 
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
361
 
 
362
 
   *pGranularity = (VkExtent2D) {
363
 
      .width = 64,
364
 
      .height = 64,
365
 
   };
366
 
 
367
 
   for (uint32_t i = 0; i < pass->subpass_count; i++) {
368
 
      VkExtent2D sg;
369
 
      subpass_get_granularity(device, pass, i, &sg);
370
 
      pGranularity->width = MIN2(pGranularity->width, sg.width);
371
 
      pGranularity->height = MIN2(pGranularity->height, sg.height);
372
 
   }
373
 
}
374
 
 
375
 
/* Checks whether the render area rectangle covers a region that is aligned to
376
 
 * tile boundaries. This means that we are writing to all pixels covered by
377
 
 * all tiles in that area (except for pixels on edge tiles that are outside
378
 
 * the framebuffer dimensions).
379
 
 *
380
 
 * When our framebuffer is aligned to tile boundaries we know we are writing
381
 
 * valid data to all all pixels in each tile and we can apply certain
382
 
 * optimizations, like avoiding tile loads, since we know that none of the
383
 
 * original pixel values in each tile for that area need to be preserved.
384
 
 * We also use this to decide if we can use TLB clears, as these clear whole
385
 
 * tiles so we can't use them if the render area is not aligned.
386
 
 *
387
 
 * Note that when an image is created it will possibly include padding blocks
388
 
 * depending on its tiling layout. When the framebuffer dimensions are not
389
 
 * aligned to tile boundaries then edge tiles are only partially covered by the
390
 
 * framebuffer pixels, but tile stores still seem to store full tiles
391
 
 * writing to the padded sections. This is important when the framebuffer
392
 
 * is aliasing a smaller section of a larger image, as in that case the edge
393
 
 * tiles of the framebuffer would overwrite valid pixels in the larger image.
394
 
 * In that case, we can't flag the area as being aligned.
395
 
 */
396
 
bool
397
 
v3dv_subpass_area_is_tile_aligned(struct v3dv_device *device,
398
 
                                  const VkRect2D *area,
399
 
                                  struct v3dv_framebuffer *fb,
400
 
                                  struct v3dv_render_pass *pass,
401
 
                                  uint32_t subpass_idx)
402
 
{
403
 
   assert(subpass_idx < pass->subpass_count);
404
 
 
405
 
   VkExtent2D granularity;
406
 
   subpass_get_granularity(device, pass, subpass_idx, &granularity);
407
 
 
408
 
   return area->offset.x % granularity.width == 0 &&
409
 
          area->offset.y % granularity.height == 0 &&
410
 
         (area->extent.width % granularity.width == 0 ||
411
 
          (fb->has_edge_padding &&
412
 
           area->offset.x + area->extent.width >= fb->width)) &&
413
 
         (area->extent.height % granularity.height == 0 ||
414
 
          (fb->has_edge_padding &&
415
 
           area->offset.y + area->extent.height >= fb->height));
416
 
}